From 047e4e3753c597628024847a524d44ca67fa1382 Mon Sep 17 00:00:00 2001 From: "Rafael G. Martins" Date: Fri, 17 Apr 2015 01:47:41 -0300 Subject: replaced leg-based parser with handmade parser for source files --- .gitignore | 3 +- Makefile.am | 19 +++--- src/main.c | 10 +-- src/output.c | 39 +++++++++++ src/output.h | 17 +++++ src/source-grammar.h | 22 ------ src/source-grammar.leg | 116 -------------------------------- src/source-parser.c | 147 +++++++++++++++++++++++++++++++++++++++++ src/source-parser.h | 23 +++++++ src/utils/trie.c | 2 + src/utils/utils.h | 1 + tests/check_source_grammar.c | 74 --------------------- tests/check_source_parser.c | 77 +++++++++++++++++++++ tests/check_template_grammar.c | 47 +++++++++++++ 14 files changed, 370 insertions(+), 227 deletions(-) create mode 100644 src/output.c create mode 100644 src/output.h delete mode 100644 src/source-grammar.h delete mode 100644 src/source-grammar.leg create mode 100644 src/source-parser.c create mode 100644 src/source-parser.h delete mode 100644 tests/check_source_grammar.c create mode 100644 tests/check_source_parser.c diff --git a/.gitignore b/.gitignore index b7c5f8e..92256ca 100644 --- a/.gitignore +++ b/.gitignore @@ -40,12 +40,11 @@ Makefile.in /blogc # tests -/tests/check_source_grammar +/tests/check_source_parser /tests/check_template_grammar /tests/check_utils # leg generated source -/src/source-grammar.c /src/template-grammar.c # tarballs diff --git a/Makefile.am b/Makefile.am index b1548bc..3138d25 100644 --- a/Makefile.am +++ b/Makefile.am @@ -14,7 +14,6 @@ AM_DISTCHECK_CONFIGURE_FLAGS = \ EXTRA_DIST = \ autogen.sh \ README.md \ - src/source-grammar.leg \ src/template-grammar.leg \ $(NULL) @@ -22,7 +21,8 @@ CLEANFILES = \ $(NULL) noinst_HEADERS = \ - src/source-grammar.h \ + src/output.h \ + src/source-parser.h \ src/template-grammar.h \ src/utils/utils.h \ $(NULL) @@ -43,7 +43,8 @@ check_PROGRAMS = \ libblogc_la_SOURCES = \ - src/source-grammar.c \ + src/output.c \ + src/source-parser.c \ src/template-grammar.c \ src/utils/slist.c \ src/utils/strings.c \ @@ -92,24 +93,24 @@ endif if USE_CMOCKA check_PROGRAMS += \ - tests/check_source_grammar \ + tests/check_source_parser \ tests/check_template_grammar \ tests/check_utils \ $(NULL) -tests_check_source_grammar_SOURCES = \ - tests/check_source_grammar.c \ +tests_check_source_parser_SOURCES = \ + tests/check_source_parser.c \ $(NULL) -tests_check_source_grammar_CFLAGS = \ +tests_check_source_parser_CFLAGS = \ $(CMOCKA_CFLAGS) \ $(NULL) -tests_check_source_grammar_LDFLAGS = \ +tests_check_source_parser_LDFLAGS = \ -no-install \ $(NULL) -tests_check_source_grammar_LDADD = \ +tests_check_source_parser_LDADD = \ $(CMOCKA_LIBS) \ libblogc.la \ $(NULL) diff --git a/src/main.c b/src/main.c index 3dbf325..d05c8d6 100644 --- a/src/main.c +++ b/src/main.c @@ -12,17 +12,19 @@ #include -#include "source-grammar.h" +#include "source-parser.h" +#include int main(int argc, char **argv) { - blogc_source_t *t = blogc_source_parse( - "\n \nBOLA: guda\n\t\n\n\n\n" + const char *a = + "\n \nBOLA : guda\n\t\n\n\n\n" "CHUNDA: asd\n" "----\n" - "{% block single_source %}\nbola\n\nzas\n"); + "{% block single_source %}\nbola\n\nzas\n"; + blogc_source_t *t = blogc_source_parse(a, strlen(a)); printf("%s\n", t->content); printf("Hello, World!\n"); return 0; diff --git a/src/output.c b/src/output.c new file mode 100644 index 0000000..bd96b8e --- /dev/null +++ b/src/output.c @@ -0,0 +1,39 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015 Rafael G. Martins + * + * This program can be distributed under the terms of the BSD License. + * See the file COPYING. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ + +#include +#include "utils/utils.h" +#include "output.h" + + +void +blogc_parser_syntax_error(const char *name, const char *src, size_t src_len, + size_t current) +{ + b_string_t *msg = b_string_new(); + + while (current < src_len) { + char c = src[current]; + + if (c == '\r' || c == '\n') + break; + + b_string_append_c(msg, c); + + current++; + } + + fprintf(stderr, "%s parser error: syntax error near \"%s\"\n", name, + msg->str); + + b_string_free(msg, true); +} diff --git a/src/output.h b/src/output.h new file mode 100644 index 0000000..b2f3cf3 --- /dev/null +++ b/src/output.h @@ -0,0 +1,17 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015 Rafael G. Martins + * + * This program can be distributed under the terms of the LGPL-2 License. + * See the file COPYING. + */ + +#ifndef _OUTPUT_H +#define _OUTPUT_H + +#include + +void blogc_parser_syntax_error(const char *name, const char *src, + size_t src_len, size_t current); + +#endif /* _OUTPUT_H */ diff --git a/src/source-grammar.h b/src/source-grammar.h deleted file mode 100644 index 540c21b..0000000 --- a/src/source-grammar.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * blogc: A blog compiler. - * Copyright (C) 2015 Rafael G. Martins - * - * This program can be distributed under the terms of the LGPL-2 License. - * See the file COPYING. - */ - -#ifndef _SOURCE_GRAMAR_H -#define _SOURCE_GRAMAR_H - -#include "utils/utils.h" - -typedef struct { - b_trie_t *config; - char *content; -} blogc_source_t; - -blogc_source_t* blogc_source_parse(const char *tmpl); -void blogc_source_free(blogc_source_t *source); - -#endif /* _SOURCE_GRAMAR_H */ diff --git a/src/source-grammar.leg b/src/source-grammar.leg deleted file mode 100644 index faab9a4..0000000 --- a/src/source-grammar.leg +++ /dev/null @@ -1,116 +0,0 @@ -# -# blogc: A balde compiler. -# Copyright (C) 2015 Rafael G. Martins -# -# This program can be distributed under the terms of the BSD License. -# See the file COPYING. -# - -%{ - -#include -#include "utils/utils.h" -#include "source-grammar.h" - -#define YY_INPUT(buf, result, max_size) \ -{ \ - int yyc = (charbuf && *charbuf != '\0') ? *charbuf++ : EOF; \ - result = (EOF == yyc) ? 0 : (*buf = yyc, 1); \ -} - - -static b_trie_t *config = NULL; -static const char *charbuf = NULL; -static char *key = NULL; -static char *content = NULL; - - -static void -blogc_source_config_key(const char *value) -{ - if (key != NULL) { - fprintf(stderr, "Syntax error: configuration key already set: %s\n", key); - exit(1); - } - key = b_strdup(value); -} - - -static void -blogc_source_config_value(const char *value) -{ - if (key == NULL) { - fprintf(stderr, "Syntax error: configuration value without a key: %s\n", value); - exit(1); - } - b_trie_insert(config, key, b_str_strip(b_strdup(value))); - free(key); - key = NULL; -} - - -static void -blogc_source_content(const char *value) -{ - if (content != NULL) { - fprintf(stderr, "Syntax error: content set twice\n"); - exit(1); - } - content = b_strdup(value); -} - -%} - -page = ( ( - eol )* ( config ( - eol )* )+ '----' '-'* eol content eof ) | anything - { fprintf(stderr, "Syntax error near: %s\n", yytext); exit(1); } - -# Useful rules -eol = '\n' | '\r\n' | '\r' -eof = !. -- = [\t ]* -id = [A-Z][A-Z0-9_]* -anything = < ( !eol . )* > eol - -# Configuration -config_key = < id > { blogc_source_config_key(yytext); } -config_value = < anything > { blogc_source_config_value(yytext); } -config = config_key ':' ' '+ config_value - -# Generic content -content = < ( !eof . )+ > { blogc_source_content(yytext); } - -%% - - -void -blogc_source_free(blogc_source_t *source) -{ - if (source == NULL) - return; - free(source->content); - b_trie_free(source->config); - free(source); -} - - -static void -blogc_source_config_free(void *ptr) -{ - free(ptr); -} - - -blogc_source_t* -blogc_source_parse(const char *tmpl) -{ - charbuf = tmpl; - config = b_trie_new(blogc_source_config_free); - key = NULL; - content = NULL; - while(yyparse()); - blogc_source_t *rv = malloc(sizeof(blogc_source_t)); - rv->content = content; - rv->config = config; - charbuf = NULL; - return rv; -} diff --git a/src/source-parser.c b/src/source-parser.c new file mode 100644 index 0000000..ad7e77d --- /dev/null +++ b/src/source-parser.c @@ -0,0 +1,147 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015 Rafael G. Martins + * + * This program can be distributed under the terms of the BSD License. + * See the file COPYING. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ + +#include + +#include "utils/utils.h" +#include "source-parser.h" +#include "output.h" + + +typedef enum { + SOURCE_START = 1, + SOURCE_CONFIG_KEY, + SOURCE_CONFIG_VALUE_START, + SOURCE_CONFIG_VALUE, + SOURCE_SEPARATOR, + SOURCE_CONTENT_START, + SOURCE_CONTENT, +} blogc_source_parser_state_t; + + +blogc_source_t* +blogc_source_parse(const char *src, size_t src_len) +{ + size_t current = 0; + size_t start = 0; + + bool error = false; + char *key = NULL; + char *tmp = NULL; + b_trie_t *config = b_trie_new(free); + char *content = NULL; + + blogc_source_parser_state_t state = SOURCE_START; + + while (current < src_len) { + char c = src[current]; + + switch (state) { + + case SOURCE_START: + if (c == ' ' || c == '\t' || c == '\n' || c == '\r') + break; + if (c >= 'A' && c <= 'Z') { + state = SOURCE_CONFIG_KEY; + start = current; + break; + } + if (c == '-') { + state = SOURCE_SEPARATOR; + break; + } + error = true; + break; + + case SOURCE_CONFIG_KEY: + if ((c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_') + break; + if (c == ':') { + key = b_strndup(src + start, current - start); + state = SOURCE_CONFIG_VALUE_START; + break; + } + error = true; + break; + + case SOURCE_CONFIG_VALUE_START: + if (c != '\n' && c != '\r') { + state = SOURCE_CONFIG_VALUE; + start = current; + break; + } + error = true; + break; + + case SOURCE_CONFIG_VALUE: + if (c == '\n' || c == '\r') { + tmp = b_strndup(src + start, current - start); + b_trie_insert(config, key, b_strdup(b_str_strip(tmp))); + free(tmp); + free(key); + key = NULL; + state = SOURCE_START; + } + break; + + case SOURCE_SEPARATOR: + if (c == '-') + break; + if (c == '\n' || c == '\r') { + state = SOURCE_CONTENT_START; + break; + } + error = true; + break; + + case SOURCE_CONTENT_START: + start = current; + state = SOURCE_CONTENT; + break; + + case SOURCE_CONTENT: + if (current == (src_len - 1)) + content = b_strndup(src + start, src_len - start); + break; + } + + if (error) + break; + + current++; + } + + if (error) { + free(key); + free(content); + b_trie_free(config); + blogc_parser_syntax_error("source", src, src_len, current); + return NULL; + } + + blogc_source_t *rv = malloc(sizeof(blogc_source_t)); + rv->config = config; + rv->content = content; + + return rv; +} + + +void +blogc_source_free(blogc_source_t *source) +{ + if (source == NULL) + return; + free(source->content); + b_trie_free(source->config); + free(source); +} diff --git a/src/source-parser.h b/src/source-parser.h new file mode 100644 index 0000000..6f41c14 --- /dev/null +++ b/src/source-parser.h @@ -0,0 +1,23 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015 Rafael G. Martins + * + * This program can be distributed under the terms of the LGPL-2 License. + * See the file COPYING. + */ + +#ifndef _SOURCE_PARSER_H +#define _SOURCE_PARSER_H + +#include +#include "utils/utils.h" + +typedef struct { + b_trie_t *config; + char *content; +} blogc_source_t; + +blogc_source_t* blogc_source_parse(const char *src, size_t src_len); +void blogc_source_free(blogc_source_t *source); + +#endif /* _SOURCE_PARSER_H */ diff --git a/src/utils/trie.c b/src/utils/trie.c index f447860..b92573f 100644 --- a/src/utils/trie.c +++ b/src/utils/trie.c @@ -38,6 +38,8 @@ b_trie_free_node(b_trie_t *trie, b_trie_node_t *node) void b_trie_free(b_trie_t *trie) { + if (trie == NULL) + return; b_trie_free_node(trie, trie->root); free(trie); } diff --git a/src/utils/utils.h b/src/utils/utils.h index 20259d8..55b9c59 100644 --- a/src/utils/utils.h +++ b/src/utils/utils.h @@ -10,6 +10,7 @@ #define _UTILS_UTILS_H #include +#include #include #define B_STRING_CHUNK_SIZE 128 diff --git a/tests/check_source_grammar.c b/tests/check_source_grammar.c deleted file mode 100644 index b2f581b..0000000 --- a/tests/check_source_grammar.c +++ /dev/null @@ -1,74 +0,0 @@ -/* - * blogc: A blog compiler. - * Copyright (C) 2015 Rafael G. Martins - * - * This program can be distributed under the terms of the BSD License. - * See the file COPYING. - */ - -#ifdef HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include -#include -#include "../src/source-grammar.h" - - -static void -test_source_parse(void **state) -{ - blogc_source_t *source = blogc_source_parse( - "VAR1: asd asd\n" - "VAR2: 123chunda\n" - "----------\n" - "# This is a test\n" - "\n" - "bola\n"); - assert_non_null(source); - assert_int_equal(b_trie_size(source->config), 2); - assert_string_equal(b_trie_lookup(source->config, "VAR1"), "asd asd"); - assert_string_equal(b_trie_lookup(source->config, "VAR2"), "123chunda"); - assert_string_equal(source->content, - "# This is a test\n" - "\n" - "bola\n"); - blogc_source_free(source); -} - - -static void -test_source_parse_with_spaces(void **state) -{ - blogc_source_t *source = blogc_source_parse( - "\n \n" - "VAR1: chunda \t \n" - "\n\n" - "BOLA: guda\n" - "----------\n" - "# This is a test\n" - "\n" - "bola\n"); - assert_non_null(source); - assert_int_equal(b_trie_size(source->config), 2); - assert_string_equal(b_trie_lookup(source->config, "VAR1"), "chunda"); - assert_string_equal(b_trie_lookup(source->config, "BOLA"), "guda"); - assert_string_equal(source->content, - "# This is a test\n" - "\n" - "bola\n"); - blogc_source_free(source); -} - - -int -main(void) -{ - const UnitTest tests[] = { - unit_test(test_source_parse), - unit_test(test_source_parse_with_spaces), - }; - return run_tests(tests); -} diff --git a/tests/check_source_parser.c b/tests/check_source_parser.c new file mode 100644 index 0000000..2f5880a --- /dev/null +++ b/tests/check_source_parser.c @@ -0,0 +1,77 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015 Rafael G. Martins + * + * This program can be distributed under the terms of the BSD License. + * See the file COPYING. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include "../src/source-parser.h" + + +static void +test_source_parse(void **state) +{ + const char *a = + "VAR1: asd asd\n" + "VAR2: 123chunda\n" + "----------\n" + "# This is a test\n" + "\n" + "bola\n"; + blogc_source_t *source = blogc_source_parse(a, strlen(a)); + assert_non_null(source); + assert_int_equal(b_trie_size(source->config), 2); + assert_string_equal(b_trie_lookup(source->config, "VAR1"), "asd asd"); + assert_string_equal(b_trie_lookup(source->config, "VAR2"), "123chunda"); + assert_string_equal(source->content, + "# This is a test\n" + "\n" + "bola\n"); + blogc_source_free(source); +} + + +static void +test_source_parse_with_spaces(void **state) +{ + const char *a = + "\n \n" + "VAR1: chunda \t \n" + "\n\n" + "BOLA: guda\n" + "----------\n" + "# This is a test\n" + "\n" + "bola\n"; + blogc_source_t *source = blogc_source_parse(a, strlen(a)); + assert_non_null(source); + assert_int_equal(b_trie_size(source->config), 2); + assert_string_equal(b_trie_lookup(source->config, "VAR1"), "chunda"); + assert_string_equal(b_trie_lookup(source->config, "BOLA"), "guda"); + assert_string_equal(source->content, + "# This is a test\n" + "\n" + "bola\n"); + blogc_source_free(source); +} + + +int +main(void) +{ + const UnitTest tests[] = { + unit_test(test_source_parse), + unit_test(test_source_parse_with_spaces), + }; + return run_tests(tests); +} diff --git a/tests/check_template_grammar.c b/tests/check_template_grammar.c index ffad8fb..3b4dcca 100644 --- a/tests/check_template_grammar.c +++ b/tests/check_template_grammar.c @@ -88,11 +88,58 @@ test_template_parse(void **state) } +static void +test_template_parse_html(void **state) +{ + b_slist_t *stmts = blogc_template_parse( + "\n" + " \n" + " {% block single_source %}\n" + " My cool blog >> {{ TITLE }}\n" + " {% endblock %}\n" + " {% block multiple_sources %}\n" + " My cool blog - Main page\n" + " {% endblock %}\n" + " \n" + " \n" + "

My cool blog

\n" + " {% block single_source %}\n" + "

{{ TITLE }}

\n" + " {% if DATE %}

Published in: {{ DATE }}

{% endif %}\n" + "
{{ CONTENT }}
\n" + " {% endblock %}\n" + " {% block multiple_sources_once %}
    {% endblock %}\n" + " {% block multiple_sources %}

    " + "{{ TITLE }}{% if DATE %} - {{ DATE }}{% endif %}

    {% endblock %}\n" + " {% block multiple_sources_once %}
{% endblock %}\n" + " \n" + "\n"); + assert_non_null(stmts); + blogc_assert_template_stmt(stmts, "\n \n ", + BLOGC_TEMPLATE_CONTENT_STMT); + blogc_assert_template_stmt(stmts->next, "single_source", + BLOGC_TEMPLATE_BLOCK_STMT); + blogc_assert_template_stmt(stmts->next->next, + "\n My cool blog >> ", BLOGC_TEMPLATE_CONTENT_STMT); + blogc_assert_template_stmt(stmts->next->next->next, "TITLE", + BLOGC_TEMPLATE_VARIABLE_STMT); + blogc_assert_template_stmt(stmts->next->next->next->next, + "\n ", BLOGC_TEMPLATE_CONTENT_STMT); + + + + + + blogc_template_free_stmts(stmts); +} + + int main(void) { const UnitTest tests[] = { unit_test(test_template_parse), + unit_test(test_template_parse_html), }; return run_tests(tests); } -- cgit v1.2.3-18-g5258