diff options
author | Rafael G. Martins <rafael@rafaelmartins.eng.br> | 2015-04-17 01:47:41 -0300 |
---|---|---|
committer | Rafael G. Martins <rafael@rafaelmartins.eng.br> | 2015-04-17 01:47:41 -0300 |
commit | 047e4e3753c597628024847a524d44ca67fa1382 (patch) | |
tree | 8c0d63ecd8d395815a4eee155c2ce6283e0e7fab | |
parent | f5da9cc42acc79d7dda78e57fab8a1b8d7aa6a7d (diff) | |
download | blogc-047e4e3753c597628024847a524d44ca67fa1382.tar.gz blogc-047e4e3753c597628024847a524d44ca67fa1382.tar.bz2 blogc-047e4e3753c597628024847a524d44ca67fa1382.zip |
replaced leg-based parser with handmade parser for source files
-rw-r--r-- | .gitignore | 3 | ||||
-rw-r--r-- | Makefile.am | 19 | ||||
-rw-r--r-- | src/main.c | 10 | ||||
-rw-r--r-- | src/output.c | 39 | ||||
-rw-r--r-- | src/output.h | 17 | ||||
-rw-r--r-- | src/source-grammar.leg | 116 | ||||
-rw-r--r-- | src/source-parser.c | 147 | ||||
-rw-r--r-- | src/source-parser.h (renamed from src/source-grammar.h) | 9 | ||||
-rw-r--r-- | src/utils/trie.c | 2 | ||||
-rw-r--r-- | src/utils/utils.h | 1 | ||||
-rw-r--r-- | tests/check_source_parser.c (renamed from tests/check_source_grammar.c) | 13 | ||||
-rw-r--r-- | tests/check_template_grammar.c | 47 |
12 files changed, 283 insertions, 140 deletions
@@ -40,12 +40,11 @@ Makefile.in /blogc # tests -/tests/check_source_grammar +/tests/check_source_parser /tests/check_template_grammar /tests/check_utils # leg generated source -/src/source-grammar.c /src/template-grammar.c # tarballs diff --git a/Makefile.am b/Makefile.am index b1548bc..3138d25 100644 --- a/Makefile.am +++ b/Makefile.am @@ -14,7 +14,6 @@ AM_DISTCHECK_CONFIGURE_FLAGS = \ EXTRA_DIST = \ autogen.sh \ README.md \ - src/source-grammar.leg \ src/template-grammar.leg \ $(NULL) @@ -22,7 +21,8 @@ CLEANFILES = \ $(NULL) noinst_HEADERS = \ - src/source-grammar.h \ + src/output.h \ + src/source-parser.h \ src/template-grammar.h \ src/utils/utils.h \ $(NULL) @@ -43,7 +43,8 @@ check_PROGRAMS = \ libblogc_la_SOURCES = \ - src/source-grammar.c \ + src/output.c \ + src/source-parser.c \ src/template-grammar.c \ src/utils/slist.c \ src/utils/strings.c \ @@ -92,24 +93,24 @@ endif if USE_CMOCKA check_PROGRAMS += \ - tests/check_source_grammar \ + tests/check_source_parser \ tests/check_template_grammar \ tests/check_utils \ $(NULL) -tests_check_source_grammar_SOURCES = \ - tests/check_source_grammar.c \ +tests_check_source_parser_SOURCES = \ + tests/check_source_parser.c \ $(NULL) -tests_check_source_grammar_CFLAGS = \ +tests_check_source_parser_CFLAGS = \ $(CMOCKA_CFLAGS) \ $(NULL) -tests_check_source_grammar_LDFLAGS = \ +tests_check_source_parser_LDFLAGS = \ -no-install \ $(NULL) -tests_check_source_grammar_LDADD = \ +tests_check_source_parser_LDADD = \ $(CMOCKA_LIBS) \ libblogc.la \ $(NULL) @@ -12,17 +12,19 @@ #include <stdio.h> -#include "source-grammar.h" +#include "source-parser.h" +#include <string.h> int main(int argc, char **argv) { - blogc_source_t *t = blogc_source_parse( - "\n \nBOLA: guda\n\t\n\n\n\n" + const char *a = + "\n \nBOLA : guda\n\t\n\n\n\n" "CHUNDA: asd\n" "----\n" - "{% block single_source %}\nbola\n\nzas\n"); + "{% block single_source %}\nbola\n\nzas\n"; + blogc_source_t *t = blogc_source_parse(a, strlen(a)); printf("%s\n", t->content); printf("Hello, World!\n"); return 0; diff --git a/src/output.c b/src/output.c new file mode 100644 index 0000000..bd96b8e --- /dev/null +++ b/src/output.c @@ -0,0 +1,39 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015 Rafael G. Martins <rafael@rafaelmartins.eng.br> + * + * This program can be distributed under the terms of the BSD License. + * See the file COPYING. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <stdio.h> +#include "utils/utils.h" +#include "output.h" + + +void +blogc_parser_syntax_error(const char *name, const char *src, size_t src_len, + size_t current) +{ + b_string_t *msg = b_string_new(); + + while (current < src_len) { + char c = src[current]; + + if (c == '\r' || c == '\n') + break; + + b_string_append_c(msg, c); + + current++; + } + + fprintf(stderr, "%s parser error: syntax error near \"%s\"\n", name, + msg->str); + + b_string_free(msg, true); +} diff --git a/src/output.h b/src/output.h new file mode 100644 index 0000000..b2f3cf3 --- /dev/null +++ b/src/output.h @@ -0,0 +1,17 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015 Rafael G. Martins <rafael@rafaelmartins.eng.br> + * + * This program can be distributed under the terms of the LGPL-2 License. + * See the file COPYING. + */ + +#ifndef _OUTPUT_H +#define _OUTPUT_H + +#include <stdlib.h> + +void blogc_parser_syntax_error(const char *name, const char *src, + size_t src_len, size_t current); + +#endif /* _OUTPUT_H */ diff --git a/src/source-grammar.leg b/src/source-grammar.leg deleted file mode 100644 index faab9a4..0000000 --- a/src/source-grammar.leg +++ /dev/null @@ -1,116 +0,0 @@ -# -# blogc: A balde compiler. -# Copyright (C) 2015 Rafael G. Martins <rafael@rafaelmartins.eng.br> -# -# This program can be distributed under the terms of the BSD License. -# See the file COPYING. -# - -%{ - -#include <stdio.h> -#include "utils/utils.h" -#include "source-grammar.h" - -#define YY_INPUT(buf, result, max_size) \ -{ \ - int yyc = (charbuf && *charbuf != '\0') ? *charbuf++ : EOF; \ - result = (EOF == yyc) ? 0 : (*buf = yyc, 1); \ -} - - -static b_trie_t *config = NULL; -static const char *charbuf = NULL; -static char *key = NULL; -static char *content = NULL; - - -static void -blogc_source_config_key(const char *value) -{ - if (key != NULL) { - fprintf(stderr, "Syntax error: configuration key already set: %s\n", key); - exit(1); - } - key = b_strdup(value); -} - - -static void -blogc_source_config_value(const char *value) -{ - if (key == NULL) { - fprintf(stderr, "Syntax error: configuration value without a key: %s\n", value); - exit(1); - } - b_trie_insert(config, key, b_str_strip(b_strdup(value))); - free(key); - key = NULL; -} - - -static void -blogc_source_content(const char *value) -{ - if (content != NULL) { - fprintf(stderr, "Syntax error: content set twice\n"); - exit(1); - } - content = b_strdup(value); -} - -%} - -page = ( ( - eol )* ( config ( - eol )* )+ '----' '-'* eol content eof ) | anything - { fprintf(stderr, "Syntax error near: %s\n", yytext); exit(1); } - -# Useful rules -eol = '\n' | '\r\n' | '\r' -eof = !. -- = [\t ]* -id = [A-Z][A-Z0-9_]* -anything = < ( !eol . )* > eol - -# Configuration -config_key = < id > { blogc_source_config_key(yytext); } -config_value = < anything > { blogc_source_config_value(yytext); } -config = config_key ':' ' '+ config_value - -# Generic content -content = < ( !eof . )+ > { blogc_source_content(yytext); } - -%% - - -void -blogc_source_free(blogc_source_t *source) -{ - if (source == NULL) - return; - free(source->content); - b_trie_free(source->config); - free(source); -} - - -static void -blogc_source_config_free(void *ptr) -{ - free(ptr); -} - - -blogc_source_t* -blogc_source_parse(const char *tmpl) -{ - charbuf = tmpl; - config = b_trie_new(blogc_source_config_free); - key = NULL; - content = NULL; - while(yyparse()); - blogc_source_t *rv = malloc(sizeof(blogc_source_t)); - rv->content = content; - rv->config = config; - charbuf = NULL; - return rv; -} diff --git a/src/source-parser.c b/src/source-parser.c new file mode 100644 index 0000000..ad7e77d --- /dev/null +++ b/src/source-parser.c @@ -0,0 +1,147 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015 Rafael G. Martins <rafael@rafaelmartins.eng.br> + * + * This program can be distributed under the terms of the BSD License. + * See the file COPYING. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <stdbool.h> + +#include "utils/utils.h" +#include "source-parser.h" +#include "output.h" + + +typedef enum { + SOURCE_START = 1, + SOURCE_CONFIG_KEY, + SOURCE_CONFIG_VALUE_START, + SOURCE_CONFIG_VALUE, + SOURCE_SEPARATOR, + SOURCE_CONTENT_START, + SOURCE_CONTENT, +} blogc_source_parser_state_t; + + +blogc_source_t* +blogc_source_parse(const char *src, size_t src_len) +{ + size_t current = 0; + size_t start = 0; + + bool error = false; + char *key = NULL; + char *tmp = NULL; + b_trie_t *config = b_trie_new(free); + char *content = NULL; + + blogc_source_parser_state_t state = SOURCE_START; + + while (current < src_len) { + char c = src[current]; + + switch (state) { + + case SOURCE_START: + if (c == ' ' || c == '\t' || c == '\n' || c == '\r') + break; + if (c >= 'A' && c <= 'Z') { + state = SOURCE_CONFIG_KEY; + start = current; + break; + } + if (c == '-') { + state = SOURCE_SEPARATOR; + break; + } + error = true; + break; + + case SOURCE_CONFIG_KEY: + if ((c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_') + break; + if (c == ':') { + key = b_strndup(src + start, current - start); + state = SOURCE_CONFIG_VALUE_START; + break; + } + error = true; + break; + + case SOURCE_CONFIG_VALUE_START: + if (c != '\n' && c != '\r') { + state = SOURCE_CONFIG_VALUE; + start = current; + break; + } + error = true; + break; + + case SOURCE_CONFIG_VALUE: + if (c == '\n' || c == '\r') { + tmp = b_strndup(src + start, current - start); + b_trie_insert(config, key, b_strdup(b_str_strip(tmp))); + free(tmp); + free(key); + key = NULL; + state = SOURCE_START; + } + break; + + case SOURCE_SEPARATOR: + if (c == '-') + break; + if (c == '\n' || c == '\r') { + state = SOURCE_CONTENT_START; + break; + } + error = true; + break; + + case SOURCE_CONTENT_START: + start = current; + state = SOURCE_CONTENT; + break; + + case SOURCE_CONTENT: + if (current == (src_len - 1)) + content = b_strndup(src + start, src_len - start); + break; + } + + if (error) + break; + + current++; + } + + if (error) { + free(key); + free(content); + b_trie_free(config); + blogc_parser_syntax_error("source", src, src_len, current); + return NULL; + } + + blogc_source_t *rv = malloc(sizeof(blogc_source_t)); + rv->config = config; + rv->content = content; + + return rv; +} + + +void +blogc_source_free(blogc_source_t *source) +{ + if (source == NULL) + return; + free(source->content); + b_trie_free(source->config); + free(source); +} diff --git a/src/source-grammar.h b/src/source-parser.h index 540c21b..6f41c14 100644 --- a/src/source-grammar.h +++ b/src/source-parser.h @@ -6,9 +6,10 @@ * See the file COPYING. */ -#ifndef _SOURCE_GRAMAR_H -#define _SOURCE_GRAMAR_H +#ifndef _SOURCE_PARSER_H +#define _SOURCE_PARSER_H +#include <stdlib.h> #include "utils/utils.h" typedef struct { @@ -16,7 +17,7 @@ typedef struct { char *content; } blogc_source_t; -blogc_source_t* blogc_source_parse(const char *tmpl); +blogc_source_t* blogc_source_parse(const char *src, size_t src_len); void blogc_source_free(blogc_source_t *source); -#endif /* _SOURCE_GRAMAR_H */ +#endif /* _SOURCE_PARSER_H */ diff --git a/src/utils/trie.c b/src/utils/trie.c index f447860..b92573f 100644 --- a/src/utils/trie.c +++ b/src/utils/trie.c @@ -38,6 +38,8 @@ b_trie_free_node(b_trie_t *trie, b_trie_node_t *node) void b_trie_free(b_trie_t *trie) { + if (trie == NULL) + return; b_trie_free_node(trie, trie->root); free(trie); } diff --git a/src/utils/utils.h b/src/utils/utils.h index 20259d8..55b9c59 100644 --- a/src/utils/utils.h +++ b/src/utils/utils.h @@ -10,6 +10,7 @@ #define _UTILS_UTILS_H #include <stdbool.h> +#include <stdlib.h> #include <stdarg.h> #define B_STRING_CHUNK_SIZE 128 diff --git a/tests/check_source_grammar.c b/tests/check_source_parser.c index b2f581b..2f5880a 100644 --- a/tests/check_source_grammar.c +++ b/tests/check_source_parser.c @@ -14,19 +14,21 @@ #include <stddef.h> #include <setjmp.h> #include <cmocka.h> -#include "../src/source-grammar.h" +#include <string.h> +#include "../src/source-parser.h" static void test_source_parse(void **state) { - blogc_source_t *source = blogc_source_parse( + const char *a = "VAR1: asd asd\n" "VAR2: 123chunda\n" "----------\n" "# This is a test\n" "\n" - "bola\n"); + "bola\n"; + blogc_source_t *source = blogc_source_parse(a, strlen(a)); assert_non_null(source); assert_int_equal(b_trie_size(source->config), 2); assert_string_equal(b_trie_lookup(source->config, "VAR1"), "asd asd"); @@ -42,7 +44,7 @@ test_source_parse(void **state) static void test_source_parse_with_spaces(void **state) { - blogc_source_t *source = blogc_source_parse( + const char *a = "\n \n" "VAR1: chunda \t \n" "\n\n" @@ -50,7 +52,8 @@ test_source_parse_with_spaces(void **state) "----------\n" "# This is a test\n" "\n" - "bola\n"); + "bola\n"; + blogc_source_t *source = blogc_source_parse(a, strlen(a)); assert_non_null(source); assert_int_equal(b_trie_size(source->config), 2); assert_string_equal(b_trie_lookup(source->config, "VAR1"), "chunda"); diff --git a/tests/check_template_grammar.c b/tests/check_template_grammar.c index ffad8fb..3b4dcca 100644 --- a/tests/check_template_grammar.c +++ b/tests/check_template_grammar.c @@ -88,11 +88,58 @@ test_template_parse(void **state) } +static void +test_template_parse_html(void **state) +{ + b_slist_t *stmts = blogc_template_parse( + "<html>\n" + " <head>\n" + " {% block single_source %}\n" + " <title>My cool blog >> {{ TITLE }}</title>\n" + " {% endblock %}\n" + " {% block multiple_sources %}\n" + " <title>My cool blog - Main page</title>\n" + " {% endblock %}\n" + " </head>\n" + " <body>\n" + " <h1>My cool blog</h1>\n" + " {% block single_source %}\n" + " <h2>{{ TITLE }}</h2>\n" + " {% if DATE %}<h4>Published in: {{ DATE }}</h4>{% endif %}\n" + " <pre>{{ CONTENT }}</pre>\n" + " {% endblock %}\n" + " {% block multiple_sources_once %}<ul>{% endblock %}\n" + " {% block multiple_sources %}<p><a href=\"{{ FILENAME }}.html\">" + "{{ TITLE }}</a>{% if DATE %} - {{ DATE }}{% endif %}</p>{% endblock %}\n" + " {% block multiple_sources_once %}</ul>{% endblock %}\n" + " </body>\n" + "</html>\n"); + assert_non_null(stmts); + blogc_assert_template_stmt(stmts, "<html>\n <head>\n ", + BLOGC_TEMPLATE_CONTENT_STMT); + blogc_assert_template_stmt(stmts->next, "single_source", + BLOGC_TEMPLATE_BLOCK_STMT); + blogc_assert_template_stmt(stmts->next->next, + "\n <title>My cool blog >> ", BLOGC_TEMPLATE_CONTENT_STMT); + blogc_assert_template_stmt(stmts->next->next->next, "TITLE", + BLOGC_TEMPLATE_VARIABLE_STMT); + blogc_assert_template_stmt(stmts->next->next->next->next, + "</title>\n ", BLOGC_TEMPLATE_CONTENT_STMT); + + + + + + blogc_template_free_stmts(stmts); +} + + int main(void) { const UnitTest tests[] = { unit_test(test_template_parse), + unit_test(test_template_parse_html), }; return run_tests(tests); } |