From f7aa4a3269a21f4d0c83f11a0aef4ccf821ce6e2 Mon Sep 17 00:00:00 2001 From: "Rafael G. Martins" Date: Thu, 14 Jan 2016 03:50:42 +0100 Subject: template-parser: added whitespace cleaners. needs more tests and docs --- src/template-parser.c | 99 +++++++++++++++++++++++++++++++++++++------ src/template-parser.h | 5 +++ src/utils/strings.c | 45 ++++++++++++++++---- src/utils/utils.h | 2 + tests/check_template_parser.c | 48 +++++++++++++++------ tests/check_utils.c | 55 ++++++++++++++++++++++++ 6 files changed, 218 insertions(+), 36 deletions(-) diff --git a/src/template-parser.c b/src/template-parser.c index 0783254..1d9046e 100644 --- a/src/template-parser.c +++ b/src/template-parser.c @@ -22,6 +22,7 @@ typedef enum { TEMPLATE_START = 1, TEMPLATE_OPEN_BRACKET, TEMPLATE_BLOCK_START, + TEMPLATE_BLOCK_START_WHITESPACE_CLEANER, TEMPLATE_BLOCK_TYPE, TEMPLATE_BLOCK_BLOCK_TYPE_START, TEMPLATE_BLOCK_BLOCK_TYPE, @@ -34,6 +35,7 @@ typedef enum { TEMPLATE_BLOCK_IF_VARIABLE_OPERAND, TEMPLATE_BLOCK_FOREACH_START, TEMPLATE_BLOCK_FOREACH_VARIABLE, + TEMPLATE_BLOCK_END_WHITESPACE_CLEANER, TEMPLATE_BLOCK_END, TEMPLATE_VARIABLE_START, TEMPLATE_VARIABLE, @@ -72,6 +74,20 @@ blogc_template_parse(const char *src, size_t src_len, blogc_error_t **err) b_slist_t *stmts = NULL; blogc_template_stmt_t *stmt = NULL; + /* + * this is a reference to the content of previous node in the singly-linked + * list. The "correct" solution here would be implement a doubly-linked + * list, but here are a few reasons to avoid it: + * + * - i'm too tired to implement it :P + * - template parser never walk backwards, then the list itself does not + * need to know its previous node. + */ + blogc_template_stmt_t *previous = NULL; + + bool lstrip_next = false; + char *tmp = NULL; + blogc_template_parser_state_t state = TEMPLATE_START; blogc_template_parser_block_state_t block_state = BLOCK_CLOSED; blogc_template_stmt_type_t type = BLOGC_TEMPLATE_CONTENT_STMT; @@ -86,10 +102,20 @@ blogc_template_parse(const char *src, size_t src_len, blogc_error_t **err) if (last) { stmt = b_malloc(sizeof(blogc_template_stmt_t)); stmt->type = type; - stmt->value = b_strndup(src + start, src_len - start); + if (lstrip_next) { + tmp = b_strndup(src + start, src_len - start); + stmt->value = b_strdup(b_str_lstrip(tmp)); + free(tmp); + tmp = NULL; + lstrip_next = false; + } + else { + stmt->value = b_strndup(src + start, src_len - start); + } stmt->op = 0; stmt->value2 = NULL; stmts = b_slist_append(stmts, stmt); + previous = stmt; stmt = NULL; } if (c == '{') { @@ -101,16 +127,26 @@ blogc_template_parse(const char *src, size_t src_len, blogc_error_t **err) case TEMPLATE_OPEN_BRACKET: if (c == '%' || c == '{') { if (c == '%') - state = TEMPLATE_BLOCK_START; + state = TEMPLATE_BLOCK_START_WHITESPACE_CLEANER; else state = TEMPLATE_VARIABLE_START; if (end > start) { stmt = b_malloc(sizeof(blogc_template_stmt_t)); stmt->type = type; - stmt->value = b_strndup(src + start, end - start); + if (lstrip_next) { + tmp = b_strndup(src + start, end - start); + stmt->value = b_strdup(b_str_lstrip(tmp)); + free(tmp); + tmp = NULL; + lstrip_next = false; + } + else { + stmt->value = b_strndup(src + start, end - start); + } stmt->op = 0; stmt->value2 = NULL; stmts = b_slist_append(stmts, stmt); + previous = stmt; stmt = NULL; } break; @@ -118,6 +154,18 @@ blogc_template_parse(const char *src, size_t src_len, blogc_error_t **err) state = TEMPLATE_START; break; + case TEMPLATE_BLOCK_START_WHITESPACE_CLEANER: + if (c == '-') { + if ((previous != NULL) && + (previous->type == BLOGC_TEMPLATE_CONTENT_STMT)) + { + previous->value = b_str_rstrip(previous->value); // does not need copy + } + state = TEMPLATE_BLOCK_START; + break; + } + state = TEMPLATE_BLOCK_START; + case TEMPLATE_BLOCK_START: if (c == ' ') break; @@ -126,6 +174,13 @@ blogc_template_parse(const char *src, size_t src_len, blogc_error_t **err) start = current; break; } + if (c == '-') { + *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, src, + src_len, current, + "Invalid statement syntax. Duplicated whitespace " + "cleaner before statement."); + break; + } *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, src, src_len, current, "Invalid statement syntax. Must begin with lowercase letter."); @@ -152,7 +207,7 @@ blogc_template_parse(const char *src, size_t src_len, blogc_error_t **err) (0 == strncmp("endblock", src + start, 8))) { if (block_state != BLOCK_CLOSED) { - state = TEMPLATE_BLOCK_END; + state = TEMPLATE_BLOCK_END_WHITESPACE_CLEANER; type = BLOGC_TEMPLATE_ENDBLOCK_STMT; block_state = BLOCK_CLOSED; break; @@ -193,7 +248,7 @@ blogc_template_parse(const char *src, size_t src_len, blogc_error_t **err) (0 == strncmp("endif", src + start, 5))) { if (if_count > 0) { - state = TEMPLATE_BLOCK_END; + state = TEMPLATE_BLOCK_END_WHITESPACE_CLEANER; type = BLOGC_TEMPLATE_ENDIF_STMT; if_count--; break; @@ -223,7 +278,7 @@ blogc_template_parse(const char *src, size_t src_len, blogc_error_t **err) (0 == strncmp("endforeach", src + start, 10))) { if (foreach_open) { - state = TEMPLATE_BLOCK_END; + state = TEMPLATE_BLOCK_END_WHITESPACE_CLEANER; type = BLOGC_TEMPLATE_ENDFOREACH_STMT; foreach_open = false; break; @@ -264,7 +319,7 @@ blogc_template_parse(const char *src, size_t src_len, blogc_error_t **err) { block_state = BLOCK_ENTRY; end = current; - state = TEMPLATE_BLOCK_END; + state = TEMPLATE_BLOCK_END_WHITESPACE_CLEANER; break; } else if ((current - start == 7) && @@ -272,7 +327,7 @@ blogc_template_parse(const char *src, size_t src_len, blogc_error_t **err) { block_state = BLOCK_LISTING; end = current; - state = TEMPLATE_BLOCK_END; + state = TEMPLATE_BLOCK_END_WHITESPACE_CLEANER; break; } else if ((current - start == 12) && @@ -280,7 +335,7 @@ blogc_template_parse(const char *src, size_t src_len, blogc_error_t **err) { block_state = BLOCK_LISTING_ONCE; end = current; - state = TEMPLATE_BLOCK_END; + state = TEMPLATE_BLOCK_END_WHITESPACE_CLEANER; break; } } @@ -311,7 +366,7 @@ blogc_template_parse(const char *src, size_t src_len, blogc_error_t **err) if (type == BLOGC_TEMPLATE_IF_STMT) state = TEMPLATE_BLOCK_IF_OPERATOR_START; else - state = TEMPLATE_BLOCK_END; + state = TEMPLATE_BLOCK_END_WHITESPACE_CLEANER; break; } *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, src, @@ -361,14 +416,14 @@ blogc_template_parse(const char *src, size_t src_len, blogc_error_t **err) break; if (c == '"' && src[current - 1] == '\\') break; - state = TEMPLATE_BLOCK_END; + state = TEMPLATE_BLOCK_END_WHITESPACE_CLEANER; end2 = current + 1; break; case TEMPLATE_BLOCK_IF_VARIABLE_OPERAND: if ((c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_') break; - state = TEMPLATE_BLOCK_END; + state = TEMPLATE_BLOCK_END_WHITESPACE_CLEANER; end2 = current; break; @@ -391,7 +446,7 @@ blogc_template_parse(const char *src, size_t src_len, blogc_error_t **err) break; if (c == ' ') { end = current; - state = TEMPLATE_BLOCK_END; + state = TEMPLATE_BLOCK_END_WHITESPACE_CLEANER; break; } *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, src, @@ -400,13 +455,28 @@ blogc_template_parse(const char *src, size_t src_len, blogc_error_t **err) "number or '_'."); break; - case TEMPLATE_BLOCK_END: + case TEMPLATE_BLOCK_END_WHITESPACE_CLEANER: if (c == ' ') break; + if (c == '-') { + lstrip_next = true; + state = TEMPLATE_BLOCK_END; + break; + } + state = TEMPLATE_BLOCK_END; + + case TEMPLATE_BLOCK_END: if (c == '%') { state = TEMPLATE_CLOSE_BRACKET; break; } + if (c == '-') { + *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, src, + src_len, current, + "Invalid statement syntax. Duplicated whitespace " + "cleaner after statement."); + break; + } *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, src, src_len, current, "Invalid statement syntax. Must end with '%%}'."); @@ -502,6 +572,7 @@ blogc_template_parse(const char *src, size_t src_len, blogc_error_t **err) end2 = 0; } stmts = b_slist_append(stmts, stmt); + previous = stmt; stmt = NULL; state = TEMPLATE_START; type = BLOGC_TEMPLATE_CONTENT_STMT; diff --git a/src/template-parser.h b/src/template-parser.h index 5add574..6cd2c80 100644 --- a/src/template-parser.h +++ b/src/template-parser.h @@ -12,6 +12,11 @@ #include "utils/utils.h" #include "error.h" +/* + * note: whitespace cleaners are NOT added to ast. we fix strings right during + * template parsing. renderer does not need to care about it, for the sake of + * simplicity. + */ typedef enum { BLOGC_TEMPLATE_IFDEF_STMT = 1, BLOGC_TEMPLATE_IFNDEF_STMT, diff --git a/src/utils/strings.c b/src/utils/strings.c index 6f10d56..846ae95 100644 --- a/src/utils/strings.c +++ b/src/utils/strings.c @@ -103,22 +103,44 @@ b_str_ends_with(const char *str, const char *suffix) char* -b_str_strip(char *str) +b_str_lstrip(char *str) { if (str == NULL) - return str; + return NULL; + int i; + size_t str_len = strlen(str); + for (i = 0; i < str_len; i++) { + if ((str[i] != ' ') && (str[i] != '\t') && (str[i] != '\n') && + (str[i] != '\r') && (str[i] != '\t')) + { + str += i; + break; + } + if (i == str_len - 1) { + str += str_len; + break; + } + } + return str; +} + + +char* +b_str_rstrip(char *str) +{ + if (str == NULL) + return NULL; int i; size_t str_len = strlen(str); for (i = str_len - 1; i >= 0; i--) { - if (!isspace(str[i])) { + if ((str[i] != ' ') && (str[i] != '\t') && (str[i] != '\n') && + (str[i] != '\r') && (str[i] != '\t')) + { str[i + 1] = '\0'; break; } - } - str_len = strlen(str); - for (i = 0; i < str_len; i++) { - if (!isspace(str[i])) { - str = str + i; + if (i == 0) { + str[0] = '\0'; break; } } @@ -126,6 +148,13 @@ b_str_strip(char *str) } +char* +b_str_strip(char *str) +{ + return b_str_lstrip(b_str_rstrip(str)); +} + + char** b_str_split(const char *str, char c, unsigned int max_pieces) { diff --git a/src/utils/utils.h b/src/utils/utils.h index 49a7735..dc67497 100644 --- a/src/utils/utils.h +++ b/src/utils/utils.h @@ -50,6 +50,8 @@ char* b_strdup_vprintf(const char *format, va_list ap); char* b_strdup_printf(const char *format, ...); bool b_str_starts_with(const char *str, const char *prefix); bool b_str_ends_with(const char *str, const char *suffix); +char* b_str_lstrip(char *str); +char* b_str_rstrip(char *str); char* b_str_strip(char *str); char** b_str_split(const char *str, char c, unsigned int max_pieces); char* b_str_replace(const char *str, const char search, const char *replace); diff --git a/tests/check_template_parser.c b/tests/check_template_parser.c index f9fd71a..f655896 100644 --- a/tests/check_template_parser.c +++ b/tests/check_template_parser.c @@ -51,27 +51,27 @@ test_template_parse(void **state) const char *a = "Test\n" "\n" - " {% block entry %}\n" + " {%- block entry -%}\n" "{% ifdef CHUNDA %}\n" "bola\n" "{% endif %}\n" "{% ifndef BOLA %}\n" "bolao\n" - "{% endif %}\n" + "{%- endif %}\n" "{% endblock %}\n" "{% block listing %}{{ BOLA }}{% endblock %}\n" "{% block listing_once %}asd{% endblock %}\n" - "{% foreach BOLA %}hahaha{% endforeach %}\n" + "{%- foreach BOLA %}hahaha{% endforeach %}\n" "{% if BOLA == \"1\\\"0\" %}aee{% endif %}"; blogc_error_t *err = NULL; b_slist_t *stmts = blogc_template_parse(a, strlen(a), &err); assert_null(err); assert_non_null(stmts); - blogc_assert_template_stmt(stmts, "Test\n\n ", + blogc_assert_template_stmt(stmts, "Test", BLOGC_TEMPLATE_CONTENT_STMT); blogc_assert_template_stmt(stmts->next, "entry", BLOGC_TEMPLATE_BLOCK_STMT); - blogc_assert_template_stmt(stmts->next->next, "\n", + blogc_assert_template_stmt(stmts->next->next, "", BLOGC_TEMPLATE_CONTENT_STMT); blogc_assert_template_stmt(stmts->next->next->next, "CHUNDA", BLOGC_TEMPLATE_IFDEF_STMT); @@ -83,7 +83,7 @@ test_template_parse(void **state) BLOGC_TEMPLATE_CONTENT_STMT); b_slist_t *tmp = stmts->next->next->next->next->next->next->next; blogc_assert_template_stmt(tmp, "BOLA", BLOGC_TEMPLATE_IFNDEF_STMT); - blogc_assert_template_stmt(tmp->next, "\nbolao\n", BLOGC_TEMPLATE_CONTENT_STMT); + blogc_assert_template_stmt(tmp->next, "\nbolao", BLOGC_TEMPLATE_CONTENT_STMT); blogc_assert_template_stmt(tmp->next->next, NULL, BLOGC_TEMPLATE_ENDIF_STMT); blogc_assert_template_stmt(tmp->next->next->next, "\n", BLOGC_TEMPLATE_CONTENT_STMT); @@ -105,7 +105,7 @@ test_template_parse(void **state) blogc_assert_template_stmt(tmp->next->next->next->next->next->next->next->next, NULL, BLOGC_TEMPLATE_ENDBLOCK_STMT); blogc_assert_template_stmt(tmp->next->next->next->next->next->next->next->next->next, - "\n", BLOGC_TEMPLATE_CONTENT_STMT); + "", BLOGC_TEMPLATE_CONTENT_STMT); tmp = tmp->next->next->next->next->next->next->next->next->next->next; blogc_assert_template_stmt(tmp, "BOLA", BLOGC_TEMPLATE_FOREACH_STMT); blogc_assert_template_stmt(tmp->next, "hahaha", @@ -131,27 +131,27 @@ test_template_parse_crlf(void **state) const char *a = "Test\r\n" "\r\n" - " {% block entry %}\r\n" + " {%- block entry -%}\r\n" "{% ifdef CHUNDA %}\r\n" "bola\r\n" "{% endif %}\r\n" "{% ifndef BOLA %}\r\n" "bolao\r\n" - "{% endif %}\r\n" + "{%- endif %}\r\n" "{% endblock %}\r\n" "{% block listing %}{{ BOLA }}{% endblock %}\r\n" "{% block listing_once %}asd{% endblock %}\r\n" - "{% foreach BOLA %}hahaha{% endforeach %}\r\n" + "{%- foreach BOLA %}hahaha{% endforeach %}\r\n" "{% if BOLA == \"1\\\"0\" %}aee{% endif %}"; blogc_error_t *err = NULL; b_slist_t *stmts = blogc_template_parse(a, strlen(a), &err); assert_null(err); assert_non_null(stmts); - blogc_assert_template_stmt(stmts, "Test\r\n\r\n ", + blogc_assert_template_stmt(stmts, "Test", BLOGC_TEMPLATE_CONTENT_STMT); blogc_assert_template_stmt(stmts->next, "entry", BLOGC_TEMPLATE_BLOCK_STMT); - blogc_assert_template_stmt(stmts->next->next, "\r\n", + blogc_assert_template_stmt(stmts->next->next, "", BLOGC_TEMPLATE_CONTENT_STMT); blogc_assert_template_stmt(stmts->next->next->next, "CHUNDA", BLOGC_TEMPLATE_IFDEF_STMT); @@ -163,7 +163,7 @@ test_template_parse_crlf(void **state) BLOGC_TEMPLATE_CONTENT_STMT); b_slist_t *tmp = stmts->next->next->next->next->next->next->next; blogc_assert_template_stmt(tmp, "BOLA", BLOGC_TEMPLATE_IFNDEF_STMT); - blogc_assert_template_stmt(tmp->next, "\r\nbolao\r\n", BLOGC_TEMPLATE_CONTENT_STMT); + blogc_assert_template_stmt(tmp->next, "\r\nbolao", BLOGC_TEMPLATE_CONTENT_STMT); blogc_assert_template_stmt(tmp->next->next, NULL, BLOGC_TEMPLATE_ENDIF_STMT); blogc_assert_template_stmt(tmp->next->next->next, "\r\n", BLOGC_TEMPLATE_CONTENT_STMT); @@ -185,7 +185,7 @@ test_template_parse_crlf(void **state) blogc_assert_template_stmt(tmp->next->next->next->next->next->next->next->next, NULL, BLOGC_TEMPLATE_ENDBLOCK_STMT); blogc_assert_template_stmt(tmp->next->next->next->next->next->next->next->next->next, - "\r\n", BLOGC_TEMPLATE_CONTENT_STMT); + "", BLOGC_TEMPLATE_CONTENT_STMT); tmp = tmp->next->next->next->next->next->next->next->next->next->next; blogc_assert_template_stmt(tmp, "BOLA", BLOGC_TEMPLATE_FOREACH_STMT); blogc_assert_template_stmt(tmp->next, "hahaha", @@ -381,6 +381,26 @@ test_template_parse_invalid_block_start(void **state) "Invalid statement syntax. Must begin with lowercase letter.\n" "Error occurred near line 1, position 4: {% ASD %}"); blogc_error_free(err); + a = "{%-- block entry %}\n"; + err = NULL; + stmts = blogc_template_parse(a, strlen(a), &err); + assert_non_null(err); + assert_null(stmts); + assert_int_equal(err->type, BLOGC_ERROR_TEMPLATE_PARSER); + assert_string_equal(err->msg, + "Invalid statement syntax. Duplicated whitespace cleaner before statement.\n" + "Error occurred near line 1, position 4: {%-- block entry %}"); + blogc_error_free(err); + a = "{% block entry --%}\n"; + err = NULL; + stmts = blogc_template_parse(a, strlen(a), &err); + assert_non_null(err); + assert_null(stmts); + assert_int_equal(err->type, BLOGC_ERROR_TEMPLATE_PARSER); + assert_string_equal(err->msg, + "Invalid statement syntax. Duplicated whitespace cleaner after statement.\n" + "Error occurred near line 1, position 17: {% block entry --%}"); + blogc_error_free(err); } diff --git a/tests/check_utils.c b/tests/check_utils.c index cb24625..a511dda 100644 --- a/tests/check_utils.c +++ b/tests/check_utils.c @@ -128,6 +128,50 @@ test_str_ends_with(void **state) } +static void +test_str_lstrip(void **state) +{ + char *str = b_strdup(" \tbola\n \t"); + assert_string_equal(b_str_lstrip(str), "bola\n \t"); + free(str); + str = b_strdup("guda"); + assert_string_equal(b_str_lstrip(str), "guda"); + free(str); + str = b_strdup("\n"); + assert_string_equal(b_str_lstrip(str), ""); + free(str); + str = b_strdup("\t \n"); + assert_string_equal(b_str_lstrip(str), ""); + free(str); + str = b_strdup(""); + assert_string_equal(b_str_lstrip(str), ""); + free(str); + assert_null(b_str_lstrip(NULL)); +} + + +static void +test_str_rstrip(void **state) +{ + char *str = b_strdup(" \tbola\n \t"); + assert_string_equal(b_str_rstrip(str), " \tbola"); + free(str); + str = b_strdup("guda"); + assert_string_equal(b_str_rstrip(str), "guda"); + free(str); + str = b_strdup("\n"); + assert_string_equal(b_str_rstrip(str), ""); + free(str); + str = b_strdup("\t \n"); + assert_string_equal(b_str_rstrip(str), ""); + free(str); + str = b_strdup(""); + assert_string_equal(b_str_rstrip(str), ""); + free(str); + assert_null(b_str_rstrip(NULL)); +} + + static void test_str_strip(void **state) { @@ -137,6 +181,15 @@ test_str_strip(void **state) str = b_strdup("guda"); assert_string_equal(b_str_strip(str), "guda"); free(str); + str = b_strdup("\n"); + assert_string_equal(b_str_strip(str), ""); + free(str); + str = b_strdup("\t \n"); + assert_string_equal(b_str_strip(str), ""); + free(str); + str = b_strdup(""); + assert_string_equal(b_str_strip(str), ""); + free(str); assert_null(b_str_strip(NULL)); } @@ -799,6 +852,8 @@ main(void) unit_test(test_strdup_printf), unit_test(test_str_starts_with), unit_test(test_str_ends_with), + unit_test(test_str_lstrip), + unit_test(test_str_rstrip), unit_test(test_str_strip), unit_test(test_str_split), unit_test(test_str_replace), -- cgit v1.2.3-18-g5258