From 6c36460f20548850b0704a462dd8281c7f5868a7 Mon Sep 17 00:00:00 2001 From: "Rafael G. Martins" Date: Mon, 4 May 2015 12:54:10 -0300 Subject: content-parser: implemented more block elements --- src/content-parser.c | 189 +++++++++++++++++++++++++++++++++++++++++-- src/error.c | 3 + src/error.h | 1 + tests/check_content_parser.c | 16 ++++ 4 files changed, 201 insertions(+), 8 deletions(-) diff --git a/src/content-parser.c b/src/content-parser.c index e083243..75c0f34 100644 --- a/src/content-parser.c +++ b/src/content-parser.c @@ -22,7 +22,6 @@ // expected. feel free to improve the parser and and new features. -// TODO: block elements: list, horizontal rule // TODO: inline elements: links, emphasis, code, images, line breaks // TODO: automatic scaping of html entities // TODO: automatic links @@ -42,6 +41,14 @@ typedef enum { CONTENT_CODE, CONTENT_CODE_START, CONTENT_CODE_END, + CONTENT_UNORDERED_LIST_OR_HORIZONTAL_RULE, + CONTENT_HORIZONTAL_RULE, + CONTENT_UNORDERED_LIST_START, + CONTENT_UNORDERED_LIST_END, + CONTENT_ORDERED_LIST, + CONTENT_ORDERED_LIST_SPACE, + CONTENT_ORDERED_LIST_START, + CONTENT_ORDERED_LIST_END, CONTENT_PARAGRAPH, CONTENT_PARAGRAPH_END, } blogc_content_parser_state_t; @@ -59,7 +66,12 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err) unsigned int header_level = 0; char *prefix = NULL; + size_t prefix_len = 0; char *tmp = NULL; + char *tmp2 = NULL; + char **tmpv = NULL; + + char d; b_slist_t *lines = NULL; @@ -82,6 +94,17 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err) state = CONTENT_HEADER; break; } + if (c == '*' || c == '+' || c == '-') { + state = CONTENT_UNORDERED_LIST_OR_HORIZONTAL_RULE; + start = current; + d = c; + break; + } + if (c >= '0' && c <= '9') { + state = CONTENT_ORDERED_LIST; + start = current; + break; + } if (c == ' ' || c == '\t') { state = CONTENT_CODE; start = current; @@ -110,7 +133,8 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err) 
state = CONTENT_HEADER_TITLE_START; break; } - // error; + *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len, + current, "Malformed header, no space or tab after '#'"); break; case CONTENT_HEADER_TITLE_START: @@ -121,7 +145,8 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err) state = CONTENT_HEADER_TITLE; break; } - // error; + *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len, + current, "Empty header"); break; case CONTENT_HEADER_TITLE: @@ -138,7 +163,6 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err) } break; - case CONTENT_HTML: if (c == '\n' || c == '\r' || is_last) { state = CONTENT_HTML_END; @@ -172,13 +196,15 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err) tmp = b_strndup(src + start, end - start); if (b_str_starts_with(tmp, prefix)) { lines = b_slist_append(lines, b_strdup(tmp + strlen(prefix))); + state = CONTENT_BLOCKQUOTE_END; } else { - // error + *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len, + current, "Malformed blockquote, must use same prefix " + "as previous line(s): %s", prefix); } free(tmp); tmp = NULL; - state = CONTENT_BLOCKQUOTE_END; } if (!is_last) break; @@ -226,13 +252,15 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err) tmp = b_strndup(src + start, end - start); if (b_str_starts_with(tmp, prefix)) { lines = b_slist_append(lines, b_strdup(tmp + strlen(prefix))); + state = CONTENT_CODE_END; } else { - // error + *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len, + current, "Malformed code block, must use same prefix " + "as previous line(s): '%s'", prefix); } free(tmp); tmp = NULL; - state = CONTENT_CODE_END; } if (!is_last) break; @@ -260,6 +288,151 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err) } break; + case CONTENT_UNORDERED_LIST_OR_HORIZONTAL_RULE: + if (c == d) { + state = CONTENT_HORIZONTAL_RULE; + break; + } + if (c == ' ' 
|| c == '\t') + break; + prefix = b_strndup(src + start, current - start); + state = CONTENT_UNORDERED_LIST_START; + break; + + case CONTENT_HORIZONTAL_RULE: + if (c == d) { + break; + } + if (c == '\n' || c == '\r' || is_last) { + b_string_append(rv, "<hr />
\n"); + state = CONTENT_START_LINE; + start = current; + d = '\0'; + break; + } + *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len, + current, "Malformed horizontal rule, must use only '%c'", d); + break; + + case CONTENT_UNORDERED_LIST_START: + if (c == '\n' || c == '\r' || is_last) { + end = is_last ? src_len : current; + tmp = b_strndup(src + start, end - start); + if (b_str_starts_with(tmp, prefix)) { + lines = b_slist_append(lines, b_strdup(tmp + strlen(prefix))); + } + else { + *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len, + current, "Malformed unordered list, must use same prefix " + "as previous line(s): %s", prefix); + } + free(tmp); + tmp = NULL; + state = CONTENT_UNORDERED_LIST_END; + } + if (!is_last) + break; + + case CONTENT_UNORDERED_LIST_END: + if (c == '\n' || c == '\r' || is_last) { + b_string_append(rv, "\n"); + b_slist_free_full(lines, free); + lines = NULL; + free(prefix); + prefix = NULL; + state = CONTENT_START_LINE; + start = current; + } + else { + start = current; + state = CONTENT_UNORDERED_LIST_START; + } + break; + + case CONTENT_ORDERED_LIST: + if (c >= '0' && c <= '9') + break; + if (c == '.') { + state = CONTENT_ORDERED_LIST_SPACE; + break; + } + state = CONTENT_PARAGRAPH; + break; + + case CONTENT_ORDERED_LIST_SPACE: + if (c == ' ' || c == '\t') + break; + prefix_len = current - start; + state = CONTENT_ORDERED_LIST_START; + + case CONTENT_ORDERED_LIST_START: + if (c == '\n' || c == '\r' || is_last) { + end = is_last ? src_len : current; + tmp = b_strndup(src + start, end - start); + if (strlen(tmp) >= prefix_len) { + tmp2 = b_strndup(tmp, prefix_len); + tmpv = b_str_split(tmp2, '.', 2); + free(tmp2); + tmp2 = NULL; + if (b_strv_length(tmpv) != 2) { + *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len, + current, "Malformed ordered list, prefix must be a " + "number, followed by a '.', followed by the content. 
" + "Content must be aligned with content from previous line(s)"); + goto err_li; + } + for (unsigned int i = 0; tmpv[0][i] != '\0'; i++) { + if (!(tmpv[0][i] >= '0' && tmpv[0][i] <= '9')) { + *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len, + current, "Malformed ordered list, prefix must be a " + "number, followed by a '.', followed by the content. " + "Content must be aligned with content from previous line(s)"); + goto err_li; + } + } + for (unsigned int i = 0; tmpv[1][i] != '\0'; i++) { + if (!(tmpv[1][i] == ' ' || tmpv[1][i] == '\t')) { + *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len, + current, "Malformed ordered list, prefix must be a " + "number, followed by a '.', followed by the content. " + "Content must be aligned with content from previous line(s)"); + goto err_li; + } + } + lines = b_slist_append(lines, b_strdup(tmp + prefix_len)); + state = CONTENT_ORDERED_LIST_END; +err_li: + b_strv_free(tmpv); + tmpv = NULL; + } + free(tmp); + tmp = NULL; + } + if (!is_last) + break; + + case CONTENT_ORDERED_LIST_END: + if (c == '\n' || c == '\r' || is_last) { + b_string_append(rv, "
<ol>\n"); + for (b_slist_t *l = lines; l != NULL; l = l->next) + b_string_append_printf(rv, "<li>%s</li>\n", l->data); + b_string_append(rv, "</ol>
\n"); + b_slist_free_full(lines, free); + lines = NULL; + free(prefix); + prefix = NULL; + state = CONTENT_START_LINE; + start = current; + } + else { + start = current; + state = CONTENT_ORDERED_LIST_START; + } + break; + case CONTENT_PARAGRAPH: if (c == '\n' || c == '\r' || is_last) { state = CONTENT_PARAGRAPH_END; diff --git a/src/error.c b/src/error.c index 3850304..68c7b00 100644 --- a/src/error.c +++ b/src/error.c @@ -96,6 +96,9 @@ blogc_error_print(blogc_error_t *err) case BLOGC_ERROR_LOADER: tmp = b_strdup("Loader error"); break; + case BLOGC_ERROR_CONTENT_PARSER: + tmp = b_strdup("Content parser error"); + break; default: tmp = b_strdup("Unknown error"); } diff --git a/src/error.h b/src/error.h index 98aeeb9..b4960e2 100644 --- a/src/error.h +++ b/src/error.h @@ -17,6 +17,7 @@ typedef enum { BLOGC_ERROR_SOURCE_PARSER = 1, BLOGC_ERROR_TEMPLATE_PARSER, BLOGC_ERROR_LOADER, + BLOGC_ERROR_CONTENT_PARSER, } blogc_error_type_t; typedef struct { diff --git a/tests/check_content_parser.c b/tests/check_content_parser.c index 86ba8d8..df14df3 100644 --- a/tests/check_content_parser.c +++ b/tests/check_content_parser.c @@ -44,6 +44,13 @@ test_content_parse(void **state) " asd\n" " qwewer\n" "\n" + "+++\n" + "1. chunda\n" + "3. fuuuu\n" + "\n" + "+ chunda2\n" + "+ fuuuu2\n" + "\n" "\n" @@ -71,6 +78,15 @@ test_content_parse(void **state) "
bola\n"
         " asd\n"
         "qwewer
\n" + "
<hr />\n" + "
<ol>\n" + "<li>chunda</li>\n" + "<li>fuuuu</li>\n" + "</ol>
\n" + "\n" "\n" -- cgit v1.2.3-18-g5258