diff options
-rw-r--r-- | src/content-parser.c | 170 | ||||
-rw-r--r-- | src/content-parser.h | 2 | ||||
-rw-r--r-- | tests/check_content_parser.c | 96 |
3 files changed, 217 insertions, 51 deletions
diff --git a/src/content-parser.c b/src/content-parser.c index e5bb16c..9052aec 100644 --- a/src/content-parser.c +++ b/src/content-parser.c @@ -17,7 +17,7 @@ #include "content-parser.h" // this is a half ass implementation of a markdown-like syntax. bugs are -// expected. feel free to improve the parser and and new features. +// expected. feel free to improve the parser and add new features. typedef enum { @@ -356,6 +356,33 @@ blogc_content_parse_inline(const char *src) } +bool +blogc_is_ordered_list_item(const char *str, size_t prefix_len) +{ + if (str == NULL) + return false; + + if (strlen(str) < 2) + return false; + + size_t i; + + for (i = 0; str[i] >= '0' && str[i] <= '9'; i++); + + if (i == 0) + return false; + if (str[i] != '.') + return false; + + for (i++; i < prefix_len && (str[i] == ' ' || str[i] == '\t'); i++); + + if (str[i] == '\0') + return false; + + return i == prefix_len; +} + + char* blogc_content_parse(const char *src, size_t *end_excerpt) { @@ -373,13 +400,12 @@ blogc_content_parse(const char *src, size_t *end_excerpt) size_t prefix_len = 0; char *tmp = NULL; char *tmp2 = NULL; - char *tmp3 = NULL; char *parsed = NULL; - char **tmpv = NULL; char d = '\0'; b_slist_t *lines = NULL; + b_slist_t *lines2 = NULL; b_string_t *rv = b_string_new(); b_string_t *tmp_str = NULL; @@ -664,22 +690,39 @@ hr: if (c == '\n' || c == '\r' || is_last) { end = is_last && c != '\n' && c != '\r' ? src_len : current; tmp = b_strndup(src + start2, end - start2); + tmp2 = b_strdup_printf("%-*s", strlen(prefix), ""); if (b_str_starts_with(tmp, prefix)) { - tmp3 = b_strdup(tmp + strlen(prefix)); - parsed = blogc_content_parse_inline(tmp3); - free(tmp3); - tmp3 = NULL; - lines = b_slist_append(lines, b_strdup(parsed)); - free(parsed); - parsed = NULL; + if (lines2 != NULL) { + tmp_str = b_string_new(); + for (b_slist_t *l = lines2; l != NULL; l = l->next) { + if (l->next == NULL) + b_string_append_printf(tmp_str, "%s", l->data); + else + b_string_append_printf(tmp_str, "%s\n", l->data); + } + b_slist_free_full(lines2, free); + lines2 = NULL; + parsed = blogc_content_parse_inline(tmp_str->str); + b_string_free(tmp_str, true); + lines = b_slist_append(lines, b_strdup(parsed)); + free(parsed); + parsed = NULL; + } + lines2 = b_slist_append(lines2, b_strdup(tmp + strlen(prefix))); + } + else if (b_str_starts_with(tmp, tmp2)) { + lines2 = b_slist_append(lines2, b_strdup(tmp + strlen(prefix))); } else { state = CONTENT_PARAGRAPH_END; free(tmp); tmp = NULL; + free(tmp2); + tmp2 = NULL; free(prefix); prefix = NULL; b_slist_free_full(lines, free); + b_slist_free_full(lines2, free); lines = NULL; if (is_last) goto para; @@ -687,6 +730,8 @@ hr: } free(tmp); tmp = NULL; + free(tmp2); + tmp2 = NULL; state = CONTENT_UNORDERED_LIST_END; } if (!is_last) @@ -694,6 +739,23 @@ hr: case CONTENT_UNORDERED_LIST_END: if (c == '\n' || c == '\r' || is_last) { + if (lines2 != NULL) { + // FIXME: avoid repeting the code below + tmp_str = b_string_new(); + for (b_slist_t *l = lines2; l != NULL; l = l->next) { + if (l->next == NULL) + b_string_append_printf(tmp_str, "%s", l->data); + else + b_string_append_printf(tmp_str, "%s\n", l->data); + } + b_slist_free_full(lines2, free); + lines2 = NULL; + parsed = blogc_content_parse_inline(tmp_str->str); + b_string_free(tmp_str, true); + lines = b_slist_append(lines, b_strdup(parsed)); + free(parsed); + parsed = NULL; + } b_string_append(rv, "<ul>\n"); for (b_slist_t *l = lines; l != NULL; l = l->next) b_string_append_printf(rv, "<li>%s</li>\n", l->data); @@ -733,52 +795,72 @@ hr: if (c == '\n' || c == '\r' || is_last) { end = is_last && c != '\n' && c != '\r' ? src_len : current; tmp = b_strndup(src + start2, end - start2); - if (strlen(tmp) >= prefix_len) { - tmp2 = b_strndup(tmp, prefix_len); - tmpv = b_str_split(tmp2, '.', 2); - free(tmp2); - tmp2 = NULL; - if (b_strv_length(tmpv) != 2) { - state = CONTENT_PARAGRAPH_END; - b_strv_free(tmpv); - tmpv = NULL; - free(tmp); - tmp = NULL; - b_slist_free_full(lines, free); - lines = NULL; - goto para; - } - for (unsigned int i = 0; tmpv[0][i] != '\0'; i++) { - if (!(tmpv[0][i] >= '0' && tmpv[0][i] <= '9')) { - state = CONTENT_PARAGRAPH_END; - b_strv_free(tmpv); - tmpv = NULL; - free(tmp); - tmp = NULL; - b_slist_free_full(lines, free); - lines = NULL; - goto para; + tmp2 = b_strdup_printf("%-*s", prefix_len, ""); + if (blogc_is_ordered_list_item(tmp, prefix_len)) { + if (lines2 != NULL) { + tmp_str = b_string_new(); + for (b_slist_t *l = lines2; l != NULL; l = l->next) { + if (l->next == NULL) + b_string_append_printf(tmp_str, "%s", l->data); + else + b_string_append_printf(tmp_str, "%s\n", l->data); } + b_slist_free_full(lines2, free); + lines2 = NULL; + parsed = blogc_content_parse_inline(tmp_str->str); + b_string_free(tmp_str, true); + lines = b_slist_append(lines, b_strdup(parsed)); + free(parsed); + parsed = NULL; } - tmp3 = b_strdup(tmp + prefix_len); - parsed = blogc_content_parse_inline(tmp3); - free(tmp3); - tmp3 = NULL; - lines = b_slist_append(lines, b_strdup(parsed)); - state = CONTENT_ORDERED_LIST_END; + lines2 = b_slist_append(lines2, b_strdup(tmp + prefix_len)); + } + else if (b_str_starts_with(tmp, tmp2)) { + lines2 = b_slist_append(lines2, b_strdup(tmp + prefix_len)); + } + else { + state = CONTENT_PARAGRAPH_END; + free(tmp); + tmp = NULL; + free(tmp2); + tmp2 = NULL; free(parsed); parsed = NULL; - b_strv_free(tmpv); - tmpv = NULL; + b_slist_free_full(lines, free); + b_slist_free_full(lines2, free); + lines = NULL; + if (is_last) + goto para; + break; } free(tmp); tmp = NULL; + free(tmp2); + tmp2 = NULL; + state = CONTENT_ORDERED_LIST_END; } - if (state == CONTENT_PARAGRAPH || !is_last) + if (!is_last) break; case CONTENT_ORDERED_LIST_END: if (c == '\n' || c == '\r' || is_last) { + if (lines2 != NULL) { + // FIXME: avoid repeting the code below + tmp_str = b_string_new(); + for (b_slist_t *l = lines2; l != NULL; l = l->next) { + if (l->next == NULL) + b_string_append_printf(tmp_str, "%s", l->data); + else + b_string_append_printf(tmp_str, "%s\n", l->data); + } + b_slist_free_full(lines2, free); + lines2 = NULL; + parsed = blogc_content_parse_inline(tmp_str->str); + b_string_free(tmp_str, true); + lines = b_slist_append(lines, b_strdup(parsed)); + free(parsed); + parsed = NULL; + } b_string_append(rv, "<ol>\n"); for (b_slist_t *l = lines; l != NULL; l = l->next) b_string_append_printf(rv, "<li>%s</li>\n", l->data); diff --git a/src/content-parser.h b/src/content-parser.h index b98769d..4579719 100644 --- a/src/content-parser.h +++ b/src/content-parser.h @@ -10,8 +10,10 @@ #define _CONTENT_PARSER_H #include <stdlib.h> +#include <stdbool.h> char* blogc_content_parse_inline(const char *src); +bool blogc_is_ordered_list_item(const char *str, size_t prefix_len); char* blogc_content_parse(const char *src, size_t *end_excerpt); #endif /* _CONTENT_PARSER_H */ diff --git a/tests/check_content_parser.c b/tests/check_content_parser.c index 1eb2aa5..b9f83b0 100644 --- a/tests/check_content_parser.c +++ b/tests/check_content_parser.c @@ -20,6 +20,24 @@ static void +test_is_ordered_list_item(void **state) +{ + assert_true(blogc_is_ordered_list_item("1.bola", 2)); + assert_true(blogc_is_ordered_list_item("1. bola", 3)); + assert_true(blogc_is_ordered_list_item("12. bola", 4)); + assert_true(blogc_is_ordered_list_item("123. bola", 5)); + assert_true(blogc_is_ordered_list_item("1. bola", 6)); + assert_true(blogc_is_ordered_list_item("1. bola", 5)); + assert_false(blogc_is_ordered_list_item("1bola", 1)); + assert_false(blogc_is_ordered_list_item("12bola", 2)); + assert_false(blogc_is_ordered_list_item("1 . bola", 6)); + assert_false(blogc_is_ordered_list_item("1. bola", 6)); + assert_false(blogc_is_ordered_list_item("1.", 2)); + assert_false(blogc_is_ordered_list_item(NULL, 2)); +} + + +static void test_content_parse(void **state) { size_t l = 0; @@ -368,6 +386,40 @@ test_content_parse_unordered_list(void **state) "</ul>\n" "<p>fuuuu</p>\n"); free(html); + html = blogc_content_parse( + "lol\n" + "\n" + "* asd\n" + " cvb\n" + "* qwe\n" + "* zxc\n" + " 1234\n" + "\n" + "fuuuu\n", NULL); + assert_non_null(html); + assert_string_equal(html, + "<p>lol</p>\n" + "<ul>\n" + "<li>asd\n" + "cvb</li>\n" + "<li>qwe</li>\n" + "<li>zxc\n" + "1234</li>\n" + "</ul>\n" + "<p>fuuuu</p>\n"); + free(html); + html = blogc_content_parse( + "* asd\n" + "* qwe\n" + "* zxc", NULL); + assert_non_null(html); + assert_string_equal(html, + "<ul>\n" + "<li>asd</li>\n" + "<li> qwe</li>\n" + "<li> zxc</li>\n" + "</ul>\n"); + free(html); } @@ -423,21 +475,37 @@ test_content_parse_ordered_list(void **state) "<p>fuuuu</p>\n"); free(html); html = blogc_content_parse( - "1.\nasd\n" - "2. qwe\n", NULL); + "lol\n" + "\n" + "1. asd\n" + " cvb\n" + "2. qwe\n" + "3. zxc\n" + " 1234\n" + "\n" + "fuuuu\n", NULL); assert_non_null(html); assert_string_equal(html, - "<p>1.\n" - "asd</p>\n" + "<p>lol</p>\n" "<ol>\n" + "<li>asd\n" + "cvb</li>\n" "<li>qwe</li>\n" - "</ol>\n"); + "<li>zxc\n" + "1234</li>\n" + "</ol>\n" + "<p>fuuuu</p>\n"); free(html); - html = blogc_content_parse("1.\n", NULL); + html = blogc_content_parse( + "1. asd\n" + "2. qwe\n" + "3. zxc", NULL); assert_non_null(html); assert_string_equal(html, "<ol>\n" - "<li></li>\n" + "<li>asd</li>\n" + "<li> qwe</li>\n" + "<li> zxc</li>\n" "</ol>\n"); free(html); } @@ -719,6 +787,19 @@ test_content_parse_invalid_ordered_list(void **state) "<p>a. asd\n" "2. qwe</p>\n"); free(html); + html = blogc_content_parse( + "1.\nasd\n" + "2. qwe\n", NULL); + assert_non_null(html); + assert_string_equal(html, + "<p>1.\n" + "asd\n" + "2. qwe</p>\n"); + free(html); + html = blogc_content_parse("1.\n", NULL); + assert_non_null(html); + assert_string_equal(html, "<p>1.</p>\n"); + free(html); } @@ -1038,6 +1119,7 @@ int main(void) { const UnitTest tests[] = { + unit_test(test_is_ordered_list_item), unit_test(test_content_parse), unit_test(test_content_parse_with_excerpt), unit_test(test_content_parse_header), |