From 28b02d920aa9347f25bc28f5989919174d9f60a9 Mon Sep 17 00:00:00 2001 From: "Rafael G. Martins" Date: Thu, 7 May 2015 01:37:26 -0300 Subject: content-parser: lots of random stuff --- src/content-parser.c | 197 +++++++++++++++++++++++-------------------- tests/check_content_parser.c | 10 ++- 2 files changed, 112 insertions(+), 95 deletions(-) diff --git a/src/content-parser.c b/src/content-parser.c index 23c8647..b8df868 100644 --- a/src/content-parser.c +++ b/src/content-parser.c @@ -55,11 +55,12 @@ typedef enum { char* blogc_content_parse_inline(const char *src) { - // this function is always called by blogc_content_parse, then its safe to - // assume that src is always nul-terminated. + // this function is always called by blogc_content_parse or by itself, + // then its safe to assume that src is always nul-terminated. size_t src_len = strlen(src); size_t current = 0; + size_t start = 0; b_string_t *rv = b_string_new(); @@ -69,20 +70,40 @@ blogc_content_parse_inline(const char *src) bool open_strong_und = false; bool open_code = false; bool open_code_double = false; - unsigned int link_state = 0; - unsigned int image_state = 0; - size_t link_start = 0; - size_t image_start = 0; + + unsigned int state = 0; + bool is_image = false; char *tmp = NULL; - char *title = NULL; - char *alt = NULL; + char *tmp2 = NULL; + + unsigned int open_bracket = 0; + + bool escape = false; while (current < src_len) { char c = src[current]; bool is_last = current == src_len - 1; + if (escape) { + if (state == 0) + b_string_append_c(rv, c); + current++; + escape = false; + continue; + } + switch (c) { + + case '\\': + if (open_code || open_code_double) { + b_string_append_c(rv, c); + break; + } + if (!escape) + escape = true; + break; + case '*': case '_': if (open_code || open_code_double) { @@ -94,14 +115,16 @@ blogc_content_parse_inline(const char *src) if ((c == '*' && open_strong_ast) || (c == '_' && open_strong_und)) { - b_string_append(rv, ""); + if (state == 0) + b_string_append(rv, ""); if (c == '*') open_strong_ast = false; else open_strong_und = false; } else { - b_string_append(rv, ""); + if (state == 0) + b_string_append(rv, ""); if (c == '*') open_strong_ast = true; else @@ -110,14 +133,16 @@ blogc_content_parse_inline(const char *src) } else { if ((c == '*' && open_em_ast) || (c == '_' && open_em_und)) { - b_string_append(rv, ""); + if (state == 0) + b_string_append(rv, ""); if (c == '*') open_em_ast = false; else open_em_und = false; } else { - b_string_append(rv, ""); + if (state == 0) + b_string_append(rv, ""); if (c == '*') open_em_ast = true; else @@ -129,62 +154,48 @@ blogc_content_parse_inline(const char *src) case '`': if (!is_last && src[current + 1] == c) { current++; - if (open_code_double) - b_string_append(rv, ""); - else - b_string_append(rv, ""); + if (state == 0) { + if (open_code_double) + b_string_append(rv, ""); + else + b_string_append(rv, ""); + } open_code_double = !open_code_double; } else { - if (open_code) - b_string_append(rv, ""); - else - b_string_append(rv, ""); + if (state == 0) { + if (open_code) + b_string_append(rv, ""); + else + b_string_append(rv, ""); + } open_code = !open_code; } break; - case '[': + case '!': if (open_code || open_code_double) { b_string_append_c(rv, c); break; } - if (link_state == 0 && image_state == 0) { - tmp = strchr(src + current, ']'); - if (tmp != NULL) { - if (strlen(tmp) > 1 && tmp[1] == '(') { - tmp = strchr(tmp, ')'); - if (tmp != NULL) { // this is a link - link_start = current + 1; // its safe - link_state = 1; - break; - } - } - } - b_string_append_c(rv, c); - } + if (state == 0) + is_image = true; break; - case '!': + case '[': if (open_code || open_code_double) { b_string_append_c(rv, c); break; } - if (link_state == 0 && image_state == 0) { - if (!is_last && src[current + 1] == '[') { - tmp = strchr(src + current + 1, ']'); - if (tmp != NULL) { - if (strlen(tmp) > 1 && tmp[1] == '(') { - tmp = strchr(tmp, ')'); - if (tmp != NULL) { // this is an image - image_start = current + 2; // its safe - image_state = 1; - break; - } - } - } - } - b_string_append_c(rv, c); + if (state == 0) { + state = 1; + start = current + 1; + open_bracket = 0; + break; + } + if (state == 1) { + open_bracket++; + break; } break; @@ -193,17 +204,18 @@ blogc_content_parse_inline(const char *src) b_string_append_c(rv, c); break; } - if (link_state == 1) { - link_state = 2; - title = b_strndup(src + link_start, current - link_start); - break; - } - if (image_state == 1) { - image_state = 2; - alt = b_strndup(src + image_start, current - image_start); + if (state == 1) { + if (open_bracket-- == 0) { + state = 2; + tmp = b_strndup(src + start, current - start); + tmp2 = blogc_content_parse_inline(tmp); + free(tmp); + tmp = NULL; + } break; } - b_string_append_c(rv, c); + if (state == 0) + b_string_append_c(rv, c); break; case '(': @@ -211,17 +223,13 @@ blogc_content_parse_inline(const char *src) b_string_append_c(rv, c); break; } - if (link_state == 2) { - link_state = 3; - link_start = current + 1; // its safe + if (state == 2) { + state = 3; + start = current + 1; break; } - if (image_state == 2) { - image_state = 3; - image_start = current + 1; // its safe - break; - } - b_string_append_c(rv, c); + if (state == 0) + b_string_append_c(rv, c); break; case ')': @@ -229,55 +237,58 @@ blogc_content_parse_inline(const char *src) b_string_append_c(rv, c); break; } - if (link_state == 3) { - link_state = 0; - tmp = b_strndup(src + link_start, current - link_start); - b_string_append_printf(rv, "%s", tmp, title); - free(tmp); - tmp = NULL; - free(title); - title = NULL; - break; - } - if (image_state == 3) { - image_state = 0; - tmp = b_strndup(src + image_start, current - image_start); - b_string_append_printf(rv, "\"%s\"", tmp, alt); + if (state == 3) { + state = 0; + tmp = b_strndup(src + start, current - start); + if (is_image) { + b_string_append_printf(rv, "\"%s\"", tmp, tmp2); + } + else { + b_string_append_printf(rv, "%s", tmp, tmp2); + } free(tmp); tmp = NULL; - free(alt); - alt = NULL; + free(tmp2); + tmp2 = NULL; + is_image = false; break; } - b_string_append_c(rv, c); + if (state == 0) + b_string_append_c(rv, c); break; case '&': - b_string_append(rv, "&"); + if (state == 0) + b_string_append(rv, "&"); break; case '<': - b_string_append(rv, "<"); + if (state == 0) + b_string_append(rv, "<"); break; case '>': - b_string_append(rv, ">"); + if (state == 0) + b_string_append(rv, ">"); break; case '"': - b_string_append(rv, """); + if (state == 0) + b_string_append(rv, """); break; case '\'': - b_string_append(rv, "'"); + if (state == 0) + b_string_append(rv, "'"); break; case '/': - b_string_append(rv, "/"); + if (state == 0) + b_string_append(rv, "/"); break; default: - if (link_state == 0 && image_state == 0) + if (state == 0) b_string_append_c(rv, c); } diff --git a/tests/check_content_parser.c b/tests/check_content_parser.c index ab1bf49..6fec86c 100644 --- a/tests/check_content_parser.c +++ b/tests/check_content_parser.c @@ -325,8 +325,14 @@ test_content_parse_invalid_code(void **state) void test_content_parse_inline(void **state) { - char *html = blogc_content_parse_inline("**bola***asd* ``chunda``"); - assert_string_equal(html, "bolaasd chunda"); + char *html = blogc_content_parse_inline( + "**bola***asd* [![lol](http://google.com/lol.png) **lol** " + "\\[asd\\]\\(qwe\\)](http://google.com) ``chunda``"); + assert_string_equal(html, + "bolaasd " + " lol [asd](qwe) " + "chunda"); free(html); html = blogc_content_parse_inline("*bola*"); assert_string_equal(html, "bola"); -- cgit v1.2.3-18-g5258