From 903c8313bc1f8a1ce3d97fc944293d79ccdac76c Mon Sep 17 00:00:00 2001 From: "Rafael G. Martins" Date: Wed, 6 May 2015 02:21:07 -0300 Subject: content-parsed: started inline parser --- src/content-parser.c | 220 ++++++++++++++++++++++++++++++++++++++++++- src/content-parser.h | 1 + tests/check_content_parser.c | 20 +++- 3 files changed, 236 insertions(+), 5 deletions(-) diff --git a/src/content-parser.c b/src/content-parser.c index 8249322..29a20bb 100644 --- a/src/content-parser.c +++ b/src/content-parser.c @@ -54,6 +54,217 @@ typedef enum { } blogc_content_parser_state_t; +char* +blogc_content_parse_inline(const char *src) +{ + // this function is always called by blogc_content_parse, then its safe to + // assume that src is always nul-terminated. + size_t src_len = strlen(src); + + size_t current = 0; + + b_string_t *rv = b_string_new(); + + bool open_em_ast = false; + bool open_strong_ast = false; + bool open_em_und = false; + bool open_strong_und = false; + bool open_code = false; + bool open_code_double = false; + unsigned int link_state = 0; + unsigned int image_state = 0; + size_t link_start = 0; + size_t image_start = 0; + + char *tmp = NULL; + char *title = NULL; + + while (current < src_len) { + char c = src[current]; + bool is_last = current == src_len - 1; + + switch (c) { + case '*': + case '_': + if (!is_last && src[current + 1] == c) { + current++; + if ((c == '*' && open_strong_ast) || + (c == '_' && open_strong_und)) + { + b_string_append(rv, ""); + if (c == '*') + open_strong_ast = false; + else + open_strong_und = false; + } + else { + b_string_append(rv, ""); + if (c == '*') + open_strong_ast = true; + else + open_strong_und = true; + } + } + else { + if ((c == '*' && open_em_ast) || (c == '_' && open_em_und)) { + b_string_append(rv, ""); + if (c == '*') + open_em_ast = false; + else + open_em_und = false; + } + else { + b_string_append(rv, ""); + if (c == '*') + open_em_ast = true; + else + open_em_und = true; + } + } + break; + + case '`': + if (!is_last && src[current + 1] == c) { + current++; + if (open_code_double) + b_string_append(rv, ""); + else + b_string_append(rv, ""); + open_code_double = !open_code_double; + } + else { + if (open_code) + b_string_append(rv, ""); + else + b_string_append(rv, ""); + open_code = !open_code; + } + break; + + case '[': + if (link_state == 0 && image_state == 0) { + tmp = strchr(src + current, ']'); + if (tmp != NULL) { + if (strlen(tmp) > 1 && tmp[1] == '(') { + tmp = strchr(tmp, ')'); + if (tmp != NULL) { // this is a link + link_start = current + 1; // its safe + link_state = 1; + break; + } + } + } + b_string_append_c(rv, c); + } + break; + + case '!': + if (link_state == 0 && image_state == 0) { + if (!is_last && src[current + 1] == '[') { + tmp = strchr(src + current + 1, ']'); + if (tmp != NULL) { + if (strlen(tmp) > 1 && tmp[1] == '(') { + tmp = strchr(tmp, ')'); + if (tmp != NULL) { // this is an image + image_start = current + 2; // its safe + image_state = 1; + break; + } + } + } + } + b_string_append_c(rv, c); + } + break; + + case ']': + if (link_state == 1) { + link_state = 2; + title = b_strndup(src + link_start, current - link_start); + break; + } + if (image_state == 1) { + image_state = 2; + title = b_strndup(src + image_start, current - image_start); + break; + } + b_string_append_c(rv, c); + break; + + case '(': + if (link_state == 2) { + link_state = 3; + link_start = current + 1; // its safe + break; + } + if (image_state == 2) { + image_state = 3; + image_start = current + 1; // its safe + break; + } + b_string_append_c(rv, c); + break; + + case ')': + if (link_state == 3) { + link_state = 0; + tmp = b_strndup(src + link_start, current - link_start); + b_string_append_printf(rv, "%s", tmp, title); + free(tmp); + tmp = NULL; + free(title); + title = NULL; + break; + } + if (image_state == 3) { + image_state = 0; + tmp = b_strndup(src + image_start, current - image_start); + b_string_append_printf(rv, "\"%s\"", tmp, title); + free(tmp); + tmp = NULL; + free(title); + title = NULL; + break; + } + b_string_append_c(rv, c); + break; + + case '&': + b_string_append(rv, "&"); + break; + + case '<': + b_string_append(rv, "<"); + break; + + case '>': + b_string_append(rv, ">"); + break; + + case '"': + b_string_append(rv, """); + break; + + case '\'': + b_string_append(rv, "'"); + break; + + case '/': + b_string_append(rv, "/"); + break; + + default: + if (link_state == 0 && image_state == 0) + b_string_append_c(rv, c); + } + + current++; + } + + return b_string_free(rv, false); +} + + char* blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err) { @@ -69,6 +280,7 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err) size_t prefix_len = 0; char *tmp = NULL; char *tmp2 = NULL; + char *parsed = NULL; char **tmpv = NULL; char d; @@ -150,8 +362,11 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err) if (c == '\n' || c == '\r' || is_last) { end = is_last && c != '\n' && c != '\r' ? src_len : current; tmp = b_strndup(src + start, end - start); + parsed = blogc_content_parse_inline(tmp); b_string_append_printf(rv, "%s\n", header_level, - tmp, header_level); + parsed, header_level); + free(parsed); + parsed = NULL; free(tmp); tmp = NULL; state = CONTENT_START_LINE; @@ -316,8 +531,7 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err) d = '\0'; break; } - *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len, - current, "Malformed horizontal rule, must use only '%c'", d); + state = CONTENT_PARAGRAPH; break; case CONTENT_UNORDERED_LIST_START: diff --git a/src/content-parser.h b/src/content-parser.h index 0a55fd9..db65332 100644 --- a/src/content-parser.h +++ b/src/content-parser.h @@ -12,6 +12,7 @@ #include #include "error.h" +char* blogc_content_parse_inline(const char *src); char* blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err); diff --git a/tests/check_content_parser.c b/tests/check_content_parser.c index b3e7092..a613052 100644 --- a/tests/check_content_parser.c +++ b/tests/check_content_parser.c @@ -56,7 +56,9 @@ test_content_parse(void **state) "\n" "\n" "guda\n" - "yay"; + "yay\n" + "\n" + "**bola**\n"; blogc_error_t *err = NULL; char *html = blogc_content_parse(a, strlen(a), &err); assert_null(err); @@ -91,7 +93,8 @@ test_content_parse(void **state) " chunda\n" "\n" "

guda\n" - "yay

\n"); + "yay

\n" + "

**bola**

\n"); free(html); } @@ -319,6 +322,18 @@ test_content_parse_invalid_code(void **state) } +void +test_content_parse_inline(void **state) +{ + char *html = blogc_content_parse_inline("**bola***asd* ``chunda``"); + assert_string_equal(html, "bolaasd chunda"); + free(html); + html = blogc_content_parse_inline("*bola*"); + assert_string_equal(html, "bola"); + free(html); +} + + int main(void) { @@ -332,6 +347,7 @@ main(void) unit_test(test_content_parse_invalid_header_empty), unit_test(test_content_parse_invalid_blockquote), unit_test(test_content_parse_invalid_code), + unit_test(test_content_parse_inline), }; return run_tests(tests); } -- cgit v1.2.3-18-g5258