diff options
-rw-r--r-- | src/content-parser.c | 220 | ||||
-rw-r--r-- | src/content-parser.h | 1 | ||||
-rw-r--r-- | tests/check_content_parser.c | 20 |
3 files changed, 236 insertions, 5 deletions
diff --git a/src/content-parser.c b/src/content-parser.c
index 8249322..29a20bb 100644
--- a/src/content-parser.c
+++ b/src/content-parser.c
@@ -55,6 +55,217 @@ typedef enum {
 
 
 char*
+blogc_content_parse_inline(const char *src)
+{
+    // this function is always called by blogc_content_parse, then its safe to
+    // assume that src is always nul-terminated.
+    size_t src_len = strlen(src);
+
+    size_t current = 0;
+
+    b_string_t *rv = b_string_new();
+
+    bool open_em_ast = false;
+    bool open_strong_ast = false;
+    bool open_em_und = false;
+    bool open_strong_und = false;
+    bool open_code = false;
+    bool open_code_double = false;
+    unsigned int link_state = 0;
+    unsigned int image_state = 0;
+    size_t link_start = 0;
+    size_t image_start = 0;
+
+    char *tmp = NULL;
+    char *title = NULL;
+
+    while (current < src_len) {
+        char c = src[current];
+        bool is_last = current == src_len - 1;
+
+        switch (c) {
+            case '*':
+            case '_':
+                if (!is_last && src[current + 1] == c) {
+                    current++;
+                    if ((c == '*' && open_strong_ast) ||
+                        (c == '_' && open_strong_und))
+                    {
+                        b_string_append(rv, "</strong>");
+                        if (c == '*')
+                            open_strong_ast = false;
+                        else
+                            open_strong_und = false;
+                    }
+                    else {
+                        b_string_append(rv, "<strong>");
+                        if (c == '*')
+                            open_strong_ast = true;
+                        else
+                            open_strong_und = true;
+                    }
+                }
+                else {
+                    if ((c == '*' && open_em_ast) || (c == '_' && open_em_und)) {
+                        b_string_append(rv, "</em>");
+                        if (c == '*')
+                            open_em_ast = false;
+                        else
+                            open_em_und = false;
+                    }
+                    else {
+                        b_string_append(rv, "<em>");
+                        if (c == '*')
+                            open_em_ast = true;
+                        else
+                            open_em_und = true;
+                    }
+                }
+                break;
+
+            case '`':
+                if (!is_last && src[current + 1] == c) {
+                    current++;
+                    if (open_code_double)
+                        b_string_append(rv, "</code>");
+                    else
+                        b_string_append(rv, "<code>");
+                    open_code_double = !open_code_double;
+                }
+                else {
+                    if (open_code)
+                        b_string_append(rv, "</code>");
+                    else
+                        b_string_append(rv, "<code>");
+                    open_code = !open_code;
+                }
+                break;
+
+            case '[':
+                if (link_state == 0 && image_state == 0) {
+                    tmp = strchr(src + current, ']');
+                    if (tmp != NULL) {
+                        if (strlen(tmp) > 1 && tmp[1] == '(') {
+                            tmp = strchr(tmp, ')');
+                            if (tmp != NULL) {  // this is a link
+                                link_start = current + 1;  // its safe
+                                link_state = 1;
+                                break;
+                            }
+                        }
+                    }
+                    b_string_append_c(rv, c);
+                }
+                break;
+
+            case '!':
+                if (link_state == 0 && image_state == 0) {
+                    if (!is_last && src[current + 1] == '[') {
+                        tmp = strchr(src + current + 1, ']');
+                        if (tmp != NULL) {
+                            if (strlen(tmp) > 1 && tmp[1] == '(') {
+                                tmp = strchr(tmp, ')');
+                                if (tmp != NULL) {  // this is an image
+                                    image_start = current + 2;  // its safe
+                                    image_state = 1;
+                                    break;
+                                }
+                            }
+                        }
+                    }
+                    b_string_append_c(rv, c);
+                }
+                break;
+
+            case ']':
+                if (link_state == 1) {
+                    link_state = 2;
+                    title = b_strndup(src + link_start, current - link_start);
+                    break;
+                }
+                if (image_state == 1) {
+                    image_state = 2;
+                    title = b_strndup(src + image_start, current - image_start);
+                    break;
+                }
+                b_string_append_c(rv, c);
+                break;
+
+            case '(':
+                if (link_state == 2) {
+                    link_state = 3;
+                    link_start = current + 1;  // its safe
+                    break;
+                }
+                if (image_state == 2) {
+                    image_state = 3;
+                    image_start = current + 1;  // its safe
+                    break;
+                }
+                b_string_append_c(rv, c);
+                break;
+
+            case ')':
+                if (link_state == 3) {
+                    link_state = 0;
+                    tmp = b_strndup(src + link_start, current - link_start);
+                    b_string_append_printf(rv, "<a href=\"%s\">%s</a>", tmp, title);
+                    free(tmp);
+                    tmp = NULL;
+                    free(title);
+                    title = NULL;
+                    break;
+                }
+                if (image_state == 3) {
+                    image_state = 0;
+                    tmp = b_strndup(src + image_start, current - image_start);
+                    b_string_append_printf(rv, "<img src=\"%s\" alt=\"%s\">", tmp, title);
+                    free(tmp);
+                    tmp = NULL;
+                    free(title);
+                    title = NULL;
+                    break;
+                }
+                b_string_append_c(rv, c);
+                break;
+
+            case '&':
+                b_string_append(rv, "&amp;");
+                break;
+
+            case '<':
+                b_string_append(rv, "&lt;");
+                break;
+
+            case '>':
+                b_string_append(rv, "&gt;");
+                break;
+
+            case '"':
+                b_string_append(rv, "&quot;");
+                break;
+
+            case '\'':
+                b_string_append(rv, "&#x27;");
+                break;
+
+            case '/':
+                b_string_append(rv, "&#x2F;");
+                break;
+
+            default:
+                if (link_state == 0 && image_state == 0)
+                    b_string_append_c(rv, c);
+        }
+
+        current++;
+    }
+
+    return b_string_free(rv, false);
+}
+
+
+char*
 blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err)
 {
     if (err == NULL || *err != NULL)
@@ -69,6 +280,7 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err)
     size_t prefix_len = 0;
     char *tmp = NULL;
     char *tmp2 = NULL;
+    char *parsed = NULL;
     char **tmpv = NULL;
     char d;
 
@@ -150,8 +362,11 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err)
                 if (c == '\n' || c == '\r' || is_last) {
                     end = is_last && c != '\n' && c != '\r' ? src_len : current;
                     tmp = b_strndup(src + start, end - start);
+                    parsed = blogc_content_parse_inline(tmp);
                     b_string_append_printf(rv, "<h%d>%s</h%d>\n", header_level,
-                        tmp, header_level);
+                        parsed, header_level);
+                    free(parsed);
+                    parsed = NULL;
                     free(tmp);
                     tmp = NULL;
                     state = CONTENT_START_LINE;
@@ -316,8 +531,7 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err)
                     d = '\0';
                     break;
                 }
-                *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len,
-                    current, "Malformed horizontal rule, must use only '%c'", d);
+                state = CONTENT_PARAGRAPH;
                 break;
 
             case CONTENT_UNORDERED_LIST_START:
diff --git a/src/content-parser.h b/src/content-parser.h
index 0a55fd9..db65332 100644
--- a/src/content-parser.h
+++ b/src/content-parser.h
@@ -12,6 +12,7 @@
 #include <stdlib.h>
 #include "error.h"
 
+char* blogc_content_parse_inline(const char *src);
 char* blogc_content_parse(const char *src, size_t src_len,
     blogc_error_t **err);
 
diff --git a/tests/check_content_parser.c b/tests/check_content_parser.c
index b3e7092..a613052 100644
--- a/tests/check_content_parser.c
+++ b/tests/check_content_parser.c
@@ -56,7 +56,9 @@ test_content_parse(void **state)
         "</style>\n"
         "\n"
         "guda\n"
-        "yay";
+        "yay\n"
+        "\n"
+        "**bola**\n";
     blogc_error_t *err = NULL;
     char *html = blogc_content_parse(a, strlen(a), &err);
     assert_null(err);
@@ -91,7 +93,8 @@ test_content_parse(void **state)
         " chunda\n"
         "</style>\n"
         "<p>guda\n"
-        "yay</p>\n");
+        "yay</p>\n"
+        "<p>**bola**</p>\n");
     free(html);
 }
 
@@ -319,6 +322,18 @@ test_content_parse_invalid_code(void **state)
 }
 
 
+void
+test_content_parse_inline(void **state)
+{
+    char *html = blogc_content_parse_inline("**bola***asd* ``chunda``");
+    assert_string_equal(html, "<strong>bola</strong><em>asd</em> <code>chunda</code>");
+    free(html);
+    html = blogc_content_parse_inline("*bola*");
+    assert_string_equal(html, "<em>bola</em>");
+    free(html);
+}
+
+
 int
 main(void)
 {
@@ -332,6 +347,7 @@ main(void)
         unit_test(test_content_parse_invalid_header_empty),
         unit_test(test_content_parse_invalid_blockquote),
         unit_test(test_content_parse_invalid_code),
+        unit_test(test_content_parse_inline),
     };
     return run_tests(tests);
 }