From b24efa3885637d953892ccf718662aa693eaa879 Mon Sep 17 00:00:00 2001 From: "Rafael G. Martins" Date: Thu, 7 May 2015 22:35:18 -0300 Subject: content-parser: more random stuff --- src/content-parser.c | 179 ++++++++++++++++++------------------------- src/content-parser.h | 3 +- src/main.c | 3 +- src/source-parser.c | 11 ++- tests/check_content_parser.c | 141 ++++++++++++---------------------- tests/check_source_parser.c | 10 ++- 6 files changed, 141 insertions(+), 206 deletions(-) diff --git a/src/content-parser.c b/src/content-parser.c index b8df868..ea41852 100644 --- a/src/content-parser.c +++ b/src/content-parser.c @@ -15,7 +15,6 @@ #include "utils/utils.h" #include "content-parser.h" -#include "error.h" // this is a half ass implementation of a markdown-like syntax. bugs are @@ -23,7 +22,6 @@ // TODO: inline elements: line breaks -// TODO: error handling typedef enum { @@ -121,56 +119,45 @@ blogc_content_parse_inline(const char *src) open_strong_ast = false; else open_strong_und = false; + break; } - else { - if (state == 0) - b_string_append(rv, ""); - if (c == '*') - open_strong_ast = true; - else - open_strong_und = true; - } + if (state == 0) + b_string_append(rv, ""); + if (c == '*') + open_strong_ast = true; + else + open_strong_und = true; + break; } - else { - if ((c == '*' && open_em_ast) || (c == '_' && open_em_und)) { - if (state == 0) - b_string_append(rv, ""); - if (c == '*') - open_em_ast = false; - else - open_em_und = false; - } - else { - if (state == 0) - b_string_append(rv, ""); - if (c == '*') - open_em_ast = true; - else - open_em_und = true; - } + if ((c == '*' && open_em_ast) || (c == '_' && open_em_und)) { + if (state == 0) + b_string_append(rv, ""); + if (c == '*') + open_em_ast = false; + else + open_em_und = false; + break; } + if (state == 0) + b_string_append(rv, ""); + if (c == '*') + open_em_ast = true; + else + open_em_und = true; break; case '`': if (!is_last && src[current + 1] == c) { current++; - if (state == 0) { - if (open_code_double) - b_string_append(rv, ""); - else - b_string_append(rv, ""); - } + if (state == 0) + b_string_append_printf(rv, "<%scode>", + open_code_double ? "/" : ""); open_code_double = !open_code_double; + break; } - else { - if (state == 0) { - if (open_code) - b_string_append(rv, ""); - else - b_string_append(rv, ""); - } - open_code = !open_code; - } + if (state == 0) + b_string_append_printf(rv, "<%scode>", open_code ? "/" : ""); + open_code = !open_code; break; case '!': @@ -240,12 +227,12 @@ blogc_content_parse_inline(const char *src) if (state == 3) { state = 0; tmp = b_strndup(src + start, current - start); - if (is_image) { - b_string_append_printf(rv, "\"%s\"", tmp, tmp2); - } - else { - b_string_append_printf(rv, "%s", tmp, tmp2); - } + if (is_image) + b_string_append_printf(rv, "\"%s\"", + tmp, tmp2); + else + b_string_append_printf(rv, "%s", + tmp, tmp2); free(tmp); tmp = NULL; free(tmp2); @@ -300,13 +287,14 @@ blogc_content_parse_inline(const char *src) char* -blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err) +blogc_content_parse(const char *src) { - if (err == NULL || *err != NULL) - return NULL; + // src is always nul-terminated. + size_t src_len = strlen(src); size_t current = 0; size_t start = 0; + size_t start2 = 0; size_t end = 0; unsigned int header_level = 0; @@ -336,38 +324,37 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err) case CONTENT_START_LINE: if (c == '\n' || c == '\r' || is_last) break; + start = current; if (c == '#') { header_level = 1; state = CONTENT_HEADER; break; } if (c == '*' || c == '+' || c == '-') { + start2 = current; state = CONTENT_UNORDERED_LIST_OR_HORIZONTAL_RULE; - start = current; d = c; break; } if (c >= '0' && c <= '9') { + start2 = current; state = CONTENT_ORDERED_LIST; - start = current; break; } if (c == ' ' || c == '\t') { + start2 = current; state = CONTENT_CODE; - start = current; break; } if (c == '<') { state = CONTENT_HTML; - start = current; break; } if (c == '>') { state = CONTENT_BLOCKQUOTE; - start = current; + start2 = current; break; } - start = current; state = CONTENT_PARAGRAPH; break; @@ -380,8 +367,7 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err) state = CONTENT_HEADER_TITLE_START; break; } - *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len, - current, "Malformed header, no space or tab after '#'"); + state = CONTENT_PARAGRAPH; break; case CONTENT_HEADER_TITLE_START: @@ -440,15 +426,13 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err) case CONTENT_BLOCKQUOTE_START: if (c == '\n' || c == '\r' || is_last) { end = is_last && c != '\n' && c != '\r' ? src_len : current; - tmp = b_strndup(src + start, end - start); + tmp = b_strndup(src + start2, end - start2); if (b_str_starts_with(tmp, prefix)) { lines = b_slist_append(lines, b_strdup(tmp + strlen(prefix))); state = CONTENT_BLOCKQUOTE_END; } else { - *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len, - current, "Malformed blockquote, must use same prefix " - "as previous line(s): %s", prefix); + state = CONTENT_PARAGRAPH; free(prefix); prefix = NULL; b_slist_free_full(lines, free); @@ -469,11 +453,9 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err) else b_string_append_printf(tmp_str, "%s\n", l->data); } - tmp = blogc_content_parse(tmp_str->str, tmp_str->len, err); - if (*err == NULL) { - b_string_append_printf(rv, "
%s
\n", - tmp); - } + tmp = blogc_content_parse(tmp_str->str); + b_string_append_printf(rv, "
%s
\n", + tmp); free(tmp); tmp = NULL; b_string_free(tmp_str, true); @@ -483,10 +465,10 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err) free(prefix); prefix = NULL; state = CONTENT_START_LINE; - start = current; + start2 = current; } else { - start = current; + start2 = current; state = CONTENT_BLOCKQUOTE_START; } break; @@ -501,19 +483,20 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err) case CONTENT_CODE_START: if (c == '\n' || c == '\r' || is_last) { end = is_last && c != '\n' && c != '\r' ? src_len : current; - tmp = b_strndup(src + start, end - start); + tmp = b_strndup(src + start2, end - start2); if (b_str_starts_with(tmp, prefix)) { lines = b_slist_append(lines, b_strdup(tmp + strlen(prefix))); state = CONTENT_CODE_END; } else { - *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len, - current, "Malformed code block, must use same prefix " - "as previous line(s): '%s'", prefix); + state = CONTENT_PARAGRAPH; free(prefix); prefix = NULL; b_slist_free_full(lines, free); lines = NULL; + free(tmp); + tmp = NULL; + break; } free(tmp); tmp = NULL; @@ -536,10 +519,10 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err) free(prefix); prefix = NULL; state = CONTENT_START_LINE; - start = current; + start2 = current; } else { - start = current; + start2 = current; state = CONTENT_CODE_START; } break; @@ -572,7 +555,7 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err) case CONTENT_UNORDERED_LIST_START: if (c == '\n' || c == '\r' || is_last) { end = is_last && c != '\n' && c != '\r' ? src_len : current; - tmp = b_strndup(src + start, end - start); + tmp = b_strndup(src + start2, end - start2); if (b_str_starts_with(tmp, prefix)) { tmp3 = b_strdup(tmp + strlen(prefix)); parsed = blogc_content_parse_inline(tmp3); @@ -583,9 +566,10 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err) parsed = NULL; } else { - *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len, - current, "Malformed unordered list, must use same prefix " - "as previous line(s): %s", prefix); + state = CONTENT_PARAGRAPH; + free(tmp); + tmp = NULL; + break; } free(tmp); tmp = NULL; @@ -605,10 +589,10 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err) free(prefix); prefix = NULL; state = CONTENT_START_LINE; - start = current; + start2 = current; } else { - start = current; + start2 = current; state = CONTENT_UNORDERED_LIST_START; } break; @@ -633,34 +617,25 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err) case CONTENT_ORDERED_LIST_START: if (c == '\n' || c == '\r' || is_last) { end = is_last && c != '\n' && c != '\r' ? src_len : current; - tmp = b_strndup(src + start, end - start); + tmp = b_strndup(src + start2, end - start2); if (strlen(tmp) >= prefix_len) { tmp2 = b_strndup(tmp, prefix_len); tmpv = b_str_split(tmp2, '.', 2); free(tmp2); tmp2 = NULL; if (b_strv_length(tmpv) != 2) { - *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len, - current, "Malformed ordered list, prefix must be a " - "number, followed by a '.', followed by the content. " - "Content must be aligned with content from previous line(s)"); + state = CONTENT_PARAGRAPH; goto err_li; } for (unsigned int i = 0; tmpv[0][i] != '\0'; i++) { if (!(tmpv[0][i] >= '0' && tmpv[0][i] <= '9')) { - *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len, - current, "Malformed ordered list, prefix must be a " - "number, followed by a '.', followed by the content. " - "Content must be aligned with content from previous line(s)"); + state = CONTENT_PARAGRAPH; goto err_li; } } for (unsigned int i = 0; tmpv[1][i] != '\0'; i++) { if (!(tmpv[1][i] == ' ' || tmpv[1][i] == '\t')) { - *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len, - current, "Malformed ordered list, prefix must be a " - "number, followed by a '.', followed by the content. " - "Content must be aligned with content from previous line(s)"); + state = CONTENT_PARAGRAPH; goto err_li; } } @@ -679,7 +654,7 @@ err_li: free(tmp); tmp = NULL; } - if (!is_last) + if (state == CONTENT_PARAGRAPH || !is_last) break; case CONTENT_ORDERED_LIST_END: @@ -693,10 +668,10 @@ err_li: free(prefix); prefix = NULL; state = CONTENT_START_LINE; - start = current; + start2 = current; } else { - start = current; + start2 = current; state = CONTENT_ORDERED_LIST_START; } break; @@ -727,16 +702,8 @@ err_li: } - if (*err != NULL) - break; - current++; } - if (*err != NULL) { - b_string_free(rv, true); - return NULL; - } - return b_string_free(rv, false); } diff --git a/src/content-parser.h b/src/content-parser.h index db65332..5c9ae4b 100644 --- a/src/content-parser.h +++ b/src/content-parser.h @@ -13,7 +13,6 @@ #include "error.h" char* blogc_content_parse_inline(const char *src); -char* blogc_content_parse(const char *src, size_t src_len, - blogc_error_t **err); +char* blogc_content_parse(const char *src); #endif /* _CONTENT_PARSER_H */ diff --git a/src/main.c b/src/main.c index c112c4f..be18050 100644 --- a/src/main.c +++ b/src/main.c @@ -230,7 +230,8 @@ main(int argc, char **argv) } } - fprintf(fp, "%s", out); + if (out != NULL) + fprintf(fp, "%s", out); if (!write_to_stdout) fclose(fp); diff --git a/src/source-parser.c b/src/source-parser.c index 60b5fe5..13741bd 100644 --- a/src/source-parser.c +++ b/src/source-parser.c @@ -14,6 +14,7 @@ #include #include "utils/utils.h" +#include "content-parser.h" #include "source-parser.h" #include "error.h" @@ -125,10 +126,12 @@ blogc_source_parse(const char *src, size_t src_len, blogc_error_t **err) state = SOURCE_CONTENT; break; - case SOURCE_CONTENT: - if (current == (src_len - 1)) - b_trie_insert(rv, "CONTENT", - b_strndup(src + start, src_len - start)); + case SOURCE_CONTENT: + if (current == (src_len - 1)) { + tmp = b_strndup(src + start, src_len - start); + b_trie_insert(rv, "RAW_CONTENT", tmp); + b_trie_insert(rv, "CONTENT", blogc_content_parse(tmp)); + } break; } diff --git a/tests/check_content_parser.c b/tests/check_content_parser.c index 6fec86c..49f07ba 100644 --- a/tests/check_content_parser.c +++ b/tests/check_content_parser.c @@ -16,14 +16,13 @@ #include #include #include "../src/content-parser.h" -#include "../src/error.h" #include "../src/utils/utils.h" static void test_content_parse(void **state) { - const char *a = + char *html = blogc_content_parse( "# um\n" "## dois\n" "### tres\n" @@ -58,10 +57,7 @@ test_content_parse(void **state) "guda\n" "yay\n" "\n" - "**bola**\n"; - blogc_error_t *err = NULL; - char *html = blogc_content_parse(a, strlen(a), &err); - assert_null(err); + "**bola**\n"); assert_non_null(html); assert_string_equal(html, "

um

\n" @@ -102,27 +98,20 @@ test_content_parse(void **state) void test_content_parse_header(void **state) { - const char *a = "## bola"; - blogc_error_t *err = NULL; - char *html = blogc_content_parse(a, strlen(a), &err); - assert_null(err); + char *html = blogc_content_parse("## bola"); assert_non_null(html); assert_string_equal(html, "

bola

\n"); free(html); - a = "## bola\n"; - html = blogc_content_parse(a, strlen(a), &err); - assert_null(err); + html = blogc_content_parse("## bola\n"); assert_non_null(html); assert_string_equal(html, "

bola

\n"); free(html); - a = + html = blogc_content_parse( "bola\n" "\n" "## bola\n" "\n" - "guda\n"; - html = blogc_content_parse(a, strlen(a), &err); - assert_null(err); + "guda\n"); assert_non_null(html); assert_string_equal(html, "

bola

\n" @@ -135,28 +124,21 @@ test_content_parse_header(void **state) void test_content_parse_html(void **state) { - const char *a = "
\n
"; - blogc_error_t *err = NULL; - char *html = blogc_content_parse(a, strlen(a), &err); - assert_null(err); + char *html = blogc_content_parse("
\n
"); assert_non_null(html); assert_string_equal(html, "
\n
\n"); free(html); - a = "
\n
\n"; - html = blogc_content_parse(a, strlen(a), &err); - assert_null(err); + html = blogc_content_parse("
\n
\n"); assert_non_null(html); assert_string_equal(html, "
\n
\n"); free(html); - a = + html = blogc_content_parse( "bola\n" "\n" "
\n" "
\n" "\n" - "chunda\n"; - html = blogc_content_parse(a, strlen(a), &err); - assert_null(err); + "chunda\n"); assert_non_null(html); assert_string_equal(html, "

bola

\n" @@ -169,34 +151,27 @@ test_content_parse_html(void **state) void test_content_parse_blockquote(void **state) { - const char *a = "> bola\n> guda"; - blogc_error_t *err = NULL; - char *html = blogc_content_parse(a, strlen(a), &err); - assert_null(err); + char *html = blogc_content_parse("> bola\n> guda"); assert_non_null(html); assert_string_equal(html, "

bola\n" "guda

\n" "
\n"); free(html); - a = "> bola\n> guda\n"; - html = blogc_content_parse(a, strlen(a), &err); - assert_null(err); + html = blogc_content_parse("> bola\n> guda\n"); assert_non_null(html); assert_string_equal(html, "

bola\n" "guda

\n" "
\n"); free(html); - a = + html = blogc_content_parse( "bola\n" "\n" "> bola\n" "> guda\n" "\n" - "chunda\n"; - html = blogc_content_parse(a, strlen(a), &err); - assert_null(err); + "chunda\n"); assert_non_null(html); assert_string_equal(html, "

bola

\n" @@ -211,32 +186,25 @@ test_content_parse_blockquote(void **state) void test_content_parse_code(void **state) { - const char *a = " bola\n guda"; - blogc_error_t *err = NULL; - char *html = blogc_content_parse(a, strlen(a), &err); - assert_null(err); + char *html = blogc_content_parse(" bola\n guda"); assert_non_null(html); assert_string_equal(html, "
bola\n"
         "guda
\n"); free(html); - a = " bola\n guda\n"; - html = blogc_content_parse(a, strlen(a), &err); - assert_null(err); + html = blogc_content_parse(" bola\n guda\n"); assert_non_null(html); assert_string_equal(html, "
bola\n"
         "guda
\n"); free(html); - a = + html = blogc_content_parse( "bola\n" "\n" " bola\n" " guda\n" "\n" - "chunda\n"; - html = blogc_content_parse(a, strlen(a), &err); - assert_null(err); + "chunda\n"); assert_non_null(html); assert_string_equal(html, "

bola

\n" @@ -250,75 +218,66 @@ test_content_parse_code(void **state) void test_content_parse_invalid_header(void **state) { - const char *a = + char *html = blogc_content_parse( "asd\n" "\n" - "##bola\n"; - blogc_error_t *err = NULL; - char *html = blogc_content_parse(a, strlen(a), &err); - assert_non_null(err); - assert_null(html); - assert_int_equal(err->type, BLOGC_ERROR_CONTENT_PARSER); - assert_string_equal(err->msg, - "Malformed header, no space or tab after '#'\n" - "Error occurred near to 'bola'"); - blogc_error_free(err); + "##bola\n"); + assert_non_null(html); + assert_string_equal(html, + "

asd

\n" + "

##bola

\n"); + free(html); } void test_content_parse_invalid_header_empty(void **state) { - const char *a = + char *html = blogc_content_parse( "asd\n" "\n" "##\n" "\n" - "qwe\n"; - blogc_error_t *err = NULL; - char *html = blogc_content_parse(a, strlen(a), &err); - assert_non_null(err); - assert_null(html); - assert_int_equal(err->type, BLOGC_ERROR_CONTENT_PARSER); - assert_string_equal(err->msg, - "Malformed header, no space or tab after '#'"); - blogc_error_free(err); + "qwe\n"); + assert_non_null(html); + assert_string_equal(html, + "

asd

\n" + "

##\n" + "\n" + "qwe

\n"); + free(html); } void test_content_parse_invalid_blockquote(void **state) { - const char *a = + char *html = blogc_content_parse( "> asd\n" "> bola\n" - "> foo\n"; - blogc_error_t *err = NULL; - char *html = blogc_content_parse(a, strlen(a), &err); - assert_non_null(err); - assert_null(html); - assert_int_equal(err->type, BLOGC_ERROR_CONTENT_PARSER); - assert_string_equal(err->msg, - "Malformed blockquote, must use same prefix as previous line(s): > "); - blogc_error_free(err); + "> foo\n"); + assert_non_null(html); + assert_string_equal(html, + "

> asd\n" + "> bola\n" + "> foo

\n"); + free(html); } void test_content_parse_invalid_code(void **state) { - const char *a = + char *html = blogc_content_parse( " asd\n" " bola\n" - " foo\n"; - blogc_error_t *err = NULL; - char *html = blogc_content_parse(a, strlen(a), &err); - assert_non_null(err); - assert_null(html); - assert_int_equal(err->type, BLOGC_ERROR_CONTENT_PARSER); - assert_string_equal(err->msg, - "Malformed code block, must use same prefix as previous line(s): ' '"); - blogc_error_free(err); + " foo\n"); + assert_non_null(html); + assert_string_equal(html, + "

asd\n" + " bola\n" + " foo

\n"); + free(html); } diff --git a/tests/check_source_parser.c b/tests/check_source_parser.c index 0838f41..c779ed3 100644 --- a/tests/check_source_parser.c +++ b/tests/check_source_parser.c @@ -34,10 +34,13 @@ test_source_parse(void **state) b_trie_t *source = blogc_source_parse(a, strlen(a), &err); assert_null(err); assert_non_null(source); - assert_int_equal(b_trie_size(source), 3); + assert_int_equal(b_trie_size(source), 4); assert_string_equal(b_trie_lookup(source, "VAR1"), "asd asd"); assert_string_equal(b_trie_lookup(source, "VAR2"), "123chunda"); assert_string_equal(b_trie_lookup(source, "CONTENT"), + "

This is a test

\n" + "

bola

\n"); + assert_string_equal(b_trie_lookup(source, "RAW_CONTENT"), "# This is a test\n" "\n" "bola\n"); @@ -61,10 +64,13 @@ test_source_parse_with_spaces(void **state) b_trie_t *source = blogc_source_parse(a, strlen(a), &err); assert_null(err); assert_non_null(source); - assert_int_equal(b_trie_size(source), 3); + assert_int_equal(b_trie_size(source), 4); assert_string_equal(b_trie_lookup(source, "VAR1"), "chunda"); assert_string_equal(b_trie_lookup(source, "BOLA"), "guda"); assert_string_equal(b_trie_lookup(source, "CONTENT"), + "

This is a test

\n" + "

bola

\n"); + assert_string_equal(b_trie_lookup(source, "RAW_CONTENT"), "# This is a test\n" "\n" "bola\n"); -- cgit v1.2.3-18-g5258