From 3aeccf252c8c266b51c1a2cc1dc9e9e3d5bf2f90 Mon Sep 17 00:00:00 2001 From: "Rafael G. Martins" Date: Sat, 13 Jun 2015 03:32:21 -0300 Subject: content-parser: implemented excerpt, and now for good! --- src/content-parser.c | 41 ++++++++- src/content-parser.h | 2 +- src/source-parser.c | 9 +- tests/check_content_parser.c | 210 +++++++++++++++++++++++++++++++++---------- tests/check_loader.c | 3 +- tests/check_source_parser.c | 54 ++++++++++- 6 files changed, 267 insertions(+), 52 deletions(-) diff --git a/src/content-parser.c b/src/content-parser.c index aa76777..fa17ab8 100644 --- a/src/content-parser.c +++ b/src/content-parser.c @@ -22,6 +22,8 @@ typedef enum { CONTENT_START_LINE = 1, + CONTENT_EXCERPT, + CONTENT_EXCERPT_END, CONTENT_HEADER, CONTENT_HEADER_TITLE_START, CONTENT_HEADER_TITLE, @@ -355,7 +357,7 @@ blogc_content_parse_inline(const char *src) char* -blogc_content_parse(const char *src) +blogc_content_parse(const char *src, size_t *end_excerpt) { // src is always nul-terminated. size_t src_len = strlen(src); @@ -364,6 +366,7 @@ blogc_content_parse(const char *src) size_t start = 0; size_t start2 = 0; size_t end = 0; + size_t eend = 0; unsigned int header_level = 0; char *prefix = NULL; @@ -393,6 +396,13 @@ blogc_content_parse(const char *src) if (c == '\n' || c == '\r' || is_last) break; start = current; + if (c == '.') { + if (end_excerpt != NULL) { + eend = rv->len; // fuck it + state = CONTENT_EXCERPT; + break; + } + } if (c == '#') { header_level = 1; state = CONTENT_HEADER; @@ -426,6 +436,33 @@ blogc_content_parse(const char *src) state = CONTENT_PARAGRAPH; break; + case CONTENT_EXCERPT: + if (end_excerpt != NULL) { + if (c == '.') + break; + if (c == '\n' || c == '\r') { + //*end_excerpt = eend; + //state = CONTENT_START_LINE; + state = CONTENT_EXCERPT_END; + break; + } + } + eend = 0; + state = CONTENT_PARAGRAPH; + break; + + case CONTENT_EXCERPT_END: + if (end_excerpt != NULL) { + if (c == '\n' || c == '\r') { + *end_excerpt = eend; + state = CONTENT_START_LINE; + break; + } + } + eend = 0; + state = CONTENT_PARAGRAPH_END; + break; + case CONTENT_HEADER: if (c == '#') { header_level += 1; @@ -521,7 +558,7 @@ blogc_content_parse(const char *src) else b_string_append_printf(tmp_str, "%s\n", l->data); } - tmp = blogc_content_parse(tmp_str->str); + tmp = blogc_content_parse(tmp_str->str, NULL); b_string_append_printf(rv, "
%s
\n", tmp); free(tmp); diff --git a/src/content-parser.h b/src/content-parser.h index d0a0732..ec441c6 100644 --- a/src/content-parser.h +++ b/src/content-parser.h @@ -13,6 +13,6 @@ #include "error.h" char* blogc_content_parse_inline(const char *src); -char* blogc_content_parse(const char *src); +char* blogc_content_parse(const char *src, size_t *end_excerpt); #endif /* _CONTENT_PARSER_H */ diff --git a/src/source-parser.c b/src/source-parser.c index f0fca29..050082f 100644 --- a/src/source-parser.c +++ b/src/source-parser.c @@ -38,6 +38,7 @@ blogc_source_parse(const char *src, size_t src_len, blogc_error_t **err) size_t current = 0; size_t start = 0; + size_t end_excerpt = 0; char *key = NULL; char *tmp = NULL; @@ -148,7 +149,13 @@ blogc_source_parse(const char *src, size_t src_len, blogc_error_t **err) if (current == (src_len - 1)) { tmp = b_strndup(src + start, src_len - start); b_trie_insert(rv, "RAW_CONTENT", tmp); - b_trie_insert(rv, "CONTENT", blogc_content_parse(tmp)); + b_trie_insert(rv, "CONTENT", blogc_content_parse(tmp, &end_excerpt)); + if (end_excerpt != 0) + b_trie_insert(rv, "EXCERPT", + b_strndup(b_trie_lookup(rv, "CONTENT"), end_excerpt)); + else + b_trie_insert(rv, "EXCERPT", + b_strdup(b_trie_lookup(rv, "CONTENT"))); } break; } diff --git a/tests/check_content_parser.c b/tests/check_content_parser.c index 9b1d318..f19d7dc 100644 --- a/tests/check_content_parser.c +++ b/tests/check_content_parser.c @@ -22,6 +22,7 @@ static void test_content_parse(void **state) { + size_t l = 0; char *html = blogc_content_parse( "# um\n" "## dois\n" @@ -57,8 +58,9 @@ test_content_parse(void **state) "guda\n" "yay\n" "\n" - "**bola**\n"); + "**bola**\n", &l); assert_non_null(html); + assert_int_equal(l, 0); assert_string_equal(html, "

um

\n" "

dois

\n" @@ -95,14 +97,56 @@ test_content_parse(void **state) } +static void +test_content_parse_with_excerpt(void **state) +{ + size_t l = 0; + char *html = blogc_content_parse( + "# test\n" + "\n" + "chunda\n" + "\n" + "..\n" + "\n" + "guda\n" + "lol", &l); + assert_non_null(html); + assert_int_equal(l, 28); + assert_string_equal(html, + "

test

\n" + "

chunda

\n" + "

guda\n" + "lol

\n"); + free(html); + l = 0; + html = blogc_content_parse( + "# test\n" + "\n" + "chunda\n" + "\n" + "...\n" + "\n" + "guda\n" + "lol", &l); + assert_non_null(html); + assert_int_equal(l, 28); + assert_string_equal(html, + "

test

\n" + "

chunda

\n" + "

guda\n" + "lol

\n"); + free(html); +} + + void test_content_parse_header(void **state) { - char *html = blogc_content_parse("## bola"); + char *html = blogc_content_parse("## bola", NULL); assert_non_null(html); assert_string_equal(html, "

bola

\n"); free(html); - html = blogc_content_parse("## bola\n"); + html = blogc_content_parse("## bola\n", NULL); assert_non_null(html); assert_string_equal(html, "

bola

\n"); free(html); @@ -111,7 +155,7 @@ test_content_parse_header(void **state) "\n" "## bola\n" "\n" - "guda\n"); + "guda\n", NULL); assert_non_null(html); assert_string_equal(html, "

bola

\n" @@ -124,11 +168,11 @@ test_content_parse_header(void **state) void test_content_parse_html(void **state) { - char *html = blogc_content_parse("
\n
"); + char *html = blogc_content_parse("
\n
", NULL); assert_non_null(html); assert_string_equal(html, "
\n
\n"); free(html); - html = blogc_content_parse("
\n
\n"); + html = blogc_content_parse("
\n
\n", NULL); assert_non_null(html); assert_string_equal(html, "
\n
\n"); free(html); @@ -138,7 +182,7 @@ test_content_parse_html(void **state) "
\n" "
\n" "\n" - "chunda\n"); + "chunda\n", NULL); assert_non_null(html); assert_string_equal(html, "

bola

\n" @@ -151,14 +195,14 @@ test_content_parse_html(void **state) void test_content_parse_blockquote(void **state) { - char *html = blogc_content_parse("> bola\n> guda"); + char *html = blogc_content_parse("> bola\n> guda", NULL); assert_non_null(html); assert_string_equal(html, "

bola\n" "guda

\n" "
\n"); free(html); - html = blogc_content_parse("> bola\n> guda\n"); + html = blogc_content_parse("> bola\n> guda\n", NULL); assert_non_null(html); assert_string_equal(html, "

bola\n" @@ -171,7 +215,7 @@ test_content_parse_blockquote(void **state) "> bola\n" "> guda\n" "\n" - "chunda\n"); + "chunda\n", NULL); assert_non_null(html); assert_string_equal(html, "

bola

\n" @@ -186,13 +230,13 @@ test_content_parse_blockquote(void **state) void test_content_parse_code(void **state) { - char *html = blogc_content_parse(" bola\n guda"); + char *html = blogc_content_parse(" bola\n guda", NULL); assert_non_null(html); assert_string_equal(html, "
bola\n"
         "guda
\n"); free(html); - html = blogc_content_parse(" bola\n guda\n"); + html = blogc_content_parse(" bola\n guda\n", NULL); assert_non_null(html); assert_string_equal(html, "
bola\n"
@@ -204,7 +248,7 @@ test_content_parse_code(void **state)
         "   bola\n"
         "   guda\n"
         "\n"
-        "chunda\n");
+        "chunda\n", NULL);
     assert_non_null(html);
     assert_string_equal(html,
         "

bola

\n" @@ -218,28 +262,28 @@ test_content_parse_code(void **state) void test_content_parse_horizontal_rule(void **state) { - char *html = blogc_content_parse("bola\nguda\n\n**"); + char *html = blogc_content_parse("bola\nguda\n\n**", NULL); assert_non_null(html); assert_string_equal(html, "

bola\n" "guda

\n" "
\n"); free(html); - html = blogc_content_parse("bola\nguda\n\n++++"); + html = blogc_content_parse("bola\nguda\n\n++++", NULL); assert_non_null(html); assert_string_equal(html, "

bola\n" "guda

\n" "
\n"); free(html); - html = blogc_content_parse("bola\nguda\n\n--\n"); + html = blogc_content_parse("bola\nguda\n\n--\n", NULL); assert_non_null(html); assert_string_equal(html, "

bola\n" "guda

\n" "
\n"); free(html); - html = blogc_content_parse("bola\nguda\n\n****\n"); + html = blogc_content_parse("bola\nguda\n\n****\n", NULL); assert_non_null(html); assert_string_equal(html, "

bola\n" @@ -251,7 +295,7 @@ test_content_parse_horizontal_rule(void **state) "\n" "**\n" "\n" - "chunda\n"); + "chunda\n", NULL); assert_non_null(html); assert_string_equal(html, "

bola

\n" @@ -263,7 +307,7 @@ test_content_parse_horizontal_rule(void **state) "\n" "----\n" "\n" - "chunda\n"); + "chunda\n", NULL); assert_non_null(html); assert_string_equal(html, "

bola

\n" @@ -281,7 +325,7 @@ test_content_parse_unordered_list(void **state) "\n" "* asd\n" "* qwe\n" - "* zxc"); + "* zxc", NULL); assert_non_null(html); assert_string_equal(html, "

lol

\n" @@ -296,7 +340,7 @@ test_content_parse_unordered_list(void **state) "\n" "* asd\n" "* qwe\n" - "* zxc\n"); + "* zxc\n", NULL); assert_non_null(html); assert_string_equal(html, "

lol

\n" @@ -313,7 +357,7 @@ test_content_parse_unordered_list(void **state) "* qwe\n" "* zxc\n" "\n" - "fuuuu\n"); + "fuuuu\n", NULL); assert_non_null(html); assert_string_equal(html, "

lol

\n" @@ -335,7 +379,7 @@ test_content_parse_ordered_list(void **state) "\n" "1. asd\n" "2. qwe\n" - "3. zxc"); + "3. zxc", NULL); assert_non_null(html); assert_string_equal(html, "

lol

\n" @@ -350,7 +394,7 @@ test_content_parse_ordered_list(void **state) "\n" "1. asd\n" "2. qwe\n" - "3. zxc\n"); + "3. zxc\n", NULL); assert_non_null(html); assert_string_equal(html, "

lol

\n" @@ -367,7 +411,7 @@ test_content_parse_ordered_list(void **state) "2. qwe\n" "3. zxc\n" "\n" - "fuuuu\n"); + "fuuuu\n", NULL); assert_non_null(html); assert_string_equal(html, "

lol

\n" @@ -380,7 +424,7 @@ test_content_parse_ordered_list(void **state) free(html); html = blogc_content_parse( "1.\nasd\n" - "2. qwe\n"); + "2. qwe\n", NULL); assert_non_null(html); assert_string_equal(html, "

1.\n" @@ -389,7 +433,7 @@ test_content_parse_ordered_list(void **state) "

  • qwe
  • \n" "\n"); free(html); - html = blogc_content_parse("1.\n"); + html = blogc_content_parse("1.\n", NULL); assert_non_null(html); assert_string_equal(html, "
      \n" @@ -399,13 +443,87 @@ test_content_parse_ordered_list(void **state) } +static void +test_content_parse_invalid_excerpt(void **state) +{ + size_t l = 0; + char *html = blogc_content_parse( + "# test\n" + "\n" + "chunda\n" + "..\n" + "\n" + "guda\n" + "lol", &l); + assert_non_null(html); + assert_int_equal(l, 0); + assert_string_equal(html, + "

      test

      \n" + "

      chunda\n" + "..

      \n" + "

      guda\n" + "lol

      \n"); + free(html); + l = 0; + html = blogc_content_parse( + "# test\n" + "\n" + "chunda\n" + "\n" + "...\n" + "guda\n" + "lol", &l); + assert_non_null(html); + assert_int_equal(l, 0); + assert_string_equal(html, + "

      test

      \n" + "

      chunda

      \n" + "

      ...\n" + "guda\n" + "lol

      \n"); + free(html); + l = 0; + html = blogc_content_parse( + "# test\n" + "\n" + "chunda..\n" + "\n" + "guda\n" + "lol", &l); + assert_non_null(html); + assert_int_equal(l, 0); + assert_string_equal(html, + "

      test

      \n" + "

      chunda..

      \n" + "

      guda\n" + "lol

      \n"); + free(html); + l = 0; + html = blogc_content_parse( + "# test\n" + "\n" + "chunda\n" + "\n" + "...guda\n" + "lol", &l); + assert_non_null(html); + assert_int_equal(l, 0); + assert_string_equal(html, + "

      test

      \n" + "

      chunda

      \n" + "

      ...guda\n" + "lol

      \n"); + free(html); +} + + void test_content_parse_invalid_header(void **state) { char *html = blogc_content_parse( "asd\n" "\n" - "##bola\n"); + "##bola\n", NULL); assert_non_null(html); assert_string_equal(html, "

      asd

      \n" @@ -422,7 +540,7 @@ test_content_parse_invalid_header_empty(void **state) "\n" "##\n" "\n" - "qwe\n"); + "qwe\n", NULL); assert_non_null(html); assert_string_equal(html, "

      asd

      \n" @@ -439,7 +557,7 @@ test_content_parse_invalid_blockquote(void **state) char *html = blogc_content_parse( "> asd\n" "> bola\n" - "> foo\n"); + "> foo\n", NULL); assert_non_null(html); assert_string_equal(html, "

      > asd\n" @@ -455,7 +573,7 @@ test_content_parse_invalid_code(void **state) char *html = blogc_content_parse( " asd\n" " bola\n" - " foo\n"); + " foo\n", NULL); assert_non_null(html); assert_string_equal(html, "

      asd\n" @@ -469,11 +587,11 @@ void test_content_parse_invalid_horizontal_rule(void **state) { // this generates invalid html, but... - char *html = blogc_content_parse("** asd"); + char *html = blogc_content_parse("** asd", NULL); assert_non_null(html); assert_string_equal(html, "

      asd

      \n"); free(html); - html = blogc_content_parse("** asd\n"); + html = blogc_content_parse("** asd\n", NULL); assert_non_null(html); assert_string_equal(html, "

      asd

      \n"); free(html); @@ -486,7 +604,7 @@ test_content_parse_invalid_unordered_list(void **state) // more invalid html char *html = blogc_content_parse( "* asd\n" - "1. qwe"); + "1. qwe", NULL); assert_non_null(html); assert_string_equal(html, "

      asd\n" @@ -495,7 +613,7 @@ test_content_parse_invalid_unordered_list(void **state) html = blogc_content_parse( "* asd\n" "1. qwe\n" - "\n"); + "\n", NULL); assert_non_null(html); assert_string_equal(html, "

      asd\n" @@ -503,7 +621,7 @@ test_content_parse_invalid_unordered_list(void **state) free(html); html = blogc_content_parse( "* asd\n" - "1. qwe\n"); + "1. qwe\n", NULL); assert_non_null(html); assert_string_equal(html, "

      asd\n" @@ -511,7 +629,7 @@ test_content_parse_invalid_unordered_list(void **state) free(html); html = blogc_content_parse( "* asd\n" - "1. qwe\n"); + "1. qwe\n", NULL); assert_non_null(html); assert_string_equal(html, "

      asd\n" @@ -523,7 +641,7 @@ test_content_parse_invalid_unordered_list(void **state) "* asd\n" "1. qwe\n" "\n" - "poi\n"); + "poi\n", NULL); assert_non_null(html); assert_string_equal(html, "

      chunda

      \n" @@ -540,7 +658,7 @@ test_content_parse_invalid_ordered_list(void **state) // more invalid html char *html = blogc_content_parse( "1. asd\n" - "* qwe"); + "* qwe", NULL); assert_non_null(html); assert_string_equal(html, "

      1. asd\n" @@ -549,7 +667,7 @@ test_content_parse_invalid_ordered_list(void **state) html = blogc_content_parse( "1. asd\n" "* qwe\n" - "\n"); + "\n", NULL); assert_non_null(html); assert_string_equal(html, "

      1. asd\n" @@ -557,7 +675,7 @@ test_content_parse_invalid_ordered_list(void **state) free(html); html = blogc_content_parse( "1. asd\n" - "* qwe\n"); + "* qwe\n", NULL); assert_non_null(html); assert_string_equal(html, "

      1. asd\n" @@ -565,7 +683,7 @@ test_content_parse_invalid_ordered_list(void **state) free(html); html = blogc_content_parse( "1. asd\n" - "* qwe\n"); + "* qwe\n", NULL); assert_non_null(html); assert_string_equal(html, "

      1. asd\n" @@ -577,7 +695,7 @@ test_content_parse_invalid_ordered_list(void **state) "1. asd\n" "* qwe\n" "\n" - "poi\n"); + "poi\n", NULL); assert_non_null(html); assert_string_equal(html, "

      chunda

      \n" @@ -587,7 +705,7 @@ test_content_parse_invalid_ordered_list(void **state) free(html); html = blogc_content_parse( "1 asd\n" - "* qwe\n"); + "* qwe\n", NULL); assert_non_null(html); assert_string_equal(html, "

      1 asd\n" @@ -595,7 +713,7 @@ test_content_parse_invalid_ordered_list(void **state) free(html); html = blogc_content_parse( "a. asd\n" - "2. qwe\n"); + "2. qwe\n", NULL); assert_non_null(html); assert_string_equal(html, "

      a. asd\n" @@ -913,6 +1031,7 @@ main(void) { const UnitTest tests[] = { unit_test(test_content_parse), + unit_test(test_content_parse_with_excerpt), unit_test(test_content_parse_header), unit_test(test_content_parse_html), unit_test(test_content_parse_blockquote), @@ -920,6 +1039,7 @@ main(void) unit_test(test_content_parse_horizontal_rule), unit_test(test_content_parse_unordered_list), unit_test(test_content_parse_ordered_list), + unit_test(test_content_parse_invalid_excerpt), unit_test(test_content_parse_invalid_header), unit_test(test_content_parse_invalid_header_empty), unit_test(test_content_parse_invalid_blockquote), diff --git a/tests/check_loader.c b/tests/check_loader.c index 865d598..f5be3e7 100644 --- a/tests/check_loader.c +++ b/tests/check_loader.c @@ -113,9 +113,10 @@ test_source_parse_from_file(void **state) b_trie_t *t = blogc_source_parse_from_file("bola.txt", &err); assert_null(err); assert_non_null(t); - assert_int_equal(b_trie_size(t), 4); + assert_int_equal(b_trie_size(t), 5); assert_string_equal(b_trie_lookup(t, "ASD"), "123"); assert_string_equal(b_trie_lookup(t, "FILENAME"), "bola"); + assert_string_equal(b_trie_lookup(t, "EXCERPT"), "

      bola

      \n"); assert_string_equal(b_trie_lookup(t, "CONTENT"), "

      bola

      \n"); assert_string_equal(b_trie_lookup(t, "RAW_CONTENT"), "bola"); b_trie_free(t); diff --git a/tests/check_source_parser.c b/tests/check_source_parser.c index 0cec364..79ca10f 100644 --- a/tests/check_source_parser.c +++ b/tests/check_source_parser.c @@ -34,9 +34,12 @@ test_source_parse(void **state) b_trie_t *source = blogc_source_parse(a, strlen(a), &err); assert_null(err); assert_non_null(source); - assert_int_equal(b_trie_size(source), 4); + assert_int_equal(b_trie_size(source), 5); assert_string_equal(b_trie_lookup(source, "VAR1"), "asd asd"); assert_string_equal(b_trie_lookup(source, "VAR2"), "123chunda"); + assert_string_equal(b_trie_lookup(source, "EXCERPT"), + "

      This is a test

      \n" + "

      bola

      \n"); assert_string_equal(b_trie_lookup(source, "CONTENT"), "

      This is a test

      \n" "

      bola

      \n"); @@ -64,9 +67,12 @@ test_source_parse_with_spaces(void **state) b_trie_t *source = blogc_source_parse(a, strlen(a), &err); assert_null(err); assert_non_null(source); - assert_int_equal(b_trie_size(source), 4); + assert_int_equal(b_trie_size(source), 5); assert_string_equal(b_trie_lookup(source, "VAR1"), "chunda"); assert_string_equal(b_trie_lookup(source, "BOLA"), "guda"); + assert_string_equal(b_trie_lookup(source, "EXCERPT"), + "

      This is a test

      \n" + "

      bola

      \n"); assert_string_equal(b_trie_lookup(source, "CONTENT"), "

      This is a test

      \n" "

      bola

      \n"); @@ -78,6 +84,49 @@ test_source_parse_with_spaces(void **state) } +static void +test_source_parse_with_excerpt(void **state) +{ + const char *a = + "VAR1: asd asd\n" + "VAR2: 123chunda\n" + "----------\n" + "# This is a test\n" + "\n" + "bola\n" + "\n" + "...\n" + "\n" + "guda\n" + "yay"; + blogc_error_t *err = NULL; + b_trie_t *source = blogc_source_parse(a, strlen(a), &err); + assert_null(err); + assert_non_null(source); + assert_int_equal(b_trie_size(source), 5); + assert_string_equal(b_trie_lookup(source, "VAR1"), "asd asd"); + assert_string_equal(b_trie_lookup(source, "VAR2"), "123chunda"); + assert_string_equal(b_trie_lookup(source, "EXCERPT"), + "

      This is a test

      \n" + "

      bola

      \n"); + assert_string_equal(b_trie_lookup(source, "CONTENT"), + "

      This is a test

      \n" + "

      bola

      \n" + "

      guda\n" + "yay

      \n"); + assert_string_equal(b_trie_lookup(source, "RAW_CONTENT"), + "# This is a test\n" + "\n" + "bola\n" + "\n" + "...\n" + "\n" + "guda\n" + "yay"); + b_trie_free(source); +} + + static void test_source_parse_config_empty(void **state) { @@ -381,6 +430,7 @@ main(void) const UnitTest tests[] = { unit_test(test_source_parse), unit_test(test_source_parse_with_spaces), + unit_test(test_source_parse_with_excerpt), unit_test(test_source_parse_config_empty), unit_test(test_source_parse_config_invalid_key), unit_test(test_source_parse_config_no_key), -- cgit v1.2.3-18-g5258