aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRafael G. Martins <rafael@rafaelmartins.eng.br>2016-06-25 01:09:23 +0200
committerRafael G. Martins <rafael@rafaelmartins.eng.br>2016-07-16 02:02:14 +0200
commitfeb3ed33717d04973a052ebca8c6872c517a2acf (patch)
treefc9dcb1c60449f3181aebb889a3dafb11226d92b
parentb88cdf4fd29bb74022ccf752ace910244767f977 (diff)
downloadblogc-feature/content-parser-ast.tar.gz
blogc-feature/content-parser-ast.tar.bz2
blogc-feature/content-parser-ast.zip
content-parser: added ast for content blocks. pending inline and testsfeature/content-parser-ast
-rw-r--r--src/content-parser.c399
-rw-r--r--src/content-parser.h119
-rw-r--r--src/source-parser.c8
-rw-r--r--tests/check_content_parser.c53
4 files changed, 474 insertions, 105 deletions
diff --git a/src/content-parser.c b/src/content-parser.c
index ca0597b..410fc90 100644
--- a/src/content-parser.c
+++ b/src/content-parser.c
@@ -7,6 +7,7 @@
*/
#include <stdbool.h>
+#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -673,8 +674,36 @@ blogc_is_ordered_list_item(const char *str, size_t prefix_len)
}
-char*
-blogc_content_parse(const char *src, size_t *end_excerpt, char **description)
+/*
+ * Allocate a block-level AST node of the given block type.
+ *
+ * The content and parameters pointers are stored as-is (no copy is made);
+ * blogc_content_free_ast() releases both when the node is freed. The new
+ * node has no child and no next sibling.
+ */
+static blogc_content_node_t*
+block_node_new(blogc_content_block_type_t type, char *content, sb_trie_t *parameters)
+{
+    blogc_content_node_t *node = sb_malloc(sizeof(blogc_content_node_t));
+
+    // links first: a fresh node is always detached.
+    node->next = NULL;
+    node->child = NULL;
+
+    // payload handed over by the caller.
+    node->parameters = parameters;
+    node->content = content;
+
+    // tag the node so readers know which member of `type` is valid.
+    node->type.block_type = type;
+    node->node_type = BLOGC_CONTENT_BLOCK;
+
+    return node;
+}
+
+
+/*
+ * Allocate an inline-level AST node of the given inline type.
+ *
+ * The content and parameters pointers are stored as-is (no copy is made);
+ * blogc_content_free_ast() releases both when the node is freed. The new
+ * node has no child and no next sibling.
+ */
+static blogc_content_node_t*
+inline_node_new(blogc_content_inline_type_t type, char *content, sb_trie_t *parameters)
+{
+    blogc_content_node_t *node = sb_malloc(sizeof(blogc_content_node_t));
+
+    // links first: a fresh node is always detached.
+    node->next = NULL;
+    node->child = NULL;
+
+    // payload handed over by the caller.
+    node->parameters = parameters;
+    node->content = content;
+
+    // tag the node so readers know which member of `type` is valid.
+    node->type.inline_type = type;
+    node->node_type = BLOGC_CONTENT_INLINE;
+
+    return node;
+}
+
+
+blogc_content_node_t*
+blogc_content_parse_ast(const char *src, char **nl)
{
// src is always nul-terminated.
size_t src_len = strlen(src);
@@ -683,7 +712,6 @@ blogc_content_parse(const char *src, size_t *end_excerpt, char **description)
size_t start = 0;
size_t start2 = 0;
size_t end = 0;
- size_t eend = 0;
size_t real_end = 0;
unsigned int header_level = 0;
@@ -692,7 +720,6 @@ blogc_content_parse(const char *src, size_t *end_excerpt, char **description)
char *tmp = NULL;
char *tmp2 = NULL;
char *parsed = NULL;
- char *slug = NULL;
// this isn't empty because we need some reasonable default value in the
// unlikely case that we need to print some line ending before evaluating
@@ -705,9 +732,11 @@ blogc_content_parse(const char *src, size_t *end_excerpt, char **description)
sb_slist_t *lines = NULL;
sb_slist_t *lines2 = NULL;
- sb_string_t *rv = sb_string_new();
sb_string_t *tmp_str = NULL;
+ blogc_content_node_t *ast = NULL;
+ blogc_content_node_t *last = NULL;
+
blogc_content_parser_state_t state = CONTENT_START_LINE;
while (current < src_len) {
@@ -744,11 +773,8 @@ blogc_content_parse(const char *src, size_t *end_excerpt, char **description)
break;
start = current;
if (c == '.') {
- if (end_excerpt != NULL) {
- eend = rv->len; // fuck it
- state = CONTENT_EXCERPT;
- break;
- }
+ state = CONTENT_EXCERPT;
+ break;
}
if (c == '#') {
header_level = 1;
@@ -784,27 +810,28 @@ blogc_content_parse(const char *src, size_t *end_excerpt, char **description)
break;
case CONTENT_EXCERPT:
- if (end_excerpt != NULL) {
- if (c == '.')
- break;
- if (c == '\n' || c == '\r') {
- state = CONTENT_EXCERPT_END;
- break;
- }
+ if (c == '.')
+ break;
+ if (c == '\n' || c == '\r') {
+ state = CONTENT_EXCERPT_END;
+ break;
}
- eend = 0;
state = CONTENT_PARAGRAPH;
break;
case CONTENT_EXCERPT_END:
- if (end_excerpt != NULL) {
- if (c == '\n' || c == '\r') {
- *end_excerpt = eend;
- state = CONTENT_START_LINE;
- break;
+ if (c == '\n' || c == '\r') {
+ if (ast == NULL) {
+ ast = block_node_new(BLOGC_CONTENT_BLOCK_EXCERPT, NULL, NULL);
+ last = ast;
}
+ else {
+ last->next = block_node_new(BLOGC_CONTENT_BLOCK_EXCERPT, NULL, NULL);
+ last = last->next;
+ }
+ state = CONTENT_START_LINE;
+ break;
}
- eend = 0;
state = CONTENT_PARAGRAPH_END;
break;
@@ -834,18 +861,16 @@ blogc_content_parse(const char *src, size_t *end_excerpt, char **description)
end = is_last && c != '\n' && c != '\r' ? src_len :
(real_end != 0 ? real_end : current);
tmp = sb_strndup(src + start, end - start);
- parsed = blogc_content_parse_inline(tmp);
- slug = blogc_slugify(tmp);
- if (slug == NULL)
- sb_string_append_printf(rv, "<h%d>%s</h%d>%s",
- header_level, parsed, header_level, line_ending);
- else
- sb_string_append_printf(rv, "<h%d id=\"%s\">%s</h%d>%s",
- header_level, slug, parsed, header_level,
- line_ending);
- free(slug);
- free(parsed);
- parsed = NULL;
+ sb_trie_t *t = sb_trie_new(free);
+ sb_trie_insert(t, "level", sb_strdup_printf("%d", header_level));
+ if (ast == NULL) {
+ ast = block_node_new(BLOGC_CONTENT_BLOCK_HEADER, blogc_content_parse_inline(tmp), t);
+ last = ast;
+ }
+ else {
+ last->next = block_node_new(BLOGC_CONTENT_BLOCK_HEADER, blogc_content_parse_inline(tmp), t); // TODO: inline-me
+ last = last->next;
+ }
free(tmp);
tmp = NULL;
state = CONTENT_START_LINE;
@@ -864,10 +889,16 @@ blogc_content_parse(const char *src, size_t *end_excerpt, char **description)
case CONTENT_HTML_END:
if (c == '\n' || c == '\r' || is_last) {
- tmp = sb_strndup(src + start, end - start);
- sb_string_append_printf(rv, "%s%s", tmp, line_ending);
- free(tmp);
- tmp = NULL;
+ if (ast == NULL) {
+ ast = block_node_new(BLOGC_CONTENT_BLOCK_RAW,
+ sb_strndup(src + start, end - start), NULL);
+ last = ast;
+ }
+ else {
+ last->next = block_node_new(BLOGC_CONTENT_BLOCK_RAW,
+ sb_strndup(src + start, end - start), NULL);
+ last = last->next;
+ }
state = CONTENT_START_LINE;
start = current;
}
@@ -915,14 +946,18 @@ blogc_content_parse(const char *src, size_t *end_excerpt, char **description)
for (sb_slist_t *l = lines; l != NULL; l = l->next)
sb_string_append_printf(tmp_str, "%s%s", l->data,
line_ending);
- // do not propagate description to blockquote parsing,
- // because we just want paragraphs from first level of
- // content.
- tmp = blogc_content_parse(tmp_str->str, NULL, NULL);
- sb_string_append_printf(rv, "<blockquote>%s</blockquote>%s",
- tmp, line_ending);
- free(tmp);
- tmp = NULL;
+ if (ast == NULL) {
+ ast = block_node_new(BLOGC_CONTENT_BLOCK_BLOCKQUOTE,
+ NULL, NULL);
+ ast->child = blogc_content_parse_ast(tmp_str->str, nl);
+ last = ast;
+ }
+ else {
+ last->next = block_node_new(BLOGC_CONTENT_BLOCK_BLOCKQUOTE,
+ NULL, NULL);
+ last->next->child = blogc_content_parse_ast(tmp_str->str, nl);
+ last = last->next;
+ }
sb_string_free(tmp_str, true);
tmp_str = NULL;
sb_slist_free_full(lines, free);
@@ -974,17 +1009,25 @@ blogc_content_parse(const char *src, size_t *end_excerpt, char **description)
case CONTENT_CODE_END:
if (c == '\n' || c == '\r' || is_last) {
- sb_string_append(rv, "<pre><code>");
+ tmp_str = sb_string_new();
for (sb_slist_t *l = lines; l != NULL; l = l->next) {
- char *tmp_line = blogc_htmlentities(l->data);
if (l->next == NULL)
- sb_string_append_printf(rv, "%s", tmp_line);
+ sb_string_append_printf(tmp_str, "%s", l->data);
else
- sb_string_append_printf(rv, "%s%s", tmp_line,
+ sb_string_append_printf(tmp_str, "%s%s", l->data,
line_ending);
- free(tmp_line);
}
- sb_string_append_printf(rv, "</code></pre>%s", line_ending);
+ if (ast == NULL) {
+ ast = block_node_new(BLOGC_CONTENT_BLOCK_CODE,
+ sb_string_free(tmp_str, false), NULL);
+ last = ast;
+ }
+ else {
+ last->next = block_node_new(BLOGC_CONTENT_BLOCK_CODE,
+ sb_string_free(tmp_str, false), NULL);
+ last = last->next;
+ }
+ tmp_str = NULL;
sb_slist_free_full(lines, free);
lines = NULL;
free(prefix);
@@ -1016,7 +1059,16 @@ blogc_content_parse(const char *src, size_t *end_excerpt, char **description)
break;
}
if (c == '\n' || c == '\r' || is_last) {
- sb_string_append_printf(rv, "<hr />%s", line_ending);
+ if (ast == NULL) {
+ ast = block_node_new(BLOGC_CONTENT_BLOCK_HORIZONTAL_RULE,
+ NULL, NULL);
+ last = ast;
+ }
+ else {
+ last->next = block_node_new(BLOGC_CONTENT_BLOCK_HORIZONTAL_RULE,
+ NULL, NULL);
+ last = last->next;
+ }
state = CONTENT_START_LINE;
start = current;
d = '\0';
@@ -1098,12 +1150,30 @@ blogc_content_parse(const char *src, size_t *end_excerpt, char **description)
free(parsed);
parsed = NULL;
}
- sb_string_append_printf(rv, "<ul>%s", line_ending);
- for (sb_slist_t *l = lines; l != NULL; l = l->next)
- sb_string_append_printf(rv, "<li>%s</li>%s", l->data,
- line_ending);
- sb_string_append_printf(rv, "</ul>%s", line_ending);
- sb_slist_free_full(lines, free);
+ if (ast == NULL) {
+ ast = block_node_new(BLOGC_CONTENT_BLOCK_UNORDERED_LIST,
+ NULL, NULL);
+ last = ast;
+ }
+ else {
+ last->next = block_node_new(BLOGC_CONTENT_BLOCK_UNORDERED_LIST,
+ NULL, NULL);
+ last = last->next;
+ }
+ blogc_content_node_t *last_list = NULL;
+ for (sb_slist_t *l = lines; l != NULL; l = l->next) {
+ if (last_list == NULL) {
+ last->child = block_node_new(BLOGC_CONTENT_BLOCK_LIST_ITEM,
+ l->data, NULL);
+ last_list = last->child;
+ }
+ else {
+ last_list->next = block_node_new(BLOGC_CONTENT_BLOCK_LIST_ITEM,
+ l->data, NULL);
+ last_list = last_list->next;
+ }
+ }
+ sb_slist_free(lines);
lines = NULL;
free(prefix);
prefix = NULL;
@@ -1209,12 +1279,30 @@ blogc_content_parse(const char *src, size_t *end_excerpt, char **description)
free(parsed);
parsed = NULL;
}
- sb_string_append_printf(rv, "<ol>%s", line_ending);
- for (sb_slist_t *l = lines; l != NULL; l = l->next)
- sb_string_append_printf(rv, "<li>%s</li>%s", l->data,
- line_ending);
- sb_string_append_printf(rv, "</ol>%s", line_ending);
- sb_slist_free_full(lines, free);
+ if (ast == NULL) {
+ ast = block_node_new(BLOGC_CONTENT_BLOCK_ORDERED_LIST,
+ NULL, NULL);
+ last = ast;
+ }
+ else {
+ last->next = block_node_new(BLOGC_CONTENT_BLOCK_ORDERED_LIST,
+ NULL, NULL);
+ last = last->next;
+ }
+ blogc_content_node_t *last_list = NULL;
+ for (sb_slist_t *l = lines; l != NULL; l = l->next) {
+ if (last_list == NULL) {
+ last->child = block_node_new(BLOGC_CONTENT_BLOCK_LIST_ITEM,
+ l->data, NULL);
+ last_list = last->child;
+ }
+ else {
+ last_list->next = block_node_new(BLOGC_CONTENT_BLOCK_LIST_ITEM,
+ l->data, NULL);
+ last_list = last_list->next;
+ }
+ }
+ sb_slist_free(lines);
lines = NULL;
free(prefix);
prefix = NULL;
@@ -1238,16 +1326,19 @@ blogc_content_parse(const char *src, size_t *end_excerpt, char **description)
case CONTENT_PARAGRAPH_END:
if (c == '\n' || c == '\r' || is_last) {
- tmp = sb_strndup(src + start, end - start);
- if (description != NULL && *description == NULL)
- *description = blogc_fix_description(tmp);
- parsed = blogc_content_parse_inline(tmp);
- sb_string_append_printf(rv, "<p>%s</p>%s", parsed,
- line_ending);
- free(parsed);
- parsed = NULL;
- free(tmp);
- tmp = NULL;
+ char *tmp2 = sb_strndup(src + start, end - start);
+ sb_trie_t *t = sb_trie_new(free);
+ sb_trie_insert(t, "parsed", blogc_content_parse_inline(tmp2));
+ if (ast == NULL) {
+ ast = block_node_new(BLOGC_CONTENT_BLOCK_PARAGRAPH,
+ tmp2, t);
+ last = ast;
+ }
+ else {
+ last->next = block_node_new(BLOGC_CONTENT_BLOCK_PARAGRAPH,
+ tmp2, t);
+ last = last->next;
+ }
state = CONTENT_START_LINE;
start = current;
}
@@ -1260,5 +1351,159 @@ blogc_content_parse(const char *src, size_t *end_excerpt, char **description)
current++;
}
+ if (nl != NULL && *nl == NULL)
+ *nl = sb_strdup(line_ending);
+
+ return ast;
+}
+
+
+/*
+ * Release an AST: the given node, all of its following siblings, and every
+ * subtree hanging off their ->child pointers.
+ *
+ * For each node this frees the content string, the parameters trie and the
+ * node itself. Passing NULL is a no-op.
+ *
+ * Siblings are walked in a loop rather than recursively, so stack depth is
+ * bounded by the nesting depth of the tree and not by the number of blocks
+ * in the document.
+ */
+void
+blogc_content_free_ast(blogc_content_node_t *ast)
+{
+    while (ast != NULL) {
+        // grab the sibling link before the node that holds it is freed.
+        blogc_content_node_t *next = ast->next;
+        free(ast->content);
+        sb_trie_free(ast->parameters);
+        blogc_content_free_ast(ast->child);
+        free(ast);
+        ast = next;
+    }
+}
+
+
+/*
+ * Parse content source and render it to HTML in one step.
+ *
+ * Builds the AST with blogc_content_parse_ast(), renders it with
+ * blogc_content_render_html() using the line ending reported by the parser,
+ * then releases the AST and the line-ending string. The excerpt and
+ * description out-parameters are forwarded untouched to the renderer.
+ *
+ * Returns the string produced by blogc_content_render_html().
+ */
+char*
+blogc_content_parse(const char *src, char **excerpt, char **description)
+{
+    char *line_ending = NULL;
+    blogc_content_node_t *tree = blogc_content_parse_ast(src, &line_ending);
+    char *html = blogc_content_render_html(tree, line_ending, excerpt,
+        description);
+
+    // the rendered string is independent of the tree and the line ending.
+    blogc_content_free_ast(tree);
+    free(line_ending);
+
+    return html;
+}
+
+
+/*
+ * Render a chain of sibling AST nodes (linked through ->next) to HTML.
+ *
+ * ast:         first node of the chain. With NULL the loop body never runs.
+ * nl:          line ending appended after every rendered block. A NULL value
+ *              falls back to "\n" instead of being handed to "%s".
+ * excerpt:     optional out-parameter. If non-NULL and *excerpt is still
+ *              NULL when an EXCERPT node is reached, it receives a duplicate
+ *              of the HTML rendered so far.
+ * description: optional out-parameter. If non-NULL and *description is still
+ *              NULL when a PARAGRAPH node is reached, it receives
+ *              blogc_fix_description() of that paragraph's content.
+ *
+ * Children of blockquotes and lists are rendered recursively with both
+ * out-parameters set to NULL, so only nodes of this chain can fill them.
+ * Inline nodes are currently skipped.
+ *
+ * Returns the accumulated string, detached from its sb_string_t wrapper.
+ */
+char*
+blogc_content_render_html(blogc_content_node_t *ast, char *nl, char **excerpt,
+    char **description)
+{
+    // passing NULL to a "%s" conversion is undefined behavior, so make sure
+    // that we always have some line ending to print.
+    const char *eol = nl != NULL ? nl : "\n";
+    const char *level = NULL;
+    sb_string_t *rv = sb_string_new();
+    char *tmp = NULL;
+    for (blogc_content_node_t *l = ast; l != NULL; l = l->next) {
+        switch (l->node_type) {
+            case BLOGC_CONTENT_BLOCK:
+                switch (l->type.block_type) {
+                    case BLOGC_CONTENT_BLOCK_RAW:
+                        sb_string_append_printf(rv, "%s%s", l->content, eol);
+                        break;
+                    case BLOGC_CONTENT_BLOCK_HEADER:
+                        level = sb_trie_lookup(l->parameters, "level");
+                        // NOTE(review): the parser stores the inline-parsed
+                        // header in content, so the slug is built from the
+                        // parsed text - confirm that this is intended.
+                        tmp = blogc_slugify(l->content);
+                        // the slug may be NULL; emit the header without an
+                        // id attribute in that case.
+                        if (tmp == NULL)
+                            sb_string_append_printf(rv, "<h%s>%s</h%s>%s",
+                                level, l->content, level, eol);
+                        else
+                            sb_string_append_printf(rv,
+                                "<h%s id=\"%s\">%s</h%s>%s", level, tmp,
+                                l->content, level, eol);
+                        free(tmp);
+                        tmp = NULL;
+                        break;
+                    case BLOGC_CONTENT_BLOCK_BLOCKQUOTE:
+                        tmp = blogc_content_render_html(l->child, nl, NULL, NULL);
+                        sb_string_append_printf(rv, "<blockquote>%s</blockquote>%s",
+                            tmp, eol);
+                        free(tmp);
+                        tmp = NULL;
+                        break;
+                    case BLOGC_CONTENT_BLOCK_CODE:
+                        // code content is stored raw, escape it on output.
+                        tmp = blogc_htmlentities(l->content);
+                        sb_string_append_printf(rv, "<pre><code>%s</code></pre>%s",
+                            tmp, eol);
+                        free(tmp);
+                        tmp = NULL;
+                        break;
+                    case BLOGC_CONTENT_BLOCK_HORIZONTAL_RULE:
+                        sb_string_append_printf(rv, "<hr />%s", eol);
+                        break;
+                    case BLOGC_CONTENT_BLOCK_UNORDERED_LIST:
+                        tmp = blogc_content_render_html(l->child, nl, NULL, NULL);
+                        sb_string_append_printf(rv, "<ul>%s%s</ul>%s", eol,
+                            tmp, eol);
+                        free(tmp);
+                        tmp = NULL;
+                        break;
+                    case BLOGC_CONTENT_BLOCK_ORDERED_LIST:
+                        tmp = blogc_content_render_html(l->child, nl, NULL, NULL);
+                        sb_string_append_printf(rv, "<ol>%s%s</ol>%s", eol,
+                            tmp, eol);
+                        free(tmp);
+                        tmp = NULL;
+                        break;
+                    case BLOGC_CONTENT_BLOCK_LIST_ITEM:
+                        sb_string_append_printf(rv, "<li>%s</li>%s",
+                            l->content, eol);
+                        break;
+                    case BLOGC_CONTENT_BLOCK_PARAGRAPH:
+                        // content is the raw paragraph, the rendered version
+                        // lives in the "parsed" parameter.
+                        if (description != NULL && *description == NULL)
+                            *description = blogc_fix_description(l->content);
+                        sb_string_append_printf(rv, "<p>%s</p>%s",
+                            sb_trie_lookup(l->parameters, "parsed"), eol);
+                        break;
+                    case BLOGC_CONTENT_BLOCK_EXCERPT:
+                        // the excerpt is everything rendered before the marker.
+                        if (excerpt != NULL && *excerpt == NULL)
+                            *excerpt = sb_strdup(rv->str);
+                        break;
+                }
+                break;
+            case BLOGC_CONTENT_INLINE:
+                break;
+        }
+    }
return sb_string_free(rv, false);
}
+
+
+/*
+ * Dump a chain of AST nodes to stderr, one "DEBUG: <CONTENT BLOCK ...>" line
+ * per block node, followed by the dump of its children (if any).
+ *
+ * Block types that carry text also print their content. Inline nodes
+ * produce no output.
+ */
+void
+blogc_content_debug(blogc_content_node_t *ast)
+{
+    for (blogc_content_node_t *node = ast; node != NULL; node = node->next) {
+        // only block nodes are dumped.
+        if (node->node_type != BLOGC_CONTENT_BLOCK)
+            continue;
+
+        fputs("DEBUG: <CONTENT BLOCK ", stderr);
+        switch (node->type.block_type) {
+            case BLOGC_CONTENT_BLOCK_RAW:
+                fprintf(stderr, "RAW: `%s`", node->content);
+                break;
+            case BLOGC_CONTENT_BLOCK_HEADER:
+                fprintf(stderr, "HEADER: \"%s\"", node->content);
+                break;
+            case BLOGC_CONTENT_BLOCK_BLOCKQUOTE:
+                fputs("BLOCKQUOTE", stderr);
+                break;
+            case BLOGC_CONTENT_BLOCK_CODE:
+                fprintf(stderr, "CODE: `%s`", node->content);
+                break;
+            case BLOGC_CONTENT_BLOCK_HORIZONTAL_RULE:
+                fputs("HORIZONTAL_RULE", stderr);
+                break;
+            case BLOGC_CONTENT_BLOCK_UNORDERED_LIST:
+                fputs("UNORDERED_LIST", stderr);
+                break;
+            case BLOGC_CONTENT_BLOCK_ORDERED_LIST:
+                fputs("ORDERED_LIST", stderr);
+                break;
+            case BLOGC_CONTENT_BLOCK_LIST_ITEM:
+                fprintf(stderr, "LIST_ITEM: `%s`", node->content);
+                break;
+            case BLOGC_CONTENT_BLOCK_PARAGRAPH:
+                fprintf(stderr, "PARAGRAPH: `%s`", node->content);
+                break;
+            case BLOGC_CONTENT_BLOCK_EXCERPT:
+                fputs("EXCERPT", stderr);
+                break;
+        }
+        fputs(">\n", stderr);
+
+        // nested blocks follow their parent in the dump.
+        if (node->child != NULL)
+            blogc_content_debug(node->child);
+    }
+}
diff --git a/src/content-parser.h b/src/content-parser.h
index c1eaf2c..3a72317 100644
--- a/src/content-parser.h
+++ b/src/content-parser.h
@@ -14,14 +14,119 @@
#include "utils.h"
+/*
+ * Raw node
+ *
+ * +-----+
+ * | RAW | -> ...
+ * +-----+
+ *
+ */
+
+/*
+ * Header node
+ *
+ * +--------+
+ * | HEADER | -> ...
+ * +--------+
+ * |
+ * +-----+ +------+ +-----+
+ * | raw | -> | bold | -> | raw | -> ...
+ * +-----+ +------+ +-----+
+ *
+ */
+
+/*
+ * Blockquote node
+ *
+ * +------------+
+ * | BLOCKQUOTE | -> ...
+ * +------------+
+ * |
+ * +-----------+ +--------+ +-----------+
+ * | PARAGRAPH | -> | HEADER | -> | PARAGRAPH | -> ...
+ * +-----------+ +--------+ +-----------+
+ * |
+ * +-----+
+ * | raw | -> ...
+ * +-----+
+ *
+ */
+
+/*
+ * Code node
+ *
+ * +------+
+ * | CODE | -> ...
+ * +------+
+ *
+ */
+
+/*
+ * Horizontal rule node
+ *
+ * +-----------------+
+ * | HORIZONTAL_RULE | -> ...
+ * +-----------------+
+ *
+ */
+
+/*
+ * Unordered list node
+ *
+ * +----------------+
+ * | UNORDERED_LIST | -> ...
+ * +----------------+
+ * |
+ * +-----------+ +-----------+
+ * | LIST_ITEM | -> | LIST_ITEM | -> ...
+ * +-----------+ +-----------+
+ * |
+ * +-----+ +------+
+ * | raw | -> | bold | -> ...
+ * +-----+ +------+
+ *
+ */
+
+/*
+ * Ordered list node
+ *
+ * +--------------+
+ * | ORDERED_LIST | -> ...
+ * +--------------+
+ * |
+ * +-----------+ +-----------+
+ * | LIST_ITEM | -> | LIST_ITEM | -> ...
+ * +-----------+ +-----------+
+ * |
+ * +-----+ +------+
+ * | raw | -> | bold | -> ...
+ * +-----+ +------+
+ *
+ */
+
+/*
+ * Paragraph node
+ *
+ * +-----------+
+ * | PARAGRAPH | -> ...
+ * +-----------+
+ * |
+ * +-----+ +------+ +-----+
+ * | raw | -> | bold | -> | raw |
+ * +-----+ +------+ +-----+
+ *
+ */
+
+
typedef enum {
BLOGC_CONTENT_BLOCK = 1,
BLOGC_CONTENT_INLINE,
} blogc_content_node_type_t;
typedef enum {
- BLOGC_CONTENT_BLOCK_HEADER = 1,
- BLOGC_CONTENT_BLOCK_RAW,
+ BLOGC_CONTENT_BLOCK_RAW = 1,
+ BLOGC_CONTENT_BLOCK_HEADER,
BLOGC_CONTENT_BLOCK_BLOCKQUOTE,
BLOGC_CONTENT_BLOCK_CODE,
BLOGC_CONTENT_BLOCK_HORIZONTAL_RULE,
@@ -29,10 +134,12 @@ typedef enum {
BLOGC_CONTENT_BLOCK_ORDERED_LIST,
BLOGC_CONTENT_BLOCK_LIST_ITEM,
BLOGC_CONTENT_BLOCK_PARAGRAPH,
+ BLOGC_CONTENT_BLOCK_EXCERPT,
} blogc_content_block_type_t;
typedef enum {
- BLOGC_CONTENT_INLINE_LINK = 1,
+ BLOGC_CONTENT_INLINE_RAW = 1,
+ BLOGC_CONTENT_INLINE_LINK,
BLOGC_CONTENT_INLINE_IMAGE,
BLOGC_CONTENT_INLINE_BOLD,
BLOGC_CONTENT_INLINE_ITALIC,
@@ -57,7 +164,11 @@ char* blogc_htmlentities(const char *str);
char* blogc_fix_description(const char *paragraph);
char* blogc_content_parse_inline(const char *src);
bool blogc_is_ordered_list_item(const char *str, size_t prefix_len);
-char* blogc_content_parse(const char *src, size_t *end_excerpt,
+blogc_content_node_t* blogc_content_parse_ast(const char *src, char **nl);
+void blogc_content_free_ast(blogc_content_node_t *ast);
+char* blogc_content_parse(const char *src, char **excerpt, char **description);
+char* blogc_content_render_html(blogc_content_node_t *ast, char *nl, char **excerpt,
char **description);
+void blogc_content_debug(blogc_content_node_t *ast);
#endif /* _CONTENT_PARSER_H */
diff --git a/src/source-parser.c b/src/source-parser.c
index 5d29213..349d3f7 100644
--- a/src/source-parser.c
+++ b/src/source-parser.c
@@ -34,7 +34,6 @@ blogc_source_parse(const char *src, size_t src_len, blogc_error_t **err)
size_t current = 0;
size_t start = 0;
- size_t end_excerpt = 0;
char *key = NULL;
char *tmp = NULL;
@@ -150,8 +149,9 @@ blogc_source_parse(const char *src, size_t src_len, blogc_error_t **err)
if (current == (src_len - 1)) {
tmp = sb_strndup(src + start, src_len - start);
sb_trie_insert(rv, "RAW_CONTENT", tmp);
+ char *excerpt = NULL;
char *description = NULL;
- content = blogc_content_parse(tmp, &end_excerpt, &description);
+ content = blogc_content_parse(tmp, &excerpt, &description);
if (description != NULL) {
// do not override source-provided description.
if (NULL == sb_trie_lookup(rv, "DESCRIPTION")) {
@@ -163,9 +163,9 @@ blogc_source_parse(const char *src, size_t src_len, blogc_error_t **err)
free(description);
}
}
+ sb_trie_insert(rv, "EXCERPT", excerpt == NULL ?
+ sb_strdup(content) : excerpt);
sb_trie_insert(rv, "CONTENT", content);
- sb_trie_insert(rv, "EXCERPT", end_excerpt == 0 ?
- sb_strdup(content) : sb_strndup(content, end_excerpt));
}
break;
}
diff --git a/tests/check_content_parser.c b/tests/check_content_parser.c
index 79b31c3..fb43201 100644
--- a/tests/check_content_parser.c
+++ b/tests/check_content_parser.c
@@ -122,7 +122,7 @@ test_is_ordered_list_item(void **state)
static void
test_content_parse(void **state)
{
- size_t l = 0;
+ char *l = NULL;
char *d = NULL;
char *html = blogc_content_parse(
"# um\n"
@@ -167,7 +167,7 @@ test_content_parse(void **state)
"\n"
"--- lol\n", &l, &d);
assert_non_null(html);
- assert_int_equal(l, 0);
+ assert_null(l);
assert_non_null(d);
assert_string_equal(d, "bola chunda");
assert_string_equal(html,
@@ -214,7 +214,7 @@ test_content_parse(void **state)
static void
test_content_parse_crlf(void **state)
{
- size_t l = 0;
+ char *l = NULL;
char *d = NULL;
char *html = blogc_content_parse(
"# um\r\n"
@@ -259,7 +259,7 @@ test_content_parse_crlf(void **state)
"\r\n"
"--- lol\r\n", &l, &d);
assert_non_null(html);
- assert_int_equal(l, 0);
+ assert_null(l);
assert_non_null(d);
assert_string_equal(d, "bola chunda");
assert_string_equal(html,
@@ -306,7 +306,7 @@ test_content_parse_crlf(void **state)
static void
test_content_parse_with_excerpt(void **state)
{
- size_t l = 0;
+ char *l = NULL;
char *d = NULL;
char *html = blogc_content_parse(
"# test\n"
@@ -318,7 +318,10 @@ test_content_parse_with_excerpt(void **state)
"guda\n"
"lol", &l, &d);
assert_non_null(html);
- assert_int_equal(l, 38);
+ assert_non_null(l);
+ assert_string_equal(l,
+ "<h1 id=\"test\">test</h1>\n"
+ "<p>chunda</p>\n");
assert_non_null(d);
assert_string_equal(d, "chunda");
assert_string_equal(html,
@@ -327,7 +330,8 @@ test_content_parse_with_excerpt(void **state)
"<p>guda\n"
"lol</p>\n");
free(html);
- l = 0;
+ free(l);
+ l = NULL;
free(d);
d = NULL;
html = blogc_content_parse(
@@ -340,7 +344,10 @@ test_content_parse_with_excerpt(void **state)
"guda\n"
"lol", &l, &d);
assert_non_null(html);
- assert_int_equal(l, 38);
+ assert_non_null(l);
+ assert_string_equal(l,
+ "<h1 id=\"test\">test</h1>\n"
+ "<p>chunda</p>\n");
assert_non_null(d);
assert_string_equal(d, "chunda");
assert_string_equal(html,
@@ -349,6 +356,7 @@ test_content_parse_with_excerpt(void **state)
"<p>guda\n"
"lol</p>\n");
free(html);
+ free(l);
free(d);
}
@@ -356,7 +364,7 @@ test_content_parse_with_excerpt(void **state)
static void
test_content_parse_with_excerpt_crlf(void **state)
{
- size_t l = 0;
+ char *l = NULL;
char *d = NULL;
char *html = blogc_content_parse(
"# test\r\n"
@@ -368,7 +376,10 @@ test_content_parse_with_excerpt_crlf(void **state)
"guda\r\n"
"lol", &l, &d);
assert_non_null(html);
- assert_int_equal(l, 40);
+ assert_non_null(l);
+ assert_string_equal(l,
+ "<h1 id=\"test\">test</h1>\r\n"
+ "<p>chunda</p>\r\n");
assert_non_null(d);
assert_string_equal(d, "chunda");
assert_string_equal(html,
@@ -377,7 +388,8 @@ test_content_parse_with_excerpt_crlf(void **state)
"<p>guda\r\n"
"lol</p>\r\n");
free(html);
- l = 0;
+ free(l);
+ l = NULL;
free(d);
d = NULL;
html = blogc_content_parse(
@@ -390,7 +402,10 @@ test_content_parse_with_excerpt_crlf(void **state)
"guda\r\n"
"lol", &l, &d);
assert_non_null(html);
- assert_int_equal(l, 40);
+ assert_non_null(l);
+ assert_string_equal(l,
+ "<h1 id=\"test\">test</h1>\r\n"
+ "<p>chunda</p>\r\n");
assert_non_null(d);
assert_string_equal(d, "chunda");
assert_string_equal(html,
@@ -399,6 +414,7 @@ test_content_parse_with_excerpt_crlf(void **state)
"<p>guda\r\n"
"lol</p>\r\n");
free(html);
+ free(l);
free(d);
}
@@ -1306,7 +1322,7 @@ test_content_parse_description_crlf(void **state)
static void
test_content_parse_invalid_excerpt(void **state)
{
- size_t l = 0;
+ char *l = NULL;
char *d = NULL;
char *html = blogc_content_parse(
"# test\n"
@@ -1317,7 +1333,7 @@ test_content_parse_invalid_excerpt(void **state)
"guda\n"
"lol", &l, &d);
assert_non_null(html);
- assert_int_equal(l, 0);
+ assert_null(l);
assert_non_null(d);
assert_string_equal(d, "chunda ..");
assert_string_equal(html,
@@ -1327,7 +1343,6 @@ test_content_parse_invalid_excerpt(void **state)
"<p>guda\n"
"lol</p>\n");
free(html);
- l = 0;
free(d);
d = NULL;
html = blogc_content_parse(
@@ -1339,7 +1354,7 @@ test_content_parse_invalid_excerpt(void **state)
"guda\n"
"lol", &l, &d);
assert_non_null(html);
- assert_int_equal(l, 0);
+ assert_null(l);
assert_non_null(d);
assert_string_equal(d, "chunda");
assert_string_equal(html,
@@ -1349,7 +1364,6 @@ test_content_parse_invalid_excerpt(void **state)
"guda\n"
"lol</p>\n");
free(html);
- l = 0;
free(d);
d = NULL;
html = blogc_content_parse(
@@ -1360,7 +1374,7 @@ test_content_parse_invalid_excerpt(void **state)
"guda\n"
"lol", &l, &d);
assert_non_null(html);
- assert_int_equal(l, 0);
+ assert_null(l);
assert_non_null(d);
assert_string_equal(d, "chunda..");
assert_string_equal(html,
@@ -1369,7 +1383,6 @@ test_content_parse_invalid_excerpt(void **state)
"<p>guda\n"
"lol</p>\n");
free(html);
- l = 0;
free(d);
d = NULL;
html = blogc_content_parse(
@@ -1380,7 +1393,7 @@ test_content_parse_invalid_excerpt(void **state)
"...guda\n"
"lol", &l, &d);
assert_non_null(html);
- assert_int_equal(l, 0);
+ assert_null(l);
assert_non_null(d);
assert_string_equal(d, "chunda");
assert_string_equal(html,