aboutsummaryrefslogtreecommitdiffstats
path: root/src/content-parser.c
diff options
context:
space:
mode:
authorRafael G. Martins <rafael@rafaelmartins.eng.br>2015-05-04 12:54:10 -0300
committerRafael G. Martins <rafael@rafaelmartins.eng.br>2015-05-04 14:03:58 -0300
commit6c36460f20548850b0704a462dd8281c7f5868a7 (patch)
treeca0673721e790e49d287c073b0e38041086ff85f /src/content-parser.c
parent32b7433a3587216a0c4370ed25f9e8cc3f6b8649 (diff)
downloadblogc-6c36460f20548850b0704a462dd8281c7f5868a7.tar.gz
blogc-6c36460f20548850b0704a462dd8281c7f5868a7.tar.bz2
blogc-6c36460f20548850b0704a462dd8281c7f5868a7.zip
content-parser: implemented more block elements
Diffstat (limited to 'src/content-parser.c')
-rw-r--r--src/content-parser.c189
1 files changed, 181 insertions, 8 deletions
diff --git a/src/content-parser.c b/src/content-parser.c
index e083243..75c0f34 100644
--- a/src/content-parser.c
+++ b/src/content-parser.c
@@ -22,7 +22,6 @@
// expected. feel free to improve the parser and and new features.
-// TODO: block elements: list, horizontal rule
// TODO: inline elements: links, emphasis, code, images, line breaks
// TODO: automatic scaping of html entities
// TODO: automatic links
@@ -42,6 +41,14 @@ typedef enum {
CONTENT_CODE,
CONTENT_CODE_START,
CONTENT_CODE_END,
+ CONTENT_UNORDERED_LIST_OR_HORIZONTAL_RULE,
+ CONTENT_HORIZONTAL_RULE,
+ CONTENT_UNORDERED_LIST_START,
+ CONTENT_UNORDERED_LIST_END,
+ CONTENT_ORDERED_LIST,
+ CONTENT_ORDERED_LIST_SPACE,
+ CONTENT_ORDERED_LIST_START,
+ CONTENT_ORDERED_LIST_END,
CONTENT_PARAGRAPH,
CONTENT_PARAGRAPH_END,
} blogc_content_parser_state_t;
@@ -59,7 +66,12 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err)
unsigned int header_level = 0;
char *prefix = NULL;
+ size_t prefix_len = 0;
char *tmp = NULL;
+ char *tmp2 = NULL;
+ char **tmpv = NULL;
+
+ char d;
b_slist_t *lines = NULL;
@@ -82,6 +94,17 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err)
state = CONTENT_HEADER;
break;
}
+ if (c == '*' || c == '+' || c == '-') {
+ state = CONTENT_UNORDERED_LIST_OR_HORIZONTAL_RULE;
+ start = current;
+ d = c;
+ break;
+ }
+ if (c >= '0' && c <= '9') {
+ state = CONTENT_ORDERED_LIST;
+ start = current;
+ break;
+ }
if (c == ' ' || c == '\t') {
state = CONTENT_CODE;
start = current;
@@ -110,7 +133,8 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err)
state = CONTENT_HEADER_TITLE_START;
break;
}
- // error;
+ *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len,
+ current, "Malformed header, no space or tab after '#'");
break;
case CONTENT_HEADER_TITLE_START:
@@ -121,7 +145,8 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err)
state = CONTENT_HEADER_TITLE;
break;
}
- // error;
+ *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len,
+ current, "Empty header");
break;
case CONTENT_HEADER_TITLE:
@@ -138,7 +163,6 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err)
}
break;
-
case CONTENT_HTML:
if (c == '\n' || c == '\r' || is_last) {
state = CONTENT_HTML_END;
@@ -172,13 +196,15 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err)
tmp = b_strndup(src + start, end - start);
if (b_str_starts_with(tmp, prefix)) {
lines = b_slist_append(lines, b_strdup(tmp + strlen(prefix)));
+ state = CONTENT_BLOCKQUOTE_END;
}
else {
- // error
+ *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len,
+ current, "Malformed blockquote, must use same prefix "
+ "as previous line(s): %s", prefix);
}
free(tmp);
tmp = NULL;
- state = CONTENT_BLOCKQUOTE_END;
}
if (!is_last)
break;
@@ -226,13 +252,15 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err)
tmp = b_strndup(src + start, end - start);
if (b_str_starts_with(tmp, prefix)) {
lines = b_slist_append(lines, b_strdup(tmp + strlen(prefix)));
+ state = CONTENT_CODE_END;
}
else {
- // error
+ *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len,
+ current, "Malformed code block, must use same prefix "
+ "as previous line(s): '%s'", prefix);
}
free(tmp);
tmp = NULL;
- state = CONTENT_CODE_END;
}
if (!is_last)
break;
@@ -260,6 +288,151 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err)
}
break;
+ case CONTENT_UNORDERED_LIST_OR_HORIZONTAL_RULE:
+ if (c == d) {
+ state = CONTENT_HORIZONTAL_RULE;
+ break;
+ }
+ if (c == ' ' || c == '\t')
+ break;
+ prefix = b_strndup(src + start, current - start);
+ state = CONTENT_UNORDERED_LIST_START;
+ break;
+
+ case CONTENT_HORIZONTAL_RULE:
+ if (c == d) {
+ break;
+ }
+ if (c == '\n' || c == '\r' || is_last) {
+ b_string_append(rv, "<hr />\n");
+ state = CONTENT_START_LINE;
+ start = current;
+ d = '\0';
+ break;
+ }
+ *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len,
+ current, "Malformed horizontal rule, must use only '%c'", d);
+ break;
+
+ case CONTENT_UNORDERED_LIST_START:
+ if (c == '\n' || c == '\r' || is_last) {
+ end = is_last ? src_len : current;
+ tmp = b_strndup(src + start, end - start);
+ if (b_str_starts_with(tmp, prefix)) {
+ lines = b_slist_append(lines, b_strdup(tmp + strlen(prefix)));
+ }
+ else {
+ *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len,
+ current, "Malformed unordered list, must use same prefix "
+ "as previous line(s): %s", prefix);
+ }
+ free(tmp);
+ tmp = NULL;
+ state = CONTENT_UNORDERED_LIST_END;
+ }
+ if (!is_last)
+ break;
+
+ case CONTENT_UNORDERED_LIST_END:
+ if (c == '\n' || c == '\r' || is_last) {
+ b_string_append(rv, "<ul>\n");
+ for (b_slist_t *l = lines; l != NULL; l = l->next)
+ b_string_append_printf(rv, "<li>%s</li>\n", l->data);
+ b_string_append(rv, "</ul>\n");
+ b_slist_free_full(lines, free);
+ lines = NULL;
+ free(prefix);
+ prefix = NULL;
+ state = CONTENT_START_LINE;
+ start = current;
+ }
+ else {
+ start = current;
+ state = CONTENT_UNORDERED_LIST_START;
+ }
+ break;
+
+ case CONTENT_ORDERED_LIST:
+ if (c >= '0' && c <= '9')
+ break;
+ if (c == '.') {
+ state = CONTENT_ORDERED_LIST_SPACE;
+ break;
+ }
+ state = CONTENT_PARAGRAPH;
+ break;
+
+ case CONTENT_ORDERED_LIST_SPACE:
+ if (c == ' ' || c == '\t')
+ break;
+ prefix_len = current - start;
+ state = CONTENT_ORDERED_LIST_START;
+
+ case CONTENT_ORDERED_LIST_START:
+ if (c == '\n' || c == '\r' || is_last) {
+ end = is_last ? src_len : current;
+ tmp = b_strndup(src + start, end - start);
+ if (strlen(tmp) >= prefix_len) {
+ tmp2 = b_strndup(tmp, prefix_len);
+ tmpv = b_str_split(tmp2, '.', 2);
+ free(tmp2);
+ tmp2 = NULL;
+ if (b_strv_length(tmpv) != 2) {
+ *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len,
+ current, "Malformed ordered list, prefix must be a "
+ "number, followed by a '.', followed by the content. "
+ "Content must be aligned with content from previous line(s)");
+ goto err_li;
+ }
+ for (unsigned int i = 0; tmpv[0][i] != '\0'; i++) {
+ if (!(tmpv[0][i] >= '0' && tmpv[0][i] <= '9')) {
+ *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len,
+ current, "Malformed ordered list, prefix must be a "
+ "number, followed by a '.', followed by the content. "
+ "Content must be aligned with content from previous line(s)");
+ goto err_li;
+ }
+ }
+ for (unsigned int i = 0; tmpv[1][i] != '\0'; i++) {
+ if (!(tmpv[1][i] == ' ' || tmpv[1][i] == '\t')) {
+ *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len,
+ current, "Malformed ordered list, prefix must be a "
+ "number, followed by a '.', followed by the content. "
+ "Content must be aligned with content from previous line(s)");
+ goto err_li;
+ }
+ }
+ lines = b_slist_append(lines, b_strdup(tmp + prefix_len));
+ state = CONTENT_ORDERED_LIST_END;
+err_li:
+ b_strv_free(tmpv);
+ tmpv = NULL;
+ }
+ free(tmp);
+ tmp = NULL;
+ }
+ if (!is_last)
+ break;
+
+ case CONTENT_ORDERED_LIST_END:
+ if (c == '\n' || c == '\r' || is_last) {
+ b_string_append(rv, "<ol>\n");
+ for (b_slist_t *l = lines; l != NULL; l = l->next)
+ b_string_append_printf(rv, "<li>%s</li>\n", l->data);
+ b_string_append(rv, "</ol>\n");
+ b_slist_free_full(lines, free);
+ lines = NULL;
+ free(prefix);
+ prefix = NULL;
+ state = CONTENT_START_LINE;
+ start = current;
+ }
+ else {
+ start = current;
+ state = CONTENT_ORDERED_LIST_START;
+ }
+ break;
+
case CONTENT_PARAGRAPH:
if (c == '\n' || c == '\r' || is_last) {
state = CONTENT_PARAGRAPH_END;