aboutsummaryrefslogtreecommitdiffstats
path: root/src/content-parser.c
diff options
context:
space:
mode:
authorRafael G. Martins <rafael@rafaelmartins.eng.br>2016-09-03 19:57:54 +0200
committerRafael G. Martins <rafael@rafaelmartins.eng.br>2016-09-03 20:28:55 +0200
commit74ca21a41bcb5a49d19e65c9ba88f1f864cb7095 (patch)
tree4774587e47abc0ff20453abbf714b63c36697f26 /src/content-parser.c
parent30ae5fd4f65f48009e6956e42ccc2c9d9ad80901 (diff)
downloadblogc-74ca21a41bcb5a49d19e65c9ba88f1f864cb7095.tar.gz
blogc-74ca21a41bcb5a49d19e65c9ba88f1f864cb7095.tar.bz2
blogc-74ca21a41bcb5a49d19e65c9ba88f1f864cb7095.zip
*: big code reorganization.
- source and tests are now splitted by target - utils lib is now called common still pending move error.c from blogc to common
Diffstat (limited to 'src/content-parser.c')
-rw-r--r--src/content-parser.c1264
1 files changed, 0 insertions, 1264 deletions
diff --git a/src/content-parser.c b/src/content-parser.c
deleted file mode 100644
index ca0597b..0000000
--- a/src/content-parser.c
+++ /dev/null
@@ -1,1264 +0,0 @@
-/*
- * blogc: A blog compiler.
- * Copyright (C) 2015-2016 Rafael G. Martins <rafael@rafaelmartins.eng.br>
- *
- * This program can be distributed under the terms of the BSD License.
- * See the file LICENSE.
- */
-
-#include <stdbool.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "content-parser.h"
-#include "utils.h"
-
-// this is a half ass implementation of a markdown-like syntax. bugs are
-// expected. feel free to improve the parser and add new features.
-
-
-char*
-blogc_slugify(const char *str)
-{
- if (str == NULL)
- return NULL;
- char *new_str = sb_strdup(str);
- int diff = 'a' - 'A'; // just to avoid magic numbers
- for (size_t i = 0; new_str[i] != '\0'; i++) {
- if (new_str[i] >= 'a' && new_str[i] <= 'z')
- continue;
- if (new_str[i] >= '0' && new_str[i] <= '9')
- continue;
- if (new_str[i] >= 'A' && new_str[i] <= 'Z')
- new_str[i] += diff;
- else
- new_str[i] = '-';
- }
- return new_str;
-}
-
-
-static const char*
-htmlentities(char c)
-{
- switch (c) {
- case '&':
- return "&amp;";
- case '<':
- return "&lt;";
- case '>':
- return "&gt;";
- case '"':
- return "&quot;";
- case '\'':
- return "&#x27;";
- case '/':
- return "&#x2F;";
- }
- return NULL;
-}
-
-
-static void
-htmlentities_append(sb_string_t *str, char c)
-{
- const char *e = htmlentities(c);
- if (e == NULL)
- sb_string_append_c(str, c);
- else
- sb_string_append(str, e);
-}
-
-
-char*
-blogc_htmlentities(const char *str)
-{
- if (str == NULL)
- return NULL;
- sb_string_t *rv = sb_string_new();
- for (size_t i = 0; str[i] != '\0'; i++)
- htmlentities_append(rv, str[i]);
- return sb_string_free(rv, false);
-}
-
-
-char*
-blogc_fix_description(const char *paragraph)
-{
- if (paragraph == NULL)
- return NULL;
- sb_string_t *rv = sb_string_new();
- bool last = false;
- bool newline = false;
- char *tmp = NULL;
- size_t start = 0;
- size_t current = 0;
- while (true) {
- switch (paragraph[current]) {
- case '\0':
- last = true;
- case '\r':
- case '\n':
- if (newline)
- break;
- tmp = sb_strndup(paragraph + start, current - start);
- sb_string_append(rv, sb_str_strip(tmp));
- free(tmp);
- tmp = NULL;
- if (!last)
- sb_string_append_c(rv, ' ');
- start = current + 1;
- newline = true;
- break;
- default:
- newline = false;
- }
- if (last)
- break;
- current++;
- }
- tmp = blogc_htmlentities(sb_str_strip(rv->str));
- sb_string_free(rv, true);
- return tmp;
-}
-
-
-typedef enum {
- CONTENT_START_LINE = 1,
- CONTENT_EXCERPT,
- CONTENT_EXCERPT_END,
- CONTENT_HEADER,
- CONTENT_HEADER_TITLE_START,
- CONTENT_HEADER_TITLE,
- CONTENT_HTML,
- CONTENT_HTML_END,
- CONTENT_BLOCKQUOTE,
- CONTENT_BLOCKQUOTE_START,
- CONTENT_BLOCKQUOTE_END,
- CONTENT_CODE,
- CONTENT_CODE_START,
- CONTENT_CODE_END,
- CONTENT_UNORDERED_LIST_OR_HORIZONTAL_RULE,
- CONTENT_HORIZONTAL_RULE,
- CONTENT_UNORDERED_LIST_START,
- CONTENT_UNORDERED_LIST_END,
- CONTENT_ORDERED_LIST,
- CONTENT_ORDERED_LIST_SPACE,
- CONTENT_ORDERED_LIST_START,
- CONTENT_ORDERED_LIST_END,
- CONTENT_PARAGRAPH,
- CONTENT_PARAGRAPH_END,
-} blogc_content_parser_state_t;
-
-
-typedef enum {
- CONTENT_INLINE_START = 1,
- CONTENT_INLINE_ASTERISK,
- CONTENT_INLINE_ASTERISK_DOUBLE,
- CONTENT_INLINE_UNDERSCORE,
- CONTENT_INLINE_UNDERSCORE_DOUBLE,
- CONTENT_INLINE_BACKTICKS,
- CONTENT_INLINE_BACKTICKS_DOUBLE,
- CONTENT_INLINE_LINK_START,
- CONTENT_INLINE_LINK_AUTO,
- CONTENT_INLINE_LINK_CONTENT,
- CONTENT_INLINE_LINK_URL_START,
- CONTENT_INLINE_LINK_URL,
- CONTENT_INLINE_IMAGE_START,
- CONTENT_INLINE_IMAGE_ALT,
- CONTENT_INLINE_IMAGE_URL_START,
- CONTENT_INLINE_IMAGE_URL,
- CONTENT_INLINE_ENDASH,
- CONTENT_INLINE_EMDASH,
- CONTENT_INLINE_LINE_BREAK_START,
- CONTENT_INLINE_LINE_BREAK,
-} blogc_content_parser_inline_state_t;
-
-
-static char*
-blogc_content_parse_inline_internal(const char *src, size_t src_len)
-{
- size_t current = 0;
- size_t start = 0;
- size_t count = 0;
-
- const char *tmp = NULL;
- char *tmp2 = NULL;
- char *tmp3 = NULL;
-
- size_t start_link = 0;
- char *link1 = NULL;
-
- sb_string_t *rv = sb_string_new();
-
- blogc_content_parser_inline_state_t state = CONTENT_INLINE_START;
-
- while (current < src_len) {
- char c = src[current];
- bool is_last = current == src_len - 1;
-
- switch (state) {
- case CONTENT_INLINE_START:
- if (is_last) {
- htmlentities_append(rv, c);
- break;
- }
- if (c == '\\') {
- htmlentities_append(rv, src[++current]);
- break;
- }
- if (c == '*') {
- state = CONTENT_INLINE_ASTERISK;
- break;
- }
- if (c == '_') {
- state = CONTENT_INLINE_UNDERSCORE;
- break;
- }
- if (c == '`') {
- state = CONTENT_INLINE_BACKTICKS;
- break;
- }
- if (c == '[') {
- state = CONTENT_INLINE_LINK_START;
- break;
- }
- if (c == '!') {
- state = CONTENT_INLINE_IMAGE_START;
- break;
- }
- if (c == '-') {
- state = CONTENT_INLINE_ENDASH;
- break;
- }
- if (c == ' ') {
- state = CONTENT_INLINE_LINE_BREAK_START;
- break;
- }
- htmlentities_append(rv, c);
- break;
-
- case CONTENT_INLINE_ASTERISK:
- if (c == '*') {
- state = CONTENT_INLINE_ASTERISK_DOUBLE;
- break;
- }
- tmp = sb_str_find(src + current, '*');
- if (tmp == NULL || ((tmp - src) >= src_len)) {
- sb_string_append_c(rv, '*');
- state = CONTENT_INLINE_START;
- continue;
- }
- tmp2 = blogc_content_parse_inline_internal(
- src + current, (tmp - src) - current);
- sb_string_append_printf(rv, "<em>%s</em>", tmp2);
- current = tmp - src;
- tmp = NULL;
- free(tmp2);
- tmp2 = NULL;
- state = CONTENT_INLINE_START;
- break;
-
- case CONTENT_INLINE_ASTERISK_DOUBLE:
- tmp = src + current;
- do {
- tmp = sb_str_find(tmp, '*');
- if (((tmp - src) < src_len) && *(tmp + 1) == '*') {
- break;
- }
- tmp++;
- } while (tmp != NULL && (tmp - src) < src_len);
- if (tmp == NULL || ((tmp - src) >= src_len)) {
- sb_string_append_c(rv, '*');
- sb_string_append_c(rv, '*');
- state = CONTENT_INLINE_START;
- continue;
- }
- tmp2 = blogc_content_parse_inline_internal(
- src + current, (tmp - src) - current);
- sb_string_append_printf(rv, "<strong>%s</strong>", tmp2);
- current = tmp - src + 1;
- tmp = NULL;
- free(tmp2);
- tmp2 = NULL;
- state = CONTENT_INLINE_START;
- break;
-
- case CONTENT_INLINE_UNDERSCORE:
- if (c == '_') {
- state = CONTENT_INLINE_UNDERSCORE_DOUBLE;
- break;
- }
- tmp = sb_str_find(src + current, '_');
- if (tmp == NULL || ((tmp - src) >= src_len)) {
- sb_string_append_c(rv, '_');
- state = CONTENT_INLINE_START;
- continue;
- }
- tmp2 = blogc_content_parse_inline_internal(
- src + current, (tmp - src) - current);
- sb_string_append_printf(rv, "<em>%s</em>", tmp2);
- current = tmp - src;
- tmp = NULL;
- free(tmp2);
- tmp2 = NULL;
- state = CONTENT_INLINE_START;
- break;
-
- case CONTENT_INLINE_UNDERSCORE_DOUBLE:
- tmp = src + current;
- do {
- tmp = sb_str_find(tmp, '_');
- if (((tmp - src) < src_len) && *(tmp + 1) == '_') {
- break;
- }
- tmp++;
- } while (tmp != NULL && (tmp - src) < src_len);
- if (tmp == NULL || ((tmp - src) >= src_len)) {
- sb_string_append_c(rv, '_');
- sb_string_append_c(rv, '_');
- state = CONTENT_INLINE_START;
- continue;
- }
- tmp2 = blogc_content_parse_inline_internal(
- src + current, (tmp - src) - current);
- sb_string_append_printf(rv, "<strong>%s</strong>", tmp2);
- current = tmp - src + 1;
- tmp = NULL;
- free(tmp2);
- tmp2 = NULL;
- state = CONTENT_INLINE_START;
- break;
-
- case CONTENT_INLINE_BACKTICKS:
- if (c == '`') {
- state = CONTENT_INLINE_BACKTICKS_DOUBLE;
- break;
- }
- tmp = sb_str_find(src + current, '`');
- if (tmp == NULL || ((tmp - src) >= src_len)) {
- sb_string_append_c(rv, '`');
- state = CONTENT_INLINE_START;
- continue;
- }
- tmp3 = sb_strndup(src + current, (tmp - src) - current);
- tmp2 = blogc_htmlentities(tmp3);
- free(tmp3);
- tmp3 = NULL;
- sb_string_append(rv, "<code>");
- sb_string_append_escaped(rv, tmp2);
- sb_string_append(rv, "</code>");
- current = tmp - src;
- tmp = NULL;
- free(tmp2);
- tmp2 = NULL;
- state = CONTENT_INLINE_START;
- break;
-
- case CONTENT_INLINE_BACKTICKS_DOUBLE:
- tmp = src + current;
- do {
- tmp = sb_str_find(tmp, '`');
- if (((tmp - src) < src_len) && *(tmp + 1) == '`') {
- break;
- }
- tmp++;
- } while (tmp != NULL && (tmp - src) < src_len);
- if (tmp == NULL || ((tmp - src) >= src_len)) {
- sb_string_append_c(rv, '`');
- sb_string_append_c(rv, '`');
- state = CONTENT_INLINE_START;
- continue;
- }
- tmp3 = sb_strndup(src + current, (tmp - src) - current);
- tmp2 = blogc_htmlentities(tmp3);
- free(tmp3);
- tmp3 = NULL;
- sb_string_append(rv, "<code>");
- sb_string_append_escaped(rv, tmp2);
- sb_string_append(rv, "</code>");
- current = tmp - src + 1;
- tmp = NULL;
- free(tmp2);
- tmp2 = NULL;
- state = CONTENT_INLINE_START;
- break;
-
- case CONTENT_INLINE_LINK_START:
- if (c == '[') {
- state = CONTENT_INLINE_LINK_AUTO;
- break;
- }
- start_link = current;
- count = 1;
- state = CONTENT_INLINE_LINK_CONTENT;
- break;
-
- case CONTENT_INLINE_LINK_AUTO:
- tmp = src + current;
- do {
- tmp = sb_str_find(tmp, ']');
- if (((tmp - src) < src_len) && *(tmp + 1) == ']') {
- break;
- }
- tmp++;
- } while (tmp != NULL && (tmp - src) < src_len);
- if (tmp == NULL || ((tmp - src) >= src_len)) {
- sb_string_append_c(rv, '[');
- sb_string_append_c(rv, '[');
- state = CONTENT_INLINE_START;
- continue;
- }
- tmp2 = sb_strndup(src + current, (tmp - src) - current);
- sb_string_append(rv, "<a href=\"");
- sb_string_append_escaped(rv, tmp2);
- sb_string_append(rv, "\">");
- sb_string_append_escaped(rv, tmp2);
- sb_string_append(rv, "</a>");
- current = tmp - src + 1;
- tmp = NULL;
- free(tmp2);
- tmp2 = NULL;
- state = CONTENT_INLINE_START;
- break;
-
- case CONTENT_INLINE_LINK_CONTENT:
- if (c == '\\') {
- current++;
- break;
- }
- if (c == '[') { // links can be nested :/
- count++;
- break;
- }
- if (c == ']') {
- if (--count == 0) {
- link1 = sb_strndup(src + start_link, current - start_link);
- state = CONTENT_INLINE_LINK_URL_START;
- }
- }
- break;
-
- case CONTENT_INLINE_LINK_URL_START:
- if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
- break;
- if (c == '(') {
- state = CONTENT_INLINE_LINK_URL;
- start = current + 1;
- break;
- }
- sb_string_append_c(rv, '[');
- state = CONTENT_INLINE_START;
- current = start_link;
- start_link = 0;
- continue;
-
- case CONTENT_INLINE_LINK_URL:
- if (c == '\\') {
- current++;
- break;
- }
- if (c == ')') {
- tmp2 = sb_strndup(src + start, current - start);
- tmp3 = blogc_content_parse_inline(link1);
- free(link1);
- link1 = NULL;
- sb_string_append(rv, "<a href=\"");
- sb_string_append_escaped(rv, tmp2);
- sb_string_append_printf(rv, "\">%s</a>", tmp3);
- free(tmp2);
- tmp2 = NULL;
- free(tmp3);
- tmp3 = NULL;
- state = CONTENT_INLINE_START;
- break;
- }
- break;
-
- case CONTENT_INLINE_IMAGE_START:
- // we use the same variables used for links, because why not?
- if (c == '[') {
- state = CONTENT_INLINE_IMAGE_ALT;
- start_link = current + 1;
- break;
- }
- sb_string_append_c(rv, '!');
- state = CONTENT_INLINE_START;
- continue;
-
- case CONTENT_INLINE_IMAGE_ALT:
- if (c == '\\') {
- current++;
- break;
- }
- if (c == ']') {
- link1 = sb_strndup(src + start_link, current - start_link);
- state = CONTENT_INLINE_IMAGE_URL_START;
- }
- break;
-
- case CONTENT_INLINE_IMAGE_URL_START:
- if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
- break;
- if (c == '(') {
- state = CONTENT_INLINE_IMAGE_URL;
- start = current + 1;
- break;
- }
- sb_string_append_c(rv, '!');
- sb_string_append_c(rv, '[');
- state = CONTENT_INLINE_START;
- current = start_link;
- start_link = 0;
- continue;
-
- case CONTENT_INLINE_IMAGE_URL:
- if (c == '\\') {
- current++;
- break;
- }
- if (c == ')') {
- tmp2 = sb_strndup(src + start, current - start);
- sb_string_append(rv, "<img src=\"");
- sb_string_append_escaped(rv, tmp2);
- sb_string_append(rv, "\" alt=\"");
- sb_string_append_escaped(rv, link1);
- sb_string_append(rv, "\">");
- free(tmp2);
- tmp2 = NULL;
- free(link1);
- link1 = NULL;
- state = CONTENT_INLINE_START;
- break;
- }
- break;
-
- case CONTENT_INLINE_ENDASH:
- if (c == '-') {
- if (is_last) {
- sb_string_append(rv, "&ndash;");
- state = CONTENT_INLINE_START; // wat
- break;
- }
- state = CONTENT_INLINE_EMDASH;
- break;
- }
- sb_string_append_c(rv, '-');
- state = CONTENT_INLINE_START;
- continue;
-
- case CONTENT_INLINE_EMDASH:
- if (c == '-') {
- sb_string_append(rv, "&mdash;");
- state = CONTENT_INLINE_START;
- break;
- }
- sb_string_append(rv, "&ndash;");
- state = CONTENT_INLINE_START;
- continue;
-
- case CONTENT_INLINE_LINE_BREAK_START:
- if (c == ' ') {
- if (is_last) {
- sb_string_append(rv, "<br />");
- state = CONTENT_INLINE_START; // wat
- break;
- }
- count = 2;
- state = CONTENT_INLINE_LINE_BREAK;
- break;
- }
- sb_string_append_c(rv, ' ');
- state = CONTENT_INLINE_START;
- continue;
-
- case CONTENT_INLINE_LINE_BREAK:
- if (c == ' ') {
- if (is_last) {
- sb_string_append(rv, "<br />");
- state = CONTENT_INLINE_START; // wat
- break;
- }
- count++;
- break;
- }
- if (c == '\n' || c == '\r') {
- sb_string_append_printf(rv, "<br />%c", c);
- state = CONTENT_INLINE_START;
- break;
- }
- for (size_t i = 0; i < count; i++)
- sb_string_append_c(rv, ' ');
- state = CONTENT_INLINE_START;
- continue;
- }
- current++;
- }
-
- switch (state) {
-
- // if after the end of the loop we are on any of the following states,
- // we must call the parser again, from start_link
- case CONTENT_INLINE_IMAGE_START:
- case CONTENT_INLINE_IMAGE_ALT:
- case CONTENT_INLINE_IMAGE_URL_START:
- case CONTENT_INLINE_IMAGE_URL:
- sb_string_append_c(rv, '!');
-
- case CONTENT_INLINE_LINK_CONTENT:
- case CONTENT_INLINE_LINK_URL_START:
- case CONTENT_INLINE_LINK_URL:
- tmp2 = blogc_content_parse_inline(src + start_link);
- sb_string_append_c(rv, '[');
- sb_string_append_escaped(rv, tmp2); // no need to free, as it wil be done below.
- break;
-
- // add all the other states here explicitly, so the compiler helps us
- // not missing any new state that should be handled.
- case CONTENT_INLINE_START:
- case CONTENT_INLINE_ASTERISK:
- case CONTENT_INLINE_ASTERISK_DOUBLE:
- case CONTENT_INLINE_UNDERSCORE:
- case CONTENT_INLINE_UNDERSCORE_DOUBLE:
- case CONTENT_INLINE_BACKTICKS:
- case CONTENT_INLINE_BACKTICKS_DOUBLE:
- case CONTENT_INLINE_LINK_START:
- case CONTENT_INLINE_LINK_AUTO:
- case CONTENT_INLINE_ENDASH:
- case CONTENT_INLINE_EMDASH:
- case CONTENT_INLINE_LINE_BREAK_START:
- case CONTENT_INLINE_LINE_BREAK:
- break;
- }
-
- free(tmp2);
- free(tmp3);
- free(link1);
-
- return sb_string_free(rv, false);
-}
-
-
-char*
-blogc_content_parse_inline(const char *src)
-{
- return blogc_content_parse_inline_internal(src, strlen(src));
-}
-
-
-bool
-blogc_is_ordered_list_item(const char *str, size_t prefix_len)
-{
- if (str == NULL)
- return false;
-
- if (strlen(str) < 2)
- return false;
-
- size_t i;
-
- for (i = 0; str[i] >= '0' && str[i] <= '9'; i++);
-
- if (i == 0)
- return false;
- if (str[i] != '.')
- return false;
-
- for (i++; i < prefix_len && (str[i] == ' ' || str[i] == '\t'); i++);
-
- if (str[i] == '\0')
- return false;
-
- return i == prefix_len;
-}
-
-
-char*
-blogc_content_parse(const char *src, size_t *end_excerpt, char **description)
-{
- // src is always nul-terminated.
- size_t src_len = strlen(src);
-
- size_t current = 0;
- size_t start = 0;
- size_t start2 = 0;
- size_t end = 0;
- size_t eend = 0;
- size_t real_end = 0;
-
- unsigned int header_level = 0;
- char *prefix = NULL;
- size_t prefix_len = 0;
- char *tmp = NULL;
- char *tmp2 = NULL;
- char *parsed = NULL;
- char *slug = NULL;
-
- // this isn't empty because we need some reasonable default value in the
- // unlikely case that we need to print some line ending before evaluating
- // the "real" value.
- char line_ending[3] = "\n";
- bool line_ending_found = false;
-
- char d = '\0';
-
- sb_slist_t *lines = NULL;
- sb_slist_t *lines2 = NULL;
-
- sb_string_t *rv = sb_string_new();
- sb_string_t *tmp_str = NULL;
-
- blogc_content_parser_state_t state = CONTENT_START_LINE;
-
- while (current < src_len) {
- char c = src[current];
- bool is_last = current == src_len - 1;
-
- if (c == '\n' || c == '\r') {
- if ((current + 1) < src_len) {
- if ((c == '\n' && src[current + 1] == '\r') ||
- (c == '\r' && src[current + 1] == '\n'))
- {
- if (!line_ending_found) {
- line_ending[0] = c;
- line_ending[1] = src[current + 1];
- line_ending[2] = '\0';
- line_ending_found = true;
- }
- real_end = current;
- c = src[++current];
- is_last = current == src_len - 1;
- }
- }
- if (!line_ending_found) {
- line_ending[0] = c;
- line_ending[1] = '\0';
- line_ending_found = true;
- }
- }
-
- switch (state) {
-
- case CONTENT_START_LINE:
- if (c == '\n' || c == '\r' || is_last)
- break;
- start = current;
- if (c == '.') {
- if (end_excerpt != NULL) {
- eend = rv->len; // fuck it
- state = CONTENT_EXCERPT;
- break;
- }
- }
- if (c == '#') {
- header_level = 1;
- state = CONTENT_HEADER;
- break;
- }
- if (c == '*' || c == '+' || c == '-') {
- start2 = current;
- state = CONTENT_UNORDERED_LIST_OR_HORIZONTAL_RULE;
- d = c;
- break;
- }
- if (c >= '0' && c <= '9') {
- start2 = current;
- state = CONTENT_ORDERED_LIST;
- break;
- }
- if (c == ' ' || c == '\t') {
- start2 = current;
- state = CONTENT_CODE;
- break;
- }
- if (c == '<') {
- state = CONTENT_HTML;
- break;
- }
- if (c == '>') {
- state = CONTENT_BLOCKQUOTE;
- start2 = current;
- break;
- }
- state = CONTENT_PARAGRAPH;
- break;
-
- case CONTENT_EXCERPT:
- if (end_excerpt != NULL) {
- if (c == '.')
- break;
- if (c == '\n' || c == '\r') {
- state = CONTENT_EXCERPT_END;
- break;
- }
- }
- eend = 0;
- state = CONTENT_PARAGRAPH;
- break;
-
- case CONTENT_EXCERPT_END:
- if (end_excerpt != NULL) {
- if (c == '\n' || c == '\r') {
- *end_excerpt = eend;
- state = CONTENT_START_LINE;
- break;
- }
- }
- eend = 0;
- state = CONTENT_PARAGRAPH_END;
- break;
-
- case CONTENT_HEADER:
- if (c == '#') {
- header_level += 1;
- break;
- }
- if (c == ' ' || c == '\t') {
- state = CONTENT_HEADER_TITLE_START;
- break;
- }
- state = CONTENT_PARAGRAPH;
- break;
-
- case CONTENT_HEADER_TITLE_START:
- if (c == ' ' || c == '\t')
- break;
- start = current;
- if (c != '\n' && c != '\r') {
- state = CONTENT_HEADER_TITLE;
- break;
- }
-
- case CONTENT_HEADER_TITLE:
- if (c == '\n' || c == '\r' || is_last) {
- end = is_last && c != '\n' && c != '\r' ? src_len :
- (real_end != 0 ? real_end : current);
- tmp = sb_strndup(src + start, end - start);
- parsed = blogc_content_parse_inline(tmp);
- slug = blogc_slugify(tmp);
- if (slug == NULL)
- sb_string_append_printf(rv, "<h%d>%s</h%d>%s",
- header_level, parsed, header_level, line_ending);
- else
- sb_string_append_printf(rv, "<h%d id=\"%s\">%s</h%d>%s",
- header_level, slug, parsed, header_level,
- line_ending);
- free(slug);
- free(parsed);
- parsed = NULL;
- free(tmp);
- tmp = NULL;
- state = CONTENT_START_LINE;
- start = current;
- }
- break;
-
- case CONTENT_HTML:
- if (c == '\n' || c == '\r' || is_last) {
- state = CONTENT_HTML_END;
- end = is_last && c != '\n' && c != '\r' ? src_len :
- (real_end != 0 ? real_end : current);
- }
- if (!is_last)
- break;
-
- case CONTENT_HTML_END:
- if (c == '\n' || c == '\r' || is_last) {
- tmp = sb_strndup(src + start, end - start);
- sb_string_append_printf(rv, "%s%s", tmp, line_ending);
- free(tmp);
- tmp = NULL;
- state = CONTENT_START_LINE;
- start = current;
- }
- else
- state = CONTENT_HTML;
- break;
-
- case CONTENT_BLOCKQUOTE:
- if (c == ' ' || c == '\t')
- break;
- prefix = sb_strndup(src + start, current - start);
- state = CONTENT_BLOCKQUOTE_START;
- break;
-
- case CONTENT_BLOCKQUOTE_START:
- if (c == '\n' || c == '\r' || is_last) {
- end = is_last && c != '\n' && c != '\r' ? src_len :
- (real_end != 0 ? real_end : current);
- tmp = sb_strndup(src + start2, end - start2);
- if (sb_str_starts_with(tmp, prefix)) {
- lines = sb_slist_append(lines, sb_strdup(tmp + strlen(prefix)));
- state = CONTENT_BLOCKQUOTE_END;
- }
- else {
- state = CONTENT_PARAGRAPH;
- free(prefix);
- prefix = NULL;
- sb_slist_free_full(lines, free);
- lines = NULL;
- if (is_last) {
- free(tmp);
- tmp = NULL;
- continue;
- }
- }
- free(tmp);
- tmp = NULL;
- }
- if (!is_last)
- break;
-
- case CONTENT_BLOCKQUOTE_END:
- if (c == '\n' || c == '\r' || is_last) {
- tmp_str = sb_string_new();
- for (sb_slist_t *l = lines; l != NULL; l = l->next)
- sb_string_append_printf(tmp_str, "%s%s", l->data,
- line_ending);
- // do not propagate description to blockquote parsing,
- // because we just want paragraphs from first level of
- // content.
- tmp = blogc_content_parse(tmp_str->str, NULL, NULL);
- sb_string_append_printf(rv, "<blockquote>%s</blockquote>%s",
- tmp, line_ending);
- free(tmp);
- tmp = NULL;
- sb_string_free(tmp_str, true);
- tmp_str = NULL;
- sb_slist_free_full(lines, free);
- lines = NULL;
- free(prefix);
- prefix = NULL;
- state = CONTENT_START_LINE;
- start2 = current;
- }
- else {
- start2 = current;
- state = CONTENT_BLOCKQUOTE_START;
- }
- break;
-
- case CONTENT_CODE:
- if (c == ' ' || c == '\t')
- break;
- prefix = sb_strndup(src + start, current - start);
- state = CONTENT_CODE_START;
- break;
-
- case CONTENT_CODE_START:
- if (c == '\n' || c == '\r' || is_last) {
- end = is_last && c != '\n' && c != '\r' ? src_len :
- (real_end != 0 ? real_end : current);
- tmp = sb_strndup(src + start2, end - start2);
- if (sb_str_starts_with(tmp, prefix)) {
- lines = sb_slist_append(lines, sb_strdup(tmp + strlen(prefix)));
- state = CONTENT_CODE_END;
- }
- else {
- state = CONTENT_PARAGRAPH;
- free(prefix);
- prefix = NULL;
- sb_slist_free_full(lines, free);
- lines = NULL;
- free(tmp);
- tmp = NULL;
- if (is_last)
- continue;
- break;
- }
- free(tmp);
- tmp = NULL;
- }
- if (!is_last)
- break;
-
- case CONTENT_CODE_END:
- if (c == '\n' || c == '\r' || is_last) {
- sb_string_append(rv, "<pre><code>");
- for (sb_slist_t *l = lines; l != NULL; l = l->next) {
- char *tmp_line = blogc_htmlentities(l->data);
- if (l->next == NULL)
- sb_string_append_printf(rv, "%s", tmp_line);
- else
- sb_string_append_printf(rv, "%s%s", tmp_line,
- line_ending);
- free(tmp_line);
- }
- sb_string_append_printf(rv, "</code></pre>%s", line_ending);
- sb_slist_free_full(lines, free);
- lines = NULL;
- free(prefix);
- prefix = NULL;
- state = CONTENT_START_LINE;
- start2 = current;
- }
- else {
- start2 = current;
- state = CONTENT_CODE_START;
- }
- break;
-
- case CONTENT_UNORDERED_LIST_OR_HORIZONTAL_RULE:
- if (c == d) {
- state = CONTENT_HORIZONTAL_RULE;
- if (is_last)
- continue;
- break;
- }
- if (c == ' ' || c == '\t')
- break;
- prefix = sb_strndup(src + start, current - start);
- state = CONTENT_UNORDERED_LIST_START;
- break;
-
- case CONTENT_HORIZONTAL_RULE:
- if (c == d && !is_last) {
- break;
- }
- if (c == '\n' || c == '\r' || is_last) {
- sb_string_append_printf(rv, "<hr />%s", line_ending);
- state = CONTENT_START_LINE;
- start = current;
- d = '\0';
- break;
- }
- state = CONTENT_PARAGRAPH;
- break;
-
- case CONTENT_UNORDERED_LIST_START:
- if (c == '\n' || c == '\r' || is_last) {
- end = is_last && c != '\n' && c != '\r' ? src_len :
- (real_end != 0 ? real_end : current);
- tmp = sb_strndup(src + start2, end - start2);
- tmp2 = sb_strdup_printf("%-*s", strlen(prefix), "");
- if (sb_str_starts_with(tmp, prefix)) {
- if (lines2 != NULL) {
- tmp_str = sb_string_new();
- for (sb_slist_t *l = lines2; l != NULL; l = l->next) {
- if (l->next == NULL)
- sb_string_append_printf(tmp_str, "%s", l->data);
- else
- sb_string_append_printf(tmp_str, "%s%s", l->data,
- line_ending);
- }
- sb_slist_free_full(lines2, free);
- lines2 = NULL;
- parsed = blogc_content_parse_inline(tmp_str->str);
- sb_string_free(tmp_str, true);
- lines = sb_slist_append(lines, sb_strdup(parsed));
- free(parsed);
- parsed = NULL;
- }
- lines2 = sb_slist_append(lines2, sb_strdup(tmp + strlen(prefix)));
- }
- else if (sb_str_starts_with(tmp, tmp2)) {
- lines2 = sb_slist_append(lines2, sb_strdup(tmp + strlen(prefix)));
- }
- else {
- state = CONTENT_PARAGRAPH_END;
- free(tmp);
- tmp = NULL;
- free(tmp2);
- tmp2 = NULL;
- free(prefix);
- prefix = NULL;
- sb_slist_free_full(lines, free);
- sb_slist_free_full(lines2, free);
- lines = NULL;
- if (is_last)
- continue;
- break;
- }
- free(tmp);
- tmp = NULL;
- free(tmp2);
- tmp2 = NULL;
- state = CONTENT_UNORDERED_LIST_END;
- }
- if (!is_last)
- break;
-
- case CONTENT_UNORDERED_LIST_END:
- if (c == '\n' || c == '\r' || is_last) {
- if (lines2 != NULL) {
- // FIXME: avoid repeting the code below
- tmp_str = sb_string_new();
- for (sb_slist_t *l = lines2; l != NULL; l = l->next) {
- if (l->next == NULL)
- sb_string_append_printf(tmp_str, "%s", l->data);
- else
- sb_string_append_printf(tmp_str, "%s%s", l->data,
- line_ending);
- }
- sb_slist_free_full(lines2, free);
- lines2 = NULL;
- parsed = blogc_content_parse_inline(tmp_str->str);
- sb_string_free(tmp_str, true);
- lines = sb_slist_append(lines, sb_strdup(parsed));
- free(parsed);
- parsed = NULL;
- }
- sb_string_append_printf(rv, "<ul>%s", line_ending);
- for (sb_slist_t *l = lines; l != NULL; l = l->next)
- sb_string_append_printf(rv, "<li>%s</li>%s", l->data,
- line_ending);
- sb_string_append_printf(rv, "</ul>%s", line_ending);
- sb_slist_free_full(lines, free);
- lines = NULL;
- free(prefix);
- prefix = NULL;
- state = CONTENT_START_LINE;
- start2 = current;
- }
- else {
- start2 = current;
- state = CONTENT_UNORDERED_LIST_START;
- }
- break;
-
- case CONTENT_ORDERED_LIST:
- if (c >= '0' && c <= '9')
- break;
- if (c == '.') {
- state = CONTENT_ORDERED_LIST_SPACE;
- break;
- }
- state = CONTENT_PARAGRAPH;
- if (is_last)
- continue;
- break;
-
- case CONTENT_ORDERED_LIST_SPACE:
- if (c == ' ' || c == '\t')
- break;
- prefix_len = current - start;
- state = CONTENT_ORDERED_LIST_START;
- if (c != '\n' && c != '\r' && !is_last)
- break;
-
- case CONTENT_ORDERED_LIST_START:
- if (c == '\n' || c == '\r' || is_last) {
- end = is_last && c != '\n' && c != '\r' ? src_len :
- (real_end != 0 ? real_end : current);
- tmp = sb_strndup(src + start2, end - start2);
- tmp2 = sb_strdup_printf("%-*s", prefix_len, "");
- if (blogc_is_ordered_list_item(tmp, prefix_len)) {
- if (lines2 != NULL) {
- tmp_str = sb_string_new();
- for (sb_slist_t *l = lines2; l != NULL; l = l->next) {
- if (l->next == NULL)
- sb_string_append_printf(tmp_str, "%s", l->data);
- else
- sb_string_append_printf(tmp_str, "%s%s", l->data,
- line_ending);
- }
- sb_slist_free_full(lines2, free);
- lines2 = NULL;
- parsed = blogc_content_parse_inline(tmp_str->str);
- sb_string_free(tmp_str, true);
- lines = sb_slist_append(lines, sb_strdup(parsed));
- free(parsed);
- parsed = NULL;
- }
- lines2 = sb_slist_append(lines2, sb_strdup(tmp + prefix_len));
- }
- else if (sb_str_starts_with(tmp, tmp2)) {
- lines2 = sb_slist_append(lines2, sb_strdup(tmp + prefix_len));
- }
- else {
- state = CONTENT_PARAGRAPH_END;
- free(tmp);
- tmp = NULL;
- free(tmp2);
- tmp2 = NULL;
- free(parsed);
- parsed = NULL;
- sb_slist_free_full(lines, free);
- sb_slist_free_full(lines2, free);
- lines = NULL;
- if (is_last)
- continue;
- break;
- }
- free(tmp);
- tmp = NULL;
- free(tmp2);
- tmp2 = NULL;
- state = CONTENT_ORDERED_LIST_END;
- }
- if (!is_last)
- break;
-
- case CONTENT_ORDERED_LIST_END:
- if (c == '\n' || c == '\r' || is_last) {
- if (lines2 != NULL) {
- // FIXME: avoid repeting the code below
- tmp_str = sb_string_new();
- for (sb_slist_t *l = lines2; l != NULL; l = l->next) {
- if (l->next == NULL)
- sb_string_append_printf(tmp_str, "%s", l->data);
- else
- sb_string_append_printf(tmp_str, "%s%s", l->data,
- line_ending);
- }
- sb_slist_free_full(lines2, free);
- lines2 = NULL;
- parsed = blogc_content_parse_inline(tmp_str->str);
- sb_string_free(tmp_str, true);
- lines = sb_slist_append(lines, sb_strdup(parsed));
- free(parsed);
- parsed = NULL;
- }
- sb_string_append_printf(rv, "<ol>%s", line_ending);
- for (sb_slist_t *l = lines; l != NULL; l = l->next)
- sb_string_append_printf(rv, "<li>%s</li>%s", l->data,
- line_ending);
- sb_string_append_printf(rv, "</ol>%s", line_ending);
- sb_slist_free_full(lines, free);
- lines = NULL;
- free(prefix);
- prefix = NULL;
- state = CONTENT_START_LINE;
- start2 = current;
- }
- else {
- start2 = current;
- state = CONTENT_ORDERED_LIST_START;
- }
- break;
-
- case CONTENT_PARAGRAPH:
- if (c == '\n' || c == '\r' || is_last) {
- state = CONTENT_PARAGRAPH_END;
- end = is_last && c != '\n' && c != '\r' ? src_len :
- (real_end != 0 ? real_end : current);
- }
- if (!is_last)
- break;
-
- case CONTENT_PARAGRAPH_END:
- if (c == '\n' || c == '\r' || is_last) {
- tmp = sb_strndup(src + start, end - start);
- if (description != NULL && *description == NULL)
- *description = blogc_fix_description(tmp);
- parsed = blogc_content_parse_inline(tmp);
- sb_string_append_printf(rv, "<p>%s</p>%s", parsed,
- line_ending);
- free(parsed);
- parsed = NULL;
- free(tmp);
- tmp = NULL;
- state = CONTENT_START_LINE;
- start = current;
- }
- else
- state = CONTENT_PARAGRAPH;
- break;
-
- }
-
- current++;
- }
-
- return sb_string_free(rv, false);
-}