diff options
author | Rafael G. Martins <rafael@rafaelmartins.eng.br> | 2016-09-03 19:57:54 +0200 |
---|---|---|
committer | Rafael G. Martins <rafael@rafaelmartins.eng.br> | 2016-09-03 20:28:55 +0200 |
commit | 74ca21a41bcb5a49d19e65c9ba88f1f864cb7095 (patch) | |
tree | 4774587e47abc0ff20453abbf714b63c36697f26 /src/content-parser.c | |
parent | 30ae5fd4f65f48009e6956e42ccc2c9d9ad80901 (diff) | |
download | blogc-74ca21a41bcb5a49d19e65c9ba88f1f864cb7095.tar.gz blogc-74ca21a41bcb5a49d19e65c9ba88f1f864cb7095.tar.bz2 blogc-74ca21a41bcb5a49d19e65c9ba88f1f864cb7095.zip |
*: big code reorganization.
- source and tests are now splitted by target
- utils lib is now called common
still pending move error.c from blogc to common
Diffstat (limited to 'src/content-parser.c')
-rw-r--r-- | src/content-parser.c | 1264 |
1 files changed, 0 insertions, 1264 deletions
diff --git a/src/content-parser.c b/src/content-parser.c deleted file mode 100644 index ca0597b..0000000 --- a/src/content-parser.c +++ /dev/null @@ -1,1264 +0,0 @@ -/* - * blogc: A blog compiler. - * Copyright (C) 2015-2016 Rafael G. Martins <rafael@rafaelmartins.eng.br> - * - * This program can be distributed under the terms of the BSD License. - * See the file LICENSE. - */ - -#include <stdbool.h> -#include <stdlib.h> -#include <string.h> - -#include "content-parser.h" -#include "utils.h" - -// this is a half ass implementation of a markdown-like syntax. bugs are -// expected. feel free to improve the parser and add new features. - - -char* -blogc_slugify(const char *str) -{ - if (str == NULL) - return NULL; - char *new_str = sb_strdup(str); - int diff = 'a' - 'A'; // just to avoid magic numbers - for (size_t i = 0; new_str[i] != '\0'; i++) { - if (new_str[i] >= 'a' && new_str[i] <= 'z') - continue; - if (new_str[i] >= '0' && new_str[i] <= '9') - continue; - if (new_str[i] >= 'A' && new_str[i] <= 'Z') - new_str[i] += diff; - else - new_str[i] = '-'; - } - return new_str; -} - - -static const char* -htmlentities(char c) -{ - switch (c) { - case '&': - return "&"; - case '<': - return "<"; - case '>': - return ">"; - case '"': - return """; - case '\'': - return "'"; - case '/': - return "/"; - } - return NULL; -} - - -static void -htmlentities_append(sb_string_t *str, char c) -{ - const char *e = htmlentities(c); - if (e == NULL) - sb_string_append_c(str, c); - else - sb_string_append(str, e); -} - - -char* -blogc_htmlentities(const char *str) -{ - if (str == NULL) - return NULL; - sb_string_t *rv = sb_string_new(); - for (size_t i = 0; str[i] != '\0'; i++) - htmlentities_append(rv, str[i]); - return sb_string_free(rv, false); -} - - -char* -blogc_fix_description(const char *paragraph) -{ - if (paragraph == NULL) - return NULL; - sb_string_t *rv = sb_string_new(); - bool last = false; - bool newline = false; - char *tmp = NULL; - size_t start = 0; - size_t current = 0; - while (true) { - switch (paragraph[current]) { - case '\0': - last = true; - case '\r': - case '\n': - if (newline) - break; - tmp = sb_strndup(paragraph + start, current - start); - sb_string_append(rv, sb_str_strip(tmp)); - free(tmp); - tmp = NULL; - if (!last) - sb_string_append_c(rv, ' '); - start = current + 1; - newline = true; - break; - default: - newline = false; - } - if (last) - break; - current++; - } - tmp = blogc_htmlentities(sb_str_strip(rv->str)); - sb_string_free(rv, true); - return tmp; -} - - -typedef enum { - CONTENT_START_LINE = 1, - CONTENT_EXCERPT, - CONTENT_EXCERPT_END, - CONTENT_HEADER, - CONTENT_HEADER_TITLE_START, - CONTENT_HEADER_TITLE, - CONTENT_HTML, - CONTENT_HTML_END, - CONTENT_BLOCKQUOTE, - CONTENT_BLOCKQUOTE_START, - CONTENT_BLOCKQUOTE_END, - CONTENT_CODE, - CONTENT_CODE_START, - CONTENT_CODE_END, - CONTENT_UNORDERED_LIST_OR_HORIZONTAL_RULE, - CONTENT_HORIZONTAL_RULE, - CONTENT_UNORDERED_LIST_START, - CONTENT_UNORDERED_LIST_END, - CONTENT_ORDERED_LIST, - CONTENT_ORDERED_LIST_SPACE, - CONTENT_ORDERED_LIST_START, - CONTENT_ORDERED_LIST_END, - CONTENT_PARAGRAPH, - CONTENT_PARAGRAPH_END, -} blogc_content_parser_state_t; - - -typedef enum { - CONTENT_INLINE_START = 1, - CONTENT_INLINE_ASTERISK, - CONTENT_INLINE_ASTERISK_DOUBLE, - CONTENT_INLINE_UNDERSCORE, - CONTENT_INLINE_UNDERSCORE_DOUBLE, - CONTENT_INLINE_BACKTICKS, - CONTENT_INLINE_BACKTICKS_DOUBLE, - CONTENT_INLINE_LINK_START, - CONTENT_INLINE_LINK_AUTO, - CONTENT_INLINE_LINK_CONTENT, - CONTENT_INLINE_LINK_URL_START, - CONTENT_INLINE_LINK_URL, - CONTENT_INLINE_IMAGE_START, - CONTENT_INLINE_IMAGE_ALT, - CONTENT_INLINE_IMAGE_URL_START, - CONTENT_INLINE_IMAGE_URL, - CONTENT_INLINE_ENDASH, - CONTENT_INLINE_EMDASH, - CONTENT_INLINE_LINE_BREAK_START, - CONTENT_INLINE_LINE_BREAK, -} blogc_content_parser_inline_state_t; - - -static char* -blogc_content_parse_inline_internal(const char *src, size_t src_len) -{ - size_t current = 0; - size_t start = 0; - size_t count = 0; - - const char *tmp = NULL; - char *tmp2 = NULL; - char *tmp3 = NULL; - - size_t start_link = 0; - char *link1 = NULL; - - sb_string_t *rv = sb_string_new(); - - blogc_content_parser_inline_state_t state = CONTENT_INLINE_START; - - while (current < src_len) { - char c = src[current]; - bool is_last = current == src_len - 1; - - switch (state) { - case CONTENT_INLINE_START: - if (is_last) { - htmlentities_append(rv, c); - break; - } - if (c == '\\') { - htmlentities_append(rv, src[++current]); - break; - } - if (c == '*') { - state = CONTENT_INLINE_ASTERISK; - break; - } - if (c == '_') { - state = CONTENT_INLINE_UNDERSCORE; - break; - } - if (c == '`') { - state = CONTENT_INLINE_BACKTICKS; - break; - } - if (c == '[') { - state = CONTENT_INLINE_LINK_START; - break; - } - if (c == '!') { - state = CONTENT_INLINE_IMAGE_START; - break; - } - if (c == '-') { - state = CONTENT_INLINE_ENDASH; - break; - } - if (c == ' ') { - state = CONTENT_INLINE_LINE_BREAK_START; - break; - } - htmlentities_append(rv, c); - break; - - case CONTENT_INLINE_ASTERISK: - if (c == '*') { - state = CONTENT_INLINE_ASTERISK_DOUBLE; - break; - } - tmp = sb_str_find(src + current, '*'); - if (tmp == NULL || ((tmp - src) >= src_len)) { - sb_string_append_c(rv, '*'); - state = CONTENT_INLINE_START; - continue; - } - tmp2 = blogc_content_parse_inline_internal( - src + current, (tmp - src) - current); - sb_string_append_printf(rv, "<em>%s</em>", tmp2); - current = tmp - src; - tmp = NULL; - free(tmp2); - tmp2 = NULL; - state = CONTENT_INLINE_START; - break; - - case CONTENT_INLINE_ASTERISK_DOUBLE: - tmp = src + current; - do { - tmp = sb_str_find(tmp, '*'); - if (((tmp - src) < src_len) && *(tmp + 1) == '*') { - break; - } - tmp++; - } while (tmp != NULL && (tmp - src) < src_len); - if (tmp == NULL || ((tmp - src) >= src_len)) { - sb_string_append_c(rv, '*'); - sb_string_append_c(rv, '*'); - state = CONTENT_INLINE_START; - continue; - } - tmp2 = blogc_content_parse_inline_internal( - src + current, (tmp - src) - current); - sb_string_append_printf(rv, "<strong>%s</strong>", tmp2); - current = tmp - src + 1; - tmp = NULL; - free(tmp2); - tmp2 = NULL; - state = CONTENT_INLINE_START; - break; - - case CONTENT_INLINE_UNDERSCORE: - if (c == '_') { - state = CONTENT_INLINE_UNDERSCORE_DOUBLE; - break; - } - tmp = sb_str_find(src + current, '_'); - if (tmp == NULL || ((tmp - src) >= src_len)) { - sb_string_append_c(rv, '_'); - state = CONTENT_INLINE_START; - continue; - } - tmp2 = blogc_content_parse_inline_internal( - src + current, (tmp - src) - current); - sb_string_append_printf(rv, "<em>%s</em>", tmp2); - current = tmp - src; - tmp = NULL; - free(tmp2); - tmp2 = NULL; - state = CONTENT_INLINE_START; - break; - - case CONTENT_INLINE_UNDERSCORE_DOUBLE: - tmp = src + current; - do { - tmp = sb_str_find(tmp, '_'); - if (((tmp - src) < src_len) && *(tmp + 1) == '_') { - break; - } - tmp++; - } while (tmp != NULL && (tmp - src) < src_len); - if (tmp == NULL || ((tmp - src) >= src_len)) { - sb_string_append_c(rv, '_'); - sb_string_append_c(rv, '_'); - state = CONTENT_INLINE_START; - continue; - } - tmp2 = blogc_content_parse_inline_internal( - src + current, (tmp - src) - current); - sb_string_append_printf(rv, "<strong>%s</strong>", tmp2); - current = tmp - src + 1; - tmp = NULL; - free(tmp2); - tmp2 = NULL; - state = CONTENT_INLINE_START; - break; - - case CONTENT_INLINE_BACKTICKS: - if (c == '`') { - state = CONTENT_INLINE_BACKTICKS_DOUBLE; - break; - } - tmp = sb_str_find(src + current, '`'); - if (tmp == NULL || ((tmp - src) >= src_len)) { - sb_string_append_c(rv, '`'); - state = CONTENT_INLINE_START; - continue; - } - tmp3 = sb_strndup(src + current, (tmp - src) - current); - tmp2 = blogc_htmlentities(tmp3); - free(tmp3); - tmp3 = NULL; - sb_string_append(rv, "<code>"); - sb_string_append_escaped(rv, tmp2); - sb_string_append(rv, "</code>"); - current = tmp - src; - tmp = NULL; - free(tmp2); - tmp2 = NULL; - state = CONTENT_INLINE_START; - break; - - case CONTENT_INLINE_BACKTICKS_DOUBLE: - tmp = src + current; - do { - tmp = sb_str_find(tmp, '`'); - if (((tmp - src) < src_len) && *(tmp + 1) == '`') { - break; - } - tmp++; - } while (tmp != NULL && (tmp - src) < src_len); - if (tmp == NULL || ((tmp - src) >= src_len)) { - sb_string_append_c(rv, '`'); - sb_string_append_c(rv, '`'); - state = CONTENT_INLINE_START; - continue; - } - tmp3 = sb_strndup(src + current, (tmp - src) - current); - tmp2 = blogc_htmlentities(tmp3); - free(tmp3); - tmp3 = NULL; - sb_string_append(rv, "<code>"); - sb_string_append_escaped(rv, tmp2); - sb_string_append(rv, "</code>"); - current = tmp - src + 1; - tmp = NULL; - free(tmp2); - tmp2 = NULL; - state = CONTENT_INLINE_START; - break; - - case CONTENT_INLINE_LINK_START: - if (c == '[') { - state = CONTENT_INLINE_LINK_AUTO; - break; - } - start_link = current; - count = 1; - state = CONTENT_INLINE_LINK_CONTENT; - break; - - case CONTENT_INLINE_LINK_AUTO: - tmp = src + current; - do { - tmp = sb_str_find(tmp, ']'); - if (((tmp - src) < src_len) && *(tmp + 1) == ']') { - break; - } - tmp++; - } while (tmp != NULL && (tmp - src) < src_len); - if (tmp == NULL || ((tmp - src) >= src_len)) { - sb_string_append_c(rv, '['); - sb_string_append_c(rv, '['); - state = CONTENT_INLINE_START; - continue; - } - tmp2 = sb_strndup(src + current, (tmp - src) - current); - sb_string_append(rv, "<a href=\""); - sb_string_append_escaped(rv, tmp2); - sb_string_append(rv, "\">"); - sb_string_append_escaped(rv, tmp2); - sb_string_append(rv, "</a>"); - current = tmp - src + 1; - tmp = NULL; - free(tmp2); - tmp2 = NULL; - state = CONTENT_INLINE_START; - break; - - case CONTENT_INLINE_LINK_CONTENT: - if (c == '\\') { - current++; - break; - } - if (c == '[') { // links can be nested :/ - count++; - break; - } - if (c == ']') { - if (--count == 0) { - link1 = sb_strndup(src + start_link, current - start_link); - state = CONTENT_INLINE_LINK_URL_START; - } - } - break; - - case CONTENT_INLINE_LINK_URL_START: - if (c == ' ' || c == '\t' || c == '\n' || c == '\r') - break; - if (c == '(') { - state = CONTENT_INLINE_LINK_URL; - start = current + 1; - break; - } - sb_string_append_c(rv, '['); - state = CONTENT_INLINE_START; - current = start_link; - start_link = 0; - continue; - - case CONTENT_INLINE_LINK_URL: - if (c == '\\') { - current++; - break; - } - if (c == ')') { - tmp2 = sb_strndup(src + start, current - start); - tmp3 = blogc_content_parse_inline(link1); - free(link1); - link1 = NULL; - sb_string_append(rv, "<a href=\""); - sb_string_append_escaped(rv, tmp2); - sb_string_append_printf(rv, "\">%s</a>", tmp3); - free(tmp2); - tmp2 = NULL; - free(tmp3); - tmp3 = NULL; - state = CONTENT_INLINE_START; - break; - } - break; - - case CONTENT_INLINE_IMAGE_START: - // we use the same variables used for links, because why not? - if (c == '[') { - state = CONTENT_INLINE_IMAGE_ALT; - start_link = current + 1; - break; - } - sb_string_append_c(rv, '!'); - state = CONTENT_INLINE_START; - continue; - - case CONTENT_INLINE_IMAGE_ALT: - if (c == '\\') { - current++; - break; - } - if (c == ']') { - link1 = sb_strndup(src + start_link, current - start_link); - state = CONTENT_INLINE_IMAGE_URL_START; - } - break; - - case CONTENT_INLINE_IMAGE_URL_START: - if (c == ' ' || c == '\t' || c == '\n' || c == '\r') - break; - if (c == '(') { - state = CONTENT_INLINE_IMAGE_URL; - start = current + 1; - break; - } - sb_string_append_c(rv, '!'); - sb_string_append_c(rv, '['); - state = CONTENT_INLINE_START; - current = start_link; - start_link = 0; - continue; - - case CONTENT_INLINE_IMAGE_URL: - if (c == '\\') { - current++; - break; - } - if (c == ')') { - tmp2 = sb_strndup(src + start, current - start); - sb_string_append(rv, "<img src=\""); - sb_string_append_escaped(rv, tmp2); - sb_string_append(rv, "\" alt=\""); - sb_string_append_escaped(rv, link1); - sb_string_append(rv, "\">"); - free(tmp2); - tmp2 = NULL; - free(link1); - link1 = NULL; - state = CONTENT_INLINE_START; - break; - } - break; - - case CONTENT_INLINE_ENDASH: - if (c == '-') { - if (is_last) { - sb_string_append(rv, "–"); - state = CONTENT_INLINE_START; // wat - break; - } - state = CONTENT_INLINE_EMDASH; - break; - } - sb_string_append_c(rv, '-'); - state = CONTENT_INLINE_START; - continue; - - case CONTENT_INLINE_EMDASH: - if (c == '-') { - sb_string_append(rv, "—"); - state = CONTENT_INLINE_START; - break; - } - sb_string_append(rv, "–"); - state = CONTENT_INLINE_START; - continue; - - case CONTENT_INLINE_LINE_BREAK_START: - if (c == ' ') { - if (is_last) { - sb_string_append(rv, "<br />"); - state = CONTENT_INLINE_START; // wat - break; - } - count = 2; - state = CONTENT_INLINE_LINE_BREAK; - break; - } - sb_string_append_c(rv, ' '); - state = CONTENT_INLINE_START; - continue; - - case CONTENT_INLINE_LINE_BREAK: - if (c == ' ') { - if (is_last) { - sb_string_append(rv, "<br />"); - state = CONTENT_INLINE_START; // wat - break; - } - count++; - break; - } - if (c == '\n' || c == '\r') { - sb_string_append_printf(rv, "<br />%c", c); - state = CONTENT_INLINE_START; - break; - } - for (size_t i = 0; i < count; i++) - sb_string_append_c(rv, ' '); - state = CONTENT_INLINE_START; - continue; - } - current++; - } - - switch (state) { - - // if after the end of the loop we are on any of the following states, - // we must call the parser again, from start_link - case CONTENT_INLINE_IMAGE_START: - case CONTENT_INLINE_IMAGE_ALT: - case CONTENT_INLINE_IMAGE_URL_START: - case CONTENT_INLINE_IMAGE_URL: - sb_string_append_c(rv, '!'); - - case CONTENT_INLINE_LINK_CONTENT: - case CONTENT_INLINE_LINK_URL_START: - case CONTENT_INLINE_LINK_URL: - tmp2 = blogc_content_parse_inline(src + start_link); - sb_string_append_c(rv, '['); - sb_string_append_escaped(rv, tmp2); // no need to free, as it wil be done below. - break; - - // add all the other states here explicitly, so the compiler helps us - // not missing any new state that should be handled. - case CONTENT_INLINE_START: - case CONTENT_INLINE_ASTERISK: - case CONTENT_INLINE_ASTERISK_DOUBLE: - case CONTENT_INLINE_UNDERSCORE: - case CONTENT_INLINE_UNDERSCORE_DOUBLE: - case CONTENT_INLINE_BACKTICKS: - case CONTENT_INLINE_BACKTICKS_DOUBLE: - case CONTENT_INLINE_LINK_START: - case CONTENT_INLINE_LINK_AUTO: - case CONTENT_INLINE_ENDASH: - case CONTENT_INLINE_EMDASH: - case CONTENT_INLINE_LINE_BREAK_START: - case CONTENT_INLINE_LINE_BREAK: - break; - } - - free(tmp2); - free(tmp3); - free(link1); - - return sb_string_free(rv, false); -} - - -char* -blogc_content_parse_inline(const char *src) -{ - return blogc_content_parse_inline_internal(src, strlen(src)); -} - - -bool -blogc_is_ordered_list_item(const char *str, size_t prefix_len) -{ - if (str == NULL) - return false; - - if (strlen(str) < 2) - return false; - - size_t i; - - for (i = 0; str[i] >= '0' && str[i] <= '9'; i++); - - if (i == 0) - return false; - if (str[i] != '.') - return false; - - for (i++; i < prefix_len && (str[i] == ' ' || str[i] == '\t'); i++); - - if (str[i] == '\0') - return false; - - return i == prefix_len; -} - - -char* -blogc_content_parse(const char *src, size_t *end_excerpt, char **description) -{ - // src is always nul-terminated. - size_t src_len = strlen(src); - - size_t current = 0; - size_t start = 0; - size_t start2 = 0; - size_t end = 0; - size_t eend = 0; - size_t real_end = 0; - - unsigned int header_level = 0; - char *prefix = NULL; - size_t prefix_len = 0; - char *tmp = NULL; - char *tmp2 = NULL; - char *parsed = NULL; - char *slug = NULL; - - // this isn't empty because we need some reasonable default value in the - // unlikely case that we need to print some line ending before evaluating - // the "real" value. - char line_ending[3] = "\n"; - bool line_ending_found = false; - - char d = '\0'; - - sb_slist_t *lines = NULL; - sb_slist_t *lines2 = NULL; - - sb_string_t *rv = sb_string_new(); - sb_string_t *tmp_str = NULL; - - blogc_content_parser_state_t state = CONTENT_START_LINE; - - while (current < src_len) { - char c = src[current]; - bool is_last = current == src_len - 1; - - if (c == '\n' || c == '\r') { - if ((current + 1) < src_len) { - if ((c == '\n' && src[current + 1] == '\r') || - (c == '\r' && src[current + 1] == '\n')) - { - if (!line_ending_found) { - line_ending[0] = c; - line_ending[1] = src[current + 1]; - line_ending[2] = '\0'; - line_ending_found = true; - } - real_end = current; - c = src[++current]; - is_last = current == src_len - 1; - } - } - if (!line_ending_found) { - line_ending[0] = c; - line_ending[1] = '\0'; - line_ending_found = true; - } - } - - switch (state) { - - case CONTENT_START_LINE: - if (c == '\n' || c == '\r' || is_last) - break; - start = current; - if (c == '.') { - if (end_excerpt != NULL) { - eend = rv->len; // fuck it - state = CONTENT_EXCERPT; - break; - } - } - if (c == '#') { - header_level = 1; - state = CONTENT_HEADER; - break; - } - if (c == '*' || c == '+' || c == '-') { - start2 = current; - state = CONTENT_UNORDERED_LIST_OR_HORIZONTAL_RULE; - d = c; - break; - } - if (c >= '0' && c <= '9') { - start2 = current; - state = CONTENT_ORDERED_LIST; - break; - } - if (c == ' ' || c == '\t') { - start2 = current; - state = CONTENT_CODE; - break; - } - if (c == '<') { - state = CONTENT_HTML; - break; - } - if (c == '>') { - state = CONTENT_BLOCKQUOTE; - start2 = current; - break; - } - state = CONTENT_PARAGRAPH; - break; - - case CONTENT_EXCERPT: - if (end_excerpt != NULL) { - if (c == '.') - break; - if (c == '\n' || c == '\r') { - state = CONTENT_EXCERPT_END; - break; - } - } - eend = 0; - state = CONTENT_PARAGRAPH; - break; - - case CONTENT_EXCERPT_END: - if (end_excerpt != NULL) { - if (c == '\n' || c == '\r') { - *end_excerpt = eend; - state = CONTENT_START_LINE; - break; - } - } - eend = 0; - state = CONTENT_PARAGRAPH_END; - break; - - case CONTENT_HEADER: - if (c == '#') { - header_level += 1; - break; - } - if (c == ' ' || c == '\t') { - state = CONTENT_HEADER_TITLE_START; - break; - } - state = CONTENT_PARAGRAPH; - break; - - case CONTENT_HEADER_TITLE_START: - if (c == ' ' || c == '\t') - break; - start = current; - if (c != '\n' && c != '\r') { - state = CONTENT_HEADER_TITLE; - break; - } - - case CONTENT_HEADER_TITLE: - if (c == '\n' || c == '\r' || is_last) { - end = is_last && c != '\n' && c != '\r' ? src_len : - (real_end != 0 ? real_end : current); - tmp = sb_strndup(src + start, end - start); - parsed = blogc_content_parse_inline(tmp); - slug = blogc_slugify(tmp); - if (slug == NULL) - sb_string_append_printf(rv, "<h%d>%s</h%d>%s", - header_level, parsed, header_level, line_ending); - else - sb_string_append_printf(rv, "<h%d id=\"%s\">%s</h%d>%s", - header_level, slug, parsed, header_level, - line_ending); - free(slug); - free(parsed); - parsed = NULL; - free(tmp); - tmp = NULL; - state = CONTENT_START_LINE; - start = current; - } - break; - - case CONTENT_HTML: - if (c == '\n' || c == '\r' || is_last) { - state = CONTENT_HTML_END; - end = is_last && c != '\n' && c != '\r' ? src_len : - (real_end != 0 ? real_end : current); - } - if (!is_last) - break; - - case CONTENT_HTML_END: - if (c == '\n' || c == '\r' || is_last) { - tmp = sb_strndup(src + start, end - start); - sb_string_append_printf(rv, "%s%s", tmp, line_ending); - free(tmp); - tmp = NULL; - state = CONTENT_START_LINE; - start = current; - } - else - state = CONTENT_HTML; - break; - - case CONTENT_BLOCKQUOTE: - if (c == ' ' || c == '\t') - break; - prefix = sb_strndup(src + start, current - start); - state = CONTENT_BLOCKQUOTE_START; - break; - - case CONTENT_BLOCKQUOTE_START: - if (c == '\n' || c == '\r' || is_last) { - end = is_last && c != '\n' && c != '\r' ? src_len : - (real_end != 0 ? real_end : current); - tmp = sb_strndup(src + start2, end - start2); - if (sb_str_starts_with(tmp, prefix)) { - lines = sb_slist_append(lines, sb_strdup(tmp + strlen(prefix))); - state = CONTENT_BLOCKQUOTE_END; - } - else { - state = CONTENT_PARAGRAPH; - free(prefix); - prefix = NULL; - sb_slist_free_full(lines, free); - lines = NULL; - if (is_last) { - free(tmp); - tmp = NULL; - continue; - } - } - free(tmp); - tmp = NULL; - } - if (!is_last) - break; - - case CONTENT_BLOCKQUOTE_END: - if (c == '\n' || c == '\r' || is_last) { - tmp_str = sb_string_new(); - for (sb_slist_t *l = lines; l != NULL; l = l->next) - sb_string_append_printf(tmp_str, "%s%s", l->data, - line_ending); - // do not propagate description to blockquote parsing, - // because we just want paragraphs from first level of - // content. - tmp = blogc_content_parse(tmp_str->str, NULL, NULL); - sb_string_append_printf(rv, "<blockquote>%s</blockquote>%s", - tmp, line_ending); - free(tmp); - tmp = NULL; - sb_string_free(tmp_str, true); - tmp_str = NULL; - sb_slist_free_full(lines, free); - lines = NULL; - free(prefix); - prefix = NULL; - state = CONTENT_START_LINE; - start2 = current; - } - else { - start2 = current; - state = CONTENT_BLOCKQUOTE_START; - } - break; - - case CONTENT_CODE: - if (c == ' ' || c == '\t') - break; - prefix = sb_strndup(src + start, current - start); - state = CONTENT_CODE_START; - break; - - case CONTENT_CODE_START: - if (c == '\n' || c == '\r' || is_last) { - end = is_last && c != '\n' && c != '\r' ? src_len : - (real_end != 0 ? real_end : current); - tmp = sb_strndup(src + start2, end - start2); - if (sb_str_starts_with(tmp, prefix)) { - lines = sb_slist_append(lines, sb_strdup(tmp + strlen(prefix))); - state = CONTENT_CODE_END; - } - else { - state = CONTENT_PARAGRAPH; - free(prefix); - prefix = NULL; - sb_slist_free_full(lines, free); - lines = NULL; - free(tmp); - tmp = NULL; - if (is_last) - continue; - break; - } - free(tmp); - tmp = NULL; - } - if (!is_last) - break; - - case CONTENT_CODE_END: - if (c == '\n' || c == '\r' || is_last) { - sb_string_append(rv, "<pre><code>"); - for (sb_slist_t *l = lines; l != NULL; l = l->next) { - char *tmp_line = blogc_htmlentities(l->data); - if (l->next == NULL) - sb_string_append_printf(rv, "%s", tmp_line); - else - sb_string_append_printf(rv, "%s%s", tmp_line, - line_ending); - free(tmp_line); - } - sb_string_append_printf(rv, "</code></pre>%s", line_ending); - sb_slist_free_full(lines, free); - lines = NULL; - free(prefix); - prefix = NULL; - state = CONTENT_START_LINE; - start2 = current; - } - else { - start2 = current; - state = CONTENT_CODE_START; - } - break; - - case CONTENT_UNORDERED_LIST_OR_HORIZONTAL_RULE: - if (c == d) { - state = CONTENT_HORIZONTAL_RULE; - if (is_last) - continue; - break; - } - if (c == ' ' || c == '\t') - break; - prefix = sb_strndup(src + start, current - start); - state = CONTENT_UNORDERED_LIST_START; - break; - - case CONTENT_HORIZONTAL_RULE: - if (c == d && !is_last) { - break; - } - if (c == '\n' || c == '\r' || is_last) { - sb_string_append_printf(rv, "<hr />%s", line_ending); - state = CONTENT_START_LINE; - start = current; - d = '\0'; - break; - } - state = CONTENT_PARAGRAPH; - break; - - case CONTENT_UNORDERED_LIST_START: - if (c == '\n' || c == '\r' || is_last) { - end = is_last && c != '\n' && c != '\r' ? src_len : - (real_end != 0 ? real_end : current); - tmp = sb_strndup(src + start2, end - start2); - tmp2 = sb_strdup_printf("%-*s", strlen(prefix), ""); - if (sb_str_starts_with(tmp, prefix)) { - if (lines2 != NULL) { - tmp_str = sb_string_new(); - for (sb_slist_t *l = lines2; l != NULL; l = l->next) { - if (l->next == NULL) - sb_string_append_printf(tmp_str, "%s", l->data); - else - sb_string_append_printf(tmp_str, "%s%s", l->data, - line_ending); - } - sb_slist_free_full(lines2, free); - lines2 = NULL; - parsed = blogc_content_parse_inline(tmp_str->str); - sb_string_free(tmp_str, true); - lines = sb_slist_append(lines, sb_strdup(parsed)); - free(parsed); - parsed = NULL; - } - lines2 = sb_slist_append(lines2, sb_strdup(tmp + strlen(prefix))); - } - else if (sb_str_starts_with(tmp, tmp2)) { - lines2 = sb_slist_append(lines2, sb_strdup(tmp + strlen(prefix))); - } - else { - state = CONTENT_PARAGRAPH_END; - free(tmp); - tmp = NULL; - free(tmp2); - tmp2 = NULL; - free(prefix); - prefix = NULL; - sb_slist_free_full(lines, free); - sb_slist_free_full(lines2, free); - lines = NULL; - if (is_last) - continue; - break; - } - free(tmp); - tmp = NULL; - free(tmp2); - tmp2 = NULL; - state = CONTENT_UNORDERED_LIST_END; - } - if (!is_last) - break; - - case CONTENT_UNORDERED_LIST_END: - if (c == '\n' || c == '\r' || is_last) { - if (lines2 != NULL) { - // FIXME: avoid repeting the code below - tmp_str = sb_string_new(); - for (sb_slist_t *l = lines2; l != NULL; l = l->next) { - if (l->next == NULL) - sb_string_append_printf(tmp_str, "%s", l->data); - else - sb_string_append_printf(tmp_str, "%s%s", l->data, - line_ending); - } - sb_slist_free_full(lines2, free); - lines2 = NULL; - parsed = blogc_content_parse_inline(tmp_str->str); - sb_string_free(tmp_str, true); - lines = sb_slist_append(lines, sb_strdup(parsed)); - free(parsed); - parsed = NULL; - } - sb_string_append_printf(rv, "<ul>%s", line_ending); - for (sb_slist_t *l = lines; l != NULL; l = l->next) - sb_string_append_printf(rv, "<li>%s</li>%s", l->data, - line_ending); - sb_string_append_printf(rv, "</ul>%s", line_ending); - sb_slist_free_full(lines, free); - lines = NULL; - free(prefix); - prefix = NULL; - state = CONTENT_START_LINE; - start2 = current; - } - else { - start2 = current; - state = CONTENT_UNORDERED_LIST_START; - } - break; - - case CONTENT_ORDERED_LIST: - if (c >= '0' && c <= '9') - break; - if (c == '.') { - state = CONTENT_ORDERED_LIST_SPACE; - break; - } - state = CONTENT_PARAGRAPH; - if (is_last) - continue; - break; - - case CONTENT_ORDERED_LIST_SPACE: - if (c == ' ' || c == '\t') - break; - prefix_len = current - start; - state = CONTENT_ORDERED_LIST_START; - if (c != '\n' && c != '\r' && !is_last) - break; - - case CONTENT_ORDERED_LIST_START: - if (c == '\n' || c == '\r' || is_last) { - end = is_last && c != '\n' && c != '\r' ? src_len : - (real_end != 0 ? real_end : current); - tmp = sb_strndup(src + start2, end - start2); - tmp2 = sb_strdup_printf("%-*s", prefix_len, ""); - if (blogc_is_ordered_list_item(tmp, prefix_len)) { - if (lines2 != NULL) { - tmp_str = sb_string_new(); - for (sb_slist_t *l = lines2; l != NULL; l = l->next) { - if (l->next == NULL) - sb_string_append_printf(tmp_str, "%s", l->data); - else - sb_string_append_printf(tmp_str, "%s%s", l->data, - line_ending); - } - sb_slist_free_full(lines2, free); - lines2 = NULL; - parsed = blogc_content_parse_inline(tmp_str->str); - sb_string_free(tmp_str, true); - lines = sb_slist_append(lines, sb_strdup(parsed)); - free(parsed); - parsed = NULL; - } - lines2 = sb_slist_append(lines2, sb_strdup(tmp + prefix_len)); - } - else if (sb_str_starts_with(tmp, tmp2)) { - lines2 = sb_slist_append(lines2, sb_strdup(tmp + prefix_len)); - } - else { - state = CONTENT_PARAGRAPH_END; - free(tmp); - tmp = NULL; - free(tmp2); - tmp2 = NULL; - free(parsed); - parsed = NULL; - sb_slist_free_full(lines, free); - sb_slist_free_full(lines2, free); - lines = NULL; - if (is_last) - continue; - break; - } - free(tmp); - tmp = NULL; - free(tmp2); - tmp2 = NULL; - state = CONTENT_ORDERED_LIST_END; - } - if (!is_last) - break; - - case CONTENT_ORDERED_LIST_END: - if (c == '\n' || c == '\r' || is_last) { - if (lines2 != NULL) { - // FIXME: avoid repeting the code below - tmp_str = sb_string_new(); - for (sb_slist_t *l = lines2; l != NULL; l = l->next) { - if (l->next == NULL) - sb_string_append_printf(tmp_str, "%s", l->data); - else - sb_string_append_printf(tmp_str, "%s%s", l->data, - line_ending); - } - sb_slist_free_full(lines2, free); - lines2 = NULL; - parsed = blogc_content_parse_inline(tmp_str->str); - sb_string_free(tmp_str, true); - lines = sb_slist_append(lines, sb_strdup(parsed)); - free(parsed); - parsed = NULL; - } - sb_string_append_printf(rv, "<ol>%s", line_ending); - for (sb_slist_t *l = lines; l != NULL; l = l->next) - sb_string_append_printf(rv, "<li>%s</li>%s", l->data, - line_ending); - sb_string_append_printf(rv, "</ol>%s", line_ending); - sb_slist_free_full(lines, free); - lines = NULL; - free(prefix); - prefix = NULL; - state = CONTENT_START_LINE; - start2 = current; - } - else { - start2 = current; - state = CONTENT_ORDERED_LIST_START; - } - break; - - case CONTENT_PARAGRAPH: - if (c == '\n' || c == '\r' || is_last) { - state = CONTENT_PARAGRAPH_END; - end = is_last && c != '\n' && c != '\r' ? src_len : - (real_end != 0 ? real_end : current); - } - if (!is_last) - break; - - case CONTENT_PARAGRAPH_END: - if (c == '\n' || c == '\r' || is_last) { - tmp = sb_strndup(src + start, end - start); - if (description != NULL && *description == NULL) - *description = blogc_fix_description(tmp); - parsed = blogc_content_parse_inline(tmp); - sb_string_append_printf(rv, "<p>%s</p>%s", parsed, - line_ending); - free(parsed); - parsed = NULL; - free(tmp); - tmp = NULL; - state = CONTENT_START_LINE; - start = current; - } - else - state = CONTENT_PARAGRAPH; - break; - - } - - current++; - } - - return sb_string_free(rv, false); -} |