*: big code reorganization.

- source and tests are now splitted by target - utils lib is now called common still pending move error.c from blogc to common
author: Rafael G. Martins <rafael@rafaelmartins.eng.br> 2016-09-03 19:57:54 +0200
committer: Rafael G. Martins <rafael@rafaelmartins.eng.br> 2016-09-03 20:28:55 +0200
commit: 74ca21a41bcb5a49d19e65c9ba88f1f864cb7095 (patch)
tree: 4774587e47abc0ff20453abbf714b63c36697f26 /src/content-parser.c
parent: 30ae5fd4f65f48009e6956e42ccc2c9d9ad80901 (diff)
download: blogc-74ca21a41bcb5a49d19e65c9ba88f1f864cb7095.tar.gz
blogc-74ca21a41bcb5a49d19e65c9ba88f1f864cb7095.tar.bz2
blogc-74ca21a41bcb5a49d19e65c9ba88f1f864cb7095.zip
1 files changed, 0 insertions, 1264 deletions
diff --git a/src/content-parser.c b/src/content-parser.c
deleted file mode 100644
index ca0597b..0000000
--- a/src/content-parser.c
+++ /dev/null
@@ -1,1264 +0,0 @@
-/*
- * blogc: A blog compiler.
- * Copyright (C) 2015-2016 Rafael G. Martins <rafael@rafaelmartins.eng.br>
- *
- * This program can be distributed under the terms of the BSD License.
- * See the file LICENSE.
- */
-
-#include <stdbool.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "content-parser.h"
-#include "utils.h"
-
-// this is a half ass implementation of a markdown-like syntax. bugs are
-// expected. feel free to improve the parser and add new features.
-
-
-char*
-blogc_slugify(const char *str)
-{
-    if (str == NULL)
-        return NULL;
-    char *new_str = sb_strdup(str);
-    int diff = 'a' - 'A';  // just to avoid magic numbers
-    for (size_t i = 0; new_str[i] != '\0'; i++) {
-        if (new_str[i] >= 'a' && new_str[i] <= 'z')
-            continue;
-        if (new_str[i] >= '0' && new_str[i] <= '9')
-            continue;
-        if (new_str[i] >= 'A' && new_str[i] <= 'Z')
-            new_str[i] += diff;
-        else
-            new_str[i] = '-';
-    }
-    return new_str;
-}
-
-
-static const char*
-htmlentities(char c)
-{
-    switch (c) {
-        case '&':
-            return "&amp;";
-        case '<':
-            return "&lt;";
-        case '>':
-            return "&gt;";
-        case '"':
-            return "&quot;";
-        case '\'':
-            return "&#x27;";
-        case '/':
-            return "&#x2F;";
-    }
-    return NULL;
-}
-
-
-static void
-htmlentities_append(sb_string_t *str, char c)
-{
-    const char *e = htmlentities(c);
-    if (e == NULL)
-        sb_string_append_c(str, c);
-    else
-        sb_string_append(str, e);
-}
-
-
-char*
-blogc_htmlentities(const char *str)
-{
-    if (str == NULL)
-        return NULL;
-    sb_string_t *rv = sb_string_new();
-    for (size_t i = 0; str[i] != '\0'; i++)
-        htmlentities_append(rv, str[i]);
-    return sb_string_free(rv, false);
-}
-
-
-char*
-blogc_fix_description(const char *paragraph)
-{
-    if (paragraph == NULL)
-        return NULL;
-    sb_string_t *rv = sb_string_new();
-    bool last = false;
-    bool newline = false;
-    char *tmp = NULL;
-    size_t start = 0;
-    size_t current = 0;
-    while (true) {
-        switch (paragraph[current]) {
-            case '\0':
-                last = true;
-            case '\r':
-            case '\n':
-                if (newline)
-                    break;
-                tmp = sb_strndup(paragraph + start, current - start);
-                sb_string_append(rv, sb_str_strip(tmp));
-                free(tmp);
-                tmp = NULL;
-                if (!last)
-                    sb_string_append_c(rv, ' ');
-                start = current + 1;
-                newline = true;
-                break;
-            default:
-                newline = false;
-        }
-        if (last)
-            break;
-        current++;
-    }
-    tmp = blogc_htmlentities(sb_str_strip(rv->str));
-    sb_string_free(rv, true);
-    return tmp;
-}
-
-
-typedef enum {
-    CONTENT_START_LINE = 1,
-    CONTENT_EXCERPT,
-    CONTENT_EXCERPT_END,
-    CONTENT_HEADER,
-    CONTENT_HEADER_TITLE_START,
-    CONTENT_HEADER_TITLE,
-    CONTENT_HTML,
-    CONTENT_HTML_END,
-    CONTENT_BLOCKQUOTE,
-    CONTENT_BLOCKQUOTE_START,
-    CONTENT_BLOCKQUOTE_END,
-    CONTENT_CODE,
-    CONTENT_CODE_START,
-    CONTENT_CODE_END,
-    CONTENT_UNORDERED_LIST_OR_HORIZONTAL_RULE,
-    CONTENT_HORIZONTAL_RULE,
-    CONTENT_UNORDERED_LIST_START,
-    CONTENT_UNORDERED_LIST_END,
-    CONTENT_ORDERED_LIST,
-    CONTENT_ORDERED_LIST_SPACE,
-    CONTENT_ORDERED_LIST_START,
-    CONTENT_ORDERED_LIST_END,
-    CONTENT_PARAGRAPH,
-    CONTENT_PARAGRAPH_END,
-} blogc_content_parser_state_t;
-
-
-typedef enum {
-    CONTENT_INLINE_START = 1,
-    CONTENT_INLINE_ASTERISK,
-    CONTENT_INLINE_ASTERISK_DOUBLE,
-    CONTENT_INLINE_UNDERSCORE,
-    CONTENT_INLINE_UNDERSCORE_DOUBLE,
-    CONTENT_INLINE_BACKTICKS,
-    CONTENT_INLINE_BACKTICKS_DOUBLE,
-    CONTENT_INLINE_LINK_START,
-    CONTENT_INLINE_LINK_AUTO,
-    CONTENT_INLINE_LINK_CONTENT,
-    CONTENT_INLINE_LINK_URL_START,
-    CONTENT_INLINE_LINK_URL,
-    CONTENT_INLINE_IMAGE_START,
-    CONTENT_INLINE_IMAGE_ALT,
-    CONTENT_INLINE_IMAGE_URL_START,
-    CONTENT_INLINE_IMAGE_URL,
-    CONTENT_INLINE_ENDASH,
-    CONTENT_INLINE_EMDASH,
-    CONTENT_INLINE_LINE_BREAK_START,
-    CONTENT_INLINE_LINE_BREAK,
-} blogc_content_parser_inline_state_t;
-
-
-static char*
-blogc_content_parse_inline_internal(const char *src, size_t src_len)
-{
-    size_t current = 0;
-    size_t start = 0;
-    size_t count = 0;
-
-    const char *tmp = NULL;
-    char *tmp2 = NULL;
-    char *tmp3 = NULL;
-
-    size_t start_link = 0;
-    char *link1 = NULL;
-
-    sb_string_t *rv = sb_string_new();
-
-    blogc_content_parser_inline_state_t state = CONTENT_INLINE_START;
-
-    while (current < src_len) {
-        char c = src[current];
-        bool is_last = current == src_len - 1;
-
-        switch (state) {
-            case CONTENT_INLINE_START:
-                if (is_last) {
-                    htmlentities_append(rv, c);
-                    break;
-                }
-                if (c == '\\') {
-                    htmlentities_append(rv, src[++current]);
-                    break;
-                }
-                if (c == '*') {
-                    state = CONTENT_INLINE_ASTERISK;
-                    break;
-                }
-                if (c == '_') {
-                    state = CONTENT_INLINE_UNDERSCORE;
-                    break;
-                }
-                if (c == '`') {
-                    state = CONTENT_INLINE_BACKTICKS;
-                    break;
-                }
-                if (c == '[') {
-                    state = CONTENT_INLINE_LINK_START;
-                    break;
-                }
-                if (c == '!') {
-                    state = CONTENT_INLINE_IMAGE_START;
-                    break;
-                }
-                if (c == '-') {
-                    state = CONTENT_INLINE_ENDASH;
-                    break;
-                }
-                if (c == ' ') {
-                    state = CONTENT_INLINE_LINE_BREAK_START;
-                    break;
-                }
-                htmlentities_append(rv, c);
-                break;
-
-            case CONTENT_INLINE_ASTERISK:
-                if (c == '*') {
-                    state = CONTENT_INLINE_ASTERISK_DOUBLE;
-                    break;
-                }
-                tmp = sb_str_find(src + current, '*');
-                if (tmp == NULL || ((tmp - src) >= src_len)) {
-                    sb_string_append_c(rv, '*');
-                    state = CONTENT_INLINE_START;
-                    continue;
-                }
-                tmp2 = blogc_content_parse_inline_internal(
-                    src + current, (tmp - src) - current);
-                sb_string_append_printf(rv, "<em>%s</em>", tmp2);
-                current = tmp - src;
-                tmp = NULL;
-                free(tmp2);
-                tmp2 = NULL;
-                state = CONTENT_INLINE_START;
-                break;
-
-            case CONTENT_INLINE_ASTERISK_DOUBLE:
-                tmp = src + current;
-                do {
-                    tmp = sb_str_find(tmp, '*');
-                    if (((tmp - src) < src_len) && *(tmp + 1) == '*') {
-                        break;
-                    }
-                    tmp++;
-                } while (tmp != NULL && (tmp - src) < src_len);
-                if (tmp == NULL || ((tmp - src) >= src_len)) {
-                    sb_string_append_c(rv, '*');
-                    sb_string_append_c(rv, '*');
-                    state = CONTENT_INLINE_START;
-                    continue;
-                }
-                tmp2 = blogc_content_parse_inline_internal(
-                    src + current, (tmp - src) - current);
-                sb_string_append_printf(rv, "<strong>%s</strong>", tmp2);
-                current = tmp - src + 1;
-                tmp = NULL;
-                free(tmp2);
-                tmp2 = NULL;
-                state = CONTENT_INLINE_START;
-                break;
-
-            case CONTENT_INLINE_UNDERSCORE:
-                if (c == '_') {
-                    state = CONTENT_INLINE_UNDERSCORE_DOUBLE;
-                    break;
-                }
-                tmp = sb_str_find(src + current, '_');
-                if (tmp == NULL || ((tmp - src) >= src_len)) {
-                    sb_string_append_c(rv, '_');
-                    state = CONTENT_INLINE_START;
-                    continue;
-                }
-                tmp2 = blogc_content_parse_inline_internal(
-                    src + current, (tmp - src) - current);
-                sb_string_append_printf(rv, "<em>%s</em>", tmp2);
-                current = tmp - src;
-                tmp = NULL;
-                free(tmp2);
-                tmp2 = NULL;
-                state = CONTENT_INLINE_START;
-                break;
-
-            case CONTENT_INLINE_UNDERSCORE_DOUBLE:
-                tmp = src + current;
-                do {
-                    tmp = sb_str_find(tmp, '_');
-                    if (((tmp - src) < src_len) && *(tmp + 1) == '_') {
-                        break;
-                    }
-                    tmp++;
-                } while (tmp != NULL && (tmp - src) < src_len);
-                if (tmp == NULL || ((tmp - src) >= src_len)) {
-                    sb_string_append_c(rv, '_');
-                    sb_string_append_c(rv, '_');
-                    state = CONTENT_INLINE_START;
-                    continue;
-                }
-                tmp2 = blogc_content_parse_inline_internal(
-                    src + current, (tmp - src) - current);
-                sb_string_append_printf(rv, "<strong>%s</strong>", tmp2);
-                current = tmp - src + 1;
-                tmp = NULL;
-                free(tmp2);
-                tmp2 = NULL;
-                state = CONTENT_INLINE_START;
-                break;
-
-            case CONTENT_INLINE_BACKTICKS:
-                if (c == '`') {
-                    state = CONTENT_INLINE_BACKTICKS_DOUBLE;
-                    break;
-                }
-                tmp = sb_str_find(src + current, '`');
-                if (tmp == NULL || ((tmp - src) >= src_len)) {
-                    sb_string_append_c(rv, '`');
-                    state = CONTENT_INLINE_START;
-                    continue;
-                }
-                tmp3 = sb_strndup(src + current, (tmp - src) - current);
-                tmp2 = blogc_htmlentities(tmp3);
-                free(tmp3);
-                tmp3 = NULL;
-                sb_string_append(rv, "<code>");
-                sb_string_append_escaped(rv, tmp2);
-                sb_string_append(rv, "</code>");
-                current = tmp - src;
-                tmp = NULL;
-                free(tmp2);
-                tmp2 = NULL;
-                state = CONTENT_INLINE_START;
-                break;
-
-            case CONTENT_INLINE_BACKTICKS_DOUBLE:
-                tmp = src + current;
-                do {
-                    tmp = sb_str_find(tmp, '`');
-                    if (((tmp - src) < src_len) && *(tmp + 1) == '`') {
-                        break;
-                    }
-                    tmp++;
-                } while (tmp != NULL && (tmp - src) < src_len);
-                if (tmp == NULL || ((tmp - src) >= src_len)) {
-                    sb_string_append_c(rv, '`');
-                    sb_string_append_c(rv, '`');
-                    state = CONTENT_INLINE_START;
-                    continue;
-                }
-                tmp3 = sb_strndup(src + current, (tmp - src) - current);
-                tmp2 = blogc_htmlentities(tmp3);
-                free(tmp3);
-                tmp3 = NULL;
-                sb_string_append(rv, "<code>");
-                sb_string_append_escaped(rv, tmp2);
-                sb_string_append(rv, "</code>");
-                current = tmp - src + 1;
-                tmp = NULL;
-                free(tmp2);
-                tmp2 = NULL;
-                state = CONTENT_INLINE_START;
-                break;
-
-            case CONTENT_INLINE_LINK_START:
-                if (c == '[') {
-                    state = CONTENT_INLINE_LINK_AUTO;
-                    break;
-                }
-                start_link = current;
-                count = 1;
-                state = CONTENT_INLINE_LINK_CONTENT;
-                break;
-
-            case CONTENT_INLINE_LINK_AUTO:
-                tmp = src + current;
-                do {
-                    tmp = sb_str_find(tmp, ']');
-                    if (((tmp - src) < src_len) && *(tmp + 1) == ']') {
-                        break;
-                    }
-                    tmp++;
-                } while (tmp != NULL && (tmp - src) < src_len);
-                if (tmp == NULL || ((tmp - src) >= src_len)) {
-                    sb_string_append_c(rv, '[');
-                    sb_string_append_c(rv, '[');
-                    state = CONTENT_INLINE_START;
-                    continue;
-                }
-                tmp2 = sb_strndup(src + current, (tmp - src) - current);
-                sb_string_append(rv, "<a href=\"");
-                sb_string_append_escaped(rv, tmp2);
-                sb_string_append(rv, "\">");
-                sb_string_append_escaped(rv, tmp2);
-                sb_string_append(rv, "</a>");
-                current = tmp - src + 1;
-                tmp = NULL;
-                free(tmp2);
-                tmp2 = NULL;
-                state = CONTENT_INLINE_START;
-                break;
-
-            case CONTENT_INLINE_LINK_CONTENT:
-                if (c == '\\') {
-                    current++;
-                    break;
-                }
-                if (c == '[') {  // links can be nested :/
-                    count++;
-                    break;
-                }
-                if (c == ']') {
-                    if (--count == 0) {
-                        link1 = sb_strndup(src + start_link, current - start_link);
-                        state = CONTENT_INLINE_LINK_URL_START;
-                    }
-                }
-                break;
-
-            case CONTENT_INLINE_LINK_URL_START:
-                if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
-                    break;
-                if (c == '(') {
-                    state = CONTENT_INLINE_LINK_URL;
-                    start = current + 1;
-                    break;
-                }
-                sb_string_append_c(rv, '[');
-                state = CONTENT_INLINE_START;
-                current = start_link;
-                start_link = 0;
-                continue;
-
-            case CONTENT_INLINE_LINK_URL:
-                if (c == '\\') {
-                    current++;
-                    break;
-                }
-                if (c == ')') {
-                    tmp2 = sb_strndup(src + start, current - start);
-                    tmp3 = blogc_content_parse_inline(link1);
-                    free(link1);
-                    link1 = NULL;
-                    sb_string_append(rv, "<a href=\"");
-                    sb_string_append_escaped(rv, tmp2);
-                    sb_string_append_printf(rv, "\">%s</a>", tmp3);
-                    free(tmp2);
-                    tmp2 = NULL;
-                    free(tmp3);
-                    tmp3 = NULL;
-                    state = CONTENT_INLINE_START;
-                    break;
-                }
-                break;
-
-            case CONTENT_INLINE_IMAGE_START:
-                // we use the same variables used for links, because why not?
-                if (c == '[') {
-                    state = CONTENT_INLINE_IMAGE_ALT;
-                    start_link = current + 1;
-                    break;
-                }
-                sb_string_append_c(rv, '!');
-                state = CONTENT_INLINE_START;
-                continue;
-
-            case CONTENT_INLINE_IMAGE_ALT:
-                if (c == '\\') {
-                    current++;
-                    break;
-                }
-                if (c == ']') {
-                    link1 = sb_strndup(src + start_link, current - start_link);
-                    state = CONTENT_INLINE_IMAGE_URL_START;
-                }
-                break;
-
-            case CONTENT_INLINE_IMAGE_URL_START:
-                if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
-                    break;
-                if (c == '(') {
-                    state = CONTENT_INLINE_IMAGE_URL;
-                    start = current + 1;
-                    break;
-                }
-                sb_string_append_c(rv, '!');
-                sb_string_append_c(rv, '[');
-                state = CONTENT_INLINE_START;
-                current = start_link;
-                start_link = 0;
-                continue;
-
-            case CONTENT_INLINE_IMAGE_URL:
-                if (c == '\\') {
-                    current++;
-                    break;
-                }
-                if (c == ')') {
-                    tmp2 = sb_strndup(src + start, current - start);
-                    sb_string_append(rv, "<img src=\"");
-                    sb_string_append_escaped(rv, tmp2);
-                    sb_string_append(rv, "\" alt=\"");
-                    sb_string_append_escaped(rv, link1);
-                    sb_string_append(rv, "\">");
-                    free(tmp2);
-                    tmp2 = NULL;
-                    free(link1);
-                    link1 = NULL;
-                    state = CONTENT_INLINE_START;
-                    break;
-                }
-                break;
-
-            case CONTENT_INLINE_ENDASH:
-                if (c == '-') {
-                    if (is_last) {
-                        sb_string_append(rv, "&ndash;");
-                        state = CONTENT_INLINE_START;  // wat
-                        break;
-                    }
-                    state = CONTENT_INLINE_EMDASH;
-                    break;
-                }
-                sb_string_append_c(rv, '-');
-                state = CONTENT_INLINE_START;
-                continue;
-
-            case CONTENT_INLINE_EMDASH:
-                if (c == '-') {
-                    sb_string_append(rv, "&mdash;");
-                    state = CONTENT_INLINE_START;
-                    break;
-                }
-                sb_string_append(rv, "&ndash;");
-                state = CONTENT_INLINE_START;
-                continue;
-
-            case CONTENT_INLINE_LINE_BREAK_START:
-                if (c == ' ') {
-                    if (is_last) {
-                        sb_string_append(rv, "<br />");
-                        state = CONTENT_INLINE_START;  // wat
-                        break;
-                    }
-                    count = 2;
-                    state = CONTENT_INLINE_LINE_BREAK;
-                    break;
-                }
-                sb_string_append_c(rv, ' ');
-                state = CONTENT_INLINE_START;
-                continue;
-
-            case CONTENT_INLINE_LINE_BREAK:
-                if (c == ' ') {
-                    if (is_last) {
-                        sb_string_append(rv, "<br />");
-                        state = CONTENT_INLINE_START;  // wat
-                        break;
-                    }
-                    count++;
-                    break;
-                }
-                if (c == '\n' || c == '\r') {
-                    sb_string_append_printf(rv, "<br />%c", c);
-                    state = CONTENT_INLINE_START;
-                    break;
-                }
-                for (size_t i = 0; i < count; i++)
-                    sb_string_append_c(rv, ' ');
-                state = CONTENT_INLINE_START;
-                continue;
-        }
-        current++;
-    }
-
-    switch (state) {
-
-        // if after the end of the loop we are on any of the following states,
-        // we must call the parser again, from start_link
-        case CONTENT_INLINE_IMAGE_START:
-        case CONTENT_INLINE_IMAGE_ALT:
-        case CONTENT_INLINE_IMAGE_URL_START:
-        case CONTENT_INLINE_IMAGE_URL:
-            sb_string_append_c(rv, '!');
-
-        case CONTENT_INLINE_LINK_CONTENT:
-        case CONTENT_INLINE_LINK_URL_START:
-        case CONTENT_INLINE_LINK_URL:
-            tmp2 = blogc_content_parse_inline(src + start_link);
-            sb_string_append_c(rv, '[');
-            sb_string_append_escaped(rv, tmp2);  // no need to free, as it wil be done below.
-            break;
-
-        // add all the other states here explicitly, so the compiler helps us
-        // not missing any new state that should be handled.
-        case CONTENT_INLINE_START:
-        case CONTENT_INLINE_ASTERISK:
-        case CONTENT_INLINE_ASTERISK_DOUBLE:
-        case CONTENT_INLINE_UNDERSCORE:
-        case CONTENT_INLINE_UNDERSCORE_DOUBLE:
-        case CONTENT_INLINE_BACKTICKS:
-        case CONTENT_INLINE_BACKTICKS_DOUBLE:
-        case CONTENT_INLINE_LINK_START:
-        case CONTENT_INLINE_LINK_AUTO:
-        case CONTENT_INLINE_ENDASH:
-        case CONTENT_INLINE_EMDASH:
-        case CONTENT_INLINE_LINE_BREAK_START:
-        case CONTENT_INLINE_LINE_BREAK:
-            break;
-    }
-
-    free(tmp2);
-    free(tmp3);
-    free(link1);
-
-    return sb_string_free(rv, false);
-}
-
-
-char*
-blogc_content_parse_inline(const char *src)
-{
-    return blogc_content_parse_inline_internal(src, strlen(src));
-}
-
-
-bool
-blogc_is_ordered_list_item(const char *str, size_t prefix_len)
-{
-    if (str == NULL)
-        return false;
-
-    if (strlen(str) < 2)
-        return false;
-
-    size_t i;
-
-    for (i = 0; str[i] >= '0' && str[i] <= '9'; i++);
-
-    if (i == 0)
-        return false;
-    if (str[i] != '.')
-        return false;
-
-    for (i++; i < prefix_len && (str[i] == ' ' || str[i] == '\t'); i++);
-
-    if (str[i] == '\0')
-        return false;
-
-    return i == prefix_len;
-}
-
-
-char*
-blogc_content_parse(const char *src, size_t *end_excerpt, char **description)
-{
-    // src is always nul-terminated.
-    size_t src_len = strlen(src);
-
-    size_t current = 0;
-    size_t start = 0;
-    size_t start2 = 0;
-    size_t end = 0;
-    size_t eend = 0;
-    size_t real_end = 0;
-
-    unsigned int header_level = 0;
-    char *prefix = NULL;
-    size_t prefix_len = 0;
-    char *tmp = NULL;
-    char *tmp2 = NULL;
-    char *parsed = NULL;
-    char *slug = NULL;
-
-    // this isn't empty because we need some reasonable default value in the
-    // unlikely case that we need to print some line ending before evaluating
-    // the "real" value.
-    char line_ending[3] = "\n";
-    bool line_ending_found = false;
-
-    char d = '\0';
-
-    sb_slist_t *lines = NULL;
-    sb_slist_t *lines2 = NULL;
-
-    sb_string_t *rv = sb_string_new();
-    sb_string_t *tmp_str = NULL;
-
-    blogc_content_parser_state_t state = CONTENT_START_LINE;
-
-    while (current < src_len) {
-        char c = src[current];
-        bool is_last = current == src_len - 1;
-
-        if (c == '\n' || c == '\r') {
-            if ((current + 1) < src_len) {
-                if ((c == '\n' && src[current + 1] == '\r') ||
-                    (c == '\r' && src[current + 1] == '\n'))
-                {
-                    if (!line_ending_found) {
-                        line_ending[0] = c;
-                        line_ending[1] = src[current + 1];
-                        line_ending[2] = '\0';
-                        line_ending_found = true;
-                    }
-                    real_end = current;
-                    c = src[++current];
-                    is_last = current == src_len - 1;
-                }
-            }
-            if (!line_ending_found) {
-                line_ending[0] = c;
-                line_ending[1] = '\0';
-                line_ending_found = true;
-            }
-        }
-
-        switch (state) {
-
-            case CONTENT_START_LINE:
-                if (c == '\n' || c == '\r' || is_last)
-                    break;
-                start = current;
-                if (c == '.') {
-                    if (end_excerpt != NULL) {
-                        eend = rv->len;  // fuck it
-                        state = CONTENT_EXCERPT;
-                        break;
-                    }
-                }
-                if (c == '#') {
-                    header_level = 1;
-                    state = CONTENT_HEADER;
-                    break;
-                }
-                if (c == '*' || c == '+' || c == '-') {
-                    start2 = current;
-                    state = CONTENT_UNORDERED_LIST_OR_HORIZONTAL_RULE;
-                    d = c;
-                    break;
-                }
-                if (c >= '0' && c <= '9') {
-                    start2 = current;
-                    state = CONTENT_ORDERED_LIST;
-                    break;
-                }
-                if (c == ' ' || c == '\t') {
-                    start2 = current;
-                    state = CONTENT_CODE;
-                    break;
-                }
-                if (c == '<') {
-                    state = CONTENT_HTML;
-                    break;
-                }
-                if (c == '>') {
-                    state = CONTENT_BLOCKQUOTE;
-                    start2 = current;
-                    break;
-                }
-                state = CONTENT_PARAGRAPH;
-                break;
-
-            case CONTENT_EXCERPT:
-                if (end_excerpt != NULL) {
-                    if (c == '.')
-                        break;
-                    if (c == '\n' || c == '\r') {
-                        state = CONTENT_EXCERPT_END;
-                        break;
-                    }
-                }
-                eend = 0;
-                state = CONTENT_PARAGRAPH;
-                break;
-
-            case CONTENT_EXCERPT_END:
-                if (end_excerpt != NULL) {
-                    if (c == '\n' || c == '\r') {
-                        *end_excerpt = eend;
-                        state = CONTENT_START_LINE;
-                        break;
-                    }
-                }
-                eend = 0;
-                state = CONTENT_PARAGRAPH_END;
-                break;
-
-            case CONTENT_HEADER:
-                if (c == '#') {
-                    header_level += 1;
-                    break;
-                }
-                if (c == ' ' || c == '\t') {
-                    state = CONTENT_HEADER_TITLE_START;
-                    break;
-                }
-                state = CONTENT_PARAGRAPH;
-                break;
-
-            case CONTENT_HEADER_TITLE_START:
-                if (c == ' ' || c == '\t')
-                    break;
-                start = current;
-                if (c != '\n' && c != '\r') {
-                    state = CONTENT_HEADER_TITLE;
-                    break;
-                }
-
-            case CONTENT_HEADER_TITLE:
-                if (c == '\n' || c == '\r' || is_last) {
-                    end = is_last && c != '\n' && c != '\r' ? src_len :
-                        (real_end != 0 ? real_end : current);
-                    tmp = sb_strndup(src + start, end - start);
-                    parsed = blogc_content_parse_inline(tmp);
-                    slug = blogc_slugify(tmp);
-                    if (slug == NULL)
-                        sb_string_append_printf(rv, "<h%d>%s</h%d>%s",
-                            header_level, parsed, header_level, line_ending);
-                    else
-                        sb_string_append_printf(rv, "<h%d id=\"%s\">%s</h%d>%s",
-                            header_level, slug, parsed, header_level,
-                            line_ending);
-                    free(slug);
-                    free(parsed);
-                    parsed = NULL;
-                    free(tmp);
-                    tmp = NULL;
-                    state = CONTENT_START_LINE;
-                    start = current;
-                }
-                break;
-
-            case CONTENT_HTML:
-                if (c == '\n' || c == '\r' || is_last) {
-                    state = CONTENT_HTML_END;
-                    end = is_last && c != '\n' && c != '\r' ? src_len :
-                        (real_end != 0 ? real_end : current);
-                }
-                if (!is_last)
-                    break;
-
-            case CONTENT_HTML_END:
-                if (c == '\n' || c == '\r' || is_last) {
-                    tmp = sb_strndup(src + start, end - start);
-                    sb_string_append_printf(rv, "%s%s", tmp, line_ending);
-                    free(tmp);
-                    tmp = NULL;
-                    state = CONTENT_START_LINE;
-                    start = current;
-                }
-                else
-                    state = CONTENT_HTML;
-                break;
-
-            case CONTENT_BLOCKQUOTE:
-                if (c == ' ' || c == '\t')
-                    break;
-                prefix = sb_strndup(src + start, current - start);
-                state = CONTENT_BLOCKQUOTE_START;
-                break;
-
-            case CONTENT_BLOCKQUOTE_START:
-                if (c == '\n' || c == '\r' || is_last) {
-                    end = is_last && c != '\n' && c != '\r' ? src_len :
-                        (real_end != 0 ? real_end : current);
-                    tmp = sb_strndup(src + start2, end - start2);
-                    if (sb_str_starts_with(tmp, prefix)) {
-                        lines = sb_slist_append(lines, sb_strdup(tmp + strlen(prefix)));
-                        state = CONTENT_BLOCKQUOTE_END;
-                    }
-                    else {
-                        state = CONTENT_PARAGRAPH;
-                        free(prefix);
-                        prefix = NULL;
-                        sb_slist_free_full(lines, free);
-                        lines = NULL;
-                        if (is_last) {
-                            free(tmp);
-                            tmp = NULL;
-                            continue;
-                        }
-                    }
-                    free(tmp);
-                    tmp = NULL;
-                }
-                if (!is_last)
-                    break;
-
-            case CONTENT_BLOCKQUOTE_END:
-                if (c == '\n' || c == '\r' || is_last) {
-                    tmp_str = sb_string_new();
-                    for (sb_slist_t *l = lines; l != NULL; l = l->next)
-                        sb_string_append_printf(tmp_str, "%s%s", l->data,
-                            line_ending);
-                    // do not propagate description to blockquote parsing,
-                    // because we just want paragraphs from first level of
-                    // content.
-                    tmp = blogc_content_parse(tmp_str->str, NULL, NULL);
-                    sb_string_append_printf(rv, "<blockquote>%s</blockquote>%s",
-                        tmp, line_ending);
-                    free(tmp);
-                    tmp = NULL;
-                    sb_string_free(tmp_str, true);
-                    tmp_str = NULL;
-                    sb_slist_free_full(lines, free);
-                    lines = NULL;
-                    free(prefix);
-                    prefix = NULL;
-                    state = CONTENT_START_LINE;
-                    start2 = current;
-                }
-                else {
-                    start2 = current;
-                    state = CONTENT_BLOCKQUOTE_START;
-                }
-                break;
-
-            case CONTENT_CODE:
-                if (c == ' ' || c == '\t')
-                    break;
-                prefix = sb_strndup(src + start, current - start);
-                state = CONTENT_CODE_START;
-                break;
-
-            case CONTENT_CODE_START:
-                if (c == '\n' || c == '\r' || is_last) {
-                    end = is_last && c != '\n' && c != '\r' ? src_len :
-                        (real_end != 0 ? real_end : current);
-                    tmp = sb_strndup(src + start2, end - start2);
-                    if (sb_str_starts_with(tmp, prefix)) {
-                        lines = sb_slist_append(lines, sb_strdup(tmp + strlen(prefix)));
-                        state = CONTENT_CODE_END;
-                    }
-                    else {
-                        state = CONTENT_PARAGRAPH;
-                        free(prefix);
-                        prefix = NULL;
-                        sb_slist_free_full(lines, free);
-                        lines = NULL;
-                        free(tmp);
-                        tmp = NULL;
-                        if (is_last)
-                            continue;
-                        break;
-                    }
-                    free(tmp);
-                    tmp = NULL;
-                }
-                if (!is_last)
-                    break;
-
-            case CONTENT_CODE_END:
-                if (c == '\n' || c == '\r' || is_last) {
-                    sb_string_append(rv, "<pre><code>");
-                    for (sb_slist_t *l = lines; l != NULL; l = l->next) {
-                        char *tmp_line = blogc_htmlentities(l->data);
-                        if (l->next == NULL)
-                            sb_string_append_printf(rv, "%s", tmp_line);
-                        else
-                            sb_string_append_printf(rv, "%s%s", tmp_line,
-                                line_ending);
-                        free(tmp_line);
-                    }
-                    sb_string_append_printf(rv, "</code></pre>%s", line_ending);
-                    sb_slist_free_full(lines, free);
-                    lines = NULL;
-                    free(prefix);
-                    prefix = NULL;
-                    state = CONTENT_START_LINE;
-                    start2 = current;
-                }
-                else {
-                    start2 = current;
-                    state = CONTENT_CODE_START;
-                }
-                break;
-
-            case CONTENT_UNORDERED_LIST_OR_HORIZONTAL_RULE:
-                if (c == d) {
-                    state = CONTENT_HORIZONTAL_RULE;
-                    if (is_last)
-                        continue;
-                    break;
-                }
-                if (c == ' ' || c == '\t')
-                    break;
-                prefix = sb_strndup(src + start, current - start);
-                state = CONTENT_UNORDERED_LIST_START;
-                break;
-
-            case CONTENT_HORIZONTAL_RULE:
-                if (c == d && !is_last) {
-                    break;
-                }
-                if (c == '\n' || c == '\r' || is_last) {
-                    sb_string_append_printf(rv, "<hr />%s", line_ending);
-                    state = CONTENT_START_LINE;
-                    start = current;
-                    d = '\0';
-                    break;
-                }
-                state = CONTENT_PARAGRAPH;
-                break;
-
-            case CONTENT_UNORDERED_LIST_START:
-                if (c == '\n' || c == '\r' || is_last) {
-                    end = is_last && c != '\n' && c != '\r' ? src_len :
-                        (real_end != 0 ? real_end : current);
-                    tmp = sb_strndup(src + start2, end - start2);
-                    tmp2 = sb_strdup_printf("%-*s", strlen(prefix), "");
-                    if (sb_str_starts_with(tmp, prefix)) {
-                        if (lines2 != NULL) {
-                            tmp_str = sb_string_new();
-                            for (sb_slist_t *l = lines2; l != NULL; l = l->next) {
-                                if (l->next == NULL)
-                                    sb_string_append_printf(tmp_str, "%s", l->data);
-                                else
-                                    sb_string_append_printf(tmp_str, "%s%s", l->data,
-                                        line_ending);
-                            }
-                            sb_slist_free_full(lines2, free);
-                            lines2 = NULL;
-                            parsed = blogc_content_parse_inline(tmp_str->str);
-                            sb_string_free(tmp_str, true);
-                            lines = sb_slist_append(lines, sb_strdup(parsed));
-                            free(parsed);
-                            parsed = NULL;
-                        }
-                        lines2 = sb_slist_append(lines2, sb_strdup(tmp + strlen(prefix)));
-                    }
-                    else if (sb_str_starts_with(tmp, tmp2)) {
-                        lines2 = sb_slist_append(lines2, sb_strdup(tmp + strlen(prefix)));
-                    }
-                    else {
-                        state = CONTENT_PARAGRAPH_END;
-                        free(tmp);
-                        tmp = NULL;
-                        free(tmp2);
-                        tmp2 = NULL;
-                        free(prefix);
-                        prefix = NULL;
-                        sb_slist_free_full(lines, free);
-                        sb_slist_free_full(lines2, free);
-                        lines = NULL;
-                        if (is_last)
-                            continue;
-                        break;
-                    }
-                    free(tmp);
-                    tmp = NULL;
-                    free(tmp2);
-                    tmp2 = NULL;
-                    state = CONTENT_UNORDERED_LIST_END;
-                }
-                if (!is_last)
-                    break;
-
-            case CONTENT_UNORDERED_LIST_END:
-                if (c == '\n' || c == '\r' || is_last) {
-                    if (lines2 != NULL) {
-                        // FIXME: avoid repeting the code below
-                        tmp_str = sb_string_new();
-                        for (sb_slist_t *l = lines2; l != NULL; l = l->next) {
-                            if (l->next == NULL)
-                                sb_string_append_printf(tmp_str, "%s", l->data);
-                            else
-                                sb_string_append_printf(tmp_str, "%s%s", l->data,
-                                    line_ending);
-                        }
-                        sb_slist_free_full(lines2, free);
-                        lines2 = NULL;
-                        parsed = blogc_content_parse_inline(tmp_str->str);
-                        sb_string_free(tmp_str, true);
-                        lines = sb_slist_append(lines, sb_strdup(parsed));
-                        free(parsed);
-                        parsed = NULL;
-                    }
-                    sb_string_append_printf(rv, "<ul>%s", line_ending);
-                    for (sb_slist_t *l = lines; l != NULL; l = l->next)
-                        sb_string_append_printf(rv, "<li>%s</li>%s", l->data,
-                            line_ending);
-                    sb_string_append_printf(rv, "</ul>%s", line_ending);
-                    sb_slist_free_full(lines, free);
-                    lines = NULL;
-                    free(prefix);
-                    prefix = NULL;
-                    state = CONTENT_START_LINE;
-                    start2 = current;
-                }
-                else {
-                    start2 = current;
-                    state = CONTENT_UNORDERED_LIST_START;
-                }
-                break;
-
-            case CONTENT_ORDERED_LIST:
-                if (c >= '0' && c <= '9')
-                    break;
-                if (c == '.') {
-                    state = CONTENT_ORDERED_LIST_SPACE;
-                    break;
-                }
-                state = CONTENT_PARAGRAPH;
-                if (is_last)
-                    continue;
-                break;
-
-            case CONTENT_ORDERED_LIST_SPACE:
-                if (c == ' ' || c == '\t')
-                    break;
-                prefix_len = current - start;
-                state = CONTENT_ORDERED_LIST_START;
-                if (c != '\n' && c != '\r' && !is_last)
-                    break;
-
-            case CONTENT_ORDERED_LIST_START:
-                if (c == '\n' || c == '\r' || is_last) {
-                    end = is_last && c != '\n' && c != '\r' ? src_len :
-                        (real_end != 0 ? real_end : current);
-                    tmp = sb_strndup(src + start2, end - start2);
-                    tmp2 = sb_strdup_printf("%-*s", prefix_len, "");
-                    if (blogc_is_ordered_list_item(tmp, prefix_len)) {
-                        if (lines2 != NULL) {
-                            tmp_str = sb_string_new();
-                            for (sb_slist_t *l = lines2; l != NULL; l = l->next) {
-                                if (l->next == NULL)
-                                    sb_string_append_printf(tmp_str, "%s", l->data);
-                                else
-                                    sb_string_append_printf(tmp_str, "%s%s", l->data,
-                                        line_ending);
-                            }
-                            sb_slist_free_full(lines2, free);
-                            lines2 = NULL;
-                            parsed = blogc_content_parse_inline(tmp_str->str);
-                            sb_string_free(tmp_str, true);
-                            lines = sb_slist_append(lines, sb_strdup(parsed));
-                            free(parsed);
-                            parsed = NULL;
-                        }
-                        lines2 = sb_slist_append(lines2, sb_strdup(tmp + prefix_len));
-                    }
-                    else if (sb_str_starts_with(tmp, tmp2)) {
-                        lines2 = sb_slist_append(lines2, sb_strdup(tmp + prefix_len));
-                    }
-                    else {
-                        state = CONTENT_PARAGRAPH_END;
-                        free(tmp);
-                        tmp = NULL;
-                        free(tmp2);
-                        tmp2 = NULL;
-                        free(parsed);
-                        parsed = NULL;
-                        sb_slist_free_full(lines, free);
-                        sb_slist_free_full(lines2, free);
-                        lines = NULL;
-                        if (is_last)
-                            continue;
-                        break;
-                    }
-                    free(tmp);
-                    tmp = NULL;
-                    free(tmp2);
-                    tmp2 = NULL;
-                    state = CONTENT_ORDERED_LIST_END;
-                }
-                if (!is_last)
-                    break;
-
-            case CONTENT_ORDERED_LIST_END:
-                if (c == '\n' || c == '\r' || is_last) {
-                    if (lines2 != NULL) {
-                        // FIXME: avoid repeting the code below
-                        tmp_str = sb_string_new();
-                        for (sb_slist_t *l = lines2; l != NULL; l = l->next) {
-                            if (l->next == NULL)
-                                sb_string_append_printf(tmp_str, "%s", l->data);
-                            else
-                                sb_string_append_printf(tmp_str, "%s%s", l->data,
-                                    line_ending);
-                        }
-                        sb_slist_free_full(lines2, free);
-                        lines2 = NULL;
-                        parsed = blogc_content_parse_inline(tmp_str->str);
-                        sb_string_free(tmp_str, true);
-                        lines = sb_slist_append(lines, sb_strdup(parsed));
-                        free(parsed);
-                        parsed = NULL;
-                    }
-                    sb_string_append_printf(rv, "<ol>%s", line_ending);
-                    for (sb_slist_t *l = lines; l != NULL; l = l->next)
-                        sb_string_append_printf(rv, "<li>%s</li>%s", l->data,
-                            line_ending);
-                    sb_string_append_printf(rv, "</ol>%s", line_ending);
-                    sb_slist_free_full(lines, free);
-                    lines = NULL;
-                    free(prefix);
-                    prefix = NULL;
-                    state = CONTENT_START_LINE;
-                    start2 = current;
-                }
-                else {
-                    start2 = current;
-                    state = CONTENT_ORDERED_LIST_START;
-                }
-                break;
-
-            case CONTENT_PARAGRAPH:
-                if (c == '\n' || c == '\r' || is_last) {
-                    state = CONTENT_PARAGRAPH_END;
-                    end = is_last && c != '\n' && c != '\r' ? src_len :
-                        (real_end != 0 ? real_end : current);
-                }
-                if (!is_last)
-                    break;
-
-            case CONTENT_PARAGRAPH_END:
-                if (c == '\n' || c == '\r' || is_last) {
-                    tmp = sb_strndup(src + start, end - start);
-                    if (description != NULL && *description == NULL)
-                        *description = blogc_fix_description(tmp);
-                    parsed = blogc_content_parse_inline(tmp);
-                    sb_string_append_printf(rv, "<p>%s</p>%s", parsed,
-                        line_ending);
-                    free(parsed);
-                    parsed = NULL;
-                    free(tmp);
-                    tmp = NULL;
-                    state = CONTENT_START_LINE;
-                    start = current;
-                }
-                else
-                    state = CONTENT_PARAGRAPH;
-                break;
-
-        }
-
-        current++;
-    }
-
-    return sb_string_free(rv, false);
-}
author	Rafael G. Martins <rafael@rafaelmartins.eng.br>	2016-09-03 19:57:54 +0200
committer	Rafael G. Martins <rafael@rafaelmartins.eng.br>	2016-09-03 20:28:55 +0200
commit	74ca21a41bcb5a49d19e65c9ba88f1f864cb7095 (patch)
tree	4774587e47abc0ff20453abbf714b63c36697f26 /src/content-parser.c
parent	30ae5fd4f65f48009e6956e42ccc2c9d9ad80901 (diff)
download	blogc-74ca21a41bcb5a49d19e65c9ba88f1f864cb7095.tar.gz blogc-74ca21a41bcb5a49d19e65c9ba88f1f864cb7095.tar.bz2 blogc-74ca21a41bcb5a49d19e65c9ba88f1f864cb7095.zip