5 files changed, 425 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
index bf68a82..c08b333 100644
--- a/.gitignore
+++ b/.gitignore
@@ -41,6 +41,7 @@ Makefile.in
 /blogc
 
 # tests
+/tests/check_content_parser
 /tests/check_error
 /tests/check_loader
 /tests/check_renderer
diff --git a/Makefile.am b/Makefile.am
index 0f5c5e8..4537fcb 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -19,6 +19,7 @@ CLEANFILES = \
 	$(NULL)
 
 noinst_HEADERS = \
+	src/content-parser.h \
 	src/error.h \
 	src/loader.h \
 	src/renderer.h \
@@ -43,6 +44,7 @@ check_PROGRAMS = \
 
 
 libblogc_la_SOURCES = \
+	src/content-parser.c \
 	src/error.c \
 	src/loader.c \
 	src/renderer.c \
@@ -82,6 +84,7 @@ blogc_LDADD = \
 if USE_CMOCKA
 
 check_PROGRAMS += \
+	tests/check_content_parser \
 	tests/check_error \
 	tests/check_loader \
 	tests/check_renderer \
@@ -124,6 +127,23 @@ tests_check_loader_LDADD = \
 	libblogc.la \
 	$(NULL)
 
+tests_check_content_parser_SOURCES = \
+	tests/check_content_parser.c \
+	$(NULL)
+
+tests_check_content_parser_CFLAGS = \
+	$(CMOCKA_CFLAGS) \
+	$(NULL)
+
+tests_check_content_parser_LDFLAGS = \
+	-no-install \
+	$(NULL)
+
+tests_check_content_parser_LDADD = \
+	$(CMOCKA_LIBS) \
+	libblogc.la \
+	$(NULL)
+
 tests_check_renderer_SOURCES = \
 	tests/check_renderer.c \
 	$(NULL)
diff --git a/src/content-parser.c b/src/content-parser.c
new file mode 100644
index 0000000..5448d96
--- /dev/null
+++ b/src/content-parser.c
@@ -0,0 +1,296 @@
+/*
+ * blogc: A blog compiler.
+ * Copyright (C) 2015 Rafael G. Martins <rafael@rafaelmartins.eng.br>
+ *
+ * This program can be distributed under the terms of the BSD License.
+ * See the file COPYING.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <stdbool.h>
+#include <string.h>
+
+#include "utils/utils.h"
+#include "content-parser.h"
+#include "error.h"
+
+
+// this is a half ass implementation of a markdown-like syntax. bugs are
+// expected. feel free to improve the parser and add new features.
+
+
+// TODO: block elements: list, horizontal rule
+// TODO: inline elements: links, emphasis, code, images
+// TODO: error handling
+
+
+typedef enum {
+    CONTENT_START_LINE = 1,  // waiting for the first byte of a block; line breaks are skipped
+    CONTENT_HEADER,  // counting the leading '#' characters
+    CONTENT_HEADER_TITLE_START,  // skipping blanks before the title text
+    CONTENT_HEADER_TITLE,  // reading the title up to the line break
+    CONTENT_HTML,  // inside a line of a raw HTML block
+    CONTENT_HTML_END,  // just after a line break; another one ends the block
+    CONTENT_BLOCKQUOTE,  // after '>', consuming the blanks that complete the prefix
+    CONTENT_BLOCKQUOTE_START,  // inside a quoted line
+    CONTENT_BLOCKQUOTE_END,  // just after a quoted line; a line break ends the quote
+    CONTENT_CODE,  // consuming the indentation that forms the prefix
+    CONTENT_CODE_START,  // inside an indented code line
+    CONTENT_CODE_END,  // just after a code line; a line break ends the block
+    CONTENT_PARAGRAPH,  // inside a paragraph line
+    CONTENT_PARAGRAPH_END,  // just after a line break; another one ends the paragraph
+} blogc_content_parser_state_t;
+
+/*
+ * Convert src_len bytes of markdown-like text at src to HTML: "#" headers,
+ * raw "<" HTML blocks, ">" blockquotes (parsed recursively), indented code
+ * blocks and paragraphs. Returns a string the caller must free(), or NULL
+ * if err is NULL, *err is already set, or a nested parse set *err.
+ */
+char*
+blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err)
+{
+    if (err == NULL || *err != NULL)
+        return NULL;
+
+    size_t current = 0;  // index of the byte being examined
+    size_t start = 0;    // first byte of the block or line being collected
+    size_t end = 0;      // one past the last byte of the collected text
+
+    unsigned int header_level = 0;
+    char *prefix = NULL;  // marker and blanks that open each quote/code line
+    char *tmp = NULL;
+
+    b_slist_t *lines = NULL;  // prefix-stripped lines of a quote/code block
+
+    b_string_t *rv = b_string_new();
+    b_string_t *tmp_str = NULL;
+
+    blogc_content_parser_state_t state = CONTENT_START_LINE;
+
+    while (current < src_len) {
+        char c = src[current];
+        bool is_last = current == src_len - 1;
+        bool is_eol = c == '\n' || c == '\r';
+
+        switch (state) {
+
+            case CONTENT_START_LINE:
+                if (is_eol)
+                    break;
+                if (c == '#') {
+                    header_level = 1;
+                    state = CONTENT_HEADER;
+                    break;
+                }
+                if (c == ' ' || c == '\t') {
+                    state = CONTENT_CODE;
+                    start = current;
+                    break;
+                }
+                if (c == '<') {
+                    state = CONTENT_HTML;
+                    start = current;
+                    continue;  // re-dispatch: this may also be the last byte
+                }
+                if (c == '>') {
+                    state = CONTENT_BLOCKQUOTE;
+                    start = current;
+                    break;
+                }
+                start = current;
+                state = CONTENT_PARAGRAPH;
+                continue;  // re-dispatch so a 1-byte final line is not lost
+
+            case CONTENT_HEADER:
+                if (c == '#') {
+                    header_level += 1;
+                    break;
+                }
+                if (c == ' ' || c == '\t') {
+                    state = CONTENT_HEADER_TITLE_START;
+                    break;
+                }
+                // TODO: error. the '#' run is not followed by a blank
+                break;
+
+            case CONTENT_HEADER_TITLE_START:
+                if (c == ' ' || c == '\t')
+                    break;
+                if (is_eol) {  // TODO: error. empty title, the header is dropped
+                    state = CONTENT_START_LINE;
+                    break;
+                }
+                start = current;
+                state = CONTENT_HEADER_TITLE;
+                continue;  // re-dispatch so a 1-byte final title is kept
+
+            case CONTENT_HEADER_TITLE:
+                if (is_eol || is_last) {
+                    // a line break ends the title and is never part of it
+                    end = is_eol ? current : src_len;
+                    tmp = b_strndup(src + start, end - start);
+                    b_string_append_printf(rv, "<h%d>%s</h%d>\n", header_level,
+                        tmp, header_level);
+                    free(tmp);
+                    tmp = NULL;
+                    state = CONTENT_START_LINE;
+                    start = current;
+                }
+                break;
+
+            case CONTENT_HTML:
+                if (is_eol || is_last) {
+                    state = CONTENT_HTML_END;
+                    end = is_eol ? current : src_len;
+                }
+                if (!is_last)
+                    break;
+                // fallthrough - the input ends here, flush the block now
+            case CONTENT_HTML_END:
+                if (is_eol || is_last) {
+                    if (!is_eol)  // the last byte is content, keep it
+                        end = src_len;
+                    tmp = b_strndup(src + start, end - start);
+                    b_string_append_printf(rv, "%s\n", tmp);
+                    free(tmp);
+                    tmp = NULL;
+                    state = CONTENT_START_LINE;
+                    start = current;
+                }
+                else
+                    state = CONTENT_HTML;
+                break;
+
+            case CONTENT_BLOCKQUOTE:
+                if (c == ' ' || c == '\t')
+                    break;
+                prefix = b_strndup(src + start, current - start);
+                state = CONTENT_BLOCKQUOTE_START;
+                // fallthrough - this byte already belongs to the first line
+            case CONTENT_BLOCKQUOTE_START:
+                if (is_eol || is_last) {
+                    end = is_eol ? current : src_len;
+                    tmp = b_strndup(src + start, end - start);
+                    // TODO: error. lines without the prefix are dropped
+                    if (b_str_starts_with(tmp, prefix))
+                        lines = b_slist_append(lines, b_strdup(tmp + strlen(prefix)));
+                    free(tmp);
+                    tmp = NULL;
+                    state = CONTENT_BLOCKQUOTE_END;
+                }
+                if (!is_last)
+                    break;
+                // fallthrough - the input ends here, flush the quote now
+            case CONTENT_BLOCKQUOTE_END:
+                if (is_eol || is_last) {
+                    tmp_str = b_string_new();
+                    for (b_slist_t *l = lines; l != NULL; l = l->next) {
+                        b_string_append_printf(tmp_str, "%s%s", l->data,
+                            l->next == NULL ? "" : "\n");
+                    }
+                    // the quoted text is content too: parse it recursively
+                    tmp = blogc_content_parse(tmp_str->str, tmp_str->len, err);
+                    if (*err == NULL) {
+                        b_string_append_printf(rv, "<blockquote>%s</blockquote>\n",
+                            tmp);
+                    }
+                    free(tmp);
+                    tmp = NULL;
+                    b_string_free(tmp_str, true);
+                    tmp_str = NULL;
+                    b_slist_free_full(lines, free);
+                    lines = NULL;
+                    free(prefix);
+                    prefix = NULL;
+                    state = CONTENT_START_LINE;
+                    start = current;
+                }
+                else {
+                    start = current;
+                    state = CONTENT_BLOCKQUOTE_START;
+                }
+                break;
+
+            case CONTENT_CODE:
+                if (c == ' ' || c == '\t')
+                    break;
+                prefix = b_strndup(src + start, current - start);
+                state = CONTENT_CODE_START;
+                // fallthrough - this byte already belongs to the first line
+            case CONTENT_CODE_START:
+                if (is_eol || is_last) {
+                    end = is_eol ? current : src_len;
+                    tmp = b_strndup(src + start, end - start);
+                    // TODO: error. lines without the prefix are dropped
+                    if (b_str_starts_with(tmp, prefix))
+                        lines = b_slist_append(lines, b_strdup(tmp + strlen(prefix)));
+                    free(tmp);
+                    tmp = NULL;
+                    state = CONTENT_CODE_END;
+                }
+                if (!is_last)
+                    break;
+                // fallthrough - the input ends here, flush the code block now
+            case CONTENT_CODE_END:
+                if (is_eol || is_last) {
+                    b_string_append(rv, "<pre><code>");
+                    for (b_slist_t *l = lines; l != NULL; l = l->next) {
+                        b_string_append_printf(rv, "%s%s", l->data,
+                            l->next == NULL ? "" : "\n");
+                    }
+                    b_string_append(rv, "</code></pre>\n");
+                    b_slist_free_full(lines, free);
+                    lines = NULL;
+                    free(prefix);
+                    prefix = NULL;
+                    state = CONTENT_START_LINE;
+                    start = current;
+                }
+                else {
+                    start = current;
+                    state = CONTENT_CODE_START;
+                }
+                break;
+
+            case CONTENT_PARAGRAPH:
+                if (is_eol || is_last) {
+                    state = CONTENT_PARAGRAPH_END;
+                    end = is_eol ? current : src_len;
+                }
+                if (!is_last)
+                    break;
+                // fallthrough - the input ends here, flush the paragraph now
+            case CONTENT_PARAGRAPH_END:
+                if (is_eol || is_last) {
+                    if (!is_eol)  // the last byte is content, keep it
+                        end = src_len;
+                    tmp = b_strndup(src + start, end - start);
+                    b_string_append_printf(rv, "<p>%s</p>\n", tmp);
+                    free(tmp);
+                    tmp = NULL;
+                    state = CONTENT_START_LINE;
+                    start = current;
+                }
+                else
+                    state = CONTENT_PARAGRAPH;
+                break;
+
+        }
+
+        if (*err != NULL)
+            break;
+
+        current++;
+    }
+
+    if (*err != NULL) {
+        b_string_free(rv, true);
+        return NULL;
+    }
+
+    return b_string_free(rv, false);
+}
diff --git a/src/content-parser.h b/src/content-parser.h
new file mode 100644
index 0000000..0a55fd9
--- /dev/null
+++ b/src/content-parser.h
@@ -0,0 +1,18 @@
+/*
+ * blogc: A blog compiler.
+ * Copyright (C) 2015 Rafael G. Martins <rafael@rafaelmartins.eng.br>
+ *
+ * This program can be distributed under the terms of the BSD License.
+ * See the file COPYING.
+ */
+
+#ifndef _CONTENT_PARSER_H
+#define _CONTENT_PARSER_H
+
+#include <stdlib.h>
+#include "error.h"
+// markdown-like text (src_len bytes) -> HTML to free(); NULL if err is NULL or *err is set
+char* blogc_content_parse(const char *src, size_t src_len,
+    blogc_error_t **err);
+
+#endif /* _CONTENT_PARSER_H */
diff --git a/tests/check_content_parser.c b/tests/check_content_parser.c
new file mode 100644
index 0000000..86ba8d8
--- /dev/null
+++ b/tests/check_content_parser.c
@@ -0,0 +1,90 @@
+/*
+ * blogc: A blog compiler.
+ * Copyright (C) 2015 Rafael G. Martins <rafael@rafaelmartins.eng.br>
+ *
+ * This program can be distributed under the terms of the BSD License.
+ * See the file COPYING.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <stdarg.h>
+#include <stddef.h>
+#include <setjmp.h>
+#include <cmocka.h>
+#include <string.h>
+#include "../src/content-parser.h"
+#include "../src/error.h"
+#include "../src/utils/utils.h"
+
+
+static void
+test_content_parse(void **state)  // state is not used by this test
+{
+    const char *a =  // one sample of each supported block; no newline after the last line
+        "# um\n"
+        "## dois\n"
+        "### tres\n"
+        "#### quatro\n"
+        "##### cinco\n"
+        "###### seis\n"
+        "\n"
+        "bola\n"
+        "chunda\n"
+        "\n"
+        ">  bola\n"
+        ">  guda\n"
+        ">  buga\n"
+        ">  \n"
+        ">    asd\n"
+        "\n"
+        "    bola\n"
+        "     asd\n"
+        "    qwewer\n"
+        "\n"
+        "<style>\n"
+        "   chunda\n"
+        "</style>\n"
+        "\n"
+        "guda\n"
+        "yay";
+    blogc_error_t *err = NULL;  // must start as NULL: the parser returns NULL otherwise
+    char *html = blogc_content_parse(a, strlen(a), &err);
+    assert_null(err);
+    assert_non_null(html);
+    assert_string_equal(html,  // blank input lines separate blocks and are not emitted
+        "<h1>um</h1>\n"
+        "<h2>dois</h2>\n"
+        "<h3>tres</h3>\n"
+        "<h4>quatro</h4>\n"
+        "<h5>cinco</h5>\n"
+        "<h6>seis</h6>\n"
+        "<p>bola\n"
+        "chunda</p>\n"
+        "<blockquote><p>bola\n"
+        "guda\n"
+        "buga</p>\n"
+        "<pre><code>asd</code></pre>\n"
+        "</blockquote>\n"
+        "<pre><code>bola\n"
+        " asd\n"
+        "qwewer</code></pre>\n"
+        "<style>\n"
+        "   chunda\n"
+        "</style>\n"
+        "<p>guda\n"
+        "yay</p>\n");
+    free(html);  // the returned string is owned by the caller
+}
+
+
+int
+main(void)
+{
+    const UnitTest tests[] = {  // test cases handed to run_tests() below
+        unit_test(test_content_parse),
+    };
+    return run_tests(tests);  // the runner's result becomes the exit status
+}