aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/content-parser.c296
-rw-r--r--src/content-parser.h18
2 files changed, 314 insertions, 0 deletions
diff --git a/src/content-parser.c b/src/content-parser.c
new file mode 100644
index 0000000..5448d96
--- /dev/null
+++ b/src/content-parser.c
@@ -0,0 +1,296 @@
+/*
+ * blogc: A blog compiler.
+ * Copyright (C) 2015 Rafael G. Martins <rafael@rafaelmartins.eng.br>
+ *
+ * This program can be distributed under the terms of the BSD License.
+ * See the file COPYING.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <stdbool.h>
+#include <string.h>
+
+#include "utils/utils.h"
+#include "content-parser.h"
+#include "error.h"
+
+
+// this is a half ass implementation of a markdown-like syntax. bugs are
+// expected. feel free to improve the parser and add new features.
+
+
+// TODO: block elements: list, horizontal rule
+// TODO: inline elements: links, emphasis, code, images
+// TODO: error handling
+
+
+typedef enum { // states of the byte-wise state machine in blogc_content_parse()
+ CONTENT_START_LINE = 1, // between blocks: skips line breaks, picks the block type from the first byte
+ CONTENT_HEADER, // counting the leading '#' characters of a header
+ CONTENT_HEADER_TITLE_START, // skipping blanks between the '#' run and the title
+ CONTENT_HEADER_TITLE, // reading the title up to the end of the line
+ CONTENT_HTML, // inside a raw block that started with '<'
+ CONTENT_HTML_END, // right after a line break in a raw block; another one closes it
+ CONTENT_BLOCKQUOTE, // reading the '>' plus blanks that form the line prefix
+ CONTENT_BLOCKQUOTE_START, // reading one blockquote line
+ CONTENT_BLOCKQUOTE_END, // right after a blockquote line; a line break closes the quote
+ CONTENT_CODE, // reading the leading blanks that form the code line prefix
+ CONTENT_CODE_START, // reading one code line
+ CONTENT_CODE_END, // right after a code line; a line break closes the code block
+ CONTENT_PARAGRAPH, // inside a paragraph
+ CONTENT_PARAGRAPH_END, // right after a line break in a paragraph; another one closes it
+} blogc_content_parser_state_t;
+
+
+/*
+ * Convert a buffer of markdown-like text into HTML.
+ *
+ * The input is walked one byte at a time by a small state machine. The first
+ * byte of a block selects its type:
+ *
+ *   '#'           header, emitted as <hN>title</hN> (N = number of '#')
+ *   ' ' or '\t'   code block, emitted as <pre><code>...</code></pre>
+ *   '<'           raw block, copied to the output unchanged
+ *   '>'           blockquote, whose content is parsed recursively and
+ *                 wrapped in <blockquote>...</blockquote>
+ *   anything else paragraph, emitted as <p>...</p>
+ *
+ * Headers end at the first line break. Every other block ends when a line
+ * break is directly followed by another line break character, or at the end
+ * of the input.
+ *
+ * src:     text to parse; only the first src_len bytes are read.
+ * src_len: number of bytes available in src.
+ * err:     error slot. Must not be NULL and must not already hold an error,
+ *          otherwise nothing is parsed and NULL is returned.
+ *
+ * Returns the generated HTML (presumably the heap buffer owned by the
+ * b_string_t, handed over by b_string_free(rv, false); the recursive call
+ * below releases it with free()), or NULL when *err is set.
+ *
+ * NOTE(review): a block whose first content byte is also the last byte of
+ * the input (e.g. "a" or "# a") leaves the loop before anything is emitted.
+ */
+char*
+blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err)
+{
+    if (err == NULL || *err != NULL)
+        return NULL;
+
+    size_t current = 0;
+    size_t start = 0;
+    size_t end = 0;
+
+    unsigned int header_level = 0;
+    char *prefix = NULL;
+    char *tmp = NULL;
+
+    b_slist_t *lines = NULL;
+
+    b_string_t *rv = b_string_new();
+    b_string_t *tmp_str = NULL;
+
+    blogc_content_parser_state_t state = CONTENT_START_LINE;
+
+    while (current < src_len) {
+        char c = src[current];
+        bool is_last = current == src_len - 1;
+
+        switch (state) {
+
+            case CONTENT_START_LINE:
+                // blank lines between blocks are skipped
+                if (c == '\n' || c == '\r')
+                    break;
+                if (c == '#') {
+                    header_level = 1;
+                    state = CONTENT_HEADER;
+                    break;
+                }
+                if (c == ' ' || c == '\t') {
+                    state = CONTENT_CODE;
+                    start = current;
+                    break;
+                }
+                if (c == '<') {
+                    state = CONTENT_HTML;
+                    start = current;
+                    break;
+                }
+                if (c == '>') {
+                    state = CONTENT_BLOCKQUOTE;
+                    start = current;
+                    break;
+                }
+                start = current;
+                state = CONTENT_PARAGRAPH;
+                break;
+
+            case CONTENT_HEADER:
+                if (c == '#') {
+                    header_level += 1;
+                    break;
+                }
+                if (c == ' ' || c == '\t') {
+                    state = CONTENT_HEADER_TITLE_START;
+                    break;
+                }
+                // error;
+                break;
+
+            case CONTENT_HEADER_TITLE_START:
+                if (c == ' ' || c == '\t')
+                    break;
+                // both comparisons must hold: with '||' the test was always
+                // true and a line break was accepted as the start of a title
+                if (c != '\n' && c != '\r') {
+                    start = current;
+                    state = CONTENT_HEADER_TITLE;
+                    break;
+                }
+                // header marker without a title. there is no error reporting
+                // yet, so drop it and parse the next line on its own instead
+                // of swallowing it as the title.
+                state = CONTENT_START_LINE;
+                break;
+
+            case CONTENT_HEADER_TITLE:
+                if (c == '\n' || c == '\r' || is_last) {
+                    // the last byte belongs to the title, a line break does not
+                    end = is_last ? src_len : current;
+                    tmp = b_strndup(src + start, end - start);
+                    b_string_append_printf(rv, "<h%d>%s</h%d>\n", header_level,
+                        tmp, header_level);
+                    free(tmp);
+                    tmp = NULL;
+                    state = CONTENT_START_LINE;
+                    start = current;
+                    break;
+                }
+                break;
+
+
+            case CONTENT_HTML:
+                if (c == '\n' || c == '\r' || is_last) {
+                    state = CONTENT_HTML_END;
+                    end = is_last ? src_len : current;
+                }
+                if (!is_last)
+                    break;
+                /* fallthrough: end of input closes the block right away */
+
+            case CONTENT_HTML_END:
+                if (c == '\n' || c == '\r' || is_last) {
+                    tmp = b_strndup(src + start, end - start);
+                    b_string_append_printf(rv, "%s\n", tmp);
+                    free(tmp);
+                    tmp = NULL;
+                    state = CONTENT_START_LINE;
+                    start = current;
+                }
+                else
+                    state = CONTENT_HTML;
+                break;
+
+            case CONTENT_BLOCKQUOTE:
+                if (c == ' ' || c == '\t')
+                    break;
+                // '>' plus the blanks after it: every line of the quote is
+                // expected to start with this exact prefix
+                prefix = b_strndup(src + start, current - start);
+                state = CONTENT_BLOCKQUOTE_START;
+                /* fallthrough: this byte already belongs to the first line */
+
+            case CONTENT_BLOCKQUOTE_START:
+                if (c == '\n' || c == '\r' || is_last) {
+                    end = is_last ? src_len : current;
+                    tmp = b_strndup(src + start, end - start);
+                    if (b_str_starts_with(tmp, prefix)) {
+                        lines = b_slist_append(lines, b_strdup(tmp + strlen(prefix)));
+                    }
+                    else {
+                        // error
+                    }
+                    free(tmp);
+                    tmp = NULL;
+                    state = CONTENT_BLOCKQUOTE_END;
+                }
+                if (!is_last)
+                    break;
+                /* fallthrough: end of input closes the block right away */
+
+            case CONTENT_BLOCKQUOTE_END:
+                if (c == '\n' || c == '\r' || is_last) {
+                    // join the stripped lines and parse them as a document of
+                    // their own, so quotes can contain any other block
+                    tmp_str = b_string_new();
+                    for (b_slist_t *l = lines; l != NULL; l = l->next) {
+                        if (l->next == NULL)
+                            b_string_append_printf(tmp_str, "%s", l->data);
+                        else
+                            b_string_append_printf(tmp_str, "%s\n", l->data);
+                    }
+                    tmp = blogc_content_parse(tmp_str->str, tmp_str->len, err);
+                    if (*err == NULL) {
+                        b_string_append_printf(rv, "<blockquote>%s</blockquote>\n",
+                            tmp);
+                    }
+                    free(tmp);
+                    tmp = NULL;
+                    b_string_free(tmp_str, true);
+                    tmp_str = NULL;
+                    b_slist_free_full(lines, free);
+                    lines = NULL;
+                    free(prefix);
+                    prefix = NULL;
+                    state = CONTENT_START_LINE;
+                    start = current;
+                }
+                else {
+                    // the quote goes on: this byte starts its next line
+                    start = current;
+                    state = CONTENT_BLOCKQUOTE_START;
+                }
+                break;
+
+            case CONTENT_CODE:
+                if (c == ' ' || c == '\t')
+                    break;
+                // the leading blanks: every line of the code block is
+                // expected to start with this exact prefix
+                prefix = b_strndup(src + start, current - start);
+                state = CONTENT_CODE_START;
+                /* fallthrough: this byte already belongs to the first line */
+
+            case CONTENT_CODE_START:
+                if (c == '\n' || c == '\r' || is_last) {
+                    end = is_last ? src_len : current;
+                    tmp = b_strndup(src + start, end - start);
+                    if (b_str_starts_with(tmp, prefix)) {
+                        lines = b_slist_append(lines, b_strdup(tmp + strlen(prefix)));
+                    }
+                    else {
+                        // error
+                    }
+                    free(tmp);
+                    tmp = NULL;
+                    state = CONTENT_CODE_END;
+                }
+                if (!is_last)
+                    break;
+                /* fallthrough: end of input closes the block right away */
+
+            case CONTENT_CODE_END:
+                if (c == '\n' || c == '\r' || is_last) {
+                    b_string_append(rv, "<pre><code>");
+                    for (b_slist_t *l = lines; l != NULL; l = l->next) {
+                        if (l->next == NULL)
+                            b_string_append_printf(rv, "%s", l->data);
+                        else
+                            b_string_append_printf(rv, "%s\n", l->data);
+                    }
+                    b_string_append(rv, "</code></pre>\n");
+                    b_slist_free_full(lines, free);
+                    lines = NULL;
+                    free(prefix);
+                    prefix = NULL;
+                    state = CONTENT_START_LINE;
+                    start = current;
+                }
+                else {
+                    // the code block goes on: this byte starts its next line
+                    start = current;
+                    state = CONTENT_CODE_START;
+                }
+                break;
+
+            case CONTENT_PARAGRAPH:
+                if (c == '\n' || c == '\r' || is_last) {
+                    state = CONTENT_PARAGRAPH_END;
+                    end = is_last ? src_len : current;
+                }
+                if (!is_last)
+                    break;
+                /* fallthrough: end of input closes the block right away */
+
+            case CONTENT_PARAGRAPH_END:
+                if (c == '\n' || c == '\r' || is_last) {
+                    tmp = b_strndup(src + start, end - start);
+                    b_string_append_printf(rv, "<p>%s</p>\n", tmp);
+                    free(tmp);
+                    tmp = NULL;
+                    state = CONTENT_START_LINE;
+                    start = current;
+                }
+                else
+                    state = CONTENT_PARAGRAPH;
+                break;
+
+            default:
+                // every state is handled above
+                break;
+
+        }
+
+        // only the recursive blockquote parse can set an error so far
+        if (*err != NULL)
+            break;
+
+        current++;
+    }
+
+    if (*err != NULL) {
+        b_string_free(rv, true);
+        return NULL;
+    }
+
+    return b_string_free(rv, false);
+}
diff --git a/src/content-parser.h b/src/content-parser.h
new file mode 100644
index 0000000..0a55fd9
--- /dev/null
+++ b/src/content-parser.h
@@ -0,0 +1,18 @@
+/*
+ * blogc: A blog compiler.
+ * Copyright (C) 2015 Rafael G. Martins <rafael@rafaelmartins.eng.br>
+ *
+ * This program can be distributed under the terms of the BSD License.
+ * See the file COPYING.
+ */
+
+#ifndef BLOGC_CONTENT_PARSER_H
+#define BLOGC_CONTENT_PARSER_H
+
+#include <stdlib.h>
+#include "error.h"
+
+/*
+ * Convert the first src_len bytes of src, written in a markdown-like syntax,
+ * into HTML.
+ *
+ * err must point to a blogc_error_t pointer that is currently NULL; when err
+ * is NULL or already holds an error, nothing is parsed and NULL is returned.
+ *
+ * Returns the generated HTML, or NULL when *err has been set. The parser
+ * itself releases the result of its nested calls with free().
+ */
+char* blogc_content_parse(const char *src, size_t src_len,
+    blogc_error_t **err);
+
+#endif /* BLOGC_CONTENT_PARSER_H */