diff options
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | Makefile.am | 20 | ||||
-rw-r--r-- | src/content-parser.c | 296 | ||||
-rw-r--r-- | src/content-parser.h | 18 | ||||
-rw-r--r-- | tests/check_content_parser.c | 90 |
5 files changed, 425 insertions, 0 deletions
@@ -41,6 +41,7 @@ Makefile.in /blogc # tests +/tests/check_content_parser /tests/check_error /tests/check_loader /tests/check_renderer diff --git a/Makefile.am b/Makefile.am index 0f5c5e8..4537fcb 100644 --- a/Makefile.am +++ b/Makefile.am @@ -19,6 +19,7 @@ CLEANFILES = \ $(NULL) noinst_HEADERS = \ + src/content-parser.h \ src/error.h \ src/loader.h \ src/renderer.h \ @@ -43,6 +44,7 @@ check_PROGRAMS = \ libblogc_la_SOURCES = \ + src/content-parser.c \ src/error.c \ src/loader.c \ src/renderer.c \ @@ -82,6 +84,7 @@ blogc_LDADD = \ if USE_CMOCKA check_PROGRAMS += \ + tests/check_content_parser \ tests/check_error \ tests/check_loader \ tests/check_renderer \ @@ -124,6 +127,23 @@ tests_check_loader_LDADD = \ libblogc.la \ $(NULL) +tests_check_content_parser_SOURCES = \ + tests/check_content_parser.c \ + $(NULL) + +tests_check_content_parser_CFLAGS = \ + $(CMOCKA_CFLAGS) \ + $(NULL) + +tests_check_content_parser_LDFLAGS = \ + -no-install \ + $(NULL) + +tests_check_content_parser_LDADD = \ + $(CMOCKA_LIBS) \ + libblogc.la \ + $(NULL) + tests_check_renderer_SOURCES = \ tests/check_renderer.c \ $(NULL) diff --git a/src/content-parser.c b/src/content-parser.c new file mode 100644 index 0000000..5448d96 --- /dev/null +++ b/src/content-parser.c @@ -0,0 +1,296 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015 Rafael G. Martins <rafael@rafaelmartins.eng.br> + * + * This program can be distributed under the terms of the BSD License. + * See the file COPYING. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <stdbool.h> +#include <string.h> + +#include "utils/utils.h" +#include "content-parser.h" +#include "error.h" + + +// this is a half ass implementation of a markdown-like syntax. bugs are +// expected. feel free to improve the parser and add new features. 
+
+
+// TODO: block elements: list, horizontal rule
+// TODO: inline elements: links, emphasis, code, images
+// TODO: error handling
+
+
+// States of the character scanner in blogc_content_parse().
+//
+// CONTENT_START_LINE          first character of a line, block type unknown
+// CONTENT_HEADER              counting the leading '#' characters
+// CONTENT_HEADER_TITLE_START  skipping blanks between the '#'s and the title
+// CONTENT_HEADER_TITLE        inside the title text
+// CONTENT_HTML                inside a line of a raw block started by '<'
+// CONTENT_HTML_END            just after a line break inside a raw block
+// CONTENT_BLOCKQUOTE          reading the '>' prefix and the blanks after it
+// CONTENT_BLOCKQUOTE_START    inside a blockquote line
+// CONTENT_BLOCKQUOTE_END      just after a line break inside a blockquote
+// CONTENT_CODE                reading the blank prefix of a code block
+// CONTENT_CODE_START          inside a code line
+// CONTENT_CODE_END            just after a line break inside a code block
+// CONTENT_PARAGRAPH           inside a paragraph line
+// CONTENT_PARAGRAPH_END       just after a line break inside a paragraph
+typedef enum {
+    CONTENT_START_LINE = 1,
+    CONTENT_HEADER,
+    CONTENT_HEADER_TITLE_START,
+    CONTENT_HEADER_TITLE,
+    CONTENT_HTML,
+    CONTENT_HTML_END,
+    CONTENT_BLOCKQUOTE,
+    CONTENT_BLOCKQUOTE_START,
+    CONTENT_BLOCKQUOTE_END,
+    CONTENT_CODE,
+    CONTENT_CODE_START,
+    CONTENT_CODE_END,
+    CONTENT_PARAGRAPH,
+    CONTENT_PARAGRAPH_END,
+} blogc_content_parser_state_t;
+
+
+// Converts a markdown-like source buffer into HTML.
+//
+// The input is scanned one character at a time. The first character of a
+// line selects the block type: '#' starts a header, a space or tab starts a
+// code block, '<' starts a raw block that is copied to the output unchanged,
+// '>' starts a blockquote (its content is parsed by a recursive call) and
+// anything else starts a paragraph. Headers end at the first line break;
+// the other blocks end at two consecutive line-break characters or at the
+// end of the input.
+//
+// src      text to parse.
+// src_len  number of bytes of src to parse.
+// err      must be non-NULL and must point to a NULL error; otherwise the
+//          function returns NULL without doing anything.
+//
+// Returns the result of b_string_free(rv, false), presumably the
+// heap-allocated HTML string, which the unit test releases with free().
+// Returns NULL if *err is set while parsing; at the moment that can only
+// come from the recursive blockquote call, because the branches marked
+// "error" below do not report anything yet (see the TODO above).
+char*
+blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err)
+{
+    if (err == NULL || *err != NULL)
+        return NULL;
+
+    size_t current = 0;
+    size_t start = 0;
+    size_t end = 0;
+
+    unsigned int header_level = 0;
+    char *prefix = NULL;
+    char *tmp = NULL;
+
+    b_slist_t *lines = NULL;
+
+    b_string_t *rv = b_string_new();
+    b_string_t *tmp_str = NULL;
+
+    blogc_content_parser_state_t state = CONTENT_START_LINE;
+
+    while (current < src_len) {
+        char c = src[current];
+        bool is_last = current == src_len - 1;
+
+        switch (state) {
+
+            case CONTENT_START_LINE:
+                // empty lines between blocks are skipped
+                if (c == '\n' || c == '\r')
+                    break;
+                if (c == '#') {
+                    header_level = 1;
+                    state = CONTENT_HEADER;
+                    break;
+                }
+                if (c == ' ' || c == '\t') {
+                    state = CONTENT_CODE;
+                    start = current;
+                    break;
+                }
+                if (c == '<') {
+                    state = CONTENT_HTML;
+                    start = current;
+                    break;
+                }
+                if (c == '>') {
+                    state = CONTENT_BLOCKQUOTE;
+                    start = current;
+                    break;
+                }
+                start = current;
+                state = CONTENT_PARAGRAPH;
+                break;
+
+            case CONTENT_HEADER:
+                if (c == '#') {
+                    header_level += 1;
+                    break;
+                }
+                if (c == ' ' || c == '\t') {
+                    state = CONTENT_HEADER_TITLE_START;
+                    break;
+                }
+                // error;
+                break;
+
+            case CONTENT_HEADER_TITLE_START:
+                if (c == ' ' || c == '\t')
+                    break;
+                // a title starts at the first character that is not a line
+                // break. this used to be written with '||', which is true
+                // for every character, so a line break was accepted as the
+                // first character of the title.
+                if (c != '\n' && c != '\r') {
+                    start = current;
+                    state = CONTENT_HEADER_TITLE;
+                    break;
+                }
+                // error; (header without title, not reported yet)
+                break;
+
+            case CONTENT_HEADER_TITLE:
+                if (c == '\n' || c == '\r' || is_last) {
+                    end = is_last ? src_len : current;
+                    tmp = b_strndup(src + start, end - start);
+                    b_string_append_printf(rv, "<h%d>%s</h%d>\n", header_level,
+                        tmp, header_level);
+                    free(tmp);
+                    tmp = NULL;
+                    state = CONTENT_START_LINE;
+                    start = current;
+                    break;
+                }
+                break;
+
+
+            case CONTENT_HTML:
+                if (c == '\n' || c == '\r' || is_last) {
+                    state = CONTENT_HTML_END;
+                    end = is_last ? src_len : current;
+                }
+                if (!is_last)
+                    break;
+                // fallthrough: flush the block at the end of the input
+
+            case CONTENT_HTML_END:
+                if (c == '\n' || c == '\r' || is_last) {
+                    tmp = b_strndup(src + start, end - start);
+                    b_string_append_printf(rv, "%s\n", tmp);
+                    free(tmp);
+                    tmp = NULL;
+                    state = CONTENT_START_LINE;
+                    start = current;
+                }
+                else
+                    state = CONTENT_HTML;
+                break;
+
+            case CONTENT_BLOCKQUOTE:
+                if (c == ' ' || c == '\t')
+                    break;
+                // the '>' and the blanks after it are the prefix that every
+                // following line of the blockquote is expected to repeat
+                prefix = b_strndup(src + start, current - start);
+                state = CONTENT_BLOCKQUOTE_START;
+                // fallthrough: c is already part of the first line
+
+            case CONTENT_BLOCKQUOTE_START:
+                if (c == '\n' || c == '\r' || is_last) {
+                    end = is_last ? src_len : current;
+                    tmp = b_strndup(src + start, end - start);
+                    if (b_str_starts_with(tmp, prefix)) {
+                        lines = b_slist_append(lines, b_strdup(tmp + strlen(prefix)));
+                    }
+                    else {
+                        // error
+                    }
+                    free(tmp);
+                    tmp = NULL;
+                    state = CONTENT_BLOCKQUOTE_END;
+                }
+                if (!is_last)
+                    break;
+                // fallthrough: flush the block at the end of the input
+
+            case CONTENT_BLOCKQUOTE_END:
+                if (c == '\n' || c == '\r' || is_last) {
+                    // join the collected lines and parse them as a document
+                    tmp_str = b_string_new();
+                    for (b_slist_t *l = lines; l != NULL; l = l->next) {
+                        if (l->next == NULL)
+                            b_string_append_printf(tmp_str, "%s", l->data);
+                        else
+                            b_string_append_printf(tmp_str, "%s\n", l->data);
+                    }
+                    tmp = blogc_content_parse(tmp_str->str, tmp_str->len, err);
+                    if (*err == NULL) {
+                        b_string_append_printf(rv, "<blockquote>%s</blockquote>\n",
+                            tmp);
+                    }
+                    free(tmp);
+                    tmp = NULL;
+                    b_string_free(tmp_str, true);
+                    tmp_str = NULL;
+                    b_slist_free_full(lines, free);
+                    lines = NULL;
+                    free(prefix);
+                    prefix = NULL;
+                    state = CONTENT_START_LINE;
+                    start = current;
+                }
+                else {
+                    start = current;
+                    state = CONTENT_BLOCKQUOTE_START;
+                }
+                break;
+
+            case CONTENT_CODE:
+                if (c == ' ' || c == '\t')
+                    break;
+                // the leading blanks of the first line are the prefix that
+                // is stripped from every line of the code block
+                prefix = b_strndup(src + start, current - start);
+                state = CONTENT_CODE_START;
+                // fallthrough: c is already part of the first line
+
+            case CONTENT_CODE_START:
+                if (c == '\n' || c == '\r' || is_last) {
+                    end = is_last ? src_len : current;
+                    tmp = b_strndup(src + start, end - start);
+                    if (b_str_starts_with(tmp, prefix)) {
+                        lines = b_slist_append(lines, b_strdup(tmp + strlen(prefix)));
+                    }
+                    else {
+                        // error
+                    }
+                    free(tmp);
+                    tmp = NULL;
+                    state = CONTENT_CODE_END;
+                }
+                if (!is_last)
+                    break;
+                // fallthrough: flush the block at the end of the input
+
+            case CONTENT_CODE_END:
+                if (c == '\n' || c == '\r' || is_last) {
+                    b_string_append(rv, "<pre><code>");
+                    for (b_slist_t *l = lines; l != NULL; l = l->next) {
+                        if (l->next == NULL)
+                            b_string_append_printf(rv, "%s", l->data);
+                        else
+                            b_string_append_printf(rv, "%s\n", l->data);
+                    }
+                    b_string_append(rv, "</code></pre>\n");
+                    b_slist_free_full(lines, free);
+                    lines = NULL;
+                    free(prefix);
+                    prefix = NULL;
+                    state = CONTENT_START_LINE;
+                    start = current;
+                }
+                else {
+                    start = current;
+                    state = CONTENT_CODE_START;
+                }
+                break;
+
+            case CONTENT_PARAGRAPH:
+                if (c == '\n' || c == '\r' || is_last) {
+                    state = CONTENT_PARAGRAPH_END;
+                    end = is_last ? src_len : current;
+                }
+                if (!is_last)
+                    break;
+                // fallthrough: flush the block at the end of the input
+
+            case CONTENT_PARAGRAPH_END:
+                if (c == '\n' || c == '\r' || is_last) {
+                    tmp = b_strndup(src + start, end - start);
+                    b_string_append_printf(rv, "<p>%s</p>\n", tmp);
+                    free(tmp);
+                    tmp = NULL;
+                    state = CONTENT_START_LINE;
+                    start = current;
+                }
+                else
+                    state = CONTENT_PARAGRAPH;
+                break;
+
+        }
+
+        // stop scanning as soon as the recursive call reported an error
+        if (*err != NULL)
+            break;
+
+        current++;
+    }
+
+    if (*err != NULL) {
+        b_string_free(rv, true);
+        return NULL;
+    }
+
+    return b_string_free(rv, false);
+}
diff --git a/src/content-parser.h b/src/content-parser.h
new file mode 100644
index 0000000..0a55fd9
--- /dev/null
+++ b/src/content-parser.h
@@ -0,0 +1,18 @@
+/*
+ * blogc: A blog compiler.
+ * Copyright (C) 2015 Rafael G. Martins <rafael@rafaelmartins.eng.br>
+ *
+ * This program can be distributed under the terms of the BSD License.
+ * See the file COPYING. + */ + +#ifndef _CONTENT_PARSER_H +#define _CONTENT_PARSER_H + +#include <stdlib.h> +#include "error.h" + +char* blogc_content_parse(const char *src, size_t src_len, + blogc_error_t **err); + +#endif /* _CONTENT_PARSER_H */ diff --git a/tests/check_content_parser.c b/tests/check_content_parser.c new file mode 100644 index 0000000..86ba8d8 --- /dev/null +++ b/tests/check_content_parser.c @@ -0,0 +1,90 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015 Rafael G. Martins <rafael@rafaelmartins.eng.br> + * + * This program can be distributed under the terms of the BSD License. + * See the file COPYING. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <stdarg.h> +#include <stddef.h> +#include <setjmp.h> +#include <cmocka.h> +#include <string.h> +#include "../src/content-parser.h" +#include "../src/error.h" +#include "../src/utils/utils.h" + + +static void +test_content_parse(void **state) +{ + const char *a = + "# um\n" + "## dois\n" + "### tres\n" + "#### quatro\n" + "##### cinco\n" + "###### seis\n" + "\n" + "bola\n" + "chunda\n" + "\n" + "> bola\n" + "> guda\n" + "> buga\n" + "> \n" + "> asd\n" + "\n" + " bola\n" + " asd\n" + " qwewer\n" + "\n" + "<style>\n" + " chunda\n" + "</style>\n" + "\n" + "guda\n" + "yay"; + blogc_error_t *err = NULL; + char *html = blogc_content_parse(a, strlen(a), &err); + assert_null(err); + assert_non_null(html); + assert_string_equal(html, + "<h1>um</h1>\n" + "<h2>dois</h2>\n" + "<h3>tres</h3>\n" + "<h4>quatro</h4>\n" + "<h5>cinco</h5>\n" + "<h6>seis</h6>\n" + "<p>bola\n" + "chunda</p>\n" + "<blockquote><p>bola\n" + "guda\n" + "buga</p>\n" + "<pre><code>asd</code></pre>\n" + "</blockquote>\n" + "<pre><code>bola\n" + " asd\n" + "qwewer</code></pre>\n" + "<style>\n" + " chunda\n" + "</style>\n" + "<p>guda\n" + "yay</p>\n"); + free(html); +} + + +int +main(void) +{ + const UnitTest tests[] = { + unit_test(test_content_parse), + }; + return run_tests(tests); +} |