diff options
| author | Rafael G. Martins <rafael@rafaelmartins.eng.br> | 2015-05-04 05:40:53 -0300 | 
|---|---|---|
| committer | Rafael G. Martins <rafael@rafaelmartins.eng.br> | 2015-05-04 05:40:53 -0300 | 
| commit | 9d484725cf9874ff3f34c61af5f57f6b92e05c00 (patch) | |
| tree | b3c5b2ca63d108157ac69fe18b0ecabacf93920b | |
| parent | edb7d515d6f3c41196c1dcb36d719baad1a85bc9 (diff) | |
| download | blogc-9d484725cf9874ff3f34c61af5f57f6b92e05c00.tar.gz blogc-9d484725cf9874ff3f34c61af5f57f6b92e05c00.tar.bz2 blogc-9d484725cf9874ff3f34c61af5f57f6b92e05c00.zip | |
started implementing a markdown-like syntax for content
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | Makefile.am | 20 | ||||
| -rw-r--r-- | src/content-parser.c | 296 | ||||
| -rw-r--r-- | src/content-parser.h | 18 | ||||
| -rw-r--r-- | tests/check_content_parser.c | 90 | 
5 files changed, 425 insertions, 0 deletions
| @@ -41,6 +41,7 @@ Makefile.in  /blogc  # tests +/tests/check_content_parser  /tests/check_error  /tests/check_loader  /tests/check_renderer diff --git a/Makefile.am b/Makefile.am index 0f5c5e8..4537fcb 100644 --- a/Makefile.am +++ b/Makefile.am @@ -19,6 +19,7 @@ CLEANFILES = \  	$(NULL)  noinst_HEADERS = \ +	src/content-parser.h \  	src/error.h \  	src/loader.h \  	src/renderer.h \ @@ -43,6 +44,7 @@ check_PROGRAMS = \  libblogc_la_SOURCES = \ +	src/content-parser.c \  	src/error.c \  	src/loader.c \  	src/renderer.c \ @@ -82,6 +84,7 @@ blogc_LDADD = \  if USE_CMOCKA  check_PROGRAMS += \ +	tests/check_content_parser \  	tests/check_error \  	tests/check_loader \  	tests/check_renderer \ @@ -124,6 +127,23 @@ tests_check_loader_LDADD = \  	libblogc.la \  	$(NULL) +tests_check_content_parser_SOURCES = \ +	tests/check_content_parser.c \ +	$(NULL) + +tests_check_content_parser_CFLAGS = \ +	$(CMOCKA_CFLAGS) \ +	$(NULL) + +tests_check_content_parser_LDFLAGS = \ +	-no-install \ +	$(NULL) + +tests_check_content_parser_LDADD = \ +	$(CMOCKA_LIBS) \ +	libblogc.la \ +	$(NULL) +  tests_check_renderer_SOURCES = \  	tests/check_renderer.c \  	$(NULL) diff --git a/src/content-parser.c b/src/content-parser.c new file mode 100644 index 0000000..5448d96 --- /dev/null +++ b/src/content-parser.c @@ -0,0 +1,296 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015 Rafael G. Martins <rafael@rafaelmartins.eng.br> + * + * This program can be distributed under the terms of the BSD License. + * See the file COPYING. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <stdbool.h> +#include <string.h> + +#include "utils/utils.h" +#include "content-parser.h" +#include "error.h" + + +// this is a half ass implementation of a markdown-like syntax. bugs are +// expected. feel free to improve the parser and and new features. 
+ + +// TODO: block elements: list, horizontal rule +// TODO: inline elements: links, emphasis, code, images +// TODO: error handling + + +typedef enum { +    CONTENT_START_LINE = 1, +    CONTENT_HEADER, +    CONTENT_HEADER_TITLE_START, +    CONTENT_HEADER_TITLE, +    CONTENT_HTML, +    CONTENT_HTML_END, +    CONTENT_BLOCKQUOTE, +    CONTENT_BLOCKQUOTE_START, +    CONTENT_BLOCKQUOTE_END, +    CONTENT_CODE, +    CONTENT_CODE_START, +    CONTENT_CODE_END, +    CONTENT_PARAGRAPH, +    CONTENT_PARAGRAPH_END, +} blogc_content_parser_state_t; + + +char* +blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err) +{ +    if (err == NULL || *err != NULL) +        return NULL; + +    size_t current = 0; +    size_t start = 0; +    size_t end = 0; + +    unsigned int header_level = 0; +    char *prefix = NULL; +    char *tmp = NULL; + +    b_slist_t *lines = NULL; + +    b_string_t *rv = b_string_new(); +    b_string_t *tmp_str = NULL; + +    blogc_content_parser_state_t state = CONTENT_START_LINE; + +    while (current < src_len) { +        char c = src[current]; +        bool is_last = current == src_len - 1; + +        switch (state) { + +            case CONTENT_START_LINE: +                if (c == '\n' || c == '\r') +                    break; +                if (c == '#') { +                    header_level = 1; +                    state = CONTENT_HEADER; +                    break; +                } +                if (c == ' ' || c == '\t') { +                    state = CONTENT_CODE; +                    start = current; +                    break; +                } +                if (c == '<') { +                    state = CONTENT_HTML; +                    start = current; +                    break; +                } +                if (c == '>') { +                    state = CONTENT_BLOCKQUOTE; +                    start = current; +                    break; +                } +                start = current; +                state = 
CONTENT_PARAGRAPH; +                break; + +            case CONTENT_HEADER: +                if (c == '#') { +                    header_level += 1; +                    break; +                } +                if (c == ' ' || c == '\t') { +                    state = CONTENT_HEADER_TITLE_START; +                    break; +                } +                // error; +                break; + +            case CONTENT_HEADER_TITLE_START: +                if (c == ' ' || c == '\t') +                    break; +                if (c != '\n' || c != '\r') { +                    start = current; +                    state = CONTENT_HEADER_TITLE; +                    break; +                } +                // error; +                break; + +            case CONTENT_HEADER_TITLE: +                if (c == '\n' || c == '\r' || is_last) { +                    end = is_last ? src_len : current; +                    tmp = b_strndup(src + start, end - start); +                    b_string_append_printf(rv, "<h%d>%s</h%d>\n", header_level, +                        tmp, header_level); +                    free(tmp); +                    tmp = NULL; +                    state = CONTENT_START_LINE; +                    start = current; +                    break; +                } +                break; + + +            case CONTENT_HTML: +                if (c == '\n' || c == '\r' || is_last) { +                    state = CONTENT_HTML_END; +                    end = is_last ? 
src_len : current; +                } +                if (!is_last) +                    break; + +            case CONTENT_HTML_END: +                if (c == '\n' || c == '\r' || is_last) { +                    tmp = b_strndup(src + start, end - start); +                    b_string_append_printf(rv, "%s\n", tmp); +                    free(tmp); +                    tmp = NULL; +                    state = CONTENT_START_LINE; +                    start = current; +                } +                else +                    state = CONTENT_HTML; +                break; + +            case CONTENT_BLOCKQUOTE: +                if (c == ' ' || c == '\t') +                    break; +                prefix = b_strndup(src + start, current - start); +                state = CONTENT_BLOCKQUOTE_START; + +            case CONTENT_BLOCKQUOTE_START: +                if (c == '\n' || c == '\r' || is_last) { +                    end = is_last ? src_len : current; +                    tmp = b_strndup(src + start, end - start); +                    if (b_str_starts_with(tmp, prefix)) { +                        lines = b_slist_append(lines, b_strdup(tmp + strlen(prefix))); +                    } +                    else { +                        // error +                    } +                    free(tmp); +                    tmp = NULL; +                    state = CONTENT_BLOCKQUOTE_END; +                } +                if (!is_last) +                    break; + +            case CONTENT_BLOCKQUOTE_END: +                if (c == '\n' || c == '\r' || is_last) { +                    tmp_str = b_string_new(); +                    for (b_slist_t *l = lines; l != NULL; l = l->next) { +                        if (l->next == NULL) +                            b_string_append_printf(tmp_str, "%s", l->data); +                        else +                            b_string_append_printf(tmp_str, "%s\n", l->data); +                    } +                    tmp = 
blogc_content_parse(tmp_str->str, tmp_str->len, err); +                    if (*err == NULL) { +                        b_string_append_printf(rv, "<blockquote>%s</blockquote>\n", +                            tmp); +                    } +                    free(tmp); +                    tmp = NULL; +                    b_string_free(tmp_str, true); +                    tmp_str = NULL; +                    b_slist_free_full(lines, free); +                    lines = NULL; +                    free(prefix); +                    prefix = NULL; +                    state = CONTENT_START_LINE; +                    start = current; +                } +                else { +                    start = current; +                    state = CONTENT_BLOCKQUOTE_START; +                } +                break; + +            case CONTENT_CODE: +                if (c == ' ' || c == '\t') +                    break; +                prefix = b_strndup(src + start, current - start); +                state = CONTENT_CODE_START; + +            case CONTENT_CODE_START: +                if (c == '\n' || c == '\r' || is_last) { +                    end = is_last ? 
src_len : current; +                    tmp = b_strndup(src + start, end - start); +                    if (b_str_starts_with(tmp, prefix)) { +                        lines = b_slist_append(lines, b_strdup(tmp + strlen(prefix))); +                    } +                    else { +                        // error +                    } +                    free(tmp); +                    tmp = NULL; +                    state = CONTENT_CODE_END; +                } +                if (!is_last) +                    break; + +            case CONTENT_CODE_END: +                if (c == '\n' || c == '\r' || is_last) { +                    b_string_append(rv, "<pre><code>"); +                    for (b_slist_t *l = lines; l != NULL; l = l->next) { +                        if (l->next == NULL) +                            b_string_append_printf(rv, "%s", l->data); +                        else +                            b_string_append_printf(rv, "%s\n", l->data); +                    } +                    b_string_append(rv, "</code></pre>\n"); +                    b_slist_free_full(lines, free); +                    lines = NULL; +                    free(prefix); +                    prefix = NULL; +                    state = CONTENT_START_LINE; +                    start = current; +                } +                else { +                    start = current; +                    state = CONTENT_CODE_START; +                } +                break; + +            case CONTENT_PARAGRAPH: +                if (c == '\n' || c == '\r' || is_last) { +                    state = CONTENT_PARAGRAPH_END; +                    end = is_last ? 
src_len : current; +                } +                if (!is_last) +                    break; + +            case CONTENT_PARAGRAPH_END: +                if (c == '\n' || c == '\r' || is_last) { +                    tmp = b_strndup(src + start, end - start); +                    b_string_append_printf(rv, "<p>%s</p>\n", tmp); +                    free(tmp); +                    tmp = NULL; +                    state = CONTENT_START_LINE; +                    start = current; +                } +                else +                    state = CONTENT_PARAGRAPH; +                break; + +        } + +        if (*err != NULL) +            break; + +        current++; +    } + +    if (*err != NULL) { +        b_string_free(rv, true); +        return NULL; +    } + +    return b_string_free(rv, false); +} diff --git a/src/content-parser.h b/src/content-parser.h new file mode 100644 index 0000000..0a55fd9 --- /dev/null +++ b/src/content-parser.h @@ -0,0 +1,18 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015 Rafael G. Martins <rafael@rafaelmartins.eng.br> + * + * This program can be distributed under the terms of the BSD License. + * See the file COPYING. + */ + +#ifndef _CONTENT_PARSER_H +#define _CONTENT_PARSER_H + +#include <stdlib.h> +#include "error.h" + +char* blogc_content_parse(const char *src, size_t src_len, +    blogc_error_t **err); + +#endif /* _CONTENT_PARSER_H */ diff --git a/tests/check_content_parser.c b/tests/check_content_parser.c new file mode 100644 index 0000000..86ba8d8 --- /dev/null +++ b/tests/check_content_parser.c @@ -0,0 +1,90 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015 Rafael G. Martins <rafael@rafaelmartins.eng.br> + * + * This program can be distributed under the terms of the BSD License. + * See the file COPYING. 
+ */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <stdarg.h> +#include <stddef.h> +#include <setjmp.h> +#include <cmocka.h> +#include <string.h> +#include "../src/content-parser.h" +#include "../src/error.h" +#include "../src/utils/utils.h" + + +static void +test_content_parse(void **state) +{ +    const char *a = +        "# um\n" +        "## dois\n" +        "### tres\n" +        "#### quatro\n" +        "##### cinco\n" +        "###### seis\n" +        "\n" +        "bola\n" +        "chunda\n" +        "\n" +        ">  bola\n" +        ">  guda\n" +        ">  buga\n" +        ">  \n" +        ">    asd\n" +        "\n" +        "    bola\n" +        "     asd\n" +        "    qwewer\n" +        "\n" +        "<style>\n" +        "   chunda\n" +        "</style>\n" +        "\n" +        "guda\n" +        "yay"; +    blogc_error_t *err = NULL; +    char *html = blogc_content_parse(a, strlen(a), &err); +    assert_null(err); +    assert_non_null(html); +    assert_string_equal(html, +        "<h1>um</h1>\n" +        "<h2>dois</h2>\n" +        "<h3>tres</h3>\n" +        "<h4>quatro</h4>\n" +        "<h5>cinco</h5>\n" +        "<h6>seis</h6>\n" +        "<p>bola\n" +        "chunda</p>\n" +        "<blockquote><p>bola\n" +        "guda\n" +        "buga</p>\n" +        "<pre><code>asd</code></pre>\n" +        "</blockquote>\n" +        "<pre><code>bola\n" +        " asd\n" +        "qwewer</code></pre>\n" +        "<style>\n" +        "   chunda\n" +        "</style>\n" +        "<p>guda\n" +        "yay</p>\n"); +    free(html); +} + + +int +main(void) +{ +    const UnitTest tests[] = { +        unit_test(test_content_parse), +    }; +    return run_tests(tests); +} | 
