From 1faa52052624e7c03256df0c63c43f5d40ddb57a Mon Sep 17 00:00:00 2001 From: "Rafael G. Martins" Date: Mon, 2 Nov 2015 03:58:09 -0200 Subject: content-parser: added basic rst-like directives support this patch adds support to something similar to reStructuredText directives [1]. the directive loader isn't implemented yet. also, the current implementation is stricter and differs a lot from the reStructuredText spec. documentation pending. this patch also fixes a few old parser bugs. [1] http://docutils.sourceforge.net/docs/ref/rst/directives.html --- src/content-parser.c | 246 +++++++++++++++++++++++++++++++++++++++++++++++---- src/directives.c | 21 +++++ src/directives.h | 17 ++++ 3 files changed, 265 insertions(+), 19 deletions(-) create mode 100644 src/directives.c create mode 100644 src/directives.h (limited to 'src') diff --git a/src/content-parser.c b/src/content-parser.c index 59e337e..1529ace 100644 --- a/src/content-parser.c +++ b/src/content-parser.c @@ -15,6 +15,7 @@ #include "utils/utils.h" #include "content-parser.h" +#include "directives.h" // this is a half ass implementation of a markdown-like syntax. bugs are // expected. feel free to improve the parser and add new features. @@ -43,7 +44,7 @@ blogc_slugify(const char *str) typedef enum { CONTENT_START_LINE = 1, - CONTENT_EXCERPT, + CONTENT_EXCERPT_OR_DIRECTIVE, CONTENT_EXCERPT_END, CONTENT_HEADER, CONTENT_HEADER_TITLE_START, @@ -64,6 +65,17 @@ typedef enum { CONTENT_ORDERED_LIST_SPACE, CONTENT_ORDERED_LIST_START, CONTENT_ORDERED_LIST_END, + CONTENT_DIRECTIVE_NAME_START, + CONTENT_DIRECTIVE_NAME, + CONTENT_DIRECTIVE_COLON, + CONTENT_DIRECTIVE_ARGUMENT_START, + CONTENT_DIRECTIVE_ARGUMENT, + CONTENT_DIRECTIVE_PARAM_PREFIX, + CONTENT_DIRECTIVE_PARAM_KEY_START, + CONTENT_DIRECTIVE_PARAM_KEY, + CONTENT_DIRECTIVE_PARAM_VALUE_START, + CONTENT_DIRECTIVE_PARAM_VALUE, + CONTENT_DIRECTIVE_PARAM_END, CONTENT_PARAGRAPH, CONTENT_PARAGRAPH_END, } blogc_content_parser_state_t; @@ -404,6 +416,9 @@ blogc_content_parse(const char *src, size_t *end_excerpt) size_t end = 0; size_t eend = 0; size_t real_end = 0; + size_t spaces = 0; + + bool no_jump = false; unsigned int header_level = 0; char *prefix = NULL; @@ -413,6 +428,11 @@ blogc_content_parse(const char *src, size_t *end_excerpt) char *parsed = NULL; char *slug = NULL; + char *directive_name = NULL; + char *directive_argument = NULL; + char *directive_key = NULL; + b_trie_t *directive_params = NULL; + // this isn't empty because we need some reasonable default value in the // unlikely case that we need to print some line ending before evaluating // the "real" value. @@ -463,11 +483,9 @@ blogc_content_parse(const char *src, size_t *end_excerpt) break; start = current; if (c == '.') { - if (end_excerpt != NULL) { - eend = rv->len; // fuck it - state = CONTENT_EXCERPT; - break; - } + eend = rv->len; // fuck it + state = CONTENT_EXCERPT_OR_DIRECTIVE; + break; } if (c == '#') { header_level = 1; @@ -502,26 +520,29 @@ blogc_content_parse(const char *src, size_t *end_excerpt) state = CONTENT_PARAGRAPH; break; - case CONTENT_EXCERPT: - if (end_excerpt != NULL) { - if (c == '.') - break; - if (c == '\n' || c == '\r') { - state = CONTENT_EXCERPT_END; - break; - } + case CONTENT_EXCERPT_OR_DIRECTIVE: + if (c == '.') + break; + if (c == ' ' && current - start == 2) { + state = CONTENT_DIRECTIVE_NAME_START; + if (is_last) + goto para; + break; + } + if (c == '\n' || c == '\r') { + state = CONTENT_EXCERPT_END; + break; } eend = 0; state = CONTENT_PARAGRAPH; break; case CONTENT_EXCERPT_END: - if (end_excerpt != NULL) { - if (c == '\n' || c == '\r') { + if (c == '\n' || c == '\r') { + if (end_excerpt != NULL) *end_excerpt = eend; - state = CONTENT_START_LINE; - break; - } + state = CONTENT_START_LINE; + break; } eend = 0; state = CONTENT_PARAGRAPH_END; @@ -616,6 +637,11 @@ blogc_content_parse(const char *src, size_t *end_excerpt) prefix = NULL; b_slist_free_full(lines, free); lines = NULL; + if (is_last) { + free(tmp); + tmp = NULL; + goto para; + } } free(tmp); tmp = NULL; @@ -677,6 +703,8 @@ blogc_content_parse(const char *src, size_t *end_excerpt) lines = NULL; free(tmp); tmp = NULL; + if (is_last) + goto para; break; } free(tmp); @@ -836,6 +864,8 @@ hr: break; } state = CONTENT_PARAGRAPH; + if (is_last) + goto para; break; case CONTENT_ORDERED_LIST_SPACE: @@ -937,6 +967,172 @@ hr: } break; + case CONTENT_DIRECTIVE_NAME_START: + if (is_last) + goto para; + if (c >= 'a' && c <= 'z') { + start2 = current; + state = CONTENT_DIRECTIVE_NAME; + break; + } + state = CONTENT_PARAGRAPH; + break; + + case CONTENT_DIRECTIVE_NAME: + if (is_last) + goto para; + if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_') + break; + if (c == ':') { + end = current; + state = CONTENT_DIRECTIVE_COLON; + break; + } + state = CONTENT_PARAGRAPH; + break; + + case CONTENT_DIRECTIVE_COLON: + if (c == ':') { + free(directive_name); + directive_name = b_strndup(src + start2, end - start2); + state = CONTENT_DIRECTIVE_ARGUMENT_START; + if (is_last) + goto param_end; + break; + } + if (is_last) + goto para; + state = CONTENT_PARAGRAPH; + break; + + case CONTENT_DIRECTIVE_ARGUMENT_START: + if (c == ' ') { + if (is_last) + goto param_end; + break; + } + if (c == '\n' || c == '\r' || is_last) { + state = CONTENT_DIRECTIVE_PARAM_PREFIX; + directive_argument = NULL; + if (is_last) + goto param_end; + break; + } + start2 = current; + state = CONTENT_DIRECTIVE_ARGUMENT; + break; + + case CONTENT_DIRECTIVE_ARGUMENT: + if (c == '\n' || c == '\r' || is_last) { + spaces = 0; + state = CONTENT_DIRECTIVE_PARAM_PREFIX; + end = is_last && c != '\n' && c != '\r' ? src_len : + (real_end != 0 ? real_end : current); + free(directive_argument); + directive_argument = b_strndup(src + start2, end - start2); + if (is_last) + goto param_end; + } + break; + + case CONTENT_DIRECTIVE_PARAM_PREFIX: + if (c == ' ') { + spaces++; + break; + } + if ((c == '\n' || c == '\r') && spaces == 0) { + state = CONTENT_DIRECTIVE_PARAM_END; + if (is_last) + goto param_end; + break; + } + if (c == ':' && spaces == 3) { + state = CONTENT_DIRECTIVE_PARAM_KEY_START; + break; + } + state = CONTENT_PARAGRAPH; + if (is_last) + goto para; + break; + + case CONTENT_DIRECTIVE_PARAM_KEY_START: + if (is_last) + goto para; + if (c >= 'a' && c <= 'z') { + start2 = current; + state = CONTENT_DIRECTIVE_PARAM_KEY; + break; + } + state = CONTENT_PARAGRAPH; + break; + + case CONTENT_DIRECTIVE_PARAM_KEY: + if (is_last) + goto para; + if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_') + break; + if (c == ':') { + free(directive_key); + directive_key = b_strndup(src + start2, current - start2); + state = CONTENT_DIRECTIVE_PARAM_VALUE_START; + break; + } + state = CONTENT_PARAGRAPH; + break; + + case CONTENT_DIRECTIVE_PARAM_VALUE_START: + if (is_last) + goto para; + if (c == ' ') + break; + start2 = current; + state = CONTENT_DIRECTIVE_PARAM_VALUE; + break; + + case CONTENT_DIRECTIVE_PARAM_VALUE: + if (c == '\n' || c == '\r' || is_last) { + state = CONTENT_DIRECTIVE_PARAM_END; + end = is_last && c != '\n' && c != '\r' ? src_len : + (real_end != 0 ? real_end : current); + if (directive_params == NULL) + directive_params = b_trie_new(free); + b_trie_insert(directive_params, directive_key, + b_strndup(src + start2, end - start2)); + free(directive_key); + directive_key = NULL; + } + if (!is_last) + break; + + case CONTENT_DIRECTIVE_PARAM_END: +param_end: + if (c == '\n' || c == '\r' || is_last) { + char *rv_d = blogc_directive_loader(directive_name, + directive_argument, directive_params); + if (rv_d) + b_string_append(rv, rv_d); + free(rv_d); + state = CONTENT_START_LINE; + start = current; + free(directive_name); + directive_name = NULL; + free(directive_argument); + directive_argument = NULL; + b_trie_free(directive_params); + directive_params = NULL; + break; + } + if (c == ' ') { + start2 = current; + spaces = 1; + state = CONTENT_DIRECTIVE_PARAM_PREFIX; + break; + } + state = CONTENT_PARAGRAPH; + if (is_last) + goto para; + break; + case CONTENT_PARAGRAPH: if (c == '\n' || c == '\r' || is_last) { state = CONTENT_PARAGRAPH_END; @@ -947,8 +1143,15 @@ hr: break; case CONTENT_PARAGRAPH_END: + no_jump = true; para: if (c == '\n' || c == '\r' || is_last) { + if (!no_jump && is_last) { + if (c == '\n' || c == '\r') + end = src_len - 1; + else + end = src_len; + } tmp = b_strndup(src + start, end - start); parsed = blogc_content_parse_inline(tmp); b_string_append_printf(rv, "

%s

%s", parsed, @@ -969,5 +1172,10 @@ para: current++; } + free(directive_name); + free(directive_argument); + free(directive_key); + b_trie_free(directive_params); + return b_string_free(rv, false); } diff --git a/src/directives.c b/src/directives.c new file mode 100644 index 0000000..7a6b47f --- /dev/null +++ b/src/directives.c @@ -0,0 +1,21 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015 Rafael G. Martins + * + * This program can be distributed under the terms of the BSD License. + * See the file LICENSE. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ + +#include "utils/utils.h" + + +char* +blogc_directive_loader(const char *name, const char *argument, b_trie_t *params) +{ + // TODO: implement me! + return b_strdup("TODO\n"); +} diff --git a/src/directives.h b/src/directives.h new file mode 100644 index 0000000..fa758e2 --- /dev/null +++ b/src/directives.h @@ -0,0 +1,17 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015 Rafael G. Martins + * + * This program can be distributed under the terms of the BSD License. + * See the file LICENSE. + */ + +#ifndef _DIRECTIVES_H +#define _DIRECTIVES_H + +#include "utils/utils.h" + +char* blogc_directive_loader(const char *name, const char *argument, + b_trie_t *params); + +#endif /* _DIRECTIVES_H */ -- cgit v1.2.3-18-g5258