diff options
author | Rafael G. Martins <rafael@rafaelmartins.eng.br> | 2015-11-02 03:58:09 -0200 |
---|---|---|
committer | Rafael G. Martins <rafael@rafaelmartins.eng.br> | 2015-11-02 03:58:09 -0200 |
commit | 1faa52052624e7c03256df0c63c43f5d40ddb57a (patch) | |
tree | ff53b56d31231f9f05fd3ff9f518f7f90f90bea0 /src/content-parser.c | |
parent | 5b869700957af87f672f4be6336a8f587450ec14 (diff) | |
download | blogc-1faa52052624e7c03256df0c63c43f5d40ddb57a.tar.gz blogc-1faa52052624e7c03256df0c63c43f5d40ddb57a.tar.bz2 blogc-1faa52052624e7c03256df0c63c43f5d40ddb57a.zip |
content-parser: added basic rst-like directives support
this patch adds support to something similar to reStructuredText
directives [1]. the directive loader isn't implemented yet. also,
the current implementation is stricter and differs a lot from the
reStructuredText spec. documentation pending.
this patch also fixes a few old parser bugs.
[1] http://docutils.sourceforge.net/docs/ref/rst/directives.html
Diffstat (limited to 'src/content-parser.c')
-rw-r--r-- | src/content-parser.c | 246 |
1 files changed, 227 insertions, 19 deletions
diff --git a/src/content-parser.c b/src/content-parser.c index 59e337e..1529ace 100644 --- a/src/content-parser.c +++ b/src/content-parser.c @@ -15,6 +15,7 @@ #include "utils/utils.h" #include "content-parser.h" +#include "directives.h" // this is a half ass implementation of a markdown-like syntax. bugs are // expected. feel free to improve the parser and add new features. @@ -43,7 +44,7 @@ blogc_slugify(const char *str) typedef enum { CONTENT_START_LINE = 1, - CONTENT_EXCERPT, + CONTENT_EXCERPT_OR_DIRECTIVE, CONTENT_EXCERPT_END, CONTENT_HEADER, CONTENT_HEADER_TITLE_START, @@ -64,6 +65,17 @@ typedef enum { CONTENT_ORDERED_LIST_SPACE, CONTENT_ORDERED_LIST_START, CONTENT_ORDERED_LIST_END, + CONTENT_DIRECTIVE_NAME_START, + CONTENT_DIRECTIVE_NAME, + CONTENT_DIRECTIVE_COLON, + CONTENT_DIRECTIVE_ARGUMENT_START, + CONTENT_DIRECTIVE_ARGUMENT, + CONTENT_DIRECTIVE_PARAM_PREFIX, + CONTENT_DIRECTIVE_PARAM_KEY_START, + CONTENT_DIRECTIVE_PARAM_KEY, + CONTENT_DIRECTIVE_PARAM_VALUE_START, + CONTENT_DIRECTIVE_PARAM_VALUE, + CONTENT_DIRECTIVE_PARAM_END, CONTENT_PARAGRAPH, CONTENT_PARAGRAPH_END, } blogc_content_parser_state_t; @@ -404,6 +416,9 @@ blogc_content_parse(const char *src, size_t *end_excerpt) size_t end = 0; size_t eend = 0; size_t real_end = 0; + size_t spaces = 0; + + bool no_jump = false; unsigned int header_level = 0; char *prefix = NULL; @@ -413,6 +428,11 @@ blogc_content_parse(const char *src, size_t *end_excerpt) char *parsed = NULL; char *slug = NULL; + char *directive_name = NULL; + char *directive_argument = NULL; + char *directive_key = NULL; + b_trie_t *directive_params = NULL; + // this isn't empty because we need some reasonable default value in the // unlikely case that we need to print some line ending before evaluating // the "real" value. @@ -463,11 +483,9 @@ blogc_content_parse(const char *src, size_t *end_excerpt) break; start = current; if (c == '.') { - if (end_excerpt != NULL) { - eend = rv->len; // fuck it - state = CONTENT_EXCERPT; - break; - } + eend = rv->len; // fuck it + state = CONTENT_EXCERPT_OR_DIRECTIVE; + break; } if (c == '#') { header_level = 1; @@ -502,26 +520,29 @@ blogc_content_parse(const char *src, size_t *end_excerpt) state = CONTENT_PARAGRAPH; break; - case CONTENT_EXCERPT: - if (end_excerpt != NULL) { - if (c == '.') - break; - if (c == '\n' || c == '\r') { - state = CONTENT_EXCERPT_END; - break; - } + case CONTENT_EXCERPT_OR_DIRECTIVE: + if (c == '.') + break; + if (c == ' ' && current - start == 2) { + state = CONTENT_DIRECTIVE_NAME_START; + if (is_last) + goto para; + break; + } + if (c == '\n' || c == '\r') { + state = CONTENT_EXCERPT_END; + break; } eend = 0; state = CONTENT_PARAGRAPH; break; case CONTENT_EXCERPT_END: - if (end_excerpt != NULL) { - if (c == '\n' || c == '\r') { + if (c == '\n' || c == '\r') { + if (end_excerpt != NULL) *end_excerpt = eend; - state = CONTENT_START_LINE; - break; - } + state = CONTENT_START_LINE; + break; } eend = 0; state = CONTENT_PARAGRAPH_END; @@ -616,6 +637,11 @@ blogc_content_parse(const char *src, size_t *end_excerpt) prefix = NULL; b_slist_free_full(lines, free); lines = NULL; + if (is_last) { + free(tmp); + tmp = NULL; + goto para; + } } free(tmp); tmp = NULL; @@ -677,6 +703,8 @@ blogc_content_parse(const char *src, size_t *end_excerpt) lines = NULL; free(tmp); tmp = NULL; + if (is_last) + goto para; break; } free(tmp); @@ -836,6 +864,8 @@ hr: break; } state = CONTENT_PARAGRAPH; + if (is_last) + goto para; break; case CONTENT_ORDERED_LIST_SPACE: @@ -937,6 +967,172 @@ hr: } break; + case CONTENT_DIRECTIVE_NAME_START: + if (is_last) + goto para; + if (c >= 'a' && c <= 'z') { + start2 = current; + state = CONTENT_DIRECTIVE_NAME; + break; + } + state = CONTENT_PARAGRAPH; + break; + + case CONTENT_DIRECTIVE_NAME: + if (is_last) + goto para; + if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_') + break; + if (c == ':') { + end = current; + state = CONTENT_DIRECTIVE_COLON; + break; + } + state = CONTENT_PARAGRAPH; + break; + + case CONTENT_DIRECTIVE_COLON: + if (c == ':') { + free(directive_name); + directive_name = b_strndup(src + start2, end - start2); + state = CONTENT_DIRECTIVE_ARGUMENT_START; + if (is_last) + goto param_end; + break; + } + if (is_last) + goto para; + state = CONTENT_PARAGRAPH; + break; + + case CONTENT_DIRECTIVE_ARGUMENT_START: + if (c == ' ') { + if (is_last) + goto param_end; + break; + } + if (c == '\n' || c == '\r' || is_last) { + state = CONTENT_DIRECTIVE_PARAM_PREFIX; + directive_argument = NULL; + if (is_last) + goto param_end; + break; + } + start2 = current; + state = CONTENT_DIRECTIVE_ARGUMENT; + break; + + case CONTENT_DIRECTIVE_ARGUMENT: + if (c == '\n' || c == '\r' || is_last) { + spaces = 0; + state = CONTENT_DIRECTIVE_PARAM_PREFIX; + end = is_last && c != '\n' && c != '\r' ? src_len : + (real_end != 0 ? real_end : current); + free(directive_argument); + directive_argument = b_strndup(src + start2, end - start2); + if (is_last) + goto param_end; + } + break; + + case CONTENT_DIRECTIVE_PARAM_PREFIX: + if (c == ' ') { + spaces++; + break; + } + if ((c == '\n' || c == '\r') && spaces == 0) { + state = CONTENT_DIRECTIVE_PARAM_END; + if (is_last) + goto param_end; + break; + } + if (c == ':' && spaces == 3) { + state = CONTENT_DIRECTIVE_PARAM_KEY_START; + break; + } + state = CONTENT_PARAGRAPH; + if (is_last) + goto para; + break; + + case CONTENT_DIRECTIVE_PARAM_KEY_START: + if (is_last) + goto para; + if (c >= 'a' && c <= 'z') { + start2 = current; + state = CONTENT_DIRECTIVE_PARAM_KEY; + break; + } + state = CONTENT_PARAGRAPH; + break; + + case CONTENT_DIRECTIVE_PARAM_KEY: + if (is_last) + goto para; + if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_') + break; + if (c == ':') { + free(directive_key); + directive_key = b_strndup(src + start2, current - start2); + state = CONTENT_DIRECTIVE_PARAM_VALUE_START; + break; + } + state = CONTENT_PARAGRAPH; + break; + + case CONTENT_DIRECTIVE_PARAM_VALUE_START: + if (is_last) + goto para; + if (c == ' ') + break; + start2 = current; + state = CONTENT_DIRECTIVE_PARAM_VALUE; + break; + + case CONTENT_DIRECTIVE_PARAM_VALUE: + if (c == '\n' || c == '\r' || is_last) { + state = CONTENT_DIRECTIVE_PARAM_END; + end = is_last && c != '\n' && c != '\r' ? src_len : + (real_end != 0 ? real_end : current); + if (directive_params == NULL) + directive_params = b_trie_new(free); + b_trie_insert(directive_params, directive_key, + b_strndup(src + start2, end - start2)); + free(directive_key); + directive_key = NULL; + } + if (!is_last) + break; + + case CONTENT_DIRECTIVE_PARAM_END: +param_end: + if (c == '\n' || c == '\r' || is_last) { + char *rv_d = blogc_directive_loader(directive_name, + directive_argument, directive_params); + if (rv_d) + b_string_append(rv, rv_d); + free(rv_d); + state = CONTENT_START_LINE; + start = current; + free(directive_name); + directive_name = NULL; + free(directive_argument); + directive_argument = NULL; + b_trie_free(directive_params); + directive_params = NULL; + break; + } + if (c == ' ') { + start2 = current; + spaces = 1; + state = CONTENT_DIRECTIVE_PARAM_PREFIX; + break; + } + state = CONTENT_PARAGRAPH; + if (is_last) + goto para; + break; + case CONTENT_PARAGRAPH: if (c == '\n' || c == '\r' || is_last) { state = CONTENT_PARAGRAPH_END; @@ -947,8 +1143,15 @@ hr: break; case CONTENT_PARAGRAPH_END: + no_jump = true; para: if (c == '\n' || c == '\r' || is_last) { + if (!no_jump && is_last) { + if (c == '\n' || c == '\r') + end = src_len - 1; + else + end = src_len; + } tmp = b_strndup(src + start, end - start); parsed = blogc_content_parse_inline(tmp); b_string_append_printf(rv, "<p>%s</p>%s", parsed, @@ -969,5 +1172,10 @@ para: current++; } + free(directive_name); + free(directive_argument); + free(directive_key); + b_trie_free(directive_params); + return b_string_free(rv, false); } |