diff options
Diffstat (limited to 'src/content-parser.c')
-rw-r--r-- | src/content-parser.c | 248 |
1 files changed, 229 insertions, 19 deletions
diff --git a/src/content-parser.c b/src/content-parser.c index f5450d6..8e98405 100644 --- a/src/content-parser.c +++ b/src/content-parser.c @@ -15,6 +15,7 @@ #include "utils/utils.h" #include "content-parser.h" +#include "directives.h" // this is a half ass implementation of a markdown-like syntax. bugs are // expected. feel free to improve the parser and add new features. @@ -43,7 +44,7 @@ blogc_slugify(const char *str) typedef enum { CONTENT_START_LINE = 1, - CONTENT_EXCERPT, + CONTENT_EXCERPT_OR_DIRECTIVE, CONTENT_EXCERPT_END, CONTENT_HEADER, CONTENT_HEADER_TITLE_START, @@ -64,6 +65,18 @@ typedef enum { CONTENT_ORDERED_LIST_SPACE, CONTENT_ORDERED_LIST_START, CONTENT_ORDERED_LIST_END, + CONTENT_DIRECTIVE_NAME_START, + CONTENT_DIRECTIVE_NAME, + CONTENT_DIRECTIVE_COLON, + CONTENT_DIRECTIVE_ARGUMENT_START, + CONTENT_DIRECTIVE_ARGUMENT, + CONTENT_DIRECTIVE_PARAM_PREFIX_START, + CONTENT_DIRECTIVE_PARAM_PREFIX, + CONTENT_DIRECTIVE_PARAM_KEY_START, + CONTENT_DIRECTIVE_PARAM_KEY, + CONTENT_DIRECTIVE_PARAM_VALUE_START, + CONTENT_DIRECTIVE_PARAM_VALUE, + CONTENT_DIRECTIVE_PARAM_END, CONTENT_PARAGRAPH, CONTENT_PARAGRAPH_END, } blogc_content_parser_state_t; @@ -415,6 +428,11 @@ blogc_content_parse(const char *src, size_t *end_excerpt) char *parsed = NULL; char *slug = NULL; + char *directive_name = NULL; + char *directive_argument = NULL; + char *directive_key = NULL; + b_trie_t *directive_params = NULL; + // this isn't empty because we need some reasonable default value in the // unlikely case that we need to print some line ending before evaluating // the "real" value. @@ -465,11 +483,9 @@ blogc_content_parse(const char *src, size_t *end_excerpt) break; start = current; if (c == '.') { - if (end_excerpt != NULL) { - eend = rv->len; // fuck it - state = CONTENT_EXCERPT; - break; - } + eend = rv->len; // fuck it + state = CONTENT_EXCERPT_OR_DIRECTIVE; + break; } if (c == '#') { header_level = 1; @@ -504,26 +520,29 @@ blogc_content_parse(const char *src, size_t *end_excerpt) state = CONTENT_PARAGRAPH; break; - case CONTENT_EXCERPT: - if (end_excerpt != NULL) { - if (c == '.') - break; - if (c == '\n' || c == '\r') { - state = CONTENT_EXCERPT_END; - break; - } + case CONTENT_EXCERPT_OR_DIRECTIVE: + if (c == '.') + break; + if ((c == ' ' || c == '\t') && current - start == 2) { + state = CONTENT_DIRECTIVE_NAME_START; + if (is_last) + goto para; + break; + } + if (c == '\n' || c == '\r') { + state = CONTENT_EXCERPT_END; + break; } eend = 0; state = CONTENT_PARAGRAPH; break; case CONTENT_EXCERPT_END: - if (end_excerpt != NULL) { - if (c == '\n' || c == '\r') { + if (c == '\n' || c == '\r') { + if (end_excerpt != NULL) *end_excerpt = eend; - state = CONTENT_START_LINE; - break; - } + state = CONTENT_START_LINE; + break; } eend = 0; state = CONTENT_PARAGRAPH_END; @@ -948,6 +967,191 @@ hr: } break; + case CONTENT_DIRECTIVE_NAME_START: + if (is_last) + goto para; + if (c >= 'a' && c <= 'z') { + start2 = current; + state = CONTENT_DIRECTIVE_NAME; + break; + } + state = CONTENT_PARAGRAPH; + break; + + case CONTENT_DIRECTIVE_NAME: + if (is_last) + goto para; + if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_') + break; + if (c == ':') { + end = current; + state = CONTENT_DIRECTIVE_COLON; + break; + } + state = CONTENT_PARAGRAPH; + break; + + case CONTENT_DIRECTIVE_COLON: + if (c == ':') { + free(directive_name); + directive_name = b_strndup(src + start2, end - start2); + state = CONTENT_DIRECTIVE_ARGUMENT_START; + if (is_last) + goto param_end; + break; + } + if (is_last) + goto para; + state = CONTENT_PARAGRAPH; + break; + + case CONTENT_DIRECTIVE_ARGUMENT_START: + if (c == ' ' || c == '\t') { + if (is_last) + goto param_end; + break; + } + if (c == '\n' || c == '\r' || is_last) { + state = CONTENT_DIRECTIVE_PARAM_PREFIX_START; + directive_argument = NULL; + if (is_last) + goto param_end; + else + start2 = current + 1; + break; + } + start2 = current; + state = CONTENT_DIRECTIVE_ARGUMENT; + break; + + case CONTENT_DIRECTIVE_ARGUMENT: + if (c == '\n' || c == '\r' || is_last) { + state = CONTENT_DIRECTIVE_PARAM_PREFIX_START; + end = is_last && c != '\n' && c != '\r' ? src_len : + (real_end != 0 ? real_end : current); + free(directive_argument); + directive_argument = b_strndup(src + start2, end - start2); + if (is_last) + goto param_end; + else + start2 = current + 1; + } + break; + + case CONTENT_DIRECTIVE_PARAM_PREFIX_START: + if (is_last) + goto para; + if (c == ' ' || c == '\t') + break; + if (c == '\n' || c == '\r') + goto param_end; + prefix = b_strndup(src + start2, current - start2); + state = CONTENT_DIRECTIVE_PARAM_PREFIX; + current--; + break; + + case CONTENT_DIRECTIVE_PARAM_PREFIX: + if (c == ' ' || c == '\t') + break; + if (c == ':' && b_str_starts_with(src + start2, prefix)) { + state = CONTENT_DIRECTIVE_PARAM_KEY_START; + break; + } + state = CONTENT_PARAGRAPH; + if (is_last) + goto para; + break; + + case CONTENT_DIRECTIVE_PARAM_KEY_START: + if (is_last) + goto para; + if (c >= 'a' && c <= 'z') { + start2 = current; + state = CONTENT_DIRECTIVE_PARAM_KEY; + break; + } + state = CONTENT_PARAGRAPH; + break; + + case CONTENT_DIRECTIVE_PARAM_KEY: + if (is_last) + goto para; + if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_') + break; + if (c == ':') { + free(directive_key); + directive_key = b_strndup(src + start2, current - start2); + state = CONTENT_DIRECTIVE_PARAM_VALUE_START; + break; + } + state = CONTENT_PARAGRAPH; + break; + + case CONTENT_DIRECTIVE_PARAM_VALUE_START: + if (is_last) + goto para; + if (c == ' ' || c == '\t') + break; + start2 = current; + state = CONTENT_DIRECTIVE_PARAM_VALUE; + break; + + case CONTENT_DIRECTIVE_PARAM_VALUE: + if (c == '\n' || c == '\r' || is_last) { + state = CONTENT_DIRECTIVE_PARAM_END; + end = is_last && c != '\n' && c != '\r' ? src_len : + (real_end != 0 ? real_end : current); + if (directive_params == NULL) + directive_params = b_trie_new(free); + b_trie_insert(directive_params, directive_key, + b_strndup(src + start2, end - start2)); + free(directive_key); + directive_key = NULL; + if (!is_last) + start2 = current + 1; + } + if (!is_last) + break; + + case CONTENT_DIRECTIVE_PARAM_END: +param_end: + if (c == '\n' || c == '\r' || is_last) { + // FIXME: handle errors in the rest of the parser. + blogc_error_t *err = NULL; + blogc_directive_ctx_t *ctx = b_malloc( + sizeof(blogc_directive_ctx_t)); + ctx->name = directive_name; + ctx->argument = directive_argument; + ctx->params = directive_params; + ctx->eol = line_ending; + char *rv_d = blogc_directive_loader(ctx, &err); + free(ctx); + blogc_error_print(err); + if (rv_d != NULL) + b_string_append(rv, rv_d); + free(rv_d); + state = CONTENT_START_LINE; + start = current; + free(directive_name); + directive_name = NULL; + free(directive_argument); + directive_argument = NULL; + b_trie_free(directive_params); + directive_params = NULL; + free(prefix); + prefix = NULL; + break; + } + if (c == ' ' || c == '\t') { + start2 = current; + state = CONTENT_DIRECTIVE_PARAM_PREFIX; + break; + } + state = CONTENT_PARAGRAPH; + if (is_last) + goto para; + break; + case CONTENT_PARAGRAPH: if (c == '\n' || c == '\r' || is_last) { state = CONTENT_PARAGRAPH_END; @@ -987,5 +1191,11 @@ para: current++; } + free(directive_name); + free(directive_argument); + free(directive_key); + b_trie_free(directive_params); + free(prefix); + return b_string_free(rv, false); } |