diff options
Diffstat (limited to 'src/content-parser.c')
-rw-r--r-- | src/content-parser.c | 271 |
1 file changed, 253 insertions, 18 deletions
diff --git a/src/content-parser.c b/src/content-parser.c index 0c99e31..0561d20 100644 --- a/src/content-parser.c +++ b/src/content-parser.c @@ -14,6 +14,7 @@ #include <string.h> #include "content-parser.h" +#include "directives.h" #include "utils.h" // this is a half ass implementation of a markdown-like syntax. bugs are @@ -77,7 +78,7 @@ blogc_htmlentities(const char *str) typedef enum { CONTENT_START_LINE = 1, - CONTENT_EXCERPT, + CONTENT_EXCERPT_OR_DIRECTIVE, CONTENT_EXCERPT_END, CONTENT_HEADER, CONTENT_HEADER_TITLE_START, @@ -98,6 +99,18 @@ typedef enum { CONTENT_ORDERED_LIST_SPACE, CONTENT_ORDERED_LIST_START, CONTENT_ORDERED_LIST_END, + CONTENT_DIRECTIVE_NAME_START, + CONTENT_DIRECTIVE_NAME, + CONTENT_DIRECTIVE_COLON, + CONTENT_DIRECTIVE_ARGUMENT_START, + CONTENT_DIRECTIVE_ARGUMENT, + CONTENT_DIRECTIVE_PARAM_PREFIX_START, + CONTENT_DIRECTIVE_PARAM_PREFIX, + CONTENT_DIRECTIVE_PARAM_KEY_START, + CONTENT_DIRECTIVE_PARAM_KEY, + CONTENT_DIRECTIVE_PARAM_VALUE_START, + CONTENT_DIRECTIVE_PARAM_VALUE, + CONTENT_DIRECTIVE_PARAM_END, CONTENT_PARAGRAPH, CONTENT_PARAGRAPH_END, } blogc_content_parser_state_t; @@ -480,6 +493,11 @@ blogc_content_parse(const char *src, size_t *end_excerpt, char **description) char *parsed = NULL; char *slug = NULL; + char *directive_name = NULL; + char *directive_argument = NULL; + char *directive_key = NULL; + sb_trie_t *directive_params = NULL; + // this isn't empty because we need some reasonable default value in the // unlikely case that we need to print some line ending before evaluating // the "real" value. 
@@ -530,11 +548,9 @@ blogc_content_parse(const char *src, size_t *end_excerpt, char **description) break; start = current; if (c == '.') { - if (end_excerpt != NULL) { - eend = rv->len; // fuck it - state = CONTENT_EXCERPT; - break; - } + eend = rv->len; // fuck it + state = CONTENT_EXCERPT_OR_DIRECTIVE; + break; } if (c == '#') { header_level = 1; @@ -569,26 +585,31 @@ blogc_content_parse(const char *src, size_t *end_excerpt, char **description) state = CONTENT_PARAGRAPH; break; - case CONTENT_EXCERPT: - if (end_excerpt != NULL) { - if (c == '.') - break; - if (c == '\n' || c == '\r') { - state = CONTENT_EXCERPT_END; - break; + case CONTENT_EXCERPT_OR_DIRECTIVE: + if (c == '.') + break; + if ((c == ' ' || c == '\t') && current - start == 2) { + state = CONTENT_DIRECTIVE_NAME_START; + if (is_last) { + state = CONTENT_PARAGRAPH; + continue; } + break; + } + if (c == '\n' || c == '\r') { + state = CONTENT_EXCERPT_END; + break; } eend = 0; state = CONTENT_PARAGRAPH; break; case CONTENT_EXCERPT_END: - if (end_excerpt != NULL) { - if (c == '\n' || c == '\r') { + if (c == '\n' || c == '\r') { + if (end_excerpt != NULL) *end_excerpt = eend; - state = CONTENT_START_LINE; - break; - } + state = CONTENT_START_LINE; + break; } eend = 0; state = CONTENT_PARAGRAPH_END; @@ -1010,6 +1031,214 @@ blogc_content_parse(const char *src, size_t *end_excerpt, char **description) } break; + case CONTENT_DIRECTIVE_NAME_START: + if (is_last) { + state = CONTENT_PARAGRAPH; + continue; + } + if (c >= 'a' && c <= 'z') { + start2 = current; + state = CONTENT_DIRECTIVE_NAME; + break; + } + state = CONTENT_PARAGRAPH; + break; + + case CONTENT_DIRECTIVE_NAME: + if (is_last) { + state = CONTENT_PARAGRAPH; + continue; + } + if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_') + break; + if (c == ':') { + end = current; + state = CONTENT_DIRECTIVE_COLON; + break; + } + state = CONTENT_PARAGRAPH; + break; + + case CONTENT_DIRECTIVE_COLON: + if (c == ':') { + free(directive_name); + 
directive_name = sb_strndup(src + start2, end - start2); + state = CONTENT_DIRECTIVE_ARGUMENT_START; + if (is_last) { + state = CONTENT_DIRECTIVE_PARAM_END; + continue; + } + break; + } + if (is_last) { + state = CONTENT_PARAGRAPH; + continue; + } + state = CONTENT_PARAGRAPH; + break; + + case CONTENT_DIRECTIVE_ARGUMENT_START: + if (c == ' ' || c == '\t') { + if (is_last) { + state = CONTENT_DIRECTIVE_PARAM_END; + continue; + } + break; + } + if (c == '\n' || c == '\r' || is_last) { + state = CONTENT_DIRECTIVE_PARAM_PREFIX_START; + directive_argument = NULL; + if (is_last) { + state = CONTENT_DIRECTIVE_PARAM_END; + continue; + } + else + start2 = current + 1; + break; + } + start2 = current; + state = CONTENT_DIRECTIVE_ARGUMENT; + break; + + case CONTENT_DIRECTIVE_ARGUMENT: + if (c == '\n' || c == '\r' || is_last) { + state = CONTENT_DIRECTIVE_PARAM_PREFIX_START; + end = is_last && c != '\n' && c != '\r' ? src_len : + (real_end != 0 ? real_end : current); + free(directive_argument); + directive_argument = sb_strndup(src + start2, end - start2); + if (is_last) { + state = CONTENT_DIRECTIVE_PARAM_END; + continue; + } + else + start2 = current + 1; + } + break; + + case CONTENT_DIRECTIVE_PARAM_PREFIX_START: + if (is_last) { + state = CONTENT_PARAGRAPH; + continue; + } + if (c == ' ' || c == '\t') + break; + if (c == '\n' || c == '\r') { + state = CONTENT_DIRECTIVE_PARAM_END; + continue; + } + prefix = sb_strndup(src + start2, current - start2); + state = CONTENT_DIRECTIVE_PARAM_PREFIX; + current--; + break; + + case CONTENT_DIRECTIVE_PARAM_PREFIX: + if (c == ' ' || c == '\t') + break; + if (c == ':' && sb_str_starts_with(src + start2, prefix)) { + state = CONTENT_DIRECTIVE_PARAM_KEY_START; + break; + } + state = CONTENT_PARAGRAPH; + if (is_last) + continue; + break; + + case CONTENT_DIRECTIVE_PARAM_KEY_START: + if (is_last) { + state = CONTENT_PARAGRAPH; + continue; + } + if (c >= 'a' && c <= 'z') { + start2 = current; + state = CONTENT_DIRECTIVE_PARAM_KEY; + break; + 
} + state = CONTENT_PARAGRAPH; + break; + + case CONTENT_DIRECTIVE_PARAM_KEY: + if (is_last) { + state = CONTENT_PARAGRAPH; + continue; + } + if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_') + break; + if (c == ':') { + free(directive_key); + directive_key = sb_strndup(src + start2, current - start2); + state = CONTENT_DIRECTIVE_PARAM_VALUE_START; + break; + } + state = CONTENT_PARAGRAPH; + break; + + case CONTENT_DIRECTIVE_PARAM_VALUE_START: + if (is_last) { + state = CONTENT_PARAGRAPH; + continue; + } + if (c == ' ' || c == '\t') + break; + start2 = current; + state = CONTENT_DIRECTIVE_PARAM_VALUE; + break; + + case CONTENT_DIRECTIVE_PARAM_VALUE: + if (c == '\n' || c == '\r' || is_last) { + state = CONTENT_DIRECTIVE_PARAM_END; + end = is_last && c != '\n' && c != '\r' ? src_len : + (real_end != 0 ? real_end : current); + if (directive_params == NULL) + directive_params = sb_trie_new(free); + sb_trie_insert(directive_params, directive_key, + sb_strndup(src + start2, end - start2)); + free(directive_key); + directive_key = NULL; + if (!is_last) + start2 = current + 1; + } + if (!is_last) + break; + + case CONTENT_DIRECTIVE_PARAM_END: + if (c == '\n' || c == '\r' || is_last) { + // FIXME: handle errors in the rest of the parser. 
+ blogc_error_t *err = NULL; + blogc_directive_ctx_t *ctx = sb_malloc( + sizeof(blogc_directive_ctx_t)); + ctx->name = directive_name; + ctx->argument = directive_argument; + ctx->params = directive_params; + ctx->eol = line_ending; + char *rv_d = blogc_directive_loader(ctx, &err); + free(ctx); + blogc_error_print(err); + if (rv_d != NULL) + sb_string_append(rv, rv_d); + free(rv_d); + state = CONTENT_START_LINE; + start = current; + free(directive_name); + directive_name = NULL; + free(directive_argument); + directive_argument = NULL; + sb_trie_free(directive_params); + directive_params = NULL; + free(prefix); + prefix = NULL; + break; + } + if (c == ' ' || c == '\t') { + start2 = current; + state = CONTENT_DIRECTIVE_PARAM_PREFIX; + break; + } + state = CONTENT_PARAGRAPH; + if (is_last) + continue; + break; + case CONTENT_PARAGRAPH: if (c == '\n' || c == '\r' || is_last) { state = CONTENT_PARAGRAPH_END; @@ -1043,5 +1272,11 @@ blogc_content_parse(const char *src, size_t *end_excerpt, char **description) current++; } + free(directive_name); + free(directive_argument); + free(directive_key); + sb_trie_free(directive_params); + free(prefix); + return sb_string_free(rv, false); } |