aboutsummaryrefslogtreecommitdiffstats
path: root/src/content-parser.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/content-parser.c')
-rw-r--r-- src/content-parser.c 271
1 files changed, 253 insertions, 18 deletions
diff --git a/src/content-parser.c b/src/content-parser.c
index 0c99e31..0561d20 100644
--- a/src/content-parser.c
+++ b/src/content-parser.c
@@ -14,6 +14,7 @@
#include <string.h>
#include "content-parser.h"
+#include "directives.h"
#include "utils.h"
// this is a half ass implementation of a markdown-like syntax. bugs are
@@ -77,7 +78,7 @@ blogc_htmlentities(const char *str)
typedef enum {
CONTENT_START_LINE = 1,
- CONTENT_EXCERPT,
+ CONTENT_EXCERPT_OR_DIRECTIVE,
CONTENT_EXCERPT_END,
CONTENT_HEADER,
CONTENT_HEADER_TITLE_START,
@@ -98,6 +99,18 @@ typedef enum {
CONTENT_ORDERED_LIST_SPACE,
CONTENT_ORDERED_LIST_START,
CONTENT_ORDERED_LIST_END,
+ CONTENT_DIRECTIVE_NAME_START,
+ CONTENT_DIRECTIVE_NAME,
+ CONTENT_DIRECTIVE_COLON,
+ CONTENT_DIRECTIVE_ARGUMENT_START,
+ CONTENT_DIRECTIVE_ARGUMENT,
+ CONTENT_DIRECTIVE_PARAM_PREFIX_START,
+ CONTENT_DIRECTIVE_PARAM_PREFIX,
+ CONTENT_DIRECTIVE_PARAM_KEY_START,
+ CONTENT_DIRECTIVE_PARAM_KEY,
+ CONTENT_DIRECTIVE_PARAM_VALUE_START,
+ CONTENT_DIRECTIVE_PARAM_VALUE,
+ CONTENT_DIRECTIVE_PARAM_END,
CONTENT_PARAGRAPH,
CONTENT_PARAGRAPH_END,
} blogc_content_parser_state_t;
@@ -480,6 +493,11 @@ blogc_content_parse(const char *src, size_t *end_excerpt, char **description)
char *parsed = NULL;
char *slug = NULL;
+ char *directive_name = NULL;
+ char *directive_argument = NULL;
+ char *directive_key = NULL;
+ sb_trie_t *directive_params = NULL;
+
// this isn't empty because we need some reasonable default value in the
// unlikely case that we need to print some line ending before evaluating
// the "real" value.
@@ -530,11 +548,9 @@ blogc_content_parse(const char *src, size_t *end_excerpt, char **description)
break;
start = current;
if (c == '.') {
- if (end_excerpt != NULL) {
- eend = rv->len; // fuck it
- state = CONTENT_EXCERPT;
- break;
- }
+ eend = rv->len; // fuck it
+ state = CONTENT_EXCERPT_OR_DIRECTIVE;
+ break;
}
if (c == '#') {
header_level = 1;
@@ -569,26 +585,31 @@ blogc_content_parse(const char *src, size_t *end_excerpt, char **description)
state = CONTENT_PARAGRAPH;
break;
- case CONTENT_EXCERPT:
- if (end_excerpt != NULL) {
- if (c == '.')
- break;
- if (c == '\n' || c == '\r') {
- state = CONTENT_EXCERPT_END;
- break;
+ case CONTENT_EXCERPT_OR_DIRECTIVE:
+ if (c == '.')
+ break;
+ if ((c == ' ' || c == '\t') && current - start == 2) {
+ state = CONTENT_DIRECTIVE_NAME_START;
+ if (is_last) {
+ state = CONTENT_PARAGRAPH;
+ continue;
}
+ break;
+ }
+ if (c == '\n' || c == '\r') {
+ state = CONTENT_EXCERPT_END;
+ break;
}
eend = 0;
state = CONTENT_PARAGRAPH;
break;
case CONTENT_EXCERPT_END:
- if (end_excerpt != NULL) {
- if (c == '\n' || c == '\r') {
+ if (c == '\n' || c == '\r') {
+ if (end_excerpt != NULL)
*end_excerpt = eend;
- state = CONTENT_START_LINE;
- break;
- }
+ state = CONTENT_START_LINE;
+ break;
}
eend = 0;
state = CONTENT_PARAGRAPH_END;
@@ -1010,6 +1031,214 @@ blogc_content_parse(const char *src, size_t *end_excerpt, char **description)
}
break;
+ case CONTENT_DIRECTIVE_NAME_START:
+ if (is_last) {
+ state = CONTENT_PARAGRAPH;
+ continue;
+ }
+ if (c >= 'a' && c <= 'z') {
+ start2 = current;
+ state = CONTENT_DIRECTIVE_NAME;
+ break;
+ }
+ state = CONTENT_PARAGRAPH;
+ break;
+
+ case CONTENT_DIRECTIVE_NAME:
+ if (is_last) {
+ state = CONTENT_PARAGRAPH;
+ continue;
+ }
+ if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_')
+ break;
+ if (c == ':') {
+ end = current;
+ state = CONTENT_DIRECTIVE_COLON;
+ break;
+ }
+ state = CONTENT_PARAGRAPH;
+ break;
+
+ case CONTENT_DIRECTIVE_COLON:
+ if (c == ':') {
+ free(directive_name);
+ directive_name = sb_strndup(src + start2, end - start2);
+ state = CONTENT_DIRECTIVE_ARGUMENT_START;
+ if (is_last) {
+ state = CONTENT_DIRECTIVE_PARAM_END;
+ continue;
+ }
+ break;
+ }
+ if (is_last) {
+ state = CONTENT_PARAGRAPH;
+ continue;
+ }
+ state = CONTENT_PARAGRAPH;
+ break;
+
+ case CONTENT_DIRECTIVE_ARGUMENT_START:
+ if (c == ' ' || c == '\t') {
+ if (is_last) {
+ state = CONTENT_DIRECTIVE_PARAM_END;
+ continue;
+ }
+ break;
+ }
+ if (c == '\n' || c == '\r' || is_last) {
+ state = CONTENT_DIRECTIVE_PARAM_PREFIX_START;
+ directive_argument = NULL;
+ if (is_last) {
+ state = CONTENT_DIRECTIVE_PARAM_END;
+ continue;
+ }
+ else
+ start2 = current + 1;
+ break;
+ }
+ start2 = current;
+ state = CONTENT_DIRECTIVE_ARGUMENT;
+ break;
+
+ case CONTENT_DIRECTIVE_ARGUMENT:
+ if (c == '\n' || c == '\r' || is_last) {
+ state = CONTENT_DIRECTIVE_PARAM_PREFIX_START;
+ end = is_last && c != '\n' && c != '\r' ? src_len :
+ (real_end != 0 ? real_end : current);
+ free(directive_argument);
+ directive_argument = sb_strndup(src + start2, end - start2);
+ if (is_last) {
+ state = CONTENT_DIRECTIVE_PARAM_END;
+ continue;
+ }
+ else
+ start2 = current + 1;
+ }
+ break;
+
+ case CONTENT_DIRECTIVE_PARAM_PREFIX_START:
+ if (is_last) {
+ state = CONTENT_PARAGRAPH;
+ continue;
+ }
+ if (c == ' ' || c == '\t')
+ break;
+ if (c == '\n' || c == '\r') {
+ state = CONTENT_DIRECTIVE_PARAM_END;
+ continue;
+ }
+ prefix = sb_strndup(src + start2, current - start2);
+ state = CONTENT_DIRECTIVE_PARAM_PREFIX;
+ current--;
+ break;
+
+ case CONTENT_DIRECTIVE_PARAM_PREFIX:
+ if (c == ' ' || c == '\t')
+ break;
+ if (c == ':' && sb_str_starts_with(src + start2, prefix)) {
+ state = CONTENT_DIRECTIVE_PARAM_KEY_START;
+ break;
+ }
+ state = CONTENT_PARAGRAPH;
+ if (is_last)
+ continue;
+ break;
+
+ case CONTENT_DIRECTIVE_PARAM_KEY_START:
+ if (is_last) {
+ state = CONTENT_PARAGRAPH;
+ continue;
+ }
+ if (c >= 'a' && c <= 'z') {
+ start2 = current;
+ state = CONTENT_DIRECTIVE_PARAM_KEY;
+ break;
+ }
+ state = CONTENT_PARAGRAPH;
+ break;
+
+ case CONTENT_DIRECTIVE_PARAM_KEY:
+ if (is_last) {
+ state = CONTENT_PARAGRAPH;
+ continue;
+ }
+ if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_')
+ break;
+ if (c == ':') {
+ free(directive_key);
+ directive_key = sb_strndup(src + start2, current - start2);
+ state = CONTENT_DIRECTIVE_PARAM_VALUE_START;
+ break;
+ }
+ state = CONTENT_PARAGRAPH;
+ break;
+
+ case CONTENT_DIRECTIVE_PARAM_VALUE_START:
+ if (is_last) {
+ state = CONTENT_PARAGRAPH;
+ continue;
+ }
+ if (c == ' ' || c == '\t')
+ break;
+ start2 = current;
+ state = CONTENT_DIRECTIVE_PARAM_VALUE;
+ break;
+
+ case CONTENT_DIRECTIVE_PARAM_VALUE:
+ if (c == '\n' || c == '\r' || is_last) {
+ state = CONTENT_DIRECTIVE_PARAM_END;
+ end = is_last && c != '\n' && c != '\r' ? src_len :
+ (real_end != 0 ? real_end : current);
+ if (directive_params == NULL)
+ directive_params = sb_trie_new(free);
+ sb_trie_insert(directive_params, directive_key,
+ sb_strndup(src + start2, end - start2));
+ free(directive_key);
+ directive_key = NULL;
+ if (!is_last)
+ start2 = current + 1;
+ }
+ if (!is_last)
+ break;
+ // fallthrough: on the last input character, flush the directive below.
+ case CONTENT_DIRECTIVE_PARAM_END:
+ if (c == '\n' || c == '\r' || is_last) {
+ // FIXME: handle errors in the rest of the parser.
+ blogc_error_t *err = NULL;
+ blogc_directive_ctx_t *ctx = sb_malloc(
+ sizeof(blogc_directive_ctx_t));
+ ctx->name = directive_name;
+ ctx->argument = directive_argument;
+ ctx->params = directive_params;
+ ctx->eol = line_ending;
+ char *rv_d = blogc_directive_loader(ctx, &err);
+ free(ctx);
+ blogc_error_print(err);
+ if (rv_d != NULL)
+ sb_string_append(rv, rv_d);
+ free(rv_d);
+ state = CONTENT_START_LINE;
+ start = current;
+ free(directive_name);
+ directive_name = NULL;
+ free(directive_argument);
+ directive_argument = NULL;
+ sb_trie_free(directive_params);
+ directive_params = NULL;
+ free(prefix);
+ prefix = NULL;
+ break;
+ }
+ if (c == ' ' || c == '\t') {
+ start2 = current;
+ state = CONTENT_DIRECTIVE_PARAM_PREFIX;
+ break;
+ }
+ state = CONTENT_PARAGRAPH;
+ if (is_last)
+ continue;
+ break;
+
case CONTENT_PARAGRAPH:
if (c == '\n' || c == '\r' || is_last) {
state = CONTENT_PARAGRAPH_END;
@@ -1043,5 +1272,11 @@ blogc_content_parse(const char *src, size_t *end_excerpt, char **description)
current++;
}
+ free(directive_name);
+ free(directive_argument);
+ free(directive_key);
+ sb_trie_free(directive_params);
+ free(prefix);
+
return sb_string_free(rv, false);
}