diff options
Diffstat (limited to 'src/blogc')
-rw-r--r-- | src/blogc/content-parser.c | 1264 | ||||
-rw-r--r-- | src/blogc/content-parser.h | 23 | ||||
-rw-r--r-- | src/blogc/datetime-parser.c | 386 | ||||
-rw-r--r-- | src/blogc/datetime-parser.h | 17 | ||||
-rw-r--r-- | src/blogc/debug.c | 80 | ||||
-rw-r--r-- | src/blogc/debug.h | 16 | ||||
-rw-r--r-- | src/blogc/error.c | 139 | ||||
-rw-r--r-- | src/blogc/error.h | 34 | ||||
-rw-r--r-- | src/blogc/file.c | 81 | ||||
-rw-r--r-- | src/blogc/file.h | 21 | ||||
-rw-r--r-- | src/blogc/loader.c | 212 | ||||
-rw-r--r-- | src/blogc/loader.h | 21 | ||||
-rw-r--r-- | src/blogc/main.c | 297 | ||||
-rw-r--r-- | src/blogc/renderer.c | 457 | ||||
-rw-r--r-- | src/blogc/renderer.h | 24 | ||||
-rw-r--r-- | src/blogc/source-parser.c | 218 | ||||
-rw-r--r-- | src/blogc/source-parser.h | 19 | ||||
-rw-r--r-- | src/blogc/template-parser.c | 679 | ||||
-rw-r--r-- | src/blogc/template-parser.h | 53 |
19 files changed, 4041 insertions, 0 deletions
diff --git a/src/blogc/content-parser.c b/src/blogc/content-parser.c new file mode 100644 index 0000000..e751548 --- /dev/null +++ b/src/blogc/content-parser.c @@ -0,0 +1,1264 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015-2016 Rafael G. Martins <rafael@rafaelmartins.eng.br> + * + * This program can be distributed under the terms of the BSD License. + * See the file LICENSE. + */ + +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> + +#include "content-parser.h" +#include "../common/utils.h" + +// this is a half ass implementation of a markdown-like syntax. bugs are +// expected. feel free to improve the parser and add new features. + + +char* +blogc_slugify(const char *str) +{ + if (str == NULL) + return NULL; + char *new_str = sb_strdup(str); + int diff = 'a' - 'A'; // just to avoid magic numbers + for (size_t i = 0; new_str[i] != '\0'; i++) { + if (new_str[i] >= 'a' && new_str[i] <= 'z') + continue; + if (new_str[i] >= '0' && new_str[i] <= '9') + continue; + if (new_str[i] >= 'A' && new_str[i] <= 'Z') + new_str[i] += diff; + else + new_str[i] = '-'; + } + return new_str; +} + + +static const char* +htmlentities(char c) +{ + switch (c) { + case '&': + return "&"; + case '<': + return "<"; + case '>': + return ">"; + case '"': + return """; + case '\'': + return "'"; + case '/': + return "/"; + } + return NULL; +} + + +static void +htmlentities_append(sb_string_t *str, char c) +{ + const char *e = htmlentities(c); + if (e == NULL) + sb_string_append_c(str, c); + else + sb_string_append(str, e); +} + + +char* +blogc_htmlentities(const char *str) +{ + if (str == NULL) + return NULL; + sb_string_t *rv = sb_string_new(); + for (size_t i = 0; str[i] != '\0'; i++) + htmlentities_append(rv, str[i]); + return sb_string_free(rv, false); +} + + +char* +blogc_fix_description(const char *paragraph) +{ + if (paragraph == NULL) + return NULL; + sb_string_t *rv = sb_string_new(); + bool last = false; + bool newline = false; + char *tmp = NULL; + size_t 
start = 0; + size_t current = 0; + while (true) { + switch (paragraph[current]) { + case '\0': + last = true; + case '\r': + case '\n': + if (newline) + break; + tmp = sb_strndup(paragraph + start, current - start); + sb_string_append(rv, sb_str_strip(tmp)); + free(tmp); + tmp = NULL; + if (!last) + sb_string_append_c(rv, ' '); + start = current + 1; + newline = true; + break; + default: + newline = false; + } + if (last) + break; + current++; + } + tmp = blogc_htmlentities(sb_str_strip(rv->str)); + sb_string_free(rv, true); + return tmp; +} + + +typedef enum { + CONTENT_START_LINE = 1, + CONTENT_EXCERPT, + CONTENT_EXCERPT_END, + CONTENT_HEADER, + CONTENT_HEADER_TITLE_START, + CONTENT_HEADER_TITLE, + CONTENT_HTML, + CONTENT_HTML_END, + CONTENT_BLOCKQUOTE, + CONTENT_BLOCKQUOTE_START, + CONTENT_BLOCKQUOTE_END, + CONTENT_CODE, + CONTENT_CODE_START, + CONTENT_CODE_END, + CONTENT_UNORDERED_LIST_OR_HORIZONTAL_RULE, + CONTENT_HORIZONTAL_RULE, + CONTENT_UNORDERED_LIST_START, + CONTENT_UNORDERED_LIST_END, + CONTENT_ORDERED_LIST, + CONTENT_ORDERED_LIST_SPACE, + CONTENT_ORDERED_LIST_START, + CONTENT_ORDERED_LIST_END, + CONTENT_PARAGRAPH, + CONTENT_PARAGRAPH_END, +} blogc_content_parser_state_t; + + +typedef enum { + CONTENT_INLINE_START = 1, + CONTENT_INLINE_ASTERISK, + CONTENT_INLINE_ASTERISK_DOUBLE, + CONTENT_INLINE_UNDERSCORE, + CONTENT_INLINE_UNDERSCORE_DOUBLE, + CONTENT_INLINE_BACKTICKS, + CONTENT_INLINE_BACKTICKS_DOUBLE, + CONTENT_INLINE_LINK_START, + CONTENT_INLINE_LINK_AUTO, + CONTENT_INLINE_LINK_CONTENT, + CONTENT_INLINE_LINK_URL_START, + CONTENT_INLINE_LINK_URL, + CONTENT_INLINE_IMAGE_START, + CONTENT_INLINE_IMAGE_ALT, + CONTENT_INLINE_IMAGE_URL_START, + CONTENT_INLINE_IMAGE_URL, + CONTENT_INLINE_ENDASH, + CONTENT_INLINE_EMDASH, + CONTENT_INLINE_LINE_BREAK_START, + CONTENT_INLINE_LINE_BREAK, +} blogc_content_parser_inline_state_t; + + +static char* +blogc_content_parse_inline_internal(const char *src, size_t src_len) +{ + size_t current = 0; + size_t start = 0; + 
size_t count = 0; + + const char *tmp = NULL; + char *tmp2 = NULL; + char *tmp3 = NULL; + + size_t start_link = 0; + char *link1 = NULL; + + sb_string_t *rv = sb_string_new(); + + blogc_content_parser_inline_state_t state = CONTENT_INLINE_START; + + while (current < src_len) { + char c = src[current]; + bool is_last = current == src_len - 1; + + switch (state) { + case CONTENT_INLINE_START: + if (is_last) { + htmlentities_append(rv, c); + break; + } + if (c == '\\') { + htmlentities_append(rv, src[++current]); + break; + } + if (c == '*') { + state = CONTENT_INLINE_ASTERISK; + break; + } + if (c == '_') { + state = CONTENT_INLINE_UNDERSCORE; + break; + } + if (c == '`') { + state = CONTENT_INLINE_BACKTICKS; + break; + } + if (c == '[') { + state = CONTENT_INLINE_LINK_START; + break; + } + if (c == '!') { + state = CONTENT_INLINE_IMAGE_START; + break; + } + if (c == '-') { + state = CONTENT_INLINE_ENDASH; + break; + } + if (c == ' ') { + state = CONTENT_INLINE_LINE_BREAK_START; + break; + } + htmlentities_append(rv, c); + break; + + case CONTENT_INLINE_ASTERISK: + if (c == '*') { + state = CONTENT_INLINE_ASTERISK_DOUBLE; + break; + } + tmp = sb_str_find(src + current, '*'); + if (tmp == NULL || ((tmp - src) >= src_len)) { + sb_string_append_c(rv, '*'); + state = CONTENT_INLINE_START; + continue; + } + tmp2 = blogc_content_parse_inline_internal( + src + current, (tmp - src) - current); + sb_string_append_printf(rv, "<em>%s</em>", tmp2); + current = tmp - src; + tmp = NULL; + free(tmp2); + tmp2 = NULL; + state = CONTENT_INLINE_START; + break; + + case CONTENT_INLINE_ASTERISK_DOUBLE: + tmp = src + current; + do { + tmp = sb_str_find(tmp, '*'); + if (((tmp - src) < src_len) && *(tmp + 1) == '*') { + break; + } + tmp++; + } while (tmp != NULL && (tmp - src) < src_len); + if (tmp == NULL || ((tmp - src) >= src_len)) { + sb_string_append_c(rv, '*'); + sb_string_append_c(rv, '*'); + state = CONTENT_INLINE_START; + continue; + } + tmp2 = blogc_content_parse_inline_internal( 
+ src + current, (tmp - src) - current); + sb_string_append_printf(rv, "<strong>%s</strong>", tmp2); + current = tmp - src + 1; + tmp = NULL; + free(tmp2); + tmp2 = NULL; + state = CONTENT_INLINE_START; + break; + + case CONTENT_INLINE_UNDERSCORE: + if (c == '_') { + state = CONTENT_INLINE_UNDERSCORE_DOUBLE; + break; + } + tmp = sb_str_find(src + current, '_'); + if (tmp == NULL || ((tmp - src) >= src_len)) { + sb_string_append_c(rv, '_'); + state = CONTENT_INLINE_START; + continue; + } + tmp2 = blogc_content_parse_inline_internal( + src + current, (tmp - src) - current); + sb_string_append_printf(rv, "<em>%s</em>", tmp2); + current = tmp - src; + tmp = NULL; + free(tmp2); + tmp2 = NULL; + state = CONTENT_INLINE_START; + break; + + case CONTENT_INLINE_UNDERSCORE_DOUBLE: + tmp = src + current; + do { + tmp = sb_str_find(tmp, '_'); + if (((tmp - src) < src_len) && *(tmp + 1) == '_') { + break; + } + tmp++; + } while (tmp != NULL && (tmp - src) < src_len); + if (tmp == NULL || ((tmp - src) >= src_len)) { + sb_string_append_c(rv, '_'); + sb_string_append_c(rv, '_'); + state = CONTENT_INLINE_START; + continue; + } + tmp2 = blogc_content_parse_inline_internal( + src + current, (tmp - src) - current); + sb_string_append_printf(rv, "<strong>%s</strong>", tmp2); + current = tmp - src + 1; + tmp = NULL; + free(tmp2); + tmp2 = NULL; + state = CONTENT_INLINE_START; + break; + + case CONTENT_INLINE_BACKTICKS: + if (c == '`') { + state = CONTENT_INLINE_BACKTICKS_DOUBLE; + break; + } + tmp = sb_str_find(src + current, '`'); + if (tmp == NULL || ((tmp - src) >= src_len)) { + sb_string_append_c(rv, '`'); + state = CONTENT_INLINE_START; + continue; + } + tmp3 = sb_strndup(src + current, (tmp - src) - current); + tmp2 = blogc_htmlentities(tmp3); + free(tmp3); + tmp3 = NULL; + sb_string_append(rv, "<code>"); + sb_string_append_escaped(rv, tmp2); + sb_string_append(rv, "</code>"); + current = tmp - src; + tmp = NULL; + free(tmp2); + tmp2 = NULL; + state = CONTENT_INLINE_START; + break; 
+ + case CONTENT_INLINE_BACKTICKS_DOUBLE: + tmp = src + current; + do { + tmp = sb_str_find(tmp, '`'); + if (((tmp - src) < src_len) && *(tmp + 1) == '`') { + break; + } + tmp++; + } while (tmp != NULL && (tmp - src) < src_len); + if (tmp == NULL || ((tmp - src) >= src_len)) { + sb_string_append_c(rv, '`'); + sb_string_append_c(rv, '`'); + state = CONTENT_INLINE_START; + continue; + } + tmp3 = sb_strndup(src + current, (tmp - src) - current); + tmp2 = blogc_htmlentities(tmp3); + free(tmp3); + tmp3 = NULL; + sb_string_append(rv, "<code>"); + sb_string_append_escaped(rv, tmp2); + sb_string_append(rv, "</code>"); + current = tmp - src + 1; + tmp = NULL; + free(tmp2); + tmp2 = NULL; + state = CONTENT_INLINE_START; + break; + + case CONTENT_INLINE_LINK_START: + if (c == '[') { + state = CONTENT_INLINE_LINK_AUTO; + break; + } + start_link = current; + count = 1; + state = CONTENT_INLINE_LINK_CONTENT; + break; + + case CONTENT_INLINE_LINK_AUTO: + tmp = src + current; + do { + tmp = sb_str_find(tmp, ']'); + if (((tmp - src) < src_len) && *(tmp + 1) == ']') { + break; + } + tmp++; + } while (tmp != NULL && (tmp - src) < src_len); + if (tmp == NULL || ((tmp - src) >= src_len)) { + sb_string_append_c(rv, '['); + sb_string_append_c(rv, '['); + state = CONTENT_INLINE_START; + continue; + } + tmp2 = sb_strndup(src + current, (tmp - src) - current); + sb_string_append(rv, "<a href=\""); + sb_string_append_escaped(rv, tmp2); + sb_string_append(rv, "\">"); + sb_string_append_escaped(rv, tmp2); + sb_string_append(rv, "</a>"); + current = tmp - src + 1; + tmp = NULL; + free(tmp2); + tmp2 = NULL; + state = CONTENT_INLINE_START; + break; + + case CONTENT_INLINE_LINK_CONTENT: + if (c == '\\') { + current++; + break; + } + if (c == '[') { // links can be nested :/ + count++; + break; + } + if (c == ']') { + if (--count == 0) { + link1 = sb_strndup(src + start_link, current - start_link); + state = CONTENT_INLINE_LINK_URL_START; + } + } + break; + + case CONTENT_INLINE_LINK_URL_START: + 
if (c == ' ' || c == '\t' || c == '\n' || c == '\r') + break; + if (c == '(') { + state = CONTENT_INLINE_LINK_URL; + start = current + 1; + break; + } + sb_string_append_c(rv, '['); + state = CONTENT_INLINE_START; + current = start_link; + start_link = 0; + continue; + + case CONTENT_INLINE_LINK_URL: + if (c == '\\') { + current++; + break; + } + if (c == ')') { + tmp2 = sb_strndup(src + start, current - start); + tmp3 = blogc_content_parse_inline(link1); + free(link1); + link1 = NULL; + sb_string_append(rv, "<a href=\""); + sb_string_append_escaped(rv, tmp2); + sb_string_append_printf(rv, "\">%s</a>", tmp3); + free(tmp2); + tmp2 = NULL; + free(tmp3); + tmp3 = NULL; + state = CONTENT_INLINE_START; + break; + } + break; + + case CONTENT_INLINE_IMAGE_START: + // we use the same variables used for links, because why not? + if (c == '[') { + state = CONTENT_INLINE_IMAGE_ALT; + start_link = current + 1; + break; + } + sb_string_append_c(rv, '!'); + state = CONTENT_INLINE_START; + continue; + + case CONTENT_INLINE_IMAGE_ALT: + if (c == '\\') { + current++; + break; + } + if (c == ']') { + link1 = sb_strndup(src + start_link, current - start_link); + state = CONTENT_INLINE_IMAGE_URL_START; + } + break; + + case CONTENT_INLINE_IMAGE_URL_START: + if (c == ' ' || c == '\t' || c == '\n' || c == '\r') + break; + if (c == '(') { + state = CONTENT_INLINE_IMAGE_URL; + start = current + 1; + break; + } + sb_string_append_c(rv, '!'); + sb_string_append_c(rv, '['); + state = CONTENT_INLINE_START; + current = start_link; + start_link = 0; + continue; + + case CONTENT_INLINE_IMAGE_URL: + if (c == '\\') { + current++; + break; + } + if (c == ')') { + tmp2 = sb_strndup(src + start, current - start); + sb_string_append(rv, "<img src=\""); + sb_string_append_escaped(rv, tmp2); + sb_string_append(rv, "\" alt=\""); + sb_string_append_escaped(rv, link1); + sb_string_append(rv, "\">"); + free(tmp2); + tmp2 = NULL; + free(link1); + link1 = NULL; + state = CONTENT_INLINE_START; + break; + } + 
break; + + case CONTENT_INLINE_ENDASH: + if (c == '-') { + if (is_last) { + sb_string_append(rv, "–"); + state = CONTENT_INLINE_START; // wat + break; + } + state = CONTENT_INLINE_EMDASH; + break; + } + sb_string_append_c(rv, '-'); + state = CONTENT_INLINE_START; + continue; + + case CONTENT_INLINE_EMDASH: + if (c == '-') { + sb_string_append(rv, "—"); + state = CONTENT_INLINE_START; + break; + } + sb_string_append(rv, "–"); + state = CONTENT_INLINE_START; + continue; + + case CONTENT_INLINE_LINE_BREAK_START: + if (c == ' ') { + if (is_last) { + sb_string_append(rv, "<br />"); + state = CONTENT_INLINE_START; // wat + break; + } + count = 2; + state = CONTENT_INLINE_LINE_BREAK; + break; + } + sb_string_append_c(rv, ' '); + state = CONTENT_INLINE_START; + continue; + + case CONTENT_INLINE_LINE_BREAK: + if (c == ' ') { + if (is_last) { + sb_string_append(rv, "<br />"); + state = CONTENT_INLINE_START; // wat + break; + } + count++; + break; + } + if (c == '\n' || c == '\r') { + sb_string_append_printf(rv, "<br />%c", c); + state = CONTENT_INLINE_START; + break; + } + for (size_t i = 0; i < count; i++) + sb_string_append_c(rv, ' '); + state = CONTENT_INLINE_START; + continue; + } + current++; + } + + switch (state) { + + // if after the end of the loop we are on any of the following states, + // we must call the parser again, from start_link + case CONTENT_INLINE_IMAGE_START: + case CONTENT_INLINE_IMAGE_ALT: + case CONTENT_INLINE_IMAGE_URL_START: + case CONTENT_INLINE_IMAGE_URL: + sb_string_append_c(rv, '!'); + + case CONTENT_INLINE_LINK_CONTENT: + case CONTENT_INLINE_LINK_URL_START: + case CONTENT_INLINE_LINK_URL: + tmp2 = blogc_content_parse_inline(src + start_link); + sb_string_append_c(rv, '['); + sb_string_append_escaped(rv, tmp2); // no need to free, as it wil be done below. + break; + + // add all the other states here explicitly, so the compiler helps us + // not missing any new state that should be handled. 
+ case CONTENT_INLINE_START: + case CONTENT_INLINE_ASTERISK: + case CONTENT_INLINE_ASTERISK_DOUBLE: + case CONTENT_INLINE_UNDERSCORE: + case CONTENT_INLINE_UNDERSCORE_DOUBLE: + case CONTENT_INLINE_BACKTICKS: + case CONTENT_INLINE_BACKTICKS_DOUBLE: + case CONTENT_INLINE_LINK_START: + case CONTENT_INLINE_LINK_AUTO: + case CONTENT_INLINE_ENDASH: + case CONTENT_INLINE_EMDASH: + case CONTENT_INLINE_LINE_BREAK_START: + case CONTENT_INLINE_LINE_BREAK: + break; + } + + free(tmp2); + free(tmp3); + free(link1); + + return sb_string_free(rv, false); +} + + +char* +blogc_content_parse_inline(const char *src) +{ + return blogc_content_parse_inline_internal(src, strlen(src)); +} + + +bool +blogc_is_ordered_list_item(const char *str, size_t prefix_len) +{ + if (str == NULL) + return false; + + if (strlen(str) < 2) + return false; + + size_t i; + + for (i = 0; str[i] >= '0' && str[i] <= '9'; i++); + + if (i == 0) + return false; + if (str[i] != '.') + return false; + + for (i++; i < prefix_len && (str[i] == ' ' || str[i] == '\t'); i++); + + if (str[i] == '\0') + return false; + + return i == prefix_len; +} + + +char* +blogc_content_parse(const char *src, size_t *end_excerpt, char **description) +{ + // src is always nul-terminated. + size_t src_len = strlen(src); + + size_t current = 0; + size_t start = 0; + size_t start2 = 0; + size_t end = 0; + size_t eend = 0; + size_t real_end = 0; + + unsigned int header_level = 0; + char *prefix = NULL; + size_t prefix_len = 0; + char *tmp = NULL; + char *tmp2 = NULL; + char *parsed = NULL; + char *slug = NULL; + + // this isn't empty because we need some reasonable default value in the + // unlikely case that we need to print some line ending before evaluating + // the "real" value. 
+ char line_ending[3] = "\n"; + bool line_ending_found = false; + + char d = '\0'; + + sb_slist_t *lines = NULL; + sb_slist_t *lines2 = NULL; + + sb_string_t *rv = sb_string_new(); + sb_string_t *tmp_str = NULL; + + blogc_content_parser_state_t state = CONTENT_START_LINE; + + while (current < src_len) { + char c = src[current]; + bool is_last = current == src_len - 1; + + if (c == '\n' || c == '\r') { + if ((current + 1) < src_len) { + if ((c == '\n' && src[current + 1] == '\r') || + (c == '\r' && src[current + 1] == '\n')) + { + if (!line_ending_found) { + line_ending[0] = c; + line_ending[1] = src[current + 1]; + line_ending[2] = '\0'; + line_ending_found = true; + } + real_end = current; + c = src[++current]; + is_last = current == src_len - 1; + } + } + if (!line_ending_found) { + line_ending[0] = c; + line_ending[1] = '\0'; + line_ending_found = true; + } + } + + switch (state) { + + case CONTENT_START_LINE: + if (c == '\n' || c == '\r' || is_last) + break; + start = current; + if (c == '.') { + if (end_excerpt != NULL) { + eend = rv->len; // fuck it + state = CONTENT_EXCERPT; + break; + } + } + if (c == '#') { + header_level = 1; + state = CONTENT_HEADER; + break; + } + if (c == '*' || c == '+' || c == '-') { + start2 = current; + state = CONTENT_UNORDERED_LIST_OR_HORIZONTAL_RULE; + d = c; + break; + } + if (c >= '0' && c <= '9') { + start2 = current; + state = CONTENT_ORDERED_LIST; + break; + } + if (c == ' ' || c == '\t') { + start2 = current; + state = CONTENT_CODE; + break; + } + if (c == '<') { + state = CONTENT_HTML; + break; + } + if (c == '>') { + state = CONTENT_BLOCKQUOTE; + start2 = current; + break; + } + state = CONTENT_PARAGRAPH; + break; + + case CONTENT_EXCERPT: + if (end_excerpt != NULL) { + if (c == '.') + break; + if (c == '\n' || c == '\r') { + state = CONTENT_EXCERPT_END; + break; + } + } + eend = 0; + state = CONTENT_PARAGRAPH; + break; + + case CONTENT_EXCERPT_END: + if (end_excerpt != NULL) { + if (c == '\n' || c == '\r') { + 
*end_excerpt = eend; + state = CONTENT_START_LINE; + break; + } + } + eend = 0; + state = CONTENT_PARAGRAPH_END; + break; + + case CONTENT_HEADER: + if (c == '#') { + header_level += 1; + break; + } + if (c == ' ' || c == '\t') { + state = CONTENT_HEADER_TITLE_START; + break; + } + state = CONTENT_PARAGRAPH; + break; + + case CONTENT_HEADER_TITLE_START: + if (c == ' ' || c == '\t') + break; + start = current; + if (c != '\n' && c != '\r') { + state = CONTENT_HEADER_TITLE; + break; + } + + case CONTENT_HEADER_TITLE: + if (c == '\n' || c == '\r' || is_last) { + end = is_last && c != '\n' && c != '\r' ? src_len : + (real_end != 0 ? real_end : current); + tmp = sb_strndup(src + start, end - start); + parsed = blogc_content_parse_inline(tmp); + slug = blogc_slugify(tmp); + if (slug == NULL) + sb_string_append_printf(rv, "<h%d>%s</h%d>%s", + header_level, parsed, header_level, line_ending); + else + sb_string_append_printf(rv, "<h%d id=\"%s\">%s</h%d>%s", + header_level, slug, parsed, header_level, + line_ending); + free(slug); + free(parsed); + parsed = NULL; + free(tmp); + tmp = NULL; + state = CONTENT_START_LINE; + start = current; + } + break; + + case CONTENT_HTML: + if (c == '\n' || c == '\r' || is_last) { + state = CONTENT_HTML_END; + end = is_last && c != '\n' && c != '\r' ? src_len : + (real_end != 0 ? real_end : current); + } + if (!is_last) + break; + + case CONTENT_HTML_END: + if (c == '\n' || c == '\r' || is_last) { + tmp = sb_strndup(src + start, end - start); + sb_string_append_printf(rv, "%s%s", tmp, line_ending); + free(tmp); + tmp = NULL; + state = CONTENT_START_LINE; + start = current; + } + else + state = CONTENT_HTML; + break; + + case CONTENT_BLOCKQUOTE: + if (c == ' ' || c == '\t') + break; + prefix = sb_strndup(src + start, current - start); + state = CONTENT_BLOCKQUOTE_START; + break; + + case CONTENT_BLOCKQUOTE_START: + if (c == '\n' || c == '\r' || is_last) { + end = is_last && c != '\n' && c != '\r' ? src_len : + (real_end != 0 ? 
real_end : current); + tmp = sb_strndup(src + start2, end - start2); + if (sb_str_starts_with(tmp, prefix)) { + lines = sb_slist_append(lines, sb_strdup(tmp + strlen(prefix))); + state = CONTENT_BLOCKQUOTE_END; + } + else { + state = CONTENT_PARAGRAPH; + free(prefix); + prefix = NULL; + sb_slist_free_full(lines, free); + lines = NULL; + if (is_last) { + free(tmp); + tmp = NULL; + continue; + } + } + free(tmp); + tmp = NULL; + } + if (!is_last) + break; + + case CONTENT_BLOCKQUOTE_END: + if (c == '\n' || c == '\r' || is_last) { + tmp_str = sb_string_new(); + for (sb_slist_t *l = lines; l != NULL; l = l->next) + sb_string_append_printf(tmp_str, "%s%s", l->data, + line_ending); + // do not propagate description to blockquote parsing, + // because we just want paragraphs from first level of + // content. + tmp = blogc_content_parse(tmp_str->str, NULL, NULL); + sb_string_append_printf(rv, "<blockquote>%s</blockquote>%s", + tmp, line_ending); + free(tmp); + tmp = NULL; + sb_string_free(tmp_str, true); + tmp_str = NULL; + sb_slist_free_full(lines, free); + lines = NULL; + free(prefix); + prefix = NULL; + state = CONTENT_START_LINE; + start2 = current; + } + else { + start2 = current; + state = CONTENT_BLOCKQUOTE_START; + } + break; + + case CONTENT_CODE: + if (c == ' ' || c == '\t') + break; + prefix = sb_strndup(src + start, current - start); + state = CONTENT_CODE_START; + break; + + case CONTENT_CODE_START: + if (c == '\n' || c == '\r' || is_last) { + end = is_last && c != '\n' && c != '\r' ? src_len : + (real_end != 0 ? 
real_end : current); + tmp = sb_strndup(src + start2, end - start2); + if (sb_str_starts_with(tmp, prefix)) { + lines = sb_slist_append(lines, sb_strdup(tmp + strlen(prefix))); + state = CONTENT_CODE_END; + } + else { + state = CONTENT_PARAGRAPH; + free(prefix); + prefix = NULL; + sb_slist_free_full(lines, free); + lines = NULL; + free(tmp); + tmp = NULL; + if (is_last) + continue; + break; + } + free(tmp); + tmp = NULL; + } + if (!is_last) + break; + + case CONTENT_CODE_END: + if (c == '\n' || c == '\r' || is_last) { + sb_string_append(rv, "<pre><code>"); + for (sb_slist_t *l = lines; l != NULL; l = l->next) { + char *tmp_line = blogc_htmlentities(l->data); + if (l->next == NULL) + sb_string_append_printf(rv, "%s", tmp_line); + else + sb_string_append_printf(rv, "%s%s", tmp_line, + line_ending); + free(tmp_line); + } + sb_string_append_printf(rv, "</code></pre>%s", line_ending); + sb_slist_free_full(lines, free); + lines = NULL; + free(prefix); + prefix = NULL; + state = CONTENT_START_LINE; + start2 = current; + } + else { + start2 = current; + state = CONTENT_CODE_START; + } + break; + + case CONTENT_UNORDERED_LIST_OR_HORIZONTAL_RULE: + if (c == d) { + state = CONTENT_HORIZONTAL_RULE; + if (is_last) + continue; + break; + } + if (c == ' ' || c == '\t') + break; + prefix = sb_strndup(src + start, current - start); + state = CONTENT_UNORDERED_LIST_START; + break; + + case CONTENT_HORIZONTAL_RULE: + if (c == d && !is_last) { + break; + } + if (c == '\n' || c == '\r' || is_last) { + sb_string_append_printf(rv, "<hr />%s", line_ending); + state = CONTENT_START_LINE; + start = current; + d = '\0'; + break; + } + state = CONTENT_PARAGRAPH; + break; + + case CONTENT_UNORDERED_LIST_START: + if (c == '\n' || c == '\r' || is_last) { + end = is_last && c != '\n' && c != '\r' ? src_len : + (real_end != 0 ? 
real_end : current); + tmp = sb_strndup(src + start2, end - start2); + tmp2 = sb_strdup_printf("%-*s", strlen(prefix), ""); + if (sb_str_starts_with(tmp, prefix)) { + if (lines2 != NULL) { + tmp_str = sb_string_new(); + for (sb_slist_t *l = lines2; l != NULL; l = l->next) { + if (l->next == NULL) + sb_string_append_printf(tmp_str, "%s", l->data); + else + sb_string_append_printf(tmp_str, "%s%s", l->data, + line_ending); + } + sb_slist_free_full(lines2, free); + lines2 = NULL; + parsed = blogc_content_parse_inline(tmp_str->str); + sb_string_free(tmp_str, true); + lines = sb_slist_append(lines, sb_strdup(parsed)); + free(parsed); + parsed = NULL; + } + lines2 = sb_slist_append(lines2, sb_strdup(tmp + strlen(prefix))); + } + else if (sb_str_starts_with(tmp, tmp2)) { + lines2 = sb_slist_append(lines2, sb_strdup(tmp + strlen(prefix))); + } + else { + state = CONTENT_PARAGRAPH_END; + free(tmp); + tmp = NULL; + free(tmp2); + tmp2 = NULL; + free(prefix); + prefix = NULL; + sb_slist_free_full(lines, free); + sb_slist_free_full(lines2, free); + lines = NULL; + if (is_last) + continue; + break; + } + free(tmp); + tmp = NULL; + free(tmp2); + tmp2 = NULL; + state = CONTENT_UNORDERED_LIST_END; + } + if (!is_last) + break; + + case CONTENT_UNORDERED_LIST_END: + if (c == '\n' || c == '\r' || is_last) { + if (lines2 != NULL) { + // FIXME: avoid repeting the code below + tmp_str = sb_string_new(); + for (sb_slist_t *l = lines2; l != NULL; l = l->next) { + if (l->next == NULL) + sb_string_append_printf(tmp_str, "%s", l->data); + else + sb_string_append_printf(tmp_str, "%s%s", l->data, + line_ending); + } + sb_slist_free_full(lines2, free); + lines2 = NULL; + parsed = blogc_content_parse_inline(tmp_str->str); + sb_string_free(tmp_str, true); + lines = sb_slist_append(lines, sb_strdup(parsed)); + free(parsed); + parsed = NULL; + } + sb_string_append_printf(rv, "<ul>%s", line_ending); + for (sb_slist_t *l = lines; l != NULL; l = l->next) + sb_string_append_printf(rv, "<li>%s</li>%s", 
l->data, + line_ending); + sb_string_append_printf(rv, "</ul>%s", line_ending); + sb_slist_free_full(lines, free); + lines = NULL; + free(prefix); + prefix = NULL; + state = CONTENT_START_LINE; + start2 = current; + } + else { + start2 = current; + state = CONTENT_UNORDERED_LIST_START; + } + break; + + case CONTENT_ORDERED_LIST: + if (c >= '0' && c <= '9') + break; + if (c == '.') { + state = CONTENT_ORDERED_LIST_SPACE; + break; + } + state = CONTENT_PARAGRAPH; + if (is_last) + continue; + break; + + case CONTENT_ORDERED_LIST_SPACE: + if (c == ' ' || c == '\t') + break; + prefix_len = current - start; + state = CONTENT_ORDERED_LIST_START; + if (c != '\n' && c != '\r' && !is_last) + break; + + case CONTENT_ORDERED_LIST_START: + if (c == '\n' || c == '\r' || is_last) { + end = is_last && c != '\n' && c != '\r' ? src_len : + (real_end != 0 ? real_end : current); + tmp = sb_strndup(src + start2, end - start2); + tmp2 = sb_strdup_printf("%-*s", prefix_len, ""); + if (blogc_is_ordered_list_item(tmp, prefix_len)) { + if (lines2 != NULL) { + tmp_str = sb_string_new(); + for (sb_slist_t *l = lines2; l != NULL; l = l->next) { + if (l->next == NULL) + sb_string_append_printf(tmp_str, "%s", l->data); + else + sb_string_append_printf(tmp_str, "%s%s", l->data, + line_ending); + } + sb_slist_free_full(lines2, free); + lines2 = NULL; + parsed = blogc_content_parse_inline(tmp_str->str); + sb_string_free(tmp_str, true); + lines = sb_slist_append(lines, sb_strdup(parsed)); + free(parsed); + parsed = NULL; + } + lines2 = sb_slist_append(lines2, sb_strdup(tmp + prefix_len)); + } + else if (sb_str_starts_with(tmp, tmp2)) { + lines2 = sb_slist_append(lines2, sb_strdup(tmp + prefix_len)); + } + else { + state = CONTENT_PARAGRAPH_END; + free(tmp); + tmp = NULL; + free(tmp2); + tmp2 = NULL; + free(parsed); + parsed = NULL; + sb_slist_free_full(lines, free); + sb_slist_free_full(lines2, free); + lines = NULL; + if (is_last) + continue; + break; + } + free(tmp); + tmp = NULL; + free(tmp2); + 
tmp2 = NULL; + state = CONTENT_ORDERED_LIST_END; + } + if (!is_last) + break; + + case CONTENT_ORDERED_LIST_END: + if (c == '\n' || c == '\r' || is_last) { + if (lines2 != NULL) { + // FIXME: avoid repeting the code below + tmp_str = sb_string_new(); + for (sb_slist_t *l = lines2; l != NULL; l = l->next) { + if (l->next == NULL) + sb_string_append_printf(tmp_str, "%s", l->data); + else + sb_string_append_printf(tmp_str, "%s%s", l->data, + line_ending); + } + sb_slist_free_full(lines2, free); + lines2 = NULL; + parsed = blogc_content_parse_inline(tmp_str->str); + sb_string_free(tmp_str, true); + lines = sb_slist_append(lines, sb_strdup(parsed)); + free(parsed); + parsed = NULL; + } + sb_string_append_printf(rv, "<ol>%s", line_ending); + for (sb_slist_t *l = lines; l != NULL; l = l->next) + sb_string_append_printf(rv, "<li>%s</li>%s", l->data, + line_ending); + sb_string_append_printf(rv, "</ol>%s", line_ending); + sb_slist_free_full(lines, free); + lines = NULL; + free(prefix); + prefix = NULL; + state = CONTENT_START_LINE; + start2 = current; + } + else { + start2 = current; + state = CONTENT_ORDERED_LIST_START; + } + break; + + case CONTENT_PARAGRAPH: + if (c == '\n' || c == '\r' || is_last) { + state = CONTENT_PARAGRAPH_END; + end = is_last && c != '\n' && c != '\r' ? src_len : + (real_end != 0 ? 
real_end : current); + } + if (!is_last) + break; + + case CONTENT_PARAGRAPH_END: + if (c == '\n' || c == '\r' || is_last) { + tmp = sb_strndup(src + start, end - start); + if (description != NULL && *description == NULL) + *description = blogc_fix_description(tmp); + parsed = blogc_content_parse_inline(tmp); + sb_string_append_printf(rv, "<p>%s</p>%s", parsed, + line_ending); + free(parsed); + parsed = NULL; + free(tmp); + tmp = NULL; + state = CONTENT_START_LINE; + start = current; + } + else + state = CONTENT_PARAGRAPH; + break; + + } + + current++; + } + + return sb_string_free(rv, false); +} diff --git a/src/blogc/content-parser.h b/src/blogc/content-parser.h new file mode 100644 index 0000000..37e38d7 --- /dev/null +++ b/src/blogc/content-parser.h @@ -0,0 +1,23 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015-2016 Rafael G. Martins <rafael@rafaelmartins.eng.br> + * + * This program can be distributed under the terms of the BSD License. + * See the file LICENSE. + */ + +#ifndef _CONTENT_PARSER_H +#define _CONTENT_PARSER_H + +#include <stddef.h> +#include <stdbool.h> + +char* blogc_slugify(const char *str); +char* blogc_htmlentities(const char *str); +char* blogc_fix_description(const char *paragraph); +char* blogc_content_parse_inline(const char *src); +bool blogc_is_ordered_list_item(const char *str, size_t prefix_len); +char* blogc_content_parse(const char *src, size_t *end_excerpt, + char **description); + +#endif /* _CONTENT_PARSER_H */ diff --git a/src/blogc/datetime-parser.c b/src/blogc/datetime-parser.c new file mode 100644 index 0000000..28efb74 --- /dev/null +++ b/src/blogc/datetime-parser.c @@ -0,0 +1,386 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015-2016 Rafael G. Martins <rafael@rafaelmartins.eng.br> + * + * This program can be distributed under the terms of the BSD License. + * See the file LICENSE. 
+ */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif /* HAVE_CONFIG_H */ + +#ifdef HAVE_TIME_H +#include <time.h> +#endif /* HAVE_TIME_H */ + +#include <string.h> + +#include "error.h" +#include "datetime-parser.h" +#include "../common/utils.h" + + +typedef enum { + DATETIME_FIRST_YEAR = 1, + DATETIME_SECOND_YEAR, + DATETIME_THIRD_YEAR, + DATETIME_FOURTH_YEAR, + DATETIME_FIRST_HYPHEN, + DATETIME_FIRST_MONTH, + DATETIME_SECOND_MONTH, + DATETIME_SECOND_HYPHEN, + DATETIME_FIRST_DAY, + DATETIME_SECOND_DAY, + DATETIME_SPACE, + DATETIME_FIRST_HOUR, + DATETIME_SECOND_HOUR, + DATETIME_FIRST_COLON, + DATETIME_FIRST_MINUTE, + DATETIME_SECOND_MINUTE, + DATETIME_SECOND_COLON, + DATETIME_FIRST_SECOND, + DATETIME_SECOND_SECOND, + DATETIME_DONE, +} blogc_datetime_state_t; + + +char* +blogc_convert_datetime(const char *orig, const char *format, + blogc_error_t **err) +{ + if (err == NULL || *err != NULL) + return NULL; + +#ifndef HAVE_TIME_H + + *err = blogc_error_new(BLOGC_WARNING_DATETIME_PARSER, + "Your operating system does not supports the datetime functionalities " + "used by blogc. Sorry."); + return NULL; + +#else + + struct tm t; + memset(&t, 0, sizeof(struct tm)); + t.tm_isdst = -1; + + blogc_datetime_state_t state = DATETIME_FIRST_YEAR; + int tmp = 0; + int diff = '0'; + + for (unsigned int i = 0; orig[i] != '\0'; i++) { + char c = orig[i]; + + switch (state) { + + case DATETIME_FIRST_YEAR: + if (c >= '0' && c <= '9') { + tmp += (c - diff) * 1000; + state = DATETIME_SECOND_YEAR; + break; + } + *err = blogc_error_new_printf(BLOGC_WARNING_DATETIME_PARSER, + "Invalid first digit of year. " + "Found '%c', must be integer >= 0 and <= 9.", c); + break; + + case DATETIME_SECOND_YEAR: + if (c >= '0' && c <= '9') { + tmp += (c - diff) * 100; + state = DATETIME_THIRD_YEAR; + break; + } + *err = blogc_error_new_printf(BLOGC_WARNING_DATETIME_PARSER, + "Invalid second digit of year. 
" + "Found '%c', must be integer >= 0 and <= 9.", c); + break; + + case DATETIME_THIRD_YEAR: + if (c >= '0' && c <= '9') { + tmp += (c - diff) * 10; + state = DATETIME_FOURTH_YEAR; + break; + } + *err = blogc_error_new_printf(BLOGC_WARNING_DATETIME_PARSER, + "Invalid third digit of year. " + "Found '%c', must be integer >= 0 and <= 9.", c); + break; + + case DATETIME_FOURTH_YEAR: + if (c >= '0' && c <= '9') { + tmp += c - diff - 1900; + if (tmp < 0) { + *err = blogc_error_new_printf(BLOGC_WARNING_DATETIME_PARSER, + "Invalid year. Found %d, must be >= 1900.", + tmp + 1900); + break; + } + t.tm_year = tmp; + state = DATETIME_FIRST_HYPHEN; + break; + } + *err = blogc_error_new_printf(BLOGC_WARNING_DATETIME_PARSER, + "Invalid fourth digit of year. " + "Found '%c', must be integer >= 0 and <= 9.", c); + break; + + case DATETIME_FIRST_HYPHEN: + if (c == '-') { + tmp = 0; + state = DATETIME_FIRST_MONTH; + break; + } + *err = blogc_error_new_printf(BLOGC_WARNING_DATETIME_PARSER, + "Invalid separator between year and month. " + "Found '%c', must be '-'.", c); + break; + + case DATETIME_FIRST_MONTH: + if (c >= '0' && c <= '1') { + tmp += (c - diff) * 10; + state = DATETIME_SECOND_MONTH; + break; + } + *err = blogc_error_new_printf(BLOGC_WARNING_DATETIME_PARSER, + "Invalid first digit of month. " + "Found '%c', must be integer >= 0 and <= 1.", c); + break; + + case DATETIME_SECOND_MONTH: + if (c >= '0' && c <= '9') { + tmp += c - diff - 1; + if (tmp < 0 || tmp > 11) { + *err = blogc_error_new_printf(BLOGC_WARNING_DATETIME_PARSER, + "Invalid month. Found %d, must be >= 1 and <= 12.", + tmp + 1); + break; + } + t.tm_mon = tmp; + state = DATETIME_SECOND_HYPHEN; + break; + } + *err = blogc_error_new_printf(BLOGC_WARNING_DATETIME_PARSER, + "Invalid second digit of month. 
" + "Found '%c', must be integer >= 0 and <= 9.", c); + break; + + case DATETIME_SECOND_HYPHEN: + if (c == '-') { + tmp = 0; + state = DATETIME_FIRST_DAY; + break; + } + *err = blogc_error_new_printf(BLOGC_WARNING_DATETIME_PARSER, + "Invalid separator between month and day. " + "Found '%c', must be '-'.", c); + break; + + case DATETIME_FIRST_DAY: + if (c >= '0' && c <= '3') { + tmp += (c - diff) * 10; + state = DATETIME_SECOND_DAY; + break; + } + *err = blogc_error_new_printf(BLOGC_WARNING_DATETIME_PARSER, + "Invalid first digit of day. " + "Found '%c', must be integer >= 0 and <= 3.", c); + break; + + case DATETIME_SECOND_DAY: + if (c >= '0' && c <= '9') { + tmp += c - diff; + if (tmp < 1 || tmp > 31) { + *err = blogc_error_new_printf(BLOGC_WARNING_DATETIME_PARSER, + "Invalid day. Found %d, must be >= 1 and <= 31.", + tmp); + break; + } + t.tm_mday = tmp; + state = DATETIME_SPACE; + break; + } + *err = blogc_error_new_printf(BLOGC_WARNING_DATETIME_PARSER, + "Invalid second digit of day. " + "Found '%c', must be integer >= 0 and <= 9.", c); + break; + + case DATETIME_SPACE: + if (c == ' ') { + tmp = 0; + state = DATETIME_FIRST_HOUR; + break; + } + *err = blogc_error_new_printf(BLOGC_WARNING_DATETIME_PARSER, + "Invalid separator between date and time. " + "Found '%c', must be ' ' (empty space).", c); + break; + + case DATETIME_FIRST_HOUR: + if (c >= '0' && c <= '2') { + tmp += (c - diff) * 10; + state = DATETIME_SECOND_HOUR; + break; + } + *err = blogc_error_new_printf(BLOGC_WARNING_DATETIME_PARSER, + "Invalid first digit of hours. " + "Found '%c', must be integer >= 0 and <= 2.", c); + break; + + case DATETIME_SECOND_HOUR: + if (c >= '0' && c <= '9') { + tmp += c - diff; + if (tmp < 0 || tmp > 23) { + *err = blogc_error_new_printf(BLOGC_WARNING_DATETIME_PARSER, + "Invalid hours. 
Found %d, must be >= 0 and <= 23.", + tmp); + break; + } + t.tm_hour = tmp; + state = DATETIME_FIRST_COLON; + break; + } + *err = blogc_error_new_printf(BLOGC_WARNING_DATETIME_PARSER, + "Invalid second digit of hours. " + "Found '%c', must be integer >= 0 and <= 9.", c); + break; + + case DATETIME_FIRST_COLON: + if (c == ':') { + tmp = 0; + state = DATETIME_FIRST_MINUTE; + break; + } + *err = blogc_error_new_printf(BLOGC_WARNING_DATETIME_PARSER, + "Invalid separator between hours and minutes. " + "Found '%c', must be ':'.", c); + break; + + case DATETIME_FIRST_MINUTE: + if (c >= '0' && c <= '5') { + tmp += (c - diff) * 10; + state = DATETIME_SECOND_MINUTE; + break; + } + *err = blogc_error_new_printf(BLOGC_WARNING_DATETIME_PARSER, + "Invalid first digit of minutes. " + "Found '%c', must be integer >= 0 and <= 5.", c); + break; + + case DATETIME_SECOND_MINUTE: + if (c >= '0' && c <= '9') { + tmp += c - diff; + if (tmp < 0 || tmp > 59) { + // this won't happen because we are restricting the digits + // to 00-59 already, but lets keep the code here for + // reference. + *err = blogc_error_new_printf(BLOGC_WARNING_DATETIME_PARSER, + "Invalid minutes. Found %d, must be >= 0 and <= 59.", + tmp); + break; + } + t.tm_min = tmp; + state = DATETIME_SECOND_COLON; + break; + } + *err = blogc_error_new_printf(BLOGC_WARNING_DATETIME_PARSER, + "Invalid second digit of minutes. " + "Found '%c', must be integer >= 0 and <= 9.", c); + break; + + case DATETIME_SECOND_COLON: + if (c == ':') { + tmp = 0; + state = DATETIME_FIRST_SECOND; + break; + } + *err = blogc_error_new_printf(BLOGC_WARNING_DATETIME_PARSER, + "Invalid separator between minutes and seconds. " + "Found '%c', must be ':'.", c); + break; + + case DATETIME_FIRST_SECOND: + if (c >= '0' && c <= '6') { + tmp += (c - diff) * 10; + state = DATETIME_SECOND_SECOND; + break; + } + *err = blogc_error_new_printf(BLOGC_WARNING_DATETIME_PARSER, + "Invalid first digit of seconds. 
" + "Found '%c', must be integer >= 0 and <= 6.", c); + break; + + case DATETIME_SECOND_SECOND: + if (c >= '0' && c <= '9') { + tmp += c - diff; + if (tmp < 0 || tmp > 60) { + *err = blogc_error_new_printf(BLOGC_WARNING_DATETIME_PARSER, + "Invalid seconds. Found %d, must be >= 0 and <= 60.", + tmp); + break; + } + t.tm_sec = tmp; + state = DATETIME_DONE; + break; + } + *err = blogc_error_new_printf(BLOGC_WARNING_DATETIME_PARSER, + "Invalid second digit of seconds. " + "Found '%c', must be integer >= 0 and <= 9.", c); + break; + + case DATETIME_DONE: + // well, its done ;) + break; + } + + if (*err != NULL) + return NULL; + } + + if (*err == NULL) { + switch (state) { + case DATETIME_FIRST_YEAR: + case DATETIME_SECOND_YEAR: + case DATETIME_THIRD_YEAR: + case DATETIME_FOURTH_YEAR: + case DATETIME_FIRST_HYPHEN: + case DATETIME_FIRST_MONTH: + case DATETIME_SECOND_MONTH: + case DATETIME_SECOND_HYPHEN: + case DATETIME_FIRST_DAY: + case DATETIME_SECOND_DAY: + case DATETIME_FIRST_HOUR: + case DATETIME_SECOND_HOUR: + case DATETIME_FIRST_MINUTE: + case DATETIME_SECOND_MINUTE: + case DATETIME_FIRST_SECOND: + case DATETIME_SECOND_SECOND: + *err = blogc_error_new_printf(BLOGC_WARNING_DATETIME_PARSER, + "Invalid datetime string. 
" + "Found '%s', formats allowed are: 'yyyy-mm-dd hh:mm:ss', " + "'yyyy-mm-dd hh:ss', 'yyyy-mm-dd hh' and 'yyyy-mm-dd'.", + orig); + return NULL; + + case DATETIME_SPACE: + case DATETIME_FIRST_COLON: + case DATETIME_SECOND_COLON: + case DATETIME_DONE: + break; // these states are ok + } + } + + mktime(&t); + + char buf[1024]; + if (0 == strftime(buf, sizeof(buf), format, &t)) { + *err = blogc_error_new_printf(BLOGC_WARNING_DATETIME_PARSER, + "Failed to format DATE variable, FORMAT is too long: %s", + format); + return NULL; + } + + return sb_strdup(buf); + +#endif +} diff --git a/src/blogc/datetime-parser.h b/src/blogc/datetime-parser.h new file mode 100644 index 0000000..a5087b3 --- /dev/null +++ b/src/blogc/datetime-parser.h @@ -0,0 +1,17 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015-2016 Rafael G. Martins <rafael@rafaelmartins.eng.br> + * + * This program can be distributed under the terms of the BSD License. + * See the file LICENSE. + */ + +#ifndef _DATETIME_H +#define _DATETIME_H + +#include "error.h" + +char* blogc_convert_datetime(const char *orig, const char *format, + blogc_error_t **err); + +#endif /* _DATETIME_H */ diff --git a/src/blogc/debug.c b/src/blogc/debug.c new file mode 100644 index 0000000..2840f60 --- /dev/null +++ b/src/blogc/debug.c @@ -0,0 +1,80 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015-2016 Rafael G. Martins <rafael@rafaelmartins.eng.br> + * + * This program can be distributed under the terms of the BSD License. + * See the file LICENSE. 
+ */ + +#include <stdio.h> + +#include "template-parser.h" +#include "../common/utils.h" +#include "debug.h" + + +static const char* +get_operator(blogc_template_stmt_operator_t op) +{ + if (op & BLOGC_TEMPLATE_OP_NEQ) + return "!="; + if (op & BLOGC_TEMPLATE_OP_EQ) { + if (op & BLOGC_TEMPLATE_OP_LT) + return "<="; + else if (op & BLOGC_TEMPLATE_OP_GT) + return ">="; + return "=="; + } + if (op & BLOGC_TEMPLATE_OP_LT) + return "<"; + else if (op & BLOGC_TEMPLATE_OP_GT) + return ">"; + return ""; +} + + +void +blogc_debug_template(sb_slist_t *stmts) +{ + for (sb_slist_t *tmp = stmts; tmp != NULL; tmp = tmp->next) { + blogc_template_stmt_t *data = tmp->data; + fprintf(stderr, "DEBUG: <TEMPLATE "); + switch (data->type) { + case BLOGC_TEMPLATE_IFDEF_STMT: + fprintf(stderr, "IFDEF: %s", data->value); + break; + case BLOGC_TEMPLATE_IFNDEF_STMT: + fprintf(stderr, "IFNDEF: %s", data->value); + break; + case BLOGC_TEMPLATE_IF_STMT: + fprintf(stderr, "IF: %s %s %s", data->value, + get_operator(data->op), data->value2); + break; + case BLOGC_TEMPLATE_ELSE_STMT: + fprintf(stderr, "ELSE"); + break; + case BLOGC_TEMPLATE_ENDIF_STMT: + fprintf(stderr, "ENDIF"); + break; + case BLOGC_TEMPLATE_FOREACH_STMT: + fprintf(stderr, "FOREACH: %s", data->value); + break; + case BLOGC_TEMPLATE_ENDFOREACH_STMT: + fprintf(stderr, "ENDFOREACH"); + break; + case BLOGC_TEMPLATE_BLOCK_STMT: + fprintf(stderr, "BLOCK: %s", data->value); + break; + case BLOGC_TEMPLATE_ENDBLOCK_STMT: + fprintf(stderr, "ENDBLOCK"); + break; + case BLOGC_TEMPLATE_VARIABLE_STMT: + fprintf(stderr, "VARIABLE: %s", data->value); + break; + case BLOGC_TEMPLATE_CONTENT_STMT: + fprintf(stderr, "CONTENT: `%s`", data->value); + break; + } + fprintf(stderr, ">\n"); + } +} diff --git a/src/blogc/debug.h b/src/blogc/debug.h new file mode 100644 index 0000000..eb4e2c1 --- /dev/null +++ b/src/blogc/debug.h @@ -0,0 +1,16 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015-2016 Rafael G. 
Martins <rafael@rafaelmartins.eng.br> + * + * This program can be distributed under the terms of the BSD License. + * See the file LICENSE. + */ + +#ifndef ___DEBUG_H +#define ___DEBUG_H + +#include "../common/utils.h" + +void blogc_debug_template(sb_slist_t *stmts); + +#endif /* ___DEBUG_H */ diff --git a/src/blogc/error.c b/src/blogc/error.c new file mode 100644 index 0000000..41f4cd0 --- /dev/null +++ b/src/blogc/error.c @@ -0,0 +1,139 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015-2016 Rafael G. Martins <rafael@rafaelmartins.eng.br> + * + * This program can be distributed under the terms of the BSD License. + * See the file LICENSE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include "error.h" +#include "../common/utils.h" + + +blogc_error_t* +blogc_error_new(blogc_error_type_t type, const char *msg) +{ + blogc_error_t *err = sb_malloc(sizeof(blogc_error_t)); + err->type = type; + err->msg = sb_strdup(msg); + return err; +} + + +blogc_error_t* +blogc_error_new_printf(blogc_error_type_t type, const char *format, ...) +{ + va_list ap; + va_start(ap, format); + char *tmp = sb_strdup_vprintf(format, ap); + va_end(ap); + blogc_error_t *rv = blogc_error_new(type, tmp); + free(tmp); + return rv; +} + + +blogc_error_t* +blogc_error_parser(blogc_error_type_t type, const char *src, size_t src_len, + size_t current, const char *format, ...) 
+{ + va_list ap; + va_start(ap, format); + char *msg = sb_strdup_vprintf(format, ap); + va_end(ap); + + size_t lineno = 1; + size_t linestart = 0; + size_t lineend = 0; + size_t pos = 1; + + for (size_t i = 0; i < src_len; i++) { + char c = src[i]; + if (i < current) { + if ((i + 1) < src_len) { + if ((c == '\n' && src[i + 1] == '\r') || + (c == '\r' && src[i + 1] == '\n')) + { + lineno++; + i++; + pos = 1; + if ((i + 1) < src_len) + linestart = i + 1; + continue; + } + } + if (c == '\n' || c == '\r') { + lineno++; + pos = 1; + if ((i + 1) < src_len) + linestart = i + 1; + continue; + } + pos++; + } + else if (c == '\n' || c == '\r') { + lineend = i; + break; + } + } + + if (lineend <= linestart && src_len >= linestart) + lineend = src_len; + + char *line = sb_strndup(src + linestart, lineend - linestart); + + blogc_error_t *rv = NULL; + + if (line[0] == '\0') // "near" message isn't useful if line is empty + rv = blogc_error_new(type, msg); + else + rv = blogc_error_new_printf(type, + "%s\nError occurred near line %d, position %d: %s", msg, lineno, + pos, line); + + free(msg); + free(line); + + return rv; +} + + +void +blogc_error_print(blogc_error_t *err) +{ + if (err == NULL) + return; + + switch(err->type) { + case BLOGC_ERROR_SOURCE_PARSER: + fprintf(stderr, "blogc: error: source: %s\n", err->msg); + break; + case BLOGC_ERROR_TEMPLATE_PARSER: + fprintf(stderr, "blogc: error: template: %s\n", err->msg); + break; + case BLOGC_ERROR_LOADER: + fprintf(stderr, "blogc: error: loader: %s\n", err->msg); + break; + case BLOGC_ERROR_FILE: + fprintf(stderr, "blogc: error: file: %s\n", err->msg); + break; + case BLOGC_WARNING_DATETIME_PARSER: + fprintf(stderr, "blogc: warning: datetime: %s\n", err->msg); + break; + default: + fprintf(stderr, "blogc: error: %s\n", err->msg); + } +} + + +void +blogc_error_free(blogc_error_t *err) +{ + if (err == NULL) + return; + free(err->msg); + free(err); +} diff --git a/src/blogc/error.h b/src/blogc/error.h new file mode 100644 index 
0000000..31fbaf2 --- /dev/null +++ b/src/blogc/error.h @@ -0,0 +1,34 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015-2016 Rafael G. Martins <rafael@rafaelmartins.eng.br> + * + * This program can be distributed under the terms of the BSD License. + * See the file LICENSE. + */ + +#ifndef _ERROR_H +#define _ERROR_H + +#include <stddef.h> + +typedef enum { + BLOGC_ERROR_SOURCE_PARSER = 1, + BLOGC_ERROR_TEMPLATE_PARSER, + BLOGC_ERROR_LOADER, + BLOGC_ERROR_FILE, + BLOGC_WARNING_DATETIME_PARSER, +} blogc_error_type_t; + +typedef struct { + char *msg; + blogc_error_type_t type; +} blogc_error_t; + +blogc_error_t* blogc_error_new(blogc_error_type_t type, const char *msg); +blogc_error_t* blogc_error_new_printf(blogc_error_type_t type, const char *format, ...); +blogc_error_t* blogc_error_parser(blogc_error_type_t type, const char *src, + size_t src_len, size_t current, const char *format, ...); +void blogc_error_print(blogc_error_t *err); +void blogc_error_free(blogc_error_t *err); + +#endif /* _ERROR_H */ diff --git a/src/blogc/file.c b/src/blogc/file.c new file mode 100644 index 0000000..b3b0c5b --- /dev/null +++ b/src/blogc/file.c @@ -0,0 +1,81 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015-2016 Rafael G. Martins <rafael@rafaelmartins.eng.br> + * + * This program can be distributed under the terms of the BSD License. + * See the file LICENSE. + */ + +#include <errno.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include "file.h" +#include "error.h" +#include "../common/utf8.h" +#include "../common/utils.h" + +// this would belong to loader.c, but we need it in a separated file to be +// able to mock it when unit testing the loader functions. 
+ + +char* +blogc_file_get_contents(const char *path, size_t *len, blogc_error_t **err) +{ + if (path == NULL || err == NULL || *err != NULL) + return NULL; + + *len = 0; + FILE *fp = fopen(path, "r"); + + if (fp == NULL) { + int tmp_errno = errno; + *err = blogc_error_new_printf(BLOGC_ERROR_FILE, + "Failed to open file (%s): %s", path, strerror(tmp_errno)); + return NULL; + } + + sb_string_t *str = sb_string_new(); + char buffer[BLOGC_FILE_CHUNK_SIZE]; + char *tmp; + + while (!feof(fp)) { + size_t read_len = fread(buffer, sizeof(char), BLOGC_FILE_CHUNK_SIZE, fp); + + tmp = buffer; + + if (str->len == 0 && read_len > 0) { + // skipping BOM before validation, for performance. should be safe + // enough + size_t skip = blogc_utf8_skip_bom((uint8_t*) buffer, read_len); + read_len -= skip; + tmp += skip; + } + + *len += read_len; + sb_string_append_len(str, tmp, read_len); + } + fclose(fp); + + if (!blogc_utf8_validate_str(str)) { + *err = blogc_error_new_printf(BLOGC_ERROR_FILE, + "File content is not valid UTF-8: %s", path); + sb_string_free(str, true); + return NULL; + } + + return sb_string_free(str, false); +} + + +int +blogc_fprintf(FILE *stream, const char *format, ...) +{ + va_list ap; + va_start(ap, format); + int rv = vfprintf(stream, format, ap); + va_end(ap); + return rv; +} diff --git a/src/blogc/file.h b/src/blogc/file.h new file mode 100644 index 0000000..d2c4390 --- /dev/null +++ b/src/blogc/file.h @@ -0,0 +1,21 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015-2016 Rafael G. Martins <rafael@rafaelmartins.eng.br> + * + * This program can be distributed under the terms of the BSD License. + * See the file LICENSE. 
+ */ + +#ifndef _FILE_H +#define _FILE_H + +#include <stddef.h> +#include <stdio.h> +#include "error.h" + +#define BLOGC_FILE_CHUNK_SIZE 1024 + +char* blogc_file_get_contents(const char *path, size_t *len, blogc_error_t **err); +int blogc_fprintf(FILE *stream, const char *format, ...); + +#endif /* _FILE_H */ diff --git a/src/blogc/loader.c b/src/blogc/loader.c new file mode 100644 index 0000000..90f2401 --- /dev/null +++ b/src/blogc/loader.c @@ -0,0 +1,212 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015-2016 Rafael G. Martins <rafael@rafaelmartins.eng.br> + * + * This program can be distributed under the terms of the BSD License. + * See the file LICENSE. + */ + +#include <math.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "file.h" +#include "source-parser.h" +#include "template-parser.h" +#include "loader.h" +#include "error.h" +#include "../common/utils.h" + + +char* +blogc_get_filename(const char *f) +{ + if (f == NULL) + return NULL; + + if (strlen(f) == 0) + return NULL; + + char *filename = sb_strdup(f); + + // keep a pointer to original string + char *tmp = filename; + + bool removed_dot = false; + for (int i = strlen(tmp); i >= 0 ; i--) { + + // remove last extension + if (!removed_dot && tmp[i] == '.') { + tmp[i] = '\0'; + removed_dot = true; + continue; + } + + if (tmp[i] == '/' || tmp[i] == '\\') { + tmp += i + 1; + break; + } + } + + char *final_filename = sb_strdup(tmp); + free(filename); + + return final_filename; +} + + +sb_slist_t* +blogc_template_parse_from_file(const char *f, blogc_error_t **err) +{ + if (err == NULL || *err != NULL) + return NULL; + size_t len; + char *s = blogc_file_get_contents(f, &len, err); + if (s == NULL) + return NULL; + sb_slist_t *rv = blogc_template_parse(s, len, err); + free(s); + return rv; +} + + +sb_trie_t* +blogc_source_parse_from_file(const char *f, blogc_error_t **err) +{ + if (err == NULL || *err != NULL) + return NULL; + size_t 
len; + char *s = blogc_file_get_contents(f, &len, err); + if (s == NULL) + return NULL; + sb_trie_t *rv = blogc_source_parse(s, len, err); + + // set FILENAME variable + if (rv != NULL) { + char *filename = blogc_get_filename(f); + if (filename != NULL) + sb_trie_insert(rv, "FILENAME", filename); + } + + free(s); + return rv; +} + + +sb_slist_t* +blogc_source_parse_from_files(sb_trie_t *conf, sb_slist_t *l, blogc_error_t **err) +{ + blogc_error_t *tmp_err = NULL; + sb_slist_t *rv = NULL; + unsigned int with_date = 0; + + const char *filter_tag = sb_trie_lookup(conf, "FILTER_TAG"); + const char *filter_page = sb_trie_lookup(conf, "FILTER_PAGE"); + const char *filter_per_page = sb_trie_lookup(conf, "FILTER_PER_PAGE"); + + long page = strtol(filter_page != NULL ? filter_page : "", NULL, 10); + if (page <= 0) + page = 1; + long per_page = strtol(filter_per_page != NULL ? filter_per_page : "10", + NULL, 10); + if (per_page <= 0) + per_page = 10; + + // poor man's pagination + unsigned int start = (page - 1) * per_page; + unsigned int end = start + per_page; + unsigned int counter = 0; + + for (sb_slist_t *tmp = l; tmp != NULL; tmp = tmp->next) { + char *f = tmp->data; + sb_trie_t *s = blogc_source_parse_from_file(f, &tmp_err); + if (s == NULL) { + *err = blogc_error_new_printf(BLOGC_ERROR_LOADER, + "An error occurred while parsing source file: %s\n\n%s", + f, tmp_err->msg); + blogc_error_free(tmp_err); + tmp_err = NULL; + sb_slist_free_full(rv, (sb_free_func_t) sb_trie_free); + rv = NULL; + break; + } + if (filter_tag != NULL) { + const char *tags_str = sb_trie_lookup(s, "TAGS"); + // if user wants to filter by tag and no tag is provided, skip it + if (tags_str == NULL) { + sb_trie_free(s); + continue; + } + char **tags = sb_str_split(tags_str, ' ', 0); + bool found = false; + for (unsigned int i = 0; tags[i] != NULL; i++) { + if (tags[i][0] == '\0') + continue; + if (0 == strcmp(tags[i], filter_tag)) + found = true; + } + sb_strv_free(tags); + if (!found) { + 
sb_trie_free(s); + continue; + } + } + if (filter_page != NULL) { + if (counter < start || counter >= end) { + counter++; + sb_trie_free(s); + continue; + } + counter++; + } + if (sb_trie_lookup(s, "DATE") != NULL) + with_date++; + rv = sb_slist_append(rv, s); + } + + if (with_date > 0 && with_date < sb_slist_length(rv)) + // fatal error, maybe? + blogc_fprintf(stderr, + "blogc: warning: 'DATE' variable provided for at least one source " + "file, but not for all source files. This means that you may get " + "wrong values for 'DATE_FIRST' and 'DATE_LAST' variables.\n"); + + bool first = true; + for (sb_slist_t *tmp = rv; tmp != NULL; tmp = tmp->next) { + sb_trie_t *s = tmp->data; + if (first) { + const char *val = sb_trie_lookup(s, "DATE"); + if (val != NULL) + sb_trie_insert(conf, "DATE_FIRST", sb_strdup(val)); + val = sb_trie_lookup(s, "FILENAME"); + if (val != NULL) + sb_trie_insert(conf, "FILENAME_FIRST", sb_strdup(val)); + first = false; + } + if (tmp->next == NULL) { // last + const char *val = sb_trie_lookup(s, "DATE"); + if (val != NULL) + sb_trie_insert(conf, "DATE_LAST", sb_strdup(val)); + val = sb_trie_lookup(s, "FILENAME"); + if (val != NULL) + sb_trie_insert(conf, "FILENAME_LAST", sb_strdup(val)); + } + } + + if (filter_page != NULL) { + unsigned int last_page = ceilf(((float) counter) / per_page); + sb_trie_insert(conf, "CURRENT_PAGE", sb_strdup_printf("%ld", page)); + if (page > 1) + sb_trie_insert(conf, "PREVIOUS_PAGE", sb_strdup_printf("%ld", page - 1)); + if (page < last_page) + sb_trie_insert(conf, "NEXT_PAGE", sb_strdup_printf("%ld", page + 1)); + if (sb_slist_length(rv) > 0) + sb_trie_insert(conf, "FIRST_PAGE", sb_strdup("1")); + if (last_page > 0) + sb_trie_insert(conf, "LAST_PAGE", sb_strdup_printf("%d", last_page)); + } + + return rv; +} diff --git a/src/blogc/loader.h b/src/blogc/loader.h new file mode 100644 index 0000000..c54d11a --- /dev/null +++ b/src/blogc/loader.h @@ -0,0 +1,21 @@ +/* + * blogc: A blog compiler. 
+ * Copyright (C) 2015-2016 Rafael G. Martins <rafael@rafaelmartins.eng.br> + * + * This program can be distributed under the terms of the BSD License. + * See the file LICENSE. + */ + +#ifndef _LOADER_H +#define _LOADER_H + +#include "error.h" +#include "../common/utils.h" + +char* blogc_get_filename(const char *f); +sb_slist_t* blogc_template_parse_from_file(const char *f, blogc_error_t **err); +sb_trie_t* blogc_source_parse_from_file(const char *f, blogc_error_t **err); +sb_slist_t* blogc_source_parse_from_files(sb_trie_t *conf, sb_slist_t *l, + blogc_error_t **err); + +#endif /* _LOADER_H */ diff --git a/src/blogc/main.c b/src/blogc/main.c new file mode 100644 index 0000000..03f43ae --- /dev/null +++ b/src/blogc/main.c @@ -0,0 +1,297 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015-2016 Rafael G. Martins <rafael@rafaelmartins.eng.br> + * + * This program can be distributed under the terms of the BSD License. + * See the file LICENSE. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif /* HAVE_CONFIG_H */ + +#ifdef HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif /* HAVE_SYS_STAT_H */ + +#include <errno.h> +#include <locale.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "debug.h" +#include "template-parser.h" +#include "loader.h" +#include "renderer.h" +#include "error.h" +#include "../common/utf8.h" +#include "../common/utils.h" + +#ifndef PACKAGE_VERSION +#define PACKAGE_VERSION "Unknown" +#endif + + +static void +blogc_print_help(void) +{ + printf( + "usage:\n" + " blogc [-h] [-v] [-d] [-l] [-D KEY=VALUE ...] [-p KEY] [-t TEMPLATE]\n" + " [-o OUTPUT] [SOURCE ...] 
- A blog compiler.\n" + "\n" + "positional arguments:\n" + " SOURCE source file(s)\n" + "\n" + "optional arguments:\n" + " -h show this help message and exit\n" + " -v show version and exit\n" + " -d enable debug\n" + " -l build listing page, from multiple source files\n" + " -D KEY=VALUE set global configuration parameter\n" + " -p KEY show the value of a global configuration parameter\n" + " after source parsing and exit\n" + " -t TEMPLATE template file\n" + " -o OUTPUT output file\n"); +} + + +static void +blogc_print_usage(void) +{ + printf( + "usage: blogc [-h] [-v] [-d] [-l] [-D KEY=VALUE ...] [-p KEY] [-t TEMPLATE]\n" + " [-o OUTPUT] [SOURCE ...]\n"); +} + + +static void +blogc_mkdir_recursive(const char *filename) +{ + char *fname = sb_strdup(filename); + for (char *tmp = fname; *tmp != '\0'; tmp++) { + if (*tmp != '/' && *tmp != '\\') + continue; +#ifdef HAVE_SYS_STAT_H + char bkp = *tmp; + *tmp = '\0'; + if ((strlen(fname) > 0) && +#if defined(WIN32) || defined(_WIN32) + (-1 == mkdir(fname)) && +#else + (-1 == mkdir(fname, 0777)) && +#endif + (errno != EEXIST)) + { + fprintf(stderr, "blogc: error: failed to create output " + "directory (%s): %s\n", fname, strerror(errno)); + free(fname); + exit(2); + } + *tmp = bkp; +#else + // FIXME: show this warning only if actually trying to create a directory. + fprintf(stderr, "blogc: warning: can't create output directories " + "for your platform. 
please create the directories yourself.\n"); + break; +#endif + } + free(fname); +} + + +int +main(int argc, char **argv) +{ + setlocale(LC_ALL, ""); + + int rv = 0; + + bool debug = false; + bool listing = false; + char *template = NULL; + char *output = NULL; + char *print = NULL; + char *tmp = NULL; + char **pieces = NULL; + + sb_slist_t *sources = NULL; + sb_trie_t *config = sb_trie_new(free); + sb_trie_insert(config, "BLOGC_VERSION", sb_strdup(PACKAGE_VERSION)); + + for (unsigned int i = 1; i < argc; i++) { + tmp = NULL; + if (argv[i][0] == '-') { + switch (argv[i][1]) { + case 'h': + blogc_print_help(); + goto cleanup; + case 'v': + printf("%s\n", PACKAGE_STRING); + goto cleanup; + case 'd': + debug = true; + break; + case 'l': + listing = true; + break; + case 't': + if (argv[i][2] != '\0') + template = sb_strdup(argv[i] + 2); + else if (i + 1 < argc) + template = sb_strdup(argv[++i]); + break; + case 'o': + if (argv[i][2] != '\0') + output = sb_strdup(argv[i] + 2); + else if (i + 1 < argc) + output = sb_strdup(argv[++i]); + break; + case 'p': + if (argv[i][2] != '\0') + print = sb_strdup(argv[i] + 2); + else if (i + 1 < argc) + print = sb_strdup(argv[++i]); + break; + case 'D': + if (argv[i][2] != '\0') + tmp = argv[i] + 2; + else if (i + 1 < argc) + tmp = argv[++i]; + if (tmp != NULL) { + if (!blogc_utf8_validate((uint8_t*) tmp, strlen(tmp))) { + fprintf(stderr, "blogc: error: invalid value for " + "-D (must be valid UTF-8 string): %s\n", tmp); + goto cleanup; + } + pieces = sb_str_split(tmp, '=', 2); + if (sb_strv_length(pieces) != 2) { + fprintf(stderr, "blogc: error: invalid value for " + "-D (must have an '='): %s\n", tmp); + sb_strv_free(pieces); + rv = 2; + goto cleanup; + } + for (unsigned int j = 0; pieces[0][j] != '\0'; j++) { + if (!((pieces[0][j] >= 'A' && pieces[0][j] <= 'Z') || + pieces[0][j] == '_')) + { + fprintf(stderr, "blogc: error: invalid value " + "for -D (configuration key must be uppercase " + "with '_'): %s\n", pieces[0]); + 
sb_strv_free(pieces); + rv = 2; + goto cleanup; + } + } + sb_trie_insert(config, pieces[0], sb_strdup(pieces[1])); + sb_strv_free(pieces); + pieces = NULL; + } + break; + default: + blogc_print_usage(); + fprintf(stderr, "blogc: error: invalid argument: -%c\n", + argv[i][1]); + rv = 2; + goto cleanup; + } + } + else + sources = sb_slist_append(sources, sb_strdup(argv[i])); + } + + if (!listing && sb_slist_length(sources) == 0) { + blogc_print_usage(); + fprintf(stderr, "blogc: error: one source file is required\n"); + rv = 2; + goto cleanup; + } + + if (!listing && sb_slist_length(sources) > 1) { + blogc_print_usage(); + fprintf(stderr, "blogc: error: only one source file should be provided, " + "if running without '-l'\n"); + rv = 2; + goto cleanup; + } + + blogc_error_t *err = NULL; + + sb_slist_t *s = blogc_source_parse_from_files(config, sources, &err); + if (err != NULL) { + blogc_error_print(err); + rv = 2; + goto cleanup2; + } + + if (print != NULL) { + const char *val = sb_trie_lookup(config, print); + if (val == NULL) { + fprintf(stderr, "blogc: error: configuration variable not found: %s\n", + print); + rv = 2; + } + else { + printf("%s\n", val); + } + goto cleanup2; + } + + if (template == NULL) { + blogc_print_usage(); + fprintf(stderr, "blogc: error: argument -t is required when rendering content\n"); + rv = 2; + goto cleanup2; + } + + sb_slist_t* l = blogc_template_parse_from_file(template, &err); + if (err != NULL) { + blogc_error_print(err); + rv = 2; + goto cleanup3; + } + + if (debug) + blogc_debug_template(l); + + char *out = blogc_render(l, s, config, listing); + + bool write_to_stdout = (output == NULL || (0 == strcmp(output, "-"))); + + FILE *fp = stdout; + if (!write_to_stdout) { + blogc_mkdir_recursive(output); + fp = fopen(output, "w"); + if (fp == NULL) { + fprintf(stderr, "blogc: error: failed to open output file (%s): %s\n", + output, strerror(errno)); + rv = 2; + goto cleanup4; + } + } + + if (out != NULL) + fprintf(fp, "%s", out); + + 
if (!write_to_stdout) + fclose(fp); + +cleanup4: + free(out); +cleanup3: + blogc_template_free_stmts(l); +cleanup2: + sb_slist_free_full(s, (sb_free_func_t) sb_trie_free); + blogc_error_free(err); +cleanup: + sb_trie_free(config); + free(template); + free(output); + free(print); + sb_slist_free_full(sources, free); + return rv; +} diff --git a/src/blogc/renderer.c b/src/blogc/renderer.c new file mode 100644 index 0000000..97a226e --- /dev/null +++ b/src/blogc/renderer.c @@ -0,0 +1,457 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015-2016 Rafael G. Martins <rafael@rafaelmartins.eng.br> + * + * This program can be distributed under the terms of the BSD License. + * See the file LICENSE. + */ + +#include <errno.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "datetime-parser.h" +#include "error.h" +#include "template-parser.h" +#include "renderer.h" +#include "../common/utils.h" + + +const char* +blogc_get_variable(const char *name, sb_trie_t *global, sb_trie_t *local) +{ + const char *rv = NULL; + if (local != NULL) { + rv = sb_trie_lookup(local, name); + if (rv != NULL) + return rv; + } + if (global != NULL) + rv = sb_trie_lookup(global, name); + return rv; +} + + +char* +blogc_format_date(const char *date, sb_trie_t *global, sb_trie_t *local) +{ + const char *date_format = blogc_get_variable("DATE_FORMAT", global, local); + if (date == NULL) + return NULL; + if (date_format == NULL) + return sb_strdup(date); + + blogc_error_t *err = NULL; + char *rv = blogc_convert_datetime(date, date_format, &err); + if (err != NULL) { + blogc_error_print(err); + blogc_error_free(err); + return sb_strdup(date); + } + return rv; +} + + +char* +blogc_format_variable(const char *name, sb_trie_t *global, sb_trie_t *local, + sb_slist_t *foreach_var) +{ + // if used asked for a variable that exists, just return it right away + const char *value = blogc_get_variable(name, global, local); + if (value != NULL) + 
return sb_strdup(value); + + // do the same for special variable 'FOREACH_ITEM' + if (0 == strcmp(name, "FOREACH_ITEM")) { + if (foreach_var != NULL && foreach_var->data != NULL) { + return sb_strdup(foreach_var->data); + } + return NULL; + } + + char *var = sb_strdup(name); + + size_t i; + size_t last = strlen(var); + + long int len = -1; + + // just walk till the last '_' + for (i = last - 1; i > 0 && var[i] >= '0' && var[i] <= '9'; i--); + + if (var[i] == '_' && (i + 1) < last) { // var ends with '_[0-9]+' + // passing NULL to endptr because our string was previously validated + len = strtol(var + i + 1, NULL, 10); + if (errno != 0) { + fprintf(stderr, "warning: invalid variable size for '%s' (%s), " + "ignoring.\n", var, strerror(errno)); + len = -1; + } + else { + var[i] = '\0'; + } + } + + bool must_format = false; + + if (sb_str_ends_with(var, "_FORMATTED")) { + var[strlen(var) - 10] = '\0'; + must_format = true; + } + + if ((0 == strcmp(var, "FOREACH_ITEM")) && + (foreach_var != NULL && foreach_var->data != NULL)) + value = foreach_var->data; + else + value = blogc_get_variable(var, global, local); + + if (value == NULL) { + free(var); + return NULL; + } + + char *rv = NULL; + + if (must_format) { + if (sb_str_starts_with(name, "DATE_")) { + rv = blogc_format_date(value, global, local); + } + else { + fprintf(stderr, "warning: no formatter found for '%s', " + "ignoring.\n", var); + rv = sb_strdup(value); + } + } + else { + rv = sb_strdup(value); + } + + free(var); + + if (len > 0) { + char *tmp = sb_strndup(rv, len); + free(rv); + rv = tmp; + } + + return rv; +} + + +sb_slist_t* +blogc_split_list_variable(const char *name, sb_trie_t *global, sb_trie_t *local) +{ + const char *value = blogc_get_variable(name, global, local); + if (value == NULL) + return NULL; + + sb_slist_t *rv = NULL; + + char **tmp = sb_str_split(value, ' ', 0); + for (unsigned int i = 0; tmp[i] != NULL; i++) { + if (tmp[i][0] != '\0') // ignore empty strings + rv = sb_slist_append(rv, 
tmp[i]); + else + free(tmp[i]); + } + free(tmp); + + return rv; +} + + +char* +blogc_render(sb_slist_t *tmpl, sb_slist_t *sources, sb_trie_t *config, bool listing) +{ + if (tmpl == NULL) + return NULL; + + sb_slist_t *current_source = NULL; + sb_slist_t *listing_start = NULL; + + sb_string_t *str = sb_string_new(); + + sb_trie_t *tmp_source = NULL; + char *config_value = NULL; + char *defined = NULL; + + unsigned int if_count = 0; + + sb_slist_t *foreach_var = NULL; + sb_slist_t *foreach_var_start = NULL; + sb_slist_t *foreach_start = NULL; + + bool if_not = false; + bool inside_block = false; + bool evaluate = false; + bool valid_else = false; + + int cmp = 0; + + sb_slist_t *tmp = tmpl; + while (tmp != NULL) { + blogc_template_stmt_t *stmt = tmp->data; + + switch (stmt->type) { + + case BLOGC_TEMPLATE_CONTENT_STMT: + if (stmt->value != NULL) + sb_string_append(str, stmt->value); + break; + + case BLOGC_TEMPLATE_BLOCK_STMT: + inside_block = true; + if_count = 0; + if (0 == strcmp("entry", stmt->value)) { + if (listing) { + + // we can just skip anything and walk until the next + // 'endblock' + while (stmt->type != BLOGC_TEMPLATE_ENDBLOCK_STMT) { + tmp = tmp->next; + stmt = tmp->data; + } + break; + } + current_source = sources; + tmp_source = current_source->data; + } + else if ((0 == strcmp("listing", stmt->value)) || + (0 == strcmp("listing_once", stmt->value))) { + if (!listing) { + + // we can just skip anything and walk until the next + // 'endblock' + while (stmt->type != BLOGC_TEMPLATE_ENDBLOCK_STMT) { + tmp = tmp->next; + stmt = tmp->data; + } + break; + } + } + if (0 == strcmp("listing", stmt->value)) { + if (sources == NULL) { + + // we can just skip anything and walk until the next + // 'endblock' + while (stmt->type != BLOGC_TEMPLATE_ENDBLOCK_STMT) { + tmp = tmp->next; + stmt = tmp->data; + } + break; + } + if (current_source == NULL) { + listing_start = tmp; + current_source = sources; + } + tmp_source = current_source->data; + } + break; + + case 
BLOGC_TEMPLATE_VARIABLE_STMT: + if (stmt->value != NULL) { + config_value = blogc_format_variable(stmt->value, + config, inside_block ? tmp_source : NULL, foreach_var); + if (config_value != NULL) { + sb_string_append(str, config_value); + free(config_value); + config_value = NULL; + break; + } + } + break; + + case BLOGC_TEMPLATE_ENDBLOCK_STMT: + inside_block = false; + if (listing_start != NULL && current_source != NULL) { + current_source = current_source->next; + if (current_source != NULL) { + tmp = listing_start; + continue; + } + else + listing_start = NULL; + } + break; + + case BLOGC_TEMPLATE_IFNDEF_STMT: + if_not = true; + + case BLOGC_TEMPLATE_IF_STMT: + case BLOGC_TEMPLATE_IFDEF_STMT: + if_count = 0; + defined = NULL; + if (stmt->value != NULL) + defined = blogc_format_variable(stmt->value, config, + inside_block ? tmp_source : NULL, foreach_var); + evaluate = false; + if (stmt->op != 0) { + // Strings that start with a '"' are actually strings, the + // others are meant to be looked up as a second variable + // check. + char *defined2 = NULL; + if (stmt->value2 != NULL) { + if ((strlen(stmt->value2) >= 2) && + (stmt->value2[0] == '"') && + (stmt->value2[strlen(stmt->value2) - 1] == '"')) + { + defined2 = sb_strndup(stmt->value2 + 1, + strlen(stmt->value2) - 2); + } + else { + defined2 = blogc_format_variable(stmt->value2, + config, inside_block ? 
tmp_source : NULL, + foreach_var); + } + } + + if (defined != NULL && defined2 != NULL) { + cmp = strcmp(defined, defined2); + if (cmp != 0 && stmt->op & BLOGC_TEMPLATE_OP_NEQ) + evaluate = true; + else if (cmp == 0 && stmt->op & BLOGC_TEMPLATE_OP_EQ) + evaluate = true; + else if (cmp < 0 && stmt->op & BLOGC_TEMPLATE_OP_LT) + evaluate = true; + else if (cmp > 0 && stmt->op & BLOGC_TEMPLATE_OP_GT) + evaluate = true; + } + + free(defined2); + } + else { + if (if_not && defined == NULL) + evaluate = true; + if (!if_not && defined != NULL) + evaluate = true; + } + if (!evaluate) { + + // at this point we can just skip anything, counting the + // number of 'if's, to know how many 'endif's we need to + // skip as well. + while (1) { + tmp = tmp->next; + stmt = tmp->data; + if ((stmt->type == BLOGC_TEMPLATE_IF_STMT) || + (stmt->type == BLOGC_TEMPLATE_IFDEF_STMT) || + (stmt->type == BLOGC_TEMPLATE_IFNDEF_STMT)) + { + if_count++; + continue; + } + if ((stmt->type == BLOGC_TEMPLATE_ELSE_STMT) && + (if_count == 0)) + { + // this is somewhat complex. only an else statement + // right after a non evaluated block should be considered + // valid, because all the inner conditionals were just + // skipped, and all the outter conditionals evaluated + // to true. + valid_else = true; + break; + } + if (stmt->type == BLOGC_TEMPLATE_ENDIF_STMT) { + if (if_count > 0) { + if_count--; + continue; + } + break; + } + } + } + free(defined); + defined = NULL; + if_not = false; + break; + + case BLOGC_TEMPLATE_ELSE_STMT: + if_count = 0; + if (!valid_else) { + + // at this point we can just skip anything, counting the + // number of 'if's, to know how many 'endif's we need to + // skip as well. 
+ while (1) { + tmp = tmp->next; + stmt = tmp->data; + if ((stmt->type == BLOGC_TEMPLATE_IF_STMT) || + (stmt->type == BLOGC_TEMPLATE_IFDEF_STMT) || + (stmt->type == BLOGC_TEMPLATE_IFNDEF_STMT)) + { + if_count++; + continue; + } + // no need to handle else statements here, because every + // if should have an endif. + if (stmt->type == BLOGC_TEMPLATE_ENDIF_STMT) { + if (if_count > 0) { + if_count--; + continue; + } + break; + } + } + } + valid_else = false; + break; + + case BLOGC_TEMPLATE_ENDIF_STMT: + // any endif statement should invalidate valid_else, to avoid + // propagation to outter conditionals. + valid_else = false; + if (if_count > 0) + if_count--; + break; + + case BLOGC_TEMPLATE_FOREACH_STMT: + if (foreach_var_start == NULL) { + if (stmt->value != NULL) + foreach_var_start = blogc_split_list_variable(stmt->value, + config, inside_block ? tmp_source : NULL); + + if (foreach_var_start != NULL) { + foreach_var = foreach_var_start; + foreach_start = tmp; + } + else { + + // we can just skip anything and walk until the next + // 'endforeach' + while (stmt->type != BLOGC_TEMPLATE_ENDFOREACH_STMT) { + tmp = tmp->next; + stmt = tmp->data; + } + break; + } + } + + if (foreach_var == NULL) { + foreach_start = tmp; + foreach_var = foreach_var_start; + } + break; + + case BLOGC_TEMPLATE_ENDFOREACH_STMT: + if (foreach_start != NULL && foreach_var != NULL) { + foreach_var = foreach_var->next; + if (foreach_var != NULL) { + tmp = foreach_start; + continue; + } + } + foreach_start = NULL; + sb_slist_free_full(foreach_var_start, free); + foreach_var_start = NULL; + break; + } + tmp = tmp->next; + } + + // no need to free temporary variables here. the template parser makes sure + // that templates are sane and statements are closed. + + return sb_string_free(str, false); +} diff --git a/src/blogc/renderer.h b/src/blogc/renderer.h new file mode 100644 index 0000000..f7f5328 --- /dev/null +++ b/src/blogc/renderer.h @@ -0,0 +1,24 @@ +/* + * blogc: A blog compiler. 
+ * Copyright (C) 2015-2016 Rafael G. Martins <rafael@rafaelmartins.eng.br> + * + * This program can be distributed under the terms of the BSD License. + * See the file LICENSE. + */ + +#ifndef _RENDERER_H +#define _RENDERER_H + +#include <stdbool.h> +#include "../common/utils.h" + +const char* blogc_get_variable(const char *name, sb_trie_t *global, sb_trie_t *local); +char* blogc_format_date(const char *date, sb_trie_t *global, sb_trie_t *local); +char* blogc_format_variable(const char *name, sb_trie_t *global, sb_trie_t *local, + sb_slist_t *foreach_var); +sb_slist_t* blogc_split_list_variable(const char *name, sb_trie_t *global, + sb_trie_t *local); +char* blogc_render(sb_slist_t *tmpl, sb_slist_t *sources, sb_trie_t *config, + bool listing); + +#endif /* _RENDERER_H */ diff --git a/src/blogc/source-parser.c b/src/blogc/source-parser.c new file mode 100644 index 0000000..4472096 --- /dev/null +++ b/src/blogc/source-parser.c @@ -0,0 +1,218 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015-2016 Rafael G. Martins <rafael@rafaelmartins.eng.br> + * + * This program can be distributed under the terms of the BSD License. + * See the file LICENSE. 
+ */ + +#include <stdlib.h> +#include <string.h> + +#include "content-parser.h" +#include "source-parser.h" +#include "error.h" +#include "../common/utils.h" + + +typedef enum { + SOURCE_START = 1, + SOURCE_CONFIG_KEY, + SOURCE_CONFIG_VALUE_START, + SOURCE_CONFIG_VALUE, + SOURCE_SEPARATOR, + SOURCE_CONTENT_START, + SOURCE_CONTENT, +} blogc_source_parser_state_t; + + +sb_trie_t* +blogc_source_parse(const char *src, size_t src_len, blogc_error_t **err) +{ + if (err == NULL || *err != NULL) + return NULL; + + size_t current = 0; + size_t start = 0; + size_t end_excerpt = 0; + + char *key = NULL; + char *tmp = NULL; + char *content = NULL; + sb_trie_t *rv = sb_trie_new(free); + + blogc_source_parser_state_t state = SOURCE_START; + + while (current < src_len) { + char c = src[current]; + + switch (state) { + + case SOURCE_START: + if (c == ' ' || c == '\t' || c == '\n' || c == '\r') + break; + if (c >= 'A' && c <= 'Z') { + state = SOURCE_CONFIG_KEY; + start = current; + break; + } + if (c == '-') { + state = SOURCE_SEPARATOR; + break; + } + *err = blogc_error_parser(BLOGC_ERROR_SOURCE_PARSER, src, src_len, + current, + "Can't find a configuration key or the content separator."); + break; + + case SOURCE_CONFIG_KEY: + if ((c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_') + break; + if (c == ':') { + key = sb_strndup(src + start, current - start); + if (((current - start == 8) && + (0 == strncmp("FILENAME", src + start, 8))) || + ((current - start == 7) && + (0 == strncmp("CONTENT", src + start, 7))) || + ((current - start == 14) && + (0 == strncmp("DATE_FORMATTED", src + start, 14))) || + ((current - start == 20) && + (0 == strncmp("DATE_FIRST_FORMATTED", src + start, 20))) || + ((current - start == 19) && + (0 == strncmp("DATE_LAST_FORMATTED", src + start, 19))) || + ((current - start == 10) && + (0 == strncmp("PAGE_FIRST", src + start, 10))) || + ((current - start == 13) && + (0 == strncmp("PAGE_PREVIOUS", src + start, 13))) || + ((current - start == 12) && 
+ (0 == strncmp("PAGE_CURRENT", src + start, 12))) || + ((current - start == 9) && + (0 == strncmp("PAGE_NEXT", src + start, 9))) || + ((current - start == 9) && + (0 == strncmp("PAGE_LAST", src + start, 9))) || + ((current - start == 13) && + (0 == strncmp("BLOGC_VERSION", src + start, 13)))) + { + *err = blogc_error_new_printf(BLOGC_ERROR_SOURCE_PARSER, + "'%s' variable is forbidden in source files. It will " + "be set for you by the compiler.", key); + break; + } + state = SOURCE_CONFIG_VALUE_START; + break; + } + *err = blogc_error_parser(BLOGC_ERROR_SOURCE_PARSER, src, src_len, + current, "Invalid configuration key."); + break; + + case SOURCE_CONFIG_VALUE_START: + if (c != '\n' && c != '\r') { + state = SOURCE_CONFIG_VALUE; + start = current; + break; + } + *err = blogc_error_parser(BLOGC_ERROR_SOURCE_PARSER, src, src_len, + current, "Configuration value not provided for '%s'.", + key); + break; + + case SOURCE_CONFIG_VALUE: + if (c == '\n' || c == '\r') { + tmp = sb_strndup(src + start, current - start); + sb_trie_insert(rv, key, sb_strdup(sb_str_strip(tmp))); + free(tmp); + free(key); + key = NULL; + state = SOURCE_START; + } + break; + + case SOURCE_SEPARATOR: + if (c == '-') + break; + if (c == '\n' || c == '\r') { + state = SOURCE_CONTENT_START; + break; + } + *err = blogc_error_parser(BLOGC_ERROR_SOURCE_PARSER, src, src_len, + current, + "Invalid content separator. Must be more than one '-' characters."); + break; + + case SOURCE_CONTENT_START: + if (c == '\n' || c == '\r') + break; + start = current; + state = SOURCE_CONTENT; + break; + + case SOURCE_CONTENT: + if (current == (src_len - 1)) { + tmp = sb_strndup(src + start, src_len - start); + sb_trie_insert(rv, "RAW_CONTENT", tmp); + char *description = NULL; + content = blogc_content_parse(tmp, &end_excerpt, &description); + if (description != NULL) { + // do not override source-provided description. 
+ if (NULL == sb_trie_lookup(rv, "DESCRIPTION")) { + // no need to free, because we are transfering memory + // ownership to the trie. + sb_trie_insert(rv, "DESCRIPTION", description); + } + else { + free(description); + } + } + sb_trie_insert(rv, "CONTENT", content); + sb_trie_insert(rv, "EXCERPT", end_excerpt == 0 ? + sb_strdup(content) : sb_strndup(content, end_excerpt)); + } + break; + } + + if (*err != NULL) + break; + + current++; + } + + if (*err == NULL && sb_trie_size(rv) == 0) { + + // ok, nothing found in the config trie, but no error set either. + // let's try to be nice with the users and provide some reasonable + // output. :) + switch (state) { + case SOURCE_START: + *err = blogc_error_parser(BLOGC_ERROR_SOURCE_PARSER, src, src_len, + current, "Your source file is empty."); + break; + case SOURCE_CONFIG_KEY: + *err = blogc_error_parser(BLOGC_ERROR_SOURCE_PARSER, src, src_len, + current, "Your last configuration key is missing ':' and " + "the value"); + break; + case SOURCE_CONFIG_VALUE_START: + *err = blogc_error_parser(BLOGC_ERROR_SOURCE_PARSER, src, src_len, + current, "Configuration value not provided for '%s'.", + key); + break; + case SOURCE_CONFIG_VALUE: + *err = blogc_error_parser(BLOGC_ERROR_SOURCE_PARSER, src, src_len, + current, "No line ending after the configuration value for " + "'%s'.", key); + break; + case SOURCE_SEPARATOR: + case SOURCE_CONTENT_START: + case SOURCE_CONTENT: + break; // won't happen, and if even happen, shouldn't be fatal + } + } + + if (*err != NULL) { + free(key); + sb_trie_free(rv); + return NULL; + } + + return rv; +} diff --git a/src/blogc/source-parser.h b/src/blogc/source-parser.h new file mode 100644 index 0000000..895cb1b --- /dev/null +++ b/src/blogc/source-parser.h @@ -0,0 +1,19 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015-2016 Rafael G. Martins <rafael@rafaelmartins.eng.br> + * + * This program can be distributed under the terms of the BSD License. + * See the file LICENSE. 
+ */ + +#ifndef _SOURCE_PARSER_H +#define _SOURCE_PARSER_H + +#include <stddef.h> +#include "error.h" +#include "../common/utils.h" + +sb_trie_t* blogc_source_parse(const char *src, size_t src_len, + blogc_error_t **err); + +#endif /* _SOURCE_PARSER_H */ diff --git a/src/blogc/template-parser.c b/src/blogc/template-parser.c new file mode 100644 index 0000000..15750f1 --- /dev/null +++ b/src/blogc/template-parser.c @@ -0,0 +1,679 @@ +/* + * blogc: A blog compiler. + * Copyright (C) 2015-2016 Rafael G. Martins <rafael@rafaelmartins.eng.br> + * + * This program can be distributed under the terms of the BSD License. + * See the file LICENSE. + */ + +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> + +#include "template-parser.h" +#include "error.h" +#include "../common/utils.h" + + +typedef enum { + TEMPLATE_START = 1, + TEMPLATE_OPEN_BRACKET, + TEMPLATE_BLOCK_START, + TEMPLATE_BLOCK_START_WHITESPACE_CLEANER, + TEMPLATE_BLOCK_TYPE, + TEMPLATE_BLOCK_BLOCK_TYPE_START, + TEMPLATE_BLOCK_BLOCK_TYPE, + TEMPLATE_BLOCK_IF_START, + TEMPLATE_BLOCK_IF_VARIABLE, + TEMPLATE_BLOCK_IF_OPERATOR_START, + TEMPLATE_BLOCK_IF_OPERATOR, + TEMPLATE_BLOCK_IF_OPERAND_START, + TEMPLATE_BLOCK_IF_STRING_OPERAND, + TEMPLATE_BLOCK_IF_VARIABLE_OPERAND, + TEMPLATE_BLOCK_FOREACH_START, + TEMPLATE_BLOCK_FOREACH_VARIABLE, + TEMPLATE_BLOCK_END_WHITESPACE_CLEANER, + TEMPLATE_BLOCK_END, + TEMPLATE_VARIABLE_START, + TEMPLATE_VARIABLE, + TEMPLATE_VARIABLE_END, + TEMPLATE_CLOSE_BRACKET, +} blogc_template_parser_state_t; + + +sb_slist_t* +blogc_template_parse(const char *src, size_t src_len, blogc_error_t **err) +{ + if (err == NULL || *err != NULL) + return NULL; + + size_t current = 0; + size_t start = 0; + size_t end = 0; + size_t op_start = 0; + size_t op_end = 0; + size_t start2 = 0; + size_t end2 = 0; + + blogc_template_stmt_operator_t tmp_op = 0; + + unsigned int if_count = 0; + unsigned int block_if_count = 0; + bool else_open = false; + bool foreach_open = false; + bool 
block_foreach_open = false; + + sb_slist_t *stmts = NULL; + blogc_template_stmt_t *stmt = NULL; + + /* + * this is a reference to the content of previous node in the singly-linked + * list. The "correct" solution here would be implement a doubly-linked + * list, but here are a few reasons to avoid it: + * + * - i'm too tired to implement it :P + * - template parser never walk backwards, then the list itself does not + * need to know its previous node. + */ + blogc_template_stmt_t *previous = NULL; + + bool lstrip_next = false; + char *tmp = NULL; + char *block_type = NULL; + + blogc_template_parser_state_t state = TEMPLATE_START; + blogc_template_stmt_type_t type = BLOGC_TEMPLATE_CONTENT_STMT; + + bool block_open = false; + + while (current < src_len) { + char c = src[current]; + bool last = current == src_len - 1; + + switch (state) { + + case TEMPLATE_START: + if (last) { + stmt = sb_malloc(sizeof(blogc_template_stmt_t)); + stmt->type = type; + if (lstrip_next) { + tmp = sb_strndup(src + start, src_len - start); + stmt->value = sb_strdup(sb_str_lstrip(tmp)); + free(tmp); + tmp = NULL; + lstrip_next = false; + } + else { + stmt->value = sb_strndup(src + start, src_len - start); + } + stmt->op = 0; + stmt->value2 = NULL; + stmts = sb_slist_append(stmts, stmt); + previous = stmt; + stmt = NULL; + } + if (c == '{') { + end = current; + state = TEMPLATE_OPEN_BRACKET; + } + break; + + case TEMPLATE_OPEN_BRACKET: + if (c == '%' || c == '{') { + if (c == '%') + state = TEMPLATE_BLOCK_START_WHITESPACE_CLEANER; + else + state = TEMPLATE_VARIABLE_START; + if (end > start) { + stmt = sb_malloc(sizeof(blogc_template_stmt_t)); + stmt->type = type; + if (lstrip_next) { + tmp = sb_strndup(src + start, end - start); + stmt->value = sb_strdup(sb_str_lstrip(tmp)); + free(tmp); + tmp = NULL; + lstrip_next = false; + } + else { + stmt->value = sb_strndup(src + start, end - start); + } + stmt->op = 0; + stmt->value2 = NULL; + stmts = sb_slist_append(stmts, stmt); + previous = stmt; + 
stmt = NULL; + } + break; + } + state = TEMPLATE_START; + break; + + case TEMPLATE_BLOCK_START_WHITESPACE_CLEANER: + if (c == '-') { + if ((previous != NULL) && + (previous->type == BLOGC_TEMPLATE_CONTENT_STMT)) + { + previous->value = sb_str_rstrip(previous->value); // does not need copy + } + state = TEMPLATE_BLOCK_START; + break; + } + state = TEMPLATE_BLOCK_START; + + case TEMPLATE_BLOCK_START: + if (c == ' ') + break; + if (c >= 'a' && c <= 'z') { + state = TEMPLATE_BLOCK_TYPE; + start = current; + break; + } + if (c == '-') { + *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, src, + src_len, current, + "Invalid statement syntax. Duplicated whitespace " + "cleaner before statement."); + break; + } + *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, src, + src_len, current, + "Invalid statement syntax. Must begin with lowercase letter."); + break; + + case TEMPLATE_BLOCK_TYPE: + if (c >= 'a' && c <= 'z') + break; + if (c == ' ') { + if ((current - start == 5) && + (0 == strncmp("block", src + start, 5))) + { + if (!block_open) { + state = TEMPLATE_BLOCK_BLOCK_TYPE_START; + type = BLOGC_TEMPLATE_BLOCK_STMT; + start = current; + block_if_count = if_count; + block_foreach_open = foreach_open; + break; + } + *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, + src, src_len, current, "Blocks can't be nested."); + break; + } + else if ((current - start == 8) && + (0 == strncmp("endblock", src + start, 8))) + { + if (block_open) { + if (if_count != block_if_count) { + *err = blogc_error_new_printf(BLOGC_ERROR_TEMPLATE_PARSER, + "%d open 'if', 'ifdef' and/or 'ifndef' statements " + "were not closed inside a '%s' block!", + if_count - block_if_count, block_type); + break; + } + if (!block_foreach_open && foreach_open) { + *err = blogc_error_new_printf(BLOGC_ERROR_TEMPLATE_PARSER, + "An open 'foreach' statement was not closed " + "inside a '%s' block!", block_type); + break; + } + state = TEMPLATE_BLOCK_END_WHITESPACE_CLEANER; + type = 
BLOGC_TEMPLATE_ENDBLOCK_STMT; + block_open = false; + break; + } + *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, + src, src_len, current, + "'endblock' statement without an open 'block' statement."); + break; + } + else if ((current - start == 5) && + (0 == strncmp("ifdef", src + start, 5))) + { + state = TEMPLATE_BLOCK_IF_START; + type = BLOGC_TEMPLATE_IFDEF_STMT; + start = current; + if_count++; + else_open = false; + break; + } + else if ((current - start == 6) && + (0 == strncmp("ifndef", src + start, 6))) + { + state = TEMPLATE_BLOCK_IF_START; + type = BLOGC_TEMPLATE_IFNDEF_STMT; + start = current; + if_count++; + else_open = false; + break; + } + else if ((current - start == 2) && + (0 == strncmp("if", src + start, 2))) + { + state = TEMPLATE_BLOCK_IF_START; + type = BLOGC_TEMPLATE_IF_STMT; + start = current; + if_count++; + else_open = false; + break; + } + else if ((current - start == 4) && + (0 == strncmp("else", src + start, 4))) + { + if ((block_open && if_count > block_if_count) || + (!block_open && if_count > 0)) + { + if (!else_open) { + state = TEMPLATE_BLOCK_END_WHITESPACE_CLEANER; + type = BLOGC_TEMPLATE_ELSE_STMT; + else_open = true; + break; + } + *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, + src, src_len, current, + "More than one 'else' statement for an open 'if', " + "'ifdef' or 'ifndef' statement."); + break; + } + *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, + src, src_len, current, + "'else' statement without an open 'if', 'ifdef' or " + "'ifndef' statement."); + break; + } + else if ((current - start == 5) && + (0 == strncmp("endif", src + start, 5))) + { + if ((block_open && if_count > block_if_count) || + (!block_open && if_count > 0)) + { + state = TEMPLATE_BLOCK_END_WHITESPACE_CLEANER; + type = BLOGC_TEMPLATE_ENDIF_STMT; + if_count--; + else_open = false; + break; + } + *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, + src, src_len, current, + "'endif' statement without an open 'if', 'ifdef' or " + 
"'ifndef' statement."); + break; + } + else if ((current - start == 7) && + (0 == strncmp("foreach", src + start, 7))) + { + if (!foreach_open) { + state = TEMPLATE_BLOCK_FOREACH_START; + type = BLOGC_TEMPLATE_FOREACH_STMT; + start = current; + foreach_open = true; + break; + } + *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, + src, src_len, current, "'foreach' statements can't " + "be nested."); + break; + } + else if ((current - start == 10) && + (0 == strncmp("endforeach", src + start, 10))) + { + if ((block_open && !block_foreach_open && foreach_open) || + (!block_open && foreach_open)) + { + state = TEMPLATE_BLOCK_END_WHITESPACE_CLEANER; + type = BLOGC_TEMPLATE_ENDFOREACH_STMT; + foreach_open = false; + break; + } + *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, + src, src_len, current, + "'endforeach' statement without an open 'foreach' " + "statement."); + break; + } + } + *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, src, + src_len, current, + "Invalid statement type: Allowed types are: 'block', " + "'endblock', 'if', 'ifdef', 'ifndef', 'else', 'endif', " + "'foreach' and 'endforeach'."); + break; + + case TEMPLATE_BLOCK_BLOCK_TYPE_START: + if (c == ' ') + break; + if (c >= 'a' && c <= 'z') { + state = TEMPLATE_BLOCK_BLOCK_TYPE; + start = current; + break; + } + *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, src, + src_len, current, + "Invalid block syntax. 
Must begin with lowercase letter."); + break; + + case TEMPLATE_BLOCK_BLOCK_TYPE: + if ((c >= 'a' && c <= 'z') || c == '_') + break; + if (c == ' ') { + if ((current - start == 5) && + (0 == strncmp("entry", src + start, 5))) + { + block_open = true; + end = current; + state = TEMPLATE_BLOCK_END_WHITESPACE_CLEANER; + break; + } + else if ((current - start == 7) && + (0 == strncmp("listing", src + start, 7))) + { + block_open = true; + end = current; + state = TEMPLATE_BLOCK_END_WHITESPACE_CLEANER; + break; + } + else if ((current - start == 12) && + (0 == strncmp("listing_once", src + start, 12))) + { + block_open = true; + end = current; + state = TEMPLATE_BLOCK_END_WHITESPACE_CLEANER; + break; + } + } + *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, src, + src_len, current, + "Invalid block type. Allowed types are: 'entry', 'listing' " + "and 'listing_once'."); + break; + + case TEMPLATE_BLOCK_IF_START: + if (c == ' ') + break; + if (c >= 'A' && c <= 'Z') { + state = TEMPLATE_BLOCK_IF_VARIABLE; + start = current; + break; + } + *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, src, + src_len, current, + "Invalid variable name. Must begin with uppercase letter."); + break; + + case TEMPLATE_BLOCK_IF_VARIABLE: + if ((c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_') + break; + if (c == ' ') { + end = current; + if (type == BLOGC_TEMPLATE_IF_STMT) + state = TEMPLATE_BLOCK_IF_OPERATOR_START; + else + state = TEMPLATE_BLOCK_END_WHITESPACE_CLEANER; + break; + } + *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, src, + src_len, current, + "Invalid variable name. 
Must be uppercase letter, number " + "or '_'."); + break; + + case TEMPLATE_BLOCK_IF_OPERATOR_START: + if (c == ' ') { + break; + } + state = TEMPLATE_BLOCK_IF_OPERATOR; + op_start = current; + break; + + case TEMPLATE_BLOCK_IF_OPERATOR: + if (c != ' ') + break; + state = TEMPLATE_BLOCK_IF_OPERAND_START; + op_end = current; + break; + + case TEMPLATE_BLOCK_IF_OPERAND_START: + if (c == ' ') + break; + if (c >= 'A' && c <= 'Z') { + state = TEMPLATE_BLOCK_IF_VARIABLE_OPERAND; + start2 = current; + break; + } + if (c == '"') { + state = TEMPLATE_BLOCK_IF_STRING_OPERAND; + start2 = current; + break; + } + op_start = 0; + op_end = 0; + *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, src, + src_len, current, + "Invalid 'if' operand. Must be double-quoted static " + "string or variable."); + break; + + case TEMPLATE_BLOCK_IF_STRING_OPERAND: + if (c != '"') + break; + if (c == '"' && src[current - 1] == '\\') + break; + state = TEMPLATE_BLOCK_END_WHITESPACE_CLEANER; + end2 = current + 1; + break; + + case TEMPLATE_BLOCK_IF_VARIABLE_OPERAND: + if ((c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_') + break; + state = TEMPLATE_BLOCK_END_WHITESPACE_CLEANER; + end2 = current; + break; + + case TEMPLATE_BLOCK_FOREACH_START: + if (c == ' ') + break; + if (c >= 'A' && c <= 'Z') { + state = TEMPLATE_BLOCK_FOREACH_VARIABLE; + start = current; + break; + } + *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, src, + src_len, current, + "Invalid foreach variable name. Must begin with uppercase " + "letter."); + break; + + case TEMPLATE_BLOCK_FOREACH_VARIABLE: + if ((c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_') + break; + if (c == ' ') { + end = current; + state = TEMPLATE_BLOCK_END_WHITESPACE_CLEANER; + break; + } + *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, src, + src_len, current, + "Invalid foreach variable name. 
Must be uppercase letter, " + "number or '_'."); + break; + + case TEMPLATE_BLOCK_END_WHITESPACE_CLEANER: + if (c == ' ') + break; + if (c == '-') { + lstrip_next = true; + state = TEMPLATE_BLOCK_END; + break; + } + state = TEMPLATE_BLOCK_END; + + case TEMPLATE_BLOCK_END: + if (c == '%') { + state = TEMPLATE_CLOSE_BRACKET; + break; + } + if (c == '-') { + *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, src, + src_len, current, + "Invalid statement syntax. Duplicated whitespace " + "cleaner after statement."); + break; + } + *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, src, + src_len, current, + "Invalid statement syntax. Must end with '%%}'."); + break; + + case TEMPLATE_VARIABLE_START: + if (c == ' ') + break; + if (c >= 'A' && c <= 'Z') { + state = TEMPLATE_VARIABLE; + type = BLOGC_TEMPLATE_VARIABLE_STMT; + start = current; + break; + } + *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, src, + src_len, current, + "Invalid variable name. Must begin with uppercase letter."); + break; + + case TEMPLATE_VARIABLE: + if ((c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_') + break; + if (c == ' ') { + end = current; + state = TEMPLATE_VARIABLE_END; + break; + } + if (c == '}') { + end = current; + state = TEMPLATE_CLOSE_BRACKET; + break; + } + *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, src, + src_len, current, + "Invalid variable name. Must be uppercase letter, number " + "or '_'."); + break; + + case TEMPLATE_VARIABLE_END: + if (c == ' ') + break; + if (c == '}') { + state = TEMPLATE_CLOSE_BRACKET; + break; + } + *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, src, + src_len, current, + "Invalid statement syntax. 
Must end with '}}'."); + break; + + case TEMPLATE_CLOSE_BRACKET: + if (c == '}') { + tmp_op = 0; + if (op_end > op_start) { + if (op_end - op_start == 1) { + if (0 == strncmp("<", src + op_start, 1)) + tmp_op = BLOGC_TEMPLATE_OP_LT; + else if (0 == strncmp(">", src + op_start, 1)) + tmp_op = BLOGC_TEMPLATE_OP_GT; + } + else if (op_end - op_start == 2) { + if (0 == strncmp("<=", src + op_start, 2)) + tmp_op = BLOGC_TEMPLATE_OP_LT | BLOGC_TEMPLATE_OP_EQ; + else if (0 == strncmp(">=", src + op_start, 2)) + tmp_op = BLOGC_TEMPLATE_OP_GT | BLOGC_TEMPLATE_OP_EQ; + else if (0 == strncmp("==", src + op_start, 2)) + tmp_op = BLOGC_TEMPLATE_OP_EQ; + else if (0 == strncmp("!=", src + op_start, 2)) + tmp_op = BLOGC_TEMPLATE_OP_NEQ; + } + if (tmp_op == 0) { + *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, + src, src_len, op_start, + "Invalid 'if' operator. Must be '<', '>', " + "'<=', '>=', '==' or '!='."); + op_start = 0; + op_end = 0; + break; + } + op_start = 0; + op_end = 0; + } + stmt = sb_malloc(sizeof(blogc_template_stmt_t)); + stmt->type = type; + stmt->value = NULL; + stmt->op = tmp_op; + stmt->value2 = NULL; + if (end > start) + stmt->value = sb_strndup(src + start, end - start); + if (end2 > start2) { + stmt->value2 = sb_strndup(src + start2, end2 - start2); + start2 = 0; + end2 = 0; + } + if (type == BLOGC_TEMPLATE_BLOCK_STMT) + block_type = stmt->value; + stmts = sb_slist_append(stmts, stmt); + previous = stmt; + stmt = NULL; + state = TEMPLATE_START; + type = BLOGC_TEMPLATE_CONTENT_STMT; + start = current + 1; + break; + } + *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, src, + src_len, current, + "Invalid statement syntax. 
Must end with '}'.");
+                break;
+
+        }
+
+        if (*err != NULL)
+            break;
+
+        current++;
+    }
+
+    if (*err == NULL) {
+        if (state == TEMPLATE_BLOCK_IF_STRING_OPERAND)
+            *err = blogc_error_parser(BLOGC_ERROR_TEMPLATE_PARSER, src, src_len,
+                start2, "Found an open double-quoted string.");
+        else if (if_count != 0)
+            *err = blogc_error_new_printf(BLOGC_ERROR_TEMPLATE_PARSER,
+                "%d open 'if', 'ifdef' and/or 'ifndef' statements were not closed!",
+                if_count);
+        else if (block_open)
+            *err = blogc_error_new(BLOGC_ERROR_TEMPLATE_PARSER,
+                "An open block was not closed!");
+        else if (foreach_open)
+            *err = blogc_error_new(BLOGC_ERROR_TEMPLATE_PARSER,
+                "An open 'foreach' statement was not closed!");
+    }
+
+    if (*err != NULL) {
+        if (stmt != NULL) {
+            free(stmt->value);
+            free(stmt);
+        }
+        blogc_template_free_stmts(stmts);
+        return NULL;
+    }
+
+    return stmts;
+}
+
+/*
+ * Release a statement list such as the one built by blogc_template_parse.
+ *
+ * Walks the list through the next pointers and, for every node that carries
+ * a statement, frees its value and value2 strings and then the statement
+ * itself. Nodes whose data pointer is NULL are skipped. Finally the list is
+ * handed to sb_slist_free, which presumably releases the list nodes
+ * (helper is defined elsewhere; verify).
+ *
+ * stmts: head of the list. With a NULL head the loop body never runs and
+ * sb_slist_free is still called with NULL.
+ */
+void
+blogc_template_free_stmts(sb_slist_t *stmts)
+{
+    for (sb_slist_t *tmp = stmts; tmp != NULL; tmp = tmp->next) {
+        blogc_template_stmt_t *data = tmp->data;
+        if (data == NULL)  /* node without a statement: nothing to free */
+            continue;
+        free(data->value);
+        free(data->value2);  /* left NULL by the parser when unused; free(NULL) is a no-op */
+        free(data);
+    }
+    sb_slist_free(stmts);
+}
diff --git a/src/blogc/template-parser.h b/src/blogc/template-parser.h
new file mode 100644
index 0000000..41a2b3a
--- /dev/null
+++ b/src/blogc/template-parser.h
@@ -0,0 +1,53 @@
+/*
+ * blogc: A blog compiler.
+ * Copyright (C) 2015-2016 Rafael G. Martins <rafael@rafaelmartins.eng.br>
+ *
+ * This program can be distributed under the terms of the BSD License.
+ * See the file LICENSE.
+ */
+
+#ifndef _TEMPLATE_PARSER_H
+#define _TEMPLATE_PARSER_H
+
+#include <stddef.h>
+#include "error.h"
+#include "../common/utils.h"
+
+/*
+ * note: whitespace cleaners are NOT added to ast. we fix strings right during
+ * template parsing. renderer does not need to care about it, for the sake of
+ * simplicity.
 + */
+typedef enum {  /* kind of a parsed template statement; numbering starts at 1 */
+    BLOGC_TEMPLATE_IFDEF_STMT = 1,
+    BLOGC_TEMPLATE_IFNDEF_STMT,
+    BLOGC_TEMPLATE_IF_STMT,
+    BLOGC_TEMPLATE_ELSE_STMT,
+    BLOGC_TEMPLATE_ENDIF_STMT,
+    BLOGC_TEMPLATE_FOREACH_STMT,
+    BLOGC_TEMPLATE_ENDFOREACH_STMT,
+    BLOGC_TEMPLATE_BLOCK_STMT,  /* the parser remembers value of this statement as the block type */
+    BLOGC_TEMPLATE_ENDBLOCK_STMT,
+    BLOGC_TEMPLATE_VARIABLE_STMT,  /* chosen by the parser once a variable name starts */
+    BLOGC_TEMPLATE_CONTENT_STMT,  /* type the parser falls back to after closing a statement */
+} blogc_template_stmt_type_t;
+/*
+ * Comparison operator attached to a statement, stored as bit flags so that
+ * the parser can combine them: it maps < to LT, > to GT, == to EQ, != to
+ * NEQ, <= to LT | EQ and >= to GT | EQ. The parser stores 0 in the op field
+ * when the statement has no operator.
+ */
+typedef enum {
+    BLOGC_TEMPLATE_OP_NEQ = 1 << 0,
+    BLOGC_TEMPLATE_OP_EQ = 1 << 1,
+    BLOGC_TEMPLATE_OP_LT = 1 << 2,
+    BLOGC_TEMPLATE_OP_GT = 1 << 3,
+} blogc_template_stmt_operator_t;
+/*
+ * One parsed template statement. The parser allocates it, fills it in and
+ * appends it to the statement list; blogc_template_free_stmts frees the two
+ * strings and the struct.
+ */
+typedef struct {
+    blogc_template_stmt_type_t type;  /* statement kind */
+    char *value;  /* copy of the first source span of the statement, or NULL if that span is empty */
+    char *value2;  /* copy of the second source span (start2..end2 in the parser), or NULL if unused */
+    blogc_template_stmt_operator_t op;  /* operator flags, 0 when no operator was parsed */
+} blogc_template_stmt_t;
+/*
+ * blogc_template_parse: parse src_len bytes of src and return the list of
+ * blogc_template_stmt_t statements. On a parse error *err is set, any
+ * statements built so far are freed and NULL is returned. The caller
+ * presumably releases a successful result with blogc_template_free_stmts
+ * (confirm against callers).
+ *
+ * blogc_template_free_stmts: free every statement stored in the list and
+ * then the list itself.
+ */
+sb_slist_t* blogc_template_parse(const char *src, size_t src_len,
+    blogc_error_t **err);
+void blogc_template_free_stmts(sb_slist_t *stmts);
+
+#endif /* _TEMPLATE_PARSER_H */ |