aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- src/content-parser.c 220
-rw-r--r-- src/content-parser.h 1
-rw-r--r-- tests/check_content_parser.c 20
3 files changed, 236 insertions, 5 deletions
diff --git a/src/content-parser.c b/src/content-parser.c
index 8249322..29a20bb 100644
--- a/src/content-parser.c
+++ b/src/content-parser.c
@@ -55,6 +55,217 @@ typedef enum {
+// Appends the first `len` bytes of `s` to `rv`, replacing the characters that
+// are significant in HTML (& < > " ') with character references, so the text
+// can be placed in element content or inside a double-quoted attribute value.
+// '/' is replaced as well when `escape_slash` is true.
+static void
+inline_append_escaped(b_string_t *rv, const char *s, size_t len,
+    bool escape_slash)
+{
+    for (size_t i = 0; i < len; i++) {
+        switch (s[i]) {
+            case '&':
+                b_string_append(rv, "&amp;");
+                break;
+            case '<':
+                b_string_append(rv, "&lt;");
+                break;
+            case '>':
+                b_string_append(rv, "&gt;");
+                break;
+            case '"':
+                b_string_append(rv, "&quot;");
+                break;
+            case '\'':
+                b_string_append(rv, "&#x27;");
+                break;
+            case '/':
+                if (escape_slash)
+                    b_string_append(rv, "&#x2F;");
+                else
+                    b_string_append_c(rv, '/');
+                break;
+            default:
+                b_string_append_c(rv, s[i]);
+                break;
+        }
+    }
+}
+
+
+// Returns true when the '[' pointed to by `open` starts a complete
+// "[text](url)" sequence: the first ']' after it is immediately followed by
+// '(' and a ')' occurs somewhere after that.
+static bool
+inline_is_link_start(const char *open)
+{
+    const char *close = strchr(open, ']');
+    // close[1] is always readable: the string is nul-terminated and close[0]
+    // is ']', so close[1] is at worst the terminator.
+    return close != NULL && close[1] == '(' && strchr(close + 1, ')') != NULL;
+}
+
+
char*
+blogc_content_parse_inline(const char *src)
+{
+    // Converts the inline markup found in the nul-terminated string `src`
+    // into HTML:
+    //
+    //   *x* / _x_       -> <em>x</em>
+    //   **x** / __x__   -> <strong>x</strong>
+    //   `x` / ``x``     -> <code>x</code>
+    //   [text](url)     -> <a href="url">text</a>
+    //   ![alt](url)     -> <img src="url" alt="alt">
+    //
+    // Every other character is copied to the output, with & < > " ' and /
+    // replaced by character references. Link text, alt text and URLs are
+    // escaped too, and are never interpreted as markup.
+    //
+    // Returns a newly allocated string that the caller must free(), or NULL
+    // when `src` is NULL.
+    if (src == NULL)
+        return NULL;
+
+    size_t src_len = strlen(src);
+    size_t current = 0;
+
+    b_string_t *rv = b_string_new();
+
+    bool open_em_ast = false;
+    bool open_strong_ast = false;
+    bool open_em_und = false;
+    bool open_strong_und = false;
+    bool open_code = false;
+    bool open_code_double = false;
+
+    // progress through a "[text](url)" / "![alt](url)" span
+    enum {
+        SPAN_NONE,      // not inside a link or image
+        SPAN_TEXT,      // after '[', collecting the text up to ']'
+        SPAN_TEXT_END,  // just after ']', the next character is '('
+        SPAN_URL        // after '(', collecting the url up to ')'
+    } span = SPAN_NONE;
+    bool span_is_image = false;
+    size_t text_start = 0;
+    size_t text_len = 0;
+    size_t url_start = 0;
+
+    while (current < src_len) {
+        char c = src[current];
+        bool is_last = current == src_len - 1;
+
+        if (span != SPAN_NONE) {
+            // Inside a link or image nothing is written to the output until
+            // the closing ')': the text and url are only measured here, so
+            // markup and escapable characters within them cannot leak out
+            // ahead of the generated tag.
+            switch (span) {
+                case SPAN_TEXT:
+                    if (c == ']') {
+                        text_len = current - text_start;
+                        span = SPAN_TEXT_END;
+                    }
+                    break;
+                case SPAN_TEXT_END:
+                    // the look-ahead that opened the span guarantees that
+                    // this character is '('
+                    url_start = current + 1;
+                    span = SPAN_URL;
+                    break;
+                case SPAN_URL:
+                    if (c == ')') {
+                        size_t url_len = current - url_start;
+                        if (span_is_image) {
+                            b_string_append(rv, "<img src=\"");
+                            inline_append_escaped(rv, src + url_start, url_len,
+                                false);
+                            b_string_append(rv, "\" alt=\"");
+                            inline_append_escaped(rv, src + text_start,
+                                text_len, false);
+                            b_string_append(rv, "\">");
+                        }
+                        else {
+                            b_string_append(rv, "<a href=\"");
+                            inline_append_escaped(rv, src + url_start, url_len,
+                                false);
+                            b_string_append(rv, "\">");
+                            inline_append_escaped(rv, src + text_start,
+                                text_len, true);
+                            b_string_append(rv, "</a>");
+                        }
+                        span = SPAN_NONE;
+                    }
+                    break;
+                default:
+                    break;
+            }
+            current++;
+            continue;
+        }
+
+        switch (c) {
+            case '*':
+            case '_': {
+                // a doubled marker toggles <strong>, a single one toggles <em>
+                bool doubled = !is_last && src[current + 1] == c;
+                bool *open;
+                if (doubled) {
+                    current++;
+                    open = c == '*' ? &open_strong_ast : &open_strong_und;
+                }
+                else {
+                    open = c == '*' ? &open_em_ast : &open_em_und;
+                }
+                if (*open)
+                    b_string_append(rv, doubled ? "</strong>" : "</em>");
+                else
+                    b_string_append(rv, doubled ? "<strong>" : "<em>");
+                *open = !*open;
+                break;
+            }
+
+            case '`':
+                if (!is_last && src[current + 1] == c) {
+                    current++;
+                    b_string_append(rv,
+                        open_code_double ? "</code>" : "<code>");
+                    open_code_double = !open_code_double;
+                }
+                else {
+                    b_string_append(rv, open_code ? "</code>" : "<code>");
+                    open_code = !open_code;
+                }
+                break;
+
+            case '[':
+                if (inline_is_link_start(src + current)) {
+                    span = SPAN_TEXT;
+                    span_is_image = false;
+                    text_start = current + 1;  // safe: a ']' follows
+                    break;
+                }
+                b_string_append_c(rv, c);
+                break;
+
+            case '!':
+                if (!is_last && src[current + 1] == '[' &&
+                    inline_is_link_start(src + current + 1))
+                {
+                    span = SPAN_TEXT;
+                    span_is_image = true;
+                    current++;  // skip the '[' that follows the '!'
+                    text_start = current + 1;  // safe: a ']' follows
+                    break;
+                }
+                b_string_append_c(rv, c);
+                break;
+
+            default:
+                // plain text, including stray ']', '(' and ')'
+                inline_append_escaped(rv, src + current, 1, true);
+                break;
+        }
+
+        current++;
+    }
+
+    return b_string_free(rv, false);
+}
+
+
+char*
blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err)
{
if (err == NULL || *err != NULL)
@@ -69,6 +280,7 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err)
size_t prefix_len = 0;
char *tmp = NULL;
char *tmp2 = NULL;
+ char *parsed = NULL;
char **tmpv = NULL;
char d;
@@ -150,8 +362,11 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err)
if (c == '\n' || c == '\r' || is_last) {
end = is_last && c != '\n' && c != '\r' ? src_len : current;
tmp = b_strndup(src + start, end - start);
+ parsed = blogc_content_parse_inline(tmp);
b_string_append_printf(rv, "<h%d>%s</h%d>\n", header_level,
- tmp, header_level);
+ parsed, header_level);
+ free(parsed);
+ parsed = NULL;
free(tmp);
tmp = NULL;
state = CONTENT_START_LINE;
@@ -316,8 +531,7 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err)
d = '\0';
break;
}
- *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len,
- current, "Malformed horizontal rule, must use only '%c'", d);
+ state = CONTENT_PARAGRAPH;
break;
case CONTENT_UNORDERED_LIST_START:
diff --git a/src/content-parser.h b/src/content-parser.h
index 0a55fd9..db65332 100644
--- a/src/content-parser.h
+++ b/src/content-parser.h
@@ -12,6 +12,7 @@
#include <stdlib.h>
#include "error.h"
+char* blogc_content_parse_inline(const char *src);  // returns HTML for the inline markup in src; callers free() the result
char* blogc_content_parse(const char *src, size_t src_len,
blogc_error_t **err);
diff --git a/tests/check_content_parser.c b/tests/check_content_parser.c
index b3e7092..a613052 100644
--- a/tests/check_content_parser.c
+++ b/tests/check_content_parser.c
@@ -56,7 +56,9 @@ test_content_parse(void **state)
"</style>\n"
"\n"
"guda\n"
- "yay";
+ "yay\n"
+ "\n"
+ "**bola**\n";
blogc_error_t *err = NULL;
char *html = blogc_content_parse(a, strlen(a), &err);
assert_null(err);
@@ -91,7 +93,8 @@ test_content_parse(void **state)
" chunda\n"
"</style>\n"
"<p>guda\n"
- "yay</p>\n");
+ "yay</p>\n"
+ "<p>**bola**</p>\n");
free(html);
}
@@ -319,6 +322,18 @@ test_content_parse_invalid_code(void **state)
}
+void
+test_content_parse_inline(void **state)
+{
+    // Each entry pairs an inline markup input with the exact HTML that
+    // blogc_content_parse_inline must produce for it.
+    static const struct {
+        const char *input;
+        const char *expected;
+    } cases[] = {
+        // asterisk emphasis and double-backtick code
+        {"**bola***asd* ``chunda``",
+            "<strong>bola</strong><em>asd</em> <code>chunda</code>"},
+        {"*bola*", "<em>bola</em>"},
+        // underscore emphasis and single-backtick code
+        {"__bola__", "<strong>bola</strong>"},
+        {"_bola_", "<em>bola</em>"},
+        {"`chunda`", "<code>chunda</code>"},
+        // HTML-significant characters in plain text are escaped
+        {"a < b & c", "a &lt; b &amp; c"},
+        // links and images
+        {"[bola](guda)", "<a href=\"guda\">bola</a>"},
+        {"![bola](guda)", "<img src=\"guda\" alt=\"bola\">"},
+    };
+
+    for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
+        char *html = blogc_content_parse_inline(cases[i].input);
+        assert_non_null(html);
+        assert_string_equal(html, cases[i].expected);
+        free(html);
+    }
+}
+
+
int
main(void)
{
@@ -332,6 +347,7 @@ main(void)
unit_test(test_content_parse_invalid_header_empty),
unit_test(test_content_parse_invalid_blockquote),
unit_test(test_content_parse_invalid_code),
+ unit_test(test_content_parse_inline),
};
return run_tests(tests);
}