diff options
| -rw-r--r-- | src/content-parser.c | 189 | ||||
| -rw-r--r-- | src/error.c | 3 | ||||
| -rw-r--r-- | src/error.h | 1 | ||||
| -rw-r--r-- | tests/check_content_parser.c | 16 | 
4 files changed, 201 insertions, 8 deletions
| diff --git a/src/content-parser.c b/src/content-parser.c index e083243..75c0f34 100644 --- a/src/content-parser.c +++ b/src/content-parser.c @@ -22,7 +22,6 @@  // expected. feel free to improve the parser and and new features. -// TODO: block elements: list, horizontal rule  // TODO: inline elements: links, emphasis, code, images, line breaks  // TODO: automatic scaping of html entities  // TODO: automatic links @@ -42,6 +41,14 @@ typedef enum {      CONTENT_CODE,      CONTENT_CODE_START,      CONTENT_CODE_END, +    CONTENT_UNORDERED_LIST_OR_HORIZONTAL_RULE, +    CONTENT_HORIZONTAL_RULE, +    CONTENT_UNORDERED_LIST_START, +    CONTENT_UNORDERED_LIST_END, +    CONTENT_ORDERED_LIST, +    CONTENT_ORDERED_LIST_SPACE, +    CONTENT_ORDERED_LIST_START, +    CONTENT_ORDERED_LIST_END,      CONTENT_PARAGRAPH,      CONTENT_PARAGRAPH_END,  } blogc_content_parser_state_t; @@ -59,7 +66,12 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err)      unsigned int header_level = 0;      char *prefix = NULL; +    size_t prefix_len = 0;      char *tmp = NULL; +    char *tmp2 = NULL; +    char **tmpv = NULL; + +    char d;      b_slist_t *lines = NULL; @@ -82,6 +94,17 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err)                      state = CONTENT_HEADER;                      break;                  } +                if (c == '*' || c == '+' || c == '-') { +                    state = CONTENT_UNORDERED_LIST_OR_HORIZONTAL_RULE; +                    start = current; +                    d = c; +                    break; +                } +                if (c >= '0' && c <= '9') { +                    state = CONTENT_ORDERED_LIST; +                    start = current; +                    break; +                }                  if (c == ' ' || c == '\t') {                      state = CONTENT_CODE;                      start = current; @@ -110,7 +133,8 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err)                      state = CONTENT_HEADER_TITLE_START;                      break;                  } -                // error; +                *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len, +                    current, "Malformed header, no space or tab after '#'");                  break;              case CONTENT_HEADER_TITLE_START: @@ -121,7 +145,8 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err)                      state = CONTENT_HEADER_TITLE;                      break;                  } -                // error; +                *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len, +                    current, "Empty header");                  break;              case CONTENT_HEADER_TITLE: @@ -138,7 +163,6 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err)                  }                  break; -              case CONTENT_HTML:                  if (c == '\n' || c == '\r' || is_last) {                      state = CONTENT_HTML_END; @@ -172,13 +196,15 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err)                      tmp = b_strndup(src + start, end - start);                      if (b_str_starts_with(tmp, prefix)) {                          lines = b_slist_append(lines, b_strdup(tmp + strlen(prefix))); +                        state = CONTENT_BLOCKQUOTE_END;                      }                      else { -                        // error +                        *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len, +                            current, "Malformed blockquote, must use same prefix " +                            "as previous line(s): %s", prefix);                      }                      free(tmp);                      tmp = NULL; -                    state = CONTENT_BLOCKQUOTE_END;                  }                  if (!is_last)                      break; @@ -226,13 +252,15 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err)                      tmp = b_strndup(src + start, end - start);                      if (b_str_starts_with(tmp, prefix)) {                          lines = b_slist_append(lines, b_strdup(tmp + strlen(prefix))); +                        state = CONTENT_CODE_END;                      }                      else { -                        // error +                        *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len, +                            current, "Malformed code block, must use same prefix " +                            "as previous line(s): '%s'", prefix);                      }                      free(tmp);                      tmp = NULL; -                    state = CONTENT_CODE_END;                  }                  if (!is_last)                      break; @@ -260,6 +288,151 @@ blogc_content_parse(const char *src, size_t src_len, blogc_error_t **err)                  }                  break; +            case CONTENT_UNORDERED_LIST_OR_HORIZONTAL_RULE: +                if (c == d) { +                    state = CONTENT_HORIZONTAL_RULE; +                    break; +                } +                if (c == ' ' || c == '\t') +                    break; +                prefix = b_strndup(src + start, current - start); +                state = CONTENT_UNORDERED_LIST_START; +                break; + +            case CONTENT_HORIZONTAL_RULE: +                if (c == d) { +                    break; +                } +                if (c == '\n' || c == '\r' || is_last) { +                    b_string_append(rv, "<hr />\n"); +                    state = CONTENT_START_LINE; +                    start = current; +                    d = '\0'; +                    break; +                } +                *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len, +                    current, "Malformed horizontal rule, must use only '%c'", d); +                break; + +            case CONTENT_UNORDERED_LIST_START: +                if (c == '\n' || c == '\r' || is_last) { +                    end = is_last ? src_len : current; +                    tmp = b_strndup(src + start, end - start); +                    if (b_str_starts_with(tmp, prefix)) { +                        lines = b_slist_append(lines, b_strdup(tmp + strlen(prefix))); +                    } +                    else { +                        *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len, +                            current, "Malformed unordered list, must use same prefix " +                            "as previous line(s): %s", prefix); +                    } +                    free(tmp); +                    tmp = NULL; +                    state = CONTENT_UNORDERED_LIST_END; +                } +                if (!is_last) +                    break; + +            case CONTENT_UNORDERED_LIST_END: +                if (c == '\n' || c == '\r' || is_last) { +                    b_string_append(rv, "<ul>\n"); +                    for (b_slist_t *l = lines; l != NULL; l = l->next) +                        b_string_append_printf(rv, "<li>%s</li>\n", l->data); +                    b_string_append(rv, "</ul>\n"); +                    b_slist_free_full(lines, free); +                    lines = NULL; +                    free(prefix); +                    prefix = NULL; +                    state = CONTENT_START_LINE; +                    start = current; +                } +                else { +                    start = current; +                    state = CONTENT_UNORDERED_LIST_START; +                } +                break; + +            case CONTENT_ORDERED_LIST: +                if (c >= '0' && c <= '9') +                    break; +                if (c == '.') { +                    state = CONTENT_ORDERED_LIST_SPACE; +                    break; +                } +                state = CONTENT_PARAGRAPH; +                break; + +            case CONTENT_ORDERED_LIST_SPACE: +                if (c == ' ' || c == '\t') +                    break; +                prefix_len = current - start; +                state = CONTENT_ORDERED_LIST_START; + +            case CONTENT_ORDERED_LIST_START: +                if (c == '\n' || c == '\r' || is_last) { +                    end = is_last ? src_len : current; +                    tmp = b_strndup(src + start, end - start); +                    if (strlen(tmp) >= prefix_len) { +                        tmp2 = b_strndup(tmp, prefix_len); +                        tmpv = b_str_split(tmp2, '.', 2); +                        free(tmp2); +                        tmp2 = NULL; +                        if (b_strv_length(tmpv) != 2) { +                            *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len, +                                current, "Malformed ordered list, prefix must be a " +                                "number, followed by a '.', followed by the content. " +                                "Content must be aligned with content from previous line(s)"); +                            goto err_li; +                        } +                        for (unsigned int i = 0; tmpv[0][i] != '\0'; i++) { +                            if (!(tmpv[0][i] >= '0' && tmpv[0][i] <= '9')) { +                                *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len, +                                    current, "Malformed ordered list, prefix must be a " +                                    "number, followed by a '.', followed by the content. " +                                    "Content must be aligned with content from previous line(s)"); +                                goto err_li; +                            } +                        } +                        for (unsigned int i = 0; tmpv[1][i] != '\0'; i++) { +                            if (!(tmpv[1][i] == ' ' || tmpv[1][i] == '\t')) { +                                *err = blogc_error_parser(BLOGC_ERROR_CONTENT_PARSER, src, src_len, +                                    current, "Malformed ordered list, prefix must be a " +                                    "number, followed by a '.', followed by the content. " +                                    "Content must be aligned with content from previous line(s)"); +                                goto err_li; +                            } +                        } +                        lines = b_slist_append(lines, b_strdup(tmp + prefix_len)); +                        state = CONTENT_ORDERED_LIST_END; +err_li: +                        b_strv_free(tmpv); +                        tmpv = NULL; +                    } +                    free(tmp); +                    tmp = NULL; +                } +                if (!is_last) +                    break; + +            case CONTENT_ORDERED_LIST_END: +                if (c == '\n' || c == '\r' || is_last) { +                    b_string_append(rv, "<ol>\n"); +                    for (b_slist_t *l = lines; l != NULL; l = l->next) +                        b_string_append_printf(rv, "<li>%s</li>\n", l->data); +                    b_string_append(rv, "</ol>\n"); +                    b_slist_free_full(lines, free); +                    lines = NULL; +                    free(prefix); +                    prefix = NULL; +                    state = CONTENT_START_LINE; +                    start = current; +                } +                else { +                    start = current; +                    state = CONTENT_ORDERED_LIST_START; +                } +                break; +              case CONTENT_PARAGRAPH:                  if (c == '\n' || c == '\r' || is_last) {                      state = CONTENT_PARAGRAPH_END; diff --git a/src/error.c b/src/error.c index 3850304..68c7b00 100644 --- a/src/error.c +++ b/src/error.c @@ -96,6 +96,9 @@ blogc_error_print(blogc_error_t *err)          case BLOGC_ERROR_LOADER:              tmp = b_strdup("Loader error");              break; +        case BLOGC_ERROR_CONTENT_PARSER: +            tmp = b_strdup("Content parser error"); +            break;          default:              tmp = b_strdup("Unknown error");      } diff --git a/src/error.h b/src/error.h index 98aeeb9..b4960e2 100644 --- a/src/error.h +++ b/src/error.h @@ -17,6 +17,7 @@ typedef enum {      BLOGC_ERROR_SOURCE_PARSER = 1,      BLOGC_ERROR_TEMPLATE_PARSER,      BLOGC_ERROR_LOADER, +    BLOGC_ERROR_CONTENT_PARSER,  } blogc_error_type_t;  typedef struct { diff --git a/tests/check_content_parser.c b/tests/check_content_parser.c index 86ba8d8..df14df3 100644 --- a/tests/check_content_parser.c +++ b/tests/check_content_parser.c @@ -44,6 +44,13 @@ test_content_parse(void **state)          "     asd\n"          "    qwewer\n"          "\n" +        "+++\n" +        "1. chunda\n" +        "3. fuuuu\n" +        "\n" +        "+  chunda2\n" +        "+  fuuuu2\n" +        "\n"          "<style>\n"          "   chunda\n"          "</style>\n" @@ -71,6 +78,15 @@ test_content_parse(void **state)          "<pre><code>bola\n"          " asd\n"          "qwewer</code></pre>\n" +        "<hr />\n" +        "<ol>\n" +        "<li>chunda</li>\n" +        "<li>fuuuu</li>\n" +        "</ol>\n" +        "<ul>\n" +        "<li>chunda2</li>\n" +        "<li>fuuuu2</li>\n" +        "</ul>\n"          "<style>\n"          "   chunda\n"          "</style>\n" | 
