diff options
| -rw-r--r-- | src/content-parser.c | 197 | ||||
| -rw-r--r-- | tests/check_content_parser.c | 10 | 
2 files changed, 112 insertions, 95 deletions
| diff --git a/src/content-parser.c b/src/content-parser.c index 23c8647..b8df868 100644 --- a/src/content-parser.c +++ b/src/content-parser.c @@ -55,11 +55,12 @@ typedef enum {  char*  blogc_content_parse_inline(const char *src)  { -    // this function is always called by blogc_content_parse, then its safe to -    // assume that src is always nul-terminated. +    // this function is always called by blogc_content_parse or by itself, +    // then its safe to assume that src is always nul-terminated.      size_t src_len = strlen(src);      size_t current = 0; +    size_t start = 0;      b_string_t *rv = b_string_new(); @@ -69,20 +70,40 @@ blogc_content_parse_inline(const char *src)      bool open_strong_und = false;      bool open_code = false;      bool open_code_double = false; -    unsigned int link_state = 0; -    unsigned int image_state = 0; -    size_t link_start = 0; -    size_t image_start = 0; + +    unsigned int state = 0; +    bool is_image = false;      char *tmp = NULL; -    char *title = NULL; -    char *alt = NULL; +    char *tmp2 = NULL; + +    unsigned int open_bracket = 0; + +    bool escape = false;      while (current < src_len) {          char c = src[current];          bool is_last = current == src_len - 1; +        if (escape) { +            if (state == 0) +                b_string_append_c(rv, c); +            current++; +            escape = false; +            continue; +        } +          switch (c) { + +            case '\\': +                if (open_code || open_code_double) { +                    b_string_append_c(rv, c); +                    break; +                } +                if (!escape) +                    escape = true; +                break; +              case '*':              case '_':                  if (open_code || open_code_double) { @@ -94,14 +115,16 @@ blogc_content_parse_inline(const char *src)                      if ((c == '*' && open_strong_ast) ||                          (c == '_' && 
open_strong_und))                      { -                        b_string_append(rv, "</strong>"); +                        if (state == 0) +                            b_string_append(rv, "</strong>");                          if (c == '*')                              open_strong_ast = false;                          else                              open_strong_und = false;                      }                      else { -                        b_string_append(rv, "<strong>"); +                        if (state == 0) +                            b_string_append(rv, "<strong>");                          if (c == '*')                              open_strong_ast = true;                          else @@ -110,14 +133,16 @@ blogc_content_parse_inline(const char *src)                  }                  else {                      if ((c == '*' && open_em_ast) || (c == '_' && open_em_und)) { -                        b_string_append(rv, "</em>"); +                        if (state == 0) +                            b_string_append(rv, "</em>");                          if (c == '*')                              open_em_ast = false;                          else                              open_em_und = false;                      }                      else { -                        b_string_append(rv, "<em>"); +                        if (state == 0) +                            b_string_append(rv, "<em>");                          if (c == '*')                              open_em_ast = true;                          else @@ -129,62 +154,48 @@ blogc_content_parse_inline(const char *src)              case '`':                  if (!is_last && src[current + 1] == c) {                      current++; -                    if (open_code_double) -                        b_string_append(rv, "</code>"); -                    else -                        b_string_append(rv, "<code>"); +                    if (state == 0) { +                        if (open_code_double) 
+                            b_string_append(rv, "</code>"); +                        else +                            b_string_append(rv, "<code>"); +                    }                      open_code_double = !open_code_double;                  }                  else { -                    if (open_code) -                        b_string_append(rv, "</code>"); -                    else -                        b_string_append(rv, "<code>"); +                    if (state == 0) { +                        if (open_code) +                            b_string_append(rv, "</code>"); +                        else +                            b_string_append(rv, "<code>"); +                    }                      open_code = !open_code;                  }                  break; -            case '[': +            case '!':                  if (open_code || open_code_double) {                      b_string_append_c(rv, c);                      break;                  } -                if (link_state == 0 && image_state == 0) { -                    tmp = strchr(src + current, ']'); -                    if (tmp != NULL) { -                        if (strlen(tmp) > 1 && tmp[1] == '(') { -                            tmp = strchr(tmp, ')'); -                            if (tmp != NULL) {  // this is a link -                                link_start = current + 1;  // its safe -                                link_state = 1; -                                break; -                            } -                        } -                    } -                    b_string_append_c(rv, c); -                } +                if (state == 0) +                    is_image = true;                  break; -            case '!': +            case '[':                  if (open_code || open_code_double) {                      b_string_append_c(rv, c);                      break;                  } -                if (link_state == 0 && image_state == 0) { -                
    if (!is_last && src[current + 1] == '[') { -                        tmp = strchr(src + current + 1, ']'); -                        if (tmp != NULL) { -                            if (strlen(tmp) > 1 && tmp[1] == '(') { -                                tmp = strchr(tmp, ')'); -                                if (tmp != NULL) {  // this is an image -                                    image_start = current + 2;  // its safe -                                    image_state = 1; -                                    break; -                                } -                            } -                        } -                    } -                    b_string_append_c(rv, c); +                if (state == 0) { +                    state = 1; +                    start = current + 1; +                    open_bracket = 0; +                    break; +                } +                if (state == 1) { +                    open_bracket++; +                    break;                  }                  break; @@ -193,17 +204,18 @@ blogc_content_parse_inline(const char *src)                      b_string_append_c(rv, c);                      break;                  } -                if (link_state == 1) { -                    link_state = 2; -                    title = b_strndup(src + link_start, current - link_start); -                    break; -                } -                if (image_state == 1) { -                    image_state = 2; -                    alt = b_strndup(src + image_start, current - image_start); +                if (state == 1) { +                    if (open_bracket-- == 0) { +                        state = 2; +                        tmp = b_strndup(src + start, current - start); +                        tmp2 = blogc_content_parse_inline(tmp); +                        free(tmp); +                        tmp = NULL; +                    }                      break;                  } -                b_string_append_c(rv, c); +     
           if (state == 0) +                    b_string_append_c(rv, c);                  break;              case '(': @@ -211,17 +223,13 @@ blogc_content_parse_inline(const char *src)                      b_string_append_c(rv, c);                      break;                  } -                if (link_state == 2) { -                    link_state = 3; -                    link_start = current + 1;  // its safe +                if (state == 2) { +                    state = 3; +                    start = current + 1;                      break;                  } -                if (image_state == 2) { -                    image_state = 3; -                    image_start = current + 1;  // its safe -                    break; -                } -                b_string_append_c(rv, c); +                if (state == 0) +                    b_string_append_c(rv, c);                  break;              case ')': @@ -229,55 +237,58 @@ blogc_content_parse_inline(const char *src)                      b_string_append_c(rv, c);                      break;                  } -                if (link_state == 3) { -                    link_state = 0; -                    tmp = b_strndup(src + link_start, current - link_start); -                    b_string_append_printf(rv, "<a href=\"%s\">%s</a>", tmp, title); -                    free(tmp); -                    tmp = NULL; -                    free(title); -                    title = NULL; -                    break; -                } -                if (image_state == 3) { -                    image_state = 0; -                    tmp = b_strndup(src + image_start, current - image_start); -                    b_string_append_printf(rv, "<img src=\"%s\" alt=\"%s\">", tmp, alt); +                if (state == 3) { +                    state = 0; +                    tmp = b_strndup(src + start, current - start); +                    if (is_image) { +                        b_string_append_printf(rv, "<img 
src=\"%s\" alt=\"%s\">", tmp, tmp2); +                    } +                    else { +                        b_string_append_printf(rv, "<a href=\"%s\">%s</a>", tmp, tmp2); +                    }                      free(tmp);                      tmp = NULL; -                    free(alt); -                    alt = NULL; +                    free(tmp2); +                    tmp2 = NULL; +                    is_image = false;                      break;                  } -                b_string_append_c(rv, c); +                if (state == 0) +                    b_string_append_c(rv, c);                  break;              case '&': -                b_string_append(rv, "&amp;"); +                if (state == 0) +                    b_string_append(rv, "&amp;");                  break;              case '<': -                b_string_append(rv, "&lt;"); +                if (state == 0) +                    b_string_append(rv, "&lt;");                  break;              case '>': -                b_string_append(rv, "&gt;"); +                if (state == 0) +                    b_string_append(rv, "&gt;");                  break;              case '"': -                b_string_append(rv, "&quot;"); +                if (state == 0) +                    b_string_append(rv, "&quot;");                  break;              case '\'': -                b_string_append(rv, "&#x27;"); +                if (state == 0) +                    b_string_append(rv, "&#x27;");                  break;              case '/': -                b_string_append(rv, "&#x2F;"); +                if (state == 0) +                    b_string_append(rv, "&#x2F;");                  break;              default: -                if (link_state == 0 && image_state == 0) +                if (state == 0)                      b_string_append_c(rv, c);          } diff --git a/tests/check_content_parser.c b/tests/check_content_parser.c index ab1bf49..6fec86c 100644 --- a/tests/check_content_parser.c +++ b/tests/check_content_parser.c @@ 
-325,8 +325,14 @@ test_content_parse_invalid_code(void **state)  void  test_content_parse_inline(void **state)  { -    char *html = blogc_content_parse_inline("**bola***asd* ``chunda``"); -    assert_string_equal(html, "<strong>bola</strong><em>asd</em> <code>chunda</code>"); +    char *html = blogc_content_parse_inline( +        "**bola***asd* [![lol](http://google.com/lol.png) **lol** " +        "\\[asd\\]\\(qwe\\)](http://google.com) ``chunda``"); +    assert_string_equal(html, +        "<strong>bola</strong><em>asd</em> " +        "<a href=\"http://google.com\"><img src=\"http://google.com/lol.png\" " +        "alt=\"lol\"> <strong>lol</strong> [asd](qwe)</a> " +        "<code>chunda</code>");      free(html);      html = blogc_content_parse_inline("*bola*");      assert_string_equal(html, "<em>bola</em>"); | 
