diff options
| author | Rafael G. Martins <rafael@rafaelmartins.eng.br> | 2016-07-03 21:52:51 +0200 | 
|---|---|---|
| committer | Rafael G. Martins <rafael@rafaelmartins.eng.br> | 2016-07-04 01:07:34 +0200 | 
| commit | 1099a4d991942655c0291a74b488322d5da533bd (patch) | |
| tree | 0a91f413d3675122a15f49a87fd9413baf46c655 /src | |
| parent | 4aac65c4b8f2d8415ca8d9d8449e0158e0ff1e9c (diff) | |
| download | blogc-1099a4d991942655c0291a74b488322d5da533bd.tar.gz blogc-1099a4d991942655c0291a74b488322d5da533bd.tar.bz2 blogc-1099a4d991942655c0291a74b488322d5da533bd.zip | |
utf8: skip BOM, if found
Diffstat (limited to 'src')
| -rw-r--r-- | src/file.c | 14 | ||||
| -rw-r--r-- | src/utf8.c | 13 | ||||
| -rw-r--r-- | src/utf8.h | 1 | 
3 files changed, 27 insertions, 1 deletions
```diff
--- a/src/file.c
+++ b/src/file.c
@@ -41,11 +41,23 @@ blogc_file_get_contents(const char *path, size_t *len, blogc_error_t **err)
 
     sb_string_t *str = sb_string_new();
     char buffer[BLOGC_FILE_CHUNK_SIZE];
+    char *tmp;
 
     while (!feof(fp)) {
         size_t read_len = fread(buffer, sizeof(char), BLOGC_FILE_CHUNK_SIZE, fp);
+
+        tmp = buffer;
+
+        if (str->len == 0 && read_len > 0) {
+            // skipping BOM before validation, for performance. should be safe
+            // enough
+            size_t skip = blogc_utf8_skip_bom((uint8_t*) buffer, read_len);
+            read_len -= skip;
+            tmp += skip;
+        }
+
         *len += read_len;
-        sb_string_append_len(str, buffer, read_len);
+        sb_string_append_len(str, tmp, read_len);
     }
     fclose(fp);
 
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -86,3 +86,16 @@ blogc_utf8_validate_str(sb_string_t *str)
 {
     return blogc_utf8_validate((uint8_t*) str->str, str->len);
 }
+
+
+size_t
+blogc_utf8_skip_bom(const uint8_t *str, size_t len)
+{
+    if (len < 3)
+        return 0;
+
+    if (str[0] == 0xef && str[1] == 0xbb && str[2] == 0xbf)
+        return 3;
+
+    return 0;
+}
--- a/src/utf8.h
+++ b/src/utf8.h
@@ -15,5 +15,6 @@
 
 bool blogc_utf8_validate(const uint8_t *str, size_t len);
 bool blogc_utf8_validate_str(sb_string_t *str);
+size_t blogc_utf8_skip_bom(const uint8_t *str, size_t len);
 
 #endif /* _UTF_8_H */
```
