diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/file.c | 14 | ||||
-rw-r--r-- | src/utf8.c | 13 | ||||
-rw-r--r-- | src/utf8.h | 1 |
3 files changed, 27 insertions, 1 deletions
@@ -41,11 +41,23 @@ blogc_file_get_contents(const char *path, size_t *len, blogc_error_t **err) sb_string_t *str = sb_string_new(); char buffer[BLOGC_FILE_CHUNK_SIZE]; + char *tmp; while (!feof(fp)) { size_t read_len = fread(buffer, sizeof(char), BLOGC_FILE_CHUNK_SIZE, fp); + + tmp = buffer; + + if (str->len == 0 && read_len > 0) { + // skipping BOM before validation, for performance. should be safe + // enough + size_t skip = blogc_utf8_skip_bom((uint8_t*) buffer, read_len); + read_len -= skip; + tmp += skip; + } + *len += read_len; - sb_string_append_len(str, buffer, read_len); + sb_string_append_len(str, tmp, read_len); } fclose(fp); @@ -86,3 +86,16 @@ blogc_utf8_validate_str(sb_string_t *str) { return blogc_utf8_validate((uint8_t*) str->str, str->len); } + + +size_t +blogc_utf8_skip_bom(const uint8_t *str, size_t len) +{ + if (len < 3) + return 0; + + if (str[0] == 0xef && str[1] == 0xbb && str[2] == 0xbf) + return 3; + + return 0; +} @@ -15,5 +15,6 @@ bool blogc_utf8_validate(const uint8_t *str, size_t len); bool blogc_utf8_validate_str(sb_string_t *str); +size_t blogc_utf8_skip_bom(const uint8_t *str, size_t len); #endif /* _UTF_8_H */ |