aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Rafael G. Martins <rafael@rafaelmartins.eng.br> 2016-07-03 21:52:51 +0200
committer Rafael G. Martins <rafael@rafaelmartins.eng.br> 2016-07-04 01:07:34 +0200
commit 1099a4d991942655c0291a74b488322d5da533bd (patch)
tree 0a91f413d3675122a15f49a87fd9413baf46c655 /src
parent 4aac65c4b8f2d8415ca8d9d8449e0158e0ff1e9c (diff)
download blogc-1099a4d991942655c0291a74b488322d5da533bd.tar.gz
blogc-1099a4d991942655c0291a74b488322d5da533bd.tar.bz2
blogc-1099a4d991942655c0291a74b488322d5da533bd.zip
utf8: skip BOM, if found
Diffstat (limited to 'src')
-rw-r--r-- src/file.c 14
-rw-r--r-- src/utf8.c 13
-rw-r--r-- src/utf8.h 1
3 files changed, 27 insertions, 1 deletions
diff --git a/src/file.c b/src/file.c
index 7171f31..6da94a2 100644
--- a/src/file.c
+++ b/src/file.c
@@ -41,11 +41,23 @@ blogc_file_get_contents(const char *path, size_t *len, blogc_error_t **err)
sb_string_t *str = sb_string_new();
char buffer[BLOGC_FILE_CHUNK_SIZE];
+ char *tmp;
while (!feof(fp)) {
size_t read_len = fread(buffer, sizeof(char), BLOGC_FILE_CHUNK_SIZE, fp);
+
+ tmp = buffer;
+
+ if (str->len == 0 && read_len > 0) {
+ // skipping BOM before validation, for performance. should be safe
+ // enough
+ size_t skip = blogc_utf8_skip_bom((uint8_t*) buffer, read_len);
+ read_len -= skip;
+ tmp += skip;
+ }
+
*len += read_len;
- sb_string_append_len(str, buffer, read_len);
+ sb_string_append_len(str, tmp, read_len);
}
fclose(fp);
diff --git a/src/utf8.c b/src/utf8.c
index deea46d..0c04d60 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -86,3 +86,16 @@ blogc_utf8_validate_str(sb_string_t *str)
{
return blogc_utf8_validate((uint8_t*) str->str, str->len);
}
+
+
+size_t
+blogc_utf8_skip_bom(const uint8_t *str, size_t len)
+{ /* Returns how many leading bytes of str (3 or 0) form a UTF-8 byte order mark. */
+ if (len < 3) /* fewer than three bytes cannot hold the three-byte mark */
+ return 0;
+
+ if (str[0] == 0xef && str[1] == 0xbb && str[2] == 0xbf) /* EF BB BF: the UTF-8 BOM */
+ return 3;
+
+ return 0; /* no BOM at the start: nothing to skip */
+}
diff --git a/src/utf8.h b/src/utf8.h
index 837d02f..582ae1c 100644
--- a/src/utf8.h
+++ b/src/utf8.h
@@ -15,5 +15,6 @@
bool blogc_utf8_validate(const uint8_t *str, size_t len);
bool blogc_utf8_validate_str(sb_string_t *str);
+size_t blogc_utf8_skip_bom(const uint8_t *str, size_t len);
#endif /* _UTF_8_H */