diff options
| author | Rafael G. Martins <rafael@rafaelmartins.eng.br> | 2020-11-05 19:13:47 +0100 | 
|---|---|---|
| committer | Rafael G. Martins <rafael@rafaelmartins.eng.br> | 2020-11-05 19:13:49 +0100 | 
| commit | de14affe2e316f00663759100d658731fb8fc0ca (patch) | |
| tree | b31937440eedb113f5e936a23c941c7831ee9c23 /src/common | |
| parent | 39986b29f34795d346b8feb10ce4fe9caac8878e (diff) | |
| download | blogc-de14affe2e316f00663759100d658731fb8fc0ca.tar.gz blogc-de14affe2e316f00663759100d658731fb8fc0ca.tar.bz2 blogc-de14affe2e316f00663759100d658731fb8fc0ca.zip  | |
common: utf: simplified utf-8 validation
we don't need to evaluate codepoints, just to check if the byte
sequences are valid.
Diffstat (limited to 'src/common')
| -rw-r--r-- | src/common/utf8.c | 16 | 
1 file changed, 1 insertion, 15 deletions
```diff
diff --git a/src/common/utf8.c b/src/common/utf8.c
index f029e5b..df5e2d2 100644
--- a/src/common/utf8.c
+++ b/src/common/utf8.c
@@ -56,27 +56,13 @@ static const uint8_t utf8d[] = {
 };
 
 
-static uint32_t inline
-decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
-    uint32_t type = utf8d[byte];
-
-    *codep = (*state != UTF8_ACCEPT) ?
-        (byte & 0x3fu) | (*codep << 6) :
-        (0xff >> type) & (byte);
-
-    *state = utf8d[256 + *state + type];
-    return *state;
-}
-
-
 bool
 bc_utf8_validate(const uint8_t *str, size_t len)
 {
-    uint32_t codepoint;
     uint32_t state = 0;
 
     for (size_t i = 0; i < len; i++)
-        decode(&state, &codepoint, str[i]);
+        state = utf8d[256 + state + utf8d[str[i]]];
 
     return state == UTF8_ACCEPT;
 }
```
