diff options
author | Rafael G. Martins <rafael@rafaelmartins.eng.br> | 2018-04-05 22:35:25 +0200 |
---|---|---|
committer | Rafael G. Martins <rafael@rafaelmartins.eng.br> | 2018-04-05 22:35:35 +0200 |
commit | c43b487246fdfd2ddc5c794763b18255ac6a318e (patch) | |
tree | c5b2b5ef837e9e796842155a6f7943745c61d135 /src/common | |
parent | 0a7f6a2659b363e1d68202b9fd863b57f7420f4f (diff) | |
download | blogc-c43b487246fdfd2ddc5c794763b18255ac6a318e.tar.gz blogc-c43b487246fdfd2ddc5c794763b18255ac6a318e.tar.bz2 blogc-c43b487246fdfd2ddc5c794763b18255ac6a318e.zip |
*: use squareball
yeah, changed my mind again :)
Diffstat (limited to 'src/common')
-rw-r--r-- | src/common/config-parser.c | 431 | ||||
-rw-r--r-- | src/common/config-parser.h | 31 | ||||
-rw-r--r-- | src/common/error.c | 152 | ||||
-rw-r--r-- | src/common/error.h | 46 | ||||
-rw-r--r-- | src/common/file.c | 66 | ||||
-rw-r--r-- | src/common/file.h | 20 | ||||
-rw-r--r-- | src/common/stdin.c | 24 | ||||
-rw-r--r-- | src/common/stdin.h | 14 | ||||
-rw-r--r-- | src/common/utf8.c | 102 | ||||
-rw-r--r-- | src/common/utf8.h | 21 | ||||
-rw-r--r-- | src/common/utils.c | 667 | ||||
-rw-r--r-- | src/common/utils.h | 107 |
12 files changed, 0 insertions, 1681 deletions
diff --git a/src/common/config-parser.c b/src/common/config-parser.c deleted file mode 100644 index 235209e..0000000 --- a/src/common/config-parser.c +++ /dev/null @@ -1,431 +0,0 @@ -/* - * blogc: A blog compiler. - * Copyright (C) 2014-2017 Rafael G. Martins <rafael@rafaelmartins.eng.br> - * - * This program can be distributed under the terms of the BSD License. - * See the file LICENSE. - */ - -#include <stdbool.h> -#include <stdlib.h> -#include <string.h> -#include "error.h" -#include "utils.h" -#include "config-parser.h" - - -typedef enum { - CONFIG_START = 1, - CONFIG_SECTION_START, - CONFIG_SECTION, - CONFIG_SECTION_KEY, - CONFIG_SECTION_VALUE_START, - CONFIG_SECTION_VALUE_QUOTE, - CONFIG_SECTION_VALUE_POST_QUOTED, - CONFIG_SECTION_VALUE, - CONFIG_SECTION_LIST_START, - CONFIG_SECTION_LIST_QUOTE, - CONFIG_SECTION_LIST_POST_QUOTED, - CONFIG_SECTION_LIST, -} bc_configparser_state_t; - -typedef enum { - CONFIG_SECTION_TYPE_MAP = 1, - CONFIG_SECTION_TYPE_LIST, -} bc_configparser_section_type_t; - -typedef struct { - bc_configparser_section_type_t type; - void *data; -} bc_configparser_section_t; - - -static void -free_section(bc_configparser_section_t *section) -{ - if (section == NULL) - return; - - switch (section->type) { - case CONFIG_SECTION_TYPE_MAP: - bc_trie_free(section->data); - break; - case CONFIG_SECTION_TYPE_LIST: - bc_slist_free_full(section->data, free); - break; - } - free(section); -} - - -bc_config_t* -bc_config_parse(const char *src, size_t src_len, const char *list_sections[], - bc_error_t **err) -{ - if (err == NULL || *err != NULL) - return NULL; - - size_t current = 0; - size_t start = 0; - - bc_configparser_section_t *section = NULL; - - char *section_name = NULL; - char *key = NULL; - bc_string_t *value = NULL; - bool escaped = false; - - bc_config_t *rv = bc_malloc(sizeof(bc_config_t)); - rv->root = bc_trie_new((bc_free_func_t) free_section); - - bc_configparser_state_t state = CONFIG_START; - - while (current < src_len) { - char c = 
src[current]; - bool is_last = current == src_len - 1; - - if (escaped) { - bc_string_append_c(value, c); - escaped = false; - current++; - continue; - } - - if (value != NULL && c == '\\') { - escaped = true; - current++; - continue; - } - - switch (state) { - - case CONFIG_START: - if (c == '#' || c == ';') { - while (current < src_len) { - if (src[current] == '\r' || src[current] == '\n') - break; - current++; - } - break; - } - if (c == ' ' || c == '\t' || c == '\r' || c == '\n') - break; - if (c == '[') { - state = CONFIG_SECTION_START; - break; - } - if (section != NULL) { - start = current; - switch (section->type) { - case CONFIG_SECTION_TYPE_MAP: - state = CONFIG_SECTION_KEY; - break; - case CONFIG_SECTION_TYPE_LIST: - state = CONFIG_SECTION_LIST_START; - if (value == NULL) - value = bc_string_new(); - break; - } - continue; - } - *err = bc_error_parser(BC_ERROR_CONFIG_PARSER, src, src_len, - current, "File must start with section."); - break; - - case CONFIG_SECTION_START: - start = current; - state = CONFIG_SECTION; - break; - - case CONFIG_SECTION: - if (c == ']') { - section_name = bc_strndup(src + start, current - start); - section = bc_malloc(sizeof(bc_configparser_section_t)); - section->type = CONFIG_SECTION_TYPE_MAP; - if (list_sections != NULL) { - for (size_t i = 0; list_sections[i] != NULL; i++) { - if (0 == strcmp(section_name, list_sections[i])) { - section->type = CONFIG_SECTION_TYPE_LIST; - break; - } - } - } - switch (section->type) { - case CONFIG_SECTION_TYPE_MAP: - section->data = bc_trie_new(free); - break; - case CONFIG_SECTION_TYPE_LIST: - section->data = NULL; - break; - } - bc_trie_insert(rv->root, section_name, section); - free(section_name); - section_name = NULL; - state = CONFIG_START; - break; - } - if (c != '\r' && c != '\n') - break; - *err = bc_error_parser(BC_ERROR_CONFIG_PARSER, src, src_len, - current, "Section names can't have new lines."); - break; - - case CONFIG_SECTION_KEY: - if (c == '=') { - key = bc_strndup(src + 
start, current - start); - state = CONFIG_SECTION_VALUE_START; - if (value == NULL) - value = bc_string_new(); - break; - } - if (c != '\r' && c != '\n' && !is_last) - break; - // key without value, should we support it? - size_t end = is_last && c != '\n' && c != '\r' ? src_len : - current; - key = bc_strndup(src + start, end - start); - *err = bc_error_parser(BC_ERROR_CONFIG_PARSER, src, src_len, - current, "Key without value: %s.", key); - free(key); - key = NULL; - break; - - case CONFIG_SECTION_VALUE_START: - if (c == ' ' || c == '\t' || c == '\f' || c == '\v') - break; - if (c == '"') { - state = CONFIG_SECTION_VALUE_QUOTE; - break; - } - bc_string_append_c(value, c); - state = CONFIG_SECTION_VALUE; - break; - - case CONFIG_SECTION_VALUE_QUOTE: - if (c == '"') { - bc_trie_insert(section->data, bc_str_strip(key), - bc_string_free(value, false)); - free(key); - key = NULL; - value = NULL; - state = CONFIG_SECTION_VALUE_POST_QUOTED; - break; - } - bc_string_append_c(value, c); - break; - - case CONFIG_SECTION_VALUE_POST_QUOTED: - if (c == ' ' || c == '\t' || c == '\f' || c == '\v') - break; - if (c == '\r' || c == '\n' || is_last) { - state = CONFIG_START; - break; - } - *err = bc_error_parser(BC_ERROR_CONFIG_PARSER, src, src_len, - current, "Invalid value for key, should not have anything " - "after quotes."); - break; - - case CONFIG_SECTION_VALUE: - if (c == '\r' || c == '\n' || is_last) { - if (is_last && c != '\r' && c != '\n') - bc_string_append_c(value, c); - bc_trie_insert(section->data, bc_str_strip(key), - bc_strdup(bc_str_rstrip(value->str))); - free(key); - key = NULL; - bc_string_free(value, true); - value = NULL; - state = CONFIG_START; - break; - } - bc_string_append_c(value, c); - break; - - case CONFIG_SECTION_LIST_START: - if (c == ' ' || c == '\t' || c == '\f' || c == '\v') - break; - if (c == '"') { - state = CONFIG_SECTION_LIST_QUOTE; - break; - } - bc_string_append_c(value, c); - state = CONFIG_SECTION_LIST; - break; - - case 
CONFIG_SECTION_LIST_QUOTE: - if (c == '"') { - section->data = bc_slist_append(section->data, - bc_string_free(value, false)); - value = NULL; - state = CONFIG_SECTION_LIST_POST_QUOTED; - break; - - } - bc_string_append_c(value, c); - break; - - case CONFIG_SECTION_LIST_POST_QUOTED: - if (c == ' ' || c == '\t' || c == '\f' || c == '\v') - break; - if (c == '\r' || c == '\n' || is_last) { - state = CONFIG_START; - break; - } - *err = bc_error_parser(BC_ERROR_CONFIG_PARSER, src, src_len, - current, "Invalid value for list item, should not have " - "anything after quotes."); - break; - - case CONFIG_SECTION_LIST: - if (c == '\r' || c == '\n' || is_last) { - if (is_last && c != '\r' && c != '\n') - bc_string_append_c(value, c); - section->data = bc_slist_append(section->data, - bc_strdup(bc_str_strip(value->str))); - bc_string_free(value, true); - value = NULL; - state = CONFIG_START; - break; - - } - bc_string_append_c(value, c); - break; - - } - - if (*err != NULL) { - bc_config_free(rv); - rv = NULL; - break; - } - - current++; - } - - free(section_name); - free(key); - bc_string_free(value, true); - - return rv; -} - - -static void -list_keys(const char *key, const char *value, bc_slist_t **l) -{ - *l = bc_slist_append(*l, bc_strdup(key)); -} - - -char** -bc_config_list_sections(bc_config_t *config) -{ - if (config == NULL) - return NULL; - - bc_slist_t *l = NULL; - bc_trie_foreach(config->root, (bc_trie_foreach_func_t) list_keys, &l); - - char **rv = bc_malloc(sizeof(char*) * (bc_slist_length(l) + 1)); - - size_t i = 0; - for (bc_slist_t *tmp = l; tmp != NULL; tmp = tmp->next, i++) - rv[i] = tmp->data; - rv[i] = NULL; - - bc_slist_free(l); - - return rv; -} - - -char** -bc_config_list_keys(bc_config_t *config, const char *section) -{ - if (config == NULL) - return NULL; - - bc_configparser_section_t *s = bc_trie_lookup(config->root, section); - if (s == NULL) - return NULL; - - if (s->type != CONFIG_SECTION_TYPE_MAP) - return NULL; - - bc_slist_t *l = NULL; - 
bc_trie_foreach(s->data, (bc_trie_foreach_func_t) list_keys, &l); - - char **rv = bc_malloc(sizeof(char*) * (bc_slist_length(l) + 1)); - - size_t i = 0; - for (bc_slist_t *tmp = l; tmp != NULL; tmp = tmp->next, i++) - rv[i] = tmp->data; - rv[i] = NULL; - - bc_slist_free(l); - - return rv; -} - - -const char* -bc_config_get(bc_config_t *config, const char *section, const char *key) -{ - if (config == NULL) - return NULL; - - bc_configparser_section_t *s = bc_trie_lookup(config->root, section); - if (s == NULL) - return NULL; - - if (s->type != CONFIG_SECTION_TYPE_MAP) - return NULL; - - return bc_trie_lookup(s->data, key); -} - - -const char* -bc_config_get_with_default(bc_config_t *config, const char *section, const char *key, - const char *default_) -{ - const char *rv = bc_config_get(config, section, key); - if (rv == NULL) - return default_; - return rv; -} - - -char** -bc_config_get_list(bc_config_t *config, const char *section) -{ - if (config == NULL) - return NULL; - - bc_configparser_section_t *s = bc_trie_lookup(config->root, section); - if (s == NULL) - return NULL; - - if (s->type != CONFIG_SECTION_TYPE_LIST) - return NULL; - - char **rv = bc_malloc(sizeof(char*) * (bc_slist_length(s->data) + 1)); - - size_t i = 0; - for (bc_slist_t *tmp = s->data; tmp != NULL; tmp = tmp->next, i++) - rv[i] = bc_strdup(tmp->data); - rv[i] = NULL; - - return rv; -} - - -void -bc_config_free(bc_config_t *config) -{ - if (config == NULL) - return; - bc_trie_free(config->root); - free(config); -} diff --git a/src/common/config-parser.h b/src/common/config-parser.h deleted file mode 100644 index e8068f6..0000000 --- a/src/common/config-parser.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * blogc: A blog compiler. - * Copyright (C) 2014-2017 Rafael G. Martins <rafael@rafaelmartins.eng.br> - * - * This program can be distributed under the terms of the BSD License. - * See the file LICENSE. 
- */ - -#ifndef _CONFIG_PARSER_H -#define _CONFIG_PARSER_H - -#include <stddef.h> -#include "utils.h" -#include "error.h" - -typedef struct { - bc_trie_t *root; -} bc_config_t; - -bc_config_t* bc_config_parse(const char *src, size_t src_len, - const char *list_sections[], bc_error_t **err); -char** bc_config_list_sections(bc_config_t *config); -char** bc_config_list_keys(bc_config_t *config, const char *section); -const char* bc_config_get(bc_config_t *config, const char *section, - const char *key); -const char* bc_config_get_with_default(bc_config_t *config, const char *section, - const char *key, const char *default_); -char** bc_config_get_list(bc_config_t *config, const char *section); -void bc_config_free(bc_config_t *config); - -#endif /* _CONFIG_PARSER_H */ diff --git a/src/common/error.c b/src/common/error.c deleted file mode 100644 index 19f369c..0000000 --- a/src/common/error.c +++ /dev/null @@ -1,152 +0,0 @@ -/* - * blogc: A blog compiler. - * Copyright (C) 2014-2017 Rafael G. Martins <rafael@rafaelmartins.eng.br> - * - * This program can be distributed under the terms of the BSD License. - * See the file LICENSE. - */ - -#include <stdio.h> -#include <stdlib.h> -#include <stdarg.h> -#include "error.h" -#include "utils.h" - - -bc_error_t* -bc_error_new(bc_error_type_t type, const char *msg) -{ - bc_error_t *err = bc_malloc(sizeof(bc_error_t)); - err->type = type; - err->msg = bc_strdup(msg); - return err; -} - - -bc_error_t* -bc_error_new_printf(bc_error_type_t type, const char *format, ...) -{ - va_list ap; - va_start(ap, format); - char *tmp = bc_strdup_vprintf(format, ap); - va_end(ap); - bc_error_t *rv = bc_error_new(type, tmp); - free(tmp); - return rv; -} - - -bc_error_t* -bc_error_parser(bc_error_type_t type, const char *src, size_t src_len, - size_t current, const char *format, ...) 
-{ - va_list ap; - va_start(ap, format); - char *msg = bc_strdup_vprintf(format, ap); - va_end(ap); - - size_t lineno = 1; - size_t linestart = 0; - size_t lineend = 0; - size_t pos = 1; - - for (size_t i = 0; i < src_len; i++) { - char c = src[i]; - if (i < current) { - if ((i + 1) < src_len) { - if ((c == '\n' && src[i + 1] == '\r') || - (c == '\r' && src[i + 1] == '\n')) - { - lineno++; - i++; - pos = 1; - if ((i + 1) < src_len) - linestart = i + 1; - continue; - } - } - if (c == '\n' || c == '\r') { - lineno++; - pos = 1; - if ((i + 1) < src_len) - linestart = i + 1; - continue; - } - pos++; - } - else if (c == '\n' || c == '\r') { - lineend = i; - break; - } - } - - if (lineend <= linestart && src_len >= linestart) - lineend = src_len; - - char *line = bc_strndup(src + linestart, lineend - linestart); - - bc_error_t *rv = NULL; - - if (line[0] == '\0') // "near" message isn't useful if line is empty - rv = bc_error_new(type, msg); - else - rv = bc_error_new_printf(type, - "%s\nError occurred near line %d, position %d: %s", msg, lineno, - pos, line); - - free(msg); - free(line); - - return rv; -} - - -// error handling is centralized here for the sake of simplicity :/ -void -bc_error_print(bc_error_t *err, const char *prefix) -{ - if (err == NULL) - return; - - if (prefix != NULL) - fprintf(stderr, "%s: ", prefix); - - switch(err->type) { - case BC_ERROR_CONFIG_PARSER: - fprintf(stderr, "error: config-parser: %s\n", err->msg); - break; - case BC_ERROR_FILE: - fprintf(stderr, "error: file: %s\n", err->msg); - break; - case BLOGC_ERROR_SOURCE_PARSER: - fprintf(stderr, "error: source: %s\n", err->msg); - break; - case BLOGC_ERROR_TEMPLATE_PARSER: - fprintf(stderr, "error: template: %s\n", err->msg); - break; - case BLOGC_ERROR_LOADER: - fprintf(stderr, "error: loader: %s\n", err->msg); - break; - case BLOGC_WARNING_DATETIME_PARSER: - fprintf(stderr, "warning: datetime: %s\n", err->msg); - break; - case BLOGC_MAKE_ERROR_SETTINGS: - fprintf(stderr, "error: settings: 
%s\n", err->msg); - break; - case BLOGC_MAKE_ERROR_EXEC: - fprintf(stderr, "error: exec: %s\n", err->msg); - break; - default: - fprintf(stderr, "error: %s\n", err->msg); - } -} - - -void -bc_error_free(bc_error_t *err) -{ - if (err == NULL) - return; - free(err->msg); - free(err); -} diff --git a/src/common/error.h b/src/common/error.h deleted file mode 100644 index 34aab74..0000000 --- a/src/common/error.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * blogc: A blog compiler. - * Copyright (C) 2014-2017 Rafael G. Martins <rafael@rafaelmartins.eng.br> - * - * This program can be distributed under the terms of the BSD License. - * See the file LICENSE. - */ - -#ifndef _ERROR_H -#define _ERROR_H - -#include <stddef.h> - -// error handling is centralized here for the sake of simplicity :/ -typedef enum { - - // errors for src/common - BC_ERROR_CONFIG_PARSER = 1, - BC_ERROR_FILE, - - // errors for src/blogc - BLOGC_ERROR_SOURCE_PARSER = 100, - BLOGC_ERROR_TEMPLATE_PARSER, - BLOGC_ERROR_LOADER, - BLOGC_WARNING_DATETIME_PARSER, - - // errors for src/blogc-make - BLOGC_MAKE_ERROR_SETTINGS = 300, - BLOGC_MAKE_ERROR_EXEC, - BLOGC_MAKE_ERROR_ATOM, - -} bc_error_type_t; - -typedef struct { - char *msg; - bc_error_type_t type; -} bc_error_t; - -bc_error_t* bc_error_new(bc_error_type_t type, const char *msg); -bc_error_t* bc_error_new_printf(bc_error_type_t type, const char *format, ...); -bc_error_t* bc_error_parser(bc_error_type_t type, const char *src, - size_t src_len, size_t current, const char *format, ...); -void bc_error_print(bc_error_t *err, const char *prefix); -void bc_error_free(bc_error_t *err); - -#endif /* _ERROR_H */ diff --git a/src/common/file.c b/src/common/file.c deleted file mode 100644 index adfc22e..0000000 --- a/src/common/file.c +++ /dev/null @@ -1,66 +0,0 @@ -/* - * blogc: A blog compiler. - * Copyright (C) 2014-2017 Rafael G. Martins <rafael@rafaelmartins.eng.br> - * - * This program can be distributed under the terms of the BSD License. 
- * See the file LICENSE. - */ - -#include <errno.h> -#include <stdbool.h> -#include <stdint.h> -#include <stdio.h> -#include <string.h> -#include "file.h" -#include "error.h" -#include "utf8.h" -#include "utils.h" - - -char* -bc_file_get_contents(const char *path, bool utf8, size_t *len, bc_error_t **err) -{ - if (path == NULL || err == NULL || *err != NULL) - return NULL; - - *len = 0; - FILE *fp = fopen(path, "r"); - - if (fp == NULL) { - int tmp_errno = errno; - *err = bc_error_new_printf(BC_ERROR_FILE, - "Failed to open file (%s): %s", path, strerror(tmp_errno)); - return NULL; - } - - bc_string_t *str = bc_string_new(); - char buffer[BC_FILE_CHUNK_SIZE]; - char *tmp; - - while (!feof(fp)) { - size_t read_len = fread(buffer, sizeof(char), BC_FILE_CHUNK_SIZE, fp); - - tmp = buffer; - - if (utf8 && str->len == 0 && read_len > 0) { - // skipping BOM before validation, for performance. should be safe - // enough - size_t skip = bc_utf8_skip_bom((uint8_t*) buffer, read_len); - read_len -= skip; - tmp += skip; - } - - *len += read_len; - bc_string_append_len(str, tmp, read_len); - } - fclose(fp); - - if (utf8 && !bc_utf8_validate_str(str)) { - *err = bc_error_new_printf(BC_ERROR_FILE, - "File content is not valid UTF-8: %s", path); - bc_string_free(str, true); - return NULL; - } - - return bc_string_free(str, false); -} diff --git a/src/common/file.h b/src/common/file.h deleted file mode 100644 index 91224fc..0000000 --- a/src/common/file.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * blogc: A blog compiler. - * Copyright (C) 2014-2017 Rafael G. Martins <rafael@rafaelmartins.eng.br> - * - * This program can be distributed under the terms of the BSD License. - * See the file LICENSE. 
- */ - -#ifndef _FILE_H -#define _FILE_H - -#include <stddef.h> -#include <stdbool.h> -#include "error.h" - -#define BC_FILE_CHUNK_SIZE 1024 - -char* bc_file_get_contents(const char *path, bool utf8, size_t *len, bc_error_t **err); - -#endif /* _FILE_H */ diff --git a/src/common/stdin.c b/src/common/stdin.c deleted file mode 100644 index 0c2afd1..0000000 --- a/src/common/stdin.c +++ /dev/null @@ -1,24 +0,0 @@ -/* - * blogc: A blog compiler. - * Copyright (C) 2014-2017 Rafael G. Martins <rafael@rafaelmartins.eng.br> - * - * This program can be distributed under the terms of the BSD License. - * See the file LICENSE. - */ - -#include <stdbool.h> -#include <stdio.h> -#include "utils.h" -#include "stdin.h" - - -// splitted in single file to make it easier to test -char* -bc_stdin_read(void) -{ - int c; - bc_string_t *rv = bc_string_new(); - while (EOF != (c = fgetc(stdin))) - bc_string_append_c(rv, c); - return bc_string_free(rv, false); -} diff --git a/src/common/stdin.h b/src/common/stdin.h deleted file mode 100644 index 825b1bb..0000000 --- a/src/common/stdin.h +++ /dev/null @@ -1,14 +0,0 @@ -/* - * blogc: A blog compiler. - * Copyright (C) 2014-2017 Rafael G. Martins <rafael@rafaelmartins.eng.br> - * - * This program can be distributed under the terms of the BSD License. - * See the file LICENSE. - */ - -#ifndef _STDIN_H -#define _STDIN_H - -char* bc_stdin_read(void); - -#endif /* _STDIN_H */ diff --git a/src/common/utf8.c b/src/common/utf8.c deleted file mode 100644 index 5c7b51f..0000000 --- a/src/common/utf8.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * blogc: A blog compiler. - * Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de> - * Copyright (c) 2014-2017 Rafael G. 
Martins <rafael@rafaelmartins.eng.br> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -// Based on Bjoern Hoehrmann's algorithm. -// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. - -#include <stdbool.h> -#include <stddef.h> -#include <stdint.h> -#include "utils.h" - -#define UTF8_ACCEPT 0 -#define UTF8_REJECT 12 - - -static const uint8_t utf8d[] = { - // The first part of the table maps bytes to character classes that - // to reduce the size of the transition table and create bitmasks. 
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, - - // The second part is a transition table that maps a combination - // of a state of the automaton and a character class to a state. - 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, - 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, - 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, - 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, - 12,36,12,12,12,12,12,12,12,12,12,12, -}; - - -static uint32_t inline -decode(uint32_t* state, uint32_t* codep, uint32_t byte) { - uint32_t type = utf8d[byte]; - - *codep = (*state != UTF8_ACCEPT) ? 
- (byte & 0x3fu) | (*codep << 6) : - (0xff >> type) & (byte); - - *state = utf8d[256 + *state + type]; - return *state; -} - - -bool -bc_utf8_validate(const uint8_t *str, size_t len) -{ - uint32_t codepoint; - uint32_t state = 0; - - for (size_t i = 0; i < len; i++) - decode(&state, &codepoint, str[i]); - - return state == UTF8_ACCEPT; -} - - -bool -bc_utf8_validate_str(bc_string_t *str) -{ - return bc_utf8_validate((uint8_t*) str->str, str->len); -} - - -size_t -bc_utf8_skip_bom(const uint8_t *str, size_t len) -{ - if (len < 3) - return 0; - - if (str[0] == 0xef && str[1] == 0xbb && str[2] == 0xbf) - return 3; - - return 0; -} diff --git a/src/common/utf8.h b/src/common/utf8.h deleted file mode 100644 index 206a2cc..0000000 --- a/src/common/utf8.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * blogc: A blog compiler. - * Copyright (C) 2014-2017 Rafael G. Martins <rafael@rafaelmartins.eng.br> - * - * This program can be distributed under the terms of the BSD License. - * See the file LICENSE. - */ - -#ifndef _UTF_8_H -#define _UTF_8_H - -#include <stdbool.h> -#include <stddef.h> -#include <stdint.h> -#include "utils.h" - -bool bc_utf8_validate(const uint8_t *str, size_t len); -bool bc_utf8_validate_str(bc_string_t *str); -size_t bc_utf8_skip_bom(const uint8_t *str, size_t len); - -#endif /* _UTF_8_H */ diff --git a/src/common/utils.c b/src/common/utils.c deleted file mode 100644 index 563d8ab..0000000 --- a/src/common/utils.c +++ /dev/null @@ -1,667 +0,0 @@ -/* - * blogc: A blog compiler. - * Copyright (C) 2014-2017 Rafael G. Martins <rafael@rafaelmartins.eng.br> - * - * This program can be distributed under the terms of the BSD License. - * See the file LICENSE. - */ - -#define BC_STRING_CHUNK_SIZE 128 - -#include <string.h> -#include <stdarg.h> -#include <stdbool.h> -#include <stdlib.h> -#include <stdio.h> - -#include "utils.h" - - -void* -bc_malloc(size_t size) -{ - // simple things simple! 
- void *rv = malloc(size); - if (rv == NULL) { - fprintf(stderr, "fatal: Failed to allocate memory!\n"); - abort(); - } - return rv; -} - - -void* -bc_realloc(void *ptr, size_t size) -{ - // simple things even simpler :P - void *rv = realloc(ptr, size); - if (rv == NULL && size != 0) { - fprintf(stderr, "fatal: Failed to reallocate memory!\n"); - free(ptr); - abort(); - } - return rv; -} - - -bc_slist_t* -bc_slist_append(bc_slist_t *l, void *data) -{ - bc_slist_t *node = bc_malloc(sizeof(bc_slist_t)); - node->data = data; - node->next = NULL; - if (l == NULL) { - l = node; - } - else { - bc_slist_t *tmp; - for (tmp = l; tmp->next != NULL; tmp = tmp->next); - tmp->next = node; - } - return l; -} - - -bc_slist_t* -bc_slist_prepend(bc_slist_t *l, void *data) -{ - bc_slist_t *node = bc_malloc(sizeof(bc_slist_t)); - node->data = data; - node->next = l; - l = node; - return l; -} - - -void -bc_slist_free_full(bc_slist_t *l, bc_free_func_t free_func) -{ - while (l != NULL) { - bc_slist_t *tmp = l->next; - if ((free_func != NULL) && (l->data != NULL)) - free_func(l->data); - free(l); - l = tmp; - } -} - - -void -bc_slist_free(bc_slist_t *l) -{ - bc_slist_free_full(l, NULL); -} - - -size_t -bc_slist_length(bc_slist_t *l) -{ - if (l == NULL) - return 0; - size_t i; - bc_slist_t *tmp; - for (tmp = l, i = 0; tmp != NULL; tmp = tmp->next, i++); - return i; -} - - -char* -bc_strdup(const char *s) -{ - if (s == NULL) - return NULL; - size_t l = strlen(s); - char *tmp = malloc(l + 1); - if (tmp == NULL) - return NULL; - memcpy(tmp, s, l + 1); - return tmp; -} - - -char* -bc_strndup(const char *s, size_t n) -{ - if (s == NULL) - return NULL; - size_t l = strnlen(s, n); - char *tmp = malloc(l + 1); - if (tmp == NULL) - return NULL; - memcpy(tmp, s, l); - tmp[l] = '\0'; - return tmp; -} - - -char* -bc_strdup_vprintf(const char *format, va_list ap) -{ - va_list ap2; - va_copy(ap2, ap); - int l = vsnprintf(NULL, 0, format, ap2); - va_end(ap2); - if (l < 0) - return NULL; - char *tmp = 
malloc(l + 1); - if (!tmp) - return NULL; - int l2 = vsnprintf(tmp, l + 1, format, ap); - if (l2 < 0) { - free(tmp); - return NULL; - } - return tmp; -} - - -char* -bc_strdup_printf(const char *format, ...) -{ - va_list ap; - va_start(ap, format); - char *tmp = bc_strdup_vprintf(format, ap); - va_end(ap); - return tmp; -} - - -bool -bc_str_starts_with(const char *str, const char *prefix) -{ - int str_l = strlen(str); - int str_lp = strlen(prefix); - if (str_lp > str_l) - return false; - return strncmp(str, prefix, str_lp) == 0; -} - - -bool -bc_str_ends_with(const char *str, const char *suffix) -{ - int str_l = strlen(str); - int str_ls = strlen(suffix); - if (str_ls > str_l) - return false; - return strcmp(str + str_l - str_ls, suffix) == 0; -} - - -char* -bc_str_lstrip(char *str) -{ - if (str == NULL) - return NULL; - int i; - size_t str_len = strlen(str); - for (i = 0; i < str_len; i++) { - if ((str[i] != ' ') && (str[i] != '\t') && (str[i] != '\n') && - (str[i] != '\r') && (str[i] != '\t') && (str[i] != '\f') && - (str[i] != '\v')) - { - str += i; - break; - } - if (i == str_len - 1) { - str += str_len; - break; - } - } - return str; -} - - -char* -bc_str_rstrip(char *str) -{ - if (str == NULL) - return NULL; - int i; - size_t str_len = strlen(str); - for (i = str_len - 1; i >= 0; i--) { - if ((str[i] != ' ') && (str[i] != '\t') && (str[i] != '\n') && - (str[i] != '\r') && (str[i] != '\t') && (str[i] != '\f') && - (str[i] != '\v')) - { - str[i + 1] = '\0'; - break; - } - if (i == 0) { - str[0] = '\0'; - break; - } - } - return str; -} - - -char* -bc_str_strip(char *str) -{ - return bc_str_lstrip(bc_str_rstrip(str)); -} - - -char** -bc_str_split(const char *str, char c, size_t max_pieces) -{ - if (str == NULL) - return NULL; - char **rv = bc_malloc(sizeof(char*)); - size_t i, start = 0, count = 0; - for (i = 0; i < strlen(str) + 1; i++) { - if (str[0] == '\0') - break; - if ((str[i] == c && (!max_pieces || count + 1 < max_pieces)) || str[i] == '\0') { - rv = 
bc_realloc(rv, (count + 1) * sizeof(char*)); - rv[count] = bc_malloc(i - start + 1); - memcpy(rv[count], str + start, i - start); - rv[count++][i - start] = '\0'; - start = i + 1; - } - } - rv = bc_realloc(rv, (count + 1) * sizeof(char*)); - rv[count] = NULL; - return rv; -} - - -char* -bc_str_replace(const char *str, const char search, const char *replace) -{ - char **pieces = bc_str_split(str, search, 0); - if (pieces == NULL) - return NULL; - char* rv = bc_strv_join(pieces, replace); - bc_strv_free(pieces); - if (rv == NULL) - return bc_strdup(str); - return rv; -} - - -char* -bc_str_find(const char *str, char c) -{ - // this is somewhat similar to strchr, but respects '\' escaping. - if (str == NULL) - return NULL; - if (c == '\0') - return (char*) str + strlen(str); - for (size_t i = 0; str[i] != '\0'; i++) { - if (str[i] == '\\') { - i++; - continue; - } - if (str[i] == c) { - return (char*) str + i; - } - } - return NULL; -} - - -void -bc_strv_free(char **strv) -{ - if (strv == NULL) - return; - for (size_t i = 0; strv[i] != NULL; i++) - free(strv[i]); - free(strv); -} - - -char* -bc_strv_join(char **strv, const char *separator) -{ - if (strv == NULL || separator == NULL) - return NULL; - bc_string_t *str = bc_string_new(); - for (size_t i = 0; strv[i] != NULL; i++) { - str = bc_string_append(str, strv[i]); - if (strv[i + 1] != NULL) - str = bc_string_append(str, separator); - } - return bc_string_free(str, false); -} - - -size_t -bc_strv_length(char **strv) -{ - if (strv == NULL) - return 0; - size_t i; - for (i = 0; strv[i] != NULL; i++); - return i; -} - - -bc_string_t* -bc_string_new(void) -{ - bc_string_t* rv = bc_malloc(sizeof(bc_string_t)); - rv->str = NULL; - rv->len = 0; - rv->allocated_len = 0; - - // initialize with empty string - rv = bc_string_append(rv, ""); - - return rv; -} - - -char* -bc_string_free(bc_string_t *str, bool free_str) -{ - if (str == NULL) - return NULL; - char *rv = NULL; - if (free_str) - free(str->str); - else - rv = 
str->str; - free(str); - return rv; -} - - -bc_string_t* -bc_string_dup(bc_string_t *str) -{ - if (str == NULL) - return NULL; - bc_string_t* new = bc_string_new(); - return bc_string_append_len(new, str->str, str->len); -} - - -bc_string_t* -bc_string_append_len(bc_string_t *str, const char *suffix, size_t len) -{ - if (str == NULL) - return NULL; - if (suffix == NULL) - return str; - size_t old_len = str->len; - str->len += len; - if (str->len + 1 > str->allocated_len) { - str->allocated_len = (((str->len + 1) / BC_STRING_CHUNK_SIZE) + 1) * BC_STRING_CHUNK_SIZE; - str->str = bc_realloc(str->str, str->allocated_len); - } - memcpy(str->str + old_len, suffix, len); - str->str[str->len] = '\0'; - return str; -} - - -bc_string_t* -bc_string_append(bc_string_t *str, const char *suffix) -{ - if (str == NULL) - return NULL; - const char *my_suffix = suffix == NULL ? "" : suffix; - return bc_string_append_len(str, my_suffix, strlen(my_suffix)); -} - - -bc_string_t* -bc_string_append_c(bc_string_t *str, char c) -{ - if (str == NULL) - return NULL; - size_t old_len = str->len; - str->len += 1; - if (str->len + 1 > str->allocated_len) { - str->allocated_len = (((str->len + 1) / BC_STRING_CHUNK_SIZE) + 1) * BC_STRING_CHUNK_SIZE; - str->str = bc_realloc(str->str, str->allocated_len); - } - str->str[old_len] = c; - str->str[str->len] = '\0'; - return str; -} - - -bc_string_t* -bc_string_append_printf(bc_string_t *str, const char *format, ...) 
-{ - if (str == NULL) - return NULL; - va_list ap; - va_start(ap, format); - char *tmp = bc_strdup_vprintf(format, ap); - va_end(ap); - str = bc_string_append(str, tmp); - free(tmp); - return str; -} - - -bc_string_t* -bc_string_append_escaped(bc_string_t *str, const char *suffix) -{ - if (str == NULL) - return NULL; - if (suffix == NULL) - return str; - bool escaped = false; - for (size_t i = 0; suffix[i] != '\0'; i++) { - if (suffix[i] == '\\' && !escaped) { - escaped = true; - continue; - } - escaped = false; - str = bc_string_append_c(str, suffix[i]); - } - return str; -} - - -bc_trie_t* -bc_trie_new(bc_free_func_t free_func) -{ - bc_trie_t *trie = bc_malloc(sizeof(bc_trie_t)); - trie->root = NULL; - trie->free_func = free_func; - return trie; -} - - -static void -bc_trie_free_node(bc_trie_t *trie, bc_trie_node_t *node) -{ - if (trie == NULL || node == NULL) - return; - if (node->data != NULL && trie->free_func != NULL) - trie->free_func(node->data); - bc_trie_free_node(trie, node->next); - bc_trie_free_node(trie, node->child); - free(node); -} - - -void -bc_trie_free(bc_trie_t *trie) -{ - if (trie == NULL) - return; - bc_trie_free_node(trie, trie->root); - free(trie); -} - - -void -bc_trie_insert(bc_trie_t *trie, const char *key, void *data) -{ - if (trie == NULL || key == NULL || data == NULL) - return; - - bc_trie_node_t *parent = NULL; - bc_trie_node_t *previous; - bc_trie_node_t *current; - bc_trie_node_t *tmp; - - while (1) { - - if (trie->root == NULL || (parent != NULL && parent->child == NULL)) { - current = bc_malloc(sizeof(bc_trie_node_t)); - current->key = *key; - current->data = NULL; - current->next = NULL; - current->child = NULL; - if (trie->root == NULL) - trie->root = current; - else - parent->child = current; - parent = current; - goto clean; - } - - tmp = parent == NULL ? 
trie->root : parent->child; - previous = NULL; - - while (tmp != NULL && tmp->key != *key) { - previous = tmp; - tmp = tmp->next; - } - - parent = tmp; - - if (previous == NULL || parent != NULL) - goto clean; - - current = bc_malloc(sizeof(bc_trie_node_t)); - current->key = *key; - current->data = NULL; - current->next = NULL; - current->child = NULL; - previous->next = current; - parent = current; - -clean: - if (*key == '\0') { - if (parent->data != NULL && trie->free_func != NULL) - trie->free_func(parent->data); - parent->data = data; - break; - } - key++; - } -} - - -void* -bc_trie_lookup(bc_trie_t *trie, const char *key) -{ - if (trie == NULL || trie->root == NULL || key == NULL) - return NULL; - - bc_trie_node_t *parent = trie->root; - bc_trie_node_t *tmp; - while (1) { - for (tmp = parent; tmp != NULL; tmp = tmp->next) { - - if (tmp->key == *key) { - if (tmp->key == '\0') - return tmp->data; - parent = tmp->child; - break; - } - } - if (tmp == NULL) - return NULL; - - if (*key == '\0') - break; - key++; - } - return NULL; -} - - -static void -bc_trie_size_node(bc_trie_node_t *node, size_t *count) -{ - if (node == NULL || count == NULL) - return; - - if (node->key == '\0') - (*count)++; - - bc_trie_size_node(node->next, count); - bc_trie_size_node(node->child, count); -} - - -size_t -bc_trie_size(bc_trie_t *trie) -{ - if (trie == NULL) - return 0; - - size_t count = 0; - bc_trie_size_node(trie->root, &count); - return count; -} - - -static void -bc_trie_foreach_node(bc_trie_node_t *node, bc_string_t *str, - bc_trie_foreach_func_t func, void *user_data) -{ - if (node == NULL || str == NULL || func == NULL) - return; - - if (node->key == '\0') - func(str->str, node->data, user_data); - - if (node->child != NULL) { - bc_string_t *child = bc_string_dup(str); - child = bc_string_append_c(child, node->key); - bc_trie_foreach_node(node->child, child, func, user_data); - bc_string_free(child, true); - } - - if (node->next != NULL) - bc_trie_foreach_node(node->next, 
str, func, user_data); -} - - -void -bc_trie_foreach(bc_trie_t *trie, bc_trie_foreach_func_t func, - void *user_data) -{ - if (trie == NULL || trie->root == NULL || func == NULL) - return; - - bc_string_t *str = bc_string_new(); - bc_trie_foreach_node(trie->root, str, func, user_data); - bc_string_free(str, true); -} - - -char* -bc_shell_quote(const char *command) -{ - bc_string_t *rv = bc_string_new(); - bc_string_append_c(rv, '\''); - if (command != NULL) { - for (size_t i = 0; i < strlen(command); i++) { - switch (command[i]) { - case '!': - bc_string_append(rv, "'\\!'"); - break; - case '\'': - bc_string_append(rv, "'\\''"); - break; - default: - bc_string_append_c(rv, command[i]); - } - } - } - bc_string_append_c(rv, '\''); - return bc_string_free(rv, false); -} diff --git a/src/common/utils.h b/src/common/utils.h deleted file mode 100644 index 0f05c96..0000000 --- a/src/common/utils.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * blogc: A blog compiler. - * Copyright (C) 2014-2017 Rafael G. Martins <rafael@rafaelmartins.eng.br> - * - * This program can be distributed under the terms of the BSD License. - * See the file LICENSE. 
#ifndef _UTILS_H
#define _UTILS_H

#include <stddef.h>
#include <stdarg.h>
#include <stdbool.h>


// memory

// callback used by the containers below to release a stored value
typedef void (*bc_free_func_t) (void *ptr);

void* bc_malloc(size_t size);
void* bc_realloc(void *ptr, size_t size);


// slist

// node of a singly-linked list that carries an opaque data pointer
typedef struct _bc_slist_t {
    struct _bc_slist_t *next;
    void *data;
} bc_slist_t;

bc_slist_t* bc_slist_append(bc_slist_t *l, void *data);
bc_slist_t* bc_slist_prepend(bc_slist_t *l, void *data);
void bc_slist_free(bc_slist_t *l);
void bc_slist_free_full(bc_slist_t *l, bc_free_func_t free_func);
size_t bc_slist_length(bc_slist_t *l);


// strfuncs

char* bc_strdup(const char *s);
char* bc_strndup(const char *s, size_t n);
char* bc_strdup_vprintf(const char *format, va_list ap);
char* bc_strdup_printf(const char *format, ...);
bool bc_str_starts_with(const char *str, const char *prefix);
bool bc_str_ends_with(const char *str, const char *suffix);
char* bc_str_lstrip(char *str);
char* bc_str_rstrip(char *str);
char* bc_str_strip(char *str);
char** bc_str_split(const char *str, char c, size_t max_pieces);
char* bc_str_replace(const char *str, const char search, const char *replace);
// similar to strchr, but respects '\' escaping
char* bc_str_find(const char *str, char c);
// the strv functions operate on NULL-terminated vectors of strings
void bc_strv_free(char **strv);
char* bc_strv_join(char **strv, const char *separator);
size_t bc_strv_length(char **strv);


// string

// growable, NUL-terminated character buffer
typedef struct {
    char *str;             // buffer contents
    size_t len;            // length of the contents, without the final NUL
    size_t allocated_len;  // number of bytes allocated for 'str'
} bc_string_t;

bc_string_t* bc_string_new(void);
// returns the character buffer when 'free_str' is false, NULL otherwise
char* bc_string_free(bc_string_t *str, bool free_str);
bc_string_t* bc_string_dup(bc_string_t *str);
bc_string_t* bc_string_append_len(bc_string_t *str, const char *suffix, size_t len);
bc_string_t* bc_string_append(bc_string_t *str, const char *suffix);
bc_string_t* bc_string_append_c(bc_string_t *str, char c);
bc_string_t* bc_string_append_printf(bc_string_t *str, const char *format, ...);
bc_string_t* bc_string_append_escaped(bc_string_t *str, const char *suffix);


// trie

// one character of a key; 'next' links the alternatives for the same
// position, 'child' leads to the nodes for the following character
typedef struct _bc_trie_node_t {
    char key;
    void *data;
    struct _bc_trie_node_t *next, *child;
} bc_trie_node_t;

struct _bc_trie_t {
    bc_trie_node_t *root;
    bc_free_func_t free_func;  // releases stored values, may be NULL
};

typedef struct _bc_trie_t bc_trie_t;

// callback invoked by bc_trie_foreach for every stored key
typedef void (*bc_trie_foreach_func_t)(const char *key, void *data,
    void *user_data);

bc_trie_t* bc_trie_new(bc_free_func_t free_func);
void bc_trie_free(bc_trie_t *trie);
void bc_trie_insert(bc_trie_t *trie, const char *key, void *data);
void* bc_trie_lookup(bc_trie_t *trie, const char *key);
size_t bc_trie_size(bc_trie_t *trie);
void bc_trie_foreach(bc_trie_t *trie, bc_trie_foreach_func_t func,
    void *user_data);


// shell

char* bc_shell_quote(const char *command);

#endif /* _UTILS_H */