From a5b119807dc213b031b2d9b2fd0140a52444b9c5 Mon Sep 17 00:00:00 2001
From: Kyler
Date: Fri, 7 Nov 2025 23:31:19 -0700
Subject: [PATCH] Implemented string helper functions

---
 SLS_C/SlsStr.md            |   4 +-
 SLS_C/include/sls/errors.h |   6 +-
 SLS_C/include/sls/string.h |   7 +-
 SLS_C/src/string.c         | 252 ++++++++++++++++++++++++++++++++++++-
 4 files changed, 261 insertions(+), 8 deletions(-)

diff --git a/SLS_C/SlsStr.md b/SLS_C/SlsStr.md
index 9353158..3ae2748 100644
--- a/SLS_C/SlsStr.md
+++ b/SLS_C/SlsStr.md
@@ -6,8 +6,8 @@
 - `c` char\*
 - `d` int32_t
 - `l` int64_t
-- `ul` uint64_t
-- `zu` size_t
+- `u` uint64_t
+- `z` size_t
 - `f` double
 - `s` string.h SlsStr
 - `t` lexer.h TokenType
diff --git a/SLS_C/include/sls/errors.h b/SLS_C/include/sls/errors.h
index 6fdeda7..c20a73f 100644
--- a/SLS_C/include/sls/errors.h
+++ b/SLS_C/include/sls/errors.h
@@ -9,8 +9,10 @@
 #include
 #include
 
+#include "./string.h"
+
 typedef struct {
-    const char *message;
+    SlsStr message;
     int32_t code;
 } SlsError;
 
@@ -20,7 +22,7 @@ typedef enum {
 } SlsResultType;
 
 typedef struct {
-    const char *filename;
+    SlsStr filename;
     size_t line;
     size_t column;
     size_t length;
diff --git a/SLS_C/include/sls/string.h b/SLS_C/include/sls/string.h
index 62d5a93..0c4edcc 100644
--- a/SLS_C/include/sls/string.h
+++ b/SLS_C/include/sls/string.h
@@ -12,9 +12,9 @@
 #include "bool.h"
 
 typedef struct {
-    const size_t len;
+    size_t len;
     const char *str;
-    const Boolean allocated;
+    Boolean allocated;
 } SlsStr;
 
 #define SLS_STR(s) (SlsStr){ sizeof(s) - 1, (s), FALSE }
@@ -23,6 +23,9 @@ int isascii(unsigned char c);
 size_t strnlen(const char *s, size_t maxlen);
 
 SlsStr malloc_str(const char *s, size_t maxlen);
+SlsStr new_str(size_t length);
+SlsStr copy_str(SlsStr s);
+void free_str(SlsStr *s);
 SlsStr format(const SlsStr s, ...);
 
 #endif // SLS_STRING_H
diff --git a/SLS_C/src/string.c b/SLS_C/src/string.c
index cabfb91..2e630a9 100644
--- a/SLS_C/src/string.c
+++ b/SLS_C/src/string.c
@@ -4,9 +4,14 @@
 // November 2025
 
 #include
+#include <stdarg.h>
+#include <stdio.h>
 #include
 #include
 
+#include "sls/string.h"
+#include "sls/lexer.h"
+
 int isascii(unsigned char c) {
     return c < 128;
 }
@@ -18,9 +23,233 @@ size_t strnlen(const char *s, size_t maxlen) {
     return i;
 }
 
-char *malloc_str(const char *s, size_t maxlen) {
-    size_t length = strnlen(s, maxlen);
-    char *new_str = (char *)malloc(sizeof(char) * length);
-    strncpy(new_str, s, length);
-    return new_str;
+/**
+ * Copies at most `maxlen` characters of the C string `s` into a newly
+ * allocated, NUL-terminated buffer.
+ *
+ * The result has `allocated == TRUE` and must be released with free_str().
+ * When the allocation fails the result is {0, NULL, FALSE}.
+ */
+SlsStr malloc_str(const char *s, size_t maxlen) {
+    size_t length = strnlen(s, maxlen);
+    char *buffer = malloc(length + 1);
+
+    if (buffer == NULL) {
+        return (SlsStr){0, NULL, FALSE};
+    }
+    memcpy(buffer, s, length);
+    buffer[length] = '\0';
+    return (SlsStr){length, buffer, TRUE};
+}
+
+/**
+ * Allocates a zero-filled string of `length` characters plus a terminating
+ * NUL, so the buffer is always usable as a C string.
+ *
+ * The result must be released with free_str(). When the allocation fails,
+ * or `length + 1` would overflow, the result is {0, NULL, FALSE}.
+ */
+SlsStr new_str(size_t length) {
+    char *buffer = NULL;
+
+    if (length != (size_t)-1) {
+        buffer = calloc(length + 1, sizeof *buffer);
+    }
+    if (buffer == NULL) {
+        return (SlsStr){0, NULL, FALSE};
+    }
+    return (SlsStr){length, buffer, TRUE};
+}
+
+/** Returns a heap-allocated copy of `s`; release it with free_str(). */
+SlsStr copy_str(SlsStr s) {
+    return malloc_str(s.str, s.len);
+}
+
+/**
+ * Releases the buffer of a string whose `allocated` flag is set and resets
+ * `*s` to the empty, unallocated state. NULL and strings that are not
+ * allocated (e.g. SLS_STR literals) are left untouched, so calling this
+ * twice on the same string is harmless.
+ */
+void free_str(SlsStr *s) {
+    if (s == NULL || !s->allocated) {
+        return;
+    }
+    /* `str` is const for readers only; the allocation itself is mutable. */
+    free((void *)s->str);
+    s->len = 0;
+    s->str = NULL;
+    s->allocated = FALSE;
+}
+
+/* Scratch space for one rendered number; "%.2f" of DBL_MAX needs ~313 bytes. */
+enum { FORMAT_SCRATCH_SIZE = 512 };
+
+/*
+ * Output cursor shared by both passes of format(): while `data` is NULL it
+ * only counts bytes, otherwise it also stores them (never past `cap`).
+ */
+typedef struct {
+    char *data;
+    size_t cap;
+    size_t used;
+} FormatSink;
+
+/* Appends `n` bytes of `src`; always advances `used` so measuring works. */
+static void sink_bytes(FormatSink *sink, const char *src, size_t n) {
+    if (sink->data != NULL && sink->used <= sink->cap &&
+        n <= sink->cap - sink->used) {
+        memcpy(sink->data + sink->used, src, n);
+    }
+    sink->used += n;
+}
+
+/*
+ * Appends the next variadic argument rendered for conversion `spec`.
+ * Returns 1 when `spec` is a known conversion (one argument consumed) and
+ * 0 otherwise (nothing consumed, nothing written).
+ *
+ * NOTE(review): enum and Boolean arguments are read with their declared
+ * type and the *_NAMES tables are indexed unchecked; confirm these types
+ * survive default argument promotion and the values are always in range.
+ */
+static int sink_arg(FormatSink *sink, char spec, va_list *ap) {
+    char scratch[FORMAT_SCRATCH_SIZE];
+    const char *text = NULL;
+    int printed = 0;
+
+    switch (spec) {
+    case 'c':
+        text = va_arg(*ap, const char *);
+        if (text != NULL) {
+            sink_bytes(sink, text, strlen(text));
+        }
+        return 1;
+    case 'd':
+        printed = snprintf(scratch, sizeof scratch, "%ld",
+                           (long)va_arg(*ap, int32_t));
+        break;
+    case 'l':
+        printed = snprintf(scratch, sizeof scratch, "%lld",
+                           (long long)va_arg(*ap, int64_t));
+        break;
+    case 'u':
+        printed = snprintf(scratch, sizeof scratch, "%llu",
+                           (unsigned long long)va_arg(*ap, uint64_t));
+        break;
+    case 'z':
+        printed = snprintf(scratch, sizeof scratch, "%zu",
+                           va_arg(*ap, size_t));
+        break;
+    case 'f':
+        printed = snprintf(scratch, sizeof scratch, "%.2f",
+                           va_arg(*ap, double));
+        break;
+    case 's': {
+        SlsStr value = va_arg(*ap, SlsStr);
+        if (value.str != NULL) {
+            sink_bytes(sink, value.str, value.len);
+        }
+        return 1;
+    }
+    case 't':
+        text = TOKEN_TYPES_NAMES[va_arg(*ap, TokenType)];
+        sink_bytes(sink, text, strnlen(text, TYPE_NAMES_SAFE_LENGTH));
+        return 1;
+    case 'a':
+        text = ARRAY_TYPES_NAMES[va_arg(*ap, ArrayType)];
+        sink_bytes(sink, text, strnlen(text, TYPE_NAMES_SAFE_LENGTH));
+        return 1;
+    case 'i':
+        text = INTEGER_TYPES_NAMES[va_arg(*ap, IntegerBuiltInType)];
+        sink_bytes(sink, text, strnlen(text, TYPE_NAMES_SAFE_LENGTH));
+        return 1;
+    case 'e': {
+        SlsError error = va_arg(*ap, SlsError);
+        if (error.message.str != NULL) {
+            sink_bytes(sink, error.message.str, error.message.len);
+        }
+        return 1;
+    }
+    case 'b':
+        if (va_arg(*ap, Boolean)) {
+            sink_bytes(sink, "TRUE", 4);
+        } else {
+            sink_bytes(sink, "FALSE", 5);
+        }
+        return 1;
+    default:
+        return 0;
+    }
+
+    /* Numeric conversions: copy what snprintf produced, minus its NUL. */
+    if (printed > 0) {
+        size_t n = (size_t)printed;
+        if (n >= sizeof scratch) {
+            n = sizeof scratch - 1;
+        }
+        sink_bytes(sink, scratch, n);
+    }
+    return 1;
+}
+
+/* Expands the template `s` into `sink`, pulling arguments from `ap`. */
+static void sink_template(FormatSink *sink, const SlsStr s, va_list *ap) {
+    size_t i = 0;
+
+    if (s.str == NULL) {
+        return;
+    }
+    while (i < s.len) {
+        if (s.str[i] != '%' || i + 1 >= s.len) {
+            sink_bytes(sink, s.str + i, 1);
+            i += 1;
+            continue;
+        }
+        /* A '%' always swallows the next character: either as a known
+         * conversion, or copied through verbatim as an unknown pair. */
+        if (!sink_arg(sink, s.str[i + 1], ap)) {
+            sink_bytes(sink, s.str + i, 2);
+        }
+        i += 2;
+    }
+}
+
+/**
+ * Builds a new string from the template `s`, replacing each two-character
+ * conversion with the next variadic argument:
+ *
+ *   %c const char *   %d int32_t   %l int64_t   %u uint64_t   %z size_t
+ *   %f double (two decimals)       %s SlsStr    %t TokenType  %a ArrayType
+ *   %i IntegerBuiltInType          %e SlsError (its message)  %b Boolean
+ *
+ * Any other "%x" pair and a trailing '%' are copied unchanged. The result
+ * is NUL-terminated, heap-allocated and must be released with free_str();
+ * on allocation failure it is {0, NULL, FALSE}.
+ */
+SlsStr format(const SlsStr s, ...) {
+    FormatSink sink = {NULL, 0, 0};
+    va_list args;
+    va_list measure_args;
+    SlsStr result;
+
+    va_start(args, s);
+
+    /* Pass 1: measure, on a copy so the arguments can be read again. */
+    va_copy(measure_args, args);
+    sink_template(&sink, s, &measure_args);
+    va_end(measure_args);
+
+    /* Pass 2: render into a buffer of exactly the measured size. */
+    result = new_str(sink.used);
+    if (result.str != NULL) {
+        sink.data = (char *)result.str;
+        sink.cap = result.len;
+        sink.used = 0;
+        sink_template(&sink, s, &args);
+    }
+
+    va_end(args);
+    return result;
 }