I've been working on a string library in C that mirrors the interface of C++'s std::basic_string<char>, purely for learning purposes. It's called cstring and the full source is here.
The basic idea is to store the metadata in a struct placed immediately before the character buffer in memory, so a cstring is just a char* underneath and works with any standard C function that takes a const char*.
/*
 * Bookkeeping header of a cstring allocation.
 * pvt_dat_to_mdata_ finds it by stepping one header back from the
 * character pointer, i.e. it lives immediately in front of the characters.
 */
typedef struct {
size_t size;        /* presumably the current length in characters; pvt_buf_grow_ sets it to 0 for a new buffer -- TODO confirm */
size_t capacity;    /* character capacity; pvt_buf_grow_ allocates capacity + 1 characters after this header */
bool is_malloced;   /* set to true by pvt_buf_grow_, which asserts it before reallocating an existing buffer */
} pvt_cstr_metadata_t;
// access metadata from pointer
// `ptr` must point at the first character of a buffer that has a
// pvt_cstr_metadata_t directly in front of it (pvt_buf_grow_ returns such a
// pointer). The result is a pointer to that header. Note that the cast
// drops any const qualifier carried by `ptr`.
#define pvt_dat_to_mdata_(ptr) \
(((pvt_cstr_metadata_t *)(ptr)) - 1)
Here is a small example that shows how to create, use and destroy a cstring:
#include "cstring.h"
int main(void) {
    /* Build a cstring out of a string literal and show it. */
    cstring greeting = cstr_init("hello");
    cstr_println(greeting);

    /* Grow it at the end. */
    cstr_append(&greeting, " world");
    cstr_println(greeting);

    /* Report the current length and the reserved capacity. */
    printf("size: %zu\n", cstr_size(greeting));
    printf("cap: %zu\n", cstr_capacity(greeting));

    /* Splice a string in at a given index. */
    cstr_insert(&greeting, 5, ",");
    cstr_println(greeting);

    /* Look up the position of a single character. */
    size_t w_index = cstr_find(greeting, 'w');
    printf("'w' at index: %zu\n", w_index);

    /* Take a substring; it is a separate cstring with its own lifetime. */
    cstring prefix = cstr_substr(greeting, 0, 5);
    cstr_println(prefix);
    cstr_free(&prefix);

    /* Release the original string as well. */
    cstr_free(&greeting);
    return 0;
}
Every function uses pvt_buf_grow_ to allocate or resize the buffer. In cstring.c:
/*
 * Allocate a new cstring buffer (ptr == NULL) or resize an existing one.
 *
 * ptr   - NULL, or a cstring whose metadata has is_malloced set.
 * count - minimum capacity requested by the caller.
 *
 * The capacity chosen is max(count, 2 * current capacity) for an existing
 * buffer, or max(count, CSTRING_DEFAULT_CAP) for a new one, and is then
 * clamped to cstr_max_size().
 * NOTE(review): because of the clamp the resulting capacity can be smaller
 * than `count`; callers are assumed never to ask for more than
 * cstr_max_size() -- confirm.
 *
 * Returns a pointer to the character area that follows the metadata header.
 * A resize may move the allocation, so the caller must replace its own
 * pointer with the returned one. For a new buffer `size` is set to 0; for a
 * resized buffer `size` is left as it was. `capacity` and `is_malloced` are
 * always (re)written.
 *
 * Asserts if `ptr` is non-NULL but not malloced, and if allocation fails.
 */
static cstring pvt_buf_grow_(cstring ptr, size_t count)
{
    /* Latch this before any reallocation: once `ptr` has been handed to
     * realloc its value is indeterminate and must not be inspected again. */
    const bool is_new_ = (ptr == NULL);

    size_t cap_ = is_new_ ?
        CSTRMAX(count, CSTRING_DEFAULT_CAP) :
        CSTRMAX(count, pvt_total_cap_(ptr) << 1);
    cap_ = CSTRMIN(cap_, cstr_max_size());

    /* header + cap_ characters + one extra character slot
     * (presumably reserved for the NUL terminator) */
    const size_t bytes_ = ((cap_ + 1) * sizeof(*ptr))
                        + sizeof(pvt_cstr_metadata_t);

    pvt_cstr_metadata_t *base_;
    if (is_new_) {
        base_ = (pvt_cstr_metadata_t *)pvt_cstr_malloc(bytes_);
    } else {
        pvt_cstr_assert(pvt_is_malloced_(ptr),
            "Literals(const char[]) are not modifiable");
        base_ = (pvt_cstr_metadata_t *)pvt_cstr_realloc(
            pvt_dat_to_mdata_(ptr), bytes_);
    }
    pvt_cstr_assert(base_ != NULL, "allocation failed!!");

    base_->is_malloced = true;
    base_->capacity = cap_;
    if (is_new_) {
        base_->size = 0;
    }
    return (cstring)(base_ + 1);
}
/*
* Constructor implementations (multi-argument overloads)
* ------------------------------------------------------
*/
/*
 * Constructor taking a count and a character.
 *
 * When cnt is zero a default-capacity buffer is allocated first and turned
 * into the empty string. In every case the content is then produced by
 * intl_assign_cnt_ch(&result, cnt, ch), and the resulting cstring is
 * returned to the caller.
 */
CSTR_NODISCARD __attribute__((malloc))
cstring __cstr__unused
intl_init_cpy_ch(const size_t cnt, const char ch)
{
    cstring fresh_ = NULL;

    if (cnt == 0) {
        /* Nothing to fill: hand back an allocated, empty string. */
        fresh_ = pvt_buf_grow_(NULL, CSTRING_DEFAULT_CAP);
        fresh_[0] = '\0';
        pvt_set_total_size_(fresh_, 0);
    }

    intl_assign_cnt_ch(&fresh_, cnt, ch);
    return fresh_;
}
/*
 * Copy constructor: duplicates `other` into a newly allocated cstring.
 *
 * The length of `other` is measured with pvt_cstr_strlen, a buffer is
 * requested for that length plus one, and the characters are transferred
 * with pvt_copy_. `other` is passed to pvt_cstr_strlen unchecked.
 */
CSTR_NODISCARD __attribute__((malloc))
cstring __cstr__unused
intl_copy_constructor(const_cstring other)
{
    size_t src_len_ = pvt_cstr_strlen(other);
    cstring dup_ = pvt_buf_grow_(NULL, src_len_ + 1);

    pvt_copy_(dup_, other, src_len_);

    return dup_;
}
/*
 * Range constructor: builds a cstring from the characters in [begin, end).
 *
 * Asserts that `begin` lies strictly before `end`, so an empty range is
 * rejected. The number of characters is cstr_distance(begin, end); a buffer
 * is requested for that many plus one, and the characters starting at
 * begin.it are transferred with pvt_copy_.
 */
CSTR_NODISCARD __attribute__((malloc))
cstring __cstr__unused
intl_init_cpy_str_w_iter(cstr_iterator begin,
                         cstr_iterator end)
{
    pvt_cstr_assert((begin.it < end.it),
        "Unsupported arguments passed for Constructor");

    size_t span_ = cstr_distance(begin, end);
    cstring out_ = pvt_buf_grow_(NULL, span_ + 1);

    pvt_copy_(out_, begin.it, span_);

    return out_;
}
/*
 * Substring constructor: builds a cstring from up to `offset` characters of
 * `other`, beginning at index `start`.
 *
 * other  - source string; must not be NULL.
 * start  - index of the first character to copy; must be strictly less
 *          than the length of `other`.
 * offset - maximum number of characters to copy (despite its name it is a
 *          count, not a position); it is clamped to the characters
 *          available after `start`.
 *
 * Returns a newly allocated cstring. Asserts on a NULL `other` or an
 * out-of-range `start`.
 *
 * Each argument is validated BEFORE it is used: `other` is checked before
 * its length is measured, and `start` is checked before `size_ - start` is
 * computed, so that subtraction can never wrap around.
 */
CSTR_NODISCARD __attribute__((malloc))
cstring __cstr__unused
intl_init_cpy_str_w_off(const_cstring other,
                        const size_t start, const size_t offset)
{
    pvt_cstr_assert(other != NULL,
        "Unsupported arguments passed for Constructor");

    size_t size_ = pvt_cstr_strlen(other);
    pvt_cstr_assert(start < size_,
        "Unsupported arguments passed for Constructor");

    size_t count_ = CSTRMIN(offset, size_ - start);
    cstring str_ = pvt_buf_grow_(NULL, count_ + 1);
    pvt_copy_(str_, other + start, count_);
    return str_;
}
I also use _Generic so that you can call the same function name with different arguments, as you would in C++. Instead of having separate names such as cstr_init_from_string, cstr_init_with_size, etc., you just call cstr_init and it figures out what you mean.
I have some specific things I'd love feedback on:
Right now a lot of the intl_* implementation functions are static inline in the header, which I know emits a separate copy of the code in every translation unit that uses them. Does that cause noticeable bloat?
/* INSERT
 * ------
 * cstr_insert(str, ...) is an overloaded front end. It first picks a helper
 * macro by argument count (3, 4 or 5 arguments) and that helper then picks
 * the implementation function by argument type with _Generic.
 */
#define cstr_insert(...) \
    __cstr_insert_chooser(__VA_ARGS__)(__VA_ARGS__)

/*
 * Argument-count selector. The caller's arguments are followed by the
 * candidate names in descending arity, so whichever name lands in the sixth
 * position (NAME) matches the number of arguments supplied:
 *   3 args -> __cstr_insert_3, 4 args -> __cstr_insert_4,
 *   5 args -> __cstr_insert_5.
 */
#define __cstr_insert_get_macro(_1,_2,_3,_4,_5,NAME,...) NAME

/*
 * The trailing 0 is a placeholder: it guarantees that the `...` of
 * __cstr_insert_get_macro always receives at least one argument, which
 * C11 requires (without it the 3-argument case passes nothing for `...`).
 */
#define __cstr_insert_chooser(...) \
    __cstr_insert_get_macro(__VA_ARGS__, \
        __cstr_insert_5, \
        __cstr_insert_4, \
        __cstr_insert_3, \
        0)

/*
 * Three arguments: (str, pos, x)
 *   pos is a size type, x is a string type -> intl_insert_str
 *   pos is a size type, x is anything else -> dummy_func
 *   pos is a cstr_iterator                 -> intl_insert_iter_ch
 *   pos is a cstr_const_iterator           -> intl_insert_citer_ch
 * PVT_GENERIC_SIZE_TYPES / PVT_GENERIC_STRING_TYPES are defined elsewhere;
 * presumably each expands to one _Generic association per accepted type.
 */
#define __cstr_insert_3(str, pos, x) \
    _Generic((pos), \
        PVT_GENERIC_SIZE_TYPES(_Generic((x), \
            PVT_GENERIC_STRING_TYPES(intl_insert_str), \
            default: dummy_func \
        )), \
        cstr_iterator : intl_insert_iter_ch, \
        cstr_const_iterator : intl_insert_citer_ch \
    )(str, pos, x)

/*
 * Four arguments: (str, pos, a, b)
 *   pos is a size type, b is a char type   -> intl_insert_cnt_ch
 *   pos is a size type, b is a string type -> intl_insert_str_range
 *   pos is a size type, b is anything else -> dummy_func
 *   pos is a cstr_iterator                 -> intl_insert_iter_cnt_ch
 *   pos is a cstr_const_iterator           -> intl_insert_citer_cnt_ch
 */
#define __cstr_insert_4(str, pos, a, b) \
    _Generic((pos), \
        PVT_GENERIC_SIZE_TYPES(_Generic((b), \
            PVT_GENERIC_CHAR_TYPES(intl_insert_cnt_ch), \
            PVT_GENERIC_STRING_TYPES(intl_insert_str_range),\
            default: dummy_func \
        )), \
        cstr_iterator : intl_insert_iter_cnt_ch, \
        cstr_const_iterator : intl_insert_citer_cnt_ch \
    )(str, pos, a, b)

/* Five arguments always mean "insert a substring of `other`". */
#define __cstr_insert_5(str, index, other, pos, count) \
    intl_insert_substr(str, index, other, pos, count)
/*
* Functions to insert characters
* -------------------------------
*/
/*
 * Chosen by cstr_insert(str, index, count, ch) when `index` is a size type
 * and the last argument is a char type. Defined outside this header.
 * Presumably inserts `count` copies of `ch` at `index` and returns the
 * (possibly reallocated) string -- confirm against the definition.
 */
cstring __cstr__unused
intl_insert_cnt_ch(cstring *str, const size_t index,
const size_t count, const char ch);
/*
 * Chosen by cstr_insert(str, index, count, other) when `index` is a size
 * type and the last argument is a string type; also the function that
 * intl_insert_str forwards to. Defined outside this header.
 * Presumably inserts the first `count` characters of `other` at `index`
 * -- confirm against the definition.
 */
cstring __cstr__unused
intl_insert_str_range(cstring *str, const size_t index,
const size_t count, const_cstring other);
/*
 * Insert an entire string at `index`.
 *
 * Measures `other` with pvt_cstr_strlen and forwards to
 * intl_insert_str_range with that length; the return value is whatever
 * intl_insert_str_range returns.
 */
static inline cstring __cstr__unused
intl_insert_str(cstring *str, const size_t index, const_cstring other)
{
    const size_t other_len_ = pvt_cstr_strlen(other);

    return intl_insert_str_range(str, index, other_len_, other);
}
/*
 * Insert a substring of `other` into *str at `index`.
 *
 * str   - string to modify.
 * index - position in *str at which to insert.
 * other - source string.
 * pos   - start of the substring within `other`.
 * count - maximum length of the substring.
 *
 * The substring is materialised with cstr_substr, which returns a separate
 * cstring owned by this function, so it is released with cstr_free once the
 * insertion is done (previously it was leaked on every call). Working from
 * a copy rather than from a pointer into `other` is kept on purpose.
 * NOTE(review): presumably this is what makes inserting a piece of *str
 * into itself safe when the insertion reallocates -- confirm.
 *
 * Returns whatever intl_insert_str returns.
 */
static inline cstring __cstr__unused
intl_insert_substr(cstring *str, const size_t index,
                   const_cstring other, const size_t pos,
                   const size_t count)
{
    cstring piece_ = cstr_substr(other, pos, count);
    cstring result_ = intl_insert_str(str, index, piece_);

    cstr_free(&piece_);
    return result_;
}
/*
 * Chosen by cstr_insert(str, pos, count, ch) when `pos` is a cstr_iterator;
 * the other iterator-based insert wrappers in this header all forward to
 * it. Defined outside this header.
 * Presumably inserts `count` copies of `ch` before `pos` and returns an
 * iterator into the updated string -- confirm against the definition.
 */
cstr_iterator __cstr__unused
intl_insert_iter_cnt_ch(cstring *str, cstr_iterator pos,
const size_t count, const char ch);
/*
 * Const-iterator overload of intl_insert_iter_cnt_ch.
 *
 * Rewraps the position held by `pos` in a mutable cstr_iterator (casting
 * the stored pointer to cstring) and forwards all arguments unchanged.
 */
static inline cstr_iterator __cstr__unused
intl_insert_citer_cnt_ch(cstring *str, cstr_const_iterator pos,
                         const size_t count, const char ch)
{
    cstr_iterator mutable_pos_ = { (cstring)pos.it };

    return intl_insert_iter_cnt_ch(str, mutable_pos_, count, ch);
}
/*
 * Single-character insert at an iterator position: forwards to
 * intl_insert_iter_cnt_ch with a count of one.
 */
static inline cstr_iterator __cstr__unused
intl_insert_iter_ch(cstring *str, cstr_iterator pos, const char ch)
{
    const size_t one_copy_ = 1;

    return intl_insert_iter_cnt_ch(str, pos, one_copy_, ch);
}
/*
 * Single-character insert at a const-iterator position: forwards to
 * intl_insert_citer_cnt_ch with a count of one.
 */
static inline cstr_iterator __cstr__unused
intl_insert_citer_ch(cstring *str, cstr_const_iterator pos,
                     const char ch)
{
    const size_t one_copy_ = 1;

    return intl_insert_citer_cnt_ch(str, pos, one_copy_, ch);
}
Is it considered a good design?
The library is ASCII/byte-based only — no Unicode support, that's intentional for now.
I'm still pretty new to this so any feedback is welcome. I mainly want to know if the overall design makes sense or if I'm doing something obviously wrong.
Tags: reinventing-the-wheel