Skip to main content
Became Hot Network Question
Post Reopened by toolic, Loki Astari, Toby Speight
added example program showing create, use and destroy as requested
Added to review
Source Link

Here is a small example that shows how to create, use and destroy a cstring:

#include "cstring.h"

int main(void) {
    // Start from a string literal and print the resulting cstring.
    cstring greeting = cstr_init("hello");
    cstr_println(greeting);

    // Extend the string at its end, then print it again.
    cstr_append(&greeting, " world");
    cstr_println(greeting);

    // Report what cstr_size and cstr_capacity return for it.
    printf("size: %zu\n", cstr_size(greeting));
    printf("cap:  %zu\n", cstr_capacity(greeting));

    // Insert a comma at index 5.
    cstr_insert(&greeting, 5, ",");
    cstr_println(greeting);

    // Search for a single character and print the returned index.
    size_t w_index = cstr_find(greeting, 'w');
    printf("'w' at index: %zu\n", w_index);

    // Take a substring (arguments 0 and 5), print it, and release it.
    cstring head = cstr_substr(greeting, 0, 5);
    cstr_println(head);
    cstr_free(&head);

    // Finally release the original string.
    cstr_free(&greeting);
    return 0;
}

Every function uses pvt_buf_grow_ to allocate or resize the buffer. In cstring.c:

/*
 * Allocates a new buffer (ptr == NULL) or resizes an existing one.
 *
 * The capacity chosen is the larger of `count` and either
 * CSTRING_DEFAULT_CAP (new buffer) or twice the current capacity
 * (existing buffer), then limited to cstr_max_size().  The allocation
 * holds one pvt_cstr_metadata_t followed by (capacity + 1) characters.
 *
 * An existing buffer must satisfy pvt_is_malloced_(); allocation failure
 * is reported through pvt_cstr_assert.
 *
 * Returns the address just past the metadata header.  For a new buffer
 * the stored size is set to 0; a resized buffer keeps its stored size.
 *
 * NOTE(review): when `count` is larger than cstr_max_size() the capacity
 * ends up smaller than `count` - presumably callers guard against that;
 * confirm.
 */
static cstring pvt_buf_grow_(cstring ptr, size_t count)
{
    size_t new_cap;
    if (!ptr) {
        new_cap = CSTRMAX(count, CSTRING_DEFAULT_CAP);
    }
    else {
        new_cap = CSTRMAX(count, pvt_total_cap_(ptr) << 1);
    }
    new_cap = CSTRMIN(new_cap, cstr_max_size());

    /* header + characters + one extra character slot */
    const size_t alloc_bytes = sizeof(pvt_cstr_metadata_t)
                               + ((new_cap + 1) * sizeof(*ptr));
    pvt_cstr_metadata_t *header;

    if (!ptr) {
        header = (pvt_cstr_metadata_t*)pvt_cstr_malloc(alloc_bytes);
    }
    else {
        pvt_cstr_assert(pvt_is_malloced_(ptr),
                     "Literals(const char[]) are not modifiable");
        header = (pvt_cstr_metadata_t*)pvt_cstr_realloc(
                                pvt_dat_to_mdata_(ptr), alloc_bytes);
    }

    pvt_cstr_assert(header != NULL, "allocation failed!!");

    header->is_malloced = true;
    header->capacity    = (size_t)(new_cap);
    if (!ptr) {
        header->size = 0;
    }

    /* the character data starts right after the header */
    return (cstring)(header + 1);
}

/*
 *  Functions to multiple args Constructors
 *  ---------------------------------------
 */

/*
 * Constructor taking a count and a character.
 *
 * For cnt == 0 a default-capacity buffer is allocated, terminated at
 * index 0 and given a stored size of 0.  In every case the result is then
 * passed to intl_assign_cnt_ch(&result, cnt, ch); for cnt != 0 that helper
 * receives a NULL cstring and presumably allocates it - TODO confirm.
 *
 * Returns the resulting cstring.
 */
CSTR_NODISCARD  __attribute__((malloc))
cstring __cstr__unused
intl_init_cpy_ch(const size_t cnt, const char ch)
{
    cstring result = NULL;

    if (cnt == 0) {
        result = pvt_buf_grow_(NULL, CSTRING_DEFAULT_CAP);
        *result = (char)0;
        pvt_set_total_size_(result, 0);
    }

    intl_assign_cnt_ch(&result, cnt, ch);
    return result;
}

/*
 * Copy constructor: measures `other` with pvt_cstr_strlen, allocates a new
 * buffer for that length plus one, and hands the source and length to
 * pvt_copy_.
 *
 * Returns the newly allocated cstring.
 * NOTE(review): `other` is not checked for NULL here - confirm that
 * pvt_cstr_strlen tolerates it or that callers never pass NULL.
 */
CSTR_NODISCARD  __attribute__((malloc))
cstring __cstr__unused
intl_copy_constructor(const_cstring other)
{
    const size_t length = pvt_cstr_strlen(other);
    cstring copy = pvt_buf_grow_(NULL, length + 1);

    pvt_copy_(copy, other, length);
    return copy;
}

/*
 * Constructor from an iterator pair.
 *
 * Asserts that begin.it is strictly below end.it, so an empty range
 * (begin == end) is rejected as well.  The length comes from
 * cstr_distance(begin, end); a buffer for that length plus one is
 * allocated and filled through pvt_copy_ starting at begin.it.
 *
 * Returns the newly allocated cstring.
 */
CSTR_NODISCARD  __attribute__((malloc))
cstring __cstr__unused
intl_init_cpy_str_w_iter(cstr_iterator begin, 
                         cstr_iterator end)
{
    pvt_cstr_assert((begin.it < end.it), 
    "Unsupported arguments passed for Constructor");

    const size_t span = cstr_distance(begin, end);
    cstring copy = pvt_buf_grow_(NULL, span + 1);

    pvt_copy_(copy, begin.it, span);
    return copy;
}

/*
 * Constructor from a sub-range of `other`.
 *
 *   other  - source string; must not be NULL
 *   start  - index of the first character to copy; must be below the
 *            length of `other`
 *   offset - maximum number of characters to copy; limited to what is
 *            available after `start`
 *
 * Arguments are validated BEFORE they are used: the NULL check precedes
 * the length computation and the bounds check precedes the subtraction
 * `size_ - start`, which would otherwise wrap around for start > size_.
 *
 * Returns the newly allocated cstring.
 */
CSTR_NODISCARD  __attribute__((malloc))
cstring __cstr__unused
intl_init_cpy_str_w_off(const_cstring other,
                        const size_t start, const size_t offset) 
{
    pvt_cstr_assert(other != NULL,
                    "Unsupported arguments passed for Constructor");

    const size_t size_ = pvt_cstr_strlen(other);
    pvt_cstr_assert((start < size_),
                    "Unsupported arguments passed for Constructor");

    /* safe now: start < size_, so the subtraction cannot wrap */
    const size_t count_ = CSTRMIN(offset, size_ - start);

    cstring str_ = pvt_buf_grow_(NULL, count_ + 1);
    pvt_copy_(str_, other + start, count_);
    
    return str_;
}

Right now a lot of the intl_* implementation functions are static inline in the header, which I know causes code bloat per translation unit. Is moving them to the .c file with just declarations in the header the right fix? Is it really bloat?

I also have helper functions for find in the library, and I thought of having the find function accept a strategy parameter at the end so the user can choose the search algorithm in the macro itself, instead of having separate functions for each one. The problem is how to implement it. Does anyone have any suggestions?

below is the macro for cstr_find:

/**
 *  Generic find function for cstring.
 * 
 *  cstr_find(str, ch)                  -> find first character ch in str
 *  cstr_find(str, other)               -> find first substring equal to other in str
 *  cstr_find(str, ch, pos)             -> find first character ch in (str + pos)
 *  cstr_find(str, other, pos)          -> find first substring equal to other in (str + pos)
 *  cstr_find(str, other, pos, count)   -> find first substring equal to other in str[pos, pos+count)
 *  
 *  returns `size_t`
 */
#define cstr_find(...) \
    __cstr_find_chooser(__VA_ARGS__)(__VA_ARGS__)


/*
 *      FIND
 * ---------
 *  Dispatch on the number of arguments: the caller's arguments shift the
 *  candidate names to the right, so NAME lands on the entry matching the
 *  argument count (2 -> __cstr_find_2, 3 -> __cstr_find_3,
 *  4 -> intl_find_str_range).
 *
 *  The trailing `0` is padding only and is always discarded: it keeps the
 *  `...` of __cstr_find_get_macro non-empty in the 2-argument case, which
 *  ISO C before C23 requires.
 */
#define __cstr_find_get_macro(_1,_2,_3,_4,NAME,...) NAME

#define __cstr_find_chooser(...)    \
    __cstr_find_get_macro(__VA_ARGS__,  \
        intl_find_str_range,            \
        __cstr_find_3,                  \
        __cstr_find_2,                  \
        0)
    

/* Two arguments: pick the string or character search from the type of _2;
 * any other type selects dummy_func. */
#define __cstr_find_2(str, _2)                      \
    _Generic( (_2),                                 \
        PVT_GENERIC_STRING_TYPES(intl_find_str),    \
        PVT_GENERIC_CHAR_TYPES(intl_find_ch),       \
        default : dummy_func                        \
    )(str, _2)

/* Three arguments: same type dispatch, using the variants that also take
 * a position. */
#define __cstr_find_3(str, _2, pos)                     \
    _Generic( (_2),                                     \
        PVT_GENERIC_STRING_TYPES(intl_find_str_offset), \
        PVT_GENERIC_CHAR_TYPES(intl_find_ch_offset),    \
        default : dummy_func                            \
    )(str, _2, pos)

Are there any obvious bugs or safety issues I might have missed?

right now a lot of the intl_* implementation functions are static inline in the header which I know causes code bloat per translation unit. Is moving them to .c with just declarations in the header the right fix?

I also have helper functions for find in the library, and I thought of having the find function accept a strategy parameter at the end so the user can choose the search algorithm in the macro itself, instead of having separate functions for each one. The problem is how to implement it. Does anyone have any suggestions?

below is the macro for cstr_find:

/**
 *  Generic find function for cstring.
 * 
 *  cstr_find(str, ch)                  -> find first character ch in str
 *  cstr_find(str, other)               -> find first substring equal to other in str
 *  cstr_find(str, ch, pos)             -> find first character ch in (str + pos)
 *  cstr_find(str, other, pos)          -> find first substring equal to other in (str + pos)
 *  cstr_find(str, other, pos, count)   -> find first substring equal to other in str[pos, pos+count)
 *  
 *  returns `size_t`
 */
#define cstr_find(...) \
    __cstr_find_chooser(__VA_ARGS__)(__VA_ARGS__)


/*
 *      FIND
 * ---------
 *  Dispatch on the number of arguments: the caller's arguments shift the
 *  candidate names to the right, so NAME lands on the entry matching the
 *  argument count (2 -> __cstr_find_2, 3 -> __cstr_find_3,
 *  4 -> intl_find_str_range).
 *
 *  The trailing `0` is padding only and is always discarded: it keeps the
 *  `...` of __cstr_find_get_macro non-empty in the 2-argument case, which
 *  ISO C before C23 requires.
 */
#define __cstr_find_get_macro(_1,_2,_3,_4,NAME,...) NAME

#define __cstr_find_chooser(...)    \
    __cstr_find_get_macro(__VA_ARGS__,  \
        intl_find_str_range,            \
        __cstr_find_3,                  \
        __cstr_find_2,                  \
        0)
    

/* Two arguments: pick the string or character search from the type of _2;
 * any other type selects dummy_func. */
#define __cstr_find_2(str, _2)                      \
    _Generic( (_2),                                 \
        PVT_GENERIC_STRING_TYPES(intl_find_str),    \
        PVT_GENERIC_CHAR_TYPES(intl_find_ch),       \
        default : dummy_func                        \
    )(str, _2)

/* Three arguments: same type dispatch, using the variants that also take
 * a position. */
#define __cstr_find_3(str, _2, pos)                     \
    _Generic( (_2),                                     \
        PVT_GENERIC_STRING_TYPES(intl_find_str_offset), \
        PVT_GENERIC_CHAR_TYPES(intl_find_ch_offset),    \
        default : dummy_func                            \
    )(str, _2, pos)

Are there any obvious bugs or safety issues I might have missed?

Here is a small example that shows how to create, use and destroy a cstring:

#include "cstring.h"

int main(void) {
    // Start from a string literal and print the resulting cstring.
    cstring greeting = cstr_init("hello");
    cstr_println(greeting);

    // Extend the string at its end, then print it again.
    cstr_append(&greeting, " world");
    cstr_println(greeting);

    // Report what cstr_size and cstr_capacity return for it.
    printf("size: %zu\n", cstr_size(greeting));
    printf("cap:  %zu\n", cstr_capacity(greeting));

    // Insert a comma at index 5.
    cstr_insert(&greeting, 5, ",");
    cstr_println(greeting);

    // Search for a single character and print the returned index.
    size_t w_index = cstr_find(greeting, 'w');
    printf("'w' at index: %zu\n", w_index);

    // Take a substring (arguments 0 and 5), print it, and release it.
    cstring head = cstr_substr(greeting, 0, 5);
    cstr_println(head);
    cstr_free(&head);

    // Finally release the original string.
    cstr_free(&greeting);
    return 0;
}

Every function uses pvt_buf_grow_ to allocate or resize the buffer. In cstring.c:

/*
 * Allocates a new buffer (ptr == NULL) or resizes an existing one.
 *
 * The capacity chosen is the larger of `count` and either
 * CSTRING_DEFAULT_CAP (new buffer) or twice the current capacity
 * (existing buffer), then limited to cstr_max_size().  The allocation
 * holds one pvt_cstr_metadata_t followed by (capacity + 1) characters.
 *
 * An existing buffer must satisfy pvt_is_malloced_(); allocation failure
 * is reported through pvt_cstr_assert.
 *
 * Returns the address just past the metadata header.  For a new buffer
 * the stored size is set to 0; a resized buffer keeps its stored size.
 *
 * NOTE(review): when `count` is larger than cstr_max_size() the capacity
 * ends up smaller than `count` - presumably callers guard against that;
 * confirm.
 */
static cstring pvt_buf_grow_(cstring ptr, size_t count)
{
    size_t new_cap;
    if (!ptr) {
        new_cap = CSTRMAX(count, CSTRING_DEFAULT_CAP);
    }
    else {
        new_cap = CSTRMAX(count, pvt_total_cap_(ptr) << 1);
    }
    new_cap = CSTRMIN(new_cap, cstr_max_size());

    /* header + characters + one extra character slot */
    const size_t alloc_bytes = sizeof(pvt_cstr_metadata_t)
                               + ((new_cap + 1) * sizeof(*ptr));
    pvt_cstr_metadata_t *header;

    if (!ptr) {
        header = (pvt_cstr_metadata_t*)pvt_cstr_malloc(alloc_bytes);
    }
    else {
        pvt_cstr_assert(pvt_is_malloced_(ptr),
                     "Literals(const char[]) are not modifiable");
        header = (pvt_cstr_metadata_t*)pvt_cstr_realloc(
                                pvt_dat_to_mdata_(ptr), alloc_bytes);
    }

    pvt_cstr_assert(header != NULL, "allocation failed!!");

    header->is_malloced = true;
    header->capacity    = (size_t)(new_cap);
    if (!ptr) {
        header->size = 0;
    }

    /* the character data starts right after the header */
    return (cstring)(header + 1);
}

/*
 *  Functions to multiple args Constructors
 *  ---------------------------------------
 */

/*
 * Constructor taking a count and a character.
 *
 * For cnt == 0 a default-capacity buffer is allocated, terminated at
 * index 0 and given a stored size of 0.  In every case the result is then
 * passed to intl_assign_cnt_ch(&result, cnt, ch); for cnt != 0 that helper
 * receives a NULL cstring and presumably allocates it - TODO confirm.
 *
 * Returns the resulting cstring.
 */
CSTR_NODISCARD  __attribute__((malloc))
cstring __cstr__unused
intl_init_cpy_ch(const size_t cnt, const char ch)
{
    cstring result = NULL;

    if (cnt == 0) {
        result = pvt_buf_grow_(NULL, CSTRING_DEFAULT_CAP);
        *result = (char)0;
        pvt_set_total_size_(result, 0);
    }

    intl_assign_cnt_ch(&result, cnt, ch);
    return result;
}

/*
 * Copy constructor: measures `other` with pvt_cstr_strlen, allocates a new
 * buffer for that length plus one, and hands the source and length to
 * pvt_copy_.
 *
 * Returns the newly allocated cstring.
 * NOTE(review): `other` is not checked for NULL here - confirm that
 * pvt_cstr_strlen tolerates it or that callers never pass NULL.
 */
CSTR_NODISCARD  __attribute__((malloc))
cstring __cstr__unused
intl_copy_constructor(const_cstring other)
{
    const size_t length = pvt_cstr_strlen(other);
    cstring copy = pvt_buf_grow_(NULL, length + 1);

    pvt_copy_(copy, other, length);
    return copy;
}

/*
 * Constructor from an iterator pair.
 *
 * Asserts that begin.it is strictly below end.it, so an empty range
 * (begin == end) is rejected as well.  The length comes from
 * cstr_distance(begin, end); a buffer for that length plus one is
 * allocated and filled through pvt_copy_ starting at begin.it.
 *
 * Returns the newly allocated cstring.
 */
CSTR_NODISCARD  __attribute__((malloc))
cstring __cstr__unused
intl_init_cpy_str_w_iter(cstr_iterator begin, 
                         cstr_iterator end)
{
    pvt_cstr_assert((begin.it < end.it), 
    "Unsupported arguments passed for Constructor");

    const size_t span = cstr_distance(begin, end);
    cstring copy = pvt_buf_grow_(NULL, span + 1);

    pvt_copy_(copy, begin.it, span);
    return copy;
}

/*
 * Constructor from a sub-range of `other`.
 *
 *   other  - source string; must not be NULL
 *   start  - index of the first character to copy; must be below the
 *            length of `other`
 *   offset - maximum number of characters to copy; limited to what is
 *            available after `start`
 *
 * Arguments are validated BEFORE they are used: the NULL check precedes
 * the length computation and the bounds check precedes the subtraction
 * `size_ - start`, which would otherwise wrap around for start > size_.
 *
 * Returns the newly allocated cstring.
 */
CSTR_NODISCARD  __attribute__((malloc))
cstring __cstr__unused
intl_init_cpy_str_w_off(const_cstring other,
                        const size_t start, const size_t offset) 
{
    pvt_cstr_assert(other != NULL,
                    "Unsupported arguments passed for Constructor");

    const size_t size_ = pvt_cstr_strlen(other);
    pvt_cstr_assert((start < size_),
                    "Unsupported arguments passed for Constructor");

    /* safe now: start < size_, so the subtraction cannot wrap */
    const size_t count_ = CSTRMIN(offset, size_ - start);

    cstring str_ = pvt_buf_grow_(NULL, count_ + 1);
    pvt_copy_(str_, other + start, count_);
    
    return str_;
}

Right now a lot of the intl_* implementation functions are static inline in the header, which I know duplicates code per translation unit. Is that bloat?

added 5193 characters in body
Source Link
/*
 *  Generic insert for cstring; `str` is a `cstring *`.
 *
 *  cstr_insert(str, index, other)             -> intl_insert_str
 *  cstr_insert(str, iter, ch)                 -> intl_insert_iter_ch / intl_insert_citer_ch
 *  cstr_insert(str, index, count, ch)         -> intl_insert_cnt_ch
 *  cstr_insert(str, index, count, other)      -> intl_insert_str_range
 *  cstr_insert(str, iter, count, ch)          -> intl_insert_iter_cnt_ch / intl_insert_citer_cnt_ch
 *  cstr_insert(str, index, other, pos, count) -> intl_insert_substr
 */
#define cstr_insert(...) \
    __cstr_insert_chooser(__VA_ARGS__)(__VA_ARGS__)


/*      INSERT
 *------------
 *  Dispatch on the number of arguments: the caller's arguments shift the
 *  candidate names to the right, so NAME lands on __cstr_insert_3, _4 or
 *  _5 according to the argument count.
 *
 *  The trailing `0` is padding only and is always discarded: it keeps the
 *  `...` of __cstr_insert_get_macro non-empty in the 3-argument case,
 *  which ISO C before C23 requires.
 */

#define __cstr_insert_get_macro(_1,_2,_3,_4,_5,NAME,...) NAME

#define __cstr_insert_chooser(...)      \
    __cstr_insert_get_macro(__VA_ARGS__,\
        __cstr_insert_5,                \
        __cstr_insert_4,                \
        __cstr_insert_3,                \
        0)

/* Three arguments: an index-typed `pos` inserts a string (other types of
 * `x` select dummy_func); an iterator `pos` inserts a single character. */
#define __cstr_insert_3(str, pos, x)                    \
    _Generic((pos),                                     \
        PVT_GENERIC_SIZE_TYPES(_Generic((x),            \
            PVT_GENERIC_STRING_TYPES(intl_insert_str),  \
            default: dummy_func                         \
        )),                                             \
        cstr_iterator : intl_insert_iter_ch,            \
        cstr_const_iterator : intl_insert_citer_ch      \
    )(str, pos, x)

/* Four arguments: with an index-typed `pos` the type of the last argument
 * chooses between the character and string variants; an iterator `pos`
 * selects the count-and-character variant. */
#define __cstr_insert_4(str, pos, a, b)                     \
    _Generic((pos),                                         \
        PVT_GENERIC_SIZE_TYPES(_Generic((b),                \
            PVT_GENERIC_CHAR_TYPES(intl_insert_cnt_ch),     \
            PVT_GENERIC_STRING_TYPES(intl_insert_str_range),\
            default: dummy_func                             \
        )),                                                 \
        cstr_iterator : intl_insert_iter_cnt_ch,            \
        cstr_const_iterator : intl_insert_citer_cnt_ch      \
    )(str, pos, a, b)

/* Five arguments: always the substring insert. */
#define __cstr_insert_5(str, index, other, pos, count)      \
    intl_insert_substr(str, index, other, pos, count)



/*
 *  Functions to insert characters
 * -------------------------------
 */

/*
 * Insert taking the target string, an index, a count and a character.
 * Defined outside this excerpt; presumably inserts `count` copies of `ch`
 * at `index` - confirm against the definition.
 */
cstring __cstr__unused
intl_insert_cnt_ch(cstring *str, const size_t index, 
                    const size_t count, const char ch);

/*
 * Insert taking the target string, an index, a count and a source string.
 * Defined outside this excerpt.  intl_insert_str() calls it with
 * pvt_cstr_strlen(other) as `count` to insert the whole of `other`.
 */
cstring __cstr__unused
intl_insert_str_range(cstring *str, const size_t index, 
                    const size_t count, const_cstring other);

/*
 * Inserts the whole of `other` into *str at `index` by forwarding to
 * intl_insert_str_range with the length reported by pvt_cstr_strlen.
 * Returns whatever intl_insert_str_range returns.
 */
static inline cstring __cstr__unused
intl_insert_str(cstring *str, const size_t index, const_cstring other)
{
    const size_t other_len = pvt_cstr_strlen(other);

    return intl_insert_str_range(str, index, other_len, other);
}

/*
 * Inserts the substring cstr_substr(other, pos, count) into *str at
 * `index`.
 *
 * cstr_substr hands back a separate cstring that the caller has to release
 * with cstr_free; it is freed here once the insert is done so the
 * temporary does not leak.
 *
 * Returns whatever intl_insert_str returns.
 */
static inline cstring __cstr__unused
intl_insert_substr(cstring *str, const size_t index, 
                   const_cstring other, const size_t pos,
                   const size_t count)
{
    cstring piece  = cstr_substr(other, pos, count);
    cstring result = intl_insert_str(str, index, piece);

    cstr_free(&piece);
    return result;
}



/*
 * Iterator-position insert taking a count and a character.  Defined
 * outside this excerpt; the wrappers below all forward to it.
 */
cstr_iterator __cstr__unused
intl_insert_iter_cnt_ch(cstring *str, cstr_iterator pos, 
                    const size_t count, const char ch);

/*
 * Const-iterator variant: rebuilds a mutable iterator from pos.it and
 * forwards to intl_insert_iter_cnt_ch.
 */
static inline cstr_iterator __cstr__unused
intl_insert_citer_cnt_ch(cstring *str, cstr_const_iterator pos,
                    const size_t count, const char ch)
{
    cstr_iterator mutable_pos = { (cstring)pos.it };

    return intl_insert_iter_cnt_ch(str, mutable_pos, count, ch);
}

/* Single-character insert at an iterator: forwards with a count of 1. */
static inline cstr_iterator __cstr__unused
intl_insert_iter_ch(cstring *str, cstr_iterator pos, const char ch)
{
    const size_t one = 1;

    return intl_insert_iter_cnt_ch(str, pos, one, ch);
}

/* Single-character insert at a const iterator: forwards with a count of 1. */
static inline cstr_iterator __cstr__unused
intl_insert_citer_ch(cstring *str, cstr_const_iterator pos, 
                    const char ch)
{
    const size_t one = 1;

    return intl_insert_citer_cnt_ch(str, pos, one, ch);
}

Is it considered a good design?

I also have helper functions for find in the library, and I thought of having the find function accept a strategy parameter at the end so the user can choose the search algorithm in the macro itself, instead of having separate functions for each one. The problem is how to implement it. Does anyone have any suggestions?

below is the macro for cstr_find:

/**
 *  Generic find function for cstring.
 * 
 *  cstr_find(str, ch)                  -> find first character ch in str
 *  cstr_find(str, other)               -> find first substring equal to other in str
 *  cstr_find(str, ch, pos)             -> find first character ch in (str + pos)
 *  cstr_find(str, other, pos)          -> find first substring equal to other in (str + pos)
 *  cstr_find(str, other, pos, count)   -> find first substring equal to other in str[pos, pos+count)
 *  
 *  returns `size_t`
 */
#define cstr_find(...) \
    __cstr_find_chooser(__VA_ARGS__)(__VA_ARGS__)


/*
 *      FIND
 * ---------
 *  Dispatch on the number of arguments: the caller's arguments shift the
 *  candidate names to the right, so NAME lands on the entry matching the
 *  argument count (2 -> __cstr_find_2, 3 -> __cstr_find_3,
 *  4 -> intl_find_str_range).
 *
 *  The trailing `0` is padding only and is always discarded: it keeps the
 *  `...` of __cstr_find_get_macro non-empty in the 2-argument case, which
 *  ISO C before C23 requires.
 */
#define __cstr_find_get_macro(_1,_2,_3,_4,NAME,...) NAME

#define __cstr_find_chooser(...)    \
    __cstr_find_get_macro(__VA_ARGS__,  \
        intl_find_str_range,            \
        __cstr_find_3,                  \
        __cstr_find_2,                  \
        0)
    

/* Two arguments: pick the string or character search from the type of _2;
 * any other type selects dummy_func. */
#define __cstr_find_2(str, _2)                      \
    _Generic( (_2),                                 \
        PVT_GENERIC_STRING_TYPES(intl_find_str),    \
        PVT_GENERIC_CHAR_TYPES(intl_find_ch),       \
        default : dummy_func                        \
    )(str, _2)

/* Three arguments: same type dispatch, using the variants that also take
 * a position. */
#define __cstr_find_3(str, _2, pos)                     \
    _Generic( (_2),                                     \
        PVT_GENERIC_STRING_TYPES(intl_find_str_offset), \
        PVT_GENERIC_CHAR_TYPES(intl_find_ch_offset),    \
        default : dummy_func                            \
    )(str, _2, pos)
/*
 *  Generic insert for cstring; `str` is a `cstring *`.
 *
 *  cstr_insert(str, index, other)             -> intl_insert_str
 *  cstr_insert(str, iter, ch)                 -> intl_insert_iter_ch / intl_insert_citer_ch
 *  cstr_insert(str, index, count, ch)         -> intl_insert_cnt_ch
 *  cstr_insert(str, index, count, other)      -> intl_insert_str_range
 *  cstr_insert(str, iter, count, ch)          -> intl_insert_iter_cnt_ch / intl_insert_citer_cnt_ch
 *  cstr_insert(str, index, other, pos, count) -> intl_insert_substr
 */
#define cstr_insert(...) \
    __cstr_insert_chooser(__VA_ARGS__)(__VA_ARGS__)


/*      INSERT
 *------------
 *  Dispatch on the number of arguments: the caller's arguments shift the
 *  candidate names to the right, so NAME lands on __cstr_insert_3, _4 or
 *  _5 according to the argument count.
 *
 *  The trailing `0` is padding only and is always discarded: it keeps the
 *  `...` of __cstr_insert_get_macro non-empty in the 3-argument case,
 *  which ISO C before C23 requires.
 */

#define __cstr_insert_get_macro(_1,_2,_3,_4,_5,NAME,...) NAME

#define __cstr_insert_chooser(...)      \
    __cstr_insert_get_macro(__VA_ARGS__,\
        __cstr_insert_5,                \
        __cstr_insert_4,                \
        __cstr_insert_3,                \
        0)

/* Three arguments: an index-typed `pos` inserts a string (other types of
 * `x` select dummy_func); an iterator `pos` inserts a single character. */
#define __cstr_insert_3(str, pos, x)                    \
    _Generic((pos),                                     \
        PVT_GENERIC_SIZE_TYPES(_Generic((x),            \
            PVT_GENERIC_STRING_TYPES(intl_insert_str),  \
            default: dummy_func                         \
        )),                                             \
        cstr_iterator : intl_insert_iter_ch,            \
        cstr_const_iterator : intl_insert_citer_ch      \
    )(str, pos, x)

/* Four arguments: with an index-typed `pos` the type of the last argument
 * chooses between the character and string variants; an iterator `pos`
 * selects the count-and-character variant. */
#define __cstr_insert_4(str, pos, a, b)                     \
    _Generic((pos),                                         \
        PVT_GENERIC_SIZE_TYPES(_Generic((b),                \
            PVT_GENERIC_CHAR_TYPES(intl_insert_cnt_ch),     \
            PVT_GENERIC_STRING_TYPES(intl_insert_str_range),\
            default: dummy_func                             \
        )),                                                 \
        cstr_iterator : intl_insert_iter_cnt_ch,            \
        cstr_const_iterator : intl_insert_citer_cnt_ch      \
    )(str, pos, a, b)

/* Five arguments: always the substring insert. */
#define __cstr_insert_5(str, index, other, pos, count)      \
    intl_insert_substr(str, index, other, pos, count)



/*
 *  Functions to insert characters
 * -------------------------------
 */

/*
 * Insert taking the target string, an index, a count and a character.
 * Defined outside this excerpt; presumably inserts `count` copies of `ch`
 * at `index` - confirm against the definition.
 */
cstring __cstr__unused
intl_insert_cnt_ch(cstring *str, const size_t index, 
                    const size_t count, const char ch);

/*
 * Insert taking the target string, an index, a count and a source string.
 * Defined outside this excerpt.  intl_insert_str() calls it with
 * pvt_cstr_strlen(other) as `count` to insert the whole of `other`.
 */
cstring __cstr__unused
intl_insert_str_range(cstring *str, const size_t index, 
                    const size_t count, const_cstring other);

/*
 * Inserts the whole of `other` into *str at `index` by forwarding to
 * intl_insert_str_range with the length reported by pvt_cstr_strlen.
 * Returns whatever intl_insert_str_range returns.
 */
static inline cstring __cstr__unused
intl_insert_str(cstring *str, const size_t index, const_cstring other)
{
    const size_t other_len = pvt_cstr_strlen(other);

    return intl_insert_str_range(str, index, other_len, other);
}

/*
 * Inserts the substring cstr_substr(other, pos, count) into *str at
 * `index`.
 *
 * cstr_substr hands back a separate cstring that the caller has to release
 * with cstr_free; it is freed here once the insert is done so the
 * temporary does not leak.
 *
 * Returns whatever intl_insert_str returns.
 */
static inline cstring __cstr__unused
intl_insert_substr(cstring *str, const size_t index, 
                   const_cstring other, const size_t pos,
                   const size_t count)
{
    cstring piece  = cstr_substr(other, pos, count);
    cstring result = intl_insert_str(str, index, piece);

    cstr_free(&piece);
    return result;
}



/*
 * Iterator-position insert taking a count and a character.  Defined
 * outside this excerpt; the wrappers below all forward to it.
 */
cstr_iterator __cstr__unused
intl_insert_iter_cnt_ch(cstring *str, cstr_iterator pos, 
                    const size_t count, const char ch);

/*
 * Const-iterator variant: rebuilds a mutable iterator from pos.it and
 * forwards to intl_insert_iter_cnt_ch.
 */
static inline cstr_iterator __cstr__unused
intl_insert_citer_cnt_ch(cstring *str, cstr_const_iterator pos,
                    const size_t count, const char ch)
{
    cstr_iterator mutable_pos = { (cstring)pos.it };

    return intl_insert_iter_cnt_ch(str, mutable_pos, count, ch);
}

/* Single-character insert at an iterator: forwards with a count of 1. */
static inline cstr_iterator __cstr__unused
intl_insert_iter_ch(cstring *str, cstr_iterator pos, const char ch)
{
    const size_t one = 1;

    return intl_insert_iter_cnt_ch(str, pos, one, ch);
}

/* Single-character insert at a const iterator: forwards with a count of 1. */
static inline cstr_iterator __cstr__unused
intl_insert_citer_ch(cstring *str, cstr_const_iterator pos, 
                    const char ch)
{
    const size_t one = 1;

    return intl_insert_citer_cnt_ch(str, pos, one, ch);
}

Is it considered a good design?

I also have helper functions for find in the library, and I thought of having the find function accept a strategy parameter at the end so the user can choose the search algorithm in the macro itself, instead of having separate functions for each one. The problem is how to implement it. Does anyone have any suggestions?

below is the macro for cstr_find:

/**
 *  Generic find function for cstring.
 * 
 *  cstr_find(str, ch)                  -> find first character ch in str
 *  cstr_find(str, other)               -> find first substring equal to other in str
 *  cstr_find(str, ch, pos)             -> find first character ch in (str + pos)
 *  cstr_find(str, other, pos)          -> find first substring equal to other in (str + pos)
 *  cstr_find(str, other, pos, count)   -> find first substring equal to other in str[pos, pos+count)
 *  
 *  returns `size_t`
 */
#define cstr_find(...) \
    __cstr_find_chooser(__VA_ARGS__)(__VA_ARGS__)


/*
 *      FIND
 * ---------
 *  Dispatch on the number of arguments: the caller's arguments shift the
 *  candidate names to the right, so NAME lands on the entry matching the
 *  argument count (2 -> __cstr_find_2, 3 -> __cstr_find_3,
 *  4 -> intl_find_str_range).
 *
 *  The trailing `0` is padding only and is always discarded: it keeps the
 *  `...` of __cstr_find_get_macro non-empty in the 2-argument case, which
 *  ISO C before C23 requires.
 */
#define __cstr_find_get_macro(_1,_2,_3,_4,NAME,...) NAME

#define __cstr_find_chooser(...)    \
    __cstr_find_get_macro(__VA_ARGS__,  \
        intl_find_str_range,            \
        __cstr_find_3,                  \
        __cstr_find_2,                  \
        0)
    

/* Two arguments: pick the string or character search from the type of _2;
 * any other type selects dummy_func. */
#define __cstr_find_2(str, _2)                      \
    _Generic( (_2),                                 \
        PVT_GENERIC_STRING_TYPES(intl_find_str),    \
        PVT_GENERIC_CHAR_TYPES(intl_find_ch),       \
        default : dummy_func                        \
    )(str, _2)

/* Three arguments: same type dispatch, using the variants that also take
 * a position. */
#define __cstr_find_3(str, _2, pos)                     \
    _Generic( (_2),                                     \
        PVT_GENERIC_STRING_TYPES(intl_find_str_offset), \
        PVT_GENERIC_CHAR_TYPES(intl_find_ch_offset),    \
        default : dummy_func                            \
    )(str, _2, pos)
Post Closed as "Not suitable for this site" by Toby Speight, Chris, coderodde
Source Link

A string library in C

I've been working on a string library in C with C++ standard features of std::basic_string<char> just for learning purposes. It's called cstring and the full source is here.

The basic idea is storing metadata in a struct right behind the character buffer, so a cstring is just a char* underneath and works with any standard C function that takes const char*.

/*
 * Bookkeeping header for a cstring.  pvt_buf_grow_ allocates one of these
 * followed by the characters and returns the address just past the header.
 */
typedef struct {
    size_t size;        // stored length; set to 0 by pvt_buf_grow_ for a new buffer
    size_t capacity;    // capacity chosen by pvt_buf_grow_; the allocation holds capacity + 1 characters
    bool is_malloced;   // set to true by pvt_buf_grow_; it refuses to resize a buffer where this is false
} pvt_cstr_metadata_t;

// Access the metadata from a cstring data pointer: reinterprets `ptr` as a
// metadata pointer and steps back one element, i.e. to the header located
// directly before the characters.
#define pvt_dat_to_mdata_(ptr) \
    (((pvt_cstr_metadata_t *)(ptr)) - 1)

I also use _Generic so you can call the same function name with different arguments like you would in C++, so instead of having like cstr_init_from_string, cstr_init_with_size etc you just call cstr_init and it figures out what you mean.

I have some specific things I'd love feedback on

right now a lot of the intl_* implementation functions are static inline in the header which I know causes code bloat per translation unit. Is moving them to .c with just declarations in the header the right fix?

Are there any obvious bugs or safety issues I might have missed?

The library is ASCII/byte-based only — no Unicode support, that's intentional for now.

I'm still pretty new to this so any feedback is welcome. I mainly want to know if the overall design makes sense or if I'm doing something obviously wrong.