2
\$\begingroup\$

I've been working on a string library in C that mirrors the standard interface of C++'s std::basic_string<char>, purely for learning purposes. It's called cstring, and the full source is here.

The basic idea is to store the metadata in a struct placed immediately before the character buffer in memory, so a cstring is just a char* underneath and works with any standard C function that takes a const char*.

/*
 * Bookkeeping header for a cstring. pvt_buf_grow_ allocates one of these
 * at the start of each allocation and hands out the address just past it,
 * so the header sits directly before the character data in memory.
 */
typedef struct {
    size_t size;        /* set to 0 by pvt_buf_grow_ on a fresh allocation; presumably the current length in chars -- confirm */
    size_t capacity;    /* capacity recorded by pvt_buf_grow_; the allocation holds capacity + 1 chars */
    bool is_malloced;   /* set to true by pvt_buf_grow_; growing an existing buffer asserts this is set */
} pvt_cstr_metadata_t;

// Access the metadata header from a character-data pointer: reinterpret
// ptr as a header pointer and step back by one header. pvt_buf_grow_
// returns (header + 1), so this is the inverse of that step.
// ptr is not checked for NULL here.
#define pvt_dat_to_mdata_(ptr) \
    (((pvt_cstr_metadata_t *)(ptr)) - 1)

Here is a small example that shows how to create, use and destroy a cstring:

#include "cstring.h"

int main(void) {
    /* Build a cstring out of a string literal and print it. */
    cstring greeting = cstr_init("hello");
    cstr_println(greeting);

    /* Extend it with a suffix; the string is passed by address. */
    cstr_append(&greeting, " world");
    cstr_println(greeting);

    /* Report the current size and capacity. */
    printf("size: %zu\n", cstr_size(greeting));
    printf("cap:  %zu\n", cstr_capacity(greeting));

    /* Insert a string at index 5. */
    cstr_insert(&greeting, 5, ",");
    cstr_println(greeting);

    /* Look up a character and print the index that was returned. */
    size_t w_index = cstr_find(greeting, 'w');
    printf("'w' at index: %zu\n", w_index);

    /* Take a substring; the result is a separate cstring that is freed here. */
    cstring head = cstr_substr(greeting, 0, 5);
    cstr_println(head);
    cstr_free(&head);

    /* Release the original string. */
    cstr_free(&greeting);
    return 0;
}

Every function uses pvt_buf_grow_ to allocate or resize the buffer. In cstring.c:

/*
 * Allocate a new buffer (ptr == NULL) or resize an existing one so that it
 * has room for at least `count` characters plus one extra character slot.
 *
 * ptr    Existing cstring to grow, or NULL to create a fresh buffer.
 *        A non-NULL ptr must have its is_malloced flag set (asserted).
 * count  Minimum capacity requested, excluding the extra slot. Must not
 *        exceed cstr_max_size() (asserted).
 *
 * Capacity policy: for an existing buffer the new capacity is the larger of
 * `count` and twice pvt_total_cap_(ptr); for a fresh buffer it is the larger
 * of `count` and CSTRING_DEFAULT_CAP. The result is capped at
 * cstr_max_size().
 *
 * Returns the character pointer located just past the metadata header.
 * On a fresh allocation size is set to 0; on a resize the stored size is
 * left as it was. Allocation failure is reported through pvt_cstr_assert.
 * After a successful resize the caller must use the returned pointer
 * (assuming pvt_cstr_realloc has realloc semantics, the old one may be
 * stale -- confirm).
 */
static cstring pvt_buf_grow_(cstring ptr, size_t count)
{
    /* The capacity is clamped to cstr_max_size() below. Without this check
     * an oversized request would silently receive a buffer smaller than
     * `count`, and the caller would then write past its end. */
    pvt_cstr_assert(count <= cstr_max_size(),
                 "requested capacity exceeds cstr_max_size()");

    size_t cap_001_ = (ptr) ?
                        CSTRMAX(count, pvt_total_cap_(ptr) << 1) :
                        CSTRMAX(count, CSTRING_DEFAULT_CAP);
    cap_001_ = CSTRMIN(cap_001_, cstr_max_size());

    /* One extra character slot beyond the capacity, plus the header. */
    size_t tmp_size_001_ = ((cap_001_ + 1) * sizeof(*ptr))
                            + sizeof(pvt_cstr_metadata_t);
    pvt_cstr_metadata_t *base_ptr_001_;

    if(ptr){
        pvt_cstr_assert(pvt_is_malloced_(ptr),
                     "Literals(const char[]) are not modifiable");
        /* The allocation starts at the header, not at the characters. */
        base_ptr_001_ = (pvt_cstr_metadata_t*)pvt_cstr_realloc(
                                pvt_dat_to_mdata_(ptr), tmp_size_001_);
    }
    else {
        base_ptr_001_ = (pvt_cstr_metadata_t*)pvt_cstr_malloc(tmp_size_001_);
    }

    pvt_cstr_assert(base_ptr_001_ != NULL, "allocation failed!!");

    base_ptr_001_->is_malloced = true;
    base_ptr_001_->capacity    = (size_t)(cap_001_);
    /* Only a brand-new buffer starts empty; a resized one keeps its size. */
    if(!ptr) base_ptr_001_->size = 0;

    return (cstring)(base_ptr_001_ + 1);
}

/*
 *  Constructor functions taking multiple arguments
 *  -----------------------------------------------
 */

/*
 * Construct a cstring from a repeat count and a character.
 *
 * When cnt is zero, a buffer of CSTRING_DEFAULT_CAP is allocated first,
 * its first character is set to 0 and its size is set to 0. In every case
 * intl_assign_cnt_ch is then called on the result with (cnt, ch); for a
 * non-zero cnt it is called with the result still NULL.
 *
 * Returns the resulting cstring.
 */
CSTR_NODISCARD  __attribute__((malloc))
cstring __cstr__unused
intl_init_cpy_ch(const size_t cnt, const char ch)
{
    cstring result = NULL;

    if (cnt == 0) {
        result = pvt_buf_grow_(NULL, CSTRING_DEFAULT_CAP);
        result[0] = '\0';
        pvt_set_total_size_(result, 0);
    }

    intl_assign_cnt_ch(&result, cnt, ch);
    return result;
}

/*
 * Construct a new cstring from `other`.
 *
 * The length of `other` is measured with pvt_cstr_strlen, a fresh buffer
 * is requested with that length + 1 as the capacity argument, and the
 * characters are transferred with pvt_copy_.
 *
 * Returns the newly allocated cstring.
 */
CSTR_NODISCARD  __attribute__((malloc))
cstring __cstr__unused
intl_copy_constructor(const_cstring other)
{
    const size_t other_len = pvt_cstr_strlen(other);
    cstring copy = pvt_buf_grow_(NULL, other_len + 1);

    pvt_copy_(copy, other, other_len);
    return copy;
}

/*
 * Construct a new cstring from the iterator range [begin, end).
 *
 * The range must be non-empty and ordered: begin.it < end.it is asserted.
 * The number of characters comes from cstr_distance(begin, end); a fresh
 * buffer is requested with that count + 1 and filled from begin.it with
 * pvt_copy_.
 *
 * Returns the newly allocated cstring.
 */
CSTR_NODISCARD  __attribute__((malloc))
cstring __cstr__unused
intl_init_cpy_str_w_iter(cstr_iterator begin,
                         cstr_iterator end)
{
    pvt_cstr_assert((begin.it < end.it),
    "Unsupported arguments passed for Constructor");

    const size_t range_len = cstr_distance(begin, end);

    cstring result = pvt_buf_grow_(NULL, range_len + 1);
    pvt_copy_(result, begin.it, range_len);

    return result;
}

/*
 * Construct a new cstring from part of `other`.
 *
 * other   Source string; must not be NULL (asserted).
 * start   Index of the first character to copy; must be less than the
 *         length of `other` (asserted).
 * offset  Maximum number of characters to copy. The count actually copied
 *         is the smaller of `offset` and the characters remaining after
 *         `start`.
 *
 * Returns the newly allocated cstring.
 */
CSTR_NODISCARD  __attribute__((malloc))
cstring __cstr__unused
intl_init_cpy_str_w_off(const_cstring other,
                        const size_t start, const size_t offset)
{
    /* Check the pointer before measuring it: the length query must not run
     * on a NULL argument. */
    pvt_cstr_assert(other != NULL,
                    "Unsupported arguments passed for Constructor");

    const size_t size_ = pvt_cstr_strlen(other);

    /* Check the range before subtracting, so size_ - start cannot wrap. */
    pvt_cstr_assert(start < size_,
                    "Unsupported arguments passed for Constructor");

    const size_t count_ = CSTRMIN(offset, size_ - start);

    cstring str_ = pvt_buf_grow_(NULL, count_ + 1);
    pvt_copy_(str_, other + start, count_);

    return str_;
}

I also use _Generic so that the same function name can be called with different argument types, as with overloading in C++. Instead of separate functions such as cstr_init_from_string, cstr_init_with_size, etc., you just call cstr_init and it selects the right implementation from the arguments.

I have some specific things I'd love feedback on:

Right now a lot of the intl_* implementation functions are static inline in the header, which I know causes the code to be emitted in every translation unit that includes it. Does that cause bloat?

/*
 * Public entry point for insertion. The argument list is forwarded twice:
 * first to __cstr_insert_chooser, which selects one of __cstr_insert_3,
 * __cstr_insert_4 or __cstr_insert_5 according to how many arguments were
 * given, and then to the macro that was selected.
 */
#define cstr_insert(...) \
    __cstr_insert_chooser(__VA_ARGS__)(__VA_ARGS__)


/*      INSERT
 *------------
 */

/*
 * Argument-count dispatch for cstr_insert.
 *
 * __cstr_insert_get_macro discards its first five arguments and yields the
 * sixth. __cstr_insert_chooser appends the candidate macro names after the
 * caller's arguments, so the more arguments the caller supplies, the
 * further the candidates are shifted right and the earlier candidate lands
 * in the sixth (NAME) slot:
 *   3 arguments -> __cstr_insert_3
 *   4 arguments -> __cstr_insert_4
 *   5 arguments -> __cstr_insert_5
 *
 * The trailing 0 is a sentinel that is always swallowed by the "...".
 * Without it a 3-argument call would leave the variadic part empty, which
 * ISO C before C23 does not permit (compilers accept it only as an
 * extension).
 *
 * NOTE(review): identifiers containing a double underscore are reserved
 * for the implementation; consider renaming these in a coordinated change.
 */
#define __cstr_insert_get_macro(_1,_2,_3,_4,_5,NAME,...) NAME

#define __cstr_insert_chooser(...)      \
    __cstr_insert_get_macro(__VA_ARGS__,\
        __cstr_insert_5,                \
        __cstr_insert_4,                \
        __cstr_insert_3,                \
        0)

/*
 * Three-argument insert: (str, pos, x).
 *
 * Dispatches on the type of `pos`:
 *   - a type listed by PVT_GENERIC_SIZE_TYPES: dispatch again on the type
 *     of `x`; the types listed by PVT_GENERIC_STRING_TYPES select
 *     intl_insert_str, anything else selects dummy_func
 *   - cstr_iterator:       intl_insert_iter_ch
 *   - cstr_const_iterator: intl_insert_citer_ch
 * The selected function is then called with (str, pos, x).
 *
 * NOTE(review): PVT_GENERIC_SIZE_TYPES / PVT_GENERIC_STRING_TYPES and
 * dummy_func are defined elsewhere; presumably they expand to _Generic
 * association lists and a placeholder function -- confirm.
 */
#define __cstr_insert_3(str, pos, x)                    \
    _Generic((pos),                                     \
        PVT_GENERIC_SIZE_TYPES(_Generic((x),            \
            PVT_GENERIC_STRING_TYPES(intl_insert_str),  \
            default: dummy_func                         \
        )),                                             \
        cstr_iterator : intl_insert_iter_ch,            \
        cstr_const_iterator : intl_insert_citer_ch      \
    )(str, pos, x)

/*
 * Four-argument insert: (str, pos, a, b).
 *
 * Dispatches on the type of `pos`:
 *   - a type listed by PVT_GENERIC_SIZE_TYPES: dispatch again on the type
 *     of the last argument `b`; the types listed by PVT_GENERIC_CHAR_TYPES
 *     select intl_insert_cnt_ch, the types listed by
 *     PVT_GENERIC_STRING_TYPES select intl_insert_str_range, anything else
 *     selects dummy_func
 *   - cstr_iterator:       intl_insert_iter_cnt_ch
 *   - cstr_const_iterator: intl_insert_citer_cnt_ch
 * The selected function is then called with (str, pos, a, b).
 *
 * NOTE(review): the PVT_GENERIC_* helpers and dummy_func are defined
 * elsewhere; their exact expansion is not visible here -- confirm.
 */
#define __cstr_insert_4(str, pos, a, b)                     \
    _Generic((pos),                                         \
        PVT_GENERIC_SIZE_TYPES(_Generic((b),                \
            PVT_GENERIC_CHAR_TYPES(intl_insert_cnt_ch),     \
            PVT_GENERIC_STRING_TYPES(intl_insert_str_range),\
            default: dummy_func                             \
        )),                                                 \
        cstr_iterator : intl_insert_iter_cnt_ch,            \
        cstr_const_iterator : intl_insert_citer_cnt_ch      \
    )(str, pos, a, b)

/*
 * Five-argument insert: no type dispatch, forwards all arguments unchanged
 * to intl_insert_substr.
 */
#define __cstr_insert_5(str, index, other, pos, count)      \
    intl_insert_substr(str, index, other, pos, count)



/*
 *  Functions to insert characters
 * -------------------------------
 */

/*
 * Selected by __cstr_insert_4 when the position is a size type and the last
 * argument is a character type. Defined outside this header.
 * NOTE(review): presumably inserts `count` copies of `ch` into *str at
 * `index` and returns the resulting string -- confirm against the
 * definition.
 */
cstring __cstr__unused
intl_insert_cnt_ch(cstring *str, const size_t index, 
                    const size_t count, const char ch);

/*
 * Selected by __cstr_insert_4 when the position is a size type and the last
 * argument is a string type; intl_insert_str also forwards here with
 * `count` set to the length of `other`. Defined outside this header.
 * NOTE(review): presumably inserts the first `count` characters of `other`
 * into *str at `index` -- confirm against the definition.
 */
cstring __cstr__unused
intl_insert_str_range(cstring *str, const size_t index, 
                    const size_t count, const_cstring other);

/*
 * Insert `other` into *str at `index`.
 *
 * Measures `other` with pvt_cstr_strlen and forwards to
 * intl_insert_str_range with that length as the count.
 *
 * Returns whatever intl_insert_str_range returns.
 */
static inline cstring __cstr__unused
intl_insert_str(cstring *str, const size_t index, const_cstring other)
{
    const size_t other_len = pvt_cstr_strlen(other);

    return intl_insert_str_range(str, index, other_len, other);
}

/*
 * Insert part of `other` into *str at `index`.
 *
 * str    Address of the destination string.
 * index  Position in *str at which to insert.
 * other  Source string.
 * pos    Passed to cstr_substr as the start of the part to take.
 * count  Passed to cstr_substr as the length of the part to take.
 *
 * The part is obtained with cstr_substr(other, pos, count), which yields a
 * separately owned cstring. It is released with cstr_free once the insert
 * has completed, so no temporary allocation is leaked.
 *
 * Returns whatever intl_insert_str returns.
 */
static inline cstring __cstr__unused
intl_insert_substr(cstring *str, const size_t index, 
                   const_cstring other, const size_t pos,
                   const size_t count)
{
    cstring piece = cstr_substr(other, pos, count);
    cstring result = intl_insert_str(str, index, piece);

    /* The temporary belongs to this function; free it before returning. */
    cstr_free(&piece);

    return result;
}



/*
 * Iterator-position insert that the other iterator overloads
 * (intl_insert_citer_cnt_ch, intl_insert_iter_ch, intl_insert_citer_ch)
 * forward to. Defined outside this header.
 * NOTE(review): presumably inserts `count` copies of `ch` at `pos` and
 * returns an iterator into the updated string -- confirm against the
 * definition.
 */
cstr_iterator __cstr__unused
intl_insert_iter_cnt_ch(cstring *str, cstr_iterator pos, 
                    const size_t count, const char ch);

/*
 * Const-iterator overload of intl_insert_iter_cnt_ch.
 *
 * Builds a cstr_iterator from pos.it (casting it to cstring) and forwards
 * to intl_insert_iter_cnt_ch with the same count and character.
 */
static inline cstr_iterator __cstr__unused
intl_insert_citer_cnt_ch(cstring *str, cstr_const_iterator pos,
                    const size_t count, const char ch)
{
    cstr_iterator mutable_pos = { (cstring)pos.it };

    return intl_insert_iter_cnt_ch(str, mutable_pos, count, ch);
}

/*
 * Single-character overload: forwards to intl_insert_iter_cnt_ch with a
 * count of 1.
 */
static inline cstr_iterator __cstr__unused
intl_insert_iter_ch(cstring *str, cstr_iterator pos, const char ch)
{
    const size_t single_copy = 1;

    return intl_insert_iter_cnt_ch(str, pos, single_copy, ch);
}

/*
 * Single-character, const-iterator overload: forwards to
 * intl_insert_citer_cnt_ch with a count of 1.
 */
static inline cstr_iterator __cstr__unused
intl_insert_citer_ch(cstring *str, cstr_const_iterator pos, 
                    const char ch)
{
    const size_t single_copy = 1;

    return intl_insert_citer_cnt_ch(str, pos, single_copy, ch);
}

Is it considered a good design?

The library is ASCII/byte-based only — no Unicode support, that's intentional for now.

I'm still pretty new to this so any feedback is welcome. I mainly want to know if the overall design makes sense or if I'm doing something obviously wrong.

New contributor
Abhiram is a new contributor to this site. Take care in asking for clarification, commenting, and answering. Check out our Code of Conduct.
\$\endgroup\$
5
  • \$\begingroup\$ Thanks for adding the code to the question. That's better, but do note that your request for help implementing selectable search strategy is off-topic - we can review your code for problems, but asking for help with changing the functionality is specifically off-topic here - help center. \$\endgroup\$ Commented 18 hours ago
  • \$\begingroup\$ I think you should probably include the code to create and destroy your string objects - without those, it's quite hard to understand how it works and is expected to be used. An example program that exercises the code under review would be extremely helpful to reviewers - I expect I'd vote to reopen if you can include that (sufficiently complete that I can compile and run it). \$\endgroup\$ Commented 17 hours ago
  • 1
    \$\begingroup\$ BTW, names containing two or more consecutive underscores are reserved for the implementation for any purpose, including as macros, so many of your identifiers are unsafe in that respect. \$\endgroup\$ Commented 17 hours ago
  • 2
    \$\begingroup\$ reinventing-the-wheel? \$\endgroup\$ Commented 15 hours ago
  • 1
    \$\begingroup\$ I don't think so @Tobias, since C doesn't have a counted-character string type. If it was C++, then I'd agree. \$\endgroup\$ Commented 2 hours ago

1 Answer 1

3
\$\begingroup\$

Browsing your code, I can't help but wonder if this is too specific. You've said you haven't done anything with unicode yet, and that's on purpose, but... if this implemented just a general byte memory buffer with attached metadata, and absolutely zero conception of null-terminated ASCII strings, then you'd have a better foundation to build both the functionality shown and support for something like UTF-8 on top of that.

As it stands, I suspect a lot of your work will have to be duplicated if you decide you want to handle unicode.

\$\endgroup\$
2
  • \$\begingroup\$ would it be better to add UTF-8 support in the same header with separate functions or just put it in a separate header on top of the core buffer. \$\endgroup\$ Commented 6 hours ago
  • \$\begingroup\$ I would go with a separate header. \$\endgroup\$ Commented 5 hours ago

You must log in to answer this question.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.