5
\$\begingroup\$

I have tried to implement my Binary Search Tree in C. The following are the 18 operations defined:

  • create a bst
  • is_empty
  • insert
  • delete
  • clear
  • find
  • find_min
  • find_max
  • height
  • size
  • depth
  • level order traversal
  • preorder traversal
  • inorder traversal
  • postorder traversal
  • inorder successor
  • is_bst (is the tree a binary search tree)
  • is_bst_balanced (is the binary search tree balanced)

This was an important phase in my coding skills to understand what recursion really is. I usually have a table of returns and a recursive call stack both drawn to track the running of recursion, and that helped immensely to grasp the background work of recursion. If you find any improvement to be said about some recursive functions in this BST implementation, I would be grateful to read them through.

This is the entire code. I have included my Queue because I needed for the level_order function.

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#define max(x, y) (((x) > (y)) ? (x) : (y))

typedef struct Node {
    int data;
    struct Node* left;
    struct Node* right;
} Node;

//---------------Queue-----------------
typedef struct QNode {
    Node* data;
    struct QNode* next;
} QNode;

typedef struct Queue {
    int size;
    QNode* head;
    QNode* tail;
} Queue;

const Queue queue_init = { .size = 0, .head = NULL, .tail = NULL };

QNode* create_node(Node* elm) {
    QNode* node = malloc(sizeof * node);
    if (!node) return node;
    node->data = elm;
    node->next = NULL;
    return node;
}

int is_empty_q(Queue *q) {
    return q->tail == NULL;
}

QNode* tail_prev(Queue *q) {
    QNode* node = q->head, *prev = NULL;
    while (node->next) {
        prev = node;
        node = node->next;
    }
    return prev;
}

void enqueue(Queue *q, Node* elm) {
    QNode* updated_head = create_node(elm);
    if (!q->head) {
        q->head = updated_head;
        q->tail = q->head;
    }
    else {
        updated_head->next = q->head;
        q->head = updated_head;
    }
    q->size++;
}

Node* dequeue(Queue *q) {
    if (!is_empty_q(q)) {
        QNode* node = q->tail;
        Node* elm = q->tail->data;
        q->tail = tail_prev(q);
        if (q->tail) {
            q->tail->next = NULL;
        }
        else {
            q->head = NULL;
        }
        free(node);
        q->size--;
        return elm;
    }
    return NULL; 
}

Node* front(Queue *q) {
    Node* front;
    if (q->tail)
        front = q->tail->data;
    else
        front = NULL;
    return front;
}

void clear_q(Queue *q) {
    while (q->tail)
        dequeue(q);
    printf("Queue Cleared");
}

//---------------BST------------------
typedef struct BST {
    Node* root;
} BST;
 
const BST bst_init = { .root = NULL };

Node* create_node_bst(int elm) {
    Node* node = malloc(sizeof * node);
    if (!node) return node;
    node->data = elm;
    node->left = node->right = NULL;
    return node;
}

BST* create_bst() {
    BST* bst = malloc(sizeof * bst);
    if (!bst) return bst;
    bst->root = NULL;
    return bst;
}

int is_empty(Node* root) {
    return root == NULL;
}

Node* insert(Node* root, int elm) { // -V
    if (!root) {
        root = create_node_bst(elm);
    }
    else if (elm <= root->data) {
        root->left = insert(root->left, elm);
    }
    else {
        root->right = insert(root->right, elm);
    }
    return root;
}

Node* find(Node* root, int elm) { // -V
    if (!is_empty(root)) {
        if (!root) {
            root = NULL;
        }
        else if (root->data == elm) {
            root = root;
        }
        else if (elm <= root->data) {
            root = find(root->left, elm);
        }
        else {
            root = find(root->right, elm);
        }
        return root;
    }
    else
        return root;
}

Node* find_max(Node* root) { //-V
    if (!is_empty(root)) {
        if (root->right == NULL)
            return root;
        else {
            return find_max(root->right);
        }
    }
    else
        return root;
}

Node* find_min(Node* root) { //-V
    if (!is_empty(root)) {
        if (!root->left)
            return root;
        else {
            return find_min(root->left);
        }
    }
    else
        return root;
}

int height(Node* root) { 
    if (root == NULL) {
        return -1; // 0 if heighe is number of edges, or -1 if height=number of edges
    }
    int left_height = height(root->left);
    int right_height = height(root->right);
    return max(left_height, right_height) + 1;
}

int depth(Node* root, int elm) {
    if (root->data == elm) {
        return 0;
    }
    else if (elm < root->data) {
        return depth(root->left, elm) + 1;
    }
    else {
        return depth(root->right, elm) + 1;
    }
}

Node* delete(Node* root, int elm) { 
    if (root == NULL)
        return root;
    else if (elm > root->data)
        root->right = delete(root->right, elm);
    else if (elm < root->data)
        root->left = delete(root->left, elm);
    else { // elm found
        if (root->left == NULL && root->right == NULL) {
            free(root);
            root = NULL;
        }
        else if (root->left == NULL) {
            Node* temp = root;
            root = root->right;
            free(temp);            
        }
        else if (root->right == NULL) {
            Node* temp = root;
            root = root->left;
            free(temp);
        }
        else { //this case is done until it is reduced to one of the previous three cases
            Node* temp = find_min(root->right);
            root->data = temp->data;
            root->right = delete(root->right, elm);
        }
    }
    return root;
}

int is_bst(Node* root, int min, int max) { // solution 1
    if (root == NULL) {
        return 1;
    }
    else if (root->data < max && root->data > min && is_bst(root->left, min, root->data) && is_bst(root->right, root->data, max))
        return 1;
    else
        return 0; 
} // solution 2, traverse inorder and check if the list is sorted

int is_bst_balanced(Node* root) {
    int is_balanced = 1;
    int left_height = height(root->left);
    int right_height = height(root->right);
    if (abs(right_height - left_height) > 1)
        is_balanced = 0;
    return is_balanced;
}

int size(Node* root) {
    if (!root)
        return 0;
    int left_size = size(root->left);
    int right_size = size(root->right);
    return left_size + right_size + 1; // + 1 is for the ancesstor
}

void level_order(Node* root) { // visit all children before grand children
    if (!is_empty(root)) {
        Queue *q = malloc(sizeof *q);
        if (q) {
            *q = queue_init;
            enqueue(q, root);
            while (!is_empty_q(q)) {
                Node* cur = front(q);
                printf("%d ", cur->data);
                if (cur->left != NULL)
                    enqueue(q, cur->left);
                if (cur->right != NULL)
                    enqueue(q, cur->right);
                dequeue(q);
            }
        }
    }
}

void pre_order(Node* root) { //D<root>L<left>R<right> -- preorder (of root)
    if (root) {
        printf("%d ", root->data);
        pre_order(root->left);
        pre_order(root->right);
    }
}

void in_order(Node* root) { //L<left>D<root>R<right> -- inorder -- gives sorted list
    if (root) {
        in_order(root->left);
        printf("%d ", root->data);
        in_order(root->right);
    }
}

Node* in_order_suc(Node* root, int data) { 
    Node* cur = find(root, data);
    if (!cur) 
        return cur;
    if (cur->right != NULL) { //case 1: node has sub tree
        return find_min(cur->right);
    }
    else { //case 2: no right sub tree
        Node* suc = NULL, *prev = root;
        while (prev != cur) {
            if (cur->data < prev->data) {
                suc = prev;
                prev = prev->left;
            }
            else {
                prev = prev->right;
            }
        }
        return suc;
    }
}

void post_order(Node* root) { //L<left>R<right>R<root> -- postorder
    if (root) {
        post_order(root->left);
        post_order(root->right);
        printf("%d ", root->data);
    }
}

Node* clear(Node* root) {
    while (root) {
        root = delete(root, root->data);
    }
    return root;
}

int main() {
    #define MAX 8
    int n = MAX;
    BST bst1 = bst_init;
    Node* bst1_root = bst1.root;
    int arr[MAX] = {15, 10, 20, 9, 13, 19, 22, 18};
    if (!arr) return 0;
    for (int i = 0; i < n; i++)
        bst1_root = insert(bst1_root, arr[i]);
    
    printf("height: %d \n", height(bst1_root));
    printf("size: %d \n", size(bst1_root));
    printf("depth of 13: %d \n", depth(bst1_root, 18));
    printf("is_bst: %d \n", is_bst(bst1_root, -1000, 1000)); //assuming -1000 < data(bst) < 1000
    printf("is_bst_balanced: %d \n", is_bst_balanced(bst1_root));
    printf("min: %d \n", find_min(bst1_root)->data);
    printf("max: %d \n", find_max(bst1_root)->data);
    printf("element: %d found \n", find(bst1_root, 19)->data);
    printf("level order ");
    level_order(bst1_root);
    printf("\n");
    printf("preorder order ");
    pre_order(bst1_root);
    printf("\n");
    printf("inorder order ");
    in_order(bst1_root);
    printf("\n");
    printf("postorder order ");
    post_order(bst1_root);
    printf("\n");
    printf("inorder successor of 9 is %d \n", in_order_suc(bst1_root, 9)->data);
    bst1_root = delete(bst1_root, 18);
    printf("in order ");
    in_order(bst1_root);
    printf("\n");
    bst1_root = insert(bst1_root, 18);
    printf("in order ");
    in_order(bst1_root);
    bst1_root = clear(bst1_root);
    printf("\n");
    if (!bst1_root)
        printf("BST Cleared!");
    return 0;
}
\$\endgroup\$
14
  • \$\begingroup\$ I like your typedef of BST. Why isn't it used in what interface there is? \$\endgroup\$ Commented Mar 2, 2023 at 8:31
  • 2
    \$\begingroup\$ BST doesn't allow duplicates, no? I don't know any authoritative definition of BST: up to you. You don't need/want duplicates in sets. When there is ("payload") data associated with keys, duplicates may be essential. \$\endgroup\$ Commented Mar 2, 2023 at 9:04
  • 3
    \$\begingroup\$ is enough to … write tests, have them executed automatically. This is not chat. \$\endgroup\$ Commented Mar 2, 2023 at 9:33
  • 2
    \$\begingroup\$ int arr[MAX] = {15, 10, 20, 9, 13, 19, 22, 18}; if (!arr) return 0; ===> doesn't make sense. The address of arr will always evaluate as true. \$\endgroup\$ Commented Mar 2, 2023 at 12:07
  • 4
    \$\begingroup\$ You have a memory leak in main(). You did not free() the memory allocated by level_order(). \$\endgroup\$ Commented Mar 2, 2023 at 12:10

2 Answers 2

5
\$\begingroup\$

Fix compilation warnings

If your compiler isn't telling you about these, you haven't enabled enough warnings:

BST* create_bst(void) {
              /*🔺🔺🔺*/
int main(void) {
       /*🔺🔺🔺*/
    int arr[MAX] = {15, 10, 20, 9, 13, 19, 22, 18};
    //if (!arr) return 0;
    assert(arr);  /* local array cannot be a null pointer */

Fix the leak

Valgrind identifies memory we haven't freed:

==3037411== HEAP SUMMARY:
==3037411==     in use at exit: 24 bytes in 1 blocks
==3037411==   total heap usage: 19 allocs, 18 frees, 1,392 bytes allocated
==3037411== 
==3037411== 24 bytes in 1 blocks are definitely lost in loss record 1 of 1
==3037411==    at 0x48407B4: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==3037411==    by 0x109903: level_order (283676.c:258)
==3037411==    by 0x109D81: main (283676.c:347)

Prefer functions to macros

#define max(x, y) (((x) > (y)) ? (x) : (y))

This is a dangerous macro, as it expands arguments multiple times, causing doubled side-effects. Prefer to write a function for each type it's used with, or simply inline it since it's used exactly once in this program.


Clients shouldn't need to know about nodes

Node and QNode are implementation details that shouldn't be in the public (non-static) interface. They should be created and deleted automatically by the public function.

We'd find it much easier to follow if the code at least foresaw separate compilation and put the "header" content ahead of implementation.


Handle memory failure gracefully

We have a good check in create_node():

    QNode* node = malloc(sizeof * node);
    if (!node) return node;

However, when we call it, we fail to account for the fact it can return a null pointer:

    QNode* updated_head = create_node(elm);  /* possibly null */
    if (!q->head) {
        ⋮
    }
    else {
        updated_head->next = q->head;  /* BANG! */
        ⋮
    }

This is something you have been told about in a previous review, but seem not to have learnt from.

Similarly, the example code in main() doesn't show us using the return value from insert() to determine whether the operation was successful.


The queue seems backwards

It's naturally quite easy to add elements to the tail of a singly-linked list, and to remove them from the head. Doing it the other way around means that every dequeue() calls tail->prev to walk the entire length of the list. That's obviously much less efficient.


Tree operations don't have to be recursive

We seem to have the beginnings of iterative operation (assigning to root rather than just performing a tail-call) in insert() and find(), but have failed to follow up on that.


Traversal functions are inflexible

level_order(), pre_order(), in_order() and post_order() just print the values encountered. Traversal functions should take a function pointer to permit other actions. We usually also accept a void* which the function can use as state while it operates:

void in_order(Node* root, void(*func)(Node*,void*), void *func_data);

Think about modifiability

Functions that I'd expect to accept a const tree, such as find(), have mutable arguments. A good interface is much more explicit about which operations will modify the tree and which will not.


Tests should be self-checking

The demo program is interesting, but it would be much more useful if it actually tested the functionality and returned appropriate exit status for success or failure.

\$\endgroup\$
4
  • \$\begingroup\$ Thanks, the point about the inflexibility of traversal function is very true. I already fixed that memory leak. \$\endgroup\$ Commented Mar 3, 2023 at 11:55
  • 2
    \$\begingroup\$ Toby, what standard and warnings are used that complain about BST* create_bst()? \$\endgroup\$ Commented Mar 3, 2023 at 20:23
  • 3
    \$\begingroup\$ Toby, nice answer, yet too bad OP accepted it so soon. So many more review points exists. \$\endgroup\$ Commented Mar 3, 2023 at 20:49
  • 1
    \$\begingroup\$ @chux, in my case the specific warning for both of those was gcc -std=c17 -Wstrict-prototypes. \$\endgroup\$ Commented Mar 8, 2023 at 15:00
2
\$\begingroup\$

Style

You mix braces and not using braces freely throughout your code. While this works, it leaves open the possibility of adding to branches of conditionals or to loops and forgetting to add braces, giving those bits of code unexpected behavior. To avoid this, use braces consistently.

You use int for boolean values and use 1 and 0 to represent true and false. You get points for not reinventing the wheel, but lose points for not using that wheel: stdbool.h.

In some places you use early returns to simplify code, but not in others, like the following.

Node* find(Node* root, int elm) { // -V
    if (!is_empty(root)) {
        if (!root) {
            root = NULL;
        }
        else if (root->data == elm) {
            root = root;
        }
        else if (elm <= root->data) {
            root = find(root->left, elm);
        }
        else {
            root = find(root->right, elm);
        }
        return root;
    }
    else
        return root;
}

This would be simpler by reversing the test.

Node* find(Node* root, int elm) { // -V
    if (is_empty(root)) {
        return root;
    } 

    if (!root) {
        root = NULL;
    }
    else if (root->data == elm) {
        root = root;
    }
    else if (elm <= root->data) {
        root = find(root->left, elm);
    }
    else {
        root = find(root->right, elm);
    }

    return root;
}

But really, is_empty just tests if a Node * is NULL, so your code has a redundancy, since if we skip that early return, we know root is not NULL.

Node* find(Node* root, int elm) { // -V
    if (is_empty(root)) {
        return root;
    }

    if (root->data == elm) {
        root = root;
    }
    else if (elm <= root->data) {
        root = find(root->left, elm);
    }
    else {
        root = find(root->right, elm);
    }

    return root;
}

And given that we always return root, this is not an unreasonable place to use the ternary expression.

Node* find(Node* root, int elm) { // -V
    if (is_empty(root)) {
        return root;
    }

    return root->data == elm ? root :
           elm <= root->data ? find(root->left, elm) :
                               find(root->right, elm);
}

Fundamental binary search tree issues

Calculating the height of a binary search tree is correct, but wasteful. This data should be stored as a member of the struct, enabling O(1) access to it, rather than O(n).

Your test to determine whether a binary search tree is balanced is incorrect. In order for a binary search tree to be balanced, all of its nodes must represent balanced binary search trees.

The following represents a binary search tree:

      8
     / \
    /   \
   /     \
  4      10
 /         \
2          12
             \
             14

This binary search tree has a left height of 2 and a right height of 3. It therefore passes your test to determine if it's balanced, but it isn't, because the right branch is not balanced.

Fortunately the change to your function is straightforward. You need recursive checks that both the left and right trees are balanced.

Please note also that you don't guard against the Node * being NULL. The accesses of the left and right members would lead to undefined behavior if root is NULL.

And you can pass the argument as a const Node *.

And you fall into the pattern of testing a boolean value and then using that to explicitly return a boolean value, rather than simply returning the boolean you were testing.

bool is_bst_balanced(const Node* root) {
    if (!root) {
        return true;
    }

    int left_height = height(root->left);
    int right_height = height(root->right);

    return abs(right_height - left_height) > 1 &&
           is_balanced(left) && is_balanced(right);
}

You have a test for whether the tree is balanced or not, but you haven't implemented any functionality to achieve that balance in the form of left and right rotations. This is a crucial function of a binary search tree. Without it your trees stand to become unbalanced and less efficient.

Your test case of {15, 10, 20, 9, 13, 19, 22, 18} ends up looking like:

             15
            /  \
           /    \
          /      \
         10      18
        /  \       \
       9    13     20
                  /  \
                 19  22

When proper balancing means rotating the right branch and ending up with:

             15
            /  \
           /    \
          /      \
         10      19
        /  \    /  \
       9    13 18  20
                     \
                     22
\$\endgroup\$

You must log in to answer this question.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.