Skip to main content
2 of 3
fixed grammar & added beginner and performance tags

3D Connected Component in Cython

Below code is my implementation of 3D connected component algorithm, which I use for a 255x512x512 binary matrix. Although it is written in Cython, it still takes quite some time both for 6-neighbors and 26-neighbors. Do you see any inefficiency in the algorithm? I would really appreciate if you could give me any kind of feedback regarding efficiency, readability and maintainability. I am new to programming.

This is my connectedComponent3D.pyx file

import numpy as np
cimport numpy as np
cimport cython
from cpython cimport array
import array

@cython.wraparound(False)
@cython.nonecheck(False)
@cython.boundscheck(False)
def main(np.ndarray[np.int_t, ndim=3] arr):
    """
    Input arguments: arr -> The array which connected component algo. is applied 
                            type: 3D numpy array dtype=np.int
    Returns: numberOfComponents -> Number of distinct components in the input array -> int
             labels -> It has the same dims with input array. It contains the labels of each 
                       element inside the input array(label 0 is background)
                       
    equivalencydict -> Contains which component label is connected to which component label -> dict
    """
    cdef np.ndarray[np.int_t, ndim=3] labels = np.zeros_like(arr, dtype=np.int)
    cdef dict equivalencydict = {}
    cdef int numberOfComponents
    
    labels, equivalencydict = getConnectedComponents(arr, equivalencydict)
    labels = applyEquivalencyDictToLabels(labels, equivalencydict)
    numberOfComponents = len(np.unique(labels)) - 1
    return [numberOfComponents, labels]

@cython.wraparound(False)
@cython.nonecheck(False)
@cython.boundscheck(False)
cdef getConnectedComponents(np.ndarray[np.int_t, ndim=3] arr, dict equivalencydict): 
    """
    Input arguments: arr -> The array which connected component is applied -> 3D numpy array 
                            dtype=np.int
                     equivalencydict -> Contains which component label is connected to which 
                                         component label type: dict
    Returns: labels -> It has the same dims with input array. It contains the labels of each 
                       element inside the input array(label 0 is background)
             equivalencydict 
    
    
    For each element in the input array, if the element is True on the input array, the function
    checks the labeled neighbors of that element. If there is no labeled neighbor, the element
    gets a new label buy incrementing currentComp by 1. If there are labeled
    neighbors, then the minimum of the neighbor labels is assigned to the
    element.
    """
    cdef np.ndarray[np.int_t, ndim=3] labels = np.zeros_like(arr, dtype=np.int)
    cdef int currentComp = 0
    cdef int z
    cdef int y
    cdef int x
    cdef int zmax = arr.shape[0]
    cdef int ymax = arr.shape[1]
    cdef int xmax = arr.shape[2]
    cdef array.array neighborLabels = array.array('i', [])
    
    #Iterate over each element of arr
    for z in range(1, zmax - 1):
        print(z)
        for y in range(ymax):
            for x in range(xmax):
                if arr[z, y, x] == 0:
                    continue
                else:
                    neighborLabels = getNeighborLabels(arr, labels, (z, y, x))
                    labels, equivalencydict, currentComp = labeling(labels, neighborLabels, currentComp, 
                                                                    equivalencydict, z, y, x)
    return labels, equivalencydict

@cython.wraparound(False)
@cython.nonecheck(False)
@cython.boundscheck(False)
cdef getNeighborLabels(np.ndarray[np.int_t, ndim=3] arr, np.ndarray[np.int_t, ndim=3] labels, 
                       tuple centerIndex):
    """
    Input Arguments: centerIndex -> The index of the element which its neighbors are going to be searched
    Returns: neighborLabels -> It contains the labels of suitable neighbors
    (Neighbors which are true on the input array and are not labeled before during runtime are considered
    as suitable)
    
    neighbors: This is 2D numpy array which contains all the 26 neighbors around the element which is
               located at centerIndex(z, y, x)
    neighborsLabels: This is array which contains the labels of neighbors if there are any suitable
                     neighbors (Due to performance considerations, array is used instead of list)
    
    If a neighbor is True on the input array and if it has been labeled before during runtime
    (Otherwise it would be 0), then add the label of this neighbor to neighborLabels array.
    
    """
    cdef np.ndarray[np.int_t, ndim=2] neighbors = get26Neighbors(centerIndex)
    cdef array.array neighborLabels = array.array('i', [])
    cdef np.ndarray[np.int_t, ndim=1] neighbor_index
    
    for neighbor_index in neighbors:
        if arr[tuple(neighbor_index)] == 1 and labels[tuple(neighbor_index)] != 0:
            neighborLabels.append(labels[tuple(neighbor_index)])
    return neighborLabels

@cython.wraparound(False)
@cython.nonecheck(False)
@cython.boundscheck(False)
cdef applyEquivalencyDictToLabels(np.ndarray[np.int_t, ndim=3] labels, dict equivalencydict):
    """
    The equivalencydict is a sorted dictionary. By iterating over it in reverse direction, 
    the equivalencydict is applied to labels array.
    """
    cdef tuple item
    
    for item in sorted(list(equivalencydict.items()), key=lambda x:x[0], reverse=True):
        print(item)
        labels[labels == item[0]] = item[1]
    return labels

@cython.wraparound(False)
@cython.nonecheck(False)
@cython.boundscheck(False)
cdef get6Neighbors(tuple index):
    """
    Input arguments: index -> This is the index(z, y, x) of element whose neighbors are need to be
    calculated. type: tuple
    Returns: neighbors -> indices of 6-neighbors 
    
    This function calculates all 6 neighbors of an element in 3D space. 
    In order to see what a 6-neighbors is check the 29/38 slide in below link. Left figure is 6-n and
    right one is 26-neighbors.
    Link: http://slideplayer.com/slide/8645709/
    """
    cdef np.ndarray[np.int_t, ndim=2] neighbors = np.array([[index[0], index[1]-1, index[2]],
                                                           [index[0], index[1]+1, index[2]],
                                                           [index[0], index[1], index[2]-1], 
                                                           [index[0], index[1], index[2]+1],
                                                           [index[0]-1, index[1], index[2]], 
                                                           [index[0]+1, index[1], index[2]]], dtype=np.int)
    return np.resize(neighbors, (6,3))

@cython.wraparound(False)
@cython.nonecheck(False)
@cython.boundscheck(False)
cdef labeling(np.ndarray[np.int_t, ndim=3] labels, array.array neighborLabels, int currentComp,  
              dict equivalencydict, int z, int y, int x):
    """
    This function assigns the appropriate label to element(with indices z, y, x which are passed in as
    input arguments).
    If there is no suitable neighbor around the element(z, y, x) then a new component is created and
    assigned to the element. Else, the minimum of the neigbors' labels around the element is 
    assigned to the element.
    """
    if len(neighborLabels) == 0:
        currentComp += 1
        labels[z, y, x] = currentComp
    else:
        labels[z, y, x] = np.amin(neighborLabels)
        equivalencydict = addLabelsToEquivalencyDict(labels, neighborLabels,  
                                                     equivalencydict, z, y, x)
    return labels, equivalencydict, currentComp

@cython.wraparound(False)
@cython.nonecheck(False)
@cython.boundscheck(False)
cdef addLabelsToEquivalencyDict(np.ndarray[np.int_t, ndim=3] labels, array.array neighborLabels,  
                                dict equivalencydict, int z, int y, int x):
    """
    This function creates the spatial relationship between the neighbors of an element(z, y, x).
    The neighbors of an element are also connected to each other but they may not have the same
    component due to the complexity of the objects in the input array. Equivalencydict dictionary
    saves the information of which label is connected to which label. This equivalencydict is then
    applied to labels at the end of the main function.
    """
    cdef int label
    
    for label in neighborLabels:
        if label != labels[z, y, x]:
            equivalencydict[label] = labels[z, y, x]
    return equivalencydict

@cython.wraparound(False)
@cython.nonecheck(False)
@cython.boundscheck(False)
cdef get26Neighbors(tuple index):
    """
    Input arguments: index -> This is the index(z, y, x) of element whose neighbors are need to be
    calculated. type: tuple
    Returns: neighbors -> indices of 26-neighbors 
    
    This function calculates all 16 neighbors of an element in 3D space. 
    In order to see what a 26-neighbors is check the 29/38 slide in below link. Left figure is 6-n and
    right one is 26-neighbors.
    Link: http://slideplayer.com/slide/8645709/
    
    """
    cdef np.ndarray zz 
    cdef np.ndarray yy 
    cdef np.ndarray xx
    
    zz,yy,xx = np.mgrid[(index[0]-1):(index[0]+2) , (index[1]-1):(index[1]+2), (index[2]-1):(index[2]+2)]
    cdef np.ndarray[np.int_t, ndim=2] neighbors = np.vstack((zz.flatten(), yy.flatten(), xx.flatten())).T.astype(np.int)
    #Delete the center which is not a neighbor but the element itself and resize the neighbors
    np.delete(neighbors, [36,37,38])
    return np.resize(neighbors, (26,3))

This is my connectedComponent3D_Setup.py file

from distutils.core import setup
from Cython.Build import cythonize
import numpy

setup(
    ext_modules = cythonize("connectedComponent3D.pyx"),
    include_dirs=[numpy.get_include()]
)

If you want to run the function first cd into a folder where the above two functions are at and run the following code on your command window which compiles the code.

python connectedComponent3D_Setup.py build_ext --inplace

Then you can import connectedComponent3D and call connectedComponent3D.main(arr) function.

Thank you