I've been working on porting a library from C to CUDA. Two functions that I need to port make use of recursion, which CUDA doesn't seem to be playing nicely with. I therefore want to convert these functions from using a recursive approach to an iterative one, as even if the recursion did work, I suspect it would be more efficient to have these functions be iterative rather than recursive.
I've been having quite a bit of trouble though, as the way in which the functions make use of recursion is quite different to traditional recursion to iteration problems.
int get_resulting_node(const uint64_t np[6], const BiomeTree *bt, int idx,
int alt, uint64_t ds, int depth)
{
if (bt->steps[depth] == 0)
return idx;
uint32_t step;
do
{
step = bt->steps[depth];
depth++;
}
while (idx+step >= bt->len);
uint64_t node = bt->nodes[idx];
uint16_t inner = node >> 48;
int leaf = alt;
uint32_t i, n;
for (i = 0, n = bt->order; i < n; i++)
{
uint64_t ds_inner = get_np_dist(np, bt, inner);
if (ds_inner < ds)
{
int leaf2 = get_resulting_node(np, bt, inner, leaf, ds, depth);
uint64_t ds_leaf2;
if (inner == leaf2)
ds_leaf2 = ds_inner;
else
ds_leaf2 = get_np_dist(np, bt, leaf2);
if (ds_leaf2 < ds)
{
ds = ds_leaf2;
leaf = leaf2;
}
}
inner += step;
if (inner >= bt->len)
break;
}
return leaf;
}
and
float getSpline(const Spline *sp, const float *vals)
{
if (!sp || sp->len <= 0 || sp->len >= 12)
{
printf("getSpline(): bad parameters\n");
exit(1);
}
if (sp->len == 1)
return ((FixSpline*)sp)->val;
float f = vals[sp->typ];
int i;
for (i = 0; i < sp->len; i++)
if (sp->loc[i] >= f)
break;
if (i == 0 || i == sp->len)
{
if (i) i--;
float v = getSpline(sp->val[i], vals);
return v + sp->der[i] * (f - sp->loc[i]);
}
const Spline *sp1 = sp->val[i-1];
const Spline *sp2 = sp->val[i];
float g = sp->loc[i-1];
float h = sp->loc[i];
float k = (f - g) / (h - g);
float l = sp->der[i-1];
float m = sp->der[i];
float n = getSpline(sp1, vals);
float o = getSpline(sp2, vals);
float p = l * (h - g) - (o - n);
float q = -m * (h - g) + (o - n);
float r = lerp(k, n, o) + k * (1.0F - k) * lerp(k, p, q);
return r;
}
What's confusing me is that neither are using tail recursion. While I'm sure it's the way to go, I'm really struggling to see how a stack can be applied here.
Spline
,FixSpline
,BiomeTree
, ...etc) and amain
function that is runnable and which calls the relevant function will an example.