Skip to main content
added more explanation
Source Link

As a possible alternative [which I didn't try], the pdf_cache array could be fully precalculated once before starting anything. Then, all usage of pdf(u) could be replaced with pdf_cache[i].

Edit: I added this version as an update at the bottom

Here is a version that precalculates the pdf_cache values (again, please pardon the C-like stuff):

As a possible alternative [which I didn't try], the pdf_cache array could be fully precalculated once before starting anything. Then, all usage of pdf(u) could be replaced with pdf_cache[i].

Here is a version that precalculates the pdf_cache values:

As a possible alternative, the pdf_cache array could be fully precalculated once before starting anything. Then, all usage of pdf(u) could be replaced with pdf_cache[i].

Edit: I added this version as an update at the bottom

Here is a version that precalculates the pdf_cache values (again, please pardon the C-like stuff):

added more explanation
Source Link

By doing this, I reduced the runtime to 12 sec. That is, it is 2.75x faster than the original


UPDATE:

Here is a version that precalculates the pdf_cache values:

#include <iostream>
#include <cmath>
#include <stdio.h>
#include <time.h>

double
tvgetf(void)
{
    struct timespec ts;
    double sec;

    clock_gettime(CLOCK_REALTIME,&ts);
    sec = ts.tv_nsec;
    sec /= 1e9;
    sec += ts.tv_sec;

    return sec;
}

#define inline_always   static inline __attribute__((__always_inline__))

// NOTE: the size of pdf_cache is _hardwired_. The max size _can_ be calculated
// and this can become a pointer to an allocated area
int pdf_max;
float *pdf_cache;

inline_always float
pdf(float u)
{
    //u += 1;
    return (1 / (u * u));
}

inline_always float
cdf(float u)
{
    u += 1;
    return (1 - 1 / (u));
}

inline_always float
pdfx(float u,int i)
{
    //u += 1;
    if (i > pdf_max) {
        pdf_cache[i] = pdf(u);
        pdf_max = i;
    }
    return pdf_cache[i];
}

// The main function that implements the numerical integration,
//and it is a recursive function
float
integ(float h, int k, float du)
{
    float res = 0;
    float u = 1;
    int i;

    int iter = h / du;

    k -= 1;

    h += 1;

    if (k == 1) {
        for (i = 0;  i < iter;  ++i) {
            res += cdf(h - u) * pdf_cache[i] * du;
            u += du;
        }
    }
    else {
        for (i = 0;  i < iter;  ++i) {
            res += integ(h - u, k, du) * pdf_cache[i] * du;
            u += du;
        }
    }

    return res;
}

// The main function that implements the numerical integration,
//and it is a recursive function
float
integ_top(float h, int k, float du)
{
    float res = 0;

    if (k == 1)
        res = cdf(h);
    else
        res = integ(h,k,du);

    return res;
}

int
main()
{
    float du = 0.0001;
    int K = 3;

    float gamma[4] = { 0.31622777, 0.79432823,
        1.99526231, 5.01187234
    };
    int G = 50;
    int Q = 2;

    float maxgam = 0;
    int initflg = 1;
    for (int i = 0; i < 4; i++) {
        if ((G - Q * (K - 1)) > 0) {
            float gammath = (gamma[i] / Q) * (G - Q * (K - 1));
            printf("i=%d gammath=%f\n",i,gammath);

            if (initflg) {
                maxgam = gammath;
                initflg = 0;
                continue;
            }

            if (gammath > maxgam)
                maxgam = gammath;
        }
    }

    pdf_max = maxgam / du;
    printf("pdf_max=%d\n",pdf_max);
    pdf_cache = (float *) malloc(sizeof(*pdf_cache) * pdf_max);

    float u = 1;
    for (int i = 0;  i < pdf_max;  ++i) {
        pdf_cache[i] = pdf(u);
        u += du;
    }

    for (int i = 0; i < 4; i++) {
        if ((G - Q * (K - 1)) > 0) {
            float gammath = (gamma[i] / Q) * (G - Q * (K - 1));

            double tvbeg = tvgetf();
            double rtn = 1 - integ_top(gammath, K, du);
            //std::cout << 1 - integ(gammath, K, du) << endl;
            double tvdif = tvgetf() - tvbeg;

            printf("i=%d rtn=%f tvdif=%.9f\n",i,rtn,tvdif);
            fflush(stdout);
        }
    }

    return 0;
}

By doing this, I reduced the runtime to 12 sec.

By doing this, I reduced the runtime to 12 sec. That is, it is 2.75x faster than the original


UPDATE:

Here is a version that precalculates the pdf_cache values:

#include <iostream>
#include <cmath>
#include <stdio.h>
#include <time.h>

double
tvgetf(void)
{
    struct timespec ts;
    double sec;

    clock_gettime(CLOCK_REALTIME,&ts);
    sec = ts.tv_nsec;
    sec /= 1e9;
    sec += ts.tv_sec;

    return sec;
}

#define inline_always   static inline __attribute__((__always_inline__))

// NOTE: the size of pdf_cache is _hardwired_. The max size _can_ be calculated
// and this can become a pointer to an allocated area
int pdf_max;
float *pdf_cache;

inline_always float
pdf(float u)
{
    //u += 1;
    return (1 / (u * u));
}

inline_always float
cdf(float u)
{
    u += 1;
    return (1 - 1 / (u));
}

inline_always float
pdfx(float u,int i)
{
    //u += 1;
    if (i > pdf_max) {
        pdf_cache[i] = pdf(u);
        pdf_max = i;
    }
    return pdf_cache[i];
}

// The main function that implements the numerical integration,
//and it is a recursive function
float
integ(float h, int k, float du)
{
    float res = 0;
    float u = 1;
    int i;

    int iter = h / du;

    k -= 1;

    h += 1;

    if (k == 1) {
        for (i = 0;  i < iter;  ++i) {
            res += cdf(h - u) * pdf_cache[i] * du;
            u += du;
        }
    }
    else {
        for (i = 0;  i < iter;  ++i) {
            res += integ(h - u, k, du) * pdf_cache[i] * du;
            u += du;
        }
    }

    return res;
}

// The main function that implements the numerical integration,
//and it is a recursive function
float
integ_top(float h, int k, float du)
{
    float res = 0;

    if (k == 1)
        res = cdf(h);
    else
        res = integ(h,k,du);

    return res;
}

int
main()
{
    float du = 0.0001;
    int K = 3;

    float gamma[4] = { 0.31622777, 0.79432823,
        1.99526231, 5.01187234
    };
    int G = 50;
    int Q = 2;

    float maxgam = 0;
    int initflg = 1;
    for (int i = 0; i < 4; i++) {
        if ((G - Q * (K - 1)) > 0) {
            float gammath = (gamma[i] / Q) * (G - Q * (K - 1));
            printf("i=%d gammath=%f\n",i,gammath);

            if (initflg) {
                maxgam = gammath;
                initflg = 0;
                continue;
            }

            if (gammath > maxgam)
                maxgam = gammath;
        }
    }

    pdf_max = maxgam / du;
    printf("pdf_max=%d\n",pdf_max);
    pdf_cache = (float *) malloc(sizeof(*pdf_cache) * pdf_max);

    float u = 1;
    for (int i = 0;  i < pdf_max;  ++i) {
        pdf_cache[i] = pdf(u);
        u += du;
    }

    for (int i = 0; i < 4; i++) {
        if ((G - Q * (K - 1)) > 0) {
            float gammath = (gamma[i] / Q) * (G - Q * (K - 1));

            double tvbeg = tvgetf();
            double rtn = 1 - integ_top(gammath, K, du);
            //std::cout << 1 - integ(gammath, K, du) << endl;
            double tvdif = tvgetf() - tvbeg;

            printf("i=%d rtn=%f tvdif=%.9f\n",i,rtn,tvdif);
            fflush(stdout);
        }
    }

    return 0;
}
added more explanation
Source Link

As a possible alternative [which I didn't try], the pdf_cache array could be fully precalculated once before starting anything. Then, all usage of pdf(u) could be replaced with pdf_cache[i]. 

This could be done in main before its for loop as pdf_cache will be the same no matter what main's value ofvalues for i or gammath (aka h) are.

Further, this precalculation could be split up amongst multiple cores if needed.

Also, if openmp is used within integ [as others have suggested], the full precalculation would probably be necessary to prevent interthread write contention.

As a possible alternative [which I didn't try], the pdf_cache array could be fully precalculated once before starting anything. Then, all usage of pdf(u) could be replaced with pdf_cache[i]. This could be done in main before its for loop as pdf_cache will be the same no matter what main's value of i is.

As a possible alternative [which I didn't try], the pdf_cache array could be fully precalculated once before starting anything. Then, all usage of pdf(u) could be replaced with pdf_cache[i]. 

This could be done in main before its for loop as pdf_cache will be the same no matter what main's values for i or gammath (aka h) are.

Further, this precalculation could be split up amongst multiple cores if needed.

Also, if openmp is used within integ [as others have suggested], the full precalculation would probably be necessary to prevent interthread write contention.

Source Link
Loading