Purpose of this program is to calculate the velocity of ng = nxnynz particles at the Lagrangian points from the values at regular grid nodes. How can i speed up this calculation? I hope to receive suggestions on this following code:
#include <iomanip>
#include <omp.h>
#include <vector>
#include <cmath>
#include <ctime>
#include <iostream>
#include <string>
#include <fstream>
#include <string.h>
#include <sstream>
#include <istream>
#include <stdlib.h>
#include <algorithm>
using namespace std;
void functionM4( vector<double> &b, vector<double> &w)
// this is an interpolation function
{
w[0]= 0.5*b[0]*b[0]*b[0]+2.5*b[0]*b[0]+4.0*b[0]+2.0;
w[1]=-1.5*b[1]*b[1]*b[1]-2.5*b[1]*b[1]+1.0;
w[2]= 1.5*b[2]*b[2]*b[2]-2.5*b[2]*b[2]+1.0;
w[3]=-0.5*b[3]*b[3]*b[3]+2.5*b[3]*b[3]-4.0*b[3]+2.0;
}
int main()
{
double pi = 3.141592653589793;
int nx0 = 400; // number of grid in X direction
int ny0 = 400; // number of grid in Y direction
int nz0 = 400; // number of grid in Y direction
int nx = nx0+10;
int ny = ny0+10;
int nz = nz0+10;
int ng = nx*ny*nz;
double xmin = 0.0; // set up domain
double xmax = 2.0*pi;
double ymin = 0.0;
double ymax = 2.0*pi;
double zmin = 0.0;
double zmax = 2.0*pi;
double dx = (xmax-xmin)/(nx0); //spacing step
double dy = (ymax-ymin)/(ny0);
double dz = (zmax-zmin)/(nz0);
vector<double> x(nx,0.0);
vector<double> y(ny,0.0);
vector<double> z(nz,0.0);
for (int i=0;i<nx;i++)
x[i] = xmin+(double)(i-5)*dx; //building grid for x direction
for (int j=0;j<ny;j++)
y[j] = ymin+(double)(j-5)*dy;
for (int k=0;k<nz;k++)
z[k] = zmin+(double)(k-5)*dz;
// ug is velocity component u on grid at the beginning
vector<vector<vector<double> > > ug (nx,vector<vector<double> >(ny,vector <double>(nz,0.0)));
vector<vector<vector<double> > > vg (nx,vector<vector<double> >(ny,vector <double>(nz,0.0)));
vector<vector<vector<double> > > wg (nx,vector<vector<double> >(ny,vector <double>(nz,0.0)));
// particles at Lagrangian points
vector<vector<vector<double> > > xpStar (nx,vector<vector<double> >(ny,vector <double>(nz,0.0)));
vector<vector<vector<double> > > ypStar (nx,vector<vector<double> >(ny,vector <double>(nz,0.0)));
vector<vector<vector<double> > > zpStar (nx,vector<vector<double> >(ny,vector <double>(nz,0.0)));
//the particle velocity at Lagrangian locations
vector<vector<vector<double> > > upStar (nx,vector<vector<double> >(ny,vector <double>(nz,0.0)));
vector<vector<vector<double> > > vpStar (nx,vector<vector<double> >(ny,vector <double>(nz,0.0)));
vector<vector<vector<double> > > wpStar (nx,vector<vector<double> >(ny,vector <double>(nz,0.0)));
int i,j,k;
#pragma omp parallel for private (j,k) schedule(dynamic)
for (i=0;i<nx;i++)
for (j=0;j<ny;j++)
for (k=0;k<nz;k++)
{
//initial values of velocity components at grid nodes
ug[i][j][k] = -sin(x[i])*cos(y[j])*sin(z[k]);
vg[i][j][k] = -cos(x[i])*sin(y[j])*sin(z[k]);
wg[i][j][k] = -2.0*cos(x[i])*cos(y[j])*cos(z[k]);
}
double dt = 0.001; // time step
//the particle velocity upStar at Lagrangian location xpStar is
//obtained from the velocity ug at the grid nodes (x,y,z)
//using a kernel interpolation function.
cout<<" start "<<endl;
#pragma omp parallel for private (j,k) schedule(dynamic)
for (i=0;i<nx;i++)
for (j=0;j<ny;j++)
for (k=0;k<nz;k++){
xpStar[i][j][k]= x[i] + dt*ug[i][j][k];
ypStar[i][j][k]= y[j] + dt*vg[i][j][k];
zpStar[i][j][k]= z[k] + dt*wg[i][j][k];
}
#pragma omp parallel for private (j,k) schedule(dynamic)
for (i=2;i<nx-2;i++)
for (j=2;j<ny-2;j++)
for (k=2;k<nz-2;k++){
int Xindex = (int)((xpStar[i][j][k]- x[0])/dx);
int Yindex = (int)((ypStar[i][j][k]- y[0])/dy);
int Zindex = (int)((zpStar[i][j][k]- z[0])/dz);
int west = Xindex-1; int est = Xindex+2;
int south = Yindex-1; int north = Yindex+2;
int front = Zindex-1; int back = Zindex+2;
vector<double> BX(4,0.0);
vector<double> BY(4,0.0);
vector<double> BZ(4,0.0);
vector<double> wx(4,0.0);
vector<double> wy(4,0.0);
vector<double> wz(4,0.0);
for (int m=west;m<=est;m++){
BX[m-west]=(x[m]-xpStar[i][j][k])/dx;}
for (int n=south;n<=north;n++){
BY[n-south]=(y[n]-ypStar[i][j][k])/dy;}
for (int q=front;q<=back;q++){
BZ[q-front]=(z[q]-zpStar[i][j][k])/dz;}
functionM4(BX,wx);
functionM4(BY,wy);
functionM4(BZ,wz);
for (int m=west;m<=est;m++)
for (int n=south;n<=north;n++)
for (int q=front;q<=back;q++){
double w=wx[m-west]*wy[n-south]*wz[q-front];
upStar[i][j][k] += ug[m][n][q]*w;
vpStar[i][j][k] += vg[m][n][q]*w;
wpStar[i][j][k] += wg[m][n][q]*w;
}
}
cout<<" finish ! "<<endl;
//-------------------------------------Checking results------------------------------------------//
vector<vector<vector<double> > > exact (nx,vector<vector<double> >(ny,vector <double>(nz,0.0)));
vector<vector<vector<double> > > error (nx,vector<vector<double> >(ny,vector <double>(nz,0.0)));
#pragma omp parallel for
for (int i=3;i<nx-3;i++)
for (int j=3;j<ny-3;j++)
for (int k=3;k<nz-3;k++)
{
exact[i][j][k] = -sin(xpStar[i][j][k])*cos(ypStar[i][j][k])*sin(zpStar[i][j][k]);
error[i][j][k] = abs(exact[i][j][k]-upStar[i][j][k]);
}
double aa = 0.0;
double bb = 0.0;
double cc = 0.0;
double dd = 0.0;
double maxi = 0.0;
for (int i=3;i<nx-3;i++)
for (int j=3;j<ny-3;j++)
for (int k=3;k<nz-3;k++)
{
aa += abs(exact[i][j][k]);
bb += error[i][j][k];
cc += error[i][j][k]*error[i][j][k];
dd += exact[i][j][k]*exact[i][j][k];
if (abs(error[i][j][k])>maxi)
maxi = abs(error[i][j][k]);
}
cout<<" L1_norm = "<<bb/aa<<endl;
cout<<" L2_norm = "<<sqrt(cc/dd)<<endl;
cout<<" L3_norm = "<<maxi<<endl;
return 0;
//Compiler: g++ interpolation.cpp -o running -fopenmp
//export OMP_NUM_THREADS=30;
//Run:./running
}
w, which should save a few multiplications. \$\endgroup\$