This is for a computation for laser physics. The speed is quite slow, which seems to be delayed a lot by the three nested for loops with indices j,k, and l (with comments below). And the computation time seems to be proportional to the fourth power of nx, but I want to increase nx more to ~100.
I also guess the 7 indices Numpy Array S drags the computation a lot. It would be better if I can remove the three nested for loops. However, that seems to be impossible. Are there any ways to improve speed?
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
import matplotlib as mpl
import math
from scipy import linalg, optimize
from scipy.integrate import quad
import scipy.integrate as integrate
import scipy.special as special
from matplotlib.colors import LogNorm
import time
from datetime import datetime
import copy
nz=70
nx=3
ny=nx
ntau=70
gamsp = 1/(160.*10.**-12)
Rincrease=10**0
R = Rincrease*400*10.**-9
nnu = 1.6*10.**25*Rincrease**-2
c = 3.*10.**8
n = nnu*math.pi*R**2
lambda0= 12.4/0.88*10**-10
delo = 4.*10.**-6
tau_max= 2./gamsp
z_max=3.*10**-4
L=z_max
dtau = tau_max/ntau
dx=2*R/nx
dy=dx
dz = z_max/nz
Omega=2*math.pi*c/lambda0
alpha=np.arctan(R/L)
S=np.empty((nx,ny,nz,nx,ny,nz,ntau),dtype='complex_')
S[:,:,:,:,:,:,0]=0
rho=np.empty((nx,ny,nz,ntau),dtype='complex_')
for i in range(nx):
for j in range(ny):
rho[i,j,:,0]=1 if math.sqrt((i-(nx-1)/2)**2+(j-(ny-1)/2)**2)<nx/2 else 0
Intensitytrans=np.zeros((nx,ny,nz,ntau),dtype='complex_')
Intensity=np.zeros((nz,ntau),dtype='complex_')
Intensityn=np.zeros((nz,ntau),dtype='complex_')
photonn=np.zeros((nz),dtype='complex_')
g=np.zeros((2*(nx-1)+1,2*(ny-1)+1,2*(nz-1)+1),dtype ='complex_')
g2=np.zeros((2*(nx-1)+1,2*(ny-1)+1,2*(nz-1)+1),dtype ='complex_')
for i in range (2*(nx-1)+1): # store numerical values in g
if i>nx-1:
g[i]=copy.copy(np.conjugate(g[2*nx-2-i]))
else:
for j in range (2*(ny-1)+1):
if j>ny-1:
g[i,j]=copy.copy(np.conjugate(g[i,2*ny-2-j]))
else:
for k in range (2*(nz-1)+1):
if k>nz-1:
g[i,j,k]=copy.copy(np.conjugate(g[i,j,2*nz-2-k]))
else:
xvalue=copy.copy(-2*R+4*R*i/(2*(nx-1)))
yvalue=copy.copy(-2*R+4*R*j/(2*(nx-1)))
radius=copy.copy(math.sqrt(xvalue**2+yvalue**2))
zvalue=copy.copy(-L+2*L*k/(2*(nz-1)))
Funcre = "2*math.pi*np.sin(x)*(np.cos(x))**2*np.cos(Omega/c*({})*(np.cos(x)-1))*special.jv(0,Omega/c*{}*math.sin(x))".format(zvalue,radius)
Funcim = "2*math.pi*np.sin(x)*(np.cos(x))**2*np.sin(Omega/c*({})*(np.cos(x)-1))*special.jv(0,Omega/c*{}*math.sin(x))".format(zvalue,radius)
def fre(x):
fre=eval(Funcre)
return fre
def fim(x):
fim=eval(Funcim)
return fim
g[i,j,k]=quad(fre,0,alpha)[0]+1j*quad(fim,0,alpha)[0]
for i in range(2*(nz-1)+1):
g2[:,:,i]=copy.copy(g[:,:,i])*(0 if i>nz else 1)
for i in range(ntau):
i1=np.zeros((nx,ny,nz),dtype='complex_')
i2=np.zeros((nx,ny,nz),dtype='complex_')
if i<ntau-1:
snoise1=np.zeros((nx,ny,nz,nx,ny,nz),dtype='complex_')
snoise2=np.zeros((nx,ny,nz,nx,ny,nz),dtype='complex_')
snoise=np.zeros((nx,ny,nz,nx,ny,nz),dtype='complex_')
s1=np.zeros((nx,ny,nz),dtype='complex_')
s2=np.zeros((nx,ny,nz,nx,ny,nz),dtype='complex_')
s3=np.zeros((nx,ny,nz,nx,ny,nz),dtype='complex_')
for j in range(nx):
for k in range(ny):
for l in range(nz): # THESE LOOPS MAY SLOW THE COMPUTATION
gc=copy.copy(g[(nx-1)-j:2*(nx-1)+1-j,(ny-1)-k:2*(ny-1)+1-k,(nz-1)-l:2*(nz-1)+1-l])
g2c=copy.copy(g2[(nx-1)-j:2*(nx-1)+1-j,(ny-1)-k:2*(ny-1)+1-k,(nz-1)-l:2*(nz-1)+1-l])
i1[j,k,l]=np.sum(S[:,:,:,:,:,:,i]*np.tensordot(np.conjugate(g2c),g2c,axes=0))*(dx*dy*dz)**2
i2[j,k,l]=np.sum((1+rho[:,:,:,i])/2*np.absolute(g2c)**2)*dx*dy*dz
if i<ntau-1:
s1+=S[:,:,:,j,k,l,i]*np.conjugate(g2c)*dx*dy*dz
s2+=np.tensordot(np.conjugate(g2c)*rho[:,:,:,i],S[j,k,l,:,:,:,i],axes = 0)*dx*dy*dz
s3+=np.tensordot(S[:,:,:,j,k,l,i],g2c*rho[:,:,:,i],axes = 0)*dx*dy*dz
snoise1[:,:,:,j,k,l]=rho[:,:,:,i]*(1+rho[j,k,l,i])/2*np.conjugate(gc)
snoise2[j,k,l,:,:,:]=rho[j,k,l,i]*(1+rho[:,:,:,i])/2*np.conjugate(gc)
Intensitytrans[:,:,:,i]=3/(32*math.pi*lambda0**2*delo)*gamsp*(nnu**2*i1+nnu*i2)
if i<ntau-1:
for j in range(nz):
for k in range(nz):
snoise[:,:,j,:,:,k]=snoise1[:,:,j,:,:,k] if j>k else snoise2[:,:,j,:,:,k]
rho[:,:,:,i+1]=rho[:,:,:,i]+gamsp*(-2*(1+rho[:,:,:,i])/2-3*nnu/(8*math.pi)*2*np.real(s1))*dtau
S[:,:,:,:,:,:,i+1]=S[:,:,:,:,:,:,i]+gamsp*(-S[:,:,:,:,:,:,i]+3/(16*math.pi)*(nnu*(s2+s3)+snoise))*dtau
for i in range(ntau):
for jz in range (nz):
Intensity[jz,i]=np.sum(Intensitytrans[:,:,jz,i])/(nx*ny)
peak=np.empty((nz,2))
for i in range(nz):
photonn[i]=np.sum(Intensity[i,:])/ntau*tau_max*delo*math.pi*R**2
peak[i,0]=np.argmax(Intensity[i,:])/ntau
peak[i,1]=i/nz
if max(Intensity[i,:])!=0:
Intensityn[i,:]=Intensity[i,:]/max(Intensity[i,:])
freandfimfunctions redefined in a nested loop? As aneval, no less. Looping that much and that deep while you're working with numpy is probably a red flag by itself, but I'm quite confused what made you go this particular route forfreandfim. Have you profiled any of this? \$\endgroup\$evalincrease the computation time that much \$\endgroup\$