I am rather new to python and am porting my R functions into python. I have written the functions below and the primary function is called thetaMax. That function minimizes a likelihood function or optionally minimizes a posterior distribution, depending on the option selected by method. The function works exactly as intended and I have unit tested this against my R code.
However, because I am not familiar with pythonic ways of doing this, I am curious what parts of the code below could be improved to 1) write less/more compact code and 2) to improve speed and take advantage of vectorization when possible?
A complete reproducible example is below
import numpy as np
from scipy.stats import binom
from scipy.optimize import minimize
from scipy.stats import norm
def prob3pl(theta, a, b, c, D = 1.7):
result = c + (1 - c) / (1 + np.exp(-D * a * (theta - b)))
return(result)
def gpcm(theta, d, score, a, D = 1.7):
Da = D * a
result = np.exp(np.sum(Da * (theta - d[0:score])))/np.sum(np.exp(np.cumsum(Da * (theta - d))))
return(result)
def thetaMax(x, indDichot, a, b, c, D, d, method = 'mle', **kwargs):
method_options = ['mle', 'map']
optional_args = kwargs
if method not in method_options:
raise ValueError("Invalid method. Expected one of: %s" % method_options)
if method == 'map' and 'mu' not in optional_args:
raise ValueError("You must enter a value for 'mu' for the posterior distribution. Example, mu = 0")
if method == 'map' and 'sigma' not in optional_args:
raise ValueError("You must enter a value for 'sigma' for the posterior distribution. Example, sigma = 1")
x1 = x[indDichot]
x2 = np.delete(x, indDichot)
result = [0] * len(x2)
def fn(theta):
if(len(x1) > 0):
p = prob3pl(theta, a, b, c, D = D)
logDichPart = np.log(binom.pmf(x1,1,p)).sum()
else:
logPolyPart = 0
if(len(x2) > 0):
for i in range(0,len(x2)):
result[i] = gpcm(theta, d = d[i], score = x2[i], a = 1, D = D)
logPolyPart = np.log(result).sum()
else:
logPolyPart = 0
if(method == 'mle'):
LL = -(logDichPart + logPolyPart)
elif(method == 'map'):
normal = np.log(norm.pdf(theta, loc = optional_args['mu'], scale = optional_args['sigma']))
LL = -(logDichPart + logPolyPart + normal)
return(LL)
out = minimize(fn, x0=0)
return(out)
### In order to run
d = np.array([[0, -1, .5, 1],[0,-.5,.2,1]])
a = np.array([1,1,1,1,1])
b = np.array([-1,.5,-.5,0,2])
c = np.array([0,0,0,0,0])
#x = np.array([1,1,0,1,0])
x = np.array([1,1,0,1,0,1,1])
indDichot = range(0,5,1)
object = py.thetaMax(x,indDichot,a,b,c,D=1,d = d)
NameError: name 'py' is not defined\$\endgroup\$