I modeled a multi-level logistic regression model to reflect the heterogeneity of individuals. including 140 users and 1451 shopping data, including age and gender. The model is as follows: When I run this model, the speed is very slow, 2.5s/it.My CPU is already 100%. Is there any possible to improve the performance ? Thanks!
code:
y_1=pd.Categorical(modeldata['switch']).codes
timephase=pd.Categorical(modeldata['timephase']).codes
isweeken=pd.Categorical(modeldata['isweeken']).codes
usercode=modeldata['user_code'].values
no_x=2
no_z=4
delta_bar = np.zeros((no_x, no_user))
A=np.identity(no_z)*0.01
z=userall[['gender','age_1','age_2','age_3']].values
mu_delta=np.ones((no_z,no_x))*0
mu_vi=np.ones((no_user,no_x))*0
def my_model():
with pm.Model() as model_allcate2:
# We define the prioris
sigma = pm.InverseGamma('sigma',alpha=3,beta=1)
#using LKJCholeskyCov to get MvNormal
sd_dist = pm.HalfNormal.dist(2.5)
chol_packed = pm.LKJCholeskyCov('chol_packed', n=no_x, eta=2, sd_dist=sd_dist)
chol = pm.expand_packed_triangular(no_x, chol_packed,lower=True)
vi=pm.MvNormal('vi', mu=mu_vi, chol=chol, shape=(no_user,no_x))
# Or compute the covariance matrix
#cov = tt.dot(chol, chol.T)
#using LKJCholeskyCov to get MvNormal
delta = pm.MvNormal('delta', mu=mu_delta, chol=chol*10, shape=(no_z,no_x))
beta =pm.Deterministic('beta',pm.math.dot(z,delta)+vi)
epsilon = pm.Normal('epsilon',mu=0,sd=sigma,shape=no_user)
mu = epsilon[usercode] + pm.math.dot(timephase,beta[usercode,0]) + pm.math.dot(isweeken,beta[usercode,1])
# Aplly the logistic linking function
theta = 1 / (1 + pm.math.exp(-mu))
# Compute the boundary decision
# bd = pm.Deterministic('bd', -alpha/beta[1] - beta[0]/beta[1] * x_1[:,0])
# Define the likelihood
yl = pm.Bernoulli('yl', p=theta, observed=y_1)
return model_allcate2
with my_model():
trace_allcate2 = pm.sample(3000,tune=1000,chains=1,init='adapt_diag' )
trace_2 = pm.save_trace(trace_allcate2)