About the performance of a multi-level logistic regression model

I built a multi-level logistic regression model to reflect the heterogeneity of individuals, using 140 users and 1451 shopping records, with user-level covariates including age and gender. The model is shown below. When I run this model, sampling is very slow (about 2.5 s/it), and my CPU is already at 100%. Is there any way to improve the performance? Thanks!

code:

# --- Data preparation --------------------------------------------------------
# Encode the categorical columns as integer codes for use in the model.
y_1 = pd.Categorical(modeldata['switch']).codes          # binary outcome (0/1)
timephase = pd.Categorical(modeldata['timephase']).codes
isweeken = pd.Categorical(modeldata['isweeken']).codes
usercode = modeldata['user_code'].values                 # maps each row to a user index

no_x = 2   # number of observation-level predictors (timephase, isweeken)
no_z = 4   # number of user-level covariates (gender + 3 age dummies)
# NOTE(review): `no_user` is used below but never defined in this snippet —
# presumably no_user = 140 (the number of users); confirm against the full script.
delta_bar = np.zeros((no_x, no_user))
A = np.identity(no_z) * 0.01
z = userall[['gender', 'age_1', 'age_2', 'age_3']].values   # user-level design matrix
# Prior means: np.zeros(...) directly, instead of the original np.ones(...) * 0
# which allocates a ones array only to multiply it away.
mu_delta = np.zeros((no_z, no_x))
mu_vi = np.zeros((no_user, no_x))
def my_model():
    """Build the hierarchical (multi-level) logistic regression model.

    Per-user coefficients ``beta`` are centered on user-level effects
    ``z @ delta`` with a per-user deviation ``vi``; the likelihood is
    Bernoulli on the observed switch indicator ``y_1``.

    Returns
    -------
    pm.Model
        The (unsampled) PyMC3 model context.
    """
    with pm.Model() as model_allcate2:
        # Prior on the scale of the per-user intercepts.
        sigma = pm.InverseGamma('sigma', alpha=3, beta=1)

        # LKJ Cholesky prior for the covariance of the per-user effects.
        sd_dist = pm.HalfNormal.dist(2.5)
        chol_packed = pm.LKJCholeskyCov('chol_packed', n=no_x, eta=2, sd_dist=sd_dist)
        chol = pm.expand_packed_triangular(no_x, chol_packed, lower=True)
        vi = pm.MvNormal('vi', mu=mu_vi, chol=chol, shape=(no_user, no_x))

        # User-level coefficients share the (scaled) Cholesky factor.
        delta = pm.MvNormal('delta', mu=mu_delta, chol=chol * 10, shape=(no_z, no_x))

        beta = pm.Deterministic('beta', pm.math.dot(z, delta) + vi)
        epsilon = pm.Normal('epsilon', mu=0, sd=sigma, shape=no_user)

        # BUG FIX: the original used pm.math.dot(timephase, beta[usercode, 0]),
        # which is a vector inner product and collapses the per-observation
        # linear predictor to a single scalar.  Each observation needs its own
        # predictor, so multiply elementwise instead.
        mu = (epsilon[usercode]
              + timephase * beta[usercode, 0]
              + isweeken * beta[usercode, 1])

        # Numerically stable logistic link instead of 1 / (1 + exp(-mu)),
        # which can overflow for large negative mu.
        theta = pm.math.sigmoid(mu)

        # Likelihood.
        yl = pm.Bernoulli('yl', p=theta, observed=y_1)

    return model_allcate2

# Draw posterior samples from the model and persist the trace to disk.
with my_model():
    trace_allcate2 = pm.sample(
        3000,
        tune=1000,
        chains=1,
        init='adapt_diag',
    )
    trace_2 = pm.save_trace(trace_allcate2)

You can try profiling the logp to see where the bottleneck is.