Thank you so much for inspiring me. Indeed, the scan was the main problem. I replaced the scan with vectorization, and the running time decreased from around 3.5 hours to 10 minutes.
Many thanks again!
I am pasting the revised code here for my own notes.
import numpy as np
import pandas as pd
# import matplotlib.pyplot as plt
import pymc as pm
import pytensor.tensor as pt
import arviz as az
import pytensor
import time
# ---- Load input data --------------------------------------------------------
# Each CSV is read with pandas; the first column of every file is an index
# column, which the `[:, 1:]` slices below drop after conversion to numpy.
D_phylo = pd.read_csv('Combo_Dmatrix.csv')
COMBO_OTU = pd.read_csv('COMBO_OTU.csv')
y_obs = pd.read_csv('ynew3.csv')
y_true = pd.read_csv('truevalues.csv')
D_phylo = D_phylo.to_numpy()
D_phylo = D_phylo[:,1:]  # pairwise phylogenetic distances -- presumably (taxa x taxa) square after dropping the index column; TODO confirm
COMBO_OTU = COMBO_OTU.to_numpy()
COMBO_OTU = COMBO_OTU[:,1:]  # OTU count table (samples x taxa) -- assumed; verify against the CSV layout
y_obs = y_obs.to_numpy()
y_obs = y_obs[:,1:]  # observed counts (samples x taxa)
y_true = y_true.to_numpy()  # NOTE(review): y_true is never used below
K1_combo = int(62 * 0.85)  # NOTE(review): computed but never used below
# rho = 0.25
# sigma = 1
# Exponential covariance from phylogenetic distance with sigma = 1 and
# rho = 0.25 hard-coded (see the commented-out parameters above).
cov = 1**2* np.exp(-2* 0.25* D_phylo)
samples = y_obs.shape[0]  # number of samples (rows of y_obs)
specise = y_obs.shape[1]  # number of species/taxa (columns of y_obs); name kept as-is ("species" misspelled) because the model block references it
phai = np.zeros([samples,1])  # NOTE(review): unused below
N = np.sum(COMBO_OTU[0:samples,:], axis=1).reshape(-1, 1)  # per-sample totals from COMBO_OTU; NOTE(review): unused below (NN is used instead)
tt = samples
spe = ['spe{}'.format(i) for i in range(specise)]
sam = ['sam{}'.format(i) for i in range(tt)]
coords = {"specie": spe, "sample": sam}  # PyMC named dimensions
ind = pt.arange(specise)  # symbolic index vector 0..specise-1
cov = pt.as_tensor_variable(cov)  # lift the covariance into a pytensor constant
pp = pt.as_tensor_variable(np.zeros([tt,specise]))  # NOTE(review): unused below
true_abundance = pt.as_tensor_variable(np.zeros([tt,specise]))  # NOTE(review): unused below
# Round-trip through Python lists -- presumably to force a plain numeric
# ndarray out of a possibly object-dtyped pandas slice; TODO confirm dtype
obs_like = np.array(y_obs[:,:].tolist())
NN = np.sum(np.array(y_obs[:,:].tolist()), axis = 1)  # per-sample total counts, used as Multinomial n
zk = pt.arange(specise)  # fallback index set (all taxa) for the empty-selection guard in the model
start = time.time()
print("RUNing...")
if __name__ == '__main__':
    # NOTE(review): the pasted original lost all indentation, which makes the
    # script a SyntaxError (empty `if`/`with` bodies). The nesting below
    # restores the intended structure; no statement was otherwise changed.
    with pm.Model(coords=coords) as Multi:
        # ######## Prior ########
        # Inclusion probability decays with phylogenetic distance to the
        # reference taxon (last row of D_phylo), scaled by T.
        T = pm.Uniform('T', lower=1, upper=100)
        p = pm.Deterministic('p', 1 - pm.math.exp(-D_phylo[-1, :] / T))  # the last column is the reference taxon
        delta = pm.Bernoulli("delta", p=p, shape=specise)  # np.ones([specise])
        # Indices of taxa with delta == 0 (the "selected" set for U).
        ind_tem = ind[pm.math.lt(delta[:], 1)]
        pll = ind_tem.shape[0]
        # Guard against an empty selection: if pll == 0 fall back to all
        # taxa (zk) so downstream shapes stay valid.
        ind_tem = pytensor.ifelse.ifelse(~pt.all(pll), zk, ind_tem)
        zpll = pytensor.ifelse.ifelse(~pt.all(pll), ind_tem.shape[0], pll)
        theta = pm.Normal('theta', mu=1, sigma=1, shape=specise)
        # Embed the selected taxa's mean/covariance into full-size buffers
        # (zeros / identity elsewhere) so U has a fixed-rank prior.
        th_t = pt.zeros([specise])
        cov_t = pt.diag(pt.ones([specise]))
        th_t = pt.set_subtensor(th_t[0:zpll], theta[ind_tem])
        cov_t = pt.set_subtensor(cov_t[0:zpll, 0:zpll], cov[ind_tem][:, ind_tem])
        U = pm.MvNormal("U", mu=th_t, cov=cov_t, shape=(tt, zpll))
        # Logistic-normal transform: the last selected taxon acts as the
        # reference (numerator 1); unselected taxa keep a tiny 1e-10 floor.
        expend_con = pt.zeros((tt, specise)) + 1e-10
        tem = pm.math.sum(pm.math.exp(U[:, :pll - 1]), axis=1).reshape([tt, 1])
        concatenated_values = pm.math.concatenate(
            (pm.math.exp(U[:, :pll - 1]) / (1 + tem), 1 / (1 + tem)), axis=1)
        # If nothing was selected, use a uniform composition instead.
        expend_con = pytensor.ifelse.ifelse(
            pt.eq(pll, 0),
            pt.set_subtensor(expend_con[:, :], pt.zeros((tt, specise)) + 1 / specise),
            pt.set_subtensor(expend_con[:, ind_tem], concatenated_values))
        # ######## Likelihood ########
        likelihood_y = pm.Multinomial('likelihood', n=NN[0:tt], p=expend_con,
                                      observed=obs_like[0:tt, :])
        # ######## Sampling ########
        trace = pm.sample(draws=2000, tune=1000, chains=4, cores=4, step=pm.NUTS())
        # az.plot_trace(trace)
        # ######## Output ########
        summary = az.summary(trace, hdi_prob=0.95, stat_focus='mean')
        print(summary)
        summary.to_excel('test2.xlsx')
        trace.to_netcdf("trace_information.nc")
        # trace = az.from_netcdf("trace_information.nc")