Hi
I have a simple model, but it takes longer (3 hours) than I expected, and I cannot find the bottleneck in the script. The code and data are attached. Many thanks in advance for any suggestions.
Combo_Dmatrix.csv (11.0 KB)
COMBO_OTU.csv (15.5 KB)
truevalues.csv (1.0 KB)
ynew3.csv (18.8 KB)
import numpy as np
import pandas as pd
# import matplotlib.pyplot as plt
import pymc as pm
import pytensor.tensor as pt
import arviz as az
import pytensor
import time
# ---- Load data --------------------------------------------------------------
# Each CSV was written with its index, so pandas reads an extra leading
# column; it is stripped off with [:, 1:] (truevalues.csv is used as-is).
D_phylo = pd.read_csv('Combo_Dmatrix.csv').to_numpy()[:, 1:]
COMBO_OTU = pd.read_csv('COMBO_OTU.csv').to_numpy()[:, 1:]
y_obs = pd.read_csv('ynew3.csv').to_numpy()[:, 1:]
y_true = pd.read_csv('truevalues.csv').to_numpy()

K1_combo = int(62 * 0.85)

# Exponential-decay covariance built from the phylogenetic distance matrix:
# sigma^2 * exp(-2 * rho * D) with sigma = 1 and rho = 0.25 fixed.
cov = (1 ** 2) * np.exp(-2 * 0.25 * D_phylo)

samples, specise = y_obs.shape
phai = np.zeros((samples, 1))
# Per-sample total OTU counts, kept as a column vector.
N = COMBO_OTU[0:samples, :].sum(axis=1).reshape(-1, 1)
tt = samples
chain_length = 2000
tune_length = 1000

# Labelled coordinates for the PyMC model dimensions.
spe = ['spe{}'.format(i) for i in range(specise)]
sam = ['sam{}'.format(i) for i in range(tt)]
coords = {"specie": spe, "sample": sam}

# Symbolic constants shared by the model below.
ind = pt.arange(specise)
cov = pt.as_tensor_variable(cov)
pp = pt.as_tensor_variable(np.zeros((tt, specise)))
true_abundance = pt.as_tensor_variable(np.zeros((tt, specise)))

# Observed counts as a plain ndarray, plus the per-sample totals the
# multinomial likelihood needs.
obs_like = np.array(y_obs.tolist())
NN = obs_like.sum(axis=1)

zk = pt.arange(specise)
zp = pt.ones(specise)
start = time.time()
def def_test2(ii, pll, U, ind_tem, expend_con):
    """Scan step: fill row ``ii`` of the probability matrix ``expend_con``.

    Maps the latent row ``U[ii]`` onto the probability simplex with a
    multinomial-logit transform: the first ``pll - 1`` entries become
    exp(u_k) / (1 + sum_j exp(u_j)) and the last (reference) category gets
    1 / (1 + sum_j exp(u_j)).  The transformed values are written into the
    columns listed in ``ind_tem``; when ``pll`` is 0 the whole row is set
    to the uniform distribution instead.

    Parameters (all symbolic pytensor variables):
        ii          -- row (sample) index supplied by ``pytensor.scan``.
        pll         -- number of selected taxa (columns of ``U``).
        U           -- latent (tt, pll) matrix of log-ratio abundances.
        ind_tem     -- column indices of the selected taxa.
        expend_con  -- (tt, specise) matrix being filled, one row per step.

    Returns the updated matrix (``set_subtensor`` is functional, so each
    scan step yields a full new copy of ``expend_con``).
    """
    p2 = pll
    # Normalising constant over the non-reference categories.
    tem = pm.math.sum(pm.math.exp(U[ii, :p2 - 1]))
    # Softmax-with-reference: probabilities for the first p2-1 categories,
    # then the reference category's probability appended at the end.
    concatenated_values = pm.math.concatenate((pm.math.exp(U[ii, :p2 - 1]) / (1 + tem),
                                               [1 / (1 + tem)]))
    # Empty-selection fallback: uniform row over 62 taxa.
    # NOTE(review): 62 is hard-coded here — confirm it always equals
    # ``specise`` before reusing this with other data sets.
    updated_expend_con = pytensor.ifelse.ifelse(pt.eq(p2, 0),
                                                pt.set_subtensor(expend_con[ii, :], pt.zeros(62) + 1 / 62),
                                                pt.set_subtensor(expend_con[ii, ind_tem], concatenated_values))
    return updated_expend_con
def cholesky(A):
    """Return the lower-triangular Cholesky factor ``L`` of a symmetric
    positive-definite matrix ``A`` (so that ``L @ L.T == A``).

    The previous implementation computed the factor element-by-element with
    a ``pytensor.scan`` over every lower-triangular index — an O(n^2)-step
    symbolic loop whose graph is very expensive to compile and evaluate,
    and a prime candidate for the script's bottleneck if it is ever used.
    PyTensor ships a single LAPACK-backed Cholesky Op that produces the
    same lower-triangular factor in one graph node, so we delegate to it.
    The signature and return value are unchanged.
    """
    # Local import keeps the module-level import block untouched.
    from pytensor.tensor.slinalg import cholesky as cholesky_op

    # Defaults to the lower factor, matching the hand-rolled version.
    return cholesky_op(A)
if __name__ == '__main__':
    with pm.Model(coords=coords) as Multi:
        ######### Prior ########
        # Taxon-inclusion probability decays with phylogenetic distance
        # to the reference taxon, governed by the scale T.
        T = pm.Uniform('T', lower = 1, upper = 100)
        p = pm.Deterministic('p',1-pm.math.exp(-D_phylo[-1,:]/T)) # the last column is the reference taxon
        delta = pm.Bernoulli("delta",p = p, shape = specise) #np.ones([specise])
        # Indices of the taxa with delta == 0 (the "selected" set).  Its
        # length depends on the sampled delta, so all downstream shapes
        # are symbolic/data-dependent.
        ind_tem = ind[pm.math.lt(delta[:],1)]
        pll = ind_tem.shape[0]
        z_ind_tem = zk
        # Empty-selection guard: fall back to all taxa.
        # NOTE(review): ~pt.all(pll) inverts a scalar length, i.e. it is
        # True only when pll == 0 — confirm this is the intended test.
        ind_tem = pytensor.ifelse.ifelse(~pt.all(pll), z_ind_tem, ind_tem)
        zpll = pytensor.ifelse.ifelse(~pt.all(pll), ind_tem.shape[0], pll)
        theta = pm.Normal('theta', mu = 1 , sigma = 1, shape = specise)
        # Embed the selected taxa's mean and covariance into fixed-size
        # (specise x specise) containers, padding with zeros / identity,
        # so MvNormal can be given a concrete shape.
        th_t = pt.zeros([specise])
        cov_t = pt.diag(pt.ones([specise]))
        th_t = pt.set_subtensor(th_t[0:zpll], theta[ind_tem])
        cov_t = pt.set_subtensor(cov_t[0:zpll,0:zpll], cov[ind_tem][:, ind_tem])
        U = pm.MvNormal("U", mu = th_t, cov = cov_t, shape = (tt,zpll))
        ii = pt.arange((tt))
        expend_con = pt.zeros((tt,specise))+1e-10
        # Row-by-row fill of the probability matrix via scan.
        # NOTE(review): each scan step returns a full (tt, specise) matrix,
        # so Results is (tt, tt, specise) and only the diagonal slice
        # Results[i, i, :] is the row actually written at step i.  This
        # per-row scan + set_subtensor pattern is a likely bottleneck — a
        # vectorised softmax over all rows at once would remove the scan.
        Results, _ = pytensor.scan(def_test2,
                                   sequences = [ii],
                                   non_sequences = [pll, U, ind_tem, expend_con])
        expend_con = pt.set_subtensor(expend_con[ii,:],Results[ii,ii,:])
        ######## Likelihood ########
        likelihood_y = pm.Multinomial('likelihood', n =NN[0:tt], p = expend_con, observed = obs_like[0:tt,:]) #N should be total counts of species of each sample
        ######## sampling ########
        # NOTE(review): `delta` is discrete, so NUTS alone cannot sample it;
        # confirm how pm.sample assigns steppers when step=pm.NUTS() is
        # passed explicitly (normally pm.sample picks a compound NUTS +
        # Metropolis scheme automatically when left unset).  The 8000
        # draws + 4000 tune across 4 chains also dominate the runtime —
        # the unused chain_length / tune_length (2000/1000) defined above
        # would be a much cheaper first experiment.
        trace = pm.sample(draws = 8000, tune = 4000, chains = 4, cores = 4, step = pm.NUTS()) #chain_length, tune = tune_length,
        # az.plot_trace(trace)
        ######## output ########
        summary = az.summary(trace,hdi_prob=0.95,stat_focus = 'mean')
        print(summary)