Hello,
I am a new user of Pymc. I can not debug my code because I am really confused about the error: “shape mismatch”. In my mind, the line that located by this error
expend_con = pt.set_subtensor(expend_con[i, ind_tem[i]], concatenated_values)
has no problem and can be debugged, but error appears when pm.sample() runs. I attached the code and the data that needed, I would be really appreciate if anyone can give me some directions.
Many thanks in advance!
Combo_Dmatrix.csv (11.0 KB)
COMBO_OTU.csv (15.5 KB)
truevalues.csv (1.0 KB)
ynew.csv (35.2 KB)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pymc as pm
import pytensor.tensor as pt
import arviz as az
D_phylo = pd.read_csv('Combo_Dmatrix.csv')
COMBO_OTU = pd.read_csv('COMBO_OTU.csv')
y_obs = pd.read_csv('ynew.csv')
y_true = pd.read_csv('truevalues.csv')
D_phylo = D_phylo.to_numpy()
D_phylo = D_phylo[:,1:]
COMBO_OTU = COMBO_OTU.to_numpy()
COMBO_OTU = COMBO_OTU[:,1:]
y_obs = y_obs.to_numpy()
y_obs = y_obs[:,1:]
y_true = y_true.to_numpy()
K1_combo = int(62 * 0.85)
cov = 1**2* np.exp(-2* 0.25* D_phylo)
samples = y_obs.shape[0]
specise = y_obs.shape[1]
phai = np.zeros([samples,1])
N = np.sum(COMBO_OTU[0:samples,:], axis=1).reshape(-1, 1)
tt = 1 #samples
spe = ['spe{}'.format(i) for i in range(specise)]
sam = ['sam{}'.format(i) for i in range(tt)]
coords = {"specie": spe, "sample": sam}
ind = np.arange(specise)
ind = pt.as_tensor_variable(ind)
cov = pt.as_tensor_variable(cov)
pp = pt.as_tensor_variable(np.zeros([tt,specise]))
true_abundance = pt.as_tensor_variable(np.zeros([tt,specise]))
obs_like = np.array(y_obs[:,:].tolist())
NN = np.sum(np.array(y_obs[:,:].tolist()),axis = 1)
if __name__ == '__main__':
with pm.Model(coords=coords) as Multi:
#########Prior########
alpha = pm.Normal('alpha',mu = 10, sigma = 1, shape = specise)
theta = pm.Normal('theta', mu = 10 , sigma = 1, shape = specise)
T = pm.HalfNormal('T',sigma = 10)
d = pm.HalfNormal('d',sigma = 10, shape = specise)
p = pm.Deterministic('p',1-pm.math.exp(-d/T)) #1/(1+pm.math.exp(-alpha))
delta = pm.Bernoulli("delta",p = p, dims = ("sample", "specie")) #np.ones([specise])
ind_tem = [ind[pm.math.lt(delta[i,:],1)] for i in range(tt)]
pll = [ind_tem[i].shape[0] for i in range(tt)]
th_t = []
ct = []
for i in range(tt):
theta_tem = pt.as_tensor_variable(np.ones([specise]))
cov_tem = pt.as_tensor_variable(np.diag(np.ones([specise])))
th_t.append(pt.set_subtensor(theta_tem[0:pll[i]], theta[ind_tem[i]]))
ct.append(pt.set_subtensor(cov_tem[0:pll[i],0:pll[i]], cov[ind_tem[i]][:, ind_tem[i]]))
U = pm.MvNormal("U", mu = th_t, cov = ct)
U = pt.ones([tt,specise])
expend_con = pt.zeros([tt,specise])
for i, p2 in enumerate(pll):
tem = pm.math.sum(pm.math.exp(U[i, :p2-1]))
concatenated_values = pm.math.concatenate((pm.math.exp(U[i, :p2-1]) / (1 + tem), [1 / (1 + tem)]))
if pm.draw(p2) == 0:
expend_con = pt.set_subtensor(expend_con[i,:], concatenated_values)
else:
expend_con = pt.set_subtensor(expend_con[i, ind_tem[i]], concatenated_values)#concatenated_values[0]
########Likelihood########
likelihood_y = pm.Multinomial('likelihood', n =NN[0:tt], p = expend_con[0:tt,:], observed = obs_like[0:tt,:]) #N should be total counts of species of each sample
########sampling########
trace = pm.sample(step = pm.Slice())#
az.plot_trace(trace)
########output########
summary = az.summary(trace,hdi_prob=0.95,stat_focus = 'mean')
print(summary)