MissingInputError with prior specified

My aim with a small project is to see whether it’s possible to estimate covariances as part of mixture model parameters. I can set up a simulation for the data without problem:

CELL_TYPES = ['Neuron', 'Astroctye', 'Microglia', 'Oligodendrocyte', 'Endothelial']
CELL_TYPE_PROPORTIONS = [0.3, 0.25, 0.12, 0.18, 0.05]

TRUE_MEAN_MAT = np.array([[3., 1.5, 1., 0.3, 0.],
                         [1.5, 2., 0.5, 0.1, 0.]])

COV_MATS = [[[2.0, 1.7],
             [1.7, 2.0]], 
            [[2.5, -1.8],
             [-1.8, 2.0]],
            [[0.4, 0.],
             [0., 0.4]],
            [[0.3, 0.12],
             [0.12, 0.5]],
            [[1.0, -0.3],
             [-0.3, 0.7]]]
COV_MATS = [np.array(c) for c in COV_MATS]
MAT_I = np.array([[1., 0.], [0., 1.]])
            
N_OBS = 100
MAT_I_N = np.diag(np.ones(N_OBS,dtype=float))

with pm.Model() as model_true:
    # define the true parameters
    # proportion is going to be a dirichlet -- this is needed to actually perform sampling
    a0 = 50.  # low variance on the type proportions
    cell_mix = pm.Dirichlet('cell_proportions', a0 * np.array(CELL_TYPE_PROPORTIONS))
    # define the cell expression of the 2 genes
    cdists = list()
    for i, ctype in enumerate(CELL_TYPES):
        cell_expr = pm.MvNormal(ctype, mu=TRUE_MEAN_MAT[:,i], cov=COV_MATS[i], shape=(2,))
        cdists.append(pm.Deterministic('{}_exp'.format(ctype), tt.exp(cell_expr)))
        
    expr = pm.Deterministic('expr', tt.log(cell_mix[0] * cdists[0] + \
                                           cell_mix[1] * cdists[1] + \
                                           cell_mix[2] * cdists[2] + \
                                           cell_mix[3] * cdists[3] + \
                                           cell_mix[4] * cdists[4]))
        
    samples = pm.sample(draws=N_OBS, n_init=50000, chains=1)

samples['expr'].shape  # (100, 2)

But (taking the cell_mix proportions as pre-specified) I cannot perform inference:

mod_input = samples['cell_proportions']
mod_output = samples['expr']
with pm.Model() as model_infer:
    # define the prior parameters. Here we *observe* the mixture proportions and the cell expression
    cpriors = list()
    for i, ctype in enumerate(CELL_TYPES):
        prior_mu = pm.MvNormal('{}_mu'.format(ctype), mu=np.array([0., 0.]), cov=MAT_I, shape=(2,))
        prior_cov_pl = pm.LKJCholeskyCov('{}_cv_pl'.format(ctype), eta=1., sd_dist=pm.HalfCauchy.dist(1.), n=2)
        prior_cov_L =  pm.expand_packed_triangular(2, prior_cov_pl)
        prior_cov = pm.Deterministic('{}_cv'.format(ctype), prior_cov_L.dot(prior_cov_L.T))
        cell_expr = pm.MvNormal(ctype, mu=prior_mu, cov=prior_cov, shape=(2,))
        cpriors.append(cell_expr)
    
    # define the sampling for each sample
    # In PyMC3 it is not possible to *observe* a sum (i.e. you can't pass data= to a deterministic)
    # but i think we can cheat here 
    obs_expr = list()
    for i in xrange(samples['expr'].shape[0]):
        sam_expr = pm.Deterministic('expr{}'.format(i), 
                                    tt.log(mod_input[i, 0] * tt.exp(cpriors[0]) + \
                                           mod_input[i, 1] * tt.exp(cpriors[1]) + \
                                           mod_input[i, 2] * tt.exp(cpriors[2]) + \
                                           mod_input[i, 3] * tt.exp(cpriors[3]) + \
                                           mod_input[i, 4] * tt.exp(cpriors[4])))
        obs_expr.append(sam_expr)
    obs_expr = np.array(obs_expr)  # 100 x 2
    expr_obs = pm.MatrixNormal('expr_obs', mu=expr, 
                               rowcov=0.01 * MAT_I_N, colcov=MAT_I, shape=(N_OBS,2), 
                               observed=mod_output)
    
    mod_fit = pm.sample()

This results in an error:

MissingInputError: Input 0 of the graph (indices start from 0), used to compute Elemwise{exp,no_inplace}(Endothelial), was not provided and not given a value. Use the Theano flag exception_verbosity=‘high’, for more information on this error.

But I’ve provided priors for the group (cell) mean and covariance. What’s wrong?

There is a mistake here that mu=expr, also you shouldnt use a for loop to build the tensor obs_expr. Try something like:

mod_input = samples['cell_proportions']
mod_output = samples['expr']
with pm.Model() as model_infer:
    # define the prior parameters. Here we *observe* the mixture proportions and the cell expression
    cpriors = list()
    for i, ctype in enumerate(CELL_TYPES):
        prior_mu = pm.MvNormal('{}_mu'.format(ctype), mu=np.array([0., 0.]), cov=MAT_I, shape=(2,))
        prior_cov_pl = pm.LKJCholeskyCov('{}_cv_pl'.format(ctype), eta=1., sd_dist=pm.HalfCauchy.dist(1.), n=2)
        prior_cov_L =  pm.expand_packed_triangular(2, prior_cov_pl)
        prior_cov = pm.Deterministic('{}_cv'.format(ctype), prior_cov_L.dot(prior_cov_L.T))
        cell_expr = pm.MvNormal(ctype, mu=prior_mu, cov=prior_cov, shape=(2,))
        cpriors.append(cell_expr)
    
    # define the sampling for each sample
    # In PyMC3 it is not possible to *observe* a sum (i.e. you can't pass data= to a deterministic)
    # but i think we can cheat here 
    obs_expr = pm.Deterministic('obs_expr', 
                                 tt.log(mod_input[:, 0][:, np.newaxis] * tt.exp(cpriors[0])[np.newaxis, :] + \
                                          mod_input[:, 1][:, np.newaxis] * tt.exp(cpriors[1])[np.newaxis, :] + \
                                          mod_input[:, 2][:, np.newaxis] * tt.exp(cpriors[2])[np.newaxis, :] + \
                                          mod_input[:, 3][:, np.newaxis] * tt.exp(cpriors[3])[np.newaxis, :] + \
                                          mod_input[:, 4][:, np.newaxis] * tt.exp(cpriors[4])[np.newaxis, :]))
    expr_obs = pm.MatrixNormal('expr_obs', mu=obs_expr, 
                               rowcov=0.01 * MAT_I_N, colcov=MAT_I, shape=(N_OBS,2), 
                               observed=mod_output)
    
    mod_fit = pm.sample()
1 Like

Thank you! Solved my follow-up question before I asked it, too.