I am toying with a model that has a missing-not-at-random (MNAR) structure. The outcome variable takes the values 0, 1 and -1 (-1 indicates a missing value). I am testing a basic version of the model with fake data. The model looks like this:
```python
# Imports
import pymc3 as pm
import numpy as np
import theano.tensor as tt

np.random.seed(seed=42)

# Fake data
Xtrain = np.random.rand(1000, 10)
missing = np.random.choice([0, 1], size=[1000], p=[.1, .9])           # missingness indicator
y_train = np.random.choice([0, 1, -1], size=[1000], p=[.1, .8, .1])   # -1 marks missing outcomes
nclass = 3
M = Xtrain.shape[1]   # number of predictors
N = Xtrain.shape[0]   # number of observations

# Model
with pm.Model() as model:
    pred = pm.Data("pred", Xtrain)
    miss = pm.Data("miss", missing)

    # Latent continuous variable driven by the predictors
    σ_ = pm.HalfNormal("σ_", 2.5)
    β_ = pm.Normal('β_', mu=0, sd=1, shape=M)
    β0_ = pm.Normal("β0_", mu=0, sd=10, shape=1)
    mu_ = pm.Deterministic('mu_', β0_ + pm.math.dot(pred, β_))
    latent = pm.Normal("latent", mu=mu_, sd=σ_, shape=N)

    # Missingness model: depends on the predictors and on the latent variable (MNAR)
    β_missing = pm.Normal('β_missing', mu=0, sd=1, shape=M)
    β_missing_lat = pm.Normal('β_missing_lat', mu=0, sd=1, shape=1)
    β0_missing = pm.Normal("β0_missing", 0, 10.)
    mu_missing = pm.Deterministic('mu_missing',
                                  β0_missing + pm.math.dot(pred, β_missing) + β_missing_lat * latent)
    # logistic (inverse-logit) link for the missingness indicator
    missing_obs = pm.Bernoulli("missing_obs",
                               p=pm.math.exp(mu_missing) / (1 + pm.math.exp(mu_missing)),
                               observed=miss)

    # Class probabilities for the categorical outcome
    alfa = pm.Normal('alfa', mu=0, sd=1, shape=nclass)
    beta = pm.Normal('beta', mu=0, sd=1, shape=(nclass, 1))
    gamma = pm.Normal('gamma', mu=0, sd=1, shape=nclass)
    mu_1 = beta[0] * mu_missing + alfa[0] * latent + gamma[0]
    mu_2 = beta[1] * mu_missing + alfa[1] * latent + gamma[1]
    mu_3 = beta[2] * mu_missing + alfa[2] * latent + gamma[2]
    # latent has length 1000; softmax should be applied row-wise across the three columns,
    # so p should have shape 1000 x 3
    p = tt.nnet.softmax(pm.math.stack([mu_1, mu_2, mu_3], axis=1))
    obs = pm.Categorical('obs', p=p, observed=y_train)
```
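As a quick sanity check on the shape reasoning in the last comment, this standalone NumPy snippet (made-up values, not part of the model) confirms that stacking three length-1000 vectors along `axis=1` and applying a row-wise softmax gives a 1000 x 3 matrix whose rows sum to 1, which is what `tt.nnet.softmax` does on a matrix:

```python
import numpy as np

# Stand-ins for the three linear predictors (made-up values, length 1000 each)
mu_1, mu_2, mu_3 = np.random.rand(3, 1000)
stacked = np.stack([mu_1, mu_2, mu_3], axis=1)   # shape (1000, 3)
# Row-wise softmax, mirroring tt.nnet.softmax applied to a matrix
p_check = np.exp(stacked) / np.exp(stacked).sum(axis=1, keepdims=True)
print(stacked.shape, p_check.shape, p_check.sum(axis=1)[:3])  # (1000, 3) (1000, 3) rows sum to 1
```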
I then sample from the model like this:
```python
SEED = 123456789  # for reproducibility
rng = np.random.default_rng(SEED)

CHAINS = 2
SAMPLE_KWARGS = {
    'cores': CHAINS,
    'target_accept': 0.99,
    'max_treedepth': 15,
    'random_seed': [SEED + i for i in range(CHAINS)],
    'return_inferencedata': True,
    'tune': 1000,
    'draws': 1000,
    'discard_tuned_samples': True,
    # 'callback': MyCallback()
}

with pm.Model() as model:
    trace = pm.sample(**SAMPLE_KWARGS)
```
Sampling fails with this error:

```
ValueError: The model does not contain any free variables.
```
Q1: Any help in tracking down the issue would be appreciated.
Q2: Is there an easy way to get the shape of RVs?
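For Q2, what I have been doing so far (assuming PyMC3 3.x; reading shapes off the model's test point, which may not be the intended way) is:

```python
# Print the shape of each variable's test value in the first model block above.
# Note these are shapes of the (possibly transformed) starting values, not of the RVs themselves.
for name, value in model.test_point.items():
    print(name, np.asarray(value).shape)
```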
Thanks for looking - this feels like a simple model.