In your model the item specific effect is indexing through [idx], so you want the number of coefficients equal to the number of unique items:
num_items = len(idx) --> num_items = len(np.unique(idx))
and the indexing should be correspondent to the input data in
y_est = (
intercept_prior[idx] +
x1_prior[idx] * masked_x1.get_value() +
x2_prior[idx] * masked_x2.get_value()
)
the [idx] above should be correctly correspondent to masked_x1 and masked_x2
Try the code below, it will give the correct format of output now:
new_data = pd.read_csv('new_data.txt', sep='\t')
idx = np.asarray(new_data.idx)
num_items = len(np.unique(idx))
train = new_data[:-15]
test = new_data[-15:]
masked_x1 = shared(np.asarray(train.x1))
masked_x2 = shared(np.asarray(train.x2))
masked_idx = shared(np.asarray(train.idx))
with pm3.Model() as model:
intercept_prior = pm3.Normal('intercept_prior',
mu=20,
sd=100,
shape=num_items)
mu_x1 = pm3.Normal('mu_x1', mu=0, sd=20)
std_x1 = pm3.Uniform('std_x1', lower=0, upper=100)
x1_prior = pm3.Normal('x1_prior',
mu=mu_x1,
sd=std_x1,
shape=num_items)
std_x2 = pm3.Uniform('std_x2', lower=0, upper=100)
x2_prior = pm3.HalfNormal('x2_prior',
sd=std_x2,
shape=num_items)
y_est = pm3.Deterministic('y_est',
intercept_prior[masked_idx] +
x1_prior[masked_idx] * masked_x1 +
x2_prior[masked_idx] * masked_x2
)
pm3.Normal('y_pred',
mu=y_est,
sd=100,
observed=train.y)
trace = pm3.sample(3000, njobs=2)
varnames = ['mu_x1', 'std_x1', 'std_x2']
pm3.traceplot(trace[2000:], varnames)
pm3.plot_posterior(trace[2000:], varnames)
#%%
masked_x1.set_value(np.asarray(test.x1))
masked_x2.set_value(np.asarray(test.x2))
masked_idx.set_value(np.asarray(test.idx))
ppc = pm3.sample_ppc(trace, model=model, samples=500)
Also, some hyper prior on intercept_prior would also improve the model.