After much experimentation, I have a model that works (at least it samples):
import numpy as np
import pymc
import aesara.tensor as at  # the `at` alias used below (pytensor.tensor in PyMC >= 5)

with pymc.Model() as categorical_simple_model:
    factors = 5
    N, M = X_train.shape
    gps = []
    _f = []

    # Shared mean and covariance functions for all latent GPs
    μ = pymc.gp.mean.Constant(0)
    σ_c = pymc.Normal('σ_c', mu=0.7, sigma=0.2)
    σ_w = pymc.MvNormal('σ_w', mu=np.ones(M) * 3.1, cov=σ_c * np.eye(M), shape=M)
    σ_b = pymc.Normal('σ_b', mu=3.1, sigma=0.7)
    _η = pymc.Exponential('_η', lam=2.0)
    ϵ = 1e-9  # small offset to keep the amplitude strictly positive
    η = pymc.Deterministic('η', _η + ϵ)
    κ = MultiLayerPerceptronKernel(M, variance=η**2, bias_variance=σ_b**2, weight_variance=σ_w**2)

    # One latent GP per category/location
    for factor, loc in core_mappings[('origin', 'location', 'loc')].items():
        gp = pymc.gp.Latent(mean_func=μ, cov_func=κ)
        gps.append(gp)
        f = gp.prior(f'f_{loc}', X=X_train.values)
        _f.append(f)

    # Squash each latent function and normalise across categories (axis 0)
    f = pymc.Deterministic('f', at.math.sigmoid(at.stack(_f)))
    p = pymc.Deterministic('p', f / f.sum(axis=0))
    y = pymc.Categorical('y', p=p, observed=Y_train.values, shape=Y_train.shape)
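For completeness, sampling is just the standard call, roughly like this (a sketch only; the draws/tune/target_accept values are placeholders, not anything tuned):

with categorical_simple_model:
    # plain NUTS with (near-)default settings; numbers are illustrative only
    idata = pymc.sample(draws=1000, tune=1000, target_accept=0.9)

The second block then builds the conditional GPs at the test inputs: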
with categorical_simple_model:
    # Conditional (predictive) GP for each category at the test inputs
    fs = []
    for (factor, loc), gp in zip(core_mappings[('origin', 'location', 'loc')].items(), gps):
        f = gp.conditional(f'_f_star_{loc}1', Xnew=X_test.values)
        fs.append(f)
    f_star = pymc.Deterministic('f_star', at.stack(fs))
    p_star = pymc.Deterministic('p_star', pymc.math.sigmoid(f_star) / pymc.math.sigmoid(f_star).sum(axis=0))
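To get predictions I then draw those conditional quantities from the fitted trace (again a sketch; idata is the result of the pymc.sample call above):

with categorical_simple_model:
    # sample the Deterministics defined on X_test from the posterior
    post_pred = pymc.sample_posterior_predictive(idata, var_names=['f_star', 'p_star'])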
This seems to produce plausible-looking outputs, but I have no idea whether it is actually the model I'm trying to formulate. I'm particularly confused about pymc.Categorical. Without the shape parameter it tends to throw shape errors that are hard to interpret, yet the model appears to run fine both when Y is one-hot-encoded and when it is a single column of integer class codes. I'm also unsure whether Categorical wants the class dimension of p on the last axis, i.e. shape (N, n_classes) and therefore the transpose of what I've stacked, or on the first axis, (n_classes, N), as I have it. Can someone clarify what exactly Categorical expects for p and observed?
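For concreteness, these are the two encodings of Y I've tried; Y_train_codes and Y_train_onehot are placeholder names for the integer column and the one-hot matrix respectively, and both versions sample without complaint once the shape argument is there:

# Y as a single column of integer class codes, shape (N,)
y = pymc.Categorical('y', p=p, observed=Y_train_codes.values, shape=Y_train_codes.shape)

# Y one-hot encoded, shape (N, n_classes)
y = pymc.Categorical('y', p=p, observed=Y_train_onehot.values, shape=Y_train_onehot.shape)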