[quote=“Nn_Nnn, post:1, topic:11183”]
df = sns.load_dataset('tips')
df.loc[df['day']=='Sun','tip']*=10
cat= pd.Categorical(df['day'])
groups = {"day": cat.categories}
x = cat.codes
with pm.Model(coords=groups) as modelp: #nh
#Prior
mu = pm.Normal('mu', mu=5, sigma=10,dims='day')
sigma = pm.HalfNormal('sigma', sigma=10, dims='day')
ν = pm.Exponential('ν', 1/30)
y = pm.StudentT('y', mu=mu[x], sigma=sigma[x], nu=ν, observed=df['tip'].values)
#y = pm.Normal('y', mu=mu[x], sigma=sigma[x], observed=df['tip'].values)
diff_1_0=pm.Deterministic('mu1-mu0',mu[1]-mu[0])
diff_2_0=pm.Deterministic('mu2-mu0',mu[2]-mu[0])
diff_3_0=pm.Deterministic('mu3-mu0',mu[3]-mu[0])
idatap = pm.sample(1000,chains=2)
idatap.extend(pm.sample_posterior_predictive(idatap))
Thank you for your reply and suggested code: idatap.posterior_predictive[“y”].sel(y_dim_2=np.where(df[“day”]==“Thur”)[0]), which solved the problem of selecting out the predicted y,s for each day separately. Even though I had great difficulty in finding a way to do this once I saw your code it gave me a clearer view of the way the y array is interrogated. Consequently, I tried the following lines of code which numerically appear to give the same answer as your code but is easier for me: y0=idatap.posterior_predictive[“y”].sel(y_dim_2=df.query(“day == ‘Thur’”).index)
y1=idatap.posterior_predictive[“y”].sel(y_dim_2=df.query(“day == ‘Fri’”).index)
y2=idatap.posterior_predictive[“y”].sel(y_dim_2=df.query(“day == ‘Sat’”).index)
y3=idatap.posterior_predictive[“y”].sel(y_dim_2=df.query(“day == ‘Sun’”).index) please let me know if you think this approach is still ok.
Initially I was unable to separate the predicted y’s by day, I tried replacing the above bundled model’s indexed likelihood with its 4 separate likelihoods each representing one of the four days respectively and called it model_new ie: with pm.Model() as model_new:
#Prior
mu = pm.Normal('mu', mu=5, sigma=20,shape=4)
sigma = pm.HalfNormal('sigma', sigma=10, shape=4)
ν = pm.Exponential('ν', 1/30)
y0 = pm.StudentT("y0", mu=mu[0], sigma=sigma[0], nu=ν, observed=df['tip'][df['day']=='Thur'].values)
y1 = pm.StudentT("y1", mu=mu[1], sigma=sigma[1], nu=ν, observed=df['tip'][df['day']=='Fri'].values)
y2 = pm.StudentT("y2", mu=mu[2], sigma=sigma[2], nu=ν, observed=df['tip'][df['day']=='Sat'].values)
y3 = pm.StudentT("y3", mu=mu[3], sigma=sigma[3], nu=ν, observed=df['tip'][df['day']=='Sun'].values)
diff_1_0=pm.Deterministic('mu1-mu0',mu[1]-mu[0])
diff_2_0=pm.Deterministic('mu2-mu0',mu[2]-mu[0])
diff_3_0=pm.Deterministic('mu3-mu0',mu[3]-mu[0])
idata_new = pm.sample(1000,chains=2)
idata_new.extend(pm.sample_posterior_predictive(idata_new))
This model allows the y’s to be separated right from the beginning.
My remaining question is the ‘model_new’ expected to give the same posteriors etc as ‘modelp’ above or is it interrogated as a different model by pymc ? I ran both models and numerically they were in close agreement for the data and priors I tried. thanking you for your guidance.