import numpy as np
import pymc3 as pm
import matplotlib.pyplot as plt
import arviz as az
import scipy.stats as stats
plt.style.use('seaborn-darkgrid')
%matplotlib inline
plt.rcParams['axes.labelsize'] = 16
plt.rcParams['axes.titlesize'] = 20
#-----------------------------------------------
data_sample_size=37 #for original study.
data= np.random.normal(2, 1, data_sample_size)
#-------------------------------------------------
with pm.Model() as model:
mu = pm.Normal('mu', mu=0, sd=3,testval=0)
sd = pm.HalfNormal('sd', sd=10)
y = pm.Normal('y', mu=mu, sd=sd, observed=data)
trace = pm.sample(2000,tune=1000)
ppc =pm.sample_posterior_predictive(trace,2000,model)
#-----------------------------------------------------------------------------
fig=plt.figure(figsize=(20,5))
ax1=fig.add_subplot(121,title='Posterior prediction for a future single value\n and also the mean of N future values', xlabel='y', ylabel='Probability Densit')
ax1.set_xlim(0,4)
ax1.legend(loc="upper left")
#To predict a single future value from posterior or the mean of a
#sample with same sample original sample size (n = 37):
with model:
ppc =pm.sample_posterior_predictive(trace,2000,model)
az.plot_dist(ppc["y"].ravel(),ax =ax1,color ="g",label ="kde for predicting value single value" , kind="kde")
az.plot_dist(ppc["y"].mean(axis=1) ,ax=ax1,color = "r",label= "kde of mean value\nwith original sample size(n=37)", kind="kde")
#Predicting the mean the mean of a future sample size N_new
#using Scipy_library and pymc3 trace posterior values is easy:
mu =trace["mu"]
sd = trace["sd"]
N= 37 #original samples size to test procedure.
yy =stats.norm(mu,sd/np.sqrt(N)).rvs(4000)
ax1.hist(yy, histtype = "step", density = True)
az.plot_dist(yy,color="y", label = "Sci_py predicted value of future mean of size N",ax =ax1 )
fig.savefig("Original sample size")
#plt.show()
#----------------------------------------------------------
ppc["y"].shape,ppc["y"].mean(axis=1).shape,trace["mu"].shape
-------------------------------------------------------------
#To predict a future mean of a sample with a multiple (integer M) of original sample size (n = 37):
fig=plt.figure(figsize=(20,5))
ax1=fig.add_subplot(121,title='Posterior prediction for a future mean with sample size (M x N)', xlabel='y', ylabel='Probability Densit')
ax1.set_xlim(0,4)
ax1.legend(loc="upper left")
with model:
N= 37 # Original study sample size.
M=3 # Multiple of original sample size, new sample size = 3 x 37
ppc =pm.sample_posterior_predictive(trace,2000,model,size=M)
az.plot_dist(ppc["y"].mean(axis=(1,2)) ,ax=ax1,color = "r",label= "kde of mean predicted value with\n multiple of original sample size(n=37)", kind="kde")
yy =stats.norm(mu,sd/np.sqrt(N*M)).rvs(4000)
az.plot_dist(yy,color="y", label = "Sci_py predicted value of future mean with sample size\n a multiple of original sample size.",ax =ax1 )
fig.savefig("Multiple M of Original sample size")
#--------------------------------------------
ppc["y"].mean(axis=(1,2)).shape
#-------------------------------------------
? how to predict the mean for any sample size from posterior distribution of original study using ppc.
#Thanks for looking at the code and my attempt at predicting for different sample sizes in future draws.