I’m working on a Bayesian model with the data model d = y + \epsilon, where \epsilon is normally distributed noise and y = a \sin(b t). Given data d and t as arrays, I want to compute the posterior predictive distribution for new data points d^* at new inputs t^* through the following marginalization:
p(d^* | d, t^*) = \int_a \int_b p(a, b | d) \, p(d^* | a, b, t^*) \, da \, db
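The code below approximates this integral with a Monte Carlo average over the N posterior samples (a_i, b_i), where \sigma is the noise standard deviation (epsilon_std):

p(d^* | d, t^*) \approx \frac{1}{N} \sum_{i=1}^{N} \mathcal{N}(d^* \,|\, a_i \sin(b_i t^*), \sigma^2)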
Here’s the code I’m using: it samples from the posterior, sets up grids over new values of d and t, and then loops over the grid to compute p(d^* | d, t^*) by averaging the likelihood over the a and b samples.
import arviz as az
import numpy as np
import pymc as pm
from matplotlib import pyplot as plt
from numba import njit
x = np.linspace(0, 5, 20)
true_a = 1.0
true_b = 0.9
epsilon_std = 0.1
y_true = true_a * np.sin(true_b * x)
d = y_true + np.random.normal(0, epsilon_std, size=len(x))
with pm.Model() as model:
    # Priors
    beta = pm.Uniform("beta", lower=0.0, upper=2.0)
    alpha = pm.Uniform("alpha", lower=0.0, upper=2.0)
    # Model equation
    y = alpha * pm.math.sin(beta * x)
    # Likelihood
    Y_obs = pm.Normal("obs", mu=y, sigma=epsilon_std, observed=d)
    # Sampling
    trace = pm.sample(6000, var_names=["alpha", "beta"], cores=1, chains=4)
# alpha and beta are free random variables, so their posterior samples are
# already stored in the trace; no posterior-predictive pass is needed here.
# (Passing free RVs to pm.sample_posterior_predictive via var_names makes
# PyMC resample them forward rather than reuse the posterior draws.)
alpha_samples = trace.posterior["alpha"].values.flatten()
beta_samples = trace.posterior["beta"].values.flatten()
nD = 40
nt = 40
D_lin = np.linspace(-2, 2, nD)
t_lin = np.linspace(0, 5, nt)
dv, tv = np.meshgrid(D_lin, t_lin, indexing='ij')
posterior_D = np.zeros_like(dv)
@njit
def prob_observe_datum(D, t, a_array, b_array, sigma):
    # Monte Carlo average of the Gaussian likelihood over posterior samples:
    # p(D | d, t) ≈ 1/N * sum_i N(D | a_i * sin(b_i * t), sigma^2)
    res = 0.0
    for i in range(len(a_array)):
        a = a_array[i]
        b = b_array[i]
        res += np.exp(-(D - a * np.sin(b * t)) ** 2 / (2 * sigma ** 2))
    res /= len(a_array) * sigma * np.sqrt(2 * np.pi)
    return res
for i in range(nD):
    for j in range(nt):
        D = dv[i, j]
        t = tv[i, j]
        posterior_D[i, j] = prob_observe_datum(D, t, alpha_samples, beta_samples, epsilon_std)
# Posterior predictive density over the (d*, t*) grid
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(4, 3))
ax.contourf(dv, tv, posterior_D)
# Joint posterior of alpha and beta
az.plot_pair(trace, var_names=["alpha", "beta"], kind="kde", marginals=True)
plt.show()
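For comparison, here is a vectorized NumPy sketch of the same Monte Carlo mixture, reusing the arrays defined above and trading the numba loop for broadcasting. Note the intermediate array has shape (nD, nt, n_samples), roughly 300 MB of floats with this trace, so chunking over samples may be needed for larger grids or traces:

# Vectorized equivalent of the prob_observe_datum loop.
# Shapes: dv, tv -> (nD, nt); alpha_samples, beta_samples -> (n_samples,)
mu = alpha_samples * np.sin(beta_samples * tv[..., None])        # (nD, nt, n_samples)
pdf = np.exp(-(dv[..., None] - mu) ** 2 / (2 * epsilon_std ** 2))
posterior_D_vec = pdf.mean(axis=-1) / (epsilon_std * np.sqrt(2 * np.pi))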
The results are okay, but I’m curious if there’s a more efficient or straightforward approach to handle this type of marginalization.
Any suggestions on optimizing this, alternative approaches, or thoughts on using different sampling or inference methods are very welcome!