I am trying to model the following:
where blue ovals denote outcomes and yellow ovals denote independent variables.
As you can see, we have this recursive structure where y2 depends on y1.
I am wondering whether the following code indeed models this, or whether I missed something. It makes sense to me as written, but there may be an error in defining two likelihoods/outcomes and then sampling.
import numpy as np
import pandas as pd
import pymc as pm
import xarray as xr
RANDOM_SEED = 8927
rng = np.random.default_rng(RANDOM_SEED)
from pymc import HalfCauchy, Model, Normal, sample
# ---------------------------------------------------------------------------
# Synthetic data generation.
# Two outcomes are simulated: y1 is a plain linear regression on (x1, x2),
# and y2 depends on (x3, x4) *and* on y1 — the recursive structure described
# in the question.
# ---------------------------------------------------------------------------

size = 200

# Ground-truth parameters for the first outcome, y1.
true_intercept_y1 = 1
true_slope_x1 = 2
true_slope_x2 = 0.2

# Evenly spaced predictors for y1.
x1 = np.linspace(0, 1, size)
x2 = np.linspace(2, 4, size)

# Noiseless regression line for y1, then Gaussian noise on top.
true_regression_line = true_intercept_y1 + true_slope_x1 * x1 + true_slope_x2 * x2
y1 = true_regression_line + rng.normal(scale=0.5, size=size)

# Predictors and ground-truth parameters for the second outcome, y2.
x3 = np.linspace(3, 4, size)
x4 = np.linspace(4, 5, size)
true_intercept_y2 = 1
true_slope_x3 = 2
true_slope_x4 = 0.2
y1_effect = 2  # strength of the y1 -> y2 dependency

# y2's mean includes the realized y1 values (recursive link), plus noise
# with a larger scale (4) than y1's (0.5).
true_regression_line_y2 = (
    true_intercept_y2
    + true_slope_x3 * x3
    + true_slope_x4 * x4
    + y1 * y1_effect
)
y2 = true_regression_line_y2 + rng.normal(scale=4, size=size)

# Collect everything into one frame (handy for inspection/plotting).
data = pd.DataFrame(dict(x=x1, x2=x2, x3=x3, x4=x4, y=y1, y2=y2))
with Model() as model:  # model specifications in PyMC are wrapped in a with-statement
    # --- Sub-model for the first outcome, y1 ---
    # Weakly informative priors for the noise scale and regression weights.
    sigma = HalfCauchy("sigma", beta=10)
    intercept = Normal("Intercept", 0, sigma=20)
    slope = Normal("slope", 0, sigma=20)
    slopex2 = Normal("slopex2", 0, sigma=10)
    # Likelihood: y1 ~ Normal(intercept + slope*x1 + slopex2*x2, sigma).
    likelihood_y1 = Normal(
        "y",
        mu=intercept + slope * x1 + slopex2 * x2,
        sigma=sigma,
        observed=y1,
    )

    # --- Sub-model for the second outcome, y2 ---
    sigma2 = HalfCauchy("sigma2", beta=10)
    intercept2 = Normal("Intercept2", 0, sigma=20)
    slopex3 = Normal("slopex3", 0, sigma=20)
    slopex4 = Normal("slopex4", 0, sigma=10)
    slopey1 = Normal("slopey1", 0, sigma=10)
    # BUG FIX: the original call omitted `sigma=sigma2`, so PyMC fell back to
    # the default sigma=1 for y2's likelihood even though the data were
    # generated with noise scale 4 — `sigma2` was defined but never used.
    # NOTE(review): using the observed RV `likelihood_y1` inside `mu` makes
    # PyMC substitute the observed y1 values here, which is what the
    # recursive y1 -> y2 structure calls for; modeling latent (unobserved)
    # y1 instead would require a different construction — confirm intent.
    likelihood_y2 = Normal(
        "y2",
        mu=intercept2 + slopex3 * x3 + slopex4 * x4 + slopey1 * likelihood_y1,
        sigma=sigma2,
        observed=y2,
    )

    # Draw posterior samples (3000 draws per chain after tuning).
    idata = sample(3000)