Hey all,
What is the best way to implement constraints when trying the model below?
a_i \sim N(\mu , \sigma)
c_{ij} \sim N(\mu_{j}, \sigma_{j})
for x_i = a_i + \sum_{j=1}^{n} b_{ij} c_{ij} with known indicators b_{ij} \in \{0, 1\} and \sum_{j} b_{ij} = 1.
Since this is underdetermined, I wanted to get out of the dilemma by setting
\sum_{j} \mu_{j} = 0 via \mu_{n} = -\sum_{j}^{n-1} \mu_{j}
(see constrained_model)
But this does not converge as well anymore (r_hat up to 1.03).
I also tried to do a reparametrization (see constrained_model_rp) via
c_dist = pm.Normal("c_dist", mu=0, sigma=1, shape=n)
c = pm.Deterministic("c", mu_c + sigma_c * c_dist)
This one runs and converges, but the reparametrization softens the constraint, since the means of c_dist differ from 0.
Is there a good alternative to implement this constraint?
Ultimately, I want to have multiple of these layers, what would be the best way to get there?
# --- synthetic data -------------------------------------------------------
n = 5            # number of groups
samples = 10000  # number of observations


def data_faker(n, samples, mu_a=3, sig_a=5, sig_b=1):
    """Simulate observations x_i = a_i + c_{b_i}.

    Each observation gets a baseline a_i ~ N(mu_a, sig_a) and is assigned
    one of `n` groups (index b_i, drawn uniformly).  The group effect is
    drawn from N(mu_j, sig_b) with group means mu_j = j - (n-1)/2, which
    sum to zero by construction.

    Parameters
    ----------
    n : int
        Number of groups.
    samples : int
        Number of observations to generate.
    mu_a, sig_a : float
        Mean and standard deviation of the baseline.
    sig_b : float
        Standard deviation of the group effects.

    Returns
    -------
    xdata : np.ndarray, shape (samples,)
        Observed values.
    bdata : np.ndarray of int, shape (samples,)
        Group index of each observation.
    """
    xdata = np.random.normal(mu_a, sig_a, samples)
    bdata = np.random.randint(low=0, high=n, size=samples)
    # group means, centered so that sum(mu) == 0
    mu = np.arange(n) - (n - 1) / 2
    for i, m in enumerate(mu):
        # add the group effect only where the observation belongs to group i
        xdata += np.where(bdata == i, np.random.normal(m, sig_b, samples), 0)
    return xdata, bdata


xdata, bdata = data_faker(n, samples)
# --- unconstrained hierarchical model -------------------------------------
with pm.Model() as model:
    # Input data
    x = pm.Data("x", xdata, dims="obs_id")
    b = pm.Data("b", bdata, dims="obs_id")

    # baseline
    mu_a = pm.Normal("mu_a", mu=0.0, sigma=10)  # fixed: name had a trailing space
    sigma_a = pm.Exponential("sigma_a", 1)

    # level 1: per-group effect c_j with its own hyper-priors
    mu_c = pm.Exponential("mu_c", 1, shape=n)
    sigma_c = pm.Exponential("sigma_c", 1, shape=n)
    # BUG in the original: pm.Normal("c", sigma_c, shape=n) passed sigma_c as
    # the *mean* (the second positional argument is mu) and left sigma at its
    # default of 1, silently ignoring mu_c.  Spell both arguments out.
    c = pm.Normal("c", mu=mu_c, sigma=sigma_c, shape=n)

    # linear predictor: baseline plus the effect of the observation's group
    y_hat = mu_a + c[b]
    # y_hat = c[b]  # --> this one runs stable

    # likelihood
    value = pm.Normal("value", mu=y_hat, sigma=sigma_a, observed=x)
# --- model with a hard sum-to-zero constraint on the group means ----------
with pm.Model() as constrained_model:
    # Input data
    x = pm.Data("x", xdata, dims="obs_id")
    b = pm.Data("b", bdata, dims="obs_id")

    # baseline
    mu_a = pm.Normal("mu_a", mu=0.0, sigma=10)  # fixed: name had a trailing space
    sigma_a = pm.Exponential("sigma_a", 1)

    # level 1: enforce sum(mu_c) == 0 via mu_n = -sum(mu_1 .. mu_{n-1}).
    # NOTE(review): the Exponential priors force mu_c_raw > 0, so only the
    # last component can ever be negative.  The generating means
    # (arange(n) - (n-1)/2) have mixed signs, which this construction cannot
    # represent and which plausibly explains the poor convergence.  PyMC's
    # pm.ZeroSumNormal("mu_c", shape=n) expresses the constraint directly
    # without singling out one component -- TODO confirm against the docs.
    mu_c_raw = [pm.Exponential(f"mu_c_raw_{i}", 1) for i in range(n - 1)]
    # use pm.math consistently instead of mixing pytensor.tensor / pt.tensor
    # aliases (the original relied on two different import spellings)
    mu_c = pm.math.stack(mu_c_raw + [-pm.math.sum(pm.math.stack(mu_c_raw))])
    sigma_c = pm.Exponential("sigma_c", 1, shape=n)
    c = pm.Normal("c", mu=mu_c, sigma=sigma_c, shape=n)

    # linear predictor and likelihood
    y_hat = mu_a + c[b]
    value = pm.Normal("value", mu=y_hat, sigma=sigma_a, observed=x)
# --- constrained model with non-centered reparametrization ----------------
with pm.Model() as constrained_model_rp:
    # Input data
    x = pm.Data("x", xdata, dims="obs_id")
    b = pm.Data("b", bdata, dims="obs_id")

    # baseline
    mu_a = pm.Normal("mu_a", mu=0.0, sigma=10)  # fixed: name had a trailing space
    sigma_a = pm.Exponential("sigma_a", 1)

    # level 1: same sum-to-zero construction as constrained_model
    # (see the note there about the sign restriction of the Exponential priors)
    mu_c_raw = [pm.Exponential(f"mu_c_raw_{i}", 1) for i in range(n - 1)]
    # use pm.math consistently instead of mixing pytensor.tensor / pt.tensor aliases
    mu_c = pm.math.stack(mu_c_raw + [-pm.math.sum(pm.math.stack(mu_c_raw))])
    sigma_c = pm.Exponential("sigma_c", 1, shape=n)

    # non-centered reparametrization: sample a standard normal and shift/scale
    # it deterministically.  The constraint holds exactly for the *means* mu_c;
    # the realized c need not sum to zero, which is the "softening" described
    # in the post.
    c_dist = pm.Normal("c_dist", mu=0, sigma=1, shape=n)
    c = pm.Deterministic("c", mu_c + sigma_c * c_dist)

    # linear predictor and likelihood
    y_hat = mu_a + c[b]
    value = pm.Normal("value", mu=y_hat, sigma=sigma_a, observed=x)