The ‘telescoping’ technique works for me! Thanks for sharing the pointer.
Below is an MWE for my use case:
import numpy as np
import pandas as pd
import pymc as pm
# create a binary tree with 3 levels and 7 nodes;
# the dataframe has one row per leaf (each row is a root-to-leaf path), so 4 rows
df = pd.DataFrame(
    [
        ["A", "B", "D"],
        ["A", "B", "E"],
        ["A", "C", "F"],
        ["A", "C", "G"],
    ],
    columns=["level_0", "level_1", "level_2"],
)
def make_var_at_level(name: str, var_of_parent_level, mapping: np.ndarray, dims=None):
    """Create an RV at a given level, centered on the RV of its parent level."""
    return pm.Normal(name, mu=var_of_parent_level[mapping], sigma=5, dims=dims)
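# e.g. with var_of_parent_level = [b_B, b_C] and mapping = [0, 0, 1, 1],
# mu becomes [b_B, b_B, b_C, b_C]: each child is centered on its parent's value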
_, level_0_labels = pd.factorize(df["level_0"])
_, level_1_labels = pd.factorize(df["level_1"])
edges = df[["level_0", "level_1", "level_2"]].drop_duplicates()
level_0_idx_by_level_1 = (
    edges[["level_0", "level_1"]]
    .drop_duplicates()
    .set_index("level_1")["level_0"]
    .sort_index()
    .map({k: i for i, k in enumerate(level_0_labels)})
    .values
)
level_1_idx_by_level_2 = (
    edges[["level_1", "level_2"]]
    .drop_duplicates()
    .set_index("level_2")["level_1"]
    .sort_index()
    .map({k: i for i, k in enumerate(level_1_labels)})
    .values
)
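# for this toy tree the parent-index arrays come out as
#   level_0_idx_by_level_1 -> array([0, 0])        # B and C both hang off A
#   level_1_idx_by_level_2 -> array([0, 0, 1, 1])  # D, E -> B; F, G -> C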
# create the toy dataset for a linear regression problem
num_samples = 500
n_levels = 3
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)  # seeds the legacy global RNG used for X below
rng = np.random.default_rng(RANDOM_SEED)  # separate Generator for the observation noise
# the true regression coefficients are 0, 1, ..., 6, one per tree node;
# the leaf columns of X are zeroed out below, so y carries no direct
# information about the 4 leaf coefficients
beta = np.arange(2**n_levels - 1)
X = np.random.random((num_samples, len(beta)))
# hide the predictors corresponding to the leaf nodes
X[:, 3:] = 0.0
y = np.dot(X, beta) + rng.normal(scale=0.01, size=num_samples)
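# optional sanity check: a plain least-squares fit on the non-zero columns
# should recover approximately [0, 1, 2]
print(np.linalg.lstsq(X[:, :3], y, rcond=None)[0])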
coords = {
    "level_0": ["A"],
    "level_1": ["B", "C"],
    "level_2": ["D", "E", "F", "G"],
    "predictor": ["A", "B", "C", "D", "E", "F", "G"],
}
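# note: the "predictor" coord order (A..G) must match the order in which the
# per-level betas are concatenated inside the model below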
with pm.Model(coords=coords):
    X_var = pm.MutableData(
        "X",
        X,
        dims=["sample", "predictor"],
    )
    beta_level_0 = pm.Normal("beta_level_0", 0, 1, dims="level_0")
    beta_level_1 = make_var_at_level(
        "beta_level_1", beta_level_0, mapping=level_0_idx_by_level_1, dims="level_1"
    )
    beta_level_2 = make_var_at_level(
        "beta_level_2", beta_level_1, mapping=level_1_idx_by_level_2, dims="level_2"
    )
    # stack the per-level betas into one vector, ordered root -> leaves
    # (this shadows the true `beta` array above, which is no longer needed)
    beta = pm.Deterministic(
        "beta",
        pm.math.concatenate([beta_level_0, beta_level_1, beta_level_2]),
        dims="predictor",
    )
    sigma = pm.HalfCauchy("sigma", beta=5)
    likelihood = pm.Normal("y", mu=pm.math.dot(X_var, beta), sigma=sigma, observed=y)
    trace = pm.sample()

print(trace.posterior["beta"].mean(dim=("chain", "draw")).values)
The expected output should be something like:
array([4.34063445e-04, 9.99617637e-01, 1.99989857e+00, 9.32247531e-01,
1.04552970e+00, 2.06996202e+00, 2.01758882e+00])
Note that the trailing 4 entries “inherit” values from their parents: D and E sit near B’s value (≈ 1), and F and G sit near C’s (≈ 2).
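To make that check explicit, here is a small follow-up snippet (just a sketch; it assumes `trace` and the index arrays from the MWE above are still in scope):

post_beta = trace.posterior["beta"].mean(dim=("chain", "draw")).values
parents = post_beta[1:3]  # posterior means for B and C
leaves = post_beta[3:]    # posterior means for D, E, F, G
# each leaf should land near its parent's posterior mean
print(np.abs(leaves - parents[level_1_idx_by_level_2]))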