Implementing hierarchical priors encoded in tree structures

The ‘telescoping’ technique works for me! Thanks for sharing the pointer.

Below is an MWE for my use case:

import numpy as np
import pandas as pd
import pymc as pm

# create a complete binary tree with 3 levels and 7 nodes;
# each dataframe row is the root-to-leaf path of one of the 4 leaves
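#
#          A          (level 0)
#        /   \
#       B     C       (level 1)
#      / \   / \
#     D   E F   G     (level 2)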
df = pd.DataFrame(
    [
        ["A", "B", "D"],
        ["A", "B", "E"],
        ["A", "C", "F"],
        ["A", "C", "G"],
    ],
    columns=["level_0", "level_1", "level_2"],
)


def make_var_at_level(name: str, var_of_parent_level, mapping: np.ndarray, dims=None):
    """Create the RVs of one level, centered on the RVs of the parent level.

    ``mapping[i]`` is the index of node ``i``'s parent within the parent
    level, so each child gets a Normal prior around its parent's value.
    """
    return pm.Normal(name, mu=var_of_parent_level[mapping], sigma=5, dims=dims)


# we only need the label orderings; pd.factorize keeps first-appearance order
_, level_0_labels = pd.factorize(df["level_0"])
_, level_1_labels = pd.factorize(df["level_1"])

edges = df[["level_0", "level_1", "level_2"]].drop_duplicates()

level_0_idx_by_level_1 = (
    edges[["level_0", "level_1"]]
    .drop_duplicates()
    .set_index("level_1")["level_0"]
    .sort_index()
    .map({k: i for i, k in enumerate(level_0_labels)})
    .values
)
level_1_idx_by_level_2 = (
    edges[["level_1", "level_2"]]
    .drop_duplicates()
    .set_index("level_2")["level_1"]
    .sort_index()
    .map({k: i for i, k in enumerate(level_1_labels)})
    .values
)
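
# for this toy tree the parent-lookup arrays come out as
#   level_0_idx_by_level_1 == [0, 0]        (B and C both hang off A)
#   level_1_idx_by_level_2 == [0, 0, 1, 1]  (D, E under B; F, G under C)
assert level_0_idx_by_level_1.tolist() == [0, 0]
assert level_1_idx_by_level_2.tolist() == [0, 0, 1, 1]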


# create the toy dataset for a linear regression problem
num_samples = 500

n_levels = 3
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)

# the true coefficients of the linear regression model: one per tree node,
# beta = [0, 1, 2, 3, 4, 5, 6] in the order A, B, C, D, E, F, G
beta = np.arange(2**n_levels - 1)

X = rng.random((num_samples, len(beta)))

# zero out the leaf predictors: the data then carry no information about the
# trailing 4 coefficients, which are identified only through the hierarchical prior
X[:, 3:] = 0.0

y = np.dot(X, beta) + rng.normal(scale=0.01, size=num_samples)

coords = {
    "level_0": ["A"],
    "level_1": ["B", "C"],
    "level_2": ["D", "E", "F", "G"],
    "predictor": ["A", "B", "C", "D", "E", "F", "G"],
}
with pm.Model(coords=coords):
    X_var = pm.MutableData(
        "X",
        X,
        dims=["sample", "predictor"],
    )

    beta_level_0 = pm.Normal("beta_level_0", 0, 1, dims="level_0")
    beta_level_1 = make_var_at_level(
        "beta_level_1", beta_level_0, mapping=level_0_idx_by_level_1, dims="level_1"
    )
    beta_level_2 = make_var_at_level(
        "beta_level_2", beta_level_1, mapping=level_1_idx_by_level_2, dims="level_2"
    )

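    # stack the per-level coefficients into one flat vector whose order
    # (level 0, then 1, then 2) matches coords["predictor"]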
    beta = pm.Deterministic(
        "beta",
        pm.math.concatenate([beta_level_0, beta_level_1, beta_level_2]),
        dims="predictor",
    )

    sigma = pm.HalfCauchy("sigma", beta=5)

    likelihood = pm.Normal("y", mu=pm.math.dot(X_var, beta), sigma=sigma, observed=y)

    trace = pm.sample(random_seed=RANDOM_SEED)

print(trace.posterior["beta"].mean(dim=["chain", "draw"]).values)

The expected output should be something like

array([4.34063445e-04, 9.99617637e-01, 1.99989857e+00, 9.32247531e-01,
       1.04552970e+00, 2.06996202e+00, 2.01758882e+00])

Note that the trailing 4 entries (the leaves D, E, F, G) “inherit” their values from their parents B and C: their predictor columns were zeroed out, so their posteriors are pulled toward the parent coefficients 1 and 2.
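
To make the inheritance explicit, one can compare each leaf’s posterior mean with its parent’s. A minimal sketch, reusing trace and the parent-lookup arrays from above:

post_beta = trace.posterior["beta"].mean(dim=["chain", "draw"]).values
parent_means = post_beta[1:3]  # B, C
leaf_means = post_beta[3:]     # D, E, F, G
print(leaf_means - parent_means[level_1_idx_by_level_2])

The differences should be small: with their predictors zeroed, the leaves’ posteriors are determined entirely by the prior centered on their parents.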