Hello all,
I feel like this may be a problem of simple misunderstanding… I have a model in which I sum a list of slope terms (`slopes` in the code below). If I have two slope terms and compute the predicted mean like
mu = pm.Deterministic("mu", a[o] + slopes[0] + slopes[1])
then everything is fine.
However, if I try to cover the general case of having an arbitrary number of slope terms, like this
mu = pm.Deterministic("mu", a[o] + pm.math.sum(pm.math.stack(slopes)))
then the model takes forever to sample and generates enormous variation on the output parameters. (It samples, though, so I believe that the tensor shapes are fine…)
What am I missing about the behaviour of `stack` and `sum`?
Cheers and thanks,
Eberhard
Model
# shortcuts (there are three categories, indexed as 0, 1, 2)
o = df["category_idx"].values

with pm.Model() as m:
    # Global hyperprior for mean
    bar_a = pm.Normal("bar_a", mu=0, sigma=params.sigma_bar_a)
    # Variation around mean per category, centered prior
    # NOTE(review): z * sigma offset looks like the *non-centered*
    # parameterisation — confirm the comment wording is intended.
    z_a = pm.Normal("z_a", mu=0, sigma=1, shape=3)
    a = pm.Deterministic("a", bar_a + z_a * params.sigma_a)

    # containers for slope terms
    bar_bs = {}
    z_bs_o = {}
    bs_o = {}
    slopes = []

    # slope terms, one per regression variable
    for var in params.vars:
        # Global hyperprior for slope
        bar_bs[var.abbr] = pm.Normal(
            f"bar_b_{var.abbr}", mu=0, sigma=var.sigma_bar_b
        )
        # Slope per category; centered prior
        z_bs_o[var.abbr] = pm.Normal(f"z_b_{var.abbr}_o", mu=0, sigma=1, shape=3)
        bs_o[var.abbr] = pm.Deterministic(
            f"b_{var.abbr}_o",
            bar_bs[var.abbr] + var.sigma_b * z_bs_o[var.abbr],
        )
        # Contribution to the mean estimate: one value per observation
        slopes.append(
            pm.Deterministic(
                f"slope_{var.abbr}", bs_o[var.abbr][o] * df[var.var_name].values
            )
        )

    # Mean predicted value for relative revenue.
    #
    # BUG FIX: the previous general form
    #     pm.math.sum(pm.math.stack(slopes))
    # reduced over *all* axes, collapsing the per-observation slope
    # contributions into a single scalar that was then broadcast over
    # every data point — hence the slow sampling and the enormous
    # posterior variation.  The builtin sum() adds the slope tensors
    # elementwise, preserving the per-observation structure, and works
    # for any number of slope terms.  (Equivalently:
    # pm.math.stack(slopes).sum(axis=0), summing only over the new
    # leading "term" axis introduced by stack.)
    mu = pm.Deterministic("mu", a[o] + sum(slopes))

    # StdDev of residual variation amongst entities, per category
    sigma = pm.Exponential(
        "sigma",
        params.lbda,
        shape=3,
    )
    # Variation of data around mean predicted value
    pm.StudentT(
        "target",
        nu=params.nu,
        sigma=sigma[o],
        mu=mu,
        observed=df["target"].values,
    )
`params` is a parameter object defined like this:
@dataclasses.dataclass(eq=True, frozen=True)
class MultiFlexVar:
    """Parameters for one regression variable in MultiFlex model.

    Attributes:
        abbr: Abbreviation used when naming the model variables.
        var_name: Name of the data column to regress against.
        sigma_bar_b: Stdev of the hyperprior for the mean slope.
        sigma_b: Variation of the slope per category.
        typical_range: Typical value range, used for prior plots.
        default_value: Default value used in counterfactual plots.
    """

    abbr: str
    var_name: str
    sigma_bar_b: float
    sigma_b: float
    typical_range: Tuple[float, float]
    default_value: float
@dataclasses.dataclass(eq=True, frozen=True)
class MultiFlexParams:
    """Parameters for flexible multiparametric model.

    Attributes:
        vars: Variables to regress against.
        sigma_bar_a: Prior stdev for global mean parameter `bar_a`.
        sigma_a: Prior stdev for the variation of the per-category
            mean around `bar_a`.
        lbda: Decay constant for the prior of the noise `sigma`.
        nu: Shape parameter for the StudentT distribution.
        tune: Number of tuning steps.
        draws: Number of sampling draws.
        target_accept: Sampling tuning parameter.
    """

    vars: Tuple[MultiFlexVar, ...]
    sigma_bar_a: float = 0.02
    sigma_a: float = 0.04
    lbda: float = 20
    nu: float = 2
    tune: int = 2000
    draws: int = 4000
    target_accept: float = 0.9
# Concrete parameter set: two regressors, column_a and column_b.
# MultiFlexVar field order:
#   abbr, var_name, sigma_bar_b, sigma_b, typical_range, default_value
params = MultiFlexParams(
    vars=(
        MultiFlexVar("a", "column_a", 0.1, 0.1, (0, 0.1), 0.03),
        MultiFlexVar("b", "column_b", 0.01, 0.01, (0, 1), 0.3),
    ),
)