The answer to your sampling speed question depends on the model. In my experience speed increase is limited (or negative) on small models (the ones I use as examples to get the code right), however there is a clear speed advantage when the model gets more complex (e.g. multivariate blocks).
Enjoy working on the matrix structures! I felt it rewarding when getting a hang on it. Especially the pm.Data
, which gives you the invaluable bonus of in-sample and out-of-sample “prediction”. Below the function I currently use for multilevel model components; it is rather specific for my own use case and terminology, but maybe you can generalize. Hope it helps!
import numpy as NP # numerical analysis
import pandas as PD # data management
import pymc3 as PM # basic modeling
import patsy as PA # formula notation
if PM.__version__ == '4.0':
import aesara as TH # efficent tensors
import aesara.tensor as TT # tensor control
else:
import theano as TH # efficent tensors
import theano.tensor as TT # tensor control
# print the shape of a aesara tensor
# https://aholzner.wordpress.com/2016/01/03/printing-a-tensors-shape-in-theano/
PrintShape = lambda label, tensor: TH.printing.Print(label, attrs = [ 'shape' ])(tensor)
def AddMultiLevelComponent( \
model: PM.Model \
, data: PD.DataFrame \
, label: str \
, params: list \
, level: str \
, observables: list \
, prior_kwargs: dict = {'mu': 0., 'sd': 1.} \
, verbose: bool = False \
):
# generate a single, multilevel component of a model
# label: a name for this part of the model
# params: list of parameters ("predictors") which are multiplied with slopes; e.g. ['x1', 'x2']
# level must be a PD.Categorical in data PD.DataFrame
# observables: the dependent model variables (as a list), e.g. ['y1', 'y2']
# both params and observables are columns in the data
# returns: (n_observations x n_observables) model component
# I label counts as "n"
n_observables = len(observables)
n_params = len(params)
if verbose:
print (f"### assembling {label} on {level} ###")
print ("\tpriors:", prior_kwargs)
# data matrix
# to be honest, I haven't tried using multiple "params", but it /should/ work.
# You can also have ["1"] in the params list to get a multilevel intercept
# using patsy dmatrix is a bit overkill, you can just grab values from the data frame instead.
data_matrix = NP.asarray(PA.dmatrix( f" + ".join(['0'] + params) \
, data = data \
, return_type = 'dataframe' \
))
# design matrix for group level
# "level" is a categorical column in the "data"
# here, the Patsy dmatrix is actually useful because it generates a boolean matrix from the categorical
group_matrix = NP.asarray( \
PA.dmatrix( f'0 + {level}' \
, data = data \
, return_type = 'dataframe' \
) \
)
n_groups = group_matrix.shape[1]
if verbose:
print ("\tdmat:\t", str(group_matrix[:5, :]).replace('\n', '\n\t\t') \
, "\n\t\t", group_matrix.shape \
)
# convert data to a "theano.shared" via the fabulous PM.Data function
with model:
shared_dat = PM.Data(f'{label}_data', data_matrix)
shared_grp = PM.Data(f'{label}_group', group_matrix)
# loop observables (stacked afterwards) because their parameter blocks are independent
obslwise = [] # will aggregate the slopes per observable (obsl); stacked below
for obsl, observable in enumerate(observables):
with model:
if label == 'intercept':
hp_mean = PM.Normal( f'{label}_ml_population_{observable}' \
, shape = n_params \
, **{k: v[obsl] for k, v in prior_kwargs.items()} \
)
population = PM.Normal( f'{label}_ml_{level}_{observable}' \
, shape = (n_groups, n_params ) \
, mu = hp_mean \
, sd = prior_kwargs.get('sd', [1.]*n_observables)[obsl] \
)
# # you could leave out the hyperprior
# population = PM.Normal( f'{label}_ml_{level}_{observable}' \
# , shape = (n_groups, n_params ) \
# , **{k: v[obsl] for k, v in prior_kwargs.items()} \
# )
else:
# prepare n_groups x n_params slopes
population = PM.Normal( f'{label}_ml_{level}_{observable}' \
, shape = (n_groups, n_params ) \
, **{k: v[obsl] for k, v in prior_kwargs.items()} \
)
# only use the group-specific slope per observation
groupwise = TT.dot(shared_grp, population)
# debugging:
# print(f'{label} {level} {observable}')
# PrintShape('groupwise', groupwise)
# multiply data and slopes; then sum over parameters (components are added)
grpdat_matrix = TT.mul(shared_dat, groupwise)
# PrintShape('grpdat_M', grpdat_matrix)
grpdat = TT.sum(grpdat_matrix, axis = 1)
# PrintShape('grpdat', grpdat)
# append the observable-wise list
obslwise.append(grpdat)
# stack observable-wise column vectors
with model:
component = TT.stack(obslwise, axis = 1)
# PrintShape('component', component)
if verbose:
PrintShape(f'component: {component}', component)
print ("\tdone!")
return component