Minibatch ADVI ppc dimensions

Hi! I’m new to PyMC3 and wanted to say that it’s a great piece of software. I come from stan, and the transition has been pretty seamless! I had one question about dimensions and posterior predictive checks. I’m using mini-batching ADVI to fit Peter Hoff’s AMEN model example in stan.

The data I use are here: csv (dropbox)

My model is here:

import matplotlib.pyplot as plt
import pymc3 as pm
import theano
import numpy as np
import pandas as pd
import theano.tensor as tt
import scipy.stats as st

# Fix both the NumPy RNG and PyMC's Theano RNG for reproducibility.
np.random.seed(42)
pm.set_tt_rng(42)

df = pd.read_csv('ir90s_edgelist.csv')

# Minibatches of size 50 over the edge list; the "- 1" shifts the ids to
# 0-based indexing (assumes the CSV ids are 1-based -- TODO confirm).
y = pm.Minibatch(df.y.values, 50)
sender = pm.Minibatch(df.sender.values - 1, 50)
receiver = pm.Minibatch(df.receiver.values - 1, 50)

K = 2                              # latent dimension of the multiplicative effects
n_nodes = df.sender.values.max()   # assumes node ids run 1..n_nodes -- TODO confirm
n_dyads = df.dyad_id.values.max()
cov_dims = K * 2                   # sender block (K) + receiver block (K)
# Number of entries in the packed lower triangle of an n x n matrix is
# n*(n+1)/2.  The original int((cov_dims*cov_dims + 1)/2) evaluated to 8
# for cov_dims = 4, but the correct packed size is 10.
lower_tri = cov_dims * (cov_dims + 1) // 2


# Hoff-style AMEN social-relations model: an intercept, additive
# sender/receiver ("node") effects with an LKJ-correlated prior, and
# multiplicative latent-factor effects, fit by minibatch ADVI.
with pm.Model() as amen_model:
    #intercept
    alpha = pm.Normal('alpha', mu=0., sigma=5)
with amen_model:

    # node terms
    # Unregistered (.dist) scale prior, used only as sd_dist inside the
    # LKJ Cholesky prior below.
    sigma_nodes = pm.HalfCauchy.dist(beta=2.5, shape = 2 )

    corr_nodes_packed = pm.LKJCholeskyCov(
        'corr_nodes_packed', n = 2, eta = 5, sd_dist = sigma_nodes)
    # Unpack the flat vector into a 2x2 lower-triangular Cholesky factor.
    corr_nodes = pm.expand_packed_triangular(2, corr_nodes_packed)
    # Non-centered parameterization: correlated node effects = (L @ z).T,
    # one row per node, columns = (sender effect, receiver effect).
    z_nodes = pm.Normal('z_nodes', mu=0., sigma=1, shape = (2, n_nodes))
    mean_nodes = pm.Deterministic('mean_nodes', tt.dot(corr_nodes, z_nodes).T)

with amen_model:
    # multi-effect terms
    z_multi_effects = pm.Normal(
        'z_multi_effects', mu=0., sigma=1, shape = (cov_dims, n_nodes))


    sigma_multi_effects = pm.HalfCauchy.dist(beta=2.5, shape = cov_dims )

    corr_multi_packed = pm.LKJCholeskyCov(
        'corr_multi', 
        n = cov_dims, 
        eta = 5., 
        sd_dist = sigma_multi_effects)

        
    corr_multi_effects = pm.expand_packed_triangular( cov_dims, corr_multi_packed) 
    mean_multi_effects = pm.Deterministic(
        'mean_multi_effects', tt.dot(corr_multi_effects, z_multi_effects).T)
with amen_model:
    # Linear predictor per dyad: intercept + sender effect + receiver
    # effect + multiplicative term.
    # NOTE(review): tt.dot(A, B.T).sum(axis=1) sums u_i . v_j over EVERY j
    # in the minibatch, not just row i's own receiver; the per-dyad AMEN
    # term is usually (A * B).sum(axis=1) -- confirm which is intended.
    y_star = pm.Deterministic('y_star', alpha + \
    mean_nodes[sender, 0] + \
    mean_nodes[receiver, 1] + \
        tt.dot(mean_multi_effects[sender, 0:K], mean_multi_effects[receiver, K:K*2].T).sum(axis = 1))
        
with amen_model:

    # Model error
    eps = pm.HalfCauchy('eps', beta = 3)

    # Data likelihood; total_size rescales the minibatch log-likelihood
    # up to the full data set during ADVI.
    y_likelihood = pm.Normal('y_likelihood', mu=y_star, sigma=eps, observed=y, total_size=len(df))

with amen_model:
    # 1000 ADVI iterations with a relative-change convergence callback.
    mean_field = pm.fit(1000, callbacks=[pm.callbacks.CheckParametersConvergence(diff='relative')])

I’ve had no issue fitting the model, however, I’m confused when it comes time to posterior predictive check. There are 16770 observations, however, when I run

# Draw 500 samples from the fitted mean-field approximation, then sample
# the posterior predictive for each draw.
trace_mf = pm.sample_approx(mean_field, 500)
mf_pp = pm.sample_posterior_predictive(trace_mf, samples=500, model=amen_model)
# shape comes back as (samples, minibatch size) == (500, 50)
mf_pp['y_likelihood'].shape

I get an array of shape (500, 50).
What’s the best way to get the posterior predictions across all Y, along the lines of the generated quantities in the stan code above?

Thanks!

1 Like

Hi,
And thanks for the kind words, always nice to hear :slight_smile:

You want the PPC back with shape (500, 16770) — one row per draw, across all observations — right? Did you try not passing samples explicitly to sample_posterior_predictive? I think it’ll automatically default to the number of draws in your trace.

Also, I don’t think you need total_size=len(df) in your likelihood: since you’re passing observations, it should pick the shape up automatically.

Finally, note that since PyMC 3.9.0, you can replace:

# Pre-3.9 pattern: sample the packed Cholesky vector, then expand it to a
# lower-triangular matrix by hand.
corr_multi_packed = pm.LKJCholeskyCov(
        'corr_multi', 
        n = cov_dims, 
        eta = 5., 
        sd_dist = sigma_multi_effects
)
corr_multi_effects = pm.expand_packed_triangular(cov_dims, corr_multi_packed) 

by the simple one-liner:

# PyMC >= 3.9 extracts the expanded Cholesky, stds and matrix of correlations automatically
# (with compute_corr=True the return order is: Cholesky factor,
# correlation matrix, standard deviations).
corr_multi_effects, Rho, sigmas = pm.LKJCholeskyCov(
        "corr_multi", n=cov_dims, eta=5, sd_dist=sigma_multi_effects, compute_corr=True
    )

Hope this helps :vulcan_salute:

Thanks for the feedback, especially the one-liner! I tried not explicitly passing samples:

# With `samples` omitted, sample_posterior_predictive defaults to the
# number of draws in the trace (100 here, per the shape reported below).
trace_mf = pm.sample_approx(mean_field)
mf_pp = pm.sample_posterior_predictive(trace_mf, model=amen_model)

and the result was
mf_pp['y_likelihood'].shape = (100, 50). I suspect it’s basically giving the PPC for the last minibatch, since I’d want (100, 16770), but that’s just a guess on my part.

1 Like

I’ve got a solution! Based on this, I rewrote the code like so:

# The trick: build the model graph on FULL-size shared variables (Y,
# sender_full, receiver_full) and only swap in the minibatches during
# fitting via `more_replacements`.  The model itself then keeps the full
# 16770-long observed tensor, so the PPC comes back full-size.
Y =  theano.shared(df.y.values)
y = pm.Minibatch(df.y.values, 50)

sender_full = theano.shared(df.sender.values - 1)
sender = pm.Minibatch(df.sender.values - 1, 50)

receiver_full = theano.shared(df.receiver.values - 1)
receiver = pm.Minibatch(df.receiver.values - 1, 50)
K = 2
n_nodes = df.sender.values.max()
n_dyads = df.dyad_id.values.max()
cov_dims = K * 2
# NOTE(review): a packed lower triangle of an n x n matrix has n*(n+1)/2
# entries (10 for cov_dims=4); this expression gives 8.  Unused below,
# but worth confirming.
lower_tri = int((cov_dims * cov_dims + 1)/2)


with pm.Model() as amen_model:
    #intercept
    alpha = pm.Normal('alpha', mu=0., sigma=5)
with amen_model:

    # node terms
    sigma_nodes = pm.HalfCauchy.dist(beta=2.5, shape = 2 )

    # compute_corr=True returns (Cholesky factor, correlation matrix, stds).
    corr_nodes, Rho_nodes, sigmas_nodes_est = pm.LKJCholeskyCov(
        "corr_nodes", n=2, eta=5, sd_dist=sigma_nodes, compute_corr=True)
        
    
    # Non-centered parameterization of the correlated node effects.
    z_nodes = pm.Normal('z_nodes', mu=0., sigma=1, shape = (2, n_nodes))
    mean_nodes = pm.Deterministic('mean_nodes', tt.dot(corr_nodes, z_nodes).T)
    # multi-effect terms
    z_multi_effects = pm.Normal(
        'z_multi_effects', mu=0., sigma=1, shape = (cov_dims, n_nodes))

    sigma_multi_effects = pm.HalfCauchy.dist(beta=2.5, shape = cov_dims )

    corr_multi_effects, Rho_multi, sigmas_multis = pm.LKJCholeskyCov(
        "corr_multi", n=cov_dims, eta=5, sd_dist=sigma_multi_effects, compute_corr=True)    
    mean_multi_effects = pm.Deterministic(
        'mean_multi_effects', tt.dot(corr_multi_effects, z_multi_effects).T)
    # NOTE(review): tt.dot(A, B.T).sum(axis=1) sums u_i . v_j over every j,
    # not just row i's own receiver; the per-dyad AMEN term is usually
    # (A * B).sum(axis=1) -- confirm which is intended.
    y_star = pm.Deterministic('y_star', alpha + \
    mean_nodes[sender_full, 0] + \
    mean_nodes[receiver_full, 1] + \
        tt.dot(mean_multi_effects[sender_full, 0:K], mean_multi_effects[receiver_full, K:K*2].T).sum(axis = 1))
    # Model error
    eps = pm.HalfCauchy('eps', beta = 3)
    # Data likelihood: observed on the FULL shared tensor, so the PPC has
    # the full length.
    y_likelihood = pm.Normal('y_likelihood', mu=y_star, sigma=eps, observed=Y)



with amen_model:
    # During fitting only, replace the full tensors with their minibatch
    # counterparts; ADVI then trains on batches of 50.
    mean_field = pm.fit(20000, method='advi', 
    more_replacements={Y: y, sender_full:sender, receiver_full:receiver}, 
    callbacks=[pm.callbacks.CheckParametersConvergence(diff='relative', tolerance = 1e-2)])

and now, the PPC shapes are the correct size.

3 Likes

Well done, and thanks for posting the solution :clap:

1 Like