Any case studies comparing HMC, VI, and Marginal Maximum Likelihood?

I tweeted something on this some time ago; the tweet links are in the code comments below.

Code below for the lazy:

"""
https://twitter.com/junpenglao/status/928206574845399040
"""
import pymc3 as pm
import numpy as np
import matplotlib.pylab as plt

# Build a strongly correlated 2-D Gaussian: Sigma = L L^T plus a small jitter.
L = np.array([[2, 1]]).T
Sigma = L.dot(L.T) + np.diag([1e-2, 1e-2])
L_chol = np.linalg.cholesky(Sigma)

with pm.Model() as model:
    y = pm.MvNormal('y', mu=np.zeros(2), chol=L_chol, shape=2)
    tr0 = pm.sample(500, chains=1)                      # NUTS
    tr1 = pm.fit(method='advi').sample(500)             # mean-field ADVI
    tr2 = pm.fit(method='fullrank_advi').sample(500)    # full-rank ADVI
    tr3 = pm.fit(method='svgd').sample(500)             # Stein variational gradient descent


plt.figure()
plt.plot(tr0['y'][:,0], tr0['y'][:,1], 'o', alpha=.1, label='NUTS')
plt.plot(tr1['y'][:,0], tr1['y'][:,1], 'o', alpha=.1, label='ADVI')
plt.plot(tr2['y'][:,0], tr2['y'][:,1], 'o', alpha=.1, label='FullRank')
plt.plot(tr3['y'][:,0], tr3['y'][:,1], 'o', alpha=.1, label='SVGD')
plt.legend();
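
To make the comparison a bit more quantitative, here is a rough check (reusing the traces above) of how well each approximation recovers the target covariance; since mean-field ADVI uses a diagonal Gaussian, it cannot capture the correlation by construction:

# Sketch: empirical covariance of each set of draws vs. the true Sigma.
for name, tr in zip(['NUTS', 'ADVI', 'FullRank', 'SVGD'], [tr0, tr1, tr2, tr3]):
    print(name)
    print(np.cov(tr['y'].T).round(2))
print('target')
print(Sigma.round(2))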


"""
https://twitter.com/junpenglao/status/930826259734638598
"""
import matplotlib.pylab as plt
from mpl_toolkits import mplot3d
import numpy as np
import pymc3 as pm

def cust_logp(z):
    # Unnormalised log density: the negative Rosenbrock ("banana") function,
    # maximised at z = (1, 1).
    return -(1. - z[0])**2 - 100. * (z[1] - z[0]**2)**2

# Evaluate the potential (negative log density) on a grid and plot the surface.
grid = np.mgrid[-2:2:100j, -1:3:100j]
Z = -np.asarray([cust_logp(g) for g in grid.reshape(2, -1).T])
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
surf = ax.plot_surface(grid[0], grid[1], Z.reshape(100, 100), cmap='viridis',
                       linewidth=0, antialiased=False)

with pm.Model():
    pm.DensityDist('pot1', logp=cust_logp, shape=(2,))
    tr1 = pm.sample(500, step=pm.NUTS())['pot1']
    tr2 = pm.sample(500, step=pm.Metropolis())['pot1']
    tr3 = pm.fit(n=50000, method='fullrank_advi').sample(500)['pot1']  # VI, because why not

# Overlay the draws from each method on the potential.
_, ax = plt.subplots(1, 3, figsize=(15, 5), sharex=True, sharey=True)
ax[0].imshow(Z.reshape(100, 100), extent=[-1, 3, -2, 2])
ax[0].plot(tr1[:, 1], tr1[:, 0], 'ro-', alpha=.1)
ax[1].imshow(Z.reshape(100, 100), extent=[-1, 3, -2, 2])
ax[1].plot(tr2[:, 1], tr2[:, 0], 'ro-', alpha=.1)
ax[2].imshow(Z.reshape(100, 100), extent=[-1, 3, -2, 2])
ax[2].plot(tr3[:, 1], tr3[:, 0], 'ro', alpha=.1)
plt.tight_layout()
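
For a rough numeric side-by-side of the draws above (just per-dimension sample means and spreads of the (500, 2) arrays extracted earlier): random-walk Metropolis mixes poorly on this banana-shaped density, and a Gaussian variational approximation can only crudely match the curved geometry.

# Sketch: compare the location and spread of the draws from each method.
for name, tr in zip(['NUTS', 'Metropolis', 'FullRank ADVI'], [tr1, tr2, tr3]):
    print(name, tr.mean(axis=0).round(2), tr.std(axis=0).round(2))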

with pm.Model():
    pm.DensityDist('pot1', logp=cust_logp, shape=(2,))
    minimal = pm.find_MAP()  # point estimate at the mode of the density
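
As a sanity check (assuming the optimiser converges from the default starting point), the MAP should sit near the analytic maximum of this density at (1, 1):

print(minimal['pot1'])  # expect something close to [1., 1.]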

You can also find a case study of hierarchical linear models at https://github.com/junpenglao/GLMM-in-Python, especially https://github.com/junpenglao/GLMM-in-Python/blob/master/pymc3_different_sampling.py, which compares different fitting methods in PyMC3.

One thing I would like to point out: even though the estimated regression coefficients sometimes differ across samplers and fitting methods, the fitted values and predictions are actually very close.
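
Here is a minimal, self-contained sketch of that point on simulated data (not the GLMM case study above): fit the same toy regression with NUTS and with mean-field ADVI, then compare both the coefficients and the fitted lines.

# Sketch with simulated data: coefficient estimates from NUTS and ADVI may
# differ a bit, but the predicted means are usually very close.
import numpy as np
import pymc3 as pm

rng = np.random.RandomState(0)
x = np.linspace(0, 1, 50)
y_obs = 1.0 + 2.0 * x + rng.normal(scale=0.5, size=50)

with pm.Model():
    a = pm.Normal('a', 0., 10.)
    b = pm.Normal('b', 0., 10.)
    sigma = pm.HalfNormal('sigma', 1.)
    pm.Normal('y', mu=a + b * x, sd=sigma, observed=y_obs)
    tr_nuts = pm.sample(500, chains=1)
    tr_advi = pm.fit(method='advi').sample(500)

# Coefficients can look slightly different...
print(tr_nuts['a'].mean(), tr_nuts['b'].mean())
print(tr_advi['a'].mean(), tr_advi['b'].mean())
# ...but the fitted lines nearly coincide.
pred_nuts = tr_nuts['a'].mean() + tr_nuts['b'].mean() * x
pred_advi = tr_advi['a'].mean() + tr_advi['b'].mean() * x
print(np.abs(pred_nuts - pred_advi).max())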
