Hello,
I’m inferring the latent parameters of a generalized linear mixed model. The data are a partial observation of a categorical random field that depends on three latent Gaussian random fields. I was following this example notebook. The sampler seems to be stuck at the initial position and never moves, while CPU utilisation goes up to 100%.
Sampling: [choice, coeffs1, coeffs2, coeffs3, sof_x, sof_z, stddev]
0%| | 0/2000 [00:00<?, ?it/s]
0%| | 0/2000 [00:00<?, ?it/s]
0%| | 0/2000 [00:00<?, ?it/s]
0%| | 0/2000 [00:00<?, ?it/s]
Below is a reproducible code snippet. I’m running the script in Spyder on a Windows 10 PC.
import numpy as np
import pandas as pd
import pymc as pm
import pytensor.tensor as pt
#%%
def generate_cov(stddev, coords, sof):
    # anisotropic squared-exponential covariance; stddev and sof may be PyTensor variables
    distance = np.zeros([coords.shape[0], coords.shape[0], 2])
    for i, coord in enumerate(coords):
        distance[i, :, :] = coords - coord
    # sof = [sof_z, sof_x]; column 0 of coords is x, column 1 is z
    cov = stddev**2 * pt.exp(-np.pi*(distance[:, :, 0]**2/sof[1]**2 + distance[:, :, 1]**2/sof[0]**2))
    return cov
#%% Synthetic data
domain_x, domain_z = 80, 30
dx, dz = 1, 1
x_coords, z_coords = np.arange(0, domain_x, dx), np.arange(0, domain_z, dz)
xpos, zpos = np.meshgrid(x_coords, z_coords)
xpos, zpos = np.atleast_2d(xpos.ravel()).T, np.atleast_2d(zpos.ravel()).T
Coords = np.hstack([xpos, zpos])
nxpoints, nzpoints = x_coords.size, z_coords.size
ncells = nxpoints*nzpoints
dd = np.zeros([ncells, 4]) + 100   # 100 marks an unobserved cell
dd[:, 0] = np.tile(x_coords, z_coords.size)
dd[:, 2] = np.repeat(z_coords, x_coords.size)
# observed category codes (0/1/2) along the vertical profiles at x = 10, 40, 70
dd[10::x_coords.size, -1] = [0]*2+[1]*17+[2]*4+[100]*7
dd[40::x_coords.size, -1] = [0]*7+[1]*8+[2]*13+[100]*2
dd[70::x_coords.size, -1] = [0]*5+[1]*12+[2]*6+[100]*7
obs_idx = np.array(np.where(dd[:, -1] != 100))   # indices of the observed cells
data = pd.DataFrame(dd, columns =['X', 'Y', 'Dep', 'Code'])
data = data.astype({'Code': int})
#%% Model
K = 3            # number of categories / latent fields
n_kl = 100       # number of K-L modes retained
Beta = np.zeros(3)   # mean of each latent utility field
sof_x_mu, sof_x_std = 30, 5                  # prior on the horizontal scale of fluctuation
sof_z_mu, sof_z_std = [5, 5, 5], [1, 1, 1]   # prior on the vertical scale of fluctuation
observed1 = np.array(data[data.Code!=100].Code, dtype=int)
coords1 = {'choices': ['type1', 'type2', 'type3'], 'obs': range(observed1.size),}
with pm.Model(coords=coords1) as model:
    # priors on the scales of fluctuation and the field standard deviation
    sof_x = pm.Normal('sof_x', sof_x_mu, sof_x_std, shape=K)
    sof_z = pm.Normal('sof_z', sof_z_mu, sof_z_std, shape=K)
    stddev = pm.HalfCauchy('stddev', beta=1)
    cov1 = generate_cov(stddev, Coords, sof=[sof_z[0], sof_x[0]])
    cov2 = generate_cov(stddev, Coords, sof=[sof_z[1], sof_x[1]])
    cov3 = generate_cov(stddev, Coords, sof=[sof_z[2], sof_x[2]])
    # K-L expansion of the covariance matrices; eigvals are in ascending order,
    # so keep the last n_kl eigenvalues and columns of eigvecs
    eigvals1, eigvecs1 = pt.linalg.eigh(cov1)
    eigvals2, eigvecs2 = pt.linalg.eigh(cov2)
    eigvals3, eigvecs3 = pt.linalg.eigh(cov3)
    eigvals1 = eigvals1[-n_kl:]
    eigvals2 = eigvals2[-n_kl:]
    eigvals3 = eigvals3[-n_kl:]
    eigvecs1_scaled = eigvecs1[:, -n_kl:] * pt.sqrt(eigvals1)
    eigvecs2_scaled = eigvecs2[:, -n_kl:] * pt.sqrt(eigvals2)
    eigvecs3_scaled = eigvecs3[:, -n_kl:] * pt.sqrt(eigvals3)
    # standard-normal K-L coefficients
    coeffs1 = pm.Normal('coeffs1', 0, 1, shape=n_kl)
    coeffs2 = pm.Normal('coeffs2', 0, 1, shape=n_kl)
    coeffs3 = pm.Normal('coeffs3', 0, 1, shape=n_kl)
    # construct latent utility fields from the sampled prior parameters and K-L coefficients
    u1 = pt.sum(eigvecs1_scaled * coeffs1, axis=1) + Beta[0]
    u2 = pt.sum(eigvecs2_scaled * coeffs2, axis=1) + Beta[1]
    u3 = pt.sum(eigvecs3_scaled * coeffs3, axis=1) + Beta[2]
    s = pm.math.stack([u1, u2, u3]).T
    s1 = s[obs_idx].squeeze()   # keep only the observed cells
    p_ = pm.Deterministic('p', pm.math.softmax(s1, axis=1), dims=('obs', 'choices'))
    # likelihood
    choice = pm.Categorical('choice', p=p_, observed=observed1, dims='obs')
    # sampling
    idata = pm.sample_prior_predictive()
    idata.extend(pm.sample(nuts_sampler="numpyro", idata_kwargs={"log_likelihood": True}, random_seed=101))
    idata.extend(pm.sample_posterior_predictive(idata))
The model uses a Karhunen-Loève (K-L) expansion to simulate the Gaussian random fields, so the covariance matrices have to be computed and factorized into eigenvalues and eigenvectors inside the model graph, since they depend on the sampled hyperparameters (sof_x, sof_z, stddev).
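For reference, here is a minimal standalone NumPy sketch of the same truncated K-L construction, with fixed hyperparameters set to the prior means above (the variable names here are illustrative only); it shows what a single field realisation looks like outside PyMC:

import numpy as np

# one K-L realisation with fixed hyperparameters
# (stddev = 1, sof_z = 5, sof_x = 30, i.e. the prior means used above)
rng = np.random.default_rng(0)
xg, zg = np.meshgrid(np.arange(80), np.arange(30))
coords = np.stack([xg.ravel(), zg.ravel()], axis=-1)   # column 0 = x, column 1 = z
diff = coords[:, None, :] - coords[None, :, :]
cov = 1.0**2 * np.exp(-np.pi*(diff[..., 0]**2/30.0**2 + diff[..., 1]**2/5.0**2))
eigvals, eigvecs = np.linalg.eigh(cov)                 # eigenvalues in ascending order
basis = eigvecs[:, -100:] * np.sqrt(eigvals[-100:])    # keep the 100 largest modes
field = (basis @ rng.standard_normal(100)).reshape(30, 80)   # one (nz, nx) realisation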
I’ve tried the suggestion here of setting cores=1 in pm.sample(), but the sampling process still seems to be stuck.
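Concretely, the call for that attempt looked like the following (back on the default PyMC NUTS sampler; I’m assuming the remaining arguments stay as above), and it produced the output below:

# cores=1 samples the two default chains sequentially (matches the log below)
idata.extend(pm.sample(cores=1, idata_kwargs={"log_likelihood": True}, random_seed=101))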
Auto-assigning NUTS sampler…
Initializing NUTS using jitter+adapt_diag…
Sequential sampling (2 chains in 1 job)
NUTS: [sof_x, sof_z, stddev, coeffs1, coeffs2, coeffs3]
Sampling chain 0, 0 divergences ---------------------------------- 0% -:--:--