Hi,
I’m trying to estimate the parameters of a Hidden Markov Model with Beta-distributed emissions (using pymc3, version 3.11.4).
Import packages
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns  # used by plot_emissions below
import theano
import theano.tensor as tt
from typing import List  # used in the markov_sequence annotation
from scipy.stats import multinomial, beta
from scipy.special import logsumexp
import pymc3 as pm
Generate example data
def equilibrium_distribution(p_transition):
    """This implementation comes from Colin Carroll."""
    n_states = p_transition.shape[0]
    A = np.append(
        arr=p_transition.T - np.eye(n_states),
        values=np.ones(n_states).reshape(1, -1),
        axis=0
    )
    # Moore-Penrose pseudoinverse = (A^T A)^{-1} A^T
    pinv = np.linalg.pinv(A)
    # Return the last row
    return pinv.T[-1]
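As a quick sanity check of this helper (my own addition), the returned vector should be a stationary distribution of the transition matrix, i.e. pi @ P = pi:

# Sanity check (my own addition): pi should satisfy pi @ P = pi and sum to 1.
P_check = np.array([[0.9, 0.05, 0.05],
                    [0.05, 0.9, 0.05],
                    [0.01, 0.01, 0.98]])
pi = equilibrium_distribution(P_check)
assert np.allclose(pi @ P_check, pi)
assert np.isclose(pi.sum(), 1.0)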
def markov_sequence(p_init: np.ndarray, p_transition: np.ndarray, sequence_length: int) -> List[int]:
    """Generate a Markov sequence based on p_init and p_transition."""
    if p_init is None:
        p_init = equilibrium_distribution(p_transition)
    initial_state = list(multinomial.rvs(1, p_init)).index(1)
    states = [initial_state]
    for _ in range(sequence_length - 1):
        p_tr = p_transition[states[-1]]
        new_state = list(multinomial.rvs(1, p_tr)).index(1)
        states.append(new_state)
    return states
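Same idea for the sequence generator (again my own sketch): over a long chain, the empirical state frequencies should approach the equilibrium distribution:

# Sanity check: state frequencies of a long simulated chain should be
# close to the equilibrium distribution (reusing P_check from above).
chain = markov_sequence(None, P_check, sequence_length=50000)
freqs = np.bincount(chain, minlength=3) / len(chain)
print(freqs, equilibrium_distribution(P_check))  # should roughly agree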
def beta_emissions(states, A, B):
    emissions = []
    for state in states:
        a = A[state]
        b = B[state]
        e = beta.rvs(a, b)
        emissions.append(e)
    return emissions
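And for the emission helper (my own check): the sample mean of emissions from a single state should be close to the Beta mean a / (a + b):

# Sanity check: emissions from state 0 with A=[2.0], B=[5.0] are Beta(2, 5)
# draws, whose mean is 2 / (2 + 5).
ems_chk = beta_emissions([0] * 20000, [2.0], [5.0])
print(np.mean(ems_chk), 2.0 / (2.0 + 5.0))  # should roughly agree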
def plot_emissions(states, emissions):
    fig, axes = plt.subplots(figsize=(16, 8), nrows=2, ncols=1, sharex=True)
    axes[0].plot(states)
    axes[0].set_title("States")
    axes[1].plot(emissions)
    axes[1].set_title("Emissions")
    sns.despine()
# generate data
## example parameters
A = [2, 5, 10]
B = [5, 1, 1]
p_init = np.array([1 / 3, 1 / 3, 1 / 3])  # uniform initial distribution; must sum to 1
p_transition = np.array(
    [[0.9, 0.05, 0.05],
     [0.05, 0.9, 0.05],
     [0.01, 0.01, 0.98]]
)
states = markov_sequence(p_init, p_transition, sequence_length=1000)
beta_ems = beta_emissions(states, A, B)
plot_emissions(states, beta_ems)
plt.show()
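Before fitting, I also compute a reference log-likelihood of the simulated data at the true parameters, using a plain-NumPy forward algorithm in log space (my own sketch), so there is a number to compare the theano scan against:

def forward_loglik(obs, p_init, p_transition, a, b):
    """Plain-NumPy forward algorithm in log space, for cross-checking."""
    log_trans = np.log(p_transition)
    # (T, n_states) matrix of per-state Beta log-densities
    log_emit = beta.logpdf(np.asarray(obs)[:, None], a, b)
    log_alpha = np.log(p_init) + log_emit[0]
    for t in range(1, len(obs)):
        log_alpha = logsumexp(log_alpha[:, None] + log_trans, axis=0) + log_emit[t]
    return logsumexp(log_alpha)

print(forward_loglik(beta_ems, p_init, p_transition, np.array(A), np.array(B)))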
Fit Model
observed_label = theano.shared(np.array(beta_ems))
theta = p_transition
N_states = theta.shape[0]

def onestep(obs, gamma_, theta, A, B):
    # one step of the forward recursion in log space
    i = tt.cast(obs, 'int32')
    alpha = gamma_ + tt.log(theta) + tt.tile(pm.Beta.dist(alpha=A, beta=B).logp(i), (theta.shape[0], 1))
    return pm.math.logsumexp(alpha, axis=0).T
T = len(beta_ems)

with pm.Model() as model:
    Pt = pm.Dirichlet('P_transition',
                      a=np.ones((N_states, N_states)),
                      shape=(N_states, N_states),
                      testval=theta)
    # note: the lists A and B from above serve as test values and are
    # shadowed by the RVs here
    A = pm.Exponential("A", lam=0.01, shape=(N_states,), testval=A)
    B = pm.Exponential("B", lam=0.01, shape=(N_states,), testval=B)

    gamma = pm.Beta.dist(alpha=A, beta=B).logp(beta_ems[0])
    gamma = tt.tile(gamma, (1, 1)).T

    result, updates = theano.scan(fn=onestep,
                                  outputs_info=gamma,
                                  sequences=observed_label,
                                  non_sequences=[Pt, A, B],
                                  n_steps=T - 1)

    obs_logp = pm.Potential('obs_logp', pm.math.logsumexp(result[-1]))

    trace = pm.sample(1000, tune=1000, init='advi+adapt_diag')
I obtain the following error message:
Error message
Auto-assigning NUTS sampler...
Initializing NUTS using advi+adapt_diag...
0.00% [0/200000 00:00<00:00]
FloatingPointError                        Traceback (most recent call last)
<ipython-input> in <module>()
     42     obs_logp = pm.Potential('obs_logp', pm.math.logsumexp(result[-1]))
     43
---> 44     trace = pm.sample(1000, tune=1000, init='advi+adapt_diag')
     45
4 frames
/usr/local/lib/python3.7/dist-packages/pymc3/variational/inference.py in _iterate_with_loss(self, s, n, step_func, progress, callbacks)
    236             except IndexError:
    237                 pass
--> 238             raise FloatingPointError("\n".join(errmsg))
    239         scores[i] = e
    240         if i % 10 == 0:
The current approximation of RV `A_log__`.ravel()[0] is NaN.
The current approximation of RV `A_log__`.ravel()[1] is NaN.
The current approximation of RV `A_log__`.ravel()[2] is NaN.
The current approximation of RV `P_transition_stickbreaking__`.ravel()[0] is NaN.
The current approximation of RV `P_transition_stickbreaking__`.ravel()[1] is NaN.
The current approximation of RV `P_transition_stickbreaking__`.ravel()[2] is NaN.
The current approximation of RV `P_transition_stickbreaking__`.ravel()[3] is NaN.
The current approximation of RV `P_transition_stickbreaking__`.ravel()[4] is NaN.
The current approximation of RV `P_transition_stickbreaking__`.ravel()[5] is NaN.
The current approximation of RV `B_log__`.ravel()[0] is NaN.
The current approximation of RV `B_log__`.ravel()[1] is NaN.
The current approximation of RV `B_log__`.ravel()[2] is NaN.
Try tracking this parameter: http://docs.pymc.io/notebooks/variational_api_quickstart.html#Tracking-parameters
Any ideas what I’m doing wrong or how I could debug?
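The only debugging idea I have so far (a sketch, assuming the model builds fine) is to evaluate the per-variable log probabilities at the test point, which should reveal if any term is already non-finite before ADVI starts:

# Debugging sketch: per-variable and total logp at the test point.
# A -inf or nan here would point at the term that breaks.
with model:
    print(model.check_test_point())      # logp of each RV at the test point
    print(model.logp(model.test_point))  # total model logp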