# How to create a Mixture of First-order Markov Models

Dear all,

As I am diving to pymc3 for the first time, I would like to create a mixture of first-order markov models but I am stuck on how to combine a Mixture distribution with a class Markov model created based on the link here: pym3 and hmms and here: pymc3 and lda

My data consists of a set of temporal sequences of different length (each sequence has between 1 and 20 observations). Thea idea is simple: I would like to model each sequence as a weighted mixture model of first order markov models and use a latent cluster variable c_k to model similar sequences.

To be specific, the probability of each sequence x should be modeled as:

P(x|\theta) = \sum_{k=1}^{K} w_{c_k} p(x|c_k,\theta)

The probability of each sequence p(x|c_k,\theta) is modeled as a first-order markov model:

p(x|c_k,A) = p(x_1|\theta_k) \prod_{i=2}^{L} p(x_i|x_{-1},A)

And the probability of the observed data should be the sum of likelihood of all the sequences.
The weights w_{c_k} are cluster proportions w_{c_k} to be learned by the model (each sequence has its own cluster proportions). The matrix A represents is the transition matrix between all possible values of x_i and is the same for all the data.

My model (and a toy sequence) so far are:

%matplotlib inline
import numpy as np
import keras
import pymc3 as pm
import theano.tensor as tt
import matplotlib.pyplot as plt
import theano.tensor.slinalg as sla
from pymc3.distributions.transforms import t_stick_breaking
from pymc3 import math as pmmath

num_states = 6 # all potential values of x
num_clusters = 3 # hidden clusters
max_len_seq = 5
num_seqs = 7

sequences = [[1, 2, 2, 3 ], [1, 4, 5], [4, 5, 2], [2, 2, 2], [4, 4, 4, 4], [1, 4, 4],[1, 4, 4]]

class MarkovModel(pm.Categorical):
"""
Markov Model
Parameters
----------
P : tensor
transition probability
shape = (N_states,N_states)

PA : tensor
equilibrium probabilities
shape = (N_states)

"""

def __init__(self, PA=None, P=None,
*args, **kwargs):
super(pm.Categorical, self).__init__(*args, **kwargs)
self.P = P
self.PA = PA
#self.k = N_states
self.mode = tt.cast(0,dtype='int64')
self.mean = tt.cast(0,dtype='int64')

def logp(self, x):
P = self.P
PA = self.PA

# calculate equilibrium

# now we need to create an array with probabilities
# so that for x=A: PA=P1, PB=(1-P1)
# and for x=B: PA=P2, PB=(1-P2)
#P = tt.switch(x[:-1],P1,P2)

PS = P[x[:-1]]

x_i = x[1:]
ou_like = pm.Categorical.dist(PS).logp(x_i)
return pm.Categorical.dist(PA).logp(x[0]) + tt.sum(ou_like)

def log_data(w,distr):
def log_data_f(x):
ou_like = pm.Mixture.dist(w=w,comp_dists=distr).logp(x)
return tt.sum(ou_like)

return log_data_f

with pm.Model() as model:

# transition probabilities
P = pm.Dirichlet('P', a=np.ones((num_states,num_states)), shape=(num_states,num_states))
AA = tt.dmatrix('AA')
AA = tt.eye(num_states) - P + tt.ones(shape=(num_states,num_states))
PA = pm.Deterministic('PA',sla.solve(AA.T,tt.ones(shape=(num_states))))
# prob of cluster for each sequence
theta = pm.Dirichlet('theta', a= pm.floatX((1.0 / num_clusters) * np.ones((num_seqs, num_clusters))),
shape=(num_seqs, num_clusters))

components = MarkovModel.dist(PA=PA, P=P, shape=num_seqs)
obs = pm.DensityDist('obs', log_data(w=theta,distr=components), observed=preproc_seqs)


The code gives the error: “index must be integers”, but I don’t know why. I’d appreciate for help and hints on how:

1. Combining a mixture model and a markov model
2. Alternative ways to deal with data sequences of different lenth. Many thanks!

TypeError Traceback (most recent call last)
in ()
86 #components = pm.Poisson.dist(mu=lam, shape=(num_seqs))
87
—> 88 obs = pm.DensityDist(‘obs’, log_data(w=theta,distr=components), observed=preproc_seqs)

/Users/bieljoanisaac/Library/Python/2.7/lib/python/site-packages/pymc3/distributions/distribution.pyc in new(cls, name, *args, **kwargs)
35 total_size = kwargs.pop(‘total_size’, None)
36 dist = cls.dist(*args, **kwargs)
—> 37 return model.Var(name, dist, data, total_size)
38 else:
39 raise TypeError(“Name needs to be a string but got: {}”.format(name))

/Users/bieljoanisaac/Library/Python/2.7/lib/python/site-packages/pymc3/model.pyc in Var(self, name, dist, data, total_size)
830 var = ObservedRV(name=name, data=data,
831 distribution=dist,
–> 832 total_size=total_size, model=self)
833 self.observed_RVs.append(var)
834 if var.missing_values:

/Users/bieljoanisaac/Library/Python/2.7/lib/python/site-packages/pymc3/model.pyc in init(self, type, owner, index, name, data, distribution, total_size, model)
1286
1287 self.missing_values = data.missing_values
-> 1288 self.logp_elemwiset = distribution.logp(data)
1289 # The logp might need scaling in minibatches.
1290 # This is done in Factor.

in log_data_f(x)
58 def log_data(w,distr):
59 def log_data_f(x):
—> 60 ou_like = pm.Mixture.dist(w=w,comp_dists=distr).logp(x)
61 return tt.sum(ou_like)
62

/Users/bieljoanisaac/Library/Python/2.7/lib/python/site-packages/pymc3/distributions/mixture.pyc in logp(self, value)
143 w = self.w
144
–> 145 return bound(logsumexp(tt.log(w) + self._comp_logp(value), axis=-1),
146 w >= 0, w <= 1, tt.allclose(w.sum(axis=-1), 1),

/Users/bieljoanisaac/Library/Python/2.7/lib/python/site-packages/pymc3/distributions/mixture.pyc in comp_logp(self, value)
109 value
= value if value.ndim > 1 else tt.shape_padright(value)
110
–> 111 return comp_dists.logp(value_)
112 except AttributeError:
113 return tt.squeeze(tt.stack([comp_dist.logp(value)

in logp(self, x)
49 #P = tt.switch(x[:-1],P1,P2)
50
—> 51 PS = P[x[:-1]]
52
53 x_i = x[1:]

/Users/bieljoanisaac/Library/Python/2.7/lib/python/site-packages/theano/tensor/var.pyc in getitem(self, args)
568 TensorVariable, TensorConstant,
569 theano.tensor.sharedvar.TensorSharedVariable))):
–> 570 return self.take(args[axis], axis)
571 else:

/Users/bieljoanisaac/Library/Python/2.7/lib/python/site-packages/theano/tensor/var.pyc in take(self, indices, axis, mode)
612
613 def take(self, indices, axis=None, mode=‘raise’):
–> 614 return theano.tensor.subtensor.take(self, indices, axis, mode)
615
616 # COPYING

/Users/bieljoanisaac/Library/Python/2.7/lib/python/site-packages/theano/tensor/subtensor.pyc in take(a, indices, axis, mode)
2453 [a.shape[:axis], indices.shape, a.shape[axis + 1:]])
2454 ndim = a.ndim + indices.ndim - 1
-> 2455 return take(a, indices.flatten(), axis, mode).reshape(shape, ndim)

/Users/bieljoanisaac/Library/Python/2.7/lib/python/site-packages/theano/tensor/subtensor.pyc in take(a, indices, axis, mode)
2430 elif axis == 0:
2432 else:
2433 if axis < 0:

/Users/bieljoanisaac/Library/Python/2.7/lib/python/site-packages/theano/gof/op.pyc in call(self, *inputs, **kwargs)
613 “”"
614 return_list = kwargs.pop(‘return_list’, False)
–> 615 node = self.make_node(*inputs, **kwargs)
616
617 if config.compute_test_value != ‘off’:

/Users/bieljoanisaac/Library/Python/2.7/lib/python/site-packages/theano/tensor/subtensor.pyc in make_node(self, x, ilist)
1701 ilist_ = theano.tensor.as_tensor_variable(ilist)
1702 if ilist_.type.dtype not in theano.tensor.integer_dtypes:
-> 1703 raise TypeError(‘index must be integers’)
1704 if ilist_.type.ndim != 1:
1705 raise TypeError(‘index must be vector’)

TypeError: index must be integers

I have reprhased the question here: How to treat array observations in a custom loglikelihood