How to create a Mixture of First-order Markov Models

Dear all,

As I am diving to pymc3 for the first time, I would like to create a mixture of first-order markov models but I am stuck on how to combine a Mixture distribution with a class Markov model created based on the link here: pym3 and hmms and here: pymc3 and lda

My data consists of a set of temporal sequences of different length (each sequence has between 1 and 20 observations). Thea idea is simple: I would like to model each sequence as a weighted mixture model of first order markov models and use a latent cluster variable c_k to model similar sequences.

To be specific, the probability of each sequence x should be modeled as:

P(x|\theta) = \sum_{k=1}^{K} w_{c_k} p(x|c_k,\theta)

The probability of each sequence p(x|c_k,\theta) is modeled as a first-order markov model:

p(x|c_k,A) = p(x_1|\theta_k) \prod_{i=2}^{L} p(x_i|x_{-1},A)

And the probability of the observed data should be the sum of likelihood of all the sequences.
The weights w_{c_k} are cluster proportions w_{c_k} to be learned by the model (each sequence has its own cluster proportions). The matrix A represents is the transition matrix between all possible values of x_i and is the same for all the data.

My model (and a toy sequence) so far are:

%matplotlib inline
import numpy as np 
import keras
import pymc3 as pm
import theano.tensor as tt
import matplotlib.pyplot as plt
import theano.tensor.slinalg as sla
from pymc3.distributions.transforms import t_stick_breaking
from pymc3 import math as pmmath

num_states = 6 # all potential values of x
num_clusters = 3 # hidden clusters
max_len_seq = 5 
num_seqs = 7

sequences = [[1, 2, 2, 3 ], [1, 4, 5], [4, 5, 2], [2, 2, 2], [4, 4, 4, 4], [1, 4, 4],[1, 4, 4]]
preproc_seqs = keras.preprocessing.sequence.pad_sequences(sequences,maxlen=max_len_seq,padding='post')

class MarkovModel(pm.Categorical):
"""
Markov Model    
Parameters
----------
P : tensor
    transition probability
    shape = (N_states,N_states)
    
PA : tensor
     equilibrium probabilities
     shape = (N_states)

"""

def __init__(self, PA=None, P=None,
             *args, **kwargs):
    super(pm.Categorical, self).__init__(*args, **kwargs)
    self.P = P
    self.PA = PA
    #self.k = N_states
    self.mode = tt.cast(0,dtype='int64')
    self.mean = tt.cast(0,dtype='int64')

    def logp(self, x):
      P = self.P
      PA = self.PA
    
      # calculate equilibrium
    
      # now we need to create an array with probabilities
      # so that for x=A: PA=P1, PB=(1-P1)
      # and for x=B: PA=P2, PB=(1-P2)
      #P = tt.switch(x[:-1],P1,P2)
    
      PS = P[x[:-1]]
            
      x_i = x[1:]
      ou_like = pm.Categorical.dist(PS).logp(x_i)
      return pm.Categorical.dist(PA).logp(x[0]) + tt.sum(ou_like)


def log_data(w,distr):
    def log_data_f(x):
       ou_like = pm.Mixture.dist(w=w,comp_dists=distr).logp(x)              
       return tt.sum(ou_like)

    return log_data_f 



with pm.Model() as model:   

  # transition probabilities
  P = pm.Dirichlet('P', a=np.ones((num_states,num_states)), shape=(num_states,num_states))
  AA = tt.dmatrix('AA')
  AA = tt.eye(num_states) - P + tt.ones(shape=(num_states,num_states))
  PA = pm.Deterministic('PA',sla.solve(AA.T,tt.ones(shape=(num_states))))
  # prob of cluster for each sequence
  theta = pm.Dirichlet('theta', a= pm.floatX((1.0 / num_clusters) * np.ones((num_seqs, num_clusters))),
                 shape=(num_seqs, num_clusters))

  components = MarkovModel.dist(PA=PA, P=P, shape=num_seqs)
  obs = pm.DensityDist('obs', log_data(w=theta,distr=components), observed=preproc_seqs)

The code gives the error: “index must be integers”, but I don’t know why. I’d appreciate for help and hints on how:

  1. Combining a mixture model and a markov model
  2. Alternative ways to deal with data sequences of different lenth. Many thanks!

TypeError Traceback (most recent call last)
in ()
86 #components = pm.Poisson.dist(mu=lam, shape=(num_seqs))
87
—> 88 obs = pm.DensityDist(‘obs’, log_data(w=theta,distr=components), observed=preproc_seqs)

/Users/bieljoanisaac/Library/Python/2.7/lib/python/site-packages/pymc3/distributions/distribution.pyc in new(cls, name, *args, **kwargs)
35 total_size = kwargs.pop(‘total_size’, None)
36 dist = cls.dist(*args, **kwargs)
—> 37 return model.Var(name, dist, data, total_size)
38 else:
39 raise TypeError(“Name needs to be a string but got: {}”.format(name))

/Users/bieljoanisaac/Library/Python/2.7/lib/python/site-packages/pymc3/model.pyc in Var(self, name, dist, data, total_size)
830 var = ObservedRV(name=name, data=data,
831 distribution=dist,
–> 832 total_size=total_size, model=self)
833 self.observed_RVs.append(var)
834 if var.missing_values:

/Users/bieljoanisaac/Library/Python/2.7/lib/python/site-packages/pymc3/model.pyc in init(self, type, owner, index, name, data, distribution, total_size, model)
1286
1287 self.missing_values = data.missing_values
-> 1288 self.logp_elemwiset = distribution.logp(data)
1289 # The logp might need scaling in minibatches.
1290 # This is done in Factor.

in log_data_f(x)
58 def log_data(w,distr):
59 def log_data_f(x):
—> 60 ou_like = pm.Mixture.dist(w=w,comp_dists=distr).logp(x)
61 return tt.sum(ou_like)
62

/Users/bieljoanisaac/Library/Python/2.7/lib/python/site-packages/pymc3/distributions/mixture.pyc in logp(self, value)
143 w = self.w
144
–> 145 return bound(logsumexp(tt.log(w) + self._comp_logp(value), axis=-1),
146 w >= 0, w <= 1, tt.allclose(w.sum(axis=-1), 1),
147 broadcast_conditions=False)

/Users/bieljoanisaac/Library/Python/2.7/lib/python/site-packages/pymc3/distributions/mixture.pyc in comp_logp(self, value)
109 value
= value if value.ndim > 1 else tt.shape_padright(value)
110
–> 111 return comp_dists.logp(value_)
112 except AttributeError:
113 return tt.squeeze(tt.stack([comp_dist.logp(value)

in logp(self, x)
49 #P = tt.switch(x[:-1],P1,P2)
50
—> 51 PS = P[x[:-1]]
52
53 x_i = x[1:]

/Users/bieljoanisaac/Library/Python/2.7/lib/python/site-packages/theano/tensor/var.pyc in getitem(self, args)
568 TensorVariable, TensorConstant,
569 theano.tensor.sharedvar.TensorSharedVariable))):
–> 570 return self.take(args[axis], axis)
571 else:
572 return theano.tensor.subtensor.advanced_subtensor(self, *args)

/Users/bieljoanisaac/Library/Python/2.7/lib/python/site-packages/theano/tensor/var.pyc in take(self, indices, axis, mode)
612
613 def take(self, indices, axis=None, mode=‘raise’):
–> 614 return theano.tensor.subtensor.take(self, indices, axis, mode)
615
616 # COPYING

/Users/bieljoanisaac/Library/Python/2.7/lib/python/site-packages/theano/tensor/subtensor.pyc in take(a, indices, axis, mode)
2453 [a.shape[:axis], indices.shape, a.shape[axis + 1:]])
2454 ndim = a.ndim + indices.ndim - 1
-> 2455 return take(a, indices.flatten(), axis, mode).reshape(shape, ndim)

/Users/bieljoanisaac/Library/Python/2.7/lib/python/site-packages/theano/tensor/subtensor.pyc in take(a, indices, axis, mode)
2429 return advanced_subtensor1(a.flatten(), indices)
2430 elif axis == 0:
-> 2431 return advanced_subtensor1(a, indices)
2432 else:
2433 if axis < 0:

/Users/bieljoanisaac/Library/Python/2.7/lib/python/site-packages/theano/gof/op.pyc in call(self, *inputs, **kwargs)
613 “”"
614 return_list = kwargs.pop(‘return_list’, False)
–> 615 node = self.make_node(*inputs, **kwargs)
616
617 if config.compute_test_value != ‘off’:

/Users/bieljoanisaac/Library/Python/2.7/lib/python/site-packages/theano/tensor/subtensor.pyc in make_node(self, x, ilist)
1701 ilist_ = theano.tensor.as_tensor_variable(ilist)
1702 if ilist_.type.dtype not in theano.tensor.integer_dtypes:
-> 1703 raise TypeError(‘index must be integers’)
1704 if ilist_.type.ndim != 1:
1705 raise TypeError(‘index must be vector’)

TypeError: index must be integers

I have reprhased the question here: How to treat array observations in a custom loglikelihood