Dear all, Pymc newb here. I am trying to make infinite relational model.
We have a binary matrix here. We want to co-clustering using this generative model:
z | γ ~ Chinese Restaurant Process (γ)
η(a,b) ~ Beta(β,β)
R(i,j)| z, η~ Bernoulli(η(zi,zj)).
γ and η are the hyperparameters.
Here is my code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import pymc3 as pm
import theano.tensor as tt
import theano as T
plt.style.use("seaborn-darkgrid")
sns.set_context("paper")
def generate_data(K,alpha,beta):
w = np.random.beta(alpha,beta,size=(K,K))
groups = []
for i in range(K):
for j in range(K):
group = np.random.binomial(1,w[i,j],size = (10,10))
groups.append(group)
temp_mats = []
for i in range(K):
temp_mat = np.concatenate([groups[i*K+j] for j in range(K)],axis =1)
temp_mats.append(temp_mat)
mat = np.concatenate(temp_mats)
random_row_delete= np.random.randint(0,mat.shape[0],7)
random_col_delete= np.random.randint(0,mat.shape[1],7)
mat = np.delete(mat,random_row_delete,axis=0 )
mat = np.delete(mat,random_col_delete,axis=1 )
return mat,w
# The data is not shuffled yet
data,true_w = generate_data(3,0.5,0.5)
def stick_breaking(beta):
portion_remaining = tt.concatenate([[1], tt.extra_ops.cumprod(1 - beta)[:-1]])
return beta * portion_remaining
def cartesian_to_row_index(i,j,shape):
return i*shape[1] + j
with pm.Model() as model:
alpha = pm.Gamma('alpha', 1., 1.)
beta = pm.Beta('beta', 1., alpha, shape=K)
w = pm.Deterministic('w', stick_breaking(beta))
z = pm.Categorical("z",p = w,shape=data.shape)
groups = []
rho = pm.Beta("rho",1,1, shape = (K , K))
for i in range(data.shape[0]):
for j in range(data.shape[1]):
r = pm.Bernoulli("r%i"%(cartesian_to_row_index(i,j,data.shape)),p = rho[z[i,j]],observed = data[i,j])
trace = pm.sample(1000,tune = 1000)
Unfortunately, this model taking a long time even for initialize. Is there anything wrong with my model? How can I improve the speed of my model?