Index out of bounds Error

I’m new and trying to run a bayesian model for the first time. I’m getting an index out of bounds error and for the life of me, I can’t figure out why.

import pandas as pd
import numpy as np
import scipy.stats as stats
import seaborn as sns
from pymc3 import Model, Normal, HalfNormal, Poisson, find_MAP, NUTS, sample, summary, traceplot
import theano 

#create fake data
n = 1000
y = np.random.poisson(lam = 1, size = n)
lower = 5000
upper = 150000
mu, sigma = 60000, 30000
X = stats.truncnorm(
    (lower - mu) / sigma, (upper - mu) / sigma, loc=mu, scale=sigma)
value = X.rvs(1000)
married = ['m', 's', 's', 'm', 'm']
marital_status = married * 200
credit = ['GF', '375', '400', '425', '450', '475', '500', '525', '550', '575', '600', '625', '650',
         '675', '700', '725', '750', '775', '800', 'T', '575', '600', '625', '650',
         '675', '700', '725', '750', '775', '800', '750', '775', '800', '750', '775', '800'
          ,'775', '800', '775', '800', '700', '725', '750', '775', '800', 'T', '575', '600', '625',
          '650']
credit_group = credit * 20
df = pd.DataFrame(columns = ['claims'], data = y)
df['value'] = value
df['marital_status'] = marital_status
df['credit_group'] = credit_group

marital_dummies = pd.get_dummies(df['marital_status'], drop_first=True)
credit_dummies = pd.get_dummies(df['credit_group'], drop_first=True)

df['single_status'] = marital_dummies
df = pd.concat([df, credit_dummies], axis = 1)
df.drop(['marital_status', 'credit_group'], axis = 1, inplace = True)
df.head()

value_shared = theano.shared(df['value'].values)
single_shared = theano.shared(df['single_status'].values)
x3_shared = theano.shared(df['400'].values)
x4_shared = theano.shared(df['425'].values)
x5_shared = theano.shared(df['450'].values)
x6_shared = theano.shared(df['475'].values)
x7_shared = theano.shared(df['500'].values)
x8_shared = theano.shared(df['525'].values)
x9_shared = theano.shared(df['550'].values)
x10_shared = theano.shared(df['575'].values)
x11_shared = theano.shared(df['600'].values)
x12_shared = theano.shared(df['625'].values)
x13_shared = theano.shared(df['650'].values)
x14_shared = theano.shared(df['675'].values)
x15_shared = theano.shared(df['700'].values)
x16_shared = theano.shared(df['725'].values)
x17_shared = theano.shared(df['750'].values)
x18_shared = theano.shared(df['775'].values)
x19_shared = theano.shared(df['800'].values)
x20_shared = theano.shared(df['GF'].values)
x21_shared = theano.shared(df['T'].values)

np.random.seed(123)

poisson_model = Model()

with poisson_model:
    
    #priors for unknown model paramters
    alpha = Normal('alpha', mu = 0, sd = 1)
    beta = Normal('beta', mu = 0, sd = 1, shape = 2)
    sigma = HalfNormal('sigma', sd = 1)
    x1 = single_shared
    x2 = value_shared
    x3 = x3_shared
    x4 = x4_shared
    x5 = x5_shared
    x6 = x6_shared
    x7 = x7_shared
    x8 = x8_shared
    x9 = x9_shared
    x10 = x10_shared
    x11 = x11_shared
    x12 = x12_shared
    x13 = x13_shared
    x14 = x14_shared
    x15 = x15_shared
    x16 = x16_shared
    x17 = x17_shared
    x18 = x18_shared
    x19 = x19_shared
    x20 = x20_shared
    x21 = x21_shared
    
    mu = alpha + beta[0]*alpha + beta[1]*x1 + beta[2]*x2 + beta[3]*x3 + beta[4]*x4 + beta[5]*x5 + beta[6]*x6+ beta[7]*x7+ beta[8]*x8+ beta[9]*x9+ beta[10]*x10+ beta[11]*x11+ beta[12]*x12 + beta[13]*x13+ beta[14]*x14+ beta[15]*x15+ beta[16]*x16+ beta[17]*x17+ beta[18]*x18+ beta[19]*x19+ beta[20]*x20+ beta[21]*x21
    
    claims = Poisson('claims', mu = mu, sigma = sigma, observed = df)
    
    start = find_MAP(fmin=optimize.fmin_powell)
    
    step = NUTS(scaling = start)
    
    trace = sample(1000, step, start = start)

Not sure why it is saying index is out of bounds.

What are you trying to do?

I’m trying to learn how to do a bayesian regression so I can compare to our standard GLM methods.

Hi @rosgori

I’ve changed my code and got rid of the index out of bounds by changing beta shape parameter to how many betas I’m using…22. Now I get an error that states chain 0 failed. I’ll research that and start another discussion if I have to. Thanks!