I’m kind of new at Bayesian Modeling and very new at Python. I’m trying to model the shifted beta geometric distribution (sBG) just like Daniel Weitzenfield did. This model was based in a paper published by Fader and Hardie. The problem is I can’t get closer to the same results as Daniel, which are very similar to the ones in the paper. He’s using pymc and I’m using pymc3 (using DensityDist function). My beta posterior distribution grows as far I the higher boundary I set. I may be scrweing something within the model context, but can’t figure it out. Any help will be appreciated!
%matplotlib inline
import pymc3 as pm
import numpy as np
import scipy.stats as stats
from scipy import optimize
import matplotlib.pyplot as plt
import pymc3 as pm
import theano.tensor as tt
from theano.tensor import _shared
example_data = np.array([1000, 869, 743, 653, 593, 551, 517, 491])
def n_lost(data):
lost = [None]
for i in range(1, len(data)):
lost.append(data[i - 1] - data[i])
return lost
example_data_n_lost = n_lost(example_data)
n = len(example_data)
data = np.asarray((example_data, example_data_n_lost))
with pm.Model() as model:
alpha = pm.Uniform('alpha', 0.00001, 1000.0, testval=1)
beta = pm.Uniform('beta', 0.00001, 1000.0, testval=1)
def P_T_is_t(alpha=alpha, beta=beta, num_periods=n):
p = np.array([None, alpha / (alpha + beta)])
for t in range(2, num_periods):
pt = ((beta + t - 2) / (alpha + beta + t - 1)) * p[t-1]
p = np.append(p,pt)
return p
def survival_function(num_periods=n):
p = P_T_is_t()
s = np.array([None, 1 - p[1]])
for t in range(2, num_periods):
s = np.append(s, s[t-1] - p[t])
return s
def logp(value):
active = value[0,:]
lost = value[1,:]
# Those who've churned along the way...
p = P_T_is_t()
died = np.multiply(np.log(p[1:]), lost[1:])
# and those still active in last period
sf = survival_function()
still_active = np.log(sf[-1]) * active[-1]
return sum(died) + still_active
retention = pm.DensityDist('retention', logp, observed=data)
trace = pm.sample(10000, step=pm.Metropolis())
burned_trace = trace[3000:]
burned_trace['alpha'].mean()
burned_trace['beta'].mean()