Thank you very much, that makes sense and is the same fix they apply in the R-package I linked above.
In principle, the reason we see some observations with frequency x = 0 (and therefore also monetary value m_x = 0) is that the first transaction is discarded when these quantities are computed. In other words, by enforcing that x and m_x are non-zero, we will not train the model on any customer with a single transaction.
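To make this concrete, here is a small illustration with made-up data (the names toy and toy_rfm are just for the example): summary_data_from_transaction_data only counts repeat transactions, so a customer with a single purchase ends up with frequency = 0 and monetary_value = 0, while customers with repeat purchases get the mean of their repeat amounts:

import pandas as pd
from lifetimes.utils import summary_data_from_transaction_data

# Hypothetical two-customer transaction log
toy = pd.DataFrame({
    'customer_id': [1, 2, 2],
    'date': pd.to_datetime(['1997-01-05', '1997-01-10', '1997-03-02']),
    'amount': [20.0, 15.0, 25.0],
})

toy_rfm = summary_data_from_transaction_data(
    toy,
    'customer_id',
    'date',
    monetary_value_col='amount',
    observation_period_end=pd.to_datetime('1997-09-30'),
    freq='W'
)
print(toy_rfm)
# customer 1 (single purchase):   frequency = 0, monetary_value = 0.0
# customer 2 (one repeat purchase): frequency = 1, monetary_value = 25.0 (mean of repeats)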
Thanks @jlindbloom!
The new code is:
import pymc3 as pm
import numpy as np
import pandas as pd
from lifetimes.datasets import load_dataset
from lifetimes.utils import summary_data_from_transaction_data
from pymc3.math import log
from pymc3.distributions.special import gammaln
cdnow_transactions = load_dataset(
    'CDNOW_sample.txt',
    header=None,
    delim_whitespace=True,
    names=['customer_id', 'customer_index', 'date', 'quantity', 'amount'],
    converters={'date': lambda x: pd.to_datetime(x, format="%Y%m%d")}
)

rfm = summary_data_from_transaction_data(
    cdnow_transactions,
    'customer_id',
    'date',
    monetary_value_col='amount',
    observation_period_end=pd.to_datetime('1997-09-30'),
    freq='W'
)
class GammaGamma(pm.Continuous):
    """
    Custom distribution class for the Gamma-Gamma likelihood.
    """
    def __init__(self, p, q, gamma, *args, **kwargs):
        super(GammaGamma, self).__init__(*args, **kwargs)
        self.p = p
        self.q = q
        self.gamma = gamma

    def logp(self, x, m_x):
        """
        Source
        https://github.com/CamDavidsonPilon/lifetimes/blob/master/lifetimes/fitters/gamma_gamma_fitter.py
        and
        https://github.com/bachmannpatrick/CLVTools/blob/master/src/gg_LL.cpp
        """
        log_likelihood_values = (
            gammaln(self.p * x + self.q)
            - gammaln(self.p * x)
            - gammaln(self.q)
            + self.q * log(self.gamma)
            + (self.p * x - 1) * log(m_x)
            + (self.p * x) * log(x)
            - (self.p * x + self.q) * log(x * m_x + self.gamma)
        )
        # Unlike lifetimes, which negates this sum because it minimizes,
        # PyMC3 expects logp to return the log-likelihood itself.
        return log_likelihood_values.sum()
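# Note: the logp above is the log of the Fader/Hardie marginal density of the
# observed average transaction value m_x given x transactions, with the
# per-customer scale v integrated out:
#
#   f(m_x | p, q, gamma; x) = Gamma(p*x + q) / (Gamma(p*x) * Gamma(q))
#                             * gamma**q * m_x**(p*x - 1) * x**(p*x)
#                             / (gamma + m_x * x)**(p*x + q)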
# Extract data for model following notation from Fader/Hardie
non_zero_slice = (rfm.frequency != 0) & (rfm.monetary_value != 0)
N = rfm.loc[non_zero_slice].shape[0] # number of customers
x = rfm.loc[non_zero_slice].frequency.values
m_x = rfm.loc[non_zero_slice].monetary_value.values
n_draws = 2000
with pm.Model() as gg_model:
    # Uninformative priors on the model hyperparameters, see Polson and Scott
    # https://projecteuclid.org/download/pdfview_1/euclid.ba/1354024466
    p = pm.HalfCauchy('p', beta=2)
    q = pm.HalfCauchy('q', beta=2)
    gamma = pm.HalfCauchy('gamma', beta=2)

    v = pm.Gamma('v', alpha=q, beta=gamma, shape=N, testval=np.random.rand(N))
    z_bar = pm.Gamma('z_bar', alpha=p * x, beta=v * x, shape=N, testval=np.random.rand(N))

    # Custom distribution for the Gamma-Gamma likelihood function
    loglikelihood = GammaGamma("loglikelihood", p=p, q=q, gamma=gamma, observed={'x': x, 'm_x': m_x})

with gg_model:
    trace = pm.sample(n_draws, tune=2000, init=None, cores=8, chains=2, pickle_backend='dill')
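After sampling, a quick sanity check is to summarize the population-level parameters and compare the implied population mean transaction value, E(M) = p * gamma / (q - 1) in Fader/Hardie's notation, against the sample mean of m_x. This is just a sketch; the names summary, post_p, post_q, post_gamma and implied_mean are illustrative:

with gg_model:
    # Posterior summaries for the population-level parameters only
    summary = pm.summary(trace, var_names=['p', 'q', 'gamma'])
print(summary)

# Posterior-mean estimates of the hyperparameters
post_p = trace['p'].mean()
post_q = trace['q'].mean()
post_gamma = trace['gamma'].mean()

# Implied population mean transaction value (finite only for q > 1)
implied_mean = post_p * post_gamma / (post_q - 1)
print(implied_mean, m_x.mean())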
One further question that comes to mind: is it possible to apply such a model to customers with only a single purchase, or none at all?