Hi Community,
I am trying to implement a custom likelihood function and need to iterate over individual values of the observed output variable. Need to use these values as an end argument to arange function. I am trying to use pytensor.scan to achieve the same within the LogP function.
I’m a newbie to PyMC, can somebody help me with this?
Input to logP function -
time_to_event.shape
TensorConstant{(1,) of 50}
[24. 24. 24. 24. 24. 24. 24. 24. 24. 24. 24. 24. 24. 24. 24. 24. 24. 24., 24. 24. 24. 24. 24. 24. 24. 24. 24. 24. 24. 24. 24. 24. 24. 24. 24. 24., 24. 24. 24. 17. 2. 6. 14. 23. 16. 7. 20. 20. 12. 10.]
def standardize(x, mu, sigma):
return (x - mu) / sigma
def standardNormCdf(x):
return 0.5 + 0.5 * pm.math.erf(x / pm.math.sqrt(2))
def exponetiateDotProduct(x, y):
return exp(x * y)
def getContributionFromInterval(interval, mu, sigma):
print('getContributionFromInterval')
logT = log(interval)
a = standardize(logT, mu, sigma)
return log(1 - standardNormCdf(a))
def getSurvival(time_to_event,mu,sigma,theta):
survival = 0
for interval in pt.arange(1,time_to_event):
logT = log(interval)
a = standardize(logT)
features = data[getListOfFeatures()]
A = (-1) * getContributionFromInterval(interval, mu, sigma)
B = getContributionFromInterval(interval - 1, mu, sigma)
survival += exponetiateDotProduct(theta, features) * (A + B)
return survival
def runModel():
bcphm_model = pm.Model()
with bcphm_model:
data = pd.read_csv('data/prevcovid_SF.csv')
data = data.head(50)
data.to_csv('first50.csv', index=False)
test_columns = ['time_to_default', 'time_to_prepayment', 'DTI']
data = data[test_columns]
data['event'] = data.apply(
lambda x: 'default' if x.time_to_default > 0 else ('prepayment' if x.time_to_prepayment > 0 else 'active'),
axis=1)
data['time_to_event'] = data.apply(
lambda x: x.time_to_default if x.time_to_default > 0 else (
x.time_to_prepayment if x.time_to_prepayment > 0 else 24), axis=1)
data.sort_values(by=['event'], inplace=True)
events = np.where(data.event.values == "default", 0, np.where(data.event.values == "prepayment", 1, 2))
time_to_event = data.time_to_event.values
features = data['DTI'].values
# Define the priors for 𝜭D and 𝜭D
# Zero mean normal prior with standard deviation 100
theta_D = pm.Normal('theta_D', mu=0, sigma=100)
theta_P = pm.Normal('theta_P', mu=0, sigma=100)
# Define the priors for μD and μP
# Zero mean normal prior with standard deviation 10
mu_D = pm.Normal('mu_D', mu=0, sigma=10)
mu_P = pm.Normal('mu_P', mu=0, sigma=10)
# Define the priors for σD and σP
# Exponential with mean = 100
sigma_D = pm.Exponential('sigma_D', 100)
sigma_P = pm.Exponential('sigma_P', 100)
def logp(time_to_event, mu_P, mu_D, sigma_P, sigma_D, theta_D, theta_P, event, features):
failureRate = where(
pt.eq(event, 0),
computeFailureRate(sigma_D, mu_D, time_to_event, theta_D, features),
where(pt.eq(event, 1),
computeFailureRate(sigma_P, mu_P, time_to_event, theta_P, features),
1)
)
a, b = pytensor.scan(getSurvival, sequences=[time_to_event], outputs_info=None)
print(a.eval())
return time_to_event
likelihood = pm.CustomDist('LL', mu_P, mu_D, sigma_P, sigma_D, theta_D, theta_P, events, features,
logp=logp,
observed=time_to_event)
step = pm.Metropolis()
trace = pm.sample(5, step=step)
if __name__ == '__main__':
runModel()