Consider the code below:
import numpy as np
import pymc as pm
import matplotlib.pyplot as plt
import pandas as pd
import pytensor.tensor as tt
def generate_input_data(timerange):
import random
return np.array([random.uniform(0.5, 0.7) for x in range(len(timerange))]).reshape(-1, 1)
def get_coefficient_val(date, coefficientknots):
coefficientval = 0
for knot, val in coefficientknots.items():
lb, ub = map(int, knot.split("-"))
if lb <= date.day <= ub:
coefficientval = val
break
return coefficientval
def get_exponent_val(date, exponentknots):
exponentval = 1.0
for exponentknot, val in exponentknots.items():
lb, ub = map(int, exponentknot.split("-"))
if lb <= date.day <= ub:
exponentval = val
break
return exponentval
def compute_true_values(timerange, coefficientknots, exponentknots):
true_values = []
for idx, date in enumerate(timerange):
coefficientval = get_coefficient_val(date, coefficientknots)
exponentval = get_exponent_val(date, exponentknots)
if coefficientval == 0 or exponentval == 1.0:
raise ValueError("Invalid coefficient or exponent value")
val = generate_input_data([date])[0]
entry = coefficientval * val ** exponentval
true_values.append(entry)
return np.array(true_values)
def main():
timerange = pd.date_range(start="2023-01-01", end="2023-07-24", freq='D')
coefficientknots = {'0-15': 2.0, '15-32': 3.0}
exponentknots = {'0-15': 0.9, '15-32': 0.3}
data = generate_input_data(timerange)
true_values = compute_true_values(timerange, coefficientknots, exponentknots)
# Gather stuff into a dataframe for organization
df = pd.DataFrame(np.c_[data, true_values], index=timerange, columns=['data', 'targets'])
df['day'] = df.index.day
# Create the map by assiging a bin index to each datapoint based on which day it falls into
knot_cutpoints = [0, 15, 32]
df['knot_idx'] = pd.cut(df['day'], knot_cutpoints, labels=False)
# Train-test split includes the knot mapping
df_train = df.iloc[:140].copy()
df_test = df.iloc[140:].copy()
# These names will be used to make the model output look nicer
knot_names = [f'{a}-{b}' for a, b in zip(knot_cutpoints[:-1], knot_cutpoints[1:])]
# Coords let us refer to dimension by their name, rather than having to remember their shapes
coords = {'knot': knot_names}
with pm.Model(coords=coords) as model:
data_pt = pm.MutableData('input', df_train.data)
targets_pt = pm.MutableData(f'convresponse', df_train.targets)
# Here is the mapping; when we do out-of-sample prediction we'll update this as well
knot_idx_pt = pm.MutableData('knot_idx', df_train.knot_idx)
# Using coords, draw all the parameters for all knots at once
knot_coef = pm.Normal(f'knot_coef', 0, 1, dims=['knot'])
knot_exponent = pm.Gamma(f'knot_exponent', 3, 3, dims=['knot'])
# Use the mapping defined above to assign the correct coefficient to the
# correct observation
modelledresponse = tt.power(knot_coef[knot_idx_pt] * data_pt,
knot_exponent[knot_idx_pt])
sigma = pm.HalfCauchy(name="sigma", beta=10)
outcome = pm.Normal(name="outcome",
mu=modelledresponse,
sigma=sigma,
observed=targets_pt,
shape=data_pt.shape[0], # Important!
initval={'knot_exponent': [1, 1]}) # I found sometimes the exponents blow up
import pymc.sampling_jax as jax
idata = jax.sample_blackjax_nuts(1000,
tune=1000,
target_accept=0.97,
cores=2,
random_seed=0,
idata_kwargs={"log_likelihood": True},
return_inferencedata=True)
# idata = pm.sample()
with model:
idata = pm.sample_posterior_predictive(idata, extend_inferencedata=True)
pm.set_data({'input': df_test.data, 'knot_idx': df_test.knot_idx})
idata = pm.sample_posterior_predictive(idata, var_names=[x for x in idata.posterior],
extend_inferencedata=True, predictions=True)
print("_")
if __name__ == '__main__':
main()
I want to generate samples of the posterior of my parameters over the testdata, i.e given the code above where we have testdata of 60 datapoints and an model which is mapped up by knots to respective dates i want to get the posterior samples of my parameters over these 60 datapoints.
As i am doing it atm inspecting the predictions in idata we get a set of (4, 1000, 2) (cores, samples, coefficient value at knot).
I would like to get this mapped up over the horizon such that the set becomes (4, 1000, 60) (cores, samples, coefficient value at timestep) without manually mapping it up.
Since it is already mapped up in some sense i assume this can be done. I find it hard to formulate this question so if anyone has any questions please ask.