I am attempting to construct a model for the function y = coefficient * input ** exponent, where both coefficient and exponent vary with time and are assumed to follow a Gaussian Process distribution. To ensure that the exponent only takes on values between 0 and 1, I use a sigmoid link function to compress the output from the GP into this valid range.
The simulated data have circular dynamics but is quite close to periodic as the discrepancy between frequencies is not too large.
I am trying to play around with this or at least get it to work, being an beginner with GP’s I run into trouble.
Consider the code below. The output is quite far from what I would expect, seems like the scale of the coefficient is off, also the output from the GP posteriors are basically 1, can anyone see what I am doing wrong?
import numpy as np
import pymc3 as pm
import matplotlib.pyplot as plt
import pandas as pd
def generate_input_data(timerange):
import random
return np.array([random.uniform(0.5, 0.7) for x in range(len(timerange))]).reshape(-1, 1)
def get_coefficient_val(date, coefficientknots):
coefficientval = 0
for knot, val in coefficientknots.items():
lb, ub = map(int, knot.split("-"))
if lb <= date.day <= ub:
coefficientval = val
break
return coefficientval
def get_exponent_val(date, exponentknots):
exponentval = 1.0
for exponentknot, val in exponentknots.items():
lb, ub = map(int, exponentknot.split("-"))
if lb <= date.day <= ub:
exponentval = val
break
return exponentval
def compute_true_values(timerange, coefficientknots, exponentknots):
true_values = []
for idx, date in enumerate(timerange):
coefficientval = get_coefficient_val(date, coefficientknots)
exponentval = get_exponent_val(date, exponentknots)
if coefficientval == 0 or exponentval == 1.0:
raise ValueError("Invalid coefficient or exponent value")
val = generate_input_data([date])[0]
entry = coefficientval * val ** exponentval
true_values.append(entry)
return np.array(true_values)
def run_model(timerange, input, true_values, coefficientknots, exponentknots, exp_cov=False):
with pm.Model() as model:
if exp_cov:
l = pm.Uniform("l", 0, 10)
covcoefficient = pm.gp.cov.ExpQuad(1, l)
gpcoefficient = pm.gp.Latent(cov_func=covcoefficient)
else:
ls_coeff = pm.Gamma('lscoeff', 4.0, 0.1)
period_coeff = pm.Gamma('period', 15, 7)
gpcoefficient = pm.gp.Latent(pm.gp.cov.Periodic(input_dim=1,
period=period_coeff,
ls=ls_coeff))
gpcoefficientprior = gpcoefficient.prior("gpcoefficientprior", input)
if exp_cov:
lsat = pm.Uniform("lsat", 0, 10)
covsat = pm.gp.cov.ExpQuad(1, lsat)
gpsat = pm.gp.Latent(cov_func=covsat)
else:
ls_sat = pm.Gamma('lssat', 4.0, 0.1)
period_sat = pm.Gamma('period_sat', 15, 1)
gpsat = pm.gp.Latent(pm.gp.cov.Periodic(input_dim=1,
period=period_sat,
ls=ls_sat))
gpsatprior = pm.math.sigmoid(gpsat.prior("gpsatprior", input))
f = gpcoefficientprior * input ** gpsatprior
sigma = pm.HalfCauchy("sigma", beta=5)
v = pm.Gamma("v", alpha=2, beta=0.1)
y_ = pm.StudentT("y", mu=f, lam=1.0 / sigma, nu=v, observed=true_values)
trace = pm.sample(1000, chains=2, cores=1, return_inferencedata=True)
coefficient_means = {}
saturation_means = {}
for knot, val in coefficientknots.items():
lb, ub = (int(x) for x in knot.split("-"))
mask = (timerange.day >= lb) & (timerange.day <= ub)
posterior = pm.sample_posterior_predictive(trace, var_names=['gpcoefficientprior'], samples=1000)
coefficient_means[knot] = np.mean(posterior["gpcoefficientprior"][:, mask], axis=0)
for knot, val in exponentknots.items():
lb, ub = (int(x) for x in knot.split("-"))
mask = (timerange.day >= lb) & (timerange.day <= ub)
posterior = pm.sample_posterior_predictive(trace, var_names=['gpsatprior'], samples=1000)
saturation_means[knot] = np.mean(posterior["gpsatprior"][:, mask], axis=0)
y_s = []
coefficients = trace.posterior.gpcoefficientprior.mean(axis=0).mean(axis=0).mean(axis=0).to_numpy()
saturations = trace.posterior.gpsatprior.mean(axis=0).mean(axis=0).mean(axis=0).to_numpy()
y_s = np.power(coefficients * input.reshape(-1), saturations)
y = np.array(y_s)
plt.plot([idx for idx, _ in enumerate(input)], true_values, label='true values')
plt.plot([idx for idx, _ in enumerate(input)], y, label='predicted values', alpha=0.3)
plt.legend(loc='best')
plt.xticks(rotation=45)
plt.show()
return True
def main():
timerange = pd.date_range(start="2023-01-01", end="2023-03-24", freq='D')
coefficientknots = {'0-15': 2.0, '15-32': 3.0}
exponentknots = {'0-15': 0.9, '15-32': 0.3}
input = generate_input_data(timerange)
true_values = compute_true_values(timerange, coefficientknots, exponentknots)
run_model(timerange, input, true_values, coefficientknots, exponentknots)
if __name__ == '__main__':
main()