Hey, can someone help me, please? I am trying to use old posterior distributions as my new priors by using the Interpolated distribution. My problem is that one feature is categorical with 50 different groups, and I cannot specify the dimension of the distribution the way I normally do. What is my mistake? `trace` is the old inference object from the old model.
Thank you very much in advance for your help.
import numpy as np
from scipy.stats import gaussian_kde
import pymc as pm
import pandas as pd
def compute_density(trace, var_name, num_points):
    """Estimate the posterior density of one variable on an even grid.

    All values stored under ``trace.posterior[var_name]`` are pooled into a
    single 1-D sample, a Gaussian kernel density estimate is fitted to it,
    and the estimate is evaluated on ``num_points`` evenly spaced points.

    Parameters
    ----------
    trace
        Object exposing ``trace.posterior[var_name].values`` as an array
        of samples.
    var_name
        Key of the variable inside ``trace.posterior``.
    num_points
        Number of grid points at which the density is evaluated.

    Returns
    -------
    tuple
        ``(x_vals, y_vals)``: the grid and the estimated density on that
        grid, both 1-D arrays of length ``num_points``.
    """
    # Pool every stored value of the variable into one flat sample.
    samples = np.asarray(trace.posterior[var_name].values).ravel()
    kde = gaussian_kde(samples)
    # NOTE: the grid spans only [min, max] of the observed samples, so the
    # returned density carries no information outside that range.
    x_vals = np.linspace(samples.min(), samples.max(), num_points)
    y_vals = kde(x_vals)
    return x_vals, y_vals
def compute_density_for_all_groups(trace, var_name, num_points):
    """Estimate one posterior density per group of a vector-valued variable.

    The array under ``trace.posterior[var_name].values`` is indexed as
    ``samples[:, :, grp]``, i.e. the third axis enumerates the groups. For
    each group the remaining values are pooled, a Gaussian kernel density
    estimate is fitted, and it is evaluated on ``num_points`` evenly spaced
    points between that group's smallest and largest sample.

    Parameters
    ----------
    trace
        Object exposing ``trace.posterior[var_name].values`` as an array
        with at least three axes.
    var_name
        Key of the variable inside ``trace.posterior``.
    num_points
        Number of grid points per group.

    Returns
    -------
    tuple
        ``(x_vals, y_vals)``: two arrays of shape
        ``(num_groups, num_points)``. Row ``g`` holds the grid and the
        density for group ``g``. Each row is 1-D, which is what a
        one-dimensional interpolation routine needs; the stacked 2-D
        arrays must be split by row before being used that way.
    """
    # Extract the samples of the requested variable.
    samples = np.asarray(trace.posterior[var_name].values)
    num_groups = samples.shape[2]
    all_x_vals = []
    all_y_vals = []
    for grp in range(num_groups):
        # Pool all values belonging to this group into one flat sample.
        grp_samples = samples[:, :, grp].ravel()
        kde = gaussian_kde(grp_samples)
        # Every group gets its own grid because the sample ranges differ.
        x_vals = np.linspace(grp_samples.min(), grp_samples.max(), num_points)
        all_x_vals.append(x_vals)
        all_y_vals.append(kde(x_vals))
    return np.array(all_x_vals), np.array(all_y_vals)
# Evaluate the old posterior densities that will serve as the new priors.
# The scalar parameters yield 1-D grids; the categorical effect yields one
# row per group, i.e. arrays of shape (num_groups, num_points).
x_vals_cont, y_vals_cont = compute_density(trace, 'continuous_var', 100)
x_vals_pi, y_vals_pi = compute_density(trace, 'pi_var', 100)
x_vals_alpha, y_vals_alpha = compute_density(trace, 'alpha_var', 100)
x_vals_cat, y_vals_cat = compute_density_for_all_groups(trace, 'categorical_var', 100)

# Data preparation
year_mask = (data['YEAR'] == X).to_numpy()
year_data = data[year_mask]
counts_data = year_data['COUNT'].values
log_data = year_data['LOG_Data'].values
# Factorize over ALL rows so the set and order of groups does not depend on
# the selected year, then keep only the selected year's rows so that the
# index array has the same length as log_data and counts_data.
# NOTE(review): this assumes the old model's groups were encoded in the same
# sorted order -- confirm against the coords of the old trace.
all_cat_idx, groups = pd.factorize(data['CATEGORY'], sort=True)
cat_idx = all_cat_idx[year_mask]
coords = {"groups": groups, "log_data": log_data}

if len(groups) != len(x_vals_cat):
    raise ValueError(
        f"Found {len(groups)} categories in the data but "
        f"{len(x_vals_cat)} group densities in the old posterior."
    )

# Bayesian model
with pm.Model(coords=coords) as bayesian_model:
    category_data = pm.Data("category_data", cat_idx, mutable=True)
    log_input = pm.Data('log_input', log_data, mutable=True)
    beta_cont = pm.Interpolated('beta_cont', x_points=x_vals_cont, pdf_points=y_vals_cont)
    pi_var = pm.Interpolated('pi_var', x_points=x_vals_pi, pdf_points=y_vals_pi)
    # pm.Interpolated builds a univariate spline from x_points/pdf_points and
    # therefore only accepts 1-D arrays; passing the 2-D per-group arrays is
    # what raised "too many axes: 2 ... expected rank=1". Create one scalar
    # prior per group from its own row and stack them into a vector.
    beta_cat_per_group = [
        pm.Interpolated(f'beta_cat_{grp}', x_points=x_grp, pdf_points=y_grp)
        for grp, (x_grp, y_grp) in enumerate(zip(x_vals_cat, y_vals_cat))
    ]
    # Expose the stacked vector under the original name with the "groups" dim
    # so that it can be indexed by category and shows up labelled in the trace.
    beta_cat = pm.Deterministic('beta_cat', pm.math.stack(beta_cat_per_group), dims="groups")
    alpha_var = pm.Interpolated('alpha_var', x_points=x_vals_alpha, pdf_points=y_vals_alpha)
    lambda_freq = pm.math.exp(beta_cat[category_data] + beta_cont * log_input)
    y_observed = pm.ZeroInflatedNegativeBinomial('y_observed', psi=pi_var, mu=lambda_freq, alpha=alpha_var, observed=counts_data, shape=lambda_freq.shape)
    # Sampling needs the model context, so it stays inside the `with` block.
    trace_result = pm.sample(1000, chains=4, tune=1000, return_inferencedata=True)
    pm.sample_posterior_predictive(trace_result, extend_inferencedata=True)
My Error:
{
ValueError Traceback (most recent call last)
Cell In[57], line 7
5 continuous_var = pm.Interpolated('continuous_var', x_points=x_vals_cont, pdf_points=y_vals_cont)
6 pi_var = pm.Interpolated('pi_var', x_points=x_vals_pi, pdf_points=y_vals_pi)
----> 7 categorical_var = pm.Interpolated('categorical_var', x_points=x_vals_cat, pdf_points=y_vals_cat, dims=\"groups\")
8 alpha_var = pm.Interpolated('alpha_var', x_points=x_vals_alpha, pdf_points=y_vals_alpha)
10 lambda_freq = pm.math.exp(categorical_var[category_data] + continuous_var * log_input)
File c:\\ProgramData\\miniconda3\\envs\\test\\lib\\site-packages\\pymc\\distributions\\distribution.py:308, in Distribution.__new__(cls, name, rng, dims, initval, observed, total_size, transform, *args, **kwargs)
305 elif observed is not None:
306 kwargs[\"shape\"] = tuple(observed.shape)
--> 308 rv_out = cls.dist(*args, **kwargs)
310 rv_out = model.register_rv(
311 rv_out,
312 name,
(...)
317 initval=initval,
318 )
320 # add in pretty-printing support
File c:\\ProgramData\\miniconda3\\envs\\test\\lib\\site-packages\\pymc\\distributions\\continuous.py:3622, in Interpolated.dist(cls, x_points, pdf_points, *args, **kwargs)
3620 @classmethod
3621 def dist(cls, x_points, pdf_points, *args, **kwargs):
-> 3622 interp = InterpolatedUnivariateSpline(x_points, pdf_points, k=1, ext=\"zeros\")
3624 Z = interp.integral(x_points[0], x_points[-1])
3625 cdf_points = interp.antiderivative()(x_points) / Z
File c:\\ProgramData\\miniconda3\\envs\\test\\lib\\site-packages\\scipy\\interpolate\\fitpack2.py:667, in InterpolatedUnivariateSpline.__init__(self, x, y, w, bbox, k, ext, check_finite)
664 raise ValueError('x must be strictly increasing')
666 # _data == x,y,w,xb,xe,k,s,n,t,c,fp,fpint,nrdata,ier
--> 667 self._data = dfitpack.fpcurf0(x, y, k, w=w, xb=bbox[0],
668 xe=bbox[1], s=0)
669 self._reset_class()
ValueError: too many axes: 2 (effrank=2), expected rank=1
"
}