Hi there,
I would like to define the dimensionality of my data via coords/dims, independent if either the feature or target data is multivariate or univariate. As such, I would always like to define the data via dims = (N_samples, N_features)
If both feature and target are multivariate, that works fine, but if either one is univariate the dims argument seems to not work via (N_data, N_features)
, but instead just takes on a scalar argument N_data
.
Is there any way around that to always define dims = (N_samples, N_features)
.
This is a M(non)WE with the error code I get for the desired workflow:
###################################################
# Import some libraries
import numpy as np
import pandas as pd
import arviz as az
import pymc as pm
print(f"Running on PyMC v{pm.__version__}") #Running on PyMC v5.16.2
RANDOM_SEED = 8927
rng = np.random.default_rng(RANDOM_SEED)
###################################################
# Sample some data, target (univariate) and feature (multivariate) as dataframe
dates = pd.date_range(start='2023-01-01', periods=100, freq='D')
data = np.random.randn(100, 5)
df = pd.DataFrame(data, index=dates, columns=['f1', 'f2', 'f3', 'f4', 'f5'])
coefs = np.array([1.5, -2.0, 3.0, -0.5, 2.0])
noise = np.random.randn(100) * 0.1
df['target'] = df.dot(coefs) + noise
df_features = df[['f1', 'f2', 'f3', 'f4', 'f5']]
df_target = df[['target']]
###################################################
# Assign Model
# Define PYMC Model
coords = {
"T_features": df_features.index, "N_features": df_features.columns,
"N_target": df_target.columns
}
with pm.Model(coords=coords) as LinearRegression:
feature_array = pm.Data("feature", df_features.values, mutable=True, dims = ("T_features", "N_features") )
target_array = pm.Data("target", df_target.values, mutable=True, dims = ("T_features", "N_target") )
σ = pm.HalfNormal(name="σ", sigma=1)
# Beta estimates
β0 = pm.Normal(name="β0", mu=0.0, sigma=1)
β = pm.Normal(name="β", mu=0.0, sigma=1, dims = "N_features")
# Data
mu = pm.Deterministic("mu", β0 + pm.math.dot(feature_array, β), dims = "T_features")
observed = pm.Normal(name="observed", mu=mu, sigma=σ, observed=target_array, dims = ("T_features", "N_target"))
with LinearRegression:
trace = pm.sample()
pm.sample_posterior_predictive(trace, extend_inferencedata=True)
# ValueError: Runtime broadcasting not allowed. One input had a distinct dimension length of 1, but was not marked as broadcastable: (input[0].shape[1] = 1, input[1].shape[1] = 100). If broadcasting was intended, use `specify_broadcastable` on the relevant input.