Hi,
I’m quite new to pymc and want to add strict constraints to a linear regression model.
import pymc as pm
import pandas as pd
import numpy as np
import pytensor.tensor as at
if __name__ == "__main__":
df = pd.read_csv("BLR_lib_GU48-1234.csv", header=None, index_col=False)
X = df.iloc[:, :-1]
# Standardize the features
stds = X.std().values
X -= X.mean()
X /= X.std()
y_obs = df.iloc[:, -1]
with pm.Model(coords={"predictors": X.columns.values}) as model:
# Prior on error SD
sigma = pm.HalfNormal("sigma", sigma=0.1)
# coefficients
betas = pm.Normal("beta", sigma=1, dims="predictors")
expected_value = at.dot(X.values, betas)
# constraints
con_1 = pm.math.ge(betas/stds[0] + betas/stds[1], 0.0)
con_2 = pm.math.ge(betas[2]/stds[2], 0.0)
con_3 = pm.math.le(betas[4]/stds[4], 0.0)
con_4 = pm.math.ge(betas[2]/stds[2] + betas[3]/stds[3] + betas[4]/stds[4], 0.0)
con_5 = pm.math.ge(betas[5]/stds[5], 0.0)
con_6 = pm.math.le(betas[8]/stds[8], 0.0)
con_7 = pm.math.ge(betas[5]/stds[5] + betas[6]/stds[6] + betas[7]/stds[7] + betas[8]/stds[8], 0.0)
potential_1 = pm.Potential("con_1", pm.math.log(pm.math.switch(con_1, 1, 0.0)))
potential_2 = pm.Potential("con_2", pm.math.log(pm.math.switch(con_2, 1, 0.0)))
potential_3 = pm.Potential("con_3", pm.math.log(pm.math.switch(con_3, 1, 0.0)))
potential_4 = pm.Potential("con_4", pm.math.log(pm.math.switch(con_4, 1, 0.0)))
potential_5 = pm.Potential("con_5", pm.math.log(pm.math.switch(con_5, 1, 0.0)))
potential_6 = pm.Potential("con_6", pm.math.log(pm.math.switch(con_6, 1, 0.0)))
potential_7 = pm.Potential("con_7", pm.math.log(pm.math.switch(con_7, 1, 0.0)))
y = pm.Normal('y', mu=expected_value, sigma=sigma, observed=y_obs.values)
start = {"sigma": 1.0, "beta": np.zeros(len(X.columns))}
idata = pm.sample(draws=2000, tune=1000, chains=4, step=[pm.NUTS(target_accept=0.99, max_treedepth=30)])
The regression is performed with standardized input data. The restrictions are specified for the unstandardized coefficients, which is why they need to be transformed. During execution, when strict constraints are applied (using pm.math.log(pm.math.switch(constraint, 1, 0.0))
), I receive the error message:
pymc.exceptions.SamplingError: Initial evaluation of model at starting point failed!
Starting values:
{'sigma_log__': array(-2.67328751), 'beta': array([ 0.95591867, 0.33871937, 0.00713485, 0.73745527, 0.74412467,
-0.82814385, 0.69393661, 0.62493708, 0.95804281, -0.80145671,
-0.51079383, 0.11153434, -0.66721516])}
This error disappears when I use a low number for the false state in pm.math.switch
. Does anyone know why this is happening and whether the constraints have been integrated into the model?