Using yt.csv (4.8 KB) dataset, I’m trying to fit a multiple linear regression model with the followin code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import arviz as az
import pymc3 as pm
df = pd.read_csv('C:\yt.csv')
MultiRegress=pm.Model()
with MultiRegress:
# Paranmeter
alpha = pm.Normal('alpha', mu=np.mean(df['sales']), sd=1)
beta1 = pm.Normal('beta1', mu=0, sd=1)
beta2 = pm.Normal('beta2', mu=0, sd=1)
beta3 = pm.Normal('beta3', mu=0, sd=1)
# variables
x_1 = pm.Data("x_1", df['youtube']-np.mean(df['youtube']))
x_2 = pm.Data("x_2", df['newspaper']-np.mean(df['newspaper']))
x_3 = pm.Data("x_3", df['facebook']-np.mean(df['facebook']))
# Linear regression
mu = alpha + beta1 * x_1 + beta2 * x_2 + beta3 * x_3
e = pm.HalfCauchy('e', 5)
# Data likelihood
Y_obs = pm.Normal('Y_obs', mu=mu, sd=e, observed=df['sales'])
trace = pm.sample(2000, tune=1000)
ppc = pm.sample_posterior_predictive(trace, samples=2000, model=MultiRegress)
data_ppc = az.from_pymc3(trace=trace, posterior_predictive=ppc)
ax = az.plot_ppc(data_ppc, figsize=(12, 6), mean=False)
ax[0].legend(fontsize=15)
This is the result:
Could please suggest some corrections to the code in order to get a better fit arround the observed Y variable.Thanks & Regards.