How can I use "if __name__ == '__main__':" in my code below?

Hi all,

After I run my code, I get the following warning:

"RuntimeError:
An attempt has been made to start a new process before the
current process has finished its bootstrapping phase.

    This probably means that you are not using fork to start your
    child processes and you have forgotten to use the proper idiom
    in the main module:

        if __name__ == '__main__':
            freeze_support()
            ...

    The "freeze_support()" line can be omitted if the program
    is not going to be frozen to produce an executable."

I wanted to know how and where I should add "if __name__ == '__main__':" in my code below:

from scipy import stats, optimize
import pymc3 as pm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

#from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from theano import shared

np.random.seed(9)

#Load the Data
dataset = pd.read_csv('PV-PCM.csv')
X=dataset.iloc[:,[0,1,2,3,4]].values
y=dataset.iloc[:,5].values

X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size = 0.2, random_state=42)

#Shapes
X.shape, y.shape, X_tr.shape, X_te.shape

#Preprocess data for Modeling
shA_X = shared(X_tr)

#Generate Model
linear_model = pm.Model()

with linear_model:
    # Priors for unknown model parameters
    alpha = pm.Normal("alpha", mu=y_tr.mean(), sd=10)
    betas = pm.Normal("betas", mu=0, sd=1000, shape=X.shape[1])
    sigma = pm.HalfNormal("sigma", sd=100)  # you could also try a HalfCauchy, which has longer/fatter tails
    mu = alpha + pm.math.dot(betas, X_tr.T)
    likelihood = pm.Normal("likelihood", mu=mu, sd=sigma, observed=y_tr)
    step = pm.NUTS()
    trace = pm.sample(1000, step)

chain = trace[100:]
#pm.traceplot(chain);

#Traceplot
pm.traceplot(trace)

ppc = pm.sample_prior_predictive(samples=1000, random_seed=9)

pm.plot_posterior(trace, figsize = (12, 10))

sns.kdeplot(y_tr, alpha=0.5, lw=4, c='b')
for i in range(100):
    sns.kdeplot(ppc['likelihood'][i], alpha=0.1, c='g')

alpha_pred = chain['alpha'].mean()
betas_pred = chain['betas'].mean(axis=0)

y_pred = alpha_pred + np.dot(betas_pred, X_tr.T)

Thank you all.

There are more elegant ways of doing it, but if you just want to get it working, you can try wrapping your entire script in a giant function. The guard matters because pm.sample starts child processes, and on platforms that spawn new processes instead of forking, each child re-imports your main module; without the guard, that re-import would run pm.sample again, which is exactly the bootstrapping error you got:


from scipy import stats, optimize
# other imports

def giant_function():
    #Load the Data

    # model data and sample

    # plot results
    ...

if __name__ == '__main__':
    giant_function()
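
One caveat with this pattern: anything giant_function computes is local to the function, so if you need results afterwards (your trace, y_pred, and so on), return them and catch them at the call site. A minimal sketch (the specific return values here are just an illustration):

def giant_function():
    # load data, build the model, sample, plot ...
    return trace, y_pred

if __name__ == '__main__':
    trace, y_pred = giant_function()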

Thank you.
That problem is resolved, but I ran into another one. There is an error/warning as follows:

Sampling 4 chains for 2_500 tune and 1_000 draw iterations (10_000 + 4_000 draws total) took 75 seconds.
There were 103 divergences after tuning. Increase target_accept or reparameterize.
There were 94 divergences after tuning. Increase target_accept or reparameterize.
There were 2 divergences after tuning. Increase target_accept or reparameterize.
The number of effective samples is smaller than 25% for some parameters.

It produces the plots, but it does not produce any variables (nothing!). I don't know what is going on here. For example, I need my "y_pred" from the code below. My CSV file includes 5 IVs and 1 DV with 203 rows of data. I just need to get y_pred for my CSV file with Bayesian linear regression, and I have been stuck on this for days. Can you please tell me what I should do?

from scipy import stats, optimize
import pymc3 as pm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
from sklearn import metrics

#from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from theano import shared

np.random.seed(9)

def giant_function():
    #Load the Data
    dataset = pd.read_csv('PV-PCM.csv')
    X = dataset.iloc[:, :-1].values
    y = dataset.iloc[:, 5].values

    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=42)

    #Shapes
    X.shape, y.shape, X_tr.shape, X_te.shape

    #Preprocess data for Modeling
    shA_X = shared(X_tr)

    #Generate Model
    linear_model = pm.Model()

    with linear_model:
        alpha = pm.Normal("alpha", mu=y_tr.mean(), sd=10)
        betas = pm.Normal("betas", mu=0, sd=1000, shape=X.shape[1])
        sigma = pm.HalfNormal("sigma", sd=100)  # you could also try a HalfCauchy, which has longer/fatter tails
        mu = alpha + pm.math.dot(betas, X_tr.T)
        likelihood = pm.Normal("likelihood", mu=mu, sd=sigma, observed=y_tr)
        step = pm.NUTS()
        trace = pm.sample(1000, step, tune=2500)

        chain = trace[100:]
        #pm.traceplot(chain);

        #Traceplot
        pm.traceplot(trace)

        ppc = pm.sample_prior_predictive(samples=1000, random_seed=9)

        pm.plot_posterior(trace, figsize=(12, 10))

    sns.kdeplot(y_tr, alpha=0.5, lw=4, c='b')
    for i in range(100):
        sns.kdeplot(ppc['likelihood'][i], alpha=0.1, c='g')

    alpha_pred = chain['alpha'].mean()
    betas_pred = chain['betas'].mean(axis=0)

    y_pred = alpha_pred + np.dot(betas_pred, X_tr.T)

if __name__ == '__main__':
    giant_function()

Thank you again for taking the time.

Is this what you are looking for?

a = np.repeat(trace['alpha'].reshape((-1,1)),len(y_tr), axis=1)
y_pred = a + np.dot(trace['betas'], X_tr.T)

That will generate a matrix of credible values of mu, one set of samples for each row in X_tr. Note that ignoring sigma will artificially create more certainty in these predictions than your model suggests.
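
If you want predictions that do include sigma, pymc3 can simulate new observations from the posterior with sample_posterior_predictive. A minimal sketch, assuming the linear_model and trace from your code above (the names post_pred and y_pred_mean are just for illustration):

with linear_model:
    # draw simulated data sets from the posterior, so observation noise sigma is included
    post_pred = pm.sample_posterior_predictive(trace, samples=1000, random_seed=9)

# post_pred['likelihood'] has shape (1000, len(y_tr)); the mean over draws gives a
# point prediction per training row, and the spread across draws reflects sigma
y_pred_mean = post_pred['likelihood'].mean(axis=0)

Two notes on this sketch: because mu is built from X_tr directly (the shared variable shA_X is never used in the model), these are predictions for the training rows; to predict for X_te you would build mu from shA_X instead and call shA_X.set_value(X_te) before sampling the posterior predictive. And for the divergence warnings, the first thing to try is what the message says: raise target_accept, e.g. step = pm.NUTS(target_accept=0.95).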