Not able to obtain Predicted values in Bayesian neural network


#1

I am trying to construct a Bayesian neural network to solve my problem, following the PyMC3 tutorial “Variational Inference: Bayesian Neural Networks”. My dependent variable is continuous, but I am not able to obtain the predicted values from the Bayesian neural network.

Code used for construction of neural network :

def construct_nn(ann_input, ann_output):
    """Build a PyMC3 model of a neural network with two hidden tanh layers.

    Args:
        ann_input: Input features fed into the first layer.
        ann_output: Observed targets attached to the 'out' likelihood.

    Returns:
        The ``pm.Model`` holding the weight priors and the likelihood.

    Note:
        The number of input features is read from ``X`` and the dtype from
        ``floatX``; both come from the enclosing scope, not from the
        arguments.
    """
    n_hidden = 5
    n_features = X.shape[1]

    # Random starting points (test values) for the weights, drawn in
    # input -> output order.
    start_in_1 = np.random.randn(n_features, n_hidden).astype(floatX)
    start_1_2 = np.random.randn(n_hidden, n_hidden).astype(floatX)
    start_2_out = np.random.randn(n_hidden).astype(floatX)

    with pm.Model() as neural_network:
        # Standard-normal priors on every weight.
        w_in_1 = pm.Normal('w_in_1', 0, sd=1,
                           shape=(n_features, n_hidden),
                           testval=start_in_1)
        w_1_2 = pm.Normal('w_1_2', 0, sd=1,
                          shape=(n_hidden, n_hidden),
                          testval=start_1_2)
        w_2_out = pm.Normal('w_2_out', 0, sd=1,
                            shape=(n_hidden,),
                            testval=start_2_out)

        # Forward pass: two tanh hidden layers, then a sigmoid output.
        hidden_1 = pm.math.tanh(pm.math.dot(ann_input, w_in_1))
        hidden_2 = pm.math.tanh(pm.math.dot(hidden_1, w_1_2))
        # NOTE(review): the sigmoid keeps the likelihood mean inside (0, 1);
        # confirm that this matches the scale of the continuous target.
        act_out = pm.math.sigmoid(pm.math.dot(hidden_2, w_2_out))

        # Normal likelihood centred on the network output; no sd is passed,
        # so the library default applies.
        pm.Normal('out', mu=act_out, observed=ann_output)

    return neural_network

# Wrap the training data in Theano shared variables and build the model on them.
ann_input = theano.shared(X_train)
ann_output = theano.shared(Y_train)
neural_network = construct_nn(ann_input, ann_output)

# Install a Theano random stream created with seed 42 for PyMC3 to use.
from pymc3.theanof import set_tt_rng, MRG_RandomStreams
set_tt_rng(MRG_RandomStreams(42))

# Fit the model with ADVI for 3000 iterations.
with neural_network:
    inference = pm.ADVI()
    approx = pm.fit(n=3000, method=inference)

# Draw 5000 samples from the fitted approximation.
trace = approx.sample(draws=5000) 

After this step, I want to obtain the predicted values for the continuous dependent variable.


#2

Have a look at: https://docs.pymc.io/notebooks/posterior_predictive.html

You can use:

# `model` stands for your own pm.Model object.
preds = pm.sample_ppc(trace, model=model, samples=100) 
# 'y' stands for the name given to the observed variable; the model in the
# question names it 'out', so the key there would be 'out'.
y_preds = preds['y']

to produce sample predictions on the data you used to train the model.

If you need to make predictions using other data as input, you can swap the new data into ann_input, since it is a Theano shared variable (for example with ann_input.set_value(...)), and then run sample_ppc again.


#3

Thanks, Simon… I produced predicted values as you suggested, but the accuracy is coming out very poor.
This is my code:

def construct_nn(ann_input, ann_output):
    """Build a PyMC3 regression network with two hidden tanh layers and biases.

    Args:
        ann_input: Input features fed into the first layer.
        ann_output: Observed targets attached to the 'out' likelihood.

    Returns:
        The ``pm.Model`` holding the weight/bias priors, the noise scale
        'sd' and the Normal likelihood 'out'.

    Note:
        The number of input features is read from ``X`` and the dtype from
        ``floatX``; both come from the enclosing scope, not from the
        arguments.
    """
    n_hidden = 5
    n_features = X.shape[1]

    # Random starting points (test values): all weights first, then all
    # biases, each in input -> output order.
    start_w_in_1 = np.random.randn(n_features, n_hidden).astype(floatX)
    start_w_1_2 = np.random.randn(n_hidden, n_hidden).astype(floatX)
    start_w_2_out = np.random.randn(n_hidden).astype(floatX)
    start_b_1 = np.random.randn(n_hidden).astype(floatX)
    start_b_2 = np.random.randn(n_hidden).astype(floatX)
    start_b_out = np.random.randn(1).astype(floatX)

    with pm.Model() as neural_network:
        # Input -> first hidden layer.
        w_in_1 = pm.Normal('w_in_1', 0, sd=1,
                           shape=(n_features, n_hidden),
                           testval=start_w_in_1)
        b_1 = pm.Normal('b_1', mu=0, sd=1, shape=n_hidden,
                        testval=start_b_1)

        # First -> second hidden layer.
        w_1_2 = pm.Normal('w_1_2', 0, sd=1,
                          shape=(n_hidden, n_hidden),
                          testval=start_w_1_2)
        b_2 = pm.Normal('b_2', mu=0, sd=1, shape=n_hidden,
                        testval=start_b_2)

        # Second hidden layer -> output.
        w_2_out = pm.Normal('w_2_out', 0, sd=1,
                            shape=(n_hidden,),
                            testval=start_w_2_out)
        b_out = pm.Normal('b_out', mu=0, sd=1, shape=1,
                          testval=start_b_out)

        # Forward pass: tanh on both hidden layers, no activation on the
        # output.
        hidden_1 = pm.math.tanh(pm.math.dot(ann_input, w_in_1) + b_1)
        hidden_2 = pm.math.tanh(pm.math.dot(hidden_1, w_1_2) + b_2)
        mu_out = pm.math.dot(hidden_2, w_2_out) + b_out

        # Half-normal prior on the observation noise, then the likelihood.
        noise_sd = pm.HalfNormal('sd', sd=1)
        pm.Normal('out', mu=mu_out, sd=noise_sd, observed=ann_output)

    return neural_network

# Wrap the training data in Theano shared variables and build the model on them.
ann_input = theano.shared(X_train)
ann_output = theano.shared(Y_train)
neural_network = construct_nn(ann_input, ann_output)

# Install a Theano random stream created with seed 42 for PyMC3 to use.
from pymc3.theanof import set_tt_rng, MRG_RandomStreams
set_tt_rng(MRG_RandomStreams(42))


# Fit the model with ADVI for 3000 iterations.
with neural_network:
    inference = pm.ADVI()
    approx = pm.fit(n=3000, method=inference)
    
    
# Draw 5000 samples from the fitted approximation and summarise them.
trace = approx.sample(draws=5000)  
pm.summary(trace)
# inference.hist is negated before plotting and labelled as the ELBO.
plt.plot(-inference.hist)
plt.ylabel('ELBO')
plt.xlabel('iteration');
plt.show()

# Posterior predictive check: compare predictions with the training targets.

# Reference series: plotting Y_train against itself draws the ideal
# "predicted == actual" diagonal.
predANN = Y_train

ppc = pm.sample_ppc(trace, model=neural_network, samples=5000)

# The draws are keyed by the observed variable's name, which construct_nn
# sets to 'out'; ppc['y'] does not exist and raises KeyError.
# The draws are stacked along axis 0 (one row per posterior sample), so
# average over that axis to get a single prediction per observation.
pred_ppc = ppc['out'].mean(axis=0)

plt.plot(Y_train, pred_ppc, 'o')
plt.plot(Y_train, predANN, 'o')
plt.ylabel('Predicted')
plt.xlabel('Actual')
plt.show()

#4

There are many things that could be going wrong here, but maybe as a starting point you could have a look at the ADVI troubleshooting/tuning tips found in the thread: