Help Converting a Keras Model to PyMC3 Syntax

Hello.

I would like to learn how to translate a Keras deep learning model into a Bayesian neural network. I just finished Eric J. Ma's 2017 PyData talk, "Demystifying Bayesian Deep Learning", which helped my conceptual understanding immensely.

Thanks for all the help thus far, PyMC3 community!

Could anyone help me translate the Keras regression model below into PyMC3 syntax? Honestly, if someone can translate just the first three layers and the compile/fit step, I can apply the same pattern to the rest of the model.

from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras import regularizers

model = Sequential()
model.add(Dense(256, input_dim=12, kernel_initializer='normal', activation='relu',
                kernel_regularizer=regularizers.l2(0.01)))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dropout(0.5))
model.add(Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dropout(0.5))
model.add(Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dropout(0.5))
model.add(Dense(8, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(1, activation='linear'))
model.summary()
model.compile(loss='mse', optimizer='adam', metrics=['mse', 'mae'])
history = model.fit(X_train, y_train, epochs=25, verbose=1, validation_split=0.2)

The core of any neural network is just chaining weight layers together. The L2 regularizer you have is spiritually similar to a normal prior, so a weight layer would look like:

weight_sd = 0.1 # could use a HalfNormal hyperprior if you really wanted
W1 = pm.Normal('weight_1', 0, weight_sd, shape=(output_dim, input_dim))
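If you want weight_sd to mirror the Keras penalty more directly: an L2 term lam * w**2 matches the negative log density of a zero-mean normal with sigma = 1/sqrt(2 * lam), up to how Keras scales the data-fit term, so treat this as a rough starting point rather than an exact equivalence:

import numpy as np

# An L2 penalty lam * w**2 equals -log Normal(0, sigma) up to a constant
# when lam = 1 / (2 * sigma**2), i.e. sigma = 1 / sqrt(2 * lam).
# This ignores the scaling of the mse term, so it is only a rough guide.
lam = 0.01  # from kernel_regularizer=regularizers.l2(0.01)
weight_sd = 1.0 / np.sqrt(2.0 * lam)
print(weight_sd)  # ~7.07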

Dropout is a set of Bernoulli variables:

drop_rate = 0.5
# p is the probability of a 1 (keeping the weight), so pass 1 - drop_rate;
# with drop_rate = 0.5 the two happen to coincide
D1 = pm.Bernoulli('dropout_1', 1 - drop_rate, shape=(output_dim, input_dim))
WD1 = pm.Deterministic('dropout_weights_1', W1 * D1)

With the weights constructed, it's just matrix multiplication and a ReLU:

L2 = pm.Deterministic('Layer2', tt.nnet.relu(tt.dot(WD1, L1)))  # L1 is the previous layer's output

A bias can be added if you want:

B2 = pm.Normal('bias2', 0., 1.)
L2 = pm.Deterministic('Layer2', tt.nnet.relu(tt.dot(WD1, L1)) + B2)
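As for the compile/fit step: there is no adam or mse in the PyMC3 version. A Normal likelihood plays the role of the mse loss, and a variational fit such as ADVI plays the role of the optimizer. Here is a minimal end-to-end sketch; the data, layer sizes, and names are just illustrative, not taken from your model:

import numpy as np
import pymc3 as pm
import theano
import theano.tensor as tt

# Placeholder data, purely for illustration
X = np.random.randn(100, 12)
y = np.random.randn(100)
ann_input = theano.shared(X)
ann_output = theano.shared(y)

with pm.Model() as model:
    # First dense layer: the normal prior stands in for the L2 regularizer
    W1 = pm.Normal('W1', 0., 0.1, shape=(12, 16))
    b1 = pm.Normal('b1', 0., 1., shape=16)
    L1 = tt.nnet.relu(tt.dot(ann_input, W1) + b1)

    # Output layer (linear activation)
    W_out = pm.Normal('W_out', 0., 0.1, shape=16)
    mu = tt.dot(L1, W_out)

    # Normal likelihood plays the role of the mse loss
    sigma = pm.HalfNormal('sigma', 1.)
    pm.Normal('out', mu=mu, sd=sigma, observed=ann_output)

    # ADVI plays the role of the adam optimizer
    approx = pm.fit(n=30000, method=pm.ADVI())
    trace = approx.sample(1000)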

More generally, see https://twiecki.io/blog/2018/08/13/hierarchical_bayesian_neural_network/


I would remove the dropout part, as:

  • dropout is more of a model-training technique than part of the generative model
  • discrete nodes are hard to sample

If you really want something identical, you can try using Theano's raw random streams and indexing into them:
http://deeplearning.net/software/theano/library/tensor/raw_random.html
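For example, something like this (the shape and keep probability are just illustrative); the mask is redrawn on each evaluation and lives outside the set of sampled parameters:

import numpy as np
import theano
from theano.tensor.shared_randomstreams import RandomStreams

srng = RandomStreams(seed=42)
keep_prob = 0.5

W1 = theano.shared(np.random.randn(12, 16))  # stand-in weight matrix

# Random 0/1 mask, redrawn on each evaluation of the compiled function
mask = srng.binomial(n=1, p=keep_prob, size=(12, 16), dtype='float64')

# Inverted-dropout scaling keeps the expected activation unchanged
W_dropped = W1 * mask / keep_prob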


Thank you, @chartl. I thought I had it, but I keep getting the same error. The code and error log are below. Does anything immediately jump out at you?

code:

import numpy as np
import pymc3 as pm
import theano
import theano.tensor as T

ann_input = theano.shared(np.asarray(X_train))
y_train_t = y_train.transpose()
ann_output = theano.shared(np.asarray(y_train_t))

n_hidden = 15

# Initialize random weights between each layer

init_1 = np.random.randn(X_train.shape[1], n_hidden)
init_2 = np.random.randn(n_hidden, n_hidden)
init_3 = np.random.randn(n_hidden, n_hidden)
init_4 = np.random.randn(n_hidden, n_hidden)
init_5 = np.random.randn(n_hidden, n_hidden)
init_6 = np.random.randn(n_hidden, n_hidden)
init_7 = np.random.randn(n_hidden, n_hidden)
init_8 = np.random.randn(n_hidden, n_hidden)
init_9 = np.random.randn(n_hidden, n_hidden)
init_10 = np.random.randn(n_hidden, n_hidden)
init_11 = np.random.randn(n_hidden, n_hidden)
init_12 = np.random.randn(n_hidden, n_hidden)
init_13 = np.random.randn(n_hidden, n_hidden)
init_14 = np.random.randn(n_hidden, n_hidden)
init_15 = np.random.randn(n_hidden, n_hidden)
init_out = np.random.randn(n_hidden)

with pm.Model() as neural_network:
    # Weights from input to hidden layer
    weights_in_1 = pm.Normal('w_in_1', 0, sd=1,
                             shape=(X_train.shape[1], n_hidden),
                             testval=init_1)

    weights_1_2 = pm.Normal('w_1_2', 0, sd=1,
                            shape=(n_hidden, n_hidden),
                            testval=init_2)
    weights_2_3 = pm.Normal('w_2_3', 0, sd=1,
                            shape=(n_hidden, n_hidden),
                            testval=init_3)
    weights_3_4 = pm.Normal('w_3_4', 0, sd=1,
                            shape=(n_hidden, n_hidden),
                            testval=init_4)
    weights_4_5 = pm.Normal('w_4_5', 0, sd=1,
                            shape=(n_hidden, n_hidden),
                            testval=init_5)
    weights_5_6 = pm.Normal('w_5_6', 0, sd=1,
                            shape=(n_hidden, n_hidden),
                            testval=init_6)
    weights_6_7 = pm.Normal('w_6_7', 0, sd=1,
                            shape=(n_hidden, n_hidden),
                            testval=init_7)
    weights_7_8 = pm.Normal('w_7_8', 0, sd=1,
                            shape=(n_hidden, n_hidden),
                            testval=init_8)
    weights_8_9 = pm.Normal('w_8_9', 0, sd=1,
                            shape=(n_hidden, n_hidden),
                            testval=init_9)
    weights_9_10 = pm.Normal('w_9_10', 0, sd=1,
                             shape=(n_hidden, n_hidden),
                             testval=init_10)
    weights_10_11 = pm.Normal('w_10_11', 0, sd=1,
                              shape=(n_hidden, n_hidden),
                              testval=init_11)
    weights_11_12 = pm.Normal('w_11_12', 0, sd=1,
                              shape=(n_hidden, n_hidden),
                              testval=init_12)
    weights_12_13 = pm.Normal('w_12_13', 0, sd=1,
                              shape=(n_hidden, n_hidden),
                              testval=init_13)
    weights_13_14 = pm.Normal('w_13_14', 0, sd=1,
                              shape=(n_hidden, n_hidden),
                              testval=init_14)
    weights_14_15 = pm.Normal('w_14_15', 0, sd=1,
                              shape=(n_hidden, n_hidden),
                              testval=init_15)

    # Weights from hidden layer to output
    weights_15_out = pm.Normal('w_15_out', 0, sd=1,
                               shape=(n_hidden,),
                               testval=init_out)



    # Build the network using ReLU activations
    B2 = pm.Normal('bias2', 0., 1.)
    L1 = pm.Deterministic('Layer1', T.dot(ann_input, weights_in_1) + B2)
    L2 = pm.Deterministic('Layer2', T.nnet.relu(T.dot(L1, weights_1_2) + B2))
    L3 = pm.Deterministic('Layer3', T.nnet.relu(T.dot(L2, weights_2_3) + B2))
    L4 = pm.Deterministic('Layer4', T.nnet.relu(T.dot(L3, weights_3_4) + B2))
    L5 = pm.Deterministic('Layer5', T.nnet.relu(T.dot(L4, weights_4_5) + B2))
    L6 = pm.Deterministic('Layer6', T.nnet.relu(T.dot(L5, weights_5_6) + B2))
    L7 = pm.Deterministic('Layer7', T.nnet.relu(T.dot(L6, weights_6_7) + B2))
    L8 = pm.Deterministic('Layer8', T.nnet.relu(T.dot(L7, weights_7_8) + B2))
    L9 = pm.Deterministic('Layer9', T.nnet.relu(T.dot(L8, weights_8_9) + B2))
    L10 = pm.Deterministic('Layer10', T.nnet.relu(T.dot(L9, weights_10_11) + B2))
    L11 = pm.Deterministic('Layer11', T.nnet.relu(T.dot(L10, weights_11_12) + B2))
    L12 = pm.Deterministic('Layer12', T.nnet.relu(T.dot(L11, weights_12_13) + B2))
    L13 = pm.Deterministic('Layer13', T.nnet.relu(T.dot(L12, weights_13_14) + B2))
    L14 = pm.Deterministic('Layer14', T.nnet.relu(T.dot(L13, weights_14_15) + B2))
    L15 = pm.Deterministic('Layer15', T.nnet.relu(T.dot(L14, weights_15_out) + B2))
    act_out = T.dot(L15, weights_15_out)

    out = pm.Normal('out', mu=act_out, observed=ann_output, shape=y_train.shape)

    inference = pm.ADVI()
    approx = pm.fit(n=50000, method=inference)
    trace = approx.sample(draws=5000)

error log:


ValueError                                Traceback (most recent call last)
<ipython-input> in <module>
    119 L14 = pm.Deterministic('Layer14', T.nnet.relu(T.dot(L13, weights_14_15) + B2))
    120 L15 = pm.Deterministic('Layer15', T.nnet.relu(T.dot(L14, weights_15_out) + B2))
--> 121 act_out = T.dot(L15, weights_15_out)
    122
    123

~/anaconda3/lib/python3.7/site-packages/theano/tensor/basic.py in dot(a, b)
   6103         return tensordot(a, b, [[a.ndim - 1], [np.maximum(0, b.ndim - 2)]])
   6104     else:
-> 6105         return _dot(a, b)
   6106
   6107

~/anaconda3/lib/python3.7/site-packages/theano/gof/op.py in __call__(self, *inputs, **kwargs)
    672             thunk.outputs = [storage_map[v] for v in node.outputs]
    673
--> 674             required = thunk()
    675             assert not required  # We provided all inputs
    676

~/anaconda3/lib/python3.7/site-packages/theano/gof/op.py in rval(p, i, o, n)
    890             # default arguments are stored in the closure of rval
    891             def rval(p=p, i=node_input_storage, o=node_output_storage, n=node):
--> 892                 r = p(n, [x[0] for x in i], o)
    893                 for o in node.outputs:
    894                     compute_map[o][0] = True

~/anaconda3/lib/python3.7/site-packages/theano/tensor/basic.py in perform(self, node, inp, out)
   5968         # gives a numpy float object but we need to return a 0d
   5969         # ndarray
-> 5970         z[0] = np.asarray(np.dot(x, y))
   5971
   5972     def grad(self, inp, grads):

ValueError: shapes (151437,) and (15,) not aligned: 151437 (dim 0) != 15 (dim 0)

Never mind, @chartl. I had one extra dimension.