Hi Anurag,
In general, the key to turning a for-loop into `theano.scan` (or `tf.while_loop` in TensorFlow) is to keep the functional form in mind: you need to be able to express the body of the for-loop as one single function.
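For intuition, here is a minimal sketch of that functional form, using a plain running sum that has nothing to do with your model: `fn` receives the current sequence element and the prior result, and returns the new result.

```python
import numpy as np
import theano
import theano.tensor as tt

x = tt.dvector("x")

# the whole loop body is one function: fn(sequence element, prior result)
cumsum, updates = theano.scan(
    fn=lambda x_t, acc: acc + x_t,
    sequences=x,
    outputs_info=tt.as_tensor_variable(np.float64(0.)),  # initial carried state
)
f = theano.function([x], cumsum)
print(f(np.arange(5.)))  # [ 0.  1.  3.  6. 10.]
```

With that in mind, we start by looking at the numpy version of your model: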
```python
import numpy as np
import matplotlib.pyplot as plt

eta0 = np.random.rand(100, 1000, 3)

def ftfunc(P_curr, xi, eta):
    xi2 = eta * np.dot(P_curr.T, xi[0:2])
    return xi2

# time-varying transition matrix, one (2, 3) block per series and time point
p = np.zeros((100, 1000, 2, 3))
for i in range(100):
    for j in range(1000):
        p[i, j, 0, 0] = 0.95 - np.exp(-6) * np.exp(0.05*j/20)
        p[i, j, 0, 1] = 0.05
        p[i, j, 0, 2] = np.exp(-6) * np.exp(0.05*j/20)
        p[i, j, 1, 0] = 0.01
        p[i, j, 1, 1] = 0.94
        p[i, j, 1, 2] = 0.05

ft2 = np.zeros((100, 1000))
xi2 = np.zeros((100, 1000, 3))

# initial step, starting every series from state [1, 0, 0]
for i in range(100):
    xi2[i, 0] = ftfunc(p[i, 0], np.asarray([1, 0, 0]), eta0[i, 0])
ft2[:, 0] = xi2[:, 0].sum(axis=1)
xi2[:, 0, 0] = xi2[:, 0, 0] / ft2[:, 0]
xi2[:, 0, 1] = xi2[:, 0, 1] / ft2[:, 0]
xi2[:, 0, 2] = xi2[:, 0, 2] / ft2[:, 0]

# time must be the outer loop here, so that each step is normalized
# before the next step consumes it
for j in range(1, 1000):
    for i in range(100):
        P_curr = p[i, j]
        Eta = eta0[i, j]
        Xi_ = xi2[i, j - 1]
        xi2[i, j] = ftfunc(P_curr, Xi_, Eta)
    ft2[:, j] = xi2[:, j].sum(axis=1)
    xi2[:, j, 0] = xi2[:, j, 0] / ft2[:, j]
    xi2[:, j, 1] = xi2[:, j, 1] / ft2[:, j]
    xi2[:, j, 2] = xi2[:, j, 2] / ft2[:, j]

plt.plot(xi2[:, 99, :]);
```
A few things immediately jump out at me:
- the generation of the time-varying transition matrix `P` and of the initial latent state `xi2` should both be folded into the nested for-loop. Most likely we don't need to construct `P` explicitly;
- the ordering of the nested for-loop: since we can think of the data as 100 copies of the same time series, and the time dimension is the one that actually needs `theano.scan`, it is best to vectorize `ftfunc` across the 100 series and keep only the one loop that will be translated into scan:
```python
ft2_new = np.zeros((1000, 100, 1))
xi2_new = np.zeros((1000, 100, 3))
# move time to the leading axis: (100, 1000, 3) -> (1000, 100, 3)
eta0_ = np.transpose(eta0, [1, 0, 2])

def generate_p(j):
    p = np.zeros((2, 3))
    p[0, 0] = 0.95 - np.exp(-6) * np.exp(0.05*j/20)
    p[0, 1] = 0.05
    p[0, 2] = np.exp(-6) * np.exp(0.05*j/20)
    p[1, 0] = 0.01
    p[1, 1] = 0.94
    p[1, 2] = 0.05
    return p

def ftfunc_batch(P_curr, xi, eta):
    # shapes: (3, 2) * (100, 1, 2) -> broadcast to (100, 3, 2) -> sum to (100, 3)
    xi2 = eta * np.sum(P_curr.T * xi[:, np.newaxis, :2], axis=-1)
    return xi2

# run once so we have the initial state
p_curr = generate_p(0)
xi2_j = ftfunc_batch(p_curr, np.asarray([[1, 0, 0]]), eta0_[0])
ft2_new[0] = np.sum(xi2_j, axis=-1, keepdims=True)
xi2_new[0] = xi2_j / ft2_new[0]

for j in range(1, 1000):
    last_states = generate_p(j), xi2_new[j-1], eta0_[j]
    xi_new = ftfunc_batch(*last_states)
    ft2_new[j] = np.sum(xi_new, axis=-1, keepdims=True)
    xi2_new[j] = xi_new / ft2_new[j]
    np.testing.assert_array_almost_equal(xi2_new[j], xi2[:, j])

plt.plot(xi2_new[99]);
```
Now, recall the calling convention for the function you pass to `theano.scan`; from the docs:

> The general order of function parameters to `fn` is: sequences (if any), prior result(s) (if needed), non-sequences (if any)
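As a toy illustration of that ordering (again separate from your model, with made-up names):

```python
import numpy as np
import theano
import theano.tensor as tt

a = tt.dvector("a")
b = tt.dvector("b")
c = tt.dscalar("c")

# scan calls fn as fn(a_t, b_t, prev, c_):
# sequence elements first, then the prior result, then non-sequences
out, _ = theano.scan(
    fn=lambda a_t, b_t, prev, c_: prev + a_t * b_t + c_,
    sequences=[a, b],
    outputs_info=tt.as_tensor_variable(np.float64(0.)),
    non_sequences=c,
)
f = theano.function([a, b, c], out)
print(f([1., 2., 3.], [4., 5., 6.], 10.))  # [14. 34. 62.]
```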
Thus, we can further restructure the for-loop into:
```python
ft2_new = np.zeros((1000, 100, 1))
xi2_new = np.zeros((1000, 100, 3))
eta0_ = np.transpose(eta0, [1, 0, 2])

def generate_p(j):
    p = np.zeros((2, 3))
    p[0, 0] = 0.95 - np.exp(-6) * np.exp(0.05*j/20)
    p[0, 1] = 0.05
    p[0, 2] = np.exp(-6) * np.exp(0.05*j/20)
    p[1, 0] = 0.01
    p[1, 1] = 0.94
    p[1, 2] = 0.05
    return p

def ftfunc_batch(eta, P_curr, xi, fi):
    # eta, P_curr are sequences
    # xi, fi are the prior results (fi is carried along but never read)
    # shapes: (3, 2) * (100, 1, 2) -> broadcast to (100, 3, 2) -> sum to (100, 3)
    xi2 = eta * np.sum(P_curr.T * xi[:, np.newaxis, :2], axis=-1)
    fi2 = np.sum(xi2, axis=-1, keepdims=True)
    return xi2 / fi2, fi2

# run once so we have the initial state
p_curr = generate_p(0)
xi2_new[0], ft2_new[0] = ftfunc_batch(
    eta0_[0], p_curr, np.asarray([[1, 0, 0]]), None)

for j in range(1, 1000):
    sequence_input = eta0_[j], generate_p(j)
    prior_results = xi2_new[j-1], ft2_new[j-1]
    xi2_new[j], ft2_new[j] = ftfunc_batch(*sequence_input, *prior_results)
    np.testing.assert_array_almost_equal(xi2_new[j], xi2[:, j])
```
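With the loop body now in exactly that shape, translating it to `theano.scan` is mechanical. Here is a sketch of how I would write it, continuing from the snippet above (a sketch under those assumptions, not tested against your real model; note that scan needs a carried state of fixed shape, so the initial `[1, 0, 0]` is tiled across the 100 series instead of relying on broadcasting):

```python
import numpy as np
import theano
import theano.tensor as tt

# stack the per-step transition matrices once: (1000, 2, 3)
P_all = np.stack([generate_p(j) for j in range(1000)])

def step(eta_t, P_t, xi_tm1, ft_tm1):
    # same body as ftfunc_batch, written with theano ops
    xi_t = eta_t * tt.sum(P_t.T * xi_tm1[:, None, :2], axis=-1)
    ft_t = tt.sum(xi_t, axis=-1, keepdims=True)
    return xi_t / ft_t, ft_t

# initial carried state: every series starts in [1, 0, 0];
# ft_init is a dummy since fi is never read inside the step
xi_init = tt.as_tensor_variable(np.tile([1., 0., 0.], (100, 1)))
ft_init = tt.ones((100, 1), dtype="float64")

(xi_seq, ft_seq), updates = theano.scan(
    fn=step,
    sequences=[tt.as_tensor_variable(eta0_), tt.as_tensor_variable(P_all)],
    outputs_info=[xi_init, ft_init],
)

f = theano.function([], [xi_seq, ft_seq])
xi_scan, ft_scan = f()
np.testing.assert_array_almost_equal(xi_scan, xi2_new)
```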
Hope this gives you some inspiration to solve your problem.