Hi Anurag,
In general, the key to turning a for-loop into theano.scan (or tf.while_loop in TensorFlow) is to keep the functional form in mind: you need to be able to express the body of the for-loop as one single function. Let's start by looking at the numpy version:
import numpy as np
import matplotlib.pyplot as plt

eta0 = np.random.rand(100, 1000, 3)

def ftfunc(P_curr, xi, eta):
    # single step: (3, 2) dot (2,) -> (3,), scaled elementwise by eta
    xi2 = eta * np.dot(P_curr.T, xi[0:2])
    return xi2

# time-varying transition matrix; note it only depends on the time index j
p = np.zeros((100, 1000, 2, 3))
for i in range(100):
    for j in range(1000):
        p[i, j, 0, 0] = 0.95 - np.exp(-6) * np.exp(0.05*j/20)
        p[i, j, 0, 1] = 0.05
        p[i, j, 0, 2] = np.exp(-6) * np.exp(0.05*j/20)
        p[i, j, 1, 0] = 0.01
        p[i, j, 1, 1] = 0.94
        p[i, j, 1, 2] = 0.05

ft2 = np.zeros((100, 1000))
xi2 = np.zeros((100, 1000, 3))

# initial step at j = 0, then normalize over the state dimension
for i in range(100):
    xi2[i, 0] = ftfunc(p[i, 0], np.asarray([1, 0, 0]), eta0[i, 0])
ft2[:, 0] = xi2[:, 0].sum(axis=1)
xi2[:, 0, 0] = xi2[:, 0, 0] / ft2[:, 0]
xi2[:, 0, 1] = xi2[:, 0, 1] / ft2[:, 0]
xi2[:, 0, 2] = xi2[:, 0, 2] / ft2[:, 0]

# remaining time steps; normalize row i only, since the rest of
# column j is not filled in yet at this point
for i in range(100):
    for j in range(1, 1000):
        P_curr = p[i, j]
        Eta = eta0[i, j]
        Xi_ = xi2[i, j - 1]
        xi2[i, j] = ftfunc(P_curr, Xi_, Eta)
        ft2[i, j] = xi2[i, j].sum()
        xi2[i, j] = xi2[i, j] / ft2[i, j]

plt.plot(xi2[:, 99, :]);
A few things immediately jump out at me:
- the generation of the time-varying transition matrix P and the initial latent state xi2 should both be folded into the nested for-loop; most likely we don't need to construct P explicitly,
- the ordering of the nested for-loops: since we can think of this as 100 copies of the same time series, and the time dimension is the one that has to go through theano.scan, it is best to vectorize ftfunc over the batch dimension and keep only the one loop that will be translated into scan:
ft2_new = np.zeros((1000, 100, 1))
xi2_new = np.zeros((1000, 100, 3))
# move the time dimension to the front: (100, 1000, 3) -> (1000, 100, 3)
eta0_ = np.transpose(eta0, [1, 0, 2])

def generate_p(j):
    # the transition matrix only depends on the time index j
    p = np.zeros((2, 3))
    p[0, 0] = 0.95 - np.exp(-6) * np.exp(0.05*j/20)
    p[0, 1] = 0.05
    p[0, 2] = np.exp(-6) * np.exp(0.05*j/20)
    p[1, 0] = 0.01
    p[1, 1] = 0.94
    p[1, 2] = 0.05
    return p

def ftfunc_batch(P_curr, xi, eta):
    # shape changes here: (3, 2) * (batch, 1, 2) --> (batch, 3, 2) --> (batch, 3)
    xi2 = eta * np.sum(P_curr.T * xi[:, np.newaxis, :2], axis=-1)
    return xi2

# run once so we have the initial state
p_curr = generate_p(0)
xi2_j = ftfunc_batch(p_curr, np.asarray([[1, 0, 0]]), eta0_[0])
ft2_new[0] = np.sum(xi2_j, axis=-1, keepdims=True)
xi2_new[0] = xi2_j / ft2_new[0]

for j in range(1, 1000):
    last_states = generate_p(j), xi2_new[j-1], eta0_[j]
    xi_new = ftfunc_batch(*last_states)
    ft2_new[j] = np.sum(xi_new, axis=-1, keepdims=True)
    xi2_new[j] = xi_new / ft2_new[j]
    # check against the nested-loop version above
    np.testing.assert_array_almost_equal(xi2_new[j], xi2[:, j])

plt.plot(xi2_new[99]);
Now, recall the calling convention of the function you pass to theano.scan; from the documentation:

    The general order of function parameters to fn is:
    sequences (if any), prior result(s) (if needed), non-sequences (if any)
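For example, here is a scaled cumulative sum following that ordering (a minimal sketch; the names step, acc_tm1, and scaled_cumsum are mine, not part of the scan API):

import theano
import theano.tensor as tt

seq = tt.vector("seq")  # sequence
k = tt.scalar("k")      # non-sequence

def step(x_t, acc_tm1, k):
    # argument order: sequence element, prior result, non-sequence
    return acc_tm1 + k * x_t

outputs, updates = theano.scan(
    step,
    sequences=seq,
    outputs_info=tt.zeros_like(k),  # initial value of the running total
    non_sequences=k,
)
scaled_cumsum = theano.function([seq, k], outputs, allow_input_downcast=True)
# scaled_cumsum(np.arange(4.0), 2.0) -> [0., 2., 6., 12.]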
Thus, we can further restructure the for-loop into:
ft2_new = np.zeros((1000, 100, 1))
xi2_new = np.zeros((1000, 100, 3))
eta0_ = np.transpose(eta0, [1, 0, 2])

def generate_p(j):
    p = np.zeros((2, 3))
    p[0, 0] = 0.95 - np.exp(-6) * np.exp(0.05*j/20)
    p[0, 1] = 0.05
    p[0, 2] = np.exp(-6) * np.exp(0.05*j/20)
    p[1, 0] = 0.01
    p[1, 1] = 0.94
    p[1, 2] = 0.05
    return p

def ftfunc_batch(eta, P_curr, xi, fi):
    # eta, P_curr are the sequences
    # xi, fi are the previous results (fi is carried along but not used)
    # shape changes here: (3, 2) * (batch, 1, 2) --> (batch, 3, 2) --> (batch, 3)
    xi2 = eta * np.sum(P_curr.T * xi[:, np.newaxis, :2], axis=-1)
    fi2 = np.sum(xi2, axis=-1, keepdims=True)
    return xi2 / fi2, fi2

# run once so we have the initial state
p_curr = generate_p(0)
xi2_new[0], ft2_new[0] = ftfunc_batch(
    eta0_[0], p_curr, np.asarray([[1, 0, 0]]), None)

for j in range(1, 1000):
    sequence_input = eta0_[j], generate_p(j)
    prior_results = xi2_new[j-1], ft2_new[j-1]
    xi2_new[j], ft2_new[j] = ftfunc_batch(*sequence_input, *prior_results)
    np.testing.assert_array_almost_equal(xi2_new[j], xi2[:, j])
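With ftfunc_batch in this shape, the translation to theano.scan is mechanical. Below is a minimal sketch of what it could look like, reusing the variables from the snippet above; precomputing the transition matrices into one sequence array, and names like p_seq, step, and xi_all, are my own choices rather than the only way to write it:

import theano
import theano.tensor as tt

# precompute the time-varying transition matrices as one (1000, 2, 3) sequence
p_seq = np.stack([generate_p(j) for j in range(1000)])

eta_shared = theano.shared(eta0_)  # (1000, 100, 3)
p_shared = theano.shared(p_seq)

def step(eta, P_curr, xi_tm1, ft_tm1):
    # eta, P_curr are sequences; xi_tm1, ft_tm1 are the prior results
    xi = eta * tt.sum(P_curr.T * xi_tm1[:, None, :2], axis=-1)
    ft = tt.sum(xi, axis=-1, keepdims=True)
    return xi / ft, ft

(xi_all, ft_all), updates = theano.scan(
    step,
    sequences=[eta_shared[1:], p_shared[1:]],
    outputs_info=[tt.as_tensor_variable(xi2_new[0]),   # initial normalized state
                  tt.as_tensor_variable(ft2_new[0])],  # initial normalizing constant
)
compute_xi = theano.function([], xi_all, updates=updates)
np.testing.assert_array_almost_equal(compute_xi(), xi2_new[1:])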
Hope this gives you some inspiration to solve your problem.