I tried to implement this using PyMC3: https://discourse.edwardlib.org/t/iterative-estimators-bayes-filters-in-edward/104/4

This is my code:

```
seed = 7
np.random.seed(seed)
d = 5
m = 10
coeffs = np.random.uniform(-10, 10, d)
def mean_absolute_percentage_error(y_true, y_pred):
y_true, y_pred = np.array(y_true), np.array(y_pred)
return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
def next_set(w, m=10, d=3):
x = np.random.rand(m, d)
y = np.dot(x, w)
y = y + np.random.normal(0, 0.1, m)
return x, y
test_x, test_y = next_set(coeffs, 50, d)
model = pm.Model()
shared_w_mu = shared(np.full(d, 0.0))
shared_w_sd = shared(np.full(d, 1.0))
shared_sigma_mu = shared(0.0)
shared_sigma_sd = shared(1.0)
x, y = next_set(coeffs, m, d)
shared_x = shared(x)
shared_y = shared(y)
with model:
w = pm.Normal('w', mu=shared_w_mu.get_value(), sd=shared_w_sd.get_value(), shape=d)
sigma = pm.Normal("s", shared_sigma_mu.get_value(), sd=shared_sigma_sd.get_value())
mu = tt.dot(shared_x, w)
pm.Normal("y", mu=mu, sd=sigma, observed=shared_y)
advi = pm.ADVI(total_size = 500)
for i in range(50):
with model:
apprx = advi.fit(50)
x, y = next_set(coeffs, m, d)
shared_x.set_value(x)
shared_y.set_value(y)
mu_dic = apprx.groups[0].bij.rmap(apprx.params[0].eval())
sd_dic = apprx.groups[0].bij.rmap(apprx.params[1].eval())
shared_w_mu.set_value(mu_dic['w'])
shared_w_sd.set_value(sd_dic['w'])
shared_sigma_mu.set_value(mu_dic['s'])
shared_sigma_sd.set_value(sd_dic['s'])
pred = np.dot(test_x, mu_dic['w']) # avoid ppc to improve the performance
print(mean_absolute_percentage_error(test_y, pred))
```

Here, I try to extend ADVI to streaming ML using Bayesian filtering, assuming that the total_size is known in advance. However, the accuracy of the estimated coefficients improves very slowly compared to the Edward implementation. What am I doing wrong?