I’ve been trying to follow the minibatching tutorial here: Introduction to Variational Inference with PyMC (PyMC example gallery).
But when I modify it to (1) use data containers and (2) adapt it to my own data, I get infinite losses and incorrect predictions when I use minibatching. I’m hoping someone can help me. I must be doing something wrong, because minibatching speeds up the fitting process even when the batch size is 1, and increasing the batch size seems to have no effect on how long the fit takes.
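For reference, this is how I’m checking the losses (a minimal sketch; `advifit` is the approximation object returned by `advi.fit()` in the code below):

```python
import numpy as np
import matplotlib.pyplot as plt

# advifit.hist holds the loss recorded at every iteration of fit()
print("non-finite losses:", np.sum(~np.isfinite(advifit.hist)))

plt.plot(advifit.hist)
plt.xlabel("iteration")
plt.ylabel("loss")
plt.show()
```

In the minibatched version below, that count comes out nonzero.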
```python
import numpy as np
import pandas as pd
import pymc as pm
from sklearn.metrics import r2_score


def sigmoid(z):
    return 1 / (1 + np.exp(-z))


def generate_df(n_entities):
    const = -1.5
    # Each entity gets a random number of observations (1-19)
    combinations = np.random.randint(1, 20, size=n_entities)
    entity_ids = np.repeat(np.arange(n_entities), combinations)
    size = np.sum(combinations)
    # One random intercept per entity
    entity_const_coefs = {i: x for i, x in enumerate(np.random.normal(0, 1, size=n_entities))}
    clicks = np.random.randint(1, 100, size=size)
    cvrs = []
    for ent in entity_ids:
        ent_const_coef = entity_const_coefs.get(ent)
        cvr = sigmoid(ent_const_coef + const)
        cvrs.append(cvr)
    cvrs = np.array(cvrs)
    df = pd.DataFrame({"cvrs": cvrs, "entity_id": entity_ids, "clicks": clicks})
    df["transactions"] = df["cvrs"] * df["clicks"]
    df["transactions"] = df["transactions"].apply(np.floor)
    return df, entity_const_coefs


np.random.seed(42)  # seed before generating, so the data is reproducible
n_entities = 1000
df, entity_const_coefs = generate_df(n_entities)

coords = {
    "entity_id": np.arange(n_entities),
}
```
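To give a sense of the generated data (a quick check; exact numbers depend on the seed):

```python
print(df.shape)  # roughly 10,000 rows: one row per (entity, observation) pair
print(df.groupby("entity_id").size().describe())  # between 1 and 19 rows per entity
print(df.head())  # columns: cvrs (true rate), entity_id, clicks, transactions
```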
First version, without minibatching. It works great, but it’s slow:

```python
with pm.Model(coords=coords) as model:
    clicks = pm.Data("clicks", df.clicks.values)
    transactions = pm.Data("transactions", df.transactions.values)
    entity_ids = pm.Data("entity_ids", df.entity_id.values)

    mu = pm.Normal("mu", 0, 1, dims="entity_id")
    const = pm.Normal("const", 0, 1)
    cvr = pm.Deterministic("cvr", pm.math.invlogit(mu[entity_ids] + const))
    likelihood = pm.Binomial(
        "likelihood", p=cvr, n=clicks, observed=transactions, total_size=df.shape[0]
    )

    advi = pm.ADVI()
    advifit = advi.fit(10000)

with model:
    trace = advifit.sample(1000)
    posterior_predictive = pm.sample_posterior_predictive(trace, var_names=["cvr"])

estimated_cvrs = posterior_predictive.posterior_predictive.cvr.mean(dim=("chain", "draw")).values
estimated_transactions = estimated_cvrs * df.clicks
print("R^2", r2_score(df.transactions, estimated_transactions))  # usually around 99%
```
Second version, with minibatching. Oddly, I’ve set the batch size to 1 and it still fits much faster than the first version, and changing the batch size doesn’t seem to affect how fast it goes (how I’m timing it is shown after the listing):

```python
with pm.Model(coords=coords) as model:
    clicks = pm.Data("clicks", df.clicks.values)
    transactions = pm.Data("transactions", df.transactions.values)
    entity_ids = pm.Data("entity_ids", df.entity_id.values)

    mu = pm.Normal("mu", 0, 1, dims="entity_id")
    const = pm.Normal("const", 0, 1)
    cvr = pm.Deterministic("cvr", pm.math.invlogit(mu[entity_ids] + const))
    likelihood = pm.Binomial(
        "likelihood", p=cvr, n=clicks, observed=transactions, total_size=df.shape[0]
    )

    advi = pm.ADVI()
    batch_size = 1
    clicks_minibatch = pm.Minibatch(df.clicks.values.astype(np.int32), batch_size=batch_size)
    transactions_minibatch = pm.Minibatch(df.transactions.values.astype(np.float64), batch_size=batch_size)
    entity_ids_minibatch = pm.Minibatch(df.entity_id.values.astype(np.int32), batch_size=batch_size)
    advifit = advi.fit(
        10000,
        more_replacements={
            clicks: clicks_minibatch,
            transactions: transactions_minibatch,
            entity_ids: entity_ids_minibatch,
        },
    )

with model:
    trace = advifit.sample(1000)
    posterior_predictive = pm.sample_posterior_predictive(trace, var_names=["cvr"])

estimated_cvrs = posterior_predictive.posterior_predictive.cvr.mean(dim=("chain", "draw")).values
estimated_transactions = estimated_cvrs * df.clicks
print("R^2", r2_score(df.transactions, estimated_transactions))  # normally an R^2 around 30%
```