I’m trying to understand what’s normal. This model hovers around 2 seconds per draw, which results in an expected ETA of 1–3 hours on the progress bar.
How long does sampling usually take for simple models on datasets of this size (1 million observations)?
import pymc3
import numpy as np
import pandas as pd
def standardize(x):
    """Rescale ``x`` to zero mean and unit standard deviation.

    Args:
        x: Object exposing ``mean()`` and ``std()`` and supporting
            arithmetic with their results (for example a pandas Series
            or a NumPy array).

    Returns:
        ``(x - x.mean()) / x.std()``, in whatever type those operations
        produce for ``x``.

    Raises:
        ValueError: If the standard deviation is zero or not finite
            (constant, empty or too-short input).
    """
    spread = x.std()
    # Dividing by a zero/NaN spread would silently fill the result with
    # NaN or infinity; fail loudly here instead of much later downstream.
    spread_values = np.asarray(spread, dtype=float)
    if not np.all(np.isfinite(spread_values)) or np.any(spread_values == 0):
        raise ValueError(
            "cannot standardize: standard deviation is zero or not finite"
        )
    return (x - x.mean()) / spread
if __name__ == "__main__":
    source_url = (
        "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/diamonds.csv"
    )
    formula = (
        "price_std ~ C(cut) + C(color) + C(clarity) + carat + depth + table + x + z"
    )

    # Load the CSV and draw one million rows from it with replacement.
    diamonds = pd.read_csv(source_url).sample(n=1_000_000, replace=True)
    # Add the standardized price as the response column named in the formula.
    diamonds = diamonds.assign(price_std=standardize(diamonds["price"]))
    print(diamonds)

    # Same default-argument Normal prior for the intercept and the regressors.
    priors = {
        "Intercept": pymc3.Normal.dist(),
        "Regressor": pymc3.Normal.dist(),
    }
    model = pymc3.glm.GLM.from_formula(formula, priors=priors, data=diamonds)
    fit = pymc3.sample(init="adapt_diag", model=model)