Hello, I am trying to write an implementation of this model for the camera dataset from the bayesm Overview. My model already runs for 2 clients, but for 332 clients it takes too long to even start sampling. Can you give me a hint so that I can write the model in a more efficient way?
This is my code:
import json
import numpy as np
import pymc as pm
import arviz as az
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm # Import tqdm
import networkx as nx
figsize = (16, 10)
plt.style.use('dark_background')
palette = "Spectral"
sns.set_palette(palette)
# Load the JSON file
with open("data/camera.json", "r") as f:
data = json.load(f)
columns = [
    "canon",
    "sony",
    "nikon",
    "panasonic",
    "pixels",
    "zoom",
    "video",
    "swivel",
    "wifi",
    "price",
]
print(len(data))
data = data[:2]  # keep only the first 2 clients for now; the full dataset has 332
# Convert data to NumPy array
a = np.array([[data[k]["X"][l*5:(l+1)*5] for l in range(len(data[k]["y"]))] for k in range(len(data))])
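# (Assumed layout) each customer answers len(data[k]["y"]) questions, and X stores 5
# alternative rows of len(columns) attributes per question, so `a` should come out with
# shape (n_customers, n_questions, 5, len(columns)).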
# Define Bayesian Model in PyMC
n_features = len(columns)
nvar = n_features # Number of random coefficients
nu = nvar + 3 # Degrees of freedom for the IW prior (not used further below; the LKJ prior takes its place)
# Get observed output vectors
y_obs = np.array([np.array(data[k]["y"])-1 for k in range(len(data))])
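# The recorded choices appear to be 1-based, so subtracting 1 turns them into the
# 0-based category indices that pm.Categorical expects.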
with pm.Model() as model:
    a_mu = 0.01  # Small precision factor
    st_dist = pm.MvNormal.dist(mu=np.ones(n_features) * 1 / a_mu, cov=np.eye(n_features), shape=n_features)
    chol, corr, stds = pm.LKJCholeskyCov(
        "chol", n=n_features, eta=4.0,
        sd_dist=st_dist,
    )
    # Recover the full covariance matrix
    v_beta = pm.Deterministic("v_beta", chol @ chol.T)

    # Define the mean of the random coefficients
    mu_bar = np.zeros(nvar)  # Prior mean
    mu_beta = pm.MvNormal("mu_beta", mu=mu_bar, chol=chol)
    beta = pm.MvNormal("beta", mu=mu_beta, cov=v_beta, shape=(len(data), len(columns)))

    # Track progress using tqdm
    y = []
    p_ = []
    for k in tqdm(range(len(data)), desc="Processing Customers", colour="yellow"):
        p = []
        for l in tqdm(range(16), desc=f"Processing questions for customer {k}", colour="green"):
            utilities = pm.math.dot(a[k, l], beta[k])  # Compute utilities
            # p.append(pm.math.exp(utilities) / pm.math.sum(pm.math.exp(utilities), axis=0))  # Convert to probabilities
            p.append(pm.math.softmax(utilities))
        p_.append(p)
        # Observed choices
        y.append(pm.Categorical(f"y_{k}", p=p, observed=y_obs[k]))
    p_per_user = pm.Deterministic("p", pm.math.stack(p_, axis=0))
    y_per_user = pm.Deterministic("y", pm.math.stack(y, axis=0))

    model.debug()

    # Sample from the posterior with a progress bar
    # idata = pm.sample(1000, tune=1000, target_accept=0.95, progressbar=True)
    idata = pm.sample(2000, tune=1000, target_accept=0.95, progressbar=True, nuts_sampler="numpyro", chain_method="vectorized")
# Convert model to networkx graph
graph = pm.model_to_networkx(model)
# Create plot
plt.figure(figsize=(10,8))
nx.draw(graph, with_labels=True, node_color='lightblue',
        node_size=2000, arrowsize=20)
plt.savefig("app/data/graph.png")
plt.close()
# Display results
print(az.summary(idata))
az.plot_trace(idata, figsize=(10, 7), var_names=["mu_beta", "beta", "y"], combined=True)
plt.savefig("app/data/plot.png")
plt.close()
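For reference, this is roughly the vectorized direction I think I need to go in, replacing the per-customer/per-question Python loops with one batched likelihood. It is only a sketch and I have not verified it at full scale; the names n_cust, n_questions, n_alt and vectorized_model are my own placeholders, and it assumes a has shape (n_customers, n_questions, n_alternatives, n_features) and y_obs has shape (n_customers, n_questions):

# Vectorized sketch (untested at full scale): same priors as above, but the likelihood
# is built once for all customers and questions instead of inside Python loops.
n_cust, n_questions, n_alt, _ = a.shape  # assumes the 4-D layout described above

with pm.Model() as vectorized_model:
    st_dist = pm.MvNormal.dist(mu=np.ones(n_features) * 1 / a_mu, cov=np.eye(n_features), shape=n_features)
    chol, corr, stds = pm.LKJCholeskyCov("chol", n=n_features, eta=4.0, sd_dist=st_dist)
    mu_beta = pm.MvNormal("mu_beta", mu=np.zeros(nvar), chol=chol)

    # One coefficient vector per customer, drawn as a single batched MvNormal.
    # Parametrizing with chol is equivalent to cov=chol @ chol.T but skips the extra matrix product.
    beta = pm.MvNormal("beta", mu=mu_beta, chol=chol, shape=(n_cust, n_features))

    # utilities[k, l, j] = a[k, l, j, :] @ beta[k, :] for every customer k, question l, alternative j
    utilities = pm.math.sum(a * beta[:, None, None, :], axis=-1)
    p = pm.Deterministic("p", pm.math.softmax(utilities, axis=-1))

    # One batched Categorical over all customers and questions at once
    y = pm.Categorical("y", p=p, observed=y_obs)

    idata = pm.sample(2000, tune=1000, target_accept=0.95, progressbar=True, nuts_sampler="numpyro", chain_method="vectorized")

The idea is that the whole graph is built from a handful of tensor ops instead of 332 × 16 separate softmax/Categorical nodes, which is what I suspect makes graph construction so slow. Does this look like the right way to vectorize it, or is there a better pattern?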