I managed to get it going by passing the data in through pm.Data, but I am still stumped on how to get more than one set of data into the model. Right now the inputs are one column of df_np (which contains the droplet size distribution) and one number, kill (which is the number of bacteria killed). I would like to pass i columns of df_np, one for each drug(i), along with the i corresponding kill values. How do I do that? Thanks!
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from theano import shared
import scipy.stats as stats
from scipy.stats import gamma, norm
import pymc3 as pm
import theano.tensor as tt
import arviz as az
# Make up fake data
# Parameters of the synthetic droplet-diameter distribution.
mu = 3
sd = .5

# Draw 100 diameters from Normal(mu, sd). The original referenced the
# undefined names dia_mu / dia_sd here, which raised NameError.
D = norm(mu, sd)
d = D.rvs(100)
df_np = pd.DataFrame({'Dia': d})

n = 2.7  # what I am trying to solve for

# A droplet is "active" when its diameter exceeds mu + n * sd.
active = df_np['Dia'] > mu + n * sd

# The synthetic observation is the summed diameter of the active droplets
# (0.0 when no droplet clears the threshold).
kill = np.sum(df_np['Dia'][active])
# Model: infer the threshold multiplier `tau` (the counterpart of `n` in the
# fake-data section) from the observed `kill` value.
with pm.Model() as model_b:
    # Priors
    tau = pm.Normal('tau', mu=2, sd=2)
    ϵ = pm.HalfCauchy('ϵ', 5)

    # Observed
    df_tt = pm.Data('df_tt', df_np['Dia'])
    # Theano comparisons yield a 0/1 integer tensor, not a NumPy-style
    # boolean mask.
    act = df_tt > mu + tau * sd

    # Likelihood
    # `df_tt[act]` would treat the 0/1 values as integer indices (picking
    # elements 0 and 1) instead of masking, so zero out the inactive entries
    # with `switch` and sum what is left.
    # Summing over axis 0 gives the same scalar for 1-D data.
    # NOTE(review): with a 2-D `df_tt` (one column per drug) and a matching
    # vector `kill`, this would give one sum per column -- confirm that is
    # the intended multi-drug layout and whether `tau` should be per drug.
    μ = pm.Deterministic(
        'μ', pm.math.sum(tt.switch(act, df_tt, 0.0), axis=0)
    )
    kill_pred = pm.Normal('kill_pred', mu=μ, sd=ϵ, observed=kill)

    # NOTE(review): μ is a step function of tau, so gradient-based samplers
    # presumably get no gradient signal for tau -- verify sampler diagnostics.
    trace_b = pm.sample()

# Trailing semicolon suppresses the returned axes repr in a notebook cell.
pm.traceplot(trace_b);