I keep getting an error on a small test case. See attached file for dataset df3.csv (301 Bytes). What’s going on?
Relevant packages:
python 3.8.6 h60c2a47_0_cpython conda-forge
pymc3 3.9.3 py_1 conda-forge
theano 1.0.5 py38h7ae7562_0 conda-forge
Source code:
import numpy as np
import pandas as pd
import pymc3 as pm
import theano
import theano.tensor as tt
def fit(df: pd.DataFrame, time_column: str = "time", event_column: str = "event", vague_prior_sd: float = 20.0):
    """Fit a Bayesian Weibull regression with censored rows using PyMC3.

    Rows whose ``event_column`` value equals 0 are treated as censored; every
    other row is treated as an observed event. All columns other than
    ``time_column`` and ``event_column`` are used as predictors, and a column
    of ones (intercept) is always prepended to them.

    Args:
        df: Data containing the time column, the event column and any
            predictor columns.
        time_column: Name of the column holding the observed times.
        event_column: Name of the column holding the event indicator
            (0 marks a censored row).
        vague_prior_sd: Standard deviation used for both the Gamma prior on
            ``beta`` and the Normal prior on the ``eta`` coefficients.

    Returns:
        The object returned by ``pm.sample(..., return_inferencedata=True)``
        (presumably an ArviZ ``InferenceData`` -- confirm against the
        installed PyMC3 version).
    """
    # Extract censoring and predictor data
    y = df[time_column].values
    X_censor = df[event_column].values
    X_ones = np.ones_like(X_censor).reshape((len(X_censor), 1))
    X_predictors = df[df.columns[~df.columns.isin((time_column, event_column))]]
    if X_predictors.empty:
        # No predictor columns: intercept-only design matrix.
        X_predictors = X_ones
    else:
        X_predictors = np.concatenate((X_ones, X_predictors.values), axis=1)
    X_predictors_ = theano.shared(X_predictors)

    # Boolean mask of censored rows, kept both as NumPy (to index the data)
    # and as a Theano shared variable (to index the symbolic regression term).
    cens = X_censor == 0
    cens_ = theano.shared(cens)

    draws = 2000
    warmup_ratio = 1.0
    tune = int(warmup_ratio * draws)

    with pm.Model() as bayesian_model:

        def weibull_lccdf(x, beta, eta):
            """Return the log complementary CDF of the Weibull distribution."""
            return -(x / eta) ** beta

        # Priors for unknown model parameters
        beta = pm.Gamma("beta", mu=3, sigma=vague_prior_sd)
        eta = pm.Normal("eta", 0, vague_prior_sd, shape=X_predictors.shape[1])
        reg = X_predictors_.dot(eta)

        # Expected value of lambda parameter (log link on the linear predictor)
        lambda_obs = pm.Deterministic("lambda_obs", tt.exp(reg[~cens_]))
        lambda_cens = pm.Deterministic("lambda_cens", tt.exp(reg[cens_]))

        # Likelihood (sampling distribution) of observations: observed rows
        # use the Weibull density, censored rows contribute the log
        # complementary CDF through a potential term.
        pm.Weibull("y_obs", alpha=beta, beta=lambda_obs, observed=y[~cens])
        pm.Potential("y_cens", weibull_lccdf(y[cens], beta, lambda_cens))

    with bayesian_model:
        trace = pm.sample(target_accept=0.9, draws=draws, tune=tune, random_seed=123, return_inferencedata=True)

    # Hand the sampling result back to the caller; it was previously discarded.
    return trace
def main():
    """Read ``df3.csv`` and fit the model with ``T`` as time and ``c`` as event column."""
    dataset = pd.read_csv("df3.csv")
    fit(dataset, time_column="T", event_column="c")


# Run the example only when this file is executed as a script.
if __name__ == "__main__":
    main()
Error message:
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [eta, beta]
Traceback (most recent call last):
File "C:\Users\abreucbr\Miniconda3\lib\site-packages\pymc3\sampling.py", line 1486, in _mp_sample
for draw in sampler:
File "C:\Users\abreucbr\Miniconda3\lib\site-packages\pymc3\parallel_sampling.py", line 492, in __iter__
draw = ProcessAdapter.recv_draw(self._active)
File "C:\Users\abreucbr\Miniconda3\lib\site-packages\pymc3\parallel_sampling.py", line 352, in recv_draw
ready = multiprocessing.connection.wait(pipes)
File "C:\Users\abreucbr\Miniconda3\lib\multiprocessing\connection.py", line 849, in wait
fileno = getattr(o, 'fileno')
KeyboardInterrupt
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:/Users/abreucbr/Projects/Scratch/WeibullAFTBayesianFitter.py", line 294, in <module>
main()
File "C:/Users/abreucbr/Projects/Scratch/WeibullAFTBayesianFitter.py", line 290, in main
fit(df, time_column="T", event_column="c")
File "C:/Users/abreucbr/Projects/Scratch/WeibullAFTBayesianFitter.py", line 285, in fit
trace = pm.sample(target_accept=0.9, draws=draws, tune=tune, random_seed=123, return_inferencedata=True)
File "C:\Users\abreucbr\Miniconda3\lib\site-packages\pymc3\sampling.py", line 545, in sample
trace = _mp_sample(**sample_args, **parallel_args)
File "C:\Users\abreucbr\Miniconda3\lib\site-packages\pymc3\sampling.py", line 1512, in _mp_sample
traces, length = _choose_chains(traces, tune)
File "C:\Users\abreucbr\Miniconda3\lib\site-packages\pymc3\sampling.py", line 1530, in _choose_chains
raise ValueError("Not enough samples to build a trace.")
ValueError: Not enough samples to build a trace.