Starting Point failure

I am new to PyMC3, and thought I understood it :slight_smile:, but obviously not.

In the code below, I encode a simple discrete Bayesian model with no hyper-parameters. The observations are encoded in a dictionary, which is passed to the model. In the experiments below, I set 'l' to 0 and then to 1; 'l' is one of the two possible results from sampling a Bernoulli. The 'l':0 case does not have a problem, but the 'l':1 case does. I read up on this error and found that it often occurs when a parameter is out of range with respect to constraints imposed by some distribution (for example, a negative standard deviation). However, in the present case, I cannot figure it out. Any help is very much appreciated. Thanks. Gordon.

import pymc3 as pm
import pymc3.distributions.discrete as discrete
import matplotlib.pyplot as plt
import arviz as az
# import aesara.tensor as at
import numpy as np
from tqdm import tqdm
import pandas as pd
from theano import tensor as tt
from theano.ifelse import ifelse

# SOME DETERMINISTIC FUNCTIONS
def grade(i, d):
    a00 = tt.and_(tt.eq(i,0), tt.eq(d,0))
    a10 = tt.and_(tt.eq(i,1), tt.eq(d,0))
    a01 = tt.and_(tt.eq(i,0), tt.eq(d,1))
    a11 = tt.and_(tt.eq(i,1), tt.eq(d,1))
    cat1 = [0.3,0.4, 0.3]
    cat2 = [0.05, 0.25, 0.7]
    cat3 = [0.9, 0.08, 0.02]
    cat4 = [0.5, 0.3,  0.2]
    return a00*cat1 + a10*cat2 + a01*cat3 + a11*cat4

def letter(g):
    return tt.eq(g, gA)*0.9 + tt.eq(g, gB)*0.6 + tt.eq(g, gC)*0.01

def score(i):
     return tt.eq(i, 0)*0.05 + tt.eq(i, 1)*0.8

gA = 2
gB = 5
gC = 10

# MODEL CREATION: observations passed in as arguments through a dictionary
def create_model(obs):
    with pm.Model() as grade_model:
        if "d" in obs.keys():
            d = discrete.Bernoulli("d", 0.4, observed=obs["d"])
        else:
            d = discrete.Bernoulli("d", 0.4)
            
        if "i" in obs.keys():
            i = discrete.Bernoulli("i", 0.3, observed=obs["i"])
        else:
            i = discrete.Bernoulli("i", 0.3)

        p_grade = grade(i, d)
        if "g" in obs.keys():
            g = discrete.Categorical("g", p_grade, observed=obs["g"])
        else:
            g = discrete.Categorical("g", p_grade)
            
        p_letter = letter(g)
        if "l" in obs.keys():
            l = discrete.Bernoulli("l", p_letter, observed=obs["l"])
        else:
            l = discrete.Bernoulli("l", p_letter)
            print("l: ", l)
            
        p_score = score(i)
        if "s" in obs.keys():
            s = discrete.Bernoulli("s", p_score, observed=obs["s"])
        else:
            s = discrete.Bernoulli("s", p_score)
    return grade_model

# 
def run_model(model):
    with model:
        step = pm.Metropolis()
        trace = pm.sample(n_init=5000, step=step, draws=5000, tune=1000, return_inferencedata=False, progressbar=False)
        display(az.summary(trace, kind="stats"))
        lst = list(trace.points())
#         glst = [dct['g'] for dct in lst]
#         slst = [dct['s'] for dct in lst]
#         llst = [dct['l'] for dct in lst]
        return lst, trace #, [np.mean(x) for x in [glst, slst, llst]];

def run(obs):
    model = create_model(obs)
#     model = create_model_new(obs)
    lst, trace = run_model(model)
    return model, trace

model1, trace1 = run(obs={})

# RUNS FINE. 
run(obs={'l':0.})

# PRODUCES ERROR (the observation is in a Bernoulli)
run(obs={'l':1})   # <<< ERROR

This last line produces the following error:

---------------------------------------------------------------------------
SamplingError                             Traceback (most recent call last)
<ipython-input-12-93d95129a24b> in <module>
      1 # 'l':1 gives an error related to starting conditions
----> 2 run(obs={'l':1})

<ipython-input-8-364b5c5a1e7d> in run(obs)
      2     model = create_model(obs)
      3 #     model = create_model_new(obs)
----> 4     lst, trace = run_model(model)
      5     return model, trace

<ipython-input-7-f445ae35680c> in run_model(model)
      3         # return_inferencedata=True does not work
      4         step = pm.Metropolis()
----> 5         trace = pm.sample(n_init=5000, step=step, draws=5000, tune=1000, return_inferencedata=False, progressbar=False)
      6         display(az.summary(trace, kind="stats"))
      7         lst = list(trace.points())

~/anaconda3/lib/python3.8/site-packages/deprecat/classic.py in wrapper_function(wrapped_, instance_, args_, kwargs_)
    213                         else:
    214                             warnings.warn(message, category=category, stacklevel=_routine_stacklevel)
--> 215                 return wrapped_(*args_, **kwargs_)
    216 
    217             return wrapper_function(wrapped)

~/anaconda3/lib/python3.8/site-packages/pymc3/sampling.py in sample(draws, step, init, n_init, initvals, trace, chain_idx, chains, cores, tune, progressbar, model, random_seed, discard_tuned_samples, compute_convergence_checks, callback, jitter_max_retries, start, return_inferencedata, idata_kwargs, mp_ctx, pickle_backend, **kwargs)
    442     start = deepcopy(start)
    443     if start is None:
--> 444         check_start_vals(model.test_point, model)
    445     else:
    446         if isinstance(start, dict):

~/anaconda3/lib/python3.8/site-packages/pymc3/util.py in check_start_vals(start, model)
    235 
    236         if not np.all(np.isfinite(initial_eval)):
--> 237             raise SamplingError(
    238                 "Initial evaluation of model at starting point failed!\n"
    239                 "Starting values:\n{}\n\n"

SamplingError: Initial evaluation of model at starting point failed!
Starting values:
{'d': array(0), 'i': array(0), 'g': array(1), 's': array(0)}

Initial evaluation results:
d   -0.51
i   -0.36
g   -0.92
s   -0.05
l    -inf
Name: Log-probability of test_point, dtype: float64

Welcome!

I suspect it’s because p_letter is 0.0, which makes the observed data impossible (logp= -inf). I may be misreading your model, but I can’t quite figure out how letter() is supposed to work. It takes a categorical parameter (g) which indexes 3 categories (because grade() returns a triplet of probabilities). But then letter() compares this categorical value to the values 2, 5, and 10. So I suspect that letter(g) always returns a value of 0.0 (which would make the observed value of 1 impossible). But I may be missing something.
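
A quick way to see this is to mirror letter()'s arithmetic in plain Python over the Categorical's support (just a sketch, not part of your model):

gA, gB, gC = 2, 5, 10
for g in range(3):  # Categorical("g", p_grade) can only take the values 0, 1, 2
    p_letter = (g == gA) * 0.9 + (g == gB) * 0.6 + (g == gC) * 0.01
    print(g, p_letter)  # 0.0 for every grade, hence logp = -inf once l=1 is observed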

You hit the nail on the head, @cluhmann! The three grades are categorical variables (gA, gB, gC), but I did not realize that discrete.Categorical returns the categories (when sampled) as 0., 1., 2., i.e. as real numbers (which is strange; why not integers?). The letter method takes as input one of the three categories, so all I had to do to fix the problem was replace the three grades with gA, gB, gC = (0., 1., 2.):

gA, gB, gC = (0., 1., 2.)
def letter(g):
    return tt.eq(g, gA)*0.9 + tt.eq(g, gB)*0.6 + tt.eq(g, gC)*0.01

Of course, equality operators acting on real numbers are never a good idea, so I should probably round g to an integer and make the grades gA, etc., into integers. Another improvement would be to guarantee that one of the conditions in the return statement is satisfied, on the off-chance that none of the equalities match due to rounding problems. Finally, I would like a more elegant and faster way of expressing this function. If you have any ideas, I am open to them! Thanks for your help!
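
One possibility I have been toying with (just a sketch, untested in the full model) is to index a constant vector of probabilities by g instead of summing equality tests, since g should be integer-valued:

from theano import tensor as tt  # already imported above

letter_probs = tt.constant([0.9, 0.6, 0.01])

def letter(g):
    # g is the integer sampled by the Categorical, so it can index the vector directly
    return letter_probs[g]

but I am not sure whether this is the idiomatic way to do it.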


Sampled values of pm.Categorical should be integers. An easy way to check would be to insert a pm.Deterministic variable:

g_det = pm.Deterministic('g_det', g)

and inspect the trace:

In [2]: trace['g_det'][0:30]
Out[2]: 
array([0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1])

and you can check the dtype explicitly:

In [3]: trace['g_det'][0].dtype
Out[3]: dtype('int64')

Somewhat separately, I would strongly recommend you avoid setting the step method explicitly when sampling (e.g., step = pm.Metropolis()). PyMC is pretty good about selecting appropriate sampling algorithms based on your model.
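
For example, a minimal sketch of your run_model() that leaves the choice of step methods to PyMC:

def run_model(model):
    with model:
        # no explicit step: pm.sample() assigns suitable samplers for the discrete variables
        trace = pm.sample(draws=5000, tune=1000, return_inferencedata=False, progressbar=False)
        display(az.summary(trace, kind="stats"))
        return list(trace.points()), trace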

Great, thanks. Good suggestions! My code works now. The next step is to use a mixture of categoricals, then multiple observations, a Bayesian formulation, and a variational implementation through PyMC.


I tried to implement the grade function as a mixture of Categorical distributions. It seemed to work when the mixture was observed, but when it was not, I got an incomprehensible error. Note that I removed the call to grade(...). Obviously, I am mistaken somewhere, yet I feel that a mixture of Categorical distributions is what is needed. Any insights? :slight_smile:

def create_model_cat(obs):
    with pm.Model() as grade_model:
        w = np.asarray([.42, .28, .18, .12])
        d00 = pm.Categorical.dist(p=[.3,.4,.3])
        d01 = pm.Categorical.dist(p=[.05,.25,.7])
        d10 = pm.Categorical.dist(p=[.9,.08,.02])
        d11 = pm.Categorical.dist(p=[.5,.3,.2])

        if "d" in obs.keys():
            d = discrete.Bernoulli("d", 0.4, observed=obs["d"])
        else:
            d = discrete.Bernoulli("d", 0.4)
            
        if "i" in obs.keys():
            i = discrete.Bernoulli("i", 0.3, observed=obs["i"])
        else:
            i = discrete.Bernoulli("i", 0.3)

#         p_grade = grade(i, d)
        if "g" in obs.keys():
            # returns a category
            g = pm.Mixture('g', w=w, comp_dists=[d00,d01,d10,d11], observed=obs["g"])
#             g = discrete.Categorical("g", p_grade, observed=obs["g"])
        else:
#             g = discrete.Categorical("g", p_grade)
            g = pm.Mixture('g', w=w, comp_dists=[d00,d01,d10,d11])
            
        p_letter = letter(g)
        if "l" in obs.keys():
            l = discrete.Bernoulli("l", p_letter, observed=obs["l"])
        else:
            l = discrete.Bernoulli("l", p_letter)
            
        p_score = score(i)
        if "s" in obs.keys():
            s = discrete.Bernoulli("s", p_score, observed=obs["s"])
        else:
            s = discrete.Bernoulli("s", p_score)
    return grade_model

# ----
def run_model(model):
    with model:
        # return_inferencedata=True does not work
#         step = pm.Metropolis()
        trace = pm.sample(n_init=5000, draws=4000, tune=2000, return_inferencedata=False, progressbar=False)
        display(az.summary(trace, kind="stats"))
        lst = list(trace.points())
        return lst, trace

#----
def run(obs):
    model = create_model_cat(obs)   # New function, see above  <<<<<<<<
#     pm.model_graph.model_to_graphviz(model)
#     model = create_model_new(obs)
    lst, trace = run_model(model)
    return model, trace

#----
run(obs={})

with error:

---------------------------------------------------------------------------
RemoteTraceback                           Traceback (most recent call last)
RemoteTraceback: 
"""
Traceback (most recent call last):
  File "/home/erlebach/anaconda3/lib/python3.8/site-packages/theano/compile/function/types.py", line 974, in __call__
    self.fn()
TypeError: expected type_num 5 (NPY_INT32) got 7

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/erlebach/anaconda3/lib/python3.8/site-packages/pymc3/parallel_sampling.py", line 137, in run
    self._start_loop()
  File "/home/erlebach/anaconda3/lib/python3.8/site-packages/pymc3/parallel_sampling.py", line 191, in _start_loop
    point, stats = self._compute_point()
  File "/home/erlebach/anaconda3/lib/python3.8/site-packages/pymc3/parallel_sampling.py", line 216, in _compute_point
    point, stats = self._step_method.step(self._point)
  File "/home/erlebach/anaconda3/lib/python3.8/site-packages/pymc3/step_methods/compound.py", line 42, in step
    point, state = method.step(point)
  File "/home/erlebach/anaconda3/lib/python3.8/site-packages/pymc3/step_methods/arraystep.py", line 196, in step
    apoint, stats = self.astep(self.bij.map(point))
  File "/home/erlebach/anaconda3/lib/python3.8/site-packages/pymc3/step_methods/metropolis.py", line 217, in astep
    accept = self.delta_logp(q, q0)
  File "/home/erlebach/anaconda3/lib/python3.8/site-packages/theano/compile/function/types.py", line 987, in __call__
    raise_with_op(
  File "/home/erlebach/anaconda3/lib/python3.8/site-packages/theano/link/utils.py", line 508, in raise_with_op
    raise exc_value.with_traceback(exc_trace)
  File "/home/erlebach/anaconda3/lib/python3.8/site-packages/theano/compile/function/types.py", line 974, in __call__
    self.fn()
TypeError: expected type_num 5 (NPY_INT32) got 7
Apply node that caused the error: Elemwise{Composite{Cast{int8}((GE(i0, i1) * LE(i0, i2)))}}(InplaceDimShuffle{}.0, TensorConstant{0}, TensorConstant{2})
Toposort index: 12
Inputs types: [TensorType(int32, scalar), TensorType(int8, scalar), TensorType(int64, scalar)]
Inputs shapes: [(), (), ()]
Inputs strides: [(), (), ()]
Inputs values: [array(2), array(0, dtype=int8), array(2)]
Outputs clients: [[Elemwise{Composite{Switch(i0, log(i1), i2)}}(Elemwise{Composite{Cast{int8}((GE(i0, i1) * LE(i0, i2)))}}.0, Subtensor{int64}.0, TensorConstant{-inf}), Elemwise{Composite{Switch(i0, log(i1), i2)}}(Elemwise{Composite{Cast{int8}((GE(i0, i1) * LE(i0, i2)))}}.0, Subtensor{int64}.0, TensorConstant{-inf}), Elemwise{Composite{Switch(i0, log(i1), i2)}}(Elemwise{Composite{Cast{int8}((GE(i0, i1) * LE(i0, i2)))}}.0, Subtensor{int64}.0, TensorConstant{-inf}), Elemwise{Composite{Switch(i0, log(i1), i2)}}(Elemwise{Composite{Cast{int8}((GE(i0, i1) * LE(i0, i2)))}}.0, Subtensor{int64}.0, TensorConstant{-inf})]]

HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
"""

The above exception was the direct cause of the following exception:

TypeError                                 Traceback (most recent call last)
~/anaconda3/lib/python3.8/site-packages/pymc3/parallel_sampling.py in run()
    136             self._point = self._make_numpy_refs()
--> 137             self._start_loop()
    138         except KeyboardInterrupt:

~/anaconda3/lib/python3.8/site-packages/pymc3/parallel_sampling.py in _start_loop()
    190                 try:
--> 191                     point, stats = self._compute_point()
    192                 except SamplingError as e:

~/anaconda3/lib/python3.8/site-packages/pymc3/parallel_sampling.py in _compute_point()
    215         if self._step_method.generates_stats:
--> 216             point, stats = self._step_method.step(self._point)
    217         else:

~/anaconda3/lib/python3.8/site-packages/pymc3/step_methods/compound.py in step()
     41                 if method.generates_stats:
---> 42                     point, state = method.step(point)
     43                     states.extend(state)

~/anaconda3/lib/python3.8/site-packages/pymc3/step_methods/arraystep.py in step()
    195         if self.generates_stats:
--> 196             apoint, stats = self.astep(self.bij.map(point))
    197             return self.bij.rmap(apoint), stats

~/anaconda3/lib/python3.8/site-packages/pymc3/step_methods/metropolis.py in astep()
    216 
--> 217         accept = self.delta_logp(q, q0)
    218         q_new, accepted = metrop_select(accept, q, q0)

~/anaconda3/lib/python3.8/site-packages/theano/compile/function/types.py in __call__()
    986                     thunk = self.fn.thunks[self.fn.position_of_error]
--> 987                 raise_with_op(
    988                     self.maker.fgraph,

~/anaconda3/lib/python3.8/site-packages/theano/link/utils.py in raise_with_op()
    507         # extra long error message in that case.
--> 508     raise exc_value.with_traceback(exc_trace)
    509 

~/anaconda3/lib/python3.8/site-packages/theano/compile/function/types.py in __call__()
    973             outputs = (
--> 974                 self.fn()
    975                 if output_subset is None

TypeError: expected type_num 5 (NPY_INT32) got 7
Apply node that caused the error: Elemwise{Composite{Cast{int8}((GE(i0, i1) * LE(i0, i2)))}}(InplaceDimShuffle{}.0, TensorConstant{0}, TensorConstant{2})
Toposort index: 12
Inputs types: [TensorType(int32, scalar), TensorType(int8, scalar), TensorType(int64, scalar)]
Inputs shapes: [(), (), ()]
Inputs strides: [(), (), ()]
Inputs values: [array(2), array(0, dtype=int8), array(2)]
Outputs clients: [[Elemwise{Composite{Switch(i0, log(i1), i2)}}(Elemwise{Composite{Cast{int8}((GE(i0, i1) * LE(i0, i2)))}}.0, Subtensor{int64}.0, TensorConstant{-inf}), Elemwise{Composite{Switch(i0, log(i1), i2)}}(Elemwise{Composite{Cast{int8}((GE(i0, i1) * LE(i0, i2)))}}.0, Subtensor{int64}.0, TensorConstant{-inf}), Elemwise{Composite{Switch(i0, log(i1), i2)}}(Elemwise{Composite{Cast{int8}((GE(i0, i1) * LE(i0, i2)))}}.0, Subtensor{int64}.0, TensorConstant{-inf}), Elemwise{Composite{Switch(i0, log(i1), i2)}}(Elemwise{Composite{Cast{int8}((GE(i0, i1) * LE(i0, i2)))}}.0, Subtensor{int64}.0, TensorConstant{-inf})]]

HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.

The above exception was the direct cause of the following exception:

RuntimeError                              Traceback (most recent call last)
<ipython-input-83-04d620c6b186> in <module>
----> 1 run(obs={})
      2 # model2, trace2 = run(obs={'d':1, 's':1, 'l':1})

<ipython-input-82-b218e73e01f1> in run(obs)
      3 #     pm.model_graph.model_to_graphviz(model)
      4 #     model = create_model_new(obs)
----> 5     lst, trace = run_model(model)
      6     return model, trace

<ipython-input-81-d77149c5eee1> in run_model(model)
      3         # return_inferencedata=True does not work
      4 #         step = pm.Metropolis()
----> 5         trace = pm.sample(n_init=5000, draws=4000, tune=2000, return_inferencedata=False, progressbar=False)
      6         display(az.summary(trace, kind="stats"))
      7         lst = list(trace.points())

~/anaconda3/lib/python3.8/site-packages/deprecat/classic.py in wrapper_function(wrapped_, instance_, args_, kwargs_)
    213                         else:
    214                             warnings.warn(message, category=category, stacklevel=_routine_stacklevel)
--> 215                 return wrapped_(*args_, **kwargs_)
    216 
    217             return wrapper_function(wrapped)

~/anaconda3/lib/python3.8/site-packages/pymc3/sampling.py in sample(draws, step, init, n_init, initvals, trace, chain_idx, chains, cores, tune, progressbar, model, random_seed, discard_tuned_samples, compute_convergence_checks, callback, jitter_max_retries, start, return_inferencedata, idata_kwargs, mp_ctx, pickle_backend, **kwargs)
    573         _print_step_hierarchy(step)
    574         try:
--> 575             trace = _mp_sample(**sample_args, **parallel_args)
    576         except pickle.PickleError:
    577             _log.warning("Could not pickle model, sampling singlethreaded.")

~/anaconda3/lib/python3.8/site-packages/pymc3/sampling.py in _mp_sample(draws, tune, step, chains, cores, chain, random_seed, start, progressbar, trace, model, callback, discard_tuned_samples, mp_ctx, pickle_backend, **kwargs)
   1494         try:
   1495             with sampler:
-> 1496                 for draw in sampler:
   1497                     trace = traces[draw.chain - chain]
   1498                     if trace.supports_sampler_stats and draw.stats is not None:

~/anaconda3/lib/python3.8/site-packages/pymc3/parallel_sampling.py in __iter__(self)
    477 
    478         while self._active:
--> 479             draw = ProcessAdapter.recv_draw(self._active)
    480             proc, is_last, draw, tuning, stats, warns = draw
    481             self._total_draws += 1

~/anaconda3/lib/python3.8/site-packages/pymc3/parallel_sampling.py in recv_draw(processes, timeout)
    357             else:
    358                 error = RuntimeError("Chain %s failed." % proc.chain)
--> 359             raise error from old_error
    360         elif msg[0] == "writing_done":
    361             proc._readable = True

RuntimeError: Chain 0 failed.