Compile Error when model contains a TruncatedNormal

Hello,
I’m having trouble implementing the ordered-probit model from this Kruschke blog post, the article it references, and his book. Here is a minimal working example:

import numpy as np
import pymc3 as pm
import theano.tensor as tt


def normal_lcdf(mu, sigma, x):
    """Compute the log of the cumulative density function of the normal."""
    z = (x - mu) / sigma
    return tt.switch(
        tt.lt(z, -1.0),
        tt.log(tt.erfcx(-z / tt.sqrt(2.)) / 2.) - tt.sqr(z) / 2.,
        tt.log1p(-tt.erfc(z / tt.sqrt(2.)) / 2.)
    )
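
# quick check (not part of the model; scipy is only needed for this comparison):
# normal_lcdf should agree with scipy.stats.norm.logcdf for a few test points
from scipy import stats
_check_x = np.array([-3., 0., 2.])
print(normal_lcdf(0., 1., _check_x).eval(), stats.norm.logcdf(_check_x))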


def tt_outcome_probabilities(mu, sigma, teta_mu, teta):
    """ gymnastics to create the outcome probabilities using Theano ops, which allows us to use
        NUTS """
    l = [ teta_mu[:2], teta, teta_mu[-2:] ]
    tt_teta = tt.concatenate(l).reshape((len(teta_mu),))
    tt_tmu_roll = np.array([ False, True ] + list(np.repeat(True,len(teta_mu)-4)) + [ True, True  ])
    tt_tmu = np.array([ True, True ] + list(np.repeat(True,len(teta_mu)-4)) + [ True, False ])
    # category probability = CDF(upper threshold) - CDF(lower threshold),
    # clamped at zero to guard against tiny negative values from rounding
    pr = tt.switch(
            tt.lt(
                tt.exp(normal_lcdf(mu=mu, sigma=sigma, x=tt_teta[tt_tmu_roll])) -
                tt.exp(normal_lcdf(mu=mu, sigma=sigma, x=tt_teta[tt_tmu])),
                0), 0,
            tt.exp(normal_lcdf(mu=mu, sigma=sigma, x=tt_teta[tt_tmu_roll])) -
            tt.exp(normal_lcdf(mu=mu, sigma=sigma, x=tt_teta[tt_tmu]))
    )
    return pr


# large value so that norm.cdf(vgrd) ~= 1. If vgrd = np.inf, the sampler complains that the
# mass matrix contains zero values on its diagonal.
vgrd = 50

# ordinal scale
npesc = np.arange(6)

# how many marks were there in each category
obs = np.array((16,24,35,49,30,7))
obs_n = obs.sum()

# specifying ordinal->probit model
with pm.Model() as ordprobit:
    # prior distributions
    mu_prior = npesc.mean()
    s = 0.212
    mu = pm.Normal(name='hyper_mu', mu=mu_prior, sd=len(npesc))
    sigma = pm.Lognormal(name='hyper_sd', mu=0.5, sd=s)

    # threshold dist. parameters
    teta_mu = np.asarray([-vgrd] + list((npesc[1:] + npesc[:-1]) / 2) + [vgrd])
    teta_sigma = (npesc[1] + npesc[0]) / 2

    # threshold distributions. Only the middle thresholds are RVs; the first and
    # last thresholds (and the ±vgrd endpoints) stay fixed
    teta = pm.Normal(name='teta', mu=teta_mu[2:-2], sd=teta_sigma, shape=teta_mu.size - 4)

    # distribution of the underlying metric scale
    metric = pm.Normal(name='metric', mu=mu, sd=sigma)

    pr = tt_outcome_probabilities(mu, sigma, teta_mu, teta)

    # distribution of the categories
    y = pm.Multinomial(name='cats', n=obs_n, p=pr, observed=obs)
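
As a quick sanity check (just a sketch; mu=2.5 and sigma=1.0 are arbitrary values and the middle thresholds are held at their prior means), the outcome probabilities can be evaluated outside the model and should sum to ~1 over the six categories:

# evaluate the category probabilities at fixed parameter values
check_pr = tt_outcome_probabilities(mu=2.5, sigma=1.0, teta_mu=teta_mu, teta=teta_mu[2:-2])
print(check_pr.eval(), check_pr.eval().sum())  # six probabilities that sum to ~1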

This model runs correctly. However, if I try to use a truncated normal for the metric RV, like metric = pm.TruncatedNormal(name='metric', mu=mu, sd=sigma, lower=npesc[0], upper=npesc[-1]), I get a compilation error with the following traceback:

Traceback (most recent call last):
  File "D:\programacao\python\Anaconda3\envs\pymc\lib\site-packages\IPython\core\interactiveshell.py", line 3267, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-3-b8a5e6dda959>", line 1, in <module>
    ordprobit.check_test_point()
  File "D:\programacao\python\Anaconda3\envs\pymc\lib\site-packages\pymc3\model.py", line 1028, in check_test_point
    return Series({RV.name:np.round(RV.logp(self.test_point), round_vals) for RV in self.basic_RVs},
  File "D:\programacao\python\Anaconda3\envs\pymc\lib\site-packages\pymc3\model.py", line 1028, in <dictcomp>
    return Series({RV.name:np.round(RV.logp(self.test_point), round_vals) for RV in self.basic_RVs},
  File "D:\programacao\python\Anaconda3\envs\pymc\lib\site-packages\pymc3\model.py", line 205, in logp
    return self.model.fn(self.logpt)
  File "D:\programacao\python\Anaconda3\envs\pymc\lib\site-packages\pymc3\model.py", line 925, in fn
    return LoosePointFunc(self.makefn(outs, mode, *args, **kwargs), self)
  File "D:\programacao\python\Anaconda3\envs\pymc\lib\site-packages\pymc3\model.py", line 910, in makefn
    mode=mode, *args, **kwargs)
  File "D:\programacao\python\Anaconda3\envs\pymc\lib\site-packages\theano\compile\function.py", line 317, in function
    output_keys=output_keys)
  File "D:\programacao\python\Anaconda3\envs\pymc\lib\site-packages\theano\compile\pfunc.py", line 486, in pfunc
    output_keys=output_keys)
  File "D:\programacao\python\Anaconda3\envs\pymc\lib\site-packages\theano\compile\function_module.py", line 1841, in orig_function
    fn = m.create(defaults)
  File "D:\programacao\python\Anaconda3\envs\pymc\lib\site-packages\theano\compile\function_module.py", line 1715, in create
    input_storage=input_storage_lists, storage_map=storage_map)
  File "D:\programacao\python\Anaconda3\envs\pymc\lib\site-packages\theano\gof\link.py", line 699, in make_thunk
    storage_map=storage_map)[:3]
  File "D:\programacao\python\Anaconda3\envs\pymc\lib\site-packages\theano\gof\vm.py", line 1091, in make_all
    impl=impl))
  File "D:\programacao\python\Anaconda3\envs\pymc\lib\site-packages\theano\gof\op.py", line 955, in make_thunk
    no_recycling)
  File "D:\programacao\python\Anaconda3\envs\pymc\lib\site-packages\theano\gof\op.py", line 858, in make_c_thunk
    output_storage=node_output_storage)
  File "D:\programacao\python\Anaconda3\envs\pymc\lib\site-packages\theano\gof\cc.py", line 1217, in make_thunk
    keep_lock=keep_lock)
  File "D:\programacao\python\Anaconda3\envs\pymc\lib\site-packages\theano\gof\cc.py", line 1157, in __compile__
    keep_lock=keep_lock)
  File "D:\programacao\python\Anaconda3\envs\pymc\lib\site-packages\theano\gof\cc.py", line 1624, in cthunk_factory
    key=key, lnk=self, keep_lock=keep_lock)
  File "D:\programacao\python\Anaconda3\envs\pymc\lib\site-packages\theano\gof\cmodule.py", line 1189, in module_from_key
    module = lnk.compile_cmodule(location)
  File "D:\programacao\python\Anaconda3\envs\pymc\lib\site-packages\theano\gof\cc.py", line 1527, in compile_cmodule
    preargs=preargs)
  File "D:\programacao\python\Anaconda3\envs\pymc\lib\site-packages\theano\gof\cmodule.py", line 2396, in compile_str
    (status, compile_stderr.replace('\n', '. ')))
Exception: ('The following error happened while compiling the node', Elemwise{Composite{((i0 + Switch(Cast{int8}((GT(i1, i2) * GE(Composite{(i0 * scalar_sigmoid(i1))}(i3, i4), i5) * LE(Composite{(i0 * scalar_sigmoid(i1))}(i3, i4), i6))), (Switch(Cast{int8}(GT(i1, i2)), (i7 * ((-(Composite{inv(sqr(i0))}(i1) * sqr((Composite{(i0 * scalar_sigmoid(i1))}(i3, i4) - i8)))) + log((i9 * Composite{inv(sqr(i0))}(i1))))), i10) - (Composite{Switch(LT((i0 / i1), i2), (log((i3 * i4)) - (i3 * sqr((i0 / i1)))), log1p((i5 * erfc(((i6 * i0) / i1)))))}(i11, i1, i12, i7, i13, i14, i15) + Switch(LT(Composite{(i0 - Switch(LT(Composite{((-i0) / i1)}(i1, i2), i3), (log((i4 * i5)) - (i4 * sqr(Composite{((-i0) / i1)}(i1, i2)))), log1p((i6 * erfc(((i7 * i1) / i2))))))}(Composite{Switch(LT((i0 / i1), i2), (log((i3 * i4)) - (i3 * sqr((i0 / i1)))), log1p((i5 * erfc(((i6 * i0) / i1)))))}(i11, i1, i12, i7, i13, i14, i15), i8, i1, i12, i7, i16, i14, i17), i18), log((-expm1((-Composite{(i0 - Switch(LT(Composite{((-i0) / i1)}(i1, i2), i3), (log((i4 * i5)) - (i4 * sqr(Composite{((-i0) / i1)}(i1, i2)))), log1p((i6 * erfc(((i7 * i1) / i2))))))}(Composite{Switch(LT((i0 / i1), i2), (log((i3 * i4)) - (i3 * sqr((i0 / i1)))), log1p((i5 * erfc(((i6 * i0) / i1)))))}(i11, i1, i12, i7, i13, i14, i15), i8, i1, i12, i7, i16, i14, i17))))), log1p((-exp((-Composite{(i0 - Switch(LT(Composite{((-i0) / i1)}(i1, i2), i3), (log((i4 * i5)) - (i4 * sqr(Composite{((-i0) / i1)}(i1, i2)))), log1p((i6 * erfc(((i7 * i1) / i2))))))}(Composite{Switch(LT((i0 / i1), i2), (log((i3 * i4)) - (i3 * sqr((i0 / i1)))), log1p((i5 * erfc(((i6 * i0) / i1)))))}(i11, i1, i12, i7, i13, i14, i15), i8, i1, i12, i7, i16, i14, i17)))))))), i10)) - ((i19 * scalar_softplus((-i4))) + i4))}}[(0, 1)](TensorConstant{1.6094379124341003}, hyper_sd, TensorConstant{0}, TensorConstant{5.0}, metric_interval__, TensorConstant{0}, TensorConstant{5}, TensorConstant{0.5}, hyper_mu, TensorConstant{0.15915494309189535}, TensorConstant{-inf}, Elemwise{sub,no_inplace}.0, TensorConstant{-1.0}, Elemwise{Erfcx}[(0, 0)].0, TensorConstant{-0.5}, TensorConstant{0.7071067932881648}, Elemwise{Erfcx}[(0, 0)].0, TensorConstant{-0.7071067932881648}, TensorConstant{0.683}, TensorConstant{2.0}), '\n', "Compilation failed (return status=1): C:\\Users\\LIMO~1\\AppData\\Local\\Temp\\ccb5gt7q.s: Assembler messages:\r. C:\\Users\\LIMO~1\\AppData\\Local\\Temp\\ccb5gt7q.s:9796: Error: no such instruction: `vfnmadd312sd 336(%rsp),%xmm14,%xmm13'\r. 
", '[Elemwise{Composite{((i0 + Switch(Cast{int8}((GT(i1, i2) * GE(Composite{(i0 * scalar_sigmoid(i1))}(i3, i4), i5) * LE(Composite{(i0 * scalar_sigmoid(i1))}(i3, i4), i6))), (Switch(Cast{int8}(GT(i1, i2)), (i7 * ((-(Composite{inv(sqr(i0))}(i1) * sqr((Composite{(i0 * scalar_sigmoid(i1))}(i3, i4) - i8)))) + log((i9 * Composite{inv(sqr(i0))}(i1))))), i10) - (Composite{Switch(LT((i0 / i1), i2), (log((i3 * i4)) - (i3 * sqr((i0 / i1)))), log1p((i5 * erfc(((i6 * i0) / i1)))))}(i11, i1, i12, i7, i13, i14, i15) + Switch(LT(Composite{(i0 - Switch(LT(Composite{((-i0) / i1)}(i1, i2), i3), (log((i4 * i5)) - (i4 * sqr(Composite{((-i0) / i1)}(i1, i2)))), log1p((i6 * erfc(((i7 * i1) / i2))))))}(Composite{Switch(LT((i0 / i1), i2), (log((i3 * i4)) - (i3 * sqr((i0 / i1)))), log1p((i5 * erfc(((i6 * i0) / i1)))))}(i11, i1, i12, i7, i13, i14, i15), i8, i1, i12, i7, i16, i14, i17), i18), log((-expm1((-Composite{(i0 - Switch(LT(Composite{((-i0) / i1)}(i1, i2), i3), (log((i4 * i5)) - (i4 * sqr(Composite{((-i0) / i1)}(i1, i2)))), log1p((i6 * erfc(((i7 * i1) / i2))))))}(Composite{Switch(LT((i0 / i1), i2), (log((i3 * i4)) - (i3 * sqr((i0 / i1)))), log1p((i5 * erfc(((i6 * i0) / i1)))))}(i11, i1, i12, i7, i13, i14, i15), i8, i1, i12, i7, i16, i14, i17))))), log1p((-exp((-Composite{(i0 - Switch(LT(Composite{((-i0) / i1)}(i1, i2), i3), (log((i4 * i5)) - (i4 * sqr(Composite{((-i0) / i1)}(i1, i2)))), log1p((i6 * erfc(((i7 * i1) / i2))))))}(Composite{Switch(LT((i0 / i1), i2), (log((i3 * i4)) - (i3 * sqr((i0 / i1)))), log1p((i5 * erfc(((i6 * i0) / i1)))))}(i11, i1, i12, i7, i13, i14, i15), i8, i1, i12, i7, i16, i14, i17)))))))), i10)) - ((i19 * scalar_softplus((-i4))) + i4))}}[(0, 1)](TensorConstant{1.6094379124341003}, hyper_sd, TensorConstant{0}, TensorConstant{5.0}, metric_interval__, TensorConstant{0}, TensorConstant{5}, TensorConstant{0.5}, hyper_mu, TensorConstant{0.15915494309189535}, TensorConstant{-inf}, <TensorType(float64, scalar)>, TensorConstant{-1.0}, <TensorType(float64, scalar)>, TensorConstant{-0.5}, TensorConstant{0.7071067932881648}, <TensorType(float64, scalar)>, TensorConstant{-0.7071067932881648}, TensorConstant{0.683}, TensorConstant{2.0})]')

I honestly have no idea how to approach this. Any tips?

Looks like this Theano issue. Could you try what’s suggested there and see if that solves it?
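
In case it helps, the workaround there boils down (if I recall correctly) to overriding the -march flag Theano passes to the C compiler. A rough sketch, where "-march=corei7" is only a placeholder and the right value depends on your CPU/toolchain:

import os

# must be set before theano (or pymc3) is imported; the flag value is a placeholder
os.environ["THEANO_FLAGS"] = "gcc.cxxflags=-march=corei7"

import pymc3 as pm  # importing pymc3 imports theano with the flag applied

The same setting can also go in your .theanorc under the [gcc] section as cxxflags = -march=corei7.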


It worked, but the model compilation is a lot slower. I don’t know if it has to do with the compiler flag or the more complicated model (or maybe both).

Thanks for the suggestion!


I don’t know whether the slowdown is due to the compiler flag or to the more complex model. To test whether it’s the flag, you could compile your simple model with both compiler-flag settings and see if it takes the same time either way, or even profile the model’s logp. If you get the same result in both cases, the extra time was probably just down to the more complex model.
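
Something along these lines (just a sketch; assumes ordprobit is the model from your first post):

import time

# building the logp function is what triggers the C compilation, so timing it
# separates compile cost from sampling cost
start = time.time()
logp_fn = ordprobit.logp
print("compiling logp took %.1f s" % (time.time() - start))

# theano-level profile of the compiled logp graph; compare the two compiler-flag
# settings (and the Normal vs. TruncatedNormal versions) with this summary
ordprobit.profile(ordprobit.logpt).summary()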

On my computer it takes 55 seconds for the model with the unbounded Normal to compile, and 62 seconds for the model with the TruncatedNormal to compile, both with the march flag active. So it takes about 13% longer, which is not a big deal, really.