Assertion Error in PYMC3

I am getting an AssertionError while running a Bayesian regression model on a Windows 10 computer. My PyMC3 version is 3.11.5, with Theano version 1.1.2. The imports, code, and error are shown below.

import numpy as np
import pymc3 as pm 
import theano.tensor as tt

#saturation function
def saturate(x,a):
    """Apply the saturation curve ``1 - exp(-a * x)`` elementwise.

    x: values to transform.
    a: rate that multiplies ``x`` inside the exponential.
    Returns the resulting Theano expression.
    """
    decay = tt.exp(-a * x)
    return 1 - decay

#carryover function
def carryover(x, strength, length = 3):
    """Return a weighted sum of ``x`` and its lagged copies.

    Lag ``i`` (for ``i`` in ``0 .. length - 1``) is ``x`` shifted right by
    ``i`` positions: ``i`` leading zeros followed by ``x`` with its last ``i``
    entries dropped, so every lagged copy keeps the length of ``x``. Lag ``i``
    is weighted by ``strength ** i``.

    x: series to carry over (assumes a 1-D array or tensor -- TODO confirm).
    strength: base of the geometric weights.
    length: number of lags, including lag 0.
    """
    n_obs = x.shape[0]

    weights = []
    shifted = []
    for lag in range(length):
        weights.append(tt.power(strength, lag))
        # Pad the front with `lag` zeros and trim the tail by the same amount.
        shifted.append(tt.concatenate([tt.zeros(lag), x[:n_obs - lag]]))

    weight_vector = tt.as_tensor_variable(weights)
    lag_matrix = tt.stack(shifted)

    return tt.dot(weight_vector, lag_matrix)

#Pymc code below
with pm.Model() as model_data:
    # Builds one stacked contribution tensor per channel and sums them into
    # the mean of the Sales likelihood. var_list, dic_data and y must already
    # be defined in the session before this block runs.
    channel_contributions = []

    for channel in var_list:
        is_lag_channel = 'lag' in channel

        # The same three priors are registered for every channel; only the
        # rate of the coefficient prior differs between the two channel kinds.
        coef = pm.Exponential(f'coef_{channel}', lam=0.1 if is_lag_channel else 0.0001)
        sat = pm.Exponential(f'sat_{channel}', lam=1)
        car = pm.Beta(f'car_{channel}', alpha=2, beta=2)

        channel_contributions_granular = []

        # Each data frame is processed on its own so that the lags built by
        # carryover() stay within a single frame.
        for frame in dic_data.values():
            X_temp = frame[channel].values

            if is_lag_channel:
                # Channels whose name contains 'lag' enter linearly; sat and
                # car are still registered above but are not used here.
                channel_contribution_temp = coef * X_temp
            else:
                channel_contribution_temp = coef * saturate(carryover(X_temp, car), sat)

            # removing the first 3 months of data, which creates a subtensor
            channel_contributions_granular.append(channel_contribution_temp[3:])

        # stack the per-frame subtensors into one tensor for this channel
        channel_contribution = tt.stack(channel_contributions_granular)
        channel_contributions.append(channel_contribution)

    base = pm.Exponential('base', lam=0.0001)
    noise = pm.Exponential('noise', lam=0.0001)

    Sales = pm.Normal('Sales', mu=sum(channel_contributions) + base, sigma=noise, observed=y)

    trace_data = pm.sample(return_inferencedata=True, tune=2000)

The assertion error is shown below:

ERROR (theano.graph.opt): Optimization failure due to: constant_folding
ERROR (theano.graph.opt): node: Join(TensorConstant{0}, TensorConstant{(1,) of 0.0}, TensorConstant{[45.841808...03101647]})
ERROR (theano.graph.opt): TRACEBACK:
ERROR (theano.graph.opt): Traceback (most recent call last):
  File "C:\Users\J_Pushkin\Anaconda3\envs\pymc_env\lib\site-packages\theano\graph\opt.py", line 2017, in process_node
    replacements = lopt.transform(fgraph, node)
  File "C:\Users\J_Pushkin\Anaconda3\envs\pymc_env\lib\site-packages\theano\graph\opt.py", line 1209, in transform
    return self.fn(*args, **kwargs)
  File "C:\Users\J_Pushkin\Anaconda3\envs\pymc_env\lib\site-packages\theano\tensor\opt.py", line 7006, in constant_folding
    thunk = node.op.make_thunk(
  File "C:\Users\J_Pushkin\Anaconda3\envs\pymc_env\lib\site-packages\theano\graph\op.py", line 634, in make_thunk
    return self.make_c_thunk(node, storage_map, compute_map, no_recycling)
  File "C:\Users\J_Pushkin\Anaconda3\envs\pymc_env\lib\site-packages\theano\graph\op.py", line 600, in make_c_thunk
    outputs = cl.make_thunk(
  File "C:\Users\J_Pushkin\Anaconda3\envs\pymc_env\lib\site-packages\theano\link\c\basic.py", line 1203, in make_thunk
    cthunk, module, in_storage, out_storage, error_storage = self.__compile__(
  File "C:\Users\J_Pushkin\Anaconda3\envs\pymc_env\lib\site-packages\theano\link\c\basic.py", line 1138, in __compile__
    thunk, module = self.cthunk_factory(
  File "C:\Users\J_Pushkin\Anaconda3\envs\pymc_env\lib\site-packages\theano\link\c\basic.py", line 1634, in cthunk_factory
    module = get_module_cache().module_from_key(key=key, lnk=self)
  File "C:\Users\J_Pushkin\Anaconda3\envs\pymc_env\lib\site-packages\theano\link\c\cmodule.py", line 1157, in module_from_key
    module = self._get_from_hash(module_hash, key)
  File "C:\Users\J_Pushkin\Anaconda3\envs\pymc_env\lib\site-packages\theano\link\c\cmodule.py", line 1060, in _get_from_hash
    key_data.add_key(key, save_pkl=bool(key[0]))
  File "C:\Users\J_Pushkin\Anaconda3\envs\pymc_env\lib\site-packages\theano\link\c\cmodule.py", line 497, in add_key
    assert key not in self.keys
AssertionError

AssertionError                            Traceback (most recent call last)
Input In [22], in <cell line: 1>()
     53 noise = pm.Exponential('noise', lam=0.0001)
     55 Sales = pm.Normal('Sales',mu=sum(channel_contributions) + base,sigma=noise,observed=y)
---> 57 trace_data = pm.sample(return_inferencedata=True, tune=2000)

What am I doing wrong? Thanks.

Welcome!

I tried to clean up the formatting of your code, but there are some pretty basic syntax errors that prevent the code from running. I assume these are weird copy and paste errors?

Examples:

X_temp = dic_data.[j][channel].values

and

for j in in dic_data.keys().keys():

Thanks @cluhmann! Yes, these are copy-and-paste errors. I was trying to do tt.stack on about 18000 data frames in dic_data, each with a datetime index of 12 months (each data frame needs to be processed separately for the time lag within the data frame). I think my Windows machine could not handle it. So I reduced the number of data frames in dic_data to 2, 10, and 100 respectively, and the code now runs. My thinking is that, on my Windows laptop, the Theano computational engine is unable to create a computational graph for such a large number of data frames? I am new to Theano. Thanks

1 Like