Hi all,
I built a simple model like the one below and want to generate a prediction for new input data.
import numpy as np
import pandas as pd
import pymc as pm
from pymc_marketing.mmm.transformers import geometric_adstock

# df_model and RANDOM_SEED are defined earlier in the notebook

with pm.Model() as model:
    # data containers for all variables
    target = pm.MutableData("target", df_model['target_scaled'])
    TV = pm.MutableData("TV", df_model['TV_scaled'])
    OOH = pm.MutableData("OOH", df_model['OOH_scaled'])
    Print = pm.MutableData("Print", df_model['Print_scaled'])

    # priors
    alpha = pm.Gamma("alpha", alpha=2, beta=0.5)  # acts like an intercept
    beta_TV = pm.HalfNormal("beta_TV", sigma=0.2)
    beta_OOH = pm.HalfNormal("beta_OOH", sigma=0.2)
    beta_Print = pm.HalfNormal("beta_Print", sigma=0.2)

    # media transformation (adstock)
    channel_adstock_TV = pm.Deterministic(name="channel_adstock_TV", var=geometric_adstock(x=TV, alpha=0.8))
    channel_adstock_OOH = pm.Deterministic(name="channel_adstock_OOH", var=geometric_adstock(x=OOH, alpha=0.6))
    channel_adstock_Print = pm.Deterministic(name="channel_adstock_Print", var=geometric_adstock(x=Print, alpha=0.4))

    # expected value
    mu = pm.Deterministic(name="mu", var=alpha + beta_TV*channel_adstock_TV + beta_OOH*channel_adstock_OOH + beta_Print*channel_adstock_Print)
    #mu = pm.Deterministic(name="mu", var=alpha + beta_TV*geometric_adstock(x=TV, alpha=0.8) + beta_OOH*geometric_adstock(x=OOH, alpha=0.6) + beta_Print*geometric_adstock(x=Print, alpha=0.4))

    # likelihood
    sigma = pm.Gamma("sigma", alpha=2, beta=0.5)
    y = pm.Normal("y", mu=mu, sigma=sigma, observed=target, shape=TV.shape[0])

    trace = pm.sample(random_seed=RANDOM_SEED, chains=2, draws=100, tune=100, target_accept=0.9)
    posterior_predictive = pm.sample_posterior_predictive(trace)
When I run it without the media transformation everything works fine, but since I added the adstock I get the error below as soon as I set the new data input:
with model:
    pm.set_data(
        {"TV": np.array([0.08710, 0.29998, 0.33917, 0.48765, 0.53105]),
         "OOH": np.array([0.8, 0.8, 0.1, 0.92466743, 0.92466743]),
         "Print": np.array([0.83331646, 0.97947805, 0.99924164, 0.90886795, 0.71668052])
        }
    )
    predictions = pm.sample_posterior_predictive(
        trace, var_names=["y"], random_seed=RANDOM_SEED
    )
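For context: the model was fitted on data with 299 rows per channel, while the new input only has 5 values per channel. My (possibly wrong) assumption was that the data containers simply take on the new length when calling pm.set_data, i.e. a quick check like this should show the updated shape:

with model:
    pm.set_data({"TV": np.array([0.08710, 0.29998, 0.33917, 0.48765, 0.53105])})

# the shared variable should now hold the 5 new values
print(model["TV"].get_value().shape)  # I would expect (5,)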
This is the full error message:
Sampling: [y]
0.00% [0/200 00:00<?]
ValueError Traceback (most recent call last)
File ~\AppData\Roaming\Python\Python39\site-packages\pytensor\link\vm.py:406, in Loop.__call__(self)
403 for thunk, node, old_storage in zip_longest(
404 self.thunks, self.nodes, self.post_thunk_clear, fillvalue=()
405 ):
→ 406 thunk()
407 for old_s in old_storage:
File ~\AppData\Roaming\Python\Python39\site-packages\pytensor\graph\op.py:518, in Op.make_py_thunk.<locals>.rval(p, i, o, n)
516 @is_thunk_type
517 def rval(p=p, i=node_input_storage, o=node_output_storage, n=node):
→ 518 r = p(n, [x[0] for x in i], o)
519 for o in node.outputs:
File ~\AppData\Roaming\Python\Python39\site-packages\pytensor\tensor\subtensor.py:1614, in IncSubtensor.perform(self, node, inputs, out_)
1612 else:
1613 # sub_x += -sub_x + y
→ 1614 x.setitem(cdata, y)
1615 else:
1616 # scalar case
ValueError: could not broadcast input array from shape (4,) into shape (0,)
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
Cell In [22], line 9
1 with model:
2 pm.set_data(
3 {"TV": np.array([0.08710, 0.29998, 0.33917, 0.48765, 0.53105]),
4 #"TV": np.array([0.08710, 0.08710, 0.08710, 0.08710, 0.08710]),
(…)
7 }
8 )
----> 9 predictions = pm.sample_posterior_predictive(
10 trace, var_names=["y"], random_seed=RANDOM_SEED
11 )
File ~\AppData\Roaming\Python\Python39\site-packages\pymc\sampling\forward.py:644, in sample_posterior_predictive(trace, model, var_names, sample_dims, random_seed, progressbar, return_inferencedata, extend_inferencedata, predictions, idata_kwargs, compile_kwargs)
639 # there’s only a single chain, but the index might hit it multiple times if
640 # the number of indices is greater than the length of the trace.
641 else:
642 param = trace[idx % len_trace]
→ 644 values = sampler_fn(**param)
646 for k, v in zip(vars, values):
647 ppc_trace_t.insert(k.name, v, idx)
File ~\AppData\Roaming\Python\Python39\site-packages\pymc\util.py:393, in point_wrapper.<locals>.wrapped(**kwargs)
391 def wrapped(**kwargs):
392 input_point = {k: v for k, v in kwargs.items() if k in ins}
→ 393 return core_function(**input_point)
File ~\AppData\Roaming\Python\Python39\site-packages\pytensor\compile\function\types.py:970, in Function.__call__(self, *args, **kwargs)
967 t0_fn = time.perf_counter()
968 try:
969 outputs = (
→ 970 self.vm()
971 if output_subset is None
972 else self.vm(output_subset=output_subset)
973 )
974 except Exception:
975 restore_defaults()
File ~\AppData\Roaming\Python\Python39\site-packages\pytensor\link\vm.py:410, in Loop.__call__(self)
408 old_s[0] = None
409 except Exception:
→ 410 raise_with_op(self.fgraph, node, thunk)
412 return self.perform_updates()
File ~\AppData\Roaming\Python\Python39\site-packages\pytensor\link\utils.py:531, in raise_with_op(fgraph, node, thunk, exc_info, storage_map)
526 warnings.warn(
527 f"{exc_type} error does not allow us to add an extra error message"
528 )
529 # Some exception need extra parameter in inputs. So forget the
530 # extra long error message in that case.
→ 531 raise exc_value.with_traceback(exc_trace)
File ~\AppData\Roaming\Python\Python39\site-packages\pytensor\link\vm.py:406, in Loop.__call__(self)
402 try:
403 for thunk, node, old_storage in zip_longest(
404 self.thunks, self.nodes, self.post_thunk_clear, fillvalue=()
405 ):
→ 406 thunk()
407 for old_s in old_storage:
408 old_s[0] = None
File ~\AppData\Roaming\Python\Python39\site-packages\pytensor\graph\op.py:518, in Op.make_py_thunk.<locals>.rval(p, i, o, n)
516 @is_thunk_type
517 def rval(p=p, i=node_input_storage, o=node_output_storage, n=node):
→ 518 r = p(n, [x[0] for x in i], o)
519 for o in node.outputs:
520 compute_map[o][0] = True
File ~\AppData\Roaming\Python\Python39\site-packages\pytensor\tensor\subtensor.py:1614, in IncSubtensor.perform(self, node, inputs, out_)
1611 sub_x += y
1612 else:
1613 # sub_x += -sub_x + y
→ 1614 x.setitem(cdata, y)
1615 else:
1616 # scalar case
1617 if not self.set_instead_of_inc:
ValueError: could not broadcast input array from shape (4,) into shape (0,)
Apply node that caused the error: SetSubtensor{start:stop, i}(SetSubtensor{start:stop, i}.0, Subtensor{:stop}.0, 6, ScalarFromTensor.0, 6)
Toposort index: 131
Inputs types: [TensorType(float64, shape=(None, 12)), TensorType(float64, shape=(None,)), ScalarType(int64), ScalarType(int64), ScalarType(uint8)]
Inputs shapes: [(5, 12), (4,), (), (), ()]
Inputs strides: [(96, 8), (8,), (), (), ()]
Inputs values: ['not shown', array([0.0871 , 0.29998, 0.33917, 0.48765]), 6, 5, 6]
Outputs clients: [[SetSubtensor{start:stop, i}(SetSubtensor{start:stop, i}.0, Subtensor{:stop}.0, 7, ScalarFromTensor.0, 7)]]
Backtrace when the node is created (use PyTensor flag traceback__limit=N to make it longer):
File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2995, in _run_cell
return runner(coro)
File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\async_helpers.py", line 129, in pseudo_sync_runner
coro.send(None)
File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3194, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3373, in run_ast_nodes
if await self.run_code(code, result, async_=asy):
File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3433, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "C:\Users\s.leschke.CI10229\AppData\Local\Temp\ipykernel_16164\1565565827.py", line 14, in 
channel_adstock_TV = pm.Deterministic(name="channel_adstock_TV", var=geometric_adstock(x=TV, alpha=0.8))
File "C:\Users\s.leschke.CI10229\AppData\Roaming\Python\Python39\site-packages\pymc_marketing\mmm\transformers.py", line 100, in geometric_adstock
return batched_convolution(x, w, axis=axis)
File "C:\Users\s.leschke.CI10229\AppData\Roaming\Python\Python39\site-packages\pymc_marketing\mmm\transformers.py", line 50, in batched_convolution
padded_x = pt.set_subtensor(
HINT: Use the PyTensor flag exception_verbosity=high
for a debug print-out and storage map footprint of this Apply node.
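If I read the backtrace correctly, the failing node is the pt.set_subtensor padding inside batched_convolution (transformers.py, line 50), which geometric_adstock calls internally. I have not verified it, but I would expect the transform on its own to handle a 5-element input without problems, something like:

x_new = np.array([0.08710, 0.29998, 0.33917, 0.48765, 0.53105])
adstocked = geometric_adstock(x=x_new, alpha=0.8).eval()
print(adstocked.shape)  # I would expect (5,), i.e. the same length as the input

so I assume the problem has to do with the graph that was built on the original data, not with the transform itself.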
I already tried specifying the model in a different way (with coords and a single data container), but it produces the same error:
coords = {"channel": ['TV_scaled','OOH_scaled','Print_scaled']}
with pm.Model(coords = coords) as model:
model.add_coord("KW", np.arange(299), mutable = True)
## data container for all variables
target = pm.MutableData("target", df_model['target_scaled'], dims="KW")
input = pm.MutableData("input", df_model[features], dims=("KW", "channel") )
#priors
alpha = pm.Gamma("alpha", alpha=2, beta=0.5) #like intercept
beta = pm.HalfNormal("beta", sigma = 0.2, dims=("channel",))
#transform media
channel_adstock = pm.Deterministic(name="channel_adstock", var=geometric_adstock(x=input, alpha=0.8), dims=("KW", "channel"),)
channel_contributions = pm.Deterministic(
name="channel_contributions",
var=channel_adstock * beta,
dims=("KW", "channel"),
)
#define target
mu_var = alpha + channel_contributions.sum(axis=-1)
mu = pm.Deterministic(name="mu", var=mu_var, dims="KW")
sigma = pm.Gamma("sigma", alpha=2, beta=0.5)
y = pm.Normal("y", mu=mu, sigma=sigma, observed=target, shape=df_model['TV_scaled'].shape[0])
trace = pm.sample(random_seed=RANDOM_SEED, chains=2, draws=100, tune=100, target_accept = 0.9)
posterior_predictive = pm.sample_posterior_predictive(trace)
new_data = pd.DataFrame({'KW': [0, 1, 2, 3, 4],
                         'TV_scaled': [0.08710, 0.29998, 0.33917, 0.48765, 0.53105],
                         'OOH_scaled': [0.8, 0.8, 0.1, 0.92466743, 0.92466743],
                         'Print_scaled': [0.83331646, 0.97947805, 0.99924164, 0.90886795, 0.71668052]})
new_data = new_data.set_index(new_data['KW'])
new_data = new_data[features]
new_data
with model:
    pm.set_data(
        {"input": new_data},
        coords={"channel": ['TV_scaled', 'OOH_scaled', 'Print_scaled'], "KW": [0, 1, 2, 3, 4]}
    )
    predictions = pm.sample_posterior_predictive(
        trace, var_names=["y"], random_seed=RANDOM_SEED
    )
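One thing I am not sure about: does the target container also have to be resized to the new length before predicting? I have not tried it yet, but I imagine it would look roughly like this (with a dummy target of zeros, only there to give "target" the new shape):

with model:
    pm.set_data(
        {"input": new_data,
         "target": np.zeros(len(new_data))},  # dummy values, just to match the new length
        coords={"KW": new_data.index.values},
    )
    predictions = pm.sample_posterior_predictive(
        trace, var_names=["y"], random_seed=RANDOM_SEED
    )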
Any suggestions? Are the data containers and coords defined correctly like this?
Thanks in advance!