Hi all,
I built a simple model like the one below and want to generate a prediction for new input data.
import numpy as np
import pandas as pd
import pymc as pm
from pymc_marketing.mmm.transformers import geometric_adstock

# df_model and RANDOM_SEED are defined earlier in the notebook

with pm.Model() as model:
    # data containers for all variables
    target = pm.MutableData("target", df_model['target_scaled'])
    TV = pm.MutableData("TV", df_model['TV_scaled'])
    OOH = pm.MutableData("OOH", df_model['OOH_scaled'])
    Print = pm.MutableData("Print", df_model['Print_scaled'])

    # priors
    alpha = pm.Gamma("alpha", alpha=2, beta=0.5)  # acts like an intercept
    beta_TV = pm.HalfNormal("beta_TV", sigma=0.2)
    beta_OOH = pm.HalfNormal("beta_OOH", sigma=0.2)
    beta_Print = pm.HalfNormal("beta_Print", sigma=0.2)

    # media transformation (adstock)
    channel_adstock_TV = pm.Deterministic(name="channel_adstock_TV", var=geometric_adstock(x=TV, alpha=0.8))
    channel_adstock_OOH = pm.Deterministic(name="channel_adstock_OOH", var=geometric_adstock(x=OOH, alpha=0.6))
    channel_adstock_Print = pm.Deterministic(name="channel_adstock_Print", var=geometric_adstock(x=Print, alpha=0.4))

    # expected value
    mu = pm.Deterministic(name="mu", var=alpha + beta_TV*channel_adstock_TV + beta_OOH*channel_adstock_OOH + beta_Print*channel_adstock_Print)
    #mu = pm.Deterministic(name="mu", var=alpha + beta_TV*geometric_adstock(x=TV, alpha=0.8) + beta_OOH*geometric_adstock(x=OOH, alpha=0.6) + beta_Print*geometric_adstock(x=Print, alpha=0.4))

    # likelihood
    sigma = pm.Gamma("sigma", alpha=2, beta=0.5)
    y = pm.Normal("y", mu=mu, sigma=sigma, observed=target, shape=TV.shape[0])

    trace = pm.sample(random_seed=RANDOM_SEED, chains=2, draws=100, tune=100, target_accept=0.9)
    posterior_predictive = pm.sample_posterior_predictive(trace)
When I run it without the media transformation everything works fine, but since I added the adstock I get the error below as soon as I set the new data input:
with model:
    pm.set_data(
        {"TV": np.array([0.08710, 0.29998, 0.33917, 0.48765, 0.53105]),
         "OOH": np.array([0.8, 0.8, 0.1, 0.92466743, 0.92466743]),
         "Print": np.array([0.83331646, 0.97947805, 0.99924164, 0.90886795, 0.71668052])
        }
    )
    predictions = pm.sample_posterior_predictive(
        trace, var_names=["y"], random_seed=RANDOM_SEED
    )
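For context: the model was fitted on data with 299 rows per channel, while the new input only has 5 values per channel. My (possibly wrong) assumption was that the data containers simply take on the new length when calling pm.set_data, i.e. a quick check like this should show the updated shape:

with model:
    pm.set_data({"TV": np.array([0.08710, 0.29998, 0.33917, 0.48765, 0.53105])})

# the shared variable should now hold the 5 new values
print(model["TV"].get_value().shape)  # I would expect (5,)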
This is the full error message:
Sampling: [y]
0.00% [0/200 00:00<?]
ValueError Traceback (most recent call last)
File ~\AppData\Roaming\Python\Python39\site-packages\pytensor\link\vm.py:406, in Loop.__call__(self)
403 for thunk, node, old_storage in zip_longest(
404 self.thunks, self.nodes, self.post_thunk_clear, fillvalue=()
405 ):
→ 406 thunk()
407 for old_s in old_storage:
File ~\AppData\Roaming\Python\Python39\site-packages\pytensor\graph\op.py:518, in Op.make_py_thunk.<locals>.rval(p, i, o, n)
516 @is_thunk_type
517 def rval(p=p, i=node_input_storage, o=node_output_storage, n=node):
→ 518 r = p(n, [x[0] for x in i], o)
519 for o in node.outputs:
File ~\AppData\Roaming\Python\Python39\site-packages\pytensor\tensor\subtensor.py:1614, in IncSubtensor.perform(self, node, inputs, out_)
1612 else:
1613 # sub_x += -sub_x + y
→ 1614 x.setitem(cdata, y)
1615 else:
1616 # scalar case
ValueError: could not broadcast input array from shape (4,) into shape (0,)
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
Cell In [22], line 9
1 with model:
2 pm.set_data(
3 {"TV": np.array([0.08710, 0.29998, 0.33917, 0.48765, 0.53105]),
4 #"TV": np.array([0.08710, 0.08710, 0.08710, 0.08710, 0.08710]),
(…)
7 }
8 )
----> 9 predictions = pm.sample_posterior_predictive(
10 trace, var_names=["y"], random_seed=RANDOM_SEED
11 )
File ~\AppData\Roaming\Python\Python39\site-packages\pymc\sampling\forward.py:644, in sample_posterior_predictive(trace, model, var_names, sample_dims, random_seed, progressbar, return_inferencedata, extend_inferencedata, predictions, idata_kwargs, compile_kwargs)
639 # there’s only a single chain, but the index might hit it multiple times if
640 # the number of indices is greater than the length of the trace.
641 else:
642 param = trace[idx % len_trace]
→ 644 values = sampler_fn(**param)
646 for k, v in zip(vars, values):
647 ppc_trace_t.insert(k.name, v, idx)
File ~\AppData\Roaming\Python\Python39\site-packages\pymc\util.py:393, in point_wrapper.<locals>.wrapped(**kwargs)
391 def wrapped(**kwargs):
392 input_point = {k: v for k, v in kwargs.items() if k in ins}
→ 393 return core_function(**input_point)
File ~\AppData\Roaming\Python\Python39\site-packages\pytensor\compile\function\types.py:970, in Function.__call__(self, *args, **kwargs)
967 t0_fn = time.perf_counter()
968 try:
969 outputs = (
→ 970 self.vm()
971 if output_subset is None
972 else self.vm(output_subset=output_subset)
973 )
974 except Exception:
975 restore_defaults()
File ~\AppData\Roaming\Python\Python39\site-packages\pytensor\link\vm.py:410, in Loop.__call__(self)
408 old_s[0] = None
409 except Exception:
→ 410 raise_with_op(self.fgraph, node, thunk)
412 return self.perform_updates()
File ~\AppData\Roaming\Python\Python39\site-packages\pytensor\link\utils.py:531, in raise_with_op(fgraph, node, thunk, exc_info, storage_map)
526 warnings.warn(
527 f"{exc_type} error does not allow us to add an extra error message"
528 )
529 # Some exception need extra parameter in inputs. So forget the
530 # extra long error message in that case.
→ 531 raise exc_value.with_traceback(exc_trace)
File ~\AppData\Roaming\Python\Python39\site-packages\pytensor\link\vm.py:406, in Loop.__call__(self)
402 try:
403 for thunk, node, old_storage in zip_longest(
404 self.thunks, self.nodes, self.post_thunk_clear, fillvalue=()
405 ):
→ 406 thunk()
407 for old_s in old_storage:
408 old_s[0] = None
File ~\AppData\Roaming\Python\Python39\site-packages\pytensor\graph\op.py:518, in Op.make_py_thunk.<locals>.rval(p, i, o, n)
516 @is_thunk_type
517 def rval(p=p, i=node_input_storage, o=node_output_storage, n=node):
→ 518 r = p(n, [x[0] for x in i], o)
519 for o in node.outputs:
520 compute_map[o][0] = True
File ~\AppData\Roaming\Python\Python39\site-packages\pytensor\tensor\subtensor.py:1614, in IncSubtensor.perform(self, node, inputs, out_)
1611 sub_x += y
1612 else:
1613 # sub_x += -sub_x + y
→ 1614 x.setitem(cdata, y)
1615 else:
1616 # scalar case
1617 if not self.set_instead_of_inc:
ValueError: could not broadcast input array from shape (4,) into shape (0,)
Apply node that caused the error: SetSubtensor{start:stop, i}(SetSubtensor{start:stop, i}.0, Subtensor{:stop}.0, 6, ScalarFromTensor.0, 6)
Toposort index: 131
Inputs types: [TensorType(float64, shape=(None, 12)), TensorType(float64, shape=(None,)), ScalarType(int64), ScalarType(int64), ScalarType(uint8)]
Inputs shapes: [(5, 12), (4,), (), (), ()]
Inputs strides: [(96, 8), (8,), (), (), ()]
Inputs values: ['not shown', array([0.0871 , 0.29998, 0.33917, 0.48765]), 6, 5, 6]
Outputs clients: [[SetSubtensor{start:stop, i}(SetSubtensor{start:stop, i}.0, Subtensor{:stop}.0, 7, ScalarFromTensor.0, 7)]]
Backtrace when the node is created (use PyTensor flag traceback__limit=N to make it longer):
File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2995, in _run_cell
return runner(coro)
File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\async_helpers.py", line 129, in pseudo_sync_runner
coro.send(None)
File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3194, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3373, in run_ast_nodes
if await self.run_code(code, result, async_=asy):
File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3433, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "C:\Users\s.leschke.CI10229\AppData\Local\Temp\ipykernel_16164\1565565827.py", line 14, in 
channel_adstock_TV = pm.Deterministic(name="channel_adstock_TV", var=geometric_adstock(x=TV, alpha=0.8))
File "C:\Users\s.leschke.CI10229\AppData\Roaming\Python\Python39\site-packages\pymc_marketing\mmm\transformers.py", line 100, in geometric_adstock
return batched_convolution(x, w, axis=axis)
File "C:\Users\s.leschke.CI10229\AppData\Roaming\Python\Python39\site-packages\pymc_marketing\mmm\transformers.py", line 50, in batched_convolution
padded_x = pt.set_subtensor(
HINT: Use the PyTensor flag exception_verbosity=high
for a debug print-out and storage map footprint of this Apply node.
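If I read the backtrace correctly, the failing node is the pt.set_subtensor padding inside batched_convolution (transformers.py, line 50), which geometric_adstock calls internally. I have not verified it, but I would expect the transform on its own to handle a 5-element input without problems, something like:

x_new = np.array([0.08710, 0.29998, 0.33917, 0.48765, 0.53105])
adstocked = geometric_adstock(x=x_new, alpha=0.8).eval()
print(adstocked.shape)  # I would expect (5,), i.e. the same length as the input

so I assume the problem has to do with the graph that was built on the original data, not with the transform itself.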
I already tried specifying the model in a different way (with coords and a single data container), but it produces the same error:
coords = {"channel": ['TV_scaled','OOH_scaled','Print_scaled']}
with pm.Model(coords = coords) as model:
model.add_coord("KW", np.arange(299), mutable = True)
## data container for all variables
target = pm.MutableData("target", df_model['target_scaled'], dims="KW")
input = pm.MutableData("input", df_model[features], dims=("KW", "channel") )
#priors
alpha = pm.Gamma("alpha", alpha=2, beta=0.5) #like intercept
beta = pm.HalfNormal("beta", sigma = 0.2, dims=("channel",))
#transform media
channel_adstock = pm.Deterministic(name="channel_adstock", var=geometric_adstock(x=input, alpha=0.8), dims=("KW", "channel"),)
channel_contributions = pm.Deterministic(
name="channel_contributions",
var=channel_adstock * beta,
dims=("KW", "channel"),
)
#define target
mu_var = alpha + channel_contributions.sum(axis=-1)
mu = pm.Deterministic(name="mu", var=mu_var, dims="KW")
sigma = pm.Gamma("sigma", alpha=2, beta=0.5)
y = pm.Normal("y", mu=mu, sigma=sigma, observed=target, shape=df_model['TV_scaled'].shape[0])
trace = pm.sample(random_seed=RANDOM_SEED, chains=2, draws=100, tune=100, target_accept = 0.9)
posterior_predictive = pm.sample_posterior_predictive(trace)
new_data = pd.DataFrame({'KW': [0, 1, 2, 3, 4],
                         'TV_scaled': [0.08710, 0.29998, 0.33917, 0.48765, 0.53105],
                         'OOH_scaled': [0.8, 0.8, 0.1, 0.92466743, 0.92466743],
                         'Print_scaled': [0.83331646, 0.97947805, 0.99924164, 0.90886795, 0.71668052]})
new_data = new_data.set_index(new_data['KW'])
new_data = new_data[features]
new_data
with model:
    pm.set_data(
        {"input": new_data},
        coords={"channel": ['TV_scaled', 'OOH_scaled', 'Print_scaled'], "KW": [0, 1, 2, 3, 4]}
    )
    predictions = pm.sample_posterior_predictive(
        trace, var_names=["y"], random_seed=RANDOM_SEED
    )
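One thing I am not sure about: does the target container also have to be resized to the new length before predicting? I have not tried it yet, but I imagine it would look roughly like this (with a dummy target of zeros, only there to give "target" the new shape):

with model:
    pm.set_data(
        {"input": new_data,
         "target": np.zeros(len(new_data))},  # dummy values, just to match the new length
        coords={"KW": new_data.index.values},
    )
    predictions = pm.sample_posterior_predictive(
        trace, var_names=["y"], random_seed=RANDOM_SEED
    )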
Any suggestions? Are the data containers and coords defined correctly like this?
Thanks in advance!