Hey I was hoping for some help with this problem, I can’t get matrix multiplication to work when using dimensions (or shapes) for a pymc model with multiple linear regression. Essentially I want y = ax1 + bx2 + c.
Here is my code and error. Any help is super appreciated!
# we have 30 products in the data and we have 2 variables for linear regression
product_map = [i for i in range(30)]
xdims = [i for i in range(2)]
# create a product id for each input (we have 200 datapoints) and create x data
product = np.random.randint(0, high=30, size=200)
x = np.random.normal(loc=0.0, scale=1.0, size=(200,2))
print(x.shape, product.shape)
# create the model
unpooled_model = pm.Model(coords={'product':product_map, 'xdims':xdims})
with unpooled_model:
m = pm.Normal('m', mu=1, sigma=20, dims=("xdims","product"))
b = pm.Normal('b', mu=30_000, sigma=50_000, dims="product")
std = pm.HalfNormal('std', sigma=400_000)
xdata = pm.Data('xdata', x, mutable=True)
product_data = pm.Data('product_data', product, mutable=True)
print(xdata.shape.eval())
print(m.shape.eval())
print(b.shape.eval())
mean = xdata.dot(m) + b
print(mean.shape.eval())
obs = pm.Normal('obs', mu=mean[:, product_data], sigma=std, observed=y)
unpooled_trace_variety = pm.sample(tune=N_TUNE, return_inferencedata=True, chains=N_CHAINS, target_accept=TARGET_ACCEPT, cores=N_CORES)
(200, 2) (200,)
[200 2]
[ 2 30]
[30]
[200 30]
---------------------------------------------------------------------------
ShapeError Traceback (most recent call last)
Input In [60], in <cell line: 10>()
21 mean = xdata.dot(m) + b
22 print(mean.shape.eval())
---> 23 obs = pm.Normal('obs', mu=mean[:, product_data], sigma=std, observed=y)
25 unpooled_trace_variety = pm.sample(tune=N_TUNE, return_inferencedata=True, chains=N_CHAINS, target_accept=TARGET_ACCEPT, cores=N_CORES)
File ~/miniforge3/envs/pymc/lib/python3.10/site-packages/pymc/distributions/distribution.py:271, in Distribution.__new__(cls, name, rng, dims, initval, observed, total_size, transform, *args, **kwargs)
267 if resize_shape:
268 # A batch size was specified through `dims`, or implied by `observed`.
269 rv_out = change_rv_size(rv=rv_out, new_size=resize_shape, expand=True)
--> 271 rv_out = model.register_rv(
272 rv_out,
273 name,
274 observed,
275 total_size,
276 dims=dims,
277 transform=transform,
278 initval=initval,
279 )
281 # add in pretty-printing support
282 rv_out.str_repr = types.MethodType(str_for_dist, rv_out)
File ~/miniforge3/envs/pymc/lib/python3.10/site-packages/pymc/model.py:1375, in Model.register_rv(self, rv_var, name, data, total_size, dims, transform, initval)
1368 raise TypeError(
1369 "Variables that depend on other nodes cannot be used for observed data."
1370 f"The data variable was: {data}"
1371 )
1373 # `rv_var` is potentially changed by `make_obs_var`,
1374 # for example into a new graph for imputation of missing data.
-> 1375 rv_var = self.make_obs_var(rv_var, data, dims, transform)
1377 return rv_var
File ~/miniforge3/envs/pymc/lib/python3.10/site-packages/pymc/model.py:1401, in Model.make_obs_var(self, rv_var, data, dims, transform)
1398 data = convert_observed_data(data).astype(rv_var.dtype)
1400 if data.ndim != rv_var.ndim:
-> 1401 raise ShapeError(
1402 "Dimensionality of data and RV don't match.", actual=data.ndim, expected=rv_var.ndim
1403 )
1405 if aesara.config.compute_test_value != "off":
1406 test_value = getattr(rv_var.tag, "test_value", None)
ShapeError: Dimensionality of data and RV don't match. (actual 1 != expected 2)