Help with `size length error`

Any suggestion is appreciated.

I run the code listed below and obtain error the message “Size length is incompatible with batched dimensions of parameter 0 alpha: len(size) = 1, len(batched dims alpha) = 2. Size length must be 0 or >= 2”. My code is listed below. If I changed the link function

mu = pm.Deterministic("mu", pm.math.invlogit(a*np.sqrt(x)+b)) to mu = pm.Deterministic("mu", pm.math.invlogit(a[0] + a[1] * x[:, 1]))

It works fine. However it is not what I want. Thank you very much for your any thoughts.

> RANDOM_SEED = 8927
> rng = np.random.default_rng(RANDOM_SEED)
> 
> data = pd.read_excel("BN_samples1.xlsx", usecols=['Effective water level above ground [m]', 'Building damage ratio'])
> sns.pairplot(data=data, kind="scatter");
> data["Intercept"] = np.ones(len(data))
> labels = ["Intercept", "Effective water level above ground [m]"]
> x = data[labels].to_numpy()
> 
> rloss = data['Building damage ratio'].to_numpy()
> var = data['Effective water level above ground [m]'].var()
> 
> #train-test split
> test_indices = range(len(x) - 10, len(x))
> train_indices = range(len(x) - 10)
> x_train, x_test = x[train_indices], x[test_indices]
> rloss_train, rloss_test = rloss[train_indices], rloss[test_indices]
> 
> #Define and fit the model
> coords = {"coeffs": labels}
> 
> with pm.Model(coords=coords) as model:
>     # Data containers
>     x = pm.MutableData("x", x_train)
>     rloss = pm.MutableData("rloss", rloss_train)
>     
>     # Priors
>     a = pm.Normal("a", mu=0, sigma=1, dims="coeffs")
>     b = pm.Normal("b", mu=0, sigma=1, dims="coeffs")
> 
>     # Link function
>     mu = pm.Deterministic("mu", pm.math.invlogit(a*np.sqrt(x)+b))
>     phi = pm.Deterministic("phi", pm.math.exp(mu*(1-mu)/var-1))
>   
>     alpha = pm.Deterministic("alpha", mu * phi)
>     beta = pm.Deterministic("beta", (1 - mu) * phi)
>     
>     
>     def logp_beta(obsLoss, alpha, beta): 
>         return pm.logp(pm.Beta.dist(alpha=alpha, beta=beta), obsLoss)
>     
>     def random(alpha, beta, rng= None, size=None) :
>         return scipy.stats.beta(alpha, beta).rvs(size)
>     
>     rloss_custom = pm.CustomDist('rloss_custom', alpha, beta, logp=logp_beta, random=random, observed=rloss, shape=x.shape[0])
>     
> with model:
>     step = pm.Metropolis()
>     idata = pm.sample(4800, step=step, chains=4, cores=1)

Can you provide a fully reproducible example (the simpler the better) and also the full stack trace of the error?

Thank you very much for your quick response. I really appreciate any of your thoughts. Here is my complete code. I did my best to make it simpler.

> import numpy as np
> import pymc as pm
> import arviz as az
> import pandas as pd
> import pytensor.tensor as pt
> import warnings
> import seaborn as sns
> import scipy
> from scipy.stats import linregress
> 
> RANDOM_SEED = 8927
> rng = np.random.default_rng(RANDOM_SEED)
> 
> data = pd.read_excel("BN_samples1.xlsx", usecols=['Effective water level above ground [m]', 'Building damage ratio'])
> sns.pairplot(data=data, kind="scatter");
> data["Intercept"] = np.ones(len(data))
> labels = ["Intercept", "Effective water level above ground [m]"]
> x = data[labels].to_numpy()
> 
> rloss = data['Building damage ratio'].to_numpy()
> var = data['Effective water level above ground [m]'].var()
> test_indices = range(len(x) - 10, len(x))
> train_indices = range(len(x) - 10)
> x_train, x_test = x[train_indices], x[test_indices]
> rloss_train, rloss_test = rloss[train_indices], rloss[test_indices]
> 
> coords = {"coeffs": labels}
> 
> 
> with pm.Model(coords=coords) as model:
>     x = pm.MutableData("x", x_train)
>     rloss = pm.MutableData("rloss", rloss_train)
>     
>     a = pm.Normal("a", mu=0, sigma=1, dims="coeffs")
>     b = pm.Normal("b", mu=0, sigma=1, dims="coeffs")
> 
>     mu = pm.Deterministic("mu", pm.math.invlogit(a*np.sqrt(x)+b))
>     phi = pm.Deterministic("phi", pm.math.exp(mu*(1-mu)/var-1))
>   
>     alpha = pm.Deterministic("alpha", mu * phi)
>     beta = pm.Deterministic("beta", (1 - mu) * phi)
>     
>     
>     def logp_beta(obsLoss, alpha, beta): 
>         return pm.logp(pm.Beta.dist(alpha=alpha, beta=beta), obsLoss)
>     
>     def random(alpha, beta, rng= None, size=None) :
>         return scipy.stats.beta(alpha, beta).rvs(size)
>     
>     rloss_custom = pm.CustomDist('rloss_custom', alpha, beta, logp=logp_beta, random=random, observed=rloss, shape=x.shape[0])
>   
> with model:
>     step = pm.Metropolis()
>     idata = pm.sample(4800, step=step, chains=4, cores=1) 
> 
> with model:
>     pm.set_data({"x": x_test}, coords = {"coeffs": labels})
>     pp = pm.sample_posterior_predictive(idata, predictions=True).predictions
>     rloss_custom_np = pp["rloss_custom"].values
>     rloss_custom_combined = rloss_custom_np.reshape(-1, rloss_custom_np.shape[-1])
>     result_rloss_custom = np.mean(rloss_custom_combined, axis=0)

for the error message, here is the full version:

Traceback (most recent call last):

  File ~/anaconda3/envs/pymc_env/lib/python3.11/site-packages/spyder_kernels/py3compat.py:356 in compat_exec
    exec(code, globals, locals)

  File ~/Library/CloudStorage/Dropbox/00 Research_Bayesian Research_Flood_Damage_GEE/SDF length error/SDF.py:75
    rloss_custom = pm.CustomDist('rloss_custom', alpha, beta, logp=logp_beta, random=random, observed=rloss, shape=x.shape[0])

  File ~/anaconda3/envs/pymc_env/lib/python3.11/site-packages/pymc/distributions/distribution.py:970 in __new__
    return _CustomDist(

  File ~/anaconda3/envs/pymc_env/lib/python3.11/site-packages/pymc/distributions/distribution.py:308 in __new__
    rv_out = cls.dist(*args, **kwargs)

  File ~/anaconda3/envs/pymc_env/lib/python3.11/site-packages/pymc/distributions/distribution.py:508 in dist
    return super().dist(

  File ~/anaconda3/envs/pymc_env/lib/python3.11/site-packages/pymc/distributions/distribution.py:387 in dist
    rv_out = cls.rv_op(*dist_params, size=create_size, **kwargs)

  File ~/anaconda3/envs/pymc_env/lib/python3.11/site-packages/pymc/distributions/distribution.py:563 in rv_op
    return rv_op(*dist_params, **kwargs)

  File ~/anaconda3/envs/pymc_env/lib/python3.11/site-packages/pytensor/tensor/random/op.py:289 in __call__
    res = super().__call__(rng, size, dtype, *args, **kwargs)

  File ~/anaconda3/envs/pymc_env/lib/python3.11/site-packages/pytensor/graph/op.py:295 in __call__
    node = self.make_node(*inputs, **kwargs)

  File ~/anaconda3/envs/pymc_env/lib/python3.11/site-packages/pytensor/tensor/random/op.py:334 in make_node
    shape = self._infer_shape(size, dist_params)

  File ~/anaconda3/envs/pymc_env/lib/python3.11/site-packages/pytensor/tensor/random/op.py:201 in _infer_shape
    raise ValueError(

ValueError: Size length is incompatible with batched dimensions of parameter 0 alpha:
len(size) = 1, len(batched dims alpha) = 2. Size length must be 0 or >= 2

The data I used in the code is here:
https://shorturl.at/rB0DM

Once again, thank you very much.

Hi,
I am encountering the same error in a different setup with PyMC version 5.10.3.
Were you able to find out anything more?

Cheers!

Your problem is that alpha/beta are 2d but your observation in CustomDist is a single 1D vector, and shape is also 1D. Unless you intend to write a multivariate distribution that is not valid.

You can check that alpha.eval().shape returns (90, 2)