Shape mismatch sample posterior predictive with Binomial

Hey all,

I get a shape mismatch error when running the following model

region_idx, regions = fitting_subset['Region'].factorize()
position_idx, positions = fitting_subset['Position'].factorize()
spec_idx, specialties = fitting_subset['spec'].factorize()
gebiet_idx, gebiete = fitting_subset['Gebiet'].factorize()

coords = {"spec": specialties, "region": regions, "position": positions, "gebiet": gebiete}

with pm.Model(coords=coords) as team_model:

    position_ind = pm.MutableData('position_idx', position_idx)
    region_ind = pm.MutableData('region_idx', region_idx)
    spec_ind = pm.MutableData('spec_idx', spec_idx)
    gebiet_ind = pm.MutableData('gebiet_idx', gebiet_idx)

    # Group means
    m_mu = pm.Normal('m_mu', mu=-2, sigma=1)
    s_mu = pm.HalfNormal('s_mu', 1)
    mu = pm.Normal('mu', mu=m_mu, sigma=s_mu, dims=('position'))
    gamma = pm.Normal('gamma', mu=m_mu, sigma=s_mu, dims=('gebiet'))

    theta = pm.ZeroSumNormal('theta', sigma=1, dims=('region'))
    print(theta.shape.eval())

    # Individual random effects
    sigma = pm.HalfNormal('sigma', 1)
    epsilon = pm.Normal('epsilon', mu=0, sigma=sigma, dims=('spec'))
    print(epsilon.shape.eval())
    
    p = pm.math.invlogit(mu[position_ind] + theta[region_ind] + gamma[gebiet_ind] + epsilon[spec_ind])
    
    print(p.shape.eval())

    y = pm.Binomial('y', n=total_visits, p=p, observed=visits, shape=p.shape)

with regional_model:
    regional_trace = pm.sample(1000, tune=2000, chains=4, cores=4, target_accept=0.9, random_seed=RANDOM_SEED)

with regional_model:

    pm.set_data({
        "position_idx": matrix[:10,0],
        "region_idx": matrix[:10, 1],
        "gebiet_idx": matrix[:10, 2],
        "spec_idx": matrix[:10, 3]

    }, coords=coords)

    print(epsilon.shape.eval())
    print(theta.shape.eval())
    print(p.shape.eval())
    print('y', y.shape.eval())
    print(position_ind.shape.eval())

with regional_model:
    pm.sample_posterior_predictive(regional_trace, 
                                   predictions=True,
                                   var_names=['y'])


ValueError: shape mismatch: objects cannot be broadcast to a single shape.  Mismatch is between arg 0 with shape (10,) and arg 2 with shape (1758,).

During handling of the above exception, another exception occurred:

Toposort index: 16
Inputs types: [RandomGeneratorType, TensorType(int64, shape=(1,)), TensorType(int64, shape=()), TensorType(int32, shape=(1758,)), TensorType(float64, shape=(None,))]
Inputs shapes: ['No shapes', (1,), (), (1758,), (10,)]
Inputs strides: ['No strides', (8,), (), (4,), (8,)]
Inputs values: [Generator(PCG64) at 0x1294FC609E0, array([10], dtype=int64), array(4, dtype=int64), 'not shown', 'not shown']
Outputs clients: [['output'], ['output']]

I made changes to incorporate shape into the ‘y’ variable using shape=p.shape, and it still doesn’t work.

any tips?

When I change to a pm.Normal for my y variable, it works as expected, however not with Binomial

I figured it out:

I set the n parameter in the pm.Binomial to Mutable data, otherwise it continues using the original observed value.

The final code:

region_idx, regions = fitting_subset['Region'].factorize()
position_idx, positions = fitting_subset['Position'].factorize()
spec_idx, specialties = fitting_subset['spec'].factorize()
gebiet_idx, gebiete = fitting_subset['Gebiet'].factorize()

coords = {"spec": specialties, "region": regions, "position": positions, "gebiet": gebiete}

with pm.Model(coords=coords) as regional_model:

    position_ind = pm.MutableData('position_idx', position_idx)
    region_ind = pm.MutableData('region_idx', region_idx)
    spec_ind = pm.MutableData('spec_idx', spec_idx)
    gebiet_ind = pm.MutableData('gebiet_idx', gebiet_idx)
    num_visits = pm.MutableData('total_visits', total_visits, dims="obs_id")

    # Group means
    m_mu = pm.Normal('m_mu', mu=-2, sigma=1)
    s_mu = pm.HalfNormal('s_mu', 1)
    # err = pm.HalfCauchy('err', 1)
    mu = pm.Normal('mu', mu=m_mu, sigma=s_mu, dims=('position'))
    gamma = pm.Normal('gamma', mu=m_mu, sigma=s_mu, dims=('gebiet'))

    theta = pm.ZeroSumNormal('theta', sigma=1, dims=('region'))
    print(theta.shape.eval())

    # Individual random effects
    sigma = pm.HalfNormal('sigma', 1)
    epsilon = pm.Normal('epsilon', mu=0, sigma=sigma, dims=('spec'))
    print(epsilon.shape.eval())
    
    p = pm.math.invlogit(mu[position_ind] + theta[region_ind] + gamma[gebiet_ind] + epsilon[spec_ind])
    # p = pm.Deterministic('p', mu[position_ind] + theta[region_ind] + gamma[gebiet_ind] + epsilon[spec_ind])

    print(p.shape.eval())

    y = pm.Binomial('y', n=num_visits, p=p, observed=visits, shape=p.shape, dims="obs_id")
    # y = pm.Poisson('y', mu=p, observed=visits, shape=p.shape)

with regional_model:
    regional_trace = pm.sample(1000, tune=2000, chains=4, cores=4, target_accept=0.9, random_seed=RANDOM_SEED)

with regional_model:

    pm.set_data({
        "position_idx": np.array([0]),
        "region_idx": np.array([0]),
        "gebiet_idx": np.array([2]),
        "spec_idx": np.array([0]),
        "total_visits": np.array([1000])

    })

    print(epsilon.shape.eval())
    print(theta.shape.eval())
    print(mu.shape.eval())
    print(gamma.shape.eval())

    print(position_ind.shape.eval())
    print(region_ind.shape.eval())
    print(gebiet_ind.shape.eval())
    print(spec_ind.shape.eval())
    
    print(p.shape.eval())
    print('y', y.shape.eval())

with regional_model:
    idata_2 = pm.sample_posterior_predictive(
        regional_trace,
        var_names=["y"],
        return_inferencedata=True,
        predictions=True,
    )
1 Like