Implementing multivariate skew normal likelihood in PyMC

Hi, I’m trying to modify my multivariate normal model to use a skew normal distribution instead. Here’s my current code:

with pm.Model() as cov_model:
    mu = pm.Normal('mu', mu=0, sigma=1, shape=num_features_theta)
    
    chol, corr, stds = pm.LKJCholeskyCov(
        'chol_cov',
        n=num_features_theta,
        eta=1.0,
        sd_dist=pm.Exponential.dist(lam=10),
        compute_corr=True
    )
    
    obs = pm.MvNormal('obs', mu=mu, chol=chol, observed=Theta_normalized)

I want to:

  1. Change from MvNormal to multivariate skew normal

  2. Add skewness parameters alpha

  3. Keep the same LKJ covariance structure

Main questions:

  • How to implement the multivariate skew normal logp function?

  • What prior should I use for the new alpha parameters?

Data shape: (30, 7) - 30 samples, 7 features

Thanks for any help!

Wikipedia: Skew normal distribution shows how you can define the univariate density in terms of a normal pdf and cdf. You can couple with a Gaussian copula to define the multivariate version. It looks like that’s what this article suggests from the abstract. I don’t know if that’s the usual way to think of multivariate skew normal—it’s not popular enough to get its own Wikipedia page.

1 Like

Try this

import pymc as pm
import numpy as np
import pytensor.tensor as pt
from scipy.stats import skewnorm

# Set dimensions and generate synthetic data for a runnable example
# Data shape: (30, 7) - 30 samples, 7 features
num_samples = 30
num_features_theta = 7
np.random.seed(42)
# Generate some slightly skewed data for demonstration
Theta_normalized = skewnorm.rvs(a=np.linspace(-3, 3, num_features_theta), loc=0, scale=1, size=(num_samples, num_features_theta))


with pm.Model() as skew_normal_model:
    # Priors for location (mu)
    mu = pm.Normal('mu', mu=0, sigma=1, shape=num_features_theta)
    
    # Priors for covariance structure using LKJ
    # This part remains the same as it defines the underlying covariance
    chol, corr, stds = pm.LKJCholeskyCov(
        'chol_cov',
        n=num_features_theta,
        eta=2.0,  # A slightly more informative prior than 1.0
        sd_dist=pm.Exponential.dist(1.0),
        compute_corr=True
    )
    
    # Priors for skewness parameters (alpha)
    # A Normal prior centered at 0 is a common choice
    alpha = pm.Normal('alpha', mu=0, sigma=1, shape=num_features_theta)

    # Custom log-likelihood function for Multivariate Skew-Normal
    def mv_skew_normal_logp(value, mu, chol, stds, alpha):
        #  Log-probability density function for the multivariate skew-normal distribution.
        # Based on the formulation: f(x) = 2 * pdf_mvnormal(x; mu, Sigma) * cdf_normal(alpha.T @ z)
        # where z = D^-1 * (x - mu) are the standardized residuals.
        
        # log probability of the multivariate normal part
        # Use pm.logp for robust log-probability calculation within a CustomDist
        logp_mvnormal = pm.logp(pm.MvNormal.dist(mu=mu, chol=chol), value)
        
        # Standardize the residuals
        # value shape: (num_samples, num_features)
        # mu shape: (num_features,)
        # stds shape: (num_features,)
        # Broadcasting handles the subtraction and division correctly.
        z = (value - mu) / stds
        
        # Argument for the standard normal CDF
        # alpha shape: (num_features,)
        # z.T shape: (num_features, num_samples)
        # cdf_arg shape: (num_samples,)
        cdf_arg = pt.dot(alpha, z.T)
        
        # Log probability of the standard normal CDF part
        # Using pt.erf is numerically stable: CDF(x) = 0.5 * (1 + erf(x / sqrt(2)))
        logp_cdf = pt.log(0.5 * (1 + pt.erf(cdf_arg / pt.sqrt(2.0))))
        
        # Combine parts for the total log-likelihood for each observation
        # The result is a vector of log-likelihoods, one for each sample.
        logp_total_per_obs = pt.log(2) + logp_mvnormal + logp_cdf
        
        # Return the sum over all observations
        return pt.sum(logp_total_per_obs)

    # Define the custom distribution for the observed data
    obs = pm.CustomDist(
        'obs',
        mu,
        chol,
        stds,
        alpha,
        logp=mv_skew_normal_logp,
        observed=Theta_normalized
    )

# Example of how to sample from the model (optional)
with skew_normal_model:
    idata = pm.sample(1000, tune=1000, cores=1)