Wikipedia: Skew normal distribution shows how you can define the univariate density in terms of a normal pdf and cdf. You can couple with a Gaussian copula to define the multivariate version. It looks like that’s what this article suggests from the abstract. I don’t know if that’s the usual way to think of multivariate skew normal—it’s not popular enough to get its own Wikipedia page.
import pymc as pm
import numpy as np
import pytensor.tensor as pt
from scipy.stats import skewnorm
# Set dimensions and generate synthetic data for a runnable example
# Data shape: (30, 7) - 30 samples, 7 features
num_samples = 30
num_features_theta = 7
np.random.seed(42)
# Generate some slightly skewed data for demonstration
Theta_normalized = skewnorm.rvs(a=np.linspace(-3, 3, num_features_theta), loc=0, scale=1, size=(num_samples, num_features_theta))
with pm.Model() as skew_normal_model:
# Priors for location (mu)
mu = pm.Normal('mu', mu=0, sigma=1, shape=num_features_theta)
# Priors for covariance structure using LKJ
# This part remains the same as it defines the underlying covariance
chol, corr, stds = pm.LKJCholeskyCov(
'chol_cov',
n=num_features_theta,
eta=2.0, # A slightly more informative prior than 1.0
sd_dist=pm.Exponential.dist(1.0),
compute_corr=True
)
# Priors for skewness parameters (alpha)
# A Normal prior centered at 0 is a common choice
alpha = pm.Normal('alpha', mu=0, sigma=1, shape=num_features_theta)
# Custom log-likelihood function for Multivariate Skew-Normal
def mv_skew_normal_logp(value, mu, chol, stds, alpha):
# Log-probability density function for the multivariate skew-normal distribution.
# Based on the formulation: f(x) = 2 * pdf_mvnormal(x; mu, Sigma) * cdf_normal(alpha.T @ z)
# where z = D^-1 * (x - mu) are the standardized residuals.
# log probability of the multivariate normal part
# Use pm.logp for robust log-probability calculation within a CustomDist
logp_mvnormal = pm.logp(pm.MvNormal.dist(mu=mu, chol=chol), value)
# Standardize the residuals
# value shape: (num_samples, num_features)
# mu shape: (num_features,)
# stds shape: (num_features,)
# Broadcasting handles the subtraction and division correctly.
z = (value - mu) / stds
# Argument for the standard normal CDF
# alpha shape: (num_features,)
# z.T shape: (num_features, num_samples)
# cdf_arg shape: (num_samples,)
cdf_arg = pt.dot(alpha, z.T)
# Log probability of the standard normal CDF part
# Using pt.erf is numerically stable: CDF(x) = 0.5 * (1 + erf(x / sqrt(2)))
logp_cdf = pt.log(0.5 * (1 + pt.erf(cdf_arg / pt.sqrt(2.0))))
# Combine parts for the total log-likelihood for each observation
# The result is a vector of log-likelihoods, one for each sample.
logp_total_per_obs = pt.log(2) + logp_mvnormal + logp_cdf
# Return the sum over all observations
return pt.sum(logp_total_per_obs)
# Define the custom distribution for the observed data
obs = pm.CustomDist(
'obs',
mu,
chol,
stds,
alpha,
logp=mv_skew_normal_logp,
observed=Theta_normalized
)
# Example of how to sample from the model (optional)
with skew_normal_model:
idata = pm.sample(1000, tune=1000, cores=1)