Hitting a weird error to do with RNGs in Scan in a custom function inside a Potential

Okay… so I'm back at my laptop, and I've tried to narrow this down to a handful of tests based on the suggestions you've kindly shared so far.

I’ve created a new function (not shown here for brevity) to manually calculate the log_jcd using scan (based on pytensor.gradient.jacobian), with a bit more control over the scan, in particular to test enforcing strict=True, which actually hinders things here. Very interesting…

Run the script below and you’ll get the printout shown after it.

The TLDR is:

Test 1

Using .ravel() results in a DisconnectedInputError, so although that’s a nice idea to avoid 2D issues, it seems a non-starter. This relates to our earlier discussion, @jessegrabowski.
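
For anyone following along, here's a minimal sketch (separate from the repro script below; variable names illustrative) of why the ravel route disconnects: x.ravel() builds a new variable downstream of x, so it is never an ancestor of a cost computed from x, and grad raises.

import pytensor.gradient as tg
import pytensor.tensor as pt

x = pt.matrix("x")
cost = (x ** 2).sum()
g_ok = tg.grad(cost, wrt=x)  # fine: x is an ancestor of cost
# tg.grad(cost, wrt=x.ravel())  # DisconnectedInputError: x.ravel() is a
#                               # *new* node built from x, never on the
#                               # path from x to cost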

Test 2

If for now I focus on just one of the columns of f_inv_x, then I can run scan along the vector, so that grad(cost=c[i, 0]) is a scalar cost, and require scan strict=True.

This yields a MissingInputError for the 2 RNGs, which is due to logic in scan.basic.scan:1060 preventing them from being listed in other_shared_scan_args and thus passed into Scan.

I believe the expected thing to do here would be to add the RNGs into scan(..., non_sequences=[here]), but in my case the two RNGs come from a 2D pm.LogNormal.dist() and I have nothing to pass into scan() - quite interesting
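
For reference, the usual strict=True pattern when you do hold the shared variable yourself looks like this (a minimal sketch, names illustrative):

import numpy as np
import pytensor
import pytensor.tensor as pt

w = pytensor.shared(np.array(2.0), name="w")  # a shared variable used in the step
xs = pt.vector("xs")

def step(x, w_):
    return x * w_

# strict=True requires every shared variable to be passed in explicitly
ys, _ = pytensor.scan(step, sequences=xs, non_sequences=[w], strict=True)
print(pytensor.function([xs], ys)(np.arange(3.0)))  # [0. 2. 4.]

Whereas the RNGs inside pm.LogNormal.dist() are created internally, leaving no equivalent handle to hand over, short of fishing them out of the graph.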

Test 3

Repeat Test 2 and allow scan strict=False

This time scan runs, because other_shared_scan_args contains the RNGs.

However, when I sample this in another script, I get: UserWarning: RNG Variable RandomGeneratorSharedVariable(<Generator(PCG64) at 0x14942FBC0>) has multiple clients. This is likely an inconsistent random graph.

I guess I’ll try to dig into this UserWarning tomorrow…
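
As a starting point for that digging, here's a sketch of counting clients per RNG in the uncompiled graph. I'm assuming pymc's find_rng_nodes helper (in pymc.pytensorf) and pytensor.graph.basic.applys_between; mdl and y_c come from the script below:

from collections import Counter
from pymc.pytensorf import find_rng_nodes
from pytensor.graph.basic import applys_between

outputs = [mdl.y_c]              # graph of interest
rngs = find_rng_nodes(outputs)   # shared RNG variables feeding the graph
counts = Counter()
for node in applys_between([], outputs):
    for inp in node.inputs:
        for rng in rngs:
            if inp is rng:  # identity check: == is symbolic on tensor variables
                counts[rng] += 1
print(counts)  # more than one client per RNG would explain the warning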

# 994x.py
# just the essentials to allow debug

# Author: jonathan.sedar@oreum.io

# Last updated: 2023-10-21 16:06:18

# Python implementation: CPython
# Python version       : 3.10.12
# IPython version      : 8.16.1

# pymc    : 5.9.0
# pytensor: 2.17.3

# Compiler    : Clang 15.0.7 
# OS          : Darwin
# Release     : 22.6.0
# Machine     : x86_64
# Processor   : i386
# CPU cores   : 8
# Architecture: 64bit

import numpy as np
import pandas as pd
import pymc as pm
import pytensor
import pytensor.gradient as tg
import pytensor.tensor as pt
from pymc.distributions.dist_math import check_icdf_parameters, check_icdf_value
from scipy import stats

rng_np = np.random.default_rng(seed=42)
CLIP = 1e-15  # NOTE 1e-18 too small

def normal_icdf(
    x: pt.TensorVariable, mu: pt.TensorVariable = 0.0, sigma: pt.TensorVariable = 1.0
) -> pt.TensorVariable:
    """Normal icdf, default mean-center 1sd aka Inverse CDF aka PPF aka Probit
    NOTE:
    + Modified from pymc.distributions.continuous.Normal.icdf
    + Slow implementation of erfcinv not in C
    + See also https://stackoverflow.com/questions/60472139/computing-the-inverse-of-the-complementary-error-function-erfcinv-in-c
    + Hack to clip values away from edges [0., 1.]:
        Value = {0, 1} is theoretically allowed, but causes a numeric computational
        issue somewhere in pt.erfcinv which throws infs, so clip slightly away.
    + Used in oreum_lab.src.model.copula.model_a
        NOTE: Possibly after pymc > 5.5 will change to use
        y_cop_u_rv = pm.Normal.dist(mu=0., sigma=1.)
        pm.icdf(y_cop_u_rv, pt.stack([y_m1u, y_m2u], axis=1))
    """
    x = pt.clip(x, CLIP, 1 - CLIP)
    r = check_icdf_value(mu - sigma * pt.sqrt(2.0) * pt.erfcinv(2.0 * x), x)
    return check_icdf_parameters(r, sigma > 0.0, msg="sigma > 0")
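
# Quick sanity check (illustrative, not part of the original run): away from
# the clipped edges this matches scipy, e.g.
#   normal_icdf(pt.constant([0.1, 0.5, 0.9])).eval()
#   ~ stats.norm.ppf([0.1, 0.5, 0.9]) = [-1.2816, 0., 1.2816]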



## Create synthetic data =======================================================

def create_data():
    """Synthesise correlated lognormal marginals via a Gaussian copula"""
    cfg = dict(r=-0.8)
    cfg.update(
        cov=[[1.0, cfg['r']], [cfg['r'], 1.0]],
        mu=[0.2, 2.0],
        s=[0.5, 1.0],
        n=10,
    )

    # draw correlated standard normals, map to uniforms via the normal CDF,
    # then map to the lognormal marginals via each inverse CDF (PPF)
    c_dist = stats.multivariate_normal(mean=np.zeros(2), cov=cfg['cov'], seed=rng_np)
    m1_dist = stats.lognorm(scale=np.exp(cfg['mu'][0]), s=cfg['s'][0])
    m2_dist = stats.lognorm(scale=np.exp(cfg['mu'][1]), s=cfg['s'][1])
    u = stats.norm.cdf(c_dist.rvs(cfg['n']))
    dfy = pd.DataFrame(
        np.vstack([m1_dist.ppf(u[:, 0]), m2_dist.ppf(u[:, 1])]).T,
        columns=['m1', 'm2'],
    )
    return cfg, dfy


## Build model =================================================================

def create_model(cfg, dfy):
    coords = dict(
        m_nm = ['m_m1', 'm_m2'],
        s_nm = ['s_m1', 's_m2'],
        y_nm = ['y_m1', 'y_m2'],
        y_u_nm = ['y_m1u', 'y_m2u'],
        y_c_nm = ['y_m1c', 'y_m2c'],
    )
    coords_m = dict(oid = dfy.index.values)

    with pm.Model(coords=coords, coords_mutable=coords_m) as mdl:
        y = pm.MutableData('y', dfy.copy(), dims=('oid', 'y_nm'))

        # 1. Define Observed Y marginal dists
        m_mu = pm.Normal('m_mu', mu=0.0, sigma=1.0, dims='m_nm')
        m_s = pm.InverseGamma('m_s', alpha=5.0, beta=4.0, dims='s_nm')
        m_d = pm.LogNormal.dist(mu=m_mu, sigma=m_s, shape=(cfg['n'], 2))

        # 3. Transform Observed Y to Uniform through marginal CDFs
        y_u = pm.Deterministic('y_u', pt.exp(pm.logcdf(m_d, y)), dims=('oid', 'y_u_nm'))

        # 4. Transform Uniformed Y to an Empirical Copula via MvN InvCDF
        y_c = pm.Deterministic('y_c', normal_icdf(y_u), dims=('oid', 'y_c_nm'))

    return mdl


## Define tests for scan  ======================================================

def test1_scan(f_inv_x, x):
    """1. Use ravel: get a `DisconnectedInputError`"""
    n = f_inv_x.shape[0]
    k = f_inv_x.shape[1]
    idx = pt.arange(n * k, dtype="int32")  # one index per raveled element, to match n_steps=n*k

    def get_grads(i, c, w):
        return tg.grad(cost=c[i], wrt=[w])

    grads, _ = pytensor.scan(get_grads, 
                    sequences=idx, non_sequences=[f_inv_x.ravel(), x.ravel()],
                    n_steps=n*k, name="get_grads", strict=True)
    return grads


def test2_scan(f_inv_x, x):
    """2. Focus on a single scalar cost `c[i, 0]` and `strict=True`: 
    get a `MissingInputError` for the RNG
    """
    n = f_inv_x.shape[0]
    idx = pt.arange(n, dtype="int32")

    def get_grads(i, c, w):
        return tg.grad(cost=c[i, 0], wrt=[w])

    grads, _ = pytensor.scan(get_grads, 
                sequences=idx, non_sequences=[f_inv_x, x],
                n_steps=n, name="get_grads", strict=True)
    
    return grads


def test3_scan(f_inv_x, x):
    """3. Focus on a single scalar cost `c[i, 0]` and relax `strict=False`: 
    this runs
    """
    n = f_inv_x.shape[0]
    idx = pt.arange(n, dtype="int32")

    def get_grads(i, c, w):
        return tg.grad(cost=c[i, 0], wrt=[w])

    grads, _ = pytensor.scan(get_grads, 
                sequences=idx, non_sequences=[f_inv_x, x],
                n_steps=n, name="get_grads", strict=False)
    
    return grads


## Run tests ===================================================================

if __name__ == '__main__':
    cfg, dfy = create_data()
    mdl = create_model(cfg, dfy)
    
    print("\n\nTest 1:")
    try:
        # 1. Use ravel: get a `DisconnectedInputError`
        t1 = test1_scan(mdl.y_c, mdl.y).eval()
    except tg.DisconnectedInputError as e:
        print(e)
    
    print("\n\nTest 2:")
    try:
        # 2. Focus on a single scalar cost `c[i, 0]` and `strict=True`:
        # get a `MissingInputError` for the 2 RNGs from the LogNormal.dist()
        t2 = test2_scan(mdl.y_c, mdl.y).eval()
    except pytensor.graph.utils.MissingInputError as e:
        print(e)
    
    print("\n\nTest 3:")
    # 3. Focus on a single scalar cost `c[i, 0]` and relax `strict=False`: 
    # runs and gives values
    t3 = test3_scan(mdl.y_c, mdl.y).eval()
    
    print(t3.shape)
    print(t3.sum(axis=0))
       

The printout:

Test 1:
 
Backtrace when that variable is created:

  File "/Users/jon/workspace/oreum/oreum_lab2/notebooks/994x.py", line 165, in <module>
    t1 = test1_scan(mdl.y_c, mdl.y).eval()
  File "/Users/jon/workspace/oreum/oreum_lab2/notebooks/994x.py", line 117, in test1_scan
    sequences=idx, non_sequences=[f_inv_x.ravel(), x.ravel()],



Test 2:
Scan is missing inputs: [RandomGeneratorSharedVariable(<Generator(PCG64) at 0x1465D92A0>), RandomGeneratorSharedVariable(<Generator(PCG64) at 0x14A5211C0>)]


Test 3:
/Users/jon/miniforge/envs/oreum_lab2/lib/python3.10/site-packages/pytensor/tensor/rewriting/elemwise.py:700: UserWarning: Optimization Warning: The Op erfcinv does not provide a C implementation. As well as being potentially slow, this also disables loop fusion.
  warn(
(10, 10, 2)
[[1.17571404 0.        ]
 [1.06199971 0.        ]
 [0.42035114 0.        ]
 [0.96419869 0.        ]
 [0.98001082 0.        ]
 [1.15841781 0.        ]
 [0.74526782 0.        ]
 [1.23432585 0.        ]
 [1.19653064 0.        ]
 [1.31979877 0.        ]]