Okay… so I'm back at the laptop again, and I've tried to narrow things down to a handful of tests based on the suggestions you've kindly shared so far.
I've created a new function (not shown here for brevity) to manually calculate the log_jcd using scan (based on pytensor.gradient.jacobian), with a bit more control over the scan, in particular to test enforcing strict=True, which, interestingly, actually hinders things.
Run this as a script, and you’ll get the following printout.
The TLDR is:
Test 1
Using .ravel() results in a DisconnectedInputError, so although that's a nice idea to avoid the 2D issues, it seems a non-starter. This relates to our earlier discussion @jessegrabowski.
Test 2
If for now I focus on just one of the columns of f_inv_x, then I can run scan along the vector so that grad(cost=f[i]) is a scalar, and I set scan strict=True.
This yields a MissingInputError for the 2 RNGs, which is due to logic in scan.basic.scan:1060 preventing them from being listed in other_shared_scan_args and thus passed into Scan.
I believe the expected thing to do here would be to add the RV into scan(..., non_sequences=[here]), but in my case the two RNGs come from a 2D pm.LogNormal.dist() and I have nothing to pass into scan() - quite interesting
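One thing I might try next (untested sketch, so treat with caution: I'm assuming pymc.pytensorf.find_rng_nodes can pull the RNG shared variables out of the graph of f_inv_x) is to pass those RNGs explicitly as non_sequences so that strict=True has something to hold on to, mirroring the test functions in the script below:

def test2b_scan(f_inv_x, x):
    """Hypothetical variant of Test 2: pass the RNGs explicitly as non_sequences"""
    from pymc.pytensorf import find_rng_nodes  # assumed helper, pymc ~5.x
    rngs = find_rng_nodes([f_inv_x])  # hopefully the 2 RNGs from the LogNormal.dist()
    n = f_inv_x.shape[0]
    idx = pt.arange(n, dtype="int32")
    def get_grads(i, c, w, *rngs):
        return tg.grad(cost=c[i, 0], wrt=[w])
    grads, _ = pytensor.scan(get_grads,
        sequences=idx, non_sequences=[f_inv_x, x, *rngs],
        n_steps=n, name="get_grads", strict=True)
    return grads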
Test 3
Repeat Test 2 but allow scan strict=False.
This time scan runs, because other_shared_scan_args contains the RNGs.
However, when I sample this in another script I get a UserWarning: RNG Variable RandomGeneratorSharedVariable(<Generator(PCG64) at 0x14942FBC0>) has multiple clients. This is likely an inconsistent random graph.
I guess I'll try to dig into this UserWarning tomorrow…
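When I do dig in, a rough starting point might be to count how many Apply nodes take each RNG as an input in the graph that produces y_c (again just a sketch, run in a session where mdl has been built from the script below; I believe graph_inputs and io_toposort in pytensor.graph.basic do what I want here, though the warning itself is about the compiled function graph):

from pytensor.graph.basic import graph_inputs, io_toposort

inputs = list(graph_inputs([mdl.y_c]))
rngs = [v for v in inputs if "RandomGenerator" in type(v).__name__]
nodes = io_toposort(inputs, [mdl.y_c])
for rng in rngs:
    clients = [node for node in nodes if rng in node.inputs]
    print(rng, len(clients))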
# 994x.py
# just the essentials to allow debug
# Author: jonathan.sedar@oreum.io
# Last updated: 2023-10-21 16:06:18
# Python implementation: CPython
# Python version : 3.10.12
# IPython version : 8.16.1
# pymc : 5.9.0
# pytensor: 2.17.3
# Compiler : Clang 15.0.7
# OS : Darwin
# Release : 22.6.0
# Machine : x86_64
# Processor : i386
# CPU cores : 8
# Architecture: 64bit
import numpy as np
import pandas as pd
import pymc as pm
import pytensor
import pytensor.gradient as tg
import pytensor.tensor as pt
from pymc.distributions.dist_math import check_icdf_parameters, check_icdf_value
from scipy import stats
rng_np = np.random.default_rng(seed=42)
CLIP = 1e-15 # NOTE 1e-18 too small
def normal_icdf(
    x: pt.TensorVariable, mu: pt.TensorVariable = 0.0, sigma: pt.TensorVariable = 1.0
) -> pt.TensorVariable:
    """Normal icdf, default mean-center 1sd aka Inverse CDF aka PPF aka Probit
    NOTE:
    + Modified from pymc.distributions.continuous.Normal.icdf
    + Slow implementation of erfcinv not in C
    + See also https://stackoverflow.com/questions/60472139/computing-the-inverse-of-the-complementary-error-function-erfcinv-in-c
    + Hack to clip values away from edges [0., 1.]:
      Value = {0, 1} is theoretically allowed, but causes a numeric computational
      issue somewhere in pt.erfcinv which throws infs, so clip slightly away.
    + Used in oreum_lab.src.model.copula.model_a
    NOTE: Possibly after pymc > 5.5 will change to use
      y_cop_u_rv = pm.Normal.dist(mu=0., sigma=1.)
      pm.icdf(y_cop_u_rv, pt.stack([y_m1u, y_m2u], axis=1))
    """
    x = pt.clip(x, CLIP, 1 - CLIP)
    r = check_icdf_value(mu - sigma * pt.sqrt(2.0) * pt.erfcinv(2.0 * x), x)
    return check_icdf_parameters(r, sigma > 0.0, msg="sigma > 0")
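# Not part of the original script: a hypothetical quick sanity check that
# normal_icdf agrees with scipy.stats.norm.ppf away from the clipped edges
# (call _check_normal_icdf() manually if wanted)
def _check_normal_icdf():
    vals = np.array([0.1, 0.5, 0.9])
    approx = normal_icdf(pt.as_tensor_variable(vals)).eval()
    exact = stats.norm.ppf(vals)
    print(np.c_[approx, exact])  # the two columns should agree closely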
## Create synthetic data =======================================================
def create_data():
    cfg = dict(r = -0.8)
    cfg.update(
        cov = [[1.0, cfg['r']], [cfg['r'], 1.0]],
        mu = [0.2, 2.0],
        s = [0.5, 1.],
        n = 10)
    c_dist = stats.multivariate_normal(mean=np.zeros(2), cov=cfg['cov'], seed=rng_np)
    m1_dist = stats.lognorm(scale=np.exp(cfg['mu'][0]), s=cfg['s'][0])
    m2_dist = stats.lognorm(scale=np.exp(cfg['mu'][1]), s=cfg['s'][1])
    u = stats.norm.cdf(c_dist.rvs(cfg['n']))
    dfy = pd.DataFrame(np.vstack([m1_dist.ppf(u[:, 0]), m2_dist.ppf(u[:, 1])]).T,
                       columns=['m1', 'm2'])
    return cfg, dfy
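# Not part of the original script: a hypothetical check that the synthetic margins
# carry roughly the intended negative dependence (n=10 is tiny, so only roughly)
def _check_data(cfg, dfy):
    rho, _ = stats.spearmanr(dfy['m1'], dfy['m2'])
    print(f"spearman rho = {rho:.2f} vs cfg r = {cfg['r']}")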
## Build model =================================================================
def create_model(cfg, dfy):
    coords = dict(
        m_nm = ['m_m1', 'm_m2'],
        s_nm = ['s_m1', 's_m2'],
        y_nm = ['y_m1', 'y_m2'],
        y_u_nm = ['y_m1u', 'y_m2u'],
        y_c_nm = ['y_m1c', 'y_m2c'],
    )
    coords_m = dict(oid = dfy.index.values)
    with pm.Model(coords=coords, coords_mutable=coords_m) as mdl:
        y = pm.MutableData('y', dfy.copy(), dims=('oid', 'y_nm'))
        # 1. Define Observed Y marginal dists
        m_mu = pm.Normal('m_mu', mu=0.0, sigma=1.0, dims='m_nm')
        m_s = pm.InverseGamma('m_s', alpha=5.0, beta=4.0, dims='s_nm')
        m_d = pm.LogNormal.dist(mu=m_mu, sigma=m_s, shape=(cfg['n'], 2))
        # 3. Transform Observed Y to Uniform through marginal CDFs
        y_u = pm.Deterministic('y_u', pt.exp(pm.logcdf(m_d, y)), dims=('oid', 'y_u_nm'))
        # 4. Transform Uniformed Y to an Empirical Copula via MvN InvCDF
        y_c = pm.Deterministic('y_c', normal_icdf(y_u), dims=('oid', 'y_c_nm'))
    return mdl
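# Not part of the original script: a hypothetical prior-predictive peek at the
# Deterministics y_u (should lie in [0, 1]) and y_c (roughly standard normal)
def _prior_check(mdl):
    with mdl:
        idata = pm.sample_prior_predictive(50, random_seed=42)
    return idata.prior[['y_u', 'y_c']]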
## Define tests for scan ======================================================
def test1_scan(f_inv_x, x):
    """1. Use ravel: get a `DisconnectedInputError`"""
    n = f_inv_x.shape[0]
    k = f_inv_x.shape[1]
    idx = pt.arange(n, dtype="int32")
    def get_grads(i, c, w):
        return tg.grad(cost=c[i], wrt=[w])
    grads, _ = pytensor.scan(get_grads,
        sequences=idx, non_sequences=[f_inv_x.ravel(), x.ravel()],
        n_steps=n*k, name="get_grads", strict=True)
    return grads
def test2_scan(f_inv_x, x):
    """2. Focus on a single scalar cost `c[i, 0]` and `strict=True`:
    get a `MissingInputError` for the RNG
    """
    n = f_inv_x.shape[0]
    idx = pt.arange(n, dtype="int32")
    def get_grads(i, c, w):
        return tg.grad(cost=c[i, 0], wrt=[w])
    grads, _ = pytensor.scan(get_grads,
        sequences=idx, non_sequences=[f_inv_x, x],
        n_steps=n, name="get_grads", strict=True)
    return grads
def test3_scan(f_inv_x, x):
    """3. Focus on a single scalar cost `c[i, 0]` and relax `strict=False`:
    this runs
    """
    n = f_inv_x.shape[0]
    idx = pt.arange(n, dtype="int32")
    def get_grads(i, c, w):
        return tg.grad(cost=c[i, 0], wrt=[w])
    grads, _ = pytensor.scan(get_grads,
        sequences=idx, non_sequences=[f_inv_x, x],
        n_steps=n, name="get_grads", strict=False)
    return grads
## Run tests ===================================================================
if __name__ == '__main__':
    cfg, dfy = create_data()
    mdl = create_model(cfg, dfy)
    print("\n\nTest 1:")
    try:
        # 1. Use ravel: get a `DisconnectedInputError`
        t1 = test1_scan(mdl.y_c, mdl.y).eval()
    except tg.DisconnectedInputError as e:
        print(e)
    print("\n\nTest 2:")
    try:
        # Focus on a single scalar cost `c[i, 0]` and `strict=True`:
        # get a `MissingInputError` for the 2 RNGs from the LogNormal.dist()
        t2 = test2_scan(mdl.y_c, mdl.y).eval()
    except pytensor.graph.utils.MissingInputError as e:
        print(e)
    print("\n\nTest 3:")
    # 3. Focus on a single scalar cost `c[i, 0]` and relax `strict=False`:
    # runs and gives values
    t3 = test3_scan(mdl.y_c, mdl.y).eval()
    print(t3.shape)
    print(t3.sum(axis=0))
Test 1:
Backtrace when that variable is created:
File "/Users/jon/workspace/oreum/oreum_lab2/notebooks/994x.py", line 165, in <module>
t1 = test1_scan(mdl.y_c, mdl.y).eval()
File "/Users/jon/workspace/oreum/oreum_lab2/notebooks/994x.py", line 117, in test1_scan
sequences=idx, non_sequences=[f_inv_x.ravel(), x.ravel()],
Test 2:
Scan is missing inputs: [RandomGeneratorSharedVariable(<Generator(PCG64) at 0x1465D92A0>), RandomGeneratorSharedVariable(<Generator(PCG64) at 0x14A5211C0>)]
Test 3:
/Users/jon/miniforge/envs/oreum_lab2/lib/python3.10/site-packages/pytensor/tensor/rewriting/elemwise.py:700: UserWarning: Optimization Warning: The Op erfcinv does not provide a C implementation. As well as being potentially slow, this also disables loop fusion.
warn(
(10, 10, 2)
[[1.17571404 0. ]
[1.06199971 0. ]
[0.42035114 0. ]
[0.96419869 0. ]
[0.98001082 0. ]
[1.15841781 0. ]
[0.74526782 0. ]
[1.23432585 0. ]
[1.19653064 0. ]
[1.31979877 0. ]]