I am trying to understand why my inference keeps returning chains that do not converge even when I have no divergences.
I have a random (3 X 3) rotation matrix that I use to rotate a (M X 3) array of data. I then add random Gaussian noise to each column, same variance for each column. I had trouble doing the sampling on the 9 elements of the (3 X 3) rotation matrix so I transformed them into their angles. (Updated with new image and correct Theano implementation for yaw)
Code Below
import numpy as np
import pymc3 as pm
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import arviz as az
import scipy.linalg as la
import theano.tensor as tt
import theano.tensor.nlinalg as tt_la
from math import (asin, pi, atan2, cos, sin)
# Function Needed
def yaw_pitch_roll_inverse_calc(Q):
if Q[2,0] != 1 or Q[2,0] != -1:
pitch_1 = -1*asin(Q[2,0])
pitch_2 = pi - pitch_1
roll_1 = atan2( Q[2,1] / cos(pitch_1) , Q[2,2] /cos(pitch_1) )
roll_2 = atan2( Q[2,1] / cos(pitch_2) , Q[2,2] /cos(pitch_2) )
yaw_1 = atan2( Q[1,0] / cos(pitch_1) , Q[0,0] / cos(pitch_1) )
yaw_2 = atan2( Q[1,0] / cos(pitch_2) , Q[0,0] / cos(pitch_2) )
pitch = pitch_1
roll = roll_1
yaw = yaw_1
else:
yaw = 0 # anything (we default this to zero)
if Q[2, 0] == -1:
pitch = pi/2
roll = yaw + atan2(Q[0, 1], Q[0, 2])
else:
pitch = -pi/2
roll = -1*yaw + atan2(-1*Q[0, 1],-1*Q[0, 2])
# convert from radians to degrees
roll = roll*180/pi
pitch = pitch*180/pi
yaw = yaw*180/pi
rxyz_deg = [yaw, pitch, roll]
return rxyz_deg
np.random.seed(9002)
rotation = np.random.randint(low=1, high=100, size=(3,3)) # Create Random 3X3 Matrix
Q = np.transpose(la.orth(rotation)) # Orthogonalize it
M = 10_000 # Number of Data Points
D = 3 # Number of Dimensions
shape = np.random.uniform(low=0, high=20, size=(M, D)) # create an a time series object with 3-spatial coords
noise = np.random.normal(loc=0.0, scale=3.25, size=(M, D)) # Create Noise
# Only Rotation
target = (shape @ Q) + noise # Make the target shape
if __name__ == '__main__':
with pm.Model() as model_Q:
# Priors for unknown model parameter
sigma_a = pm.Lognormal('sigma_a', mu=0, sigma=1)
sigma_b = pm.Lognormal('sigma_b', mu=0, sigma=1)
sigma_g = pm.Lognormal('sigma_g', mu=0, sigma=1)
alpha = pm.Normal("alpha", mu=a, sigma=sigma_a)
beta = pm.Normal("beta", mu=b, sigma=sigma_b)
gamma = pm.Normal("gamma", mu=g, sigma=sigma_g)
# Math on Priors
yaw = tt.stack([[tt.cos(gamma), -tt.sin(gamma), 0,
tt.sin(gamma), tt.cos(gamma), 0,
0, 0, 1]).reshape((3, 3))
pitch = tt.stack([tt.cos(beta), 0, tt.sin(beta),
0, 1, 0,
-tt.sin(beta), 0, tt.cos(beta)]).reshape((3, 3))
roll = tt.stack([1, 0, 0,
0, tt.cos(alpha), -tt.sin(alpha),
0, tt.sin(alpha), tt.cos(alpha)]).reshape((3, 3))
T = tt.dot(tt.dot(yaw, pitch), roll)
sigma = pm.HalfNormal("sigma", sigma=3.5)
# Expected value of outcome
mu = tt.dot(shape, T)
# Likelihood (sampling distribution) of observations
Y_obs = pm.Normal("Y_obs", mu=mu, sigma=sigma, observed=target)
with model_Q:
# draw 500 posterior samples
trace_Q = pm.sample(4000, tune=4000, cores=4, target_accept=0.95)