I have written a model (thanks to all the help from the pymc3 developpers) in which I compare observations with a parametrised model (T_low, ne, S2_abund and S3_abund)
Each point has a different uncertainty (because of how they are observed) so I define a normal likelihood for each data point:
from os import environ
environ["MKL_THREADING_LAYER"] = "GNU"
import pymc3 as pm
import numpy as np
import theano.tensor as tt
import matplotlib.pyplot as plt
def S2_emis(temp_range, den_range, a, b, c, d, e):
return a + b / temp_range + c * np.log10(temp_range) + np.log10(
1 + e * den_range)
def S3_emis(temp_range, den_range, a, b, c):
return a + b / temp_range + c * np.log10(temp_range)
def S2_emis_tt(temp_range, den_range, a, b, c, d, e):
return a + b / temp_range + c * tt.log10(temp_range) + tt.log10(
1 + e * den_range)
def S3_emis_tt(temp_range, den_range, a, b, c):
return a + b / temp_range + c * tt.log10(temp_range)
# Lines list
labels_array = np.array(
['S2_6717A', 'S2_6731A', 'S3_6312A', 'S3_9069A', 'S3_9531A'])
ions_array = np.array(['S2', 'S2', 'S3', 'S3', 'S3'])
range_lines = np.arange(labels_array.size)
abund = {'S2': 5.48, 'S3': 6.36}
func = {'S2': S2_emis, 'S3': S3_emis}
funcPm3 = {'S2': S2_emis_tt, 'S3': S3_emis_tt}
# True data
Te_true, ne_true = 12500.0, 115.0
te_err, ne_err = 500, 25
abund_true = {'S2': 5.48, 'S3': 6.36}
coeffs_dict = {
'S2_6717A':
np.array([4.600e+00, -9.185e+03, 4.219e-01, 1.000e+00, -4.815e-04]),
'S2_6731A':
np.array([4.494e+00, -9.018e+03, 4.091e-01, 1.000e+00, 1.166e-04]),
'S3_6312A':
np.array([3.012e+00, -1.629e+04, 5.648e-01]),
'S3_9069A':
np.array([3.077e+00, -6.182e+03, 5.895e-01]),
'S3_9531A':
np.array([3.470e+00, -6.182e+03, 5.895e-01])
}
# Generate synthetic observational values
flux_array = np.empty(labels_array.size)
for i in np.arange(labels_array.size):
# Get line label
line_label = labels_array[i]
# Ion emissivity function
emisfunc = func[ions_array[i]]
# Line flux
flux_array[i] = abund_true[ions_array[i]] + emisfunc(
Te_true, ne_true, *coeffs_dict[line_label]) - 12.0
# Artificial uncertainty on the measurements
err_array = np.abs(flux_array * 0.05)
# Run pymc3 model
with pm.Model() as model:
# Physical conditions priors
T_low = pm.Normal('T_low', mu=10000.0, sd=1000.0)
n_e = pm.Normal('n_e', mu=80, sd=50)
# Composition priors
abund_dict = {
'S2': pm.Uniform('S2_abund', lower=0, upper=10),
'S3': pm.Uniform('S3_abund', lower=0, upper=10)
}
for i in range_lines:
# Line data
line_label = labels_array[i]
line_ion = ions_array[i]
line_abund = abund_dict[line_ion]
line_coeffs = coeffs_dict[line_label]
line_func = funcPm3[line_ion]
# Line parameters
line_flux = line_abund + line_func(T_low, n_e, *line_coeffs) - 12.0
# Individual Likelihood for each object
chiSq = pm.Normal(
line_label + '_Y',
mu=line_flux,
sd=err_array[i],
observed=flux_array[i])
for RV in model.basic_RVs:
print(RV.name, RV.logp(model.test_point))
# Launch model
trace = pm.sample(10000, tune=2000)
print pm.summary(trace)
pm.traceplot(trace)
plt.show()
I would like to test the impact on the result if I define a global multivariable normal distribution as my likelihood with all the data points.
At this point however, I am not sure which is right container to store the line_fluxes defined at each step: Should it be an empty symbolic tensor flow array created at each step? A shared variable array which I filled up at each iteration?
Thanks for any advice.