# Defining an array to store RV before they are introduced in the likelihood

I have written a model (thanks to all the help from the pymc3 developpers) in which I compare observations with a parametrised model (T_low, ne, S2_abund and S3_abund)

Each point has a different uncertainty (because of how they are observed) so I define a normal likelihood for each data point:

``````from os import environ
import pymc3 as pm
import numpy as np
import theano.tensor as tt
import matplotlib.pyplot as plt

def S2_emis(temp_range, den_range, a, b, c, d, e):
return a + b / temp_range + c * np.log10(temp_range) + np.log10(
1 + e * den_range)

def S3_emis(temp_range, den_range, a, b, c):
return a + b / temp_range + c * np.log10(temp_range)

def S2_emis_tt(temp_range, den_range, a, b, c, d, e):
return a + b / temp_range + c * tt.log10(temp_range) + tt.log10(
1 + e * den_range)

def S3_emis_tt(temp_range, den_range, a, b, c):
return a + b / temp_range + c * tt.log10(temp_range)

# Lines list
labels_array = np.array(
['S2_6717A', 'S2_6731A', 'S3_6312A', 'S3_9069A', 'S3_9531A'])
ions_array = np.array(['S2', 'S2', 'S3', 'S3', 'S3'])
range_lines = np.arange(labels_array.size)
abund = {'S2': 5.48, 'S3': 6.36}
func = {'S2': S2_emis, 'S3': S3_emis}
funcPm3 = {'S2': S2_emis_tt, 'S3': S3_emis_tt}

# True data
Te_true, ne_true = 12500.0, 115.0
te_err, ne_err = 500, 25
abund_true = {'S2': 5.48, 'S3': 6.36}

coeffs_dict = {
'S2_6717A':
np.array([4.600e+00, -9.185e+03, 4.219e-01, 1.000e+00, -4.815e-04]),
'S2_6731A':
np.array([4.494e+00, -9.018e+03, 4.091e-01, 1.000e+00, 1.166e-04]),
'S3_6312A':
np.array([3.012e+00, -1.629e+04, 5.648e-01]),
'S3_9069A':
np.array([3.077e+00, -6.182e+03, 5.895e-01]),
'S3_9531A':
np.array([3.470e+00, -6.182e+03, 5.895e-01])
}

# Generate synthetic observational values
flux_array = np.empty(labels_array.size)
for i in np.arange(labels_array.size):
# Get line label
line_label = labels_array[i]

# Ion emissivity function
emisfunc = func[ions_array[i]]

# Line flux
flux_array[i] = abund_true[ions_array[i]] + emisfunc(
Te_true, ne_true, *coeffs_dict[line_label]) - 12.0
# Artificial uncertainty on the measurements
err_array = np.abs(flux_array * 0.05)

# Run pymc3 model
with pm.Model() as model:
# Physical conditions priors
T_low = pm.Normal('T_low', mu=10000.0, sd=1000.0)
n_e = pm.Normal('n_e', mu=80, sd=50)

# Composition priors
abund_dict = {
'S2': pm.Uniform('S2_abund', lower=0, upper=10),
'S3': pm.Uniform('S3_abund', lower=0, upper=10)
}

for i in range_lines:
# Line data
line_label = labels_array[i]
line_ion = ions_array[i]
line_abund = abund_dict[line_ion]
line_coeffs = coeffs_dict[line_label]
line_func = funcPm3[line_ion]

# Line parameters
line_flux = line_abund + line_func(T_low, n_e, *line_coeffs) - 12.0

# Individual Likelihood for each object
chiSq = pm.Normal(
line_label + '_Y',
mu=line_flux,
sd=err_array[i],
observed=flux_array[i])
for RV in model.basic_RVs:
print(RV.name, RV.logp(model.test_point))

# Launch model
trace = pm.sample(10000, tune=2000)

print pm.summary(trace)
pm.traceplot(trace)
plt.show()
``````

I would like to test the impact on the result if I define a global multivariable normal distribution as my likelihood with all the data points.

At this point however, I am not sure which is right container to store the line_fluxes defined at each step: Should it be an empty symbolic tensor flow array created at each step? A shared variable array which I filled up at each iteration?

(I edited your post using markdown code format so it is easier to copy and paste)
FYI you can do `model.check_test_point()` instead of

``````for RV in model.basic_RVs:
print(RV.name, RV.logp(model.test_point))
``````

You need to create a theano tensor/pymc3 RV `line_flux` that has the same first dimension as the observed, couple ways to do so:

``````    line_flux = []
for i in ...:
line_flux.append(...)
line_flux = tt.stack(line_flux)
``````
``````    line_flux = tt.zeros(flux_array.shape)
for i in ...:
line_flux_i = line_abund + line_func(T_low, n_e, *line_coeffs) - 12.0
line_flux = tt.inc_subtensor(line_flux[i], line_flux_i)
``````
1 Like

Thank you very much!! That worked great:

``````from os import environ
import pymc3 as pm
import numpy as np
import theano
import theano.tensor as tt
import matplotlib.pyplot as plt
theano.config.compute_test_value = "ignore"

def S2_emis(temp_range, den_range, a, b, c, d, e):
return a + b / temp_range + c * np.log10(temp_range) + np.log10(1 + e * den_range)

def S3_emis(temp_range, den_range, a, b, c):
return a + b / temp_range + c * np.log10(temp_range)

def S2_emis_tt(temp_range, den_range, a, b, c, d, e):
return a + b / temp_range + c * tt.log10(temp_range) + tt.log10(1 + e * den_range)

def S3_emis_tt(temp_range, den_range, a, b, c):
return a + b / temp_range + c * tt.log10(temp_range)

# Lines list
labels_array = np.array(['S2_6717A', 'S2_6731A', 'S3_6312A', 'S3_9069A', 'S3_9531A'])
ions_array = np.array(['S2', 'S2', 'S3', 'S3', 'S3'])
range_lines = np.arange(labels_array.size)
abund = {'S2': 5.48, 'S3': 6.36}
func = {'S2': S2_emis, 'S3': S3_emis}
funcPm3 = {'S2': S2_emis_tt, 'S3': S3_emis_tt}

# True data
Te_true, ne_true = 12500.0, 115.0
te_err, ne_err = 500, 25
abund_true = {'S2': 5.48, 'S3': 6.36}

coeffs_dict = {'S2_6717A': np.array([4.600e+00, -9.185e+03, 4.219e-01, 1.000e+00, -4.815e-04]),
'S2_6731A': np.array([4.494e+00, -9.018e+03, 4.091e-01, 1.000e+00, 1.166e-04]),
'S3_6312A': np.array([3.012e+00, -1.629e+04, 5.648e-01]),
'S3_9069A': np.array([3.077e+00, -6.182e+03, 5.895e-01]),
'S3_9531A': np.array([3.470e+00, -6.182e+03, 5.895e-01])}

# Generate synthetic observational values
flux_array = np.empty(labels_array.size)
for i in np.arange(labels_array.size):
# Get line label
line_label = labels_array[i]

# Ion emissivity function
emisfunc = func[ions_array[i]]

# Line flux
flux_array[i] = abund_true[ions_array[i]] + emisfunc(Te_true, ne_true, *coeffs_dict[line_label]) - 12.0

# Artificial uncertainty on the measurements
err_array = np.abs(flux_array * 0.05)
cov_array = err_array**2 * np.eye(flux_array.size)

# Run pymc3 model
line_flux_tt = tt.zeros(flux_array.size)
with pm.Model() as model:

# Physical conditions priors
T_low = pm.Normal('T_low', mu=10000.0, sd=1000.0)
n_e = pm.Normal('n_e', mu=80, sd=50)

# Composition priors
abund_dict = {'S2': pm.Uniform('S2_abund', lower=0, upper=10),
'S3': pm.Uniform('S3_abund', lower=0, upper=10)}

for i in range_lines:

# Line data
line_label = labels_array[i]
line_ion = ions_array[i]
line_abund = abund_dict[line_ion]
line_coeffs = coeffs_dict[line_label]
line_func = funcPm3[line_ion]

# Line parameters
line_flux_i = line_abund + line_func(T_low, n_e, *line_coeffs) - 12.0
line_flux_tt = tt.inc_subtensor(line_flux_tt[i], line_flux_i)

#Likelihood individual line
#chiSq = pm.Normal(line_label + '_Y', mu=line_flux, sd=err_list[i], observed=fluxes_list[i])

# Global normal Likelihood for all lines
#Y = pm.Normal('Y', mu=line_flux_tt, sd=err_array, observed=flux_array)

# Global multivariable likelihood for all lines
Y = pm.MvNormal('Y', mu=line_flux_tt, cov=cov_array, observed=flux_array)

# model.check_test_point()

# Launch model
trace = pm.sample(10000, tune=2000)

print pm.summary(trace)
pm.traceplot(trace)
plt.show()
``````
1. I tried the second approach (somehow it looked more natural since I know the number of data points). I got this error however from theano:

ValueError: Cannot compute test value: input 0 (Alloc.0) of Op Subtensor{int64}(Alloc.0, Constant{0}) missing default value.
Backtrace when that variable is created:
flux_array = np.empty(labels_array.size)

Which I solved adding this line to the code:

`theano.config.compute_test_value = "ignore"`

1. The â€ś.check test_point()â€ť attribute is in the 3.5 version right? I will be happy to try it, it is neat.

2. I run some tests defining several normal likelihoods, a single normal with all the points from my tensor array and single multivariable normal distribution. I am getting the same results which I think was the expected behaviour given the uncorrelated data points. Is there however, any advantage from the simulation point of view to define only one likelihood instead of several?

Thanks again!!

1 Like

Yes the `.check_test_point` is in the newest release.

You can simulated correlated data and check how different the estimation is - you might find MvNormal model quite a bit slower when you have lots of data, but in this case it should not matter too much.

2 Likes

thatâ€™s a great idea

Thanks again