Beginning user, problem set correctly (experimental data fit)?

Hello everyone,

first post, hope I’m not breaking any rules

I am approaching bayesian analysis.

My problem is the following:

A model to describe a certain phenomenon was proposed in a paper. The model depends on 4 parameters and these were estimated with a fit to experimental data. I want to update these values.

I have added points to the data base. I could follow the same procedure of the paper (a simple fit), but I think it is more correct to make a Bayesian inference to estimate the most probable values of the parameters.

So I defined a model in pymc that considers the 4 model parameters as Gaussians (using mean and sigma reported in the paper) - my priors - and defined a function that combines these parameters in agreement with the theoretical model.

I added an uncertainty to the experimental points, defining the sigma (e) as HalfCauchy

Then I estimate the value of the parameters

y_pre= pm.Normal(‘y_pre’, mu=u, sigma=e, observed=x_data)

where u is the function that combines the 4 parameters, and x_data the set of experimental points (old and new).

Is it reasonable?

Is it possible to post the full code?

Yes that makes sense and yes you can always share code in this forum :slight_smile:

1 Like

thanks for the feedback
this is my code so far

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pymc as pm
import arviz as az
import scipy.stats as stats
import unidecode
import sys
import math
from scipy.optimize import curve_fit

def func(x, a, b, c, d):
    return ((1- np.exp(-a*x))/(a*x))*(1- b*((1-np.exp(-c*x))/(c*x)))*d
		
def main():
        #read dataset#
	df=pd.read_csv("kappa_lambda.csv",sep=";",decimal=",")
#	print(df.head())

        #clean and order it#
	kappa=df["k"].to_numpy()
	lam=df["lambda_p"].to_numpy()

	kappa =	np.array([float((unidecode.unidecode(ka)).replace(' ','').replace(',','.')) for ka in kappa])
	lam   =	np.array([float((unidecode.unidecode(la)).replace(' ','').replace(',','.')) for la in lam])	

	idx = np.argsort(lam)
	kappa=np.array(kappa)[idx]
	lam= np.array(lam)[idx]

#	plt.scatter(lam,kappa)
#	plt.show()	

        #define model#
	with pm.Model() as first_model:
		a = pm.Normal('a', mu=0.0412, sigma=0.0209)
		b = pm.Normal('b', mu=0.9568, sigma=0.0236)
		c = pm.Normal('c', mu=0.0608, sigma=0.0381)
		d = pm.Normal('d', mu=0.0300, sigma=0.0177)
		e = pm.HalfCauchy('e',0.1)

		u = ( (1 - pm.math.exp(-lam/a))/(lam/a) ) * d * ( 1 - b*(1 - pm.math.exp(-lam/c)/(lam/c) ) )
#		u = pm.Deterministic('u',a+(c*d+b)*lam)

		y_pre = pm.Normal('y_pre',mu=u, sigma=e, observed=kappa)
		trace=pm.sample(1000, tune=500, random_seed=123)

	print(az.summary(trace))

	a_m = float(trace.posterior['a'].mean())
	b_m = float(trace.posterior["b"].mean())
	c_m = float(trace.posterior["c"].mean())
	d_m = float(trace.posterior["d"].mean())

	
	#dummy fit#
	popt, pcov = curve_fit(func, lam, kappa)

        #plot#
	plt.figure(1)
	plt.plot(lam,kappa,'C0.')

	plt.plot(lam, func(lam,a_m,b_m,c_m,d_m), color="Green", label = 'bayes:'+str(round(a_m,4))+' '+str(round(b_m,4))
                 +' '+str(round(c_m,4))+' '+str(round(d_m,4)) )
	plt.plot(lam,func(lam,*popt),label='fit',color="Red")
	plt.plot(lam,func(lam,0.0412,0.9568,0.0608,0.0300), label = 'paper: 0.0412, 0.9568, 0.0608, 0.0300')
	plt.xlim(0,200)
	plt.ylim(0,0.02)
	plt.xlabel('lamba')
	plt.ylabel('kappa')
	plt.legend()

	plt.figure(0)
	az.plot_trace(trace, var_names=['a','b','c','d'])

	plt.show()

if __name__ == '__main__':
    main()