Bayesian analysis of chemical network

Trying to get sensitivity of k1 and k2 reaction rates for a simple chemical network A->B->C with k1_0 and k2_0, respectively.

import numpy as np
import pymc3 as pm
from matplotlib.pyplot import figure, scatter, legend, plot
from scipy.integrate import solve_ivp
from sys import exit
Nt = 11
time  = 10
tt = np.linspace(0,time,Nt+1)
y0 = [1,0,0]


k1_0, k2_0 = 1, 0.5
def equat(t,c):
    da_dt = -k1_0*c[0]
    db_dt = k1_0*c[0] - k2_0*c[1]
    dc_dt = k2_0*c[1]
    return da_dt, db_dt, dc_dt

c_est = solve_ivp(equat, t_span = [0,time], t_eval = tt, y0 = y0)  

niter = 10
with pm.Model() as reak_model:
    k1 = pm.Normal('k1', mu = 0, sd = 100)
    k2 = pm.Normal('k2', mu=0, sd=100, shape = 1)
    sigma = pm.HalfNormal('sigma', sd=1)
    def equat_2(t,c):
        da_dt = -k1*c[0]
        db_dt = k1*c[0] - k1*c[1]
        dc_dt = k1*c[1]
        return da_dt, db_dt, dc_dt
    c = solve_ivp(equat_2, t_span = [0,time], t_eval = tt, y0 = y0)
    likelihood = pm.Normal('y', mu=c.y, sd=sigma, observed=c_est.y)
    trace = pm.sample(niter, chains = 1)
pm.traceplot(trace, varnames=['k1','k2'])

 ValueError: setting an array element with a sequence.

Im getting thise error. Since im new to whole bayesian and pymc3 I am wondering if my whole idea is even right and do I need to switch my distributions of k1 and k2. THank you!

The fundamental issue here is that solve_ivp is performed on numeric arrays instead of theano.tensor objects. This is fine for c_est (since it’s observed data, not a random variable), but not for c.

To map from RandomVariable inputs (k1, k2) to RandomVariable outputs (y), you will need to use a theano implementation of your ODE solver. You can probably “borrow” an implementation from one of many blogposts (such as https://tmramalho.github.io/blog/2013/10/17/solving-stochastic-differential-equations-with-theano/).

Other than that, you probably want k1 and k2 to be strictly positive, so perhaps HalfNormal.

Finally, k2 does not appear to be used in equat_2.

2 Likes

Thank you so much! Ill try the theano, if i have any questions ill come back!

I tried to do something, I don’t even know if I am on the right path with this. But this is my attempt, found it online and modified it

import theano.tensor as tt
import pymc3 as pm
import numpy as np
import theano
import matplotlib.pyplot as plt
from scipy.integrate import solve_ivp
time = 10
Nt = 11 # Number of timesteps
deltat = time/Nt
# Generate random noise
noise_stdev = 0.1
noise = np.random.randn(Nt+1) * noise_stdev
niter = 1000
chains = 4

# We create a relatively sparse external forcing series

y =  np.random.randn(Nt)
y[y > -1.5] = 0 

k1_0 = 0.1    # Model's lone parameter
x0 = 1     # Initial value

# This function does a simple forward integration of the ODE
def f(x0,noise,a,Nt):
    x = np.zeros(Nt+1)
    x[0] = x0
    for t in range(0, Nt):
        x[t+1] = x[t] - x[t] * k1_0 * deltat
    return x + noise

# Generate and plot the data
data = f(x0,noise,k1_0,Nt)
plt.figure(figsize = (5,3))
_ = plt.plot(data,color = '0.3'),plt.ylabel('$x(t)$'),plt.xlabel('$t$')

# Generate and plot the data
def fn_step(current_y,previous_x,coeff):

    return previous_x - coeff*previous_x + current_y

from pymc3.distributions import distribution
floatX='float32' # This is just to fix an annoying casting issue that comes up when using scan

class Dynamical(distribution.Continuous):

    def __init__(self,coeff,sd,x0,*args,**kwargs):
        super(Dynamical,self).__init__(*args,**kwargs)
        self.coeff = tt.as_tensor_variable(coeff)
        self.sd    = tt.as_tensor_variable(sd)
        self.x0    = tt.as_tensor_variable(x0)
        
    def get_mu(self,x,x0,coeff):
        # Here, we are saying that the vector mu is calculated by iteratively applying 
        # the forward equation 'fn_step' and that it uses external sequence 'y'
        # along with initial value 'coeff' (i.e. a scalar and non-sequence variable).
        # The info about the output is the initial value 'x0'.
        mu,_ = theano.scan(fn = fn_step, outputs_info = tt.cast(x0,floatX),
                     non_sequences=tt.cast(coeff,floatX))
        return mu
    
    
    def logp(self,x):
        mu = self.get_mu(x,self.x0,self.coeff)
        sd = self.sd 
        return tt.sum(pm.Normal.dist(mu = mu,sd = sd).logp(x))
    
with pm.Model() as model:
    a_parameter = pm.Normal('a_parameter')
    sd = pm.HalfNormal('sd',sd = 0.2)
    x0 = pm.Flat('x0')
    x_testval = np.ones(len(y),dtype=np.float64)
    x  = Dynamical('x',coeff = a_parameter, sd= sd, x0=x0,shape = len(y),
                   testval=x_testval)
    
    observations = pm.Normal('observations',mu = x, sd = 0.01,observed=data)
    trace = pm.sample(niter = niter, chains = chains)

When I try to run the code it says :

Traceback (most recent call last):

File “”, line 1, in
runfile(’/code.py’, wdir=‘folder’)

File “/anaconda3/lib/python3.7/site-packages/spyder_kernels/customize/spydercustomize.py”, line 786, in runfile
execfile(filename, namespace)

File “/anaconda3/lib/python3.7/site-packages/spyder_kernels/customize/spydercustomize.py”, line 110, in execfile
exec(compile(f.read(), filename, ‘exec’), namespace)

File “/Documents/KI_delo/krneki.py”, line 84, in
testval=x_testval)

File /anaconda3/lib/python3.7/site-packages/pymc3/distributions/distribution.py", line 41, in new
dist = cls.dist(*args, **kwargs)

File “/anaconda3/lib/python3.7/site-packages/pymc3/distributions/distribution.py”, line 52, in dist
dist.init(*args, **kwargs)

File “code.py”, line 59, in init
self.coeff = tt.as_tensor_variable(coeff)

AttributeError: ‘numpy.ndarray’ object has no attribute ‘as_tensor_variable’

I am completely lost :frowning: Do you have any idea?

import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import odeint, solve_ivp
import seaborn
import pymc3 as pm
import theano.tensor as T
from theano.compile.ops import as_op
from sys import exit
time = 10
Nt = 11
tt = np.linspace(0,time, Nt)
y0 = [1,2,0]
k1, k2 = 1, 1
#Actual Solution of the Differential Equation(Used to generate data)

def real(t,c):
        da_dt = -k1*c[0]
        da1_dt = -k2*c[1]
        db_dt = k1*c[0] + k2*c[1]
        return da_dt, da1_dt, db_dt

c_est = solve_ivp(real, t_span = [0,time], t_eval = tt, y0 = y0)


#Method For Solving the ODE
def lv(xdata, k1=1, k2=1):
    def equat(c,t):
        da_dt = -k1*c[0]
        da1_dt = -k2*c[1]
        db_dt = k1*c[0] + k2*c[1]
        return da_dt, da1_dt, db_dt
    Y, dict  = odeint(equat,y0,xdata,full_output=True)
    return Y

#Generating Data for Bayesian Inference
k1, k2 = 1, 1
ydata = c_est.y

# Adding some error to the ydata points
yerror = 10*np.random.rand(Nt)
ydata += np.random.normal(0.0, np.sqrt(yerror))
ydata = np.ravel(ydata)

@as_op(itypes=[T.dscalar, T.dscalar], otypes=[T.dvector])

def func(al,be):
    Q = lv(tt, k1=al, k2=be)
    return np.ravel(Q)


# Number of Samples and Initial Conditions
nsample = 5000
y0 = 1.0
sd = 0.2

# Model for Bayesian Inference
model = pm.Model()
with model:
    # Priors for unknown model parameters
    k1 = pm.HalfNormal('k1', sd = sd)
    k2 = pm.HalfNormal('k2', sd = sd)

    # Expected value of outcome
    mu = func(k1,k2)


    # Likelihood (sampling distribution) of observations
    Y_obs = pm.Normal('Y_obs', mu=mu, sd=yerror, observed=y_data)

    trace = pm.sample(nsample, nchains=1)


pm.traceplot(trace)
plt.show()

But it doesn’t “loop” through equat function. Output error:

Traceback (most recent call last):

  File "<ipython-input-217-14ca425a8735>", line 1, in <module>
    runfile('/codepy', wdir='/folder')

  File "/anaconda3/lib/python3.7/site-packages/spyder_kernels/customize/spydercustomize.py", line 786, in runfile
    execfile(filename, namespace)

  File "/anaconda3/lib/python3.7/site-packages/spyder_kernels/customize/spydercustomize.py", line 110, in execfile
    exec(compile(f.read(), filename, 'exec'), namespace)

  File "/code.py", line 79, in <module>
    mu = func(alpha,beta)

  File "/anaconda3/lib/python3.7/site-packages/theano/gof/op.py", line 674, in __call__
    required = thunk()

  File "/anaconda3/lib/python3.7/site-packages/theano/gof/op.py", line 892, in rval
    r = p(n, [x[0] for x in i], o)

  File "/anaconda3/lib/python3.7/site-packages/theano/compile/ops.py", line 555, in perform
    outs = self.__fn(*inputs)

  File "code.py", line 62, in func
    Q = lv(tt, k1=al, k2=be)

  File "/code.py", line 43, in lv
    Y, dict  = odeint(equat_2,y0,xdata,full_output=True)

  File "/anaconda3/lib/python3.7/site-packages/scipy/integrate/odepack.py", line 233, in odeint
    int(bool(tfirst)))

  File "code.py", line 40, in equat_2
    da1_dt = -k2*c[1]

I tried something with this as well, I don’t know why it doesn’t vary k1 and k2.

@dpananos is working on adding ODE support to PyMC3, see for example his great blog post on this:

1 Like
I think I solved it. Please tell me what you think.

from scipy.integrate import odeint, solve_ivp
import numpy as np
from matplotlib.pyplot import figure, plot, legend, scatter
from theano.compile.ops import as_op
import theano.tensor as T
import pymc3 as pm
import copy
from sys import exit
time = 10
Nt = 11
tt = np.linspace(0,time, Nt+1)
y0 = [1,2,0]
k1, k2 = 1, 1    
  
def real_equat(t,c):
    da_dt = -k1*c[0]
    da1_dt = -k2*c[1]
    db_dt = k1*c[0] + k2*c[1]
    return da_dt, da1_dt, db_dt
z = solve_ivp(real_equat, t_span=[0,time], t_eval= tt, y0 = y0)

def lv(xdata, k1=k1, k2=k2):
    def equat(c,t):
        da_dt = -k1*c[0]
        da1_dt = -k2*c[1]
        db_dt = k1*c[0] + k2*c[1]
        return da_dt, da1_dt, db_dt
    Y, dict = odeint(equat,y0,tt,full_output=True)
    return Y

ydata = z.y


yerror = 10*np.random.rand(Nt+1)
ydata += np.random.normal(0.0, np.sqrt(yerror))
ydata = np.ravel(ydata)
#theano compiler
@as_op(itypes=[T.dscalar, T.dscalar], otypes=[T.dvector])


def func(al,be):
    Q = lv(tt, k1 = al, k2 = be)
    return np.ravel(Q)


niter = 10
model = pm.Model()
with model:
    k1 = pm.Uniform('k1', upper = 1.2, lower = 0.8)
    k2 = pm.Uniform('k2', upper = 1.2, lower = 0.8)
    step = pm.NUTS()
    mu = func(k1,k2)
    
    # Likelihood (sampling distribution) of observations
    Y_obs = pm.Normal('Y_obs', mu=mu, sd=0.2, observed=ydata)
    trace = pm.sample(niter = niter, nchains=4, step = step)
1 Like

Hi,

Very sorry for the late response. I’m currently working on a PR to pymc3 which will make this sort of analysis incredibly straight forward.