Multidimensional Gaussian process

Ah, okay. So to sample from my GP using PyMC3 random variables I need to create a new model? My overall approach would then be to define the surrogate model:

with pm.Model() as surrogate_model:
    ls    = pm.HalfNormal('l',2./3,shape=len(params))
    eta   = pm.HalfNormal('eta',1.0,shape=1)
    cov   = eta**2.*pm.gp.cov.ExpQuad(len(params),ls_inv=ls)
    sigma = pm.HalfNormal('sigma',1.0)

    gp = pm.gp.Marginal(cov_func=cov)

    y_ = gp.marginal_likelihood('y',X=X,y=errors,noise=sigma)
    
    trace = pm.sample(1000,tune=1000,njobs=1)

and then define a new model that samples from the GP fitted above:

with pm.Model() as model:
    a_     = pm.Normal('a',params[0],0.1*abs(params[0]),shape=1)
    b_     = pm.Normal('b',params[1],0.1*abs(params[1]),shape=1)
    c_     = pm.Normal('c',params[2],0.1*abs(params[2]),shape=1)
    d_     = pm.Normal('d',params[3],0.1*abs(params[3]),shape=1)
    params_ = tt.stack([a_,b_,c_,d_],axis=0).reshape([1,4])
    
    sigma   = pm.HalfNormal('sigma', 1.)
    
    e_pred = pm.Deterministic('e_pred',gp.predictt(params_)[0])
    
    e = pm.Normal('e', mu=e_pred, sd=sigma, observed=0.)
    
    trace2 = pm.sample(1000,njobs=1)

I can fit the GP with the first model, but when sampling the second one I get this error:

---------------------------------------------------------------------------
MissingInputError                         Traceback (most recent call last)
<ipython-input-33-d32336a23b64> in <module>()
     13     e = pm.Normal('e', mu=e_pred, sd=sigma, observed=0.)
     14 
---> 15     trace2 = pm.sample(1000,njobs=1)

c:\users\natha\onedrive\documents\python3_modules\pymc3\pymc3\sampling.py in sample(draws, step, init, n_init, start, trace, chain_idx, chains, cores, tune, nuts_kwargs, step_kwargs, progressbar, model, random_seed, live_plot, discard_tuned_samples, live_plot_kwargs, compute_convergence_checks, use_mmap, **kwargs)
    393             start_, step = init_nuts(init=init, chains=chains, n_init=n_init,
    394                                      model=model, random_seed=random_seed,
--> 395                                      progressbar=progressbar, **args)
    396             if start is None:
    397                 start = start_

c:\users\natha\onedrive\documents\python3_modules\pymc3\pymc3\sampling.py in init_nuts(init, chains, n_init, model, random_seed, progressbar, **kwargs)
   1386             'Unknown initializer: {}.'.format(init))
   1387 
-> 1388     step = pm.NUTS(potential=potential, model=model, **kwargs)
   1389 
   1390     return start, step

c:\users\natha\onedrive\documents\python3_modules\pymc3\pymc3\step_methods\hmc\nuts.py in __init__(self, vars, max_treedepth, early_max_treedepth, **kwargs)
    150         `pm.sample` to the desired number of tuning steps.
    151         """
--> 152         super(NUTS, self).__init__(vars, **kwargs)
    153 
    154         self.max_treedepth = max_treedepth

c:\users\natha\onedrive\documents\python3_modules\pymc3\pymc3\step_methods\hmc\base_hmc.py in __init__(self, vars, scaling, step_scale, is_cov, model, blocked, potential, integrator, dtype, Emax, target_accept, gamma, k, t0, adapt_step_size, step_rand, **theano_kwargs)
     61 
     62         super(BaseHMC, self).__init__(vars, blocked=blocked, model=model,
---> 63                                       dtype=dtype, **theano_kwargs)
     64 
     65         self.adapt_step_size = adapt_step_size

c:\users\natha\onedrive\documents\python3_modules\pymc3\pymc3\step_methods\arraystep.py in __init__(self, vars, model, blocked, dtype, **theano_kwargs)
    213 
    214         self._logp_dlogp_func = model.logp_dlogp_function(
--> 215             vars, dtype=dtype, **theano_kwargs)
    216 
    217     def step(self, point):

c:\users\natha\onedrive\documents\python3_modules\pymc3\pymc3\model.py in logp_dlogp_function(self, grad_vars, **kwargs)
    656         varnames = [var.name for var in grad_vars]
    657         extra_vars = [var for var in self.free_RVs if var.name not in varnames]
--> 658         return ValueGradFunction(self.logpt, grad_vars, extra_vars, **kwargs)
    659 
    660     @property

c:\users\natha\onedrive\documents\python3_modules\pymc3\pymc3\model.py in __init__(self, cost, grad_vars, extra_vars, dtype, casting, **kwargs)
    403 
    404         self._theano_function = theano.function(
--> 405             inputs, [self._cost_joined, grad], givens=givens, **kwargs)
    406 
    407     def set_extra_values(self, extra_vars):

~\Anaconda3\lib\site-packages\theano\compile\function.py in function(inputs, outputs, mode, updates, givens, no_default_updates, accept_inplace, name, rebuild_strict, allow_input_downcast, profile, on_unused_input)
    315                    on_unused_input=on_unused_input,
    316                    profile=profile,
--> 317                    output_keys=output_keys)
    318     return fn

~\Anaconda3\lib\site-packages\theano\compile\pfunc.py in pfunc(params, outputs, mode, updates, givens, no_default_updates, accept_inplace, name, rebuild_strict, allow_input_downcast, profile, on_unused_input, output_keys)
    484                          accept_inplace=accept_inplace, name=name,
    485                          profile=profile, on_unused_input=on_unused_input,
--> 486                          output_keys=output_keys)
    487 
    488 

~\Anaconda3\lib\site-packages\theano\compile\function_module.py in orig_function(inputs, outputs, mode, accept_inplace, name, profile, on_unused_input, output_keys)
   1837                   on_unused_input=on_unused_input,
   1838                   output_keys=output_keys,
-> 1839                   name=name)
   1840         with theano.change_flags(compute_test_value="off"):
   1841             fn = m.create(defaults)

~\Anaconda3\lib\site-packages\theano\compile\function_module.py in __init__(self, inputs, outputs, mode, accept_inplace, function_builder, profile, on_unused_input, fgraph, output_keys, name)
   1485             # OUTPUT VARIABLES)
   1486             fgraph, additional_outputs = std_fgraph(inputs, outputs,
-> 1487                                                     accept_inplace)
   1488             fgraph.profile = profile
   1489         else:

~\Anaconda3\lib\site-packages\theano\compile\function_module.py in std_fgraph(input_specs, output_specs, accept_inplace)
    179 
    180     fgraph = gof.fg.FunctionGraph(orig_inputs, orig_outputs,
--> 181                                   update_mapping=update_mapping)
    182 
    183     for node in fgraph.apply_nodes:

~\Anaconda3\lib\site-packages\theano\gof\fg.py in __init__(self, inputs, outputs, features, clone, update_mapping)
    173 
    174         for output in outputs:
--> 175             self.__import_r__(output, reason="init")
    176         for i, output in enumerate(outputs):
    177             output.clients.append(('output', i))

~\Anaconda3\lib\site-packages\theano\gof\fg.py in __import_r__(self, variable, reason)
    344         # Imports the owners of the variables
    345         if variable.owner and variable.owner not in self.apply_nodes:
--> 346                 self.__import__(variable.owner, reason=reason)
    347         elif (variable.owner is None and
    348                 not isinstance(variable, graph.Constant) and

~\Anaconda3\lib\site-packages\theano\gof\fg.py in __import__(self, apply_node, check, reason)
    389                                      "for more information on this error."
    390                                      % (node.inputs.index(r), str(node)))
--> 391                         raise MissingInputError(error_msg, variable=r)
    392 
    393         for node in new_nodes:

MissingInputError: Input 0 of the graph (indices start from 0), used to compute Elemwise{exp,no_inplace}(sigma_log__), was not provided and not given a value. Use the Theano flag exception_verbosity='high', for more information on this error.

Sorry to ask so many questions.

No, that's not what I meant. You can still add nodes to the old model as you did; it's just that for prediction you shouldn't use pm.sample(), since that is for doing inference on your model. Prediction would look something like the sketch below.
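For reference, a minimal sketch (not from this thread) of what the prediction route could look like, assuming Xnew is an array of new input points with the same number of columns as X:

with surrogate_model:
    # conditional distribution of the fitted GP at the new points
    f_pred = gp.conditional('f_pred', Xnew=Xnew)
    # draw predictive samples from the existing trace instead of running pm.sample() again
    pred_samples = pm.sample_ppc(trace, vars=[f_pred], samples=500)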

But I want to use my surrogate model to do inference on params_ to update my priors on a_, b_, etc.

Shouldn't I be sampling in order to do that?

Oh, so you are not just doing prediction. Do I understand correctly that you infer the GP in the first model, and in the second model you are trying to hold the GP parameters constant and do inference on params_?

That is correct. Sorry for the confusion!

In that case, in your second model you should redefine the GP:

    ls    = ...
    eta   = ...
    cov   = eta**2.*pm.gp.cov.ExpQuad(len(params), ls_inv=ls)
    gp = pm.gp.Marginal(cov_func=cov)

You can either use a distribution that approximates the posterior of ls and eta (see the sketch below), or just use the posterior mean. Then you can sample your second model as before.
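As an illustration of the first option (my own sketch, not code from this thread), a common PyMC3 pattern is to turn the posterior samples of each scalar hyperparameter into an Interpolated prior for the second model:

import numpy as np
from scipy import stats
import pymc3 as pm

def from_posterior(name, samples):
    # KDE-based Interpolated prior that approximates a 1-D posterior sample
    smin, smax = np.min(samples), np.max(samples)
    width = smax - smin
    x = np.linspace(smin - 0.1 * width, smax + 0.1 * width, 100)
    y = stats.gaussian_kde(samples)(x)
    return pm.Interpolated(name, x, y)

# inside the second model context, e.g.:
# eta_ = from_posterior('eta', trace['eta'][:, 0])

Note that Interpolated is univariate, so a vector of lengthscales would need one such variable per input dimension.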

Great! I tried to implement that and I am still getting the MissingInputError. Here is my code for the second model:

summary = pm.summary(trace)

l_means    = np.array([summary['mean'][key] for key in summary['mean'].keys() if 'l_' in key]).reshape(1,4)
eta_mean   = np.array([summary['mean'][key] for key in summary['mean'].keys() if 'eta' in key]).reshape(1,1)
sigma_mean = [summary['mean'][key] for key in summary['mean'].keys() if 'sigma' in key]

with pm.Model() as model:
    
    #Set variable parameter values
    a_      = pm.Normal('a',params[0],0.1*abs(params[0]),shape=1)
    b_      = pm.Normal('b',params[1],0.1*abs(params[1]),shape=1)
    c_      = pm.Normal('c',params[2],0.1*abs(params[2]),shape=1)
    d_      = pm.Normal('d',params[3],0.1*abs(params[3]),shape=1)
    params_ = tt.stack([a_,b_,c_,d_],axis=0).reshape([1,4])
    sigma2  = pm.HalfNormal('sigma', 1.)
    
    #Redefine the gaussian process
    ls_    = l_means
    eta_   = eta_mean
    sigma_ = sigma_mean
    cov    = eta_**2.*pm.gp.cov.ExpQuad(len(params),ls_inv=ls_)
    gp     = pm.gp.Marginal(cov_func=cov)
    y_     = gp.marginal_likelihood('y',X=X,y=errors,noise=sigma)

    #e_pred = gp.conditional('e_pred', Xnew=params_, shape=1)
    e_pred = pm.Deterministic('e_pred',gp.predictt(params_)[0])
    
    e = pm.Normal('e', mu=e_pred, sd=sigma2, observed=0.)
    
    trace2 = pm.sample(1000,n_init=50000,tune=1000,njobs=1)

MissingInputError                         Traceback (most recent call last)
<ipython-input-59-f4755668138f> in <module>()
     22     e = pm.Normal('e', mu=e_pred, sd=sigma2, observed=0.)
     23 
---> 24     trace2 = pm.sample(1000,n_init=50000,tune=1000,njobs=1)
.
.
.

MissingInputError: Input 0 of the graph (indices start from 0), used to compute Elemwise{exp,no_inplace}(sigma_log__), was not provided and not given a value. Use the Theano flag exception_verbosity='high', for more information on this error.

It seems to be a problem with the e_pred line, though I don't understand the source.

Your setup is not quite right; you don't need to call gp.marginal_likelihood again, since you already did the inference previously. Using gp.Latent should do the trick:

with pm.Model() as model:
    params_ = pm.Normal('params_', params, 0.1 * tt.abs_(params), shape=(1, 4))
    sigma2 = pm.HalfNormal('sigma', 1.)

    # Redefine the gaussian process
    ls_ = l_means
    eta_ = eta_mean
    sigma_ = sigma_mean
    cov = eta_**2.*pm.gp.cov.ExpQuad(len(params), ls_inv=ls_)
    gp = pm.gp.Latent(cov_func=cov)
    e_pred = gp.prior('e_pred', X=params_, shape=1)

    e = pm.Normal('e', mu=e_pred, sd=sigma2, observed=0.)

    trace2 = pm.sample(1000, tune=1000, njobs=1)

for more information see http://docs.pymc.io/notebooks/GP-Latent.html

But I think in your case it might be better to do:

with pm.Model() as model:
    params_ = pm.Normal('params_', params, 0.1 * tt.abs_(params), shape=(1, 4))

    # Redefine the gaussian process
    ls_ = l_means
    eta_ = eta_mean
    sigma_ = sigma_mean
    cov = eta_**2.*pm.gp.cov.ExpQuad(len(params), ls_inv=ls_)
    gp = pm.gp.Marginal(cov_func=cov)
    e_pred = gp.marginal_likelihood('e_pred', X=params_, y=np.asarray([0.]), noise=sigma_)

    trace2 = pm.sample(1000, tune=1000, njobs=1)

Thank you so much for your help that solved it! I still need to better understand setting priors for the GP in the first place but having the remaining code set up will make later work much easier.

So that I better understand, why did you prefer the second option to the first? Is it just because you can reduce the number of random variables or is there a deeper reason?

Thank you again for your help and patience.


These are two different models, so which one to choose depends on your assumptions and model constraints. In the first option, if you replace sigma2 with sigma_, it should work the same as option 2; a sketch of that substitution is below.
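A sketch of that substitution, assuming the l_means, eta_mean, and sigma_mean arrays computed earlier in the thread (my rendering, not posted code):

with pm.Model() as model:
    params_ = pm.Normal('params_', params, 0.1 * tt.abs_(params), shape=(1, 4))

    # GP hyperparameters fixed at their posterior means from the first model
    cov = eta_mean**2. * pm.gp.cov.ExpQuad(len(params), ls_inv=l_means)
    gp = pm.gp.Latent(cov_func=cov)
    e_pred = gp.prior('e_pred', X=params_, shape=1)

    # noise level taken from the first fit instead of a new HalfNormal
    e = pm.Normal('e', mu=e_pred, sd=sigma_mean[0], observed=0.)

    trace2 = pm.sample(1000, tune=1000, njobs=1)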


Always love it when we get friendly people on the forums!


Ah, I see. I had missed that detail. Thanks!

Glad to be here!

I have greatly improved my priors on my Gaussian processes and have discovered that there is a slight problem with the solution as presented above (quoted here).

When the Gaussian process is redefined with this method, it does not carry over the fit to data from the previously fitted model; the mean response of the redefined GP is zero everywhere. How do I include my previously fit test data in the re-definition of the GP?

Here is code which demonstrates the response I'm seeing:
Multidimentional_Gaussian-Current.py (4.3 KB)

I managed to get the GP to work correctly by defining

.
.
.
    gp = pm.gp.Latent(cov_func=cov,)
    e_pred = gp.conditional('e_pred',Xnew=params_,given={'X':X,'f':error},shape=1)
.
.
.

This allows me to sample, but I am having some significant problems with divergences. Is there something I am doing incorrectly here?

@NateAM: Have you solved this problem? I am struggling to use a GP as a surrogate model for inference. I am wondering whether this is possible in the PyMC3 framework at all. Has anybody done this?

@Actionhank I have solved the problem and I'm seeing pretty good results with it, though the sampling is somewhat slow. I fit a scalar-output Gaussian process using something like:

ls_      = pm.HalfNormal('ls', sd=0.2*(xbnds[1]-xbnds[0]), shape=(1,nvars))
eta_     = pm.HalfNormal('eta',sd=np.sqrt(0.2*(ybnds[1]-ybnds[0])), shape=1)
noise    = pm.HalfNormal('noise',sd=1.,shape=1)
cov_func = eta_.reshape([1,1])**2.*pm.some_covfxn(nvars, ls=ls_)  # some_covfxn stands in for the chosen covariance, e.g. pm.gp.cov.ExpQuad
gp       = pm.gp.Marginal(cov_func=cov_func)
y_       = gp.marginal_likelihood("y", X=xdata, y=ydata, noise=noise)

where xdata is an n x nvars array, ydata is an n x 1 array, and n is the number of points. I've found that a big part of getting a GP to work right is to set reasonable priors on the lengthscale and amplitude parameters (ls_ and eta_). I use some rules of thumb for the range of each, based on the bounding values of the inputs and output, which seem to work pretty well; a sketch of the bounds is below.
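For concreteness, the bounds used in those rules of thumb might be computed from the training data like this (an assumed sketch; the post does not show how xbnds and ybnds are built):

import numpy as np

# per-variable input ranges for the lengthscale prior, scalar output range for eta
xbnds = (xdata.min(axis=0), xdata.max(axis=0))   # each entry has shape (nvars,)
ybnds = (ydata.min(), ydata.max())               # scalars

# 0.2*(xbnds[1] - xbnds[0]) then broadcasts against ls_ of shape (1, nvars),
# and np.sqrt(0.2*(ybnds[1] - ybnds[0])) gives a scalar scale for eta_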

I then sample from the median fit to the GP via

cov_func  = surrogate_params['eta'].reshape([1,1])**2.*surrogate_params['covfxn'](nvars,ls=surrogate_params['ls'])
gp        = pm.gp.Latent(cov_func=cov_func)
givens    = gp._get_given_vals({'X':surrogate_params['X'],'f':surrogate_params['y'],'y':surrogate_params['y'],'noise':surrogate_params['noise']})
surrogate = pm.Deterministic('surrogate',gp._build_conditional(params_,*givens)[0])

where surrogate_params is a dictionary that contains my median parameters and the initial X and y dataset (a sketch of assembling it is below). Note that the new "X" parameters I'm sampling on (params_) form a 1 x nparams theano variable. One thing that also helped my sampling was to change the init to 'adapt_diag', as the default (adapt_diag with jitter) could push me into a bad region of parameter space very easily.
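A sketch of how surrogate_params might be assembled, and of the init change (the dictionary keys follow the snippet above; using np.median and ExpQuad here are my assumptions):

import numpy as np

# median posterior values from the first-stage trace plus the original training data
surrogate_params = {
    'eta':    np.median(trace['eta'], axis=0),
    'ls':     np.median(trace['ls'], axis=0),
    'noise':  np.median(trace['noise'], axis=0),
    'covfxn': pm.gp.cov.ExpQuad,   # placeholder: whichever covariance was used in the fit
    'X':      xdata,
    'y':      ydata,
}

with model:
    # adapt_diag drops the jitter used by the default initializer
    trace2 = pm.sample(1000, tune=1000, init='adapt_diag', njobs=1)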

Good luck!

3 Likes

Thanks a lot, I'll try your solution. Since you are using some internal functions of PyMC3, is there no official solution for this? My expectation was that using a surrogate model for inference is quite common.

There actually is a built-in function (gp.conditional, see here), but I'm bypassing it for speed, because it builds a multivariate normal distribution that is unnecessary for a surrogate application; a sketch of the built-in route is below for comparison. @bwengals showed this trick in another thread.
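For comparison, the built-in route would look roughly like this (my sketch; the given dict supplies the previously fitted data, and the MvNormal it builds is the overhead avoided above):

with model:
    gp = pm.gp.Latent(cov_func=cov_func)
    surrogate = gp.conditional('surrogate', Xnew=params_,
                               given={'X': surrogate_params['X'],
                                      'f': surrogate_params['y']},
                               shape=1)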

Hi Nate,
two questions:

  • for 'params_', do you use theano.tensor.stack() to combine several PyMC3 random variables into one theano vector?
  • in the 'givens' dict, does the initial dataset contain the training data for the surrogate model, i.e. 'xdata' and 'ydata'? I ask because in my problem I have one dataset for training the GP and another dataset for the inference, which contains the true distribution of 'y'.

Thanks.