# How to accelerate inference when using v5

In my model, PyMC gives good results, but a single run takes a long time. Is there any way to improve the sampling speed? For example, by using a GPU?
Here is my code:

```python
import numpy as np
import pymc as pm
import pytensor.tensor as pt
from pytensor.tensor import conv
import arviz as az


def ballabio_data_model(E, E0, ion_temp):
    # Ballabio fit coefficients
    a1 = np.array([4.69515, -0.040729, 0.47, 0.81844])
    a2 = np.array([1.7013e-3, 0.16888, 0.49, 7.9460e-4])
    wo = 82.542
    Ti = ion_temp
    # E0 = 2.4495e3
    # computed parameters
    dE = (a1[0] / (1 + a1[1] * Ti**a1[2])) * Ti**(2.0 / 3.0) + a1[3] * Ti
    dw = (a2[0] / (1 + a2[1] * Ti**a2[2])) * Ti**(2.0 / 3.0) + a2[3] * Ti
    Emean = E0 + dE
    Sth = wo * (1 + dw) * pm.math.sqrt(Ti) / (2 * pm.math.sqrt(2 * pm.math.log(2)))
    Ebar = Emean * pm.math.sqrt(1.0 - 1.5 * Sth / Emean**2.0)
    S = (4.0 / 3.0) * Ebar * (Emean - Ebar)
    f = pm.math.exp(-(2 * Ebar / S**2) * (pm.math.sqrt(E) - pm.math.sqrt(Ebar))**2)
    return f / pm.math.sum(f)


def Ft(A, E0, Tion):
    # E_b, ds_dS, dE_dt, IRF and obs are data arrays defined elsewhere
    Gaussian = A * ballabio_data_model(E_b, E0, Tion) * ds_dS
    Gaussian_t = Gaussian * dE_dt
    ft1 = pt.concatenate([Gaussian_t, pt.zeros(IRF.shape[0] - 1)])
    ft = conv.causal_conv1d(
        ft1[None, None, :], IRF[None, None, :], filter_shape=(1, 1, IRF.shape[0])
    ).squeeze()
    return ft[0 : obs.shape[0]]


Samples = 2000
with pm.Model() as background_model:
    A = pm.Normal('A', mu=1.5e3, sigma=0.5e2)
    Tion = pm.Uniform('Tion', lower=0.1, upper=3)
    E0 = pm.Uniform('E0', lower=2000, upper=3000)
    err1 = pm.HalfCauchy('err1', 3.0)
    y_observed = pm.Normal(
        "y_observed",
        mu=Ft(A, E0, Tion),
        sigma=err1,
        observed=obs,
    )
    output = pm.Deterministic('output', Ft(A, E0, Tion))
    prior = pm.sample_prior_predictive()
    posterior = pm.sample(draws=Samples, target_accept=0.9, chains=4, cores=4)
    posterior_gaussian = pm.sample_posterior_predictive(posterior)

az.plot_trace(posterior, var_names=['A', 'E0', 'Tion'])
result = az.summary(posterior, var_names=['A', 'E0', 'Tion'])
az.plot_ppc(posterior_gaussian, num_pp_samples=100, figsize=(8, 8))
print(result)
```

Slow sampling can be a symptom of a badly specified model. Without digging into yours, though, the brute-force suggestion is to pass `nuts_sampler="nutpie"` or `nuts_sampler="numpyro"` to `pm.sample` (after installing those libraries). Nutpie can provide big speedups when sampling on CPUs, and Numpyro on GPUs.


Thanks! I have another problem: when sampling, the first 1% of samples are particularly slow, and the rest are much faster. If I want to accelerate the first 1%, what should I do?

That could mean your prior is poorly specified and the sampler needs a lot of work during early tuning to get to the "right posterior region". You can try to provide better starting points to the sampler or define better model priors if that's possible (and the source of the problem).


It might also mean your initial step size is too small, so the dynamic integrator exhausts the tree depth. You could compare the final step size you get with the default initial step size (I think it is something like `0.1 * number_of_dimensions**0.25`), or just try setting the initial step size larger, or set `max_treedepth=8` (I think this would be `pm.sample(nuts_sampler_kwargs={"max_treedepth": 8})` for the external samplers), which would prevent the sampler from ever taking more than 2**8 = 256 leapfrog steps in a single MCMC transition.