My PyMC model gives good results, but a single run takes a long time. Is there any way to speed up sampling, for example by using a GPU?

Here is my code:

```
def ballabio_data_model(E, E0, ion_temp):
    """Return a normalized spectral shape evaluated on the energy grid ``E``.

    The shape is ``exp(-(2*Ebar/S**2) * (sqrt(E) - sqrt(Ebar))**2)`` divided
    by its own sum, so the returned values add up to one.

    Parameters
    ----------
    E
        Energy grid the shape is evaluated on.
    E0
        Base energy; the temperature-dependent shift is added to it.
    ion_temp
        Ion temperature driving both the shift and the width.

    Notes
    -----
    Units are not stated anywhere in this snippet -- presumably keV for
    energies and temperature, TODO confirm.
    """
    # Interpolation coefficients for the peak shift and the width correction.
    shift_coeffs = np.array([4.69515, -0.040729, 0.47, 0.81844])
    width_coeffs = np.array([1.7013e-3, 0.16888, 0.49, 7.9460e-4])
    base_width = 82.542

    def _interpolate(coeffs):
        # Same functional form is used for both coefficient sets.
        c0, c1, c2, c3 = coeffs
        return (c0 / (1 + c1 * ion_temp**c2)) * ion_temp**(2.0 / 3.0) + c3 * ion_temp

    shift = _interpolate(shift_coeffs)
    width_corr = _interpolate(width_coeffs)

    mean_energy = E0 + shift
    # Width scaled by 1 / (2*sqrt(2*ln 2)).
    fwhm_to_sigma = 2 * pm.math.sqrt(2 * pm.math.log(2))
    sigma_th = base_width * (1 + width_corr) * pm.math.sqrt(ion_temp) / fwhm_to_sigma

    # NOTE(review): sigma_th enters linearly here and ``spread`` is squared
    # again in the exponent below; confirm both against the reference formula.
    peak_energy = mean_energy * pm.math.sqrt(1.0 - 1.5 * sigma_th / mean_energy**2.0)
    spread = (4. / 3.) * peak_energy * (mean_energy - peak_energy)

    root_offset = pm.math.sqrt(E) - pm.math.sqrt(peak_energy)
    shape = pm.math.exp(-(2 * peak_energy / spread**2) * root_offset**2)
    return shape / pm.math.sum(shape)
from pytensor.tensor import conv
def Ft(A, E0, Tion):
    """Build the forward-model signal that is compared against ``obs``.

    The normalized spectrum from ``ballabio_data_model`` is scaled by ``A``,
    ``ds_dS`` and ``dE_dt``, zero-padded on the right by ``len(IRF) - 1``
    samples, convolved with ``IRF`` and truncated to the length of ``obs``.

    Relies on the module-level names ``E_b``, ``ds_dS``, ``dE_dt``, ``IRF``
    and ``obs``, which are defined outside this snippet.
    """
    spectrum = A * ballabio_data_model(E_b, E0, Tion) * ds_dS
    signal = spectrum * dE_dt

    kernel_len = IRF.shape[0]
    padded = pt.concatenate([signal, pt.zeros(kernel_len - 1)])

    # causal_conv1d works on 3-D tensors, hence the two leading singleton axes.
    convolved = conv.causal_conv1d(
        padded[None, None, :],
        IRF[None, None, :],
        filter_shape=(1, 1, kernel_len),
    )
    return convolved.squeeze()[0:obs.shape[0]]
# Number of posterior draws per chain.
Samples = 2000

with pm.Model() as background_model:
    # Priors.
    A = pm.Normal('A', mu=1.5e3, sigma=0.5e2)
    Tion = pm.Uniform('Tion', lower=0.1, upper=3)
    E0 = pm.Uniform('E0', lower=2000, upper=3000)
    err1 = pm.HalfCauchy('err1', 3.0)

    # Build the forward model once and reuse it: the original called
    # Ft(A, E0, Tion) twice, constructing the same convolution graph twice.
    # It is still recorded in the trace under the name 'output'.
    output = pm.Deterministic('output', Ft(A, E0, Tion))

    y_observed = pm.Normal(
        "y_observed",
        mu=output,
        sigma=err1,
        observed=obs,
    )

    prior = pm.sample_prior_predictive()
    # NOTE(review): for speed, recent PyMC releases accept an alternative NUTS
    # backend via pm.sample(nuts_sampler=...); confirm the installed version
    # supports it and that the convolution op compiles under that backend.
    posterior = pm.sample(draws=Samples, target_accept=0.9, chains=4, cores=4)
    posterior_gaussian = pm.sample_posterior_predictive(posterior)

# Diagnostics and summary for the three physical parameters.
az.plot_trace(posterior, var_names=['A', 'E0', 'Tion'])
result = az.summary(posterior, var_names=['A', 'E0', 'Tion'])
az.plot_ppc(posterior_gaussian, num_pp_samples=100, figsize=(8, 8))
print(result)
```