Vectorizing slow theano variable operations

I am trying to compute the function below with theano/aesara in an preferably vectorized manner:

image

The solution i have is not vectorized and therefore way too slow:

def apply_adstock_with_lag(x, L, P, D):
    """
    params:
    x: original array
    L: length
    P: peak, delay in effect
    D: decay, retain
    """
    x = np.append(np.zeros(L - 1), x)

    weights = [0 for _ in range(L)]
    for l in range(L):
        weight = D ** ((l - P) ** 2)
        weights[L - 1 - l] = weight
    weights = np.array(weights)
    adstocked_x = []
    for i in range(L - 1, len(x)):
        x_array = x[i - L + 1:i + 1]
        xi = sum(x_array * weights) / sum(weights)
        adstocked_x.append(xi)
    adstocked_x = tt.as_tensor_variable(adstocked_x)
    return adstocked_x

An similar function although simplier and its vectorized solution can be found below, note that this is much much quicker probably due to the vectorized operations:

image

def adstock_geometric_theano_pymc3(x, theta):
    x = tt.as_tensor_variable(x)

    def adstock_geometric_recurrence_theano(index, input_x, decay_x, theta):
        return tt.set_subtensor(decay_x[index], tt.sum(input_x + theta * decay_x[index - 1]))

    len_observed = x.shape[0]

    x_decayed = tt.zeros_like(x)
    x_decayed = tt.set_subtensor(x_decayed[0], x[0])

    output, _ = theano.scan(
        fn=adstock_geometric_recurrence_theano,
        sequences=[tt.arange(1, len_observed), x[1:len_observed]],
        outputs_info=x_decayed,
        non_sequences=theta,
        n_steps=len_observed - 1
    )

    return output[-1]

I cant come up with the vectorized solution to my adstock-function, can anyone give it a go?