Adding (length safe) Exponential Weighted Moving Average based on this answer to have same functionality as pandas.DataFrame.ewm
Also this one support numpyro sampler
import numpy as np
import pytensor.tensor as pt
import matplotlib.pyplot as plt
def get_max_row_size(alpha, dtype=float):
assert 0. <= alpha < 1.
epsilon = np.finfo(dtype).tiny
return int(np.log(epsilon)/np.log(1-alpha)) + 1
def pt_ewma(data, alpha, shape: int, max_chunk_size: int):
# Based on: https://stackoverflow.com/questions/42869495/numpy-version-of-exponential-weighted-moving-average-equivalent-to-pandas-ewm
def pt_ewma_func(x, offset):
scaling_factors = (1 - alpha)**(pt.arange(x.shape[0] + 1))
out = (x * (alpha * scaling_factors[-2]) / scaling_factors[:-1]).cumsum()
out /= scaling_factors[-2::-1]
out += offset * scaling_factors[1:]
return out
n_chunks = shape // max_chunk_size
n_trails = shape % max_chunk_size
if n_chunks == 0:
return pt_ewma_func(data, offset=data[0])
scaling_factors = (1 - alpha) ** pt.arange(1, max_chunk_size + 1)
last_scaling_factor = scaling_factors[-1]
out_main = pt.as_tensor_variable([
pt_ewma_func(data[ i*max_chunk_sz: (i + 1)*max_chunk_sz ], offset=0) for i in np.arange(n_chunks)
])
offsets = [data[0]]
for i in np.arange(1, n_chunks):
offsets.append(offsets[i - 1] * last_scaling_factor + out_main[i - 1, -1])
offsets = pt.as_tensor_variable(offsets)
if n_chunks > 0:
out_main += offsets.reshape((n_chunks, 1)) * scaling_factors.reshape((1, max_chunk_size))
out_tail = [] if n_trails == 0 else pt_ewma_func(data[-n_trails:], offset=out_main[-1, -1])
return pt.concatenate([out_main.reshape((n_chunks * max_chunk_size, )), out_tail])
window_sz = 30
alpha = 2/(window_sz + 1.0)
max_chunk_sz = get_max_row_size(alpha, np.float32)
np.random.seed(1337)
grw = np.cumsum(np.random.normal(size=max_chunk_sz + 10))
pt_grw = pt.as_tensor_variable(grw)
plt.figure(figsize=(15, 5))
plt.plot(grw)
plt.plot(pt_ewma(pt_grw, alpha, shape=grw.shape[0], max_chunk_size=max_chunk_sz).eval())
plt.show()
And median filtering - model after scipy.signal.medfilt. This version will have boundary artifacts, but I’m not sure how to do it fast (numpyro not supported)
def pt_medfilt(arr, kernel_sz, fill_value=0):
if kernel_sz % 2 == 0:
med_arr = pt.stack([
*[pt.concatenate([arr[i:], (fill_value*pt.ones(i))]) for i in range(kernel_sz // 2, 0, -1)],
*[pt.concatenate([(fill_value*pt.ones(i)),arr[:-i]]) for i in range(1, kernel_sz // 2 + 1)],
]).sort(axis=0)
idx = (kernel_sz // 2)
return (med_arr[idx, :] + med_arr[idx + 1, :])/2
else:
med_arr = pt.stack([
*[pt.concatenate([arr[i:], (fill_value*pt.ones(i))]) for i in range(kernel_sz // 2, 0, -1)],
arr,
*[pt.concatenate([(fill_value*pt.ones(i)),arr[:-i]]) for i in range(1, kernel_sz // 2 + 1)],
]).sort(axis=0)
idx = (kernel_sz // 2)
return med_arr[idx, :]
window_sz = 30
np.random.seed(1337)
grw = np.cumsum(np.random.normal(size=1000))
pt_grw = pt.as_tensor_variable(grw)
plt.figure(figsize=(15, 5))
plt.plot(grw)
plt.plot(pt_medfilt(pt_grw, window_sz).eval())
plt.show()

