Minibatch when latent variable size depends on data dimension

Yeah, you can do it in two ways: either index the per-trial latent mu with a minibatched index array directly inside the model, or build the model on full-data shared variables and swap in the minibatches at fit time with more_replacements.

import numpy as np
import pymc3 as pm
import theano

# Generate data: one latent mean per trial, observed with noise
ntrials = 1000
sig_mu_true = 0.5
mu_true = np.random.normal(0, sig_mu_true, size=ntrials)
X = np.arange(0, ntrials)   # trial index of each observation
Y = mu_true + np.random.normal(0, 0.1, size=ntrials)

# Minibatch both the observations and their trial indices
# (same batch size and default random seed, so the random slices stay aligned)
ntrials_mb = 200
Y_mb = pm.Minibatch(Y, ntrials_mb)
X_mb = pm.Minibatch(X, ntrials_mb)

# Set up model: index the full-length latent mu with the minibatched indices
with pm.Model() as m:
    sig_mu = pm.HalfNormal('sig_mu', sd=2.)
    mu = pm.Normal('mu', 0, sd=sig_mu, shape=ntrials)  # one latent per trial
    Y_obs = pm.Normal('Y_obs',
                      mu=mu[X_mb],         # latents for the current minibatch
                      sd=0.1,
                      observed=Y_mb,
                      total_size=ntrials)  # rescale the likelihood to the full data

# Use ADVI; obj_n_mc is the number of Monte Carlo samples per gradient estimate
with m:
    approx = pm.fit(40000, obj_n_mc=5)
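
Once it is fitted you can sample the approximation and check that sig_mu is recovered; a minimal sketch using the names above:

trace = approx.sample(1000)   # draw posterior samples from the fitted approximation
print(trace['sig_mu'].mean()) # should land near sig_mu_true = 0.5
print(trace['mu'].shape)      # (1000, ntrials): the full-length latent vector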

or, the second way: keep full-data Theano shared variables in the model and swap in the minibatches only at fit time:

# Minibatch as before, plus full-data shared variables that go into the model
ntrials_mb = 200
Y_mb = pm.Minibatch(Y, ntrials_mb)
X_mb = pm.Minibatch(X, ntrials_mb)
Y_shared = theano.shared(Y)
X_shared = theano.shared(X)

# Set up model: same structure, but built on the full-data shared variables
with pm.Model() as m2:
    sig_mu = pm.HalfNormal('sig_mu', sd=2.)
    mu = pm.Normal('mu', 0, sd=sig_mu, shape=ntrials)
    Y_obs = pm.Normal('Y_obs',
                      mu=mu[X_shared],     # indexed with the full-length shared indices
                      sd=0.1,
                      observed=Y_shared,
                      total_size=ntrials)

# Use ADVI; more_replacements swaps the shared variables for the minibatches
# inside the ADVI objective, so only a random subset is used per iteration
with m2:
    approx = pm.fit(50000,
                    obj_n_mc=5,
                    more_replacements={X_shared: X_mb,
                                       Y_shared: Y_mb})
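
A nice side effect of the second version is that m2 itself is built on the full-length shared variables, so anything you compute after fitting (e.g. posterior predictive samples) covers all ntrials trials rather than a single minibatch. A minimal sketch, assuming a PyMC3 version that has sample_posterior_predictive:

trace2 = approx.sample(1000)   # samples from the fitted approximation
with m2:
    ppc = pm.sample_posterior_predictive(trace2, samples=200)
print(ppc['Y_obs'].shape)      # (200, ntrials), i.e. the full dataset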