Weird effective sample size instability

Following suggestions in Colin’s blog post and elsewhere I tried to use tr.ordered in my component mixtures, but to no avail.
Here is my changed code, that still exhibits label switching (which I can confirm from looking at traceplots.)

import pymc3 as pm
from pymc3 import sample, Model, effective_n
import pymc3.distributions.transforms as tr
import numpy as np
import theano.tensor as tt


for i in range(3):
    with Model():
        mu1 = pm.Normal.dist(
            mu=0.,
            sd=np.sqrt(0.1),
            transform=tr.ordered,
            testval=np.asarray([-1, 0, 1])
        )
        mu2 = pm.Normal.dist(
            mu=4.,
            sd=np.sqrt(0.1),
            transform=tr.ordered,
            testval=np.asarray([-1, 0, 1])
        )

        energy = pm.Mixture(
            "GaussianMixture1D",
            w=[1. / 2., 1. / 2.],
            comp_dists=[
                mu1, mu2
            ]
        )
        trace = sample(njobs=1, tune=3000, 
                      discard_tuned_samples=False,
                       draws=5000, chains=2)

        pm.traceplot(trace)
        import matplotlib.pyplot as plt
        plt.show()
        print(effective_n(trace))