Fitting mixture of binomials

It's not because the sample type isn't there, I see. I think that neither scenario you are presenting is the true scenario.

There should be DGP1 plus samples drawn purely from `background_rate`. See below, where I added DGP3.

# %%
import numpy as np
import pandas as pd
import scipy.stats as ss
import matplotlib.pyplot as plt

# %%
# Per-flip success probabilities of the two binomial components.
background_rate = 1e-4
signal_rate = 5e-2

# Number of simulated observations (rows of `simulated_data`).
num_obs = 500

# number of flips per observation: normal draws truncated to whole numbers
simulated_data = pd.DataFrame(
    {"N": ss.norm.rvs(loc=10_000, scale=1_500, size=num_obs).astype(int)}
)

# mixing fraction: the weight given to the "signal" component
simulated_data["fraction"] = 0.5

# counts of "background" successes
simulated_data["background_counts"] = ss.binom.rvs(
    n=simulated_data["N"], p=background_rate
)

# counts of "signal" successes
simulated_data["signal_counts"] = ss.binom.rvs(n=simulated_data["N"], p=signal_rate)

# %%
# DGP #1
# for each observation, mix the signal and background successes according to the mixing fraction
# (the weighted sum is truncated to an integer count)
simulated_data["observed_counts_DGP1"] = (
    (simulated_data["signal_counts"] * simulated_data["fraction"])
    + (simulated_data["background_counts"] * (1 - simulated_data["fraction"]))
).astype(int)

# %%
# DGP #2
# for each observation, select "background" or "signal" successes according to the mixing fraction
# switch == 1 (probability `fraction`) picks the signal counts, switch == 0 picks the background counts
switch = ss.binom.rvs(n=1, p=simulated_data["fraction"])
simulated_data["observed_counts_DGP2"] = np.where(
    switch, simulated_data["signal_counts"], simulated_data["background_counts"]
)

# %%
# DGP #3
# for each observation in class 1, mix the signal and background successes according to the mixing fraction
# for each observation in class 2, only use background successes
mixed_counts = (
    (simulated_data["signal_counts"] * simulated_data["fraction"])
    + (simulated_data["background_counts"] * (1 - simulated_data["fraction"]))
).astype(int)

# Class 2 (background only) is the first half of the observations, by position.
# A positional mask is used on purpose: a label slice such as `.loc[:250]`
# includes its end label and would overwrite 251 rows instead of 250.
num_background_only = num_obs // 2
is_background_only = np.arange(len(simulated_data)) < num_background_only
simulated_data["observed_counts_DGP3"] = np.where(
    is_background_only, simulated_data["background_counts"], mixed_counts
)


# %%
# Overlay one semi-transparent histogram per data-generating process so the
# three count distributions can be compared on a single set of axes.
histogram_specs = [
    ("DGP1", "observed_counts_DGP1", "r"),
    ("DGP2", "observed_counts_DGP2", "g"),
    ("DGP3", "observed_counts_DGP3", "b"),
]
for _, counts_column, colour in histogram_specs:
    plt.hist(simulated_data[counts_column], color=colour, alpha=0.5)

plt.xlabel("# of successes per observation")
plt.ylabel("# observations")
# Legend entries follow the order in which the histograms were drawn.
plt.legend([legend_name for legend_name, _, _ in histogram_specs])
plt.show()