def test_deterministic_l2(mu: float, hybridize: bool) -> None:
    """
    Test to check that maximizing the likelihood recovers the parameters.
    This tests uses the Gaussian distribution with fixed variance and sample mean.
    This essentially reduces to determistic L2.
    """
    # generate samples
    mus = mx.nd.zeros(NUM_SAMPLES) + mu

    deterministic_distr = Gaussian(mu=mus, sigma=0.1 * mx.nd.ones_like(mus))
    samples = deterministic_distr.sample()

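    # GaussianOutput normally learns both mu and sigma; this subclass overrides
    # domain_map so that sigma stays pinned at the fixed value used to generate
    # the samples above, leaving only mu to be estimated.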
    class GaussianFixedVarianceOutput(GaussianOutput):
        @classmethod
        def domain_map(cls, F, mu, sigma):
            sigma = 0.1 * F.ones_like(sigma)
            return mu.squeeze(axis=-1), sigma.squeeze(axis=-1)

    mu_hat, _ = maximum_likelihood_estimate_sgd(
        GaussianFixedVarianceOutput(),
        samples,
        init_biases=[3 * mu, 0.1],
        hybridize=hybridize,
        num_epochs=PositiveInt(1),
    )

    assert (np.abs(mu_hat - mu) <
            TOL * mu), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}"


def test_gaussian_likelihood(mu: float, sigma: float, hybridize: bool) -> None:
    """
    Test to check that maximizing the likelihood recovers the parameters.
    """

    # generate samples
    mus = mx.nd.zeros((NUM_SAMPLES,)) + mu
    sigmas = mx.nd.zeros((NUM_SAMPLES,)) + sigma

    distr = Gaussian(mus, sigmas)
    samples = distr.sample()

    init_biases = [
        mu - START_TOL_MULTIPLE * TOL * mu,
        inv_softplus(sigma - START_TOL_MULTIPLE * TOL * sigma),
    ]
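    # GaussianOutput maps its raw sigma parameter through a softplus to keep it
    # positive, so the initial bias for sigma is specified in the pre-softplus
    # domain via inv_softplus.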

    mu_hat, sigma_hat = maximum_likelihood_estimate_sgd(
        GaussianOutput(),
        samples,
        init_biases=init_biases,
        hybridize=hybridize,
        learning_rate=PositiveFloat(0.001),
        num_epochs=PositiveInt(5),
    )

    assert (np.abs(mu_hat - mu) <
            TOL * mu), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}"
    assert (np.abs(sigma_hat - sigma) < TOL * sigma
            ), f"sigma did not match: sigma = {sigma}, sigma_hat = {sigma_hat}"