Example #1
def test_multivariate_gaussian(hybridize: bool) -> None:
    num_samples = 2000
    dim = 2

    mu = np.arange(0, dim) / float(dim)

    L_diag = np.ones((dim, ))
    L_low = 0.1 * np.ones((dim, dim)) * np.tri(dim, k=-1)
    L = np.diag(L_diag) + L_low
    Sigma = L.dot(L.transpose())

    distr = MultivariateGaussian(mu=mx.nd.array(mu), L=mx.nd.array(L))

    samples = distr.sample(num_samples)

    mu_hat, L_hat = maximum_likelihood_estimate_sgd(
        MultivariateGaussianOutput(dim=dim),
        samples,
        init_biases=None,  # TODO: rework biases a bit to use them in the multivariate case
        hybridize=hybridize,
        learning_rate=PositiveFloat(0.01),
        num_epochs=PositiveInt(10),
    )

    distr = MultivariateGaussian(mu=mx.nd.array([mu_hat]),
                                 L=mx.nd.array([L_hat]))

    Sigma_hat = distr.variance[0].asnumpy()

    assert np.allclose(
        mu_hat, mu, atol=0.1,
        rtol=0.1), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}"
    assert np.allclose(
        Sigma_hat, Sigma, atol=0.1, rtol=0.1
    ), f"Sigma did not match: sigma = {Sigma}, sigma_hat = {Sigma_hat}"
Example #2
def test_binned_likelihood(num_bins: int, bin_probabilites: np.ndarray,
                           hybridize: bool):
    """
    Test to check that maximizing the likelihood recovers the parameters
    """

    bin_prob = mx.nd.array(bin_probabilites)
    bin_center = mx.nd.array(np.logspace(-1, 1, num_bins))

    # generate samples
    bin_probs = mx.nd.zeros((NUM_SAMPLES, num_bins)) + bin_prob
    bin_centers = mx.nd.zeros((NUM_SAMPLES, num_bins)) + bin_center

    distr = Binned(bin_probs.log(), bin_centers)
    samples = distr.sample()

    # add some jitter to the uniform initialization and normalize
    bin_prob_init = mx.nd.random_uniform(1 - TOL, 1 + TOL, num_bins) * bin_prob
    bin_prob_init = bin_prob_init / bin_prob_init.sum()

    init_biases = [bin_prob_init]

    bin_log_prob_hat, _ = maximum_likelihood_estimate_sgd(
        BinnedOutput(bin_center),
        samples,
        init_biases=init_biases,
        hybridize=hybridize,
        learning_rate=PositiveFloat(0.05),
        num_epochs=PositiveInt(25),
    )

    bin_prob_hat = np.exp(bin_log_prob_hat)

    assert all(
        mx.nd.abs(mx.nd.array(bin_prob_hat) - bin_prob) < TOL * bin_prob
    ), f"bin_prob did not match: bin_prob = {bin_prob}, bin_prob_hat = {bin_prob_hat}"
Example #3
def test_poisson_likelihood(rate: float, hybridize: bool) -> None:
    """
    Test to check that maximizing the likelihood recovers the parameters
    """

    # generate samples
    rates = mx.nd.zeros(NUM_SAMPLES) + rate

    distr = Poisson(rates)
    samples = distr.sample()

    init_biases = [inv_softplus(rate - START_TOL_MULTIPLE * TOL * rate)]

    rate_hat = maximum_likelihood_estimate_sgd(
        PoissonOutput(),
        samples,
        init_biases=init_biases,
        hybridize=hybridize,
        learning_rate=PositiveFloat(0.05),
        num_epochs=PositiveInt(20),
    )

    assert (np.abs(rate_hat[0] - rate) < TOL *
            rate), f"rate did not match: rate = {rate}, rate_hat = {rate_hat}"
Example #4
def maximum_likelihood_estimate_sgd(
    distr_output: DistributionOutput,
    samples: mx.nd.NDArray,
    init_biases: List[mx.nd.NDArray] = None,
    num_epochs: PositiveInt = PositiveInt(5),
    learning_rate: PositiveFloat = PositiveFloat(1e-2),
    hybridize: bool = True,
) -> Iterable[float]:
    model_ctx = mx.cpu()

    arg_proj = distr_output.get_args_proj()
    arg_proj.initialize()

    if hybridize:
        arg_proj.hybridize()

    if init_biases is not None:
        for param, bias in zip(arg_proj.proj, init_biases):
            param.params[param.prefix + "bias"].initialize(
                mx.initializer.Constant(bias), force_reinit=True)

    trainer = mx.gluon.Trainer(
        arg_proj.collect_params(),
        'sgd',
        {
            'learning_rate': learning_rate,
            'clip_gradient': 10.0
        },
    )

    # The input data to our model is one-dimensional
    dummy_data = mx.nd.array(np.ones((len(samples), 1)))

    train_data = mx.gluon.data.DataLoader(
        mx.gluon.data.ArrayDataset(dummy_data, samples),
        batch_size=BATCH_SIZE,
        shuffle=True,
    )

    for e in range(num_epochs):
        cumulative_loss = 0
        num_batches = 0
        # inner loop
        for i, (data, sample_label) in enumerate(train_data):
            data = data.as_in_context(model_ctx)
            sample_label = sample_label.as_in_context(model_ctx)
            with mx.autograd.record():
                distr_args = arg_proj(data)
                distr = distr_output.distribution(distr_args)
                loss = distr.loss(sample_label)
                if not hybridize:
                    assert loss.shape == distr.batch_shape
            loss.backward()
            trainer.step(BATCH_SIZE)
            num_batches += 1

            cumulative_loss += mx.nd.mean(loss).asscalar()
        print("Epoch %s, loss: %s" % (e, cumulative_loss / num_batches))

    return [
        param[0].asnumpy() for param in arg_proj(mx.nd.array(np.ones((1, 1))))
    ]
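A hedged usage sketch of the helper above, fitting a univariate Gaussian; NUM_SAMPLES and the Gaussian/GaussianOutput imports are assumed to be available at module level as in the other examples:

mu, sigma = 2.0, 0.5
distr = Gaussian(mx.nd.zeros(NUM_SAMPLES) + mu, mx.nd.zeros(NUM_SAMPLES) + sigma)
samples = distr.sample()

mu_hat, sigma_hat = maximum_likelihood_estimate_sgd(
    GaussianOutput(),
    samples,
    hybridize=False,
    learning_rate=PositiveFloat(0.01),
    num_epochs=PositiveInt(5),
)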
Example #5
def test_piecewise_linear(
    gamma: float,
    slopes: np.ndarray,
    knot_spacings: np.ndarray,
    hybridize: bool,
) -> None:
    '''
    Test to check that minimizing the CRPS recovers the quantile function
    '''
    num_samples = 500  # use a small number of samples to avoid timeout failures

    gammas = mx.nd.zeros((num_samples, )) + gamma
    slopess = mx.nd.zeros((num_samples, len(slopes))) + mx.nd.array(slopes)
    knot_spacingss = mx.nd.zeros(
        (num_samples, len(knot_spacings))) + mx.nd.array(knot_spacings)

    pwl_sqf = PiecewiseLinear(gammas, slopess, knot_spacingss)

    samples = pwl_sqf.sample()

    # Parameter initialization
    gamma_init = gamma - START_TOL_MULTIPLE * TOL * gamma
    slopes_init = slopes - START_TOL_MULTIPLE * TOL * slopes
    knot_spacings_init = knot_spacings.copy()
    # We perturb knot spacings such that even after the perturbation they sum to 1.
    mid = len(slopes) // 2
    knot_spacings_init[:mid] = (knot_spacings[:mid] -
                                START_TOL_MULTIPLE * TOL * knot_spacings[:mid])
    knot_spacings_init[mid:] = (knot_spacings[mid:] +
                                START_TOL_MULTIPLE * TOL * knot_spacings[mid:])

    init_biases = [gamma_init, slopes_init, knot_spacings_init]

    # check whether the estimated parameters recover the original (mapped) ones
    gamma_hat, slopes_hat, knot_spacings_hat = maximum_likelihood_estimate_sgd(
        PiecewiseLinearOutput(len(slopes)),
        samples,
        init_biases=init_biases,
        hybridize=hybridize,
        learning_rate=PositiveFloat(0.01),
        num_epochs=PositiveInt(20),
    )

    # Since the problem is highly non-convex we may not be able to recover the exact parameters
    # Here we check if the estimated parameters yield similar function evaluations at different quantile levels.
    quantile_levels = np.arange(0.1, 1.0, 0.1)

    # create a PiecewiseLinear instance with the estimated parameters to have access to .quantile
    pwl_sqf_hat = PiecewiseLinear(
        mx.nd.array(gamma_hat),
        mx.nd.array(slopes_hat).expand_dims(axis=0),
        mx.nd.array(knot_spacings_hat).expand_dims(axis=0),
    )

    # Compute quantiles with the estimated parameters
    quantiles_hat = np.squeeze(
        pwl_sqf_hat.quantile(mx.nd.array(quantile_levels).expand_dims(axis=0),
                             axis=1).asnumpy())

    # Compute quantiles with the original parameters
    # Since params is replicated across samples we take only the first entry
    quantiles = np.squeeze(
        pwl_sqf.quantile(
            mx.nd.array(quantile_levels).expand_dims(axis=0).repeat(
                axis=0, repeats=num_samples),
            axis=1,
        ).asnumpy()[0, :])

    for ix, (quantile, quantile_hat) in enumerate(zip(quantiles,
                                                      quantiles_hat)):
        assert np.abs(quantile_hat - quantile) < TOL * quantile, (
            f"quantile level {quantile_levels[ix]} didn't match: "
            f"q = {quantile}, q_hat = {quantile_hat}")
Example #6
def test_inflated_beta_likelihood(
    alpha: float,
    beta: float,
    hybridize: bool,
    inflated_at: str,
    zero_probability: float,
    one_probability: float,
) -> None:
    """
    Test to check that maximizing the likelihood recovers the parameters
    """

    # generate samples
    alphas = mx.nd.zeros((NUM_SAMPLES, )) + alpha
    betas = mx.nd.zeros((NUM_SAMPLES, )) + beta

    zero_probabilities = mx.nd.zeros((NUM_SAMPLES, )) + zero_probability
    one_probabilities = mx.nd.zeros((NUM_SAMPLES, )) + one_probability
    if inflated_at == "zero":
        distr = ZeroInflatedBeta(alphas,
                                 betas,
                                 zero_probability=zero_probabilities)
        distr_output = ZeroInflatedBetaOutput()
    elif inflated_at == "one":
        distr = OneInflatedBeta(alphas,
                                betas,
                                one_probability=one_probabilities)
        distr_output = OneInflatedBetaOutput()

    else:
        distr = ZeroAndOneInflatedBeta(
            alphas,
            betas,
            zero_probability=zero_probabilities,
            one_probability=one_probabilities,
        )
        distr_output = ZeroAndOneInflatedBetaOutput()

    samples = distr.sample()

    init_biases = [
        inv_softplus(alpha - START_TOL_MULTIPLE * TOL * alpha),
        inv_softplus(beta - START_TOL_MULTIPLE * TOL * beta),
    ]

    parameters = maximum_likelihood_estimate_sgd(
        distr_output,
        samples,
        init_biases=init_biases,
        hybridize=hybridize,
        learning_rate=PositiveFloat(0.05),
        num_epochs=PositiveInt(10),
    )

    if inflated_at == "zero":
        alpha_hat, beta_hat, zero_probability_hat = parameters

        assert (
            np.abs(zero_probability_hat[0] - zero_probability) <
            TOL * zero_probability
        ), f"zero_probability did not match: zero_probability = {zero_probability}, zero_probability_hat = {zero_probability_hat}"

    elif inflated_at == "one":
        alpha_hat, beta_hat, one_probability_hat = parameters

        assert (
            np.abs(one_probability_hat - one_probability) <
            TOL * one_probability
        ), f"one_probability did not match: one_probability = {one_probability}, one_probability_hat = {one_probability_hat}"
    else:
        (
            alpha_hat,
            beta_hat,
            zero_probability_hat,
            one_probability_hat,
        ) = parameters

        assert (
            np.abs(zero_probability_hat - zero_probability) <
            TOL * zero_probability
        ), f"zero_probability did not match: zero_probability = {zero_probability}, zero_probability_hat = {zero_probability_hat}"
        assert (
            np.abs(one_probability_hat - one_probability) <
            TOL * one_probability
        ), f"one_probability did not match: one_probability = {one_probability}, one_probability_hat = {one_probability_hat}"

    assert (np.abs(alpha_hat - alpha) < TOL * alpha
            ), f"alpha did not match: alpha = {alpha}, alpha_hat = {alpha_hat}"
    assert (np.abs(beta_hat - beta) < TOL *
            beta), f"beta did not match: beta = {beta}, beta_hat = {beta_hat}"
Example #7
    assert (
        np.abs(total_count_hat - total_count) < TOL * total_count
    ), f"total_count did not match: total_count = {total_count}, total_count_hat = {total_count_hat}"
    assert (
        np.abs(logit_hat - logit) < TOL * logit
    ), f"logit did not match: logit = {logit}, logit_hat = {logit_hat}"


percentile_tail = 0.05


@pytest.mark.parametrize("percentile_tail", [percentile_tail])
@pytest.mark.parametrize(
    "np_logits", [[percentile_tail, 1 - 2 * percentile_tail, percentile_tail]]
)
@pytest.mark.parametrize(
    "lower_gp_xi, lower_gp_beta", [(0.4, PositiveFloat(1.5))]
)
@pytest.mark.parametrize(
    "upper_gp_xi, upper_gp_beta", [(0.3, PositiveFloat(1.0))]
)
@pytest.mark.timeout(300)
@pytest.mark.flaky(max_runs=5, min_passes=1)
def test_splicedbinnedpareto_likelihood(
    percentile_tail: PositiveFloat,
    np_logits: np.ndarray,
    lower_gp_xi: float,
    lower_gp_beta: PositiveFloat,
    upper_gp_xi: float,
    upper_gp_beta: PositiveFloat,
) -> None:
    # percentile_tail = 0.05
Example #8
def test_box_cox_tranform(
    lambdas: Tuple[float, float],
    mu_sigma: Tuple[float, float],
    hybridize: bool,
):
    '''
    Test to check that maximizing the likelihood recovers the parameters
    '''
    # test instance
    lam_1, lam_2 = lambdas
    mu, sigma = mu_sigma

    # generate samples
    lamdas_1 = mx.nd.zeros((NUM_SAMPLES,)) + lam_1
    lamdas_2 = mx.nd.zeros((NUM_SAMPLES,)) + lam_2
    transform = InverseBoxCoxTransform(lamdas_1, lamdas_2)

    mus = mx.nd.zeros((NUM_SAMPLES,)) + mu
    sigmas = mx.nd.zeros((NUM_SAMPLES,)) + sigma
    gausian_distr = Gaussian(mus, sigmas)

    # Here the base distribution is Gaussian which is transformed to
    # non-Gaussian via the inverse Box-Cox transform.
    # Sampling from `trans_distr` gives non-Gaussian samples
    trans_distr = TransformedDistribution(gausian_distr, transform)

    # Given the non-Gaussian samples find the true parameters
    # of the Box-Cox transformation as well as the underlying Gaussian distribution.
    samples = trans_distr.sample()

    init_biases = [
        mu - START_TOL_MULTIPLE * TOL * mu,
        inv_softplus(sigma - START_TOL_MULTIPLE * TOL * sigma),
        lam_1 - START_TOL_MULTIPLE * TOL * lam_1,
        inv_softplus(lam_2 - START_TOL_MULTIPLE * TOL * lam_2),
    ]

    mu_hat, sigma_hat, lam_1_hat, lam_2_hat = maximum_likelihood_estimate_sgd(
        TransformedDistributionOutput(
            GaussianOutput(),
            InverseBoxCoxTransformOutput(lb_obs=lam_2, fix_lambda_2=True),
        ),
        samples,
        init_biases=init_biases,
        hybridize=hybridize,
        learning_rate=PositiveFloat(0.01),
        num_epochs=PositiveInt(18),
    )

    assert (
        np.abs(lam_1_hat - lam_1) < TOL * lam_1
    ), f"lam_1 did not match: lam_1 = {lam_1}, lam_1_hat = {lam_1_hat}"
    # assert (
    #     np.abs(lam_2_hat - lam_2) < TOL * lam_2
    # ), f"lam_2 did not match: lam_2 = {lam_2}, lam_2_hat = {lam_2_hat}"

    assert np.abs(mu_hat - mu) < TOL * np.abs(
        mu
    ), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}"
    assert (
        np.abs(sigma_hat - sigma) < TOL * sigma
    ), f"sigma did not match: sigma = {sigma}, sigma_hat = {sigma_hat}"
def test_empirical_distribution(hybridize: bool) -> None:
    r"""
    This verifies if the loss implemented by `EmpiricalDistribution` is correct.
    This is done by recovering parameters of a parametric distribution not by maximizing likelihood but by
    optimizing CRPS loss on the Monte Carlo samples drawn from the underlying parametric distribution.

    More precisely, given observations `obs` drawn from the true distribution p(x; \theta^*), we solve

                \theta_hat = \argmin_{\theta} CRPS(obs, {x}_i)
                             subject to:      x_i ~ p(x; \theta)

    and verify if \theta^* and \theta_hat agree.

    This test uses Multivariate Gaussian with diagonal covariance. Once multivariate CRPS is implemented in
    `EmpiricalDistribution` one could use `LowrankMultivariateGaussian` as well. Any univariate distribution whose
    `sample_rep` is differentiable can also be used in this test.

    """
    num_obs = 2000
    dim = 2

    # Multivariate CRPS is not implemented in `EmpiricalDistribution`.
    rank = 0
    W = None

    mu = np.arange(0, dim) / float(dim)
    D = np.eye(dim) * (np.arange(dim) / dim + 0.5)
    Sigma = D

    distr = LowrankMultivariateGaussian(
        mu=mx.nd.array([mu]),
        D=mx.nd.array([np.diag(D)]),
        W=W,
        dim=dim,
        rank=rank,
    )

    obs = distr.sample(num_obs).squeeze().asnumpy()

    theta_hat = maximum_likelihood_estimate_sgd(
        EmpiricalDistributionOutput(
            num_samples=200,
            distr_output=LowrankMultivariateGaussianOutput(dim=dim,
                                                           rank=rank,
                                                           sigma_init=0.2,
                                                           sigma_minimum=0.0),
        ),
        obs,
        learning_rate=PositiveFloat(0.01),
        num_epochs=PositiveInt(25),
        init_biases=None,  # TODO: rework biases a bit to use them in the multivariate case
        hybridize=hybridize,
    )

    mu_hat, D_hat = theta_hat
    W_hat = None

    distr = LowrankMultivariateGaussian(
        dim=dim,
        rank=rank,
        mu=mx.nd.array([mu_hat]),
        D=mx.nd.array([D_hat]),
        W=W_hat,
    )

    Sigma_hat = distr.variance.asnumpy()

    print(mu_hat, Sigma_hat)

    assert np.allclose(
        mu_hat, mu, atol=0.2,
        rtol=0.1), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}"

    assert np.allclose(
        Sigma_hat, Sigma, atol=0.1, rtol=0.1
    ), f"sigma did not match: sigma = {Sigma}, sigma_hat = {Sigma_hat}"
def test_lowrank_multivariate_gaussian(hybridize: bool, rank: int) -> None:
    num_samples = 2000
    dim = 2

    mu = np.arange(0, dim) / float(dim)
    D = np.eye(dim) * (np.arange(dim) / dim + 0.5)
    if rank > 0:
        W = np.sqrt(np.ones((dim, rank)) * 0.2)
        Sigma = D + W.dot(W.transpose())
        W = mx.nd.array([W])
    else:
        Sigma = D
        W = None

    distr = LowrankMultivariateGaussian(
        mu=mx.nd.array([mu]),
        D=mx.nd.array([np.diag(D)]),
        W=W,
        dim=dim,
        rank=rank,
    )

    assert np.allclose(
        distr.variance[0].asnumpy(), Sigma, atol=0.1, rtol=0.1
    ), f"did not match: sigma = {Sigma}, sigma_hat = {distr.variance[0]}"

    samples = distr.sample(num_samples).squeeze().asnumpy()

    theta_hat = maximum_likelihood_estimate_sgd(
        LowrankMultivariateGaussianOutput(dim=dim,
                                          rank=rank,
                                          sigma_init=0.2,
                                          sigma_minimum=0.0),
        samples,
        learning_rate=PositiveFloat(0.01),
        num_epochs=PositiveInt(25),
        init_biases=None,  # TODO: rework biases a bit to use them in the multivariate case
        hybridize=hybridize,
    )

    if rank > 0:
        mu_hat, D_hat, W_hat = theta_hat
        W_hat = mx.nd.array([W_hat])
    else:
        mu_hat, D_hat = theta_hat
        W_hat = None

    distr = LowrankMultivariateGaussian(
        dim=dim,
        rank=rank,
        mu=mx.nd.array([mu_hat]),
        D=mx.nd.array([D_hat]),
        W=W_hat,
    )

    Sigma_hat = distr.variance.asnumpy()

    assert np.allclose(
        mu_hat, mu, atol=0.2,
        rtol=0.1), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}"

    assert np.allclose(
        Sigma_hat, Sigma, atol=0.1, rtol=0.1
    ), f"sigma did not match: sigma = {Sigma}, sigma_hat = {Sigma_hat}"