def test_multivariate_gaussian(hybridize: bool) -> None:
    num_samples = 2000
    dim = 2

    mu = np.arange(0, dim) / float(dim)

    L_diag = np.ones((dim,))
    L_low = 0.1 * np.ones((dim, dim)) * np.tri(dim, k=-1)
    L = np.diag(L_diag) + L_low
    Sigma = L.dot(L.transpose())

    distr = MultivariateGaussian(mu=mx.nd.array(mu), L=mx.nd.array(L))

    samples = distr.sample(num_samples)

    mu_hat, L_hat = maximum_likelihood_estimate_sgd(
        MultivariateGaussianOutput(dim=dim),
        samples,
        init_biases=None,  # todo: we would need to rework biases a bit to use them in the multivariate case
        hybridize=hybridize,
        learning_rate=PositiveFloat(0.01),
        num_epochs=PositiveInt(10),
    )

    distr = MultivariateGaussian(
        mu=mx.nd.array([mu_hat]), L=mx.nd.array([L_hat])
    )

    Sigma_hat = distr.variance[0].asnumpy()

    assert np.allclose(
        mu_hat, mu, atol=0.1, rtol=0.1
    ), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}"
    assert np.allclose(
        Sigma_hat, Sigma, atol=0.1, rtol=0.1
    ), f"Sigma did not match: Sigma = {Sigma}, Sigma_hat = {Sigma_hat}"
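# A minimal numpy sketch (not used by the tests) of why the Cholesky
# parameterization above recovers Sigma: if z ~ N(0, I), then mu + L @ z
# has covariance L @ L.T, which is exactly the Sigma the test compares
# against.  The helper name `_mvn_sample_sketch` is illustrative, not
# GluonTS API.
def _mvn_sample_sketch(
    mu: np.ndarray, L: np.ndarray, num_samples: int
) -> np.ndarray:
    z = np.random.randn(num_samples, len(mu))  # iid standard normal draws
    return mu + z @ L.T  # each row is distributed as N(mu, L @ L.T)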
def test_binned_likelihood(
    num_bins: int, bin_probabilites: np.ndarray, hybridize: bool
):
    """
    Test to check that maximizing the likelihood recovers the parameters.
    """
    bin_prob = mx.nd.array(bin_probabilites)
    bin_center = mx.nd.array(np.logspace(-1, 1, num_bins))

    # generate samples
    bin_probs = mx.nd.zeros((NUM_SAMPLES, num_bins)) + bin_prob
    bin_centers = mx.nd.zeros((NUM_SAMPLES, num_bins)) + bin_center

    distr = Binned(bin_probs.log(), bin_centers)
    samples = distr.sample()

    # add some jitter to the uniform initialization and normalize
    bin_prob_init = (
        mx.nd.random_uniform(1 - TOL, 1 + TOL, num_bins) * bin_prob
    )
    bin_prob_init = bin_prob_init / bin_prob_init.sum()

    init_biases = [bin_prob_init]

    bin_log_prob_hat, _ = maximum_likelihood_estimate_sgd(
        BinnedOutput(bin_center),
        samples,
        init_biases=init_biases,
        hybridize=hybridize,
        learning_rate=PositiveFloat(0.05),
        num_epochs=PositiveInt(25),
    )

    bin_prob_hat = np.exp(bin_log_prob_hat)

    assert all(
        mx.nd.abs(mx.nd.array(bin_prob_hat) - bin_prob) < TOL * bin_prob
    ), f"bin_prob did not match: bin_prob = {bin_prob}, bin_prob_hat = {bin_prob_hat}"
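# A minimal numpy sketch (hypothetical helper, not the GluonTS
# implementation) of the sampling model `Binned` represents: draw a bin
# index from the categorical distribution over bins and emit that bin's
# center value.
def _binned_sample_sketch(
    bin_probs: np.ndarray, bin_centers: np.ndarray, num_samples: int
) -> np.ndarray:
    idx = np.random.choice(len(bin_centers), size=num_samples, p=bin_probs)
    return bin_centers[idx]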
def test_poisson_likelihood(rate: float, hybridize: bool) -> None:
    """
    Test to check that maximizing the likelihood recovers the parameters.
    """
    # generate samples
    rates = mx.nd.zeros(NUM_SAMPLES) + rate

    distr = Poisson(rates)
    samples = distr.sample()

    init_biases = [inv_softplus(rate - START_TOL_MULTIPLE * TOL * rate)]

    rate_hat = maximum_likelihood_estimate_sgd(
        PoissonOutput(),
        samples,
        init_biases=init_biases,
        hybridize=hybridize,
        learning_rate=PositiveFloat(0.05),
        num_epochs=PositiveInt(20),
    )

    assert (
        np.abs(rate_hat[0] - rate) < TOL * rate
    ), f"rate did not match: rate = {rate}, rate_hat = {rate_hat}"
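# The `inv_softplus` biases used above exist because the projection layers
# map unconstrained network outputs through softplus to keep parameters
# such as `rate` positive, so the bias must be initialized in the
# pre-softplus space.  A minimal sketch of the inverse, assuming
# softplus(x) = log(1 + exp(x)):
def _inv_softplus_sketch(y: float) -> float:
    # solves softplus(x) = y for x; valid for y > 0
    return np.log(np.exp(y) - 1.0)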
def maximum_likelihood_estimate_sgd(
    distr_output: DistributionOutput,
    samples: mx.nd.NDArray,
    init_biases: List[mx.nd.NDArray] = None,
    num_epochs: PositiveInt = PositiveInt(5),
    learning_rate: PositiveFloat = PositiveFloat(1e-2),
    hybridize: bool = True,
) -> Iterable[float]:
    model_ctx = mx.cpu()

    arg_proj = distr_output.get_args_proj()
    arg_proj.initialize()

    if hybridize:
        arg_proj.hybridize()

    if init_biases is not None:
        for param, bias in zip(arg_proj.proj, init_biases):
            param.params[param.prefix + "bias"].initialize(
                mx.initializer.Constant(bias), force_reinit=True
            )

    trainer = mx.gluon.Trainer(
        arg_proj.collect_params(),
        "sgd",
        {"learning_rate": learning_rate, "clip_gradient": 10.0},
    )

    # The input data to our model is one-dimensional
    dummy_data = mx.nd.array(np.ones((len(samples), 1)))

    train_data = mx.gluon.data.DataLoader(
        mx.gluon.data.ArrayDataset(dummy_data, samples),
        batch_size=BATCH_SIZE,
        shuffle=True,
    )

    for e in range(num_epochs):
        cumulative_loss = 0
        num_batches = 0
        # inner loop
        for i, (data, sample_label) in enumerate(train_data):
            data = data.as_in_context(model_ctx)
            sample_label = sample_label.as_in_context(model_ctx)
            with mx.autograd.record():
                distr_args = arg_proj(data)
                distr = distr_output.distribution(distr_args)
                loss = distr.loss(sample_label)
                if not hybridize:
                    assert loss.shape == distr.batch_shape
            loss.backward()
            trainer.step(BATCH_SIZE)
            num_batches += 1

            cumulative_loss += mx.nd.mean(loss).asscalar()
        print(f"Epoch {e}, loss: {cumulative_loss / num_batches}")

    return [
        param[0].asnumpy()
        for param in arg_proj(mx.nd.array(np.ones((1, 1))))
    ]
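# A hedged usage sketch of the helper above (illustrative, not part of the
# test suite): fit a Gaussian to its own samples and check the recovered
# mean.  `Gaussian` and `GaussianOutput` are the same classes used
# elsewhere in this module; the epoch count and tolerance here are
# illustrative choices, not tuned values.
def _mle_sgd_usage_sketch() -> None:
    distr = Gaussian(mx.nd.zeros(1000) + 2.0, mx.nd.zeros(1000) + 0.5)
    samples = distr.sample()
    mu_hat, sigma_hat = maximum_likelihood_estimate_sgd(
        GaussianOutput(),
        samples,
        num_epochs=PositiveInt(5),
    )
    assert np.abs(mu_hat - 2.0) < 0.2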
def test_piecewise_linear(
    gamma: float,
    slopes: np.ndarray,
    knot_spacings: np.ndarray,
    hybridize: bool,
) -> None:
    """
    Test to check that minimizing the CRPS recovers the quantile function.
    """
    num_samples = 500  # use few samples to avoid timeout failures

    gammas = mx.nd.zeros((num_samples,)) + gamma
    slopess = mx.nd.zeros((num_samples, len(slopes))) + mx.nd.array(slopes)
    knot_spacingss = mx.nd.zeros(
        (num_samples, len(knot_spacings))
    ) + mx.nd.array(knot_spacings)

    pwl_sqf = PiecewiseLinear(gammas, slopess, knot_spacingss)

    samples = pwl_sqf.sample()

    # parameter initialization
    gamma_init = gamma - START_TOL_MULTIPLE * TOL * gamma
    slopes_init = slopes - START_TOL_MULTIPLE * TOL * slopes
    knot_spacings_init = knot_spacings
    # We perturb the knot spacings such that they still sum to 1 after the perturbation.
    mid = len(slopes) // 2
    knot_spacings_init[:mid] = (
        knot_spacings[:mid] - START_TOL_MULTIPLE * TOL * knot_spacings[:mid]
    )
    knot_spacings_init[mid:] = (
        knot_spacings[mid:] + START_TOL_MULTIPLE * TOL * knot_spacings[mid:]
    )

    init_biases = [gamma_init, slopes_init, knot_spacings_init]

    # check whether the fit recovers the original parameters
    gamma_hat, slopes_hat, knot_spacings_hat = maximum_likelihood_estimate_sgd(
        PiecewiseLinearOutput(len(slopes)),
        samples,
        init_biases=init_biases,
        hybridize=hybridize,
        learning_rate=PositiveFloat(0.01),
        num_epochs=PositiveInt(20),
    )

    # Since the problem is highly non-convex we may not be able to recover
    # the exact parameters.  Here we check whether the estimated parameters
    # yield similar function evaluations at different quantile levels.
    quantile_levels = np.arange(0.1, 1.0, 0.1)

    # create a PiecewiseLinear instance with the estimated parameters to have access to .quantile
    pwl_sqf_hat = PiecewiseLinear(
        mx.nd.array(gamma_hat),
        mx.nd.array(slopes_hat).expand_dims(axis=0),
        mx.nd.array(knot_spacings_hat).expand_dims(axis=0),
    )

    # compute quantiles with the estimated parameters
    quantiles_hat = np.squeeze(
        pwl_sqf_hat.quantile(
            mx.nd.array(quantile_levels).expand_dims(axis=0), axis=1
        ).asnumpy()
    )

    # Compute quantiles with the original parameters.
    # Since the parameters are replicated across samples we take only the first entry.
    quantiles = np.squeeze(
        pwl_sqf.quantile(
            mx.nd.array(quantile_levels)
            .expand_dims(axis=0)
            .repeat(axis=0, repeats=num_samples),
            axis=1,
        ).asnumpy()[0, :]
    )

    for ix, (quantile, quantile_hat) in enumerate(
        zip(quantiles, quantiles_hat)
    ):
        assert np.abs(quantile_hat - quantile) < TOL * quantile, (
            f"quantile level {quantile_levels[ix]} didn't match: "
            f"q = {quantile}, q_hat = {quantile_hat}"
        )
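# A minimal numpy sketch of the piecewise-linear quantile function this
# parameterization describes: an intercept `gamma`, one slope per knot
# interval, and knot spacings that partition [0, 1].  This mirrors the
# form the test exercises; the exact GluonTS internals may differ in how
# they factor the parameters.
def _pwl_quantile_sketch(
    u: float, gamma: float, slopes: np.ndarray, knot_spacings: np.ndarray
) -> float:
    # left edge of each knot interval: 0, s_0, s_0 + s_1, ...
    left_edges = np.concatenate([[0.0], np.cumsum(knot_spacings)[:-1]])
    # each interval contributes its slope times the part of [0, u] it covers
    return gamma + float(
        np.sum(slopes * np.clip(u - left_edges, 0.0, knot_spacings))
    )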
def test_inflated_beta_likelihood(
    alpha: float,
    beta: float,
    hybridize: bool,
    inflated_at: str,
    zero_probability: float,
    one_probability: float,
) -> None:
    """
    Test to check that maximizing the likelihood recovers the parameters.
    """
    # generate samples
    alphas = mx.nd.zeros((NUM_SAMPLES,)) + alpha
    betas = mx.nd.zeros((NUM_SAMPLES,)) + beta
    zero_probabilities = mx.nd.zeros((NUM_SAMPLES,)) + zero_probability
    one_probabilities = mx.nd.zeros((NUM_SAMPLES,)) + one_probability

    if inflated_at == "zero":
        distr = ZeroInflatedBeta(
            alphas, betas, zero_probability=zero_probabilities
        )
        distr_output = ZeroInflatedBetaOutput()
    elif inflated_at == "one":
        distr = OneInflatedBeta(
            alphas, betas, one_probability=one_probabilities
        )
        distr_output = OneInflatedBetaOutput()
    else:
        distr = ZeroAndOneInflatedBeta(
            alphas,
            betas,
            zero_probability=zero_probabilities,
            one_probability=one_probabilities,
        )
        distr_output = ZeroAndOneInflatedBetaOutput()

    samples = distr.sample()

    init_biases = [
        inv_softplus(alpha - START_TOL_MULTIPLE * TOL * alpha),
        inv_softplus(beta - START_TOL_MULTIPLE * TOL * beta),
    ]

    parameters = maximum_likelihood_estimate_sgd(
        distr_output,
        samples,
        init_biases=init_biases,
        hybridize=hybridize,
        learning_rate=PositiveFloat(0.05),
        num_epochs=PositiveInt(10),
    )

    if inflated_at == "zero":
        alpha_hat, beta_hat, zero_probability_hat = parameters
        assert (
            np.abs(zero_probability_hat[0] - zero_probability)
            < TOL * zero_probability
        ), f"zero_probability did not match: zero_probability = {zero_probability}, zero_probability_hat = {zero_probability_hat}"
    elif inflated_at == "one":
        alpha_hat, beta_hat, one_probability_hat = parameters
        assert (
            np.abs(one_probability_hat - one_probability)
            < TOL * one_probability
        ), f"one_probability did not match: one_probability = {one_probability}, one_probability_hat = {one_probability_hat}"
    else:
        (
            alpha_hat,
            beta_hat,
            zero_probability_hat,
            one_probability_hat,
        ) = parameters
        assert (
            np.abs(zero_probability_hat - zero_probability)
            < TOL * zero_probability
        ), f"zero_probability did not match: zero_probability = {zero_probability}, zero_probability_hat = {zero_probability_hat}"
        assert (
            np.abs(one_probability_hat - one_probability)
            < TOL * one_probability
        ), f"one_probability did not match: one_probability = {one_probability}, one_probability_hat = {one_probability_hat}"

    assert (
        np.abs(alpha_hat - alpha) < TOL * alpha
    ), f"alpha did not match: alpha = {alpha}, alpha_hat = {alpha_hat}"
    assert (
        np.abs(beta_hat - beta) < TOL * beta
    ), f"beta did not match: beta = {beta}, beta_hat = {beta_hat}"
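# A minimal numpy sketch (hypothetical helper) of the zero-inflated Beta
# model being fitted above: with probability `zero_probability` emit an
# exact 0, otherwise draw from Beta(alpha, beta).  The one-inflated and
# zero-and-one-inflated variants extend this mixture analogously.
def _zero_inflated_beta_sample_sketch(
    alpha: float, beta: float, zero_probability: float, num_samples: int
) -> np.ndarray:
    is_zero = np.random.rand(num_samples) < zero_probability
    return np.where(is_zero, 0.0, np.random.beta(alpha, beta, num_samples))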
    assert (
        np.abs(total_count_hat - total_count) < TOL * total_count
    ), f"total_count did not match: total_count = {total_count}, total_count_hat = {total_count_hat}"
    assert (
        np.abs(logit_hat - logit) < TOL * logit
    ), f"logit did not match: logit = {logit}, logit_hat = {logit_hat}"


percentile_tail = 0.05


@pytest.mark.parametrize("percentile_tail", [percentile_tail])
@pytest.mark.parametrize(
    "np_logits", [[percentile_tail, 1 - 2 * percentile_tail, percentile_tail]]
)
@pytest.mark.parametrize(
    "lower_gp_xi, lower_gp_beta", [(0.4, PositiveFloat(1.5))]
)
@pytest.mark.parametrize(
    "upper_gp_xi, upper_gp_beta", [(0.3, PositiveFloat(1.0))]
)
@pytest.mark.timeout(300)
@pytest.mark.flaky(max_runs=5, min_passes=1)
def test_splicedbinnedpareto_likelihood(
    percentile_tail: PositiveFloat,
    np_logits: np.ndarray,
    lower_gp_xi: float,
    lower_gp_beta: PositiveFloat,
    upper_gp_xi: float,
    upper_gp_beta: PositiveFloat,
) -> None:
    # percentile_tail = 0.05
def test_box_cox_transform(
    lambdas: Tuple[float, float],
    mu_sigma: Tuple[float, float],
    hybridize: bool,
):
    """
    Test to check that maximizing the likelihood recovers the parameters.
    """
    # test instance
    lam_1, lam_2 = lambdas
    mu, sigma = mu_sigma

    # generate samples
    lambdas_1 = mx.nd.zeros((NUM_SAMPLES,)) + lam_1
    lambdas_2 = mx.nd.zeros((NUM_SAMPLES,)) + lam_2
    transform = InverseBoxCoxTransform(lambdas_1, lambdas_2)

    mus = mx.nd.zeros((NUM_SAMPLES,)) + mu
    sigmas = mx.nd.zeros((NUM_SAMPLES,)) + sigma
    gaussian_distr = Gaussian(mus, sigmas)

    # Here the base distribution is Gaussian, which is transformed to
    # non-Gaussian via the inverse Box-Cox transform.
    # Sampling from `trans_distr` gives non-Gaussian samples.
    trans_distr = TransformedDistribution(gaussian_distr, transform)

    # Given the non-Gaussian samples, find the true parameters of the
    # Box-Cox transformation as well as the underlying Gaussian distribution.
    samples = trans_distr.sample()

    init_biases = [
        mu - START_TOL_MULTIPLE * TOL * mu,
        inv_softplus(sigma - START_TOL_MULTIPLE * TOL * sigma),
        lam_1 - START_TOL_MULTIPLE * TOL * lam_1,
        inv_softplus(lam_2 - START_TOL_MULTIPLE * TOL * lam_2),
    ]

    mu_hat, sigma_hat, lam_1_hat, lam_2_hat = maximum_likelihood_estimate_sgd(
        TransformedDistributionOutput(
            GaussianOutput(),
            InverseBoxCoxTransformOutput(lb_obs=lam_2, fix_lambda_2=True),
        ),
        samples,
        init_biases=init_biases,
        hybridize=hybridize,
        learning_rate=PositiveFloat(0.01),
        num_epochs=PositiveInt(18),
    )

    assert (
        np.abs(lam_1_hat - lam_1) < TOL * lam_1
    ), f"lam_1 did not match: lam_1 = {lam_1}, lam_1_hat = {lam_1_hat}"
    # lam_2 is held fixed (fix_lambda_2=True), so it is not checked here:
    # assert (
    #     np.abs(lam_2_hat - lam_2) < TOL * lam_2
    # ), f"lam_2 did not match: lam_2 = {lam_2}, lam_2_hat = {lam_2_hat}"

    assert np.abs(mu_hat - mu) < TOL * np.abs(
        mu
    ), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}"
    assert (
        np.abs(sigma_hat - sigma) < TOL * sigma
    ), f"sigma did not match: sigma = {sigma}, sigma_hat = {sigma_hat}"
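# A minimal numpy sketch of the two-parameter Box-Cox transform and its
# inverse, which is the mapping `InverseBoxCoxTransform` applies to the
# Gaussian base samples above.  This follows the standard textbook
# definition; the GluonTS implementation may handle edge cases differently.
def _box_cox_sketch(x: float, lam_1: float, lam_2: float) -> float:
    # Box-Cox: maps shifted-positive data towards the Gaussian scale
    if lam_1 == 0.0:
        return np.log(x + lam_2)
    return ((x + lam_2) ** lam_1 - 1.0) / lam_1


def _inverse_box_cox_sketch(z: float, lam_1: float, lam_2: float) -> float:
    # inverse of `_box_cox_sketch`: maps Gaussian samples back to data scale
    if lam_1 == 0.0:
        return np.exp(z) - lam_2
    return (lam_1 * z + 1.0) ** (1.0 / lam_1) - lam_2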
def test_empirical_distribution(hybridize: bool) -> None:
    r"""
    This verifies that the loss implemented by `EmpiricalDistribution` is
    correct. This is done by recovering parameters of a parametric
    distribution not by maximizing likelihood but by optimizing CRPS loss
    on the Monte Carlo samples drawn from the underlying parametric
    distribution.

    More precisely, given observations `obs` drawn from the true
    distribution p(x; \theta^*), we solve

        \theta_hat = \argmin_{\theta} CRPS(obs, {x}_i)

        subject to:     x_i ~ p(x; \theta)

    and verify that \theta^* and \theta_hat agree.

    This test uses a multivariate Gaussian with diagonal covariance. Once
    multivariate CRPS is implemented in `EmpiricalDistribution` one could
    use `LowrankMultivariateGaussian` as well. Any univariate distribution
    whose `sample_rep` is differentiable can also be used in this test.
    """
    num_obs = 2000
    dim = 2

    # Multivariate CRPS is not implemented in `EmpiricalDistribution`.
    rank = 0
    W = None

    mu = np.arange(0, dim) / float(dim)
    D = np.eye(dim) * (np.arange(dim) / dim + 0.5)
    Sigma = D

    distr = LowrankMultivariateGaussian(
        mu=mx.nd.array([mu]),
        D=mx.nd.array([np.diag(D)]),
        W=W,
        dim=dim,
        rank=rank,
    )

    obs = distr.sample(num_obs).squeeze().asnumpy()

    theta_hat = maximum_likelihood_estimate_sgd(
        EmpiricalDistributionOutput(
            num_samples=200,
            distr_output=LowrankMultivariateGaussianOutput(
                dim=dim, rank=rank, sigma_init=0.2, sigma_minimum=0.0
            ),
        ),
        obs,
        learning_rate=PositiveFloat(0.01),
        num_epochs=PositiveInt(25),
        init_biases=None,  # todo: we would need to rework biases a bit to use them in the multivariate case
        hybridize=hybridize,
    )

    mu_hat, D_hat = theta_hat
    W_hat = None

    distr = LowrankMultivariateGaussian(
        dim=dim,
        rank=rank,
        mu=mx.nd.array([mu_hat]),
        D=mx.nd.array([D_hat]),
        W=W_hat,
    )

    Sigma_hat = distr.variance.asnumpy()

    print(mu_hat, Sigma_hat)

    assert np.allclose(
        mu_hat, mu, atol=0.2, rtol=0.1
    ), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}"
    assert np.allclose(
        Sigma_hat, Sigma, atol=0.1, rtol=0.1
    ), f"Sigma did not match: Sigma = {Sigma}, Sigma_hat = {Sigma_hat}"
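# A minimal numpy sketch of the sample-based CRPS estimate the docstring
# refers to, using the standard energy form
#     CRPS(F, y) = E|X - y| - 0.5 * E|X - X'|
# estimated from Monte Carlo samples x_i.  This is an illustrative helper,
# not the `EmpiricalDistribution` implementation itself.
def _crps_sketch(samples: np.ndarray, y: float) -> float:
    term1 = np.mean(np.abs(samples - y))
    term2 = 0.5 * np.mean(np.abs(samples[:, None] - samples[None, :]))
    return term1 - term2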
def test_lowrank_multivariate_gaussian(hybridize: bool, rank: int) -> None:
    num_samples = 2000
    dim = 2

    mu = np.arange(0, dim) / float(dim)
    D = np.eye(dim) * (np.arange(dim) / dim + 0.5)

    if rank > 0:
        W = np.sqrt(np.ones((dim, rank)) * 0.2)
        Sigma = D + W.dot(W.transpose())
        W = mx.nd.array([W])
    else:
        Sigma = D
        W = None

    distr = LowrankMultivariateGaussian(
        mu=mx.nd.array([mu]),
        D=mx.nd.array([np.diag(D)]),
        W=W,
        dim=dim,
        rank=rank,
    )

    assert np.allclose(
        distr.variance[0].asnumpy(), Sigma, atol=0.1, rtol=0.1
    ), f"did not match: Sigma = {Sigma}, Sigma_hat = {distr.variance[0]}"

    samples = distr.sample(num_samples).squeeze().asnumpy()

    theta_hat = maximum_likelihood_estimate_sgd(
        LowrankMultivariateGaussianOutput(
            dim=dim, rank=rank, sigma_init=0.2, sigma_minimum=0.0
        ),
        samples,
        learning_rate=PositiveFloat(0.01),
        num_epochs=PositiveInt(25),
        init_biases=None,  # todo: we would need to rework biases a bit to use them in the multivariate case
        hybridize=hybridize,
    )

    if rank > 0:
        mu_hat, D_hat, W_hat = theta_hat
        W_hat = mx.nd.array([W_hat])
    else:
        mu_hat, D_hat = theta_hat
        W_hat = None

    distr = LowrankMultivariateGaussian(
        dim=dim,
        rank=rank,
        mu=mx.nd.array([mu_hat]),
        D=mx.nd.array([D_hat]),
        W=W_hat,
    )

    Sigma_hat = distr.variance.asnumpy()

    assert np.allclose(
        mu_hat, mu, atol=0.2, rtol=0.1
    ), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}"
    assert np.allclose(
        Sigma_hat, Sigma, atol=0.1, rtol=0.1
    ), f"Sigma did not match: Sigma = {Sigma}, Sigma_hat = {Sigma_hat}"
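# A minimal numpy sketch of why a low-rank Gaussian sample has covariance
# D + W @ W.T: add independent diagonal noise sqrt(D) * eps and low-rank
# noise W @ z.  This is an illustrative helper, not the GluonTS sampler.
def _lowrank_mvn_sample_sketch(
    mu: np.ndarray, D_diag: np.ndarray, W: np.ndarray, num_samples: int
) -> np.ndarray:
    dim, rank = W.shape
    eps = np.random.randn(num_samples, dim)  # drives the diagonal part
    z = np.random.randn(num_samples, rank)  # drives the low-rank part
    return mu + eps * np.sqrt(D_diag) + z @ W.T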