def test_lowrank_multivariate_gaussian(hybridize: bool) -> None:
    """Check that SGD maximum likelihood recovers a low-rank Gaussian.

    A reference ``LowrankMultivariateGaussian`` whose covariance is
    ``D + W W^T`` is built and its ``variance`` is compared against that
    matrix. Samples drawn from it are then passed to
    ``maximum_likelihood_estimate_sgd``; the estimated mean and the
    covariance rebuilt from the estimated ``D`` and ``W`` must match the
    reference values under ``np.allclose`` (``rtol=0.1``; ``atol=0.2`` for
    the mean and ``atol=0.1`` for the covariance).
    """
    n_draws = 2000
    n_dims = 2
    n_factors = 1

    # Ground-truth parameters of the reference distribution.
    mu = np.arange(n_dims) / float(n_dims)
    diag_cov = np.eye(n_dims) * (np.arange(n_dims) / n_dims + 0.5)
    factor = np.sqrt(0.2 * np.ones((n_dims, n_factors)))
    Sigma = diag_cov + factor @ factor.T

    distr = LowrankMultivariateGaussian(
        mu=mx.nd.array([mu]),
        D=mx.nd.array([np.diag(diag_cov)]),
        W=mx.nd.array([factor]),
        dim=n_dims,
        rank=n_factors,
    )

    # The distribution must report the covariance it was constructed from.
    reference_ok = np.allclose(
        distr.variance[0].asnumpy(), Sigma, atol=0.1, rtol=0.1
    )
    assert (
        reference_ok
    ), f"did not match: sigma = {Sigma}, sigma_hat = {distr.variance[0]}"

    draws = distr.sample(n_draws)
    observations = draws.squeeze().asnumpy()

    estimation_output = LowrankMultivariateGaussianOutput(
        dim=n_dims, rank=n_factors, sigma_init=0.2, sigma_minimum=0.0
    )
    mu_hat, diag_hat, factor_hat = maximum_likelihood_estimate_sgd(
        estimation_output,
        observations,
        learning_rate=PositiveFloat(0.01),
        num_epochs=PositiveInt(25),
        # TODO: biases would need some rework before they can be used in
        # the multivariate case.
        init_biases=None,
        hybridize=hybridize,
    )

    # Rebuild a distribution from the estimates to obtain its covariance.
    fitted_distr = LowrankMultivariateGaussian(
        dim=n_dims,
        rank=n_factors,
        mu=mx.nd.array([mu_hat]),
        D=mx.nd.array([diag_hat]),
        W=mx.nd.array([factor_hat]),
    )
    Sigma_hat = fitted_distr.variance.asnumpy()

    mean_ok = np.allclose(mu_hat, mu, atol=0.2, rtol=0.1)
    assert mean_ok, f"mu did not match: mu = {mu}, mu_hat = {mu_hat}"

    covariance_ok = np.allclose(Sigma_hat, Sigma, atol=0.1, rtol=0.1)
    assert (
        covariance_ok
    ), f"sigma did not match: sigma = {Sigma}, sigma_hat = {Sigma_hat}"
( GammaOutput(), mx.nd.random.gamma(shape=(3, 4, 5, 6)), [None, mx.nd.ones(shape=(3, 4, 5))], (3, 4, 5), (), ), ( MultivariateGaussianOutput(dim=5), mx.nd.random.normal(shape=(3, 4, 10)), [None, mx.nd.ones(shape=(3, 4, 5))], (3, 4), (5, ), ), ( LowrankMultivariateGaussianOutput(dim=5, rank=4), mx.nd.random.normal(shape=(3, 4, 10)), [None, mx.nd.ones(shape=(3, 4, 5))], (3, 4), (5, ), ), ( DirichletOutput(dim=5), mx.nd.random.gamma(shape=(3, 4, 5)), [None], (3, 4), (5, ), ), ( DirichletMultinomialOutput(dim=5, n_trials=10), mx.nd.random.gamma(shape=(3, 4, 5)),
) dataset = load_multivariate_constant_dataset() target_dim = int(dataset.metadata.feat_static_cat[0].cardinality) metadata = dataset.metadata estimator = DeepVAREstimator @pytest.mark.timeout(10) @pytest.mark.parametrize( "distr_output, num_batches_per_epoch, Estimator, hybridize, " "use_marginal_transformation", [ ( LowrankMultivariateGaussianOutput(dim=target_dim, rank=2), 10, estimator, True, True, ), ( LowrankMultivariateGaussianOutput(dim=target_dim, rank=2), 10, estimator, False, False, ), ( LowrankMultivariateGaussianOutput(dim=target_dim, rank=2), 10,
def __init__(
    self,
    freq: str,
    prediction_length: int,
    target_dim: int,
    trainer: Trainer = Trainer(),
    context_length: Optional[int] = None,
    num_layers: int = 2,
    num_cells: int = 40,
    cell_type: str = "lstm",
    num_parallel_samples: int = 100,
    dropout_rate: float = 0.1,
    cardinality: List[int] = [1],
    embedding_dimension: int = 5,
    distr_output: Optional[DistributionOutput] = None,
    rank: Optional[int] = 5,
    scaling: bool = True,
    pick_incomplete: bool = False,
    lags_seq: Optional[List[int]] = None,
    time_features: Optional[List[TimeFeature]] = None,
    conditioning_length: int = 200,
    use_marginal_transformation=False,
    **kwargs,
) -> None:
    """Validate the hyper-parameters and store them on the estimator.

    Parameters
    ----------
    freq
        Frequency string. Used to derive the default ``lags_seq`` and
        ``time_features`` when those are not given.
    prediction_length
        Must be > 0. Also used as ``context_length`` when that is ``None``.
    target_dim
        Stored as ``self.target_dim`` and passed as ``dim`` to the default
        distribution output.
    trainer
        Forwarded to the parent constructor.
    context_length
        ``None`` or a value > 0; defaults to ``prediction_length``.
    num_layers, num_cells
        Must be > 0.
    cell_type
        Stored as given.
    num_parallel_samples
        Must be > 0.
    dropout_rate
        Must be >= 0.
    cardinality
        Every element must be > 0. A copy of the list is stored.
    embedding_dimension
        Must be > 0.
    distr_output
        Distribution output to use. When ``None``, a
        ``LowrankMultivariateGaussianOutput(dim=target_dim, rank=rank)``
        is created.
    rank
        Only used to build the default ``distr_output``.
    scaling, pick_incomplete, conditioning_length
        Stored as given.
    lags_seq
        Defaults to ``get_lags_for_frequency(freq_str=freq)``.
    time_features
        Defaults to ``time_features_from_frequency_str(freq)``.
    use_marginal_transformation
        When truthy, ``self.output_transform`` is set to
        ``cdf_to_gaussian_forward_transform``; otherwise it is ``None``.
    **kwargs
        Forwarded to the parent constructor.

    Raises
    ------
    AssertionError
        If any of the range checks above fails.
    """
    super().__init__(trainer=trainer, **kwargs)

    assert (
        prediction_length > 0
    ), "The value of `prediction_length` should be > 0"
    assert (
        context_length is None or context_length > 0
    ), "The value of `context_length` should be > 0"
    assert num_layers > 0, "The value of `num_layers` should be > 0"
    assert num_cells > 0, "The value of `num_cells` should be > 0"
    # The message used to name a non-existent `num_eval_samples` parameter.
    assert (
        num_parallel_samples > 0
    ), "The value of `num_parallel_samples` should be > 0"
    assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"
    assert all(
        c > 0 for c in cardinality
    ), "Elements of `cardinality` should be > 0"
    assert (
        embedding_dimension > 0
    ), "The value of `embedding_dimension` should be > 0"

    self.freq = freq
    self.context_length = (
        context_length if context_length is not None else prediction_length
    )

    if distr_output is not None:
        self.distr_output = distr_output
    else:
        self.distr_output = LowrankMultivariateGaussianOutput(
            dim=target_dim, rank=rank
        )

    self.prediction_length = prediction_length
    self.target_dim = target_dim
    self.num_layers = num_layers
    self.num_cells = num_cells
    self.cell_type = cell_type
    self.num_parallel_samples = num_parallel_samples
    self.dropout_rate = dropout_rate
    # Store a copy: the default `[1]` is a single list object shared by
    # every call, so keeping a reference would let a mutation of one
    # instance's `cardinality` leak into later instances.
    self.cardinality = list(cardinality)
    self.embedding_dimension = embedding_dimension
    self.conditioning_length = conditioning_length
    self.use_marginal_transformation = use_marginal_transformation

    self.lags_seq = (
        lags_seq
        if lags_seq is not None
        else get_lags_for_frequency(freq_str=freq)
    )

    self.time_features = (
        time_features
        if time_features is not None
        else time_features_from_frequency_str(self.freq)
    )

    # The largest lag is added on top of the context window.
    self.history_length = self.context_length + max(self.lags_seq)
    self.pick_incomplete = pick_incomplete
    self.scaling = scaling

    if self.use_marginal_transformation:
        self.output_transform: Optional[
            Callable
        ] = cdf_to_gaussian_forward_transform
    else:
        self.output_transform = None