from functools import reduce
from itertools import product
from typing import List, Optional, Tuple, Union

import mxnet as mx
import numpy as np
from pydantic import PositiveFloat, PositiveInt

# NOTE: the GluonTS import paths below are assumptions; older releases expose these
# names under `gluonts.model.common` and `gluonts.distribution` instead of `gluonts.mx`.
from gluonts.mx import Tensor
from gluonts.mx.distribution import DistributionOutput

# Batch size used by the SGD fitting helper below (assumed value; the constant is
# defined elsewhere in the original module).
BATCH_SIZE = 32


def test_distribution_output_mean(
    distr_out: DistributionOutput,
    data: Tensor,
    loc: List[Union[None, Tensor]],
    scale: List[Union[None, Tensor]],
    expected_batch_shape: Tuple,
    expected_event_shape: Tuple,
):
    # Project the network output `data` onto the distribution arguments and check that
    # the mean of the resulting distribution has the expected shape, for every
    # combination of (optional) affine location and scale.
    args_proj = distr_out.get_args_proj()
    args_proj.initialize()
    args = args_proj(data)

    for l, s in product(loc, scale):
        distr = distr_out.distribution(args, loc=l, scale=s)
        assert distr.mean.shape == expected_batch_shape + expected_event_shape
def test_distribution_output_shapes(
    distr_out: DistributionOutput,
    data: Tensor,
    loc: List[Union[None, Tensor]],
    scale: List[Union[None, Tensor]],
    expected_batch_shape: Tuple,
    expected_event_shape: Tuple,
):
    # Check batch/event shapes of the distribution, of its samples, and of the loss,
    # for every combination of (optional) affine location and scale.
    args_proj = distr_out.get_args_proj()
    args_proj.initialize()
    args = args_proj(data)

    assert distr_out.event_shape == expected_event_shape

    for l, s in product(loc, scale):
        distr = distr_out.distribution(args, loc=l, scale=s)

        assert distr.batch_shape == expected_batch_shape
        assert distr.event_shape == expected_event_shape

        x = distr.sample()
        assert x.shape == distr.batch_shape + distr.event_shape

        loss = distr.loss(x)
        assert loss.shape == distr.batch_shape

        x1 = distr.sample(num_samples=1)
        assert x1.shape == (1,) + distr.batch_shape + distr.event_shape

        x3 = distr.sample(num_samples=3)
        assert x3.shape == (3,) + distr.batch_shape + distr.event_shape
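# A minimal usage sketch (not part of the original module) showing how the two helpers
# above can be exercised. `GaussianOutput` is assumed to be importable from
# `gluonts.mx.distribution`; older GluonTS releases expose it under `gluonts.distribution`
# instead. For a Gaussian, projecting data of shape (3, 4, 5, 6) yields a batch shape of
# (3, 4, 5) and an empty event shape.
def _example_gaussian_shape_checks():
    from gluonts.mx.distribution import GaussianOutput

    data = mx.nd.random.normal(shape=(3, 4, 5, 6))
    loc = [None, mx.nd.zeros(shape=(3, 4, 5))]
    scale = [None, mx.nd.ones(shape=(3, 4, 5))]

    test_distribution_output_shapes(
        GaussianOutput(),
        data,
        loc,
        scale,
        expected_batch_shape=(3, 4, 5),
        expected_event_shape=(),
    )
    test_distribution_output_mean(
        GaussianOutput(),
        data,
        loc,
        scale,
        expected_batch_shape=(3, 4, 5),
        expected_event_shape=(),
    )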
def maximum_likelihood_estimate_sgd(
    distr_output: DistributionOutput,
    samples: mx.nd.NDArray,
    init_biases: Optional[List[mx.nd.NDArray]] = None,
    num_epochs: PositiveInt = PositiveInt(5),
    learning_rate: PositiveFloat = PositiveFloat(1e-2),
    hybridize: bool = True,
) -> List[np.ndarray]:
    # Fit the parameters of `distr_output` to `samples` by minimizing the negative
    # log-likelihood with SGD, and return the estimated distribution arguments.
    model_ctx = mx.cpu()

    arg_proj = distr_output.get_args_proj()
    arg_proj.initialize()

    if hybridize:
        arg_proj.hybridize()

    if init_biases is not None:
        for param, bias in zip(arg_proj.proj, init_biases):
            param.params[param.prefix + "bias"].initialize(
                mx.initializer.Constant(bias), force_reinit=True
            )

    trainer = mx.gluon.Trainer(
        arg_proj.collect_params(),
        "sgd",
        {"learning_rate": learning_rate, "clip_gradient": 10.0},
    )

    # The input data to our model is one-dimensional.
    dummy_data = mx.nd.array(np.ones((len(samples), 1)))

    train_data = mx.gluon.data.DataLoader(
        mx.gluon.data.ArrayDataset(dummy_data, samples),
        batch_size=BATCH_SIZE,
        shuffle=True,
    )

    for e in range(num_epochs):
        cumulative_loss = 0
        num_batches = 0

        for i, (data, sample_label) in enumerate(train_data):
            data = data.as_in_context(model_ctx)
            sample_label = sample_label.as_in_context(model_ctx)
            with mx.autograd.record():
                distr_args = arg_proj(data)
                distr = distr_output.distribution(distr_args)
                loss = distr.loss(sample_label)
                if not hybridize:
                    assert loss.shape == distr.batch_shape
            loss.backward()
            trainer.step(BATCH_SIZE)
            num_batches += 1

            cumulative_loss += mx.nd.mean(loss).asscalar()

        assert not np.isnan(cumulative_loss)
        print("Epoch %s, loss: %s" % (e, cumulative_loss / num_batches))

    if len(distr_args[0].shape) == 1:
        return [
            param.asnumpy()
            for param in arg_proj(mx.nd.array(np.ones((1, 1))))
        ]

    # Some outputs (e.g. the zero-inflated negative binomial) project onto nested
    # argument lists, so flatten them before returning.
    ls = [
        [p.asnumpy() for p in param]
        for param in arg_proj(mx.nd.array(np.ones((1, 1))))
    ]
    return reduce(lambda x, y: x + y, ls)
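# A minimal usage sketch (not part of the original module): fit a Gaussian to synthetic
# samples and inspect the recovered parameters. The `GaussianOutput` import path is an
# assumption (see above), and the number of epochs needed for a tight fit is illustrative.
def _example_gaussian_mle_sketch():
    from gluonts.mx.distribution import GaussianOutput

    mu, sigma = 2.0, 0.5
    samples = mx.nd.random.normal(loc=mu, scale=sigma, shape=(3000,))

    mu_hat, sigma_hat = maximum_likelihood_estimate_sgd(
        GaussianOutput(), samples, num_epochs=PositiveInt(10)
    )
    print("estimated mu=%s, sigma=%s" % (mu_hat, sigma_hat))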