def test_conditional_missing_conditional_shape(self):
  with self.assertRaisesRegexp(
      ValueError,
      "`conditional_event_shape` must be provided when `conditional` is True"):
    tfb.AutoregressiveNetwork(params=2, conditional=True, event_shape=[4])
def test_conditional_false_with_shape(self):
  with self.assertRaisesRegexp(
      ValueError,
      "`conditional_event_shape` passed but `conditional` is set to False."):
    tfb.AutoregressiveNetwork(params=2, conditional_event_shape=[4])
def test_doc_string(self):
  # Generate data.
  n = 2000
  x2 = np.random.randn(n).astype(dtype=np.float32) * 2.
  x1 = np.random.randn(n).astype(dtype=np.float32) + (x2 * x2 / 4.)
  data = np.stack([x1, x2], axis=-1)

  # Density estimation with MADE.
  made = tfb.AutoregressiveNetwork(params=2, hidden_units=[10, 10])
  distribution = tfd.TransformedDistribution(
      distribution=tfd.Sample(tfd.Normal(0., 1.), [2]),
      bijector=tfb.MaskedAutoregressiveFlow(made))

  # Construct and fit model.
  x_ = tfkl.Input(shape=(2,), dtype=tf.float32)
  log_prob_ = distribution.log_prob(x_)
  model = tfk.Model(x_, log_prob_)

  model.compile(optimizer=tf1.train.AdamOptimizer(),
                loss=lambda _, log_prob: -log_prob)

  batch_size = 25
  model.fit(x=data,
            y=np.zeros((n, 0), dtype=np.float32),
            batch_size=batch_size,
            epochs=1,
            steps_per_epoch=1,  # Usually `n // batch_size`.
            shuffle=True,
            verbose=True)

  # Use the fitted distribution.
  self.assertAllEqual((3, 1, 2), distribution.sample((3, 1)).shape)
  self.assertAllEqual(
      (3,), distribution.log_prob(np.ones((3, 2), dtype=np.float32)).shape)
def test_doc_string_2(self):
  n = 2000
  c = np.r_[np.zeros(n // 2), np.ones(n // 2)]
  mean_0, mean_1 = 0, 5
  x = np.r_[np.random.randn(n // 2).astype(dtype=np.float32) + mean_0,
            np.random.randn(n // 2).astype(dtype=np.float32) + mean_1]
  shuffle_idxs = np.arange(n)
  np.random.shuffle(shuffle_idxs)
  x = x[shuffle_idxs]
  c = c[shuffle_idxs]

  seed = test_util.test_seed_stream()

  # Density estimation with MADE.
  made = tfb.AutoregressiveNetwork(
      params=2,
      hidden_units=[1],
      event_shape=(1,),
      kernel_initializer=tfk.initializers.VarianceScaling(
          0.1, seed=seed() % 2**31),
      conditional=True,
      conditional_event_shape=(1,))

  distribution = tfd.TransformedDistribution(
      distribution=tfd.Sample(tfd.Normal(loc=0., scale=1.), sample_shape=[1]),
      bijector=tfb.MaskedAutoregressiveFlow(made))

  # Construct and fit model.
  x_ = tfkl.Input(shape=(1,), dtype=tf.float32)
  c_ = tfkl.Input(shape=(1,), dtype=tf.float32)
  log_prob_ = distribution.log_prob(
      x_, bijector_kwargs={"conditional_input": c_})
  model = tfk.Model([x_, c_], log_prob_)

  model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.1),
                loss=lambda _, log_prob: -log_prob)

  batch_size = 25
  model.fit(x=[x, c],
            y=np.zeros((n, 0), dtype=np.float32),
            batch_size=batch_size,
            epochs=3,
            steps_per_epoch=n // batch_size,
            shuffle=False,
            verbose=True)

  # Use the fitted distribution to sample, conditioned on c = 1.
  n_samples = 1000
  cond = 1
  samples = distribution.sample(
      (n_samples,),
      bijector_kwargs={"conditional_input": cond * np.ones((n_samples, 1))},
      seed=seed())

  # Assert the sample mean is close to the conditional mean.
  self.assertAllMeansClose(samples[..., 0], mean_1, axis=0, atol=1.)
def test_conditional_wrong_shape(self):
  with self.assertRaisesRegexp(
      ValueError,
      "Parameter `conditional_event_shape` must describe a rank-1 shape"):
    tfb.AutoregressiveNetwork(
        params=2,
        conditional=True,
        event_shape=[4],
        conditional_event_shape=[10, 4])
def test_layer_no_hidden_units(self):
  made = tfb.AutoregressiveNetwork(
      params=4,
      event_shape=3,
      use_bias=False,
      hidden_degrees="random",
      kernel_constraint="unit_norm")

  self.assertEqual((2, 2, 5, 3, 4), made(np.zeros((2, 2, 5, 3))).shape)
  # With no hidden units and no bias, the only weights are the masked kernel
  # mapping 3 inputs to 3 * 4 = 12 outputs (event_size * params).
  self.assertEqual(3 * 12, self._count_trainable_params(made))
  if not tf.executing_eagerly():
    self.evaluate(tf1.initializers.variables(made.trainable_variables))
  self.assertIsAutoregressive(made, event_size=3, order="left-to-right")
def test_layer_v2_kernel_initializer(self):
  init = tf.keras.initializers.GlorotNormal()
  made = tfb.AutoregressiveNetwork(
      params=2,
      event_shape=4,
      activation="relu",
      hidden_units=[5, 5],
      kernel_initializer=init)

  self.assertEqual((4, 2), made(np.zeros(4)).shape)
  # Each dense layer has (fan_in + 1) * fan_out parameters (the +1 is the
  # bias): 4 -> 5 -> 5 -> 4 * 2 outputs.
  self.assertEqual(5 * 5 + 6 * 5 + 6 * 8, self._count_trainable_params(made))
  if not tf.executing_eagerly():
    self.evaluate(tf1.initializers.variables(made.trainable_variables))
  self.assertIsAutoregressive(made, event_size=4, order="left-to-right")
def test_layer_smaller_hidden_layers_than_input(self):
  made = tfb.AutoregressiveNetwork(
      params=1,
      event_shape=9,
      activation="relu",
      use_bias=False,
      bias_regularizer=tfk.regularizers.l1(0.5),
      bias_constraint=tf.math.abs,
      input_order="right-to-left",
      hidden_units=[5, 5])

  self.assertEqual((9, 1), made(np.zeros(9)).shape)
  # Without biases, each layer has fan_in * fan_out weights:
  # 9 -> 5 -> 5 -> 9 * 1 outputs.
  self.assertEqual(9 * 5 + 5 * 5 + 5 * 9, self._count_trainable_params(made))
  if not tf.executing_eagerly():
    self.evaluate(tf1.initializers.variables(made.trainable_variables))
  self.assertIsAutoregressive(made, event_size=9, order="right-to-left")
def test_layer_callable_activation(self):
  made = tfb.AutoregressiveNetwork(
      params=2,
      activation=tf.math.exp,
      input_order="random",
      kernel_regularizer=tfk.regularizers.l2(0.1),
      bias_initializer="ones",
      hidden_units=[9],
      hidden_degrees="equal")

  self.assertEqual((3, 5, 2), made(np.zeros((3, 5))).shape)
  # Event size 5 is inferred from the input; each layer has
  # (fan_in + 1) * fan_out parameters: (5 + 1) * 9 and (9 + 1) * (5 * 2).
  self.assertEqual(6 * 9 + 10 * 10, self._count_trainable_params(made))
  if not tf.executing_eagerly():
    self.evaluate(tf1.initializers.variables(made.trainable_variables))
  self.assertIsAutoregressive(made, event_size=5, order=made._input_order)
def test_layer_right_to_left_float64(self):
  made = tfb.AutoregressiveNetwork(
      params=3,
      event_shape=4,
      activation=None,
      input_order="right-to-left",
      dtype=tf.float64,
      hidden_degrees="random",
      hidden_units=[10, 7, 10])

  self.assertEqual((4, 3), made(np.zeros(4, dtype=np.float64)).shape)
  # (fan_in + 1) * fan_out per layer: 4 -> 10 -> 7 -> 10 -> 4 * 3 outputs.
  self.assertEqual(5 * 10 + 11 * 7 + 8 * 10 + 11 * 12,
                   self._count_trainable_params(made))
  if not tf.executing_eagerly():
    self.evaluate(tf1.initializers.variables(made.trainable_variables))
  self.assertIsAutoregressive(made, event_size=4, order="right-to-left")
def test_conditional_missing_tensor(self):
  with self.assertRaisesRegexp(
      ValueError, "`conditional_input` must be passed as a named argument"):
    made = tfb.AutoregressiveNetwork(
        params=2,
        event_shape=[4],
        conditional=True,
        conditional_event_shape=[6])
    made(np.random.normal(0, 1, (1, 4)))
def test_conditional_incorrect_layers(self):
  with self.assertRaisesRegexp(
      ValueError,
      "`conditional_input_layers` must be \"first_layers\" or \"all_layers\""):
    tfb.AutoregressiveNetwork(
        params=2,
        conditional=True,
        event_shape=[4],
        conditional_event_shape=[4],
        conditional_input_layers="non-existent-option")
def _masked_autoregressive_shift_and_log_scale_fn(hidden_units,
                                                  shift_only=False,
                                                  activation="relu",
                                                  name=None,
                                                  **kwargs):
  params = 1 if shift_only else 2
  layer = tfb.AutoregressiveNetwork(params, hidden_units=hidden_units,
                                    activation=activation, name=name,
                                    **kwargs)
  if shift_only:
    # Return only the shift; a `None` log-scale makes the flow shift-only.
    return lambda x: (layer(x)[..., 0], None)
  return layer
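# A minimal sketch (not part of the original tests) of how a helper like
# `_masked_autoregressive_shift_and_log_scale_fn` above is typically consumed:
# the returned callable is passed to `tfb.MaskedAutoregressiveFlow` as its
# `shift_and_log_scale_fn`. The event size of 3 and hidden_units=[8, 8] are
# arbitrary choices for illustration.
def _example_flow_from_shift_and_log_scale_fn():
  flow = tfb.MaskedAutoregressiveFlow(
      shift_and_log_scale_fn=_masked_autoregressive_shift_and_log_scale_fn(
          hidden_units=[8, 8], shift_only=False))
  # Transform a standard normal base distribution into a learned
  # 3-dimensional distribution.
  return tfd.TransformedDistribution(
      distribution=tfd.Sample(tfd.Normal(0., 1.), sample_shape=[3]),
      bijector=flow)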
def test_doc_string(self):
  # Generate data -- as in Figure 1 in [Papamakarios et al. (2017)][1].
  n = 2000
  x2 = np.random.randn(n) * 2.
  x1 = np.random.randn(n) + (x2 * x2 / 4.)
  data = np.stack([x1, x2], axis=-1)

  # Density estimation with MADE.
  model = tfk.Sequential([
      # NOTE: This model takes no input and outputs a Distribution.  (We use
      # the batch_size and type of the input, but there are no actual input
      # values because the last dimension of the shape is 0.)
      #
      # For conditional density estimation, the model would take the
      # conditioning values as input.
      tfkl.InputLayer(input_shape=(0,), dtype=tf.float32),

      # Given the empty input, return a standard normal distribution with
      # matching batch_shape and event_shape of [2].
      # pylint: disable=g-long-lambda
      tfpl.DistributionLambda(lambda t: tfd.MultivariateNormalDiag(
          loc=tf.zeros(tf.concat([tf.shape(t)[:-1], [2]], axis=0)),
          scale_diag=[1., 1.])),

      # Transform the standard normal distribution with event_shape of [2] to
      # the target distribution with event_shape of [2].
      tfpl.AutoregressiveTransform(
          tfb.AutoregressiveNetwork(
              params=2, hidden_units=[10], activation='relu')),
  ])

  model.compile(optimizer=tf.optimizers.Adam(),
                loss=lambda y, rv_y: -rv_y.log_prob(y))

  model.fit(x=np.zeros((n, 0)),
            y=data,
            batch_size=25,
            epochs=1,
            steps_per_epoch=1,  # Usually `n // 25`.
            verbose=True)

  distribution = model(np.zeros((0,)))

  self.assertEqual((4, 2), self.evaluate(distribution.sample(4)).shape)
  self.assertEqual(
      (5, 3),
      self.evaluate(
          distribution.log_prob(np.zeros((5, 3, 2), dtype=np.float32))).shape)
def test_doc_string_images_case_2(self):
  # Generate fake images.
  images = np.random.choice([0, 1], size=(100, 8, 8, 3))
  n, width, height, channels = images.shape

  # Reshape images to achieve desired autoregressivity.
  reshaped_images = np.transpose(
      np.reshape(images, [n, width * height, channels]),
      axes=[0, 2, 1])

  made = tfb.AutoregressiveNetwork(
      params=1,
      event_shape=[width * height],
      hidden_units=[20, 20],
      activation="relu")

  # Density estimation with MADE.
  #
  # NOTE: Parameterize an autoregressive distribution over an event_shape of
  # [channels, width * height], with univariate Bernoulli conditional
  # distributions.
  distribution = tfd.Autoregressive(
      lambda x: tfd.Independent(  # pylint: disable=g-long-lambda
          tfd.Bernoulli(logits=tf.unstack(made(x), axis=-1)[0],
                        dtype=tf.float32),
          reinterpreted_batch_ndims=2),
      sample0=tf.zeros([channels, width * height], dtype=tf.float32))

  # Construct and fit model.
  x_ = tfkl.Input(shape=(channels, width * height), dtype=tf.float32)
  log_prob_ = distribution.log_prob(x_)
  model = tfk.Model(x_, log_prob_)

  model.compile(optimizer=tf1.train.AdamOptimizer(),
                loss=lambda _, log_prob: -log_prob)

  batch_size = 10
  model.fit(x=reshaped_images,
            y=np.zeros((n, 0), dtype=np.float32),
            batch_size=batch_size,
            epochs=1,
            steps_per_epoch=1,  # Usually `n // batch_size`.
            shuffle=True,
            verbose=True)

  # Use the fitted distribution.
  self.assertAllEqual((7, channels, width * height),
                      distribution.sample(7).shape)
  self.assertAllEqual((n,), distribution.log_prob(reshaped_images).shape)
def test_conditional_broadcasting(self, input_shape, cond_shape):
  made = tfb.AutoregressiveNetwork(
      params=2,
      event_shape=[3],
      conditional=True,
      conditional_event_shape=[4])

  made_shape = tf.shape(
      made(tf.ones(input_shape), conditional_input=tf.ones(cond_shape)))
  broadcast_shape = tf.concat(
      [tf.broadcast_dynamic_shape(cond_shape[:-1], input_shape[:-1]),
       input_shape[-1:]],
      axis=0)
  self.assertAllEqual(
      self.evaluate(tf.concat([broadcast_shape, [2]], axis=0)), made_shape)
def _masked_autoregressive_gated_bijector_fn(hidden_units,
                                             activation="relu",
                                             name=None,
                                             **kwargs):
  layer = tfb.AutoregressiveNetwork(
      2, hidden_units=hidden_units, activation=activation, name=name, **kwargs)

  def _bijector_fn(x):
    if tensorshape_util.rank(x.shape) == 1:
      x = x[tf.newaxis, ...]
      reshape_output = lambda x: x[0]
    else:
      reshape_output = lambda x: x

    shift, logit_gate = tf.unstack(layer(x), axis=-1)
    shift = reshape_output(shift)
    logit_gate = reshape_output(logit_gate)
    gate = tf.nn.sigmoid(logit_gate)
    return tfb.Shift(shift=(1. - gate) * shift)(tfb.Scale(scale=gate))

  return _bijector_fn
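# A minimal sketch (not part of the original tests) showing how the gated
# helper above would be used: `tfb.MaskedAutoregressiveFlow` also accepts a
# `bijector_fn` mapping inputs to a bijector, which is exactly what
# `_masked_autoregressive_gated_bijector_fn` returns. Each step computes
# x -> gate * x + (1 - gate) * shift, a convex combination of the input and a
# learned shift. The event size of 3 and hidden_units=[8, 8] are arbitrary
# choices for illustration.
def _example_flow_from_gated_bijector_fn():
  flow = tfb.MaskedAutoregressiveFlow(
      bijector_fn=_masked_autoregressive_gated_bijector_fn(
          hidden_units=[8, 8]))
  return tfd.TransformedDistribution(
      distribution=tfd.Sample(tfd.Normal(0., 1.), sample_shape=[3]),
      bijector=flow)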