def initial_state(self):
  """The initial state value."""
  # 70% of topics are trashy, the rest are nutritious.
  num_trashy_topics = int(self._num_topics * 0.7)
  num_nutritious_topics = self._num_topics - num_trashy_topics
  trashy = tf.linspace(self._topic_min_utility, 0., num_trashy_topics)
  nutritious = tf.linspace(0., self._topic_max_utility, num_nutritious_topics)
  topic_quality_means = tf.concat([trashy, nutritious], axis=0)
  # Equal probability of each topic.
  doc_topic = ed.Categorical(
      logits=tf.zeros((self._num_docs, self._num_topics)), dtype=tf.int32)
  # Fixed standard deviation for doc quality.
  doc_quality_stddev = 0.1
  doc_quality = ed.Normal(
      loc=tf.gather(topic_quality_means, doc_topic), scale=doc_quality_stddev)
  # One-hot doc features with Gaussian noise.
  doc_features = ed.Normal(
      loc=tf.one_hot(doc_topic, depth=self._num_topics), scale=0.7)
  # All videos have the same length.
  video_length = ed.Deterministic(
      loc=tf.ones((self._num_docs,)) * self._video_length)
  return Value(
      # doc_id=0 is reserved for the "null" doc.
      doc_id=ed.Deterministic(
          loc=tf.range(start=1, limit=self._num_docs + 1, dtype=tf.int32)),
      doc_topic=doc_topic,
      doc_quality=doc_quality,
      doc_features=doc_features,
      doc_length=video_length)
def mixture_of_real_and_int():
  loc = ed.Normal(loc=0., scale=1., name="loc")
  flip = ed.Bernoulli(probs=0.5, name="flip")
  if tf.equal(flip, 1):
    x = ed.Normal(loc=loc, scale=0.5, sample_shape=5, name="x")
  else:
    x = ed.Poisson(rate=tf.nn.softplus(loc), sample_shape=3, name="x")
  return x
def linear_regression(features, prior_precision):
  w = ed.Normal(
      loc=0.,
      scale=tf.math.rsqrt(prior_precision),
      sample_shape=features.shape[1],
      name="w")
  y = ed.Normal(
      loc=tf.tensordot(features, w, [[1], [0]]), scale=1., name="y")
  return y
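# A minimal usage sketch (not from the original source): the feature matrix
# and precision below are hypothetical placeholders. Calling the program in
# eager mode draws a single joint sample of (w, y).
example_features = tf.random.normal([8, 4])
example_y = linear_regression(example_features, prior_precision=tf.constant(1.))
assert example_y.shape == (8,)  # One response per example row.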
def model(population_size):
  """Creates the Variables for this demo."""
  pref_dimension = 3
  # pylint: disable=g-long-lambda
  world_state = Variable(
      name="world state",
      spec=ValueSpec(
          three_headed_monkeys=Space(
              space=spaces.Box(
                  low=np.array([-np.Inf] * pref_dimension),
                  high=np.array([np.Inf] * pref_dimension)))))
  social_network = Variable(
      name="social network",
      spec=ValueSpec(
          n=Space(
              space=spaces.Box(
                  low=np.array([0] * population_size),
                  high=np.array([1] * population_size)))))
  user_state = Variable(
      name="user state",
      spec=ValueSpec(
          preference=Space(
              space=spaces.Box(
                  low=np.array([-np.Inf] * pref_dimension),
                  high=np.array([np.Inf] * pref_dimension)))))
  # Static variables.
  world_state.initial_value = variable.value(
      lambda: Value(three_headed_monkeys=ed.Normal(
          loc=[3.14] * pref_dimension, scale=[0.01], sample_shape=(1,))))
  social_network.initial_value = variable.value(
      lambda: Value(n=ed.Bernoulli(
          probs=0.01 * tf.ones((population_size, population_size)),
          dtype=tf.float32)))
  # Dynamic variables.
  user_state.initial_value = variable.value(
      lambda: Value(preference=ed.Normal(
          loc=[3.14] * pref_dimension,
          scale=[4.13],
          sample_shape=population_size)))
  user_state.value = variable.value(
      lambda previous_user_state, social_network: Value(preference=ed.Normal(
          loc=(0.7 * previous_user_state.get("preference") +
               0.3 * tf.matmul(social_network.get("n"),
                               previous_user_state.get("preference"))),
          scale=[0.01])),
      dependencies=[user_state.previous, social_network])
  return [user_state, world_state, social_network]
def dis_test_real_slate_docs(self):
  # Upscale the parameters in this test and override the default test setup.
  self._num_users = 50
  self._num_docs = 100
  self._num_topics = 20
  self._slate_size = 5
  self._config = {
      'history_length': 5,
      'num_users': self._num_users,
      'num_docs': self._num_docs,
      'num_topics': self._num_topics,
      'slate_size': self._slate_size,
  }
  doc_state = Value(
      state=ed.Deterministic(
          loc=tf.random.uniform(
              shape=[self._config['num_users'], 5],
              minval=0,
              maxval=self._config['num_docs'],
              dtype=tf.int32))).prefixed_with('doc_history')
  consumption_state = Value(
      state=ed.Deterministic(
          loc=tf.random.uniform(
              shape=[self._config['num_users'], 5],
              minval=0.0,
              maxval=1.0,
              dtype=tf.float32))).prefixed_with('ctime_history')
  available_docs = Value(
      doc_id=ed.Deterministic(
          loc=tf.range(
              start=1, limit=self._config['num_docs'] + 1, dtype=tf.int32)),
      doc_topic=ed.Deterministic(loc=tf.ones((self._num_docs,))),
      doc_quality=ed.Normal(
          loc=tf.zeros((self._config['num_docs'],)), scale=0.1),
      doc_features=ed.Deterministic(
          loc=tf.ones((self._num_docs, self._num_topics)) * 1.0 /
          self._num_topics),
      doc_length=ed.Deterministic(loc=tf.ones((self._num_docs,))))
  self._recommender = cf_recommender.CollabFilteringRecommender(self._config)
  slate_docs = self.evaluate(
      self._recommender.slate_docs(
          doc_state.union(consumption_state), {}, available_docs).as_dict)
  # Verify all the shapes and presented keys.
  self.assertCountEqual(
      ['doc_id', 'doc_topic', 'doc_quality', 'doc_features', 'doc_length'],
      slate_docs.keys())
  np.testing.assert_array_equal(
      [self._config['num_users'], self._config['slate_size']],
      np.shape(slate_docs['doc_id']))
  np.testing.assert_array_equal(
      [self._config['num_users'], self._config['slate_size']],
      np.shape(slate_docs['doc_topic']))
  np.testing.assert_array_equal(
      [self._config['num_users'], self._config['slate_size']],
      np.shape(slate_docs['doc_quality']))
  np.testing.assert_array_equal(
      [self._config['num_users'], self._config['slate_size'],
       self._config['num_topics']],
      np.shape(slate_docs['doc_features']))
  np.testing.assert_array_equal(
      [self._config['num_users'], self._config['slate_size']],
      np.shape(slate_docs['doc_length']))
def call(self, sampled_global_latents, num_targets, local_x_y_encodings):
  inputs = tf.tile(sampled_global_latents, [1, num_targets, 1])
  if local_x_y_encodings is not None:
    inputs = tf.concat([inputs, local_x_y_encodings], axis=-1)
  logits = self._net(inputs)
  mean, untransformed_std = tf.split(logits, 2, axis=-1)
  std = tf.nn.softplus(untransformed_std)
  return ed.Normal(loc=mean, scale=std)
def testNCPNormalOutput(self):
  batch_size = 3
  features = ed.Normal(loc=tf.random.normal([2 * batch_size, 1]), scale=1.)
  labels = np.random.rand(batch_size).astype(np.float32)
  model = ed.layers.NCPNormalOutput(mean=labels)
  predictions = model(features)
  self.assertLen(model.losses, 1)
  self.assertAllEqual(
      tf.convert_to_tensor(features[:batch_size]),
      tf.convert_to_tensor(predictions))
def __call__(self, x):
  """Computes regularization given an ed.Normal random variable as input."""
  if not isinstance(x, ed.RandomVariable):
    raise ValueError('Input must be an ed.RandomVariable.')
  prior = ed.Independent(
      ed.Normal(loc=x.distribution.mean(), scale=self.stddev).distribution,
      reinterpreted_batch_ndims=len(x.distribution.event_shape))
  regularization = x.distribution.kl_divergence(prior.distribution)
  return self.scale_factor * regularization
def testCauchyKLDivergence(self):
  shape = (3,)
  regularizer = ed.regularizers.get('cauchy_kl_divergence')
  variational_posterior = ed.Independent(
      ed.Normal(loc=tf.zeros(shape), scale=1.).distribution,
      reinterpreted_batch_ndims=1)
  kl = regularizer(variational_posterior)
  # KL uses a single-sample estimate, which is not necessarily >0. We only
  # check shape.
  self.assertEqual(kl.shape, ())
def testNCPNormalOutput(self):
  batch_size = 3
  features = ed.Normal(loc=tf.random.normal([2 * batch_size, 1]), scale=1.)
  labels = np.random.rand(batch_size).astype(np.float32)
  model = ed.layers.NCPNormalOutput(mean=labels)
  predictions = model(features)
  features_val, predictions_val = self.evaluate([features, predictions])
  self.assertLen(model.losses, 1)
  self.assertAllEqual(features_val[:batch_size], predictions_val)
def testNormalEmpiricalBayesKLDivergenceTFFunction(self):
  """Checks that KL evaluates properly multiple times when compiled."""
  shape = (3,)
  regularizer = ed.regularizers.get('normal_empirical_bayes_kl_divergence')
  regularizer_compiled = tf.function(regularizer)
  weights_one = ed.Independent(
      ed.Normal(loc=tf.zeros(shape), scale=1.).distribution,
      reinterpreted_batch_ndims=len(shape))
  kl_one = regularizer(weights_one).numpy()
  kl_one_c = regularizer_compiled(weights_one).numpy()

  weights_two = ed.Independent(
      ed.Normal(loc=5. + tf.zeros(shape), scale=1.).distribution,
      reinterpreted_batch_ndims=len(shape))
  kl_two = regularizer(weights_two).numpy()
  kl_two_c = regularizer_compiled(weights_two).numpy()

  self.assertAllClose(kl_one, kl_one_c)
  self.assertAllClose(kl_two, kl_two_c)
  self.assertNotAlmostEqual(kl_one_c, kl_two_c)
def model(home_id, away_id, score1_obs=None, score2_obs=None):
  # Priors.
  alpha = ed.Normal(loc=0.0, scale=1.0, name="alpha")
  sd_att = ed.StudentT(df=3.0, loc=0.0, scale=2.5, name="sd_att")
  sd_def = ed.StudentT(df=3.0, loc=0.0, scale=2.5, name="sd_def")
  home = ed.Normal(loc=0.0, scale=1.0, name="home")  # Home advantage.
  nt = len(np.unique(home_id))
  # Team-specific model parameters.
  attack = ed.Normal(loc=0., scale=sd_att, sample_shape=nt, name="attack")
  defend = ed.Normal(loc=0., scale=sd_def, sample_shape=nt, name="defend")
  # Likelihood.
  theta1 = tf.exp(alpha + home + attack[home_id] - defend[away_id])
  theta2 = tf.exp(alpha + attack[away_id] - defend[home_id])
  s1 = ed.Poisson(theta1, name="s1")
  s2 = ed.Poisson(theta2, name="s2")
  return s1, s2
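# A minimal prior-predictive sketch (not from the original source): the team
# indices below are hypothetical placeholders. Calling the program in eager
# mode draws one joint sample of the two scores for each fixture.
example_home_id = np.array([0, 1, 2])
example_away_id = np.array([1, 2, 0])
example_s1, example_s2 = model(example_home_id, example_away_id)
print(example_s1.shape, example_s2.shape)  # Each has shape [3].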
def testTrainableNormalKLDivergenceStddev(self):
  tf.random.set_seed(83271)
  shape = (3,)
  regularizer = ed.regularizers.get('trainable_normal_kl_divergence_stddev')
  variational_posterior = ed.Independent(
      ed.Normal(loc=tf.zeros(shape), scale=1.).distribution,
      reinterpreted_batch_ndims=1)
  kl = regularizer(variational_posterior)
  self.assertGreaterEqual(kl, 0.)

  prior_stddev = regularizer.stddev_constraint(regularizer.stddev)
  self.assertAllClose(prior_stddev, np.ones(prior_stddev.shape), atol=0.1)
def test_chained_rv(self):
  # This computes the log-probability of a sequence
  #   z[0] = (0., 1.)
  #   z[t][0] = Normal(loc=z[t-1][0], scale=1)
  #   z[t][1] = Normal(loc=z[t][0] + 1., scale=2)
  # against the observation
  #   o = [(0., 1.), (1., 2.), (2., 3.), (3., 4.)]
  z, o, obs_0, obs_1 = self.chained_rv_test_network()
  ref = 0.0
  for i in range(4):
    self.assertAllClose(
        ref,
        log_probability.log_probability(
            variables=[z], observation=[o], num_steps=i))
    if i < 3:
      ref += (
          ed.Normal(loc=obs_0[i], scale=1.0).distribution.log_prob(
              obs_0[i + 1]) +
          ed.Normal(loc=obs_0[i + 1] + 1.0, scale=2.0).distribution.log_prob(
              obs_1[i + 1]))
def testNormalEmpiricalBayesKLDivergence(self, gen_stddev, eb_prior_stddev):
  """Tests ed.regularizers.NormalEmpiricalBayesKLDivergence.

  Checks that the EB KL estimate is always smaller than, but close to, the
  true generating Normal-InverseGamma KL, since the EB prior is explicitly
  optimized.

  Args:
    gen_stddev: Standard deviation of the generating normal distribution.
    eb_prior_stddev: Standard deviation of the EB hyperprior.
  """
  tf.random.set_seed(89323)
  shape = (99, 101)
  gen_mean = 0.
  eb_prior_mean = eb_prior_stddev**2
  cvar = (eb_prior_mean / eb_prior_stddev)**2
  variance_concentration = cvar + 2.
  variance_scale = eb_prior_mean * (cvar + 1.)
  weight = ed.Independent(
      ed.Normal(gen_mean + tf.zeros(shape), gen_stddev).distribution,
      reinterpreted_batch_ndims=len(shape))
  # Compute KL(q(w) || N(w | gen_mean, gen_stddev)) - log IG(gen_stddev**2)
  # under a fixed setting of the prior stddev.
  normal_regularizer = ed.regularizers.NormalKLDivergence(
      mean=gen_mean, stddev=gen_stddev)
  kl = normal_regularizer(weight)
  kl -= tf.reduce_sum(
      ed.InverseGamma(variance_concentration,
                      variance_scale).distribution.log_prob(gen_stddev**2))

  eb_regularizer = ed.regularizers.NormalEmpiricalBayesKLDivergence(
      mean=gen_mean,
      variance_concentration=variance_concentration,
      variance_scale=variance_scale)
  eb_kl = eb_regularizer(weight)
  # Normalize the comparison by the total number of weights. (Note this also
  # scales the IG log prob.)
  kl /= float(np.prod(shape))
  eb_kl /= float(np.prod(shape))
  kl_value, eb_kl_value = self.evaluate([kl, eb_kl])
  self.assertGreaterEqual(kl_value, eb_kl_value)
  self.assertAlmostEqual(
      kl_value, eb_kl_value, delta=0.05,
      msg='Parameters score KL=%.6f on generating Normal-IG KL and KL=%.6f '
      'on EB-fitted KL, too much difference.' % (kl_value, eb_kl_value))
def testNormalKLDivergence(self):
  shape = (3,)
  regularizer = ed.regularizers.get('normal_kl_divergence')
  variational_posterior = ed.Independent(
      ed.Normal(loc=tf.zeros(shape), scale=1.).distribution,
      reinterpreted_batch_ndims=1)
  kl = regularizer(variational_posterior)
  self.assertGreaterEqual(kl, 0.)

  dataset_size = 100
  scale_factor = 1. / dataset_size
  regularizer = ed.regularizers.NormalKLDivergence(scale_factor=scale_factor)
  scaled_kl = regularizer(variational_posterior)
  self.assertEqual(scale_factor * kl, scaled_kl)
def next_state(self, previous_state, _, slate_docs):
  """The state value after the initial value."""
  # Compute the improvement of slate scores.
  slate_doc_features = slate_docs.get('features')
  slate_doc_affinities = self._affinity_model.affinities(
      previous_state.get('intent'), slate_doc_features).get('affinities')
  max_slate_utility = tf.reduce_max(slate_doc_affinities, axis=-1) + 2.0
  improvement = max_slate_utility - previous_state.get('max_slate_utility')
  next_satisfaction = self._sat_sensitivity * previous_state.get(
      'satisfaction') + improvement
  return Value(
      satisfaction=ed.Normal(loc=next_satisfaction, scale=0.01),
      intent=self._intent_model.next_state(
          Value(state=previous_state.get('intent'))).get('state'),
      max_slate_utility=max_slate_utility)
def next_state(self, previous_state, user_response, slate_docs):
  """The state value after the initial value."""
  user_interests = previous_state.get('user_interests')
  chosen_docs = user_response.get('choice')
  chosen_doc_features = selectors.get_chosen(slate_docs, chosen_docs)
  doc_features = chosen_doc_features.get('doc_features')
  # Define similarities to be affinities(user_interests, doc_features) + 2.
  similarities = self._utility_model.affinities(
      user_interests, doc_features, False).get('affinities') + 2.0
  return Value(
      utilities=ed.Normal(
          loc=similarities, scale=self._utility_stddev, validate_args=True),
      user_interests=ed.Independent(
          tfd.Deterministic(user_interests + self._interest_step_size *
                            (user_interests - doc_features)),
          reinterpreted_batch_ndims=1))
def testUniformKLDivergence(self):
  shape = (3,)
  regularizer = ed.regularizers.get('uniform_kl_divergence')
  variational_posterior = ed.Independent(
      ed.Normal(loc=tf.zeros(shape), scale=1.).distribution,
      reinterpreted_batch_ndims=1)
  kl = regularizer(variational_posterior)
  kl_value = self.evaluate(kl)
  self.assertNotEqual(kl_value, 0.)

  dataset_size = 100
  scale_factor = 1. / dataset_size
  regularizer = ed.regularizers.UniformKLDivergence(scale_factor=scale_factor)
  kl = regularizer(variational_posterior)
  scaled_kl_value = self.evaluate(kl)
  self.assertAlmostEqual(scale_factor * kl_value, scaled_kl_value)
def multilayer_perceptron(n_examples, input_shape, output_scaler=1.):
  """Builds a single-hidden-layer Bayesian feedforward network.

  Args:
    n_examples: Number of examples in the training set.
    input_shape: tf.TensorShape.
    output_scaler: Float to scale the mean predictions. Training is faster and
      more stable when both the inputs and outputs are normalized. To avoid
      affecting metrics such as RMSE and NLL, the outputs need to be scaled
      back (de-normalized; the mean doesn't matter) using output_scaler.

  Returns:
    tf.keras.Model.
  """
  p_fn, q_fn = mean_field_fn(empirical_bayes=True)

  def normalized_kl_fn(q, p, _):
    return q.kl_divergence(p) / tf.cast(n_examples, tf.float32)

  inputs = tf.keras.layers.Input(shape=input_shape)
  hidden = tfp.layers.DenseLocalReparameterization(
      50,
      activation='relu',
      kernel_prior_fn=p_fn,
      kernel_posterior_fn=q_fn,
      bias_prior_fn=p_fn,
      bias_posterior_fn=q_fn,
      kernel_divergence_fn=normalized_kl_fn,
      bias_divergence_fn=normalized_kl_fn)(inputs)
  loc = tfp.layers.DenseLocalReparameterization(
      1,
      activation=None,
      kernel_prior_fn=p_fn,
      kernel_posterior_fn=q_fn,
      bias_prior_fn=p_fn,
      bias_posterior_fn=q_fn,
      kernel_divergence_fn=normalized_kl_fn,
      bias_divergence_fn=normalized_kl_fn)(hidden)
  loc = tf.keras.layers.Lambda(lambda x: x * output_scaler)(loc)
  scale = tfp.layers.VariableLayer(
      shape=(), initializer=tf.keras.initializers.Constant(-3.))(loc)
  scale = tf.keras.layers.Activation('softplus')(scale)
  outputs = tf.keras.layers.Lambda(
      lambda x: ed.Normal(loc=x[0], scale=x[1]))((loc, scale))
  return tf.keras.Model(inputs=inputs, outputs=outputs)
def call(self, unlabelled_x, attentive_encodings, sampled_local_latents,
         sampled_global_latents):
  inputs = unlabelled_x
  if self._model_type in ['cnp', 'acnp', 'anp', 'acns', 'fully_connected']:
    inputs = tf.concat([inputs, attentive_encodings], axis=-1)
  if self._model_type in ['acns', 'fully_connected']:
    inputs = tf.concat([inputs, sampled_local_latents], axis=-1)
  if self._model_type in ['np', 'anp', 'fully_connected']:
    tiled_global_latents = tf.tile(
        sampled_global_latents, [1, tf.shape(unlabelled_x)[1], 1])
    inputs = tf.concat([inputs, tiled_global_latents], axis=-1)
  logits = self._net(inputs)
  mean, untransformed_std = tf.split(logits, 2, axis=-1)
  if self._output_activation is not None:
    mean = self._output_activation(mean)
  std = tf.nn.softplus(untransformed_std)
  return ed.Normal(loc=mean, scale=std)
def multilayer_perceptron(input_shape, output_scaler=1.):
  """Builds a single-hidden-layer feedforward network.

  Args:
    input_shape: tf.TensorShape.
    output_scaler: Float to scale the mean predictions. Training is faster and
      more stable when both the inputs and outputs are normalized. To avoid
      affecting metrics such as RMSE and NLL, the outputs need to be scaled
      back (de-normalized; the mean doesn't matter) using output_scaler.

  Returns:
    tf.keras.Model.
  """
  inputs = tf.keras.layers.Input(shape=input_shape)
  hidden = tf.keras.layers.Dense(50, activation='relu')(inputs)
  loc = tf.keras.layers.Dense(1, activation=None)(hidden)
  loc = tf.keras.layers.Lambda(lambda x: x * output_scaler)(loc)
  # The variable layer must depend on a symbolic input tensor.
  scale = VariableInputLayer((), constraint='softplus')(inputs)
  outputs = tf.keras.layers.Lambda(
      lambda x: ed.Normal(loc=x[0], scale=x[1]))((loc, scale))
  return tf.keras.Model(inputs=inputs, outputs=outputs)
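# A minimal usage sketch (not from the original source): the input dimension
# and batch size below are hypothetical, and `VariableInputLayer` is assumed
# to be the helper defined alongside this function. The output layer wraps an
# ed.Normal, so a forward pass yields per-example predictions of shape
# [batch, 1].
example_model = multilayer_perceptron(input_shape=(8,))
example_batch = tf.random.normal([4, 8])
example_predictions = example_model(example_batch)
print(example_predictions.shape)  # (4, 1)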
def testTransformedRandomVariable(self):

  class Exp(tf.keras.layers.Layer):
    """Exponential activation function for reversible networks."""

    def __call__(self, inputs, *args, **kwargs):
      if not isinstance(inputs, ed.RandomVariable):
        return super(Exp, self).__call__(inputs, *args, **kwargs)
      return ed.TransformedRandomVariable(inputs, self)

    def call(self, inputs):
      return tf.exp(inputs)

    def reverse(self, inputs):
      return tf.math.log(inputs)

    def log_det_jacobian(self, inputs):
      return -tf.math.log(inputs)

  x = ed.Normal(0., 1.)
  y = Exp()(x)
  y_sample = self.evaluate(y.distribution.sample())
  y_log_prob = self.evaluate(y.distribution.log_prob(y_sample))
  self.assertGreater(y_sample, 0.)
  self.assertTrue(np.isfinite(y_log_prob))
def model():
  x = ed.Normal(loc=0., scale=1., name="x")
  y = ed.Normal(loc=x, scale=1., name="y")
  return x + y
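# A minimal sketch (not from the original source), assuming Edward2's
# `ed.tape` tracing utility is available: running the program under a tape
# records each named random variable, so "x" and "y" can be inspected after a
# forward sample.
with ed.tape() as model_tape:
  example_sum = model()
print(list(model_tape.keys()))  # ['x', 'y']
print(example_sum)  # One sampled value of x + y.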
def model():
  x = ed.Normal(loc=-5., scale=1e-8, name="x")
  y = ed.Normal(loc=x, scale=1e-8, name="y")
  return x, y
def call(self,
         unlabelled_x,
         labelled_x,
         labelled_y,
         sampled_global_latents=None,
         attentive_encodings=None,
         lengthscale_1=1.,
         lengthscale_2=1.):

  def _get_mean_var(inputs):
    logits = self._net(inputs)
    mean, untransformed_var = tf.split(logits, 2, axis=-1)
    if self._output_activation is not None:
      mean = self._output_activation(mean)
    var = tf.nn.softplus(untransformed_var)
    return mean, var

  tiled_unlabelled_dataset_encoding = tf.tile(
      sampled_global_latents, [1, tf.shape(unlabelled_x)[1], 1])
  tiled_labelled_dataset_encoding = tf.tile(
      sampled_global_latents, [1, tf.shape(labelled_x)[1], 1])
  if self._uncertainty_type == 'attentive_gp':
    if self._net is not None:
      unlabelled_inputs = tf.concat(
          [unlabelled_x, tiled_unlabelled_dataset_encoding], axis=-1)
      global_unlabelled_mean, _ = _get_mean_var(unlabelled_inputs)
      labelled_inputs = tf.concat(
          [labelled_x, tiled_labelled_dataset_encoding], axis=-1)
      global_labelled_mean, _ = _get_mean_var(labelled_inputs)
    else:
      global_unlabelled_mean = 0.
      global_labelled_mean = 0.
    mean = global_unlabelled_mean + self._attention_mean(
        unlabelled_x,
        labelled_x,
        labelled_y - global_labelled_mean,
        normalize=True,
        scale=lengthscale_1)
    k_xx = 1.
    k_xd = self._attention_scale_1(
        unlabelled_x,
        labelled_x,
        scale=lengthscale_2,
        normalize=True,
        weights_only=True)
    w_xd = self._attention_scale_2(
        unlabelled_x,
        labelled_x,
        scale=lengthscale_1,
        normalize=True,
        weights_only=True)
    var = k_xx - tf.reduce_sum(w_xd * k_xd, axis=-1, keepdims=True)
  else:
    inputs = tf.concat(
        [unlabelled_x, tiled_unlabelled_dataset_encoding, attentive_encodings],
        axis=-1)
    mean, var = _get_mean_var(inputs)
  std = tf.sqrt(var + eps)
  return ed.Normal(loc=mean, scale=std)
def call(self, avg_dataset_encodings):
  logits = self._net(avg_dataset_encodings)
  mean, untransformed_std = tf.split(logits, 2, axis=-1)
  std = tf.nn.softplus(untransformed_std) + eps
  return ed.Normal(loc=mean, scale=std)
def model_builtin():
  return ed.Normal(1., 0.1, name="x")
def normal_with_unknown_mean():
  loc = ed.Normal(loc=0., scale=1., name="loc")
  x = ed.Normal(loc=loc, scale=0.5, sample_shape=5, name="x")
  return x
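# A minimal sketch (not from the original source), assuming Edward2's
# `ed.make_log_joint_fn` transformation is available: it turns the generative
# program into a joint log-density over its named random variables. The
# values passed for "loc" and "x" below are hypothetical.
log_joint = ed.make_log_joint_fn(normal_with_unknown_mean)
example_log_prob = log_joint(loc=0., x=tf.zeros(5))  # Scalar log p(loc, x).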
def variational():
  loc = tf1.get_variable("loc", [])
  qz = ed.Normal(loc=loc, scale=0.5, name="qz")
  return qz