def get_support_set_softmax(self, logits, class_ids):
    """Softmax-normalize over the support set.

    Args:
      logits: [N_k, H*W, Q] dimensional tensor.
      class_ids: [N_k] tensor giving the support-set-id of each image.

    Returns:
      Softmax-ed logits over the support set:
      softmax(x) = np.exp(x) / np.sum(np.exp(x), axis).
    """
    # Subtract the per-class max logit for numerical stability.
    max_logit = tf.reduce_max(logits, axis=1, keepdims=True)
    max_logit = tf.math.unsorted_segment_max(max_logit, class_ids,
                                             tf.reduce_max(class_ids) + 1)
    max_logit = tf.gather(max_logit, class_ids)
    logits_reduc = logits - max_logit

    # Log-sum-exp over all spatial positions of all images sharing a class id.
    exp_x = tf.exp(logits_reduc)
    sum_exp_x = tf.reduce_sum(exp_x, axis=1, keepdims=True)
    sum_exp_x = tf.math.unsorted_segment_sum(sum_exp_x, class_ids,
                                             tf.reduce_max(class_ids) + 1)
    log_sum_exp_x = tf.math.log(sum_exp_x)
    log_sum_exp_x = tf.gather(log_sum_exp_x, class_ids)

    norm_logits = logits_reduc - log_sum_exp_x
    softmax = tf.exp(norm_logits)
    return softmax
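# Hedged usage sketch (not from the original source; assumes TF2 eager and a
# hypothetical `model` instance exposing the method above). With N_k=4
# support images in two classes, the probabilities for each (class, query)
# pair sum to ~1 across all positions of all images sharing a class id:
logits = tf.random.normal([4, 9, 5])         # N_k=4, H*W=9, Q=5
class_ids = tf.constant([0, 0, 1, 1])
probs = model.get_support_set_softmax(logits, class_ids)
per_image = tf.reduce_sum(probs, axis=1)                            # [4, 5]
per_class = tf.math.unsorted_segment_sum(per_image, class_ids, 2)   # ~ones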
def loss_fn(self, policy=None, value=None):
    adv = tf.placeholder(tf.float32, [None], name="advantages")
    returns = tf.placeholder(tf.float32, [None], name="returns")
    logli_old = tf.placeholder(tf.float32, [None], name="logli_old")
    value_old = tf.placeholder(tf.float32, [None], name="value_old")

    if not self.subenvs:
        # PPO clipped surrogate: bound the importance ratio around 1.
        ratio = tf.exp(self.policy.logli - logli_old)
        clipped_ratio = tf.clip_by_value(ratio, 1 - self.clip_ratio,
                                         1 + self.clip_ratio)

        value_err = (self.value - returns)**2
        if self.clip_value > 0.0:
            # Clip the value update around the old estimate and take the
            # pessimistic (larger) of the two squared errors.
            clipped_value = tf.clip_by_value(self.value,
                                             value_old - self.clip_value,
                                             value_old + self.clip_value)
            clipped_value_err = (clipped_value - returns)**2
            value_err = tf.maximum(value_err, clipped_value_err)

        policy_loss = -tf.reduce_mean(
            tf.minimum(adv * ratio, adv * clipped_ratio))
        value_loss = tf.reduce_mean(value_err) * self.value_coef
        entropy_loss = tf.reduce_mean(self.policy.entropy) * self.entropy_coef
    else:
        assert policy is not None and value is not None, \
            "Missing variables representing <policy> and <value>"

        ratio = tf.exp(policy.logli - logli_old)
        clipped_ratio = tf.clip_by_value(ratio, 1 - self.clip_ratio,
                                         1 + self.clip_ratio)

        value_err = (value - returns)**2
        if self.clip_value > 0.0:
            clipped_value = tf.clip_by_value(value,
                                             value_old - self.clip_value,
                                             value_old + self.clip_value)
            clipped_value_err = (clipped_value - returns)**2
            value_err = tf.maximum(value_err, clipped_value_err)

        policy_loss = -tf.reduce_mean(
            tf.minimum(adv * ratio, adv * clipped_ratio))
        value_loss = tf.reduce_mean(value_err) * self.value_coef
        entropy_loss = tf.reduce_mean(policy.entropy) * self.entropy_coef

    # we want to reduce policy and value errors, and maximize entropy,
    # but since the optimizer is minimizing, the signs are opposite
    full_loss = policy_loss + value_loss - entropy_loss

    return full_loss, [policy_loss, value_loss, entropy_loss], \
        [adv, returns, logli_old, value_old]
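# A worked numeric illustration of the clipped surrogate above (hedged
# sketch, NumPy for clarity; the values are made up). With clip_ratio = 0.2,
# a ratio of 1.5 on a positive advantage is cut back to 1.2, so the update
# cannot push the policy further in that direction:
import numpy as np

adv, ratio, clip_ratio = 2.0, 1.5, 0.2
clipped_ratio = np.clip(ratio, 1 - clip_ratio, 1 + clip_ratio)  # 1.2
surrogate = min(adv * ratio, adv * clipped_ratio)               # 2.4, not 3.0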
def gauss_kernel2D(x, Dx, Dy, gamma=1.):
    # Split the feature dimension in half; the two halves are compared
    # against the Dx and Dy center dictionaries respectively.
    h_size = x.get_shape()[-1].value // 2
    x = tf.expand_dims(x, axis=-1)
    if x.get_shape().ndims < 4:
        Dx = tf.reshape(Dx, (1, 1, -1))
        Dy = tf.reshape(Dy, (1, 1, -1))
        x1, x2 = x[:, :h_size], x[:, h_size:]
    else:
        Dy = tf.reshape(Dy, (1, 1, 1, 1, -1))
        Dx = tf.reshape(Dx, (1, 1, 1, 1, -1))
        x1, x2 = x[:, :, :, :h_size], x[:, :, :, h_size:]
    gauss_kernel = (tf.exp(-gamma * tf.square(x1 - Dx)) +
                    tf.exp(-gamma * tf.square(x2 - Dy)))
    return gauss_kernel
def gauss_kernel(x, D, gamma=1.):
    x = tf.expand_dims(x, axis=-1)
    if x.get_shape().ndims < 4:
        D = tf.reshape(D, (1, 1, -1))
    else:
        D = tf.reshape(D, (1, 1, 1, 1, -1))
    return tf.exp(-gamma * tf.square(x - D))
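# Hedged usage sketch for the two kernels above (shapes are assumptions, not
# from the original source): gauss_kernel expands each scalar feature into
# soft RBF activations over a dictionary of centers D, while gauss_kernel2D
# splits the feature dimension into (x, y) halves and compares them against
# Dx and Dy separately.
x = tf.random.normal([8, 16])       # batch of 16-dim feature vectors
D = tf.linspace(-2.0, 2.0, 10)      # 10 RBF centers
k = gauss_kernel(x, D, gamma=1.)    # -> [8, 16, 10]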
def call(self, observation, step_type=(), network_state=()):
    del step_type  # unused.
    output = tf.cast(tf.nest.flatten(observation)[0], tf.float32)
    for layer in self._mlp_layers:
        output = layer(output)
    shift, log_scale_diag = tf.split(output, 2, axis=-1)
    # Clamp the log-scale so exp() stays numerically well-behaved.
    log_scale_diag = tf.clip_by_value(log_scale_diag, -20, 2)
    base_distribution = tfp.distributions.MultivariateNormalDiag(
        loc=shift, scale_diag=tf.exp(log_scale_diag))
    distribution = SquashToSpecDistribution(
        base_distribution, self._single_action_spec)
    distribution = tf.nest.pack_sequence_as(self.output_spec, [distribution])
    return distribution, network_state
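# Hedged note: this is the familiar squashed-diagonal-Gaussian actor head
# (as in SAC-style agents). The MLP output is split into shift and log-scale
# halves, the log-scale is clamped to [-20, 2], and SquashToSpecDistribution
# (project-specific) maps samples into the bounds of the action spec.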
def sample(self, mean, log_b2, training=False):
    """Sample z ~ Laplace(mu, b) via a Gaussian scale mixture.

    With Y ~ N(0, 1) and V ~ Exponential(1) = Gamma(1, 1),
    z = mu + b * y * sqrt(2 * v) is Laplace(mu, b) distributed.
    """
    if not training:
        return mean
    # Exponential is a special case of Gamma: Exponential(lambda) = Gamma(1, lambda).
    exponential = tf.random.gamma(tf.shape(mean), alpha=1, beta=1)
    gaussian = tf.random.normal(tf.shape(mean), mean=0.0, stddev=1.0)
    # log_b2 parameterizes log(b^2), so b = exp(0.5 * log_b2).
    return mean + tf.exp(0.5 * log_b2) * tf.sqrt(2 * exponential) * gaussian
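# Hedged sanity check (assumes TF2 eager; `model` is a hypothetical instance
# exposing the sampler above). A Laplace(mu, b) variable has variance 2*b^2,
# which the Gaussian scale mixture should reproduce:
mean = tf.zeros([100000])
log_b2 = tf.math.log(tf.fill([100000], 4.0))   # b^2 = 4, so variance ~ 8
z = model.sample(mean, log_b2, training=True)
print(float(tf.math.reduce_variance(z)))       # ~ 8.0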
def loss_fn(self):
    adv = tf.placeholder(tf.float32, [None], name="advantages")
    returns = tf.placeholder(tf.float32, [None], name="returns")
    logli_old = tf.placeholder(tf.float32, [None], name="logli_old")

    ratio = tf.exp(self.policy.logli - logli_old)
    clipped_ratio = tf.clip_by_value(ratio, 1 - self.clip_ratio,
                                     1 + self.clip_ratio)

    policy_loss = -tf.reduce_mean(
        tf.minimum(adv * ratio, adv * clipped_ratio))
    # TODO clip value loss
    value_loss = tf.reduce_mean((self.value - returns)**2) * self.value_coef
    entropy_loss = tf.reduce_mean(self.policy.entropy) * self.entropy_coef

    # we want to reduce policy and value errors, and maximize entropy
    # but since optimizer is minimizing the signs are opposite
    full_loss = policy_loss + value_loss - entropy_loss

    return full_loss, [policy_loss, value_loss, entropy_loss], \
        [adv, returns, logli_old]
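# Note on the TODO above: the subenv-aware loss_fn earlier in this section
# already implements the value clipping; a hedged sketch of porting it here
# (it would need a value_old placeholder like the other inputs):
#
#   clipped_value = tf.clip_by_value(self.value, value_old - self.clip_value,
#                                    value_old + self.clip_value)
#   value_err = tf.maximum((self.value - returns)**2,
#                          (clipped_value - returns)**2)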
def sample(self, mean, log_var, training=False):
    if not training:
        return mean
    # Reparameterization trick: z = mu + sigma * eps with eps ~ N(0, 1),
    # where sigma = exp(0.5 * log_var).
    noise = tf.random.normal(tf.shape(mean), mean=0.0, stddev=1.0)
    return mean + tf.exp(0.5 * log_var) * noise
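# Hedged usage sketch (`model` is a hypothetical instance exposing the
# sampler above): the reparameterization trick keeps the sample
# differentiable w.r.t. mean and log_var, e.g. inside a VAE encoder:
mean = tf.zeros([4, 8])
log_var = tf.zeros([4, 8])                       # unit variance
z = model.sample(mean, log_var, training=True)   # stochastic during training
z_eval = model.sample(mean, log_var)             # deterministic mean at eval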