from tensorflow.keras import backend as K

def loss(y_true, y_pred):
    # `weights` is a per-class weight vector captured from the enclosing scope.
    # Scale predictions so that the class probabilities of each sample sum to 1.
    y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
    # Clip to prevent NaNs and Infs from log(0).
    y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
    # Weighted categorical cross-entropy.
    loss = y_true * K.log(y_pred) * weights
    loss = -K.sum(loss, -1)
    return loss
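A minimal sketch of how this loss is typically wrapped so that `weights` reaches the two-argument Keras loss signature; the factory name `weighted_categorical_crossentropy` and the example weight vector are assumptions, not part of the original snippet.

import numpy as np
from tensorflow.keras import backend as K

def weighted_categorical_crossentropy(weights):
    # Hypothetical factory: capture the per-class weight vector in a
    # closure, since Keras only passes (y_true, y_pred) to a loss.
    weights = K.variable(weights)

    def loss(y_true, y_pred):
        y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
        y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
        return -K.sum(y_true * K.log(y_pred) * weights, -1)

    return loss

# Usage (assumed): up-weight a rare third class.
# model.compile(optimizer='adam',
#               loss=weighted_categorical_crossentropy(np.array([1., 1., 5.])))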
Example #2
    def train_critic(self, batches):
        # `batches` is a list of transition tuples
        # (img, speed, action, reward, next_img, next_speed, done);
        # transposing groups each field into its own column.
        batches = np.array(batches).transpose()

        imgs = np.vstack(batches[0])
        speeds = np.vstack(batches[1])
        actions = np.vstack(batches[2])
        rewards = np.vstack(batches[3])
        next_imgs = np.vstack(batches[4])
        next_speeds = np.vstack(batches[5])
        dones = np.vstack(batches[6].astype(int))

        speeds = np.reshape(speeds, (-1, 1))
        next_speeds = np.reshape(next_speeds, (-1, 1))

        # TD3 target policy smoothing: add clipped Gaussian noise (a single
        # noise vector shared across the batch) to the target actor's
        # actions, then clip each action dimension back into its valid
        # range (here a 2-D action, e.g. [-0.8, 0.8] and [0, 1]).
        noise = np.clip(
            np.random.randn(2) * self.policy_noise, -self.noise_clip,
            self.noise_clip)
        target_actions = self.actor_target([next_imgs, next_speeds]) + noise
        target_actions = K.clip(target_actions, [-0.8, 0], [0.8, 1])

        # Clipped double-Q learning: take the element-wise minimum of the
        # two target critics to curb overestimation.
        target_q1 = self.critic_target1.predict(
            [next_imgs, next_speeds, target_actions], steps=1)
        target_q2 = self.critic_target2.predict(
            [next_imgs, next_speeds, target_actions], steps=1)
        target_q = K.minimum(target_q1, target_q2)
        # Bellman target: y = r + gamma * (1 - done) * min(Q1', Q2').
        rewards += self.gamma * target_q * (1 - dones)

        q1 = self.critic1([imgs, speeds, actions])
        q2 = self.critic2([imgs, speeds, actions])
        with tf.name_scope('critic_loss'):
            loss1 = tf.reduce_mean(
                tf.keras.losses.mean_squared_error(rewards, q1))
            loss2 = tf.reduce_mean(
                tf.keras.losses.mean_squared_error(rewards, q2))
            loss = loss1 + loss2
            closs_scalar = tf.compat.v1.summary.scalar('critic_loss', loss)
        # Update both critics in a single step against the summed loss.
        grads = tf.gradients(
            loss,
            self.critic1.trainable_weights + self.critic2.trainable_weights)
        self.critic1.optimizer.apply_gradients(
            zip(
                grads, self.critic1.trainable_weights +
                self.critic2.trainable_weights))
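A quick usage sketch (assumed, not from the original repo): the field order of each transition tuple is inferred from how train_critic() unpacks `batches` above; `replay_buffer` and `sample_and_train` are hypothetical names.

import random

replay_buffer = []  # filled elsewhere with
                    # (img, speed, action, reward, next_img, next_speed, done)

def sample_and_train(agent, batch_size=64):
    # Hypothetical driver: draw a random minibatch and hand it to the agent.
    batch = random.sample(replay_buffer, batch_size)
    agent.train_critic(batch)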
Example #3
    def loss(y_true, y_pred):
        # `old_prediction`, `advantage`, and get_log_probability_density()
        # are captured from the enclosing scope. y_pred packs the Gaussian
        # policy parameters as [mu, sigma] along the last axis (a 2-D
        # action here, so sigma starts at column 2).
        PPO_LOSS_CLIPPING = 0.2
        PPO_ENTROPY_LOSS = 5 * 1e-3  # Does not converge without entropy penalty

        log_pdf_new = get_log_probability_density(y_pred, y_true)
        log_pdf_old = get_log_probability_density(old_prediction, y_true)

        # Clipped surrogate objective: ratio = pi_new / pi_old.
        ratio = K.exp(log_pdf_new - log_pdf_old)
        surrogate1 = ratio * advantage
        clip_ratio = K.clip(ratio,
                            min_value=(1 - PPO_LOSS_CLIPPING),
                            max_value=(1 + PPO_LOSS_CLIPPING))
        surrogate2 = clip_ratio * advantage

        loss_actor = -K.mean(K.minimum(surrogate1, surrogate2))

        # Entropy of a diagonal Gaussian: H = (log(2*pi*sigma^2) + 1) / 2.
        # Adding -H (scaled) to the loss rewards higher entropy, i.e.
        # encourages exploration.
        sigma = y_pred[:, 2:]
        variance = K.square(sigma)

        loss_entropy = PPO_ENTROPY_LOSS * K.mean(
            -(K.log(2 * np.pi * variance) + 1) / 2)

        return loss_actor + loss_entropy
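A minimal sketch of how such a loss is usually built as a closure, so that `old_prediction`, `advantage`, and the log-density helper reach the two-argument Keras loss; the factory name `ppo_loss` is an assumption.

from tensorflow.keras import backend as K

def ppo_loss(old_prediction, advantage, get_log_probability_density):
    # Hypothetical factory: everything beyond (y_true, y_pred) must be
    # captured in a closure or fed to the model as extra inputs.
    def loss(y_true, y_pred):
        log_pdf_new = get_log_probability_density(y_pred, y_true)
        log_pdf_old = get_log_probability_density(old_prediction, y_true)
        ratio = K.exp(log_pdf_new - log_pdf_old)
        clipped = K.clip(ratio, 0.8, 1.2)  # 1 -/+ PPO_LOSS_CLIPPING
        return -K.mean(K.minimum(ratio * advantage, clipped * advantage))
    return loss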
Example #4
def __call__(self, w):
    # Keras weight constraint: clamp every weight into [min_val, max_val]
    # after each optimizer update.
    return K.clip(w, self.min_val, self.max_val)
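A minimal sketch of the full constraint class this method belongs to, and how it is attached to a layer; the class name `ClipConstraint` is an assumption.

from tensorflow.keras import backend as K
from tensorflow.keras.constraints import Constraint
from tensorflow.keras.layers import Dense

class ClipConstraint(Constraint):  # hypothetical name for the enclosing class
    def __init__(self, min_val, max_val):
        self.min_val = min_val
        self.max_val = max_val

    def __call__(self, w):
        return K.clip(w, self.min_val, self.max_val)

# Usage: clamp a layer's kernel weights into [-0.01, 0.01] after every
# optimizer step (e.g. for a WGAN critic).
layer = Dense(64, kernel_constraint=ClipConstraint(-0.01, 0.01))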
Example #5
def recall_m(y_true, y_pred):
    # Round predictions to 0/1 and count hits among the actual positives.
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    # K.epsilon() guards against division by zero when there are no positives.
    recall = true_positives / (possible_positives + K.epsilon())
    return recall
Example #6
def precision_m(y_true, y_pred):
    # Round predictions to 0/1 and count hits among the predicted positives.
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    # K.epsilon() guards against division by zero when nothing is predicted positive.
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision
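A minimal usage sketch for both metrics: pass the functions to `metrics=` when compiling (the toy model below is an assumption). Note that Keras averages metric values per batch, so these are batch-wise approximations of precision and recall rather than exact dataset-level figures.

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

model = Sequential([Dense(16, activation='relu', input_shape=(10,)),
                    Dense(1, activation='sigmoid')])
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy', precision_m, recall_m])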
Example #7
def preprocess(x):
    # Shift and scale with dataset-specific constants, then clip outliers
    # into [-5, 5].
    x = (x + 0.8) / 7.0
    x = K.clip(x, -5, 5)
    return x
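A minimal sketch of running such a preprocessing function inside the model graph with a `Lambda` layer; the input shape is an assumption.

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Lambda

model = Sequential([
    Lambda(preprocess, input_shape=(8,)),  # shift, scale, and clip inputs
    Dense(1),
])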