Exemplo n.º 1
0
    def neural_psc(self, frame, step):
        """Return an exploration reward derived from the density model.

        The frame is run through ``process_density_images`` and
        ``process_density_input``; the density model is then evaluated twice
        on the result and the summed log-probability difference between the
        two evaluations (second minus first) is turned into a reward:

            sqrt(exp(0.1 * (step + 1) ** -0.5 * max(0, gain)) - 1)

        Args:
            frame: Observation passed to ``process_density_images``.
            step: Step counter; the gain is scaled by ``(step + 1) ** -0.5``.

        Returns:
            The reward value; it is 0 whenever the gain is not positive.
        """
        model_input = process_density_input(process_density_images(frame))

        # The two evaluations must stay in this order. The extra ``True``
        # on the first call presumably makes the model update itself before
        # the second call - TODO confirm against ``prob_evaluate``.
        evaluate = self.density_model.prob_evaluate
        before = evaluate(self.sess, model_input, True) + 1e-8
        after = evaluate(self.sess, model_input) + 1e-8

        # The 1e-8 offset above keeps both logarithms finite at zero
        # probability.
        gain = np.sum(np.log(after) - np.log(before))

        # Negative gains are clamped to zero, so the value under the square
        # root is never negative.
        exponent = 0.1 * pow(step + 1, -0.5) * max(0, gain)
        return pow(exp(exponent) - 1, 0.5)
Exemplo n.º 2
0
    def bonus(self, obs, t):
        """Return an exploration bonus for ``obs`` and log two scalars.

        The observation is resized to ``(flags.img_height, flags.img_width)``
        and preprocessed, then the density model is evaluated twice on it.
        The summed log-probability difference between the two evaluations
        (second minus first) is converted into the bonus:

            sqrt(exp(0.1 * (t + 1) ** -0.5 * max(0, gain)) - 1)

        Side effects: writes ``data/loss`` (negative summed log-probability
        of the first evaluation) and ``data/PG`` (the gain) to
        ``self.writer`` at step ``t``.

        Args:
            obs: Raw observation image handed to ``resize``.
            t: Step counter; used both as the logging step and to scale the
                gain by ``(t + 1) ** -0.5``.

        Returns:
            The bonus value; it is 0 whenever the gain is not positive.
        """
        target_shape = (self.flags.img_height, self.flags.img_width)
        resized = resize(obs, target_shape, order=1)
        model_input = process_density_input(process_density_images(resized))

        # Both evaluations happen before any offset is applied, and in this
        # order. The ``True`` argument presumably triggers a model update
        # between them - TODO confirm against ``prob_evaluate``.
        before = self.density_model.prob_evaluate(self.sess, model_input, True)
        after = self.density_model.prob_evaluate(self.sess, model_input)

        # Small offset so the logarithms below stay finite at zero
        # probability.
        before += 1e-8
        after += 1e-8

        log_before = np.log(before)
        gain = np.sum(np.log(after) - log_before)

        self.writer.add_scalar('data/loss', -np.sum(log_before), t)
        self.writer.add_scalar('data/PG', gain, t)

        # Negative gains are clamped to zero, so the value under the square
        # root is never negative.
        exponent = 0.1 * pow(t + 1, -0.5) * max(0, gain)
        return pow(exp(exponent) - 1, 0.5)