Example #1
    def predict_reward(self, path):
        """Predict the reward for each step in a given path"""
        q_state_reward_pred = self.sess.run(self.q_state_reward_pred, feed_dict={
            self.segment_placeholder: np.array([create_segment_q_states(path)]),
            K.learning_phase(): False
        })
        return q_state_reward_pred[0]
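Example #1 only returns the per-step predictions; the short sketch below (not from the original source) shows one way a caller might overwrite a path's rewards with that output before the agent trains on it. The DummyPredictor stub and the path dictionary layout are assumptions standing in for the real predictor and rollout format.

import numpy as np

# Hypothetical stand-in for the trained predictor of Example #1, so the
# snippet runs on its own; only the predict_reward(path) signature matters.
class DummyPredictor:
    def predict_reward(self, path):
        # Pretend every step earns the same small predicted reward.
        return np.full(len(path["obs"]), 0.1)

predictor = DummyPredictor()
path = {
    "obs": [np.zeros(4) for _ in range(5)],
    "original_rewards": [1.0, 0.0, 0.0, 1.0, 0.0],
}

# Keep the true rewards around for logging, but let the agent train on the
# predicted ones.
path["rewards"] = predictor.predict_reward(path)
print(path["rewards"])  # five predicted per-step rewards

Leaving original_rewards untouched is what allows the validation in Example #2 to compare predicted and true episode returns later.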
Example #2
    def _write_training_summaries(self, loss):
        self.agent_logger.log_simple("predictor/loss", loss)

        # Calculate correlation between true and predicted reward by running validation on recent episodes
        recent_paths = self.agent_logger.get_recent_paths_with_padding()
        # Run validation every 10 iters, once there are at least two recent paths
        if len(recent_paths) > 1 and self.agent_logger.summary_step % 10 == 0:
            validation_q_states = np.asarray(
                [create_segment_q_states(path) for path in recent_paths])
            q_state_reward_pred = self.sess.run(
                self.q_state_reward_pred,
                feed_dict={
                    self.segment_placeholder: validation_q_states,
                    K.learning_phase(): False
                })
            ep_reward_pred = np.sum(q_state_reward_pred, axis=1)
            q_state_reward_true = np.asarray(
                [path['original_rewards'] for path in recent_paths])
            ep_reward_true = np.sum(q_state_reward_true, axis=1)
            self.agent_logger.log_simple(
                "predictor/correlations",
                corrcoef(ep_reward_true, ep_reward_pred))

        self.agent_logger.log_simple("predictor/num_training_iters",
                                     self._elapsed_predictor_training_iters)
        self.agent_logger.log_simple("labels/desired_labels",
                                     self.label_schedule.n_desired_labels)
        self.agent_logger.log_simple("labels/total_comparisons",
                                     len(self.comparison_collector))
        self.agent_logger.log_simple(
            "labels/labeled_comparisons",
            len(self.comparison_collector.labeled_decisive_comparisons))
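The correlation logged above uses the project's corrcoef helper; the self-contained sketch below assumes it behaves like the Pearson correlation between per-episode true and predicted returns (np.corrcoef), with made-up numbers for three short episodes.

import numpy as np

# Toy data: three recent episodes, three steps each.
q_state_reward_true = np.array([[1.0, 0.0, 1.0],
                                [0.0, 0.0, 1.0],
                                [1.0, 1.0, 1.0]])
q_state_reward_pred = np.array([[0.9, 0.1, 0.8],
                                [0.2, 0.1, 0.7],
                                [0.8, 0.9, 0.6]])

# Per-episode returns, mirroring the np.sum(..., axis=1) calls above.
ep_reward_true = q_state_reward_true.sum(axis=1)   # [2.0, 1.0, 3.0]
ep_reward_pred = q_state_reward_pred.sum(axis=1)   # [1.8, 1.0, 2.3]

# np.corrcoef returns the 2x2 correlation matrix; the off-diagonal entry is
# the Pearson correlation between true and predicted returns (about 0.99 here).
print(np.corrcoef(ep_reward_true, ep_reward_pred)[0, 1])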
Example #3
        pos_list = list(range(path_length - segment_length + 1))
        # Weight later start positions proportionally to their index
        prob = [p / sum(pos_list) for p in pos_list]
        segment_num = int(np.log2(path_length))
        # np.random.choice takes `replace` as its third positional argument,
        # so the sampling weights must be passed via the p= keyword
        start_pos = np.random.choice(pos_list, size=segment_num, p=prob)
        segments = []
        for pos in start_pos:
            segment = _slice_path(path, segment_length, pos)
            segment["q_states"] = create_segment_q_states(segment)
            segments.append(segment)
        return segments


    def path_callback(self, path):
        path_length = len(path["obs"])
        self._steps_since_last_training += path_length

        self.agent_logger.log_episode(path)

        # We may be in a new part of the environment, so we take new segments to build comparisons from
        # segment = sample_segment_from_path(path, int(self._frames_per_segment))
        # if segment:
        #     self.recent_segments.append(segment)
        #
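As a standalone illustration of the segment sampling in Example #3 (after resolving the merge conflict), the sketch below uses hypothetical path and segment lengths. It mainly highlights that np.random.choice takes replace as its third positional argument, so the position weights have to go through the p= keyword.

import numpy as np

path_length, segment_length = 16, 4                   # hypothetical sizes
pos_list = list(range(path_length - segment_length + 1))
prob = [p / sum(pos_list) for p in pos_list]           # later starts more likely
segment_num = int(np.log2(path_length))                # 4 segments here

# Pass the weights via p=; the third positional argument is `replace`.
start_pos = np.random.choice(pos_list, size=segment_num, p=prob)
print(sorted(start_pos))

With these weights, position 0 is never drawn and later start positions dominate; whether that bias is intended is not clear from the conflicting branches, so treat the constants here as placeholders.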