Example #1
    def _run_one_iteration(self, iteration):
        """Runs one iteration of agent/environment interaction.

    An iteration involves running several episodes until a certain number of
    steps is obtained. The interleaving of train/eval phases implemented here
    matches the implementation of Mnih et al. (2015).

    Args:
      iteration: int, current iteration number, used as a global_step for saving
        TensorBoard summaries.

    Returns:
      A dict containing summary statistics for this iteration.
    """
        statistics = iteration_statistics.IterationStatistics()
        tf.logging.info('Starting iteration %d', iteration)
        num_episodes_train, average_reward_train = self._run_train_phase(
            statistics)
        num_episodes_eval, average_reward_eval = self._run_eval_phase(
            statistics)

        self._save_tensorboard_summaries(iteration, num_episodes_train,
                                         average_reward_train,
                                         num_episodes_eval,
                                         average_reward_eval)
        return statistics.data_lists
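
For context, this method is normally driven by an outer experiment loop. A minimal sketch of such a loop, assuming hypothetical `_log_experiment` and `_checkpoint_experiment` helpers that do not appear in the example above:

    def run_experiment(self):
        """Hypothetical outer loop running all train/eval iterations."""
        for iteration in range(self._start_iteration, self._num_iterations):
            statistics = self._run_one_iteration(iteration)
            # Persist statistics and checkpoints each iteration; both helper
            # methods here are assumptions, not part of the example above.
            self._log_experiment(iteration, statistics)
            self._checkpoint_experiment(iteration)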
Example #2
    def testAddManyValues(self):
        my_pi = 3.14159

        statistics = iteration_statistics.IterationStatistics()

        # Add a number of items. Each item is added to the list corresponding to its
        # given key.
        statistics.append({
            'rewards': 0,
            'nouns': 'reinforcement',
            'angles': my_pi
        })
        # Add a second item to the 'nouns' list.
        statistics.append({'nouns': 'learning'})

        # There are three lists.
        self.assertEqual(3, len(statistics.data_lists))
        self.assertEqual(1, len(statistics.data_lists['rewards']))
        self.assertEqual(2, len(statistics.data_lists['nouns']))
        self.assertEqual(1, len(statistics.data_lists['angles']))

        self.assertEqual(0, statistics.data_lists['rewards'][0])
        self.assertEqual('reinforcement', statistics.data_lists['nouns'][0])
        self.assertEqual('learning', statistics.data_lists['nouns'][1])
        self.assertEqual(my_pi, statistics.data_lists['angles'][0])
Example #3
    def testAddOneValue(self):
        statistics = iteration_statistics.IterationStatistics()

        # The statistics data structure should be empty a priori.
        self.assertEqual(0, len(statistics.data_lists))

        statistics.append({'key1': 0})
        # We should have exactly one list, containing one value.
        self.assertEqual(1, len(statistics.data_lists))
        self.assertEqual(1, len(statistics.data_lists['key1']))
        self.assertEqual(0, statistics.data_lists['key1'][0])
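
The tests above pin down the behavior of `IterationStatistics` closely enough to sketch it. A minimal implementation consistent with these tests (not necessarily the actual one):

    class IterationStatistics(object):
        """Accumulates per-key lists of values, one list per statistic name."""

        def __init__(self):
            # A plain dict, so reading a missing key raises KeyError
            # (exercised by testMissingValue at the end of this page).
            self.data_lists = {}

        def append(self, data_pairs):
            # Append each value to the list for its key, creating the
            # list on first use.
            for key, value in data_pairs.items():
                if key not in self.data_lists:
                    self.data_lists[key] = []
                self.data_lists[key].append(value)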
Example #4
    def _run_one_iteration(self, iteration):
        """Runs one iteration of agent/environment interaction.

    An iteration involves running several episodes until a certain number of
    steps is obtained. The interleaving of train/eval phases implemented here
    matches the implementation of Mnih et al. (2015).

    Args:
      iteration: int, current iteration number, used as a global_step for saving
        TensorBoard summaries.

    Returns:
      A dict containing summary statistics for this iteration.
    """
        statistics = iteration_statistics.IterationStatistics()
        tf.logging.info('Starting iteration %d', iteration)

        # Once the evaluation return reaches the per-game threshold, stop
        # training and run the train phase in evaluation mode only.
        train_eval_mode = False
        if self.average_reward_eval >= episodic_return_switch[self.game_name]:
            train_eval_mode = True
            print("Stop training at iteration {}".format(iteration))

        num_episodes_train, average_reward_train = self._run_train_phase(
            statistics, train_eval_mode)

        if self.agent_name in RPG_AGENTS and self._agent._replay_opt.memory.add_count == 0:
            # If the RPG agent has not been trained yet, skip evaluation to
            # save time and report sentinel values instead.
            num_episodes_eval, average_reward_eval = -10000, -10000
        else:
            num_episodes_eval, average_reward_eval = self._run_eval_phase(
                statistics)
        self.average_reward_eval = average_reward_eval
        self._save_tensorboard_summaries(iteration, num_episodes_train,
                                         average_reward_train,
                                         num_episodes_eval,
                                         average_reward_eval)
        return statistics.data_lists
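
This example references two module-level names that are not shown: `episodic_return_switch` and `RPG_AGENTS`. A plausible shape for them, with purely illustrative placeholder values:

# Hypothetical per-game evaluation-return thresholds at which training
# is switched off; the games and values here are illustrative only.
episodic_return_switch = {
    'Pong': 20.0,
    'Breakout': 400.0,
}

# Hypothetical set of agent names treated as RPG agents.
RPG_AGENTS = frozenset(['rpg'])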
Example #5
    def _run_one_iteration(self, iteration):
        """Runs one iteration of agent/environment interaction.

    An iteration involves running several episodes until a certain number of
    steps is obtained. This method differs from the `_run_one_iteration` method
    in the base `Runner` class in that it only runs the train phase.

    Args:
      iteration: int, current iteration number, used as a global_step for saving
        TensorBoard summaries.

    Returns:
      A dict containing summary statistics for this iteration.
    """
        statistics = iteration_statistics.IterationStatistics()
        num_episodes_train, average_reward_train = self._run_train_phase(
            statistics)

        self._save_tensorboard_summaries(iteration, num_episodes_train,
                                         average_reward_train)
        return statistics.data_lists
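
A sketch of how the train-only `_save_tensorboard_summaries` called above might write its two scalars with the same TF1-style API; the tag names and the `self._summary_writer` attribute are assumptions, not taken from the example:

    def _save_tensorboard_summaries(self, iteration, num_episodes,
                                    average_reward):
        """Writes training statistics as TensorBoard scalar summaries."""
        summary = tf.Summary(value=[
            tf.Summary.Value(tag='Train/NumEpisodes',
                             simple_value=num_episodes),
            tf.Summary.Value(tag='Train/AverageReturns',
                             simple_value=average_reward)
        ])
        self._summary_writer.add_summary(summary, iteration)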
Example #6
    def _run_one_iteration(self, iteration):
        statistics = iteration_statistics.IterationStatistics()
        print('Starting iteration', iteration)
        self._run_train_phase()
        self._run_eval_phase(statistics)
        return statistics.data_lists
    def testMissingValue(self):
        statistics = iteration_statistics.IterationStatistics()
        with self.assertRaises(KeyError):
            _ = statistics.data_lists['missing_key']
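
Since `data_lists` is a plain dict, callers that cannot guarantee a key exists can use `dict.get` instead of catching `KeyError`:

    statistics = iteration_statistics.IterationStatistics()
    rewards = statistics.data_lists.get('rewards', [])  # [] if nothing was appended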