def _run_one_iteration(self, iteration):
    """Runs a single train-then-eval iteration and returns its statistics.

    One iteration plays episodes until a step budget is met, first in
    training mode and then in evaluation mode; the train/eval interleaving
    mirrors (Mnih et al., 2015). Summaries are also written to Tensorboard.

    Args:
      iteration: int, current iteration number, used as a global_step for
        saving Tensorboard summaries.

    Returns:
      A dict containing summary statistics for this iteration.
    """
    stats = iteration_statistics.IterationStatistics()
    tf.logging.info('Starting iteration %d', iteration)
    # Train phase first, then eval phase; both append into `stats`.
    train_episodes, train_reward = self._run_train_phase(stats)
    eval_episodes, eval_reward = self._run_eval_phase(stats)
    self._save_tensorboard_summaries(iteration,
                                     train_episodes,
                                     train_reward,
                                     eval_episodes,
                                     eval_reward)
    return stats.data_lists
def testAddManyValues(self):
    """Appending heterogeneous keys creates one list per key, in order."""
    pi_value = 3.14159
    stats = iteration_statistics.IterationStatistics()
    # One append with three distinct keys, then a second append that only
    # extends the 'nouns' list.
    stats.append({
        'rewards': 0,
        'nouns': 'reinforcement',
        'angles': pi_value
    })
    stats.append({'nouns': 'learning'})
    # Exactly three lists exist, sized 1 / 2 / 1 respectively.
    self.assertEqual(3, len(stats.data_lists))
    self.assertEqual(1, len(stats.data_lists['rewards']))
    self.assertEqual(2, len(stats.data_lists['nouns']))
    self.assertEqual(1, len(stats.data_lists['angles']))
    # Values are stored unchanged and in insertion order.
    self.assertEqual(0, stats.data_lists['rewards'][0])
    self.assertEqual('reinforcement', stats.data_lists['nouns'][0])
    self.assertEqual('learning', stats.data_lists['nouns'][1])
    self.assertEqual(pi_value, stats.data_lists['angles'][0])
def testAddOneValue(self):
    """A single append creates exactly one single-element list."""
    stats = iteration_statistics.IterationStatistics()
    # Nothing has been appended yet, so no lists exist.
    self.assertEqual(0, len(stats.data_lists))
    stats.append({'key1': 0})
    # Exactly one list now exists, holding exactly the appended value.
    self.assertEqual(1, len(stats.data_lists))
    self.assertEqual(1, len(stats.data_lists['key1']))
    self.assertEqual(0, stats.data_lists['key1'][0])
def _run_one_iteration(self, iteration):
    """Runs one iteration of agent/environment interaction.

    An iteration involves running several episodes until a certain number of
    steps are obtained. The interleaving of train/eval phases implemented here
    are to match the implementation of (Mnih et al., 2015).

    This variant freezes gradient updates once the most recent evaluation
    return reaches the per-game threshold in `episodic_return_switch`, and
    skips evaluation entirely for RPG agents whose replay memory is still
    empty.

    Args:
      iteration: int, current iteration number, used as a global_step for
        saving Tensorboard summaries.

    Returns:
      A dict containing summary statistics for this iteration.
    """
    statistics = iteration_statistics.IterationStatistics()
    tf.logging.info('Starting iteration %d', iteration)
    # Once the last observed eval return reaches this game's switching
    # threshold, keep collecting experience but stop training updates.
    train_eval_mode = (
        self.average_reward_eval >= episodic_return_switch[self.game_name])
    if train_eval_mode:
        # Use tf.logging (not print) for consistency with the rest of the
        # runner's logging.
        tf.logging.info('Stop training at iteration %d', iteration)
    num_episodes_train, average_reward_train = self._run_train_phase(
        statistics, train_eval_mode)
    if (self.agent_name in RPG_AGENTS
            and self._agent._replay_opt.memory.add_count == 0):
        # The RPG agent's replay memory is still empty, so it has not been
        # trained at all; evaluating it would waste time. Record a large
        # negative sentinel so the skipped eval is obvious in summaries.
        num_episodes_eval, average_reward_eval = -10000, -10000
    else:
        num_episodes_eval, average_reward_eval = self._run_eval_phase(
            statistics)
    # Remember the latest eval return so the next iteration can decide
    # whether to keep training.
    self.average_reward_eval = average_reward_eval
    self._save_tensorboard_summaries(iteration, num_episodes_train,
                                     average_reward_train, num_episodes_eval,
                                     average_reward_eval)
    return statistics.data_lists
def _run_one_iteration(self, iteration):
    """Runs one train-only iteration of agent/environment interaction.

    An iteration plays episodes until a certain number of steps are
    obtained. Unlike the base `Runner._run_one_iteration`, no evaluation
    phase is run — only training.

    Args:
      iteration: int, current iteration number, used as a global_step for
        saving Tensorboard summaries.

    Returns:
      A dict containing summary statistics for this iteration.
    """
    stats = iteration_statistics.IterationStatistics()
    episode_count, mean_reward = self._run_train_phase(stats)
    self._save_tensorboard_summaries(iteration, episode_count, mean_reward)
    return stats.data_lists
def _run_one_iteration(self, iteration):
    """Runs one train+eval iteration and returns the collected statistics.

    Args:
      iteration: int, current iteration number (logged to stdout).

    Returns:
      A dict containing summary statistics for this iteration.
    """
    stats = iteration_statistics.IterationStatistics()
    print('Starting iteration ', iteration)
    # NOTE(review): only the eval phase receives the statistics object here;
    # the train phase is run without recording into it.
    self._run_train_phase()
    self._run_eval_phase(stats)
    return stats.data_lists
def testMissingValue(self):
    """Looking up a key that was never appended raises KeyError."""
    stats = iteration_statistics.IterationStatistics()
    with self.assertRaises(KeyError):
        _ = stats.data_lists['missing_key']