Example #1
 def testRunOnePhase(self):
     max_steps = 10
     environment_steps = 2
     environment = MockEnvironment(max_steps=environment_steps)
     statistics = []
     runner = run_experiment.Runner(
         self._test_subdir,
         self._create_agent_fn,
         game_name='Test',
         create_environment_fn=lambda x, y: environment)
     step_number, sum_returns, num_episodes = runner._run_one_phase(
         max_steps, statistics, 'test')
     calls_to_run_episode = int(max_steps / environment_steps)
     self.assertEqual(self._agent.step.call_count, calls_to_run_episode)
     self.assertEqual(self._agent.end_episode.call_count,
                      calls_to_run_episode)
     self.assertEqual(max_steps, step_number)
     self.assertEqual(-1 * calls_to_run_episode, sum_returns)
     self.assertEqual(calls_to_run_episode, num_episodes)
     expected_statistics = []
     for _ in range(calls_to_run_episode):
         expected_statistics.append({
             'test_episode_lengths': 2,
             'test_episode_returns': -1
         })
     self.assertEqual(len(expected_statistics), len(statistics))
     for i in range(len(statistics)):
         self.assertDictEqual(expected_statistics[i], statistics[i])
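Several of these tests rely on a MockEnvironment fixture that is not shown in this listing. A minimal sketch consistent with the assertions (fixed-length episodes, per-step reward of (-1)**i * i, a gym-style step tuple, and the game_over attribute Dopamine's Runner checks) might look like the following; the actual test fixture may differ:

class MockEnvironment(object):
  """Minimal stand-in environment (a sketch, not the actual test fixture)."""

  def __init__(self, max_steps=10):
    self.max_steps = max_steps
    self.step_count = 0
    self.game_over = False

  def reset(self):
    self.step_count = 0
    self.game_over = False
    return 0  # Dummy observation.

  def step(self, action):
    # Reward at step i is (-1)**i * i: a 2-step episode returns 0 - 1 = -1,
    # and a 10-step episode returns -5, matching the assertions above.
    reward = (-1) ** self.step_count * self.step_count
    self.step_count += 1
    self.game_over = self.step_count >= self.max_steps
    return 0, reward, self.game_over, {}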
Example #2
def main():
  # Create the runner class with this agent. We use very small numbers of steps
  # to terminate quickly, as this is mostly meant for demonstrating how one can
  # use the framework.
  random_dqn_runner = run_experiment.Runner(LOG_PATH,
                                            create_random_dqn_agent,
                                            game_name=GAME,
                                            num_iterations=200,
                                            training_steps=10,
                                            evaluation_steps=10,
                                            max_steps_per_episode=100)

  # @title Train MyRandomDQNAgent.
  print('Will train agent, please be patient, may be a while...')
  random_dqn_runner.run_experiment()
  print('Done training!')


  # @title Load the training logs.
  random_dqn_data = colab_utils.read_experiment(LOG_PATH, verbose=True)
  random_dqn_data['agent'] = 'MyRandomDQN'
  random_dqn_data['run_number'] = 1
  # experimental_data (a dict of per-game DataFrames) is assumed to have been
  # loaded earlier, e.g. via colab_utils.load_baselines; see the setup sketch
  # after this example.
  experimental_data[GAME] = experimental_data[GAME].merge(random_dqn_data,
                                                          how='outer')

  # @title Plot training results.
  fig, ax = plt.subplots(figsize=(16, 8))
  # Note: sns.tsplot is deprecated in newer seaborn releases; sns.lineplot is
  # the modern replacement.
  sns.tsplot(data=experimental_data[GAME], time='iteration', unit='run_number',
             condition='agent', value='train_episode_returns', ax=ax)
  plt.title(GAME)
  plt.savefig('game.png')
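The snippet above assumes setup that is not shown: the imports, LOG_PATH, GAME, the create_random_dqn_agent factory (one is defined in Example #22), and a pre-loaded experimental_data dict. A hedged sketch of that setup, with placeholder values, based on the standard Dopamine colab:

import matplotlib.pyplot as plt
import seaborn as sns
from dopamine.atari import run_experiment  # dopamine.discrete_domains in newer releases.
from dopamine.colab import utils as colab_utils

GAME = 'Asterix'                  # Placeholder game name.
LOG_PATH = '/tmp/colab_dope_run'  # Placeholder log directory.
# Baseline results keyed by game name; this is what main() merges into.
experimental_data = colab_utils.load_baselines('/content')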
Example #3
    def testCheckpointExperiment(self, mock_logger_constructor,
                                 mock_checkpointer_constructor):
        checkpoint_dir = os.path.join(self._test_subdir, 'checkpoints')
        test_dict = {'test': 1}
        iteration = 1729

        def bundle_and_checkpoint(x, y):
            self.assertEqual(checkpoint_dir, x)
            self.assertEqual(iteration, y)
            return test_dict

        self._agent.bundle_and_checkpoint.side_effect = bundle_and_checkpoint
        experiment_checkpointer = mock.Mock()
        mock_checkpointer_constructor.return_value = experiment_checkpointer
        logs_data = {'one': 1, 'two': 2}
        mock_logger = MockLogger(run_asserts=False, data=logs_data)
        mock_logger_constructor.return_value = mock_logger
        runner = run_experiment.Runner(
            self._test_subdir,
            self._create_agent_fn,
            game_name='Test',
            create_environment_fn=lambda x, y: mock.Mock())
        runner._checkpoint_experiment(iteration)
        self.assertEqual(1, experiment_checkpointer.save_checkpoint.call_count)
        mock_args, _ = experiment_checkpointer.save_checkpoint.call_args
        self.assertEqual(iteration, mock_args[0])
        test_dict['logs'] = logs_data
        test_dict['current_iteration'] = iteration
        self.assertDictEqual(test_dict, mock_args[1])
Example #4
 def testInitializeCheckpointingWithNoCheckpointFile(self, mock_get_latest):
   mock_get_latest.return_value = -1
   base_dir = '/does/not/exist'
   with self.assertRaisesRegexp(tf.errors.PermissionDeniedError,
                                '.*/does.*'):
     run_experiment.Runner(base_dir, self._create_agent_fn,
                           game_name='Pong')
Example #5
 def testRunOneEpisodeWithLowMaxSteps(self):
   max_steps_per_episode = 2
   environment = MockEnvironment()
   runner = run_experiment.Runner(
       self._test_subdir, self._create_agent_fn, game_name='Test',
       create_environment_fn=lambda x, y: environment,
       max_steps_per_episode=max_steps_per_episode)
   step_number, total_reward = runner._run_one_episode()
   self.assertEqual(self._agent.step.call_count, max_steps_per_episode - 1)
   self.assertEqual(self._agent.end_episode.call_count, 1)
   self.assertEqual(max_steps_per_episode, step_number)
   self.assertEqual(-1, total_reward)
Example #6
 def testRunOneEpisode(self):
   max_steps_per_episode = 11
   environment = MockEnvironment()
   runner = run_experiment.Runner(
       self._test_subdir, self._create_agent_fn, game_name='Test',
       create_environment_fn=lambda x, y: environment,
       max_steps_per_episode=max_steps_per_episode)
   step_number, total_reward = runner._run_one_episode()
   self.assertEqual(self._agent.step.call_count, environment.max_steps - 1)
   self.assertEqual(self._agent.end_episode.call_count, 1)
   self.assertEqual(environment.max_steps, step_number)
   # Expected reward will be \sum_{i=0}^{9} (-1)**i * i = -5
   self.assertEqual(-5, total_reward)
Example #7
 def testDefaultGinRainbow(self):
     """Test RainbowAgent default configuration using default gin."""
     tf.logging.info('####### Training the RAINBOW agent #####')
     tf.logging.info('####### RAINBOW base_dir: {}'.format(FLAGS.base_dir))
     FLAGS.agent_name = 'rainbow'
     FLAGS.gin_files = ['dopamine/agents/rainbow/configs/rainbow.gin']
     FLAGS.gin_bindings = [
         'WrappedReplayBuffer.replay_capacity = 100'  # To prevent OOM.
     ]
     run_experiment.load_gin_configs(FLAGS.gin_files, FLAGS.gin_bindings)
     runner = run_experiment.Runner(FLAGS.base_dir, train.create_agent)
     self.assertIsInstance(runner._agent.optimizer, tf.train.AdamOptimizer)
     self.assertNear(0.0000625, runner._agent.optimizer._lr, 0.0001)
     shutil.rmtree(FLAGS.base_dir)
Example #8
 def testRunExperimentWithInconsistentRange(self, mock_logger_constructor,
                                            mock_checkpointer_constructor):
   experiment_logger = MockLogger()
   mock_logger_constructor.return_value = experiment_logger
   experiment_checkpointer = mock.Mock()
   mock_checkpointer_constructor.return_value = experiment_checkpointer
   runner = run_experiment.Runner(
       self._test_subdir, self._create_agent_fn,
       game_name='Test',
       create_environment_fn=lambda x, y: mock.Mock(),
       num_iterations=0)
   runner.run_experiment()
   self.assertEqual(0, experiment_checkpointer.save_checkpoint.call_count)
   self.assertEqual(0, experiment_logger._calls_to_set)
   self.assertEqual(0, experiment_logger._calls_to_log)
Example #9
 def testDefaultGinDqn(self):
     """Test DQNAgent configuration using the default gin config."""
     tf.logging.info('####### Training the DQN agent #####')
     tf.logging.info('####### DQN base_dir: {}'.format(FLAGS.base_dir))
     FLAGS.agent_name = 'dqn'
     FLAGS.gin_files = ['dopamine/agents/dqn/configs/dqn.gin']
     FLAGS.gin_bindings = [
         'WrappedReplayBuffer.replay_capacity = 100'  # To prevent OOM.
     ]
     run_experiment.load_gin_configs(FLAGS.gin_files, FLAGS.gin_bindings)
     runner = run_experiment.Runner(FLAGS.base_dir, train.create_agent)
     self.assertIsInstance(runner._agent.optimizer,
                           tf.train.RMSPropOptimizer)
     self.assertNear(0.00025, runner._agent.optimizer._learning_rate,
                     0.0001)
     shutil.rmtree(FLAGS.base_dir)
Example #10
    def testOverrideGinDqn(self):
        """Test DQNAgent configuration overridden with AdamOptimizer."""
        tf.logging.info('####### Training the DQN agent #####')
        tf.logging.info('####### DQN base_dir: {}'.format(FLAGS.base_dir))
        FLAGS.agent_name = 'dqn'
        FLAGS.gin_files = ['dopamine/agents/dqn/configs/dqn.gin']
        FLAGS.gin_bindings = [
            'DQNAgent.optimizer = @tf.train.AdamOptimizer()',
            'tf.train.AdamOptimizer.learning_rate = 100',
            'WrappedReplayBuffer.replay_capacity = 100'  # To prevent OOM.
        ]

        run_experiment.load_gin_configs(FLAGS.gin_files, FLAGS.gin_bindings)
        runner = run_experiment.Runner(FLAGS.base_dir, train.create_agent)
        self.assertIsInstance(runner._agent.optimizer, tf.train.AdamOptimizer)
        self.assertEqual(100, runner._agent.optimizer._lr)
        shutil.rmtree(FLAGS.base_dir)
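In Dopamine, load_gin_configs is a thin wrapper around gin-config, so the override above can equivalently be expressed directly against the gin API, as in this sketch:

import gin.tf

# Equivalent to run_experiment.load_gin_configs(gin_files, gin_bindings):
gin.parse_config_files_and_bindings(
    ['dopamine/agents/dqn/configs/dqn.gin'],
    bindings=['DQNAgent.optimizer = @tf.train.AdamOptimizer()',
              'tf.train.AdamOptimizer.learning_rate = 100'],
    skip_unknown=False)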
Example #11
 def testLogExperiment(self, mock_logger_constructor):
   log_every_n = 2
   logging_file_prefix = 'prefix'
   statistics = 'statistics'
   experiment_logger = MockLogger(test_cls=self)
   mock_logger_constructor.return_value = experiment_logger
   runner = run_experiment.Runner(
       self._test_subdir, self._create_agent_fn,
       game_name='Test',
       create_environment_fn=lambda x, y: mock.Mock(),
       logging_file_prefix=logging_file_prefix,
       log_every_n=log_every_n)
   num_iterations = 10
   for i in range(num_iterations):
     runner._log_experiment(i, statistics)
   self.assertEqual(num_iterations, experiment_logger._calls_to_set)
   self.assertEqual((num_iterations / log_every_n),
                    experiment_logger._calls_to_log)
Example #12
    def testRunExperiment(self, mock_logger_constructor,
                          mock_checkpointer_constructor, mock_get_latest):
        log_every_n = 1
        environment = MockEnvironment()
        experiment_logger = MockLogger(run_asserts=False)
        mock_logger_constructor.return_value = experiment_logger
        experiment_checkpointer = mock.Mock()
        start_iteration = 1729
        mock_get_latest.return_value = start_iteration

        def load_checkpoint(_):
            return {
                'logs': 'log_data',
                'current_iteration': start_iteration - 1
            }

        experiment_checkpointer.load_checkpoint.side_effect = load_checkpoint
        mock_checkpointer_constructor.return_value = experiment_checkpointer

        def bundle_and_checkpoint(x, y):
            del x, y  # Unused.
            return {'test': 1}

        self._agent.bundle_and_checkpoint.side_effect = bundle_and_checkpoint
        num_iterations = 10
        self._agent.unbundle.return_value = True
        end_iteration = start_iteration + num_iterations
        runner = run_experiment.Runner(
            self._test_subdir,
            self._create_agent_fn,
            game_name='Test',
            create_environment_fn=lambda x, y: environment,
            log_every_n=log_every_n,
            num_iterations=end_iteration,
            training_steps=1,
            evaluation_steps=1)
        self.assertEqual(start_iteration, runner._start_iteration)
        runner.run_experiment()
        self.assertEqual(num_iterations,
                         experiment_checkpointer.save_checkpoint.call_count)
        self.assertEqual(num_iterations, experiment_logger._calls_to_set)
        self.assertEqual(num_iterations, experiment_logger._calls_to_log)
        glob_string = '{}/events.out.tfevents.*'.format(self._test_subdir)
        self.assertGreater(len(tf.gfile.Glob(glob_string)), 0)
Example #13
 def testOverrideGinRainbow(self):
     """Test RainbowAgent configuration overridden with RMSPropOptimizer."""
     tf.logging.info('####### Training the RAINBOW agent #####')
     tf.logging.info('####### RAINBOW base_dir: {}'.format(FLAGS.base_dir))
     FLAGS.agent_name = 'rainbow'
     FLAGS.gin_files = [
         'dopamine/agents/rainbow/configs/rainbow.gin',
     ]
     FLAGS.gin_bindings = [
         'RainbowAgent.optimizer = @tf.train.RMSPropOptimizer()',
         'tf.train.RMSPropOptimizer.learning_rate = 100',
         'WrappedReplayBuffer.replay_capacity = 100'  # To prevent OOM.
     ]
     run_experiment.load_gin_configs(FLAGS.gin_files, FLAGS.gin_bindings)
     runner = run_experiment.Runner(FLAGS.base_dir, train.create_agent)
     self.assertIsInstance(runner._agent.optimizer,
                           tf.train.RMSPropOptimizer)
     self.assertEqual(100, runner._agent.optimizer._learning_rate)
     shutil.rmtree(FLAGS.base_dir)
Example #14
 def testOverrideGinImplicitQuantile(self):
     """Test ImplicitQuantile configuration overriding using IQN gin."""
     tf.logging.info('###### Training the Implicit Quantile agent #####')
     FLAGS.agent_name = 'implicit_quantile'
     FLAGS.base_dir = os.path.join(
         '/tmp/dopamine_tests',
         datetime.datetime.utcnow().strftime('run_%Y_%m_%d_%H_%M_%S'))
     tf.logging.info('###### IQN base dir: {}'.format(FLAGS.base_dir))
     FLAGS.gin_files = [
         'dopamine/agents/'
         'implicit_quantile/configs/implicit_quantile.gin'
     ]
     FLAGS.gin_bindings = [
         'Runner.num_iterations=0',
         'WrappedPrioritizedReplayBuffer.replay_capacity = 1000',
     ]
     run_experiment.load_gin_configs(FLAGS.gin_files, FLAGS.gin_bindings)
     runner = run_experiment.Runner(FLAGS.base_dir, train.create_agent)
     self.assertEqual(1000, runner._agent._replay.memory._replay_capacity)
     shutil.rmtree(FLAGS.base_dir)
Example #15
 def create_runner(self, env_fn, hparams, target_iterations,
                   training_steps_per_iteration):
   # pylint: disable=unbalanced-tuple-unpacking
    (agent_params, optimizer_params,
     runner_params, replay_buffer_params) = _parse_hparams(hparams)
   # pylint: enable=unbalanced-tuple-unpacking
   optimizer = _get_optimizer(optimizer_params)
   agent_params["optimizer"] = optimizer
   agent_params.update(replay_buffer_params)
   create_agent_fn = get_create_agent(agent_params)
   runner = run_experiment.Runner(
       base_dir=self.agent_model_dir,
       create_agent_fn=create_agent_fn,
       create_environment_fn=get_create_env_fun(
           env_fn, time_limit=hparams.time_limit),
       evaluation_steps=0,
       num_iterations=target_iterations,
       training_steps=training_steps_per_iteration,
       **runner_params)
   return runner
Example #16
 def testRunOneIteration(self):
   environment_steps = 2
   environment = MockEnvironment(max_steps=environment_steps)
   training_steps = 20
   evaluation_steps = 10
   runner = run_experiment.Runner(
       self._test_subdir, self._create_agent_fn, game_name='Test',
       create_environment_fn=lambda x, y: environment,
       training_steps=training_steps, evaluation_steps=evaluation_steps)
   dictionary = runner._run_one_iteration(1)
   train_calls = int(training_steps / environment_steps)
   eval_calls = int(evaluation_steps / environment_steps)
   expected_dictionary = {
       'train_episode_lengths': [2 for _ in range(train_calls)],
       'train_episode_returns': [-1 for _ in range(train_calls)],
       'train_average_return': [-1],
       'eval_episode_lengths': [2 for _ in range(eval_calls)],
       'eval_episode_returns': [-1 for _ in range(eval_calls)],
       'eval_average_return': [-1]
   }
   self.assertDictEqual(expected_dictionary, dictionary)
Example #17
 def testInitializeCheckpointingWhenCheckpointUnbundleSucceeds(
     self, mock_get_latest):
   latest_checkpoint = 7
   mock_get_latest.return_value = latest_checkpoint
   logs_data = {'a': 1, 'b': 2}
   current_iteration = 1729
   checkpoint_data = {'current_iteration': current_iteration,
                      'logs': logs_data}
   checkpoint_dir = os.path.join(self._test_subdir, 'checkpoints')
   checkpoint = checkpointer.Checkpointer(checkpoint_dir, 'ckpt')
   checkpoint.save_checkpoint(latest_checkpoint, checkpoint_data)
   mock_agent = mock.Mock()
   mock_agent.unbundle.return_value = True
   runner = run_experiment.Runner(self._test_subdir,
                                  lambda x, y, summary_writer: mock_agent,
                                  game_name='Pong')
   expected_iteration = current_iteration + 1
   self.assertEqual(expected_iteration, runner._start_iteration)
   self.assertDictEqual(logs_data, runner._logger.data)
   mock_agent.unbundle.assert_called_once_with(
       checkpoint_dir, latest_checkpoint, checkpoint_data)
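For reference, the Checkpointer round-trip exercised above can be run standalone. A minimal sketch; the directory is a placeholder and the import path varies across Dopamine versions:

from dopamine.common import checkpointer  # dopamine.discrete_domains.checkpointer in newer releases.

ckpt = checkpointer.Checkpointer('/tmp/ckpt_demo', 'ckpt')  # Placeholder directory.
ckpt.save_checkpoint(7, {'current_iteration': 7, 'logs': {'a': 1}})
restored = ckpt.load_checkpoint(7)  # Returns the saved data dict.
assert restored['current_iteration'] == 7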
Example #18
def create_runner(base_dir, create_agent_fn):
  """Creates an experiment Runner.

  Args:
    base_dir: str, base directory for hosting all subdirectories.
    create_agent_fn: A function that takes as args a Tensorflow session and an
     Atari 2600 Gym environment, and returns an agent.

  Returns:
    runner: A `run_experiment.Runner`-like object.

  Raises:
    ValueError: When an unknown schedule is encountered.
  """
  assert base_dir is not None
  # Continuously runs training and evaluation until max num_iterations is hit.
  if FLAGS.schedule == 'continuous_train_and_eval':
    return run_experiment.Runner(base_dir, create_agent_fn,
                                 create_pacman_environment)
  # Continuously runs training until max num_iterations is hit.
  elif FLAGS.schedule == 'continuous_train':
    return run_experiment.TrainRunner(base_dir, create_agent_fn,
                                      create_pacman_environment)
  else:
    raise ValueError('Unknown schedule: {}'.format(FLAGS.schedule))
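A short usage sketch for create_runner; the schedule value, path, and agent factory below are placeholders for illustration only:

# Illustrative only: FLAGS.schedule selects between Runner and TrainRunner.
# my_create_agent_fn is a placeholder for a real agent factory (see the
# create_basic_agent example later in this listing for the expected signature).
FLAGS.schedule = 'continuous_train_and_eval'
runner = create_runner('/tmp/pacman_run', my_create_agent_fn)
runner.run_experiment()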
Example #19
 def testInitializeCheckpointingWhenCheckpointUnbundleFails(
         self, mock_logger_constructor, mock_checkpointer_constructor,
         mock_get_latest):
     mock_checkpointer = _create_mock_checkpointer()
     mock_checkpointer_constructor.return_value = mock_checkpointer
     latest_checkpoint = 7
     mock_get_latest.return_value = latest_checkpoint
     agent = mock.Mock()
     agent.unbundle.return_value = False
     mock_logger = mock.Mock()
     mock_logger_constructor.return_value = mock_logger
     runner = run_experiment.Runner(self._test_subdir,
                                    lambda x, y, summary_writer: agent,
                                    create_environment_fn=lambda x, y: x,
                                    game_name='Test')
     self.assertEqual(0, runner._start_iteration)
     self.assertEqual(1, mock_checkpointer.load_checkpoint.call_count)
     self.assertEqual(1, agent.unbundle.call_count)
     mock_args, _ = agent.unbundle.call_args
     self.assertEqual('{}/checkpoints'.format(self._test_subdir),
                      mock_args[0])
     self.assertEqual(latest_checkpoint, mock_args[1])
     expected_dictionary = {'current_iteration': 1729, 'logs': 'logs'}
     self.assertDictEqual(expected_dictionary, mock_args[2])
Example #20
 def testFailsWithoutGameName(self):
     with self.assertRaises(AssertionError):
         run_experiment.Runner(self._test_subdir, self._create_agent_fn)
Example #21
  def step(self, reward, observation):
    return self._choose_action()

def create_basic_agent(sess, environment, summary_writer=None):
  """The Runner class will expect a function of this type to create an agent."""
  return BasicAgent(sess, num_actions=environment.action_space.n,
                    switch_prob=0.2)

# Create the runner class with this agent. We use very small numbers of steps
# to terminate quickly, as this is mostly meant for demonstrating how one can
# use the framework.
basic_runner = run_experiment.Runner(LOG_PATH,
                                      create_basic_agent,
                                      game_name=GAME,
                                      num_iterations=200,
                                      training_steps=10,
                                      evaluation_steps=10,
                                      max_steps_per_episode=100)


# @title Train Basic Agent.
print('Will train basic agent, please be patient, may be a while...')
basic_runner.run_experiment()
print('Done training!')



# @title Load baseline data
content = os.path.join(LOG_PATH, 'content')
os.makedirs(content, exist_ok=True)  # Equivalent to `mkdir -p` without a shell call.
Example #22
        net = slim.fully_connected(net, 512, activation_fn=tf.nn.relu6)
        q_values = slim.fully_connected(net, self.num_actions, activation_fn=None)
        return self._get_network_type()(q_values)


def create_random_dqn_agent(sess, environment, summary_writer=None):
    return MyDQNAgent(sess, num_actions=environment.action_space.n)


# Create the runner class with this agent, using the Runner's default
# iteration and step counts.
print("creating runner")
random_dqn_runner = run_experiment.Runner(BASE_PATH,
                                          create_random_dqn_agent,
                                          game_name=GAME
                                          )

# @title Train MyRandomDQNAgent.
# print('Will train agent, please be patient, may be a while...')
# random_dqn_runner.run_experiment()
# print('Done training!')




def display_frames_as_gif(frames):
    """
    Displays a list of frames as a gif, with controls
    """
    plt.figure(figsize=(frames[0].shape[1] / 72.0, frames[0].shape[0] / 72.0),
               dpi=72)
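    # The original function is truncated here. What follows is a plausible
    # completion, a sketch based on the common matplotlib animation recipe,
    # not the original code. Assumes two extra imports:
    #   from matplotlib import animation
    #   from IPython.display import HTML
    patch = plt.imshow(frames[0])
    plt.axis('off')

    def animate(i):
        # Swap in frame i on each animation tick.
        patch.set_data(frames[i])

    anim = animation.FuncAnimation(
        plt.gcf(), animate, frames=len(frames), interval=50)
    return HTML(anim.to_jshtml())  # Renders the gif with playback controls.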