Example #1
  def testResults(self):
    with temporary_directory() as logdir:
      results_obj = results_lib.Results(logdir)
      self.assertEqual(results_obj.read_this_shard(), [])
      results_obj.append(
          {'foo': 1.5, 'bar': 2.5, 'baz': 0})
      results_obj.append(
          {'foo': 5.5, 'bar': -1, 'baz': 2})
      self.assertEqual(
          results_obj.read_this_shard(),
          [{'foo': 1.5, 'bar': 2.5, 'baz': 0},
           {'foo': 5.5, 'bar': -1, 'baz': 2}])
Example #2
  def testShardedResults(self):
    with temporary_directory() as logdir:
      n = 4  # Number of shards.
      results_objs = [
          results_lib.Results(logdir, shard_id=i) for i in xrange(n)]
      for i, robj in enumerate(results_objs):
        robj.append({'foo': i, 'bar': 1 + i * 2})
      results_list, _ = results_objs[0].read_all()

      # Check results. Order does not matter here.
      self.assertEqual(
          set(freeze(r) for r in results_list),
          set(freeze({'foo': i, 'bar': 1 + i * 2}) for i in xrange(n)))
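Note that freeze is not defined in this excerpt; it is only needed to make the dicts hashable so they can be compared as a set. A plausible helper (an assumption, not necessarily how the test module implements it):

def freeze(dictionary):
  # Convert a flat dict into a hashable value so it can be placed in a set.
  # Assumes the values themselves are hashable (numbers/strings), as above.
  return frozenset(dictionary.items())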
Example #3
def get_results_for_experiment(models_dir,
                               task_name,
                               model_type='pg',
                               max_npe='5M',
                               desc='v0',
                               name_prefix='bf_rl_paper',
                               extra_desc=''):
    """Get and process results for a given experiment.

  An experiment is a set of runs with the same hyperparameters and environment.
  It is uniquely specified by a (task_name, model_type, max_npe) triple, as
  well as an optional description.

  We assume that each experiment has a folder with the same name as the job that
  ran the experiment. The name is computed by
  "%name_prefix%.%desc%-%max_npe%_%task_name%".

  Args:
    models_dir: Parent directory containing experiment folders.
    task_name: String name of task (the coding env). See code_tasks.py or
        run_eval_tasks.py
    model_type: Name of the algorithm, such as 'pg', 'topk', 'ga', 'rand'.
    max_npe: String SI unit representation of the maximum NPE threshold for the
        experiment. For example, "5M" means 5 million.
    desc: Description.
    name_prefix: Prefix of job names. Normally leave this as default.
    extra_desc: Optional extra description at the end of the job name.

  Returns:
    ProcessedResults namedtuple instance, containing
    metrics: Raw dicts read from disk.
    processed: Stats computed by `process_results`.

  Raises:
    ValueError: If max_npe in the metrics does not match NPE in the experiment
        folder name.
  """
    folder = name_prefix + '.{0}.{1}-{2}_{3}'.format(desc, model_type, max_npe,
                                                     task_name)
    if extra_desc:
        folder += '.' + extra_desc

    results = results_lib.Results(os.path.join(models_dir, folder))
    metrics, _ = results.read_all()
    processed = process_results(metrics)
    if (not np.isclose(processed['max_npe'], misc.si_to_int(max_npe))
            and processed['repetitions']):
        raise ValueError(
            'Invalid experiment. Max-NPE setting does not match expected max-NPE '
            'in experiment name.')
    return ProcessedResults(metrics=metrics, processed=processed)
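To make the naming convention concrete, here is how the folder string is assembled for some illustrative arguments (the task name 'reverse' is hypothetical; substitute a real task from code_tasks.py):

name_prefix = 'bf_rl_paper'
desc = 'v0'
model_type = 'pg'
max_npe = '5M'
task_name = 'reverse'  # hypothetical
extra_desc = ''

folder = name_prefix + '.{0}.{1}-{2}_{3}'.format(desc, model_type, max_npe, task_name)
if extra_desc:
  folder += '.' + extra_desc
print(folder)  # -> bf_rl_paper.v0.pg-5M_reverse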
Example #4
def run_training(
        config=None,
        tuner=None,
        logdir=None,
        trial_name=None,  # pylint: disable=unused-argument
        is_chief=True):
    """Do all training runs.

  This is the top level training function for the genetic algorithm and random
  search models.
  Run this from the main function.

  Args:
    config: config_lib.Config instance containing global config (agent and
        environment hparams). If None, config will be parsed from FLAGS.config.
    tuner: (unused) A tuner instance. Leave as None if not tuning.
    logdir: Parent directory where all data from all runs will be written. If
        None, FLAGS.logdir will be used.
    trial_name: (unused) If tuning, set this to a unique string that identifies
        this trial. If `tuner` is not None, this also must be set.
    is_chief: True if this worker is the chief.

  Returns:
    List of results dicts which were written to disk. Each training run gets a
    results dict. Results dict contains metrics, i.e. (name, value) pairs which
    give information about the training run.

  Raises:
    ValueError: If FLAGS.num_workers does not divide FLAGS.num_repetitions.
    ValueError: If results dicts read from disk contain invalid data.
  """
    if not config:
        # If custom config is not given, get it from flags.
        config = defaults.default_config_with_updates(FLAGS.config)
    if not logdir:
        logdir = FLAGS.logdir

    if FLAGS.num_repetitions % FLAGS.num_workers != 0:
        raise ValueError('Number of workers must divide number of repetitions')
    num_local_reps = FLAGS.num_repetitions // FLAGS.num_workers
    logging.info('Running %d reps globally.', FLAGS.num_repetitions)
    logging.info('This worker will run %d local reps.', num_local_reps)
    if FLAGS.max_npe:
        max_generations = FLAGS.max_npe // config.batch_size
        logging.info('Max samples per rep: %d', FLAGS.max_npe)
        logging.info('Max generations per rep: %d', max_generations)
    else:
        max_generations = sys.maxint
        logging.info('Running unlimited generations.')

    assert FLAGS.num_workers > 0
    logging.info('Starting experiment. Directory: "%s"', logdir)
    results = results_lib.Results(logdir, FLAGS.task_id)
    local_results_list = results.read_this_shard()
    if local_results_list:
        if local_results_list[0]['max_npe'] != FLAGS.max_npe:
            raise ValueError(
                'Cannot resume training. Max-NPE changed. Was %s, now %s'
                % (local_results_list[0]['max_npe'], FLAGS.max_npe))
        if local_results_list[0][
                'max_global_repetitions'] != FLAGS.num_repetitions:
            raise ValueError(
                'Cannot resume training. Number of repetitions changed. Was %s, '
                'now %s' % (local_results_list[0]['max_global_repetitions'],
                            FLAGS.num_repetitions))
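    # Resume this shard from the repetition after the last one it completed.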
    start_rep = len(local_results_list)

    for rep in xrange(start_rep, num_local_reps):
        global_rep = num_local_reps * FLAGS.task_id + rep
        logging.info('Starting repetition: Rep = %d. (global rep = %d)', rep,
                     global_rep)

        # Data saved for each rep, such as checkpoints, goes into a separate folder.
        run_dir = os.path.join(logdir, 'run_%d' % global_rep)

        if not tf.gfile.IsDirectory(run_dir):
            tf.gfile.MakeDirs(run_dir)
        checkpoint_writer = CheckpointWriter(run_dir,
                                             population_size=config.batch_size)

        data_manager = data.DataManager(config, run_number=global_rep)
        task_eval_fn = ga_lib.make_task_eval_fn(data_manager.rl_task)

        if config.agent.algorithm == 'rand':
            logging.info('Running random search.')
            assert FLAGS.max_npe
            result = run_random_search(FLAGS.max_npe, run_dir, task_eval_fn,
                                       config.timestep_limit)
        else:
            assert config.agent.algorithm == 'ga'
            logging.info('Running genetic algorithm.')
            pop = ga_lib.make_population(ga_lib.random_individual(
                config.timestep_limit),
                                         n=config.batch_size)
            hof = utils.MaxUniquePriorityQueue(2)  # Hall of fame.
            result = ga_lib.ga_loop(pop,
                                    cxpb=config.agent.crossover_rate,
                                    mutpb=config.agent.mutation_rate,
                                    task_eval_fn=task_eval_fn,
                                    ngen=max_generations,
                                    halloffame=hof,
                                    checkpoint_writer=checkpoint_writer)

        logging.info('Finished rep. Num gens: %d', result.generations)

        results_dict = {
            'max_npe': FLAGS.max_npe,
            'batch_size': config.batch_size,
            'max_batches': FLAGS.max_npe // config.batch_size,
            'npe': result.num_programs,
            'max_global_repetitions': FLAGS.num_repetitions,
            'max_local_repetitions': num_local_reps,
            'code_solution': result.best_code if result.solution_found else '',
            'best_reward': result.reward,
            'num_batches': result.generations,
            'found_solution': result.solution_found,
            'task': data_manager.task_name,
            'global_rep': global_rep
        }
        logging.info('results_dict: %s', results_dict)
        results.append(results_dict)

    if is_chief:
        logging.info(
            'Worker is chief. Waiting for all workers to finish so that results '
            'can be reported to the tuner.')

        global_results_list, shard_stats = results.read_all(
            num_shards=FLAGS.num_workers)
        while not all(s.finished for s in shard_stats):
            logging.info(
                'Still waiting on these workers: %s', ', '.join([
                    '%d (%d reps left)' %
                    (i, s.max_local_reps - s.num_local_reps_completed)
                    for i, s in enumerate(shard_stats) if not s.finished
                ]))
            sleep(60)
            global_results_list, shard_stats = results.read_all(
                num_shards=FLAGS.num_workers)

        logging.info(
            '%d results obtained. Chief worker is exiting the experiment.',
            len(global_results_list))

        return global_results_list
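The repetition-to-worker mapping above is plain block partitioning: each worker runs num_local_reps consecutive global repetitions starting at num_local_reps * task_id. A standalone illustration with made-up worker and repetition counts:

num_repetitions = 8  # stands in for FLAGS.num_repetitions
num_workers = 4      # stands in for FLAGS.num_workers
assert num_repetitions % num_workers == 0
num_local_reps = num_repetitions // num_workers  # 2 reps per worker

for task_id in range(num_workers):
  global_reps = [num_local_reps * task_id + rep for rep in range(num_local_reps)]
  print('worker %d runs global reps %s' % (task_id, global_reps))
# worker 0 runs global reps [0, 1]
# worker 1 runs global reps [2, 3]
# worker 2 runs global reps [4, 5]
# worker 3 runs global reps [6, 7]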
Example #5
def run_training(config=None,
                 tuner=None,
                 logdir=None,
                 trial_name=None,
                 is_chief=True):
    """Do all training runs.

  This is the top level training function for policy gradient based models.
  Run this from the main function.

  Args:
    config: config_lib.Config instance containing global config (agent and
        environment hparams). If None, config will be parsed from FLAGS.config.
    tuner: A tuner instance. Leave as None if not tuning.
    logdir: Parent directory where all data from all runs will be written. If
        None, FLAGS.logdir will be used.
    trial_name: If tuning, set this to a unique string that identifies this
        trial. If `tuner` is not None, this also must be set.
    is_chief: True if this worker is the chief.

  Returns:
    List of results dicts which were written to disk. Each training run gets a
    results dict. Results dict contains metrics, i.e. (name, value) pairs which
    give information about the training run.

  Raises:
    ValueError: If results dicts read from disk contain invalid data.
  """
    if not config:
        # If custom config is not given, get it from flags.
        config = defaults.default_config_with_updates(FLAGS.config)
    if not logdir:
        logdir = FLAGS.logdir
    if not tf.gfile.Exists(logdir):
        tf.gfile.MakeDirs(logdir)
    assert FLAGS.num_repetitions > 0
    results = results_lib.Results(logdir)
    results_list, _ = results.read_all()

    tf.logging.info('Starting experiment. Directory: "%s"', logdir)

    if results_list:
        if results_list[0]['max_npe'] != FLAGS.max_npe:
            raise ValueError(
                'Cannot resume training. Max-NPE changed. Was %s, now %s'
                % (results_list[0]['max_npe'], FLAGS.max_npe))
        if results_list[0]['max_global_repetitions'] != FLAGS.num_repetitions:
            raise ValueError(
                'Cannot resume training. Number of repetitions changed. Was %s, '
                'now %s' % (results_list[0]['max_global_repetitions'],
                            FLAGS.num_repetitions))

    while len(results_list) < FLAGS.num_repetitions:
        run_number = len(results_list)
        rep_container_name = trial_name if trial_name else 'container'
        if FLAGS.num_repetitions > 1:
            rep_dir = os.path.join(logdir, 'run_%d' % run_number)
            rep_container_name = rep_container_name + '_run_' + str(run_number)
        else:
            rep_dir = logdir

        tf.logging.info('Starting repetition %d (%d out of %d)', run_number,
                        run_number + 1, FLAGS.num_repetitions)

        # Train will write result to disk.
        with tf.container(rep_container_name):
            trainer = train(config, is_chief, tuner, rep_dir, run_number,
                            results)
        tf.logging.info('Done training.')

        if is_chief:
            # Destroy current container immediately (clears current graph).
            tf.logging.info('Clearing shared variables.')
            tf.Session.reset(FLAGS.master, containers=[rep_container_name])
            tf.logging.info('Shared variables cleared.')

            # Delete replay buffer on disk.
            assert trainer
            trainer.delete_replay_buffer()
        else:
            # Give chief worker time to clean up.
            sleep_sec = 30.0
            tf.logging.info('Sleeping for %s sec.', sleep_sec)
            time.sleep(sleep_sec)
        tf.reset_default_graph()
        tf.logging.info('Default graph reset.')

        # train() is expected to have written a new result to disk before returning.
        results_list, _ = results.read_all()
    return results_list
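The resume loop above relies on len(results_list) being the index of the next repetition to run: each completed repetition appends exactly one results dict, so re-running the job simply continues where the count left off. A compact sketch of that bookkeeping with made-up values (the call to train() is replaced by a stand-in that just appends a dict):

import os

logdir = '/tmp/example_logdir'  # illustrative
num_repetitions = 3             # stands in for FLAGS.num_repetitions
results_list = [{'found_solution': True}]  # pretend one rep already finished

while len(results_list) < num_repetitions:
  run_number = len(results_list)  # index of the next repetition
  rep_dir = os.path.join(logdir, 'run_%d' % run_number)
  # A real run would call train(...) here, which writes its results dict to disk.
  results_list.append({'found_solution': False, 'run_dir': rep_dir})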