示例#1
0
    def _make_context(self, *args, **kwargs):
        """Make a context from the template information and variant args.

        Currently, all arguments should be keyword arguments.

        Args:
            args (list): Should be empty.
            kwargs (dict): Keyword arguments for the wrapped function. Will be
                logged to `variant.json`

        Returns:
            ExperimentContext: The created experiment context.

        Raises:
            ValueError: If args is not empty.

        """
        if args:
            raise ValueError('garage.experiment currently only supports '
                             'keyword arguments')
        # Resolve the experiment name, optionally encoding parameters in it.
        exp_name = self.function.__name__ if self.name is None else self.name
        if self.name_parameters:
            exp_name = self._augment_name(exp_name, kwargs)
        exp_dir = self.log_dir
        if exp_dir is None:
            exp_dir = '{data}/local/{prefix}/{name}'.format(
                data=os.path.join(os.getcwd(), 'data'),
                prefix=self.prefix,
                name=exp_name)
        exp_dir = _make_sequential_log_dir(exp_dir)

        # Record the variant and repository metadata for reproducibility.
        dump_json(os.path.join(exp_dir, 'variant.json'), kwargs)
        git_root_path, metadata = get_metadata()
        dump_json(os.path.join(exp_dir, 'metadata.json'), metadata)
        if git_root_path and self.archive_launch_repo:
            make_launcher_archive(git_root_path=git_root_path,
                                  log_dir=exp_dir)

        # Fan log output out to text, CSV, TensorBoard and stdout sinks.
        for sink in (dowel.TextOutput(os.path.join(exp_dir, 'debug.log')),
                     dowel.CsvOutput(os.path.join(exp_dir, 'progress.csv')),
                     dowel.TensorBoardOutput(exp_dir,
                                             x_axis='TotalEnvSteps'),
                     dowel.StdOutput()):
            logger.add_output(sink)

        logger.push_prefix('[{}] '.format(exp_name))
        logger.log('Logging to {}'.format(exp_dir))

        return ExperimentContext(snapshot_dir=exp_dir,
                                 snapshot_mode=self.snapshot_mode,
                                 snapshot_gap=self.snapshot_gap)
示例#2
0
    def __enter__(self):
        """Attach dowel logger outputs for this experiment context.

        Returns:
            The context object itself, with logging routed to CSV, text,
            TensorBoard and stdout under ``self.log_dir``.
        """
        csv_path = os.path.join(self.log_dir, 'progress.csv')
        debug_path = os.path.join(self.log_dir, 'debug.log')

        # Register all sinks in the same order the logger should flush them.
        for sink in (dowel.TextOutput(debug_path),
                     dowel.CsvOutput(csv_path),
                     dowel.TensorBoardOutput(self.log_dir),
                     dowel.StdOutput()):
            logger.add_output(sink)

        logger.push_prefix('[%s] ' % self.exp_name)
        return self
示例#3
0
    def _make_context(cls, options, **kwargs):
        """Make a context from the template information and variant args.

        Currently, all arguments should be keyword arguments.

        Args:
            options (dict): Options to `wrap_experiment` itself. See the
                function documentation for details.
            kwargs (dict): Keyword arguments for the wrapped function. Will be
                logged to `variant.json`

        Returns:
            ExperimentContext: The created experiment context.

        """
        # Fall back to the wrapped function's name, then let subclasses
        # augment it with parameter information.
        exp_name = options['name']
        if exp_name is None:
            exp_name = options['function'].__name__
        exp_name = cls._augment_name(options, exp_name, kwargs)
        exp_dir = options['log_dir']
        if exp_dir is None:
            exp_dir = '{data}/local/{prefix}/{name}'.format(
                data=os.path.join(os.getcwd(), 'data'),
                prefix=options['prefix'],
                name=exp_name)
        if options['use_existing_dir']:
            os.makedirs(exp_dir, exist_ok=True)
        else:
            exp_dir = _make_sequential_log_dir(exp_dir)

        # Record the variant and repository metadata for reproducibility.
        dump_json(os.path.join(exp_dir, 'variant.json'), kwargs)
        git_root_path, metadata = get_metadata()
        dump_json(os.path.join(exp_dir, 'metadata.json'), metadata)
        if git_root_path and options['archive_launch_repo']:
            make_launcher_archive(git_root_path=git_root_path,
                                  log_dir=exp_dir)

        # Fan log output out to text, CSV, TensorBoard and stdout sinks.
        for sink in (dowel.TextOutput(os.path.join(exp_dir, 'debug.log')),
                     dowel.CsvOutput(os.path.join(exp_dir, 'progress.csv')),
                     dowel.TensorBoardOutput(exp_dir,
                                             x_axis='TotalEnvSteps'),
                     dowel.StdOutput()):
            logger.add_output(sink)

        logger.push_prefix('[{}] '.format(exp_name))
        logger.log('Logging to {}'.format(exp_dir))

        return ExperimentContext(snapshot_dir=exp_dir,
                                 snapshot_mode=options['snapshot_mode'],
                                 snapshot_gap=options['snapshot_gap'])
示例#4
0
File: runner_utils.py — Project: sisl/DICG
def restore_training(log_dir, exp_name, args, env_saved=True, env=None):
    """Resume training from the last snapshot stored in ``log_dir``.

    Args:
        log_dir (str): Directory containing the snapshot to restore.
        exp_name (str): Experiment name used as the logger prefix.
        args: Parsed command-line arguments; must provide
            ``eval_during_training``, ``n_eval_episodes``, ``eval_greedy``,
            ``eval_epoch_freq``, ``clip_grad_norm`` and ``n_epochs``.
        env_saved (bool): If True, reuse the environment pickled in the
            snapshot instead of the ``env`` argument.
        env: Environment to use when ``env_saved`` is False.

    Returns:
        The result of the restored algorithm's ``train`` call.
    """
    # Timestamp + hostname keep restored-run logs from clobbering the
    # original logs (or other restored runs) in the same directory.
    tabular_log_file = os.path.join(
        log_dir, 'progress_restored.{}.{}.csv'.format(
            str(time.time())[:10], socket.gethostname()))
    text_log_file = os.path.join(
        log_dir, 'debug_restored.{}.{}.log'.format(
            str(time.time())[:10], socket.gethostname()))
    logger.add_output(dowel.TextOutput(text_log_file))
    logger.add_output(dowel.CsvOutput(tabular_log_file))
    logger.add_output(dowel.TensorBoardOutput(log_dir))
    logger.add_output(dowel.StdOutput())
    logger.push_prefix('[%s] ' % exp_name)

    ctxt = ExperimentContext(snapshot_dir=log_dir,
                             snapshot_mode='last',
                             snapshot_gap=1)

    runner = LocalRunnerWrapper(ctxt,
                                eval=args.eval_during_training,
                                n_eval_episodes=args.n_eval_episodes,
                                eval_greedy=args.eval_greedy,
                                eval_epoch_freq=args.eval_epoch_freq,
                                save_env=env_saved)
    # NOTE(review): this reaches into the runner's private snapshot state to
    # rebuild the saved setup/train arguments — brittle across versions.
    saved = runner._snapshotter.load(log_dir, 'last')
    runner._setup_args = saved['setup_args']
    runner._train_args = saved['train_args']
    runner._stats = saved['stats']

    # Re-apply the seed recorded at original setup time.
    set_seed(runner._setup_args.seed)
    algo = saved['algo']

    # Compatibility patch
    # Older snapshots predate _clip_grad_norm; backfill it from args.
    if not hasattr(algo, '_clip_grad_norm'):
        setattr(algo, '_clip_grad_norm', args.clip_grad_norm)

    if env_saved:
        env = saved['env']

    runner.setup(env=env,
                 algo=algo,
                 sampler_cls=runner._setup_args.sampler_cls,
                 sampler_args=runner._setup_args.sampler_args)
    # Continue for args.n_epochs additional epochs after the saved epoch.
    runner._train_args.start_epoch = runner._stats.total_epoch + 1
    runner._train_args.n_epochs = runner._train_args.start_epoch + args.n_epochs

    print('\nRestored checkpoint from epoch #{}...'.format(
        runner._train_args.start_epoch))
    print('To be trained for additional {} epochs...'.format(args.n_epochs))
    print('Will be finished at epoch #{}...\n'.format(
        runner._train_args.n_epochs))

    return runner._algo.train(runner)
示例#5
0
    def _make_context(self, *args, **kwargs):
        """Make a context from the template information and variant args.

        Currently, all arguments should be keyword arguments.

        Args:
            args (list): Should be empty.
            kwargs (dict): Keyword arguments for the wrapped function. Will be
                logged to `variant.json`

        Returns:
            ExperimentContext: The created experiment context.

        Raises:
            ValueError: If args is not empty.

        """
        if args:
            raise ValueError('metarl.experiment currently only supports '
                             'keyword arguments')
        # Resolve the experiment name unconditionally. Previously this only
        # happened when log_dir was unset, so an explicit log_dir left
        # self.name as None and the logger prefix read '[None] '.
        name = self.name
        if name is None:
            name = self.function.__name__
            self.name = name
        log_dir = self.log_dir
        if log_dir is None:
            # NOTE(review): `timestamp` is a module-level value — confirm it
            # is refreshed per run rather than fixed at import time.
            log_dir = ('{data}/local/{prefix}/{name}/{time}'.format(
                data=osp.join(os.getcwd(), 'data'),
                prefix=self.prefix,
                name=name,
                time=timestamp))
        log_dir = _make_sequential_log_dir(log_dir)

        tabular_log_file = os.path.join(log_dir, 'progress.csv')
        text_log_file = os.path.join(log_dir, 'debug.log')
        variant_log_file = os.path.join(log_dir, 'variant.json')

        # Log the variant arguments so the run can be reproduced later.
        dump_json(variant_log_file, kwargs)

        logger.add_output(dowel.TextOutput(text_log_file))
        logger.add_output(dowel.CsvOutput(tabular_log_file))
        logger.add_output(
            dowel.TensorBoardOutput(log_dir, x_axis='TotalEnvSteps'))
        logger.add_output(dowel.StdOutput())

        logger.push_prefix('[%s] ' % self.name)

        return ExperimentContext(snapshot_dir=log_dir,
                                 snapshot_mode=self.snapshot_mode,
                                 snapshot_gap=self.snapshot_gap)
示例#6
0
def run_experiment(argv):
    """Run experiment.

    Args:
        argv (list[str]): Command line arguments.

    Raises:
        BaseException: Propagate any exception in the experiment.

    """
    now = datetime.datetime.now(dateutil.tz.tzlocal())

    # avoid name clashes when running distributed jobs
    rand_id = str(uuid.uuid4())[:5]
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S_%f_%Z')

    default_exp_name = 'experiment_%s_%s' % (timestamp, rand_id)
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--n_parallel',
        type=int,
        default=1,
        help=('Number of parallel workers to perform rollouts. '
              "0 => don't start any workers"))
    parser.add_argument('--exp_name',
                        type=str,
                        default=default_exp_name,
                        help='Name of the experiment.')
    parser.add_argument('--log_dir',
                        type=str,
                        default=None,
                        help='Path to save the log and iteration snapshot.')
    parser.add_argument('--snapshot_mode',
                        type=str,
                        default='last',
                        help='Mode to save the snapshot. Can be either "all" '
                        '(all iterations will be saved), "last" (only '
                        'the last iteration will be saved), "gap" (every'
                        '`snapshot_gap` iterations are saved), or "none" '
                        '(do not save snapshots)')
    parser.add_argument('--snapshot_gap',
                        type=int,
                        default=1,
                        help='Gap between snapshot iterations.')
    parser.add_argument(
        '--resume_from_dir',
        type=str,
        default=None,
        help='Directory of the pickle file to resume experiment from.')
    parser.add_argument('--resume_from_epoch',
                        type=str,
                        default=None,
                        help='Index of iteration to restore from. '
                        'Can be "first", "last" or a number. '
                        'Not applicable when snapshot_mode="last"')
    parser.add_argument('--tabular_log_file',
                        type=str,
                        default='progress.csv',
                        help='Name of the tabular log file (in csv).')
    parser.add_argument('--text_log_file',
                        type=str,
                        default='debug.log',
                        help='Name of the text log file (in pure text).')
    parser.add_argument('--tensorboard_step_key',
                        type=str,
                        default=None,
                        help='Name of the step key in tensorboard_summary.')
    parser.add_argument('--params_log_file',
                        type=str,
                        default='params.json',
                        help='Name of the parameter log file (in json).')
    parser.add_argument('--variant_log_file',
                        type=str,
                        default='variant.json',
                        help='Name of the variant log file (in json).')
    parser.add_argument('--plot',
                        type=ast.literal_eval,
                        default=False,
                        help='Whether to plot the iteration results')
    parser.add_argument(
        '--log_tabular_only',
        type=ast.literal_eval,
        default=False,
        help='Print only the tabular log information (in a horizontal format)')
    parser.add_argument('--seed',
                        type=int,
                        default=None,
                        help='Random seed for numpy')
    parser.add_argument('--args_data',
                        type=str,
                        help='Pickled data for objects')
    parser.add_argument('--variant_data',
                        type=str,
                        help='Pickled data for variant configuration')

    # argv[0] is the program name; only parse the actual arguments.
    args = parser.parse_args(argv[1:])

    # Seed before any workers start so sampling is reproducible.
    if args.seed is not None:
        deterministic.set_seed(args.seed)

    if args.n_parallel > 0:
        parallel_sampler.initialize(n_parallel=args.n_parallel)
        if args.seed is not None:
            parallel_sampler.set_seed(args.seed)

    if not args.plot:
        garage.plotter.Plotter.disable()
        garage.tf.plotter.Plotter.disable()

    if args.log_dir is None:
        log_dir = os.path.join(os.path.join(os.getcwd(), 'data'),
                               args.exp_name)
    else:
        log_dir = args.log_dir

    tabular_log_file = os.path.join(log_dir, args.tabular_log_file)
    text_log_file = os.path.join(log_dir, args.text_log_file)
    params_log_file = os.path.join(log_dir, args.params_log_file)

    # Variant data arrives base64-encoded and pickled on the command line.
    # NOTE(review): pickle.loads on CLI-provided data is only safe because
    # the caller is a trusted launcher process.
    if args.variant_data is not None:
        variant_data = pickle.loads(base64.b64decode(args.variant_data))
        variant_log_file = os.path.join(log_dir, args.variant_log_file)
        dump_variant(variant_log_file, variant_data)
    else:
        variant_data = None

    log_parameters(params_log_file, args)

    # Route log output to text, CSV, TensorBoard and stdout sinks.
    logger.add_output(dowel.TextOutput(text_log_file))
    logger.add_output(dowel.CsvOutput(tabular_log_file))
    logger.add_output(dowel.TensorBoardOutput(log_dir, x_axis='TotalEnvSteps'))
    logger.add_output(dowel.StdOutput())

    logger.push_prefix('[%s] ' % args.exp_name)

    snapshot_config = SnapshotConfig(snapshot_dir=log_dir,
                                     snapshot_mode=args.snapshot_mode,
                                     snapshot_gap=args.snapshot_gap)

    # The experiment entry point itself is shipped as pickled bytes.
    method_call = cloudpickle.loads(base64.b64decode(args.args_data))
    try:
        method_call(snapshot_config, variant_data, args.resume_from_dir,
                    args.resume_from_epoch)
    except BaseException:
        # Shut down plotters and samplers before re-raising so no child
        # processes are leaked on failure.
        children = garage.plotter.Plotter.get_plotters()
        children += garage.tf.plotter.Plotter.get_plotters()
        if args.n_parallel > 0:
            children += [parallel_sampler]
        child_proc_shutdown(children)
        raise

    logger.remove_all()
    logger.pop_prefix()
示例#7
0
def run_metarl(env, envs, tasks, seed, log_dir):
    """Create metarl Tensorflow PPO model and training.

    Args:
        env: Environment providing the spec used to build the policy and
            algorithms.
        envs: Environments handed to the runner for sampling.
        tasks: Task sampler for meta-batch training.
        seed (int): Random positive integer for the trial.
        log_dir (str): Log dir path.

    Returns:
        str: Path to output csv file

    """
    deterministic.set_seed(seed)
    snapshot_config = SnapshotConfig(snapshot_dir=log_dir,
                                     snapshot_mode='gap',
                                     snapshot_gap=10)
    with LocalTFRunner(snapshot_config) as runner:
        policy = GaussianGRUPolicy(
            hidden_dims=hyper_parameters['hidden_sizes'],
            env_spec=env.spec,
            state_include_action=False)

        baseline = MetaRLLinearFeatureBaseline(env_spec=env.spec)

        # RL2 trials span rollout_per_task episodes, hence the multiplied
        # max_path_length for the inner PPO.
        inner_algo = RL2PPO(
            env_spec=env.spec,
            policy=policy,
            baseline=baseline,
            max_path_length=hyper_parameters['max_path_length'] * hyper_parameters['rollout_per_task'],
            discount=hyper_parameters['discount'],
            gae_lambda=hyper_parameters['gae_lambda'],
            lr_clip_range=hyper_parameters['lr_clip_range'],
            optimizer_args=dict(
                max_epochs=hyper_parameters['optimizer_max_epochs'],
                tf_optimizer_args=dict(
                    learning_rate=hyper_parameters['optimizer_lr'],
                ),
            )
        )

        # Need to pass this if meta_batch_size < num_of_tasks
        task_names = list(ML45_ENVS['train'].keys())
        algo = RL2(
            policy=policy,
            inner_algo=inner_algo,
            max_path_length=hyper_parameters['max_path_length'],
            meta_batch_size=hyper_parameters['meta_batch_size'],
            task_sampler=tasks,
            task_names=None if hyper_parameters['meta_batch_size'] >= len(task_names) else task_names)

        # Set up logger since we are not using run_experiment
        tabular_log_file = osp.join(log_dir, 'progress.csv')
        text_log_file = osp.join(log_dir, 'debug.log')
        dowel_logger.add_output(dowel.TextOutput(text_log_file))
        dowel_logger.add_output(dowel.CsvOutput(tabular_log_file))
        dowel_logger.add_output(dowel.StdOutput())
        dowel_logger.add_output(dowel.TensorBoardOutput(log_dir))

        runner.setup(
            algo,
            envs,
            sampler_cls=hyper_parameters['sampler_cls'],
            n_workers=hyper_parameters['meta_batch_size'],
            worker_class=RL2Worker,
            sampler_args=dict(
                use_all_workers=hyper_parameters['use_all_workers']),
            worker_args=dict(
                n_paths_per_trial=hyper_parameters['rollout_per_task']))

        # meta evaluator
        # Pad test envs to a common observation dimension so one policy can
        # act in all of them.
        env_obs_dim = [env().observation_space.shape[0] for (_, env) in ML45_ENVS['test'].items()]
        max_obs_dim = max(env_obs_dim)
        ML_test_envs = [
            TaskIdWrapper(NormalizedRewardEnv(RL2Env(env(*ML45_ARGS['test'][task]['args'],
                **ML45_ARGS['test'][task]['kwargs']), max_obs_dim)), task_id=task_id, task_name=task)
            for (task_id, (task, env)) in enumerate(ML45_ENVS['test'].items())
        ]
        test_tasks = task_sampler.EnvPoolSampler(ML_test_envs)
        test_tasks.grow_pool(hyper_parameters['n_test_tasks'])

        test_task_names = list(ML45_ENVS['test'].keys())

        runner.setup_meta_evaluator(test_task_sampler=test_tasks,
                                    n_exploration_traj=hyper_parameters['rollout_per_task'],
                                    n_test_rollouts=hyper_parameters['test_rollout_per_task'],
                                    n_test_tasks=hyper_parameters['n_test_tasks'],
                                    n_workers=hyper_parameters['n_test_tasks'],
                                    test_task_names=None if hyper_parameters['n_test_tasks'] >= len(test_task_names) else test_task_names)

        runner.train(n_epochs=hyper_parameters['n_itr'],
            batch_size=hyper_parameters['meta_batch_size'] * hyper_parameters['rollout_per_task'] * hyper_parameters['max_path_length'])

        # Detach outputs so a subsequent trial starts with a clean logger.
        dowel_logger.remove_all()

        return tabular_log_file
示例#8
0
"""Minimal example of dowel usage.

This example demonstrates how to log a simple progress metric using dowel.

The metric is simultaneously sent to the screen, a CSV files, a text log file
and TensorBoard.
"""
import time

import dowel
from dowel import logger, tabular

# Fan the log stream out to the console, a CSV file, a text file and
# a TensorBoard log directory.
for sink in (dowel.StdOutput(),
             dowel.CsvOutput('progress.csv'),
             dowel.TextOutput('progress.txt'),
             dowel.TensorBoardOutput('tensorboard_logdir')):
    logger.add_output(sink)

logger.log('Starting up...')
for step in range(1000):
    logger.push_prefix('itr {}: '.format(step))
    logger.log('Running training step')

    time.sleep(0.01)  # Tensorboard doesn't like output to be too fast.

    # Record one row of tabular data and flush it to every sink.
    tabular.record('itr', step)
    tabular.record('loss', 100.0 / (2 + step))
    logger.log(tabular)

    logger.pop_prefix()
    logger.dump_all()
示例#9
0
def run_experiment(argv):
    """Parse launcher arguments, set up logging, and run the experiment.

    Args:
        argv (list[str]): Command line arguments; argv[0] is the program
            name and is ignored.

    Raises:
        BaseException: Propagated from the experiment when cloudpickle
            dispatch is used.
    """
    now = datetime.datetime.now(dateutil.tz.tzlocal())

    # avoid name clashes when running distributed jobs
    rand_id = str(uuid.uuid4())[:5]
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S_%f_%Z')

    default_exp_name = 'experiment_%s_%s' % (timestamp, rand_id)
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--n_parallel',
        type=int,
        default=1,
        help=('Number of parallel workers to perform rollouts. '
              "0 => don't start any workers"))
    parser.add_argument('--exp_name',
                        type=str,
                        default=default_exp_name,
                        help='Name of the experiment.')
    parser.add_argument('--log_dir',
                        type=str,
                        default=None,
                        help='Path to save the log and iteration snapshot.')
    parser.add_argument('--snapshot_mode',
                        type=str,
                        default='last',
                        help='Mode to save the snapshot. Can be either "all" '
                        '(all iterations will be saved), "last" (only '
                        'the last iteration will be saved), "gap" (every'
                        '`snapshot_gap` iterations are saved), or "none" '
                        '(do not save snapshots)')
    parser.add_argument('--snapshot_gap',
                        type=int,
                        default=1,
                        help='Gap between snapshot iterations.')
    parser.add_argument(
        '--resume_from_dir',
        type=str,
        default=None,
        help='Directory of the pickle file to resume experiment from.')
    parser.add_argument('--resume_from_epoch',
                        type=str,
                        default=None,
                        help='Index of iteration to restore from. '
                        'Can be "first", "last" or a number. '
                        'Not applicable when snapshot_mode="last"')
    parser.add_argument('--tabular_log_file',
                        type=str,
                        default='progress.csv',
                        help='Name of the tabular log file (in csv).')
    parser.add_argument('--text_log_file',
                        type=str,
                        default='debug.log',
                        help='Name of the text log file (in pure text).')
    parser.add_argument('--tensorboard_step_key',
                        type=str,
                        default=None,
                        help='Name of the step key in tensorboard_summary.')
    parser.add_argument('--params_log_file',
                        type=str,
                        default='params.json',
                        help='Name of the parameter log file (in json).')
    parser.add_argument('--variant_log_file',
                        type=str,
                        default='variant.json',
                        help='Name of the variant log file (in json).')
    parser.add_argument('--plot',
                        type=ast.literal_eval,
                        default=False,
                        help='Whether to plot the iteration results')
    parser.add_argument(
        '--log_tabular_only',
        type=ast.literal_eval,
        default=False,
        help='Print only the tabular log information (in a horizontal format)')
    parser.add_argument('--seed',
                        type=int,
                        default=None,
                        help='Random seed for numpy')
    parser.add_argument('--args_data',
                        type=str,
                        help='Pickled data for objects')
    parser.add_argument('--variant_data',
                        type=str,
                        help='Pickled data for variant configuration')
    parser.add_argument('--use_cloudpickle',
                        type=ast.literal_eval,
                        default=False)

    args = parser.parse_args(argv[1:])

    # Seed before any workers start so sampling is reproducible.
    if args.seed is not None:
        deterministic.set_seed(args.seed)

    # SIGINT is blocked for all processes created in parallel_sampler to avoid
    # the creation of sleeping and zombie processes.
    #
    # If the user interrupts run_experiment, there's a chance some processes
    # won't die due to a dead lock condition where one of the children in the
    # parallel sampler exits without releasing a lock once after it catches
    # SIGINT.
    #
    # Later the parent tries to acquire the same lock to proceed with his
    # cleanup, but it remains sleeping waiting for the lock to be released.
    # In the meantime, all the process in parallel sampler remain in the zombie
    # state since the parent cannot proceed with their clean up.
    with mask_signals([signal.SIGINT]):
        if args.n_parallel > 0:
            parallel_sampler.initialize(n_parallel=args.n_parallel)
            if args.seed is not None:
                parallel_sampler.set_seed(args.seed)

    if not args.plot:
        garage.plotter.Plotter.disable()
        garage.tf.plotter.Plotter.disable()

    if args.log_dir is None:
        log_dir = os.path.join(os.path.join(os.getcwd(), 'data'),
                               args.exp_name)
    else:
        log_dir = args.log_dir

    tabular_log_file = os.path.join(log_dir, args.tabular_log_file)
    text_log_file = os.path.join(log_dir, args.text_log_file)
    params_log_file = os.path.join(log_dir, args.params_log_file)

    # Variant data arrives base64-encoded and pickled on the command line.
    # NOTE(review): pickle.loads on CLI data is only acceptable because the
    # caller is a trusted launcher process.
    if args.variant_data is not None:
        variant_data = pickle.loads(base64.b64decode(args.variant_data))
        variant_log_file = os.path.join(log_dir, args.variant_log_file)
        dump_variant(variant_log_file, variant_data)
    else:
        variant_data = None

    if not args.use_cloudpickle:
        log_parameters(params_log_file, args)

    # Route log output to text, CSV, TensorBoard and stdout sinks.
    logger.add_output(dowel.TextOutput(text_log_file))
    logger.add_output(dowel.CsvOutput(tabular_log_file))
    logger.add_output(dowel.TensorBoardOutput(log_dir))
    logger.add_output(dowel.StdOutput())

    logger.push_prefix('[%s] ' % args.exp_name)

    snapshot_config = SnapshotConfig(snapshot_dir=log_dir,
                                     snapshot_mode=args.snapshot_mode,
                                     snapshot_gap=args.snapshot_gap)

    # read from stdin
    if args.use_cloudpickle:
        import cloudpickle
        method_call = cloudpickle.loads(base64.b64decode(args.args_data))
        try:
            method_call(snapshot_config, variant_data, args.resume_from_dir,
                        args.resume_from_epoch)
        except BaseException:
            # Shut down plotters and samplers before re-raising so no
            # child processes are leaked on failure.
            children = garage.plotter.Plotter.get_plotters()
            children += garage.tf.plotter.Plotter.get_plotters()
            if args.n_parallel > 0:
                children += [parallel_sampler]
            child_proc_shutdown(children)
            raise
    else:
        # Legacy path: the pickled payload is an iterable of lazily-executed
        # experiment steps; draining it runs the experiment.
        data = pickle.loads(base64.b64decode(args.args_data))
        maybe_iter = concretize(data)
        if is_iterable(maybe_iter):
            for _ in maybe_iter:
                pass

    logger.remove_all()
    logger.pop_prefix()
示例#10
0
def run_metarl(env, seed, log_dir):
    """Create metarl Tensorflow PPO model and training.

    Args:
        env: Environment of the task; converted by ``_prepare_meta_env``
            into a meta environment plus a task sampler.
        seed (int): Random positive integer for the trial.
        log_dir (str): Log dir path.

    Returns:
        str: Path to output csv file

    """
    deterministic.set_seed(seed)
    snapshot_config = SnapshotConfig(snapshot_dir=log_dir,
                                     snapshot_mode='gap',
                                     snapshot_gap=10)
    with LocalTFRunner(snapshot_config) as runner:
        env, task_samplers = _prepare_meta_env(env)

        policy = GaussianGRUPolicy(
            hidden_dims=hyper_parameters['hidden_sizes'],
            env_spec=env.spec,
            state_include_action=False)

        baseline = MetaRLLinearFeatureBaseline(env_spec=env.spec)

        # RL2 trials span rollout_per_task episodes, hence the multiplied
        # max_path_length for the inner PPO.
        inner_algo = RL2PPO(
            env_spec=env.spec,
            policy=policy,
            baseline=baseline,
            max_path_length=hyper_parameters['max_path_length'] *
            hyper_parameters['rollout_per_task'],
            discount=hyper_parameters['discount'],
            gae_lambda=hyper_parameters['gae_lambda'],
            lr_clip_range=hyper_parameters['lr_clip_range'],
            optimizer_args=dict(
                max_epochs=hyper_parameters['optimizer_max_epochs'],
                tf_optimizer_args=dict(
                    learning_rate=hyper_parameters['optimizer_lr'], ),
            ))

        algo = RL2(policy=policy,
                   inner_algo=inner_algo,
                   max_path_length=hyper_parameters['max_path_length'],
                   meta_batch_size=hyper_parameters['meta_batch_size'],
                   task_sampler=task_samplers)

        # Set up logger since we are not using run_experiment
        tabular_log_file = osp.join(log_dir, 'progress.csv')
        text_log_file = osp.join(log_dir, 'debug.log')
        dowel_logger.add_output(dowel.TextOutput(text_log_file))
        dowel_logger.add_output(dowel.CsvOutput(tabular_log_file))
        dowel_logger.add_output(dowel.StdOutput())
        dowel_logger.add_output(dowel.TensorBoardOutput(log_dir))

        runner.setup(
            algo,
            task_samplers.sample(hyper_parameters['meta_batch_size']),
            sampler_cls=hyper_parameters['sampler_cls'],
            n_workers=hyper_parameters['meta_batch_size'],
            worker_class=RL2Worker,
            sampler_args=dict(
                use_all_workers=hyper_parameters['use_all_workers']),
            worker_args=dict(
                n_paths_per_trial=hyper_parameters['rollout_per_task']))

        runner.setup_meta_evaluator(
            test_task_sampler=task_samplers,
            n_exploration_traj=hyper_parameters['rollout_per_task'],
            n_test_rollouts=hyper_parameters['test_rollout_per_task'],
            n_test_tasks=hyper_parameters['n_test_tasks'],
            n_workers=hyper_parameters['n_test_tasks'])

        runner.train(n_epochs=hyper_parameters['n_itr'],
                     batch_size=hyper_parameters['meta_batch_size'] *
                     hyper_parameters['rollout_per_task'] *
                     hyper_parameters['max_path_length'])

        # Detach outputs so a subsequent trial starts with a clean logger.
        dowel_logger.remove_all()

        return tabular_log_file
示例#11
0
def main(args):
    """Evaluate a trained meta-RL policy on freshly sampled test tasks.

    Loads the policy, (optional) dynamics model and training config saved in
    ``args.output_folder``, reconstructs the environment, baseline and
    multi-task sampler described by that config, then runs
    ``args.num_batches`` batches of test-task evaluation, logging returns to
    stdout/text/CSV/TensorBoard outputs under ``<output_folder>/test``.

    Args:
        args: Parsed CLI namespace. Fields read here: ``output_folder``,
            ``seed``, ``device``, ``num_workers``, ``num_batches``,
            ``num_steps``.

    Raises:
        ValueError: If ``args.output_folder`` does not exist.
    """
    if args.output_folder is not None:
        if not os.path.exists(args.output_folder):
            raise ValueError(
                "The folder with the training files does not exist")

    policy_filename = os.path.join(args.output_folder, 'policy.th')
    dynamics_filename = os.path.join(args.output_folder, 'dynamics.th')
    config_filename = os.path.join(args.output_folder, 'config.json')
    # eval_filename = os.path.join(args.output_folder, 'eval.npz')

    text_log_file = os.path.join(args.output_folder, 'test_log.txt')
    tabular_log_file = os.path.join(args.output_folder, 'test_result.csv')

    # Evaluation artifacts go in a fresh <output_folder>/test directory.
    output_test_folder = os.path.join(args.output_folder, "test")

    if os.path.exists(output_test_folder):
        shutil.rmtree(output_test_folder)
    os.makedirs(output_test_folder)

    # Set up logger outputs: stdout, text log, CSV and TensorBoard.
    logger.add_output(dowel.StdOutput())
    logger.add_output(dowel.TextOutput(text_log_file))
    logger.add_output(dowel.CsvOutput(tabular_log_file))
    logger.add_output(
        dowel.TensorBoardOutput(output_test_folder, x_axis='Batch'))
    logger.log('Logging to {}'.format(output_test_folder))

    with open(config_filename, 'r') as f:
        config = json.load(f)

    # Prefer the seed the model was trained with; fall back to the CLI seed.
    seed = config.get("seed", args.seed)
    if seed is not None:
        # Fix: seed with the resolved `seed`, not `args.seed`. The previous
        # code ignored the config seed computed above, and raised TypeError
        # when args.seed was None but the config supplied a seed.
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        random.seed(seed)

    # Metaworld benchmarks are named "Metaworld-<BenchmarkClass>".
    if config['env-name'].startswith('Metaworld'):
        env_name = config['env-name'].replace("Metaworld-", "")
        metaworld = __import__('metaworld')
        class_ = getattr(metaworld, env_name)
        metaworld_benchmark = class_()
        # Each train env is built and immediately closed; `env` keeps the
        # last instance, which is reused below when constructing the
        # policy/dynamics/baseline.
        for name, env_cls in metaworld_benchmark.train_classes.items():
            env = env_cls()
            env.close()
        benchmark = metaworld_benchmark
    # Other gym envs
    else:
        env_name = config['env-name']
        env = gym.make(config['env-name'], **config.get('env-kwargs', {}))
        env.close()
        benchmark = None

    # Policy: rebuild the architecture from the config, then restore weights.
    policy = get_policy_for_env(env,
                                hidden_sizes=config['hidden-sizes'],
                                nonlinearity=config['nonlinearity'])

    with open(policy_filename, 'rb') as f:
        state_dict = torch.load(f, map_location=torch.device(args.device))
        policy.load_state_dict(state_dict)
    policy.share_memory()  # shared with the sampler worker processes

    # Dynamics model; its weights are only restored when VIME-style
    # exploration was enabled at training time.
    dynamics = get_dynamics_for_env(env,
                                    config['use_vime'],
                                    config['use_inv_vime'],
                                    args.device,
                                    config,
                                    benchmark=benchmark)
    inverse_dynamics = config['use_inv_vime']
    use_dynamics = config["use_vime"] or config["use_inv_vime"]

    if use_dynamics:
        with open(dynamics_filename, 'rb') as f:
            state_dict = torch.load(f, map_location=torch.device(args.device))
            dynamics.load_state_dict(state_dict)
        dynamics.share_memory()

    # Eta: stored in (0, 1) in the config; mapped to logit space before
    # wrapping it in the (shared-memory) parameter.
    if config['adapt_eta']:
        eta_value = torch.Tensor([config["adapted-eta"]])
    else:
        eta_value = torch.Tensor([config["eta"]])
    eta_value = torch.log(eta_value / (1 - eta_value))
    eta = EtaParameter(eta_value, adapt_eta=config['adapt_eta'])
    eta.share_memory()

    # Baseline
    baseline = LinearFeatureBaseline(get_input_size(env))

    # Observation/action normalization statistics, shared across workers.
    normalize_spaces = config.get("normalize-spaces", True)
    act_prev_mean = mp.Manager().list()
    obs_prev_mean = mp.Manager().list()

    if normalize_spaces:
        # Seed the running statistics with the values saved at train time.
        obs_prev_mean.append({
            "mean": torch.Tensor(config["obs_mean"]),
            "std": torch.Tensor(config["obs_std"])
        })
        act_prev_mean.append({
            "mean": torch.Tensor(config["act_mean"]),
            "std": torch.Tensor(config["act_std"])
        })

    epochs_counter = mp.Value('i', 100)

    sampler = MultiTaskSampler(
        config['env-name'],
        env_kwargs=config.get('env-kwargs', {}),
        batch_size=config['fast-batch-size'],  # TODO
        policy=policy,
        baseline=baseline,
        dynamics=dynamics,
        inverse_dynamics=inverse_dynamics,
        env=env,
        # Fix: use the resolved seed (config seed when present) so the
        # sampler is seeded consistently with torch/random above.
        seed=seed,
        num_workers=args.num_workers,
        epochs_counter=epochs_counter,
        act_prev_mean=act_prev_mean,
        obs_prev_mean=obs_prev_mean,
        # rew_prev_mean=rew_prev_mean,
        eta=eta,
        benchmark=benchmark,
        normalize_spaces=normalize_spaces)

    logs = {'tasks': []}
    train_returns, valid_returns = [], []
    for batch in trange(args.num_batches):
        # Sample unseen test tasks, then collect pre- and post-update
        # episodes for each of them.
        tasks = sampler.sample_test_tasks(num_tasks=config['meta-batch-size'])
        train_episodes, valid_episodes = sampler.sample(
            tasks,
            num_steps=args.num_steps,
            fast_lr=config['fast-lr'],
            gamma=config['gamma'],
            gae_lambda=config['gae-lambda'],
            device=args.device)

        logs['tasks'].extend(tasks)
        train_returns.append(get_returns(train_episodes[0]))
        valid_returns.append(get_returns(valid_episodes))

        # Running aggregate over all batches seen so far.
        logs['train_returns'] = np.concatenate(train_returns, axis=0)
        logs['valid_returns'] = np.concatenate(valid_returns, axis=0)

        tabular.record("Batch", batch)

        log_returns(train_episodes,
                    valid_episodes,
                    batch,
                    log_dynamics=use_dynamics,
                    benchmark=benchmark,
                    env=env,
                    env_name=env_name,
                    is_testing=True)
        log_trajectories(config['env-name'], output_test_folder,
                         train_episodes, valid_episodes, batch)

        logger.log(tabular)

        logger.dump_all()

        # with open(eval_filename + "_" + str(batch), 'wb') as f:
        #     np.savez(f, **logs)

    logger.remove_all()