Example #1
def test_meta_evaluator_with_tf():
    set_seed(100)
    tasks = SetTaskSampler(PointEnv, wrapper=set_length)
    max_episode_length = 200
    env = PointEnv()
    n_eps = 3
    with tempfile.TemporaryDirectory() as log_dir_name:
        ctxt = SnapshotConfig(snapshot_dir=log_dir_name,
                              snapshot_mode='none',
                              snapshot_gap=1)
        with TFTrainer(ctxt) as trainer:
            meta_eval = MetaEvaluator(test_task_sampler=tasks,
                                      n_test_tasks=10,
                                      n_exploration_eps=n_eps)
            policy = GaussianMLPPolicy(env.spec)
            algo = MockAlgo(env, policy, max_episode_length, n_eps, meta_eval)
            trainer.setup(algo, env)
            log_file = tempfile.NamedTemporaryFile()
            csv_output = CsvOutput(log_file.name)
            logger.add_output(csv_output)
            meta_eval.evaluate(algo)
            algo_pickle = cloudpickle.dumps(algo)
        tf.compat.v1.reset_default_graph()
        with TFTrainer(ctxt) as trainer:
            algo2 = cloudpickle.loads(algo_pickle)
            trainer.setup(algo2, env)
            trainer.train(10, 0)
Example #2
def test_meta_evaluator():
    set_seed(100)
    tasks = SetTaskSampler(PointEnv, wrapper=set_length)
    max_episode_length = 200
    with tempfile.TemporaryDirectory() as log_dir_name:
        trainer = Trainer(
            SnapshotConfig(snapshot_dir=log_dir_name,
                           snapshot_mode='last',
                           snapshot_gap=1))
        env = PointEnv(max_episode_length=max_episode_length)
        algo = OptimalActionInference(env=env,
                                      max_episode_length=max_episode_length)
        trainer.setup(algo, env)
        meta_eval = MetaEvaluator(test_task_sampler=tasks, n_test_tasks=10)
        log_file = tempfile.NamedTemporaryFile()
        csv_output = CsvOutput(log_file.name)
        logger.add_output(csv_output)
        meta_eval.evaluate(algo)
        logger.log(tabular)
        meta_eval.evaluate(algo)
        logger.log(tabular)
        logger.dump_output_type(CsvOutput)
        logger.remove_output_type(CsvOutput)
        with open(log_file.name, 'r') as file:
            rows = list(csv.DictReader(file))
        assert len(rows) == 2
        assert float(
            rows[0]['MetaTest/__unnamed_task__/TerminationRate']) < 1.0
        assert float(rows[0]['MetaTest/__unnamed_task__/Iteration']) == 0
        assert (float(rows[0]['MetaTest/__unnamed_task__/MaxReturn']) >= float(
            rows[0]['MetaTest/__unnamed_task__/AverageReturn']))
        assert (float(rows[0]['MetaTest/__unnamed_task__/AverageReturn']) >=
                float(rows[0]['MetaTest/__unnamed_task__/MinReturn']))
        assert float(rows[1]['MetaTest/__unnamed_task__/Iteration']) == 1
Example #3
def test_meta_evaluator_with_tf():
    set_seed(100)
    tasks = SetTaskSampler(lambda: GarageEnv(PointEnv()))
    max_path_length = 200
    env = GarageEnv(PointEnv())
    n_traj = 3
    with tempfile.TemporaryDirectory() as log_dir_name:
        ctxt = SnapshotConfig(snapshot_dir=log_dir_name,
                              snapshot_mode='none',
                              snapshot_gap=1)
        with LocalTFRunner(ctxt) as runner:
            meta_eval = MetaEvaluator(test_task_sampler=tasks,
                                      max_path_length=max_path_length,
                                      n_test_tasks=10,
                                      n_exploration_traj=n_traj)
            policy = GaussianMLPPolicy(env.spec)
            algo = MockAlgo(env, policy, max_path_length, n_traj, meta_eval)
            runner.setup(algo, env)
            log_file = tempfile.NamedTemporaryFile()
            csv_output = CsvOutput(log_file.name)
            logger.add_output(csv_output)
            meta_eval.evaluate(algo)
            algo_pickle = cloudpickle.dumps(algo)
        with tf.Graph().as_default():
            with LocalTFRunner(ctxt) as runner:
                algo2 = cloudpickle.loads(algo_pickle)
                runner.setup(algo2, env)
                runner.train(10, 0)
Example #4
def test_pickle_meta_evaluator():
    set_seed(100)
    tasks = SetTaskSampler(lambda: GarageEnv(PointEnv()))
    max_path_length = 200
    env = GarageEnv(PointEnv())
    n_traj = 3
    with tempfile.TemporaryDirectory() as log_dir_name:
        runner = LocalRunner(
            SnapshotConfig(snapshot_dir=log_dir_name,
                           snapshot_mode='last',
                           snapshot_gap=1))
        meta_eval = MetaEvaluator(test_task_sampler=tasks,
                                  max_path_length=max_path_length,
                                  n_test_tasks=10,
                                  n_exploration_traj=n_traj)
        policy = RandomPolicy(env.spec.action_space)
        algo = MockAlgo(env, policy, max_path_length, n_traj, meta_eval)
        runner.setup(algo, env)
        log_file = tempfile.NamedTemporaryFile()
        csv_output = CsvOutput(log_file.name)
        logger.add_output(csv_output)
        meta_eval.evaluate(algo)
        meta_eval_pickle = cloudpickle.dumps(meta_eval)
        meta_eval2 = cloudpickle.loads(meta_eval_pickle)
        meta_eval2.evaluate(algo)
Example #5
def setup_method(self):
    super().setup_method()
    self.temp_dir = tempfile.TemporaryDirectory()
    snapshot_config = SnapshotConfig(snapshot_dir=self.temp_dir.name,
                                     snapshot_mode='all',
                                     snapshot_gap=1)
    fixture_exp(snapshot_config, self.sess)
    for c in self.graph.collections:
        self.graph.clear_collection(c)
Example #6
def test_dqn_cartpole(setup):
    tempdir = tempfile.TemporaryDirectory()
    config = SnapshotConfig(snapshot_dir=tempdir.name,
                            snapshot_mode='last',
                            snapshot_gap=1)

    trainer = Trainer(config)
    algo, env, _, n_epochs, batch_size = setup
    trainer.setup(algo, env, sampler_cls=LocalSampler)
    last_avg_return = trainer.train(n_epochs=n_epochs, batch_size=batch_size)
    assert last_avg_return > 10
    env.close()

    # test resume from snapshot
    trainer.restore(tempdir.name)
    trainer.resume(n_epochs=1, batch_size=batch_size)
Example #7
def test_meta_evaluator_n_traj():
    set_seed(100)
    tasks = SetTaskSampler(PointEnv)
    max_path_length = 200
    env = GarageEnv(PointEnv())
    n_traj = 3
    with tempfile.TemporaryDirectory() as log_dir_name:
        runner = LocalRunner(
            SnapshotConfig(snapshot_dir=log_dir_name,
                           snapshot_mode='last',
                           snapshot_gap=1))
        algo = MockAlgo(env, max_path_length, n_traj)
        runner.setup(algo, env)
        meta_eval = MetaEvaluator(runner,
                                  test_task_sampler=tasks,
                                  max_path_length=max_path_length,
                                  n_test_tasks=10,
                                  n_exploration_traj=n_traj)
        log_file = tempfile.NamedTemporaryFile()
        csv_output = CsvOutput(log_file.name)
        logger.add_output(csv_output)
        meta_eval.evaluate(algo)
Example #8
def run_experiment(argv):
    """Run experiment.

    Args:
        argv (list[str]): Command line arguments.

    Raises:
        BaseException: Propagate any exception in the experiment.

    """
    now = datetime.datetime.now(dateutil.tz.tzlocal())

    # avoid name clashes when running distributed jobs
    rand_id = str(uuid.uuid4())[:5]
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S_%f_%Z')

    default_exp_name = 'experiment_%s_%s' % (timestamp, rand_id)
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--n_parallel',
        type=int,
        default=1,
        help=('Number of parallel workers to perform rollouts. '
              "0 => don't start any workers"))
    parser.add_argument('--exp_name',
                        type=str,
                        default=default_exp_name,
                        help='Name of the experiment.')
    parser.add_argument('--log_dir',
                        type=str,
                        default=None,
                        help='Path to save the log and iteration snapshot.')
    parser.add_argument('--snapshot_mode',
                        type=str,
                        default='last',
                        help='Mode to save the snapshot. Can be either "all" '
                        '(all iterations will be saved), "last" (only '
                        'the last iteration will be saved), "gap" (every '
                        '`snapshot_gap` iterations are saved), or "none" '
                        '(do not save snapshots)')
    parser.add_argument('--snapshot_gap',
                        type=int,
                        default=1,
                        help='Gap between snapshot iterations.')
    parser.add_argument(
        '--resume_from_dir',
        type=str,
        default=None,
        help='Directory of the pickle file to resume experiment from.')
    parser.add_argument('--resume_from_epoch',
                        type=str,
                        default=None,
                        help='Index of iteration to restore from. '
                        'Can be "first", "last" or a number. '
                        'Not applicable when snapshot_mode="last"')
    parser.add_argument('--tabular_log_file',
                        type=str,
                        default='progress.csv',
                        help='Name of the tabular log file (in csv).')
    parser.add_argument('--text_log_file',
                        type=str,
                        default='debug.log',
                        help='Name of the text log file (in pure text).')
    parser.add_argument('--tensorboard_step_key',
                        type=str,
                        default=None,
                        help='Name of the step key in tensorboard_summary.')
    parser.add_argument('--params_log_file',
                        type=str,
                        default='params.json',
                        help='Name of the parameter log file (in json).')
    parser.add_argument('--variant_log_file',
                        type=str,
                        default='variant.json',
                        help='Name of the variant log file (in json).')
    parser.add_argument('--plot',
                        type=ast.literal_eval,
                        default=False,
                        help='Whether to plot the iteration results')
    parser.add_argument(
        '--log_tabular_only',
        type=ast.literal_eval,
        default=False,
        help='Print only the tabular log information (in a horizontal format)')
    parser.add_argument('--seed',
                        type=int,
                        default=None,
                        help='Random seed for numpy')
    parser.add_argument('--args_data',
                        type=str,
                        help='Pickled data for objects')
    parser.add_argument('--variant_data',
                        type=str,
                        help='Pickled data for variant configuration')

    args = parser.parse_args(argv[1:])

    if args.seed is not None:
        deterministic.set_seed(args.seed)

    if args.n_parallel > 0:
        parallel_sampler.initialize(n_parallel=args.n_parallel)
        if args.seed is not None:
            parallel_sampler.set_seed(args.seed)

    if not args.plot:
        garage.plotter.Plotter.disable()
        garage.tf.plotter.Plotter.disable()

    if args.log_dir is None:
        log_dir = os.path.join(os.path.join(os.getcwd(), 'data'),
                               args.exp_name)
    else:
        log_dir = args.log_dir

    tabular_log_file = os.path.join(log_dir, args.tabular_log_file)
    text_log_file = os.path.join(log_dir, args.text_log_file)
    params_log_file = os.path.join(log_dir, args.params_log_file)

    if args.variant_data is not None:
        variant_data = pickle.loads(base64.b64decode(args.variant_data))
        variant_log_file = os.path.join(log_dir, args.variant_log_file)
        dump_variant(variant_log_file, variant_data)
    else:
        variant_data = None

    log_parameters(params_log_file, args)

    logger.add_output(dowel.TextOutput(text_log_file))
    logger.add_output(dowel.CsvOutput(tabular_log_file))
    logger.add_output(dowel.TensorBoardOutput(log_dir, x_axis='TotalEnvSteps'))
    logger.add_output(dowel.StdOutput())

    logger.push_prefix('[%s] ' % args.exp_name)

    snapshot_config = SnapshotConfig(snapshot_dir=log_dir,
                                     snapshot_mode=args.snapshot_mode,
                                     snapshot_gap=args.snapshot_gap)

    method_call = cloudpickle.loads(base64.b64decode(args.args_data))
    try:
        method_call(snapshot_config, variant_data, args.resume_from_dir,
                    args.resume_from_epoch)
    except BaseException:
        children = garage.plotter.Plotter.get_plotters()
        children += garage.tf.plotter.Plotter.get_plotters()
        if args.n_parallel > 0:
            children += [parallel_sampler]
        child_proc_shutdown(children)
        raise

    logger.remove_all()
    logger.pop_prefix()
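The --snapshot_mode help text in the example above describes four modes: "all", "last", "gap", and "none". The sketch below shows one way a snapshotter could interpret those settings when deciding whether to write a snapshot for a given iteration. maybe_save_snapshot is a hypothetical helper for illustration only, not garage's actual Snapshotter class, whose file layout and behavior may differ.

import os
import pickle


def maybe_save_snapshot(snapshot_dir, snapshot_mode, snapshot_gap, itr, params):
    # Decide whether (and where) to persist `params` for iteration `itr`.
    if snapshot_mode == 'none':
        return
    if snapshot_mode == 'all':
        file_name = os.path.join(snapshot_dir, 'itr_%d.pkl' % itr)
    elif snapshot_mode == 'last':
        # A single file is overwritten, so only the latest iteration survives.
        file_name = os.path.join(snapshot_dir, 'params.pkl')
    elif snapshot_mode == 'gap':
        # Only every `snapshot_gap`-th iteration is saved.
        if itr % snapshot_gap != 0:
            return
        file_name = os.path.join(snapshot_dir, 'itr_%d.pkl' % itr)
    else:
        raise ValueError('Invalid snapshot_mode: %s' % snapshot_mode)
    with open(file_name, 'wb') as f:
        pickle.dump(params, f)

For instance, with snapshot_mode='gap' and snapshot_gap=5, only iterations 0, 5, 10, and so on would be written under this sketch.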
Example #9
def run_task(snapshot_config, exp_config):
    logger.log(f"Config of this experiment is {exp_config}")
    env_config = exp_config["env"]
    env_name = env_config["name"]
    replay_buffer_size = exp_config.get("replay_buffer_size")
    n_epochs = exp_config.get("n_epochs")
    steps_per_epoch = exp_config.get("steps_per_epoch")
    sampler_batch_size = exp_config.get("sampler_batch_size")
    n_train_steps = exp_config.get("n_train_steps")
    learning_rate = exp_config.get("learning_rate")
    buffer_batch_size = exp_config.get("buffer_batch_size")
    target_network_update_freq = exp_config.get("target_network_update_freq")
    min_buffer_size = exp_config.get("min_buffer_size")
    net_config = exp_config.get("q-net")
    loss_weights = exp_config.get("loss_weights")
    deepmdp_config = exp_config.get("deepmdp")
    epsilon_greedy_config = exp_config.get("epsilon_greedy")
    plots = exp_config.get("plots")
    steps = n_epochs * steps_per_epoch * sampler_batch_size
    num_frames = env_config.get("num_frames")
    if num_frames is None:
        num_frames = 4
    snapshot_config = SnapshotConfig(
        os.path.join(os.getcwd(), f'runs/{get_info()}/snapshots'),
        snapshot_config["snapshot_mode"], snapshot_config["snapshot_gap"])

    if "LunarLander-v2" in env_name:
        # Pass either LunarLander-v2 or LunarLander-v2-img to have the env
        # return image or semantic observations.
        if env_name[-4:] == "-img":
            env = setup_lunar_lander_with_image_obs(
                env_name[:-4], num_frames, do_noops=env_config["do_noops"])
        elif env_name[-4:] == "-obf":
            env = setup_lunar_lander_with_obfuscated_states(
                env_name[:-4], num_frames, do_noops=env_config["do_noops"])
        elif env_name[-4:] == "-stk":
            env = setup_stacked_lunar_lander_env(
                env_name[:-4], num_frames, normalize=env_config["normalize"])
        else:
            env = GarageEnv(gym.make(env_name))
    elif "SpaceInvaders-v0" == env_name:
        env = setup_atari_env(env_name, num_frames)
    else:
        raise ValueError("Env name not known")

    # Set env seed
    env.seed(get_seed())
    # Set seed for action space (needed for epsilon-greedy reproducibility)
    env.action_space.seed(get_seed())

    # Init visualizer
    visualizer = Visualizer(get_info() + "_main", plots)
    visualizer.publish_config(exp_config)

    runner = LocalRunner(snapshot_config)
    replay_buffer = SimpleReplayBuffer(env.spec,
                                       size_in_transitions=replay_buffer_size,
                                       time_horizon=1)

    strategy = EpsilonGreedyStrategy(env.spec, steps, **epsilon_greedy_config)
    qf = DiscreteCNNQFunction(env_spec=env.spec, **net_config)

    aux_objectives = []
    if deepmdp_config["use"]:
        reward_objective = RewardAuxiliaryObjective(
            env.spec, qf.embedding_size, deepmdp_config["reward_head"])
        transition_objective = TransitionAuxiliaryObjective(
            env.spec, qf.embedding_size, deepmdp_config["transition_head"])
        aux_objectives.append(reward_objective)
        aux_objectives.append(transition_objective)

    policy = DiscreteQfDerivedPolicy(env.spec, qf)
    algo = DQN(policy=policy,
               qf=qf,
               env_spec=env.spec,
               experiment_id=get_info(),
               plot_list=plots,
               visualizer=visualizer,
               replay_buffer=replay_buffer,
               qf_optimizer=torch.optim.Adam,
               exploration_strategy=strategy,
               n_train_steps=n_train_steps,
               buffer_batch_size=buffer_batch_size,
               min_buffer_size=min_buffer_size,
               n_epoch_cycles=steps_per_epoch,
               target_network_update_freq=target_network_update_freq,
               qf_lr=learning_rate,
               max_path_length=1000,
               auxiliary_objectives=aux_objectives,
               **loss_weights)

    # Use the modified off-policy sampler so summary statistics about each
    # episode's Q-values are passed to the algo object.
    runner.setup(algo=algo, env=env, sampler_cls=OffPolicyVectorizedSampler)
    runner.train(n_epochs=n_epochs, batch_size=sampler_batch_size)

    # Bypass GarageEnv.close(), as it requires a display.
    env.env.close()
Example #10
def run_experiment(argv):
    now = datetime.datetime.now(dateutil.tz.tzlocal())

    # avoid name clashes when running distributed jobs
    rand_id = str(uuid.uuid4())[:5]
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S_%f_%Z')

    default_exp_name = 'experiment_%s_%s' % (timestamp, rand_id)
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--n_parallel',
        type=int,
        default=1,
        help=('Number of parallel workers to perform rollouts. '
              "0 => don't start any workers"))
    parser.add_argument('--exp_name',
                        type=str,
                        default=default_exp_name,
                        help='Name of the experiment.')
    parser.add_argument('--log_dir',
                        type=str,
                        default=None,
                        help='Path to save the log and iteration snapshot.')
    parser.add_argument('--snapshot_mode',
                        type=str,
                        default='last',
                        help='Mode to save the snapshot. Can be either "all" '
                        '(all iterations will be saved), "last" (only '
                        'the last iteration will be saved), "gap" (every '
                        '`snapshot_gap` iterations are saved), or "none" '
                        '(do not save snapshots)')
    parser.add_argument('--snapshot_gap',
                        type=int,
                        default=1,
                        help='Gap between snapshot iterations.')
    parser.add_argument(
        '--resume_from_dir',
        type=str,
        default=None,
        help='Directory of the pickle file to resume experiment from.')
    parser.add_argument('--resume_from_epoch',
                        type=str,
                        default=None,
                        help='Index of iteration to restore from. '
                        'Can be "first", "last" or a number. '
                        'Not applicable when snapshot_mode="last"')
    parser.add_argument('--tabular_log_file',
                        type=str,
                        default='progress.csv',
                        help='Name of the tabular log file (in csv).')
    parser.add_argument('--text_log_file',
                        type=str,
                        default='debug.log',
                        help='Name of the text log file (in pure text).')
    parser.add_argument('--tensorboard_step_key',
                        type=str,
                        default=None,
                        help='Name of the step key in tensorboard_summary.')
    parser.add_argument('--params_log_file',
                        type=str,
                        default='params.json',
                        help='Name of the parameter log file (in json).')
    parser.add_argument('--variant_log_file',
                        type=str,
                        default='variant.json',
                        help='Name of the variant log file (in json).')
    parser.add_argument('--plot',
                        type=ast.literal_eval,
                        default=False,
                        help='Whether to plot the iteration results')
    parser.add_argument(
        '--log_tabular_only',
        type=ast.literal_eval,
        default=False,
        help='Print only the tabular log information (in a horizontal format)')
    parser.add_argument('--seed',
                        type=int,
                        default=None,
                        help='Random seed for numpy')
    parser.add_argument('--args_data',
                        type=str,
                        help='Pickled data for objects')
    parser.add_argument('--variant_data',
                        type=str,
                        help='Pickled data for variant configuration')
    parser.add_argument('--use_cloudpickle',
                        type=ast.literal_eval,
                        default=False)

    args = parser.parse_args(argv[1:])

    if args.seed is not None:
        deterministic.set_seed(args.seed)

    # SIGINT is blocked for all processes created in parallel_sampler to avoid
    # the creation of sleeping and zombie processes.
    #
    # If the user interrupts run_experiment, there's a chance some processes
    # won't die due to a deadlock where one of the children in the parallel
    # sampler exits without releasing a lock after it catches SIGINT.
    #
    # Later the parent tries to acquire the same lock to proceed with its
    # cleanup, but it stays asleep waiting for the lock to be released. In the
    # meantime, all the processes in the parallel sampler remain in the zombie
    # state because the parent cannot proceed with their cleanup.
    with mask_signals([signal.SIGINT]):
        if args.n_parallel > 0:
            parallel_sampler.initialize(n_parallel=args.n_parallel)
            if args.seed is not None:
                parallel_sampler.set_seed(args.seed)

    if not args.plot:
        garage.plotter.Plotter.disable()
        garage.tf.plotter.Plotter.disable()

    if args.log_dir is None:
        log_dir = os.path.join(os.path.join(os.getcwd(), 'data'),
                               args.exp_name)
    else:
        log_dir = args.log_dir

    tabular_log_file = os.path.join(log_dir, args.tabular_log_file)
    text_log_file = os.path.join(log_dir, args.text_log_file)
    params_log_file = os.path.join(log_dir, args.params_log_file)

    if args.variant_data is not None:
        variant_data = pickle.loads(base64.b64decode(args.variant_data))
        variant_log_file = os.path.join(log_dir, args.variant_log_file)
        dump_variant(variant_log_file, variant_data)
    else:
        variant_data = None

    if not args.use_cloudpickle:
        log_parameters(params_log_file, args)

    logger.add_output(dowel.TextOutput(text_log_file))
    logger.add_output(dowel.CsvOutput(tabular_log_file))
    logger.add_output(dowel.TensorBoardOutput(log_dir))
    logger.add_output(dowel.StdOutput())

    logger.push_prefix('[%s] ' % args.exp_name)

    snapshot_config = SnapshotConfig(snapshot_dir=log_dir,
                                     snapshot_mode=args.snapshot_mode,
                                     snapshot_gap=args.snapshot_gap)

    # read from stdin
    if args.use_cloudpickle:
        import cloudpickle
        method_call = cloudpickle.loads(base64.b64decode(args.args_data))
        try:
            method_call(snapshot_config, variant_data, args.resume_from_dir,
                        args.resume_from_epoch)
        except BaseException:
            children = garage.plotter.Plotter.get_plotters()
            children += garage.tf.plotter.Plotter.get_plotters()
            if args.n_parallel > 0:
                children += [parallel_sampler]
            child_proc_shutdown(children)
            raise
    else:
        data = pickle.loads(base64.b64decode(args.args_data))
        maybe_iter = concretize(data)
        if is_iterable(maybe_iter):
            for _ in maybe_iter:
                pass

    logger.remove_all()
    logger.pop_prefix()
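The long comment in the example above explains why SIGINT is masked while the parallel sampler's worker processes are started. For reference, the sketch below shows how a mask_signals-style context manager could be written on POSIX systems; mask_signals_sketch is a hypothetical name, and the helper actually used above comes from garage's utilities and may be implemented differently.

import signal
from contextlib import contextmanager


@contextmanager
def mask_signals_sketch(signals):
    # Block the given signals, run the body, then restore the previous mask.
    old_mask = signal.pthread_sigmask(signal.SIG_BLOCK, signals)
    try:
        yield
    finally:
        signal.pthread_sigmask(signal.SIG_SETMASK, old_mask)

Processes forked inside the with block inherit the blocked mask, which is how the Ctrl-C deadlock described in the comment above can be avoided while workers start up.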
Example #11
        self._evolution_strategy.tell(
            self._shared_params,
            -np.array(self._all_returns))  # Report back results
        self.policy.set_param_values(
            self._evolution_strategy.best.get()[0]
        )  # TODO: DOES NOTHING, as is overwritten everywhere

        self._all_returns.clear()  # Clear for next epoch
        self._resample_shared_parameters()


if __name__ == "__main__":

    path = PROJECT_APP_PATH.user_data / "data" / "local" / "experiment"
    snapshot_config = SnapshotConfig(snapshot_dir=path,
                                     snapshot_mode="last",
                                     snapshot_gap=1)

    def stest_cma_es_cartpole():
        """Test CMAES with Cartpole-v1 environment."""
        with LocalTFRunner(snapshot_config) as runner:
            with TensorBoardPytorchWriter(PROJECT_APP_PATH.user_log /
                                          "CMA") as writer:
                env = GarageEnv(env_name="CartPole-v1")

                algo = CovarianceMatrixAdaptationEvolutionStrategyAgent(
                    env_spec=env.spec, max_rollout_length=100)
                algo.build()

                runner.setup(algo, env, sampler_cls=LocalSampler)
                runner.train(n_epochs=1, batch_size=1000)