def main(args):
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)
    print(args)
    # args.env = "MountainCarContinuous-v0"
    train_copos(args)
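A minimal invocation sketch (the __main__ block and sys import below are illustrative and not part of the snippet above); it simply forwards the command-line arguments, minus the program name, to main:

import sys

if __name__ == '__main__':
    main(sys.argv[1:])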
Example No. 2
def main():
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args()
    extra_args = parse_cmdline_kwargs(unknown_args)

    args.num_timesteps = 0
    args.play = True
    args.env = 'YamaXRealForwardWalk-v0'

    model, env = train(args, extra_args)
    env.close()

    env = build_env(args)
    obs = env.reset()

    def initialize_placeholders(nlstm=128, **kwargs):
        # Recurrent state (one row per env, 2 * nlstm units) and episode-done mask.
        return np.zeros((args.num_env or 1, 2 * nlstm)), np.zeros((1,))

    state, dones = initialize_placeholders(**extra_args)
    while True:
        actions, _, state, _ = model.step(obs, S=state, M=dones)
        obs, _, done, _ = env.step(actions)
        env.render()
        done = done.any() if isinstance(done, np.ndarray) else done

        if done:
            obs = env.reset()

    env.close()
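The placeholder shapes above follow baselines' recurrent policy interface: S carries the LSTM state (cell and hidden state concatenated, hence 2 * nlstm values per environment) and M is the episode-done mask. A minimal shape check, assuming the default nlstm=128 and a single environment:

import numpy as np

nlstm = 128                        # default LSTM size assumed above
state = np.zeros((1, 2 * nlstm))   # one env: cell state + hidden state
dones = np.zeros((1,))             # done mask for one env
assert state.shape == (1, 256) and dones.shape == (1,)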
Example No. 3
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)

    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)
    print(args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        configure_logger(args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()

        state = model.initial_state if hasattr(model, 'initial_state') else None
        dones = np.zeros((1,))

        episode_rew = 0
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs)

            obs, rew, done, _ = env.step(actions)
            episode_rew += rew[0] if isinstance(env, VecEnv) else rew
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                print('episode_rew={}'.format(episode_rew))
                episode_rew = 0
                obs = env.reset()
    env.close()
    return model
Example No. 4
def main(args):
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)
    base_dir = extra_args["base_dir"]
    name = extra_args["name"]
    results = dict()

    save_dir = set_default_value(extra_args, 'save_dir', "/")

    std_type = 1  # 1: average the per-run stds; otherwise take the std across run means
    d = 10        # dilution factor passed to parse_log
    f = 1         # passed through to parse_log
    trail = 1     # number of trailing entries to drop from each parsed log

    for method in ['scrb', 'plain']:
        log_pattern, legend_name = method_to_log_pattern(method)
        log_files = extract_log_files(base_dir, patterns=[name, log_pattern])
        values = []
        stds = []
        length = np.inf
        for logfile in log_files:
            value = parse_log(logfile, field_name="test/hit_time_mean", normalize=False, dilute_fact=d, f=f)[:-trail]
            std = parse_log(logfile, field_name="test/hit_time_std", normalize=False, dilute_fact=d, f=f)[:-trail]
            values.append(value)
            stds.append(std)
            if len(value) < length:
                length = len(value)

        values = [value[:length] for value in values]
        stds = [std[:length] for std in stds]

        if std_type == 1:
            standard_deviation = np.mean(stds, axis=0)
        else:
            standard_deviation = np.std(values, axis=0)
        results[method] = dict()
        results[method]["mean"] = np.mean(values, axis=0)
        results[method]["std"] = standard_deviation
        results[method]["xscale"] = d
        results[method]["name"] = legend_name

    plot(results, save_dir)
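For reference, a minimal numpy sketch of the aggregation step above, with illustrative values rather than data from any log file:

import numpy as np

values = [np.array([1.0, 2.0, 3.0]), np.array([1.5, 2.5])]
length = min(len(v) for v in values)        # truncate runs to a common length
values = [v[:length] for v in values]
mean_curve = np.mean(values, axis=0)        # mean across runs, per point
std_curve = np.std(values, axis=0)          # spread across runs (the std_type != 1 branch above)
print(mean_curve, std_curve)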
Example No. 5
def main():
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args()
    args.num_env = 1
    extra_args = parse_cmdline_kwargs(unknown_args)

    model, env = train(args, extra_args)
    env.close()
    logger.log("Running trained model")
    env = build_env(args)
    if not args.play:
        ts = time.gmtime()
        directory = time.strftime("./render/%s", ts)  # %s (seconds since the epoch) is a platform-dependent strftime directive
        logger.log("Output video to directory:", directory)
        env.envs = [gym.wrappers.Monitor(env.envs[0], directory=directory)]
    obs = env.reset()

    def initialize_placeholders(nlstm=128, **kwargs):
        # Recurrent state (one row per env, 2 * nlstm units) and episode-done mask.
        return np.zeros((args.num_env, 2 * nlstm)), np.zeros((1,))

    state, dones = initialize_placeholders(**extra_args)
    NUM_VIDEO = 1  # number of episodes to record before exiting
    while True:
        actions, _, state, _ = model.step(obs, S=state, M=dones)
        obs, _, done, _ = env.step(actions)
        if args.play:
            env.render()
        done = done.any() if isinstance(done, np.ndarray) else done

        if done:
            NUM_VIDEO -= 1
            if NUM_VIDEO <= 0:
                break
            obs = env.reset()

    env.close()
Example No. 6
def parse_args():
    parser = argparse.ArgumentParser(
        "Reinforcement Learning experiments for multiagent environments")
    # Environment
    parser.add_argument("--scenario",
                        type=str,
                        default="simple",
                        help="name of the scenario script")
    parser.add_argument("--max-episode-len",
                        type=int,
                        default=25,
                        help="maximum episode length")
    parser.add_argument("--num-episodes",
                        type=int,
                        default=60000,
                        help="number of episodes")
    parser.add_argument("--num-adversaries",
                        type=int,
                        default=None,
                        help="number of adversaries")
    parser.add_argument("--good-policy",
                        type=str,
                        default="maddpg",
                        help="policy for good agents")
    parser.add_argument("--adv-policy",
                        type=str,
                        default="maddpg",
                        help="policy of adversaries")
    # Core training parameters
    parser.add_argument("--lr",
                        type=float,
                        default=1e-2,
                        help="learning rate for Adam optimizer")
    parser.add_argument("--gamma",
                        type=float,
                        default=0.95,
                        help="discount factor")
    parser.add_argument("--batch-size",
                        type=int,
                        default=1024,
                        help="number of episodes to optimize at the same time")
    parser.add_argument("--num-units",
                        type=int,
                        nargs="+",
                        default=[64, 64],
                        help="number of units in the mlp")
    # Checkpointing
    parser.add_argument("--exp-name",
                        type=str,
                        default=None,
                        help="name of the experiment")
    parser.add_argument(
        "--save-dir",
        type=str,
        default=None,
        help="directory in which training state and model should be saved")
    parser.add_argument(
        "--save-rate",
        type=int,
        default=10000,
        help="save model once every time this many episodes are completed")
    parser.add_argument(
        "--print-rate",
        type=int,
        default=1000,
        help="print training scalars once every time this many episodes are completed")
    parser.add_argument(
        "--load-dir",
        type=str,
        default=None,
        help="directory in which training state and model are loaded")
    # Evaluation
    parser.add_argument("--restore", action="store_true", default=False)
    parser.add_argument("--display", action="store_true", default=False)
    parser.add_argument("--save-render-images",
                        action="store_true",
                        default=False)
    parser.add_argument("--render-dir",
                        type=str,
                        default=None,
                        help="directory in which render image should be saved")
    parser.add_argument("--benchmark", action="store_true", default=False)
    parser.add_argument("--benchmark-iters",
                        type=int,
                        default=100000,
                        help="number of iterations run for benchmarking")
    parser.add_argument("--benchmark-dir",
                        type=str,
                        default=None,
                        help="directory where benchmark data is saved")
    parser.add_argument("--plots-dir",
                        type=str,
                        default=None,
                        help="directory where plot data is saved")
    args, unknown_args = parser.parse_known_args()
    extra_args = parse_cmdline_kwargs(unknown_args)

    if args.exp_name is None:
        args.exp_name = "experiment-{}".format(args.scenario)
    if args.save_dir is None:
        args.save_dir = os.path.join(logger.get_dir(), "checkpoints")
    if (args.render_dir is None) and (args.load_dir is not None):
        args.render_dir = args.load_dir + "-render"
    if args.benchmark_dir is None:
        args.benchmark_dir = os.path.join(logger.get_dir(), "benchmark_files")
    if args.plots_dir is None:
        args.plots_dir = os.path.join(logger.get_dir(), "learning_curves")

    if not args.display:
        os.makedirs(args.save_dir, exist_ok=True)
        os.makedirs(args.plots_dir, exist_ok=True)
    if args.save_render_images:
        os.makedirs(args.render_dir, exist_ok=True)
    if args.benchmark:
        os.makedirs(args.benchmark_dir, exist_ok=True)

    return args, extra_args
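A hedged usage sketch; the __main__ block below is illustrative and not part of the original snippet:

if __name__ == "__main__":
    arglist, extra_args = parse_args()
    print("scenario: {}, num units: {}".format(arglist.scenario, arglist.num_units))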
Example No. 7
def main(args):
    # print("\n\n\n\n\nXXX")
    # print(sys.path)
    # import baselines
    # print(baselines.__file__())
    # for varname in ['PMI_RANK', 'OMPI_COMM_WORLD_RANK']:
    #     if varname in os.environ:
    #         print(varname, int(os.environ[varname]))
    # print("parsing args...")

    arg_parser = init_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)

    if args.allow_run_as_root:
        whoami = mpi_fork_run_as_root(args.num_cpu,
                                      bind_to_core=args.bind_to_core)
    else:
        whoami = mpi_fork(args.num_cpu, bind_to_core=args.bind_to_core)
    if whoami == 'parent':
        print('parent exiting with code 0...')
        sys.exit(0)

    U.single_threaded_session().__enter__()

    rank = MPI.COMM_WORLD.Get_rank()

    # Configure the logger on every MPI rank (FIXME: how should non-zero ranks log?).
    configure_logger(args.log_path, format_strs=[])
    logger.info(f"main: {rank} / {MPI.COMM_WORLD.Get_size()}")
    logger.info(f"logger dir: {logger.get_dir()}")

    extra_args = parse_cmdline_kwargs(unknown_args)
    logger.info(args, extra_args)

    def make_wrapped_env():
        env = gym.make(args.env)
        if args.env_type == 'maze':
            pass
        elif args.env_type == 'robotics':
            from baselines.envs.goal_sampler_env_wrapper import GoalSamplerEnvWrapper
            env = GoalSamplerEnvWrapper(env)
        elif args.env_type == 'ant':
            env = GoalExplorationEnv(env=env,
                                     only_feasible=True,
                                     extend_dist_rew=0,
                                     inner_weight=0,
                                     goal_weight=1)
        else:
            raise NotImplementedError(args.env_type)
        # FIXME: if resample space is feasible, can set only_feasible = False to avoid unnecessary computation
        return env

    venv_kwargs = dict(
        make_wrapped_env=make_wrapped_env,
        seed=args.seed,
        reward_scale=args.reward_scale,
        flatten_dict_observations=False,
        mpi_rank=rank,
        monitor_log_dir=args.log_path,  # FIXME
    )
    venv = make_vec_env(num_env=args.num_env, **venv_kwargs)
    eval_venv = make_vec_env(num_env=args.num_env, **venv_kwargs)
    if args.debug:
        plotter_venv = make_vec_env(num_env=1, **venv_kwargs)
    else:
        plotter_venv = None

    # Seed everything.
    rank_seed = args.seed + 1000000 * rank if args.seed is not None else None
    set_global_seeds(rank_seed)
    logger.info(f'setting global seed: {rank_seed}')

    # Prepare params.
    params = dict()
    params.update(config.DEFAULT_PARAMS)
    params.update(config.DEFAULT_ENV_PARAMS[args.env])
    params.update(**extra_args)  # makes it possible to override any parameter

    # if args.debug:
    #     params['n_cycles'] = 2
    #     params['n_batches'] = 2
    #     params['ve_n_batches'] = 2
    #     params['size_ensemble'] = 2

    # env settings
    params['env_name'] = args.env
    params['num_cpu'] = args.num_cpu
    params['rollout_batch_size'] = args.num_env
    params['timesteps_per_cpu'] = int(args.num_timesteps)

    with open(os.path.join(logger.get_dir(), 'params.json'), 'w') as f:
        json.dump(params, f)

    params['make_env'] = make_wrapped_env  # added after the JSON dump (functions are not JSON-serializable)

    learn_fun_return = learn(
        venv=venv,
        eval_venv=eval_venv,
        plotter_venv=plotter_venv,
        params=params,
        save_path=args.log_path,
        save_interval=args.save_interval,
    )

    if rank == 0:
        save_path = os.path.expanduser(logger.get_dir())
        for k, v in learn_fun_return.items():
            v.save(os.path.join(save_path, f"final-{k}.joblib"))

    venv.close()
    eval_venv.close()
    if plotter_venv is not None:
        plotter_venv.close()
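The seed offset above (base seed plus 1,000,000 times the MPI rank) gives every worker its own random stream. A standalone sketch of the same pattern, assuming mpi4py is available:

import random

import numpy as np
from mpi4py import MPI


def seed_for_rank(base_seed):
    # Offset the base seed by the MPI rank so each worker draws a distinct stream.
    rank = MPI.COMM_WORLD.Get_rank()
    rank_seed = base_seed + 1000000 * rank if base_seed is not None else None
    if rank_seed is not None:
        random.seed(rank_seed)
        np.random.seed(rank_seed)
    return rank_seed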
Example No. 8
import numpy as np
import os
import shutil
from argparse import Namespace

from baselines.run import build_env, train, parse_cmdline_kwargs
from baselines.a2c.a2c import Model
from baselines.common.cmd_util import common_arg_parser

print("Running trained model")
arg_parser = common_arg_parser()
args, unknown_args = arg_parser.parse_known_args()
extra_args = parse_cmdline_kwargs(unknown_args)
model, env = train(args, extra_args)

# Load the data to test on
data = np.load('test_images.npy')
tests = len(data)

# Build environment
env = build_env(args)
env.envs[0].env.env.phase = 'test'  # unwrap to the underlying env and switch it to its test phase

# Load model weights (checkpoint path composed from the env and algorithm names)
model.load(args.env + args.alg)

# Test model on all images in dataset
path = "/tmp/movements"
norm_steps = []
fail = 0
# for j in range(100):
def main(args):
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)
    print(args)
    train_copos(args)