def main(args):
    """Parse the command line and start COPOS training.

    Args:
        args: Sequence of command-line tokens handed to the common argument
            parser.

    The recognised options are printed and then passed to ``train_copos``.
    Unrecognised tokens are still run through ``parse_cmdline_kwargs``, but
    the parsed result is not forwarded anywhere.
    """
    parser = common_arg_parser()
    known, leftover = parser.parse_known_args(args)
    # Result intentionally unused; the call is kept so malformed extras are
    # still processed exactly as before.
    parse_cmdline_kwargs(leftover)
    print(known)
    train_copos(known)
Пример #2
0
def main():
    """Parse the process command line and launch training.

    Options known to the common argument parser become ``args``. Every
    remaining option is turned into a key/value pair by
    ``parse_unknown_args`` and each value is converted with ``run.parse``
    before both are handed to ``train``.
    """
    parser = common_arg_parser()
    args, leftover = parser.parse_known_args()

    raw_kwargs = parse_unknown_args(leftover)
    extra_args = {}
    for key, value in raw_kwargs.items():
        extra_args[key] = run.parse(value)

    train(args, extra_args)
Пример #3
0
def main():
    """Train a model, optionally save it, and optionally replay it.

    Steps:
      1. Parse the process command line; unrecognised options are converted
         by ``parse_cmdline_kwargs`` into ``extra_args``.
      2. Configure the logger. When MPI is available, only rank 0 logs;
         other ranks are configured with no output formats.
      3. Call ``train(args, extra_args)`` and close the training environment.
      4. If ``args.save_path`` is set, rank 0 saves the model to that path
         (with ``~`` expanded).
      5. If ``args.play`` is set, build a fresh environment and step the
         model in it indefinitely, rendering every step.

    The playback loop has no exit condition of its own; it ends only through
    an exception (for example ``KeyboardInterrupt``). The playback
    environment is closed in that case.
    """
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args()
    extra_args = parse_cmdline_kwargs(unknown_args)

    # Configure logger, disable logging in child MPI processes (rank > 0).
    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, env = train(args, extra_args)
    env.close()

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        env = build_env(args)
        # The loop below never terminates normally, so the close call must
        # live in ``finally`` to be reachable at all.
        try:
            obs = env.reset()

            def initialize_placeholders(nlstm=128, **kwargs):
                # Zero recurrent state sized from ``nlstm`` (taken from the
                # extra command-line options when present) plus a
                # single-element zero mask.
                return np.zeros((args.num_env or 1, 2*nlstm)), np.zeros((1))

            state, dones = initialize_placeholders(**extra_args)
            while True:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
                obs, _, done, _ = env.step(actions)
                env.render()
                # Collapse an array of per-env flags into a single flag.
                done = done.any() if isinstance(done, np.ndarray) else done

                if done:
                    obs = env.reset()
        finally:
            env.close()
Пример #4
0
def main(args):
    """Train a PPO model, record the run configuration, and optionally replay.

    The command-line tokens are consumed in stages: the common parser first,
    then the PPO parser on what is left, then the network parser; anything
    still unrecognised is converted by ``parse_cmdline_kwargs``.

    Logs and artefacts go to ``$OPENAI_LOGDIR/<env>/<timestamp>``. The
    ``OPENAI_LOGDIR`` environment variable must be set: ``os.getenv`` returns
    ``None`` otherwise and building the path fails.

    Args:
        args: Sequence of command-line tokens.

    Returns:
        The trained model. Not reached when ``args.play`` is set, because
        the playback loop has no exit condition of its own.
    """
    arg_parser = common_arg_parser()
    ppo_parser = ppo_arg_parser()
    network_parser = network_arg_parser()
    args, ppo_args = arg_parser.parse_known_args(args)
    ppo_args, network_args = ppo_parser.parse_known_args(ppo_args)
    network_args, extra_args = network_parser.parse_known_args(network_args)
    extra_args = parse_cmdline_kwargs(extra_args)

    dt = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    save_dir = osp.join(os.getenv('OPENAI_LOGDIR'), args.env, dt)

    # Configure logger, disable logging in child MPI processes (rank > 0).
    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure(dir=save_dir)
        # Dump every option group next to the logs for reproducibility.
        with open(osp.join(logger.get_dir(), 'run.conf'), 'wt') as fh:
            print(datetime.now().isoformat(), file=fh)
            print(json.dumps(vars(args), indent=2), file=fh)
            print(json.dumps(vars(ppo_args), indent=2), file=fh)
            print(json.dumps(vars(network_args), indent=2), file=fh)
            if extra_args:
                # NOTE(review): parse_cmdline_kwargs presumably returns a
                # plain dict; vars() raises TypeError on a dict, so only
                # fall back to vars() for namespace-like objects.
                extra_conf = (extra_args if isinstance(extra_args, dict)
                              else vars(extra_args))
                print(json.dumps(extra_conf, indent=2), file=fh)
    else:
        logger.configure(format_strs=[])  # disable logging
        rank = MPI.COMM_WORLD.Get_rank()

    model, env = train(args, ppo_args, network_args, extra_args)

    # Close the environment even when saving or playback raises or is
    # interrupted.
    try:
        if args.save_path is not None and rank == 0:
            save_path = osp.join(save_dir, '{}_{}'.format(args.env,
                                                          args.num_timesteps))
            model.save(save_path)

        if args.play:
            logger.log("Running trained model")
            obs = env.reset()

            # Models exposing ``initial_state`` are stepped with explicit
            # state and mask arguments; others are stepped on obs alone.
            state = model.initial_state if hasattr(model,
                                                   'initial_state') else None
            dones = np.zeros((1, ))

            episode_rew = 0
            while True:
                if state is not None:
                    actions, _, state, _ = model.step(obs, S=state, M=dones)
                else:
                    actions, _, _, _ = model.step(obs)

                obs, rew, done, _ = env.step(actions)
                # For a VecEnv only the first entry of the reward is tracked.
                episode_rew += rew[0] if isinstance(env, VecEnv) else rew
                env.render()
                done = done.any() if isinstance(done, np.ndarray) else done
                if done:
                    print('episode_rew={}'.format(episode_rew))
                    episode_rew = 0
                    obs = env.reset()
    finally:
        env.close()

    return model
def main(args):
    """Entry point: parse ``args`` and run COPOS training.

    Args:
        args: Sequence of command-line tokens for the common argument parser.

    Options the parser recognises are echoed to stdout and forwarded to
    ``train_copos``. The remaining tokens are passed through
    ``parse_cmdline_kwargs``, whose result is discarded.
    """
    parsed, remainder = common_arg_parser().parse_known_args(args)
    # Parsed extras are not consumed by train_copos; the call is retained
    # only to keep behaviour identical.
    _ = parse_cmdline_kwargs(remainder)
    print(parsed)
    train_copos(parsed)