Example No. 1
def main(args):
    parser = get_config()
    all_args = parse_args(args, parser)

    if all_args.algorithm_name == "rmappo":
        assert (all_args.use_recurrent_policy or all_args.use_naive_recurrent_policy), ("check recurrent policy!")
    elif all_args.algorithm_name == "mappo":
        assert (not all_args.use_recurrent_policy and not all_args.use_naive_recurrent_policy), (
            "check recurrent policy!")
    else:
        raise NotImplementedError

    # cuda
    if all_args.cuda and torch.cuda.is_available():
        print("choose to use gpu...")
        device = torch.device("cuda:0")
        torch.set_num_threads(all_args.n_training_threads)
        if all_args.cuda_deterministic:
            torch.backends.cudnn.benchmark = False
            torch.backends.cudnn.deterministic = True
    else:
        print("choose to use cpu...")
        device = torch.device("cpu")
        torch.set_num_threads(all_args.n_training_threads)

    # run dir
    run_dir = (Path(os.path.split(os.path.dirname(os.path.abspath(__file__)))[0] + "/results")
               / all_args.env_name / all_args.hanabi_name / all_args.algorithm_name / all_args.experiment_name)
    if not run_dir.exists():
        os.makedirs(str(run_dir))

    # wandb
    if all_args.use_wandb:
        run = wandb.init(config=all_args,
                         project=all_args.env_name,
                         entity=all_args.user_name,
                         notes=socket.gethostname(),
                         name=str(all_args.algorithm_name) + "_" +
                              str(all_args.experiment_name) +
                              "_seed" + str(all_args.seed),
                         group=all_args.hanabi_name,
                         dir=str(run_dir),
                         job_type="training",
                         reinit=True)
    else:
        if not run_dir.exists():
            curr_run = 'run1'
        else:
            exst_run_nums = [int(str(folder.name).split('run')[1]) for folder in run_dir.iterdir() if
                             str(folder.name).startswith('run')]
            if len(exst_run_nums) == 0:
                curr_run = 'run1'
            else:
                curr_run = 'run%i' % (max(exst_run_nums) + 1)
        run_dir = run_dir / curr_run
        if not run_dir.exists():
            os.makedirs(str(run_dir))

    setproctitle.setproctitle(str(all_args.algorithm_name) + "-" + str(
        all_args.env_name) + "-" + str(all_args.experiment_name) + "@" + str(all_args.user_name))

    # seed
    torch.manual_seed(all_args.seed)
    torch.cuda.manual_seed_all(all_args.seed)
    np.random.seed(all_args.seed)

    # env init
    envs = make_train_env(all_args)
    eval_envs = make_eval_env(all_args) if all_args.use_eval else None
    num_agents = all_args.num_agents

    config = {
        "all_args": all_args,
        "envs": envs,
        "eval_envs": eval_envs,
        "num_agents": num_agents,
        "device": device,
        "run_dir": run_dir
    }

    # run experiments
    if all_args.share_policy:
        from onpolicy.runner.shared.hanabi_runner_forward import HanabiRunner as Runner
    else:
        from onpolicy.runner.separated.hanabi_runner_forward import HanabiRunner as Runner

    runner = Runner(config)
    runner.run()

    # post process
    envs.close()
    if all_args.use_eval and eval_envs is not envs:
        eval_envs.close()

    if all_args.use_wandb:
        run.finish()
    else:
        # note: "writter" (sic) is the attribute name used by the onpolicy Runner base class
        runner.writter.export_scalars_to_json(str(runner.log_dir + '/summary.json'))
        runner.writter.close()
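
A minimal sketch of how this main(args) entry point is typically invoked from the command line (assuming import sys alongside the other imports):

if __name__ == "__main__":
    main(sys.argv[1:])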
Example No. 2
def main(args):
    parser = get_config()
    all_args = parse_args(args, parser)

    if all_args.algorithm_name == "rmappo" or all_args.algorithm_name == "rmappg":
        assert (all_args.use_recurrent_policy
                or all_args.use_naive_recurrent_policy), (
                    "check recurrent policy!")
    elif all_args.algorithm_name == "mappo" or all_args.algorithm_name == "mappg":
        assert (all_args.use_recurrent_policy
                and all_args.use_naive_recurrent_policy) == False, (
                    "check recurrent policy!")
    else:
        raise NotImplementedError

    assert all_args.use_render, ("use_render must be set to True")
    assert not (all_args.model_dir is None
                or all_args.model_dir == ""), ("set model_dir first")
    assert all_args.n_rollout_threads == 1, (
        "only 1 rollout thread is supported when rendering.")

    # cuda
    if all_args.cuda and torch.cuda.is_available():
        print("choose to use gpu...")
        device = torch.device("cuda:0")
        torch.set_num_threads(all_args.n_training_threads)
        if all_args.cuda_deterministic:
            torch.backends.cudnn.benchmark = False
            torch.backends.cudnn.deterministic = True
    else:
        print("choose to use cpu...")
        device = torch.device("cpu")
        torch.set_num_threads(all_args.n_training_threads)

    # run dir
    run_dir = Path(
        os.path.split(os.path.dirname(os.path.abspath(__file__)))[0] +
        "/results"
    ) / all_args.env_name / all_args.scenario_name / all_args.algorithm_name / all_args.experiment_name
    if not run_dir.exists():
        os.makedirs(str(run_dir))

    if not run_dir.exists():
        curr_run = 'run1'
    else:
        exst_run_nums = [
            int(str(folder.name).split('run')[1])
            for folder in run_dir.iterdir()
            if str(folder.name).startswith('run')
        ]
        if len(exst_run_nums) == 0:
            curr_run = 'run1'
        else:
            curr_run = 'run%i' % (max(exst_run_nums) + 1)
    run_dir = run_dir / curr_run
    if not run_dir.exists():
        os.makedirs(str(run_dir))

    setproctitle.setproctitle(str(all_args.algorithm_name) + "-" +
                              str(all_args.env_name) + "-" + str(all_args.experiment_name) +
                              "@" + str(all_args.user_name))

    # seed
    torch.manual_seed(all_args.seed)
    torch.cuda.manual_seed_all(all_args.seed)
    np.random.seed(all_args.seed)

    # env init
    envs = make_render_env(all_args)
    eval_envs = None
    num_agents = all_args.num_agents
    all_args.episode_length = envs.get_max_step()[0]

    config = {
        "all_args": all_args,
        "envs": envs,
        "eval_envs": eval_envs,
        "num_agents": num_agents,
        "device": device,
        "run_dir": run_dir
    }

    # run experiments
    if all_args.share_policy:
        from onpolicy.runner.shared.gridworld_runner import GridWorldRunner as Runner
    else:
        from onpolicy.runner.separated.gridworld_runner import GridWorldRunner as Runner

    runner = Runner(config)
    runner.render()

    # post process
    envs.close()
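
Given the asserts above, a render run needs use_render enabled, a model_dir, and a single rollout thread. A hypothetical invocation (the script name and model path are placeholders; the flag names assume the argparse options defined in get_config):

python render_gridworld.py --use_render --model_dir /path/to/run/models --n_rollout_threads 1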
Example No. 3
def main(args):
    '''
    examine.py is used to display environments and run policies.

    For an example environment jsonnet, see
        mujoco-worldgen/examples/example_env_examine.jsonnet
    You can find saved policies in the 'examples' directory, together with the
    environment they were trained in and the hyperparameters used. The naming
    convention is 'examples/<env_name>.jsonnet' for the environment jsonnet file
    and 'examples/<env_name>.npz' for the policy weights file.
    Example uses:
        bin/examine.py hide_and_seek
        bin/examine.py mae_envs/envs/base.py
        bin/examine.py base n_boxes=6 n_ramps=2 n_agents=3
        bin/examine.py my_env_jsonnet.jsonnet
        bin/examine.py my_env_jsonnet.jsonnet my_policy.npz
        bin/examine.py hide_and_seek my_policy.npz n_hiders=3 n_seekers=2 n_boxes=8 n_ramps=1
        bin/examine.py examples/hide_and_seek_quadrant.jsonnet examples/hide_and_seek_quadrant.npz
    '''
    #names, kwargs = parse_arguments(argv)
    parser = get_config()
    args = parse_args(args, parser)
    kwargs = {'args': args}

    env_name = args.env_name
    num_hiders = args.num_hiders
    num_seekers = args.num_seekers
    num_agents = num_hiders + num_seekers
    core_dir = abspath(join(dirname(__file__)))
    envs_dir = 'envs/hns/envs'  # where hide_and_seek.py is.
    xmls_dir = 'xmls'

    if args.use_render:  # run policies on the environment
        # importing PolicyViewer and load_policy here because they depend on several
        # packages which are only needed for playing policies, not for any of the
        # environments code.
        from onpolicy.envs.hns.viewer.policy_viewer import PolicyViewer_hs      
        from onpolicy.envs.hns.ma_policy.load_policy import load_policy
        env, args_remaining_env = load_env(env_name, core_dir=core_dir,
                                           envs_dir=envs_dir, xmls_dir=xmls_dir,
                                           return_args_remaining=True, **kwargs)
        if env is None:
            raise Exception(f'Could not find environment based on pattern {env_name}')
        if isinstance(env.action_space, Tuple):
            env = JoinMultiAgentActions(env)

        env.reset()  # generate action and observation spaces
        
        policies = []
        for agent_id in range(num_agents):
            if args.share_policy:
                actor_critic = torch.load(str(args.model_dir) + "/agent_model.pt")['model']
            else:
                actor_critic = torch.load(str(args.model_dir) + "/agent" + str(agent_id) + "_model.pt")['model']
            policies.append(actor_critic)

        args_remaining_policy = args_remaining_env
        
        args_to_pass, args_remaining_viewer = extract_matching_arguments(PolicyViewer_hs, kwargs)
        args_remaining = set(args_remaining_env)
        args_remaining = args_remaining.intersection(set(args_remaining_policy))
        args_remaining = args_remaining.intersection(set(args_remaining_viewer))
        assert len(args_remaining) == 0, (
            f"Some arguments were left unused: {args_remaining}. There shouldn't be any.")
        viewer = PolicyViewer_hs(env, policies, **args_to_pass)
        viewer.run()
    else:
        # examine the environment
        examine_env(env_name, kwargs,
                    core_dir=core_dir, envs_dir=envs_dir, xmls_dir=xmls_dir,
                    env_viewer=EnvViewer)
Example No. 4
        # flatten the last two axes: [n_thread, flat_dim]
        share_obs = share_obs.reshape(*share_obs.shape[:-2], -1)
        # copy the flattened central state once per agent: [n_thread, n_agent, flat_dim]
        share_obs = np.expand_dims(share_obs, 1).repeat(self.num_agents,
                                                        axis=1)
        return share_obs
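
# A minimal standalone sketch of the reshape/repeat above, with toy shapes
# assumed for illustration (4 threads, 3 agents, 5-dim observations):
#
#     import numpy as np
#     share_obs = np.zeros((4, 3, 5))                      # [n_thread, n_agent, obs_dim]
#     flat = share_obs.reshape(*share_obs.shape[:-2], -1)  # -> (4, 15)
#     out = np.expand_dims(flat, 1).repeat(3, axis=1)      # -> (4, 3, 15)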


if __name__ == "__main__":
    from onpolicy.config import get_config
    from onpolicy.envs.gfootball.gfootball_env import GoogleFootballEnv
    from onpolicy.envs.env_wrappers import SubprocVecEnv
    import gym
    gym.logger.set_level(gym.logger.ERROR)

    args = get_config().parse_known_args()[0]
    config = {
        'all_args': args,
        'envs': SubprocVecEnv([
            lambda: GoogleFootballEnv(num_of_left_agents=3,
                                      env_name='test_example_multiagent',
                                      representation="simple115v2",
                                      channel_dimensions=(48, 36))
            for i in range(args.n_rollout_threads)
        ]),
        'eval_envs': SubprocVecEnv([
            lambda: GoogleFootballEnv(num_of_left_agents=3,
                                      env_name='test_example_multiagent',
                                      representation="simple115v2",
                                      channel_dimensions=(48, 36))
            # the source snippet is truncated at this point; the remainder of
            # this entry is assumed to mirror the 'envs' entry above
            for i in range(args.n_rollout_threads)
        ]),
    }
Example No. 5
def main(args):
    parser = get_config()
    all_args = parse_args(args, parser)

    if all_args.algorithm_name == "rmappo" or all_args.algorithm_name == "rmappg":
        assert (all_args.use_recurrent_policy
                or all_args.use_naive_recurrent_policy), (
                    "check recurrent policy!")
    elif all_args.algorithm_name == "mappo" or all_args.algorithm_name == "mappg":
        assert (all_args.use_recurrent_policy == False
                and all_args.use_naive_recurrent_policy
                == False), ("check recurrent policy!")
    else:
        raise NotImplementedError

    assert all_args.use_render, ("use_render must be set to True")
    assert not (all_args.model_dir is None
                or all_args.model_dir == ""), ("set model_dir first")
    assert all_args.n_rollout_threads == 1, (
        "only 1 rollout thread is supported when rendering.")

    # cuda
    if all_args.cuda and torch.cuda.is_available():
        print("choose to use gpu...")
        device = torch.device("cuda:0")
        torch.set_num_threads(all_args.n_training_threads)
        if all_args.cuda_deterministic:
            torch.backends.cudnn.benchmark = False
            torch.backends.cudnn.deterministic = True
    else:
        print("choose to use cpu...")
        device = torch.device("cpu")
        torch.set_num_threads(all_args.n_training_threads)

    # run dir and video dir
    run_dir = Path(all_args.model_dir).parent.absolute()
    if all_args.save_videos and all_args.video_dir == "":
        video_dir = run_dir / "videos"
        all_args.video_dir = str(video_dir)

        if not video_dir.exists():
            os.makedirs(str(video_dir))

    setproctitle.setproctitle("-".join([
        all_args.env_name, all_args.scenario_name, all_args.algorithm_name,
        all_args.experiment_name
    ]) + "@" + all_args.user_name)

    # seed
    torch.manual_seed(all_args.seed)
    torch.cuda.manual_seed_all(all_args.seed)
    np.random.seed(all_args.seed)

    # env init
    envs = make_train_env(all_args)
    num_agents = all_args.num_agents

    config = {
        "all_args": all_args,
        "envs": envs,
        "eval_envs": None,
        "num_agents": num_agents,
        "device": device,
        "run_dir": run_dir
    }

    # run experiments
    if all_args.share_policy:
        from onpolicy.runner.shared.football_runner import FootballRunner as Runner
    else:
        from onpolicy.runner.separated.football_runner import FootballRunner as Runner

    runner = Runner(config)
    runner.render()

    # post process
    envs.close()
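
A minimal sketch of the video-directory defaulting in this example (the model path is hypothetical):

from pathlib import Path
model_dir = "/experiments/run3/models"
run_dir = Path(model_dir).parent.absolute()  # -> /experiments/run3
video_dir = run_dir / "videos"               # videos are saved under the run directory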