示例#1
0
    def __init__(self, env_config, trainers=None, opponent="novice"):
        """Set up the evaluation environment, opponent agent and seating.

        Args:
            env_config: dict of YanivEnv configuration; only "single_step"
                is read here (defaults to True when absent).
            trainers: optional list of RLlib trainers that occupy the first
                seats of the game; any remaining seats are filled with the
                rule-based agent.
            opponent: rule-agent difficulty, either "novice" or
                "intermediate".

        Raises:
            ValueError: if ``opponent`` is not a recognised difficulty.
        """
        self.env_config = env_config
        # Fix: the original default was a shared mutable list (`trainers=[]`),
        # which is silently shared across all instances. A None sentinel
        # preserves the old behaviour without the aliasing hazard.
        self.trainers = trainers if trainers is not None else []

        if opponent == "novice":
            self.rule_agent = YanivNoviceRuleAgent(
                single_step=env_config.get("single_step", True))
        elif opponent == "intermediate":
            self.rule_agent = YanivIntermediateRuleAgent(
                single_step=env_config.get("single_step", True))
        else:
            raise ValueError("opponent wrong {}".format(opponent))

        self.env = YanivEnv(env_config)

        # Seat order: trainers first, rule agent fills the remaining seats.
        self.players = []
        for i in range(self.env.num_players):
            if i < len(self.trainers):
                self.players.append(self.trainers[i])
            else:
                self.players.append(self.rule_agent)

        self.reset_stats()
示例#2
0
    # Evaluation cadence and experiment bookkeeping flags.
    parser.add_argument("--eval-int", type=int, default=5)
    parser.add_argument("--random-players", type=int, default=0)
    # Checkpoint path to resume from ("" means start fresh).
    parser.add_argument("--restore", type=str, default="")
    # Weights & Biases run id (None starts a new run) and run name.
    parser.add_argument("--wandb-id", type=str, default=None)
    parser.add_argument("--name", type=str, default="")

    args = parser.parse_args()

    # Report CUDA availability before and after ray startup (debug aid).
    print("cuda: ")
    cuda_avail()

    # local_mode=True runs everything in a single process — presumably for
    # debugging; TODO confirm this is intended for real training runs.
    ray.init(
        local_mode=True,
    )

    # Make the Yaniv environment and the action-masking model visible to RLlib.
    register_env("yaniv", lambda config: YanivEnv(config))
    ModelCatalog.register_custom_model("yaniv_mask", YanivActionMaskModel)

    # Instantiate one env locally just to read the observation/action spaces.
    env = YanivEnv(env_config)
    obs_space = env.observation_space
    act_space = env.action_space

    # RLlib trainer configuration (R2D2 on the registered "yaniv" env).
    config = {
        "algorithm": "R2D2",
        "env": "yaniv",
        "env_config": env_config,
        "framework": "torch",
        "num_gpus": args.num_gpus,
        "num_workers": args.num_workers,
        "num_envs_per_worker": 1,
        "num_cpus_per_worker": 0.5,
示例#3
0
if __name__ == "__main__":
    # Minimal command-line options for a local training/debug run.
    parser = argparse.ArgumentParser()
    parser.add_argument("--num-iters", type=int, default=10)
    parser.add_argument("--train", action="store_true")
    parser.add_argument("--num-workers", type=int, default=2)
    parser.add_argument("--eval-num", type=int, default=1)
    parser.add_argument("--random-players", type=int, default=0)
    args = parser.parse_args()

    # CUDA visibility is checked before and after ray.init as a debug aid.
    cuda_avail()
    ray.init(local_mode=True)
    print("post init")
    cuda_avail()

    # Make the Yaniv environment and the action-masking model visible to RLlib.
    register_env("yaniv", lambda config: YanivEnv(config))
    ModelCatalog.register_custom_model("yaniv_mask", YanivActionMaskModel)

    # Trainer configuration: torch framework, custom masking model, one GPU.
    # NOTE(review): env_config is presumably defined at module level — confirm.
    config = {
        "env": "yaniv",
        "env_config": env_config,
        "model": {
            "custom_model": "yaniv_mask",
        },
        "framework": "torch",
        "num_gpus": 1
    }

    # Tune-style stopping criterion: fixed number of training iterations.
    stop = {"training_iteration": args.num_iters}

    if args.train:
示例#4
0
def main():
    """Evaluate a directory of saved policy checkpoints against a rule agent.

    Loads each pickled policy state from ``--model-path`` into an A3C trainer's
    "policy_1", runs RLlib evaluation episodes against the chosen rule-based
    opponent, and writes the aggregated custom metrics for every model to a
    JSON file named ``{opponent}_vs_models_{eval_num}.json``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--eval-num", type=int, default=5)
    # Only evaluate every Nth checkpoint (by model number).
    parser.add_argument("--eval-every", type=int, default=1)
    parser.add_argument("--num-workers", type=int, default=1)
    parser.add_argument("--cpus-per-worker", type=float, default=0.5)
    parser.add_argument("--cpus-for-driver", type=float, default=0.5)
    # Address of an existing ray cluster (None starts a local one).
    parser.add_argument("--address", type=str, default=None)
    parser.add_argument(
        "--model-path",
        type=str,
        default="/home/jippo/ray_results/YanivTrainer_2021-05-02_16-44-14/YanivTrainer_yaniv_3ee8a_00000_0_2021-05-02_16-44-14/models",
    )
    parser.add_argument("--opponent", type=str, default="intermediate")
    args = parser.parse_args()

    # Make the Yaniv environment and the action-masking model visible to RLlib.
    register_env("yaniv", lambda config: YanivEnv(config))
    ModelCatalog.register_custom_model("yaniv_mask", YanivActionMaskModel)

    # The opponent plays through a step function wired into the env below.
    if args.opponent == "intermediate":
        stepfn = intermediate_rule_step
    elif args.opponent == "novice":
        stepfn = novice_rule_step
    else:
        raise ValueError("opponent not defined: {}".format(args.opponent))

    # Two-player game; player_1 is driven by the rule-agent step function,
    # player_0 by the policy under evaluation.
    env_config = {
        "end_after_n_deck_replacements": 0,
        "end_after_n_steps": 130,
        "early_end_reward": 0,
        "use_scaled_negative_reward": True,
        "use_scaled_positive_reward": True,
        "max_negative_reward": -1,
        "negative_score_cutoff": 30,
        "single_step": False,
        "step_reward": 0,
        "use_unkown_cards_in_state": False,
        "use_dead_cards_in_state": True,
        "observation_scheme": 1,
        "n_players": 2,
        "state_n_players": 2,
        "player_step_fn": {"player_1": stepfn},
    }

    # Instantiate one env locally just to read the observation/action spaces.
    env = YanivEnv(env_config)
    obs_space = env.observation_space
    act_space = env.action_space

    config = {
        "callbacks": YanivCallbacks,
        "num_gpus": 1,
        "env": "yaniv",
        "env_config": env_config,
        "framework": "torch",
        "multiagent": {
            # Single trainable policy; its weights get swapped per checkpoint.
            "policies": {
                "policy_1": (None, obs_space, act_space, {}),
            },
            "policy_mapping_fn": policy_mapping_fn,
            "policies_to_train": ["policy_1"],
        },
        "model": {
            "custom_model": "yaniv_mask",
            "fcnet_hiddens": [512, 512],
        },
        "num_envs_per_worker": 1,
        "num_cpus_per_worker": args.cpus_per_worker,
        "num_cpus_for_driver": args.cpus_for_driver,
        "num_workers": 1,
        "evaluation_num_workers": args.num_workers,
        "evaluation_num_episodes": args.eval_num,
        "evaluation_interval": 1,
    }

    ray.init(include_dashboard=False, address=args.address)
    trainer = A3CTrainer(env="yaniv", config=config)

    # models_path = "/home/jippo/ray_results/YanivTrainer_2021-05-02_16-44-14/YanivTrainer_yaniv_3ee8a_00000_0_2021-05-02_16-44-14/models"
    # models_path = "/scratch/student/models"
    models_path = args.model_path
    models = os.listdir(models_path)

    results = []

    for model in tqdm(sorted(models)):
        # Skip anything that is not a model checkpoint file.
        if not model.startswith("model"):
            print("idk", model)
            continue

        # Filenames are presumably "model-<num>.ext": chars [6:-4] hold the
        # number — TODO confirm against the actual naming scheme.
        model_num = int(model[6:-4])

        if model_num % args.eval_every != 0:
            continue

        # NOTE(review): pickle.load on checkpoint files — only safe for
        # trusted, locally-produced model files.
        path = os.path.join(models_path, model)
        with open(path, "rb") as f:
            policy = pickle.load(f)

        # Swap the checkpointed weights into the live policy, then run
        # RLlib's (private) evaluation pass.
        trainer.get_policy("policy_1").set_state(policy)
        metrics = trainer._evaluate()
        metrics["evaluation"].pop("hist_stats")

        # Keep only the "*_mean" custom metrics for the summary file.
        stats = {
            k: v
            for k, v in metrics["evaluation"]["custom_metrics"].items()
            if k.endswith("mean")
        }
        stats["model_number"] = model_num
        tqdm.write(
            "model: {: <6}: win_mean: {}, episodes: {}".format(
                model_num,
                stats["player_0_win_mean"],
                metrics["evaluation"]["episodes_this_iter"],
            )
        )
        results.append(stats)

    with open("{}_vs_models_{}.json".format(args.opponent, args.eval_num), "w") as f:
        json.dump(results, f, indent=4)
示例#5
0
class YanivTournament:
    """Runs evaluation episodes of Yaniv between RLlib trainers and a rule agent.

    Trainers occupy the first seats; remaining seats are played by a single
    shared rule-based agent. Per-game and per-player statistics are
    accumulated across episodes and can be averaged via
    :meth:`get_average_stats`.
    """

    def __init__(self, env_config, trainers=None, opponent="novice"):
        """Set up the environment, opponent agent and per-seat players.

        Args:
            env_config: dict of YanivEnv configuration; only "single_step"
                is read here (defaults to True when absent).
            trainers: optional list of RLlib trainers seated first.
            opponent: rule-agent difficulty, "novice" or "intermediate".

        Raises:
            ValueError: if ``opponent`` is not a recognised difficulty.
        """
        self.env_config = env_config
        # Fix: the original default was a shared mutable list (`trainers=[]`),
        # silently shared across all instances. A None sentinel preserves the
        # old behaviour without the aliasing hazard.
        self.trainers = trainers if trainers is not None else []

        if opponent == "novice":
            self.rule_agent = YanivNoviceRuleAgent(
                single_step=env_config.get("single_step", True))
        elif opponent == "intermediate":
            self.rule_agent = YanivIntermediateRuleAgent(
                single_step=env_config.get("single_step", True))
        else:
            raise ValueError("opponent wrong {}".format(opponent))

        self.env = YanivEnv(env_config)

        # Seat order: trainers first, rule agent fills the remaining seats.
        self.players = []
        for i in range(self.env.num_players):
            if i < len(self.trainers):
                self.players.append(self.trainers[i])
            else:
                self.players.append(self.rule_agent)

        self.reset_stats()

    def run_episode(self, render=False):
        """Play one full episode, updating the accumulated statistics.

        Trainer seats act through ``compute_action`` (with recurrent state
        threaded between steps); rule-agent seats act on the raw game state.
        """
        obs = self.env.reset()
        if render:
            self.env.game.render()

        done = {"__all__": False}

        # One recurrent-model state per trainer, threaded through the episode.
        states = [
            t.get_policy("policy_1").model.get_initial_state()
            for t in self.trainers
        ]

        steps = 0
        while not done["__all__"]:
            player = self.players[self.env.current_player]
            player_id = self.env.current_player_string

            if player in self.trainers:
                action, state, _ = player.compute_action(
                    obs[player_id],
                    policy_id="policy_1",
                    state=states[self.env.current_player],
                    full_fetch=True,
                )
                states[self.env.current_player] = state

                # Track what kind of move was made: during the discard phase
                # count discard sizes, otherwise count pickup choices.
                if self.env.game.round.discarding:
                    dec_action = self.env._decode_action(action)
                    if dec_action != utils.YANIV_ACTION:
                        # Decoded discard actions are card pairs, so half the
                        # length is the number of cards discarded.
                        self.player_stats[player_id]["discard_freqs"][str(
                            int(len(dec_action) / 2))] += 1
                else:
                    pickup_action = self.env._decode_action(action)
                    self.player_stats[player_id]["pickup_freqs"][
                        pickup_action] += 1

                obs, reward, done, info = self.env.step({player_id: action})
            else:
                # Rule agent consumes the raw game state rather than the
                # encoded observation.
                state = self.env.game.get_state(self.env.current_player)
                extracted_state = {}
                extracted_state["raw_obs"] = state
                extracted_state["raw_legal_actions"] = [
                    a for a in state["legal_actions"]
                ]
                action = self.rule_agent.step(extracted_state)

                if self.env.game.round.discarding:
                    if action != utils.YANIV_ACTION:
                        self.player_stats[player_id]["discard_freqs"][str(
                            int(len(action) / 2))] += 1
                else:
                    self.player_stats[player_id]["pickup_freqs"][action] += 1

                obs, reward, done, info = self.env.step({player_id: action},
                                                        raw_action=True)

            steps += 1

            if render:
                self.env.game.render()

        # "avg_*" keys accumulate raw totals here; they are divided by
        # games_played in get_average_stats().
        self.game_stats["avg_roundlen"] += steps

        winner = self.env.game.round.winner
        if winner == -1:
            # -1 marks a drawn round.
            self.game_stats["avg_draws"] += 1
        else:
            winner_id = self.env._get_player_string(winner)
            self.player_stats[winner_id]["avg_wins"] += 1
            self.player_stats[winner_id]["winning_hands"].append(
                utils.get_hand_score(self.env.game.players[winner].hand))

        # An "assaf" is a Yaniv call beaten by another player's lower hand.
        assaf = self.env.game.round.assaf
        if assaf is not None:
            self.player_stats[self.env._get_player_string(
                assaf)]["avg_assafs"] += 1

        # Only positive (losing) scores are recorded.
        s = self.env.game.round.scores
        if s is not None:
            for i in range(self.env.num_players):
                if s[i] > 0:
                    self.player_stats[self.env._get_player_string(
                        i)]["scores"].append(s[i])

        self.games_played += 1

    def reset_game(self):
        """Clear per-game score lists (one list per seat)."""
        self.scores = [[] for _ in range(self.env.num_players)]

    def run_game(self):
        """Reset per-game state and play a single episode."""
        self.reset_game()
        self.run_episode()

    def run(self, eval_num):
        """Play ``eval_num`` episodes from fresh stats and return the averages."""
        self.reset_stats()

        for _ in range(eval_num):
            self.run_episode()

        return self.get_average_stats()

    def reset_stats(self):
        """Zero all accumulated game- and player-level statistics."""
        self.games_played = 0

        self.game_stats = {
            "avg_roundlen": 0,
            "avg_draws": 0,
        }

        # Per-player accumulators: win/assaf counts, score and winning-hand
        # samples, and frequency tables for discard sizes (1-5 cards) and
        # pickup actions.
        self.player_stats = {
            player_id: {
                "avg_wins": 0,
                "avg_assafs": 0,
                "scores": [],
                "winning_hands": [],
                "discard_freqs": {
                    "1": 0,
                    "2": 0,
                    "3": 0,
                    "4": 0,
                    "5": 0,
                },
                "pickup_freqs": {a: 0
                                 for a in utils.pickup_actions},
            }
            for player_id in self.env._get_players()
        }

    def get_average_stats(self):
        """Return a deep-copied stats dict with "avg_*" totals normalised.

        Raw "avg_*" accumulators are divided by games_played; the "scores"
        and "winning_hands" sample lists are collapsed into their means
        (0 when empty) and removed. NOTE: raises ZeroDivisionError if no
        games have been played yet.
        """
        stats = {
            "game": deepcopy(self.game_stats),
            "player": deepcopy(self.player_stats),
        }

        for key in stats["game"].keys():
            if key.startswith("avg"):
                stats["game"][key] /= self.games_played

        for player_stats in stats["player"].values():
            for key in player_stats:
                if key.startswith("avg"):
                    player_stats[key] /= self.games_played

            player_stats["avg_losing_score"] = (np.mean(
                player_stats["scores"]) if len(player_stats["scores"]) > 0 else
                                                0)
            player_stats.pop("scores")

            player_stats["avg_winning_hand"] = (
                np.mean(player_stats["winning_hands"])
                if len(player_stats["winning_hands"]) > 0 else 0)
            player_stats.pop("winning_hands")

        return stats

    def print_stats(self):
        """Pretty-print the averaged stats as YAML (via a JSON round-trip,
        which coerces numpy scalars into plain JSON numbers)."""
        avg_stats = self.get_average_stats()
        cleaned = json.dumps(avg_stats)

        print(yaml.safe_dump(json.loads(cleaned), default_flow_style=False))