parser.add_argument("--eval-int", type=int, default=5)
parser.add_argument("--random-players", type=int, default=0)
parser.add_argument("--restore", type=str, default="")
parser.add_argument("--wandb-id", type=str, default=None)
parser.add_argument("--name", type=str, default="")
args = parser.parse_args()

print("cuda: ")
cuda_avail()

ray.init(local_mode=True)

register_env("yaniv", lambda config: YanivEnv(config))
ModelCatalog.register_custom_model("yaniv_mask", YanivActionMaskModel)

env = YanivEnv(env_config)
obs_space = env.observation_space
act_space = env.action_space

config = {
    "algorithm": "R2D2",
    "env": "yaniv",
    "env_config": env_config,
    "framework": "torch",
    "num_gpus": args.num_gpus,
    "num_workers": args.num_workers,
    "num_envs_per_worker": 1,
    "num_cpus_per_worker": 0.5,
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--num-iters", type=int, default=10)
    parser.add_argument("--train", action="store_true")
    parser.add_argument("--num-workers", type=int, default=2)
    parser.add_argument("--eval-num", type=int, default=1)
    parser.add_argument("--random-players", type=int, default=0)
    args = parser.parse_args()

    cuda_avail()
    ray.init(local_mode=True)
    print("post init")
    cuda_avail()

    register_env("yaniv", lambda config: YanivEnv(config))
    ModelCatalog.register_custom_model("yaniv_mask", YanivActionMaskModel)

    config = {
        "env": "yaniv",
        "env_config": env_config,
        "model": {
            "custom_model": "yaniv_mask",
        },
        "framework": "torch",
        "num_gpus": 1,
    }

    stop = {"training_iteration": args.num_iters}

    if args.train:
def main():
    """Evaluate a sequence of saved policy checkpoints against a rule-based opponent."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--eval-num", type=int, default=5)
    parser.add_argument("--eval-every", type=int, default=1)
    parser.add_argument("--num-workers", type=int, default=1)
    parser.add_argument("--cpus-per-worker", type=float, default=0.5)
    parser.add_argument("--cpus-for-driver", type=float, default=0.5)
    parser.add_argument("--address", type=str, default=None)
    parser.add_argument(
        "--model-path",
        type=str,
        default="/home/jippo/ray_results/YanivTrainer_2021-05-02_16-44-14/YanivTrainer_yaniv_3ee8a_00000_0_2021-05-02_16-44-14/models",
    )
    parser.add_argument("--opponent", type=str, default="intermediate")
    args = parser.parse_args()

    register_env("yaniv", lambda config: YanivEnv(config))
    ModelCatalog.register_custom_model("yaniv_mask", YanivActionMaskModel)

    if args.opponent == "intermediate":
        stepfn = intermediate_rule_step
    elif args.opponent == "novice":
        stepfn = novice_rule_step
    else:
        raise ValueError("opponent not defined: {}".format(args.opponent))

    env_config = {
        "end_after_n_deck_replacements": 0,
        "end_after_n_steps": 130,
        "early_end_reward": 0,
        "use_scaled_negative_reward": True,
        "use_scaled_positive_reward": True,
        "max_negative_reward": -1,
        "negative_score_cutoff": 30,
        "single_step": False,
        "step_reward": 0,
        "use_unkown_cards_in_state": False,
        "use_dead_cards_in_state": True,
        "observation_scheme": 1,
        "n_players": 2,
        "state_n_players": 2,
        # player_1 is driven inside the env by the selected rule-based step function.
        "player_step_fn": {"player_1": stepfn},
    }

    env = YanivEnv(env_config)
    obs_space = env.observation_space
    act_space = env.action_space

    config = {
        "callbacks": YanivCallbacks,
        "num_gpus": 1,
        "env": "yaniv",
        "env_config": env_config,
        "framework": "torch",
        "multiagent": {
            "policies": {
                "policy_1": (None, obs_space, act_space, {}),
            },
            "policy_mapping_fn": policy_mapping_fn,
            "policies_to_train": ["policy_1"],
        },
        "model": {
            "custom_model": "yaniv_mask",
            "fcnet_hiddens": [512, 512],
        },
        "num_envs_per_worker": 1,
        "num_cpus_per_worker": args.cpus_per_worker,
        "num_cpus_for_driver": args.cpus_for_driver,
        "num_workers": 1,
        "evaluation_num_workers": args.num_workers,
        "evaluation_num_episodes": args.eval_num,
        "evaluation_interval": 1,
    }

    ray.init(include_dashboard=False, address=args.address)
    trainer = A3CTrainer(env="yaniv", config=config)

    # models_path = "/home/jippo/ray_results/YanivTrainer_2021-05-02_16-44-14/YanivTrainer_yaniv_3ee8a_00000_0_2021-05-02_16-44-14/models"
    # models_path = "/scratch/student/models"
    models_path = args.model_path
    models = os.listdir(models_path)

    # Load each saved policy state in turn and run RLlib's evaluation workers against it.
    results = []
    for model in tqdm(sorted(models)):
        if not model.startswith("model"):
            print("idk", model)
            continue

        model_num = int(model[6:-4])
        if model_num % args.eval_every != 0:
            continue

        path = os.path.join(models_path, model)
        with open(path, "rb") as f:
            policy = pickle.load(f)

        trainer.get_policy("policy_1").set_state(policy)
        metrics = trainer._evaluate()
        metrics["evaluation"].pop("hist_stats")

        stats = {
            k: v
            for k, v in metrics["evaluation"]["custom_metrics"].items()
            if k.endswith("mean")
        }
        stats["model_number"] = model_num

        tqdm.write(
            "model: {: <6}: win_mean: {}, episodes: {}".format(
                model_num,
                stats["player_0_win_mean"],
                metrics["evaluation"]["episodes_this_iter"],
            )
        )
        results.append(stats)

    with open("{}_vs_models_{}.json".format(args.opponent, args.eval_num), "w") as f:
        json.dump(results, f, indent=4)
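# NOTE: the multiagent config above references a policy_mapping_fn that is defined
# elsewhere in the repo and not shown in this file. The definition below is a
# hypothetical sketch of what that mapping might look like, under the assumption
# that every RLlib-controlled seat shares the single trained policy "policy_1"
# (player_1 never reaches RLlib because it is handled by player_step_fn in the env).
# It is an illustration, not the repo's actual implementation.
def policy_mapping_fn(agent_id, *args, **kwargs):
    # Route every learning agent to the one trained policy.
    return "policy_1"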
class YanivTournament:
    """Plays trained RLlib policies against rule-based agents and collects per-player statistics."""

    def __init__(self, env_config, trainers=[], opponent="novice"):
        self.env_config = env_config
        self.trainers = trainers

        if opponent == "novice":
            self.rule_agent = YanivNoviceRuleAgent(
                single_step=env_config.get("single_step", True)
            )
        elif opponent == "intermediate":
            self.rule_agent = YanivIntermediateRuleAgent(
                single_step=env_config.get("single_step", True)
            )
        else:
            raise ValueError("opponent wrong {}".format(opponent))

        self.env = YanivEnv(env_config)

        # Seats with a trainer are driven by the trained policy, the rest by the rule agent.
        self.players = []
        for i in range(self.env.num_players):
            if i < len(self.trainers):
                self.players.append(self.trainers[i])
            else:
                self.players.append(self.rule_agent)

        self.reset_stats()

    def run_episode(self, render=False):
        obs = self.env.reset()
        if render:
            self.env.game.render()

        done = {"__all__": False}
        states = [
            t.get_policy("policy_1").model.get_initial_state()
            for t in self.trainers
        ]

        steps = 0
        while not done["__all__"]:
            player = self.players[self.env.current_player]
            player_id = self.env.current_player_string

            if player in self.trainers:
                action, state, _ = player.compute_action(
                    obs[player_id],
                    policy_id="policy_1",
                    state=states[self.env.current_player],
                    full_fetch=True,
                )
                states[self.env.current_player] = state

                # Track discard/pickup frequencies for the trained player.
                if self.env.game.round.discarding:
                    dec_action = self.env._decode_action(action)
                    if dec_action != utils.YANIV_ACTION:
                        self.player_stats[player_id]["discard_freqs"][
                            str(int(len(dec_action) / 2))
                        ] += 1
                else:
                    pickup_action = self.env._decode_action(action)
                    self.player_stats[player_id]["pickup_freqs"][pickup_action] += 1

                obs, reward, done, info = self.env.step({player_id: action})
            else:
                # Rule-based agents act on the raw game state rather than the encoded observation.
                state = self.env.game.get_state(self.env.current_player)
                extracted_state = {}
                extracted_state["raw_obs"] = state
                extracted_state["raw_legal_actions"] = [
                    a for a in state["legal_actions"]
                ]

                action = self.rule_agent.step(extracted_state)

                if self.env.game.round.discarding:
                    if action != utils.YANIV_ACTION:
                        self.player_stats[player_id]["discard_freqs"][
                            str(int(len(action) / 2))
                        ] += 1
                else:
                    self.player_stats[player_id]["pickup_freqs"][action] += 1

                obs, reward, done, info = self.env.step(
                    {player_id: action}, raw_action=True
                )

            steps += 1
            if render:
                self.env.game.render()

        self.game_stats["avg_roundlen"] += steps

        # Record the outcome: winner, draws, assafs, and losing scores.
        winner = self.env.game.round.winner
        if winner == -1:
            self.game_stats["avg_draws"] += 1
        else:
            winner_id = self.env._get_player_string(winner)
            self.player_stats[winner_id]["avg_wins"] += 1
            self.player_stats[winner_id]["winning_hands"].append(
                utils.get_hand_score(self.env.game.players[winner].hand)
            )

        assaf = self.env.game.round.assaf
        if assaf is not None:
            self.player_stats[self.env._get_player_string(assaf)]["avg_assafs"] += 1

        s = self.env.game.round.scores
        if s is not None:
            for i in range(self.env.num_players):
                if s[i] > 0:
                    self.player_stats[self.env._get_player_string(i)]["scores"].append(
                        s[i]
                    )

        self.games_played += 1

    def reset_game(self):
        self.scores = [[] for _ in range(self.env.num_players)]

    def run_game(self):
        self.reset_game()
        self.run_episode()

    def run(self, eval_num):
        self.reset_stats()
        for _ in range(eval_num):
            self.run_episode()

        return self.get_average_stats()

    def reset_stats(self):
        self.games_played = 0
        self.game_stats = {
            "avg_roundlen": 0,
            "avg_draws": 0,
        }
        self.player_stats = {
            player_id: {
                "avg_wins": 0,
                "avg_assafs": 0,
                "scores": [],
                "winning_hands": [],
                "discard_freqs": {
                    "1": 0,
                    "2": 0,
                    "3": 0,
                    "4": 0,
                    "5": 0,
                },
                "pickup_freqs": {a: 0 for a in utils.pickup_actions},
            }
            for player_id in self.env._get_players()
        }

    def get_average_stats(self):
        stats = {
            "game": deepcopy(self.game_stats),
            "player": deepcopy(self.player_stats),
        }

        # Convert accumulated counters into per-game averages.
        for key in stats["game"].keys():
            if key.startswith("avg"):
                stats["game"][key] /= self.games_played

        for player_stats in stats["player"].values():
            for key in player_stats:
                if key.startswith("avg"):
                    player_stats[key] /= self.games_played

            player_stats["avg_losing_score"] = (
                np.mean(player_stats["scores"])
                if len(player_stats["scores"]) > 0
                else 0
            )
            player_stats.pop("scores")

            player_stats["avg_winning_hand"] = (
                np.mean(player_stats["winning_hands"])
                if len(player_stats["winning_hands"]) > 0
                else 0
            )
            player_stats.pop("winning_hands")

        return stats

    def print_stats(self):
        avg_stats = self.get_average_stats()
        cleaned = json.dumps(avg_stats)
        print(yaml.safe_dump(json.loads(cleaned), default_flow_style=False))
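# Usage sketch (illustrative, not part of the repo): the snippet below assumes
# `trainer` is an already-restored RLlib trainer whose "policy_1" was trained on
# an env_config matching the one passed here, and that the checkpoint path is a
# placeholder. It pits the trained policy against the intermediate rule agent.
#
#   trainer = A3CTrainer(env="yaniv", config=config)
#   trainer.restore("/path/to/checkpoint")  # hypothetical checkpoint path
#   tournament = YanivTournament(env_config, trainers=[trainer], opponent="intermediate")
#   avg_stats = tournament.run(100)  # play 100 episodes and return averaged statistics
#   tournament.print_stats()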