def load_NetPlayer(board, args):
    """Build a NetPlayer from a board and an args dict that points to the
    model hyperparameter JSON and the saved model weights."""
    num_nodes = board.world.map_graph.number_of_nodes()
    num_edges = board.world.map_graph.number_of_edges()
    model_args = misc.read_json(args["model_parameters_json"])

    net = GCN_risk(num_nodes, num_edges,
                   model_args['board_input_dim'], model_args['global_input_dim'],
                   model_args['hidden_global_dim'], model_args['num_global_layers'],
                   model_args['hidden_conv_dim'], model_args['num_conv_layers'],
                   model_args['hidden_pick_dim'], model_args['num_pick_layers'], model_args['out_pick_dim'],
                   model_args['hidden_place_dim'], model_args['num_place_layers'], model_args['out_place_dim'],
                   model_args['hidden_attack_dim'], model_args['num_attack_layers'], model_args['out_attack_dim'],
                   model_args['hidden_fortify_dim'], model_args['num_fortify_layers'], model_args['out_fortify_dim'],
                   model_args['hidden_value_dim'], model_args['num_value_layers'],
                   model_args['dropout'])
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    net.to(device)
    net.eval()

    # Load the saved weights into the network
    state_dict = load_dict(args["model_path"], device='cpu', encoding='latin1')
    net.load_state_dict(state_dict['model'])

    # Wrap the net in an apprentice and forward any optional player kwargs
    apprentice = agent.NetApprentice(net)
    kwargs = {}
    for a in ["move_selection", "name", "temp"]:
        if a in args:
            kwargs[a] = args[a]
    netPlayer = agent.NetPlayer(apprentice, **kwargs)
    return netPlayer
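# Usage sketch for load_NetPlayer (the file names below are hypothetical
# placeholders, not files shipped with this code):
#
#   world = World(path_board)
#   players = [agent.RandomAgent('Red'), agent.RandomAgent('Blue')]
#   board = Board(world, players)
#   args = {"model_parameters_json": "model_parameters.json",  # hyperparameters
#           "model_path": "models/model.tar",                  # saved weights
#           "move_selection": "random_proportional", "temp": 0.5}
#   netPlayer = load_NetPlayer(board, args)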
op_armies = []
model_cont = []
for i, model_name in enumerate(models_sorted):
    # Keep only checkpoints whose name matches this match number.
    # (A debug override that disabled this filter has been removed.)
    a = re.search(f"[a-z]+_[0-9]+_{match_number}", model_name)
    if a is None:
        continue
    print(f"Chosen model is {model_name}")
    state_dict = load_dict(os.path.join(path_model, model_name),
                           device='cpu', encoding='latin1')
    net.load_state_dict(state_dict['model'])
    net.eval()

    for k in range(num_matchs):
        if (k + 1) % 10 == 0:
            print(f'Match {k+1}')
        world = World(path_board)
        apprentice = agent.NetApprentice(net)
        netPlayer = agent.NetPlayer(apprentice,
                                    move_selection="random_proportional",
                                    temp=0.5)

        # Play against a random agent
        pRandom = RandomAgent('Random')
        battle_board = Board(world, [netPlayer, pRandom])
        battle_board.setPreferences(prefs)
        for j in range(max_turns):
            battle_board.play()
            if battle_board.gameOver:
                break

        # w = 1 only if the net player survived and the random agent did not
        w = 0
        if battle_board.players[netPlayer.code].is_alive:
            if not battle_board.players[pRandom.code].is_alive:
                w = 1
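# Aggregation sketch (hypothetical: the fragment above ends before the
# results are collected, so the names below are assumptions, not part of
# the original code):
#
#   wins = []
#   # ...inside the match loop, after computing w:
#   #     wins.append(w)
#   # ...after the loop:
#   # print(f"Win rate for {model_name}: {sum(wins) / len(wins):.2f} "
#   #       f"over {num_matchs} matches")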
def create_self_play_script(input_file, move_type, verbose):
    # ---------------- Start -------------------------
    inputs = misc.read_json(input_file)
    misc.print_and_flush("create_self_play: Start")
    start = time.process_time()

    saved_states_per_episode = inputs["saved_states_per_episode"]
    max_episode_depth = inputs["max_episode_depth"]
    apprentice_params = inputs["apprentice_params"]
    expert_params = inputs["expert_params"]
    path_data = inputs["path_data"]
    path_model = inputs["path_model"]
    model_args = misc.read_json(inputs["model_parameters"])
    board_params = inputs["board_params"]
    path_board = board_params["path_board"]

    move_types = ["initialPick", "initialFortify", "startTurn",
                  "attack", "fortify"]

    # ---------------------------------------------------------------
    # Create board
    world = World(path_board)

    # Set players
    pR1, pR2 = agent.RandomAgent('Red'), agent.RandomAgent('Blue')
    players = [pR1, pR2]

    # Set board
    prefs = board_params
    board_orig = Board(world, players)
    board_orig.setPreferences(prefs)

    num_nodes = board_orig.world.map_graph.number_of_nodes()
    num_edges = board_orig.world.map_graph.number_of_edges()

    if apprentice_params["type"] == "net":
        if verbose:
            misc.print_and_flush("create_self_play: Creating model")
        net = GCN_risk(num_nodes, num_edges,
                       model_args['board_input_dim'], model_args['global_input_dim'],
                       model_args['hidden_global_dim'], model_args['num_global_layers'],
                       model_args['hidden_conv_dim'], model_args['num_conv_layers'],
                       model_args['hidden_pick_dim'], model_args['num_pick_layers'], model_args['out_pick_dim'],
                       model_args['hidden_place_dim'], model_args['num_place_layers'], model_args['out_place_dim'],
                       model_args['hidden_attack_dim'], model_args['num_attack_layers'], model_args['out_attack_dim'],
                       model_args['hidden_fortify_dim'], model_args['num_fortify_layers'], model_args['out_fortify_dim'],
                       model_args['hidden_value_dim'], model_args['num_value_layers'],
                       model_args['dropout'])
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        net.to(device)

        model_name = apprentice_params["model_name"]
        if model_name:  # If it is not the empty string
            try:
                if verbose:
                    misc.print_and_flush(f"create_self_play: Chosen model is {model_name}")
                state_dict = load_dict(os.path.join(path_model, model_name),
                                       device='cpu', encoding='latin1')
                net.load_state_dict(state_dict['model'])
                if verbose:
                    misc.print_and_flush("create_self_play: Model has been loaded")
            except Exception as e:
                print(e)

        if verbose:
            misc.print_and_flush("create_self_play: Defining net apprentice")
        # Define initial apprentice
        apprentice = agent.NetApprentice(net)
    else:
        if verbose:
            misc.print_and_flush("create_self_play: Defining MCTS apprentice")
        apprentice = agent.MctsApprentice(num_MCTS_sims=apprentice_params["num_MCTS_sims"],
                                          temp=apprentice_params["temp"],
                                          max_depth=apprentice_params["max_depth"],
                                          sims_per_eval=apprentice_params["sims_per_eval"])

    if verbose:
        misc.print_and_flush("create_self_play: Defining expert")
    # Build expert
    expert = build_expert_mcts(apprentice,
                               max_depth=expert_params["max_depth"],
                               sims_per_eval=expert_params["sims_per_eval"],
                               num_MCTS_sims=expert_params["num_MCTS_sims"],
                               wa=expert_params["wa"], wb=expert_params["wb"],
                               cb=expert_params["cb"], use_val=expert_params["use_val"])

    if verbose:
        misc.print_and_flush("create_self_play: Creating data folders")
    # Create folders to store data
    for folder in move_types:
        os.makedirs(os.path.join(path_data, folder, 'raw'), exist_ok=True)
    os.makedirs(path_model, exist_ok=True)

    #### START
    start_inner = time.process_time()
    state = copy.deepcopy(board_orig)

    # Play episode, select states to save
    if verbose:
        misc.print_and_flush("create_self_play: Self-play")
    states_to_save = create_self_play_data(move_type, path_data, state, apprentice,
                                           max_depth=max_episode_depth,
                                           saved_states_per_episode=saved_states_per_episode,
                                           verbose=verbose)
    if verbose:
        misc.print_and_flush(f"create_self_play: Play episode: Time taken: {round(time.process_time() - start_inner, 2)}")

    # Tag the states with the expert and save them
    start_inner = time.process_time()
    if verbose:
        misc.print_and_flush(f"create_self_play: Tag the states ({len(states_to_save)} states to tag)")
    for st in states_to_save:
        st_tagged, policy_exp, value_exp = tag_with_expert_move(st, expert,
                                                                temp=expert_params["temp"],
                                                                verbose=verbose)
        _ = simple_save_state(path_data, st_tagged, policy_exp, value_exp, verbose=verbose)
    if verbose:
        misc.print_and_flush(f"create_self_play: Tag and save: Time taken -> {round(time.process_time() - start_inner, 2)}")

    misc.print_and_flush(f"create_self_play: Total time taken -> {round(time.process_time() - start, 2)}")
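# Usage sketch for create_self_play_script (the file name and JSON layout
# are inferred from the keys read above; this is not a config shipped with
# this code):
#
#   "self_play_inputs.json" would contain at least:
#     {"saved_states_per_episode": ..., "max_episode_depth": ...,
#      "apprentice_params": {"type": "net", "model_name": "", ...},
#      "expert_params": {...}, "path_data": ..., "path_model": ...,
#      "model_parameters": ..., "board_params": {"path_board": ...}}
#
#   create_self_play_script("self_play_inputs.json", move_type="attack", verbose=1)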
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)
criterion = TPT_Loss

move_types = ['initialPick', 'initialFortify', 'startTurn',
              'attack', 'fortify']
types_cycle = itertools.cycle(move_types)

print("Defining apprentice")
# Define initial apprentice
apprentice = agent.MctsApprentice(num_MCTS_sims=initial_apprentice_mcts_sims,
                                  temp=1, max_depth=max_depth)
apprentice = agent.NetApprentice(net)  # Test the net apprentice, it is way faster  # CHANGE

print("Defining expert")
# Build expert
expert = build_expert_mcts(None)  # Start with plain MCTS, no inner apprentice
expert = build_expert_mcts(agent.NetApprentice(net))  # Test the network  # CHANGE
expert.num_MCTS_sims = expert_mcts_sims

print("Creating data folders")
# Create folders to store data
for folder in move_types:
    os.makedirs(os.path.join(path_data, folder, 'raw'), exist_ok=True)
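# Training-loop sketch (hypothetical: the actual loop is not part of this
# fragment, so the loader and the TPT_Loss signature below are assumptions):
#
#   for step in range(num_steps):
#       move_type = next(types_cycle)              # alternate over the five phases
#       batch = load_batch(path_data, move_type)   # hypothetical data loader
#       optimizer.zero_grad()
#       out_policy, out_value = net(batch)         # head outputs depend on GCN_risk
#       loss = criterion(out_policy, out_value, batch_targets)  # assumed signature
#       loss.backward()
#       optimizer.step()
#       scheduler.step()                           # halves the lr every 20 steps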