Exemplo n.º 1
0
    # Set players
    pR1, pR2 = agent.RandomAgent('Red'), agent.RandomAgent('Blue')
    players = [pR1, pR2]
    # Set board

    prefs = board_params

    board_orig = Board(world, players)
    board_orig.setPreferences(prefs)

    num_nodes = board_orig.world.map_graph.number_of_nodes()
    num_edges = board_orig.world.map_graph.number_of_edges()

    if apprentice_params["type"] == "net":
        if verbose:
            misc.print_and_flush(
                f"create_self_play ({num_task}): Creating model")
        net = GCN_risk(
            num_nodes, num_edges, model_args['board_input_dim'],
            model_args['global_input_dim'], model_args['hidden_global_dim'],
            model_args['num_global_layers'], model_args['hidden_conv_dim'],
            model_args['num_conv_layers'], model_args['hidden_pick_dim'],
            model_args['num_pick_layers'], model_args['out_pick_dim'],
            model_args['hidden_place_dim'], model_args['num_place_layers'],
            model_args['out_place_dim'], model_args['hidden_attack_dim'],
            model_args['num_attack_layers'], model_args['out_attack_dim'],
            model_args['hidden_fortify_dim'], model_args['num_fortify_layers'],
            model_args['out_fortify_dim'], model_args['hidden_value_dim'],
            model_args['num_value_layers'], model_args['dropout'],
            model_args['block'])

        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
def create_self_play_script(input_file, move_type, verbose):
    """Run one self-play episode and tag the sampled states with an expert.

    Reads a JSON configuration, builds the board and the apprentice
    (neural-network or MCTS based), plays one self-play episode for the
    given ``move_type``, then labels each sampled state with the expert
    MCTS policy/value and writes the tagged states under ``path_data``.

    Args:
        input_file: Path to a JSON file with the run configuration
            (data/model paths, apprentice/expert parameters, board params).
        move_type: Game phase to generate data for (e.g. "initialPick",
            "initialFortify", "startTurn", "attack", "fortify").
        verbose: If truthy, emit progress messages via
            ``misc.print_and_flush``.
    """
    # ---------------- Start -------------------------
    inputs = misc.read_json(input_file)

    misc.print_and_flush("create_self_play: Start")
    start = time.process_time()

    saved_states_per_episode = inputs["saved_states_per_episode"]
    max_episode_depth = inputs["max_episode_depth"]
    apprentice_params = inputs["apprentice_params"]
    expert_params = inputs["expert_params"]

    path_data = inputs["path_data"]
    path_model = inputs["path_model"]
    # Model hyper-parameters live in a second JSON file referenced by the first.
    model_args = misc.read_json(inputs["model_parameters"])

    board_params = inputs["board_params"]
    path_board = board_params["path_board"]

    # One output folder per game phase is created further below.
    move_types = ["initialPick", "initialFortify", "startTurn", "attack", "fortify"]
    # ---------------------------------------------------------------

    # Create board
    world = World(path_board)

    # Set players: two random agents; they only serve as board occupants,
    # the apprentice/expert drive the actual move selection.
    pR1, pR2 = agent.RandomAgent('Red'), agent.RandomAgent('Blue')
    players = [pR1, pR2]

    # Set board preferences straight from the board_params dict.
    prefs = board_params

    board_orig = Board(world, players)
    board_orig.setPreferences(prefs)

    num_nodes = board_orig.world.map_graph.number_of_nodes()
    num_edges = board_orig.world.map_graph.number_of_edges()

    if apprentice_params["type"] == "net":
        # Neural apprentice: build the GCN and optionally load a checkpoint.
        if verbose: misc.print_and_flush("create_self_play: Creating model")
        net = GCN_risk(num_nodes, num_edges,
                         model_args['board_input_dim'], model_args['global_input_dim'],
                         model_args['hidden_global_dim'], model_args['num_global_layers'],
                         model_args['hidden_conv_dim'], model_args['num_conv_layers'],
                         model_args['hidden_pick_dim'], model_args['num_pick_layers'], model_args['out_pick_dim'],
                         model_args['hidden_place_dim'], model_args['num_place_layers'], model_args['out_place_dim'],
                         model_args['hidden_attack_dim'], model_args['num_attack_layers'], model_args['out_attack_dim'],
                         model_args['hidden_fortify_dim'], model_args['num_fortify_layers'], model_args['out_fortify_dim'],
                         model_args['hidden_value_dim'], model_args['num_value_layers'],
                         model_args['dropout'])

        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        net.to(device)

        model_name = apprentice_params["model_name"]
        if model_name:  # If it is not the empty string
            try:
                if verbose: misc.print_and_flush(f"create_self_play : Chosen model is {model_name}")
                # Checkpoint is always deserialized on CPU; the net itself
                # already lives on `device`.
                state_dict = load_dict(os.path.join(path_model, model_name), device = 'cpu', encoding = 'latin1')
                net.load_state_dict(state_dict['model'])
                if verbose: misc.print_and_flush("create_self_play: Model has been loaded")
            except Exception as e:
                # Best-effort load: continue with a freshly initialised net,
                # but make the failure visible in the flushed log instead of
                # a bare print() that may be lost in buffered output.
                misc.print_and_flush(f"create_self_play: Could not load model {model_name}: {e}")

        if verbose: misc.print_and_flush("create_self_play: Defining net apprentice")
        # Define initial apprentice
        apprentice = agent.NetApprentice(net)
    else:
        # Fallback apprentice: plain MCTS with the configured search budget.
        if verbose: misc.print_and_flush("create_self_play: Defining MCTS apprentice")
        apprentice = agent.MctsApprentice(num_MCTS_sims = apprentice_params["num_MCTS_sims"],
                                          temp = apprentice_params["temp"],
                                          max_depth = apprentice_params["max_depth"],
                                          sims_per_eval = apprentice_params["sims_per_eval"])

    if verbose: misc.print_and_flush("create_self_play: Defining expert")
    # Build expert: an MCTS guided by the apprentice, used below to tag states.
    expert = build_expert_mcts(apprentice, max_depth=expert_params["max_depth"],
                    sims_per_eval=expert_params["sims_per_eval"], num_MCTS_sims=expert_params["num_MCTS_sims"],
                    wa = expert_params["wa"], wb = expert_params["wb"],
                    cb = expert_params["cb"], use_val = expert_params["use_val"])

    if verbose: misc.print_and_flush("create_self_play: Creating data folders")
    # Create folders to store data (idempotent thanks to exist_ok)
    for folder in move_types:
        os.makedirs(os.path.join(path_data, folder, 'raw'), exist_ok = True)
    os.makedirs(path_model, exist_ok = True)

    #### START
    start_inner = time.process_time()

    # Work on a deep copy so the pristine board can be reused by callers.
    state = copy.deepcopy(board_orig)

    # Play episode, select states to save
    if verbose: misc.print_and_flush("create_self_play: Self-play")

    states_to_save = create_self_play_data(move_type, path_data, state, apprentice, max_depth = max_episode_depth, saved_states_per_episode=saved_states_per_episode, verbose = verbose)

    if verbose: misc.print_and_flush(f"create_self_play: Play episode: Time taken: {round(time.process_time() - start_inner,2)}")

    # Tag the states with the expert's policy/value and save them to disk.
    start_inner = time.process_time()
    if verbose: misc.print_and_flush(f"create_self_play: Tag the states ({len(states_to_save)} states to tag)")
    for st in states_to_save:
        st_tagged, policy_exp, value_exp = tag_with_expert_move(st, expert, temp=expert_params["temp"], verbose=verbose)
        _ = simple_save_state(path_data, st_tagged, policy_exp, value_exp, verbose=verbose)
    if verbose: misc.print_and_flush(f"create_self_play: Tag and save: Time taken -> {round(time.process_time() - start_inner,2)}")

    misc.print_and_flush(f"create_self_play: Total time taken -> {round(time.process_time() - start,2)}")
def train_model_main(input_file, iteration, checkpoint, verbose):
    """Train the GCN on the self-play datasets, one game phase at a time.

    Reads a JSON configuration, rebuilds the network from the stored
    hyper-parameters, and runs ``train_model`` over the per-phase datasets
    found under ``path_data``, checkpointing under ``path_model``.

    Args:
        input_file: Path to a JSON file with paths, batch size, epochs and
            board/model parameters.
        iteration: Outer-loop iteration index, embedded in checkpoint names.
        checkpoint: Checkpoint file name (relative to ``path_model``) to
            resume from, or a falsy value to start from scratch.
        verbose: If truthy, emit progress messages via
            ``misc.print_and_flush``.
    """
    # ---------------- Start -------------------------
    misc.print_and_flush(f"train_model {iteration}: Start")
    start = time.process_time()

    inputs = misc.read_json(input_file)

    path_data = inputs["path_data"]
    path_model = inputs["path_model"]
    batch_size = inputs["batch_size"]
    model_args = misc.read_json(inputs["model_parameters"])

    board_params = inputs["board_params"]
    path_board = board_params["path_board"]

    epochs = inputs["epochs"]
    eval_every = inputs["eval_every"]

    # ---------------- Load model -------------------------

    # One dataset folder per game phase.
    move_types = ['initialPick', 'initialFortify', 'startTurn', 'attack', 'fortify']

    # Create Board (only needed to size the network's graph dimensions)
    world = World(path_board)

    # Set players: placeholders so the board can be constructed.
    pR1, pR2 = agent.RandomAgent('Red'), agent.RandomAgent('Blue')
    players = [pR1, pR2]
    # Set board
    # TODO: Send to inputs
    prefs = board_params

    board_orig = Board(world, players)
    board_orig.setPreferences(prefs)

    num_nodes = board_orig.world.map_graph.number_of_nodes()
    num_edges = board_orig.world.map_graph.number_of_edges()

    if verbose: misc.print_and_flush("Creating model")
    net = GCN_risk(num_nodes, num_edges,
                     model_args['board_input_dim'], model_args['global_input_dim'],
                     model_args['hidden_global_dim'], model_args['num_global_layers'],
                     model_args['hidden_conv_dim'], model_args['num_conv_layers'],
                     model_args['hidden_pick_dim'], model_args['num_pick_layers'], model_args['out_pick_dim'],
                     model_args['hidden_place_dim'], model_args['num_place_layers'], model_args['out_place_dim'],
                     model_args['hidden_attack_dim'], model_args['num_attack_layers'], model_args['out_attack_dim'],
                     model_args['hidden_fortify_dim'], model_args['num_fortify_layers'], model_args['out_fortify_dim'],
                     model_args['hidden_value_dim'], model_args['num_value_layers'],
                     model_args['dropout'])

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    net.to(device)
    optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)
    criterion = TPT_Loss

    # The checkpoint is loaded (inside train_model) only for the first phase.
    # After that the freshly trained model is already in memory, so we avoid
    # reloading the last saved file on every phase.
    load_path = os.path.join(path_model, checkpoint) if checkpoint else None

    # Train network on dataset
    if verbose: misc.print_and_flush("Training network")
    # Randomize phase order so no phase is systematically trained last.
    shuffle(move_types)
    for j, move_type in enumerate(move_types):
        if verbose: misc.print_and_flush(f"\tTraining {j}:  {move_type}")
        save_path = f"{path_model}/model_{iteration}_{j}_{move_type}.tar"
        root_path = f'{path_data}/{move_type}'

        # Skip phases without enough raw samples to fill a single batch.
        if len(os.listdir(os.path.join(root_path, 'raw'))) < batch_size: continue

        risk_dataset = RiskDataset(root = root_path)
        # TODO: add validation data
        loader = G_DataLoader(risk_dataset, batch_size=batch_size, shuffle = True)
        if verbose: misc.print_and_flush(f"\tTrain on {root_path}, model = {save_path}")
        train_model(net, optimizer, scheduler, criterion, device,
                    epochs = epochs, train_loader = loader, val_loader = None, eval_every = eval_every,
                    load_path = load_path, save_path = save_path)

        load_path = None # The model is already in memory

    misc.print_and_flush(f"train_model: Total time taken -> {round(time.process_time() - start,2)}")
        
        # Create json file with inputs for the self play tasks
        input_dict = {
          "saved_states_per_episode": saved_states_per_episode,          
          "apprentice_params": apprentice_params,          
          "expert_params": expert_params,          
          "path_data": path_data,
          "path_model": path_model,
          "model_parameters": inputs["model_parameters"],
          "board_params": board_params,
          "max_episode_depth": inputs["max_episode_depth"]          
        }
        self_play_input_json = os.path.join(params_path, self_play_tag) + ".json"
        misc.write_json(input_dict, self_play_input_json)

        misc.print_and_flush(f"Running {num_iter} iterations, each of {num_cpu} tasks")
        for j in range(num_iter):
            # Each iteration launches num_cpu tasks
            misc.print_and_flush(f"\n\t*** Inner iter {j+1} of {num_iter}")
            move_type = next(types_cycle)
            create_self_play_script(self_play_input_json, move_type, verbose)
                        
        print(f"Time taken self-play: {round(time.process_time() - start,2)}")
    
        ##### 2. Train network on dataset
        start = time.process_time()
        print("Training network")
        
        # Create the input file
        input_dict = {
          "path_data": path_data,