Example #1
    def play(self, state):
        canon, map_to_orig = state.toCanonical(state.activePlayer.code)
        batch = torch_geometric.data.Batch.from_data_list([boardToData(canon)])
        mask, moves = maskAndMoves(canon, canon.gamePhase, batch.edge_index)

        if self.apprentice is not None:
            _, _, _, players, misc = canon.toDicts()
            global_x = buildGlobalFeature(players, misc).unsqueeze(0)
            pick, place, attack, fortify, value = self.apprentice.forward(
                batch, global_x)
            # Pick the policy head that matches the current game phase
            if canon.gamePhase == 'initialPick':
                policy = pick
            elif canon.gamePhase in ['initialFortify', 'startTurn']:
                policy = place
            elif canon.gamePhase == 'attack':
                policy = attack
            elif canon.gamePhase == 'fortify':
                policy = fortify
        else:
            # No apprentice: uniform policy and neutral values for the 6 seats
            policy = torch.ones_like(mask) / max(mask.shape)
            value = torch.zeros((1, 6))
        # Zero out illegal moves
        policy = policy * mask
        # Reorder the per-player values from canonical back to original seats
        value = value.squeeze()
        cor_value = torch.FloatTensor([
            value[map_to_orig.get(i)]
            if map_to_orig.get(i) is not None else 0.0 for i in range(6)
        ])
        return policy, cor_value
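The final step above reindexes the network's per-player values from canonical order back to the original seat order using map_to_orig. A minimal sketch of that remapping, assuming map_to_orig sends an original seat index to its canonical position (the dictionary below is hypothetical, for a 3-player game padded to 6 seats):

import torch

map_to_orig = {0: 1, 1: 2, 2: 0}       # hypothetical original -> canonical seats
value = torch.tensor([0.5, 0.3, 0.2])  # values in canonical order

cor_value = torch.FloatTensor([
    float(value[map_to_orig[i]]) if i in map_to_orig else 0.0
    for i in range(6)
])
print(cor_value)  # tensor([0.3000, 0.2000, 0.5000, 0.0000, 0.0000, 0.0000])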
Example #2
def play_episode(root, max_depth, apprentice, move_type="all", verbose=False):
    episode = []
    state = copy.deepcopy(root)
    edge_index = boardToData(root).edge_index
    # ******************* PLAY EPISODE ***************************
    for i in range(max_depth):
        #print_message_over(f"Playing episode: {i}/{max_depth}")

        # Check if episode is over
        if state.gameOver: break

        # Check if the current player is alive
        if not state.activePlayer.is_alive:
            # print("\npassing, dead player")
            state.endTurn()
            continue

        # Get possible moves, and apprentice policy
        mask, actions = agent.maskAndMoves(state, state.gamePhase, edge_index)

        try:
            policy, value = apprentice.getPolicy(state)
        except Exception as e:
            state.report()
            print(state.activePlayer.is_alive)
            print(state.activePlayer.num_countries)
            raise e

        # Normalize everything to numpy before mixing policy and mask
        if isinstance(mask, torch.Tensor):
            mask = mask.detach().numpy()
        if isinstance(policy, torch.Tensor):
            policy = policy.detach().numpy()

        probs = (policy * mask).flatten()
        probs = probs / probs.sum()

        # TODO: decide on a selection rule (proportional sampling, argmax, e-greedy)

        ind = np.random.choice(range(len(actions)), p=probs)
        move = agent.buildMove(state, actions[ind])

        saved = (move_type == "all" or move_type == state.gamePhase)
        if verbose:
            print(f"\t\tPlay episode: turn {i}, move = {move}, saved = {saved}")

        if saved:
            episode.append(copy.deepcopy(state))

        # Play the move to continue
        state.playMove(move)

    return episode
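Masking and renormalizing, as done above, zeroes out illegal actions and rescales the remaining probabilities into a distribution. A toy illustration with made-up numbers:

import numpy as np

policy = np.array([0.5, 0.3, 0.2])  # hypothetical apprentice policy
mask = np.array([1.0, 0.0, 1.0])    # the second action is illegal
probs = (policy * mask).flatten()   # -> [0.5, 0.0, 0.2]
probs = probs / probs.sum()         # -> [0.714..., 0.0, 0.286...]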
Example #3
def create_self_play_data(move_type, path, root, apprentice, max_depth = 100, saved_states_per_episode=1, verbose = False):
    """ Function to create episodes from self play.
        Visited states are saved and then re visited with the expert to label the data
        
        To do this in parallel, we use the multiprocessing library. The idea is to feed a queue of states
        Then use this queue to, in parallel, tag each state with the expert move
    """
    
    """
    samples_type = {'initialPick':0, 'initialFortify':0, 'startTurn':0, 'attack':0, 'fortify':0}
    
    # Get information about existing files, to continue enlarging the dataset
    for k, v in samples_type.items():
        path_aux = os.path.join(path, k, 'raw')
            
        val = max(list(map(int,
                        filter(isint,
                                    [n[(n.find("_")+1):n.find(".")] 
                                        for n in  os.listdir(path_aux) if 'board' in n]
                                )
                        )
                        ) + [0])
        samples_type[k] = val

    """
    
    edge_index = boardToData(root).edge_index    

    # ******************* PLAY EPISODE ***************************
    episode = play_episode(root, max_depth, apprentice)
        
    # ******************* SELECT STATES ***************************
    # Take some states from episode    
    # Define here how many states to select, and how
    options = [s for s in episode if s.gamePhase == move_type]
    if not options:
        # TODO: What to do in this case? For now take random states to avoid wasting the episode
        options = episode
    states_to_save = np.random.choice(options, min(saved_states_per_episode, len(options)))
        
    return states_to_save
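The docstring above mentions labeling the selected states in parallel. A minimal sketch of that queue pattern, assuming a hypothetical label_with_expert(state) helper that runs the expert on a single state:

import multiprocessing as mp

def _worker(in_queue, out_queue):
    # Consume states until a None sentinel arrives; label each with the expert move
    for state in iter(in_queue.get, None):
        out_queue.put(label_with_expert(state))  # hypothetical helper

def label_in_parallel(states, num_workers=4):
    in_q, out_q = mp.Queue(), mp.Queue()
    workers = [mp.Process(target=_worker, args=(in_q, out_q))
               for _ in range(num_workers)]
    for w in workers:
        w.start()
    for s in states:
        in_q.put(s)
    for _ in workers:
        in_q.put(None)  # one stop sentinel per worker
    labeled = [out_q.get() for _ in states]
    for w in workers:
        w.join()
    return labeled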
Example #4
def play_episode(root, max_depth, apprentice):
    episode = []
    state = copy.deepcopy(root)
    edge_index = boardToData(root).edge_index
    # ******************* PLAY EPISODE ***************************
    for i in range(max_depth):  
        #print_message_over(f"Playing episode: {i}/{max_depth}")

        # Check if episode is over            
        if state.gameOver: break

        # Check if the current player is alive
        if not state.activePlayer.is_alive: 
            # print("\npassing, dead player")
            state.endTurn()
            continue

        # Get possible moves, and apprentice policy
        mask, actions = maskAndMoves(state, state.gamePhase, edge_index)
        try:
            policy, value = apprentice.play(state)
        except Exception as e:
            state.report()
            print(state.activePlayer.is_alive)
            print(state.activePlayer.num_countries)
            raise e
        policy = policy * mask
        probs = policy.squeeze().detach().numpy()
        probs = probs / probs.sum()

        # TODO: decide on a selection rule (proportional sampling, argmax, e-greedy)
        ind = np.random.choice(range(len(actions)), p=probs)
        move = buildMove(state, actions[ind])
        
        episode.append(copy.deepcopy(state))

        # Play the move to continue
        state.playMove(move)
        
    return episode
Example #5
    def playMove(self, board, temp=1, num_sims=None, use_val=False):
        """ This function is used for every type of move:
            call the MCTS, get the action probabilities, then take the argmax
            or apply any other selection criterion.
        """
        edge_index = boardToData(board).edge_index
        mask, actions = maskAndMoves(board, board.gamePhase, edge_index)

        # Do the MCTS
        policy, value, _ = self.MCTS.getActionProb(board, temp=temp, num_sims=num_sims, use_val=use_val)

        policy = policy * mask.squeeze().detach().numpy()
        probs = policy / policy.sum()

        # Use some criterion to choose the move
        z = np.random.uniform()
        if self.move_selection == "argmax" or (self.move_selection == "e_greedy" and z < self.eps_greedy):
            ind = np.argmax(probs)
        else:  # "random_proportional", or the exploring branch of "e_greedy"
            ind = np.random.choice(range(len(actions)), p=probs)

        # Return the selected move
        return buildMove(board, actions[ind])
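The selection rule above mixes three strategies: argmax, proportional sampling, and an e-greedy blend of the two. Isolated as a standalone helper (a hypothetical sketch, easy to unit-test on its own):

import numpy as np

def select_action(probs, mode="random_proportional", eps_greedy=0.1, rng=None):
    # Matches the convention in playMove above: with mode="e_greedy",
    # be greedy with probability eps_greedy, otherwise sample proportionally.
    rng = rng or np.random.default_rng()
    if mode == "argmax" or (mode == "e_greedy" and rng.uniform() < eps_greedy):
        return int(np.argmax(probs))
    return int(rng.choice(len(probs), p=probs))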
Example #6
    def search(self, state, depth, use_val=False):
        # print("\n\n-------- SEARCH --------")
        # print(f"depth: {depth}")
        # state.report()

        # Is terminal? return vector of score per player
        if isTerminal(state) or depth > self.max_depth:
            # print("\n\n-------- TERMINAL --------")
            return score_players(state), score_players(state)

        # Active player is dead, then end turn
        while not state.activePlayer.is_alive:
            state.endTurn()
            if state.gameOver:
                return score_players(state), score_players(state)

        s = hash(state)
        # Is leaf?
        if s not in self.Ps:
            canon, map_to_orig = state.toCanonical(state.activePlayer.code)
            batch = torch_geometric.data.Batch.from_data_list(
                [boardToData(canon)])
            mask, moves = maskAndMoves(canon, canon.gamePhase,
                                       batch.edge_index)

            if self.apprentice is not None:
                policy, value = self.apprentice.play(canon)
            else:
                # No bias, just uniform sampling for the moment
                policy = torch.ones_like(mask) / max(mask.shape)
                value = torch.zeros((1, 6))

            policy = policy * mask
            self.Vs[s], self.As[s] = mask.squeeze(), moves
            self.Ps[s] = policy.squeeze()
            self.Ns[s] = 1

            # Return an evaluation
            v = np.zeros(6)
            for _ in range(self.sims_per_eval):
                sim = copy.deepcopy(state)
                sim.simulate(agent.RandomAgent())
                v += score_players(sim)
            v /= self.sims_per_eval

            # Fix order of value returned by net
            value = value.squeeze()
            # Apprentice already does this
            # cor_value = torch.FloatTensor([value[map_to_orig.get(i)] if not map_to_orig.get(i) is None else 0.0  for i in range(6)])
            cor_value = value
            return v, cor_value

        # Not a leaf, keep going down. Use values for the current player
        p = state.activePlayer.code
        action = -1
        bestScore = -float('inf')
        # print("Valid:")
        # print(self.Vs[s])
        for i, act in enumerate(self.As[s]):
            a = hash(act)
            if self.Vs[s][i] > 0.0:
                if (s, a) in self.Rsa:
                    # PUCT-like score: rollout average plus exploration bonus,
                    # a visit-decayed prior, and an optional network-value term
                    uct = self.Rsa[(s, a)][p] + self.cb * np.sqrt(
                        np.log(self.Ns[s]) / max(self.Nsa[(s, a)], self.eps))
                    pol = self.wa * self.Ps[s][i] / (self.Nsa[(s, a)] + 1)
                    val = self.wb * self.Qsa[(s, a)][p] if use_val else 0.0
                    sc = uct + pol + val
                else:
                    # Unseen action, take it
                    action = act
                    break
                if sc > bestScore:
                    bestScore = sc
                    action = act

        if isinstance(action, int) and action == -1:
            print("**** No move?? *****")
            state.report()
            print(self.As[s])
            print(self.Vs[s])
            raise RuntimeError("search: no legal action was found")

        # print('best: ', action)
        a = hash(action)  # Best action in simplified way
        move = buildMove(state, action)
        # Play action, continue search
        # TODO: For now, armies are placed on one country only to simplify the game
        # print(move)
        state.playMove(move)
        v, net_v = self.search(state, depth + 1, use_val)
        if isinstance(net_v, torch.Tensor):
            net_v = net_v.detach().numpy()
        if isinstance(v, torch.Tensor):
            v = v.detach().numpy()

        if (s, a) in self.Rsa:
            rsa, qsa, nsa = self.Rsa[(s, a)], self.Qsa[(s, a)], self.Nsa[(s, a)]
            # Running averages of rollout returns (Rsa) and network values (Qsa)
            self.Rsa[(s, a)] = (nsa * rsa + v) / (nsa + 1)
            self.Qsa[(s, a)] = (nsa * qsa + net_v) / (nsa + 1)
            self.Nsa[(s, a)] += 1

        else:
            self.Rsa[(s, a)] = v
            self.Qsa[(s, a)] = net_v
            self.Nsa[(s, a)] = 1

        self.Ns[s] += 1

        return v, net_v
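For reference, the per-action score in the loop above combines the running rollout average with an exploration bonus, a visit-decayed prior, and an optional network-value term. A standalone restatement of that formula (a sketch mirroring the code, with the same cb, wa, wb weights):

import numpy as np

def action_score(rsa_p, qsa_p, nsa, ns, prior, cb, wa, wb, use_val, eps=1e-8):
    # rsa_p / qsa_p: rollout and network values for the acting player
    uct = rsa_p + cb * np.sqrt(np.log(ns) / max(nsa, eps))  # exploration bonus
    pol = wa * prior / (nsa + 1)                            # visit-decayed prior
    val = wb * qsa_p if use_val else 0.0                    # optional value term
    return uct + pol + val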
Example #7
def create_self_play_data(path,
                          root,
                          num_samples,
                          start_sample,
                          apprentice,
                          expert,
                          max_depth=100,
                          saved_states_per_episode=1,
                          verbose=False):
    """ Function to create episodes from self play.
        Visited states are saved and then re visited with the expert to label the data

    """
    samples = 0

    samples_type = {
        'initialPick': 0,
        'initialFortify': 0,
        'startTurn': 0,
        'attack': 0,
        'fortify': 0
    }
    # Get information about existing files, to continue enlarging the dataset
    for k in samples_type:
        path_aux = os.path.join(path, k, 'raw')
        indices = [n[(n.find("_") + 1):n.find(".")]
                   for n in os.listdir(path_aux) if 'board' in n]
        samples_type[k] = max(list(map(int, filter(isint, indices))) + [0])

    move_to_save = itertools.cycle(list(samples_type.keys()))
    edge_index = boardToData(root).edge_index
    while samples < num_samples:

        # ******************* PLAY EPISODE ***************************
        episode = []
        state = copy.deepcopy(root)
        for i in range(max_depth):
            print_message_over(f"Playing episode: {i}/{max_depth}")

            # Check if episode is over
            if state.gameOver: break

            # Check if the current player is alive
            if not state.activePlayer.is_alive:
                # print("\npassing, dead player")
                state.endTurn()
                continue

            # Get possible moves, and apprentice policy
            mask, actions = maskAndMoves(state, state.gamePhase, edge_index)
            try:
                policy, value = apprentice.play(state)
            except Exception as e:
                state.report()
                print(state.activePlayer.is_alive)
                print(state.activePlayer.num_countries)
                raise e
            policy = policy * mask
            probs = policy.squeeze().detach().numpy()
            probs = probs / probs.sum()

            ind = np.random.choice(range(len(actions)), p=probs)
            move = buildMove(state, actions[ind])

            episode.append(copy.deepcopy(state))

            # Play the move to continue
            state.playMove(move)

        # ******************* SAVE STATES ***************************
        # Take some states from episode
        # Choose which kind of move we are going to save

        to_save = next(move_to_save)

        # Define here how many states to select, and how
        options = [s for s in episode if s.gamePhase == to_save]
        init_to_save = to_save
        while not options:
            to_save = next(move_to_save)
            if to_save == init_to_save:
                raise Exception(
                    "Episode is empty? No dataset could be created for any game phase"
                )
            options = [s for s in episode if s.gamePhase == to_save]
        states_to_save = np.random.choice(
            options, min(saved_states_per_episode, len(options)))

        # Get expert move for the chosen states
        for i, state in enumerate(states_to_save):
            print_message_over(
                f"Saving states: Saved {i}/{len(states_to_save)}... Total: {samples}/{num_samples}"
            )
            policy_exp, value_exp, _ = expert.getActionProb(state,
                                                            temp=1,
                                                            num_sims=None,
                                                            use_val=False)
            # Save the board, value and target
            board, _ = state.toCanonical(state.activePlayer.code)
            phase = board.gamePhase
            if isinstance(policy_exp, torch.Tensor):
                policy_exp = policy_exp.detach().numpy()
            if isinstance(value_exp, torch.Tensor):
                value_exp = value_exp.detach().numpy()

            saveBoardObs(os.path.join(path, phase, 'raw'),
                         'board_{}.json'.format(samples_type[phase]), board,
                         board.gamePhase, policy_exp.tolist(),
                         value_exp.tolist())
            samples += 1
            samples_type[phase] += 1
            print_message_over(
                f"Saving states: Saved {i+1}/{len(states_to_save)}... Total: {samples}/{num_samples}"
            )

    print_message_over("Done!")
    print()
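The index-scanning code above relies on an isint helper that none of the snippets define. A minimal sketch of what it presumably does (report whether a string parses as an integer, e.g. the N in board_N.json):

def isint(s):
    # Presumed helper: True if s parses as an integer
    try:
        int(s)
        return True
    except ValueError:
        return False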
Example #8


#%%
# Advance the game until the attack phase (or until the game ends)
board.play()
while not board.gameOver and board.gamePhase != "attack":
    board.play()

board.report()
print(board.countriesPandas())
print("\n")

# Get policy for board
canon, _ = board.toCanonical(board.activePlayer.code)
batch = torch_geometric.data.Batch.from_data_list([boardToData(canon)])
mask, moves = agent.maskAndMoves(canon, canon.gamePhase, batch.edge_index)
policy, value = apprentice.getPolicy(canon)
pop = policy.squeeze()

T = 1
# Temperature sharpening: exp(log(p)/T) == p**(1/T); the clipping avoids log(0)
exp = np.exp(np.log(np.maximum(pop, 0.000001))/T)
soft = exp/exp.sum()

co = board.countries()
for m, a, p, s in zip(mask.squeeze(), moves, pop, soft):
    if m.item():
        if len(a) > 2:
            print(
                f"{a[0]}: {co[a[1]]['id']} -> {co[a[2]]['id']} - {p:.3f} - {s:.3f}")
        else:
            # Snippet was truncated here; presumably shorter moves print without a target
            print(f"{a[0]}: {a[1:]} - {p:.3f} - {s:.3f}")
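The exp(log(p)/T) construction above is plain temperature scaling of an existing distribution: raise the probabilities to the power 1/T and renormalize. A standalone sketch:

import numpy as np

def sharpen(p, T=1.0, eps=1e-6):
    # Equivalent to np.exp(np.log(np.maximum(p, eps)) / T), renormalized.
    # T < 1 sharpens the distribution, T > 1 flattens it, T = 1 leaves it as-is.
    q = np.maximum(p, eps) ** (1.0 / T)
    return q / q.sum()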