Example #1
    def play_deterministic_game(self, starting_intgamestate, thislogits, thisxnode, otherlogits, otherxnode, thisSess, otherSess):
        self.input_tensor.fill(0)
        black_groups = unionfind()
        white_groups = unionfind()
        turn = HexColor.BLACK
        intgamestate = []
        for imove in starting_intgamestate:
            black_groups, white_groups = GameCheck.updateUF(intgamestate, black_groups, white_groups,
                                                            imove, turn, self.boardsize)
            turn = HexColor.EMPTY - turn
            intgamestate.append(imove)

        game_status = GameCheck.winner(black_groups, white_groups)
        empty_points = []
        for i in range(self.boardsize * self.boardsize):
            if i not in intgamestate:
                empty_points.append(i)
        # Randomly choose which color the `other` network plays; `this` plays the rest.
        aux_player_color = np.random.randint(HexColor.BLACK, HexColor.EMPTY)
        assert aux_player_color == 1 or aux_player_color == 2
        first_player = turn
        while game_status == HexColor.EMPTY:
            self.input_tensor.fill(0)
            self.input_tensor_builder.set_position_tensors_in_batch(self.input_tensor, 0, intgamestate)
            # The `other` network evaluates positions where its color is to move;
            # the `this` network evaluates the rest.
            if aux_player_color != turn:
                logits_score = thisSess.run(thislogits, feed_dict={thisxnode: self.input_tensor})
            else:
                logits_score = otherSess.run(otherlogits, feed_dict={otherxnode: self.input_tensor})
            if turn == first_player:
                # Greedy play for the side that moved first from the starting
                # state: pick the highest-scoring empty point.
                logits_score = np.squeeze(logits_score)
                best_action = -1
                largest_score = 0
                for action in empty_points:
                    if best_action == -1 or logits_score[action] > largest_score:
                        largest_score = logits_score[action]
                        best_action = action
                selected_int_move = best_action
            else:
                # Stochastic play for the other side: sample from the softmax
                # over the empty points.
                selected_int_move = softmax_selection(logits_score, empty_points)
            black_groups, white_groups = GameCheck.updateUF(intgamestate, black_groups, white_groups,
                                                            selected_int_move, turn, self.boardsize)
            game_status = GameCheck.winner(black_groups, white_groups)
            intgamestate.append(selected_int_move)
            empty_points.remove(selected_int_move)
            turn = HexColor.EMPTY - turn

        # Shaped reward: a BLACK win scores +(0.25 + 1/game_length), a WHITE win
        # the negative counterpart, so shorter games carry larger magnitudes.
        reward = 0.25 + 1.0 / len(intgamestate) if game_status == HexColor.BLACK else -1.0 / len(intgamestate) - 0.25
        #print('played one game')
        return intgamestate, reward
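
The snippets on this page rely on helpers defined elsewhere in the project (unionfind, GameCheck, softmax_selection) and on the HexColor constants. The assert on aux_player_color together with the `turn = HexColor.EMPTY - turn` flip pins down one consistent set of color values; a minimal sketch of such constants, inferred from the snippet above rather than taken from the project's source:

class HexColor:
    # Assumed values: np.random.randint(BLACK, EMPTY) must yield 1 or 2, and
    # EMPTY - BLACK must equal WHITE (and vice versa) for the turn flip to work.
    BLACK = 1
    WHITE = 2
    EMPTY = 3
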
Example #2
    def playonegame(self, sess, logits, boardsize, x_input_node,
                    starting_intgamestate):
        self.input_tensor.fill(0)
        black_groups = unionfind()
        white_groups = unionfind()
        turn = HexColor.BLACK
        intgamestate = []
        for imove in starting_intgamestate:
            black_groups, white_groups = GameCheck.updateUF(
                intgamestate, black_groups, white_groups, imove, turn,
                boardsize)
            turn = HexColor.EMPTY - turn
            intgamestate.append(imove)

        # BLACK moves first, so an odd move count means BLACK made the last move.
        whoplayedlastmove = (HexColor.BLACK if len(intgamestate) % 2 == 1
                             else HexColor.WHITE)
        game_status = GameCheck.winner(black_groups, white_groups)
        empty_points = []
        for i in range(boardsize * boardsize):
            if i not in intgamestate:
                empty_points.append(i)
        while game_status == HexColor.EMPTY:
            self.input_tensor_builder.set_position_tensors_in_batch(
                self.input_tensor, 0, intgamestate)
            logits_score = sess.run(
                logits, feed_dict={x_input_node: self.input_tensor})
            selected_int_move = softmax_selection(logits_score, empty_points)
            black_groups, white_groups = GameCheck.updateUF(
                intgamestate, black_groups, white_groups, selected_int_move,
                turn, boardsize)
            game_status = GameCheck.winner(black_groups, white_groups)
            intgamestate.append(selected_int_move)
            empty_points.remove(selected_int_move)
            turn = HexColor.EMPTY - turn

        # Reward is from the perspective of whoever made the last move of the
        # starting position.
        reward = 1.0 if game_status == whoplayedlastmove else -1.0
        #print('played one game')
        return reward
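
Both examples sample moves with softmax_selection, which is not shown here. A minimal sketch of what such a helper could look like, assuming logits_score is (or squeezes to) a flat array with one score per board cell and sampling is restricted to the legal empty points:

import numpy as np

def softmax_selection(logits_score, empty_points, temperature=1.0):
    # Collapse a possible (1, boardsize * boardsize) batch axis.
    scores = np.squeeze(np.asarray(logits_score, dtype=np.float64))
    # Softmax over the legal moves only, with max-subtraction for stability.
    legal = scores[np.asarray(empty_points)] / temperature
    probs = np.exp(legal - np.max(legal))
    probs /= probs.sum()
    # Draw one legal move according to the resulting distribution.
    return int(np.random.choice(empty_points, p=probs))
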
Example #3
    def playbatchgame(self,
                      sess,
                      logits,
                      boardsize,
                      batchsize,
                      x_input_node,
                      topk,
                      is_adversarial_pg=False):
        intmoveseqlist = []
        gameresultlist = []
        batch_cnt = 0
        while batch_cnt < batchsize:
            self.input_tensor.fill(0)
            black_groups = unionfind()
            white_groups = unionfind()
            turn = HexColor.BLACK
            intgamestate = []
            game_status = HexColor.EMPTY
            # Length of the randomized opening: 1-19 softmax-sampled moves.
            k = np.random.randint(1, 20)
            cnt = 0
            # intgamestate is empty at this point, so every cell starts out legal.
            empty_points = list(range(boardsize * boardsize))
            while game_status == HexColor.EMPTY and cnt < k:
                self.input_tensor_builder.set_position_tensors_in_batch(
                    self.input_tensor, 0, intgamestate)
                logits_score = sess.run(
                    logits, feed_dict={x_input_node: self.input_tensor})

                selected_int_move = softmax_selection(logits_score,
                                                      empty_points,
                                                      temperature=5.0)
                black_groups, white_groups = GameCheck.updateUF(
                    intgamestate, black_groups, white_groups,
                    selected_int_move, turn, boardsize)
                game_status = GameCheck.winner(black_groups, white_groups)
                intgamestate.append(selected_int_move)
                empty_points.remove(selected_int_move)
                turn = HexColor.EMPTY - turn
                cnt += 1
            if game_status != HexColor.EMPTY:
                # The random opening already decided the game; discard it and
                # retry without counting it towards the batch.
                print('wasted!')
                continue
            intmoveseqlist.append(intgamestate)
            if is_adversarial_pg:
                self.input_tensor_builder.set_position_tensors_in_batch(
                    self.input_tensor, 0, intgamestate)
                logits_score = sess.run(
                    logits, feed_dict={x_input_node: self.input_tensor})
                logits_score = np.squeeze(logits_score)
                # Indices of the topk highest logits, restricted to cells that
                # are still empty in the current position.
                top_points = np.argpartition(-logits_score, kth=topk)[:topk]
                top_points = [p for p in top_points.tolist() if p in empty_points]
                if len(top_points) == 0:
                    top_points = np.random.choice(empty_points, topk)
                # Play out each candidate reply and keep the worst-case
                # (minimum) negated reward as this position's result.
                min_reward = 2.0
                for i in top_points:
                    intgamestate.append(i)
                    reward = self.playonegame(
                        sess,
                        logits,
                        boardsize,
                        x_input_node,
                        starting_intgamestate=intgamestate)
                    reward = -reward
                    min_reward = min(reward, min_reward)
                    intgamestate.remove(i)
                gameresultlist.append(min_reward)
            else:
                reward = self.playonegame(sess,
                                          logits,
                                          boardsize,
                                          x_input_node,
                                          starting_intgamestate=intgamestate)
                gameresultlist.append(reward)
            batch_cnt += 1
        return intmoveseqlist, gameresultlist
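
playbatchgame returns one scalar reward per self-played move sequence, which is the shape a REINFORCE-style policy-gradient update typically consumes. A self-contained sketch (not the project's code) of turning those pairs into per-move training examples, where every move simply inherits its game's final reward:

def to_training_examples(move_seqs, rewards):
    # Pair each move with the position it was played from and the game result.
    examples = []
    for seq, reward in zip(move_seqs, rewards):
        for t, move in enumerate(seq):
            position_so_far = seq[:t]  # moves played before this one
            examples.append((position_so_far, move, reward))
    return examples

# Toy usage with two short integer-encoded games:
examples = to_training_examples([[0, 5, 12], [3, 7]], [1.0, -1.0])
print(len(examples))  # 5 (position, move, reward) triples
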