Пример #1
0
def predict(go_env, info_env, level, player, enemy=False):
    """Choose an action (0-48 board move, 49 = pass) for `player`.

    Picks a strategy at random via choose_strategy(); strategy 'P' means
    pass immediately. Otherwise runs a `level`-deep look-ahead with
    seeInFurture and returns one of the best-scoring moves, breaking
    ties at random. If no move scores above 0, passes.

    Parameters:
        go_env: current Go environment (copied, never mutated here).
        info_env: env info dict; the "invalid_moves" mask is read.
        level: look-ahead depth forwarded to seeInFurture.
        player: "white" or "black".
        enemy: True when predicting the opponent's reply (silences logging).

    Returns:
        int action index in [0, 48], or 49 to pass.
    """
    go_env_pred = copy(go_env)
    info = info_env

    strategy = choose_strategy()
    if strategy == 'P':
        return 49  # Strategy: pass the turn

    invalid_moves = get_invalidMoves(info["invalid_moves"])

    nextPlays = seeInFurture(go_env_pred, invalid_moves, level, player, strategy)
    if not enemy:
        print(nextPlays.flatten())
        print("El jugador "+player+" ha usado la estrategia "+strategy)

    # Kept for the (commented-out) area-based cutoff heuristic below.
    black_area, white_area = gogame.areas(go_env_pred.state_)

    # The original white/black branches were byte-identical, so the
    # per-player split has been collapsed into a single selection path.
    if nextPlays.size != 0 and nextPlays[0, 1] > 0:  # or nextPlays[0, 2] < black_area:
        # Several moves may share the same max score: pick one at random.
        play = random.randrange(len(nextPlays))
        return int(nextPlays[play, 0])
    return 49
Пример #2
0
def draw_info(batch, window_width, window_height, upper_grid_coord, state):
    """Render the turn/pass/game-status text and the area tally onto `batch`."""
    whose_turn = gogame.turn(state)
    turn_str = 'B' if whose_turn == govars.BLACK else 'W'
    passed = gogame.prev_player_passed(state)
    over = gogame.game_ended(state)
    status = "OVER" if over else "ONGOING"
    info_label = "Turn: {}\nPassed: {}\nGame: {}".format(turn_str, passed, status)

    # Status block, anchored to the top-right corner of the window.
    pyglet.text.Label(
        info_label,
        font_name='Helvetica',
        font_size=11,
        x=window_width - 20,
        y=window_height - 20,
        anchor_x='right',
        anchor_y='top',
        color=(0, 0, 0, 192),
        batch=batch,
        width=window_width / 2,
        align='right',
        multiline=True,
    )

    # Area tally, centered above the board grid.
    black_area, white_area = gogame.areas(state)
    score_text = "{}B | {}W".format(black_area, white_area)
    pyglet.text.Label(
        score_text,
        font_name='Helvetica',
        font_size=16,
        x=window_width / 2,
        y=upper_grid_coord + 80,
        anchor_x='center',
        color=(0, 0, 0, 192),
        batch=batch,
        width=window_width,
        align='center',
    )
Пример #3
0
def predict(go_env,
            info_env,
            level,
            player,
            n_plays,
            enemy=False,
            smart=False):
    """Choose an action (0-48 board move, 49 = pass) for `player`.

    Picks a strategy at random via choose_strategy(); strategy 'P' means
    pass immediately. Otherwise runs a `level`-deep look-ahead with
    seeInFurture over `n_plays` sampled candidate moves and returns one
    of the best-scoring moves, breaking ties at random. If no move
    scores above 0, passes.

    Parameters:
        go_env: current Go environment (copied, never mutated here).
        info_env: env info dict; the "invalid_moves" mask is read.
        level: look-ahead depth forwarded to seeInFurture.
        player: "white" or "black".
        n_plays: number of random candidate moves sampled per level.
        enemy: True when predicting the opponent's reply.
        smart: score moves with the neural model instead of the area
            heuristic (forwarded to seeInFurture).

    Returns:
        int action index in [0, 48], or 49 to pass.
    """
    go_env_pred = copy(go_env)
    info = info_env

    strategy = choose_strategy()
    if strategy == 'P':
        return 49  # Pass the turn

    invalid_moves = get_invalidMoves(info["invalid_moves"])

    nextPlays = seeInFurture(go_env_pred, invalid_moves, level, player,
                             strategy, n_plays, smart)
    # Uncomment for detailed move/strategy tracing:
    # if not enemy and not smart:
    #     print(nextPlays.flatten())
    #     print("El jugador "+player+" ha usado la estrategia "+strategy)

    # Kept for the (commented-out) area-based cutoff heuristic below.
    black_area, white_area = gogame.areas(go_env_pred.state_)

    # The original white/black branches were byte-identical, so the
    # per-player split has been collapsed into a single selection path.
    if nextPlays.size != 0 and nextPlays[0, 1] > 0:  # or nextPlays[0, 2] < black_area:
        # Several moves may share the same max score: pick one at random.
        play = random.randrange(len(nextPlays))
        return int(nextPlays[play, 0])
    return 49
Пример #4
0
    def reward(self):
        '''
        Return the reward for the current state, per self.reward_method.

        REAL: delegates to self.winner() — 0 for an in-game move, 1 for a
            black win, 0 for a loss, 0.5 for a draw (black's perspective),
            decided by the Area rule (Trump-Taylor scoring).
            Area rule definition: https://en.wikipedia.org/wiki/Rules_of_Go#End
        HEURISTIC: black total area minus white total area, komi-adjusted;
            once the game ends the sign is amplified to +/- size**2.

        Raises:
            Exception: if self.reward_method is neither REAL nor HEURISTIC.
        '''
        if self.reward_method == RewardMethod.REAL:
            return self.winner()

        if self.reward_method == RewardMethod.HEURISTIC:
            black_area, white_area = gogame.areas(self.state_)
            komi_correction = (black_area - white_area) - self.komi
            if not self.game_ended():
                return komi_correction
            # Terminal positions get the extreme +/- size**2 reward.
            return self.size**2 if komi_correction > 0 else -self.size**2

        raise Exception("Unknown Reward Method")
Пример #5
0
def seeInFurture(go_env_pred, invalidPlays, lvls, player, strategy, first = True):
    """Recursive look-ahead that scores candidate moves for `player`.

    Tries every valid move (0-48), scores it with countingPoints (area
    gained plus area taken from the opponent), and recurses `lvls` levels
    deep, letting `predict` play the opponent's reply in between.

    Parameters:
        go_env_pred: Go environment to simulate on (copied before stepping).
        invalidPlays: collection of action indices that may not be played.
        lvls: remaining look-ahead depth; 1 is the deepest level.
        player: "white" or "black" — the side being optimized.
        strategy: strategy code forwarded to countingPoints.
        first: True only on the outermost call; changes the return type.

    Returns:
        first=True: np.ndarray of [move, score] rows for the best moves
            (may be empty when no move scores above 0).
        first=False: the best numeric score found in this branch.
    """
    counter = 0
    playPoints = np.empty([0,2])
    maxPoints = 0
    tmpPoints = 0
    parentMove = np.empty([0,0])

    # The opponent plays the opposite color.
    if player == "white":
        enemy = "black"
    else:
        enemy = "white"

    for counter in range(49):
        if counter not in invalidPlays:
            # Score this level-n move.

            if len(invalidPlays) <= 1: # The opening move scores 1 pt for either side, not 49
                pts = 1.0

            else:
                tmp_env = copy(go_env_pred)
                prev_black_area, prev_white_area = gogame.areas(tmp_env.state_)
                tmp_env.step(counter)
                black_area, white_area = gogame.areas(tmp_env.state_)

                # Keep the best scores; when lvl != 1 a list of promising moves is built instead.
                pts = countingPoints(strategy, prev_black_area, prev_white_area, black_area, white_area, player) # area gained + area taken

            if lvls == 1: # Deepest level: build the list of best moves and set the max score
                if pts > maxPoints:
                    maxPoints = pts  
                    playPoints = np.array([[counter, pts]])
                elif pts == maxPoints: 
                    playPoints = np.append(playPoints, [[counter, pts]], axis=0)
            else: # Not level 1: collect promising moves to expand and set the max score
                if pts > tmpPoints:
                    tmpPoints = pts
                    parentMove = np.array([counter])
                elif pts == tmpPoints:
                    parentMove = np.append(parentMove, counter)
            
    lvls = lvls - 1 # Descend one level in the tree

    if first and tmpPoints == 0: # If no immediate move scores above 0, cancel the prediction
        parentMove = np.empty([0,0])

    if lvls: # Reaching level 0 means the last level (1) has already been processed
        tmp_max = 0

        for i in parentMove: # Recurse into seeInFuture and get the max score of each branch
            tmp_env = copy(go_env_pred)
            state, reward, done, info = tmp_env.step(int(i)) # Our player's move
            enemy_action = predict(tmp_env, info, 1, enemy, True) # Predict the opponent's strategy and move
            state, reward, done, info = tmp_env.step(enemy_action) # Opponent replies (assumption) <-- uncertainty!!! o.o
            tmp_plays = get_invalidMoves(info["invalid_moves"])
            tmp_max = seeInFurture(tmp_env, tmp_plays, lvls, player, strategy, False)# Max score of the lower level

            if tmp_max > maxPoints and not first:
                maxPoints = tmp_max

            elif first: # On the main branch (the next actual move) record the move with its branch max
                if tmp_max == maxPoints: # Equal scores are appended to the list
                    if not maxPoints: # If the subtree max (tmp_max) is 0, fall back to this level's score
                        tmp_max = tmpPoints
                    playPoints = np.append(playPoints, [[i, tmp_max]], axis=0)

                elif tmp_max > maxPoints: # A strictly better score resets the list and the max
                    playPoints = np.array([[i, tmp_max]])
                    maxPoints = tmp_max

        if not maxPoints:
            maxPoints = tmpPoints

    if first: # Top level returns the moves together with their subtree max scores
        maxPoints = playPoints

    return maxPoints
Пример #6
0
def seeInFurture(go_env_pred,
                 invalidPlays,
                 lvls,
                 player,
                 strategy,
                 n_plays,
                 smart,
                 first=True):
    """Recursive look-ahead that scores sampled candidate moves.

    Samples up to `n_plays` distinct valid moves, scores each one either
    with the neural model (`smart=True`) or with the area heuristic via
    countingPoints, and recurses `lvls` levels deep, letting `predict`
    play the opponent's reply in between.

    Parameters:
        go_env_pred: Go environment to simulate on (copied before stepping).
        invalidPlays: collection of action indices that may not be played.
        lvls: remaining look-ahead depth; 1 is the deepest level.
        player: "white" or "black" — the side being optimized.
        strategy: strategy code forwarded to countingPoints.
        n_plays: number of distinct random candidate moves to sample.
        smart: True to score with the trained model instead of areas.
        first: True only on the outermost call; changes the return type.

    Returns:
        first=True: np.ndarray of [move, score] rows for the best moves
            (may be empty when no move scores above 0).
        first=False: the best numeric score found in this branch.
    """
    playPoints = np.empty([0, 2])
    maxPoints = 0
    tmpPoints = 0
    parentMove = np.empty([0, 0])

    # The opponent plays the opposite color.
    enemy = "black" if player == "white" else "white"

    # All moves (0..48) not flagged as invalid.
    valid_Plays = np.array([m for m in range(49) if m not in invalidPlays])

    # Sample n_plays *distinct* moves among the valid ones. As before, if
    # fewer valid moves than n_plays exist, no move is scored at all.
    if n_plays <= len(valid_Plays):
        for _ in range(n_plays):
            # Bug fixes: randrange(0, len-1) could never pick the last
            # candidate and raised ValueError once only one remained, and
            # np.delete's return value was discarded (it does not mutate
            # its argument), so the same move could be sampled repeatedly.
            rnd = random.randrange(len(valid_Plays))
            counter = int(valid_Plays[rnd])
            valid_Plays = np.delete(valid_Plays, rnd)

            if len(invalidPlays) <= 1:
                # The opening move scores 1 pt for either side, not 49.
                pts = 1.0
            elif smart:
                tmp_env = copy(go_env_pred)
                tmp_env.step(counter)

                blk = tmp_env.state_[0].flatten()
                wht = tmp_env.state_[1].flatten()

                # Board encoding for the model: black = -1, white = 1, empty = 0.
                tablero = np.where(blk == 1, -1, 0)
                for i in range(len(wht)):
                    if int(wht[i]) == 1:
                        np.put(tablero, i, 1)
                tablero = tablero.reshape(1, 7, 7, 1)

                # Probability of winning after making this move.
                if player == "white":
                    pts = model.predict(tf.convert_to_tensor(tablero))[0][0]
                else:
                    pts = model.predict(tf.convert_to_tensor(tablero))[0][1]

                if pts < 0.25:  # Winning unlikely: fall back to the draw probability.
                    pts = model.predict(tf.convert_to_tensor(tablero))[0][2]
                    if pts < 0.3:  # Drawing unlikely too
                        pts = 0  # Win and draw both unlikely: prefer passing.
            else:
                tmp_env = copy(go_env_pred)
                prev_black_area, prev_white_area = gogame.areas(tmp_env.state_)
                tmp_env.step(counter)
                black_area, white_area = gogame.areas(tmp_env.state_)

                # Area gained plus area taken from the opponent.
                pts = countingPoints(strategy, prev_black_area,
                                     prev_white_area, black_area, white_area,
                                     player)

            if lvls == 1:
                # Deepest level: build the list of best moves and the max score.
                if pts > maxPoints:
                    maxPoints = pts
                    playPoints = np.array([[counter, pts]])
                elif pts == maxPoints:
                    playPoints = np.append(playPoints, [[counter, pts]], axis=0)
            else:
                # Higher levels: collect promising moves to expand recursively.
                if pts > tmpPoints:
                    tmpPoints = pts
                    parentMove = np.array([counter])
                elif pts == tmpPoints:
                    parentMove = np.append(parentMove, counter)

    lvls = lvls - 1  # Descend one level in the tree.

    if first and tmpPoints == 0:
        # No immediate move scores above 0: cancel the prediction.
        parentMove = np.empty([0, 0])
    if lvls:  # Reaching 0 means the last level (1) was already processed.
        tmp_max = 0
        for i in parentMove:
            tmp_env = copy(go_env_pred)
            state, reward, done, info = tmp_env.step(int(i))  # Our player's move.
            # Predict the opponent's strategy and reply.
            enemy_action = predict(tmp_env, info, 1, enemy, 3, True)
            state, reward, done, info = tmp_env.step(enemy_action)
            tmp_plays = get_invalidMoves(info["invalid_moves"])
            # Max score of the subtree rooted at this move.
            tmp_max = seeInFurture(tmp_env, tmp_plays, lvls, player, strategy,
                                   3, smart, False)

            if tmp_max > maxPoints and not first:
                maxPoints = tmp_max
            elif first:
                # Main branch: record the move together with its subtree max.
                if tmp_max == maxPoints:
                    if not maxPoints:
                        # Subtree scored 0: fall back to this level's score.
                        tmp_max = tmpPoints
                    playPoints = np.append(playPoints, [[i, tmp_max]], axis=0)
                elif tmp_max > maxPoints:
                    # A strictly better score resets the list and the max.
                    playPoints = np.array([[i, tmp_max]])
                    maxPoints = tmp_max

        if not maxPoints:
            maxPoints = tmpPoints

    if first:
        # Top level returns the [move, score] candidates, not the raw score.
        maxPoints = playPoints

    return maxPoints
Пример #7
0
                        blk = go_env.state_[0].flatten()
                        wht = go_env.state_[1].flatten()

                    tablero = blk

                    tablero = np.where(tablero == 1, "-1", 0)

                    for i in range(len(wht)):
                        if int(wht[i]) == 1:
                            np.put(tablero, i, "1")

                    tablero = ' '.join(tablero)

                    print(tablero)

                    black_area, white_area = gogame.areas(go_env.state_)

                    if white_area > black_area:
                        out = 0  # Blanco gana

                    elif white_area < black_area:
                        out = 1  # Negro gana

                    else:
                        out = 2  #Empate

                    dataset = open("dataset.csv", "a")
                    dataset.write(str(out) + "," + tablero + "\n")
                    dataset.close()

                    if n_stages != 0: