Example #1
def randwalk():

    g = ludopy.Game([1, 2, 3])
    there_is_a_winner = False

    while not there_is_a_winner:
        (dice, move_pieces, player_pieces, enemy_pieces, player_is_a_winner,
         there_is_a_winner), player_i = g.get_observation()
        print("dice", dice)
        print("move_pieces", move_pieces)
        print("player_pieces", player_pieces)
        print("enemy_pieces", enemy_pieces)
        print("player_is_a_winner", player_is_a_winner)
        print("there_is_a_winner", there_is_a_winner)
        print("player_i", player_i)
        print('determind_state', Q_Learning.determind_state(player_pieces))

        if len(move_pieces):
            piece_to_move = move_pieces[np.random.randint(0, len(move_pieces))]
        else:
            piece_to_move = -1

        _, _, _, _, _, there_is_a_winner = g.answer_observation(piece_to_move)

        cv2.imshow('test', (g.render_environment()))
        cv2.waitKey(0)

    print("Saving history to numpy file")
    g.save_hist("game_history.npy")
    print("Saving game video")
    g.save_hist_video("game_video.mp4")

    return True
Example #2
 def objective(self, x):
     # play the game 1000 times using the given weights;
     # the number of games won is the fitness value
     times_won = 0
     start = timer()
     for i in range(1000):
         game = ludopy.Game()
         player_is_a_winner = False
         there_is_a_winner = False
         while not there_is_a_winner:
             (dice, move_pieces, player_pieces, enemy_pieces,
              player_is_a_winner,
              there_is_a_winner), player_i = game.get_observation()
             # only do moves for player 0, all other players will move randomly
             piece_to_move = -1
             if player_i == 0:
                 if len(move_pieces):
                     piece_to_move = self.util_func(x, deepcopy(game), dice,
                                                    move_pieces,
                                                    player_pieces,
                                                    enemy_pieces)
             else:
                 if len(move_pieces):
                     piece_to_move = move_pieces[np.random.randint(
                         0, len(move_pieces))]
             _, _, _, _, _, there_is_a_winner = game.answer_observation(
                 piece_to_move)
         # game done; if the first winner was player 0, increment times_won
         if game.first_winner_was == 0:
             times_won += 1
     end = timer()
     #logging.info('Done playing 100 games for a single member in the population. Games won: {}, time taken: {}'.format(times_won, end-start))
     return times_won
Example #3
def play_match(player_0, player_1, player_2, player_3):
    g = ludopy.Game()
    there_is_a_winner = False

    while not there_is_a_winner:
        (dice, move_pieces, player_pieces, enemy_pieces, player_is_a_winner,
         there_is_a_winner), player_i = g.get_observation()
        if len(move_pieces):
            if player_i == 0:
                piece_to_move = player_0.play(dice, move_pieces, player_pieces,
                                              enemy_pieces)
            elif player_i == 1:
                piece_to_move = player_1.play(dice, move_pieces, player_pieces,
                                              enemy_pieces)
            elif player_i == 2:
                piece_to_move = player_2.play(dice, move_pieces, player_pieces,
                                              enemy_pieces)
            elif player_i == 3:
                piece_to_move = player_3.play(dice, move_pieces, player_pieces,
                                              enemy_pieces)
            else:
                raise ValueError("No players turn")
        else:
            piece_to_move = -1

        _, _, _, _, _, there_is_a_winner = g.answer_observation(piece_to_move)
        if there_is_a_winner:
            return player_i
Example #4
    def evaluate_fitness(self, evaluation_iterations=500):
        self.q_agent = QAgent(self.game, self.q_learner.q_table.q_table)

        #self.q_agent.q_table.evaluating = True

        self.agents[0] = self.q_agent

        self.eval_games_won = []

        for i in range(evaluation_iterations):
            there_is_a_winner = False

            while not there_is_a_winner:
                self.agents[self.game.current_player].move()

                there_is_a_winner = len(self.game.game_winners) > 0

            if self.game.first_winner_was == 0:
                self.eval_games_won.append(1)
            else:
                self.eval_games_won.append(0)

            self.game = ludopy.Game()

            self.q_agent.new_game(self.game)

            for i in range(1, 4):
                self.agents[i].new_game(self.game)

        self.fitness = sum(self.eval_games_won) / len(self.eval_games_won)

        return self.fitness
Example #5
def evaluate(x, n_games):
    # play the game n_games times using the given weights;
    # the number of games won is the fitness value
    times_won = 0
    print('Starting eval of {} games...'.format(n_games))
    print('Weights: {}'.format(x))
    logging.info('Starting eval of {} games...'.format(n_games))
    logging.info('Weights: {}'.format(x))
    start = timer()
    for i in range(n_games):
        game = ludopy.Game()
        player_is_a_winner = False
        there_is_a_winner = False
        while not there_is_a_winner:
            (dice, move_pieces, player_pieces, enemy_pieces, player_is_a_winner, there_is_a_winner), player_i = game.get_observation()
            # only do moves for player 0, all other players will move randomly
            piece_to_move = -1
            if player_i == 0:
                if len(move_pieces):
                    piece_to_move = util_func(x, deepcopy(game), dice, move_pieces, player_pieces, enemy_pieces)
            else:
                if len(move_pieces):
                    piece_to_move = move_pieces[np.random.randint(0, len(move_pieces))]
            _, _, _, _, _, there_is_a_winner = game.answer_observation(piece_to_move)
        # game done; if the first winner was player 0, increment times_won
        if game.first_winner_was == 0:
            times_won += 1
        if i % 10000 == 0:
            logging.info('Done playing {} games. Won so far: {}'.format(i, times_won))
    end = timer()
    print('Done playing {} games. Games won: {}, time taken: {}'.format(n_games, times_won, end-start))
    logging.info('Done playing {} games. Games won: {}, time taken: {}'.format(n_games, times_won, end-start))
    return times_won
Example #6
def play(players):
    # print("\n\nNew game\n----------------------\n")
    game = ludopy.Game()
    there_is_a_winner = False
    player_i = -1

    while not there_is_a_winner:
        observation, player_i = game.get_observation()
        (dice, movable_pieces, player_pieces, enemy_pieces, player_is_a_winner,
         there_is_a_winner) = observation

        player = players[player_i]

        if len(movable_pieces) > 0:
            # print('player: #' + str(player_i) + '\t' + str(dice) + '\t' + str(movable_pieces.tolist()) + '\t\t' + str(player_pieces.tolist()) + '\t\t' + str(enemy_pieces.tolist()))
            piece_to_move = player.select_piece_to_move(observation)
        else:
            # print('-')
            piece_to_move = -1

        _, _, _, _, _, there_is_a_winner = game.answer_observation(
            piece_to_move)

    # print("Saving history to numpy file")
    # game.save_hist(f"game_history.npy")
    # print("Saving game video")
    # game.save_hist_video(f"game_video.mp4")

    players[player_i].wincount += 1
    return player_i
Example #7
    def __init__(self, chromo):
        self.g = ludopy.Game()
        self.current_state = []
        self.qtable = {}  # init a dictionary
        # self.learning_rate = 0.25  # 0.5 alpha
        # self.discount_factor = 0.95  # 0.9 gamma
        self.epsilon = 1.0  # 0.10 ; 1.0 = 100% random, 0 = max val (greedy)
        self.reward = 0  # r

        self.HOME_AREAL_INDEXS = np.array([53, 54, 55, 56, 57, 58])
        self.GLOBUS_INDEXS = np.array([9, 22, 35, 48])
        self.STAR_INDEXS = np.array([5, 12, 18, 25, 31, 38, 44, 51])
        self.GOAL_INDEX = 59
        self.ENEMY_1_GLOB_INDX = 14
        self.ENEMY_2_GLOB_INDX = 27
        self.ENEMY_3_GLOB_INDX = 40
        self.next_position = []
        self.next_qvalue = 0
        self.there_is_a_winner = False
        self.dice = None
        self.player_i = None
        self.move_pieces = []
        self.current_position = []
        self.enemy_pieces = []
        self.current_action = 0
        self.current_state = []

        self.chromosome = chromo
        self.discount_factor = chromo[0]  # 0.9 gamma
        self.learning_rate = chromo[1]  # 0.5 alpha
Example #8
def randwalk():
    import ludopy
    import numpy as np
    from PIL import Image as pilImg

    g = ludopy.Game()
    there_is_a_winner = False

    while not there_is_a_winner:
        (dice, move_pieces, player_pieces, enemy_pieces, player_is_a_winner,
         there_is_a_winner), player_i = g.get_observation()

        if len(move_pieces):
            piece_to_move = move_pieces[np.random.randint(0, len(move_pieces))]
            # g.test()
            boardImg = g.render_environment()
            img = pilImg.fromarray(boardImg)
            img.save("test.jpeg")
        else:
            piece_to_move = -1

        _, _, _, _, _, there_is_a_winner = g.answer_observation(piece_to_move)

    print("Saving history to numpy file")
    g.save_hist("game_history.npy")
    print("Saving game video")
    g.save_hist_video("game_video.mp4")

    return True
Example #9
 def __init__(self):
     self.g = ludopy.Game()
     self.there_is_a_winner = False
     self.dice = None
     self.player_i = None
     self.move_pieces = []
     self.current_position = []
     self.enemy_pieces = []
     self.player = [Player(), Player(), Player(), Player()]
Example #10
def winRate(load_path, episodes, player_num):
    tf.reset_default_graph()
    number_of_players = 2
    number_of_pieces = 4
    reward = -1000
    EPISODES = episodes
    ghost_players = list(reversed(range(0, 4)))[:-number_of_players]
    players = list(reversed(range(0, 4)))[-number_of_players:]
    winner = None
    act = util.Action(number_of_players, number_of_pieces, reward)
    winnerCount = defaultdict(int)
    print(load_path, "---")
    PG = PolicyGradient(
        n_x=(number_of_players * number_of_pieces) + 5,  # input layer size
        n_y=5,  # output layer size
        learning_rate=0.02,
        reward_decay=0.99,
        load_path=load_path,
        save_path=None,
        player_num=player_num)
    preds = list()
    for episode in range(EPISODES):
        g = ludopy.Game(ghost_players=ghost_players,\
             number_of_pieces=number_of_pieces)

        there_is_a_winner = False
        winner = None
        totalMoves, wrongPred = 0, 0
        while True:
            for i in range(number_of_players):
                (dice, move_pieces, player_pieces, enemy_pieces, \
                         player_is_a_winner,there_is_a_winner),\
                                 player_i = g.get_observation()

                if player_i == 1:
                    action, random = act.getAction(PG, enemy_pieces,
                                                   player_pieces, move_pieces,
                                                   dice)
                    totalMoves += 1
                    if random:
                        wrongPred += 1
                else:
                    action = act.getAction(move_pieces=move_pieces)

                _, _, _, _, _, there_is_a_winner = g.answer_observation(action)

                if there_is_a_winner:
                    if episode % 1000 == 0 and 0:
                        print("saving the game--", episode)
                    winner = player_i
                    winnerCount[player_i] += 1
                    break
            if there_is_a_winner:
                preds.append([wrongPred, totalMoves])
                break
    return winnerCount, preds
Example #11
def evaluate_qlearning_multiprocessing(i):
    times_won = 0
    game = ludopy.Game()
    player_is_a_winner = False
    there_is_a_winner = False
    while not there_is_a_winner:
        (dice, move_pieces, player_pieces, enemy_pieces, player_is_a_winner, there_is_a_winner), player_i = game.get_observation()
        # only do moves for player 0, all other players will move randomly
        piece_to_move = -1
        if player_i == 0:
            if len(move_pieces):
                piece_to_move = player.getNextAction(player.getState(player_pieces, enemy_pieces), dice, move_pieces)
        else:
            if len(move_pieces):
                piece_to_move = move_pieces[np.random.randint(0, len(move_pieces))]
        _, _, _, _, _, there_is_a_winner = game.answer_observation(piece_to_move)
    # game done; return the index of the first winner
    return game.first_winner_was
Example #12
def run_random_game(save_video=False):
    g = ludopy.Game()

    game_done = False

    game_obs = [[] for _ in range(4)]

    game_events = [[] for _ in range(4)]
    rewards = [[] for _ in range(4)]

    player_end = [False for _ in range(4)]

    while not all(player_end):
        (dice, move_pieces, player_pieces, enemy_pieces, player_is_a_winner, game_done), player_i = g.get_observation()
        # enemy_pieces = np.copy(enemy_pieces)
        action = -1
        if len(move_pieces):
            action = random.choice(move_pieces)

        (_, _, player_pieces_after, enemy_pieces_after, player_is_a_winner_after,
         game_done_after) = g.answer_observation(action)

        if action != -1:
            game_event = cal_game_events(player_pieces, enemy_pieces, player_pieces_after, enemy_pieces_after)
            game_events[player_i].append(list(game_event.values()))

            reward, end_game = cal_reward_and_endgame(game_event)
            rewards[player_i].append(reward)
            player_end[player_i] = end_game
        else:
            reward = 0

        cal_state(player_pieces, enemy_pieces, dice)  # to check that states can be generated

        game_obs[player_i].append(
            [dice, move_pieces, player_pieces, enemy_pieces, player_is_a_winner, game_done, action, player_pieces_after,
             enemy_pieces_after, player_is_a_winner_after, game_done_after])

    if save_video:
        g.save_hist_video("test.mp4")

    return game_obs, np.array(game_events), list(game_event.keys()), np.array(rewards)
Example #13
 def __init__(self):
     self.there_is_a_winner = False
     self.g = ludopy.Game()
     self.player = None
     self.Q = []
     self.ca = capture_image()
     self.list_winner = []
     self.number_winner_my_player = 0
     self.tr = train_data()
     self.player_last_piece = []
     self.second_player = 0
     self.file_name = ""
     self.file_plyer_hist = ""
     self.type_play = True
     self.winner = 0
     self.my_player_winner = False
     self.gamma_m = 0
     self.alfa_m = 0
     self.percentage = []
     self.epsilon = 0
Example #14
def evaluate_qlearning_vs_ga_multiprocessing(i):
    weights = [104.0, 118.0, -80.0, 57.0, 94.0, -19.0, 98.0, -58.0, 69.0, 5.0]
    game = ludopy.Game()
    player_is_a_winner = False
    there_is_a_winner = False
    while not there_is_a_winner:
        (dice, move_pieces, player_pieces, enemy_pieces, player_is_a_winner, there_is_a_winner), player_i = game.get_observation()
        # player 0 uses the evolved weights, player 2 uses the Q-learning player, all other players move randomly
        piece_to_move = -1
        if player_i == 0:
            if len(move_pieces):
                piece_to_move = util_func(weights, deepcopy(game), dice, move_pieces, player_pieces, enemy_pieces)
        elif player_i == 2:
            if len(move_pieces):
                piece_to_move = player.getNextAction(player.getState(player_pieces, enemy_pieces), dice, move_pieces)
        else:
            if len(move_pieces):
                piece_to_move = move_pieces[np.random.randint(0, len(move_pieces))]
        _, _, _, _, _, there_is_a_winner = game.answer_observation(piece_to_move)
    # game done; return the index of the first winner
    return game.first_winner_was
Example #15
def randwalk(number_of_players=4, number_of_pieces=4):
    """
    Play a random-walk Ludo game and return the finished `ludopy.Game` object.

    This is taken from the `test/randomwalk.py` in LUDOpy

    :param number_of_players: Number of Ludo players.
    :type number_of_players: `int`

    :param number_of_pieces: Number of pieces per player.
    :type number_of_pieces: `int`
    """

    # `ghost_players` is the LUDOpy-specific way to specify the number of
    # active players. If we want 2 players, the code below will generate the
    # list:
    #
    #     [3, 2, 1, 0]
    #
    # and slice it with [:-2], marking players 3 and 2 as ghosts so that only
    # players 0 and 1 take part:
    #
    #     [3, 2, 1, 0][:-2] == [3, 2]

    g = ludopy.Game(ghost_players=list(reversed(range(
        0, 4)))[:-number_of_players],
                    number_of_pieces=number_of_pieces)
    there_is_a_winner = False

    while not there_is_a_winner:
        (dice, move_pieces, player_pieces, enemy_pieces, player_is_a_winner,
         there_is_a_winner), player_i = g.get_observation()

        if len(move_pieces):
            piece_to_move = \
                move_pieces[np.random.randint(0, len(move_pieces))]
        else:
            piece_to_move = -1

        _, _, _, _, _, there_is_a_winner = g.answer_observation(piece_to_move)

    return g
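A quick check of the `ghost_players` slice used above (a minimal sketch in plain Python; ludopy itself is not needed for this):

# What list(reversed(range(0, 4)))[:-n] evaluates to for n active players.
for n in (1, 2, 3, 4):
    print(n, list(reversed(range(0, 4)))[:-n])
# 1 [3, 2, 1]  -> only player 0 is active
# 2 [3, 2]     -> players 0 and 1 are active
# 3 [3]        -> players 0, 1 and 2 are active
# 4 []         -> all four players are active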
Example #16
    def train(self, training_iterations=10000, win_rate_iterations=100):
        for i in range(training_iterations):
            there_is_a_winner = False

            while not there_is_a_winner:
                self.agents[self.game.current_player].move()

                there_is_a_winner = len(self.game.game_winners) > 0

            self.n_games += 1

            if self.game.first_winner_was == 0:
                self.games_won.append(1)
            else:
                self.games_won.append(0)

            if len(self.games_won) > win_rate_iterations:
                self.games_won = self.games_won[-win_rate_iterations:]

            self.winning_rates.append(
                sum(self.games_won) / len(self.games_won))
            self.known_state_rates.append(
                sum(self.q_learner.q_table.known_state_encountered) /
                len(self.q_learner.q_table.known_state_encountered))

            if self.n_games % 1000 == 0:
                self.q_learner.dump_q_table(str(self.id) + "/qtable.json")

            self.game = ludopy.Game()

            if i == training_iterations - 1:
                self.q_learner.new_game(self.game, epsilon=0)
            else:
                self.q_learner.new_game(self.game)

            for i in range(1, 4):
                self.agents[i].new_game(self.game)
Example #17
def randwalk():
    import ludopy
    import numpy as np

    g = ludopy.Game()
    there_is_a_winner = False

    while not there_is_a_winner:
        (dice, move_pieces, player_pieces, enemy_pieces, player_is_a_winner,
         there_is_a_winner), player_i = g.get_observation()

        if len(move_pieces):
            piece_to_move = move_pieces[np.random.randint(0, len(move_pieces))]
        else:
            piece_to_move = -1

        _, _, _, _, _, there_is_a_winner = g.answer_observation(piece_to_move)

    print("Saving history to numpy file")
    g.save_hist("game_history.npy")
    print("Saving game video")
    g.save_hist_video("game_video.mp4")

    return True
Example #18
 def setUp(self):
     np.random.seed(0)
     self.__g = ludopy.Game(number_of_pieces=4)
Example #19
import ludopy
import cv2
import numpy as np
import matplotlib.pyplot as plt
import bottleneck as bn
from ludopy import player
import qlearning

env = ludopy.Game()

EPISODES = 10000

ACTION_SPACE_SIZE = 4

INNER_STARS = [5, 18, 31, 44]
OUTER_STARS = [12, 25, 38, 51]

avg_window_size = 1000


ep_rewards = []
ep_won = []


rewards_table = {'star': 0.2, 'safe': 0.2, 'send_another_home': 0.2, 'send_self_home': -0.3, 'goal': 0.1,
                 'moved_into_goal_area': 0.2, 'out_of_start': 0.25, 'winner': 1, 'not_winner': -1}


def get_reward(moved_piece_previous_location, moved_piece_location, p_pieces, e_pieces, n_player_pieces_before, n_enemy_pieces_before):
    reward = 0
    if moved_piece_location in player.STAR_INDEXS:
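The get_reward() above is cut off at this point. Below is a purely hypothetical sketch (an assumption, not the original implementation) of how two of the events named in rewards_table could be scored:

# Hypothetical sketch only: the body of the original get_reward() is not shown.
# It reuses rewards_table and `from ludopy import player` from above, and assumes
# that position 0 is the home area.
def get_reward_sketch(moved_piece_previous_location, moved_piece_location):
    reward = 0
    if moved_piece_location in player.STAR_INDEXS:
        reward += rewards_table['star']            # landed on a star
    if moved_piece_location == 0 and moved_piece_previous_location != 0:
        reward += rewards_table['send_self_home']  # the piece was knocked home
    return reward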
Example #20
 def play(self,
          policyPlayers,
          randomPlayers,
          load_path,
          save_path,
          episodes,
          episodeStart,
          training,
          ghost_players,
          model2keep,
          n_x=125,
          n_y=5,
          learning_rate=0.02,
          reward_decay=0.99,
          player_num=0,
          number_of_players=2,
          number_of_pieces=4,
          reward=-1000,
          rewardType="monte",
          inputBoardType="fullBoard"):
     totalPlayers = len(policyPlayers) + len(randomPlayers)
     playerPool = policyPlayers + randomPlayers
     data = dict()
     for i in policyPlayers:
         data[i] = StoreTrainingData(n_y)
     act = Action(reward)
     PG = PolicyGradient(
         n_x=n_x,  # input layer size
         n_y=n_y,  # output layer size
         learning_rate=learning_rate,
         reward_decay=reward_decay,
         load_path=load_path,
         save_path=save_path,
         player_num=player_num,
         rewardType=rewardType,
         toKeep=model2keep)
     timeInterval = 50
     winCount = defaultdict(int)
     preds = list()
     startTime = time.time()
     for episode in range(episodeStart + 1, episodeStart + episodes):
         g = ludopy.Game(ghost_players=ghost_players,\
          number_of_pieces=number_of_pieces)
         while True:
             obs, currPlayer = g.get_observation()
             state = State(obs, currPlayer)
             action = None
             if currPlayer in policyPlayers and len(state.actions()) > 0:
                 action = act.action(self, state, n_y, playerPool,
                                     currPlayer, data[currPlayer], PG,
                                     training)
             elif currPlayer in randomPlayers:
                 action = act.action(self, state, n_y)
             _, _, _, _, _, there_is_a_winner = g.answer_observation(action)
             if int(time.time() - startTime) > timeInterval:
                 print("episode: {} running for {}".format(
                     episode,
                     time.time() - startTime))
                 timeInterval += 50
             if there_is_a_winner:
                 winCount[currPlayer] += 1
                 if episode % 1000 == 0:
                     print("wincount: {}".format(winCount))
                     print("time take for this epoch is {}".format(
                         time.time() - startTime))
                     startTime = time.time()
                     timeInterval = 50
                     winCount = defaultdict(int)
                     g.save_hist_video(
                         "videos/gameabc{}.avi".format(episode))
                 if training:
                     try:
                         self.__train(PG, data, episode, currPlayer)
                     except:
                         g.save_hist_video("error.avi".format(episode))
                         print(
                             "-----------------error------------------------"
                         )
                         pass
                 break
     return winCount
Example #21
        return state

    def getNextAction(self, state, dice, movePieces):
        diceIdx = dice - 1
        bestAction = movePieces[0]
        bestQValue = self.getQValue(state, diceIdx, bestAction)
        for action in movePieces:
            if self.getQValue(state, diceIdx, action) > bestQValue:
                bestAction = action
                bestQValue = self.getQValue(state, diceIdx, action)

        return bestAction


player = QLearningPlayer('BestQTable.npy') #Give the path to the QTable.
g = ludopy.Game()
gameNumber = 0
winners = []
numOfGames = 5000
while gameNumber < numOfGames:
    g.reset()
    gameNumber += 1
    there_is_a_winner = False
    while not there_is_a_winner:
        (dice, move_pieces, player_pieces, enemy_pieces, player_is_a_winner, there_is_a_winner), player_i = g.get_observation()

        if player_i == 0 and len(move_pieces) > 0:
            # This is the important line: pick the greedy action from the Q-table
            piece_to_move = player.getNextAction(player.getState(player_pieces, enemy_pieces), dice, move_pieces)
        else:
            if len(move_pieces):
Example #22
#!/usr/bin/python3.6

import ludopy
import numpy as np

from agents.random_agent import RandomAgent
from agents.q_learning.q_learning_agent import QLearningAgent

q_table_filename = None

game = ludopy.Game()

q_learner = QLearningAgent(game,
                           0,
                           learning_rate=0.5,
                           discount_factor=0.9,
                           epsilon=0.5,
                           win_reward=10.0,
                           lost_reward=-10.0,
                           piece_in_reward=5.0,
                           land_on_globe_reward=1.0,
                           land_on_star_reward=2.0,
                           knock_enemy_home_reward=0.5,
                           got_knocked_home_reward=-1.1,
                           no_move_reward=-0.5,
                           piece_number_scale_reward=0.001,
                           piece_number_init_func_value=5,
                           q_table_filename=q_table_filename)
print(len(q_learner.q_table.q_table))
random_agent_1 = RandomAgent(game, 1)
random_agent_2 = RandomAgent(game, 2)
Example #23
def run_ludo():
    #  Explore rate: 0.05, discount rate: 0.4 and learning rate: 0.1
    learning_rate_vec = [0.1] #[0.1, 0.2, 0.3, 0.4, 0.5]
    discount_factor_vec = [0.4] #[0.1, 0.2, 0.3, 0.4, 0.5]
    explore_rate_vec = [0.05] #[0.05, 0.10, 0.15, 0.2]

    after = 800

    number_of_runs_without_learning = 25
    number_of_runs_with_learning = 1000

    q_player = 0

    size_of_win_rate_vec = (len(explore_rate_vec),len(discount_factor_vec),len(learning_rate_vec), number_of_runs_with_learning)
    win_rate_vec = np.zeros(size_of_win_rate_vec)

    for ER_index, ER_value in enumerate(explore_rate_vec):
        for DF_index, DF_value in enumerate(discount_factor_vec):
            for LR_index, LR_value in enumerate(learning_rate_vec):
                q = Q_Learning.QLearning(q_player)
                q.training = 1

                q.learning_rate = LR_value
                q.discount_factor = DF_value
                q.explore_rate = ER_value

                for k in range(number_of_runs_with_learning):
                    print('Test2:   Number of learning games: ', k, ' ER: ', q.explore_rate, ' DF: ', q.discount_factor, ' LR: ', q.learning_rate)
                    g = ludopy.Game()
                    stop_while = False
                    q.training = 1
                    while not stop_while:
                        (dice, move_pieces, player_pieces, enemy_pieces, player_is_a_winner,
                         there_is_a_winner), player_i = g.get_observation()

                        if player_i == q_player:
                            piece_to_move = q.update_q_table(player_pieces, enemy_pieces, dice, g, there_is_a_winner)
                            if there_is_a_winner == 1:
                                stop_while = True
                        else:
                            if len(move_pieces):
                                piece_to_move = move_pieces[np.random.randint(0, len(move_pieces))]
                            else:
                                piece_to_move = -1

                        _, _, _, _, _, there_is_a_winner = g.answer_observation(piece_to_move)

                    q.reset_game()
                    if after < k:
                        wins = [0, 0, 0, 0]
                        q.training = 0

                        number_of_steps = 0
                        for j in range(number_of_runs_without_learning):
                            g = ludopy.Game()
                            stop_while = False
                            while not stop_while:
                                (dice, move_pieces, player_pieces, enemy_pieces, player_is_a_winner,
                                 there_is_a_winner), player_i = g.get_observation()
                                if player_i == q_player:
                                    number_of_steps = number_of_steps + 1
                                    piece_to_move = q.update_q_table(player_pieces, enemy_pieces, dice, g, there_is_a_winner)
                                    if there_is_a_winner == 1:
                                        stop_while = True
                                else:
                                    if len(move_pieces):
                                        piece_to_move = move_pieces[np.random.randint(0, len(move_pieces))]
                                    else:
                                        piece_to_move = -1
                                _, _, _, _, _, there_is_a_winner = g.answer_observation(piece_to_move)

                            q.reset_game()
                            wins[g.first_winner_was] = wins[g.first_winner_was] + 1
                        win_rate_vec[ER_index][DF_index][LR_index][k] = (wins[q_player] / number_of_runs_without_learning)
                        print('Win rate: ', wins[q_player] / number_of_runs_without_learning)

                q.save_Q_table("Best_learning_parameters" + str(k) + ".npy")

    test_name = "Test_run"
    file_name = test_name + "_data.npy"
    file_ext = file_name.split(".")[-1]
    assert file_ext == "npy", "The file extension has to be npy (numpy file)"
    np.save(file_name, win_rate_vec)

    file_name = test_name + "_parameters.npy"
    file_ext = file_name.split(".")[-1]
    assert file_ext == "npy", "The file extension has to be npy (numpy file)"
    np.save(file_name, [explore_rate_vec, discount_factor_vec, learning_rate_vec, number_of_runs_with_learning, number_of_runs_without_learning])


    return True
Example #24
 def setUp(self):
     np.random.seed(0)
     self.__g = ludopy.Game(ghost_players=[3, 2], number_of_pieces=4)
Example #25
    def __init__(self,
                 individual_id,
                 discount_factor=None,
                 learning_rate=None,
                 epsilon=None,
                 win_reward=None,
                 lost_reward=None,
                 piece_in_reward=None,
                 land_on_globe_reward=None,
                 land_on_star_reward=None,
                 knock_enemy_home_reward=None,
                 got_knocked_home_reward=None,
                 no_move_reward=None,
                 piece_number_scale_reward=None,
                 piece_number_init_func_value=None,
                 mutation_rate=0):
        self.id = individual_id

        random_int = 100
        if mutation_rate > 0:
            random_int = np.random.randint(0, high=100)
        if discount_factor == None or random_int < 100 * mutation_rate:
            self.discount_factor = np.random.uniform(discount_factor_bounds[0],
                                                     discount_factor_bounds[1])
        else:
            self.discount_factor = discount_factor

        random_int = 100
        if mutation_rate > 0:
            random_int = np.random.randint(0, high=100)
        if learning_rate == None or random_int < 100 * mutation_rate:
            self.learning_rate = np.random.uniform(learning_rate_bounds[0],
                                                   learning_rate_bounds[1])
        else:
            self.learning_rate = learning_rate

        random_int = 100
        if mutation_rate > 0:
            random_int = np.random.randint(0, high=100)
        if epsilon == None or random_int < 100 * mutation_rate:
            self.epsilon = np.random.uniform(epsilon_bounds[0],
                                             epsilon_bounds[1])
        else:
            self.epsilon = epsilon

        random_int = 100
        if mutation_rate > 0:
            random_int = np.random.randint(0, high=100)
        if win_reward == None or random_int < 100 * mutation_rate:
            self.win_reward = np.random.uniform(win_reward_bounds[0],
                                                win_reward_bounds[1])
        else:
            self.win_reward = win_reward

        random_int = 100
        if mutation_rate > 0:
            random_int = np.random.randint(0, high=100)
        if lost_reward == None or random_int < 100 * mutation_rate:
            self.lost_reward = np.random.uniform(lost_reward_bounds[0],
                                                 lost_reward_bounds[1])
        else:
            self.lost_reward = lost_reward

        random_int = 100
        if mutation_rate > 0:
            random_int = np.random.randint(0, high=100)
        if piece_in_reward == None or random_int < 100 * mutation_rate:
            self.piece_in_reward = np.random.uniform(piece_in_reward_bounds[0],
                                                     piece_in_reward_bounds[1])
        else:
            self.piece_in_reward = piece_in_reward

        random_int = 100
        if mutation_rate > 0:
            random_int = np.random.randint(0, high=100)
        if land_on_globe_reward == None or random_int < 100 * mutation_rate:
            self.land_on_globe_reward = np.random.uniform(
                land_on_globe_reward_bounds[0], land_on_globe_reward_bounds[1])
        else:
            self.land_on_globe_reward = land_on_globe_reward

        random_int = 100
        if mutation_rate > 0:
            random_int = np.random.randint(0, high=100)
        if land_on_star_reward == None or random_int < 100 * mutation_rate:
            self.land_on_star_reward = np.random.uniform(
                land_on_star_reward_bounds[0], land_on_star_reward_bounds[1])
        else:
            self.land_on_star_reward = land_on_star_reward

        random_int = 100
        if mutation_rate > 0:
            random_int = np.random.randint(0, high=100)
        if knock_enemy_home_reward == None or random_int < 100 * mutation_rate:
            self.knock_enemy_home_reward = np.random.uniform(
                knock_enemy_home_reward_bounds[0],
                knock_enemy_home_reward_bounds[1])
        else:
            self.knock_enemy_home_reward = knock_enemy_home_reward

        random_int = 100
        if mutation_rate > 0:
            random_int = np.random.randint(0, high=100)
        if got_knocked_home_reward == None or random_int < 100 * mutation_rate:
            self.got_knocked_home_reward = np.random.uniform(
                got_knocked_home_reward_bounds[0],
                got_knocked_home_reward_bounds[1])
        else:
            self.got_knocked_home_reward = got_knocked_home_reward

        random_int = 100
        if mutation_rate > 0:
            random_int = np.random.randint(0, high=100)
        if no_move_reward == None or random_int < 100 * mutation_rate:
            self.no_move_reward = np.random.uniform(no_move_reward_bounds[0],
                                                    no_move_reward_bounds[1])
        else:
            self.no_move_reward = no_move_reward

        random_int = 100
        if mutation_rate > 0:
            random_int = np.random.randint(0, high=100)
        if piece_number_scale_reward == None or random_int < 100 * mutation_rate:
            self.piece_number_scale_reward = np.random.uniform(
                piece_number_scale_reward_bounds[0],
                piece_number_scale_reward_bounds[1])
        else:
            self.piece_number_scale_reward = piece_number_scale_reward

        random_int = 100
        if mutation_rate > 0:
            random_int = np.random.randint(0, high=100)
        if piece_number_init_func_value == None or random_int < 100 * mutation_rate:
            self.piece_number_init_func_value = np.random.uniform(
                piece_number_init_func_value_bounds[0],
                piece_number_init_func_value_bounds[1])
        else:
            self.piece_number_init_func_value = piece_number_init_func_value

        self.game = ludopy.Game()

        self.q_learner = QLearningAgent(
            self.game, 0, self.discount_factor, self.learning_rate,
            self.epsilon, self.win_reward, self.lost_reward,
            self.piece_in_reward, self.land_on_globe_reward,
            self.land_on_star_reward, self.knock_enemy_home_reward,
            self.got_knocked_home_reward, self.no_move_reward,
            self.piece_number_scale_reward, self.piece_number_init_func_value)

        self.agents = [self.q_learner]

        for i in range(1, 4):
            self.agents.append(RandomAgent(self.game, i))

        self.games_won = []
        self.n_games = 0
        self.winning_rates = []
        self.known_state_rates = []

        self.fitness = None
Example #26
def train(episode, rewardType=None):
    tf.reset_default_graph()
    number_of_players = 2
    number_of_pieces = 4
    # Load checkpoint
    load_version = 11
    save_version = load_version + 1
    #load_path = "output/weights/ludo/{}/ludo-v2.ckpt".format(load_version)
    load_path = None
    save_path = "/content/drive/My Drive/cse8673_project/output/weights/ludo/{}/ludo-v2.ckpt".format(
        rewardType)
    PG_dict = {}
    reward = -1000
    act = util.Action(number_of_players, number_of_pieces, reward)
    PG = PolicyGradient(
        n_x=(number_of_players * number_of_pieces) + 5,  # input layer size
        n_y=5,  # output layer size
        learning_rate=0.02,
        reward_decay=0.99,
        load_path=load_path,
        save_path=save_path,
        player_num=0,
        rewardType=rewardType)
    EPISODES = episode
    ghost_players = list(reversed(range(0, 4)))[:-number_of_players]
    players = list(reversed(range(0, 4)))[-number_of_players:]
    winner = None
    winnerCount = defaultdict(int)
    for episode in range(EPISODES):
        if episode % 500 == 0:
            print("episode : ", episode)
        g = ludopy.Game(ghost_players=ghost_players,\
             number_of_pieces=number_of_pieces)

        episode_reward = 0

        there_is_a_winner = False
        winner = None
        count = 0
        while True:
            count += 1
            for i in range(number_of_players):
                if i == 0:
                    (dice, move_pieces, player_pieces, enemy_pieces,
                     player_is_a_winner,
                     there_is_a_winner), player_i = g.get_observation()

                    action, random = act.getAction(PG, enemy_pieces,
                                                   player_pieces, move_pieces,
                                                   dice)

                    _, _, _, _, _, there_is_a_winner = g.answer_observation(
                        action)
                else:
                    action = act.getAction(move_pieces=move_pieces)

                if there_is_a_winner:
                    winner = player_i
                    winnerCount[player_i] += 1
                    break

            # this is where the agent learns
            if there_is_a_winner:
                if winner == 0:
                    PG.episode_rewards = [
                        i + 2000 if i == -1000 else i
                        for i in PG.episode_rewards
                    ]

                discounted_episode_rewards_norm = PG.learn(episode, 0, winner)
    return winnerCount, save_path