Пример #1
0
 def visualise_game(self, model):
     """Play one GUI game, picking each move from the model's predictions.

     On every step all four candidate actions are scored with
     ``model.predict`` (the observation-plus-action vector is reshaped to
     ``(-1, 8, 1)`` first). The first of actions 0, 3, 2 whose prediction
     exceeds 0.5 is taken; action 1 is the fallback. The episode ends when
     the game reports ``done`` or after ``self.goal_steps`` steps.

     Args:
         model: Object exposing ``predict``; each prediction is compared
             against 0.5, so it must be usable in a boolean context.
     """
     game = LaberintGame(gui=True)
     _, _, board, x, y, lastAction = game.start()
     observation = self.generate_observation(x, y, board, lastAction)
     for _ in range(self.goal_steps):
         # Every candidate is scored, including action 1, whose score is
         # never inspected below.
         predictions = [
             model.predict(
                 self.add_action_to_observation(observation, candidate)
                 .reshape(-1, 8, 1))
             for candidate in range(4)
         ]
         # Preference order 0 -> 3 -> 2, evaluated lazily; 1 is the default.
         action = next(
             (choice for choice in (0, 3, 2) if predictions[choice] > 0.5),
             1)
         game_action = self.get_game_action(action, lastAction)
         done, _, board, x, y, lastAction = game.step(game_action)
         if done:
             break
         observation = self.generate_observation(x, y, board, lastAction)
Пример #2
0
 def visualize_game(self, model):
     """Run a single GUI game driven by ``model``.

     The first "previous action" is seeded with ``self.highest_action``;
     afterwards each move comes from ``self.next_action``, which receives
     the current observation and the action taken on the prior step. The
     loop stops when the game reports ``done`` or after
     ``self.goal_steps`` steps.

     Args:
         model: Passed through unchanged to ``highest_action`` and
             ``next_action``.
     """
     game = LaberintGame(gui=True)
     _, _, board = game.start()
     observation = self.generate_observation(board)
     last_action = self.highest_action(model, observation)
     for _ in range(self.goal_steps):
         chosen = self.next_action(model, observation, last_action)
         finished, _, board = game.step(chosen)
         if finished:
             break
         # Carry the move and the refreshed board view into the next step.
         last_action = chosen
         observation = self.generate_observation(board)
Пример #3
0
    def test_model(self, model):
        """Play ``self.test_games`` games with ``model`` and print step stats.

        Each game runs for at most ``self.goal_steps`` steps. On every step
        all four candidate actions are scored with ``model.predict`` and one
        is chosen with the left-hand rule. ``steps`` counts only moves that
        did not end the game. When all games are played, the mean and the
        ``Counter`` histogram of the per-game step counts are printed.

        Args:
            model: Object exposing ``predict``; each prediction is compared
                against 0.5, so it must be usable in a boolean context.
        """
        steps_arr = []
        for _ in range(self.test_games):
            steps = 0
            game = LaberintGame()
            _, _, board, x, y, lastAction = game.start()
            prev_observation = self.generate_observation(
                x, y, board, lastAction)
            for _ in range(self.goal_steps):
                predictions = [
                    model.predict(
                        self.add_action_to_observation(
                            prev_observation, candidate).reshape(-1, 8, 1))
                    for candidate in range(4)
                ]
                # Left-hand rule: prefer left (0), then forward (3), then
                # right (2); otherwise go back (1).
                if predictions[0] > 0.5:
                    action = 0
                elif predictions[3] > 0.5:
                    action = 3
                elif predictions[2] > 0.5:
                    action = 2
                else:
                    action = 1
                game_action = self.get_game_action(action, lastAction)
                done, _, board, x, y, lastAction = game.step(game_action)
                if done:
                    break
                prev_observation = self.generate_observation(
                    x, y, board, lastAction)
                steps += 1
            steps_arr.append(steps)
        print('Average steps:', mean(steps_arr))
        print(Counter(steps_arr))
Пример #4
0
 def visualise_game(self, model):
     """Play one GUI game using the model's predicted actions.

     Before the loop the exit and avatar positions are initialised from the
     starting board and ``self.positionDic`` is cleared. On each step the
     avatar's current position key is stored in ``self.positionDic`` before
     the move is applied, and the avatar position is refreshed afterwards.
     The game runs until it reports ``done`` or ``self.goal_steps_train``
     steps have been played.

     Args:
         model: Passed through unchanged to ``get_predicted_action``.
     """
     game = LaberintGame(gui=True)
     _, score, board = game.start()
     self.init_exit_position(board)
     self.update_avatar_position(board)
     self.positionDic.clear()
     observation = self.generate_observation(board, score)
     last_action = -1  # no move has been made yet
     for _ in range(self.goal_steps_train):
         action = self.get_predicted_action(model, observation, last_action)
         # Record the cell the avatar stands on before it moves.
         position_key = self.get_position_string(self.avatarX, self.avatarY)
         self.positionDic[position_key] = 1
         done, score, board = game.step(action)
         self.update_avatar_position(board)
         if done:
             break
         observation = self.generate_observation(board, score)
         last_action = action
Пример #5
0
 def initial_population(self):
     """Generate labelled training samples by playing generated actions.

     Plays ``self.initial_games`` games of at most ``self.goal_steps``
     steps each. Every step yields one sample
     ``[observation_with_action, label]`` where the label is:

     * on a terminal step: 0 if the final cell holds ``"t"``, else 1;
     * otherwise: 0 if the position did not change, else 1.

     With these values the target vector contains only 0s and 1s, which
     keeps the model simple.

     Returns:
         list: The collected ``[features, label]`` pairs; its length is
         also printed.
     """
     training_data = []
     for _ in range(self.initial_games):
         game = LaberintGame()
         _, _, board, x, y, lastAction = game.start()
         prev_observation = self.generate_observation(
             x, y, board, lastAction)
         old_x, old_y = x, y
         for _ in range(self.goal_steps):
             action, game_action = self.generate_action(x, y, lastAction)
             done, _, board, x, y, lastAction = game.step(game_action)
             if done:
                 # NOTE(review): "t" presumably marks a losing cell; confirm
                 # against the board encoding.
                 label = 0 if board[x][y] == "t" else 1
             else:
                 # Logical `and` (not bitwise `&`): a move that left the
                 # avatar in place is a negative example.
                 label = 0 if (x == old_x and y == old_y) else 1
             training_data.append([
                 self.add_action_to_observation(prev_observation, action),
                 label,
             ])
             if done:
                 break
             prev_observation = self.generate_observation(
                 x, y, board, lastAction)
             old_x, old_y = x, y
     print(len(training_data))
     return training_data
Пример #6
0
 def test_model(self, model):
     """Play ``self.test_games`` games with ``model`` and print step stats.

     The first "previous action" of each game is seeded with
     ``self.highest_action``; subsequent moves come from
     ``self.next_action``. Each game lasts at most ``self.goal_steps``
     steps, and ``steps`` counts only the moves that did not end the game.
     The mean and the ``Counter`` histogram of the per-game step counts are
     printed at the end.

     Args:
         model: Passed through unchanged to ``highest_action`` and
             ``next_action``.
     """
     steps_arr = []
     for _ in range(self.test_games):
         steps = 0
         game = LaberintGame()
         _, _, board = game.start()
         prev_observation = self.generate_observation(board)
         prev_action = self.highest_action(model, prev_observation)
         for _ in range(self.goal_steps):
             action = self.next_action(model, prev_observation, prev_action)
             done, _, board = game.step(action)
             if done:
                 break
             prev_action = action
             prev_observation = self.generate_observation(board)
             steps += 1
         steps_arr.append(steps)
     print('Average steps:', mean(steps_arr))
     print(Counter(steps_arr))
Пример #7
0
 def test_model(self, model):
     """Play ``self.test_games`` non-GUI games and print step/score stats.

     For each game the board-derived state (``self.maxBoardDistance``, exit
     position, avatar position) is initialised and ``self.positionDic`` is
     cleared. On every step the avatar's current position key is stored in
     ``self.positionDic`` before the predicted action is applied. When a
     game ends, a separator, the step count and the last observation are
     printed. ``steps`` counts only moves that did not end the game; the
     recorded score is the one returned by the last step played.

     Args:
         model: Passed through unchanged to ``get_predicted_action``.
     """
     steps_arr = []
     scores_arr = []
     for _ in range(self.test_games):
         steps = 0
         game = LaberintGame(gui=False)
         _, score, board = game.start()
         self.maxBoardDistance = self.get_max_board_distance(board)
         self.init_exit_position(board)
         self.update_avatar_position(board)
         self.positionDic.clear()
         prev_observation = self.generate_observation(board, score)
         prev_action = -1  # no move has been made yet
         for _ in range(self.goal_steps_train):
             action = self.get_predicted_action(model, prev_observation,
                                                prev_action)
             # Record the cell the avatar stands on before it moves.
             self.positionDic[self.get_position_string(
                 self.avatarX, self.avatarY)] = 1
             done, score, board = game.step(action)
             self.update_avatar_position(board)
             if done:
                 print('-----')
                 print(steps)
                 print(prev_observation)
                 break
             prev_observation = self.generate_observation(board, score)
             prev_action = action
             steps += 1
         steps_arr.append(steps)
         scores_arr.append(score)
     print('Average steps:', mean(steps_arr))
     print(Counter(steps_arr))
     print('Average score:', mean(scores_arr))
     print(Counter(scores_arr))
Пример #8
0
 def initial_population(self):
     """Generate labelled training samples by playing generated actions.

     Plays ``self.initial_games`` games of at most ``self.goal_steps``
     steps each. Every step yields one sample
     ``[observation_with_action, label]`` with label:

     * ``-1``   when the game is done and the score is below -99;
     * ``1``    when the game is done and the score is above 99;
     * ``-0.5`` when the new observation equals the previous one
       (per ``self.same_observation``);
     * ``0.85`` otherwise.

     Returns:
         list: The collected ``[features, label]`` pairs; its length is
         also printed.
     """
     training_data = []
     for _ in range(self.initial_games):
         game = LaberintGame()
         _, _, board = game.start()
         prev_observation = self.generate_observation(board)
         for _ in range(self.goal_steps):
             action, game_action = self.generate_action(prev_observation)
             done, score, board = game.step(game_action)

             terminal_label = None
             if done and score < -99:
                 terminal_label = -1
             elif done and score > 99:
                 terminal_label = 1
             if terminal_label is not None:
                 training_data.append([
                     self.add_action_to_observation(prev_observation,
                                                    action),
                     terminal_label,
                 ])
                 break

             # NOTE(review): a `done` result with a score in [-99, 99] does
             # not end the episode here (unchanged from the original) --
             # confirm whether the game can finish with such a score.
             # The next observation is built once and reused for both the
             # comparison and the carry-over.
             observation = self.generate_observation(board)
             if self.same_observation(prev_observation, observation):
                 label = -0.5
             else:
                 label = 0.85
             training_data.append([
                 self.add_action_to_observation(prev_observation, action),
                 label,
             ])
             prev_observation = observation
     print(len(training_data))
     return training_data
Пример #9
0
 def initial_population(self):
     """Generate labelled training samples from manual and generated games.

     Plays ``self.initial_games`` games of at most ``self.goal_steps * 10``
     steps. The first ``self.manualGames`` games run with the GUI and take
     their actions from ``manual.get()``; the rest use
     ``self.generate_action()``. Every step yields one sample
     ``[observation_with_action, label]`` with label:

     * terminal step: ``-1`` if the score dropped below the previous
       score, otherwise ``1`` (and ``"win"`` is printed);
     * non-terminal step: ``1`` if ``score + 2 >= prev_score``, else ``0``.

     Per game, ``self.positionDic`` is cleared and ``self.maxBoardDistance``
     plus the exit and avatar positions are re-initialised from the board.

     Returns:
         list: The collected ``[features, label]`` pairs.
     """
     training_data = []
     for number in range(self.initial_games):
         is_manual = number < self.manualGames
         game = LaberintGame(gui=True) if is_manual else LaberintGame()
         self.positionDic.clear()
         _, prev_score, board = game.start()
         self.maxBoardDistance = self.get_max_board_distance(board)
         self.init_exit_position(board)
         self.update_avatar_position(board)
         for _ in range(self.goal_steps * 10):
             if is_manual:
                 game_action = manual.get()
             else:
                 game_action = self.generate_action()
             # The observation is taken before the move, so it describes
             # the state the action was chosen in.
             prev_observation = self.generate_observation(board, prev_score)
             done, score, board = game.step(game_action)
             self.update_avatar_position(board)
             if done:
                 if score < prev_score:
                     label = -1
                 else:
                     print("win")
                     label = 1
             else:
                 # NOTE(review): the 2-point tolerance presumably absorbs a
                 # per-step score penalty -- confirm against the game rules.
                 label = 1 if score + 2 >= prev_score else 0
             training_data.append([
                 self.add_action_to_observation(prev_observation,
                                                game_action),
                 label,
             ])
             if done:
                 break
             prev_score = score
     return training_data