class SnakeGameDemoWidget(QtWidgets.QWidget):
    """Widget that hosts a ``SnakeGame``, advances it on a timer and paints it.

    A 100 ms timer calls :meth:`tick`; it is started when the widget is shown
    and stopped when the widget is closed. Key presses are forwarded to the
    game, and the game is told the widget rectangle on show and on resize.
    """

    def __init__(self):
        super(SnakeGameDemoWidget, self).__init__()
        self.resize(300, 300)
        self.setWindowTitle("SnakeGame")

        # Configure the game fully before calling setup().
        # NOTE(review): presumably `deterministic_food` makes the game use
        # `food_positions` instead of random placement -- confirm in SnakeGame.
        self.snake_game = game = SnakeGame()
        game.deterministic_food = True
        game.food_positions = [(6, 6), (2, 15), (17, 3)]
        game.rect = self.rect()
        game.width = self.width()
        game.height = self.height()
        game.setup()

        # The timer is created here but only started in showEvent().
        self.tick_timer = timer = QTimer()
        timer.setInterval(100)
        timer.timeout.connect(self.tick)

    def showEvent(self, event):
        """Hand the current widget rectangle to the game and start ticking."""
        self.snake_game.rect = self.rect()
        self.tick_timer.start()

    def resizeEvent(self, event):
        """Keep the game's rectangle in sync with the widget's."""
        self.snake_game.rect = self.rect()

    def closeEvent(self, event):
        """Stop the tick timer when the widget is closed."""
        self.tick_timer.stop()

    def tick(self):
        """Advance the game by one step and schedule a repaint."""
        self.snake_game.tick()
        self.update()

    def paintEvent(self, event):
        """Let the game draw itself with an antialiased painter on this widget."""
        canvas = QtGui.QPainter(self)
        canvas.setRenderHint(QtGui.QPainter.Antialiasing, True)
        self.snake_game.paint(canvas)

    def keyPressEvent(self, event):
        """Forward the pressed key code to the game."""
        pressed_key = event.key()
        self.snake_game.key_pressed(pressed_key)

    def sizeHint(self):
        """Return the preferred widget size (300 x 300)."""
        return QtCore.QSize(300, 300)
def train(num_episodes):
    """Play ``num_episodes`` games and run one training step after each game.

    Every move is recorded in ``memory`` as ``(observation, action, reward)``,
    where the observation is a copy of the board taken before the move.
    When a game ends, the recorded episode is stacked, its rewards are passed
    through ``discount_rewards`` and ``train_step`` is called once; ``memory``
    is then cleared for the next episode.

    Reward for each move:
        * ``-10`` if the move ended the game,
        * ``+1`` if the snake just ate a fruit,
        * ``-1`` once more than 15 consecutive moves have passed without a
          fruit (the counter then starts again),
        * ``0`` otherwise.

    Relies on names defined elsewhere in the module: ``width``, ``height``,
    ``snake_model``, ``optimizer``, ``memory``, ``gamma``, ``game_actions``,
    ``choose_action``, ``discount_rewards`` and ``train_step``.

    Args:
        num_episodes: Number of games to play.
    """
    max_moves_without_fruit = 15

    for _ in range(num_episodes):
        game = SnakeGame(width, height, num_fruit=3)
        num_moves_without_fruit = 0

        while not game.game_over:
            # Copy the board so the stored observation is its own array,
            # separate from the one the game holds.
            observation = numpy.copy(game.board)
            action = choose_action(snake_model, observation)
            game.tick(game_actions[action])

            if game.game_over:
                reward = -10
            elif game.just_ate_fruit:
                reward = 1
                # Eating a fruit ends the current run of fruitless moves.
                # Without this reset the stall penalty below could fire
                # right after a successful move.
                num_moves_without_fruit = 0
            else:
                num_moves_without_fruit += 1
                if num_moves_without_fruit > max_moves_without_fruit:
                    # Penalise stalling, then start counting afresh so the
                    # penalty recurs at most once per window.
                    reward = -1
                    num_moves_without_fruit = 0
                else:
                    reward = 0

            memory.add_to_memory(observation, action, reward)

            if game.game_over:
                # Episode finished: train once on everything recorded.
                total_observation = numpy.stack(memory.observations, 0)
                total_action = numpy.array(memory.actions)
                total_rewards = discount_rewards(memory.rewards, gamma)
                train_step(
                    snake_model,
                    optimizer,
                    total_observation,
                    total_action,
                    total_rewards,
                )
                memory.clear()
                break