def test(times=10):
    """Play ``times`` games steered by the model stored in 'my_model.h5'.

    On every frame each candidate move in ``moves`` is scored by the model
    and the snake is steered with the highest-scoring one.  The snake length
    at the moment of death is recorded for each game, and the average of
    those lengths is printed once all games have finished.

    Args:
        times: Number of games to play.  If no game is played there is
            nothing to average, so a notice is printed instead of dividing
            by zero.
    """
    model = load_model('my_model.h5')
    # final snake length of every finished game
    lengths = []
    for i in range(times):
        # progress indicator: index of the current game
        print(i)
        pygame.event.pump()
        failed = False
        lead_x = 70
        lead_y = 70
        snake = NeuralNetwork_Snake(showScreen, screenx, screeny, snakeImg,
                                    lead_x, lead_y)
        apple = Apple(showScreen, screenx, screeny, b_size, appleImg,
                      snake.snake_list)
        while not failed:
            # advance the snake one step towards/around the current apple
            apple_x, apple_y = apple.apple_pos()
            snake.update_snake_list(apple_x, apple_y)
            if snake.is_alive() is False:
                # the rest of this frame still runs; the loop ends afterwards
                failed = True
                lengths.append(snake.snake_length)
            showScreen.fill(white)
            if snake.eaten is True:
                apple.update_apple_pos(snake.snake_list)
            # re-read the apple position, it may just have been moved
            apple_x, apple_y = apple.apple_pos()
            # score every candidate move; the state is reshaped to rows of
            # 5 values before it is handed to the model
            allPredictions = {}
            for act in moves:
                state = snake.state([apple_x, apple_y], act)
                allPredictions[act] = model.predict(
                    np.array(state).reshape(-1, 5))[0][0]
            act = max(allPredictions, key=allPredictions.get)
            snake.set_direction(act)
            apple.display()
            snake.eaten = False
            snake.display()
            snake.display_score()
            pygame.display.update()
            # NOTE(review): `time` is presumably a pygame clock object here,
            # not the stdlib module -- confirm against the file header
            time.tick(FPS)
    if not lengths:
        # times <= 0: avoid ZeroDivisionError on an empty result list
        print("No games were played, so there is no average.")
        return
    print("Avg is: {}".format(sum(lengths)/len(lengths)))
def trainSnake(times=40000):
    """Collect labelled training samples from ``times`` randomly played games.

    The snake is steered with ``random.choice(moves)``.  After each step the
    state that was computed for the chosen move is stored together with a
    score: -1 if the snake died, 1 if it ate the apple or got closer to it,
    0 otherwise.  All samples are pickled to 'train_data.txt' (a binary
    pickle despite the extension) once every game has finished.

    Args:
        times: Number of games to play.
    """
    train_data = []
    for i in range(times):
        # progress indicator: index of the current game
        print(i)
        pygame.event.pump()
        failed = False
        lead_x = 70
        lead_y = 70
        snake = NeuralNetwork_Snake(showScreen, screenx, screeny, snakeImg,
                                    lead_x, lead_y)
        apple = Apple(showScreen, screenx, screeny, b_size, appleImg,
                      snake.snake_list)
        apple_x, apple_y = apple.apple_pos()
        # the first sample of each game is built for the move "up"
        act = "up"
        state = snake.state([apple_x, apple_y], act)
        former_distance = snake.distance([apple_x, apple_y])
        while not failed:
            apple_x, apple_y = apple.apple_pos()
            snake.update_snake_list(apple_x, apple_y)
            # distance to the apple after the move that was just made
            distance = snake.distance([apple_x, apple_y])
            score = 0
            if snake.is_alive() is False:
                failed = True
                score = -1
            showScreen.fill(white)
            if snake.eaten is True:
                apple.update_apple_pos(snake.snake_list)
            # NOTE(review): this check runs after the death check, so a fatal
            # move that also shortened the distance ends up labelled 1, not
            # -1 -- confirm this is intended
            if snake.eaten is True or distance < former_distance:
                score = 1
            # label the state/move pair that led to this frame
            train_data.append([np.array(state), score])
            # pick the next move at random and prepare its sample
            act = random.choice(moves)
            apple_x, apple_y = apple.apple_pos()
            former_distance = snake.distance([apple_x, apple_y])
            state = snake.state([apple_x, apple_y], act)
            snake.set_direction(act)
            apple.display()
            snake.eaten = False
            snake.display()
            snake.display_score()
            pygame.display.update()
            # NOTE(review): `time` is presumably a pygame clock object here,
            # not the stdlib module -- confirm against the file header
            time.tick(FPS)
    print(len(train_data))
    # the context manager closes the file even if pickling raises
    with open("train_data.txt", "wb") as f:
        pickle.dump(train_data, f)
def one_game():
    """Play a single self-driving game, steering with BreathFirstSearch.

    Each frame the snake is advanced and drawn, then a path from the snake's
    head to the apple is requested from ``BreathFirstSearch`` and element 1
    of the result is used as the next cell to move to.  The loop ends after
    the frame in which the snake is found dead.
    """
    pygame.event.pump()

    # starting coordinates of the snake's head
    start_x, start_y = 150, 150
    snake = Snake(showScreen, screenx, screeny, snakeImg, start_x, start_y)
    apple = Apple(showScreen, screenx, screeny, b_size, appleImg,
                  snake.snake_list)

    finished = False
    while not finished:
        # move the snake one step
        target_x, target_y = apple.get_apple_pos()
        snake.update_snake_list(target_x, target_y)
        # the remainder of the frame still runs when the snake has died
        finished = snake.is_alive() is False

        # redraw the board
        showScreen.fill(white)
        if snake.eaten is True:
            apple.update_apple_pos(snake.snake_list)
        apple.display()
        snake.eaten = False
        snake.display()
        snake.display_score()
        pygame.display.update()

        # plan the next move: every snake cell except the head is blocked
        goal_x, goal_y = apple.get_apple_pos()
        head_x, head_y = snake.get_snake_head()
        blocked = snake.snake_list.copy()
        blocked.remove([head_x, head_y])
        path = BreathFirstSearch(screenx, screeny, b_size, blocked,
                                 [goal_x, goal_y], [head_x, head_y])
        step = path[1]
        dx = step[0] - head_x
        dy = step[1] - head_y

        # a horizontal offset takes priority over a vertical one; with no
        # offset at all the direction is left untouched
        if dx != 0:
            snake.direction = "right" if dx > 0 else "left"
        elif dy != 0:
            snake.direction = "down" if dy > 0 else "up"

        time.tick(FPS)
def one_game():
    """Play a single self-driving game, steering with the A* search.

    Each frame the snake is advanced and drawn, then ``A_star`` is asked for
    a path from the snake's head to the apple.  The path starts at the head,
    so its second element is the cell to move to next.  The loop ends after
    the frame in which the snake is found dead.
    """
    pygame.event.pump()

    # starting coordinates of the snake's head
    start_x, start_y = 70, 70
    snake = Snake(gameDisplay, display_width, display_height, img,
                  start_x, start_y)
    apple = Apple(gameDisplay, display_width, display_height, block_size,
                  img2, snake.snake_list)

    finished = False
    while not finished:
        # advance the snake by one step in its current direction
        target_x, target_y = apple.get_apple_pos()
        snake.update_snake_list(target_x, target_y)
        # the remainder of the frame still runs when the snake has died
        finished = snake.is_alive() is False

        # redraw the board; a freshly eaten apple is relocated first
        gameDisplay.fill(white)
        if snake.eaten is True:
            apple.update_apple_pos(snake.snake_list)
        apple.display()
        snake.eaten = False
        snake.display()
        snake.display_score()
        pygame.display.update()

        # plan the next move: every snake cell except the head is blocked
        goal_x, goal_y = apple.get_apple_pos()
        head_x, head_y = snake.get_snake_head()
        blocked = snake.snake_list.copy()
        blocked.remove([head_x, head_y])
        path = A_star(display_width, display_height, block_size, blocked,
                      (goal_x, goal_y), (head_x, head_y))
        step = path[1]
        dx = step[0] - head_x
        dy = step[1] - head_y

        # a horizontal offset takes priority over a vertical one; with no
        # offset at all the direction is left untouched
        if dx != 0:
            snake.direction = "right" if dx > 0 else "left"
        elif dy != 0:
            snake.direction = "down" if dy > 0 else "up"

        clock.tick(FPS)
def training_game(times=10):
    """Train the Q-table agent with Sarsa updates over ``times`` games.

    After every step the reward is determined (-100 when the snake dies,
    500 when it eats the apple, -10 otherwise), the next action is requested
    from ``snake_agent`` and the Q table is updated from the
    (old state, old action, new state, new action, reward) tuple.  The score
    of each game (``snake_length - 1``) is recorded, and the average score
    is printed when all games are done.

    Args:
        times: Number of games to play.  If no game is played there is
            nothing to average, so a notice is printed instead of dividing
            by zero.
    """
    # score of every finished game
    scores = []
    for i in range(times):
        # progress indicator: index of the current game
        print(i)
        pygame.event.pump()
        game_over = False
        # starting coordinates of the snake's head
        lead_x = 70
        lead_y = 70
        snake = RL_Snake(gameDisplay, display_width, display_height, img,
                         lead_x, lead_y)
        apple = Apple(gameDisplay, display_width, display_height, block_size,
                      img2, snake.snake_list)
        a_x, a_y = apple.get_apple_pos()
        # initial state; the first action is taken to be "up"
        old_state = snake.get_state([a_x, a_y])
        old_action = "up"
        while not game_over:
            # advance the snake by one step in its current direction
            a_x, a_y = apple.get_apple_pos()
            snake.update_snake_list(a_x, a_y)
            # A plain step is penalised so that reaching the apple, the only
            # positive reward, is encouraged.
            reward = -10
            if snake.is_alive() is False:
                game_over = True
                reward = -100
                scores.append(snake.snake_length - 1)
            gameDisplay.fill(white)
            if snake.eaten is True:
                # place a new apple and reward the catch
                apple.update_apple_pos(snake.snake_list)
                reward = 500
            # The new state is computed against the apple position read at
            # the top of this frame, i.e. before a possible relocation.
            state = snake.get_state([a_x, a_y])
            action = snake_agent.getA(tuple(state))
            snake_agent.updateQ(tuple(old_state), old_action, tuple(state),
                                action, reward)
            old_action = action
            # training takes a long time, so the Q table is archived as we go
            snake_agent.saveQ()
            # refresh the state against the (possibly new) apple position and
            # apply the action chosen by the agent
            a_x, a_y = apple.get_apple_pos()
            old_state = snake.get_state([a_x, a_y])
            snake.set_direction_by_action(action)
            apple.display()
            snake.eaten = False
            snake.display()
            snake.display_score()
            pygame.display.update()
            clock.tick(FPS)
    if not scores:
        # times <= 0: avoid ZeroDivisionError on an empty result list
        print("No games were played, so there is no average score.")
        return
    # after training is done, print out the average score
    print("Average score is: {}".format(sum(scores) / len(scores)))
def training_game(times=100):
    """Train the DQN agent by letting it play ``times`` games.

    After every step the transition (old state, action index, reward, new
    state, game-over flag) is stored in the agent's memory, and the agent is
    trained on a batch as soon as the memory holds more than ``batch_size``
    entries.  Rewards are -100 for dying, 100 for eating the apple and -10
    otherwise.  When a snake dies, the agent's ``copy_weights`` is called.
    The model is saved after the last game and the average score
    (``snake_length - 1`` per game) is printed.

    Args:
        times: Number of games to play.  If no game is played the model is
            still saved, but a notice is printed instead of dividing by zero
            for the average.
    """
    # score of every finished game
    scores = []
    for i in range(times):
        # progress indicator: index of the current game
        print(i)
        pygame.event.pump()
        game_over = False
        # starting coordinates of the snake's head
        lead_x = 70
        lead_y = 70
        snake = DQN_Snake(gameDisplay, display_width, display_height, img,
                          lead_x, lead_y)
        apple = Apple(gameDisplay, display_width, display_height, block_size,
                      img2, snake.snake_list)
        a_x, a_y = apple.get_apple_pos()
        # initial state; the first action is taken to be "up"
        action = "up"
        old_state = snake.get_state([a_x, a_y])
        while not game_over:
            # advance the snake by one step in its current direction
            a_x, a_y = apple.get_apple_pos()
            snake.update_snake_list(a_x, a_y)
            # state reached by the step that was just made
            state = snake.get_state([a_x, a_y])
            # A plain step is penalised so that reaching the apple, the only
            # positive reward, is encouraged.
            reward = -10
            if snake.is_alive() is False:
                game_over = True
                # copy the weights from the Q model to the target model
                agent.copy_weights()
                reward = -100
                scores.append(snake.snake_length - 1)
            gameDisplay.fill(white)
            if snake.eaten is True:
                # place a new apple and reward the catch
                apple.update_apple_pos(snake.snake_list)
                reward = 100
            # Store the transition; states are reshaped to a single row of
            # 5 values, and `look_up` converts the action to the form the
            # agent stores (presumably its index -- confirm).
            agent.store_train_data(np.reshape(old_state, [1, 5]),
                                   look_up[action], reward,
                                   np.reshape(state, [1, 5]), game_over)
            # start training once the memory exceeds one batch
            if len(agent.memory) > batch_size:
                agent.train(batch_size)
            # refresh the state against the (possibly new) apple position
            a_x, a_y = apple.get_apple_pos()
            old_state = snake.get_state([a_x, a_y])
            # ask the DQN model for the next action
            action = actions[agent.get_action(np.reshape(old_state, [1, 5]))]
            snake.set_direction_by_action(action)
            apple.display()
            snake.eaten = False
            snake.display()
            snake.display_score()
            pygame.display.update()
            clock.tick(FPS)
    # when the training is finished, save the model
    agent.save_model()
    if not scores:
        # times <= 0: avoid ZeroDivisionError on an empty result list
        print("No games were played, so there is no average score.")
        return
    # after training is done, print out the average score
    print("Average score is: {}".format(sum(scores) / len(scores)))
def training_game(times=40000):
    """Collect neural-network training data from ``times`` random games.

    The snake is steered with ``random.choice(actions)``.  After each step
    the state that was computed for the chosen action is stored together
    with a reward: -1 if the snake died, 1 if it ate the apple or moved
    closer to it, 0 otherwise.  The collected samples are pickled to
    'train_data.txt' (a binary pickle despite the extension) after the last
    game, to be used for training the network later.

    Args:
        times: Number of games to play.
    """
    # Samples are collected first and used to train the NN afterwards.  To
    # extend an earlier run, load the pickled list from 'train_data.txt'
    # here instead of starting with an empty list.
    train_data = []
    for i in range(times):
        # progress indicator: index of the current game
        print(i)
        pygame.event.pump()
        game_over = False
        # starting coordinates of the snake's head
        lead_x = 70
        lead_y = 70
        snake = NN_Snake(gameDisplay, display_width, display_height, img,
                         lead_x, lead_y)
        apple = Apple(gameDisplay, display_width, display_height, block_size,
                      img2, snake.snake_list)
        a_x, a_y = apple.get_apple_pos()
        # the first sample of each game is built for the action "up"
        action = "up"
        state = snake.get_state([a_x, a_y], action)
        old_distance = snake.get_distance([a_x, a_y])
        while not game_over:
            # advance the snake by one step in its current direction
            a_x, a_y = apple.get_apple_pos()
            snake.update_snake_list(a_x, a_y)
            # distance to the apple after the move
            distance = snake.get_distance([a_x, a_y])
            # default reward
            reward = 0
            if snake.is_alive() is False:
                game_over = True
                reward = -1
            gameDisplay.fill(white)
            if snake.eaten is True:
                # place a new apple
                apple.update_apple_pos(snake.snake_list)
            # NOTE(review): this check runs after the death check, so a fatal
            # move that also shortened the distance ends up with reward 1,
            # not -1 -- confirm this is intended
            if snake.eaten is True or distance < old_distance:
                reward = 1
            # label the state/action pair that led to this frame
            train_data.append([np.array(state), reward])
            # pick the next action at random and prepare its sample
            action = random.choice(actions)
            a_x, a_y = apple.get_apple_pos()
            old_distance = snake.get_distance([a_x, a_y])
            state = snake.get_state([a_x, a_y], action)
            snake.set_direction_by_action(action)
            apple.display()
            snake.eaten = False
            snake.display()
            snake.display_score()
            pygame.display.update()
            clock.tick(FPS)
    # store the training data; the context manager closes the file even if
    # pickling raises
    print(len(train_data))
    with open("train_data.txt", "wb") as f:
        pickle.dump(train_data, f)
def testing_game(times=10):
    """Play ``times`` games steered by the trained model in 'my_model.h5'.

    On every frame each action in ``actions`` is scored by the model and the
    snake is steered with the highest-scoring one.  The snake length at the
    moment of death is recorded for each game, and the average is printed
    once all games have finished.

    Args:
        times: Number of games to play.  If no game is played there is
            nothing to average, so a notice is printed instead of dividing
            by zero.
    """
    # load the trained NN model
    model = load_model('my_model.h5')
    # final snake length of every finished game
    scores = []
    for i in range(times):
        # progress indicator: index of the current game
        print(i)
        pygame.event.pump()
        game_over = False
        # starting coordinates of the snake's head
        lead_x = 70
        lead_y = 70
        snake = NN_Snake(gameDisplay, display_width, display_height, img,
                         lead_x, lead_y)
        apple = Apple(gameDisplay, display_width, display_height, block_size,
                      img2, snake.snake_list)
        a_x, a_y = apple.get_apple_pos()
        # NOTE(review): the two values below are never read before being
        # overwritten or dropped; the calls are kept in case they have side
        # effects -- confirm and remove if they do not
        action = "up"
        state = snake.get_state([a_x, a_y], action)
        old_distance = snake.get_distance([a_x, a_y])
        while not game_over:
            # advance the snake by one step in its current direction
            a_x, a_y = apple.get_apple_pos()
            snake.update_snake_list(a_x, a_y)
            if snake.is_alive() is False:
                # the rest of this frame still runs; the loop ends afterwards
                game_over = True
                scores.append(snake.snake_length)
            gameDisplay.fill(white)
            if snake.eaten is True:
                # place a new apple
                apple.update_apple_pos(snake.snake_list)
            # re-read the apple position, it may just have been moved
            a_x, a_y = apple.get_apple_pos()
            # score every action with the model and keep the best one; the
            # state is reshaped to rows of 5 values before prediction
            predictions = {}
            for action in actions:
                state = snake.get_state([a_x, a_y], action)
                predictions[action] = model.predict(
                    np.array(state).reshape(-1, 5))[0][0]
            action = max(predictions, key=predictions.get)
            # steer the snake with the chosen action
            snake.set_direction_by_action(action)
            apple.display()
            snake.eaten = False
            snake.display()
            snake.display_score()
            pygame.display.update()
            clock.tick(FPS)
    if not scores:
        # times <= 0: avoid ZeroDivisionError on an empty result list
        print("No games were played, so there is no average score.")
        return
    # after testing is done, print out the average score
    print("Average score is: {}".format(sum(scores) / len(scores)))