def __init__(self):
    # Assumes module-level imports: game (the Snake environment), random, and search.
    self.env = game()
    self.env.reset()
    self.action = -1
    done = False
    while True:
        # If the head has no legal successor, the snake is trapped and loses.
        suc = self.getSuccessors(self.getStartState())
        if len(suc) < 1:
            self.env.setlose()
            done = True
        else:
            # Take one random legal step, then replan with depth-first search.
            step = random.choice(suc)
            self.env.step(step[1])
        path = search.dfs(self)
        for action in path:
            # Render the previous frame, then apply the next planned action.
            self.env.render()
            done = self.env.step(action)
            if done:
                break
        if done:
            break
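# ---------------------------------------------------------------------------
# The agent above delegates planning to search.dfs, which is not shown in this
# snippet. A minimal sketch of what it might look like, assuming the standard
# search-problem interface (getStartState / isGoalState / getSuccessors) and
# successors given as (state, action, cost) triples -- both assumptions, not
# confirmed by this file.
def dfs(problem):
    """Depth-first search; returns a list of actions reaching a goal state."""
    stack = [(problem.getStartState(), [])]  # (state, actions taken so far)
    visited = set()
    while stack:
        state, actions = stack.pop()
        if problem.isGoalState(state):
            return actions
        if state in visited:
            continue
        visited.add(state)
        for next_state, action, _cost in problem.getSuccessors(state):
            stack.append((next_state, actions + [action]))
    return []  # no path to the goal was found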
def __init__(self):
    self.env = game()
    self.env.reset()
    action = -1
    while True:
        # Map arrow keys to the four absolute actions (0=up, 1=down, 2=left, 3=right).
        for event in pygame.event.get():
            if event.type == KEYDOWN:
                if event.key == K_UP:
                    action = 0
                elif event.key == K_DOWN:
                    action = 1
                elif event.key == K_LEFT:
                    action = 2
                elif event.key == K_RIGHT:
                    action = 3
        # Render the previous frame, then apply the chosen action.
        self.env.render()
        done = self.env.step(action)
        if done:
            break
def __init__(self, costFn=lambda x: 1):
    self.env = game()
    self.env.reset()
    self.action = -1
    self.costFn = costFn
    done = False
    while True:
        # If the head has no legal successor, the snake is trapped and loses.
        suc = self.getSuccessors(self.getStartState())
        if len(suc) < 1:
            self.env.setlose()
            done = True
        else:
            # Take one random legal step, then replan with A* search.
            step = random.choice(suc)
            self.env.step(step[1])
        path = search.aStarSearch(self, manhattanHeuristic)
        for action in path:
            # Render the previous frame, then apply the next planned action.
            self.env.render()
            done = self.env.step(action)
            if done:
                break
        if done:
            break
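# manhattanHeuristic is passed to aStarSearch above but never defined in this
# snippet. A plausible implementation, assuming states are (x, y) grid
# positions and the problem stores the food cell in problem.goal (both are
# assumptions about code not shown here).
def manhattanHeuristic(state, problem):
    """Manhattan distance from the snake's head to the food."""
    x1, y1 = state
    x2, y2 = problem.goal
    return abs(x1 - x2) + abs(y1 - y2)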
def __init__(self):
    self.env = game()
    self.env.reset()
    self.action = -1
    done = False
    # Precompute a Hamiltonian cycle over the grid and close the loop.
    h = search.HamiltonianCycle(self)
    path = h.hamicycle()
    path.append(path[0])
    # Translate cell indices into grid coordinates.
    directions = [self.getGrid()[cell] for cell in path]
    i = 0
    while True:
        suc = self.getSuccessors(self.getStartState())
        if i >= len(directions) - 1:
            i = 0  # wrap around to the start of the cycle
        if directions[i] == self.getStartState():
            # Follow the cycle: take the successor that leads to the next cell.
            for s in suc:
                if s[0] == directions[i + 1]:
                    self.env.render()
                    # End the episode once the snake fills all but one cell.
                    if len(self.env.getSnake()) == self.getGridSize() - 1:
                        self.env.setlose()
                    done = self.env.step(s[1])
        i = i + 1
        if done:
            break
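# search.HamiltonianCycle is also external to this snippet. One standard way
# to build such a cycle on a grid is a boustrophedon ("lawnmower") sweep that
# snakes through columns 1..width-1 and returns home along column 0. A sketch
# under two assumptions: the grid has an even number of rows, and cells are
# indexed as row * width + col (the project's real encoding may differ).
def boustrophedon_cycle(width, height):
    """Return a Hamiltonian cycle visiting every cell of a width x height grid."""
    path = []
    for row in range(height):
        # Sweep columns 1..width-1, alternating direction each row.
        cols = range(1, width) if row % 2 == 0 else range(width - 1, 0, -1)
        for col in cols:
            path.append(row * width + col)
    # Come back up along column 0; appending path[0] afterwards closes the loop.
    for row in range(height - 1, -1, -1):
        path.append(row * width)
    return path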
def some_random_games_first():
    # Each of these is its own game.
    for episode in range(10):
        env = game()
        env.reset()
        first = True
        for _ in range(goal_steps):
            # Pick a random relative action, but always start by going straight.
            action = random.randrange(0, 3)
            if first:
                first = False
                action = 2
            # Render the previous frame, then apply the action.
            env.render()
            observation, reward, done, info = env.step(action)
            if done:
                break
def some_random_games_first():
    # Each of these is its own game; this variant runs until the game ends
    # and shows an episode counter on screen.
    for episode in range(10):
        env = game()
        env.reset()
        env.setAddedText('Initial games ' + str(episode + 1).zfill(2))
        first = True
        done = False
        while not done:
            # Pick a random relative action, but always start by going straight.
            action = random.randrange(0, 3)
            if first:
                first = False
                action = 2
            # Render the previous frame, then apply the action.
            env.render()
            observation, reward, done, info = env.step(action)
import os
import random
from collections import Counter
from statistics import median, mean

import numpy as np
import pygame
from pygame.locals import *
import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression

from Snake import Game as game

env = game()
env.reset()
action = -1

LR = 1e-3                 # learning rate for the tflearn model
goal_steps = 300          # cap on steps per random game
score_requirement = 50    # minimum score for a game to enter the training set
initial_games = 5000      # number of games to sample
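# The tflearn imports above support a small fully connected policy network.
# A sketch of one such model; the layer sizes and the 3-way softmax output
# (matching the three relative actions used in this file) are assumptions,
# not the project's confirmed architecture.
def neural_network_model(input_size):
    network = input_data(shape=[None, input_size, 1], name='input')
    network = fully_connected(network, 128, activation='relu')
    network = dropout(network, 0.8)  # keep probability, per tflearn convention
    network = fully_connected(network, 3, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=LR,
                         loss='categorical_crossentropy', name='targets')
    return tflearn.DNN(network, tensorboard_dir='log')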
def play_alone():
    # Relative action for each (desired direction, current direction) pair:
    # 0 and 1 are turns, 2 keeps going straight. Behavior matches the
    # original per-key if/elif chains, just table-driven.
    TURN_TABLE = {
        'up':    {'right': 0, 'left': 1, 'up': 2, 'down': 2},
        'down':  {'right': 1, 'left': 0, 'up': 2, 'down': 2},
        'left':  {'right': 2, 'left': 2, 'up': 0, 'down': 1},
        'right': {'right': 2, 'left': 2, 'up': 1, 'down': 0},
    }
    KEY_TO_DIR = {K_UP: 'up', K_w: 'up', K_DOWN: 'down', K_s: 'down',
                  K_LEFT: 'left', K_a: 'left', K_RIGHT: 'right', K_d: 'right'}

    my_training_data = []
    # all scores:
    scores = []
    # just the scores that met our threshold:
    accepted_scores = []
    # iterate through however many games we want
    # (games_i_want_to_play is defined at module level, not shown here):
    for _ in range(games_i_want_to_play):
        env = game()
        # reset env to play again
        env.reset()
        action = 2
        score = 0
        # moves specifically from this environment:
        game_memory = []
        # previous observation that we saw
        prev_observation = []
        while True:
            for event in pygame.event.get():
                if event.type == KEYDOWN and event.key in KEY_TO_DIR:
                    action = TURN_TABLE[KEY_TO_DIR[event.key]][env.snake.movedir]
            # do it! render the previous view
            env.render()
            observation, reward, done, info = env.step(action)
            # The observation is returned FROM the action, so we pair the
            # previous observation with the action we just took.
            if len(prev_observation) > 0:
                game_memory.append([prev_observation, action])
            prev_observation = observation
            score += reward
            action = 2  # default back to "straight" until the next key press
            if done:
                break
        # IF our score is higher than our threshold, save every move we made.
        # NOTE the reinforcement methodology here: we only reinforce the
        # score, not HOW that score was reached.
        if score > score_requirement:
            accepted_scores.append(score)
            for data in game_memory:
                # Convert to one-hot (the output layer for our neural network).
                output = [0, 0, 0]
                output[data[1]] = 1
                # saving our training data
                my_training_data.append([data[0], output])
        # save overall scores
        scores.append(score)

    # some stats here, to further illustrate the neural network magic!
    if len(accepted_scores) > 0:
        print('Average accepted score:', mean(accepted_scores))
        print('Score Requirement:', score_requirement)
        print('Median score for accepted scores:', median(accepted_scores))
        print(Counter(accepted_scores))

    # Merge with any previously saved sessions; np.load(allow_pickle=True)
    # returns the pickled Python list back.
    if os.path.exists('./saved_expert_player.npy'):
        prev_training_data = np.load('saved_expert_player.npy', allow_pickle=True)[0]
        my_training_data = my_training_data + prev_training_data
    # dtype=object keeps numpy from trying to build a ragged numeric array.
    training_data_save = np.array([my_training_data, score_requirement], dtype=object)
    print('New training data length: ', len(training_data_save[0]))
    np.save('saved_expert_player.npy', training_data_save)
    return my_training_data
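# The pairs saved by play_alone() are [observation, one_hot_action], so
# training reduces to reshaping them into X/y arrays and fitting. A hedged
# sketch that reuses the hypothetical neural_network_model defined above,
# assuming each observation is a flat numeric vector.
def train_model(training_data, model=None):
    X = np.array([row[0] for row in training_data]).reshape(-1, len(training_data[0][0]), 1)
    y = np.array([row[1] for row in training_data])
    if model is None:
        model = neural_network_model(input_size=len(X[0]))
    model.fit({'input': X}, {'targets': y}, n_epoch=5,
              snapshot_step=500, show_metric=True, run_id='snake_expert')
    return model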