import random
from collections import deque

import numpy as np
import torch

# MAX_MEMORY, BATCH_SIZE, LR, Net and Trainer are assumed to be defined
# elsewhere in the project (model / hyperparameter modules).


class Agent:
    def __init__(self, load_path=''):
        self.n_games = 0
        self.epsilon = 0          # exploration rate
        self.gamma = 0.9          # discount rate
        self.load_path = load_path
        self.memory = deque(maxlen=MAX_MEMORY)  # pops from the left when full
        self.model = Net(11, 256, 3)
        if load_path:
            self.model.load_state_dict(torch.load(load_path))
        self.trainer = Trainer(self.model, LR, self.gamma)

    def get_state(self, game):
        # Direction encoding: 0 = Up, 1 = Left, 2 = Right, 3 = Down.
        head = game.snake_pos
        near_head = [
            [head[0], head[1] - 10],  # point above the head
            [head[0] - 10, head[1]],  # point to the left
            [head[0] + 10, head[1]],  # point to the right
            [head[0], head[1] + 10],  # point below the head
        ]
        directions = [
            game.direction == 0,
            game.direction == 1,
            game.direction == 2,
            game.direction == 3,
        ]

        state = [
            # Danger straight ahead
            (directions[0] and game.is_colision(near_head[0])) or
            (directions[1] and game.is_colision(near_head[1])) or
            (directions[2] and game.is_colision(near_head[2])) or
            (directions[3] and game.is_colision(near_head[3])),

            # Danger to the left of the current heading
            (directions[0] and game.is_colision(near_head[1])) or
            (directions[1] and game.is_colision(near_head[3])) or
            (directions[2] and game.is_colision(near_head[0])) or
            (directions[3] and game.is_colision(near_head[2])),

            # Danger to the right of the current heading
            (directions[0] and game.is_colision(near_head[2])) or
            (directions[1] and game.is_colision(near_head[0])) or
            (directions[2] and game.is_colision(near_head[3])) or
            (directions[3] and game.is_colision(near_head[1])),

            # Food location relative to the head
            game.food_pos[0] < head[0],  # food left
            game.food_pos[0] > head[0],  # food right
            game.food_pos[1] < head[1],  # food up
            game.food_pos[1] > head[1],  # food down
        ] + directions
        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE)
        else:
            mini_sample = self.memory
        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        # Only explore while training from scratch; a loaded model plays greedily.
        if not self.load_path:
            self.epsilon = 80 - self.n_games
        final_move = [0, 0, 0]
        if random.randint(0, 200) < self.epsilon:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1
        return final_move
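The Trainer that the agent delegates to is not shown in this section. The sketch below is a minimal, hypothetical reconstruction of what its train_step typically looks like for this kind of agent, assuming an Adam optimizer, mean-squared-error loss, and the standard Bellman target Q_new = r + gamma * max Q(next_state); the project's actual Trainer may differ in details.

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim


class Trainer:
    """Sketch of a Q-learning trainer (hypothetical; shown for illustration only)."""

    def __init__(self, model, lr, gamma):
        self.model = model
        self.gamma = gamma
        self.optimizer = optim.Adam(model.parameters(), lr=lr)
        self.criterion = nn.MSELoss()

    def train_step(self, state, action, reward, next_state, done):
        state = torch.tensor(np.array(state), dtype=torch.float)
        next_state = torch.tensor(np.array(next_state), dtype=torch.float)
        action = torch.tensor(np.array(action), dtype=torch.long)
        reward = torch.tensor(np.array(reward), dtype=torch.float)

        if state.dim() == 1:
            # Single transition from train_short_memory: add a batch dimension.
            state = state.unsqueeze(0)
            next_state = next_state.unsqueeze(0)
            action = action.unsqueeze(0)
            reward = reward.unsqueeze(0)
            done = (done,)

        # Predicted Q-values for the current states.
        pred = self.model(state)

        # Bellman update: Q_new = r + gamma * max_a' Q(next_state, a'), unless terminal.
        target = pred.clone()
        for idx in range(len(done)):
            q_new = reward[idx]
            if not done[idx]:
                q_new = reward[idx] + self.gamma * torch.max(self.model(next_state[idx]))
            target[idx][torch.argmax(action[idx]).item()] = q_new

        self.optimizer.zero_grad()
        loss = self.criterion(target, pred)
        loss.backward()
        self.optimizer.step()

The same train_step serves both paths in the agent: train_short_memory passes a single transition, while train_long_memory passes a batch sampled from the replay memory.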
class Agent:
    def __init__(self):
        self.num_games = 0   # number of games played
        self.epsilon = 0     # randomness (exploration rate)
        self.gamma = 0.9     # discount rate
        # Replay memory; pops from the left if the memory limit is exceeded.
        self.memory = deque(maxlen=MAX_MEMORY)
        self.model = Linear(11, 256, 3)
        self.trainer = Trainer(self.model, lr=LEARNING_RATE, gamma=self.gamma)

    def get_state(self, game):
        head = game.snake[0]
        point_l = Point(head.x - 20, head.y)
        point_r = Point(head.x + 20, head.y)
        point_u = Point(head.x, head.y - 20)
        point_d = Point(head.x, head.y + 20)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN

        state = [
            # Danger straight
            (dir_r and game.is_collision(point_r)) or
            (dir_l and game.is_collision(point_l)) or
            (dir_u and game.is_collision(point_u)) or
            (dir_d and game.is_collision(point_d)),

            # Danger right
            (dir_u and game.is_collision(point_r)) or
            (dir_d and game.is_collision(point_l)) or
            (dir_l and game.is_collision(point_u)) or
            (dir_r and game.is_collision(point_d)),

            # Danger left
            (dir_d and game.is_collision(point_r)) or
            (dir_u and game.is_collision(point_l)) or
            (dir_r and game.is_collision(point_u)) or
            (dir_l and game.is_collision(point_d)),

            # Move direction
            dir_l,
            dir_r,
            dir_u,
            dir_d,

            # Food location
            game.food.x < game.head.x,  # food left
            game.food.x > game.head.x,  # food right
            game.food.y < game.head.y,  # food up
            game.food.y > game.head.y   # food down
        ]
        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE)
        else:
            mini_sample = self.memory
        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        # Initially the agent makes mostly random moves (exploration); as
        # num_games grows, epsilon shrinks and the model's predictions take over.
        self.epsilon = 80 - self.num_games
        final_move = [0, 0, 0]
        if random.randint(0, 200) < self.epsilon:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1
        return final_move
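For context, a typical training loop that wires this agent to the game might look like the sketch below. The SnakeGame class, its play_step(action) return signature (reward, done, score), and its reset() method are assumptions about the game side of the project and are not defined in this section.

def train():
    record = 0
    agent = Agent()
    game = SnakeGame()  # hypothetical game class with play_step() and reset()
    while True:
        # Observe the current state, pick an action, and step the game.
        state_old = agent.get_state(game)
        final_move = agent.get_action(state_old)
        reward, done, score = game.play_step(final_move)  # assumed return signature
        state_new = agent.get_state(game)

        # Learn from this single transition, then store it for replay.
        agent.train_short_memory(state_old, final_move, reward, state_new, done)
        agent.remember(state_old, final_move, reward, state_new, done)

        if done:
            # After each game, train on a batch sampled from replay memory.
            game.reset()
            agent.num_games += 1
            agent.train_long_memory()
            if score > record:
                record = score
            print('Game', agent.num_games, 'Score', score, 'Record', record)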