def __init__(self):
    self.n_games = 0
    self.epsilon = 0  # randomness
    self.gamma = 0.9  # discount rate
    self.memory = deque(maxlen=MAX_MEMORY)  # popleft()
    self.model = Linear_QNet(4, 256, 4)
    self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
def __init__(self):
    self.n_games = 0
    self.epsilon = 0  # randomness
    self.gamma = 0.9  # discount rate
    self.memory = deque(maxlen=MAX_MEMORY)  # popleft()
    self.model = Linear_QNet(11, 256, 3)  # input layer: 11, hidden: 256, output: 3
    self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
def __init__(self, agent_cfg) -> None:
    self.n_games = 0
    self.agent_cfg = agent_cfg
    self.epsilon = agent_cfg.epsilon  # randomness
    self.random_until = agent_cfg.random_until
    self.memory = deque(maxlen=agent_cfg.max_memory_size)
    self.model = LinearQNet(agent_cfg.model)
    self.trainer = QTrainer(self.model, agent_cfg.lr, agent_cfg.gamma)
class Agent:

    def __init__(self):
        self.n_games = 0
        self.epsilon = 0.5  # randomness
        self.gamma = 0.9  # discount rate
        self.memory = deque(maxlen=MAX_MEMORY)  # popleft()
        self.model = Linear_QNet(2, 256, 4)
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
        self.epsilon_decay_value = self.epsilon / (END_EPSILON_DECAYING - START_EPSILON_DECAYING)

    # TODO
    def get_state(self, game):
        drone = game.drone
        state = [drone.x, drone.y]
        return np.array(state, dtype=int)

    # Random moves: tradeoff exploration / exploitation
    def get_action(self, state, episode):
        if END_EPSILON_DECAYING >= episode >= START_EPSILON_DECAYING:
            self.epsilon -= self.epsilon_decay_value
        final_move = [0, 0, 0, 0]
        if random.randint(0, 200) < self.epsilon:
            move = random.randint(0, 3)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1
        return final_move

    # Storing memory
    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))  # popleft if MAX_MEMORY is reached

    # TODO
    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE)  # list of tuples
        else:
            mini_sample = self.memory
        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    # Updating Q values
    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)
def __init__(self):
    self.n_games = 0
    self.n_state = 14
    self.frame_to_read = 1
    self.epsilon = 0.4
    self.gamma = 0.8
    self.memory = deque(maxlen=MAX_MEM)
    self.states = deque(maxlen=self.frame_to_read)
    for _ in range(self.frame_to_read):
        self.states.append([0 for _ in range(self.n_state)])
    self.trainer = QTrainer(self.n_state * self.frame_to_read, LR,
                            self.n_state * self.frame_to_read, [256, 256], 3, self.gamma)
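This variant keeps the last `frame_to_read` observations in `self.states` and sizes the trainer's input as `n_state * frame_to_read`, but the snippet does not show how the buffer becomes a model input. Below is a minimal sketch of one way to do that; the helper name `stack_frames` is hypothetical, not taken from the project.

import numpy as np
from collections import deque

def stack_frames(frames: deque, new_state) -> np.ndarray:
    # Append the newest observation; the deque's maxlen drops the oldest frame.
    frames.append(new_state)
    # Flatten all buffered frames into one vector of length
    # n_state * frame_to_read, matching the trainer's input size above.
    return np.asarray(frames, dtype=np.float32).reshape(-1)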
def __init__(self):
    self.numberOfGames = 0
    self.epsilon = 0  # controls randomness
    self.gamma = 0.9  # discount rate, < 1
    # will popleft if there is too much in memory
    self.memory = deque(maxlen=maxMemory)
    self.model = Linear_QNet(11, 256, 3)
    if os.path.isfile('./model/model.pth'):
        model_folder_path = './model/model.pth'
        self.model.load_state_dict(torch.load(model_folder_path))
    self.trainer = QTrainer(self.model, lr=learningRate, gamma=self.gamma)
class Agent:

    def __init__(self):
        self.epsilon = 0.999
        self.gamma = 0.9
        self.memory = deque(maxlen=MAX_MEMORY)
        self.model = Linear_QNet(2, 256, 4)
        self.trainer = QTrainer(self.model, LR, self.gamma)
        self.epsilon_decay_value = 0.998

    def get_state(self, game):
        # drone = game.drone
        # [game.drone_x, game.drone_y, game.man_x, game.man_y]
        state = [game.drone_x, game.drone_y]
        return np.array(state, dtype=int)

    def get_action(self, state, episode):
        self.epsilon *= self.epsilon_decay_value
        if np.random.random() < self.epsilon:
            # take a random action
            move = np.random.randint(0, 4)
            return move
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            return move

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE)
        else:
            mini_sample = self.memory
        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)
def __init__(self, game, pars=dict()):
    """
    (Agent, Snake, dict) -> None

    Initialize the agent: read any overrides passed in from the JSON
    parameter file, set the hyperparameters, and build the model and trainer.
    """
    self.n_games = 0
    self.epsilon = pars.get('eps', EPSILON)
    self.eps = pars.get('eps', EPSILON)
    self.gamma = pars.get('gamma', GAMMA)  # discount rate
    self.eps_range = pars.get('eps_range', EPS_RANGE)
    print(self.epsilon, self.eps)
    self.memory = deque(maxlen=MAX_MEMORY)  # popleft()
    self.model = Linear_QNet(len(game.get_state()), pars.get('hidden_size', HIDDEN_SIZE), OUTPUT_SIZE)
    self.trainer = QTrainer(self.model, lr=pars.get('lr', LR), gamma=self.gamma)
    self.game = game
class Agent:

    def __init__(self):
        self.memory = deque(maxlen=MAX_MEM)
        self.n_games: int = 0
        self.epsilon = 0
        self.gamma = 0.9
        self.model = Q_Net(11, 256, 3)
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))  # popleft if MAX_MEMORY is reached

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE)  # list of tuples
        else:
            mini_sample = self.memory
        for state, action, reward, next_state, done in mini_sample:
            self.trainer.train_step(state, action, reward, next_state, done)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        self.epsilon = 80 - self.n_games
        final_move = [0, 0, 0]
        if random.randint(0, 200) < self.epsilon:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1
        return final_move
class Agent:

    def __init__(self, args, model):
        self.parameters_file = args.parameters_file
        self.args = args
        self.parameters = yaml.load(open(self.parameters_file, 'r'), Loader=yaml.FullLoader)
        self.n_games = 0
        self.epsilon = 0  # randomness
        self.gamma = 0.9  # discount rate
        self.memory = deque(maxlen=self.parameters["max_memory"])  # popleft()
        self.model = model
        self.trainer = QTrainer(self.model, lr=self.parameters["lr"], gamma=self.gamma)

    def get_state(self, game):
        head = game.snake[0]
        point_l = Point(head.x - 20, head.y)
        point_r = Point(head.x + 20, head.y)
        point_u = Point(head.x, head.y - 20)
        point_d = Point(head.x, head.y + 20)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN

        state = [
            # Danger is straight
            (dir_r and game.is_collision(point_r)) or
            (dir_l and game.is_collision(point_l)) or
            (dir_u and game.is_collision(point_u)) or
            (dir_d and game.is_collision(point_d)),

            # Danger is right
            (dir_u and game.is_collision(point_r)) or
            (dir_d and game.is_collision(point_l)) or
            (dir_l and game.is_collision(point_u)) or
            (dir_r and game.is_collision(point_d)),

            # Danger is left
            (dir_d and game.is_collision(point_r)) or
            (dir_u and game.is_collision(point_l)) or
            (dir_r and game.is_collision(point_u)) or
            (dir_l and game.is_collision(point_d)),

            # Move direction
            dir_l,
            dir_r,
            dir_u,
            dir_d,

            # Food location
            game.food.x < game.head.x,  # food left
            game.food.x > game.head.x,  # food right
            game.food.y < game.head.y,  # food up
            game.food.y > game.head.y   # food down
        ]
        return np.array(state, dtype=int)  # converting to 0 or 1

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))  # popleft if MAX_MEMORY is reached

    def train_long_memory(self):
        if len(self.memory) > self.parameters["batch_size"]:
            mini_sample = random.sample(self.memory, self.parameters["batch_size"])  # list of tuples
        else:
            mini_sample = self.memory
        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def predict(self, state):
        state_tensor = torch.tensor(state, dtype=torch.float)
        prediction = self.model(state_tensor)  # move depends on the model
        move = torch.argmax(prediction).item()
        return move

    def get_action(self, state):
        move = 0
        final_move = [0, 0, 0]
        # random moves: tradeoff exploration / exploitation
        if self.args.use_trained:
            move = self.predict(state)
        else:
            self.epsilon = 100 - self.n_games
            if random.randint(0, 200) < self.epsilon:
                move = random.randint(0, 2)
            else:
                move = self.predict(state)
        final_move[move] = 1
        return final_move
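The agent above reads the keys max_memory, batch_size, and lr from a YAML file passed in as args.parameters_file, but that file is not part of this section. A minimal sketch of such a file and how it is parsed; only the key names come from the code above, the values are placeholders.

import yaml

# Placeholder parameters; only the keys are taken from the agent code above.
example_parameters = """
max_memory: 100000
batch_size: 1000
lr: 0.001
"""

parameters = yaml.load(example_parameters, Loader=yaml.FullLoader)
print(parameters["max_memory"], parameters["batch_size"], parameters["lr"])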
class Agent:

    def __init__(self):
        self.n_games = 0
        self.epsilon = 0  # randomness
        self.gamma = 0.9  # discount rate
        self.model = Linear_Qnet(11, 256, 3)
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
        self.memory = deque(maxlen=MAX_MEMEORY)

    def get_state(self, game):
        head = game.snake[0]
        point_l = Point(head.x - 20, head.y)
        point_r = Point(head.x + 20, head.y)
        point_u = Point(head.x, head.y - 20)
        point_d = Point(head.x, head.y + 20)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN

        state = [
            # danger straight
            (dir_r and game.is_collison(point_r)) or
            (dir_l and game.is_collison(point_l)) or
            (dir_u and game.is_collison(point_u)) or
            (dir_d and game.is_collison(point_d)),

            # danger right
            (dir_u and game.is_collison(point_r)) or
            (dir_d and game.is_collison(point_l)) or
            (dir_l and game.is_collison(point_u)) or
            (dir_r and game.is_collison(point_d)),

            # danger left
            (dir_d and game.is_collison(point_r)) or
            (dir_u and game.is_collison(point_l)) or
            (dir_r and game.is_collison(point_u)) or
            (dir_l and game.is_collison(point_d)),

            # move direction
            dir_l,
            dir_r,
            dir_u,
            dir_d,

            # food location
            game.food.x < game.head.x,  # food left
            game.food.x > game.head.x,  # food right
            game.food.y < game.head.y,  # food up
            game.food.y > game.head.y   # food down
        ]
        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE)  # list of tuples
        else:
            mini_sample = self.memory
        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        # random moves: tradeoff exploration / exploitation
        self.epsilon = 80 - self.n_games
        final_move = [0, 0, 0]
        if random.randint(0, 200) < self.epsilon:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1
        return final_move
class Agent:

    def __init__(self):
        self.num_games = 0
        self.epsilon = 0  # to control the randomness
        self.gamma = 0.9  # discount rate
        self.memory = deque(maxlen=MAX_MEMORY)  # popleft
        self.model = Linear_QNet(11, 256, 3)
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)

    def get_state(self, env):
        head = env.snake[0]
        point_l = Point(head.x - 20, head.y)
        point_r = Point(head.x + 20, head.y)
        point_u = Point(head.x, head.y - 20)
        point_d = Point(head.x, head.y + 20)

        dir_l = env.snake_direction == Direction.LEFT
        dir_r = env.snake_direction == Direction.RIGHT
        dir_u = env.snake_direction == Direction.UP
        dir_d = env.snake_direction == Direction.DOWN

        state = [
            # Danger straight
            (dir_r and env.is_collision(point_r)) or
            (dir_l and env.is_collision(point_l)) or
            (dir_u and env.is_collision(point_u)) or
            (dir_d and env.is_collision(point_d)),

            # Danger right
            (dir_u and env.is_collision(point_r)) or
            (dir_d and env.is_collision(point_l)) or
            (dir_l and env.is_collision(point_u)) or
            (dir_r and env.is_collision(point_d)),

            # Danger left
            (dir_d and env.is_collision(point_r)) or
            (dir_u and env.is_collision(point_l)) or
            (dir_r and env.is_collision(point_u)) or
            (dir_l and env.is_collision(point_d)),

            # Move direction
            dir_l,
            dir_r,
            dir_u,
            dir_d,

            # Food location
            env.food.x < env.head_position.x,  # food left
            env.food.x > env.head_position.x,  # food right
            env.food.y < env.head_position.y,  # food up
            env.food.y > env.head_position.y,  # food down
        ]
        return np.array(state, dtype=int)

    def store_data(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))  # popleft if MAX_MEMORY is reached

    def train_long_memory(self):
        # grab up to BATCH_SIZE samples from the memory
        if len(self.memory) > BATCH_SIZE:
            batch_sample = random.sample(self.memory, BATCH_SIZE)
        else:
            batch_sample = self.memory
        states, actions, rewards, next_states, dones = zip(*batch_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        # random moves: trade-off between exploration and exploitation
        self.epsilon = 80 - self.num_games
        move = [0, 0, 0]
        if random.randint(0, 200) < self.epsilon:
            move_idx = random.randint(0, 2)
            move[move_idx] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move_idx = torch.argmax(prediction).item()
            move[move_idx] = 1
        return move
class Agent:

    def __init__(self):
        self.number_of_games = 0
        self.epsilon = 0  # randomness
        self.gamma = 0.8  # discount rate
        self.memory = deque(maxlen=MAX_MEMORY)
        self.model = LinearQNet(11, 256, 3)
        self.trainer = QTrainer(self.model, learning_rate=LR, gamma=self.gamma)

    def get_state(self, game):
        head = game.snake[0]
        point_left = Point(head.x - BLOCK_SIZE, head.y)
        point_right = Point(head.x + BLOCK_SIZE, head.y)
        point_up = Point(head.x, head.y - BLOCK_SIZE)
        point_down = Point(head.x, head.y + BLOCK_SIZE)

        direction_left = game.direction == Direction.LEFT
        direction_right = game.direction == Direction.RIGHT
        direction_up = game.direction == Direction.UP
        direction_down = game.direction == Direction.DOWN

        state = [
            # Danger straight
            (direction_right and game.is_collision(point_right)) or
            (direction_left and game.is_collision(point_left)) or
            (direction_up and game.is_collision(point_up)) or
            (direction_down and game.is_collision(point_down)),

            # Danger right
            (direction_up and game.is_collision(point_right)) or
            (direction_down and game.is_collision(point_left)) or
            (direction_left and game.is_collision(point_up)) or
            (direction_right and game.is_collision(point_down)),

            # Danger left
            (direction_down and game.is_collision(point_right)) or
            (direction_up and game.is_collision(point_left)) or
            (direction_right and game.is_collision(point_up)) or
            (direction_left and game.is_collision(point_down)),

            # Move direction
            direction_left,
            direction_right,
            direction_up,
            direction_down,

            # Food location
            game.food.x < game.head.x,  # food left
            game.food.x > game.head.x,  # food right
            game.food.y < game.head.y,  # food up
            game.food.y > game.head.y
        ]
        return np.array(state, dtype=int)

    def get_action(self, state):
        # random moves: tradeoff between exploration / exploitation
        self.epsilon = 80 - self.number_of_games / 10
        final_move = [0, 0, 0]
        if random.randint(0, 200) < self.epsilon:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            initial_state = torch.tensor(state, dtype=torch.float)
            prediction = self.model(initial_state)
            move = torch.argmax(prediction).item()
            final_move[move] = 1
        return final_move

    def remember(self, state, action, reward, next_state, game_over):
        self.memory.append((state, action, reward, next_state, game_over))

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            sample = random.sample(self.memory, BATCH_SIZE)
        else:
            sample = self.memory
        states, actions, rewards, next_states, game_overs = zip(*sample)
        self.trainer.train_step(states, actions, rewards, next_states, game_overs)

    def train_short_memory(self, state, action, reward, next_state, game_over):
        self.trainer.train_step(state, action, reward, next_state, game_over)
class Agent:
    # Agent class. The agent mediates between the model and the environment (the game).

    def __init__(self):
        with open('games.txt', 'r') as f:
            self.n_games = int(f.read())
        print(self.n_games)
        self.epsilon = 0
        self.gamma = 0.9
        self.memory = deque(maxlen=MAX_MEMORY)
        self.model = Linear_QNet(11, 256, 3)
        #self.model.load_state_dict(torch.load('model/model.pth'))
        self.model.eval()
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
        # Initialization. First it loads the saved results, sets a few constants,
        # and loads the neural network from the file 'model.pth'.
        # If you are running this program for the first time, change lines 25-27
        # to "self.n_games = 0" and delete line 33.

    def get_state(self, game):
        # Function the agent uses to get information about the environment.
        head = game.snake[0]
        point_l = Point(head.x - BLOCK_SIZE, head.y)
        point_r = Point(head.x + BLOCK_SIZE, head.y)
        point_u = Point(head.x, head.y - BLOCK_SIZE)
        point_d = Point(head.x, head.y + BLOCK_SIZE)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN
        # Definitions of the variables used below.

        state = [
            # Danger ahead?
            (dir_r and game.is_collision(point_r)) or
            (dir_l and game.is_collision(point_l)) or
            (dir_u and game.is_collision(point_u)) or
            (dir_d and game.is_collision(point_d)),

            # Danger to the right?
            (dir_u and game.is_collision(point_r)) or
            (dir_d and game.is_collision(point_l)) or
            (dir_l and game.is_collision(point_u)) or
            (dir_r and game.is_collision(point_d)),

            # Danger to the left?
            (dir_d and game.is_collision(point_r)) or
            (dir_u and game.is_collision(point_l)) or
            (dir_r and game.is_collision(point_u)) or
            (dir_l and game.is_collision(point_d)),

            # Snake direction.
            dir_l,
            dir_r,
            dir_u,
            dir_d,

            # Relative position of the food.
            game.food.x < game.head.x,
            game.food.x > game.head.x,
            game.food.y < game.head.y,
            game.food.y > game.head.y
        ]
        return np.array(state, dtype=int)  # Returns the data to the agent.

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE)
        else:
            mini_sample = self.memory
        # Replay training. (After a real game, the model replays the game once more.)
        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)
        # Real-time (per-step) training.

    def get_action(self, state):
        self.epsilon = 500 - self.n_games
        final_move = [0, 0, 0]
        if random.randint(0, 500) < self.epsilon:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1
        return final_move
class Agent:

    def __init__(self, use_checkpoint=False):
        self.no_of_games = 0
        self.epsilon = 0  # randomness
        self.gamma = 0.9  # discount rate
        self.memory = deque(maxlen=MAX_MEMORY)
        self.model = Linear_QNet(11, 256, 3)
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)

        if use_checkpoint:
            checkpoint = torch.load("./model/model.pth")
            self.model.load_state_dict(checkpoint)
            self.model.eval()

    def get_state(self, game):
        head = game.snake[0]
        point_l = Point(head.x - BLOCK_SIZE, head.y)
        point_r = Point(head.x + BLOCK_SIZE, head.y)
        point_u = Point(head.x, head.y - BLOCK_SIZE)
        point_d = Point(head.x, head.y + BLOCK_SIZE)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN

        state = [
            # Danger straight
            (dir_r and game.is_collision(point_r)) or
            (dir_l and game.is_collision(point_l)) or
            (dir_u and game.is_collision(point_u)) or
            (dir_d and game.is_collision(point_d)),

            # Danger right
            (dir_u and game.is_collision(point_r)) or
            (dir_d and game.is_collision(point_l)) or
            (dir_l and game.is_collision(point_u)) or
            (dir_r and game.is_collision(point_d)),

            # Danger left
            (dir_d and game.is_collision(point_r)) or
            (dir_u and game.is_collision(point_l)) or
            (dir_r and game.is_collision(point_u)) or
            (dir_l and game.is_collision(point_d)),

            # Move direction
            dir_l,
            dir_r,
            dir_u,
            dir_d,

            # Food location
            game.food.x < game.head.x,  # Food left
            game.food.x > game.head.x,  # Food right
            game.food.y < game.head.y,  # Food up
            game.food.y > game.head.y,  # Food down
        ]
        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, game_over):
        self.memory.append((state, action, reward, next_state, game_over))

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE)
        else:
            mini_sample = self.memory
        states, actions, rewards, next_states, game_overs = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, game_overs)

    def train_short_memory(self, state, action, reward, next_state, game_over):
        self.trainer.train_step(state, action, reward, next_state, game_over)

    def get_action(self, state):
        self.epsilon = 80 - self.no_of_games
        action = [0, 0, 0]
        if random.randint(0, 200) < self.epsilon:
            move = random.randint(0, 2)
            action[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            action[move] = 1
        return action
class Agent:

    def __init__(self):
        self.n_game = 0
        self.epsilon = 0  # randomness
        self.gamma = 0.9  # discount rate
        self.memory = deque(maxlen=MAX_MEMORY)  # popleft()
        self.model = Linear_QNet(11, 256, 3)
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
        # for n, p in self.model.named_parameters():
        #     print(p.device, '', n)
        # self.model.to('cuda')
        # for n, p in self.model.named_parameters():
        #     print(p.device, '', n)
        # TODO: model, trainer

    # state (11 values):
    # [danger straight, danger right, danger left,
    #  direction left, direction right, direction up, direction down,
    #  food left, food right, food up, food down]
    def get_state(self, game):
        head = game.snake[0]
        point_l = Point(head.x - BLOCK_SIZE, head.y)
        point_r = Point(head.x + BLOCK_SIZE, head.y)
        point_u = Point(head.x, head.y - BLOCK_SIZE)
        point_d = Point(head.x, head.y + BLOCK_SIZE)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN

        state = [
            # Danger straight
            (dir_u and game.is_collision(point_u)) or
            (dir_d and game.is_collision(point_d)) or
            (dir_l and game.is_collision(point_l)) or
            (dir_r and game.is_collision(point_r)),

            # Danger right
            (dir_u and game.is_collision(point_r)) or
            (dir_d and game.is_collision(point_l)) or
            (dir_l and game.is_collision(point_u)) or
            (dir_r and game.is_collision(point_d)),

            # Danger left
            (dir_d and game.is_collision(point_r)) or
            (dir_u and game.is_collision(point_l)) or
            (dir_r and game.is_collision(point_u)) or
            (dir_l and game.is_collision(point_d)),

            # Move direction
            dir_l,
            dir_r,
            dir_u,
            dir_d,

            # Food location
            game.food.x < game.head.x,  # food is to the left
            game.food.x > game.head.x,  # food is to the right
            game.food.y < game.head.y,  # food is up
            game.food.y > game.head.y   # food is down
        ]
        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))  # popleft if memory is exceeded

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE)
        else:
            mini_sample = self.memory
        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        # random moves: tradeoff exploration / exploitation
        self.epsilon = 80 - self.n_game
        final_move = [0, 0, 0]
        if random.randint(0, 200) < self.epsilon:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            # keep the input on the same device as the model
            # (move both to CUDA together if GPU inference is wanted)
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)  # prediction by the model
            move = torch.argmax(prediction).item()
            final_move[move] = 1
        return final_move
class Agent:

    def __init__(self):
        self.n_games = 0
        self.epsilon = 0  # randomness
        self.gamma = 0.9  # discount rate
        self.memory = deque(maxlen=max_memory)
        self.model = Linear_QNet(11, 256, 3)

        PATH = './model/model.pth'
        if os.path.exists(PATH):
            self.model.load_state_dict(torch.load(PATH))
            # self.model.eval()
            print('Pretrained = True')

        self.trainer = QTrainer(self.model, lr=lr, gamma=self.gamma)

    def get_state(self, game):
        head = game.snake[0]
        point_l = Point(head.x - 20, head.y)
        point_r = Point(head.x + 20, head.y)
        point_u = Point(head.x, head.y - 20)
        point_d = Point(head.x, head.y + 20)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN

        state = [
            # Danger straight
            (dir_r and game.is_collision(point_r)) or
            (dir_l and game.is_collision(point_l)) or
            (dir_u and game.is_collision(point_u)) or
            (dir_d and game.is_collision(point_d)),

            # Danger right
            (dir_u and game.is_collision(point_r)) or
            (dir_d and game.is_collision(point_l)) or
            (dir_l and game.is_collision(point_u)) or
            (dir_r and game.is_collision(point_d)),

            # Danger left
            (dir_d and game.is_collision(point_r)) or
            (dir_u and game.is_collision(point_l)) or
            (dir_r and game.is_collision(point_u)) or
            (dir_l and game.is_collision(point_d)),

            # Move direction
            dir_l,
            dir_r,
            dir_u,
            dir_d,

            # Food location
            game.food.x < game.head.x,  # food left
            game.food.x > game.head.x,  # food right
            game.food.y < game.head.y,  # food up
            game.food.y > game.head.y   # food down
        ]
        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def train_long_memory(self):
        if len(self.memory) > batch_size:
            mini_sample = random.sample(self.memory, batch_size)  # list of tuples of size = 1000
        else:
            mini_sample = self.memory
        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        # random moves: tradeoff exploration / exploitation
        self.epsilon = 80 - self.n_games
        final_move = [0, 0, 0]
        if random.randint(0, 200) < self.epsilon:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1
        return final_move
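None of the snippets in this section show the loop that drives these agents. Below is a minimal sketch of the usual driver, assuming a game object that exposes reset() and a play_step(action) method returning (reward, done, score); those names, the SnakeGameAI class, and model.save() are assumptions rather than code from the projects above.

def train():
    record = 0
    agent = Agent()
    game = SnakeGameAI()  # assumed game class exposing play_step() and reset()
    while True:
        state_old = agent.get_state(game)          # observe
        final_move = agent.get_action(state_old)   # epsilon-greedy action
        reward, done, score = game.play_step(final_move)
        state_new = agent.get_state(game)

        # learn from the single step just played, then store it for replay
        agent.train_short_memory(state_old, final_move, reward, state_new, done)
        agent.remember(state_old, final_move, reward, state_new, done)

        if done:
            # end of game: reset and replay a batch from memory
            game.reset()
            agent.n_games += 1
            agent.train_long_memory()
            if score > record:
                record = score
                agent.model.save()  # assumes the model exposes a save() helper
            print('Game', agent.n_games, 'Score', score, 'Record', record)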
class Agent:

    def __init__(self):
        self.n_games = 0
        self.epsilon = 0  # randomness
        self.gamma = 0.9  # discount rate
        self.memory = deque(maxlen=MAX_MEMORY)  # popleft()
        self.model = Linear_QNet(4, 256, 3)
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)

    def get_state(self, game):
        head = game.ship.center
        dir_l = game.ship.moving_left == True
        dir_r = game.ship.moving_right == True
        dir_s = game.ship.moving_left == False and game.ship.moving_right == False
        alienlen10 = len(game.aliens) < 10
        alienlen5 = len(game.aliens) < 5

        state = [
            head,
            # alienlen10,
            # alienlen5,
            dir_l,
            dir_r,
            dir_s,
            # game.ship.rect.left == 0,
            # game.ship.rect.right == game.ship.screen_rect.right,
        ]
        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))  # popleft if MAX_MEMORY is reached

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE)  # list of tuples
        else:
            mini_sample = self.memory
        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)
        # for state, action, reward, next_state, done in mini_sample:
        #     self.trainer.train_step(state, action, reward, next_state, done)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        # random moves: tradeoff exploration / exploitation
        self.epsilon = 80 - self.n_games
        final_move = [0, 0, 0]
        if random.randint(0, 200) < self.epsilon:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1
        return final_move
class Agent:

    def __init__(self):
        self.n_games = 0
        self.epsilon = 0  # randomness
        self.gamma = 0.9  # discount rate
        self.memory = deque(maxlen=MAX_MEMORY)  # popleft()
        self.model = Linear_QNet(11, 256, 3)  # input layer: 11, hidden: 256, output: 3
        self.model.load_state_dict(torch.load('./optimized_model/model.pth'))
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)

    def get_state(self, game):
        head = game.snake[0]
        BLOCK_SIZE = 20

        # Points to check for danger
        point_l = Point(head.x - BLOCK_SIZE, head.y)
        point_r = Point(head.x + BLOCK_SIZE, head.y)
        point_u = Point(head.x, head.y - BLOCK_SIZE)
        point_d = Point(head.x, head.y + BLOCK_SIZE)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN

        state = [
            # Danger straight
            (dir_r and game.is_collision(point_r)) or
            (dir_l and game.is_collision(point_l)) or
            (dir_u and game.is_collision(point_u)) or
            (dir_d and game.is_collision(point_d)),

            # Danger right
            (dir_u and game.is_collision(point_r)) or
            (dir_d and game.is_collision(point_l)) or
            (dir_l and game.is_collision(point_u)) or
            (dir_r and game.is_collision(point_d)),

            # Danger left
            (dir_d and game.is_collision(point_r)) or
            (dir_u and game.is_collision(point_l)) or
            (dir_r and game.is_collision(point_u)) or
            (dir_l and game.is_collision(point_d)),

            # Move direction
            dir_l,
            dir_r,
            dir_u,
            dir_d,

            # Food location
            game.food.x < game.head.x,  # food left
            game.food.x > game.head.x,  # food right
            game.food.y < game.head.y,  # food up
            game.food.y > game.head.y   # food down
        ]
        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))  # popleft if MAX_MEMORY is reached

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE)  # list of tuples
        else:
            mini_sample = self.memory
        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)
        # for state, action, reward, next_state, done in mini_sample:
        #     self.trainer.train_step(state, action, reward, next_state, done)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        # random moves: tradeoff exploration / exploitation
        self.epsilon = 80 - self.n_games
        final_move = [0, 0, 0]
        # This was the original condition; we made a small change to it.
        if random.randint(0, 200) < self.epsilon and False:
            # if random.randint(0, 200) < 20 and self.n_games < 90:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model.forward(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1
        return final_move
class Agent:

    def __init__(self):
        self.n_games = 0
        self.n_revise = 0
        self.epsilon = 0  # randomness
        self.gamma = 0.9  # discount rate
        self.memory = deque(maxlen=MAX_MEMORY)  # popleft()
        self.statusGame = []
        self.model = Linear_QNet(11, 256, 3)
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)

    def get_state(self, game):
        head = game.snake[0]
        point_l = Point(head.x - 20, head.y)
        point_r = Point(head.x + 20, head.y)
        point_u = Point(head.x, head.y - 20)
        point_d = Point(head.x, head.y + 20)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN

        state = [
            # Danger straight
            (dir_r and game.is_collision(point_r)) or
            (dir_l and game.is_collision(point_l)) or
            (dir_u and game.is_collision(point_u)) or
            (dir_d and game.is_collision(point_d)),

            # Danger right
            (dir_u and game.is_collision(point_r)) or
            (dir_d and game.is_collision(point_l)) or
            (dir_l and game.is_collision(point_u)) or
            (dir_r and game.is_collision(point_d)),

            # Danger left
            (dir_d and game.is_collision(point_r)) or
            (dir_u and game.is_collision(point_l)) or
            (dir_r and game.is_collision(point_u)) or
            (dir_l and game.is_collision(point_d)),

            # Move direction
            dir_l,
            dir_r,
            dir_u,
            dir_d,

            # Food location
            game.food.x < game.head.x,  # food left
            game.food.x > game.head.x,  # food right
            game.food.y < game.head.y,  # food up
            game.food.y > game.head.y   # food down
        ]
        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))  # popleft if MAX_MEMORY is reached

    def addStatus(self, Snake2, Score2, food2, frame_iteration, direction, old_record):
        Snake = []
        food = [food2.x, food2.y]
        for itemSnake2 in Snake2:
            item = [itemSnake2.x, itemSnake2.y]
            Snake.append(item)
        self.statusGame.append([Snake, Score2, food, frame_iteration, direction, old_record])

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE)  # list of tuples
        else:
            mini_sample = self.memory
        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)
        # for state, action, reward, next_state, done in mini_sample:
        #     self.trainer.train_step(state, action, reward, next_state, done)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        # random moves: tradeoff exploration / exploitation
        self.epsilon = 80 - self.n_games
        final_move = [0, 0, 0]
        if random.randint(0, 200) < self.epsilon:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1
        return final_move
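Most of the agents in this section construct a Linear_QNet(input_size, hidden_size, output_size) and a QTrainer(model, lr, gamma) whose train_step() accepts either a single transition or a batch, but neither class appears here. Below is a minimal sketch that is consistent with those call sites; the implementation details are assumptions, not code taken from the projects above.

import os

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


class Linear_QNet(nn.Module):
    # Two-layer feed-forward Q-network: state -> hidden -> one Q-value per action.
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = F.relu(self.linear1(x))
        return self.linear2(x)

    def save(self, file_name='model.pth'):
        # Save weights under ./model/, matching the checkpoint paths loaded above.
        model_folder_path = './model'
        os.makedirs(model_folder_path, exist_ok=True)
        torch.save(self.state_dict(), os.path.join(model_folder_path, file_name))


class QTrainer:
    def __init__(self, model, lr, gamma):
        self.model = model
        self.gamma = gamma
        self.optimizer = optim.Adam(model.parameters(), lr=lr)
        self.criterion = nn.MSELoss()

    def train_step(self, state, action, reward, next_state, done):
        # Accepts a single transition or a batch (tuples of arrays, as passed above).
        state = torch.tensor(np.array(state), dtype=torch.float)
        next_state = torch.tensor(np.array(next_state), dtype=torch.float)
        action = torch.tensor(np.array(action), dtype=torch.long)
        reward = torch.tensor(np.array(reward), dtype=torch.float)

        if len(state.shape) == 1:
            # single transition -> add a batch dimension
            state = torch.unsqueeze(state, 0)
            next_state = torch.unsqueeze(next_state, 0)
            action = torch.unsqueeze(action, 0)
            reward = torch.unsqueeze(reward, 0)
            done = (done,)

        # Q-learning target: r if the episode ended,
        # otherwise r + gamma * max_a Q(next_state, a)
        pred = self.model(state)
        target = pred.clone()
        for idx in range(len(done)):
            Q_new = reward[idx]
            if not done[idx]:
                Q_new = reward[idx] + self.gamma * torch.max(self.model(next_state[idx]))
            target[idx][torch.argmax(action[idx]).item()] = Q_new

        self.optimizer.zero_grad()
        loss = self.criterion(target, pred)
        loss.backward()
        self.optimizer.step()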