def run(params):
    """
    Run the DQN algorithm, based on the parameters previously set.
    """
    pygame.init()
    agent = DQNAgent(params)
    agent = agent.to(DEVICE)
    agent.optimizer = optim.Adam(agent.parameters(), weight_decay=0, lr=params['learning_rate'])
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    total_score = 0
    while counter_games < params['episodes']:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()

        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food

        # Perform first move
        initialize_game(player1, game, food1, agent, params['batch_size'])
        if params['display']:
            display(player1, food1, game, record)

        while not game.crash:
            if not params['train']:
                agent.epsilon = 0.01
            else:
                # agent.epsilon is set to give randomness to actions
                agent.epsilon = 1 - (counter_games * params['epsilon_decay_linear'])

            # get old state
            state_old = agent.get_state(game, player1, food1)

            # perform random actions based on agent.epsilon, or choose the action
            if random.uniform(0, 1) < agent.epsilon:
                final_move = np.eye(3)[randint(0, 2)]
            else:
                # predict action based on the old state
                with torch.no_grad():
                    state_old_tensor = torch.tensor(state_old.reshape((1, 11)), dtype=torch.float32).to(DEVICE)
                    prediction = agent(state_old_tensor)
                    final_move = np.eye(3)[np.argmax(prediction.detach().cpu().numpy()[0])]

            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1, agent)
            state_new = agent.get_state(game, player1, food1)

            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)

            if params['train']:
                # train short memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
                # store the new data into a long term memory
                agent.remember(state_old, final_move, reward, state_new, game.crash)

            record = get_record(game.score, record)
            if params['display']:
                display(player1, food1, game, record)
                pygame.time.wait(params['speed'])

        if params['train']:
            agent.replay_new(agent.memory, params['batch_size'])
        counter_games += 1
        total_score += game.score
        print(f'Game {counter_games} Score: {game.score}')
        score_plot.append(game.score)
        counter_plot.append(counter_games)

    mean, stdev = get_mean_stdev(score_plot)
    if params['train']:
        model_weights = agent.state_dict()
        torch.save(model_weights, params["weights_path"])
    if params['plot_score']:
        plot_seaborn(counter_plot, score_plot, params['train'])
    return total_score, mean, stdev
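For context, a minimal sketch of how run() might be invoked. The dictionary keys below are exactly the ones read inside run(); every value is an illustrative assumption rather than a project default, and DQNAgent(params) will likely expect additional keys (for example network layer sizes) that are defined elsewhere.

# Hypothetical configuration: keys match those read by run(), values are assumptions.
params = {
    'train': True,                    # run in training mode
    'episodes': 150,                  # number of games to play
    'batch_size': 1000,               # replay-memory batch size
    'learning_rate': 1e-4,            # Adam learning rate
    'epsilon_decay_linear': 1 / 100,  # per-game decrease of epsilon
    'speed': 50,                      # ms to wait between rendered frames
    'display': False,                 # render the game with pygame
    'plot_score': True,               # plot the scores at the end
    'weights_path': 'weights.pt',     # where to save the trained weights (assumed name)
}

total_score, mean, stdev = run(params)
print(f'Total: {total_score}  Mean: {mean:.2f}  Stdev: {stdev:.2f}')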
def run(params):
    """
    Run the DQN algorithm, based on the parameters previously set.
    """
    pygame.init()
    agent = DQNAgent(params)
    agent = agent.to(DEVICE)
    agent.optimizer = optim.Adam(agent.parameters(), weight_decay=0, lr=params['learning_rate'])
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    total_score = 0
    while counter_games < params['epoch']:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()

        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food

        # First move
        initialize_game(player1, game, food1, agent, params['batch_size'])
        if params['display']:
            display(player1, food1, game, record)

        steps = 0  # Moves since the last reward was received
        while (not game.crash) and (steps < 100):
            if not params['train']:
                agent.epsilon = 0.01
            else:
                # agent.epsilon gives randomness to actions
                agent.epsilon = 1 - (counter_games * params['epsilon_decay_linear'])

            # Get the old state
            state_old = agent.get_state(game, player1, food1)

            # Perform a random action based on agent.epsilon, or choose one from the old state
            if random.uniform(0, 1) < agent.epsilon:
                final_move = np.eye(3)[randint(0, 2)]
            else:
                # Predict the action based on the old state
                with torch.no_grad():
                    state_old_tensor = torch.tensor(state_old.reshape((1, 11)), dtype=torch.float32).to(DEVICE)
                    prediction = agent(state_old_tensor)
                    final_move = np.eye(3)[np.argmax(prediction.detach().cpu().numpy()[0])]

            # Perform the new move and get the new state
            player1.do_move(final_move, player1.x, player1.y, game, food1, agent)
            state_new = agent.get_state(game, player1, food1)

            # Set the reward for the new move
            reward = agent.set_reward(player1, game.crash)

            # When food is eaten, reset the step counter
            if reward > 0:
                steps = 0

            if params['train']:
                # Train short-term memory based on the new move and state
                agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
                # Store the new move in long-term memory
                agent.remember(state_old, final_move, reward, state_new, game.crash)

            record = get_record(game.score, record)
            if params['display']:
                display(player1, food1, game, record)
                pygame.time.wait(params['speed'])

            steps += 1

        if params['train']:
            agent.replay_new(agent.memory, params['batch_size'])
        counter_games += 1
        total_score += game.score
        print(f'Game {counter_games} Score: {game.score}')
        score_plot.append(game.score)
        counter_plot.append(counter_games)

    mean, stdev = get_mean_stdev(score_plot)
    if params['train']:
        model_weights = agent.state_dict()
        torch.save(model_weights, params["weights_path"])
    if params['plot_score']:
        plot_seaborn(counter_plot, score_plot, params['train'])
    return total_score, mean, stdev
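Compared with the first listing, this version reads the episode count from params['epoch'] and adds a steps counter: an episode also ends after 100 consecutive moves without a positive reward, which keeps a stalled snake from looping forever. The exploration schedule itself is unchanged; the small standalone sketch below, using an assumed epsilon_decay_linear of 1/100, just shows how that linear schedule behaves over training.

# Standalone sketch of the linear epsilon schedule used above.
# epsilon_decay_linear = 1/100 is an illustrative assumption.
epsilon_decay_linear = 1 / 100

for counter_games in (0, 25, 50, 75, 100, 150):
    epsilon = 1 - (counter_games * epsilon_decay_linear)
    # Once epsilon reaches 0 (here after 100 games), random.uniform(0, 1) < epsilon
    # is never true, so the agent stops taking random actions entirely.
    print(f'game {counter_games:3d}: epsilon = {epsilon:.2f}')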