def train():
    total_score = 0
    record = 0
    agent = Agent()
    game = SnakeGameAI()
    while True:
        # Previous state
        state_old = agent.get_state(game)

        # Move
        final_move = agent.get_action(state_old)

        # Perform the move and get the new state
        reward, done, score = game.play_step(final_move)
        state_new = agent.get_state(game)

        # Short-term training (train short)
        agent.train_short_memory(state_old, final_move, reward, state_new, done)

        # remember
        agent.remember(state_old, final_move, reward, state_new, done)

        if done:
            # Long-term training (train long)
            game.reset()
            agent.n_games += 1
            agent.train_long_memory()

            if score > record:
                record = score
                agent.model.save()

            print('Game', agent.n_games, 'Score', score, 'Record:', record)
def train(cfg) -> None:
    # run: experiment-tracker handle (W&B-style API) returned by init_logging
    run = init_logging(cfg)
    single_game_scores = []
    mean_game_scores = []
    smoothing = 15
    assert smoothing > 0
    record = 0

    agent = Agent(cfg.agent)
    if cfg.agent.model.initial_weights is not None:
        artifact = run.use_artifact(cfg.agent.model.initial_weights)
        datadir = artifact.download(root=None)
        state_filename = os.path.join(datadir, 'state.pth')
        agent.model.load(state_filename)
    game = SnakeGameAI(cfg.game)

    for game_idx in range(cfg.max_games):
        while True:
            state_old = agent.get_state(game)
            action = agent.get_action(state_old)
            reward, gameover, score = game.play_step(action)
            run.log({
                'game_id': game_idx,
                'reward': reward,
                'step_score': score
            })
            state = agent.get_state(game)

            agent.remember(state_old, action, reward, state, gameover)
            agent.train_short_memory(state_old, action, reward, state, gameover)

            if gameover:
                run.log({
                    'game_id': game_idx,
                    'game_score': score,
                    'game_time': game.frame_iteration
                })
                single_game_scores.append(score)
                # Mean over the last `smoothing` games; divide by the actual
                # window size so the early games are not biased toward zero
                window = single_game_scores[-smoothing:]
                mean_game_scores.append(sum(window) / len(window))

                agent.n_games += 1
                game.reset()
                agent.train_long_memory()

                if score > record:
                    record = score
                    model_path = agent.model.save(run.id)
                    fig = plot_scores(single_game_scores, mean_game_scores)
                    fig.savefig(str(model_path / 'training_progress.png'))

                log.info(f'[Game {agent.n_games}] Score: {score} ({record})')
                if cfg.plt_display:
                    plot_scores(single_game_scores, mean_game_scores)
                break

    run.summary.update({
        'high_score': record,
        'mean_latest_score': np.mean(single_game_scores[-10:])
    })
def train():
    plot_scores = []
    plot_mean_scores = []
    total_score = 0
    record = 0
    agent = Agent()
    game = SnakeGameAI()
    while True:
        # get old state
        state_old = agent.get_state(game)

        # get move
        final_move = agent.get_action(state_old)

        # perform move and get new state
        reward, done, score = game.play_step(final_move)
        state_new = agent.get_state(game)

        # train short memory
        agent.train_short_memory(state=state_old, action=final_move, reward=reward,
                                 next_state=state_new, done=done)

        # remember
        agent.remember(state=state_old, action=final_move, reward=reward,
                       next_state=state_new, done=done)

        if done:
            # train long memory, plot results
            game.reset()
            agent.n_games += 1
            agent.train_long_memory()

            if score > record:
                record = score
                agent.model.save()

            print('Game: ', agent.n_games, 'Score: ', score, 'Record: ', record)

            plot_scores.append(score)
            total_score += score
            mean_score = total_score / agent.n_games
            plot_mean_scores.append(mean_score)
            plot(plot_scores, plot_mean_scores)
def train():
    plot_scores = []
    plot_avg_scores = []
    total_score = 0
    max_score = 0
    agent = Agent()
    game = SnakeGameAI()
    while True:
        # get current state of the game
        state_old = agent.get_state(game)

        # get the move based on the current state
        final_move = agent.get_action(state_old)

        # actually perform the move in the game
        reward, done, score = game.play_step(final_move)
        state_new = agent.get_state(game)

        # train the short-term memory of the agent
        agent.train_short_memory(state_old, final_move, reward, state_new, done)

        # remember this iteration
        agent.remember(state_old, final_move, reward, state_new, done)

        if done:
            # reset the environment for another game
            game.reset()
            # increase game counter
            agent.num_games += 1
            # train the long-term memory after a game
            agent.train_long_memory()

            # update the highscore if necessary
            if score > max_score:
                max_score = score
                agent.model.save()

            print('Game', agent.num_games, 'Score', score, 'Highscore', max_score)

            plot_scores.append(score)
            total_score += score
            avg_score = total_score / agent.num_games
            plot_avg_scores.append(avg_score)
            plot(plot_scores, plot_avg_scores)
def train():
    plot_scores = []
    plot_mean_scores = []
    total_score = 0
    record = 0
    agent = Agent()
    game = SnakeGameAI()
    while True:  # the good old while True
        # get old state
        state_old = agent.get_state(game)

        # get move prediction
        final_move = agent.get_action(state_old)

        # perform move and get new state
        reward, done, score = game.play_step(final_move)
        state_new = agent.get_state(game)

        # train short memory with the information we just got: playing action A
        # in state S yielded reward R and landed us in new state S'
        agent.train_short_memory(state_old, final_move, reward, state_new, done)

        # remember for after-epoch learning
        agent.remember(state_old, final_move, reward, state_new, done)

        # after every epoch
        if done:
            # train long memory, plot result
            game.reset()
            agent.n_games += 1
            agent.train_long_memory()

            if score > record:
                record = score
                agent.model.save()

            print('Game', agent.n_games, 'Score', score, 'Record:', record)

            plot_scores.append(score)
            total_score += score
            mean_score = total_score / agent.n_games
            plot_mean_scores.append(mean_score)
            plot(plot_scores, plot_mean_scores)
def train():
    plot_scores = []
    plot_mean_scores = []
    total_score = 0
    highscore = 0
    agent = Agent()
    game = SnakeGameAI()
    while True:
        # Get old state
        state_old = agent.get_state(game)

        # Get move
        final_move = agent.get_action(state_old)

        # Perform move and get new state
        reward, done, score = game.play_step(final_move)
        state_new = agent.get_state(game)

        # Train short memory
        agent.train_short_memory(state_old, final_move, reward, state_new, done)

        # Remember
        agent.remember(state_old, final_move, reward, state_new, done)

        if done:
            # Train long memory
            game.reset()
            agent.n_games += 1
            agent.train_long_memory()

            if score > highscore:
                highscore = score
                agent.model.save()

            print('Game', agent.n_games, 'Score', score, 'Highscore:', highscore)

            plot_scores.append(score)
            total_score += score
            mean_score = total_score / agent.n_games
            plot_mean_scores.append(mean_score)
            plot(plot_scores, plot_mean_scores)
def train():
    plot_scores = []
    plot_mean_scores = []
    total_score = 0
    best_score = 0
    agent = Agent(use_checkpoint=True)
    game = SnakeGameAI()
    while True:
        # get current state
        state_curr = agent.get_state(game)

        # get action
        action = agent.get_action(state_curr)

        # perform action & get new state
        reward, game_over, score = game.play_step(action)
        state_new = agent.get_state(game)

        agent.train_short_memory(state_curr, action, reward, state_new, game_over)
        agent.remember(state_curr, action, reward, state_new, game_over)

        if game_over:
            game.reset()
            agent.no_of_games += 1
            agent.train_long_memory()

            if score > best_score:
                best_score = score
                agent.model.save()

            print("Game", agent.no_of_games, "Score", score, "Best score", best_score)

            plot_scores.append(score)
            total_score += score
            mean_score = total_score / agent.no_of_games
            plot_mean_scores.append(mean_score)
            plot(plot_scores, plot_mean_scores)
def train():
    # data to plot
    plotScores = []
    plotMeanScores = []
    totalScore = 0
    record = 0
    agent = Agent()
    game = SnakeGameAI()
    while True:
        stateOld = agent.getState(game)
        move = agent.getAction(stateOld)
        reward, game_over, score = game.play_step(move)
        stateNew = agent.getState(game)

        agent.trainShortMemory(stateOld, move, reward, stateNew, game_over)
        agent.remember(stateOld, move, reward, stateNew, game_over)

        if game_over:
            # train long memory, plot results, reset the game
            game.reset()
            agent.numberOfGames += 1
            agent.trainLongMemory()

            if score > record:
                record = score
                agent.model.save()

            print("Game", agent.numberOfGames, 'Score', score, 'Record', record)

            plotScores.append(score)
            totalScore += score
            mean_score = totalScore / agent.numberOfGames
            plotMeanScores.append(mean_score)
            plot(plotScores, plotMeanScores)
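Every variant above assumes the same Agent contract: encode the game state, pick an action, learn from the single transition just observed (short memory), store it in a replay buffer (remember), and replay a batch after each game (long memory). A minimal sketch of that interface follows; method names are taken from the snake_case variants, and the bodies are illustrative placeholders, not the implementation from any particular snippet.

# Minimal sketch of the Agent contract the train() loops rely on.
# Placeholder bodies only; a real agent would back these with a Q-network.
import random
from collections import deque

class Agent:
    def __init__(self):
        self.n_games = 0
        self.memory = deque(maxlen=100_000)  # replay buffer of transitions

    def get_state(self, game):
        # Encode the game into a fixed-size feature vector
        # (danger flags, movement direction, food location, ...).
        return [0] * 11

    def get_action(self, state):
        # Epsilon-greedy in practice: explore early, exploit as n_games grows.
        # One-hot action: [straight, right turn, left turn].
        return random.choice([[1, 0, 0], [0, 1, 0], [0, 0, 1]])

    def remember(self, state, action, reward, next_state, done):
        # Store the transition (S, A, R, S', done) for later replay.
        self.memory.append((state, action, reward, next_state, done))

    def train_short_memory(self, state, action, reward, next_state, done):
        # One update step on the single transition just observed.
        pass

    def train_long_memory(self):
        # One update step on a random batch sampled from self.memory.
        pass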