Example #1
def train():
    total_score = 0
    record = 0
    agent = Agent()
    game = SnakeGameAI()
    while True:
        # Get the old state
        state_old = agent.get_state(game)

        # Get the move
        final_move = agent.get_action(state_old)

        # Perform the move and get the new state
        reward, done, score = game.play_step(final_move)
        state_new = agent.get_state(game)

        # Train short-term memory
        agent.train_short_memory(state_old, final_move, reward, state_new, done)

        # remember
        agent.remember(state_old, final_move, reward, state_new, done)

        if done:
            # Train long-term memory
            game.reset()
            agent.n_games += 1
            agent.train_long_memory()

            if score > record:
                record = score
                agent.model.save()

            print('Game', agent.n_games, 'Score', score, 'Record:', record)
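
All of the examples in this collection rely on the same Agent contract: remember() stores each (state, action, reward, next_state, done) transition, train_short_memory() learns from the most recent transition, and train_long_memory() replays a random batch of stored transitions once a game ends. The class below is a minimal sketch of that replay-memory side, not code from any of the projects shown here; MAX_MEMORY, BATCH_SIZE, and the trainer.train_step() interface are assumptions chosen for illustration.

# Illustrative sketch only -- not taken from any of the example projects.
import random
from collections import deque

MAX_MEMORY = 100_000   # assumed replay-buffer capacity
BATCH_SIZE = 1_000     # assumed batch size for train_long_memory()


class Agent:
    def __init__(self, trainer):
        self.n_games = 0
        self.trainer = trainer                  # object exposing train_step(...)
        self.memory = deque(maxlen=MAX_MEMORY)  # oldest transitions are dropped first

    def remember(self, state, action, reward, next_state, done):
        # Store one (s, a, r, s', done) transition for later replay
        self.memory.append((state, action, reward, next_state, done))

    def train_short_memory(self, state, action, reward, next_state, done):
        # Single-step update on the transition that just happened
        self.trainer.train_step(state, action, reward, next_state, done)

    def train_long_memory(self):
        # Replay a random batch of stored transitions after each game
        if len(self.memory) > BATCH_SIZE:
            sample = random.sample(self.memory, BATCH_SIZE)
        else:
            sample = list(self.memory)
        states, actions, rewards, next_states, dones = zip(*sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)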
Example #2
def train(cfg) -> None:
    run = init_logging(cfg)
    single_game_scores = []
    mean_game_scores = []
    smoothing = 15
    assert smoothing > 0
    record = 0
    agent = Agent(cfg.agent)
    if cfg.agent.model.initial_weights is not None:
        # Warm-start the model from previously logged weights
        artifact = run.use_artifact(cfg.agent.model.initial_weights)
        datadir = artifact.download(root=None)
        state_filename = os.path.join(datadir, 'state.pth')
        agent.model.load(state_filename)
    game = SnakeGameAI(cfg.game)

    for game_idx in range(cfg.max_games):
        while True:
            state_old = agent.get_state(game)
            action = agent.get_action(state_old)
            reward, gameover, score = game.play_step(action)
            run.log({
                'game_id': game_idx,
                'reward': reward,
                'step_score': score
            })
            state = agent.get_state(game)
            agent.remember(state_old, action, reward, state, gameover)

            agent.train_short_memory(state_old, action, reward, state,
                                     gameover)

            if gameover:
                run.log({
                    'game_id': game_idx,
                    'game_score': score,
                    'game_time': game.frame_iteration
                })
                single_game_scores.append(score)
                # Average over the games actually played so far, at most `smoothing` of them
                window = single_game_scores[-smoothing:]
                mean_game_scores.append(sum(window) / len(window))
                agent.n_games += 1
                game.reset()
                agent.train_long_memory()

                if score > record:
                    record = score
                    model_path = agent.model.save(run.id)
                    fig = plot_scores(single_game_scores, mean_game_scores)
                    fig.savefig(str(model_path / 'training_progress.png'))
                log.info(f'[Game {agent.n_games}] Score: {score} ({record})')
                if cfg.plt_display:
                    plot_scores(single_game_scores, mean_game_scores)
                break
    run.summary.update({
        'high_score': record,
        'mean_latest_score': np.mean(single_game_scores[-10:])
    })
Example #3
def train():
    plot_scores = []
    plot_mean_scores = []
    total_score = 0
    record = 0
    agent = Agent()
    game = SnakeGameAI()
    while True:
        # get old state
        state_old = agent.get_state(game)

        # get move
        final_move = agent.get_action(state_old)

        # perform move and get new state
        reward, done, score = game.play_step(final_move)
        state_new = agent.get_state(game)

        # train short memory
        agent.train_short_memory(state=state_old,
                                 action=final_move,
                                 reward=reward,
                                 next_state=state_new,
                                 done=done)

        # remember
        agent.remember(state=state_old,
                       action=final_move,
                       reward=reward,
                       next_state=state_new,
                       done=done)

        if done:
            # train long memory, plot results
            game.reset()
            agent.n_games += 1
            agent.train_long_memory()

            if score > record:
                record = score
                agent.model.save()
            print('Game:', agent.n_games, 'Score:', score, 'Record:', record)

            plot_scores.append(score)
            total_score += score
            mean_score = total_score / agent.n_games
            plot_mean_scores.append(mean_score)
            plot(plot_scores, plot_mean_scores)
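
Several of the examples call plot(plot_scores, plot_mean_scores) at the end of each game. The helper below is one plausible implementation, assumed here rather than taken from any of the listed projects: a live matplotlib figure that is cleared and redrawn after every game.

# Illustrative sketch only -- the real plot() helpers in these projects may differ.
import matplotlib.pyplot as plt

plt.ion()  # interactive mode so the figure refreshes between games


def plot(scores, mean_scores):
    # Redraw the per-game score and the running-mean score curves
    plt.clf()
    plt.title('Training progress')
    plt.xlabel('Number of games')
    plt.ylabel('Score')
    plt.plot(scores, label='score')
    plt.plot(mean_scores, label='mean score')
    plt.ylim(bottom=0)
    plt.legend(loc='upper left')
    plt.pause(0.1)  # give the GUI event loop time to draw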
Example #4
def train():
    plot_scores = []
    plot_avg_scores = []
    total_score = 0
    max_score = 0
    agent = Agent()
    game = SnakeGameAI()
    while True:
        # get current state of the game
        state_old = agent.get_state(game)

        # get the move based on the current state
        final_move = agent.get_action(state_old)

        # actually perform the move in the game
        reward, done, score = game.play_step(final_move)
        state_new = agent.get_state(game)

        # training the short term memory of the agent
        agent.train_short_memory(state_old, final_move, reward, state_new,
                                 done)

        # remember this iteration
        agent.remember(state_old, final_move, reward, state_new, done)

        if done:
            # reset the environment for another game
            game.reset()

            # increase game counter
            agent.num_games += 1

            # train the long term memory after a game
            agent.train_long_memory()

            # update the highscore if necessary
            if score > max_score:
                max_score = score
                agent.model.save()

            print('Game', agent.num_games, 'Score', score, 'Highscore',
                  max_score)

            plot_scores.append(score)
            total_score += score
            avg_score = total_score / agent.num_games
            plot_avg_scores.append(avg_score)
            plot(plot_scores, plot_avg_scores)
Example #5
def train():
    # plot_scores = []
    # plot_mean_scores = []
    # total_score = 0
    record = 0
    old_record = 0
    num_snake = 3
    agents = []
    snakes = []
    global game

    training_threads = []

    # Create one snake per agent; they all share a single game instance
    for i in range(num_snake):
        snake = Snake()
        snakes.append(snake)
    game = SnakeGameAI(num_snake=num_snake, Snake=snakes)

    # Start one training thread per agent
    for i in range(num_snake):
        agent = Agent()
        agents.append(agent)
        thread = threading.Thread(target=trainEachSnake, args=(agent, i))
        thread.start()
        training_threads.append(thread)

    # Wait for every per-snake training thread to finish
    for thread in training_threads:
        thread.join()
Example #6
File: agent.py  Project: shnippi/SnakeQ
def train():
    plot_scores = []
    plot_mean_scores = []
    total_score = 0
    record = 0
    agent = Agent()
    game = SnakeGameAI()
    while True:  # the good old while true
        # get old state
        state_old = agent.get_state(game)

        # get move prediction
        final_move = agent.get_action(state_old)

        # perform move and get new state
        reward, done, score = game.play_step(final_move)
        state_new = agent.get_state(game)

        # train short memory with the information we just got by playing A in state S and getting reward R and ending
        # up in S' (new state)
        agent.train_short_memory(state_old, final_move, reward, state_new,
                                 done)

        # remember for after epoch learning
        agent.remember(state_old, final_move, reward, state_new, done)

        # after every epoch
        if done:
            # train long memory, plot result
            game.reset()
            agent.n_games += 1
            agent.train_long_memory()

            if score > record:
                record = score
                agent.model.save()

            print('Game', agent.n_games, 'Score', score, 'Record:', record)

            plot_scores.append(score)
            total_score += score
            mean_score = total_score / agent.n_games
            plot_mean_scores.append(mean_score)
            plot(plot_scores, plot_mean_scores)
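
The comment in the example above spells out what train_short_memory() receives: a single (S, A, R, S') transition plus the done flag. In this family of Snake agents that call usually bottoms out in one Q-learning update toward the Bellman target r + gamma * max_a' Q(s', a'). The trainer below sketches such an update step, assuming a PyTorch model that maps a state vector to one Q-value per action and a one-hot action encoding; the class name, learning rate, and gamma value are illustrative, not taken from any project listed here.

# Illustrative sketch only -- assumes a one-hot action vector and a PyTorch model.
import torch
import torch.nn as nn


class QTrainer:
    def __init__(self, model, lr=0.001, gamma=0.9):
        self.model = model                       # maps a state vector to one Q-value per action
        self.gamma = gamma                       # discount factor for future rewards
        self.optimizer = torch.optim.Adam(model.parameters(), lr=lr)
        self.criterion = nn.MSELoss()

    def train_step(self, state, action, reward, next_state, done):
        state = torch.as_tensor(state, dtype=torch.float)
        next_state = torch.as_tensor(next_state, dtype=torch.float)
        action = torch.as_tensor(action, dtype=torch.long)
        reward = torch.as_tensor(reward, dtype=torch.float)

        if state.dim() == 1:
            # Single transition: add a batch dimension of size 1
            state = state.unsqueeze(0)
            next_state = next_state.unsqueeze(0)
            action = action.unsqueeze(0)
            reward = reward.unsqueeze(0)
            done = (done,)

        pred = self.model(state)      # Q(s, a) for every action in the batch
        target = pred.clone()
        for idx in range(len(done)):
            q_new = reward[idx]
            if not done[idx]:
                # Bellman target: r + gamma * max_a' Q(s', a')
                q_new = reward[idx] + self.gamma * torch.max(self.model(next_state[idx]))
            target[idx][torch.argmax(action[idx]).item()] = q_new

        # Minimise the squared error between the prediction and the target
        self.optimizer.zero_grad()
        loss = self.criterion(target, pred)
        loss.backward()
        self.optimizer.step()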
Example #7
def train():
  plot_scores = []
  plot_mean_scores = []
  total_score = 0
  highscore = 0
  agent = Agent()
  game = SnakeGameAI()

  while True:
    # Get old state
    state_old = agent.get_state(game)

    # Get move
    final_move = agent.get_action(state_old)

    # Perform move and get new state
    reward, done, score = game.play_step(final_move)
    state_new = agent.get_state(game)

    # Train short memory
    agent.train_short_memory(state_old, final_move, reward, state_new, done)

    # Remember
    agent.remember(state_old, final_move, reward, state_new, done)

    if done:
      # Train long memory
      game.reset()
      agent.n_games += 1
      agent.train_long_memory()

      if score > highscore:
        highscore = score
        agent.model.save()

      print('Game', agent.n_games, 'Score', score, 'Highscore:', highscore)

      plot_scores.append(score)
      total_score += score
      mean_score = total_score / agent.n_games
      plot_mean_scores.append(mean_score)
      plot(plot_scores, plot_mean_scores)
Example #8
def train():
    plot_scores = []
    plot_mean_scores = []
    total_score = 0
    best_score = 0
    agent = Agent(use_checkpoint=True)
    game = SnakeGameAI()

    while True:
        # get current state
        state_curr = agent.get_state(game)

        # get action
        action = agent.get_action(state_curr)

        # perform action & get new state
        reward, game_over, score = game.play_step(action)
        state_new = agent.get_state(game)

        agent.train_short_memory(state_curr, action, reward, state_new,
                                 game_over)

        agent.remember(state_curr, action, reward, state_new, game_over)

        if game_over:
            game.reset()
            agent.no_of_games += 1
            agent.train_long_memory()

            if score > best_score:
                best_score = score
                agent.model.save()

            print("Game", agent.no_of_games, "Score", score, "Best score",
                  best_score)

            plot_scores.append(score)
            total_score += score
            mean_score = total_score / agent.no_of_games
            plot_mean_scores.append(mean_score)
            plot(plot_scores, plot_mean_scores)
Example #9
def train():
    # data to plot
    plotScores = []
    plotMeanScores = []
    totalScore = 0
    record = 0
    agent = Agent()
    game = SnakeGameAI()

    while True:
        stateOld = agent.getState(game)

        move = agent.getAction(stateOld)

        reward, game_over, score = game.play_step(move)

        stateNew = agent.getState(game)

        agent.trainShortMemory(stateOld, move, reward, stateNew, game_over)

        agent.remember(stateOld, move, reward, stateNew, game_over)

        if game_over:
            # train long memory, plot results, reset the game
            game.reset()

            agent.numberOfGames += 1
            agent.trainLongMemory()

            if score > record:
                record = score
                agent.model.save()

            print("Game", agent.numberOfGames, 'Score', score, 'Record',
                  record)

            plotScores.append(score)
            totalScore += score
            mean_score = totalScore / agent.numberOfGames
            plotMeanScores.append(mean_score)
            plot(plotScores, plotMeanScores)