def collect_trajectories(env: gym.Env,
                         agent: Agent,
                         n_games: int = 10) -> np.ndarray:
    """Roll out ``n_games`` episodes and return every visited state.

    The agent acts via ``choose_action``; rewards are discarded — only the
    state trajectory is collected.

    Args:
        env: Gym-style environment (``reset()`` -> state,
            ``step(a)`` -> ``(state, reward, done, info)``).
        agent: Policy object exposing ``choose_action(state)``.
        n_games: Number of episodes to roll out.

    Returns:
        All visited states stacked row-wise via ``np.vstack``, i.e. shape
        ``(total_steps, *state_shape)``.
    """
    # BUG FIX: previously the history list was re-created inside the episode
    # loop and the return followed it, so only the LAST game's states were
    # returned despite n_games episodes being played. Accumulate instead.
    state_history: list[np.ndarray] = []

    for _ in range(n_games):
        state = env.reset()
        done = False

        while not done:
            state_history.append(state)
            action = agent.choose_action(state)
            # Reward and info are intentionally ignored.
            state, _, done, _ = env.step(action)

    return np.vstack(state_history)
    # NOTE(review): everything below is UNREACHABLE — it sits after the
    # `return` above and appears to be pasted in from a separate
    # test-harness snippet. `data_path`, `tqdm`, `List`, `copy` and
    # `test_data` are not defined in this scope, so this block cannot run
    # as-is; it is preserved verbatim pending removal or relocation.
    # Init. Agent
    agent = Agent(env=env, n_games=n_games, training=False)
    agent.load_models(data_path)

    for i in tqdm(range(n_games), desc=f'Testing', total=n_games):
        # NOTE(review): `[] * n_games` is just `[]` — multiplying an empty
        # list is a no-op, so this does NOT pre-allocate n_games slots.
        score_history: List[np.float32] = [] * n_games

        for _ in tqdm(range(n_games), desc=f'Testing', total=n_games):
            score = 0
            done = False

            # Initial Reset of Environment
            state = env.reset()

            # Play one episode, storing each transition in replay memory.
            while not done:
                action = agent.choose_action(state)
                next_state, reward, done, _ = env.step(action)

                agent.memory.add(state, action, reward, next_state, done)

                # deepcopy — presumably to guard against environments that
                # mutate the returned state in place; confirm it is needed.
                state = copy.deepcopy(next_state)
                score += reward

            score_history.append(score)

        # NOTE(review): label says 'Variance' but np.std computes the
        # standard deviation, not the variance.
        print(f'Test Analysis:\n'
              f'Mean:{np.mean(score_history)}\n'
              f'Variance:{np.std(score_history)}')

        test_data.append({'Test Score': score_history})
# Exemplo n.º 3 — scrape artifact: this marker (and the stray "0") came
# from the code-example site this file was extracted from. The line
# `agent = Agent(` that opened the call below was lost in extraction;
# its orphaned arguments are preserved here as a comment:
#               env=env,
#               batch_size=64,
#               layer1_size=256,
#               layer2_size=128,
#               n_actions=3)

#agent.load_models()
# np.random.seed(1)

# Training driver: play 50 episodes, learn online from each transition,
# and checkpoint the model (plus render once) every 10th episode.
score_history = []
for episode in range(50):
    observation = env.reset()
    finished = False
    episode_return = 0
    while not finished:
        action = agent.choose_action(observation)
        print(action)
        next_observation, reward, finished, info = env.step(action)
        # Replay buffer stores the done flag as an int (0/1).
        agent.remember(observation, action, reward, next_observation,
                       int(finished))
        agent.learn()
        observation = next_observation
        episode_return += reward
        #env.render()
    score_history.append(episode_return)

    if episode % 10 == 0:
        agent.save_models()
        env.render()

    print('episode ', episode, 'score %.2f' % episode_return,
          'trailing 25 games avg %.3f' % np.mean(score_history[-25:]))