Example #1
from ai_traineree.tasks import GymTask  # import path assumed from the library's layout


def test_gym_task_reset(mock_gym, fix_env):
    # Arrange
    mock_gym.make.return_value = fix_env
    task = GymTask("example")

    # Act
    out = task.reset()

    # Assert
    fix_env.reset.assert_called_once()
    assert len(out) > 0
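
# The test relies on two pytest fixtures defined elsewhere. A minimal conftest
# sketch (assumed, for illustration only; the real fixtures may differ):
import pytest
from unittest import mock


@pytest.fixture
def mock_gym():
    # Patch the gym module as seen by ai_traineree.tasks (patch target assumed).
    with mock.patch("ai_traineree.tasks.gym") as mocked_gym:
        yield mocked_gym


@pytest.fixture
def fix_env():
    env = mock.MagicMock()
    env.reset.return_value = [0.0] * 4  # non-empty state so `len(out) > 0` holds
    return env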
Example #2

import numpy as np
from torch.utils.tensorboard import SummaryWriter

# The ai_traineree import paths below are assumed from the library's layout.
from ai_traineree.agents.dqn import DQNAgent
from ai_traineree.env_runner import EnvRunner
from ai_traineree.loggers import TensorboardLogger
from ai_traineree.networks import QNetwork2D
from ai_traineree.tasks import GymTask


def state_transform(state):
    """
    Crop the top and bottom edges of the frame and convert it to black-and-white.
    """
    return (state[40:-10].sum(-1) > 0)[None, ...]


def agent_state_transform(state):
    # The agent applies no extra preprocessing; the task's state_transform does the work.
    return state
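

# Illustrative sanity check (not part of the training script): a raw
# Breakout-v0 frame is a (210, 160, 3) uint8 array, so cropping rows 40:-10
# leaves 160 rows, and the thresholded channel sum adds a leading axis.
_dummy_frame = np.zeros((210, 160, 3), dtype=np.uint8)
assert state_transform(_dummy_frame).shape == (1, 160, 160)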


env_name = 'Breakout-v0'
task = GymTask(env_name, state_transform=state_transform)
state_size = np.array(task.reset()).shape
writer = SummaryWriter()
# EnvRunner below expects a data logger; assuming ai_traineree's Tensorboard-backed
# logger wrapping the SummaryWriter here (exact API assumed).
data_logger = TensorboardLogger(writer)

config = {
    "warm_up": 100,
    "update_freq": 150,
    "batch_size": 400,
    "buffer_size": int(5e3),
    "lr": 1e-3,
    "gamma": 0.99,
    "network_fn": lambda: QNetwork2D(state_size, task.action_size, hidden_layers=(200, 200)),
    "state_transform": agent_state_transform,
    "device": "cuda",
}

prev_states = 4  # assumed warm-up reset count; any small positive value works here
for _ in range(prev_states):
    task.reset()

agent = DQNAgent(task.state_size, task.action_size, **config)
env_runner = EnvRunner(task, agent, data_logger=data_logger)

# env_runner.interact_episode(0, render=True)
scores = env_runner.run(
    reward_goal=1000,
    max_episodes=20000,
    log_every=1,
    eps_start=0.9,
    gif_every_episodes=200,
    force_new=True,
)
# env_runner.interact_episode(render=True)
data_logger.close()
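
# Optional follow-up (illustrative; assumes matplotlib is installed): plot the
# per-episode scores returned by EnvRunner.run to inspect the learning curve.
import matplotlib.pyplot as plt

plt.plot(scores)
plt.xlabel("episode")
plt.ylabel("score")
plt.show()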