# Example no. 1
def test_training_monitor_logger_is_optional(caplog):
    """A training monitor created without a logger must emit no log lines."""
    training_monitor = make_training_monitor(progress_averaging=1,
                                             performance_sample_size=2)
    with caplog.at_level(logging.INFO), monitor(training_monitor):
        for episodic_return in (2, -0.5):
            broadcast(Message.TRAINING,
                      infos=[{'episodic_return': episodic_return}])

    assert caplog.messages == []
# Example no. 2
def test_training_monitor_receives_and_logs_training_message(caplog):
    """Four returns at an averaging window of 4 yield exactly one log line."""
    training_monitor = make_training_monitor(logger=logger,
                                             progress_averaging=4)
    with caplog.at_level(logging.INFO), monitor(training_monitor) as mon:
        broadcast(Message.TRAINING, infos=[{'episodic_return': 2}] * 4)

    expected = mon.TRAINING_LINE_FORMAT.format(steps=4, avg_reward=2.0)
    assert caplog.messages == [expected]
# Example no. 3
def test_training_monitor_receives_and_aggregates_multiple_messages(caplog):
    """Returns arriving in separate broadcasts are averaged over the window."""
    training_monitor = make_training_monitor(logger=logger,
                                             progress_averaging=3)
    with caplog.at_level(logging.INFO), monitor(training_monitor) as mon:
        broadcast(Message.TRAINING, infos=[{'episodic_return': 2}] * 1)
        broadcast(Message.TRAINING, infos=[{'episodic_return': -0.5}] * 2)

    # (2 - 0.5 - 0.5) / 3 == 1/3
    expected = mon.TRAINING_LINE_FORMAT.format(steps=3, avg_reward=1 / 3)
    assert caplog.messages == [expected]
# Example no. 4
def test_training_monitor_can_print_multiple_monitor_messages(caplog):
    """With a window of 1, every broadcast produces its own log line."""
    training_monitor = make_training_monitor(logger=logger,
                                             progress_averaging=1)
    with caplog.at_level(logging.INFO), monitor(training_monitor) as mon:
        broadcast(Message.TRAINING, infos=[{'episodic_return': 2}])
        broadcast(Message.TRAINING, infos=[{'episodic_return': -0.5}])

    expected = [
        mon.TRAINING_LINE_FORMAT.format(steps=1, avg_reward=2.0),
        mon.TRAINING_LINE_FORMAT.format(steps=2, avg_reward=-0.5),
    ]
    assert caplog.messages == expected
# Example no. 5
def test_training_monitor_prints_wall_clock_performance_in_specified_interval(
        caplog):
    """After two steps (sample size 2), one performance line is logged."""
    with caplog.at_level(logging.INFO):
        with time_scope() as measure:
            with monitor(
                    make_training_monitor(logger=logger,
                                          performance_sample_size=2)) as mon:
                time.sleep(0.2)
                broadcast(Message.TRAINING, infos=[{'episodic_return': 2}])
                time.sleep(0.2)
                broadcast(Message.TRAINING, infos=[{'episodic_return': -0.5}])

    eps = 0.01
    # NOTE(review): tolerance is emulated by formatting three discrete
    # candidate values; the log only matches when it rounds to one exactly.
    candidates = [
        [mon.PERFORMANCE_LINE_FORMAT.format(
            steps=2, performance=2 / measure.elapsed + delta)]
        for delta in (0, eps, -eps)
    ]
    assert caplog.messages in candidates
# Example no. 6
def test_training_monitor_prints_wall_clock_performance_measure_at_specified_frequency_even_at_uneven_calls(
        caplog):
    """Five steps with sample size 4 still log a single performance line."""
    with caplog.at_level(logging.INFO):
        with time_scope() as measure:
            with monitor(
                    make_training_monitor(logger=logger,
                                          performance_sample_size=4)) as mon:
                time.sleep(0.5)
                broadcast(Message.TRAINING, infos=[{'episodic_return': 2}] * 3)
                time.sleep(0.3)
                broadcast(Message.TRAINING, infos=[{'episodic_return': 2}] * 2)

    eps = 0.01
    # NOTE(review): tolerance is emulated by formatting three discrete
    # candidate values; the log only matches when it rounds to one exactly.
    candidates = [
        [mon.PERFORMANCE_LINE_FORMAT.format(
            steps=5, performance=5 / measure.elapsed + delta)]
        for delta in (0, eps, -eps)
    ]
    assert caplog.messages in candidates
# Example no. 7
def test_combined_monitors():
    """A CombinedMonitor forwards each message to all wrapped monitors."""
    with monitor(CombinedMonitor([MonitorSpy(), MonitorSpy()])) as mon:
        broadcast(Message.TRAINING, infos=[{'episodic_return': 2}])
        broadcast(Message.TRAINING, infos=[{'episodic_return': -0.5}])
    # broadcast after the monitor scope closes must not be delivered
    broadcast(Message.TRAINING, infos=[{'episodic_return': -0.5}])

    for spy in (mon.monitors[0], mon.monitors[1]):
        assert spy.num_messages_received == 2
# Example no. 8
def test_training_monitor_ignores_episodic_return_being_not_present(caplog):
    """An info dict without 'episodic_return' counts as a step but does not
    enter the average: steps=3 yet avg_reward=(2 - 0.5) / 2."""
    training_monitor = make_training_monitor(logger=logger,
                                             progress_averaging=2)
    with caplog.at_level(logging.INFO), monitor(training_monitor) as mon:
        broadcast(Message.TRAINING, infos=[{'episodic_return': -0.5}])
        broadcast(Message.TRAINING, infos=[{}])
        assert caplog.messages == []
        broadcast(Message.TRAINING, infos=[{'episodic_return': 2}])
        expected = mon.TRAINING_LINE_FORMAT.format(steps=3, avg_reward=0.75)
        assert caplog.messages == [expected]
# Example no. 9
def test_training_monitor_prints_training_monitor_messages_at_specified_frequency_even_at_uneven_intervals(
        caplog):
    """Batches of 3/2/3 returns with window 4 log at steps 4 and 8."""
    batches = [(+2, 3), (-1, 2), (+1, 3)]
    training_monitor = make_training_monitor(logger=logger,
                                             progress_averaging=4)
    with caplog.at_level(logging.INFO), monitor(training_monitor) as mon:
        for value, count in batches:
            broadcast(Message.TRAINING,
                      infos=[{'episodic_return': value}] * count)

    assert caplog.messages == [
        mon.TRAINING_LINE_FORMAT.format(steps=4, avg_reward=5 / 4),
        mon.TRAINING_LINE_FORMAT.format(steps=8, avg_reward=2 / 4),
    ]
# Example no. 10
def test_tensorboard_monitor_writes_specified_infos():
    """Only configured scalar keys with non-None values are written; the step
    counter still advances for every info received inside the scope."""
    writer = SummaryWriterSpy()
    tb_monitor = TensorboardMonitor(
        writer,
        progress_averaging=2,
        scalars=dict(episodic_return='returns/episodic'))
    with monitor(tb_monitor):
        broadcast(Message.TRAINING, infos=[{'episodic_return': 2}] * 2)
        # None values are skipped, but the step index still moves on
        broadcast(Message.TRAINING, infos=[{'episodic_return': None}])
        # keys not listed in `scalars` are ignored
        broadcast(Message.TRAINING,
                  infos=[{
                      'episodic_return': -0.5,
                      'other_value': "its a string"
                  }])
    # after the monitor scope closes nothing more is recorded
    broadcast(Message.TRAINING, infos=[{'episodic_return': 2}])

    expected_scalars = [
        ('returns/episodic', 2, 0),
        ('returns/episodic', 2, 1),
        ('returns/episodic', 0.75, 3),
    ]
    assert writer.received_scalars == expected_scalars
    assert tb_monitor.step == 4
# Example no. 11
def test_training_monitor_captures_rewards():
    """Episodic returns broadcast inside the scope are recorded in order."""
    with monitor(make_training_monitor()) as mon:
        for value in (2, -0.5):
            broadcast(Message.TRAINING, infos=[{'episodic_return': value}])

    assert mon.captured_returns == [2, -0.5]
# Example no. 12
 def _do_step(self):
     """Run one policy/environment step and broadcast the resulting infos."""
     actions, train_data = self._policy.compute_actions(self._last_obs)
     next_obs, rewards, dones, infos = self._env.step(actions)
     self._last_obs = next_obs
     broadcast(Message.TRAINING, infos=infos)
     return train_data, dones, infos, rewards
# Example no. 13
 def _reset_terminated_envs(self):
     """Reset each terminated sub-environment and broadcast which ones."""
     index_dones = list(self._env.terminated_env_ids)
     for env_id in index_dones:
         # replace the stale observation of the finished episode
         self._last_obs[env_id] = self._env.reset_env(env_id)
     broadcast(Message.ENV_TERMINATED, index_dones=index_dones)
# Example no. 14
def send_done_message(indices):
    """Broadcast an ENV_TERMINATED message carrying the given env indices."""
    index_dones = np.array(indices)
    broadcast(Message.ENV_TERMINATED, index_dones=index_dones)