def test_training_monitor_logger_is_optional(caplog):
    """A training monitor constructed without a logger emits no log lines."""
    with caplog.at_level(logging.INFO):
        silent_monitor = make_training_monitor(
            progress_averaging=1, performance_sample_size=2)
        with monitor(silent_monitor):
            broadcast(Message.TRAINING, infos=[{'episodic_return': 2}])
            broadcast(Message.TRAINING, infos=[{'episodic_return': -0.5}])
    assert not caplog.messages
def test_training_monitor_receives_and_logs_training_message(caplog):
    """After `progress_averaging` returns arrive, one progress line is logged."""
    with caplog.at_level(logging.INFO):
        with monitor(make_training_monitor(logger=logger,
                                           progress_averaging=4)) as m:
            broadcast(Message.TRAINING, infos=[{'episodic_return': 2}] * 4)
            expected = m.TRAINING_LINE_FORMAT.format(steps=4, avg_reward=2.0)
            assert caplog.messages == [expected]
def test_training_monitor_receives_and_aggregates_multiple_messages(caplog):
    """Returns from separate broadcasts are averaged into a single log line."""
    with caplog.at_level(logging.INFO):
        with monitor(make_training_monitor(logger=logger,
                                           progress_averaging=3)) as m:
            broadcast(Message.TRAINING, infos=[{'episodic_return': 2}] * 1)
            broadcast(Message.TRAINING, infos=[{'episodic_return': -0.5}] * 2)
            # (2 - 0.5 - 0.5) / 3 == 1/3
            expected = m.TRAINING_LINE_FORMAT.format(steps=3, avg_reward=1 / 3)
            assert caplog.messages == [expected]
def test_training_monitor_can_print_multiple_monitor_messages(caplog):
    """With averaging of 1, every broadcast produces its own progress line."""
    with caplog.at_level(logging.INFO):
        with monitor(make_training_monitor(logger=logger,
                                           progress_averaging=1)) as m:
            broadcast(Message.TRAINING, infos=[{'episodic_return': 2}])
            broadcast(Message.TRAINING, infos=[{'episodic_return': -0.5}])
            assert caplog.messages == [
                m.TRAINING_LINE_FORMAT.format(steps=1, avg_reward=2.0),
                m.TRAINING_LINE_FORMAT.format(steps=2, avg_reward=-0.5),
            ]
def test_training_monitor_prints_wall_clock_performance_in_specified_interval(
        caplog):
    """Once `performance_sample_size` steps arrive, a wall-clock performance
    line is logged; it is checked against an externally measured elapsed time.
    """
    with caplog.at_level(logging.INFO):
        with time_scope() as measure:
            with monitor(
                    make_training_monitor(
                        logger=logger, performance_sample_size=2)) as m:
                time.sleep(0.2)
                broadcast(Message.TRAINING, infos=[{'episodic_return': 2}])
                time.sleep(0.2)
                broadcast(Message.TRAINING, infos=[{'episodic_return': -0.5}])
    eps = 0.01
    # NOTE(review): these are exact string comparisons; the +/- eps variants
    # can only ever match if the line format happens to round the rate onto
    # those values, so the "tolerance" is likely a no-op — confirm intent.
    assert any(
        caplog.messages == [
            m.PERFORMANCE_LINE_FORMAT.format(
                steps=2, performance=2 / measure.elapsed + offset)
        ] for offset in (0.0, eps, -eps))
def test_training_monitor_prints_wall_clock_performance_measure_at_specified_frequency_even_at_uneven_calls(
        caplog):
    """The performance line triggers once the cumulative step count crosses
    `performance_sample_size`, even when broadcasts carry uneven batch sizes.
    """
    with caplog.at_level(logging.INFO):
        with time_scope() as measure:
            with monitor(
                    make_training_monitor(
                        logger=logger, performance_sample_size=4)) as m:
                time.sleep(0.5)
                broadcast(Message.TRAINING, infos=[{'episodic_return': 2}] * 3)
                time.sleep(0.3)
                broadcast(Message.TRAINING, infos=[{'episodic_return': 2}] * 2)
    eps = 0.01
    # NOTE(review): same as the interval test above — exact string equality
    # makes the +/- eps alternatives an ineffective tolerance; confirm intent.
    assert any(
        caplog.messages == [
            m.PERFORMANCE_LINE_FORMAT.format(
                steps=5, performance=5 / measure.elapsed + offset)
        ] for offset in (0.0, eps, -eps))
def test_combined_monitors():
    """Every broadcast reaches each monitor wrapped in a CombinedMonitor."""
    with monitor(CombinedMonitor([MonitorSpy(), MonitorSpy()])) as combined:
        broadcast(Message.TRAINING, infos=[{'episodic_return': 2}])
        broadcast(Message.TRAINING, infos=[{'episodic_return': -0.5}])
        broadcast(Message.TRAINING, infos=[{'episodic_return': -0.5}])
    # NOTE(review): three messages are broadcast yet only two are expected per
    # spy — confirm whether the spy deduplicates identical payloads or the
    # expected count is out of date.
    for spy in combined.monitors:
        assert spy.num_messages_received == 2
def test_training_monitor_ignores_episodic_return_being_not_present(caplog):
    """Infos without 'episodic_return' count as steps but add no reward."""
    with caplog.at_level(logging.INFO):
        with monitor(make_training_monitor(logger=logger,
                                           progress_averaging=2)) as m:
            broadcast(Message.TRAINING, infos=[{'episodic_return': -0.5}])
            broadcast(Message.TRAINING, infos=[{}])
            # Only one return has been observed so far: nothing is logged yet.
            assert not caplog.messages
            broadcast(Message.TRAINING, infos=[{'episodic_return': 2}])
            # (-0.5 + 2) / 2 == 0.75, reported at step 3.
            assert caplog.messages == [
                m.TRAINING_LINE_FORMAT.format(steps=3, avg_reward=0.75)
            ]
def test_training_monitor_prints_training_monitor_messages_at_specified_frequency_even_at_uneven_intervals(
        caplog):
    """Progress lines fire every `progress_averaging` steps even when the
    broadcast batch sizes do not line up with that boundary."""
    with caplog.at_level(logging.INFO):
        with monitor(make_training_monitor(logger=logger,
                                           progress_averaging=4)) as m:
            broadcast(Message.TRAINING, infos=[{'episodic_return': +2}] * 3)
            broadcast(Message.TRAINING, infos=[{'episodic_return': -1}] * 2)
            broadcast(Message.TRAINING, infos=[{'episodic_return': +1}] * 3)
            # First window: 2+2+2-1 == 5; second window: -1+1+1+1 == 2.
            first = m.TRAINING_LINE_FORMAT.format(steps=4, avg_reward=5 / 4)
            second = m.TRAINING_LINE_FORMAT.format(steps=8, avg_reward=2 / 4)
            assert caplog.messages == [first, second]
def test_tensorboard_monitor_writes_specified_infos():
    """Only configured scalar keys reach the writer; the `None` return and the
    non-configured string value produce no scalar, and the averaged value is
    written at the step index of its last sample."""
    writer = SummaryWriterSpy()
    tb_monitor = TensorboardMonitor(
        writer,
        progress_averaging=2,
        scalars=dict(episodic_return='returns/episodic'))
    with monitor(tb_monitor):
        broadcast(Message.TRAINING, infos=[{'episodic_return': 2}] * 2)
        broadcast(Message.TRAINING, infos=[{'episodic_return': None}])
        broadcast(Message.TRAINING,
                  infos=[{
                      'episodic_return': -0.5,
                      'other_value': "its a string"
                  }])
        broadcast(Message.TRAINING, infos=[{'episodic_return': 2}])
    assert writer.received_scalars == [('returns/episodic', 2, 0),
                                       ('returns/episodic', 2, 1),
                                       ('returns/episodic', 0.75, 3)]
    assert tb_monitor.step == 4
def test_training_monitor_captures_rewards():
    """The monitor records every broadcast episodic return, in order."""
    expected_returns = [2, -0.5]
    with monitor(make_training_monitor()) as training_monitor:
        for value in expected_returns:
            broadcast(Message.TRAINING, infos=[{'episodic_return': value}])
    assert training_monitor.captured_returns == expected_returns
def _do_step(self):
    """Run one policy/environment step and broadcast the step infos.

    Computes actions for the last observation, steps the environment,
    broadcasts a TRAINING message carrying the step infos, and caches the
    new observation for the next step.

    Returns:
        A ``(train_data, dones, infos, rewards)`` tuple from this step.
    """
    actions, train_data = self._policy.compute_actions(self._last_obs)
    next_obs, rewards, dones, infos = self._env.step(actions)
    broadcast(Message.TRAINING, infos=infos)
    self._last_obs = next_obs
    return train_data, dones, infos, rewards
def _reset_terminated_envs(self):
    """Reset every terminated sub-environment and announce the resets.

    Replaces the cached observation of each terminated environment with its
    reset observation, then broadcasts one ENV_TERMINATED message listing all
    reset indices.
    """
    terminated_ids = list(self._env.terminated_env_ids)
    for env_id in terminated_ids:
        self._last_obs[env_id] = self._env.reset_env(env_id)
    broadcast(Message.ENV_TERMINATED, index_dones=terminated_ids)
def send_done_message(indices):
    """Broadcast an ENV_TERMINATED message for the given environment indices.

    The indices are converted to a numpy array before broadcasting, matching
    the payload shape used elsewhere for ``index_dones``.
    """
    index_array = np.array(indices)
    broadcast(Message.ENV_TERMINATED, index_dones=index_array)