def test_add_custom_scalars(self):
    with TemporaryDirectory() as tmp_dir:
        writer = SummaryWriter(tmp_dir)
        writer.add_custom_scalars = MagicMock()
        with summary_writer_context(writer):
            SummaryWriterContext.add_custom_scalars_multilinechart(
                ["a", "b"], category="cat", title="title"
            )
            with self.assertRaisesRegex(
                AssertionError, "Title \\(title\\) is already in category \\(cat\\)"
            ):
                SummaryWriterContext.add_custom_scalars_multilinechart(
                    ["c", "d"], category="cat", title="title"
                )
            SummaryWriterContext.add_custom_scalars_multilinechart(
                ["e", "f"], category="cat", title="title2"
            )
            SummaryWriterContext.add_custom_scalars_multilinechart(
                ["g", "h"], category="cat2", title="title"
            )

        SummaryWriterContext.add_custom_scalars(writer)
        writer.add_custom_scalars.assert_called_once_with(
            {
                "cat": {
                    "title": ["Multiline", ["a", "b"]],
                    "title2": ["Multiline", ["e", "f"]],
                },
                "cat2": {"title": ["Multiline", ["g", "h"]]},
            }
        )
def test_swallowing_exception(self):
    with TemporaryDirectory() as tmp_dir:
        writer = SummaryWriter(tmp_dir)
        writer.add_scalar = MagicMock(side_effect=NotImplementedError("test"))
        writer.exceptions_to_ignore = (NotImplementedError, KeyError)
        with summary_writer_context(writer):
            SummaryWriterContext.add_scalar("test", torch.ones(1))
def test_writing_stack(self):
    with TemporaryDirectory() as tmp_dir1, TemporaryDirectory() as tmp_dir2:
        writer1 = SummaryWriter(tmp_dir1)
        writer1.add_scalar = MagicMock()
        writer2 = SummaryWriter(tmp_dir2)
        writer2.add_scalar = MagicMock()
        with summary_writer_context(writer1):
            with summary_writer_context(writer2):
                SummaryWriterContext.add_scalar("test2", torch.ones(1))
            SummaryWriterContext.add_scalar("test1", torch.zeros(1))
        writer1.add_scalar.assert_called_once_with(
            "test1", torch.zeros(1), global_step=0
        )
        writer2.add_scalar.assert_called_once_with(
            "test2", torch.ones(1), global_step=0
        )
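# Illustrative sketch only (not ReAgent's actual SummaryWriterContext): the
# names below (_WriterStackSketch, writer_stack_sketch) are hypothetical and
# show one way the stacking behavior exercised by test_writing_stack could
# work -- the context manager pushes a writer on enter and pops it on exit,
# so add_scalar always targets the innermost (most recently pushed) writer.
from contextlib import contextmanager


class _WriterStackSketch:
    _writers = []
    _global_step = 0

    @classmethod
    def add_scalar(cls, tag, value):
        writer = cls._writers[-1] if cls._writers else None
        if writer is None:
            # Mirrors the no-op behavior checked in test_with_none.
            return None
        return writer.add_scalar(tag, value, global_step=cls._global_step)


@contextmanager
def writer_stack_sketch(writer):
    # Push on enter, pop on exit, so nesting restores the outer writer.
    _WriterStackSketch._writers.append(writer)
    try:
        yield
    finally:
        _WriterStackSketch._writers.pop()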
def run_test_offline(
    env_name: str,
    model: ModelManager__Union,
    replay_memory_size: int,
    num_batches_per_epoch: int,
    num_train_epochs: int,
    passing_score_bar: float,
    num_eval_episodes: int,
    minibatch_size: int,
    use_gpu: bool,
):
    env = Gym(env_name=env_name)
    env.seed(SEED)
    env.action_space.seed(SEED)
    normalization = build_normalizer(env)
    logger.info(f"Normalization is: \n{pprint.pformat(normalization)}")

    manager = model.value
    trainer = manager.initialize_trainer(
        use_gpu=use_gpu,
        reward_options=RewardOptions(),
        normalization_data_map=normalization,
    )

    # first fill the replay buffer to burn_in
    replay_buffer = ReplayBuffer(
        replay_capacity=replay_memory_size, batch_size=minibatch_size
    )
    # always fill full RB
    random_policy = make_random_policy_for_env(env)
    agent = Agent.create_for_env(env, policy=random_policy)
    fill_replay_buffer(
        env=env,
        replay_buffer=replay_buffer,
        desired_size=replay_memory_size,
        agent=agent,
    )

    device = torch.device("cuda") if use_gpu else None
    # pyre-fixme[6]: Expected `device` for 2nd param but got `Optional[torch.device]`.
    trainer_preprocessor = make_replay_buffer_trainer_preprocessor(trainer, device, env)

    writer = SummaryWriter()
    with summary_writer_context(writer):
        for epoch in range(num_train_epochs):
            logger.info(f"Evaluating before epoch {epoch}: ")
            eval_rewards = evaluate_cem(env, manager, 1)
            for _ in tqdm(range(num_batches_per_epoch)):
                train_batch = replay_buffer.sample_transition_batch()
                preprocessed_batch = trainer_preprocessor(train_batch)
                trainer.train(preprocessed_batch)

    logger.info(f"Evaluating after training for {num_train_epochs} epochs: ")
    eval_rewards = evaluate_cem(env, manager, num_eval_episodes)
    mean_rewards = np.mean(eval_rewards)
    assert (
        mean_rewards >= passing_score_bar
    ), f"{mean_rewards} doesn't pass the bar {passing_score_bar}."
def test_not_swallowing_exception(self):
    with TemporaryDirectory() as tmp_dir:
        writer = SummaryWriter(tmp_dir)
        writer.add_scalar = MagicMock(side_effect=NotImplementedError("test"))
        with self.assertRaisesRegex(
            NotImplementedError, "test"
        ), summary_writer_context(writer):
            SummaryWriterContext.add_scalar("test", torch.ones(1))
def test_writing(self):
    with TemporaryDirectory() as tmp_dir:
        writer = SummaryWriter(tmp_dir)
        writer.add_scalar = MagicMock()
        with summary_writer_context(writer):
            SummaryWriterContext.add_scalar("test", torch.ones(1))
        writer.add_scalar.assert_called_once_with(
            "test", torch.ones(1), global_step=0
        )
def test_global_step(self):
    with TemporaryDirectory() as tmp_dir:
        writer = SummaryWriter(tmp_dir)
        writer.add_scalar = MagicMock()
        with summary_writer_context(writer):
            SummaryWriterContext.add_scalar("test", torch.ones(1))
            SummaryWriterContext.increase_global_step()
            SummaryWriterContext.add_scalar("test", torch.zeros(1))
        writer.add_scalar.assert_has_calls(
            [
                call("test", torch.ones(1), global_step=0),
                call("test", torch.zeros(1), global_step=1),
            ]
        )
        self.assertEqual(2, len(writer.add_scalar.mock_calls))
def train_policy(
    env: EnvWrapper,
    training_policy: Policy,
    num_train_episodes: int,
    post_step: Optional[PostStep] = None,
    post_episode: Optional[PostEpisode] = None,
    use_gpu: bool = False,
) -> np.ndarray:
    device = torch.device("cuda") if use_gpu else torch.device("cpu")
    agent = Agent.create_for_env(
        env,
        policy=training_policy,
        post_transition_callback=post_step,
        post_episode_callback=post_episode,
        device=device,
    )
    running_reward = 0
    writer = SummaryWriter()
    with summary_writer_context(writer):
        train_rewards = []
        with trange(num_train_episodes, unit=" epoch") as t:
            for i in t:
                # Note: run_episode also performs a training step for the agent,
                # if specified in post_step
                trajectory = run_episode(
                    env=env, agent=agent, mdp_id=i, max_steps=200
                )
                ep_reward = trajectory.calculate_cumulative_reward()
                train_rewards.append(ep_reward)
                running_reward *= REWARD_DECAY
                running_reward += (1 - REWARD_DECAY) * ep_reward
                t.set_postfix(reward=running_reward)

    logger.info("============Train rewards=============")
    logger.info(train_rewards)
    logger.info(f"average: {np.mean(train_rewards)};\tmax: {np.max(train_rewards)}")
    return np.array(train_rewards)
def train_workflow(
    model_manager: ModelManager,
    train_dataset: Optional[Dataset],
    eval_dataset: Optional[Dataset],
    *,
    num_epochs: int,
    use_gpu: bool,
    named_model_ids: ModuleNameToEntityId,
    child_workflow_id: int,
    setup_data: Optional[Dict[str, bytes]] = None,
    normalization_data_map: Optional[Dict[str, NormalizationData]] = None,
    reward_options: Optional[RewardOptions] = None,
    reader_options: Optional[ReaderOptions] = None,
    resource_options: Optional[ResourceOptions] = None,
    warmstart_path: Optional[str] = None,
) -> RLTrainingOutput:
    writer = SummaryWriter()
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    if setup_data is not None:
        data_module = model_manager.get_data_module(
            setup_data=setup_data,
            reward_options=reward_options,
            reader_options=reader_options,
            resource_options=resource_options,
        )
        assert data_module is not None
        data_module.setup()
    else:
        data_module = None

    if normalization_data_map is None:
        assert data_module is not None
        normalization_data_map = data_module.get_normalization_data_map()

    warmstart_input_path = warmstart_path or None
    trainer_module = model_manager.build_trainer(
        use_gpu=use_gpu,
        reward_options=reward_options,
        normalization_data_map=normalization_data_map,
    )

    if not reader_options:
        reader_options = ReaderOptions()
    if not resource_options:
        resource_options = ResourceOptions()

    with summary_writer_context(writer):
        train_output, lightning_trainer = model_manager.train(
            trainer_module,
            train_dataset,
            eval_dataset,
            None,
            data_module,
            num_epochs,
            reader_options,
            resource_options,
            checkpoint_path=warmstart_input_path,
        )

    output_paths = {}
    for module_name, serving_module in model_manager.build_serving_modules(
        trainer_module, normalization_data_map
    ).items():
        torchscript_output_path = (
            f"{model_manager.__class__.__name__}_{module_name}_"
            f"{round(time.time())}.torchscript"
        )
        torch.jit.save(serving_module, torchscript_output_path)
        logger.info(f"Saved {module_name} to {torchscript_output_path}")
        output_paths[module_name] = torchscript_output_path
    return dataclasses.replace(train_output, output_paths=output_paths)
def test_swallowing_histogram_value_error(self):
    with TemporaryDirectory() as tmp_dir:
        writer = SummaryWriter(tmp_dir)
        with summary_writer_context(writer):
            SummaryWriterContext.add_histogram("bad_histogram", torch.ones(100, 1))
def test_with_none(self):
    with summary_writer_context(None):
        self.assertIsNone(SummaryWriterContext.add_scalar("test", torch.ones(1)))