def test_model_management(tmpdir):
    """
    Checkpoints beyond keep_checkpoints are pruned, and tracking the final
    model neither disturbs the rolling checkpoint list nor goes unrecorded.
    """
    results_path = os.path.join(tmpdir, "results")
    brain_name = "Mock_brain"
    final_model_path = os.path.join(results_path, brain_name)
    # Seed the status store with three pre-existing checkpoints.
    test_checkpoint_list = [
        {
            "steps": step,
            "file_path": os.path.join(final_model_path, f"{brain_name}-{step}.nn"),
            "reward": reward,
            "creation_time": time.time(),
        }
        for step, reward in [(1, 1.312), (2, 1.912), (3, 2.312)]
    ]
    GlobalTrainingStatus.set_parameter_state(
        brain_name, StatusType.CHECKPOINTS, test_checkpoint_list
    )

    # A fourth checkpoint fits within keep_checkpoints=4: nothing is evicted.
    new_checkpoint_4 = NNCheckpoint(
        4, os.path.join(final_model_path, f"{brain_name}-4.nn"), 2.678, time.time()
    )
    NNCheckpointManager.add_checkpoint(brain_name, new_checkpoint_4, 4)
    assert len(NNCheckpointManager.get_checkpoints(brain_name)) == 4

    # A fifth checkpoint must evict the oldest to stay at keep_checkpoints=4.
    new_checkpoint_5 = NNCheckpoint(
        5, os.path.join(final_model_path, f"{brain_name}-5.nn"), 3.122, time.time()
    )
    NNCheckpointManager.add_checkpoint(brain_name, new_checkpoint_5, 4)
    assert len(NNCheckpointManager.get_checkpoints(brain_name)) == 4

    # Tracking the final model is separate from the rolling checkpoint list.
    final_model_path = f"{final_model_path}.nn"
    final_model_time = time.time()
    current_step = 6
    final_model = NNCheckpoint(current_step, final_model_path, 3.294, final_model_time)
    NNCheckpointManager.track_final_checkpoint(brain_name, final_model)
    assert len(NNCheckpointManager.get_checkpoints(brain_name)) == 4

    check_checkpoints = GlobalTrainingStatus.saved_state[brain_name][
        StatusType.CHECKPOINTS.value
    ]
    assert check_checkpoints is not None

    # BUG FIX: the final checkpoint is stored per-brain (see the parallel
    # CHECKPOINTS lookup above); the original lookup omitted brain_name and
    # therefore read the wrong key, making this assertion vacuous.
    final_model = GlobalTrainingStatus.saved_state[brain_name][
        StatusType.FINAL_CHECKPOINT.value
    ]
    assert final_model is not None
def _checkpoint(self) -> NNCheckpoint:
    """
    Write a checkpoint for this trainer's (first) policy and register it
    with the checkpoint manager.

    :return: Metadata describing the newly written checkpoint.
    """
    if len(self.policies) > 1:
        logger.warning(
            "Trainer has multiple policies, but default behavior only saves the first."
        )
    # Only the first policy is exported; additional ones are ignored.
    policy = next(iter(self.policies.values()))
    model_path = policy.model_path
    serialization = SerializationSettings(model_path, self.brain_name)
    export_path = os.path.join(model_path, f"{self.brain_name}-{self.step}")
    policy.checkpoint(export_path, serialization)
    checkpoint_meta = NNCheckpoint(
        int(self.step),
        f"{export_path}.nn",
        self._policy_mean_reward(),
        time.time(),
    )
    NNCheckpointManager.add_checkpoint(
        self.brain_name, checkpoint_meta, self.trainer_settings.keep_checkpoints
    )
    return checkpoint_meta
def test_summary_checkpoint(mock_add_checkpoint, mock_write_summary):
    """
    Feeding several trajectories through the trainer should emit summaries at
    summary_freq intervals and checkpoint (and register) at
    checkpoint_interval intervals.
    """
    trainer = create_rl_trainer()
    mock_policy = mock.Mock()
    mock_policy.model_path = "mock_model_path"
    trainer.add_policy("TestBrain", mock_policy)
    traj_q = AgentManagerQueue("testbrain")
    out_q = AgentManagerQueue("testbrain")
    trainer.subscribe_trajectory_queue(traj_q)
    trainer.publish_policy_queue(out_q)

    time_horizon = 10
    summary_freq = trainer.trainer_settings.summary_freq
    checkpoint_interval = trainer.trainer_settings.checkpoint_interval
    fake_traj = mb.make_fake_trajectory(
        length=time_horizon,
        observation_shapes=[(1,)],
        max_step_complete=True,
        action_space=[2],
    )

    # Drive the trainer with several trajectories, draining the policy queue
    # each step to confirm an updated policy was published.
    num_trajectories = 5
    for _ in range(num_trajectories):
        traj_q.put(fake_traj)
        trainer.advance()
        out_q.get_nowait()

    total_steps = num_trajectories * time_horizon

    # write_summary must have fired once per summary_freq steps.
    summary_calls = [
        mock.call(step) for step in range(summary_freq, total_steps, summary_freq)
    ]
    mock_write_summary.assert_has_calls(summary_calls, any_order=True)

    # The policy must have been checkpointed at each checkpoint_interval.
    checkpoint_range = range(checkpoint_interval, total_steps, checkpoint_interval)
    expected_paths = [
        f"{mock_policy.model_path}/{trainer.brain_name}-{step}"
        for step in checkpoint_range
    ]
    mock_policy.checkpoint.assert_has_calls(
        [mock.call(path, mock.ANY) for path in expected_paths], any_order=True
    )

    # Each checkpoint must also have been registered with the manager.
    mock_add_checkpoint.assert_has_calls(
        [
            mock.call(
                trainer.brain_name,
                NNCheckpoint(
                    step,
                    f"{mock_policy.model_path}/{trainer.brain_name}-{step}.nn",
                    None,
                    mock.ANY,
                ),
                trainer.trainer_settings.keep_checkpoints,
            )
            for step in checkpoint_range
        ]
    )
def _checkpoint(self) -> NNCheckpoint:
    """
    Persist the current policy through the saver and register the resulting
    checkpoint with the checkpoint manager.

    :return: Metadata for the checkpoint that was written.
    """
    if len(self.policies) > 1:
        logger.warning(
            "Trainer has multiple policies, but default behavior only saves the first."
        )
    export_path = self.saver.save_checkpoint(self.brain_name, self.step)
    checkpoint_meta = NNCheckpoint(
        int(self.step),
        f"{export_path}.nn",
        self._policy_mean_reward(),
        time.time(),
    )
    NNCheckpointManager.add_checkpoint(
        self.brain_name, checkpoint_meta, self.trainer_settings.keep_checkpoints
    )
    return checkpoint_meta
def _checkpoint(self) -> NNCheckpoint:
    """
    Persist the current policy through the model saver and register the
    resulting checkpoint with the checkpoint manager.

    :return: Metadata for the checkpoint that was written.
    """
    if len(self.policies) > 1:
        logger.warning(
            "Trainer has multiple policies, but default behavior only saves the first."
        )
    export_path = self.model_saver.save_checkpoint(self.brain_name, self.step)
    # TensorFlow exports .nn files; everything else exports ONNX.
    if self.framework == FrameworkType.TENSORFLOW:
        suffix = "nn"
    else:
        suffix = "onnx"
    checkpoint_meta = NNCheckpoint(
        int(self.step),
        f"{export_path}.{suffix}",
        self._policy_mean_reward(),
        time.time(),
    )
    NNCheckpointManager.add_checkpoint(
        self.brain_name, checkpoint_meta, self.trainer_settings.keep_checkpoints
    )
    return checkpoint_meta