def test_eval_preparation(self):
    """Run a zero-epoch training and verify the evaluation scaffolding.

    Checks that the run directory contains the copied code directory and the
    slurm evaluation script, and that the script's final command references
    the run directory itself.
    """
    dl = DataloaderImages((149, 117), ignore_useless_states=False)
    st = ModelTrainer(
        lambda: DeconvModelEfficient(),
        self.test_src_dir,
        self.eval_output_path,
        load_datasets_path=self.test_split_dir,
        cache_path=None,
        batch_size=2,
        train_print_frequency=10,
        epochs=0,  # no real training needed; only the output scaffolding
        num_workers=4,
        num_validation_samples=2,
        num_test_samples=self.num_test_samples,
        data_processing_function=dl.get_sensordata_and_flowfront,
        data_gather_function=get_filelist_within_folder_blacklisted,
        classification_evaluator_function=lambda summary_writer: SensorToFlowfrontEvaluator(summary_writer=summary_writer),
        data_root=test_resources.test_src_dir,
    )
    st.start_training()
    dirs = [e for e in self.eval_output_path.iterdir() if e.is_dir()]
    # Fail with a clear message instead of an IndexError if no run dir exists.
    self.assertTrue(dirs, "start_training() should create a run directory")
    code_dir = dirs[0] / 'rtm-predictions'
    slurm_script = dirs[0] / 'run_model_eval.sh'
    # pathlib predicates instead of os.path.* — these are Path objects already.
    self.assertTrue(code_dir.is_dir())
    self.assertTrue(slurm_script.is_file())
    with open(slurm_script) as f:
        lines = f.read().splitlines()
    # The last script line invokes the eval entry point; its third-to-last
    # token is expected to be the run directory path.
    tokens = lines[-1].split()
    self.assertEqual(dirs[0], Path(tokens[-3]))
    st.writer.flush()
    st.writer.close()
def test_training(self):
    """Train for two epochs and sanity-check the produced output.log."""
    expected_epochs = 2
    image_loader = DataloaderImages((149, 117), ignore_useless_states=False)
    trainer = ModelTrainer(
        lambda: DeconvModelEfficient(),
        self.test_src_dir,
        self.training_save_path,
        load_datasets_path=self.test_split_dir,
        cache_path=None,
        batch_size=16,
        train_print_frequency=10,
        epochs=expected_epochs,
        num_workers=4,
        num_validation_samples=2,
        num_test_samples=self.num_test_samples,
        data_processing_function=image_loader.get_sensordata_and_flowfront,
        data_gather_function=get_filelist_within_folder_blacklisted,
        loss_criterion=torch.nn.BCELoss(),
        classification_evaluator_function=lambda summary_writer: SensorToFlowfrontEvaluator(summary_writer=summary_writer),
        data_root=test_resources.test_src_dir,
    )
    trainer.start_training()
    run_dirs = [d for d in self.training_save_path.iterdir() if d.is_dir()]
    with open(run_dirs[0] / 'output.log') as log_file:
        log_text = log_file.read()
    # One "Mean Loss on Eval" line is written per epoch.
    eval_lines = re.findall('Mean Loss on Eval', log_text)
    self.assertEqual(expected_epochs, len(eval_lines))
    # Check if steps are growing / if there are doubled steps in the output
    step_numbers = []
    for step_line in re.findall(r'Duration of step.+\d:', log_text):
        step_numbers.append(int(re.findall(r'\d+', step_line)[0]))
    self.assertEqual(len(set(step_numbers)), len(step_numbers))
def test_training_load_optimizer(self):
    """Resume training from a stored optimizer checkpoint and verify that the
    optimizer state was actually restored."""
    image_loader = DataloaderImages((149, 117), ignore_useless_states=False)
    trainer = ModelTrainer(
        lambda: DeconvModelEfficient(),
        self.test_src_dir,
        self.training_save_path,
        load_datasets_path=self.test_split_dir,
        cache_path=None,
        batch_size=16,
        train_print_frequency=10,
        epochs=self.expected_num_epochs_during_training,
        num_workers=4,
        num_validation_samples=2,
        num_test_samples=self.num_test_samples,
        data_processing_function=image_loader.get_sensordata_and_flowfront,
        data_gather_function=get_filelist_within_folder_blacklisted,
        loss_criterion=torch.nn.BCELoss(),
        optimizer_path=self.checkpoint,
        classification_evaluator_function=lambda summary_writer: SensorToFlowfrontEvaluator(summary_writer=summary_writer),
        data_root=test_resources.test_src_dir,
    )
    trainer.start_training()
    # A freshly constructed optimizer has an empty state dict, so any entries
    # here prove that the checkpoint was loaded.
    num_states_after = len(trainer.optimizer.state.keys())
    self.assertGreater(num_states_after, 0)
def test_training_ok_notok(self):
    """Train the binary success/failure classifier and check that eval loss
    lines were written to the log."""
    sensor_loader = DataLoaderSensor()
    trainer = ModelTrainer(
        lambda: ERFH5_PressureSequence_Model(),
        self.training_data_paths,
        self.training_save_path,
        epochs=self.expected_num_epochs_during_training,
        data_gather_function=dg.get_filelist_within_folder,
        data_processing_function=sensor_loader.sensorgrid_simulationsuccess,
        num_validation_samples=1,
        num_test_samples=1,
        loss_criterion=torch.nn.BCELoss(),
        classification_evaluator_function=lambda summary_writer: BinaryClassificationEvaluator(summary_writer=summary_writer),
        data_root=test_resources.test_src_dir,
    )
    trainer.start_training()
    run_dirs = [d for d in self.training_save_path.iterdir() if d.is_dir()]
    with open(run_dirs[0] / "output.log") as log_file:
        log_text = log_file.read()
    # At least one "Mean Loss on Eval" line must appear per completed epoch.
    eval_lines = re.findall("Mean Loss on Eval", log_text)
    self.assertTrue(len(eval_lines) > 0)
def test_save_load_training(self):
    """Train twice into the same save path.

    The second run starts in a directory where the first run already stored
    its checkpoints, exercising the save-then-load round trip.
    """
    num_epochs = 2
    # The two rounds are configured identically; the loop replaces the
    # verbatim copy-paste duplication while keeping the execution identical.
    for _ in range(2):
        dl = DataloaderImages((149, 117), ignore_useless_states=False)
        st = ModelTrainer(
            lambda: DeconvModelEfficient(),
            self.test_src_dir,
            self.training_save_path,
            load_datasets_path=self.test_split_dir,
            cache_path=None,
            batch_size=16,
            train_print_frequency=10,
            epochs=num_epochs,
            num_workers=4,
            num_validation_samples=2,
            num_test_samples=self.num_test_samples,
            data_processing_function=dl.get_sensordata_and_flowfront,
            data_gather_function=get_filelist_within_folder_blacklisted,
            loss_criterion=torch.nn.BCELoss(),
            classification_evaluator_function=lambda summary_writer: SensorToFlowfrontEvaluator(summary_writer=summary_writer),
            data_root=test_resources.test_src_dir,
        )
        st.start_training()
class TestTrainingDryspotFF(unittest.TestCase):
    """Smoke test: train dry-spot detection from flow fronts for one epoch."""

    def setUp(self):
        self.training_save_path = resources.test_training_out_dir
        self.training_data_paths = [
            resources.test_training_src_dir / 'dry_spot_from_ff'
        ]
        self.expected_num_epochs_during_training = 1
        dlds = DataloaderDryspots(image_size=(143, 111),
                                  ignore_useless_states=False)
        self.dt = ModelTrainer(
            lambda: DrySpotModel(),
            data_source_paths=self.training_data_paths,
            save_path=self.training_save_path,
            batch_size=10,
            epochs=self.expected_num_epochs_during_training,
            num_validation_samples=5,
            num_test_samples=5,
            data_gather_function=dg.get_filelist_within_folder,
            data_processing_function=dlds.get_flowfront_bool_dryspot,
            loss_criterion=torch.nn.BCELoss(),
            classification_evaluator_function=lambda summary_writer: BinaryClassificationEvaluator(
                summary_writer=summary_writer,
                save_path=self.training_save_path,
                skip_images=True,
            ),
            data_root=resources.test_src_dir,
        )

    def test_training(self):
        """Training must write at least one 'Mean Loss on Eval' log line."""
        self.dt.start_training()
        dirs = [e for e in self.training_save_path.iterdir() if e.is_dir()]
        with open(dirs[0] / "output.log") as f:
            content = f.read()
        epochs = re.findall("Mean Loss on Eval", content)
        self.assertTrue(len(epochs) > 0)

    def tearDown(self) -> None:
        logging.shutdown()
        root_logger = logging.getLogger("")
        # Fix: iterate over a *copy* of the handler list. removeHandler()
        # mutates root_logger.handlers, and removing entries while iterating
        # the live list skips every other handler. Also use a plain loop
        # instead of a comprehension executed for its side effects.
        for handler in list(root_logger.handlers):
            root_logger.removeHandler(handler)
        shutil.rmtree(self.training_save_path)
dataset_split_path=None, # r.dataset_split, data_root=data_root, cache_path=r.cache_path, batch_size=batch_size, epochs=num_epochs, num_workers=num_workers, num_validation_samples=num_val, num_test_samples=num_test, data_processing_function=dl.get_sensor_bool_dryspot_runlevel, data_gather_function=get_filelist_within_folder_blacklisted, loss_criterion=torch.nn.BCELoss(), optimizer_function=lambda params: torch.optim.AdamW(params, lr=lr), classification_evaluator_function=lambda: BinaryClassificationEvaluator(skip_images=not create_data_plots, max_epochs=num_epochs, data_loader=dl), # lr_scheduler_function=lambda optim: ExponentialLR(optim, 0.5), dummy_epoch=False, caching_torch=use_cache, run_name=run_name, save_in_mlflow_directly=True) if not args.run_eval: m.start_training() else: m.inference_on_test_set( Path(args.eval), Path(args.checkpoint_path), lambda: BinaryClassificationEvaluator(save_path=Path(args.eval) / "eval_on_test_set", skip_images=False))
import torch

import Resources.training as r
from Models.erfh5_pressuresequence_CRNN import ERFH5_PressureSequence_Model
from Pipeline import data_loader_sensor as dls, data_gather as dg
from Trainer.ModelTrainer import ModelTrainer
from Trainer.evaluation import BinaryClassificationEvaluator

if __name__ == "__main__":
    # Entry point: train binary simulation-success classification on
    # pressure sequences.
    data_source_paths = [r.data_root / "2019-07-24_16-32-40_5000p"]
    save_path = r.save_path
    # NOTE(review): cache_path is assigned but never used below; presumably it
    # was meant to be passed to ModelTrainer (the third positional argument is
    # currently None) — confirm the intended signature.
    cache_path = r.cache_path
    trainer = ModelTrainer(
        lambda: ERFH5_PressureSequence_Model(),
        data_source_paths,
        save_path,
        None,
        epochs=2,
        data_gather_function=dg.get_filelist_within_folder,
        data_processing_function=dls.sensorgrid_simulationsuccess,
        loss_criterion=torch.nn.BCELoss(),
        # Fix: pass a factory, not a pre-built instance — every other
        # ModelTrainer call site supplies a callable that receives the
        # summary writer, so an instance here would be called as a function
        # at evaluation time and fail.
        classification_evaluator_function=lambda summary_writer: BinaryClassificationEvaluator(summary_writer=summary_writer),
    )
    trainer.start_training()
    print("training finished")