def test_loss_print(self):
    """Check that log lines containing the loss tag reach the named logger.

    An in-memory ``StreamHandler`` is attached to the ``"test_logging"``
    logger, a ``StatsHandler`` created with ``iteration_log=True`` and
    ``epoch_log=False`` is attached to a dummy engine, and the captured text
    must contain at least one line mentioning ``tag_name``.
    """
    log_stream = StringIO()
    log_handler = logging.StreamHandler(log_stream)
    log_handler.setLevel(logging.INFO)
    key_to_handler = "test_logging"
    key_to_print = "myLoss"

    # set up engine
    def _train_func(engine, batch):
        return [torch.tensor(0.0)]

    engine = Engine(_train_func)

    # set up testing handler
    logger = logging.getLogger(key_to_handler)
    logger.setLevel(logging.INFO)
    logger.addHandler(log_handler)
    try:
        stats_handler = StatsHandler(
            iteration_log=True, epoch_log=False, name=key_to_handler, tag_name=key_to_print
        )
        stats_handler.attach(engine)

        engine.run(range(3), max_epochs=2)

        # check logging output
        output_str = log_stream.getvalue()
    finally:
        # Loggers returned by ``logging.getLogger(name)`` are process-wide, so the
        # handler must be detached or it keeps receiving records from later tests.
        logger.removeHandler(log_handler)
        log_handler.close()

    has_key_word = re.compile(f".*{key_to_print}.*")
    content_count = sum(1 for line in output_str.split("\n") if has_key_word.match(line))
    self.assertGreater(content_count, 0)
def test_metrics_print(self):
    """Check that the metric name shows up on selected lines of the root-logger output.

    The root logger is pointed at an in-memory stream through
    ``logging.basicConfig``; lines 5 and 10 of the captured text are checked
    for the metric name whenever they also mention the handler name.
    """
    # NOTE(review): ``logging.basicConfig`` does nothing when the root logger already
    # has handlers; in that case nothing is captured and no assertion is executed.
    captured = StringIO()
    logging.basicConfig(stream=captured, level=logging.INFO)
    handler_name = "test_logging"
    metric_name = "testing_metric"

    # dummy training step
    def _train_func(engine, batch):
        return torch.tensor(0.0)

    engine = Engine(_train_func)

    # dummy metric, bumped at the end of every epoch
    @engine.on(Events.EPOCH_COMPLETED)
    def _update_metric(engine):
        previous = engine.state.metrics.get(metric_name, 0.1)
        engine.state.metrics[metric_name] = previous + 0.1

    # handler under test
    StatsHandler(name=handler_name).attach(engine)

    engine.run(range(3), max_epochs=2)

    # inspect the captured log text
    from_handler = re.compile(f".*{handler_name}.*")
    mentions_metric = re.compile(f".*{metric_name}.*")
    checked_rows = (5, 10)
    for row, text in enumerate(captured.getvalue().split("\n")):
        if row in checked_rows and from_handler.match(text):
            self.assertTrue(mentions_metric.match(text))
def test_default_logger(self):
    """Check that ``StatsHandler(name=None)`` output reaches ``engine.logger``.

    An in-memory ``StreamHandler`` is attached directly to ``engine.logger``
    and the captured text must contain at least one line mentioning
    ``tag_name``.  Logger levels and handlers changed by the test are put
    back afterwards.
    """
    log_stream = StringIO()
    log_handler = logging.StreamHandler(log_stream)
    log_handler.setLevel(logging.INFO)
    key_to_print = "myLoss"

    # set up engine
    def _train_func(engine, batch):
        return [torch.tensor(0.0)]

    engine = Engine(_train_func)
    engine.logger.addHandler(log_handler)

    # set up testing handler
    stats_handler = StatsHandler(name=None, tag_name=key_to_print)
    stats_handler.attach(engine)

    # leverage `engine.logger` to print info
    engine_level = engine.logger.level
    level = logging.root.getEffectiveLevel()
    engine.logger.setLevel(logging.INFO)
    logging.basicConfig(level=logging.INFO)
    try:
        engine.run(range(3), max_epochs=2)
        # check logging output
        output_str = log_stream.getvalue()
    finally:
        # ``logging.basicConfig`` is a no-op once the root logger has handlers, so it
        # cannot be used to restore the level; set it on the root logger directly.
        logging.root.setLevel(level)
        engine.logger.setLevel(engine_level)
        engine.logger.removeHandler(log_handler)
        log_handler.close()

    has_key_word = re.compile(f".*{key_to_print}.*")
    content_count = sum(1 for line in output_str.split("\n") if has_key_word.match(line))
    self.assertGreater(content_count, 0)
def test_loss_print(self):
    """Check that the loss tag appears on selected lines of the root-logger output.

    Lines 1-3 and 6-8 of the captured text are checked for the tag name
    whenever they also mention the handler name.
    """
    # NOTE(review): ``logging.basicConfig`` does nothing when the root logger already
    # has handlers; in that case nothing is captured and no assertion is executed.
    captured = StringIO()
    logging.basicConfig(stream=captured, level=logging.INFO)
    handler_name = 'test_logging'
    tag = 'myLoss'

    # dummy training step
    def _train_func(engine, batch):
        return torch.tensor(0.0)

    engine = Engine(_train_func)

    # handler under test
    StatsHandler(name=handler_name, tag_name=tag).attach(engine)

    engine.run(range(3), max_epochs=2)

    # inspect the captured log text
    from_handler = re.compile('.*{}.*'.format(handler_name))
    mentions_tag = re.compile('.*{}.*'.format(tag))
    checked_rows = (1, 2, 3, 6, 7, 8)
    for row, text in enumerate(captured.getvalue().split('\n')):
        if row in checked_rows and from_handler.match(text):
            self.assertTrue(mentions_tag.match(text))
def test_loss_file(self):
    """Check that loss lines are written to a file attached to the named logger.

    A ``FileHandler`` inside a temporary directory is attached to the
    ``"test_logging"`` logger; after the run the file must contain at least
    one line mentioning the tag name.
    """
    handler_name = "test_logging"
    tag = "myLoss"

    with tempfile.TemporaryDirectory() as tempdir:
        log_path = os.path.join(tempdir, "test_loss_stats.log")
        file_handler = logging.FileHandler(log_path, mode="w")
        file_handler.setLevel(logging.INFO)

        # dummy training step
        def _train_func(engine, batch):
            return [torch.tensor(0.0)]

        engine = Engine(_train_func)

        # route the named logger into the file
        named_logger = logging.getLogger(handler_name)
        named_logger.setLevel(logging.INFO)
        named_logger.addHandler(file_handler)
        stats_handler = StatsHandler(name=handler_name, tag_name=tag)
        stats_handler.attach(engine)

        engine.run(range(3), max_epochs=2)

        # release the file before reading it back
        file_handler.close()
        stats_handler.logger.removeHandler(file_handler)
        with open(log_path) as f:
            output_str = f.read()

        mentions_tag = re.compile(f".*{tag}.*")
        matching = [text for text in output_str.split("\n") if mentions_tag.match(text)]
        self.assertTrue(len(matching) > 0)
def test_loss_print(self):
    """Check that the loss tag appears on selected lines captured via ``logger_handler``.

    The in-memory handler is handed to ``StatsHandler`` through its
    ``logger_handler`` argument; lines 1-3 and 6-8 of the captured text are
    checked for the tag name whenever they also mention the handler name.
    """
    captured = StringIO()
    stream_handler = logging.StreamHandler(captured)
    stream_handler.setLevel(logging.INFO)
    handler_name = "test_logging"
    tag = "myLoss"

    # dummy training step
    def _train_func(engine, batch):
        return [torch.tensor(0.0)]

    engine = Engine(_train_func)

    # handler under test
    StatsHandler(name=handler_name, tag_name=tag, logger_handler=stream_handler).attach(engine)

    engine.run(range(3), max_epochs=2)

    # inspect the captured log text
    output_str = captured.getvalue()
    stream_handler.close()
    from_handler = re.compile(f".*{handler_name}.*")
    mentions_tag = re.compile(f".*{tag}.*")
    checked_rows = (1, 2, 3, 6, 7, 8)
    for row, text in enumerate(output_str.split("\n")):
        if row in checked_rows and from_handler.match(text):
            self.assertTrue(mentions_tag.match(text))
def test_loss_file(self):
    """Check that the loss tag appears on selected lines of a log file.

    A ``FileHandler`` inside a temporary directory is handed to
    ``StatsHandler`` through ``logger_handler``; lines 1-3 and 6-8 of the
    file are checked for the tag name whenever they also mention the handler
    name.
    """
    logging.basicConfig(level=logging.INFO)
    key_to_handler = "test_logging"
    key_to_print = "myLoss"

    with tempfile.TemporaryDirectory() as tempdir:
        filename = os.path.join(tempdir, "test_loss_stats.log")
        handler = logging.FileHandler(filename, mode="w")

        # set up engine
        def _train_func(engine, batch):
            return torch.tensor(0.0)

        engine = Engine(_train_func)

        # set up testing handler
        stats_handler = StatsHandler(name=key_to_handler, tag_name=key_to_print, logger_handler=handler)
        stats_handler.attach(engine)

        try:
            engine.run(range(3), max_epochs=2)
        finally:
            # Detach first, then close through the handler API: closing only
            # ``handler.stream`` leaves the handler holding a dead stream object,
            # and the file must be released before the temporary directory is removed.
            stats_handler.logger.removeHandler(handler)
            handler.close()

        with open(filename, "r") as f:
            output_str = f.read()

        grep = re.compile(f".*{key_to_handler}.*")
        has_key_word = re.compile(f".*{key_to_print}.*")
        for idx, line in enumerate(output_str.split("\n")):
            if grep.match(line) and idx in (1, 2, 3, 6, 7, 8):
                self.assertTrue(has_key_word.match(line))
def test_loss_dict(self):
    """Check that a dict-valued ``output_transform`` key appears on selected log lines.

    The engine output is wrapped as ``{"myLoss1": output}``; lines 1-3 and 6-8
    of the root-logger output are checked for that key whenever they also
    mention the handler name.
    """
    # NOTE(review): ``logging.basicConfig`` does nothing when the root logger already
    # has handlers; in that case nothing is captured and no assertion is executed.
    captured = StringIO()
    logging.basicConfig(stream=captured, level=logging.INFO)
    handler_name = "test_logging"
    loss_key = "myLoss1"

    # dummy training step
    def _train_func(engine, batch):
        return torch.tensor(0.0)

    engine = Engine(_train_func)

    # handler under test
    StatsHandler(name=handler_name, output_transform=lambda x: {loss_key: x}).attach(engine)

    engine.run(range(3), max_epochs=2)

    # inspect the captured log text
    from_handler = re.compile(f".*{handler_name}.*")
    mentions_key = re.compile(f".*{loss_key}.*")
    checked_rows = (1, 2, 3, 6, 7, 8)
    for row, text in enumerate(captured.getvalue().split("\n")):
        if row in checked_rows and from_handler.match(text):
            self.assertTrue(mentions_key.match(text))
def test_exception(self):
    """Check that a ``RuntimeError`` raised by the step function propagates out of ``engine.run``
    when a default ``StatsHandler`` is attached."""

    # step function that always fails
    def _failing_step(engine, batch):
        raise RuntimeError("test exception.")

    engine = Engine(_failing_step)

    # handler under test, default configuration
    StatsHandler().attach(engine)

    with self.assertRaises(RuntimeError):
        engine.run(range(3), max_epochs=2)
def train_handlers(self, context: Context):
    """Assemble the list of handlers to attach to the trainer.

    Args:
        context: object providing ``local_rank``, ``events_dir`` and ``evaluator``.

    Returns:
        A list holding, in order: the LR-scheduler handler (when
        ``self.lr_scheduler_handler`` returns a truthy value), the stats and
        TensorBoard stats handlers (rank 0 only), and a ``ValidationHandler``
        (when ``context.evaluator`` is truthy).
    """
    scheduler_handler = self.lr_scheduler_handler(context)
    collected: List[Any] = [scheduler_handler] if scheduler_handler else []

    # loss reporting only happens on the rank-0 process
    if context.local_rank == 0:
        collected.append(
            StatsHandler(tag_name="train_loss", output_transform=from_engine(["loss"], first=True))
        )
        collected.append(
            TensorBoardStatsHandler(
                log_dir=context.events_dir,
                tag_name="train_loss",
                output_transform=from_engine(["loss"], first=True),
            )
        )

    if context.evaluator:
        logger.info(
            f"{context.local_rank} - Adding Validation to run every '{self._val_interval}' interval"
        )
        validation = ValidationHandler(self._val_interval, validator=context.evaluator, epoch_level=True)
        collected.append(validation)

    return collected
def val_handlers(self, context: Context):
    """Return the validation handlers for the rank-0 process, ``None`` for other ranks.

    Args:
        context: object providing ``local_rank`` and ``events_dir``.

    Returns:
        ``[StatsHandler, TensorBoardStatsHandler]`` when ``context.local_rank == 0``,
        otherwise ``None``.
    """
    # Decide before constructing anything: building the handlers on every rank and
    # then discarding them is wasted work, and a TensorBoard handler given a
    # ``log_dir`` may open a writer on that directory (presumably; confirm against
    # the MONAI version in use).
    if context.local_rank != 0:
        return None

    return [
        # ``output_transform`` returning None: no per-iteration value is supplied
        StatsHandler(output_transform=lambda x: None),
        TensorBoardStatsHandler(log_dir=context.events_dir, output_transform=lambda x: None),
    ]
def test_attributes_print(self):
    """Check that a "State values" line is logged when ``state_attributes`` is given.

    ``test1`` and ``test2`` are set on ``engine.state`` at the end of each
    epoch; ``test3`` is requested but never set.  The captured output of the
    ``"test_logging"`` logger must contain at least one line matching
    ``State values``.
    """
    log_stream = StringIO()
    log_handler = logging.StreamHandler(log_stream)
    log_handler.setLevel(logging.INFO)
    key_to_handler = "test_logging"

    # set up engine
    def _train_func(engine, batch):
        return [torch.tensor(0.0)]

    engine = Engine(_train_func)

    # set up dummy metric
    @engine.on(Events.EPOCH_COMPLETED)
    def _update_metric(engine):
        if not hasattr(engine.state, "test1"):
            engine.state.test1 = 0.1
            engine.state.test2 = 0.2
        else:
            engine.state.test1 += 0.1
            engine.state.test2 += 0.2

    # set up testing handler
    logger = logging.getLogger(key_to_handler)
    logger.setLevel(logging.INFO)
    logger.addHandler(log_handler)
    try:
        stats_handler = StatsHandler(name=key_to_handler, state_attributes=["test1", "test2", "test3"])
        stats_handler.attach(engine)

        engine.run(range(3), max_epochs=2)

        # check logging output
        output_str = log_stream.getvalue()
    finally:
        # Loggers returned by ``logging.getLogger(name)`` are process-wide, so the
        # handler must be detached or it keeps receiving records from later tests.
        logger.removeHandler(log_handler)
        log_handler.close()

    has_key_word = re.compile(".*State values.*")
    content_count = sum(1 for line in output_str.split("\n") if has_key_word.match(line))
    self.assertGreater(content_count, 0)
def test_metrics_print(self):
    """Check that log lines containing the metric name reach the named logger.

    A dummy metric is updated at the end of every epoch and a ``StatsHandler``
    created with ``iteration_log=False`` and ``epoch_log=True`` is attached;
    the captured output of the ``"test_logging"`` logger must contain at
    least one line mentioning the metric name.
    """
    log_stream = StringIO()
    log_handler = logging.StreamHandler(log_stream)
    log_handler.setLevel(logging.INFO)
    key_to_handler = "test_logging"
    key_to_print = "testing_metric"

    # set up engine
    def _train_func(engine, batch):
        return [torch.tensor(0.0)]

    engine = Engine(_train_func)

    # set up dummy metric
    @engine.on(Events.EPOCH_COMPLETED)
    def _update_metric(engine):
        current_metric = engine.state.metrics.get(key_to_print, 0.1)
        engine.state.metrics[key_to_print] = current_metric + 0.1

    # set up testing handler
    logger = logging.getLogger(key_to_handler)
    logger.setLevel(logging.INFO)
    logger.addHandler(log_handler)
    try:
        stats_handler = StatsHandler(iteration_log=False, epoch_log=True, name=key_to_handler)
        stats_handler.attach(engine)

        engine.run(range(3), max_epochs=2)

        # check logging output
        output_str = log_stream.getvalue()
    finally:
        # Loggers returned by ``logging.getLogger(name)`` are process-wide, so the
        # handler must be detached or it keeps receiving records from later tests.
        logger.removeHandler(log_handler)
        log_handler.close()

    has_key_word = re.compile(f".*{key_to_print}.*")
    content_count = sum(1 for line in output_str.split("\n") if has_key_word.match(line))
    self.assertGreater(content_count, 0)
def test_attributes_print(self):
    """Check that "State values" appears on selected lines captured via ``logger_handler``.

    ``test1`` and ``test2`` are set on ``engine.state`` at the end of each
    epoch; ``test3`` is requested but never set.  Lines 5 and 10 of the
    captured text are checked for ``State values`` whenever they also mention
    the handler name.
    """
    captured = StringIO()
    stream_handler = logging.StreamHandler(captured)
    stream_handler.setLevel(logging.INFO)
    handler_name = "test_logging"

    # dummy training step
    def _train_func(engine, batch):
        return [torch.tensor(0.0)]

    engine = Engine(_train_func)

    # state attributes, created on the first epoch and incremented afterwards
    @engine.on(Events.EPOCH_COMPLETED)
    def _update_metric(engine):
        if hasattr(engine.state, "test1"):
            engine.state.test1 += 0.1
            engine.state.test2 += 0.2
        else:
            engine.state.test1 = 0.1
            engine.state.test2 = 0.2

    # handler under test
    requested_attributes = ["test1", "test2", "test3"]
    stats_handler = StatsHandler(
        name=handler_name, state_attributes=requested_attributes, logger_handler=stream_handler
    )
    stats_handler.attach(engine)

    engine.run(range(3), max_epochs=2)

    # inspect the captured log text
    output_str = captured.getvalue()
    stream_handler.close()
    from_handler = re.compile(f".*{handler_name}.*")
    mentions_state = re.compile(".*State values.*")
    checked_rows = (5, 10)
    for row, text in enumerate(output_str.split("\n")):
        if not (from_handler.match(text) and row in checked_rows):
            continue
        self.assertTrue(mentions_state.match(text))
def test_compute(self, data, expected):
    """Run a ``SupervisedEvaluator`` with mark/range handlers attached and compare its prediction.

    Args:
        data: value passed as ``val_data_loader`` to the evaluator.
        expected: tensor the first output's ``"pred"`` entry is compared against.
    """
    val_handlers = [
        # mark: Ignite event
        MarkHandler(Events.STARTED),
        # mark: event given by name
        MarkHandler("EPOCH_STARTED"),
        # mark: event given by name, with an explicit message
        MarkHandler("EPOCH_STARTED", "Start of the epoch"),
        # range: single prefix (between BATCH_STARTED and BATCH_COMPLETED)
        RangeHandler("Batch"),
        # range: pair of Ignite events
        RangeHandler((Events.STARTED, Events.COMPLETED)),
        # range: pair of names
        RangeHandler(("GET_BATCH_STARTED", "GET_BATCH_COMPLETED"), msg="Batching!"),
        # range: a name paired with an Ignite event
        RangeHandler(("GET_BATCH_STARTED", Events.COMPLETED)),
        # range start: by name
        RangePushHandler("ITERATION_STARTED"),
        # range start: by Ignite event, with message
        RangePushHandler(Events.ITERATION_STARTED, "Iteration 2"),
        # range start: by name, with message
        RangePushHandler("EPOCH_STARTED", "Epoch 2"),
        # range end: by Ignite event
        RangePopHandler(Events.ITERATION_COMPLETED),
        RangePopHandler(Events.EPOCH_COMPLETED),
        # range end: by name
        RangePopHandler("ITERATION_COMPLETED"),
        # unrelated handler attached alongside
        StatsHandler(tag_name="train", output_transform=from_engine(["label"], first=True)),
    ]

    # evaluator under test
    engine = SupervisedEvaluator(
        device=torch.device("cpu:0"),
        val_data_loader=data,
        epoch_length=1,
        network=torch.nn.PReLU(),
        postprocessing=lambda x: {"pred": x["pred"] + 1.0},
        decollate=True,
        val_handlers=val_handlers,
    )

    engine.run()

    # first item of the engine output carries the prediction to verify
    first_output = engine.state.output[0]
    torch.testing.assert_allclose(first_output["pred"], expected)
def run(self, date=None) -> str:
    """Create a timestamped output directory, register the training handlers and start the run.

    Args:
        date: optional object with a ``strftime`` method, used for the
            "Training started" message; the current time is used when ``None``.

    Returns:
        Path of the output directory created for this run.
    """
    started_at = datetime.datetime.now() if date is None else date
    datetime_string = started_at.strftime('%d/%m/%Y %H:%M:%S')
    print(f'Training started: {datetime_string}')

    # NOTE(review): the directory name is always stamped from the current time,
    # even when ``date`` was supplied - confirm this is intended.
    stamp = datetime.datetime.now()
    day_part = str(stamp).split(' ')[0]
    clock_part = str(stamp.strftime("%H:%M:%S")).replace(':', '-')
    timedate_info = day_part + '_' + clock_part

    # <out_dir>/training/<out_name>_<timestamp>
    training_dir = os.path.join(self.out_dir, 'training')
    if not os.path.exists(training_dir):
        os.mkdir(training_dir)
    self.output_dir = os.path.join(training_dir, self.out_name + '_' + timedate_info)
    os.mkdir(self.output_dir)
    self.validator.output_dir = self.output_dir

    # fall back to a writer on the run directory, and share it with the validator
    if self.summary_writer is None:
        self.summary_writer = SummaryWriter(log_dir=self.output_dir)
    if self.validator.summary_writer is None:
        self.validator.summary_writer = self.summary_writer

    handlers = [
        MetricLogger(self.output_dir, validator=self.validator),
        ValidationHandler(
            validator=self.validator,
            start=self.validation_epoch,
            interval=self.validation_interval,
        ),
        StatsHandler(tag_name="loss", output_transform=lambda x: x["loss"]),
        TensorBoardStatsHandler(
            summary_writer=self.summary_writer,
            tag_name="Loss",
            output_transform=lambda x: x["loss"],
        ),
    ]

    # objects written to each checkpoint
    save_dict = {'network': self.network, 'optimizer': self.optimizer}
    if self.lr_scheduler is not None:
        # the scheduler handler goes first and the scheduler is checkpointed too
        handlers.insert(0, LrScheduleHandler(lr_scheduler=self.lr_scheduler, print_lr=True))
        save_dict['lr_scheduler'] = self.lr_scheduler

    checkpoint_saver = CheckpointSaver(
        save_dir=self.output_dir, save_dict=save_dict, save_interval=1, n_saved=1
    )
    handlers.append(checkpoint_saver)

    self._register_handlers(handlers)
    super().run()
    return self.output_dir
def __init__(
    self,
    device: torch.device,
    test_data_loader: Union[Iterable, DataLoader],
    network: torch.nn.Module,
    load_dir: str,
    out_dir: str,
    n_classes,
    non_blocking: bool = False,
    post_transform: Optional[Transform] = None,
    amp: bool = False,
    mode: Union[ForwardMode, str] = ForwardMode.EVAL,
) -> None:
    """Set up the test evaluator with AUC/accuracy metrics and a checkpoint loader.

    Args:
        device: forwarded to the parent initialiser.
        test_data_loader: forwarded to the parent initialiser.
        network: forwarded to the parent initialiser; also the target of the
            checkpoint loader (via ``self.network``).
        load_dir: directory searched for a file matching ``network_key_metric*``.
        out_dir: stored on ``self.out_dir``.
        n_classes: number of classes; labels are one-hot encoded when it is
            greater than 1 and passed through unchanged otherwise.
        non_blocking, post_transform, amp, mode: forwarded to the parent initialiser.

    Raises:
        IndexError: if no file in ``load_dir`` matches ``network_key_metric*``.
    """
    self.load_dir = load_dir
    self.out_dir = out_dir

    if n_classes > 1:
        # Encode with the requested class count; the previous hard-coded value of 2
        # was only correct for the two-class case.
        to_onehot = AsDiscrete(to_onehot=True, n_classes=n_classes)
    else:

        def to_onehot(x):
            return x

    # Built once instead of on every metric update.
    threshold = AsDiscrete(threshold_values=True)

    super().__init__(
        device,
        test_data_loader,
        network,
        non_blocking=non_blocking,
        post_transform=post_transform,
        key_val_metric={
            "Test_AUC": ROCAUC(average="micro", output_transform=lambda x: (x["pred"], to_onehot(x["label"])))
        },
        additional_metrics={
            "Test_ACC": Accuracy(output_transform=lambda x: (threshold(x["pred"]), to_onehot(x["label"])))
        },
        amp=amp,
        mode=mode,
    )

    # NOTE(review): ``glob`` returns matches in arbitrary order; if several files
    # match, which one is loaded is unspecified.
    load_path = glob(os.path.join(self.load_dir, 'network_key_metric*'))[0]
    handlers = [
        StatsHandler(output_transform=lambda x: None),
        CheckpointLoader(load_path=load_path, load_dict={"network": self.network}),
    ]
    self._register_handlers(handlers)
def run(self, global_epoch: int) -> None:
    """Run the parent evaluator, registering the handlers only on the first call.

    Args:
        global_epoch: epoch number forwarded to the parent ``run``; handlers
            are registered only when it equals 1.
    """
    is_first_epoch = global_epoch == 1
    if is_first_epoch:
        self._register_handlers(
            [
                StatsHandler(),
                TensorBoardStatsHandler(summary_writer=self.summary_writer),
                CheckpointSaver(
                    save_dir=self.output_dir,
                    save_dict={"network": self.network},
                    save_key_metric=True,
                ),
                MetricsSaver(save_dir=self.output_dir, metrics=['Valid_AUC', 'Valid_ACC']),
                self.early_stop_handler,
            ]
        )

    return super().run(global_epoch=global_epoch)
def run_training(train_file_list, valid_file_list, config_info):
    """
    Pipeline to train a dynUNet segmentation model in MONAI. It is composed of the following main blocks:
        * Data Preparation: Extract the filenames and prepare the training/validation processing transforms
        * Load Data: Load training and validation data to PyTorch DataLoader
        * Network Preparation: Define the network, loss function, optimiser and learning rate scheduler
        * MONAI Evaluator: Initialise the dynUNet evaluator, i.e. the class providing utilities to perform validation
            during training. Attach handlers to save the best model on the validation set. A 2D sliding window approach
            on the 3D volume is used at evaluation. The mean 3D Dice is used as validation metric.
        * MONAI Trainer: Initialise the dynUNet trainer, i.e. the class providing utilities to perform the training
            loop.
        * Run training: The MONAI trainer is run, performing training and validation during training.

    Args:
        train_file_list: .txt or .csv file (with no header) storing two-columns filenames for training:
            image filename in the first column and segmentation filename in the second column.
            The two columns should be separated by a comma.
            See monaifbs/config/mock_train_file_list_for_dynUnet_training.txt for an example of the expected format.
        valid_file_list: .txt or .csv file (with no header) storing two-columns filenames for validation:
            image filename in the first column and segmentation filename in the second column.
            The two columns should be separated by a comma.
            See monaifbs/config/mock_valid_file_list_for_dynUnet_training.txt for an example of the expected format.
        config_info: dict, contains configuration parameters for sampling, network and training.
            See monaifbs/config/monai_dynUnet_training_config.yml for an example of the expected fields.

    Raises:
        FileNotFoundError: if ``config_info['training']['model_to_load']`` is set but the path does not exist.
        Exception: if ``config_info['training']['batch_size_valid']`` is different from 1.
    """

    """ Read input and configuration parameters """
    # print MONAI config information
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    print_config()

    # print to log the parameter setups
    print(yaml.dump(config_info))

    # extract network parameters, perform checks/set defaults if not present and print them to log
    if 'seg_labels' in config_info['training'].keys():
        seg_labels = config_info['training']['seg_labels']
    else:
        seg_labels = [1]
    nr_out_channels = len(seg_labels)
    print("Considering the following {} labels in the segmentation: {}".format(nr_out_channels, seg_labels))
    # the configured in-plane size is extended with a trailing dimension of 1
    patch_size = config_info["training"]["inplane_size"] + [1]
    print("Considering patch size = {}".format(patch_size))

    spacing = config_info["training"]["spacing"]
    print("Bringing all images to spacing = {}".format(spacing))

    if 'model_to_load' in config_info['training'].keys() and config_info['training']['model_to_load'] is not None:
        model_to_load = config_info['training']['model_to_load']
        if not os.path.exists(model_to_load):
            raise FileNotFoundError("Cannot find model: {}".format(model_to_load))
        else:
            print("Loading model from {}".format(model_to_load))
    else:
        model_to_load = None

    # set up either GPU or CPU usage
    if torch.cuda.is_available():
        print("\n#### GPU INFORMATION ###")
        print("Using device number: {}, name: {}\n".format(torch.cuda.current_device(), torch.cuda.get_device_name()))
        current_device = torch.device("cuda:0")
    else:
        current_device = torch.device("cpu")
        print("Using device: {}".format(current_device))

    # set determinism if required
    if 'manual_seed' in config_info['training'].keys() and config_info['training']['manual_seed'] is not None:
        seed = config_info['training']['manual_seed']
    else:
        seed = None
    if seed is not None:
        print("Using determinism with seed = {}\n".format(seed))
        set_determinism(seed=seed)

    """ Setup data output directory """
    # output folder name: <out_dir>/<timestamp>_<out_postfix>
    out_model_dir = os.path.join(config_info['output']['out_dir'],
                                 datetime.now().strftime('%Y-%m-%d_%H-%M-%S') + '_' +
                                 config_info['output']['out_postfix'])
    print("Saving to directory {}\n".format(out_model_dir))
    # create cache directory to store results for Persistent Dataset
    if 'cache_dir' in config_info['output'].keys():
        out_cache_dir = config_info['output']['cache_dir']
    else:
        out_cache_dir = os.path.join(out_model_dir, 'persistent_cache')
    persistent_cache: Path = Path(out_cache_dir)
    persistent_cache.mkdir(parents=True, exist_ok=True)

    """ Data preparation """
    # Read the input files for training and validation
    print("*** Loading input data for training...")

    train_files = create_data_list_of_dictionaries(train_file_list)
    print("Number of inputs for training = {}".format(len(train_files)))

    val_files = create_data_list_of_dictionaries(valid_file_list)
    print("Number of inputs for validation = {}".format(len(val_files)))

    # Define MONAI processing transforms for the training data. This includes:
    # - Load Nifti files and convert to format Batch x Channel x Dim1 x Dim2 x Dim3
    # - CropForegroundd: Reduce the background from the MR image
    # - InPlaneSpacingd: Perform in-plane resampling to the desired spacing, but preserve the resolution along the
    #   last direction (lowest resolution) to avoid introducing motion artefact resampling errors
    # - SpatialPadd: Pad the in-plane size to the defined network input patch size [N, M] if needed
    # - NormalizeIntensityd: Apply whitening
    # - RandSpatialCropd: Crop a random patch from the input with size [B, C, N, M, 1]
    # - SqueezeDimd: Convert the 3D patch to a 2D one as input to the network (i.e. bring it to size [B, C, N, M])
    # - Apply data augmentation (RandZoomd, RandRotated, RandGaussianNoised, RandGaussianSmoothd, RandScaleIntensityd,
    #   RandFlipd)
    # - ToTensor: convert to pytorch tensor
    train_transforms = Compose(
        [
            LoadNiftid(keys=["image", "label"]),
            AddChanneld(keys=["image", "label"]),
            CropForegroundd(keys=["image", "label"], source_key="image"),
            InPlaneSpacingd(
                keys=["image", "label"],
                pixdim=spacing,
                mode=("bilinear", "nearest"),
            ),
            SpatialPadd(keys=["image", "label"], spatial_size=patch_size,
                        mode=["constant", "edge"]),
            NormalizeIntensityd(keys=["image"], nonzero=False, channel_wise=True),
            RandSpatialCropd(keys=["image", "label"], roi_size=patch_size, random_size=False),
            SqueezeDimd(keys=["image", "label"], dim=-1),
            RandZoomd(
                keys=["image", "label"],
                min_zoom=0.9,
                max_zoom=1.2,
                mode=("bilinear", "nearest"),
                align_corners=(True, None),
                prob=0.16,
            ),
            RandRotated(keys=["image", "label"], range_x=90, range_y=90, prob=0.2,
                        keep_size=True, mode=["bilinear", "nearest"],
                        padding_mode=["zeros", "border"]),
            RandGaussianNoised(keys=["image"], std=0.01, prob=0.15),
            RandGaussianSmoothd(
                keys=["image"],
                sigma_x=(0.5, 1.15),
                sigma_y=(0.5, 1.15),
                sigma_z=(0.5, 1.15),
                prob=0.15,
            ),
            RandScaleIntensityd(keys=["image"], factors=0.3, prob=0.15),
            RandFlipd(["image", "label"], spatial_axis=[0, 1], prob=0.5),
            ToTensord(keys=["image", "label"]),
        ]
    )

    # Define MONAI processing transforms for the validation data
    # - Load Nifti files and convert to format Batch x Channel x Dim1 x Dim2 x Dim3
    # - CropForegroundd: Reduce the background from the MR image
    # - InPlaneSpacingd: Perform in-plane resampling to the desired spacing, but preserve the resolution along the
    #   last direction (lowest resolution) to avoid introducing motion artefact resampling errors
    # - SpatialPadd: Pad the in-plane size to the defined network input patch size [N, M] if needed
    # - NormalizeIntensityd: Apply whitening
    # - ToTensor: convert to pytorch tensor
    # NOTE: The validation data is kept 3D as a 2D sliding window approach is used throughout the volume at inference
    val_transforms = Compose(
        [
            LoadNiftid(keys=["image", "label"]),
            AddChanneld(keys=["image", "label"]),
            CropForegroundd(keys=["image", "label"], source_key="image"),
            InPlaneSpacingd(
                keys=["image", "label"],
                pixdim=spacing,
                mode=("bilinear", "nearest"),
            ),
            SpatialPadd(keys=["image", "label"], spatial_size=patch_size, mode=["constant", "edge"]),
            NormalizeIntensityd(keys=["image"], nonzero=False, channel_wise=True),
            ToTensord(keys=["image", "label"]),
        ]
    )

    """ Load data """
    # create training data loader
    train_ds = PersistentDataset(data=train_files, transform=train_transforms,
                                 cache_dir=persistent_cache)
    train_loader = DataLoader(train_ds,
                              batch_size=config_info['training']['batch_size_train'],
                              shuffle=True,
                              num_workers=config_info['device']['num_workers'])
    # fetch one batch to report the tensor shapes
    check_train_data = misc.first(train_loader)
    print("Training data tensor shapes:")
    print("Image = {}; Label = {}".format(check_train_data["image"].shape, check_train_data["label"].shape))

    # create validation data loader
    if config_info['training']['batch_size_valid'] != 1:
        # NOTE(review): the message below contains a typo ("ar" for "are"); left unchanged here
        raise Exception("Batch size different from 1 at validation ar currently not supported")
    val_ds = PersistentDataset(data=val_files, transform=val_transforms, cache_dir=persistent_cache)
    val_loader = DataLoader(val_ds,
                            batch_size=1,
                            shuffle=False,
                            num_workers=config_info['device']['num_workers'])
    # fetch one batch to report the tensor shapes
    check_valid_data = misc.first(val_loader)
    print("Validation data tensor shapes (Example):")
    print("Image = {}; Label = {}\n".format(check_valid_data["image"].shape, check_valid_data["label"].shape))

    """ Network preparation """
    print("*** Preparing the network ...")
    # automatically extracts the strides and kernels based on nnU-Net empirical rules
    spacings = spacing[:2]
    sizes = patch_size[:2]
    strides, kernels = [], []
    # each pass adds one (kernel, stride) pair; the loop stops once every stride would be 1
    while True:
        spacing_ratio = [sp / min(spacings) for sp in spacings]
        stride = [2 if ratio <= 2 and size >= 8 else 1 for (ratio, size) in zip(spacing_ratio, sizes)]
        kernel = [3 if ratio <= 2 else 1 for ratio in spacing_ratio]
        if all(s == 1 for s in stride):
            break
        sizes = [i / j for i, j in zip(sizes, stride)]
        spacings = [i * j for i, j in zip(spacings, stride)]
        kernels.append(kernel)
        strides.append(stride)
    # a stride-1 entry is prepended and a kernel-3 entry appended, so both lists have the same length
    strides.insert(0, len(spacings) * [1])
    kernels.append(len(spacings) * [3])

    # initialise the network
    net = DynUNet(
        spatial_dims=2,
        in_channels=1,
        out_channels=nr_out_channels,
        kernel_size=kernels,
        strides=strides,
        upsample_kernel_size=strides[1:],
        norm_name="instance",
        deep_supervision=True,
        deep_supr_num=2,
        res_block=False,
    ).to(current_device)
    print(net)

    # define the loss function
    loss_function = choose_loss_function(nr_out_channels, config_info)

    # define the optimiser and the learning rate scheduler
    opt = torch.optim.SGD(net.parameters(), lr=float(config_info['training']['lr']), momentum=0.95)
    # learning-rate multiplier: (1 - epoch / nr_train_epochs) ** 0.9
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        opt, lr_lambda=lambda epoch: (1 - epoch / config_info['training']['nr_train_epochs']) ** 0.9
    )

    """ MONAI evaluator """
    print("*** Preparing the dynUNet evaluator engine...\n")
    # val_post_transforms = Compose(
    #     [
    #         Activationsd(keys="pred", sigmoid=True),
    #     ]
    # )
    # NOTE: the ``global_epoch_transform`` lambda refers to ``trainer``, which is only assigned further
    # down in this function; the name is resolved when the lambda is called, not when it is defined.
    val_handlers = [
        StatsHandler(output_transform=lambda x: None),
        TensorBoardStatsHandler(log_dir=os.path.join(out_model_dir, "valid"),
                                output_transform=lambda x: None,
                                global_epoch_transform=lambda x: trainer.state.iteration),
        CheckpointSaver(save_dir=out_model_dir, save_dict={"net": net, "opt": opt}, save_key_metric=True,
                        file_prefix='best_valid'),
    ]
    # NOTE(review): the config key is spelled 'val_image_to_tensorboad' (sic); it must match the config file
    if config_info['output']['val_image_to_tensorboad']:
        val_handlers.append(TensorBoardImageHandler(log_dir=os.path.join(out_model_dir, "valid"),
                                                    batch_transform=lambda x: (x["image"], x["label"]),
                                                    output_transform=lambda x: x["pred"], interval=2))

    # Define customized evaluator
    class DynUNetEvaluator(SupervisedEvaluator):
        def _iteration(self, engine, batchdata):
            inputs, targets = self.prepare_batch(batchdata)
            inputs, targets = inputs.to(engine.state.device), targets.to(engine.state.device)
            # flipped copies of the input along dim 2, dim 3 and both
            flip_inputs_1 = torch.flip(inputs, dims=(2,))
            flip_inputs_2 = torch.flip(inputs, dims=(3,))
            flip_inputs_3 = torch.flip(inputs, dims=(2, 3))

            def _compute_pred():
                pred = self.inferer(inputs, self.network)
                # use random flipping as data augmentation at inference
                # (each flipped prediction is flipped back before the four predictions are averaged)
                flip_pred_1 = torch.flip(self.inferer(flip_inputs_1, self.network), dims=(2,))
                flip_pred_2 = torch.flip(self.inferer(flip_inputs_2, self.network), dims=(3,))
                flip_pred_3 = torch.flip(self.inferer(flip_inputs_3, self.network), dims=(2, 3))
                return (pred + flip_pred_1 + flip_pred_2 + flip_pred_3) / 4

            # execute forward computation
            self.network.eval()
            with torch.no_grad():
                if self.amp:
                    with torch.cuda.amp.autocast():
                        predictions = _compute_pred()
                else:
                    predictions = _compute_pred()
            return {"image": inputs, "label": targets, "pred": predictions}

    evaluator = DynUNetEvaluator(
        device=current_device,
        val_data_loader=val_loader,
        network=net,
        inferer=SlidingWindowInferer2D(roi_size=patch_size, sw_batch_size=4, overlap=0.0),
        post_transform=None,
        key_val_metric={
            "Mean_dice": MeanDice(
                include_background=False,
                to_onehot_y=True,
                mutually_exclusive=True,
                output_transform=lambda x: (x["pred"], x["label"]),
            )
        },
        val_handlers=val_handlers,
        amp=False,
    )

    """ MONAI trainer """
    print("*** Preparing the dynUNet trainer engine...\n")
    # train_post_transforms = Compose(
    #     [
    #         Activationsd(keys="pred", sigmoid=True),
    #     ]
    # )

    validation_every_n_epochs = config_info['training']['validation_every_n_epochs']
    # NOTE(review): integer division; this is 0 when the dataset is smaller than one batch,
    # which makes the validation interval below 0 as well - confirm this cannot happen.
    epoch_len = len(train_ds) // train_loader.batch_size
    validation_every_n_iters = validation_every_n_epochs * epoch_len

    # define event handlers for the trainer
    writer_train = SummaryWriter(log_dir=os.path.join(out_model_dir, "train"))
    # NOTE(review): TensorBoardStatsHandler below receives both ``summary_writer`` and ``log_dir``
    train_handlers = [
        LrScheduleHandler(lr_scheduler=scheduler, print_lr=True),
        ValidationHandler(validator=evaluator, interval=validation_every_n_iters, epoch_level=False),
        StatsHandler(tag_name="train_loss", output_transform=lambda x: x["loss"]),
        TensorBoardStatsHandler(summary_writer=writer_train,
                                log_dir=os.path.join(out_model_dir, "train"), tag_name="Loss",
                                output_transform=lambda x: x["loss"],
                                global_epoch_transform=lambda x: trainer.state.iteration),
        CheckpointSaver(save_dir=out_model_dir, save_dict={"net": net, "opt": opt},
                        save_final=True,
                        save_interval=2, epoch_level=True,
                        n_saved=config_info['output']['max_nr_models_saved']),
    ]
    if model_to_load is not None:
        train_handlers.append(CheckpointLoader(load_path=model_to_load, load_dict={"net": net, "opt": opt}))

    # define customized trainer
    class DynUNetTrainer(SupervisedTrainer):
        def _iteration(self, engine, batchdata):
            inputs, targets = self.prepare_batch(batchdata)
            inputs, targets = inputs.to(engine.state.device), targets.to(engine.state.device)

            def _compute_loss(preds, label):
                # the label is resized to the spatial shape of every prediction after the first;
                # the i-th loss term is weighted by 0.5 ** i
                labels = [label] + [interpolate(label, pred.shape[2:]) for pred in preds[1:]]
                return sum([0.5 ** i * self.loss_function(p, l) for i, (p, l) in enumerate(zip(preds, labels))])

            self.network.train()
            self.optimizer.zero_grad()
            if self.amp and self.scaler is not None:
                with torch.cuda.amp.autocast():
                    predictions = self.inferer(inputs, self.network)
                    loss = _compute_loss(predictions, targets)
                self.scaler.scale(loss).backward()
                self.scaler.step(self.optimizer)
                self.scaler.update()
            else:
                predictions = self.inferer(inputs, self.network)
                loss = _compute_loss(predictions, targets).mean()
                loss.backward()
                self.optimizer.step()
            return {"image": inputs, "label": targets, "pred": predictions, "loss": loss.item()}

    trainer = DynUNetTrainer(
        device=current_device,
        max_epochs=config_info['training']['nr_train_epochs'],
        train_data_loader=train_loader,
        network=net,
        optimizer=opt,
        loss_function=loss_function,
        inferer=SimpleInferer(),
        post_transform=None,
        key_train_metric=None,
        train_handlers=train_handlers,
        amp=False,
    )

    """ Run training """
    print("*** Run training...")
    trainer.run()
    print("Done!")
def main(tempdir):
    """Evaluate a 3D UNet on synthetic volumes with sliding-window inference.

    Five random image/segmentation pairs are written into ``tempdir`` as
    NIfTI files, read back through an ``ImageDataset`` and pushed through an
    ``Engine`` that runs sliding-window inference, attaches a ``Mean_Dice``
    metric and saves the post-processed predictions.

    Args:
        tempdir: directory that receives the generated ``im*.nii.gz`` and
            ``seg*.nii.gz`` files.
    """
    config.print_config()
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    print(f"generating synthetic data to {tempdir} (this may take a while)")
    for idx in range(5):
        im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1)
        nib.save(nib.Nifti1Image(im, np.eye(4)), os.path.join(tempdir, f"im{idx:d}.nii.gz"))
        nib.save(nib.Nifti1Image(seg, np.eye(4)), os.path.join(tempdir, f"seg{idx:d}.nii.gz"))

    image_paths = sorted(glob(os.path.join(tempdir, "im*.nii.gz")))
    label_paths = sorted(glob(os.path.join(tempdir, "seg*.nii.gz")))

    # image and segmentation each get their own array transform chain
    dataset = ImageDataset(
        image_paths,
        label_paths,
        transform=Compose([ScaleIntensity(), AddChannel(), ToTensor()]),
        seg_transform=Compose([AddChannel(), ToTensor()]),
        image_only=False,
    )

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    model = UNet(
        dimensions=3,
        in_channels=1,
        out_channels=1,
        channels=(16, 32, 64, 128, 256),
        strides=(2, 2, 2, 2),
        num_res_units=2,
    ).to(device)

    # sliding window size and batch size used for window inference
    window_shape = (96, 96, 96)
    windows_per_batch = 4

    binarize = Compose([Activations(sigmoid=True), AsDiscrete(threshold_values=True)])

    def _sliding_window_processor(engine, batch):
        """Run one inference step and return ``(prediction, label)``."""
        model.eval()
        with torch.no_grad():
            volumes = batch[0].to(device)
            targets = batch[1].to(device)
            raw_output = sliding_window_inference(volumes, window_shape, windows_per_batch, model)
            return binarize(raw_output), targets

    evaluator = Engine(_sliding_window_processor)

    # register the evaluation metric on the evaluator engine
    MeanDice().attach(evaluator, "Mean_Dice")

    # StatsHandler prints the loss every iteration and the metrics every epoch;
    # the evaluator has no loss to show, so output_transform returns None
    StatsHandler(name="evaluator", output_transform=lambda x: None).attach(evaluator)

    # array-format batches: the 3rd item of the batch is assumed to be the meta data
    # NOTE(review): output_dir is the literal "tempdir", not the `tempdir` argument - confirm intended
    SegmentationSaver(
        output_dir="tempdir",
        output_ext=".nii.gz",
        output_postfix="seg",
        name="evaluator",
        batch_transform=lambda x: x[2],
        output_transform=lambda output: output[0],
    ).attach(evaluator)

    # the weights come from the "unet_training_array" example
    CheckpointLoader(
        load_path="./runs_array/net_checkpoint_100.pt",
        load_dict={"net": model},
    ).attach(evaluator)

    # sliding window inference handles one image per iteration
    loader = DataLoader(dataset, batch_size=1, num_workers=1, pin_memory=use_cuda)
    print(evaluator.run(loader))
def main(tempdir):
    """Evaluate a trained 3D UNet on synthetic data with ``SupervisedEvaluator``.

    Five random image/segmentation pairs are written into ``tempdir``, the
    checkpoint matching ``./runs/net_key_metric*`` is loaded into a UNet and
    sliding-window evaluation is run, with predictions saved under ``./runs/``.

    Args:
        tempdir: directory that receives the generated NIfTI files.

    Raises:
        FileNotFoundError: if no file matches ``./runs/net_key_metric*``.
    """
    monai.config.print_config()
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    # write 5 random image, mask pairs into the given directory
    print(f"generating synthetic data to {tempdir} (this may take a while)")
    for i in range(5):
        im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1, channel_dim=-1)
        n = nib.Nifti1Image(im, np.eye(4))
        nib.save(n, os.path.join(tempdir, f"im{i:d}.nii.gz"))
        n = nib.Nifti1Image(seg, np.eye(4))
        nib.save(n, os.path.join(tempdir, f"seg{i:d}.nii.gz"))

    images = sorted(glob(os.path.join(tempdir, "im*.nii.gz")))
    segs = sorted(glob(os.path.join(tempdir, "seg*.nii.gz")))
    val_files = [{"image": img, "label": seg} for img, seg in zip(images, segs)]

    # model file path: glob() returns matches in arbitrary order and an empty
    # list when nothing matches, so sort for a reproducible pick and fail with
    # a readable error instead of a bare IndexError
    checkpoint_pattern = "./runs/net_key_metric*"
    checkpoints = sorted(glob(checkpoint_pattern))
    if not checkpoints:
        raise FileNotFoundError(f"no checkpoint file matches {checkpoint_pattern!r}")
    model_file = checkpoints[0]

    # define transforms for image and segmentation
    val_transforms = Compose(
        [
            LoadNiftid(keys=["image", "label"]),
            AsChannelFirstd(keys=["image", "label"], channel_dim=-1),
            ScaleIntensityd(keys="image"),
            ToTensord(keys=["image", "label"]),
        ]
    )

    # create a validation data loader
    val_ds = monai.data.Dataset(data=val_files, transform=val_transforms)
    val_loader = monai.data.DataLoader(val_ds, batch_size=1, num_workers=4)

    # create the UNet to evaluate
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = monai.networks.nets.UNet(
        dimensions=3,
        in_channels=1,
        out_channels=1,
        channels=(16, 32, 64, 128, 256),
        strides=(2, 2, 2, 2),
        num_res_units=2,
    ).to(device)

    val_post_transforms = Compose(
        [
            Activationsd(keys="pred", sigmoid=True),
            AsDiscreted(keys="pred", threshold_values=True),
            KeepLargestConnectedComponentd(keys="pred", applied_labels=[1]),
        ]
    )
    val_handlers = [
        # no loss to print for evaluation, so the per-iteration output is None
        StatsHandler(output_transform=lambda x: None),
        CheckpointLoader(load_path=model_file, load_dict={"net": net}),
        SegmentationSaver(
            output_dir="./runs/",
            batch_transform=lambda batch: batch["image_meta_dict"],
            output_transform=lambda output: output["pred"],
        ),
    ]

    evaluator = SupervisedEvaluator(
        device=device,
        val_data_loader=val_loader,
        network=net,
        inferer=SlidingWindowInferer(roi_size=(96, 96, 96), sw_batch_size=4, overlap=0.5),
        post_transform=val_post_transforms,
        key_val_metric={
            "val_mean_dice": MeanDice(include_background=True, output_transform=lambda x: (x["pred"], x["label"]))
        },
        additional_metrics={"val_acc": Accuracy(output_transform=lambda x: (x["pred"], x["label"]))},
        val_handlers=val_handlers,
        # if no FP16 support in GPU or PyTorch version < 1.6, will not enable AMP evaluation
        amp=monai.config.get_torch_version_tuple() >= (1, 6),
    )
    evaluator.run()
device) seg_probs = sliding_window_inference(val_images, roi_size, sw_batch_size, net) return seg_probs, val_labels evaluator = Engine(_sliding_window_processor) # add evaluation metric to the evaluator engine MeanDice(add_sigmoid=True, to_onehot_y=False).attach(evaluator, 'Mean_Dice') # StatsHandler prints loss at every iteration and print metrics at every epoch, # we don't need to print loss for evaluator, so just print metrics, user can also customize print functions val_stats_handler = StatsHandler( name='evaluator', output_transform=lambda x: None # no need to print loss value, so disable per iteration output ) val_stats_handler.attach(evaluator) # convert the necessary metadata from batch data SegmentationSaver( output_dir='tempdir', output_ext='.nii.gz', output_postfix='seg', name='evaluator', batch_transform=lambda batch: { 'filename_or_obj': batch['img.filename_or_obj'], 'affine': batch['img.affine'] }, output_transform=lambda output: predict_segmentation(output[0])).attach(
def run_training_test(root_dir, device="cuda:0"):
    """Train a small GAN on the ``img*.nii.gz`` files in ``root_dir``.

    Args:
        root_dir: directory that holds the real images and receives the
            TensorBoard logs and checkpoints.
        device: device the networks are moved to and the trainer runs on.

    Returns:
        The ``state`` attribute of the ``GanTrainer`` after ``run()``.
    """
    image_paths = sorted(glob(os.path.join(root_dir, "img*.nii.gz")))
    # every record stores its file name as a one-element tuple
    train_files = [{"reals": (path,)} for path in image_paths]

    # loading / augmentation pipeline for the real images
    real_pipeline = Compose([
        LoadNiftid(keys=["reals"]),
        AsChannelFirstd(keys=["reals"]),
        ScaleIntensityd(keys=["reals"]),
        RandFlipd(keys=["reals"], prob=0.5),
        ToTensord(keys=["reals"]),
    ])
    train_ds = monai.data.CacheDataset(data=train_files, transform=real_pipeline, cache_rate=0.5)
    train_loader = monai.data.DataLoader(train_ds, batch_size=2, shuffle=True, num_workers=4)

    # optimiser settings and target values shared by both networks
    learning_rate = 2e-4
    betas = (0.5, 0.999)
    real_label = 1
    fake_label = 0

    # discriminator
    disc_net = Discriminator(
        in_shape=(1, 64, 64),
        channels=(8, 16, 32, 64, 1),
        strides=(2, 2, 2, 2, 1),
        num_res_units=1,
        kernel_size=5,
    ).to(device)
    disc_net.apply(normal_init)
    disc_opt = torch.optim.Adam(disc_net.parameters(), learning_rate, betas=betas)
    disc_loss_criterion = torch.nn.BCELoss()

    def discriminator_loss(gen_images, real_images):
        """Average the BCE of the discriminator on real and on generated images."""
        real_target = real_images.new_full((real_images.shape[0], 1), real_label)
        fake_target = gen_images.new_full((gen_images.shape[0], 1), fake_label)
        loss_on_real = disc_loss_criterion(disc_net(real_images), real_target)
        # detach so this loss does not back-propagate into the generator
        loss_on_fake = disc_loss_criterion(disc_net(gen_images.detach()), fake_target)
        return (loss_on_real + loss_on_fake) / 2

    # generator
    latent_size = 64
    gen_net = Generator(
        latent_shape=latent_size,
        start_shape=(latent_size, 8, 8),
        channels=[32, 16, 8, 1],
        strides=[2, 2, 2, 1],
    )
    gen_net.apply(normal_init)
    gen_net.conv.add_module("activation", torch.nn.Sigmoid())
    gen_net = gen_net.to(device)
    gen_opt = torch.optim.Adam(gen_net.parameters(), learning_rate, betas=betas)
    gen_loss_criterion = torch.nn.BCELoss()

    def generator_loss(gen_images):
        """BCE between the discriminator output on generated images and the real label."""
        verdict = disc_net(gen_images)
        wanted = verdict.new_full(verdict.shape, real_label)
        return gen_loss_criterion(verdict, wanted)

    def _loss_terms(output):
        # keep only the generator / discriminator loss entries for reporting
        return {Keys.GLOSS: output[Keys.GLOSS], Keys.DLOSS: output[Keys.DLOSS]}

    train_handlers = [
        StatsHandler(name="training_loss", output_transform=_loss_terms),
        TensorBoardStatsHandler(log_dir=root_dir, tag_name="training_loss", output_transform=_loss_terms),
        CheckpointSaver(
            save_dir=root_dir,
            save_dict={"g_net": gen_net, "d_net": disc_net},
            save_interval=2,
            epoch_level=True,
        ),
    ]

    trainer = GanTrainer(
        device,
        5,
        train_loader,
        gen_net,
        gen_opt,
        generator_loss,
        disc_net,
        disc_opt,
        discriminator_loss,
        d_train_steps=2,
        latent_shape=latent_size,
        key_train_metric=None,
        train_handlers=train_handlers,
    )
    trainer.run()

    return trainer.state
def main():
    """Train a 3D DenseNet121 classifier on IXI T1 volumes with ignite engines.

    The first ten listed volumes are used for training and the last ten for
    validation. Validation runs after every training epoch and can stop the
    trainer early through an ``EarlyStopping`` handler on the "Accuracy" metric.
    """
    monai.config.print_config()
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    # IXI dataset as a demo, downloadable from https://brain-development.org/ixi-dataset/
    images = [
        "/workspace/data/medical/ixi/IXI-T1/IXI314-IOP-0889-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI249-Guys-1072-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI609-HH-2600-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI173-HH-1590-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI020-Guys-0700-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI342-Guys-0909-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI134-Guys-0780-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI577-HH-2661-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI066-Guys-0731-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI130-HH-1528-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI607-Guys-1097-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI175-HH-1570-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI385-HH-2078-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI344-Guys-0905-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI409-Guys-0960-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI584-Guys-1129-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI253-HH-1694-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI092-HH-1436-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI574-IOP-1156-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI585-Guys-1130-T1.nii.gz",
    ]
    # 2 binary labels for gender classification: man and woman
    labels = np.array([0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0])

    def _as_records(paths, targets):
        # pair every file name with its label in the dict format the transforms expect
        return [{"img": path, "label": target} for path, target in zip(paths, targets)]

    train_files = _as_records(images[:10], labels[:10])
    val_files = _as_records(images[-10:], labels[-10:])

    # dictionary transforms; only the training chain has a random rotation
    train_transforms = Compose([
        LoadNiftid(keys=["img"]),
        AddChanneld(keys=["img"]),
        ScaleIntensityd(keys=["img"]),
        Resized(keys=["img"], spatial_size=(96, 96, 96)),
        RandRotate90d(keys=["img"], prob=0.8, spatial_axes=[0, 2]),
        ToTensord(keys=["img"]),
    ])
    val_transforms = Compose([
        LoadNiftid(keys=["img"]),
        AddChanneld(keys=["img"]),
        ScaleIntensityd(keys=["img"]),
        Resized(keys=["img"], spatial_size=(96, 96, 96)),
        ToTensord(keys=["img"]),
    ])

    pin_memory = torch.cuda.is_available()

    # load one batch up front and print its shape and labels as a sanity check
    check_loader = DataLoader(
        monai.data.Dataset(data=train_files, transform=train_transforms),
        batch_size=2,
        num_workers=4,
        pin_memory=pin_memory,
    )
    sample = monai.utils.misc.first(check_loader)
    print(sample["img"].shape, sample["label"])

    # DenseNet121, CrossEntropyLoss and Adam optimizer
    model = monai.networks.nets.densenet.densenet121(spatial_dims=3, in_channels=1, out_channels=2)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), 1e-5)
    device = torch.device("cuda:0")

    # Ignite trainer expects batch=(img, label) and returns output=loss at every iteration,
    # user can add output_transform to return other values, like: y_pred, y, etc.
    def prepare_batch(batch, device=None, non_blocking=False):
        pair = (batch["img"], batch["label"])
        return _prepare_batch(pair, device, non_blocking)

    trainer = create_supervised_trainer(model, optimizer, criterion, device, False, prepare_batch=prepare_batch)

    # checkpoint handler saves the network params and optimizer stats during training
    trainer.add_event_handler(
        event_name=Events.EPOCH_COMPLETED,
        handler=ModelCheckpoint("./runs/", "net", n_saved=10, require_empty=False),
        to_save={"net": model, "opt": optimizer},
    )

    # StatsHandler prints loss at every iteration and metrics at every epoch;
    # no metrics are set on the trainer here, so only the loss is printed
    StatsHandler(name="trainer").attach(trainer)

    # TensorBoardStatsHandler plots the same values that StatsHandler prints
    TensorBoardStatsHandler().attach(trainer)

    # validation settings
    validation_every_n_epochs = 1
    metric_name = "Accuracy"
    val_metrics = {
        metric_name: Accuracy(),
        "AUC": ROCAUC(to_onehot_y=True, add_softmax=True),
    }

    # Ignite evaluator expects batch=(img, label) and returns output=(y_pred, y) at every iteration,
    # user can add output_transform to return other values
    evaluator = create_supervised_evaluator(model, val_metrics, device, True, prepare_batch=prepare_batch)

    def _suppress_output(_):
        # the evaluator has no loss to report, so disable per-iteration output
        return None

    def _trainer_epoch(_):
        # report validation results against the trainer's global epoch number
        return trainer.state.epoch

    # print validation stats via the evaluator
    StatsHandler(
        name="evaluator",
        output_transform=_suppress_output,
        global_epoch_transform=_trainer_epoch,
    ).attach(evaluator)

    # record validation metrics to TensorBoard
    TensorBoardStatsHandler(
        output_transform=_suppress_output,
        global_epoch_transform=_trainer_epoch,
    ).attach(evaluator)

    # early stopping on the evaluator, driven by the accuracy metric
    evaluator.add_event_handler(
        event_name=Events.EPOCH_COMPLETED,
        handler=EarlyStopping(patience=4, score_function=stopping_fn_from_metric(metric_name), trainer=trainer),
    )

    # validation data loader
    val_loader = DataLoader(
        monai.data.Dataset(data=val_files, transform=val_transforms),
        batch_size=2,
        num_workers=4,
        pin_memory=pin_memory,
    )

    @trainer.on(Events.EPOCH_COMPLETED(every=validation_every_n_epochs))
    def run_validation(engine):
        evaluator.run(val_loader)

    # training data loader
    train_loader = DataLoader(
        monai.data.Dataset(data=train_files, transform=train_transforms),
        batch_size=2,
        shuffle=True,
        num_workers=4,
        pin_memory=pin_memory,
    )

    max_epochs = 30
    trainer.run(train_loader, max_epochs)
def evaluate(args):
    """Run distributed sliding-window evaluation of a 3D UNet on synthetic data.

    The process with ``args.local_rank == 0`` generates 16 random
    image/segmentation pairs in ``args.dir`` when that directory does not
    exist yet. Every process then joins the NCCL process group, evaluates the
    samples handed out by a ``DistributedSampler`` and always leaves the
    process group again, also when evaluation raises.

    Args:
        args: object providing ``local_rank`` (index of the GPU used by this
            process) and ``dir`` (directory of the evaluation data).
    """
    if args.local_rank == 0 and not os.path.exists(args.dir):
        # create 16 random image, mask pairs for evaluation
        print(f"generating synthetic data to {args.dir} (this may take a while)")
        os.makedirs(args.dir)
        # set random seed to generate same random data for every node
        np.random.seed(seed=0)
        for i in range(16):
            im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1, channel_dim=-1)
            n = nib.Nifti1Image(im, np.eye(4))
            nib.save(n, os.path.join(args.dir, f"img{i:d}.nii.gz"))
            n = nib.Nifti1Image(seg, np.eye(4))
            nib.save(n, os.path.join(args.dir, f"seg{i:d}.nii.gz"))

    # initialize the distributed evaluation process, every GPU runs in a process
    dist.init_process_group(backend="nccl", init_method="env://")
    try:
        images = sorted(glob(os.path.join(args.dir, "img*.nii.gz")))
        segs = sorted(glob(os.path.join(args.dir, "seg*.nii.gz")))
        val_files = [{"image": img, "label": seg} for img, seg in zip(images, segs)]

        # define transforms for image and segmentation
        val_transforms = Compose(
            [
                LoadImaged(keys=["image", "label"]),
                AsChannelFirstd(keys=["image", "label"], channel_dim=-1),
                ScaleIntensityd(keys="image"),
                ToTensord(keys=["image", "label"]),
            ]
        )

        # create an evaluation dataset and its distributed sampler
        val_ds = Dataset(data=val_files, transform=val_transforms)
        val_sampler = DistributedSampler(val_ds, shuffle=False)
        # sliding window inference needs 1 image in every iteration
        val_loader = DataLoader(
            val_ds, batch_size=1, shuffle=False, num_workers=2, pin_memory=True, sampler=val_sampler
        )

        # create the UNet on the GPU that belongs to this process
        device = torch.device(f"cuda:{args.local_rank}")
        torch.cuda.set_device(device)
        net = monai.networks.nets.UNet(
            dimensions=3,
            in_channels=1,
            out_channels=1,
            channels=(16, 32, 64, 128, 256),
            strides=(2, 2, 2, 2),
            num_res_units=2,
        ).to(device)
        # wrap the model with DistributedDataParallel module
        net = DistributedDataParallel(net, device_ids=[device])

        val_post_transforms = Compose(
            [
                Activationsd(keys="pred", sigmoid=True),
                AsDiscreted(keys="pred", threshold_values=True),
                KeepLargestConnectedComponentd(keys="pred", applied_labels=[1]),
            ]
        )
        val_handlers = [
            CheckpointLoader(
                load_path="./runs/checkpoint_epoch=4.pt",
                load_dict={"net": net},
                # config mapping to expected GPU device
                map_location={"cuda:0": f"cuda:{args.local_rank}"},
            ),
        ]
        # only rank 0 configures logging and adds the printing / saving handlers
        if dist.get_rank() == 0:
            logging.basicConfig(stream=sys.stdout, level=logging.INFO)
            val_handlers.extend(
                [
                    StatsHandler(output_transform=lambda x: None),
                    SegmentationSaver(
                        output_dir="./runs/",
                        batch_transform=lambda batch: batch["image_meta_dict"],
                        output_transform=lambda output: output["pred"],
                    ),
                ]
            )

        evaluator = SupervisedEvaluator(
            device=device,
            val_data_loader=val_loader,
            network=net,
            inferer=SlidingWindowInferer(roi_size=(96, 96, 96), sw_batch_size=4, overlap=0.5),
            post_transform=val_post_transforms,
            key_val_metric={
                "val_mean_dice": MeanDice(
                    include_background=True,
                    output_transform=lambda x: (x["pred"], x["label"]),
                    device=device,
                )
            },
            additional_metrics={
                "val_acc": Accuracy(output_transform=lambda x: (x["pred"], x["label"]), device=device)
            },
            val_handlers=val_handlers,
            # if no FP16 support in GPU or PyTorch version < 1.6, will not enable AMP evaluation
            amp=monai.config.get_torch_version_tuple() >= (1, 6),
        )
        evaluator.run()
    finally:
        # leave the process group even when evaluation fails
        dist.destroy_process_group()
def main():
    """Train and periodically validate a 3D UNet on synthetic data.

    Forty random image/segmentation pairs are generated in a temporary
    directory; the first twenty feed a ``SupervisedTrainer`` and the last
    twenty a ``SupervisedEvaluator`` that is triggered every 2 epochs. The
    temporary directory is removed when the function exits, including when
    data generation or training raises.
    """
    monai.config.print_config()
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    # create a temporary directory and 40 random image, mask pairs; the
    # context manager deletes the directory even if a later step fails
    with tempfile.TemporaryDirectory() as tempdir:
        print(f"generating synthetic data to {tempdir} (this may take a while)")
        for i in range(40):
            im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1, channel_dim=-1)
            n = nib.Nifti1Image(im, np.eye(4))
            nib.save(n, os.path.join(tempdir, f"img{i:d}.nii.gz"))
            n = nib.Nifti1Image(seg, np.eye(4))
            nib.save(n, os.path.join(tempdir, f"seg{i:d}.nii.gz"))

        images = sorted(glob(os.path.join(tempdir, "img*.nii.gz")))
        segs = sorted(glob(os.path.join(tempdir, "seg*.nii.gz")))
        train_files = [{Keys.IMAGE: img, Keys.LABEL: seg} for img, seg in zip(images[:20], segs[:20])]
        val_files = [{Keys.IMAGE: img, Keys.LABEL: seg} for img, seg in zip(images[-20:], segs[-20:])]

        # define transforms for image and segmentation
        train_transforms = Compose([
            LoadNiftid(keys=[Keys.IMAGE, Keys.LABEL]),
            AsChannelFirstd(keys=[Keys.IMAGE, Keys.LABEL], channel_dim=-1),
            ScaleIntensityd(keys=[Keys.IMAGE, Keys.LABEL]),
            RandCropByPosNegLabeld(
                keys=[Keys.IMAGE, Keys.LABEL],
                label_key=Keys.LABEL,
                size=[96, 96, 96],
                pos=1,
                neg=1,
                num_samples=4,
            ),
            RandRotate90d(keys=[Keys.IMAGE, Keys.LABEL], prob=0.5, spatial_axes=[0, 2]),
            ToTensord(keys=[Keys.IMAGE, Keys.LABEL]),
        ])
        val_transforms = Compose([
            LoadNiftid(keys=[Keys.IMAGE, Keys.LABEL]),
            AsChannelFirstd(keys=[Keys.IMAGE, Keys.LABEL], channel_dim=-1),
            ScaleIntensityd(keys=[Keys.IMAGE, Keys.LABEL]),
            ToTensord(keys=[Keys.IMAGE, Keys.LABEL]),
        ])

        # create a training data loader
        train_ds = monai.data.Dataset(data=train_files, transform=train_transforms)
        # use batch_size=2 to load images and use RandCropByPosNegLabeld to generate 2 x 4 images for network training
        train_loader = DataLoader(
            train_ds, batch_size=2, shuffle=True, num_workers=4, collate_fn=list_data_collate
        )
        # create a validation data loader
        val_ds = monai.data.Dataset(data=val_files, transform=val_transforms)
        val_loader = DataLoader(val_ds, batch_size=1, num_workers=4, collate_fn=list_data_collate)

        # create UNet, DiceLoss and Adam optimizer
        device = torch.device("cuda:0")
        net = monai.networks.nets.UNet(
            dimensions=3,
            in_channels=1,
            out_channels=1,
            channels=(16, 32, 64, 128, 256),
            strides=(2, 2, 2, 2),
            num_res_units=2,
        ).to(device)
        loss = monai.losses.DiceLoss(do_sigmoid=True)
        opt = torch.optim.Adam(net.parameters(), 1e-3)

        # the evaluator has no loss to print, so its per-iteration output is None
        val_handlers = [StatsHandler(output_transform=lambda x: None)]

        evaluator = SupervisedEvaluator(
            device=device,
            val_data_loader=val_loader,
            network=net,
            inferer=SlidingWindowInferer(roi_size=(96, 96, 96), sw_batch_size=4, overlap=0.5),
            val_handlers=val_handlers,
            key_val_metric={
                "val_mean_dice": MeanDice(
                    include_background=True,
                    add_sigmoid=True,
                    output_transform=lambda x: (x[Keys.PRED], x[Keys.LABEL]),
                )
            },
            additional_metrics=None,
        )

        train_handlers = [
            # run the evaluator every 2 epochs
            ValidationHandler(validator=evaluator, interval=2, epoch_level=True),
            StatsHandler(tag_name="train_loss", output_transform=lambda x: x[Keys.INFO][Keys.LOSS]),
        ]

        trainer = SupervisedTrainer(
            device=device,
            max_epochs=5,
            train_data_loader=train_loader,
            network=net,
            optimizer=opt,
            loss_function=loss,
            inferer=SimpleInferer(),
            train_handlers=train_handlers,
            amp=False,
            key_train_metric=None,
        )
        trainer.run()
def main():
    """Train a GAN on the MedNIST "Hand" images and save generated samples.

    The MedNIST archive is downloaded and extracted into ``data``, a
    ``GanTrainer`` is run for 50 epochs with checkpoints written to
    ``model_out``, and ten images produced by the trained generator are
    finally written there as PNG files.
    """
    monai.config.print_config()
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    set_determinism(12345)
    device = torch.device("cuda:0")

    # fetch the real data
    extract_dir = "data"
    download_and_extract(
        "https://www.dropbox.com/s/5wwskxctvcxiuea/MedNIST.tar.gz?dl=1",
        os.path.join(extract_dir, "MedNIST.tar.gz"),
        extract_dir,
        "0bc7306e7427e00ad1c5526a6677552d",
    )
    hand_dir = os.path.join(extract_dir, "MedNIST", "Hand")
    real_data = []
    for filename in os.listdir(hand_dir):
        real_data.append({"hand": os.path.join(hand_dir, filename)})

    # loading and augmentation of the real images
    real_pipeline = Compose([
        LoadPNGD(keys=["hand"]),
        AddChannelD(keys=["hand"]),
        ScaleIntensityD(keys=["hand"]),
        RandRotateD(keys=["hand"], range_x=15, prob=0.5, keep_size=True),
        RandFlipD(keys=["hand"], spatial_axis=0, prob=0.5),
        RandZoomD(keys=["hand"], min_zoom=0.9, max_zoom=1.1, prob=0.5),
        ToTensorD(keys=["hand"]),
    ])

    # dataset and dataloader
    real_dataset = CacheDataset(real_data, real_pipeline)
    real_dataloader = DataLoader(real_dataset, batch_size=300, shuffle=True, num_workers=10)

    def prepare_batch(batchdata):
        """Pick the image tensors out of a dataloader batch dict for the discriminator."""
        return batchdata["hand"]

    # networks
    disc_net = Discriminator(
        in_shape=(1, 64, 64),
        channels=(8, 16, 32, 64, 1),
        strides=(2, 2, 2, 2, 1),
        num_res_units=1,
        kernel_size=5,
    ).to(device)

    latent_size = 64
    gen_net = Generator(
        latent_shape=latent_size,
        start_shape=(latent_size, 8, 8),
        channels=[32, 16, 8, 1],
        strides=[2, 2, 2, 1],
    )

    # initialize both networks
    disc_net.apply(normal_init)
    gen_net.apply(normal_init)

    # input images are scaled to [0,1] so enforce the same of generated outputs
    gen_net.conv.add_module("activation", torch.nn.Sigmoid())
    gen_net = gen_net.to(device)

    # optimizers and loss functions
    learning_rate = 2e-4
    betas = (0.5, 0.999)
    disc_opt = torch.optim.Adam(disc_net.parameters(), learning_rate, betas=betas)
    gen_opt = torch.optim.Adam(gen_net.parameters(), learning_rate, betas=betas)

    disc_loss_criterion = torch.nn.BCELoss()
    gen_loss_criterion = torch.nn.BCELoss()
    real_label = 1
    fake_label = 0

    def discriminator_loss(gen_images, real_images):
        """Average the discriminator's BCE on real images and on generated images."""
        real_target = real_images.new_full((real_images.shape[0], 1), real_label)
        fake_target = gen_images.new_full((gen_images.shape[0], 1), fake_label)
        loss_on_real = disc_loss_criterion(disc_net(real_images), real_target)
        # detach so this loss does not back-propagate into the generator
        loss_on_fake = disc_loss_criterion(disc_net(gen_images.detach()), fake_target)
        return (loss_on_fake + loss_on_real) / 2

    def generator_loss(gen_images):
        """BCE between the discriminator output on generated images and the real label."""
        verdict = disc_net(gen_images)
        wanted = verdict.new_full(verdict.shape, real_label)
        return gen_loss_criterion(verdict, wanted)

    # output directory of this run
    run_dir = "model_out"
    print("Saving model output to: %s " % run_dir)

    def _loss_terms(output):
        # keep only the generator / discriminator loss entries for printing
        return {Keys.GLOSS: output[Keys.GLOSS], Keys.DLOSS: output[Keys.DLOSS]}

    # workflow handlers
    handlers = [
        StatsHandler(name="batch_training_loss", output_transform=_loss_terms),
        CheckpointSaver(
            save_dir=run_dir,
            save_dict={"g_net": gen_net, "d_net": disc_net},
            save_interval=10,
            save_final=True,
            epoch_level=True,
        ),
    ]

    # adversarial trainer: 50 epochs, 5 discriminator steps per iteration, no key metric
    trainer = GanTrainer(
        device,
        50,
        real_dataloader,
        gen_net,
        gen_opt,
        generator_loss,
        disc_net,
        disc_opt,
        discriminator_loss,
        d_prepare_batch=prepare_batch,
        d_train_steps=5,
        latent_shape=latent_size,
        key_train_metric=None,
        train_handlers=handlers,
    )

    # run GAN training
    trainer.run()

    # Training completed, save a few random generated images.
    print("Saving trained generator sample output.")
    sample_count = 10
    fakes = gen_net(make_latent(sample_count, latent_size).to(device))
    for i, image in enumerate(fakes):
        save_path = os.path.join(run_dir, "gen-fake-final-%d.png" % (i))
        # first channel of the generated image as a numpy array
        png_writer.write_png(image[0].cpu().data.numpy(), save_path, scale=255)
def main():
    """Train a 3D UNet on synthetic data with ignite's supervised engines.

    Forty random image/segmentation pairs are generated in a temporary
    directory; the first twenty are used for training and the last twenty for
    validation after every epoch. Validation reports ``Mean_Dice``, writes
    TensorBoard stats/images and can stop training early. The temporary
    directory is removed when the function exits, including when data
    generation or training raises.
    """
    monai.config.print_config()
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    # create a temporary directory and 40 random image, mask pairs; the
    # context manager deletes the directory even if a later step fails
    with tempfile.TemporaryDirectory() as tempdir:
        print(f'generating synthetic data to {tempdir} (this may take a while)')
        for i in range(40):
            im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1)
            n = nib.Nifti1Image(im, np.eye(4))
            nib.save(n, os.path.join(tempdir, f'im{i:d}.nii.gz'))
            n = nib.Nifti1Image(seg, np.eye(4))
            nib.save(n, os.path.join(tempdir, f'seg{i:d}.nii.gz'))

        images = sorted(glob(os.path.join(tempdir, 'im*.nii.gz')))
        segs = sorted(glob(os.path.join(tempdir, 'seg*.nii.gz')))

        # define transforms for image and segmentation
        train_imtrans = Compose([
            ScaleIntensity(),
            AddChannel(),
            RandSpatialCrop((96, 96, 96), random_size=False),
            ToTensor()
        ])
        train_segtrans = Compose([
            AddChannel(),
            RandSpatialCrop((96, 96, 96), random_size=False),
            ToTensor()
        ])
        val_imtrans = Compose([
            ScaleIntensity(),
            AddChannel(),
            Resize((96, 96, 96)),
            ToTensor()
        ])
        val_segtrans = Compose([
            AddChannel(),
            Resize((96, 96, 96)),
            ToTensor()
        ])

        # define nifti dataset, data loader; print one batch shape as a sanity check
        check_ds = NiftiDataset(images, segs, transform=train_imtrans, seg_transform=train_segtrans)
        check_loader = DataLoader(check_ds, batch_size=10, num_workers=2, pin_memory=torch.cuda.is_available())
        im, seg = monai.utils.misc.first(check_loader)
        print(im.shape, seg.shape)

        # create a training data loader
        train_ds = NiftiDataset(images[:20], segs[:20], transform=train_imtrans, seg_transform=train_segtrans)
        train_loader = DataLoader(train_ds, batch_size=5, shuffle=True, num_workers=8,
                                  pin_memory=torch.cuda.is_available())
        # create a validation data loader
        val_ds = NiftiDataset(images[-20:], segs[-20:], transform=val_imtrans, seg_transform=val_segtrans)
        val_loader = DataLoader(val_ds, batch_size=5, num_workers=8, pin_memory=torch.cuda.is_available())

        # create UNet, DiceLoss and Adam optimizer
        net = monai.networks.nets.UNet(
            dimensions=3,
            in_channels=1,
            out_channels=1,
            channels=(16, 32, 64, 128, 256),
            strides=(2, 2, 2, 2),
            num_res_units=2,
        )
        loss = monai.losses.DiceLoss(do_sigmoid=True)
        lr = 1e-3
        opt = torch.optim.Adam(net.parameters(), lr)
        device = torch.device('cuda:0')

        # ignite trainer expects batch=(img, seg) and returns output=loss at every iteration,
        # user can add output_transform to return other values, like: y_pred, y, etc.
        trainer = create_supervised_trainer(net, opt, loss, device, False)

        # adding checkpoint handler to save models (network params and optimizer stats) during training
        checkpoint_handler = ModelCheckpoint('./runs/', 'net', n_saved=10, require_empty=False)
        trainer.add_event_handler(event_name=Events.EPOCH_COMPLETED,
                                  handler=checkpoint_handler,
                                  to_save={'net': net, 'opt': opt})

        # StatsHandler prints loss at every iteration and print metrics at every epoch,
        # we don't set metrics for trainer here, so just print loss, user can also customize print functions
        # and can use output_transform to convert engine.state.output if it's not a loss value
        train_stats_handler = StatsHandler(name='trainer')
        train_stats_handler.attach(trainer)

        # TensorBoardStatsHandler plots loss at every iteration and plots metrics at every epoch, same as StatsHandler
        train_tensorboard_stats_handler = TensorBoardStatsHandler()
        train_tensorboard_stats_handler.attach(trainer)

        # set parameters for validation
        validation_every_n_epochs = 1
        metric_name = 'Mean_Dice'
        # add evaluation metric to the evaluator engine
        val_metrics = {metric_name: MeanDice(add_sigmoid=True, to_onehot_y=False)}

        # ignite evaluator expects batch=(img, seg) and returns output=(y_pred, y) at every iteration,
        # user can add output_transform to return other values
        evaluator = create_supervised_evaluator(net, val_metrics, device, True)

        @trainer.on(Events.EPOCH_COMPLETED(every=validation_every_n_epochs))
        def run_validation(engine):
            evaluator.run(val_loader)

        # add early stopping handler to evaluator
        early_stopper = EarlyStopping(patience=4,
                                      score_function=stopping_fn_from_metric(metric_name),
                                      trainer=trainer)
        evaluator.add_event_handler(event_name=Events.EPOCH_COMPLETED, handler=early_stopper)

        # add stats event handler to print validation stats via evaluator
        val_stats_handler = StatsHandler(
            name='evaluator',
            output_transform=lambda x: None,  # no need to print loss value, so disable per iteration output
            global_epoch_transform=lambda x: trainer.state.epoch)  # fetch global epoch number from trainer
        val_stats_handler.attach(evaluator)

        # add handler to record metrics to TensorBoard at every validation epoch
        val_tensorboard_stats_handler = TensorBoardStatsHandler(
            output_transform=lambda x: None,  # no need to plot loss value, so disable per iteration output
            global_epoch_transform=lambda x: trainer.state.epoch)  # fetch global epoch number from trainer
        val_tensorboard_stats_handler.attach(evaluator)

        # add handler to draw the first image and the corresponding label and model output in the last batch
        # here we draw the 3D output as GIF format along Depth axis, at every validation epoch
        val_tensorboard_image_handler = TensorBoardImageHandler(
            batch_transform=lambda batch: (batch[0], batch[1]),
            output_transform=lambda output: predict_segmentation(output[0]),
            global_iter_transform=lambda x: trainer.state.epoch
        )
        evaluator.add_event_handler(event_name=Events.EPOCH_COMPLETED, handler=val_tensorboard_image_handler)

        train_epochs = 30
        trainer.run(train_loader, train_epochs)
def main():
    """Evaluate a 3D DenseNet121 classifier on ten IXI T1 volumes.

    The function builds a dictionary-based validation pipeline, attaches a
    ``CheckpointLoader`` pointing at ``./runs/net_checkpoint_20.pth``, runs an
    ignite evaluator that tracks accuracy, forwards the argmax of the network
    output to a ``ClassificationSaver`` and prints the final engine state.

    The evaluation runs on ``cuda:0`` when CUDA is available and falls back to
    the CPU otherwise.
    """
    monai.config.print_config()
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    # IXI dataset as a demo, downloadable from https://brain-development.org/ixi-dataset/
    images = [
        "/workspace/data/medical/ixi/IXI-T1/IXI607-Guys-1097-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI175-HH-1570-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI385-HH-2078-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI344-Guys-0905-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI409-Guys-0960-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI584-Guys-1129-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI253-HH-1694-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI092-HH-1436-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI574-IOP-1156-T1.nii.gz",
        "/workspace/data/medical/ixi/IXI-T1/IXI585-Guys-1130-T1.nii.gz",
    ]
    # 2 binary labels for gender classification: man and woman
    labels = np.array([0, 0, 1, 0, 1, 0, 1, 0, 1, 0])
    # one dict per sample, pairing each image path with its label
    val_files = [{"img": img, "label": label} for img, label in zip(images, labels)]

    # define transforms for image; only the "img" entry is transformed
    val_transforms = Compose(
        [
            LoadNiftid(keys=["img"]),
            AddChanneld(keys=["img"]),
            ScaleIntensityd(keys=["img"]),
            Resized(keys=["img"], spatial_size=(96, 96, 96)),
            ToTensord(keys=["img"]),
        ]
    )

    # create DenseNet121
    net = monai.networks.nets.densenet.densenet121(spatial_dims=3, in_channels=1, out_channels=2)
    # keep using the first GPU when there is one, but do not fail on CPU-only machines
    # (the data loader below already guards `pin_memory` the same way)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    def prepare_batch(batch, device=None, non_blocking=False):
        # turn the dict batch into the (img, label) pair handed to `_prepare_batch`
        return _prepare_batch((batch["img"], batch["label"]), device, non_blocking)

    metric_name = "Accuracy"
    # add evaluation metric to the evaluator engine
    val_metrics = {metric_name: Accuracy()}
    # Ignite evaluator expects batch=(img, label) and returns output=(y_pred, y) at every iteration,
    # user can add output_transform to return other values
    evaluator = create_supervised_evaluator(net, val_metrics, device, True, prepare_batch=prepare_batch)

    # add stats event handler to print validation stats via evaluator
    val_stats_handler = StatsHandler(
        name="evaluator",
        output_transform=lambda x: None,  # no need to print loss value, so disable per iteration output
    )
    val_stats_handler.attach(evaluator)

    # the batch is a dict here, so the file name of every image is read from its
    # "img.filename_or_obj" entry (presumably added by LoadNiftid -- confirm);
    # the saved prediction is the argmax over dim 1 of the first output item
    prediction_saver = ClassificationSaver(
        output_dir="tempdir",
        name="evaluator",
        batch_transform=lambda batch: {"filename_or_obj": batch["img.filename_or_obj"]},
        output_transform=lambda output: output[0].argmax(1),
    )
    prediction_saver.attach(evaluator)

    # the model was trained by "densenet_training_dict" example
    # NOTE(review): no map_location is passed here; a checkpoint saved from a GPU
    # may still fail to load on a CPU-only machine -- confirm against CheckpointLoader
    CheckpointLoader(load_path="./runs/net_checkpoint_20.pth", load_dict={"net": net}).attach(evaluator)

    # create a validation data loader
    val_ds = monai.data.Dataset(data=val_files, transform=val_transforms)
    val_loader = DataLoader(val_ds, batch_size=2, num_workers=4, pin_memory=torch.cuda.is_available())

    state = evaluator.run(val_loader)
    print(state)
def main(tempdir):
    """Train and validate a 3D UNet on synthetic data using the workflow engines.

    Forty random image/segmentation pairs are written to ``tempdir`` as NIfTI
    files; the first twenty sorted pairs are used for training and the last
    twenty for validation. A ``SupervisedTrainer`` runs for five epochs and
    triggers a ``SupervisedEvaluator`` every second epoch.

    Args:
        tempdir: directory that receives the generated ``img*.nii.gz`` and
            ``seg*.nii.gz`` files.
    """

    def _write_nifti(volume, filename):
        # store `volume` in `tempdir` with an identity affine
        nib.save(nib.Nifti1Image(volume, np.eye(4)), os.path.join(tempdir, filename))

    def _binary_post_transforms():
        # sigmoid -> threshold -> keep the largest connected component of label 1
        return Compose(
            [
                Activationsd(keys="pred", sigmoid=True),
                AsDiscreted(keys="pred", threshold_values=True),
                KeepLargestConnectedComponentd(keys="pred", applied_labels=[1]),
            ]
        )

    def _discard_output(output):
        # disables the per-iteration output of the validation stats handlers
        return None

    def _pred_and_label(output):
        return (output["pred"], output["label"])

    def _image_and_label(batch):
        return (batch["image"], batch["label"])

    def _prediction(output):
        return output["pred"]

    def _loss_value(output):
        return output["loss"]

    monai.config.print_config()
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    # ---------------------------------------------------------------- dataset
    # fill `tempdir` with 40 random image, mask pairs
    print(f"generating synthetic data to {tempdir} (this may take a while)")
    for i in range(40):
        im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1, channel_dim=-1)
        _write_nifti(im, f"img{i:d}.nii.gz")
        _write_nifti(seg, f"seg{i:d}.nii.gz")

    images = sorted(glob(os.path.join(tempdir, "img*.nii.gz")))
    segs = sorted(glob(os.path.join(tempdir, "seg*.nii.gz")))
    train_files = [
        {"image": image_path, "label": seg_path} for image_path, seg_path in zip(images[:20], segs[:20])
    ]
    val_files = [
        {"image": image_path, "label": seg_path} for image_path, seg_path in zip(images[-20:], segs[-20:])
    ]

    # transforms applied to the image and segmentation entries
    pair_keys = ["image", "label"]
    train_transforms = Compose(
        [
            LoadImaged(keys=pair_keys),
            AsChannelFirstd(keys=pair_keys, channel_dim=-1),
            ScaleIntensityd(keys="image"),
            RandCropByPosNegLabeld(
                keys=pair_keys,
                label_key="label",
                spatial_size=[96, 96, 96],
                pos=1,
                neg=1,
                num_samples=4,
            ),
            RandRotate90d(keys=pair_keys, prob=0.5, spatial_axes=[0, 2]),
            ToTensord(keys=pair_keys),
        ]
    )
    val_transforms = Compose(
        [
            LoadImaged(keys=pair_keys),
            AsChannelFirstd(keys=pair_keys, channel_dim=-1),
            ScaleIntensityd(keys="image"),
            ToTensord(keys=pair_keys),
        ]
    )

    # training loader: batch_size=2 combined with num_samples=4 crops per volume
    # yields 2 x 4 images for every network training step
    train_ds = monai.data.CacheDataset(data=train_files, transform=train_transforms, cache_rate=0.5)
    train_loader = monai.data.DataLoader(train_ds, batch_size=2, shuffle=True, num_workers=4)
    # validation loader
    val_ds = monai.data.CacheDataset(data=val_files, transform=val_transforms, cache_rate=1.0)
    val_loader = monai.data.DataLoader(val_ds, batch_size=1, num_workers=4)

    # ---------------------------------------------------------------- network
    # UNet on the GPU when CUDA is available, otherwise on the CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    unet = monai.networks.nets.UNet(
        dimensions=3,
        in_channels=1,
        out_channels=1,
        channels=(16, 32, 64, 128, 256),
        strides=(2, 2, 2, 2),
        num_res_units=2,
    )
    net = unet.to(device)

    # ------------------------------------------- loss, optimizer, LR schedule
    dice_loss = monai.losses.DiceLoss(sigmoid=True)
    optimizer = torch.optim.Adam(net.parameters(), 1e-3)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)

    # ------------------------------------------------------------- validation
    val_post_transforms = _binary_post_transforms()
    val_handlers = [
        StatsHandler(output_transform=_discard_output),
        TensorBoardStatsHandler(log_dir="./runs/", output_transform=_discard_output),
        TensorBoardImageHandler(
            log_dir="./runs/",
            batch_transform=_image_and_label,
            output_transform=_prediction,
        ),
        CheckpointSaver(save_dir="./runs/", save_dict={"net": net}, save_key_metric=True),
    ]

    sliding_window = SlidingWindowInferer(roi_size=(96, 96, 96), sw_batch_size=4, overlap=0.5)
    val_mean_dice = MeanDice(include_background=True, output_transform=_pred_and_label)
    val_accuracy = Accuracy(output_transform=_pred_and_label)
    # AMP is requested only when the reported PyTorch version is at least 1.6
    use_amp = monai.utils.get_torch_version_tuple() >= (1, 6)

    evaluator = SupervisedEvaluator(
        device=device,
        val_data_loader=val_loader,
        network=net,
        inferer=sliding_window,
        post_transform=val_post_transforms,
        key_val_metric={"val_mean_dice": val_mean_dice},
        additional_metrics={"val_acc": val_accuracy},
        val_handlers=val_handlers,
        amp=use_amp,
    )

    # --------------------------------------------------------------- training
    train_post_transforms = _binary_post_transforms()
    train_handlers = [
        LrScheduleHandler(lr_scheduler=lr_scheduler, print_lr=True),
        # run the evaluator above every 2 epochs
        ValidationHandler(validator=evaluator, interval=2, epoch_level=True),
        StatsHandler(tag_name="train_loss", output_transform=_loss_value),
        TensorBoardStatsHandler(log_dir="./runs/", tag_name="train_loss", output_transform=_loss_value),
        CheckpointSaver(
            save_dir="./runs/",
            save_dict={"net": net, "opt": optimizer},
            save_interval=2,
            epoch_level=True,
        ),
    ]

    trainer = SupervisedTrainer(
        device=device,
        max_epochs=5,
        train_data_loader=train_loader,
        network=net,
        optimizer=optimizer,
        loss_function=dice_loss,
        inferer=SimpleInferer(),
        post_transform=train_post_transforms,
        key_train_metric={"train_acc": Accuracy(output_transform=_pred_and_label)},
        train_handlers=train_handlers,
        amp=use_amp,
    )
    trainer.run()