def run_training(self):
    """
    Write a debug dump of the trainer, copy the plans file and start training.

    Every attribute of the trainer that is neither a dunder nor callable is
    stored as its ``str()`` representation in ``<output_folder>/debug.json``.
    The plans, the intensity properties and the dataset dictionaries are left
    out of that dump. Afterwards the plans file is copied to
    ``<output_folder_base>/plans.pkl`` and the ``run_training`` of the parent
    class is executed.

    :return:
    """
    # These entries are not part of the dump. They are skipped up front so that
    # their string representation is never built just to be thrown away, and
    # so that a trainer lacking one of them does not fail with a KeyError.
    excluded = ('plans', 'intensity_properties', 'dataset', 'dataset_tr', 'dataset_val')

    dct = OrderedDict()
    for k in self.__dir__():
        if k.startswith("__") or k in excluded:
            continue
        value = getattr(self, k)  # looked up once, so properties are evaluated a single time
        if not callable(value):
            dct[k] = str(value)

    save_json(dct, join(self.output_folder, "debug.json"))

    import shutil

    shutil.copy(self.plans_file, join(self.output_folder_base, "plans.pkl"))

    super(nnUNetTrainer, self).run_training()
def update_fold(self, fold):
    """
    used to swap between folds for inference (ensemble of models from cross-validation)
    DO NOT USE DURING TRAINING AS THIS WILL NOT UPDATE THE DATASET SPLIT AND THE DATA AUGMENTATION GENERATORS

    The output folder of a fold is ``<output_folder_base>/fold_<fold>`` for a
    non-string fold and ``<output_folder_base>/all`` for the fold "all".

    :param fold: the fold to switch to. Either "all" (the only string that is accepted) or a fold
    identifier such as an int. None leaves the trainer untouched.
    :return:
    """
    if fold is None:
        return

    if isinstance(fold, str):
        assert fold == "all", "if self.fold is a string then it must be \'all\'"
        new_subfolder = "%s" % str(fold)
    else:
        new_subfolder = "fold_%s" % str(fold)

    # The subfolder that has to be stripped depends on the fold we are coming FROM, not on the one we
    # are switching to. Deriving it from the new fold left '.../all' in place when going from "all"
    # to a numeric fold, which produced '.../all/fold_X'.
    if self.fold is not None:
        if isinstance(self.fold, str):
            current_subfolder = "%s" % str(self.fold)
        else:
            current_subfolder = "fold_%s" % str(self.fold)
        if self.output_folder.endswith(current_subfolder):
            self.output_folder = self.output_folder_base

    self.output_folder = join(self.output_folder, new_subfolder)
    self.fold = fold
def crawl_and_remove_hidden_from_decathlon(folder):
    """
    Delete all files whose name starts with "." from a decathlon task folder and from its
    imagesTr, labelsTr and imagesTs subfolders.

    :param folder: task folder. Its last path component must start with "Task" and it must contain
    the subfolders imagesTr, labelsTr and imagesTs, otherwise an AssertionError is raised.
    :return:
    """
    not_a_decathlon_folder = "This does not seem to be a decathlon folder. Please give me a " \
                             "folder that starts with TaskXX and has the subfolders imagesTr, " \
                             "labelsTr and imagesTs"

    folder = remove_trailing_slash(folder)
    task_name = folder.split('/')[-1]
    assert task_name.startswith("Task"), not_a_decathlon_folder

    present = subfolders(folder, join=False)
    for required in ('imagesTr', 'imagesTs', 'labelsTr'):
        assert required in present, not_a_decathlon_folder

    # the task folder itself first, then the three data folders
    locations = (folder, join(folder, 'imagesTr'), join(folder, 'labelsTr'), join(folder, 'imagesTs'))
    for location in locations:
        for hidden_file in subfiles(location, prefix="."):
            os.remove(hidden_file)
def create_lists_from_splitted_dataset(base_folder_splitted):
    """
    Build the per-case file lists described by the dataset.json of a splitted dataset.

    :param base_folder_splitted: folder that contains dataset.json. The returned image paths point into
    its imagesTr subfolder, the label paths into its labelsTr subfolder.
    :return: tuple (lists, modalities). lists holds one list per training case: one image file per
    modality (case name + "_XXXX.nii.gz", XXXX being the zero padded modality index) followed by the
    label file. modalities maps the modality index (int) to the entry stored in dataset.json.
    """
    with open(join(base_folder_splitted, "dataset.json")) as handle:
        description = json.load(handle)

    training_entries = description['training']
    modalities = description['modality']
    modality_count = len(modalities)

    cases = []
    for entry in training_entries:
        # [:-7] drops the last 7 characters of the image file name (the length of ".nii.gz")
        case_files = [
            join(base_folder_splitted, "imagesTr",
                 Path(entry['image']).parts[-1][:-7] + "_%04.0d.nii.gz" % idx)
            for idx in range(modality_count)
        ]
        case_files.append(join(base_folder_splitted, "labelsTr", Path(entry['label']).parts[-1]))
        cases.append(case_files)

    modality_names = {int(key): modalities[str(key)] for key in modalities}
    return cases, modality_names
def plot_network_architecture(self):
    """
    Try to save a graph of self.network as network_architecture.pdf in self.output_folder.

    The graph is built with hiddenlayer from a random input of shape
    (1, self.num_input_channels, *self.patch_size) that is moved to the GPU. Any exception (including a
    failing import) is written to the log file instead of being raised. The CUDA cache is emptied in
    every case.

    :return:
    """
    try:
        from batchgenerators.utilities.file_and_folder_operations import join
        import hiddenlayer as hl

        dummy_shape = (1, self.num_input_channels, *self.patch_size)
        # the dummy input is deliberately not bound to a name so that it is gone before the cache is emptied
        graph = hl.build_graph(
            self.network,
            torch.rand(dummy_shape).cuda(),
            transforms=None,
        )
        graph.save(join(self.output_folder, "network_architecture.pdf"))
        del graph
    except Exception as err:
        for entry in ("Unable to plot network architecture:", err):
            self.print_to_log_file(entry)
    finally:
        torch.cuda.empty_cache()
def run_preprocessing(self, num_threads):
    """
    Copy the ground truth segmentations next to the preprocessed data and run the 2D preprocessor.

    An already existing gt_segmentations folder in self.preprocessed_output_folder is deleted and
    replaced by a copy of the one in self.folder_with_cropped_data. The preprocessor is configured
    from self.plans and run once with the current_spacing of every entry of self.plans_per_stage.

    :param num_threads: passed on to the run method of the preprocessor
    :return:
    """
    gt_target = join(self.preprocessed_output_folder, "gt_segmentations")
    # shutil.copytree does not copy into an existing directory by default, so an old copy goes first
    if os.path.isdir(gt_target):
        shutil.rmtree(gt_target)
    shutil.copytree(join(self.folder_with_cropped_data, "gt_segmentations"), gt_target)

    plans = self.plans
    schemes = plans['normalization_schemes']
    use_mask = plans['use_mask_for_norm']
    intensity_props = plans['dataset_properties']['intensityproperties']

    preprocessor = PreprocessorFor2D(schemes, use_mask, self.transpose_forward, intensity_props)

    spacings = []
    for stage_plan in self.plans_per_stage.values():
        spacings.append(stage_plan["current_spacing"])

    preprocessor.run(
        spacings,
        self.folder_with_cropped_data,
        self.preprocessed_output_folder,
        self.plans['data_identifier'],
        num_threads,
    )
def evaluate_folder(folder_with_gts, folder_with_predictions, labels):
    """
    Evaluate all .nii.gz files of folder_with_predictions against the files of the same name in
    folder_with_gts. Writes a summary.json to folder_with_predictions.

    Both folders must contain exactly the same .nii.gz file names, otherwise an AssertionError is raised.

    :param folder_with_gts: folder with the reference files
    :param folder_with_predictions: folder with the predicted files, also receives summary.json
    :param labels: passed on to aggregate_scores
    :return: whatever aggregate_scores returns
    """
    gt_names = subfiles(folder_with_gts, suffix=".nii.gz", join=False)
    pred_names = subfiles(folder_with_predictions, suffix=".nii.gz", join=False)

    # each folder must provide every file name the other one has
    assert set(gt_names).issubset(pred_names), "files missing in folder_with_predictions"
    assert set(pred_names).issubset(gt_names), "files missing in folder_with_gts"

    test_ref_pairs = []
    for name in pred_names:
        test_ref_pairs.append((join(folder_with_predictions, name), join(folder_with_gts, name)))

    summary_file = join(folder_with_predictions, "summary.json")
    return aggregate_scores(test_ref_pairs, json_output_file=summary_file, num_threads=8, labels=labels)
def evaluate_folder(folder_with_gts: str, folder_with_predictions: str, labels: tuple, **metric_kwargs):
    """
    Evaluate every .nii.gz file of folder_with_predictions against the file of the same name in
    folder_with_gts. Writes a summary.json to folder_with_predictions.

    :param folder_with_gts: folder where the ground truth segmentations are saved. Must be nifti files.
    :param folder_with_predictions: folder where the predicted segmentations are saved. Must be nifti files.
    :param labels: tuple of int with the labels in the dataset. For example (0, 1, 2, 3) for Task001_BrainTumour.
    :param metric_kwargs: additional keyword arguments, handed to aggregate_scores unchanged
    :return: whatever aggregate_scores returns
    """
    # the ground truth folder is listed although the result is only needed by the disabled checks below
    gt_names = subfiles(folder_with_gts, suffix=".nii.gz", join=False)
    pred_names = subfiles(folder_with_predictions, suffix=".nii.gz", join=False)

    # NOTE: the checks that both folders hold the same file names are switched off here, the pairs are
    # built from the prediction files only
    # assert all([i in files_pred for i in files_gt]), "files missing in folder_with_predictions"
    # assert all([i in files_gt for i in files_pred]), "files missing in folder_with_gts"

    test_ref_pairs = []
    for name in pred_names:
        test_ref_pairs.append((join(folder_with_predictions, name), join(folder_with_gts, name)))

    summary_file = join(folder_with_predictions, "summary.json")
    return aggregate_scores(test_ref_pairs, json_output_file=summary_file, num_threads=8, labels=labels,
                            **metric_kwargs)
def initialize(self, training=True, force_load_plans=False):
    """
    Prepare the trainer: output folder, plans, data augmentation parameters, the AutoAugment object
    and network/optimizer/scheduler.

    For prediction of test cases just set training=False, this will prevent loading of training data and
    training batchgenerator initialization

    :param training: if True the data loaders and augmentation generators are created as well
    :param force_load_plans: if True the plans file is loaded even if self.plans is already set
    :return:
    """
    maybe_mkdir_p(self.output_folder)

    plans_required = force_load_plans or (self.plans is None)
    if plans_required:
        self.load_plans_file()
    self.process_plans(self.plans)

    self.setup_DA_params()
    self.AutoAugment = AutoAugment(self.data_aug_params['patch_size_for_spatialtransform'],
                                   self.data_aug_params)

    stage_folder_name = self.plans['data_identifier'] + "_stage%d" % self.stage
    self.folder_with_preprocessed_data = join(self.dataset_directory, stage_folder_name)

    if training:
        self.dl_tr, self.dl_val = self.get_basic_generators()

        if not self.unpack_data:
            self.print_to_log_file(
                "INFO: Not unpacking data! Training may be slow due to that. Pray you are not using 2d or you "
                "will wait all winter for your model to finish!")
        else:
            self.print_to_log_file("unpacking dataset")
            unpack_dataset(self.folder_with_preprocessed_data)
            self.print_to_log_file("done")

        generators = get_default_augmentation(self.dl_tr, self.dl_val,
                                              self.data_aug_params['patch_size_for_spatialtransform'],
                                              self.data_aug_params)
        self.tr_gen, self.val_gen = generators

        # the keys of both splits only go to the log file, not to the console
        training_keys = str(self.dataset_tr.keys())
        self.print_to_log_file("TRAINING KEYS:\n %s" % training_keys, also_print_to_console=False)
        validation_keys = str(self.dataset_val.keys())
        self.print_to_log_file("VALIDATION KEYS:\n %s" % validation_keys, also_print_to_console=False)

    self.initialize_network_optimizer_and_scheduler()
    # assert isinstance(self.network, (SegmentationNetwork, nn.DataParallel))
    self.was_initialized = True
def check_input_folder_and_return_caseIDs(input_folder, expected_num_modalities):
    """
    Check that input_folder holds one file CASEID_XXXX.nii.gz per case and modality and return the case ids.

    The case ids are the unique file names without their last 12 characters (the length of
    "_0000.nii.gz"). Some of the case ids, and some of the files that were not expected (if there are
    any), are printed as randomly drawn examples.

    :param input_folder: folder with the .nii.gz files
    :param expected_num_modalities: number of files (modality index 0 ... n-1) that each case must have
    :return: array with the unique case ids
    :raises AssertionError: if input_folder holds no .nii.gz file
    :raises RuntimeError: if a file of any case is missing
    """
    print("This model expects %d input modalities for each image" % expected_num_modalities)
    found_files = subfiles(input_folder, suffix=".nii.gz", join=False, sort=True)
    assert len(found_files) > 0, "input folder did not contain any images (expected to find .nii.gz file endings)"

    maybe_case_ids = np.unique([name[:-12] for name in found_files])

    # every expected file is crossed off this copy, what is left over was not expected
    unexpected = list(found_files)
    missing = []
    for case_id in maybe_case_ids:
        for modality in range(expected_num_modalities):
            wanted = case_id + "_%04.0d.nii.gz" % modality
            if isfile(join(input_folder, wanted)):
                unexpected.remove(wanted)
            else:
                missing.append(wanted)

    num_cases = len(maybe_case_ids)
    print("Found %d unique case ids, here are some examples:" % num_cases,
          np.random.choice(maybe_case_ids, min(num_cases, 10)))
    print("If they don't look right, make sure to double check your filenames. They must end with _0000.nii.gz etc")

    if unexpected:
        print("found %d unexpected remaining files in the folder. Here are some examples:" % len(unexpected),
              np.random.choice(unexpected, min(len(unexpected), 10)))

    if missing:
        print("Some files are missing:")
        print(missing)
        raise RuntimeError("missing files in input_folder")

    return maybe_case_ids
def download_and_install_from_url(url):
    """
    Download a zip file with a pretrained model and install it.

    The file is downloaded to a temporary file '.nnunetdownload_<number>' in the home directory of the
    user, handed to install_model_from_zip_file and deleted afterwards, no matter whether the
    download/installation worked.

    :param url: where to download the zip file from
    :return:
    :raises AssertionError: if network_training_output_dir is not set
    """
    assert network_training_output_dir is not None, "Cannot install model because network_training_output_dir is not " \
                                                    "set (RESULTS_FOLDER missing as environment variable, see " \
                                                    "Installation instructions)"
    print('Downloading pretrained model from url:', url)

    import http.client
    import os

    # HTTP/1.0 is forced by patching the class attributes of HTTPConnection. This affects every
    # connection of the process, so the previous values are remembered and put back once we are done
    # instead of leaving the whole process on HTTP/1.0.
    connection_cls = http.client.HTTPConnection
    previous_vsn = connection_cls._http_vsn
    previous_vsn_str = connection_cls._http_vsn_str
    connection_cls._http_vsn = 10
    connection_cls._http_vsn_str = 'HTTP/1.0'

    home = os.path.expanduser('~')
    random_number = int(time() * 1e7)
    # not called 'tempfile' so that the name of the standard library module is not shadowed
    download_target = join(home, '.nnunetdownload_%s' % str(random_number))

    try:
        download_file(url=url, local_filename=download_target, chunk_size=8192 * 16)
        print("Download finished. Extracting...")
        install_model_from_zip_file(download_target)
        print("Done")
    finally:
        # exceptions simply propagate, the cleanup happens in any case
        connection_cls._http_vsn = previous_vsn
        connection_cls._http_vsn_str = previous_vsn_str
        if isfile(download_target):
            os.remove(download_target)
# network_output_dir_base = "/media/zhx/My Passport/lung_lobe_seg/galaNet_trained_models" # 网络存放处 raw_data_base_dir = "/data/fox_cloud/data/hexiang/lung_lobe_seg/galaNet_raw_data" # 原始数据保存文件夹 preprocessed_output_dir = "/data/fox_cloud/data/hexiang/lung_lobe_seg/galaNet_preprocessed" # 预处理后数据存放处 network_output_dir_base = "/data/fox_cloud/data/hexiang/lung_lobe_seg/galaNet_trained_models" # 网络存放处 # raw_data_base_dir = "/data0/mzs/zhx/lung_lobe_seg/galaNet_raw_data" # 原始数据保存文件夹 # preprocessed_output_dir = "/data0/mzs/zhx/lung_lobe_seg/galaNet_preprocessed" # 预处理后数据存放处 # network_output_dir_base = "/data0/mzs/zhx/lung_lobe_seg/galaNet_trained_models" # 网络存放处 # raw_data_base_dir = "/home/zenghexiang/data/zenghexiang/lung_lobe_seg/galaNet_raw_data" # 原始数据保存文件夹 # preprocessed_output_dir = "/home/zenghexiang/data/zenghexiang/lung_lobe_seg/galaNet_preprocessed" # 预处理后数据存放处 # network_output_dir_base = "/home/zenghexiang/data/zenghexiang/lung_lobe_seg/galaNet_trained_models" # 网络存放处 if raw_data_base_dir is not None: raw_dicom_data_dir = join(raw_data_base_dir, "dicom_data") # dicom原始数据存放文件夹 raw_cropped_data_dir = join(raw_data_base_dir, "cropped_data") # 原始数据被crop后存放的文件夹 raw_splited_dir = join(raw_data_base_dir, "splited_data") maybe_mkdir_p(raw_data_base_dir) maybe_mkdir_p(raw_cropped_data_dir) else: raise AssertionError( "Attention! raw_data_base_dir is not defined! Please set raw_data_base_dir in paths.py." ) if preprocessed_output_dir is not None: maybe_mkdir_p(preprocessed_output_dir) maybe_mkdir_p(join(preprocessed_output_dir, preprocessed_data_identifer)) maybe_mkdir_p(join(preprocessed_output_dir, preprocessed_net_inputs)) else:
def validate(self, do_mirroring: bool = True, use_sliding_window: bool = True, step_size: float = 0.5,
             save_softmax: bool = True, use_gaussian: bool = True, overwrite: bool = True,
             validation_folder_name: str = 'validation_raw', debug: bool = False, all_in_gpu: bool = False,
             segmentation_export_kwargs: dict = None, run_postprocessing_on_folds: bool = True):
    """
    Predict all cases of the validation split, export them as nifti, evaluate them and (optionally)
    determine the postprocessing. The network is put into eval mode for this and set back to the mode
    it was in when the method returns.

    if debug=True then the temporary files generated for postprocessing determination will be kept

    :param do_mirroring: passed on to the prediction. Requires data_aug_params['do_mirror'] to be set
    :param use_sliding_window: passed on to the prediction
    :param step_size: passed on to the prediction
    :param save_softmax: if True a file name <case>.npz is handed to the export for the softmax output
    :param use_gaussian: passed on to the prediction
    :param overwrite: if False, cases whose .nii.gz (and .npz if save_softmax is set) already exist in
    the validation folder are not predicted again
    :param validation_folder_name: name of the subfolder of self.output_folder that receives the predictions
    :param debug: passed on to determine_postprocessing
    :param all_in_gpu: passed on to the prediction
    :param segmentation_export_kwargs: dict with the keys 'force_separate_z', 'interpolation_order' and
    'interpolation_order_z'. If None, the values are taken from self.plans['segmentation_export_params']
    if that exists, otherwise None, 1 and 0 are used
    :param run_postprocessing_on_folds: if True determine_postprocessing is run after the evaluation
    :return:
    :raises RuntimeError: if do_mirroring is set but data_aug_params['do_mirror'] is not
    :raises OSError: if a ground truth file could not be copied after 10 attempts
    """
    current_mode = self.network.training
    self.network.eval()

    assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
    if self.dataset_val is None:
        self.load_dataset()
        self.do_split()

    if segmentation_export_kwargs is None:
        if 'segmentation_export_params' in self.plans.keys():
            force_separate_z = self.plans['segmentation_export_params']['force_separate_z']
            interpolation_order = self.plans['segmentation_export_params']['interpolation_order']
            interpolation_order_z = self.plans['segmentation_export_params']['interpolation_order_z']
        else:
            force_separate_z = None
            interpolation_order = 1
            interpolation_order_z = 0
    else:
        force_separate_z = segmentation_export_kwargs['force_separate_z']
        interpolation_order = segmentation_export_kwargs['interpolation_order']
        interpolation_order_z = segmentation_export_kwargs['interpolation_order_z']

    # predictions as they come from the network go here
    output_folder = join(self.output_folder, validation_folder_name)
    maybe_mkdir_p(output_folder)
    # this is for debug purposes
    my_input_args = {'do_mirroring': do_mirroring,
                     'use_sliding_window': use_sliding_window,
                     'step_size': step_size,
                     'save_softmax': save_softmax,
                     'use_gaussian': use_gaussian,
                     'overwrite': overwrite,
                     'validation_folder_name': validation_folder_name,
                     'debug': debug,
                     'all_in_gpu': all_in_gpu,
                     'segmentation_export_kwargs': segmentation_export_kwargs,
                     }
    save_json(my_input_args, join(output_folder, "validation_args.json"))

    if do_mirroring:
        if not self.data_aug_params['do_mirror']:
            raise RuntimeError("We did not train with mirroring so you cannot do inference with mirroring enabled")
        mirror_axes = self.data_aug_params['mirror_axes']
    else:
        mirror_axes = ()

    pred_gt_tuples = []

    export_pool = Pool(default_num_threads)
    results = []

    for k in self.dataset_val.keys():
        properties = load_pickle(self.dataset[k]['properties_file'])
        fname = properties['list_of_data_files'][0].split("/")[-1][:-12]
        if overwrite or (not isfile(join(output_folder, fname + ".nii.gz"))) or \
                (save_softmax and not isfile(join(output_folder, fname + ".npz"))):
            data = np.load(self.dataset[k]['data_file'])['data']

            print(k, data.shape)
            # the last entry of data is not fed to the network (see data[:-1] below), its -1 values are set to 0
            data[-1][data[-1] == -1] = 0

            softmax_pred = self.predict_preprocessed_data_return_seg_and_softmax(data[:-1],
                                                                                 do_mirroring=do_mirroring,
                                                                                 mirror_axes=mirror_axes,
                                                                                 use_sliding_window=use_sliding_window,
                                                                                 step_size=step_size,
                                                                                 use_gaussian=use_gaussian,
                                                                                 all_in_gpu=all_in_gpu,
                                                                                 mixed_precision=self.fp16)[1]

            softmax_pred = softmax_pred.transpose([0] + [i + 1 for i in self.transpose_backward])

            if save_softmax:
                softmax_fname = join(output_folder, fname + ".npz")
            else:
                softmax_fname = None

            # There is a problem with python process communication that prevents us from communicating
            # obejcts larger than 2 GB between processes (basically when the length of the pickle string
            # that will be sent is communicated by the multiprocessing.Pipe object then the placeholder
            # (\%i I think) does not allow for long enough strings (lol). This could be fixed by changing
            # i to l (for long) but that would require manually patching system python code. We circumvent
            # that problem here by saving softmax_pred to a npy file that will then be read (and finally
            # deleted) by the Process. save_segmentation_nifti_from_softmax can take either filename or
            # np.ndarray and will handle this automatically
            if np.prod(softmax_pred.shape) > (2e9 / 4 * 0.85):  # *0.85 just to be save
                np.save(join(output_folder, fname + ".npy"), softmax_pred)
                softmax_pred = join(output_folder, fname + ".npy")

            results.append(export_pool.starmap_async(save_segmentation_nifti_from_softmax,
                                                     ((softmax_pred, join(output_folder, fname + ".nii.gz"),
                                                       properties, interpolation_order, self.regions_class_order,
                                                       None, None,
                                                       softmax_fname, None, force_separate_z,
                                                       interpolation_order_z),
                                                      )
                                                     )
                           )

        # cases that were not predicted again are still part of the evaluation
        pred_gt_tuples.append([join(output_folder, fname + ".nii.gz"),
                               join(self.gt_niftis_folder, fname + ".nii.gz")])

    # wait until all exports are done (this also re-raises exceptions of the workers)
    for pending in results:
        pending.get()
    # all jobs are finished, so the worker processes can be shut down instead of being left behind
    export_pool.close()
    export_pool.join()
    self.print_to_log_file("finished prediction")

    # evaluate raw predictions
    self.print_to_log_file("evaluation of raw predictions")
    task = self.dataset_directory.split("/")[-1]
    job_name = self.experiment_name
    _ = aggregate_scores(pred_gt_tuples, labels=list(range(self.num_classes)),
                         json_output_file=join(output_folder, "summary.json"),
                         json_name=job_name + " val tiled %s" % (str(use_sliding_window)),
                         json_author="Fabian",
                         json_task=task, num_threads=default_num_threads)

    if run_postprocessing_on_folds:
        # in the old nnunet we would stop here. Now we add a postprocessing. This postprocessing can remove everything
        # except the largest connected component for each class. To see if this improves results, we do this for all
        # classes and then rerun the evaluation. Those classes for which this resulted in an improved dice score will
        # have this applied during inference as well
        self.print_to_log_file("determining postprocessing")
        determine_postprocessing(self.output_folder, self.gt_niftis_folder, validation_folder_name,
                                 final_subf_name=validation_folder_name + "_postprocessed", debug=debug)
        # after this the final predictions for the validation set can be found in validation_folder_name_base + "_postprocessed"
        # They are always in that folder, even if no postprocessing was applied!

    # determining postprocessing on a per-fold basis may be OK for this fold but what if another fold finds another
    # postprocessing to be better? In this case we need to consolidate. At the time the consolidation is going to be
    # done we won't know what self.gt_niftis_folder was, so now we copy all the niftis into a separate folder to
    # be used later
    gt_nifti_folder = join(self.output_folder_base, "gt_niftis")
    maybe_mkdir_p(gt_nifti_folder)
    for f in subfiles(self.gt_niftis_folder, suffix=".nii.gz"):
        success = False
        attempts = 0
        # The name bound by 'except ... as' is deleted when the except block is left, so the exception
        # has to be stored under a different name to be available after the loop.
        last_error = None
        while not success and attempts < 10:
            try:
                shutil.copy(f, gt_nifti_folder)
                success = True
            except OSError as err:
                last_error = err
                attempts += 1
                sleep(1)
        if not success:
            print("Could not copy gt nifti file %s into folder %s" % (f, gt_nifti_folder))
            if last_error is not None:
                raise last_error

    self.network.train(current_mode)
def run_training(self):
    """
    Run the training loop from self.epoch up to self.max_num_epochs (or until on_epoch_end asks to stop)
    and save the final checkpoint.

    if we run with -c then we need to set the correct lr for the first epoch, otherwise it will run the first
    continued epoch with self.initial_lr

    we also need to make sure deep supervision in the network is enabled for training, thus the wrapper

    Each epoch consists of self.num_batches_per_epoch training iterations followed by
    self.num_val_batches_per_epoch validation iterations under torch.no_grad() (and the same number
    again with the network in train mode if self.also_val_in_tr_mode is set). The mean losses are
    appended to self.all_tr_losses, self.all_val_losses and self.all_val_losses_tr_mode.
    :return:
    """
    self.maybe_update_lr(self.epoch)  # if we dont overwrite epoch then self.epoch+1 is used which is not what we
    # want at the start of the training

    # do_ds lives on the wrapped module if the network is wrapped in DDP
    if isinstance(self.network, DDP):
        net = self.network.module
    else:
        net = self.network
    # remember the current do_ds setting, it is restored on the last line of this method
    ds = net.do_ds
    net.do_ds = True

    # one batch is drawn from each generator up front, the batches themselves are discarded
    _ = self.tr_gen.next()
    _ = self.val_gen.next()

    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    self._maybe_init_amp()

    maybe_mkdir_p(self.output_folder)
    self.plot_network_architecture()

    if cudnn.benchmark and cudnn.deterministic:
        warn("torch.backends.cudnn.deterministic is True indicating a deterministic training is desired. "
             "But torch.backends.cudnn.benchmark is True as well and this will prevent deterministic training! "
             "If you want deterministic then set benchmark=False")

    # NOTE(review): self.network, self.tr_gen and self.val_gen are already used above, so this check
    # comes after the first use of what initialize presumably sets up - confirm whether it can ever trigger
    if not self.was_initialized:
        self.initialize(True)

    while self.epoch < self.max_num_epochs:
        self.print_to_log_file("\nepoch: ", self.epoch)
        epoch_start_time = time()
        train_losses_epoch = []

        # train one epoch
        self.network.train()

        if self.use_progress_bar:
            with trange(self.num_batches_per_epoch) as tbar:
                for b in tbar:
                    tbar.set_description("Epoch {}/{}".format(self.epoch + 1, self.max_num_epochs))

                    l = self.run_iteration(self.tr_gen, True)

                    tbar.set_postfix(loss=l)
                    train_losses_epoch.append(l)
        else:
            for _ in range(self.num_batches_per_epoch):
                l = self.run_iteration(self.tr_gen, True)
                train_losses_epoch.append(l)

        self.all_tr_losses.append(np.mean(train_losses_epoch))
        self.print_to_log_file("train loss : %.4f" % self.all_tr_losses[-1])

        with torch.no_grad():
            # validation with train=False
            self.network.eval()
            val_losses = []
            for b in range(self.num_val_batches_per_epoch):
                l = self.run_iteration(self.val_gen, False, True)
                val_losses.append(l)
            self.all_val_losses.append(np.mean(val_losses))
            self.print_to_log_file("validation loss: %.4f" % self.all_val_losses[-1])

            if self.also_val_in_tr_mode:
                self.network.train()
                # validation with train=True
                val_losses = []
                for b in range(self.num_val_batches_per_epoch):
                    l = self.run_iteration(self.val_gen, False)
                    val_losses.append(l)
                self.all_val_losses_tr_mode.append(np.mean(val_losses))
                self.print_to_log_file("validation loss (train=True): %.4f" % self.all_val_losses_tr_mode[-1])

        self.update_train_loss_MA()  # needed for lr scheduler and stopping of training

        continue_training = self.on_epoch_end()

        epoch_end_time = time()

        if not continue_training:
            # allows for early stopping
            break

        self.epoch += 1
        self.print_to_log_file("This epoch took %f s\n" % (epoch_end_time - epoch_start_time))

    self.epoch -= 1  # if we don't do this we can get a problem with loading model_final_checkpoint.

    if self.save_final_checkpoint:
        self.save_checkpoint(join(self.output_folder, "model_final_checkpoint.model"))

    # only the process with local_rank 0 removes the files
    if self.local_rank == 0:
        # now we can delete latest as it will be identical with final
        if isfile(join(self.output_folder, "model_latest.model")):
            os.remove(join(self.output_folder, "model_latest.model"))
        if isfile(join(self.output_folder, "model_latest.model.pkl")):
            os.remove(join(self.output_folder, "model_latest.model.pkl"))

    # put the deep supervision flag back to what it was before the training
    net.do_ds = ds
def initialize(self, training=True, force_load_plans=False):
    """
    Set up plans, data augmentation parameters, (for training) the data loaders, deep supervision loss
    weights and augmentation generators, then the network, optimizer and lr scheduler. Finally the
    network is wrapped in DDP on the device self.local_rank.

    If the trainer was initialized before, only a message is written to the log file.

    :param training: if True the dataset is unpacked (if self.unpack_data is set) and the data loaders
    and augmentation generators are created
    :param force_load_plans: if True the plans file is loaded even if self.plans is already set
    :return:
    """
    if not self.was_initialized:
        maybe_mkdir_p(self.output_folder)

        if force_load_plans or (self.plans is None):
            self.load_plans_file()

        self.process_plans(self.plans)

        self.setup_DA_params()

        self.folder_with_preprocessed_data = join(self.dataset_directory,
                                                  self.plans['data_identifier'] + "_stage%d" % self.stage)
        if training:
            self.dl_tr, self.dl_val = self.get_basic_generators()
            if self.unpack_data:
                # only the process with local_rank 0 unpacks
                if self.local_rank == 0:
                    print("unpacking dataset")
                    unpack_dataset(self.folder_with_preprocessed_data)
                    print("done")
                # called by all processes, the one with local_rank 0 gets here after unpacking
                distributed.barrier()
            else:
                print(
                    "INFO: Not unpacking data! Training may be slow due to that. Pray you are not using 2d or you "
                    "will wait all winter for your model to finish!")

            # setting weights for deep supervision losses
            net_numpool = len(self.net_num_pool_op_kernel_sizes)

            # we give each output a weight which decreases exponentially (division by 2) as the resolution decreases
            # this gives higher resolution outputs more weight in the loss
            weights = np.array([1 / (2 ** i) for i in range(net_numpool)])

            # the last output gets a weight of 0 (only the last one, the mask is True for all entries but
            # that one). Normalize weights so that they sum to 1
            mask = np.array([i < net_numpool - 1 for i in range(net_numpool)])
            weights[~mask] = 0
            weights = weights / weights.sum()
            self.ds_loss_weights = weights

            # np.random.random_integers is deprecated. randint excludes the upper bound, so 100000 gives
            # the same range (0 ... 99999) as before
            num_threads = self.data_aug_params.get('num_threads')
            seeds_train = np.random.randint(0, 100000, num_threads)
            seeds_val = np.random.randint(0, 100000, max(num_threads // 2, 1))
            print("seeds train", seeds_train)
            print("seeds_val", seeds_val)
            self.tr_gen, self.val_gen = get_moreDA_augmentation(self.dl_tr, self.dl_val,
                                                                self.data_aug_params[
                                                                    'patch_size_for_spatialtransform'],
                                                                self.data_aug_params,
                                                                deep_supervision_scales=self.deep_supervision_scales,
                                                                seeds_train=seeds_train,
                                                                seeds_val=seeds_val,
                                                                pin_memory=self.pin_memory)
            self.print_to_log_file("TRAINING KEYS:\n %s" % (str(self.dataset_tr.keys())),
                                   also_print_to_console=False)
            self.print_to_log_file("VALIDATION KEYS:\n %s" % (str(self.dataset_val.keys())),
                                   also_print_to_console=False)

        self.initialize_network()
        self.initialize_optimizer_and_scheduler()
        self.network = DDP(self.network, device_ids=[self.local_rank])

    else:
        self.print_to_log_file('self.was_initialized is True, not running self.initialize again')
    self.was_initialized = True
def initialize(self, training=True, force_load_plans=False):
    """
    Set up plans, data augmentation parameters, (for training) the data loaders, deep supervision loss
    weights and augmentation generators, then the network, optimizer, lr scheduler and amp. Finally the
    network is wrapped in DDP.

    For prediction of test cases just set training=False, this will prevent loading of training data and
    training batchgenerator initialization

    If the trainer was initialized before, only a message is written to the log file.

    :param training: if True the dataset is unpacked (if self.unpack_data is set) and the data loaders
    and augmentation generators are created
    :param force_load_plans: if True the plans file is loaded even if self.plans is already set
    :return:
    """
    if not self.was_initialized:
        maybe_mkdir_p(self.output_folder)

        if force_load_plans or (self.plans is None):
            self.load_plans_file()

        self.process_plans(self.plans)

        self.setup_DA_params()

        self.folder_with_preprocessed_data = join(self.dataset_directory,
                                                  self.plans['data_identifier'] + "_stage%d" % self.stage)
        if training:
            self.dl_tr, self.dl_val = self.get_basic_generators()
            if self.unpack_data:
                if self.local_rank == 0:
                    print("unpacking dataset")
                    unpack_dataset(self.folder_with_preprocessed_data)
                    print("done")
                else:
                    # we need to wait until worker 0 has finished unpacking: poll every 3 seconds until
                    # there is a npy file for each npz file
                    npz_files = subfiles(self.folder_with_preprocessed_data, suffix=".npz", join=False)
                    case_ids = [i[:-4] for i in npz_files]
                    while not all(isfile(join(self.folder_with_preprocessed_data, i + ".npy"))
                                  for i in case_ids):
                        print("worker", self.local_rank, "is waiting for unpacking")
                        sleep(3)
                    # there is some slight chance that there may arise some error because dataloader are loading a file
                    # that is still being written by worker 0. We ignore this for now an address it only if it becomes
                    # relevant
                    # (this can occur because while worker 0 writes the file is technically present so the other workers
                    # will proceed and eventually try to read it)
            else:
                print(
                    "INFO: Not unpacking data! Training may be slow due to that. Pray you are not using 2d or you "
                    "will wait all winter for your model to finish!")

            # setting weights for deep supervision losses
            net_numpool = len(self.net_num_pool_op_kernel_sizes)

            # we give each output a weight which decreases exponentially (division by 2) as the resolution decreases
            # this gives higher resolution outputs more weight in the loss
            weights = np.array([1 / (2 ** i) for i in range(net_numpool)])

            # the last output gets a weight of 0 (only the last one, the mask is True for all entries but
            # that one). Normalize weights so that they sum to 1
            mask = np.array([i < net_numpool - 1 for i in range(net_numpool)])
            weights[~mask] = 0
            weights = weights / weights.sum()
            self.ds_loss_weights = weights

            # np.random.random_integers is deprecated. randint excludes the upper bound, so 100000 gives
            # the same range (0 ... 99999) as before
            num_threads = self.data_aug_params.get('num_threads')
            seeds_train = np.random.randint(0, 100000, num_threads)
            seeds_val = np.random.randint(0, 100000, max(num_threads // 2, 1))
            print("seeds train", seeds_train)
            print("seeds_val", seeds_val)
            self.tr_gen, self.val_gen = get_moreDA_augmentation(self.dl_tr, self.dl_val,
                                                                self.data_aug_params[
                                                                    'patch_size_for_spatialtransform'],
                                                                self.data_aug_params,
                                                                deep_supervision_scales=self.deep_supervision_scales,
                                                                seeds_train=seeds_train,
                                                                seeds_val=seeds_val)
            self.print_to_log_file("TRAINING KEYS:\n %s" % (str(self.dataset_tr.keys())),
                                   also_print_to_console=False)
            self.print_to_log_file("VALIDATION KEYS:\n %s" % (str(self.dataset_val.keys())),
                                   also_print_to_console=False)

        self.initialize_network()
        self.initialize_optimizer_and_scheduler()
        self._maybe_init_amp()
        self.network = DDP(self.network)

    else:
        self.print_to_log_file('self.was_initialized is True, not running self.initialize again')
    self.was_initialized = True
def main():
    """
    Command line entry point: predict all cases of an input folder with a trained model.

    Parses the command line, converts the string-encoded options (folds, lowres_segmentations,
    all_in_gpu) into Python values, runs the 3d_lowres stage first when 3d_cascade_fullres is requested
    without a --lowres_segmentations folder, and finally calls predict_from_folder for the requested model.

    :return: None
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i", '--input_folder',
        help="Must contain all modalities for each patient in the correct"
             " order (same as training). Files must be named "
             "CASENAME_XXXX.nii.gz where XXXX is the modality "
             "identifier (0000, 0001, etc)",
        required=True)
    parser.add_argument('-o', "--output_folder", required=True,
                        help="folder for saving predictions")
    # NOTE: this argument used to carry default=default_plans_identifier. The argument is required, so
    # the default could never take effect (and a plans identifier is not a task name); it was removed.
    parser.add_argument('-t', '--task_name',
                        help='task name or task ID, required.',
                        required=True)
    parser.add_argument(
        '-tr', '--trainer_class_name',
        help='Name of the nnUNetTrainer used for 2D U-Net, full resolution 3D U-Net and low resolution '
             'U-Net. The default is %s. If you are running inference with the cascade and the folder '
             'pointed to by --lowres_segmentations does not contain the segmentation maps generated by '
             'the low resolution U-Net then the low resolution segmentation maps will be automatically '
             'generated. For this case, make sure to set the trainer class here that matches your '
             '--cascade_trainer_class_name (this part can be ignored if defaults are used).'
             % default_trainer,
        required=False,
        default=default_trainer)
    parser.add_argument(
        '-ctr', '--cascade_trainer_class_name',
        help="Trainer class name used for predicting the 3D full resolution U-Net part of the cascade. "
             "Default is %s" % default_cascade_trainer,
        required=False,
        default=default_cascade_trainer)
    parser.add_argument(
        '-m', '--model',
        help="2d, 3d_lowres, 3d_fullres or 3d_cascade_fullres. Default: 3d_fullres",
        default="3d_fullres",
        required=False)
    parser.add_argument(
        '-p', '--plans_identifier',
        help='do not touch this unless you know what you are doing',
        default=default_plans_identifier,
        required=False)
    parser.add_argument(
        '-f', '--folds', nargs='+', default='None',
        help="folds to use for prediction. Default is None which means that folds will be detected "
             "automatically in the model output folder")
    parser.add_argument(
        '-z', '--save_npz', required=False, action='store_true',
        help="use this if you want to ensemble these predictions with those of other models. Softmax "
             "probabilities will be saved as compressed numpy arrays in output_folder and can be "
             "merged between output_folders with nnUNet_ensemble_predictions")
    parser.add_argument(
        '-l', '--lowres_segmentations', required=False, default='None',
        help="if model is the highres stage of the cascade then you can use this folder to provide "
             "predictions from the low resolution 3D U-Net. If this is left at default, the "
             "predictions will be generated automatically (provided that the 3D low resolution U-Net "
             "network weights are present)")
    parser.add_argument("--part_id", type=int, required=False, default=0,
                        help="Used to parallelize the prediction of "
                             "the folder over several GPUs. If you "
                             "want to use n GPUs to predict this "
                             "folder you need to run this command "
                             "n times with --part_id=0, ... n-1 and "
                             "--num_parts=n (each with a different "
                             "GPU (for example via "
                             "CUDA_VISIBLE_DEVICES=X))")
    parser.add_argument("--num_parts", type=int, required=False, default=1,
                        help="Used to parallelize the prediction of "
                             "the folder over several GPUs. If you "
                             "want to use n GPUs to predict this "
                             "folder you need to run this command "
                             "n times with --part_id=0, ... n-1 and "
                             "--num_parts=n (each with a different "
                             "GPU (via "
                             "CUDA_VISIBLE_DEVICES=X))")
    parser.add_argument(
        "--num_threads_preprocessing", required=False, default=6, type=int,
        help="Determines how many background processes will be used for data preprocessing. Reduce this "
             "if you run into out of memory (RAM) problems. Default: 6")
    parser.add_argument(
        "--num_threads_nifti_save", required=False, default=2, type=int,
        help="Determines how many background processes will be used for segmentation export. Reduce "
             "this if you run into out of memory (RAM) problems. Default: 2")
    parser.add_argument(
        "--disable_tta", required=False, default=False, action="store_true",
        help="set this flag to disable test time data augmentation via mirroring. Speeds up inference "
             "by roughly factor 4 (2D) or 8 (3D)")
    parser.add_argument(
        "--overwrite_existing", required=False, default=False, action="store_true",
        help="Set this flag if the target folder contains predictions that you would like to overwrite")
    parser.add_argument("--mode", type=str, default="normal", required=False,
                        help="Hands off!")
    parser.add_argument("--all_in_gpu", type=str, default="None", required=False,
                        help="can be None, False or True. "
                             "Do not touch.")
    parser.add_argument("--step_size", type=float, default=0.5, required=False,
                        help="don't touch")
    # NOTE: the options --interp_order, --interp_order_z and --force_separate_z were commented out in
    # this script and are therefore not available here.
    parser.add_argument(
        '-chk',
        help='checkpoint name, default: model_final_checkpoint',
        required=False,
        default='model_final_checkpoint')
    parser.add_argument(
        '--disable_mixed_precision', default=False, action='store_true', required=False,
        help='Predictions are done with mixed precision by default. This improves speed and reduces '
             'the required vram. If you want to disable mixed precision you can set this flag. Note '
             'that this is not recommended (mixed precision is ~2x faster!)')
    ### ----------- added by Camila
    parser.add_argument(
        '--disable_sliding_window', default=False, action='store_true', required=False,
        help='Disable sliding window to predict the whole image')
    ### ----------- end added by Camila

    args = parser.parse_args()
    input_folder = args.input_folder
    output_folder = args.output_folder
    part_id = args.part_id
    num_parts = args.num_parts
    folds = args.folds
    save_npz = args.save_npz
    lowres_segmentations = args.lowres_segmentations
    num_threads_preprocessing = args.num_threads_preprocessing
    num_threads_nifti_save = args.num_threads_nifti_save
    disable_tta = args.disable_tta
    step_size = args.step_size
    overwrite_existing = args.overwrite_existing
    mode = args.mode
    all_in_gpu = args.all_in_gpu
    model = args.model
    trainer_class_name = args.trainer_class_name
    cascade_trainer_class_name = args.cascade_trainer_class_name
    ### ----------- added by Camila
    disable_sliding_window = args.disable_sliding_window
    ### ----------- end added by Camila

    # a task may be given either by its full name ("TaskXXX_...") or by its numeric id
    task_name = args.task_name
    if not task_name.startswith("Task"):
        task_id = int(task_name)
        task_name = convert_id_to_task_name(task_id)

    assert model in ["2d", "3d_lowres", "3d_fullres", "3d_cascade_fullres"], \
        "-m must be 2d, 3d_lowres, 3d_fullres or 3d_cascade_fullres"

    # the command line encodes "not set" as the literal string "None"
    if lowres_segmentations == "None":
        lowres_segmentations = None

    if isinstance(folds, list):
        if folds[0] == 'all' and len(folds) == 1:
            pass
        else:
            folds = [int(i) for i in folds]
    elif folds == "None":
        folds = None
    else:
        raise ValueError("Unexpected value for argument folds")

    # all_in_gpu is a tri-state option passed as text
    assert all_in_gpu in ['None', 'False', 'True']
    all_in_gpu = {"None": None, "True": True, "False": False}[all_in_gpu]

    # we need to catch the case where model is 3d cascade fullres and the low resolution folder has not
    # been set. In that case we need to try and predict with 3d low res first
    if model == "3d_cascade_fullres" and lowres_segmentations is None:
        print("lowres_segmentations is None. Attempting to predict 3d_lowres first...")
        assert part_id == 0 and num_parts == 1, \
            "if you don't specify a --lowres_segmentations folder for the " \
            "inference of the cascade, custom values for part_id and num_parts " \
            "are not supported. If you wish to have multiple parts, please " \
            "run the 3d_lowres inference first (separately)"
        model_folder_name = join(network_training_output_dir, "3d_lowres", task_name,
                                 trainer_class_name + "__" + args.plans_identifier)
        assert isdir(model_folder_name), \
            "model output folder not found. Expected: %s" % model_folder_name
        lowres_output_folder = join(output_folder, "3d_lowres_predictions")
        predict_from_folder(model_folder_name, input_folder, lowres_output_folder, folds, False,
                            num_threads_preprocessing, num_threads_nifti_save, None,
                            part_id, num_parts, not disable_tta,
                            overwrite_existing=overwrite_existing,
                            mode=mode,
                            overwrite_all_in_gpu=all_in_gpu,
                            mixed_precision=not args.disable_mixed_precision,
                            step_size=step_size,
                            disable_sliding_window=disable_sliding_window)
        lowres_segmentations = lowres_output_folder
        torch.cuda.empty_cache()
        print("3d_lowres done")

    if model == "3d_cascade_fullres":
        trainer = cascade_trainer_class_name
    else:
        trainer = trainer_class_name

    model_folder_name = join(network_training_output_dir, model, task_name,
                             trainer + "__" + args.plans_identifier)
    print("using model stored in ", model_folder_name)
    assert isdir(model_folder_name), \
        "model output folder not found. Expected: %s" % model_folder_name

    predict_from_folder(model_folder_name, input_folder, output_folder, folds, save_npz,
                        num_threads_preprocessing, num_threads_nifti_save, lowres_segmentations,
                        part_id, num_parts, not disable_tta,
                        overwrite_existing=overwrite_existing,
                        mode=mode,
                        overwrite_all_in_gpu=all_in_gpu,
                        mixed_precision=not args.disable_mixed_precision,
                        step_size=step_size,
                        checkpoint_name=args.chk,
                        disable_sliding_window=disable_sliding_window)
def validate(self, do_mirroring=True, use_train_mode=False, tiled=True, step=2, save_softmax=True,
             use_gaussian=True, compute_global_dice=True, override=True,
             validation_folder_name='validation'):
    """
    Predict all validation cases, export them as nifti and evaluate them against the ground truth.

    2018_12_05: I added global accumulation of TP, FP and FN for the validation in here. This is because I
    believe that selecting models is easier when computing the Dice globally instead of independently for
    each case and then averaging over cases. The Lung dataset in particular is very unstable because of the
    small size of the Lung Lesions. My theory is that even though the global Dice is different than the
    actual target metric it is still a good enough substitute that allows us to get a lot more stable
    results when rerunning the same experiment twice.
    FYI: computer vision community uses the global jaccard for the evaluation of Cityscapes etc, not the
    per-image jaccard averaged over images.
    The reason I am accumulating TP/FP/FN here and not from the nifti files (which are used by our
    Evaluator) is that all predictions made here will have identical voxel spacing whereas voxel spacings
    in the nifti files will be different (which we could compensate for by using the volume per voxel but
    that would require the evaluator to understand spacings which it does not at this point)

    :param do_mirroring: if True, self.data_aug_params['mirror_axes'] is passed to the prediction,
        otherwise an empty tuple is passed
    :param use_train_mode: forwarded to self.predict_preprocessed_data_return_softmax
    :param tiled: forwarded to self.predict_preprocessed_data_return_softmax; also written into the
        json_name of summary.json
    :param step: forwarded to self.predict_preprocessed_data_return_softmax
    :param save_softmax: if True, a <case>.npz file name is handed to the nifti export
    :param use_gaussian: forwarded to self.predict_preprocessed_data_return_softmax
    :param compute_global_dice: if True, accumulate TP/FP/FN per label over all predicted cases and write
        the resulting Dice values to global_dice.json
    :param override: if False, cases whose <case>.nii.gz already exists in the output folder are not
        predicted again (they are still evaluated)
    :param validation_folder_name: name of the subfolder of self.output_folder that receives the results
    :return: None
    """
    assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
    if self.dataset_val is None:
        self.load_dataset()
        self.do_split()

    output_folder = join(self.output_folder, validation_folder_name)
    maybe_mkdir_p(output_folder)

    if do_mirroring:
        mirror_axes = self.data_aug_params['mirror_axes']
    else:
        mirror_axes = ()

    pred_gt_tuples = []
    results = []
    global_tp = OrderedDict()
    global_fp = OrderedDict()
    global_fn = OrderedDict()

    export_pool = Pool(4)
    try:
        for k in self.dataset_val.keys():
            print(k)
            properties = self.dataset[k]['properties']
            fname = properties['list_of_data_files'][0].split("/")[-1][:-12]
            if override or (not isfile(join(output_folder, fname + ".nii.gz"))):
                data = np.load(self.dataset[k]['data_file'])['data']

                print(k, data.shape)
                # the last channel holds the segmentation; -1 entries are mapped to 0
                data[-1][data[-1] == -1] = 0

                softmax_pred = self.predict_preprocessed_data_return_softmax(
                    data[:-1], do_mirroring, 1, use_train_mode, 1, mirror_axes, tiled, True, step,
                    self.patch_size, use_gaussian=use_gaussian)

                if compute_global_dice:
                    predicted_segmentation = softmax_pred.argmax(0)
                    gt_segmentation = data[-1]
                    labels = properties['classes']
                    labels = [int(i) for i in labels if i > 0]
                    for l in labels:
                        global_fn.setdefault(l, 0)
                        global_fp.setdefault(l, 0)
                        global_tp.setdefault(l, 0)
                        conf = ConfusionMatrix((predicted_segmentation == l).astype(int),
                                               (gt_segmentation == l).astype(int))
                        conf.compute()
                        global_fn[l] += conf.fn
                        global_fp[l] += conf.fp
                        global_tp[l] += conf.tp

                softmax_pred = softmax_pred.transpose([0] + [i + 1 for i in self.transpose_backward])

                if save_softmax:
                    softmax_fname = join(output_folder, fname + ".npz")
                else:
                    softmax_fname = None

                # There is a problem with python process communication that prevents us from
                # communicating objects larger than 2 GB between processes (basically when the length of
                # the pickle string that will be sent is communicated by the multiprocessing.Pipe object
                # then the placeholder (%i I think) does not allow for long enough strings (lol). This
                # could be fixed by changing i to l (for long) but that would require manually patching
                # system python code. We circumvent that problem here by saving softmax_pred to a npy
                # file that will then be read (and finally deleted) by the Process.
                # save_segmentation_nifti_from_softmax can take either filename or np.ndarray and will
                # handle this automatically
                if np.prod(softmax_pred.shape) > (2e9 / 4 * 0.9):  # *0.9 just to be safe
                    np.save(join(output_folder, fname + ".npy"), softmax_pred)
                    softmax_pred = join(output_folder, fname + ".npy")
                results.append(export_pool.starmap_async(
                    save_segmentation_nifti_from_softmax,
                    ((softmax_pred, join(output_folder, fname + ".nii.gz"), properties, 3, None, None,
                      None, softmax_fname, None),)))

            pred_gt_tuples.append([join(output_folder, fname + ".nii.gz"),
                                   join(self.gt_niftis_folder, fname + ".nii.gz")])

        # block until every export has finished (re-raises exceptions from the workers)
        _ = [i.get() for i in results]
    finally:
        # the pool was previously never shut down, which leaked its worker processes
        export_pool.close()
        export_pool.join()

    print("finished prediction, now evaluating...")

    task = self.dataset_directory.split("/")[-1]
    job_name = self.experiment_name
    _ = aggregate_scores(pred_gt_tuples, labels=list(range(self.num_classes)),
                         json_output_file=join(output_folder, "summary.json"),
                         json_name=job_name + " val tiled %s" % (str(tiled)),
                         json_author="Fabian",
                         json_task=task,
                         num_threads=3)

    if compute_global_dice:
        global_dice = OrderedDict()
        all_labels = list(global_fn.keys())
        for l in all_labels:
            global_dice[int(l)] = float(
                2 * global_tp[l] / (2 * global_tp[l] + global_fn[l] + global_fp[l]))
        write_json(global_dice, join(output_folder, "global_dice.json"))
# You need to set the following folders: base, preprocessing_output_dir and network_training_output_dir.
# See below for details.

# do not modify these unless you know what you are doing
my_output_identifier = "nnUNet"
default_plans_identifier = "nnUNetPlans"
default_data_identifier = 'nnUNet'

try:
    # base is the folder where the raw data is stored. You just need to set base only, the others will be
    # created automatically (they are subfolders of base).
    # NOTE: base is hard-coded here instead of being read from an environment variable, so the
    # KeyError handler below cannot trigger; it is kept so that switching back to os.environ[...] keeps
    # working.
    base = '/vol/medic02/users/zl9518/KiTS19/kits19/nnunet_output'
    # raw_dataset_dir = join(base, "nnUNet_raw")
    raw_dataset_dir = base
    splitted_4d_output_dir = join(base, "nnUNet_raw_splitted")
    cropped_output_dir = join(base, "nnUNet_raw_cropped")
    maybe_mkdir_p(splitted_4d_output_dir)
    maybe_mkdir_p(raw_dataset_dir)
    # raw_dataset_dir used to be created twice while cropped_output_dir was never created
    maybe_mkdir_p(cropped_output_dir)
except KeyError:
    cropped_output_dir = splitted_4d_output_dir = raw_dataset_dir = base = None

# preprocessing_output_dir is where the preprocessed data is stored. If you run a training I very strongly
# recommend this is a SSD!
try:
    # preprocessing_output_dir = os.environ['nnUNet_preprocessed']
    preprocessing_output_dir = join(base, "nnUNet_preprocessed")
except KeyError:
    preprocessing_output_dir = None
def __init__(self, folder_with_cropped_data, preprocessed_output_folder):
    """
    Initialise the parent planner and switch identifier and plans file name to their 2D variants.

    :param folder_with_cropped_data: forwarded unchanged to the parent constructor
    :param preprocessed_output_folder: forwarded unchanged to the parent constructor
    """
    super(ExperimentPlanner2D, self).__init__(
        folder_with_cropped_data,
        preprocessed_output_folder,
    )

    # 2D plans are stored under their own file name inside the preprocessed output folder
    plans_file_name = default_plans_identifier + "_plans_2D.pkl"
    self.data_identifier = default_data_identifier + "_2D"
    self.plans_fname = join(self.preprocessed_output_folder, plans_file_name)
def analyze_dataset(task_string, override=False, collect_intensityproperties=True,
                    num_processes=default_num_threads):
    """
    Run the DatasetAnalyzer on the cropped data of one task.

    :param task_string: name of the task folder below nnUNet_cropped_data
    :param override: passed to DatasetAnalyzer as ``overwrite``
    :param collect_intensityproperties: passed to DatasetAnalyzer.analyze_dataset
    :param num_processes: passed to DatasetAnalyzer as ``num_processes``
    :return: None (the result of the analysis is discarded)
    """
    analyzer = DatasetAnalyzer(
        join(nnUNet_cropped_data, task_string),
        overwrite=override,
        num_processes=num_processes,
    )
    analyzer.analyze_dataset(collect_intensityproperties)
Description: change to local ''' import yaml from batchgenerators.utilities.file_and_folder_operations import maybe_mkdir_p, join config = yaml.load(open('./configs/default.yaml', 'r'), Loader=yaml.FullLoader) default_plans_identifier = config['default_plans_identifier'] default_data_identifier = config['default_data_identifier'] default_trainer = config['default_trainer'] default_cascade_trainer = config['default_cascade_trainer'] DATASET_DIR = config['DATASET_DIR'] my_output_identifier = config['output_identifier'] pretrain_identifier = config['pretrain_identifier'] base = join(DATASET_DIR, "nnUNet_raw") if DATASET_DIR else None preprocessing_output_dir = join(DATASET_DIR, "nnUNet_preprocessed") if DATASET_DIR else None network_training_output_dir_base = join( DATASET_DIR, "nnUNet_trained_models") if DATASET_DIR else None if base is not None: maybe_mkdir_p(base) nnUNet_raw_data = join(base, "nnUNet_raw_data") nnUNet_cropped_data = join(base, "nnUNet_cropped_data") maybe_mkdir_p(nnUNet_raw_data) maybe_mkdir_p(nnUNet_cropped_data) else: print( "the path of nnUNet_raw_data_base is not defined, please check configs.yaml." )
# Parse the command line and copy the options into local names.
args = parser.parse_args()
input_folder = args.input_folder
output_folder = args.output_folder
part_id = args.part_id
num_parts = args.num_parts
folds = args.folds
save_npz = args.save_npz
lowres_segmentations = args.lowres_segmentations
num_threads_preprocessing = args.num_threads_preprocessing
num_threads_nifti_save = args.num_threads_nifti_save
tta = args.tta
overwrite = args.overwrite_existing

# The model folder is built as
# <network_training_output_dir>/<model>/<task_name>/<nnunet_trainer>__<plans_identifier>
output_folder_name = join(network_training_output_dir, args.model, args.task_name,
                          args.nnunet_trainer + "__" + args.plans_identifier)
print("using model stored in ", output_folder_name)
assert isdir(output_folder_name), "model output folder not found: %s" % output_folder_name

# "None" arrives as text (presumably the argparse default - the parser is defined outside this
# fragment) and is converted to a real None
if lowres_segmentations == "None":
    lowres_segmentations = None

# folds is either the single entry ['all'] (kept as is), a list of fold numbers given as strings
# (converted to int) or the text "None" (converted to None)
if isinstance(folds, list):
    if folds[0] == 'all' and len(folds) == 1:
        pass
    else:
        folds = [int(i) for i in folds]
elif folds == "None":
    folds = None
def initialize(self, training=True, force_load_plans=False):
    """
    Load the plans, set up data loading / augmentation and build network, optimizer and scheduler.

    If the trainer was initialized before, only a log message is written.

    :param training: if True, create the data loaders, optionally unpack the dataset and create the
        augmentation generators
    :param force_load_plans: if True, call self.load_plans_file() even if self.plans is already set
    :return: None
    """
    if self.was_initialized:
        self.print_to_log_file('self.was_initialized is True, not running self.initialize again')
        self.was_initialized = True
        return

    maybe_mkdir_p(self.output_folder)

    if force_load_plans or (self.plans is None):
        self.load_plans_file()

    self.process_plans(self.plans)
    self.setup_DA_params()

    # ---- deep supervision weights ----
    # one weight per network output, halved with every resolution step; the last output gets weight 0
    # and the remaining weights are normalized to sum to 1
    num_pool = len(self.net_num_pool_op_kernel_sizes)
    weights = np.array([1 / (2 ** level) for level in range(num_pool)])
    keep = np.array([level < num_pool - 1 for level in range(num_pool)])
    weights[~keep] = 0
    weights = weights / weights.sum()
    # the loss is currently NOT wrapped with these weights:
    # self.loss = MultipleOutputLoss2(self.loss, weights)

    stage_folder = self.plans['data_identifier'] + "_stage%d" % self.stage
    self.folder_with_preprocessed_data = join(self.dataset_directory, stage_folder)

    if training:
        self.dl_tr, self.dl_val = self.get_basic_generators()
        if not self.unpack_data:
            print(
                "INFO: Not unpacking data! Training may be slow due to that. Pray you are not using 2d or you "
                "will wait all winter for your model to finish!")
        else:
            print("unpacking dataset")
            unpack_dataset(self.folder_with_preprocessed_data)
            print("done")

        patch_size = self.data_aug_params['patch_size_for_spatialtransform']
        self.tr_gen, self.val_gen = get_insaneDA_augmentation(
            self.dl_tr, self.dl_val, patch_size, self.data_aug_params,
            deep_supervision_scales=self.deep_supervision_scales,
            pin_memory=self.pin_memory)

        training_keys = str(self.dataset_tr.keys())
        self.print_to_log_file("TRAINING KEYS:\n %s" % training_keys, also_print_to_console=False)
        validation_keys = str(self.dataset_val.keys())
        self.print_to_log_file("VALIDATION KEYS:\n %s" % validation_keys, also_print_to_console=False)

    self.initialize_network()
    self.initialize_optimizer_and_scheduler()

    assert isinstance(self.network, (SegmentationNetwork, nn.DataParallel))
    self.was_initialized = True
# -*- coding: utf-8 -*- """ Created on Sat Jul 10 16:11:54 2021 @author: linhai """ import sys import inspect import os from pathlib import Path from batchgenerators.utilities.file_and_folder_operations import join, isdir, maybe_mkdir_p, subfiles, subdirs, isfile #print (sys.path) curDir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) parDir = os.path.dirname(curDir) sys.path.insert(0, parDir) #sys.path.insert(0, ) p1 = 'C:\\Research\\IMA_on_segmentation\\nnUnet\\nnUNet\\rawData\\nnUNet_raw_data\\Task05_Prostate\\imagesTr' p2 = 'C:/Research/IMA_on_segmentation/nnUnet/nnUNet/rawData/nnUNet_raw_data\\Task05_Prostate' p3 = 'C:\\Research\\IMA_on_segmentation\\aaa' p4 = 'C:/Research/IMA_on_segmentation/333/aaab' print (os.path.join(p1, "aaa")+"\\") print (isdir(join(p1, "aaa")+"\\")) print(p1) print (isdir(p2)) #os.mkdir(p4) maybe_mkdir_p(p4) #os.makedirs(p4, exist_ok=True)
# Names of the trainer classes that are used when none is given explicitly.
default_trainer = "nnUNetTrainerV2"
default_cascade_trainer = "nnUNetTrainerV2CascadeFullRes"

"""
PLEASE READ paths.md FOR INFORMATION TO HOW TO SET THIS UP
"""

# Environment-variable based configuration, replaced by the hard-coded paths below:
# base = os.environ['nnUNet_raw_data_base'] if "nnUNet_raw_data_base" in os.environ.keys() else None
# preprocessing_output_dir = os.environ['nnUNet_preprocessed'] if "nnUNet_preprocessed" in os.environ.keys() else None
# network_training_output_dir_base = os.path.join(os.environ['RESULTS_FOLDER']) if "RESULTS_FOLDER" in os.environ.keys() else None

# NOTE(review): machine-specific absolute paths; they have to be adapted on every other system.
base = "/home1/mksun/nnunet_dataset/nnUNet_raw/"  #raw data path
preprocessing_output_dir = "/home1/mksun/nnunet_dataset/nnUNet_preprocessed/"
network_training_output_dir_base = "/home1/mksun/experiment/ckpt/TMI/checkpoint"
image_validation_output_dir = "/home1/mksun/experiment/image/"

# base is a string literal above, so the else branch cannot be reached as the code stands.
if base is not None:
    # NOTE: unlike the other variants of this file, the raw data folder is "nnUNet_raw_splitted" here
    nnUNet_raw_data = join(base, "nnUNet_raw_splitted")
    nnUNet_cropped_data = join(base, "nnUNet_cropped_data")
    maybe_mkdir_p(nnUNet_raw_data)
    maybe_mkdir_p(nnUNet_cropped_data)
else:
    print(
        "nnUNet_raw_data_base is not defined and nnU-Net can only be used on data for which preprocessed files "
        "are already present on your system. nnU-Net cannot be used for experiment planning and preprocessing like "
        "this. If this is not intended, please read nnunet/paths.md for information on how to set this up properly."
    )
    nnUNet_cropped_data = nnUNet_raw_data = None

# create the folder for the preprocessed data if one is configured
if preprocessing_output_dir is not None:
    maybe_mkdir_p(preprocessing_output_dir)
else:
    print(
def __init__(self, plans_file, fold, output_folder=None, dataset_directory=None, batch_dice=True,
             stage=None, unpack_data=True, deterministic=True, fp16=False, lam=2, gpu="0",
             save_dir=None):
    """
    :param deterministic: forwarded to the base class constructor
    :param fold: can be either [0 ... 5) for cross-validation, 'all' to train on all available training
    data or None if you wish to load some checkpoint and do inference only
    :param plans_file: the pkl file generated by preprocessing. This file will determine all design
    choices
    :param output_folder: where to store parameters, plot progress and to the validation
    :param dataset_directory: the parent directory in which the preprocessed Task data is stored. This is
    required because the split information is stored in this directory. For running prediction only this
    input is not required and may be set to None
    :param batch_dice: compute dice loss for each sample and average over all samples in the batch or
    pretend the batch is a pseudo volume?
    :param stage: The plans file may contain several stages (used for lowres / highres / pyramid). Stage
    must be specified for training:
    if stage 1 exists then stage 1 is the high resolution stage, otherwise it's 0
    :param unpack_data: if False, npz preprocessed data will not be unpacked to npy. This consumes less
    space but is considerably slower! Running unpack_data=False with 2d should never be done!
    :param fp16: forwarded to the base class constructor
    :param lam: forwarded to the base class constructor
    :param gpu: forwarded to the base class constructor
    :param save_dir: forwarded to the base class constructor

    IMPORTANT: If you inherit from nnUNetTrainer and the init args change then you need to redefine
    self.init_args in your init accordingly. Otherwise checkpoints won't load properly!
    """
    super(nnUNetTrainer, self).__init__(deterministic, fp16, lam, gpu, save_dir)
    self.unpack_data = unpack_data
    # init_args must mirror the full constructor signature (see the warning in the docstring). lam, gpu
    # and save_dir were missing, so a trainer rebuilt from init_args silently fell back to their
    # defaults. Tuples without the three trailing values still work because those parameters have
    # defaults.
    self.init_args = (plans_file, fold, output_folder, dataset_directory, batch_dice, stage, unpack_data,
                      deterministic, fp16, lam, gpu, save_dir)
    # set through arguments from init
    self.stage = stage
    self.experiment_name = self.__class__.__name__
    self.plans_file = plans_file
    self.output_folder = output_folder
    self.dataset_directory = dataset_directory
    self.output_folder_base = self.output_folder
    self.fold = fold

    self.plans = None

    # if we are running inference only then the self.dataset_directory is set (due to checkpoint
    # loading) but it is irrelevant
    if self.dataset_directory is not None and isdir(self.dataset_directory):
        self.gt_niftis_folder = join(self.dataset_directory, "gt_segmentations")
    else:
        self.gt_niftis_folder = None

    self.folder_with_preprocessed_data = None

    # set in self.initialize()

    self.dl_tr = self.dl_val = None
    self.num_input_channels = self.num_classes = self.net_pool_per_axis = self.patch_size = \
        self.batch_size = self.threeD = self.base_num_features = self.intensity_properties = \
        self.normalization_schemes = self.net_num_pool_op_kernel_sizes = \
        self.net_conv_kernel_sizes = None  # loaded automatically from plans_file
    self.basic_generator_patch_size = self.data_aug_params = self.transpose_forward = \
        self.transpose_backward = None

    self.batch_dice = batch_dice
    self.loss = DC_and_CE_loss(
        {'batch_dice': self.batch_dice, 'smooth': 1e-5, 'do_bg': False, 'square': False}, {})

    self.online_eval_foreground_dc = []
    self.online_eval_tp = []
    self.online_eval_fp = []
    self.online_eval_fn = []

    self.classes = self.do_dummy_2D_aug = self.use_mask_for_norm = \
        self.only_keep_largest_connected_component = self.min_region_size_per_class = \
        self.min_size_per_class = None

    self.inference_pad_border_mode = "constant"
    self.inference_pad_kwargs = {'constant_values': 0}

    # update_fold is called after output_folder / output_folder_base / fold have been assigned above
    self.update_fold(fold)
    self.pad_all_sides = None

    self.lr_scheduler_eps = 1e-3
    self.lr_scheduler_patience = 30
    self.initial_lr = 3e-4
    self.weight_decay = 3e-5

    self.oversample_foreground_percent = 0.33
# Identifiers and default trainer class names used throughout the package.
my_output_identifier = "nnUNet"
default_plans_identifier = "nnUNetPlansv2.1"
default_data_identifier = 'nnUNet'
default_trainer = "nnUNetTrainerV2"
default_cascade_trainer = "nnUNetTrainerV2CascadeFullRes"

# PLEASE READ paths.md FOR INFORMATION TO HOW TO SET THIS UP

# All three folders come from environment variables; a missing variable yields None.
base = os.environ.get('nnUNet_raw_data_base')
preprocessing_output_dir = os.environ.get('nnUNet_preprocessed')
network_training_output_dir_base = os.environ.get('RESULTS_FOLDER')

if base is not None:
    nnUNet_raw_data = join(base, "nnUNet_raw_data")
    nnUNet_cropped_data = join(base, "nnUNet_cropped_data")
    maybe_mkdir_p(nnUNet_raw_data)
    maybe_mkdir_p(nnUNet_cropped_data)
else:
    print("nnUNet_raw_data_base is not defined and nnU-Net can only be used on data for which preprocessed files "
          "are already present on your system. nnU-Net cannot be used for experiment planning and preprocessing like "
          "this. If this is not intended, please read nnunet/paths.md for information on how to set this up properly.")
    nnUNet_cropped_data = nnUNet_raw_data = None

if preprocessing_output_dir is not None:
    maybe_mkdir_p(preprocessing_output_dir)
else:
    # the message used to point to the non-existent file "nnunet/pathy.md"
    print("nnUNet_preprocessed is not defined and nnU-Net can not be used for preprocessing "
          "or training. If this is not intended, please read nnunet/paths.md for information on how to set this up.")
# You need to set the following folders: base, preprocessing_output_dir and network_training_output_dir. See below for details. # do not modify these unless you know what you are doing my_output_identifier = "nnUNet" default_plans_identifier = "nnUNetPlans" default_data_identifier = 'nnUNet' try: # base is the folder where the raw data is stored. You just need to set base only, the others will be created # automatically (they are subfolders of base). # Here I use environment variables to set the base folder. Environment variables allow me to use the same code on # different systems (and our compute cluster). You can replace this line with something like: # base = "/path/to/my/folder" base = os.environ['nnUNet_base'] raw_dataset_dir = join(base, "nnUNet_raw") splitted_4d_output_dir = join(base, "nnUNet_raw_splitted") cropped_output_dir = join(base, "nnUNet_raw_cropped") maybe_mkdir_p(splitted_4d_output_dir) maybe_mkdir_p(raw_dataset_dir) maybe_mkdir_p(cropped_output_dir) except KeyError: cropped_output_dir = splitted_4d_output_dir = raw_dataset_dir = base = None # preprocessing_output_dir is where the preprocessed data is stored. If you run a training I very strongly recommend # this is a SSD! try: # Here I use environment variables to set the folder. Environment variables allow me to use the same code on # different systems (and our compute cluster). You can replace this line with something like: # preprocessing_output_dir = "/path/to/my/folder_with_preprocessed_data" preprocessing_output_dir = os.environ['nnUNet_preprocessed']
# do not modify these unless you know what you are doing my_output_identifier = "nnUNet" default_plans_identifier = "nnUNetPlansv2.1" default_data_identifier = 'nnUNet' default_trainer = "nnUNetTrainerV2" default_cascade_trainer = "nnUNetTrainerV2CascadeFullRes" """ PLEASE READ paths.md FOR INFORMATION TO HOW TO SET THIS UP """ base = '.../COVID-19-CT-Seg/nnunet2_COVID19_FAB' #preprocessing_output_dir = os.environ['nnUNet_preprocessed'] if "nnUNet_preprocessed" in os.environ.keys() else None #network_training_output_dir_base = os.path.join(os.environ['RESULTS_FOLDER']) if "RESULTS_FOLDER" in os.environ.keys() else None preprocessing_output_dir = join(base, 'nnUNet_preprocessed') network_training_output_dir_base = join(base, 'RESULTS_FOLDER') if base is not None: nnUNet_raw_data = join(base, "nnUNet_raw_data") nnUNet_cropped_data = join(base, "nnUNet_cropped_data") maybe_mkdir_p(nnUNet_raw_data) maybe_mkdir_p(nnUNet_cropped_data) else: print("nnUNet_raw_data_base is not defined and nnU-Net can only be used on data for which preprocessed files " "are already present on your system. nnU-Net cannot be used for experiment planning and preprocessing like " "this. If this is not intended, please read nnunet/paths.md for information on how to set this up properly.") nnUNet_cropped_data = nnUNet_raw_data = None if preprocessing_output_dir is not None: maybe_mkdir_p(preprocessing_output_dir) else: