def download_and_install_from_url(url): assert network_training_output_dir is not None, "Cannot install model because network_training_output_dir is not " \ "set (RESULTS_FOLDER missing as environment variable, see " \ "Installation instructions)" import http.client http.client.HTTPConnection._http_vsn = 10 http.client.HTTPConnection._http_vsn_str = 'HTTP/1.0' import os home = os.path.expanduser('~') random_number = int(time() * 1e7) tempfile = home + "/" + '.tuframeworkdownload_%s' % str(random_number) try: with open(tempfile, 'wb') as f: with requests.get(url, stream=True) as r: r.raise_for_status() for chunk in r.iter_content(chunk_size=8192 * 16): # If you have chunk encoded response uncomment if # and set chunk_size parameter to None. # if chunk: f.write(chunk) print("Download finished. Extracting...") install_model_from_zip_file(tempfile) print("Done") except Exception as e: raise e finally: if isfile(tempfile): os.remove(tempfile)
def split_4d(input_folder, num_processes=default_num_threads, overwrite_task_output_id=None): assert isdir(join(input_folder, "imagesTr")) and isdir(join(input_folder, "labelsTr")) and \ isfile(join(input_folder, "dataset.json")), \ "The input folder must be a valid Task folder from the Medical Segmentation Decathlon with at least the " \ "imagesTr and labelsTr subfolders and the dataset.json file" while input_folder.endswith("/") or input_folder.endswith("\\"): input_folder = input_folder[:-1] full_task_name = Path(input_folder).parts[-1] assert full_task_name.startswith( "Task" ), "The input folder must point to a folder that starts with TaskXX_" first_underscore = full_task_name.find("_") assert first_underscore == 6, "Input folder start with TaskXX with XX being a 3-digit id: 00, 01, 02 etc" input_task_id = int(full_task_name[4:6]) if overwrite_task_output_id is None: overwrite_task_output_id = input_task_id task_name = full_task_name[7:] output_folder = join(nnUNet_raw_data, "Task%03.0d_" % overwrite_task_output_id + task_name) if isdir(output_folder): shutil.rmtree(output_folder) files = [] output_dirs = [] maybe_mkdir_p(output_folder) for subdir in ["imagesTr", "imagesTs"]: curr_out_dir = join(output_folder, subdir) if not isdir(curr_out_dir): maybe_mkdir_p(curr_out_dir) curr_dir = join(input_folder, subdir) nii_files = [ join(curr_dir, i) for i in os.listdir(curr_dir) if i.endswith(".nii.gz") ] nii_files.sort() for n in nii_files: files.append(n) output_dirs.append(curr_out_dir) shutil.copytree(join(input_folder, "labelsTr"), join(output_folder, "labelsTr")) p = Pool(num_processes) p.starmap(split_4d_nifti, zip(files, output_dirs)) p.close() p.join() shutil.copy(join(input_folder, "dataset.json"), output_folder)
def check_input_folder_and_return_caseIDs(input_folder, expected_num_modalities): print("This model expects %d input modalities for each image" % expected_num_modalities) files = subfiles(input_folder, suffix=".nii.gz", join=False, sort=True) maybe_case_ids = np.unique([i[:-12] for i in files]) remaining = deepcopy(files) missing = [] assert len( files ) > 0, "input folder did not contain any images (expected to find .nii.gz file endings)" # now check if all required files are present and that no unexpected files are remaining for c in maybe_case_ids: for n in range(expected_num_modalities): expected_output_file = c + "_%04.0d.nii.gz" % n if not isfile(join(input_folder, expected_output_file)): missing.append(expected_output_file) else: remaining.remove(expected_output_file) print( "Found %d unique case ids, here are some examples:" % len(maybe_case_ids), np.random.choice(maybe_case_ids, min(len(maybe_case_ids), 10))) print( "If they don't look right, make sure to double check your filenames. They must end with _0000.nii.gz etc" ) if len(remaining) > 0: print( "found %d unexpected remaining files in the folder. Here are some examples:" % len(remaining), np.random.choice(remaining, min(len(remaining), 10))) if len(missing) > 0: print("Some files are missing:") print(missing) raise RuntimeError("missing files in input_folder") return maybe_case_ids
def download_and_install_from_url(url): assert network_training_output_dir is not None, "Cannot install model because network_training_output_dir is not " \ "set (RESULTS_FOLDER missing as environment variable, see " \ "Installation instructions)" print('Downloading pretrained model from url:', url) import http.client http.client.HTTPConnection._http_vsn = 10 http.client.HTTPConnection._http_vsn_str = 'HTTP/1.0' import os home = os.path.expanduser('~') random_number = int(time() * 1e7) tempfile = join(home, '.nnunetdownload_%s' % str(random_number)) try: download_file(url=url, local_filename=tempfile, chunk_size=8192 * 16) print("Download finished. Extracting...") install_model_from_zip_file(tempfile) print("Done") except Exception as e: raise e finally: if isfile(tempfile): os.remove(tempfile)
def validate(self, do_mirroring: bool = True, use_sliding_window: bool = True, step_size: float = 0.5, save_softmax: bool = True, use_gaussian: bool = True, overwrite: bool = True, validation_folder_name: str = 'validation_raw', debug: bool = False, all_in_gpu: bool = False, segmentation_export_kwargs: dict = None, run_postprocessing_on_folds: bool = True): if isinstance(self.network, DDP): net = self.network.module else: net = self.network ds = net.do_ds net.do_ds = False current_mode = self.network.training self.network.eval() assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)" if self.dataset_val is None: self.load_dataset() self.do_split() if segmentation_export_kwargs is None: if 'segmentation_export_params' in self.plans.keys(): force_separate_z = self.plans['segmentation_export_params'][ 'force_separate_z'] interpolation_order = self.plans['segmentation_export_params'][ 'interpolation_order'] interpolation_order_z = self.plans[ 'segmentation_export_params']['interpolation_order_z'] else: force_separate_z = None interpolation_order = 1 interpolation_order_z = 0 else: force_separate_z = segmentation_export_kwargs['force_separate_z'] interpolation_order = segmentation_export_kwargs[ 'interpolation_order'] interpolation_order_z = segmentation_export_kwargs[ 'interpolation_order_z'] # predictions as they come from the network go here output_folder = join(self.output_folder, validation_folder_name) maybe_mkdir_p(output_folder) # this is for debug purposes my_input_args = { 'do_mirroring': do_mirroring, 'use_sliding_window': use_sliding_window, 'step_size': step_size, 'save_softmax': save_softmax, 'use_gaussian': use_gaussian, 'overwrite': overwrite, 'validation_folder_name': validation_folder_name, 'debug': debug, 'all_in_gpu': all_in_gpu, 'segmentation_export_kwargs': segmentation_export_kwargs, } save_json(my_input_args, join(output_folder, "validation_args.json")) if do_mirroring: if not self.data_aug_params['do_mirror']: raise RuntimeError( "We did not train with mirroring so you cannot do inference with mirroring enabled" ) mirror_axes = self.data_aug_params['mirror_axes'] else: mirror_axes = () pred_gt_tuples = [] export_pool = Pool(default_num_threads) results = [] all_keys = list(self.dataset_val.keys()) my_keys = all_keys[self.local_rank::dist.get_world_size()] # we cannot simply iterate over all_keys because we need to know pred_gt_tuples and valid_labels of all cases # for evaluation (which is done by local rank 0) for k in my_keys: properties = load_pickle(self.dataset[k]['properties_file']) fname = properties['list_of_data_files'][0].split("/")[-1][:-12] pred_gt_tuples.append([ join(output_folder, fname + ".nii.gz"), join(self.gt_niftis_folder, fname + ".nii.gz") ]) if k in my_keys: if overwrite or (not isfile(join(output_folder, fname + ".nii.gz"))) or \ (save_softmax and not isfile(join(output_folder, fname + ".npz"))): data = np.load(self.dataset[k]['data_file'])['data'] print(k, data.shape) data[-1][data[-1] == -1] = 0 softmax_pred = self.predict_preprocessed_data_return_seg_and_softmax( data[:-1], do_mirroring=do_mirroring, mirror_axes=mirror_axes, use_sliding_window=use_sliding_window, step_size=step_size, use_gaussian=use_gaussian, all_in_gpu=all_in_gpu, mixed_precision=self.fp16)[1] softmax_pred = softmax_pred.transpose( [0] + [i + 1 for i in self.transpose_backward]) if save_softmax: softmax_fname = join(output_folder, fname + ".npz") else: softmax_fname = None """There is a problem with python process communication that prevents us from communicating obejcts larger than 2 GB between processes (basically when the length of the pickle string that will be sent is communicated by the multiprocessing.Pipe object then the placeholder (\%i I think) does not allow for long enough strings (lol). This could be fixed by changing i to l (for long) but that would require manually patching system python code. We circumvent that problem here by saving softmax_pred to a npy file that will then be read (and finally deleted) by the Process. save_segmentation_nifti_from_softmax can take either filename or np.ndarray and will handle this automatically""" if np.prod(softmax_pred.shape) > ( 2e9 / 4 * 0.85): # *0.85 just to be save np.save(join(output_folder, fname + ".npy"), softmax_pred) softmax_pred = join(output_folder, fname + ".npy") results.append( export_pool.starmap_async( save_segmentation_nifti_from_softmax, ((softmax_pred, join(output_folder, fname + ".nii.gz"), properties, interpolation_order, self.regions_class_order, None, None, softmax_fname, None, force_separate_z, interpolation_order_z), ))) _ = [i.get() for i in results] self.print_to_log_file("finished prediction") distributed.barrier() if self.local_rank == 0: # evaluate raw predictions self.print_to_log_file("evaluation of raw predictions") task = self.dataset_directory.split("/")[-1] job_name = self.experiment_name _ = aggregate_scores(pred_gt_tuples, labels=list(range(self.num_classes)), json_output_file=join(output_folder, "summary.json"), json_name=job_name + " val tiled %s" % (str(use_sliding_window)), json_author="Fabian", json_task=task, num_threads=default_num_threads) if run_postprocessing_on_folds: # in the old nnunet we would stop here. Now we add a postprocessing. This postprocessing can remove everything # except the largest connected component for each class. To see if this improves results, we do this for all # classes and then rerun the evaluation. Those classes for which this resulted in an improved dice score will # have this applied during inference as well self.print_to_log_file("determining postprocessing") determine_postprocessing( self.output_folder, self.gt_niftis_folder, validation_folder_name, final_subf_name=validation_folder_name + "_postprocessed", debug=debug) # after this the final predictions for the vlaidation set can be found in validation_folder_name_base + "_postprocessed" # They are always in that folder, even if no postprocessing as applied! # detemining postprocesing on a per-fold basis may be OK for this fold but what if another fold finds another # postprocesing to be better? In this case we need to consolidate. At the time the consolidation is going to be # done we won't know what self.gt_niftis_folder was, so now we copy all the niftis into a separate folder to # be used later gt_nifti_folder = join(self.output_folder_base, "gt_niftis") maybe_mkdir_p(gt_nifti_folder) for f in subfiles(self.gt_niftis_folder, suffix=".nii.gz"): success = False attempts = 0 e = None while not success and attempts < 10: try: shutil.copy(f, gt_nifti_folder) success = True except OSError as e: attempts += 1 sleep(1) if not success: print("Could not copy gt nifti file %s into folder %s" % (f, gt_nifti_folder)) if e is not None: raise e self.network.train(current_mode) net.do_ds = ds
def run_training(self): """ if we run with -c then we need to set the correct lr for the first epoch, otherwise it will run the first continued epoch with self.initial_lr we also need to make sure deep supervision in the network is enabled for training, thus the wrapper :return: """ self.maybe_update_lr( self.epoch ) # if we dont overwrite epoch then self.epoch+1 is used which is not what we # want at the start of the training if isinstance(self.network, DDP): net = self.network.module else: net = self.network ds = net.do_ds net.do_ds = True _ = self.tr_gen.next() _ = self.val_gen.next() if torch.cuda.is_available(): torch.cuda.empty_cache() self._maybe_init_amp() maybe_mkdir_p(self.output_folder) self.plot_network_architecture() if cudnn.benchmark and cudnn.deterministic: warn( "torch.backends.cudnn.deterministic is True indicating a deterministic training is desired. " "But torch.backends.cudnn.benchmark is True as well and this will prevent deterministic training! " "If you want deterministic then set benchmark=False") if not self.was_initialized: self.initialize(True) while self.epoch < self.max_num_epochs: self.print_to_log_file("\nepoch: ", self.epoch) epoch_start_time = time() train_losses_epoch = [] # train one epoch self.network.train() if self.use_progress_bar: with trange(self.num_batches_per_epoch) as tbar: for b in tbar: tbar.set_description("Epoch {}/{}".format( self.epoch + 1, self.max_num_epochs)) l = self.run_iteration(self.tr_gen, True) tbar.set_postfix(loss=l) train_losses_epoch.append(l) else: for _ in range(self.num_batches_per_epoch): l = self.run_iteration(self.tr_gen, True) train_losses_epoch.append(l) self.all_tr_losses.append(np.mean(train_losses_epoch)) self.print_to_log_file("train loss : %.4f" % self.all_tr_losses[-1]) with torch.no_grad(): # validation with train=False self.network.eval() val_losses = [] for b in range(self.num_val_batches_per_epoch): l = self.run_iteration(self.val_gen, False, True) val_losses.append(l) self.all_val_losses.append(np.mean(val_losses)) self.print_to_log_file("validation loss: %.4f" % self.all_val_losses[-1]) if self.also_val_in_tr_mode: self.network.train() # validation with train=True val_losses = [] for b in range(self.num_val_batches_per_epoch): l = self.run_iteration(self.val_gen, False) val_losses.append(l) self.all_val_losses_tr_mode.append(np.mean(val_losses)) self.print_to_log_file( "validation loss (train=True): %.4f" % self.all_val_losses_tr_mode[-1]) self.update_train_loss_MA( ) # needed for lr scheduler and stopping of training continue_training = self.on_epoch_end() epoch_end_time = time() if not continue_training: # allows for early stopping break self.epoch += 1 self.print_to_log_file("This epoch took %f s\n" % (epoch_end_time - epoch_start_time)) self.epoch -= 1 # if we don't do this we can get a problem with loading model_final_checkpoint. if self.save_final_checkpoint: self.save_checkpoint( join(self.output_folder, "model_final_checkpoint.model")) if self.local_rank == 0: # now we can delete latest as it will be identical with final if isfile(join(self.output_folder, "model_latest.model")): os.remove(join(self.output_folder, "model_latest.model")) if isfile(join(self.output_folder, "model_latest.model.pkl")): os.remove(join(self.output_folder, "model_latest.model.pkl")) net.do_ds = ds
def initialize(self, training=True, force_load_plans=False): """ For prediction of test cases just set training=False, this will prevent loading of training data and training batchgenerator initialization :param training: :return: """ if not self.was_initialized: maybe_mkdir_p(self.output_folder) if force_load_plans or (self.plans is None): self.load_plans_file() self.process_plans(self.plans) self.setup_DA_params() self.folder_with_preprocessed_data = join( self.dataset_directory, self.plans['data_identifier'] + "_stage%d" % self.stage) if training: self.dl_tr, self.dl_val = self.get_basic_generators() if self.unpack_data: if self.local_rank == 0: print("unpacking dataset") unpack_dataset(self.folder_with_preprocessed_data) print("done") else: # we need to wait until worker 0 has finished unpacking npz_files = subfiles( self.folder_with_preprocessed_data, suffix=".npz", join=False) case_ids = [i[:-4] for i in npz_files] all_present = all([ isfile( join(self.folder_with_preprocessed_data, i + ".npy")) for i in case_ids ]) while not all_present: print("worker", self.local_rank, "is waiting for unpacking") sleep(3) all_present = all([ isfile( join(self.folder_with_preprocessed_data, i + ".npy")) for i in case_ids ]) # there is some slight chance that there may arise some error because dataloader are loading a file # that is still being written by worker 0. We ignore this for now an address it only if it becomes # relevant # (this can occur because while worker 0 writes the file is technically present so the other workers # will proceed and eventually try to read it) else: print( "INFO: Not unpacking data! Training may be slow due to that. Pray you are not using 2d or you " "will wait all winter for your model to finish!") # setting weights for deep supervision losses net_numpool = len(self.net_num_pool_op_kernel_sizes) # we give each output a weight which decreases exponentially (division by 2) as the resolution decreases # this gives higher resolution outputs more weight in the loss weights = np.array([1 / (2**i) for i in range(net_numpool)]) # we don't use the lowest 2 outputs. Normalize weights so that they sum to 1 mask = np.array([ True if i < net_numpool - 1 else False for i in range(net_numpool) ]) weights[~mask] = 0 weights = weights / weights.sum() self.ds_loss_weights = weights seeds_train = np.random.random_integers( 0, 99999, self.data_aug_params.get('num_threads')) seeds_val = np.random.random_integers( 0, 99999, max(self.data_aug_params.get('num_threads') // 2, 1)) print("seeds train", seeds_train) print("seeds_val", seeds_val) self.tr_gen, self.val_gen = get_moreDA_augmentation( self.dl_tr, self.dl_val, self.data_aug_params['patch_size_for_spatialtransform'], self.data_aug_params, deep_supervision_scales=self.deep_supervision_scales, seeds_train=seeds_train, seeds_val=seeds_val) self.print_to_log_file("TRAINING KEYS:\n %s" % (str(self.dataset_tr.keys())), also_print_to_console=False) self.print_to_log_file("VALIDATION KEYS:\n %s" % (str(self.dataset_val.keys())), also_print_to_console=False) else: pass self.initialize_network() self.initialize_optimizer_and_scheduler() self._maybe_init_amp() self.network = DDP(self.network) else: self.print_to_log_file( 'self.was_initialized is True, not running self.initialize again' ) self.was_initialized = True