Example #1
def preprocess_save_to_queue(preprocess_fn, q, list_of_lists, output_files,
                             segs_from_prev_stage, classes, transpose_forward):
    # suppress output
    # sys.stdout = open(os.devnull, 'w')

    errors_in = []
    for i, l in enumerate(list_of_lists):
        try:
            output_file = output_files[i]
            print("preprocessing", output_file)
            d, _, dct = preprocess_fn(l)
            # print(output_file, dct)
            if segs_from_prev_stage[i] is not None:
                assert isfile(segs_from_prev_stage[i]) and segs_from_prev_stage[i].endswith(
                    ".nii.gz"), "segs_from_prev_stage" \
                                " must point to a " \
                                "segmentation file"
                seg_prev = sitk.GetArrayFromImage(
                    sitk.ReadImage(segs_from_prev_stage[i]))
                # check to see if shapes match
                img = sitk.GetArrayFromImage(sitk.ReadImage(l[0]))
                assert all([i == j for i, j in zip(seg_prev.shape, img.shape)]), "image and segmentation from previous " \
                                                                                 "stage don't have the same pixel array " \
                                                                                 "shape! image: %s, seg_prev: %s" % \
                                                                                 (l[0], segs_from_prev_stage[i])
                seg_prev = seg_prev.transpose(transpose_forward)
                seg_reshaped = resize_segmentation(seg_prev,
                                                   d.shape[1:],
                                                   order=1,
                                                   cval=0)
                seg_reshaped = to_one_hot(seg_reshaped, classes)
                d = np.vstack((d, seg_reshaped)).astype(np.float32)
            """There is a problem with python process communication that prevents us from communicating obejcts 
            larger than 2 GB between processes (basically when the length of the pickle string that will be sent is 
            communicated by the multiprocessing.Pipe object then the placeholder (\%i I think) does not allow for long 
            enough strings (lol). This could be fixed by changing i to l (for long) but that would require manually 
            patching system python code. We circumvent that problem here by saving softmax_pred to a npy file that will 
            then be read (and finally deleted) by the Process. save_segmentation_nifti_from_softmax can take either 
            filename or np.ndarray and will handle this automatically"""
            print(d.shape)
            if np.prod(d.shape) > (
                    2e9 / 4 * 0.85
            ):  # *0.85 just to be safe, 4 because float32 is 4 bytes
                print(
                    "This output is too large for python process-process communication. "
                    "Saving output temporarily to disk")
                np.save(output_file[:-7] + ".npy", d)
                d = output_file[:-7] + ".npy"
            q.put((output_file, (d, dct)))
        except KeyboardInterrupt:
            raise KeyboardInterrupt
        except Exception as e:
            print("error in", l)
            print(e)
            errors_in.append(l)  # remember the failed case so it is reported after the loop
    q.put("end")
    if len(errors_in) > 0:
        print("There were some errors in the following cases:", errors_in)
        print("These cases were ignored.")
    else:
        print("This worker has ended successfully, no errors to report")
Example #2
    def validate(self,
                 do_mirroring: bool = True,
                 use_sliding_window: bool = True,
                 step_size: float = 0.5,
                 save_softmax: bool = True,
                 use_gaussian: bool = True,
                 overwrite: bool = True,
                 validation_folder_name: str = 'validation_raw',
                 debug: bool = False,
                 all_in_gpu: bool = False,
                 segmentation_export_kwargs: dict = None):
        assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"

        current_mode = self.network.training
        self.network.eval()
        # save whether network is in deep supervision mode or not
        ds = self.network.do_ds
        # disable deep supervision
        self.network.do_ds = False

        if segmentation_export_kwargs is None:
            if 'segmentation_export_params' in self.plans.keys():
                force_separate_z = self.plans['segmentation_export_params'][
                    'force_separate_z']
                interpolation_order = self.plans['segmentation_export_params'][
                    'interpolation_order']
                interpolation_order_z = self.plans[
                    'segmentation_export_params']['interpolation_order_z']
            else:
                force_separate_z = None
                interpolation_order = 1
                interpolation_order_z = 0
        else:
            force_separate_z = segmentation_export_kwargs['force_separate_z']
            interpolation_order = segmentation_export_kwargs[
                'interpolation_order']
            interpolation_order_z = segmentation_export_kwargs[
                'interpolation_order_z']

        if self.dataset_val is None:
            self.load_dataset()
            self.do_split()

        output_folder = join(self.output_folder, validation_folder_name)
        maybe_mkdir_p(output_folder)
        # this is for debug purposes
        my_input_args = {
            'do_mirroring': do_mirroring,
            'use_sliding_window': use_sliding_window,
            'step': step_size,
            'save_softmax': save_softmax,
            'use_gaussian': use_gaussian,
            'overwrite': overwrite,
            'validation_folder_name': validation_folder_name,
            'debug': debug,
            'all_in_gpu': all_in_gpu,
            'segmentation_export_kwargs': segmentation_export_kwargs,
        }
        save_json(my_input_args, join(output_folder, "validation_args.json"))

        if do_mirroring:
            if not self.data_aug_params['do_mirror']:
                raise RuntimeError(
                    "We did not train with mirroring so you cannot do inference with mirroring enabled"
                )
            mirror_axes = self.data_aug_params['mirror_axes']
        else:
            mirror_axes = ()

        pred_gt_tuples = []

        export_pool = Pool(default_num_threads)
        results = []

        for k in self.dataset_val.keys():
            properties = load_pickle(self.dataset[k]['properties_file'])
            fname = properties['list_of_data_files'][0].split("/")[-1][:-12]

            if overwrite or (not isfile(join(output_folder, fname + ".nii.gz"))) or \
                    (save_softmax and not isfile(join(output_folder, fname + ".npz"))):
                data = np.load(self.dataset[k]['data_file'])['data']

                # concat segmentation of previous step
                seg_from_prev_stage = np.load(
                    join(self.folder_with_segs_from_prev_stage,
                         k + "_segFromPrevStage.npz"))['data'][None]

                print(k, data.shape)
                data[-1][data[-1] == -1] = 0

                data_for_net = np.concatenate(
                    (data[:-1],
                     to_one_hot(seg_from_prev_stage[0],
                                range(1, self.num_classes))))

                softmax_pred = self.predict_preprocessed_data_return_seg_and_softmax(
                    data_for_net,
                    do_mirroring,
                    mirror_axes,
                    use_sliding_window,
                    step_size,
                    use_gaussian,
                    all_in_gpu=all_in_gpu,
                    mixed_precision=self.fp16)[1]

                softmax_pred = softmax_pred.transpose(
                    [0] + [i + 1 for i in self.transpose_backward])

                if save_softmax:
                    softmax_fname = join(output_folder, fname + ".npz")
                else:
                    softmax_fname = None
                """There is a problem with python process communication that prevents us from communicating obejcts 
                larger than 2 GB between processes (basically when the length of the pickle string that will be sent is 
                communicated by the multiprocessing.Pipe object then the placeholder (\%i I think) does not allow for long 
                enough strings (lol). This could be fixed by changing i to l (for long) but that would require manually 
                patching system python code. We circumvent that problem here by saving softmax_pred to a npy file that will 
                then be read (and finally deleted) by the Process. save_segmentation_nifti_from_softmax can take either 
                filename or np.ndarray and will handle this automatically"""
                if np.prod(softmax_pred.shape) > (
                        2e9 / 4 * 0.85):  # *0.85 just to be safe
                    np.save(join(output_folder, fname + ".npy"), softmax_pred)
                    softmax_pred = join(output_folder, fname + ".npy")

                results.append(
                    export_pool.starmap_async(
                        save_segmentation_nifti_from_softmax,
                        ((softmax_pred, join(output_folder,
                                             fname + ".nii.gz"), properties,
                          interpolation_order, None, None, None, softmax_fname,
                          None, force_separate_z, interpolation_order_z), )))

            pred_gt_tuples.append([
                join(output_folder, fname + ".nii.gz"),
                join(self.gt_niftis_folder, fname + ".nii.gz")
            ])

        _ = [i.get() for i in results]
        self.print_to_log_file("finished prediction")

        # evaluate raw predictions
        self.print_to_log_file("evaluation of raw predictions")
        task = self.dataset_directory.split("/")[-1]
        job_name = self.experiment_name
        _ = aggregate_scores(
            pred_gt_tuples,
            labels=list(range(self.num_classes)),
            json_output_file=join(output_folder, "summary.json"),
            json_name=job_name + " val tiled %s" % (str(use_sliding_window)),
            json_author="Fabian",
            json_task=task,
            num_threads=default_num_threads)

        # in the old nnunet we would stop here. Now we add a postprocessing. This postprocessing can remove everything
        # except the largest connected component for each class. To see if this improves results, we do this for all
        # classes and then rerun the evaluation. Those classes for which this resulted in an improved dice score will
        # have this applied during inference as well
        self.print_to_log_file("determining postprocessing")
        determine_postprocessing(self.output_folder,
                                 self.gt_niftis_folder,
                                 validation_folder_name,
                                 final_subf_name=validation_folder_name +
                                 "_postprocessed",
                                 debug=debug)
        # after this the final predictions for the validation set can be found in validation_folder_name_base + "_postprocessed"
        # They are always in that folder, even if no postprocessing was applied!

        # determining postprocessing on a per-fold basis may be OK for this fold, but what if another fold finds a
        # different postprocessing to be better? In that case we need to consolidate. At the time the consolidation is
        # done we won't know what self.gt_niftis_folder was, so we copy all the niftis into a separate folder now so
        # they can be used later
        gt_nifti_folder = join(self.output_folder_base, "gt_niftis")
        maybe_mkdir_p(gt_nifti_folder)
        for f in subfiles(self.gt_niftis_folder, suffix=".nii.gz"):
            success = False
            attempts = 0
            e = None
            while not success and attempts < 10:
                try:
                    shutil.copy(f, gt_nifti_folder)
                    success = True
                except OSError as exc:
                    # bind to a separate name: "except ... as e" unbinds e when the block ends,
                    # which would break the "if e is not None" check after the loop
                    e = exc
                    attempts += 1
                    sleep(1)
            if not success:
                print("Could not copy gt nifti file %s into folder %s" %
                      (f, gt_nifti_folder))
                if e is not None:
                    raise e

        # restore the network's training mode and deep supervision setting
        self.network.train(current_mode)
        self.network.do_ds = ds
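
The docstring repeated in these examples describes a workaround for the 2 GB pickle limit of multiprocessing pipes: large arrays are written to a temporary .npy file and only the filename is passed through the queue, to be loaded (and deleted) on the other side. Below is a minimal, self-contained sketch of that pattern; the 0.85 safety factor mirrors the examples, while the function names and everything else are illustrative and not part of nnU-Net.

import os

import numpy as np
from multiprocessing import Queue


def producer(q: Queue, array: np.ndarray, out_path: str):
    # out_path should already end with ".npy" so the path sent matches the file written.
    # float32 arrays with more than 2e9 / 4 * 0.85 elements (~1.7 GB) may exceed the
    # 2 GB pickle limit described above, so save them to disk and send only the path.
    if np.prod(array.shape) > (2e9 / 4 * 0.85):
        np.save(out_path, array)
        q.put(out_path)
    else:
        q.put(array)


def consume_one(q: Queue) -> np.ndarray:
    item = q.get()
    if isinstance(item, str):
        # a filename was sent: load the array and remove the temporary file
        array = np.load(item)
        os.remove(item)
        return array
    return item
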
Example #3
    def validate(self, do_mirroring=True, use_train_mode=False, tiled=True, step=2, save_softmax=True,
                 use_gaussian=True, validation_folder_name='validation'):
        """

        :param do_mirroring:
        :param use_train_mode:
        :param mirror_axes:
        :param tiled:
        :param tile_in_z:
        :param step:
        :param use_nifti:
        :param save_softmax:
        :param use_gaussian:
        :param use_temporal_models:
        :return:
        """
        assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
        if self.dataset_val is None:
            self.load_dataset()
            self.do_split()

        output_folder = join(self.output_folder, validation_folder_name)
        maybe_mkdir_p(output_folder)

        if do_mirroring:
            mirror_axes = self.data_aug_params['mirror_axes']
        else:
            mirror_axes = ()

        pred_gt_tuples = []

        process_manager = Pool(2)
        results = []

        transpose_backward = self.plans.get('transpose_backward')

        for k in self.dataset_val.keys():
            properties = self.dataset[k]['properties']
            data = np.load(self.dataset[k]['data_file'])['data']

            # concat segmentation of previous step
            seg_from_prev_stage = np.load(join(self.folder_with_segs_from_prev_stage,
                                               k + "_segFromPrevStage.npz"))['data'][None]

            print(data.shape)
            data[-1][data[-1] == -1] = 0
            data_for_net = np.concatenate((data[:-1], to_one_hot(seg_from_prev_stage[0], range(1, self.num_classes))))
            softmax_pred = self.predict_preprocessed_data_return_softmax(data_for_net, do_mirroring, 1,
                                                                         use_train_mode, 1, mirror_axes, tiled,
                                                                         True, step, self.patch_size,
                                                                         use_gaussian=use_gaussian)

            if transpose_backward is not None:
                transpose_backward = self.plans.get('transpose_backward')
                softmax_pred = softmax_pred.transpose([0] + [i + 1 for i in transpose_backward])

            fname = properties['list_of_data_files'][0].split("/")[-1][:-12]

            if save_softmax:
                softmax_fname = join(output_folder, fname + ".npz")
            else:
                softmax_fname = None

            """There is a problem with python process communication that prevents us from communicating obejcts 
            larger than 2 GB between processes (basically when the length of the pickle string that will be sent is 
            communicated by the multiprocessing.Pipe object then the placeholder (\%i I think) does not allow for long 
            enough strings (lol). This could be fixed by changing i to l (for long) but that would require manually 
            patching system python code. We circumvent that problem here by saving softmax_pred to a npy file that will 
            then be read (and finally deleted) by the Process. save_segmentation_nifti_from_softmax can take either 
            filename or np.ndarray and will handle this automatically"""
            if np.prod(softmax_pred.shape) > (2e9 / 4 * 0.9):  # *0.9 just to be safe
                np.save(fname + ".npy", softmax_pred)
                softmax_pred = fname + ".npy"
            results.append(process_manager.starmap_async(save_segmentation_nifti_from_softmax,
                                                         ((softmax_pred, join(output_folder, fname + ".nii.gz"),
                                                           properties, 1, None, None, None, softmax_fname, None),
                                                          )
                                                         )
                           )

            pred_gt_tuples.append([join(output_folder, fname + ".nii.gz"),
                                   join(self.gt_niftis_folder, fname + ".nii.gz")])

        _ = [i.get() for i in results]

        task = self.dataset_directory.split("/")[-1]
        job_name = self.experiment_name
        _ = aggregate_scores(pred_gt_tuples, labels=list(range(self.num_classes)),
                             json_output_file=join(output_folder, "summary.json"), json_name=job_name,
                             json_author="Fabian", json_description="",
                             json_task=task)
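
The validate methods above build the input for the cascade's second stage by concatenating the image channels with a one-hot encoding of the previous-stage segmentation, to_one_hot(seg_from_prev_stage[0], range(1, self.num_classes)). A minimal sketch of the semantics such a helper is assumed to have (an illustration, not necessarily the library's actual implementation):

import numpy as np


def to_one_hot(seg: np.ndarray, all_seg_labels) -> np.ndarray:
    # turn an integer label map of shape (X, Y, Z) into one binary channel per label,
    # i.e. an array of shape (len(all_seg_labels), X, Y, Z)
    labels = list(all_seg_labels)
    result = np.zeros((len(labels), *seg.shape), dtype=seg.dtype)
    for i, label in enumerate(labels):
        result[i][seg == label] = 1
    return result


# usage mirroring the loop above: image channels plus one channel per foreground class
# data_for_net = np.concatenate((data[:-1], to_one_hot(seg_from_prev_stage[0], range(1, num_classes))))
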
Example #4
    def validate(self,
                 do_mirroring=True,
                 use_train_mode=False,
                 tiled=True,
                 step=2,
                 save_softmax=True,
                 use_gaussian=True,
                 overwrite=True,
                 validation_folder_name="validation_raw",
                 debug=False):
        """

        :param do_mirroring:
        :param use_train_mode:
        :param mirror_axes:
        :param tiled:
        :param tile_in_z:
        :param step:
        :param use_nifti:
        :param save_softmax:
        :param use_gaussian:
        :param use_temporal_models:
        :return:
        """
        assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
        if self.dataset_val is None:
            self.load_dataset()
            self.do_split()

        output_folder = join(self.output_folder, validation_folder_name)
        maybe_mkdir_p(output_folder)

        if do_mirroring:
            mirror_axes = self.data_aug_params['mirror_axes']
        else:
            mirror_axes = ()

        pred_gt_tuples = []

        process_manager = Pool(2)
        results = []

        transpose_backward = self.plans.get('transpose_backward')

        for k in self.dataset_val.keys():
            properties = self.dataset[k]['properties']
            data = np.load(self.dataset[k]['data_file'])['data']

            # concat segmentation of previous step
            seg_from_prev_stage = np.load(
                join(self.folder_with_segs_from_prev_stage,
                     k + "_segFromPrevStage.npz"))['data'][None]

            print(data.shape)
            data[-1][data[-1] == -1] = 0
            data_for_net = np.concatenate(
                (data[:-1],
                 to_one_hot(seg_from_prev_stage[0], range(1,
                                                          self.num_classes))))
            softmax_pred = self.predict_preprocessed_data_return_softmax(
                data_for_net,
                do_mirroring,
                1,
                use_train_mode,
                1,
                mirror_axes,
                tiled,
                True,
                step,
                self.patch_size,
                use_gaussian=use_gaussian)

            if transpose_backward is not None:
                transpose_backward = self.plans.get('transpose_backward')
                softmax_pred = softmax_pred.transpose(
                    [0] + [i + 1 for i in transpose_backward])

            fname = properties['list_of_data_files'][0].split("/")[-1][:-12]

            if save_softmax:
                softmax_fname = join(output_folder, fname + ".npz")
            else:
                softmax_fname = None
            """There is a problem with python process communication that prevents us from communicating obejcts 
            larger than 2 GB between processes (basically when the length of the pickle string that will be sent is 
            communicated by the multiprocessing.Pipe object then the placeholder (\%i I think) does not allow for long 
            enough strings (lol). This could be fixed by changing i to l (for long) but that would require manually 
            patching system python code. We circumvent that problem here by saving softmax_pred to a npy file that will 
            then be read (and finally deleted) by the Process. save_segmentation_nifti_from_softmax can take either 
            filename or np.ndarray and will handle this automatically"""
            if np.prod(softmax_pred.shape) > (2e9 / 4 *
                                              0.85):  # *0.85 just to be safe
                np.save(fname + ".npy", softmax_pred)
                softmax_pred = fname + ".npy"
            results.append(
                process_manager.starmap_async(
                    save_segmentation_nifti_from_softmax,
                    ((softmax_pred, join(
                        output_folder, fname + ".nii.gz"), properties, 3, None,
                      None, None, softmax_fname, None), )))

            pred_gt_tuples.append([
                join(output_folder, fname + ".nii.gz"),
                join(self.gt_niftis_folder, fname + ".nii.gz")
            ])

        _ = [i.get() for i in results]

        task = self.dataset_directory.split("/")[-1]
        job_name = self.experiment_name
        _ = aggregate_scores(pred_gt_tuples,
                             labels=list(range(self.num_classes)),
                             json_output_file=join(output_folder,
                                                   "summary.json"),
                             json_name=job_name,
                             json_author="Fabian",
                             json_description="",
                             json_task=task)

        # in the old nnunet we would stop here. Now we add a postprocessing. This postprocessing can remove everything
        # except the largest connected component for each class. To see if this improves results, we do this for all
        # classes and then rerun the evaluation. Those classes for which this resulted in an improved dice score will
        # have this applied during inference as well
        self.print_to_log_file("determining postprocessing")
        determine_postprocessing(self.output_folder,
                                 self.gt_niftis_folder,
                                 validation_folder_name,
                                 final_subf_name=validation_folder_name +
                                 "_postprocessed",
                                 debug=debug)
        # after this the final predictions for the validation set can be found in validation_folder_name_base + "_postprocessed"
        # They are always in that folder, even if no postprocessing was applied!

        # determining postprocessing on a per-fold basis may be OK for this fold, but what if another fold finds a
        # different postprocessing to be better? In that case we need to consolidate. At the time the consolidation is
        # done we won't know what self.gt_niftis_folder was, so we copy all the niftis into a separate folder now so
        # they can be used later
        gt_nifti_folder = join(self.output_folder_base, "gt_niftis")
        maybe_mkdir_p(gt_nifti_folder)
        for f in subfiles(self.gt_niftis_folder, suffix=".nii.gz"):
            success = False
            attempts = 0
            while not success and attempts < 10:
                try:
                    shutil.copy(f, gt_nifti_folder)
                    success = True
                except OSError:
                    attempts += 1
                    sleep(1)
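
The comments before the determine_postprocessing calls above describe the postprocessing that is evaluated: removing everything except the largest connected component of each class, then re-running the evaluation and keeping the change only for classes whose Dice score improves. Below is a minimal sketch of that idea for a single class, assuming scipy is available and 0 is the background label; it is an illustration, not nnU-Net's actual determine_postprocessing.

import numpy as np
from scipy import ndimage


def keep_largest_component(seg: np.ndarray, class_label: int) -> np.ndarray:
    # suppress all connected components of `class_label` except the largest one
    mask = seg == class_label
    labelled, num_components = ndimage.label(mask)
    if num_components <= 1:
        return seg
    sizes = ndimage.sum(mask, labelled, index=range(1, num_components + 1))
    largest = int(np.argmax(sizes)) + 1
    cleaned = seg.copy()
    cleaned[mask & (labelled != largest)] = 0  # smaller components become background
    return cleaned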