def run_test(self):
        """
        This runs test cycle on the test dataset.
        Note that process and evaluations are quite different
        Here we are computing a lot more metrics and returning
        a dictionary that could later be persisted as JSON
        """
        print("Testing...")
        self.model.eval()

        # In this method we will be computing metrics that are relevant to the task of 3D volume
        # segmentation. Therefore, unlike train and validation methods, we will do inferences
        # on full 3D volumes, much like we will be doing it when we deploy the model in the 
        # clinical environment. 

        
        inference_agent = UNetInferenceAgent(model=self.model, device=self.device)

        out_dict = {}
        out_dict["volume_stats"] = []
        dc_list = []
        jc_list = []

        # for every volume in the test set
        for i, x in enumerate(self.test_data):
            pred_label = inference_agent.single_volume_inference(x["image"])

            # We compute and report Dice and Jaccard similarity coefficients,
            # which assess how close the predicted and ground-truth volumes
            # are to each other.

            # If the metrics are implemented correctly (and the train/val/test
            # split was picked sensibly), the average Jaccard on the test set
            # should be around 0.80.

            dc = Dice3d(pred_label, x["seg"])
            jc = Jaccard3d(pred_label, x["seg"])
            dc_list.append(dc)
            jc_list.append(jc)

            # STAND-OUT SUGGESTION: By way of exercise, consider also outputting:
            # * Sensitivity and specificity (and explain semantic meaning in terms of 
            #   under/over segmenting)
            # * Dice-per-slice and render combined slices with lowest and highest DpS
            # * Dice per class (anterior/posterior)

            out_dict["volume_stats"].append({
                "filename": x['filename'],
                "dice": dc,
                "jaccard": jc
                })
            print(f"{x['filename']} Dice {dc:.4f}. {100*(i+1)/len(self.test_data):.2f}% complete")

        out_dict["overall"] = {
            "mean_dice": np.mean(dc_list),
            "mean_jaccard": np.mean(jc_list)}

        print("\nTesting complete.")
        return out_dict
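Dice3d and Jaccard3d are referenced above but not shown in this listing. A minimal sketch of what they might look like, assuming both arguments are NumPy arrays of the same shape and that any non-zero voxel counts as foreground:

import numpy as np

def Dice3d(a, b):
    # a - prediction, b - ground truth; binarize on non-zero voxels (assumption)
    a = np.asarray(a) > 0
    b = np.asarray(b) > 0
    intersection = np.sum(a & b)
    total = np.sum(a) + np.sum(b)
    if total == 0:
        return -1  # one possible convention when both volumes are empty
    return 2.0 * intersection / total

def Jaccard3d(a, b):
    a = np.asarray(a) > 0
    b = np.asarray(b) > 0
    intersection = np.sum(a & b)
    union = np.sum(a | b)
    if union == 0:
        return -1  # one possible convention when both volumes are empty
    return intersection / union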
Example #2
    def run_test(self):
        """
        This runs test cycle on the test dataset.
        Note that process and evaluations are quite different
        Here we are computing a lot more metrics and returning
        a dictionary that could later be persisted as JSON
        """
        print("Testing...")
        self.model.eval()

        # In this method we will be computing metrics that are relevant to the task of 3D volume
        # segmentation. Therefore, unlike train and validation methods, we will do inferences
        # on full 3D volumes, much like we will be doing it when we deploy the model in the
        # clinical environment.

        # Instantiate inference agent
        inference_agent = UNetInferenceAgent(model=self.model, device=self.device)

        out_dict = {}
        out_dict["volume_stats"] = []
        dc_list = []
        jc_list = []

        # for every volume in the test set
        for i, x in enumerate(self.test_data):
            pred_label = inference_agent.single_volume_inference(x["image"])

            # We compute and report Dice and Jaccard similarity coefficients which
            # assess how close our volumes are to each other

            dc = Dice3d(pred_label, x["seg"])
            jc = Jaccard3d(pred_label, x["seg"])
            dc_list.append(dc)
            jc_list.append(jc)

            out_dict["volume_stats"].append({
                "filename": x['filename'],
                "dice": dc,
                "jaccard": jc
                })
            print(f"{x['filename']} Dice {dc:.4f} Jaccard {dc:.4f} {100*(i+1)/len(self.test_data):.2f}% complete")

        mean_dice = np.mean(dc_list)
        mean_jaccard = np.mean(jc_list)

        print(f" Mean Dice {mean_dice:.4f} Mean Jaccard {mean_jaccard:.4f}")

        out_dict["overall"] = {
            "mean_dice": mean_dice,
            "mean_jaccard": mean_jaccard}

        print("\nTesting complete.")
        return out_dict
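The docstring mentions that the returned dictionary could later be persisted as JSON. One way to do that is sketched below; note that np.mean returns np.float64, which the standard json module does not serialize, so values are cast to plain floats first. The helper name and output path are placeholders, not part of the original code:

import json

def save_test_results(out_dict, path="results.json"):
    # Cast NumPy scalars to plain Python floats so json.dump can handle them
    serializable = {
        "volume_stats": [
            {"filename": v["filename"],
             "dice": float(v["dice"]),
             "jaccard": float(v["jaccard"])}
            for v in out_dict["volume_stats"]
        ],
        "overall": {k: float(v) for k, v in out_dict["overall"].items()},
    }
    with open(path, "w") as f:
        json.dump(serializable, f, indent=2)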
Example #3
    def run_test(self):
        """
        This runs test cycle on the test dataset.
        Note that process and evaluations are quite different
        Here we are computing a lot more metrics and returning
        a dictionary that could later be persisted as JSON
        """
        print("Testing...")
        self.model.eval()

        inference_agent = UNetInferenceAgent(model=self.model,
                                             device=self.device)

        out_dict = {}
        out_dict["volume_stats"] = []
        dc_list = []
        jc_list = []

        # for every volume in the test set
        for i, x in enumerate(self.test_data):
            pred_label = inference_agent.single_volume_inference(x["image"])

            # We compute and report Dice and Jaccard similarity coefficients which
            # assess how close our volumes are to each other

            dc = Dice3d(pred_label, x["seg"])
            jc = Jaccard3d(pred_label, x["seg"])
            dc_list.append(dc)
            jc_list.append(jc)

            # STAND-OUT SUGGESTION: By way of exercise, consider also outputting:
            # * Sensitivity and specificity (and explain semantic meaning in terms of
            #   under/over segmenting)
            # * Dice-per-slice and render combined slices with lowest and highest DpS
            # * Dice per class (anterior/posterior)

            out_dict["volume_stats"].append({
                "filename": x['filename'],
                "dice": dc,
                "jaccard": jc
            })
            print(
                f"{x['filename']} Dice {dc:.4f}. {100*(i+1)/len(self.test_data):.2f}% complete"
            )

        out_dict["overall"] = {
            "mean_dice": np.mean(dc_list),
            "mean_jaccard": np.mean(jc_list)
        }

        print("\nTesting complete.")
        return out_dict
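UNetInferenceAgent.single_volume_inference is used in every example but not shown. A rough sketch of the slice-by-slice inference it presumably performs, assuming the model expects batches of single-channel 2D slices and the volume is indexed as [slice, height, width]; the exact interface of the real agent may differ:

import numpy as np
import torch

def single_volume_inference(model, volume, device="cpu"):
    # Run 2D inference slice by slice and stack the per-slice argmax masks
    model.eval()
    masks = []
    with torch.no_grad():
        for s in volume:
            # Add batch and channel dimensions: shape [1, 1, H, W] (assumption)
            t = torch.from_numpy(s.astype(np.float32)).unsqueeze(0).unsqueeze(0).to(device)
            logits = model(t)
            masks.append(torch.argmax(logits, dim=1).squeeze(0).cpu().numpy())
    return np.stack(masks)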
Example #4
    def run_test(self):
        """
        This runs test cycle on the test dataset.
        Note that process and evaluations are quite different
        Here we are computing a lot more metrics and returning
        a dictionary that could later be persisted as JSON
        """
        print("Testing...")
        # load_model_parameters('/home/dev/Documents/github/nd320-c3-3d-imaging-starter/section2/src/2020-06-08_1647_Basic_unet/model.pth')

        self.model.eval()

        # In this method we will be computing metrics that are relevant to the task of 3D volume
        # segmentation. Therefore, unlike train and validation methods, we will do inferences
        # on full 3D volumes, much like we will be doing it when we deploy the model in the
        # clinical environment.

        # TASK: Inference Agent is not complete. Go and finish it. Feel free to test the class
        # in a module of your own by running it against one of the data samples
        inference_agent = UNetInferenceAgent(model=self.model,
                                             device=self.device)

        out_dict = {}
        out_dict["volume_stats"] = []
        dc_list = []
        jc_list = []
        # print('self.test_data.shape: ', self.test_data.shape)
        # for every volume in the test set
        for i, x in enumerate(self.test_data):
            print('filename being tested: ', x["filename"])
            if (x["filename"] == 'hippocampus_150.nii.gz'):

                print('1')

                pred_label = inference_agent.single_volume_inference(
                    x["image"])

                pickle.dump(x["image"], open("image_150.p", "wb"))
                pickle.dump(pred_label, open("label_150.p", "wb"))

                # We compute and report Dice and Jaccard similarity coefficients which
                # assess how close our volumes are to each other

                # TASK: Dice3D and Jaccard3D functions are not implemented.
                #  Complete the implementation as we discussed
                # in one of the course lessons, you can look up definition of Jaccard index
                # on Wikipedia. If you completed it
                # correctly (and if you picked your train/val/test split right ;)),
                # your average Jaccard on your test set should be around 0.80

                dc = Dice3d(pred_label, x["seg"])
                jc = Jaccard3d(pred_label, x["seg"])
                dc_list.append(dc)
                jc_list.append(jc)

                # STAND-OUT SUGGESTION: By way of exercise, consider also outputting:
                # * Sensitivity and specificity (and explain semantic meaning in terms of
                #   under/over segmenting)
                # * Dice-per-slice and render combined slices with lowest and highest DpS
                # * Dice per class (anterior/posterior)

                out_dict["volume_stats"].append({
                    "filename": x['filename'],
                    "dice": dc,
                    "jaccard": jc
                })
                print(
                    f"{x['filename']} Dice {dc:.4f} and Jaccard: {jc:.4f} . {100*(i+1)/len(self.test_data):.2f}% complete"
                )
                # break

        # Aggregate metrics are computed after the loop so the key is present
        # even when no volume matched the filename filter above
        out_dict["overall"] = {
            "mean_dice": np.mean(dc_list),
            "mean_jaccard": np.mean(jc_list)
        }

        print("\nTesting complete.")
        return out_dict
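This example pickles one image volume and its predicted label (image_150.p and label_150.p) for later inspection. A hedged sketch of loading them back for a quick visual spot check, assuming both pickles hold arrays indexed as [slice, height, width]; the matplotlib part is an illustration, not part of the original code:

import pickle
import matplotlib.pyplot as plt

# Load the volumes dumped during the test run
image = pickle.load(open("image_150.p", "rb"))
label = pickle.load(open("label_150.p", "rb"))

# Show a middle slice of the image next to the predicted mask
mid = image.shape[0] // 2
fig, axes = plt.subplots(1, 2)
axes[0].imshow(image[mid], cmap="gray")
axes[0].set_title("image slice")
axes[1].imshow(label[mid])
axes[1].set_title("predicted label")
plt.show()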
Example #5
    def run_test(self):
        """
        This runs test cycle on the test dataset.
        Note that process and evaluations are quite different
        Here we are computing a lot more metrics and returning
        a dictionary that could later be persisted as JSON
        """
        self.model.eval()
        # In this method we will be computing metrics that are relevant to the task of 3D volume
        # segmentation. Therefore, unlike train and validation methods, we will do inferences
        # on full 3D volumes, much like we will be doing it when we deploy the model in the
        # clinical environment.

        # TASK: Inference Agent is not complete. Go and finish it. Feel free to test the class
        # in a module of your own by running it against one of the data samples

        inference_agent = UNetInferenceAgent(model=self.model,
                                             device=self.device)

        out_dict = {}
        out_dict["volume_stats"] = []
        dc_list = []
        jc_list = []
        sens_list = []
        spec_list = []
        # f1_list = []

        # for every volume in the test set
        for i, x in enumerate(self.test_data):

            gt = x["seg"]  # test image ground truth
            ti = x["image"]  # test image data
            original_filename = x['filename']  # test image file name
            pred_filename = 'predicted_' + x['filename']  # predicted label file name

            file_path = os.path.join("..", "data", "images", original_filename)

            original_images = nib.load(file_path)

            pred = inference_agent.single_volume_inference(ti)
            # Collapse the class dimension of the prediction into a 3D label mask
            mask3d = np.array(torch.argmax(pred, dim=1))

            # Save predicted labels to local environment for further verification
            # with the original image NIFTI coordinate system
            pred_coord = nib.Nifti1Image(mask3d, original_images.affine)
            pred_out_path = os.path.join("..", "data", "preds")
            pred_out_file = os.path.join(pred_out_path, pred_filename)

            if not os.path.exists(pred_out_path):
                os.makedirs(pred_out_path)

            nib.save(pred_coord, pred_out_file)

            # We compute and report Dice and Jaccard similarity coefficients which
            # assess how close our volumes are to each other

            # TASK: Dice3D and Jaccard3D functions are not implemented.
            # Complete the implementation as we discussed
            # in one of the course lessons, you can look up definition of Jaccard index
            # on Wikipedia. If you completed it
            # correctly (and if you picked your train/val/test split right ;)),
            # your average Jaccard on your test set should be around 0.80

            # a - prediction
            # b - ground truth
            dc = Dice3d(mask3d, gt)
            dc_list.append(dc)

            jc = Jaccard3d(mask3d, gt)
            jc_list.append(jc)

            sens = Sensitivity(mask3d, gt)
            sens_list.append(sens)

            spec = Specificity(mask3d, gt)
            spec_list.append(spec)

            # f1 = F1_score(mask3d, gt)
            # f1_list.append(f1)

            # STAND-OUT SUGGESTION: By way of exercise, consider also outputting:
            # * Sensitivity and specificity (and explain semantic meaning in terms of
            #   under/over segmenting)
            # * Dice-per-slice and render combined slices with lowest and highest DpS
            # * Dice per class (anterior/posterior)

            out_dict["volume_stats"].append({
                "filename": x['filename'],
                "dice": dc,
                "jaccard": jc,
                "sensitivity": sens,
                "specificity": spec,
                # "f1": f1,
            })

            print(
                f"{x['filename']} Dice {dc:.4f}, Jaccard {jc:.4f}, Sensitivity {sens:.4f}, and Specificity {spec:.4f}. {100*(i+1)/len(self.test_data):.2f}% complete"
            )

        avg_dc = np.mean(dc_list)
        avg_jc = np.mean(jc_list)
        avg_sens = np.mean(sens_list)
        avg_spec = np.mean(spec_list)
        # avg_f1 = np.mean(f1_list)

        out_dict["overall"] = {
            "mean_dice": avg_dc,
            "mean_jaccard": avg_jc,
            "mean_sensitivity": avg_sens,
            "mean_specificity": avg_spec,
            # "mean_f1": avg_f1,
        }

        print("\nTesting complete.")
        print("------------------------------")
        print(
            f"Average Dice {avg_dc:.4f}, Average Jaccard {avg_jc:.4f}, Average Sensitivity {avg_sens:.4f}, and Average Specificity {avg_spec:.4f}"
        )

        return out_dict
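Sensitivity and Specificity are called above but not defined in this listing. A minimal sketch under the same binarization assumption as the other metrics; low sensitivity (TP / (TP + FN)) suggests under-segmentation, while low specificity (TN / (TN + FP)) suggests over-segmentation:

import numpy as np

def Sensitivity(a, b):
    # a - prediction, b - ground truth; any non-zero voxel is foreground (assumption)
    a = np.asarray(a) > 0
    b = np.asarray(b) > 0
    tp = np.sum(a & b)
    fn = np.sum(~a & b)
    if tp + fn == 0:
        return -1  # undefined when the ground truth is empty
    return tp / (tp + fn)

def Specificity(a, b):
    a = np.asarray(a) > 0
    b = np.asarray(b) > 0
    tn = np.sum(~a & ~b)
    fp = np.sum(a & ~b)
    if tn + fp == 0:
        return -1  # undefined when the ground truth covers every voxel
    return tn / (tn + fp)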
Example #6
    def run_test(self):
        """
        This runs test cycle on the test dataset.
        Note that process and evaluations are quite different
        Here we are computing a lot more metrics and returning
        a dictionary that could later be persisted as JSON
        """
        print("Testing...")

        model_dir = "C://Data"
        self.load_model_parameters(path=model_dir)
        self.model.eval()

        # In this method we will be computing metrics that are relevant to the task of 3D volume
        # segmentation. Therefore, unlike train and validation methods, we will do inferences
        # on full 3D volumes, much like we will be doing it when we deploy the model in the
        # clinical environment.

        # TASK: Inference Agent is not complete. Go and finish it. Feel free to test the class
        # in a module of your own by running it against one of the data samples
        inference_agent = UNetInferenceAgent(model=self.model,
                                             device=self.device)
        print("Testing...2")
        out_dict = {}
        out_dict["volume_stats"] = []
        dc_list = []
        jc_list = []
        lr_list = []
        print(len(self.test_data))
        # for every volume in the test set
        for i, x in enumerate(self.test_data):
            print("Testing...loop")
            pred_label = inference_agent.single_volume_inference(x["image"])
            #print(np.nonzero(x["seg"]))
            #print(np.nonzero(pred_label))
            # We compute and report Dice and Jaccard similarity coefficients which
            # assess how close our volumes are to each other

            # TASK: Dice3D and Jaccard3D functions are not implemented.
            #  Complete the implementation as we discussed
            # in one of the course lessons, you can look up definition of Jaccard index
            # on Wikipedia. If you completed it
            # correctly (and if you picked your train/val/test split right ;)),
            # your average Jaccard on your test set should be around 0.80

            dc = Dice3d(pred_label, x["seg"])
            jc = Jaccard3d(pred_label, x["seg"])
            lr = Likelihoodratio(pred_label, x["seg"])
            dc_list.append(dc)
            jc_list.append(jc)
            lr_list.append(lr)

            # STAND-OUT SUGGESTION: By way of exercise, consider also outputting:
            # * Sensitivity and specificity (and explain semantic meaning in terms of
            #   under/over segmenting)
            # * Dice-per-slice and render combined slices with lowest and highest DpS
            # * Dice per class (anterior/posterior)

            out_dict["volume_stats"].append({
                "filename": x['filename'],
                "dice": dc,
                "jaccard": jc,
                "likelihood": lr
            })
            print(
                f"{x['filename']} Dice {dc:.4f}. {100*(i+1)/len(self.test_data):.2f}% complete"
            )

        out_dict["overall"] = {
            "mean_dice": np.mean(dc_list),
            "mean_jaccard": np.mean(jc_list),
            "mean_likelihood": np.mean(lr_list)
        }

        print("\nTesting complete.")
        return out_dict
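Likelihoodratio is also undefined in the listing. If the positive likelihood ratio is what was intended (that is an assumption), it can be derived from the same per-voxel confusion counts as sensitivity / (1 - specificity):

import numpy as np

def Likelihoodratio(a, b):
    # Positive likelihood ratio over voxels: sensitivity / (1 - specificity) (assumption)
    a = np.asarray(a) > 0
    b = np.asarray(b) > 0
    tp = np.sum(a & b)
    fn = np.sum(~a & b)
    fp = np.sum(a & ~b)
    tn = np.sum(~a & ~b)
    sensitivity = tp / (tp + fn) if (tp + fn) else 0.0
    fpr = fp / (fp + tn) if (fp + tn) else 0.0  # false positive rate = 1 - specificity
    if fpr == 0:
        return float("inf")  # no false positives: the ratio is unbounded
    return sensitivity / fpr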