    def run_test(self):
        """
        Run the test cycle on the test dataset.

        Unlike the train and validation methods, inference here is done on
        full 3D volumes, much like it will be done when the model is deployed
        in the clinical environment. For every test volume the predicted
        label volume is written to disk as a NIFTI image that reuses the
        affine of the source image, and Dice, Jaccard, sensitivity and
        specificity are computed against the ground truth.

        Side effects:
            Creates the predictions directory if needed, writes one
            ``predicted_<filename>`` NIFTI file per test volume and prints
            per-volume and overall metrics.

        Returns:
            dict: ``"volume_stats"`` is a list with one entry per test volume
            (``filename``, ``dice``, ``jaccard``, ``sensitivity``,
            ``specificity``); ``"overall"`` holds the mean of each metric
            over the test set. The dictionary is meant to be persisted as
            JSON later.
        """
        # NOTE(review): the original call was cut off after
        # ``model=self.model,``. ``device=self.device`` is an assumed
        # reconstruction of the missing argument - confirm it against the
        # UNetInferenceAgent signature.
        inference_agent = UNetInferenceAgent(model=self.model,
                                             device=self.device)

        # Build the paths component by component: the former "..\data"
        # literal relied on the invalid "\d" escape sequence and on a
        # Windows-only separator.
        data_dir = os.path.join("..", "data")
        images_dir = os.path.join(data_dir, "images")
        preds_dir = os.path.join(data_dir, "preds")
        # Loop-invariant, so create the output directory once up front.
        os.makedirs(preds_dir, exist_ok=True)

        out_dict = {}
        out_dict["volume_stats"] = []
        dc_list = []
        jc_list = []
        sens_list = []
        spec_list = []

        n_volumes = len(self.test_data)

        # for every volume in the test set
        for i, x in enumerate(self.test_data):
            gt = x["seg"]  # test image ground truth
            ti = x["image"]  # test image data
            filename = x["filename"]  # test image file name

            # The source image is loaded only for its affine, so that the
            # saved prediction shares the original NIFTI coordinate system.
            original_image = nib.load(os.path.join(images_dir, filename))

            pred = inference_agent.single_volume_inference(ti)
            # argmax over dim 1 (presumably the class dimension - verify
            # against single_volume_inference) gives the label volume.
            # .cpu() keeps the conversion working for tensors on any device.
            mask3d = torch.argmax(pred, dim=1).cpu().numpy()

            # Save the predicted labels for further verification against the
            # original image. argmax yields int64, which recent nibabel
            # versions refuse to store without an explicit dtype, so the
            # saved copy is cast to uint8.
            # NOTE(review): assumes fewer than 256 label classes.
            pred_image = nib.Nifti1Image(mask3d.astype(np.uint8),
                                         original_image.affine)
            nib.save(pred_image,
                     os.path.join(preds_dir, "predicted_" + filename))

            # Similarity of the predicted volume (first argument) to the
            # ground truth (second argument).
            dc = Dice3d(mask3d, gt)
            jc = Jaccard3d(mask3d, gt)
            sens = Sensitivity(mask3d, gt)
            spec = Specificity(mask3d, gt)

            # Collect the per-volume values so the averages below are
            # computed over the whole test set.
            dc_list.append(dc)
            jc_list.append(jc)
            sens_list.append(sens)
            spec_list.append(spec)

            out_dict["volume_stats"].append({
                "filename": filename,
                "dice": dc,
                "jaccard": jc,
                "sensitivity": sens,
                "specificity": spec,
            })
            print(
                f"{filename} Dice {dc:.4f}, Jaccard {jc:.4f}, Sensitivity {sens:.4f}, and Specificity {spec:.4f}. {100*(i+1)/n_volumes:.2f}% complete"
            )

        # NOTE: with an empty test set np.mean returns NaN for every metric.
        avg_dc = np.mean(dc_list)
        avg_jc = np.mean(jc_list)
        avg_sens = np.mean(sens_list)
        avg_spec = np.mean(spec_list)

        out_dict["overall"] = {
            "mean_dice": avg_dc,
            "mean_jaccard": avg_jc,
            "mean_sensitivity": avg_sens,
            "mean_specificity": avg_spec,
        }

        print("\nTesting complete.")
        print(
            f"Average Dice {avg_dc:.4f}, Average Jaccard {avg_jc:.4f}, Average Sensitivity {avg_sens:.4f}, and Average Specificity {avg_spec:.4f}"
        )

        return out_dict
