def run_test(self):
    """
    This runs the test cycle on the test dataset.
    Note that the process and evaluations are quite different here:
    we are computing a lot more metrics and returning a dictionary
    that could later be persisted as JSON.
    """
    print("Testing...")
    self.model.eval()

    # In this method we will be computing metrics that are relevant to the task of 3D volume
    # segmentation. Therefore, unlike the train and validation methods, we will do inference
    # on full 3D volumes, much like we will be doing when we deploy the model in the
    # clinical environment.
    inference_agent = UNetInferenceAgent(model=self.model, device=self.device)

    out_dict = {}
    out_dict["volume_stats"] = []
    dc_list = []
    jc_list = []

    # for every volume in the test set
    for i, x in enumerate(self.test_data):
        pred_label = inference_agent.single_volume_inference(x["image"])

        # We compute and report Dice and Jaccard similarity coefficients, which
        # assess how close our volumes are to each other.
        # You can look up the definition of the Jaccard index on Wikipedia. If you
        # implemented it correctly (and if you picked your train/val/test split right ;)),
        # your average Jaccard on your test set should be around 0.80.
        dc = Dice3d(pred_label, x["seg"])
        jc = Jaccard3d(pred_label, x["seg"])
        dc_list.append(dc)
        jc_list.append(jc)

        # STAND-OUT SUGGESTION: By way of exercise, consider also outputting:
        # * Sensitivity and specificity (and explain semantic meaning in terms of
        #   under/over segmenting)
        # * Dice-per-slice and render combined slices with lowest and highest DpS
        # * Dice per class (anterior/posterior)
        out_dict["volume_stats"].append({
            "filename": x['filename'],
            "dice": dc,
            "jaccard": jc
        })
        print(f"{x['filename']} Dice {dc:.4f}. {100*(i+1)/len(self.test_data):.2f}% complete")

    out_dict["overall"] = {
        "mean_dice": np.mean(dc_list),
        "mean_jaccard": np.mean(jc_list)}

    print("\nTesting complete.")
    return out_dict
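# The docstring above notes that the returned dictionary could later be persisted as
# JSON. A minimal sketch of that step follows; `save_test_report` and its default
# path are assumptions for illustration, not part of the original class. Note that
# np.mean() returns numpy scalars, which the json module cannot serialize directly,
# hence the `default` handler casting them to plain floats.
import json

def save_test_report(out_dict, path="test_results.json"):
    with open(path, "w") as f:
        json.dump(out_dict, f, indent=2, default=float)

# Usage (assumed): save_test_report(experiment.run_test())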
def run_test(self):
    """
    This runs the test cycle on the test dataset.
    Note that the process and evaluations are quite different here:
    we are computing a lot more metrics and returning a dictionary
    that could later be persisted as JSON.
    """
    print("Testing...")
    self.model.eval()

    # In this method we will be computing metrics that are relevant to the task of 3D volume
    # segmentation. Therefore, unlike the train and validation methods, we will do inference
    # on full 3D volumes, much like we will be doing when we deploy the model in the
    # clinical environment.

    # Instantiate inference agent
    inference_agent = UNetInferenceAgent(model=self.model, device=self.device)

    out_dict = {}
    out_dict["volume_stats"] = []
    dc_list = []
    jc_list = []

    # for every volume in the test set
    for i, x in enumerate(self.test_data):
        pred_label = inference_agent.single_volume_inference(x["image"])

        # We compute and report Dice and Jaccard similarity coefficients, which
        # assess how close our volumes are to each other.
        dc = Dice3d(pred_label, x["seg"])
        jc = Jaccard3d(pred_label, x["seg"])
        dc_list.append(dc)
        jc_list.append(jc)

        out_dict["volume_stats"].append({
            "filename": x['filename'],
            "dice": dc,
            "jaccard": jc
        })
        print(f"{x['filename']} Dice {dc:.4f} Jaccard {jc:.4f} "
              f"{100*(i+1)/len(self.test_data):.2f}% complete")

    mean_dice = np.mean(dc_list)
    mean_jaccard = np.mean(jc_list)
    print(f"Mean Dice {mean_dice:.4f} Mean Jaccard {mean_jaccard:.4f}")

    out_dict["overall"] = {
        "mean_dice": mean_dice,
        "mean_jaccard": mean_jaccard}

    print("\nTesting complete.")
    return out_dict
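# The variants above rely on UNetInferenceAgent.single_volume_inference to turn a 3D
# volume into a 3D label map. Below is a minimal, assumed sketch of what slice-by-slice
# inference with a 2D segmentation model could look like; it is not necessarily how
# UNetInferenceAgent is actually implemented, and the function name is hypothetical.
import numpy as np
import torch

def single_volume_inference_sketch(model, volume, device="cpu"):
    # volume: numpy array of shape [slices, H, W]
    model.eval()
    slices = []
    with torch.no_grad():
        for slc in volume:
            # add batch and channel dimensions: [1, 1, H, W]
            t = torch.from_numpy(slc.astype(np.float32)).unsqueeze(0).unsqueeze(0).to(device)
            logits = model(t)                   # [1, num_classes, H, W]
            pred = torch.argmax(logits, dim=1)  # class index per pixel
            slices.append(pred.squeeze(0).cpu().numpy())
    return np.stack(slices)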
def run_test(self):
    """
    This runs the test cycle on the test dataset.
    Note that the process and evaluations are quite different here:
    we are computing a lot more metrics and returning a dictionary
    that could later be persisted as JSON.
    """
    print("Testing...")
    self.model.eval()

    inference_agent = UNetInferenceAgent(model=self.model, device=self.device)

    out_dict = {}
    out_dict["volume_stats"] = []
    dc_list = []
    jc_list = []

    # for every volume in the test set
    for i, x in enumerate(self.test_data):
        pred_label = inference_agent.single_volume_inference(x["image"])

        # We compute and report Dice and Jaccard similarity coefficients, which
        # assess how close our volumes are to each other.
        dc = Dice3d(pred_label, x["seg"])
        jc = Jaccard3d(pred_label, x["seg"])
        dc_list.append(dc)
        jc_list.append(jc)

        # STAND-OUT SUGGESTION: By way of exercise, consider also outputting:
        # * Sensitivity and specificity (and explain semantic meaning in terms of
        #   under/over segmenting)
        # * Dice-per-slice and render combined slices with lowest and highest DpS
        # * Dice per class (anterior/posterior)
        out_dict["volume_stats"].append({
            "filename": x['filename'],
            "dice": dc,
            "jaccard": jc
        })
        print(
            f"{x['filename']} Dice {dc:.4f}. {100*(i+1)/len(self.test_data):.2f}% complete"
        )

    out_dict["overall"] = {
        "mean_dice": np.mean(dc_list),
        "mean_jaccard": np.mean(jc_list)
    }

    print("\nTesting complete.")
    return out_dict
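# Dice3d and Jaccard3d are referenced above but not defined in this file. The sketch
# below is one plausible implementation, assuming any non-zero voxel counts as
# foreground; the project's own definitions (e.g. multi-class handling) may differ.
import numpy as np

def Dice3d(a, b):
    # Dice similarity coefficient: 2*|A ∩ B| / (|A| + |B|)
    a = np.asarray(a) > 0
    b = np.asarray(b) > 0
    total = a.sum() + b.sum()
    if total == 0:
        return 1.0  # both volumes empty: treat as perfect agreement
    return 2.0 * np.logical_and(a, b).sum() / total

def Jaccard3d(a, b):
    # Jaccard index: |A ∩ B| / |A ∪ B|
    a = np.asarray(a) > 0
    b = np.asarray(b) > 0
    union = np.logical_or(a, b).sum()
    if union == 0:
        return 1.0
    return np.logical_and(a, b).sum() / union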
def run_test(self):
    """
    This runs the test cycle on the test dataset.
    Note that the process and evaluations are quite different here:
    we are computing a lot more metrics and returning a dictionary
    that could later be persisted as JSON.
    """
    print("Testing...")
    # load_model_parameters('/home/dev/Documents/github/nd320-c3-3d-imaging-starter/section2/src/2020-06-08_1647_Basic_unet/model.pth')
    self.model.eval()

    # In this method we will be computing metrics that are relevant to the task of 3D volume
    # segmentation. Therefore, unlike the train and validation methods, we will do inference
    # on full 3D volumes, much like we will be doing when we deploy the model in the
    # clinical environment.

    # TASK: Inference Agent is not complete. Go and finish it. Feel free to test the class
    # in a module of your own by running it against one of the data samples
    inference_agent = UNetInferenceAgent(model=self.model, device=self.device)

    out_dict = {}
    out_dict["volume_stats"] = []
    dc_list = []
    jc_list = []

    # print('self.test_data.shape: ', self.test_data.shape)

    # for every volume in the test set
    for i, x in enumerate(self.test_data):
        print('filename being tested: ', x["filename"])

        pred_label = inference_agent.single_volume_inference(x["image"])

        # Debug: persist this particular sample and its prediction for offline inspection
        if x["filename"] == 'hippocampus_150.nii.gz':
            pickle.dump(x["image"], open("image_150.p", "wb"))
            pickle.dump(pred_label, open("label_150.p", "wb"))

        # We compute and report Dice and Jaccard similarity coefficients, which
        # assess how close our volumes are to each other.
        # TASK: Dice3D and Jaccard3D functions are not implemented.
        # Complete the implementation as we discussed in one of the course lessons;
        # you can look up the definition of the Jaccard index on Wikipedia. If you
        # completed it correctly (and if you picked your train/val/test split right ;)),
        # your average Jaccard on your test set should be around 0.80.
        dc = Dice3d(pred_label, x["seg"])
        jc = Jaccard3d(pred_label, x["seg"])
        dc_list.append(dc)
        jc_list.append(jc)

        # STAND-OUT SUGGESTION: By way of exercise, consider also outputting:
        # * Sensitivity and specificity (and explain semantic meaning in terms of
        #   under/over segmenting)
        # * Dice-per-slice and render combined slices with lowest and highest DpS
        # * Dice per class (anterior/posterior)
        out_dict["volume_stats"].append({
            "filename": x['filename'],
            "dice": dc,
            "jaccard": jc
        })
        print(
            f"{x['filename']} Dice {dc:.4f} and Jaccard {jc:.4f}. "
            f"{100*(i+1)/len(self.test_data):.2f}% complete"
        )
        # break

    out_dict["overall"] = {
        "mean_dice": np.mean(dc_list),
        "mean_jaccard": np.mean(jc_list)
    }

    print("\nTesting complete.")
    return out_dict
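# Companion debugging snippet (hypothetical, not part of the original file) showing
# how the pickled volume and prediction saved above could be reloaded and sanity-checked:
import pickle

def inspect_debug_dump(image_path="image_150.p", label_path="label_150.p"):
    with open(image_path, "rb") as f:
        image = pickle.load(f)
    with open(label_path, "rb") as f:
        label = pickle.load(f)
    print("image shape:", image.shape, "prediction shape:", label.shape)
    return image, label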
def run_test(self):
    """
    This runs the test cycle on the test dataset.
    Note that the process and evaluations are quite different here:
    we are computing a lot more metrics and returning a dictionary
    that could later be persisted as JSON.
    """
    self.model.eval()

    # In this method we will be computing metrics that are relevant to the task of 3D volume
    # segmentation. Therefore, unlike the train and validation methods, we will do inference
    # on full 3D volumes, much like we will be doing when we deploy the model in the
    # clinical environment.

    # TASK: Inference Agent is not complete. Go and finish it. Feel free to test the class
    # in a module of your own by running it against one of the data samples
    inference_agent = UNetInferenceAgent(model=self.model, device=self.device)

    out_dict = {}
    out_dict["volume_stats"] = []
    dc_list = []
    jc_list = []
    sens_list = []
    spec_list = []
    # f1_list = []

    # for every volume in the test set
    for i, x in enumerate(self.test_data):
        gt = x["seg"]                                  # test image ground truth
        ti = x["image"]                                # test image data
        original_filename = x['filename']              # test image file name
        pred_filename = 'predicted_' + x['filename']   # predicted label file name

        file_path = os.path.join("..", "data", "images", original_filename)
        original_images = nib.load(file_path)

        pred = inference_agent.single_volume_inference(ti)
        mask3d = np.array(torch.argmax(pred, dim=1))

        # Save predicted labels to the local environment for further verification,
        # using the original image's NIFTI coordinate system
        pred_coord = nib.Nifti1Image(mask3d, original_images.affine)
        pred_out_path = os.path.join("..", "data", "preds")
        pred_out_file = os.path.join(pred_out_path, pred_filename)
        if not os.path.exists(pred_out_path):
            os.makedirs(pred_out_path)
        nib.save(pred_coord, pred_out_file)

        # We compute and report Dice and Jaccard similarity coefficients, which
        # assess how close our volumes are to each other.
        # TASK: Dice3D and Jaccard3D functions are not implemented.
        # Complete the implementation as we discussed in one of the course lessons;
        # you can look up the definition of the Jaccard index on Wikipedia. If you
        # completed it correctly (and if you picked your train/val/test split right ;)),
        # your average Jaccard on your test set should be around 0.80.

        # a - prediction
        # b - ground truth
        dc = Dice3d(mask3d, gt)
        dc_list.append(dc)
        jc = Jaccard3d(mask3d, gt)
        jc_list.append(jc)
        sens = Sensitivity(mask3d, gt)
        sens_list.append(sens)
        spec = Specificity(mask3d, gt)
        spec_list.append(spec)
        # f1 = F1_score(mask3d, gt)
        # f1_list.append(f1)

        # STAND-OUT SUGGESTION: By way of exercise, consider also outputting:
        # * Sensitivity and specificity (and explain semantic meaning in terms of
        #   under/over segmenting)
        # * Dice-per-slice and render combined slices with lowest and highest DpS
        # * Dice per class (anterior/posterior)
        out_dict["volume_stats"].append({
            "filename": x['filename'],
            "dice": dc,
            "jaccard": jc,
            "sensitivity": sens,
            "specificity": spec,
            # "f1": f1,
        })
        print(
            f"{x['filename']} Dice {dc:.4f}, Jaccard {jc:.4f}, Sensitivity {sens:.4f}, "
            f"and Specificity {spec:.4f}. {100*(i+1)/len(self.test_data):.2f}% complete"
        )

    avg_dc = np.mean(dc_list)
    avg_jc = np.mean(jc_list)
    avg_sens = np.mean(sens_list)
    avg_spec = np.mean(spec_list)
    # avg_f1 = np.mean(f1_list)

    out_dict["overall"] = {
        "mean_dice": avg_dc,
        "mean_jaccard": avg_jc,
        "mean_sensitivity": avg_sens,
        "mean_specificity": avg_spec,
        # "mean_f1": avg_f1,
    }

    print("\nTesting complete.")
    print("------------------------------")
    print(
        f"Average Dice {avg_dc:.4f}, Average Jaccard {avg_jc:.4f}, "
        f"Average Sensitivity {avg_sens:.4f}, and Average Specificity {avg_spec:.4f}"
    )
    return out_dict
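# The variant above also reports Sensitivity and Specificity. A plausible sketch of
# those helpers is given below (an assumption: binary foreground/background masks,
# not necessarily the project's exact definitions). Low sensitivity indicates
# under-segmentation (missed foreground voxels); low specificity indicates
# over-segmentation (background voxels labelled as foreground).
import numpy as np

def Sensitivity(pred, gt):
    # True positive rate: TP / (TP + FN)
    pred = np.asarray(pred) > 0
    gt = np.asarray(gt) > 0
    tp = np.logical_and(pred, gt).sum()
    fn = np.logical_and(~pred, gt).sum()
    return tp / (tp + fn) if (tp + fn) > 0 else 1.0

def Specificity(pred, gt):
    # True negative rate: TN / (TN + FP)
    pred = np.asarray(pred) > 0
    gt = np.asarray(gt) > 0
    tn = np.logical_and(~pred, ~gt).sum()
    fp = np.logical_and(pred, ~gt).sum()
    return tn / (tn + fp) if (tn + fp) > 0 else 1.0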
def run_test(self):
    """
    This runs the test cycle on the test dataset.
    Note that the process and evaluations are quite different here:
    we are computing a lot more metrics and returning a dictionary
    that could later be persisted as JSON.
    """
    print("Testing...")
    model_dir = "C://Data"
    self.load_model_parameters(path=model_dir)
    self.model.eval()

    # In this method we will be computing metrics that are relevant to the task of 3D volume
    # segmentation. Therefore, unlike the train and validation methods, we will do inference
    # on full 3D volumes, much like we will be doing when we deploy the model in the
    # clinical environment.

    # TASK: Inference Agent is not complete. Go and finish it. Feel free to test the class
    # in a module of your own by running it against one of the data samples
    inference_agent = UNetInferenceAgent(model=self.model, device=self.device)
    print("Testing...2")

    out_dict = {}
    out_dict["volume_stats"] = []
    dc_list = []
    jc_list = []
    lr_list = []
    print(len(self.test_data))

    # for every volume in the test set
    for i, x in enumerate(self.test_data):
        print("Testing...loop")
        pred_label = inference_agent.single_volume_inference(x["image"])
        # print(np.nonzero(x["seg"]))
        # print(np.nonzero(pred_label))

        # We compute and report Dice and Jaccard similarity coefficients, which
        # assess how close our volumes are to each other.
        # TASK: Dice3D and Jaccard3D functions are not implemented.
        # Complete the implementation as we discussed in one of the course lessons;
        # you can look up the definition of the Jaccard index on Wikipedia. If you
        # completed it correctly (and if you picked your train/val/test split right ;)),
        # your average Jaccard on your test set should be around 0.80.
        dc = Dice3d(pred_label, x["seg"])
        jc = Jaccard3d(pred_label, x["seg"])
        lr = Likelihoodratio(pred_label, x["seg"])
        dc_list.append(dc)
        jc_list.append(jc)
        lr_list.append(lr)

        # STAND-OUT SUGGESTION: By way of exercise, consider also outputting:
        # * Sensitivity and specificity (and explain semantic meaning in terms of
        #   under/over segmenting)
        # * Dice-per-slice and render combined slices with lowest and highest DpS
        # * Dice per class (anterior/posterior)
        out_dict["volume_stats"].append({
            "filename": x['filename'],
            "dice": dc,
            "jaccard": jc,
            "likelihood": lr
        })
        print(
            f"{x['filename']} Dice {dc:.4f}. {100*(i+1)/len(self.test_data):.2f}% complete"
        )

    out_dict["overall"] = {
        "mean_dice": np.mean(dc_list),
        "mean_jaccard": np.mean(jc_list),
        "mean_likelihood": np.mean(lr_list)
    }

    print("\nTesting complete.")
    return out_dict
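# Likelihoodratio is project-specific and not defined in this file. One plausible
# reading (an assumption, not the confirmed definition) is the positive likelihood
# ratio, sensitivity / (1 - specificity), computed from voxel-wise counts on binary masks:
import numpy as np

def Likelihoodratio(pred, gt, eps=1e-8):
    pred = np.asarray(pred) > 0
    gt = np.asarray(gt) > 0
    sensitivity = np.logical_and(pred, gt).sum() / (gt.sum() + eps)
    specificity = np.logical_and(~pred, ~gt).sum() / ((~gt).sum() + eps)
    # Higher values mean a foreground prediction is more informative
    return sensitivity / (1.0 - specificity + eps)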