f'train/{image_id}.dicom', path='D:/GWU/4 Spring 2021/6501 Capstone/VBD CXR/PyCharm ' 'Workspace/vbd_cxr/7_POC/DICOMs') # %% -------------------- # extract zip extract_files( f"D:/GWU/4 Spring 2021/6501 Capstone/VBD CXR/PyCharm Workspace" f"/vbd_cxr/7_POC/DICOMs/{image_id}.dicom.zip", "D:/GWU/4 Spring 2021/6501 Capstone/VBD CXR/PyCharm Workspace" f"/vbd_cxr/7_POC/DICOMs/", True) # %% -------------------- raw_img_arr = dicom2array( f"D:/GWU/4 Spring 2021/6501 Capstone/VBD CXR/PyCharm Workspace/" f"vbd_cxr/7_POC/DICOMs/{image_id}.dicom") raw_img_bb_info = get_bb_info(train_df, image_id, ["x_min", "y_min", "x_max", "y_max", "class_id"]) # %% -------------------- # view image as DICOM w/ bounding boxes bounding_box_plotter(raw_img_arr, image_id, raw_img_bb_info, get_label_2_color_dict()) # %% -------------------- # transform image using albumentations augmentor = albumentations.Compose( [ # augmentation operations albumentations.augmentations.transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.3,
# ground truth rows for the 10 percent holdout split
holdout_gt_csv = (
    "D:/GWU/4 Spring 2021/6501 Capstone/VBD CXR/PyCharm Workspace/vbd_cxr/2_data_split/512/unmerged/10_percent_holdout/holdout_df.csv"
)
gt = pd.read_csv(holdout_gt_csv)

# predictions produced for the same holdout split
holdout_predictions_csv = (
    "D:/GWU/4 Spring 2021/6501 Capstone/VBD CXR/PyCharm Workspace/vbd_cxr/5_inference_on_holdout_10_percent/0_predictions/holdout_ensemble_classification_object_detection.csv"
)
predictions = pd.read_csv(holdout_predictions_csv)

# %% --------------------
# label -> color lookup handed to the plotting helper
label2color = get_label_2_color_dict()

# %% --------------------
# every distinct image id that appears in the ground truth
original_image_ids = gt["image_id"].unique()

# %% --------------------
# the ground truth frame names its class column "class_id", the predictions frame names it "label"
gt_columns = ['x_min', 'y_min', 'x_max', 'y_max', "class_id"]
prediction_columns = ['x_min', 'y_min', 'x_max', 'y_max', "label"]

# only the first 10 image ids are compared
for image_id in original_image_ids[:10]:
    image_path = f"D:/GWU/4 Spring 2021/6501 Capstone/VBD CXR/PyCharm Workspace/vbd_cxr/input_data/512x512/train/{image_id}.png"
    img_as_arr = get_image_as_array(image_path)

    # %% --------------------
    # left panel: ground truth boxes, right panel: predicted boxes
    left = get_bb_info(gt, image_id, gt_columns)
    right = get_bb_info(predictions, image_id, prediction_columns)

    # %% --------------------
    bounding_box_plotter_side_to_side(
        img_as_arr,
        image_id,
        left,
        right,
        "Ground Truth",
        "Predictions",
        label2color,
    )
13: ("Pulmonary fibrosis", "#e75480"), 14: ("No finding", "#ffffff") } # %% -------------------- # IMAGE DIR img_dir = "D:/GWU/4 Spring 2021/6501 Capstone/VBD CXR/PyCharm " \ "Workspace/vbd_cxr/9_data/512/transformed_data/train" # %% -------------------- # ANNOTATION DIR train_data = pd.read_csv( "D:/GWU/4 Spring 2021/6501 Capstone/VBD CXR/PyCharm " "Workspace/vbd_cxr/1_merger/wbf_merged/90_percent_train/object_detection/95_percent" "/80_percent/train_df_0.csv") # %% -------------------- # image_ids = train_data["image_id"].unique() image_ids = ["e1a4353d3e747a7150cb06cac73f4d6f"] # shuffle is inplace operation random.shuffle(image_ids) for img in image_ids[:10]: img_array = get_image_as_array(f"{img_dir}/{img}.jpeg") # get bounding box info img_bb_info = get_bb_info(train_data, img, ['x_min', 'y_min', 'x_max', 'y_max', "class_id"]) # plot image with bounding boxes bounding_box_plotter(img_array, img, img_bb_info, label2color, save_title_or_plot="plot")
def rescaler(predicted_df, df_with_original_dimension, source_height_col, source_width_col,
             target_height_col, target_width_col, columns=None):
    """
    Rescale the bounding boxes of every image in ``predicted_df`` from the source image
    dimensions to the target image dimensions and return them as a new data frame.

    For each unique ``image_id`` in ``predicted_df`` the source and target width/height are
    looked up in the frame returned by ``extract_dimension_df(df_with_original_dimension)``,
    the box coordinates are resized with ``resize_bb_w_h`` and one output row is emitted per
    box. Boxes whose label equals 14 (the "no findings" class) are always written as
    ``0, 0, 1, 1`` regardless of their resized coordinates.

    :param predicted_df: data frame with an ``image_id`` column plus the columns named in
        ``columns``
    :param df_with_original_dimension: data frame handed to ``extract_dimension_df``; the
        result is indexed with ``.loc[image_id, ...]`` so it must be addressable by image id
    :param source_height_col: name of the column holding the height the boxes currently refer to
    :param source_width_col: name of the column holding the width the boxes currently refer to
    :param target_height_col: name of the column holding the height to rescale to
    :param target_width_col: name of the column holding the width to rescale to
    :param columns: columns fetched through ``get_bb_info``. They are consumed by position:
        0-3 are x_min, y_min, x_max, y_max, 4 is the label and 5 is the confidence score.
        Defaults to ``["x_min", "y_min", "x_max", "y_max", "label", "confidence_score"]``.
    :return: data frame with the columns ``image_id``, ``x_min``, ``y_min``, ``x_max``,
        ``y_max``, ``label`` and ``confidence_score``
    """
    # build the default per call instead of sharing one mutable list object across calls
    if columns is None:
        columns = ["x_min", "y_min", "x_max", "y_max", "label", "confidence_score"]

    # class 14 is the no findings class and should be represented as 0,0,1,1
    no_finding_label = 14

    # get the dimensions from data frame containing repeated rows of image id
    extracted_dimension_df = extract_dimension_df(df_with_original_dimension)

    # one list per output column, filled row by row
    scaled_columns = {
        "image_id": [],
        "x_min": [],
        "y_min": [],
        "x_max": [],
        "y_max": [],
        "label": [],
        "confidence_score": [],
    }

    for img in predicted_df["image_id"].unique():
        target_width, target_height = extracted_dimension_df.loc[
            img, [target_width_col, target_height_col]]
        source_width, source_height = extracted_dimension_df.loc[
            img, [source_width_col, source_height_col]]

        bounding_boxes_info = get_bb_info(predicted_df, img, columns)

        # rescale the box coordinates from the source dimensions to the target dimensions
        # NOTE(review): the label column (index 4) is passed in together with the coordinates
        # while the result is written back to the four coordinate columns only -- confirm
        # that resize_bb_w_h returns just the four coordinates
        bounding_boxes_info[:, [0, 1, 2, 3]] = resize_bb_w_h(
            bounding_boxes_info[:, [0, 1, 2, 3, 4]], source_width, source_height,
            target_width, target_height)

        for box in bounding_boxes_info:
            label = box[4]
            if label == no_finding_label:
                x_min, y_min, x_max, y_max = 0, 0, 1, 1
            else:
                x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3]

            scaled_columns["image_id"].append(img)
            scaled_columns["x_min"].append(x_min)
            scaled_columns["y_min"].append(y_min)
            scaled_columns["x_max"].append(x_max)
            scaled_columns["y_max"].append(y_max)
            scaled_columns["label"].append(label)
            scaled_columns["confidence_score"].append(box[5])

    scaled_data = pd.DataFrame(scaled_columns)
    return scaled_data