def generate_blob_npz(mhd_file):
    """Run candidate extraction for one scan unless its blob already exists.

    The patient id is the base name of ``mhd_file`` minus its last four
    characters.  If ``BLOB_IMG + patient_id + '.npz'`` is missing, the
    patient's "*_i.png" and "*_m.png" image stacks are loaded from
    ``images_path`` and handed, together with the module-level ``model``, to
    ``unet_predict_api.get_coordzyx_candidate``.

    Args:
        mhd_file: Path of the scan file; only its base name is used.
    """
    global images_path
    base_name = os.path.basename(mhd_file)
    patient_id = base_name[:-4]

    blob_file = BLOB_IMG + patient_id + '.npz'
    if os.path.exists(blob_file):
        # Nothing to do: the blob was produced by an earlier run.
        return

    # The original author annotates both stacks as ordered (z, y, x).
    image_stack = helpers.load_patient_images(patient_id, images_path, "*_i.png")
    mask_stack = helpers.load_patient_images(patient_id, images_path, "*_m.png")
    print(patient_id, "shape", image_stack.shape, mask_stack.shape)

    # Per the original note, this call saves the predicted probability cube
    # as an npz file.
    unet_predict_api.get_coordzyx_candidate(
        model, patient_id, image_stack, mask_stack, False)
def make_pos_annotation_images():
    """Write one cube image per positive LUNA16 annotation.

    Every file matching ``*.*`` in the destination folder is removed first.
    For each ``*_annos_pos.csv`` in the metadata folder the patient's images
    are loaded, the fractional annotation coordinates are scaled by the image
    stack's shape, and the cube returned by ``get_cube_from_img`` is written
    with ``save_cube_img``.  Cubes whose sum is below 5 are skipped; cubes
    whose mean is below 10 are reported but still saved.
    """
    metadata_dir = settings.LUNA_16_TRAIN_DIR2D2 + "metadata/"
    output_dir = settings.BASE_DIR_SSD + "luna16_train_cubes_pos/"

    # Start from an empty output folder.
    for old_file in glob.glob(output_dir + "*.*"):
        os.remove(old_file)

    annotation_csvs = glob.glob(metadata_dir + "*_annos_pos.csv")
    for patient_index, csv_file in enumerate(annotation_csvs):
        patient_id = ntpath.basename(csv_file).replace("_annos_pos.csv", "")
        df_annos = pandas.read_csv(csv_file)
        if len(df_annos) == 0:
            continue

        image_pattern = "*" + CUBE_IMGTYPE_SRC + ".png"
        images = helpers.load_patient_images(
            patient_id, settings.LUNA_16_TRAIN_DIR2D2, image_pattern)

        for _, anno in df_annos.iterrows():
            # Coordinates and diameter are stored as fractions of the volume.
            coord_x = int(anno["coord_x"] * images.shape[2])
            coord_y = int(anno["coord_y"] * images.shape[1])
            coord_z = int(anno["coord_z"] * images.shape[0])
            diam_mm = int(anno["diameter"] * images.shape[2])
            anno_index = int(anno["anno_index"])

            cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 64)
            if cube_img.sum() < 5:
                print(" ***** Skipping ", coord_x, coord_y, coord_z)
                continue
            if cube_img.mean() < 10:
                print(" ***** Suspicious ", coord_x, coord_y, coord_z)

            target_name = (patient_id + "_" + str(anno_index) + "_"
                           + str(diam_mm) + "_1_" + "pos.png")
            save_cube_img(output_dir + target_name, cube_img, 8, 8)

        helpers.print_tabbed([patient_index, patient_id, len(df_annos)],
                             [5, 64, 8])
def make_pos_annotation_images_manual():
    """Cut 64x64x64 cubes around manually labelled LUNA16 positions.

    Reads every CSV in ``resources/luna16_manual_labels/`` whose name contains
    "1.3.6.1.4", loads the matching patient images and writes one cube image
    per CSV row.  The file name encodes the row index, a score derived from
    the scaled diameter (clamped to 16..25) and "pos"/"neg" (``id == 0`` is
    "pos").  Rows are skipped when the cube sum is below 5 or the cube is not
    exactly 64x64x64.

    Patients whose images cannot be loaded are skipped on purpose; the reason
    is now printed instead of being swallowed by a bare ``except``.
    """
    src_dir = "resources/luna16_manual_labels/"
    dst_dir = settings.BASE_DIR_SSD + "generated_traindata/luna16_train_cubes_manual/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)

    # NOTE(review): cubes are saved as "*_pos.png" / "*_neg.png", which this
    # "*_manual.*" pattern does not obviously match -- confirm cleanup intent.
    for file_path in glob.glob(dst_dir + "*_manual.*"):
        os.remove(file_path)

    for patient_index, csv_file in enumerate(glob.glob(src_dir + "*.csv")):
        patient_id = ntpath.basename(csv_file).replace(".csv", "")
        if "1.3.6.1.4" not in patient_id:
            continue

        print(patient_id)
        df_annos = pandas.read_csv(csv_file)

        try:
            images = helpers.load_patient_images(
                patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR,
                "*" + CUBE_IMGTYPE_SRC + ".png")
        except Exception as ex:
            # Best effort: skip this patient, but say why.  Catching Exception
            # (not a bare except) keeps Ctrl-C / SystemExit working.
            print(" ***** Could not load images for ", patient_id, ": ", ex)
            continue

        for anno_index, row in df_annos.iterrows():
            # Label values are fractions of the volume; scale to voxel units.
            coord_x = int(row["x"] * images.shape[2])
            coord_y = int(row["y"] * images.shape[1])
            coord_z = int(row["z"] * images.shape[0])
            diameter = int(row["d"] * images.shape[2])
            node_type = int(row["id"])
            # The score written to the file name is the diameter clamped to 16..25.
            malscore = max(16, min(25, diameter))

            cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 64)
            if cube_img.sum() < 5:
                print(" ***** Skipping ", coord_x, coord_y, coord_z)
                continue

            if cube_img.mean() < 10:
                print(" ***** Suspicious ", coord_x, coord_y, coord_z)

            if cube_img.shape != (64, 64, 64):
                print(" ***** incorrect shape !!! ", str(anno_index), " - ",
                      (coord_x, coord_y, coord_z))
                continue

            save_cube_img(
                dst_dir + patient_id + "_" + str(anno_index) + "_" +
                str(malscore) + "_1_" + ("pos" if node_type == 0 else "neg") +
                ".png", cube_img, 8, 8)

        helpers.print_tabbed([patient_index, patient_id, len(df_annos)], [5, 64, 8])
def make_candidate_auto_images(candidate_types=None):
    """Write 48-cube images for automatically generated candidates.

    For every requested candidate type the previously generated
    ``*_<type>.png`` files are removed, then each
    ``*_candidates_<type>.csv`` label file is read and one cube per row is
    saved.  Cubes whose sum is below 10 are skipped and do not count towards
    the per-patient limit.

    Args:
        candidate_types: Iterable of candidate type names.  ``None`` (the
            default) behaves like an empty list.  The default is no longer a
            shared mutable list.
    """
    candidate_types = [] if candidate_types is None else list(candidate_types)

    dst_dir = settings.BASE_DIR_SSD + "generated_traindata/luna16_train_cubes_auto/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)

    for candidate_type in candidate_types:
        for file_path in glob.glob(dst_dir + "*_" + candidate_type + ".png"):
            os.remove(file_path)

    for candidate_type in candidate_types:
        if candidate_type == "falsepos":
            src_dir = "resources/luna16_falsepos_labels/"
        else:
            src_dir = settings.TRAIN_EXTRACTED_IMAGE_DIR + "_labels/"

        # Per-patient limit; depends only on the type, so computed once here.
        if candidate_type == "luna":
            max_item = 500
        elif candidate_type == "white":
            max_item = 240
        else:
            max_item = 200

        csv_suffix = "_candidates_" + candidate_type + ".csv"
        for index, csv_file in enumerate(glob.glob(src_dir + "*" + csv_suffix)):
            patient_id = ntpath.basename(csv_file).replace(csv_suffix, "")
            print(index, ",patient: ", patient_id, " type:", candidate_type)

            df_annos = pandas.read_csv(csv_file)
            if len(df_annos) == 0:
                continue

            images = helpers.load_patient_images(
                patient_id, settings.TRAIN_EXTRACTED_IMAGE_DIR,
                "*" + CUBE_IMGTYPE_SRC + ".png", exclude_wildcards=[])

            row_no = 0
            for _, row in df_annos.iterrows():
                coord_x = int(row["coord_x"] * images.shape[2])
                coord_y = int(row["coord_y"] * images.shape[1])
                coord_z = int(row["coord_z"] * images.shape[0])
                anno_index = int(row["anno_index"])

                cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 48)
                if cube_img.sum() < 10:
                    print("Skipping ", coord_x, coord_y, coord_z)
                    continue

                try:
                    save_cube_img(
                        dst_dir + patient_id + "_" + str(anno_index) + "_0_" +
                        candidate_type + ".png", cube_img, 6, 8)
                except Exception as ex:
                    # A failed save is reported but still counts as processed.
                    print(ex)

                row_no += 1
                # Unchanged from the original: the loop stops once more than
                # max_item rows have been processed for this patient.
                if row_no > max_item:
                    break
def filter_patient_nodules_predictions(df_nodule_predictions: pandas.DataFrame, patient_id, view_size, luna16=False):
    """Flag predicted nodules that fall outside the patient's mask.

    For every prediction a ``view_size`` x ``view_size`` window centred on the
    nodule is inspected in the mask slice at the nodule's z position and in
    its two neighbours.  If none of the windows sums to more than 255, or the
    nodule lies in one of the first 30 slices, a positive ``diameter_mm`` is
    made negative in place.  Two further heuristics only log a warning.  No
    rows are removed.

    Fixes over the previous version:
      * neighbouring slices outside the volume are skipped instead of
        wrapping to the last slice (index -1) or raising IndexError;
      * the window is clamped to the image, because a negative slice start is
        interpreted relative to the end of the axis.

    Args:
        df_nodule_predictions: Predictions with fractional ``coord_x``,
            ``coord_y``, ``coord_z`` columns and a ``diameter_mm`` column.
            Modified in place.
        patient_id: Patient whose "*_m.png" mask images are loaded.
        view_size: Edge length of the inspected window; converted to int.
        luna16: Selects the LUNA16 image directory instead of the NDSB3 one.

    Returns:
        The same ``df_nodule_predictions`` object.
    """
    src_dir = settings.LUNA_16_TRAIN_DIR2D2 if luna16 else settings.NDSB3_EXTRACTED_IMAGE_DIR
    patient_mask = helpers.load_patient_images(patient_id, src_dir, "*_m.png")
    slice_count = patient_mask.shape[0]
    view_size = int(view_size)

    for index, row in df_nodule_predictions.iterrows():
        z_perc = row["coord_z"]
        y_perc = row["coord_y"]
        center_x = int(round(row["coord_x"] * patient_mask.shape[2]))
        center_y = int(round(y_perc * patient_mask.shape[1]))
        center_z = int(round(z_perc * patient_mask.shape[0]))
        mal_score = row["diameter_mm"]

        start_y = int(center_y - view_size / 2)
        start_x = int(center_x - view_size / 2)
        # Clamp the window to the image so negative bounds do not wrap around.
        y_from, y_to = max(start_y, 0), max(start_y + view_size, 0)
        x_from, x_to = max(start_x, 0), max(start_x + view_size, 0)

        nodule_in_mask = False
        for z_offset in (-1, 0, 1):
            z_slice = center_z + z_offset
            if not 0 <= z_slice < slice_count:
                # Neighbour lies outside the scan; nothing to inspect.
                continue
            img_roi = patient_mask[z_slice][y_from:y_to, x_from:x_to]
            if img_roi.sum() > 255:  # more than 1 pixel of mask.
                logger.info(
                    "More than 1 pixel of mask. nodule_in_mask is true")
                nodule_in_mask = True

        if not nodule_in_mask:
            logger.info("Nodule not in mask: {0} {1} {2}".format(
                center_x, center_y, center_z))
            if mal_score > 0:
                mal_score *= -1
            df_nodule_predictions.loc[index, "diameter_mm"] = mal_score
        else:
            if center_z < 30:
                logger.info("Z < 30: {0} center z: {1} y_perc: {2} ".format(
                    patient_id, center_z, y_perc))
                if mal_score > 0:
                    mal_score *= -1
                df_nodule_predictions.loc[index, "diameter_mm"] = mal_score

            # The remaining checks only report; they do not change the data.
            if (z_perc > 0.75 or z_perc < 0.25) and y_perc > 0.85:
                logger.info(
                    "SUSPICIOUS FALSEPOSITIVE: {0} center z: {1} y_perc: {2}"
                    .format(patient_id, center_z, y_perc))

            if center_z < 50 and y_perc < 0.30:
                logger.info(
                    "SUSPICIOUS FALSEPOSITIVE OUT OF RANGE: {0} center z: {1} y_perc: {2}"
                    .format(patient_id, center_z, y_perc))

    return df_nodule_predictions
def make_negative_train_data_based_on_predicted_luna_nodules():
    """Derive false-positive candidates from the LUNA nodule predictions.

    For every prediction CSV the predictions are first passed through
    ``filter_patient_nodules_predictions``.  Every prediction that still has
    a non-negative ``diameter_mm`` is then compared with the patient's
    positive labels and, if present, the manual annotations; predictions that
    are too close to either have their diameter negated.  The updated
    predictions overwrite the source CSV, and the rows with a non-negative
    diameter are written to ``<patient>_candidates_falsepos.csv``.
    """
    predictions_dir = settings.LUNA_NODULE_DETECTION_DIR
    pos_labels_dir = settings.LUNA_NODULE_LABELS_DIR
    keep_dist = CUBE_SIZE + CUBE_SIZE / 2
    total_false_pos = 0

    def _distance(ax, ay, az, bx, by, bz):
        # Euclidean distance between two pixel positions.
        return math.sqrt(math.pow(ax - bx, 2) + math.pow(ay - by, 2) + math.pow(az - bz, 2))

    for csv_index, csv_path in enumerate(glob.glob(predictions_dir + "*.csv")):
        patient_id = ntpath.basename(csv_path).replace(".csv", "")
        df_nodule_predictions = pandas.read_csv(csv_path)

        manual_path = settings.MANUAL_ANNOTATIONS_LABELS_DIR + patient_id + ".csv"
        pos_annos_manual = pandas.read_csv(manual_path) if os.path.exists(manual_path) else None

        filter_patient_nodules_predictions(df_nodule_predictions, patient_id, CUBE_SIZE, luna16=True)
        pos_labels = pandas.read_csv(pos_labels_dir + patient_id + "_annos_pos_lidc.csv")
        logger.info("csv_index {0} : patient_id {1} , pos {2}".format(csv_index, patient_id, len(pos_labels)))
        patient_imgs = helpers.load_patient_images(patient_id, settings.LUNA_16_TRAIN_DIR2D2, "*_m.png")

        for pred_index, pred_row in df_nodule_predictions.iterrows():
            if pred_row["diameter_mm"] < 0:
                # Already flagged.
                continue

            nx, ny, nz = helpers.percentage_to_pixels(
                pred_row["coord_x"], pred_row["coord_y"], pred_row["coord_z"], patient_imgs)
            diam_mm = pred_row["diameter_mm"]

            # Flag predictions that sit too close to a positive label.
            for _, label_row in pos_labels.iterrows():
                px, py, pz = helpers.percentage_to_pixels(
                    label_row["coord_x"], label_row["coord_y"], label_row["coord_z"], patient_imgs)
                if _distance(nx, ny, nz, px, py, pz) < keep_dist:
                    if diam_mm >= 0:
                        diam_mm = diam_mm * -1
                    df_nodule_predictions.loc[pred_index, "diameter_mm"] = diam_mm
                    break

            if pos_annos_manual is None:
                continue

            # Same check against the manual annotations, with a wider margin.
            for _, manual_row in pos_annos_manual.iterrows():
                px, py, pz = helpers.percentage_to_pixels(
                    manual_row["x"], manual_row["y"], manual_row["z"], patient_imgs)
                diameter = manual_row["d"] * patient_imgs[0].shape[1]
                if _distance(px, py, pz, nx, ny, nz) < (diameter + 72):  # make sure we have a big margin
                    if diam_mm >= 0:
                        diam_mm = diam_mm * -1
                    df_nodule_predictions.loc[pred_index, "diameter_mm"] = diam_mm
                    logger.info("#Too close: {0} {1} {2}".format(nx, ny, nz))
                    break

        df_nodule_predictions.to_csv(csv_path, index=False)
        still_positive = df_nodule_predictions["diameter_mm"] >= 0
        df_nodule_predictions = df_nodule_predictions[still_positive]
        df_nodule_predictions.to_csv(pos_labels_dir + patient_id + "_candidates_falsepos.csv", index=False)
        total_false_pos += len(df_nodule_predictions)

    logger.info("Total false pos: {0}".format(total_false_pos))
def make_pos_annotation_images_manual_ndsb3():
    """Cut 64x64x64 cubes around manually labelled NDSB3 positions.

    The destination folder is emptied, then every CSV in
    ``resources/ndsb3_manual_labels/`` whose name does not contain
    "1.3.6.1.4.1" is processed.  One cube image is written per row; the file
    name encodes the 1-based row number, "pos"/"neg" (``id == 0`` is "pos"),
    the patient's ``cancer`` value from ``stage1_labels.csv`` and the rounded
    ``dmm`` value.  Rows are skipped when the cube sum is below 5 or the cube
    is not exactly 64x64x64.

    Patients whose images cannot be loaded are skipped on purpose; the reason
    is now printed instead of being swallowed by a bare ``except``.

    Raises:
        AssertionError: If a "pos" row has a rounded ``dmm`` that is not
            positive.
    """
    src_dir = "resources/ndsb3_manual_labels/"
    dst_dir = settings.BASE_DIR_SSD + "generated_traindata/ndsb3_train_cubes_manual/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)

    train_label_df = pandas.read_csv("resources/stage1_labels.csv")
    train_label_df.set_index(["id"], inplace=True)

    for file_path in glob.glob(dst_dir + "*.*"):
        os.remove(file_path)

    for patient_index, csv_file in enumerate(glob.glob(src_dir + "*.csv")):
        patient_id = ntpath.basename(csv_file).replace(".csv", "")
        if "1.3.6.1.4.1" in patient_id:
            continue

        cancer_label = train_label_df.loc[patient_id]["cancer"]
        df_annos = pandas.read_csv(csv_file)

        try:
            images = helpers.load_patient_images(
                patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR,
                "*" + CUBE_IMGTYPE_SRC + ".png")
        except Exception as ex:
            # Best effort: skip this patient, but say why.  Catching Exception
            # (not a bare except) keeps Ctrl-C / SystemExit working.
            print(" ***** Could not load images for ", patient_id, ": ", ex)
            continue

        # anno_index counts every row, including the ones skipped below.
        for anno_index, (_, row) in enumerate(df_annos.iterrows(), start=1):
            pos_neg = "pos" if row["id"] == 0 else "neg"
            coord_x = int(row["x"] * images.shape[2])
            coord_y = int(row["y"] * images.shape[1])
            coord_z = int(row["z"] * images.shape[0])
            malscore = int(round(row["dmm"]))

            cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 64)
            if cube_img.sum() < 5:
                print(" ***** Skipping ", coord_x, coord_y, coord_z)
                continue

            if cube_img.mean() < 10:
                print(" ***** Suspicious ", coord_x, coord_y, coord_z)

            if cube_img.shape != (64, 64, 64):
                print(" ***** incorrect shape !!! ", str(anno_index), " - ",
                      (coord_x, coord_y, coord_z))
                continue

            print(patient_id)
            assert malscore > 0 or pos_neg == "neg"
            save_cube_img(
                dst_dir + "ndsb3manual_" + patient_id + "_" + str(anno_index) +
                "_" + pos_neg + "_" + str(cancer_label) + "_" + str(malscore) +
                "_1_pn.png", cube_img, 8, 8)

        helpers.print_tabbed([patient_index, patient_id, len(df_annos)], [5, 64, 8])
def make_annotation_images_lidc():
    """Write one 64x64x64 cube image per LIDC positive annotation.

    The destination folder is created if needed and emptied.  Every
    ``*_annos_pos_lidc.csv`` label file is read, the patient's images are
    loaded, and for each row the cube around the scaled coordinates is saved.
    Rows are skipped when the cube sum is below 5 or the cube is not exactly
    64x64x64.
    """
    labels_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"
    output_dir = settings.BASE_DIR_SSD + "generated_traindata/luna16_train_cubes_lidc/"
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    for old_file in glob.glob(output_dir + "*.*"):
        os.remove(old_file)

    # Characters that would clash with the "_"-separated file name layout.
    escapes = ((" ", "xspacex"), (".", "xpointx"), ("_", "xunderscorex"))

    # Read the CSV files that hold the nodule coordinates.
    label_csvs = glob.glob(labels_dir + "*_annos_pos_lidc.csv")
    for patient_index, csv_file in enumerate(label_csvs):
        patient_id = ntpath.basename(csv_file).replace("_annos_pos_lidc.csv", "")
        df_annos = pandas.read_csv(csv_file)
        if len(df_annos) == 0:
            continue

        # Load the patient's images.
        images = helpers.load_patient_images(
            patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR,
            "*" + CUBE_IMGTYPE_SRC + ".png")

        for _, anno in df_annos.iterrows():
            # Coordinates, scaled from fractions to voxel indices.
            coord_x = int(anno["coord_x"] * images.shape[2])
            coord_y = int(anno["coord_y"] * images.shape[1])
            coord_z = int(anno["coord_z"] * images.shape[0])
            # Malignancy score.
            malscore = int(anno["malscore"])

            anno_index = str(anno["anno_index"])
            for plain, escaped in escapes:
                anno_index = anno_index.replace(plain, escaped)

            # Build the 64x64x64 cube from the image stack.
            cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 64)
            if cube_img.sum() < 5:
                print(" ***** Skipping ", coord_x, coord_y, coord_z)
                continue
            if cube_img.mean() < 10:
                print(" ***** Suspicious ", coord_x, coord_y, coord_z)
            if cube_img.shape != (64, 64, 64):
                print(" ***** incorrect shape !!! ", str(anno_index), " - ",
                      (coord_x, coord_y, coord_z))
                continue

            # File name: patientID_annoIndex_malscore^2_1_pos.png
            file_name = (patient_id + "_" + str(anno_index) + "_"
                         + str(malscore * malscore) + "_1_pos.png")
            save_cube_img(output_dir + file_name, cube_img, 8, 8)

        helpers.print_tabbed([patient_index, patient_id, len(df_annos)],
                             [5, 64, 8])
def make_negative_train_data_based_on_predicted_luna_nodules():
    """Derive false-positive candidates from the LUNA nodule predictions.

    For every prediction CSV the predictions are first passed through
    ``filter_patient_nodules_predictions``.  Every prediction that still has
    a non-negative ``diameter_mm`` is then compared with the patient's
    positive labels and, if present, the manual annotations; predictions that
    are too close to either have their diameter negated.  The updated
    predictions overwrite the source CSV, and the rows with a non-negative
    diameter are written to ``<patient>_candidates_falsepos.csv``.
    """
    predictions_dir = settings.LUNA_NODULE_DETECTION_DIR
    pos_labels_dir = settings.LUNA_NODULE_LABELS_DIR
    keep_dist = CUBE_SIZE + CUBE_SIZE / 2
    total_false_pos = 0

    def _distance(ax, ay, az, bx, by, bz):
        # Euclidean distance between two pixel positions.
        return math.sqrt(math.pow(ax - bx, 2) + math.pow(ay - by, 2) + math.pow(az - bz, 2))

    for csv_index, csv_path in enumerate(glob.glob(predictions_dir + "*.csv")):
        patient_id = ntpath.basename(csv_path).replace(".csv", "")
        df_nodule_predictions = pandas.read_csv(csv_path)

        manual_path = settings.MANUAL_ANNOTATIONS_LABELS_DIR + patient_id + ".csv"
        pos_annos_manual = pandas.read_csv(manual_path) if os.path.exists(manual_path) else None

        filter_patient_nodules_predictions(df_nodule_predictions, patient_id, CUBE_SIZE, luna16=True)
        pos_labels = pandas.read_csv(pos_labels_dir + patient_id + "_annos_pos_lidc.csv")
        print(csv_index, ": ", patient_id, ", pos", len(pos_labels))
        patient_imgs = helpers.load_patient_images(patient_id, settings.LUNA_16_TRAIN_DIR2D2, "*_m.png")

        for pred_index, pred_row in df_nodule_predictions.iterrows():
            if pred_row["diameter_mm"] < 0:
                # Already flagged.
                continue

            nx, ny, nz = helpers.percentage_to_pixels(
                pred_row["coord_x"], pred_row["coord_y"], pred_row["coord_z"], patient_imgs)
            diam_mm = pred_row["diameter_mm"]

            # Flag predictions that sit too close to a positive label.
            for _, label_row in pos_labels.iterrows():
                px, py, pz = helpers.percentage_to_pixels(
                    label_row["coord_x"], label_row["coord_y"], label_row["coord_z"], patient_imgs)
                if _distance(nx, ny, nz, px, py, pz) < keep_dist:
                    if diam_mm >= 0:
                        diam_mm = diam_mm * -1
                    df_nodule_predictions.loc[pred_index, "diameter_mm"] = diam_mm
                    break

            if pos_annos_manual is None:
                continue

            # Same check against the manual annotations, with a wider margin.
            for _, manual_row in pos_annos_manual.iterrows():
                px, py, pz = helpers.percentage_to_pixels(
                    manual_row["x"], manual_row["y"], manual_row["z"], patient_imgs)
                diameter = manual_row["d"] * patient_imgs[0].shape[1]
                if _distance(px, py, pz, nx, ny, nz) < (diameter + 72):  # make sure we have a big margin
                    if diam_mm >= 0:
                        diam_mm = diam_mm * -1
                    df_nodule_predictions.loc[pred_index, "diameter_mm"] = diam_mm
                    print("#Too close", (nx, ny, nz))
                    break

        df_nodule_predictions.to_csv(csv_path, index=False)
        still_positive = df_nodule_predictions["diameter_mm"] >= 0
        df_nodule_predictions = df_nodule_predictions[still_positive]
        df_nodule_predictions.to_csv(pos_labels_dir + patient_id + "_candidates_falsepos.csv", index=False)
        total_false_pos += len(df_nodule_predictions)

    print("Total false pos:", total_false_pos)
def make_annotation_images_lidc():
    """Write one 64x64x64 cube image per LIDC positive annotation.

    The destination folder is created if needed and emptied.  Every
    ``*_annos_pos_lidc.csv`` label file is read, the patient's images are
    loaded, and for each row the cube around the scaled coordinates is saved.
    Rows are skipped when the cube sum is below 5 or the cube is not exactly
    64x64x64.
    """
    # See https://github.com/juliandewit/kaggle_ndsb2017/issues/2
    labels_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"
    output_dir = settings.BASE_DIR_SSD + "generated_traindata/luna16_train_cubes_lidc/"
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    for old_file in glob.glob(output_dir + "*.*"):
        os.remove(old_file)

    # Characters that would clash with the "_"-separated file name layout.
    escapes = ((" ", "xspacex"), (".", "xpointx"), ("_", "xunderscorex"))

    label_csvs = glob.glob(labels_dir + "*_annos_pos_lidc.csv")
    for patient_index, csv_file in enumerate(label_csvs):
        patient_id = ntpath.basename(csv_file).replace("_annos_pos_lidc.csv", "")
        df_annos = pandas.read_csv(csv_file)
        if len(df_annos) == 0:
            continue

        images = helpers.load_patient_images(
            patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR,
            "*" + CUBE_IMGTYPE_SRC + ".png")

        for _, anno in df_annos.iterrows():
            # Label coordinates are fractions of the volume.
            coord_x = int(anno["coord_x"] * images.shape[2])
            coord_y = int(anno["coord_y"] * images.shape[1])
            coord_z = int(anno["coord_z"] * images.shape[0])
            malscore = int(anno["malscore"])

            anno_index = str(anno["anno_index"])
            for plain, escaped in escapes:
                anno_index = anno_index.replace(plain, escaped)

            cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 64)
            if cube_img.sum() < 5:
                print(" ***** Skipping ", coord_x, coord_y, coord_z)
                continue
            if cube_img.mean() < 10:
                print(" ***** Suspicious ", coord_x, coord_y, coord_z)
            if cube_img.shape != (64, 64, 64):
                print(" ***** incorrect shape !!! ", str(anno_index), " - ",
                      (coord_x, coord_y, coord_z))
                continue

            # The squared malignancy score goes into the file name.
            file_name = (patient_id + "_" + str(anno_index) + "_"
                         + str(malscore * malscore) + "_1_pos.png")
            save_cube_img(output_dir + file_name, cube_img, 8, 8)

        helpers.print_tabbed([patient_index, patient_id, len(df_annos)],
                             [5, 64, 8])
def filter_patient_nodules_predictions(df_nodule_predictions: pandas.DataFrame, patient_id, view_size):
    """Flag predicted nodules that fall outside the patient's mask.

    The mask images of ``patient_id + '_Preprocessed'`` are loaded.  For every
    prediction a ``view_size`` x ``view_size`` window centred on the nodule is
    inspected in the mask slice at the nodule's z position and in its two
    neighbours.  If none of the windows sums to more than 255, or the nodule
    lies in one of the first 30 slices, a positive ``diameter_mm`` is made
    negative in place.  Two further heuristics only print a warning.  No rows
    are removed.

    Fixes over the previous version:
      * neighbouring slices outside the volume are skipped instead of
        wrapping to the last slice (index -1) or raising IndexError;
      * the window is clamped to the image, because a negative slice start is
        interpreted relative to the end of the axis.

    Args:
        df_nodule_predictions: Predictions with fractional ``coord_x``,
            ``coord_y``, ``coord_z`` columns and a ``diameter_mm`` column.
            Modified in place.
        patient_id: Patient whose "*_m.png" mask images are loaded.
        view_size: Edge length of the inspected window; converted to int.

    Returns:
        The same ``df_nodule_predictions`` object.
    """
    src_dir = ''
    patient_mask = helpers.load_patient_images(patient_id + '_Preprocessed', src_dir, "*_m.png")
    slice_count = patient_mask.shape[0]
    view_size = int(view_size)

    for index, row in df_nodule_predictions.iterrows():
        z_perc = row["coord_z"]
        y_perc = row["coord_y"]
        center_x = int(round(row["coord_x"] * patient_mask.shape[2]))
        center_y = int(round(y_perc * patient_mask.shape[1]))
        center_z = int(round(z_perc * patient_mask.shape[0]))
        mal_score = row["diameter_mm"]

        start_y = int(center_y - view_size / 2)
        start_x = int(center_x - view_size / 2)
        # Clamp the window to the image so negative bounds do not wrap around.
        y_from, y_to = max(start_y, 0), max(start_y + view_size, 0)
        x_from, x_to = max(start_x, 0), max(start_x + view_size, 0)

        nodule_in_mask = False
        for z_offset in (-1, 0, 1):
            z_slice = center_z + z_offset
            if not 0 <= z_slice < slice_count:
                # Neighbour lies outside the scan; nothing to inspect.
                continue
            img_roi = patient_mask[z_slice][y_from:y_to, x_from:x_to]
            if img_roi.sum() > 255:  # more than 1 pixel of mask.
                nodule_in_mask = True
                break

        if not nodule_in_mask:
            print("Nodule not in mask: ", (center_x, center_y, center_z))
            if mal_score > 0:
                mal_score *= -1
            df_nodule_predictions.loc[index, "diameter_mm"] = mal_score
        else:
            if center_z < 30:
                print("Z < 30: ", patient_id, " center z:", center_z, " y_perc: ", y_perc)
                if mal_score > 0:
                    mal_score *= -1
                df_nodule_predictions.loc[index, "diameter_mm"] = mal_score

            # The remaining checks only report; they do not change the data.
            if (z_perc > 0.75 or z_perc < 0.25) and y_perc > 0.85:
                print("SUSPICIOUS FALSEPOSITIVE: ", patient_id, " center z:", center_z, " y_perc: ", y_perc)

            if center_z < 50 and y_perc < 0.30:
                print("SUSPICIOUS FALSEPOSITIVE OUT OF RANGE: ", patient_id, " center z:", center_z, " y_perc: ", y_perc)

    return df_nodule_predictions
def make_candidate_auto_images(candidate_types=None):
    """Write 48-cube images for automatically generated candidates.

    For every requested candidate type the previously generated
    ``*_<type>.png`` files are removed, then each
    ``*_candidates_<type>.csv`` label file is read and one cube per row is
    saved.  Cubes whose sum is below 10 are skipped and do not count towards
    the per-patient limit.

    Args:
        candidate_types: Iterable of candidate type names.  ``None`` (the
            default) behaves like an empty list.  The default is no longer a
            shared mutable list.
    """
    candidate_types = [] if candidate_types is None else list(candidate_types)

    dst_dir = settings.BASE_DIR_SSD + "generated_traindata/luna16_train_cubes_auto/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)

    for candidate_type in candidate_types:
        for file_path in glob.glob(dst_dir + "*_" + candidate_type + ".png"):
            os.remove(file_path)

    for candidate_type in candidate_types:
        if candidate_type == "falsepos":
            src_dir = "resources/luna16_falsepos_labels/"
        else:
            src_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"

        # Per-patient limit; depends only on the type, so computed once here.
        if candidate_type == "luna":
            max_item = 500
        elif candidate_type == "white":
            max_item = 240
        else:
            max_item = 200

        csv_suffix = "_candidates_" + candidate_type + ".csv"
        for index, csv_file in enumerate(glob.glob(src_dir + "*" + csv_suffix)):
            patient_id = ntpath.basename(csv_file).replace(csv_suffix, "")
            print(index, ",patient: ", patient_id, " type:", candidate_type)

            df_annos = pandas.read_csv(csv_file)
            if len(df_annos) == 0:
                continue

            images = helpers.load_patient_images(
                patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR,
                "*" + CUBE_IMGTYPE_SRC + ".png", exclude_wildcards=[])

            row_no = 0
            for _, row in df_annos.iterrows():
                coord_x = int(row["coord_x"] * images.shape[2])
                coord_y = int(row["coord_y"] * images.shape[1])
                coord_z = int(row["coord_z"] * images.shape[0])
                anno_index = int(row["anno_index"])

                cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 48)
                if cube_img.sum() < 10:
                    print("Skipping ", coord_x, coord_y, coord_z)
                    continue

                try:
                    save_cube_img(
                        dst_dir + patient_id + "_" + str(anno_index) + "_0_" +
                        candidate_type + ".png", cube_img, 6, 8)
                except Exception as ex:
                    # A failed save is reported but still counts as processed.
                    print(ex)

                row_no += 1
                # Unchanged from the original: the loop stops once more than
                # max_item rows have been processed for this patient.
                if row_no > max_item:
                    break
def make_pos_annotation_images_manual():
    """Cut 64x64x64 cubes around manually labelled LUNA16 positions.

    Reads every non-empty CSV in ``resources/luna16_manual_labels/`` whose
    name contains "1.3.6.1.4", loads the matching patient images and writes
    one cube image per CSV row.  The file name encodes the row index, the
    scaled diameter clamped to 16..25 and "pos"/"neg" (``id == 0`` is "pos").
    Rows are skipped when the cube sum is below 5 or the cube is not exactly
    64x64x64.
    """
    labels_dir = "resources/luna16_manual_labels/"
    output_dir = settings.BASE_DIR_SSD + "generated_traindata/luna16_train_cubes_manual/"
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    for old_file in glob.glob(output_dir + "*_manual.*"):
        os.remove(old_file)

    label_csvs = glob.glob(labels_dir + "*.csv")
    for patient_index, csv_file in enumerate(label_csvs):
        patient_id = ntpath.basename(csv_file).replace(".csv", "")
        if "1.3.6.1.4" not in patient_id:
            continue
        print(patient_id)

        df_annos = pandas.read_csv(csv_file)
        if len(df_annos) == 0:
            continue

        images = helpers.load_patient_images(
            patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR,
            "*" + CUBE_IMGTYPE_SRC + ".png")

        for anno_index, anno in df_annos.iterrows():
            # Label values are fractions of the volume; scale to voxel units.
            coord_x = int(anno["x"] * images.shape[2])
            coord_y = int(anno["y"] * images.shape[1])
            coord_z = int(anno["z"] * images.shape[0])
            diameter = int(anno["d"] * images.shape[2])
            node_type = int(anno["id"])
            # Clamp the score written to the file name to the range 16..25.
            malscore = max(16, min(25, int(diameter)))

            cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 64)
            if cube_img.sum() < 5:
                print(" ***** Skipping ", coord_x, coord_y, coord_z)
                continue
            if cube_img.mean() < 10:
                print(" ***** Suspicious ", coord_x, coord_y, coord_z)
            if cube_img.shape != (64, 64, 64):
                print(" ***** incorrect shape !!! ", str(anno_index), " - ",
                      (coord_x, coord_y, coord_z))
                continue

            if node_type == 0:
                label = "pos"
            else:
                label = "neg"
            file_name = (patient_id + "_" + str(anno_index) + "_"
                         + str(malscore) + "_1_" + label + ".png")
            save_cube_img(output_dir + file_name, cube_img, 8, 8)

        helpers.print_tabbed([patient_index, patient_id, len(df_annos)],
                             [5, 64, 8])
def make_pos_annotation_images_manual_ndsb3():
    """Cut 64x64x64 cubes around manually labelled NDSB3 positions.

    The destination folder is emptied, then every non-empty CSV in
    ``resources/ndsb3_manual_labels/`` whose name does not contain
    "1.3.6.1.4.1" is processed.  One cube image is written per row; the file
    name encodes the 1-based row number, "pos"/"neg" (``id == 0`` is "pos"),
    the patient's ``cancer`` value from ``stage1_labels.csv`` and the rounded
    ``dmm`` value.  Rows are skipped when the cube sum is below 5 or the cube
    is not exactly 64x64x64.
    """
    labels_dir = "resources/ndsb3_manual_labels/"
    output_dir = settings.BASE_DIR_SSD + "generated_traindata/ndsb3_train_cubes_manual/"
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    train_label_df = pandas.read_csv("resources/stage1_labels.csv")
    train_label_df.set_index(["id"], inplace=True)

    for old_file in glob.glob(output_dir + "*.*"):
        os.remove(old_file)

    label_csvs = glob.glob(labels_dir + "*.csv")
    for patient_index, csv_file in enumerate(label_csvs):
        patient_id = ntpath.basename(csv_file).replace(".csv", "")
        if "1.3.6.1.4.1" in patient_id:
            continue

        cancer_label = train_label_df.loc[patient_id]["cancer"]
        df_annos = pandas.read_csv(csv_file)
        if len(df_annos) == 0:
            continue

        images = helpers.load_patient_images(
            patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR,
            "*" + CUBE_IMGTYPE_SRC + ".png")

        # anno_index counts every row, including the ones skipped below.
        for anno_index, (_, anno) in enumerate(df_annos.iterrows(), start=1):
            if anno["id"] == 0:
                pos_neg = "pos"
            else:
                pos_neg = "neg"
            coord_x = int(anno["x"] * images.shape[2])
            coord_y = int(anno["y"] * images.shape[1])
            coord_z = int(anno["z"] * images.shape[0])
            malscore = int(round(anno["dmm"]))

            cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 64)
            if cube_img.sum() < 5:
                print(" ***** Skipping ", coord_x, coord_y, coord_z)
                continue
            if cube_img.mean() < 10:
                print(" ***** Suspicious ", coord_x, coord_y, coord_z)
            if cube_img.shape != (64, 64, 64):
                print(" ***** incorrect shape !!! ", str(anno_index), " - ",
                      (coord_x, coord_y, coord_z))
                continue

            print(patient_id)
            assert malscore > 0 or pos_neg == "neg"
            file_name = ("ndsb3manual_" + patient_id + "_" + str(anno_index)
                         + "_" + pos_neg + "_" + str(cancer_label) + "_"
                         + str(malscore) + "_1_pn.png")
            save_cube_img(output_dir + file_name, cube_img, 8, 8)

        helpers.print_tabbed([patient_index, patient_id, len(df_annos)],
                             [5, 64, 8])
def filter_patient_nodules_predictions(df_nodule_predictions: pandas.DataFrame, patient_id, view_size, luna16=False):
    """Flag predicted nodules that fall outside the patient's mask.

    For every prediction a ``view_size`` x ``view_size`` window centred on the
    nodule is inspected in the mask slice at the nodule's z position and in
    its two neighbours.  If none of the windows sums to more than 255, or the
    nodule lies in one of the first 30 slices, a positive ``diameter_mm`` is
    made negative in place.  Two further heuristics only print a warning.  No
    rows are removed.

    Fixes over the previous version:
      * neighbouring slices outside the volume are skipped instead of
        wrapping to the last slice (index -1) or raising IndexError;
      * the window is clamped to the image, because a negative slice start is
        interpreted relative to the end of the axis.

    Args:
        df_nodule_predictions: Predictions with fractional ``coord_x``,
            ``coord_y``, ``coord_z`` columns and a ``diameter_mm`` column.
            Modified in place.
        patient_id: Patient whose "*_m.png" mask images are loaded.
        view_size: Edge length of the inspected window; converted to int.
        luna16: Selects the LUNA16 image directory instead of the NDSB3 one.

    Returns:
        The same ``df_nodule_predictions`` object.
    """
    src_dir = settings.LUNA_16_TRAIN_DIR2D2 if luna16 else settings.NDSB3_EXTRACTED_IMAGE_DIR
    patient_mask = helpers.load_patient_images(patient_id, src_dir, "*_m.png")
    slice_count = patient_mask.shape[0]
    view_size = int(view_size)

    for index, row in df_nodule_predictions.iterrows():
        z_perc = row["coord_z"]
        y_perc = row["coord_y"]
        center_x = int(round(row["coord_x"] * patient_mask.shape[2]))
        center_y = int(round(y_perc * patient_mask.shape[1]))
        center_z = int(round(z_perc * patient_mask.shape[0]))
        mal_score = row["diameter_mm"]

        start_y = int(center_y - view_size / 2)
        start_x = int(center_x - view_size / 2)
        # Clamp the window to the image so negative bounds do not wrap around.
        y_from, y_to = max(start_y, 0), max(start_y + view_size, 0)
        x_from, x_to = max(start_x, 0), max(start_x + view_size, 0)

        nodule_in_mask = False
        for z_offset in (-1, 0, 1):
            z_slice = center_z + z_offset
            if not 0 <= z_slice < slice_count:
                # Neighbour lies outside the scan; nothing to inspect.
                continue
            img_roi = patient_mask[z_slice][y_from:y_to, x_from:x_to]
            if img_roi.sum() > 255:  # more than 1 pixel of mask.
                nodule_in_mask = True
                break

        if not nodule_in_mask:
            print("Nodule not in mask: ", (center_x, center_y, center_z))
            if mal_score > 0:
                mal_score *= -1
            df_nodule_predictions.loc[index, "diameter_mm"] = mal_score
        else:
            if center_z < 30:
                print("Z < 30: ", patient_id, " center z:", center_z, " y_perc: ", y_perc)
                if mal_score > 0:
                    mal_score *= -1
                df_nodule_predictions.loc[index, "diameter_mm"] = mal_score

            # The remaining checks only report; they do not change the data.
            if (z_perc > 0.75 or z_perc < 0.25) and y_perc > 0.85:
                print("SUSPICIOUS FALSEPOSITIVE: ", patient_id, " center z:", center_z, " y_perc: ", y_perc)

            if center_z < 50 and y_perc < 0.30:
                print("SUSPICIOUS FALSEPOSITIVE OUT OF RANGE: ", patient_id, " center z:", center_z, " y_perc: ", y_perc)

    return df_nodule_predictions
def filter_patient_nodules_predictions(df_nodule_predictions, patient_id, view_size):
    """Flag predicted nodules that fall outside the patient's mask.

    For every prediction a ``view_size`` x ``view_size`` window centred on the
    nodule is inspected in the mask slice at the nodule's z position and in
    its two neighbours.  If none of the windows sums to more than 255, or the
    nodule lies in one of the first 30 slices, a positive ``diameter_mm`` is
    made negative in place.  No rows are removed.

    Fixes over the previous version:
      * neighbouring slices outside the volume are skipped instead of
        wrapping to the last slice (index -1) or raising IndexError;
      * the window is clamped to the image, because a negative slice start is
        interpreted relative to the end of the axis.

    Args:
        df_nodule_predictions: DataFrame with fractional ``coord_x``,
            ``coord_y``, ``coord_z`` columns and a ``diameter_mm`` column.
            Modified in place.
        patient_id: Patient whose "*_m.png" mask images are loaded from
            ``LUNA16_EXTRACTED_IMAGE_DIR``.
        view_size: Edge length of the inspected window; converted to int.

    Returns:
        The same ``df_nodule_predictions`` object.
    """
    src_dir = LUNA16_EXTRACTED_IMAGE_DIR
    patient_mask = helpers.load_patient_images(patient_id, src_dir, "*_m.png")
    slice_count = patient_mask.shape[0]
    view_size = int(view_size)

    for index, row in df_nodule_predictions.iterrows():
        z_perc = row["coord_z"]
        y_perc = row["coord_y"]
        center_x = int(round(row["coord_x"] * patient_mask.shape[2]))
        center_y = int(round(y_perc * patient_mask.shape[1]))
        center_z = int(round(z_perc * patient_mask.shape[0]))
        mal_score = row["diameter_mm"]

        start_y = int(center_y - view_size / 2)
        start_x = int(center_x - view_size / 2)
        # Clamp the window to the image so negative bounds do not wrap around.
        y_from, y_to = max(start_y, 0), max(start_y + view_size, 0)
        x_from, x_to = max(start_x, 0), max(start_x + view_size, 0)

        nodule_in_mask = False
        for z_offset in (-1, 0, 1):
            z_slice = center_z + z_offset
            if not 0 <= z_slice < slice_count:
                # Neighbour lies outside the scan; nothing to inspect.
                continue
            img_roi = patient_mask[z_slice][y_from:y_to, x_from:x_to]
            if img_roi.sum() > 255:  # more than 1 pixel of mask.
                nodule_in_mask = True
                break

        # Both rejection reasons are handled the same way: flag the
        # prediction by storing a non-positive diameter.
        if not nodule_in_mask or center_z < 30:
            if mal_score > 0:
                mal_score *= -1
            df_nodule_predictions.loc[index, "diameter_mm"] = mal_score

    return df_nodule_predictions
def make_annotation_images_lidc():
    """Write one 64x64x64 cube image per LIDC positive annotation.

    The destination folder is created if needed and emptied.  Every
    ``*_annos_pos_lidc.csv`` label file is read, the patient's images are
    loaded, and for each row the cube around the scaled coordinates is saved.
    Rows are skipped when the cube sum is below 5 or the cube is not exactly
    64x64x64.
    """
    labels_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"
    output_dir = settings.BASE_DIR_SSD + "generated_traindata/luna16_train_cubes_lidc/"
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    for old_file in glob.glob(output_dir + "*.*"):
        os.remove(old_file)

    # Characters that would clash with the "_"-separated file name layout.
    escapes = ((" ", "xspacex"), (".", "xpointx"), ("_", "xunderscorex"))

    label_csvs = glob.glob(labels_dir + "*_annos_pos_lidc.csv")
    for patient_index, csv_file in enumerate(label_csvs):
        patient_id = ntpath.basename(csv_file).replace("_annos_pos_lidc.csv", "")
        df_annos = pandas.read_csv(csv_file)
        if len(df_annos) == 0:
            continue

        images = helpers.load_patient_images(
            patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR,
            "*" + CUBE_IMGTYPE_SRC + ".png")

        for _, anno in df_annos.iterrows():
            # Label coordinates are fractions of the volume.
            coord_x = int(anno["coord_x"] * images.shape[2])
            coord_y = int(anno["coord_y"] * images.shape[1])
            coord_z = int(anno["coord_z"] * images.shape[0])
            malscore = int(anno["malscore"])

            anno_index = str(anno["anno_index"])
            for plain, escaped in escapes:
                anno_index = anno_index.replace(plain, escaped)

            cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 64)
            if cube_img.sum() < 5:
                print(" ***** Skipping ", coord_x, coord_y, coord_z)
                continue
            if cube_img.mean() < 10:
                print(" ***** Suspicious ", coord_x, coord_y, coord_z)
            if cube_img.shape != (64, 64, 64):
                print(" ***** incorrect shape !!! ", str(anno_index), " - ",
                      (coord_x, coord_y, coord_z))
                continue

            # The squared malignancy score goes into the file name.
            file_name = (patient_id + "_" + str(anno_index) + "_"
                         + str(malscore * malscore) + "_1_pos.png")
            save_cube_img(output_dir + file_name, cube_img, 8, 8)

        helpers.print_tabbed([patient_index, patient_id, len(df_annos)],
                             [5, 64, 8])
def get_48X48_cube(patient_name, nodule_info, origin, plot=False):
    """Collect positive and negative 48x48x48 training cubes for one patient.

    Step 1 paints a spherical mask around every annotated nodule.  Step 2
    draws several cubes per nodule with ``get_lung_cube`` and keeps those
    whose sum exceeds 2000.  Step 3 samples one negative cube per kept
    positive cube, starting from a random edge pixel of the lung mask.

    Fix over the previous version: the edge pixel is drawn with
    ``np.random.randint(0, n)``.  The old call used ``n - 1`` as the exclusive
    upper bound, which raised ValueError when a slice had exactly one edge
    pixel and could never select the last one.

    Args:
        patient_name: Patient whose "*_i.png" / "*_m.png" images are loaded
            from ``traindata_path``.
        nodule_info: DataFrame with ``coordX``, ``coordY``, ``coordZ`` and
            ``diameter_mm`` columns.
        origin: Indexable (x, y, z) offset subtracted from the coordinates.
        plot: When true, every cube is also written slice by slice as PNG.

    Returns:
        Tuple ``(lung_cube_py, nodule_cube_mask_py, negative_cube_py)`` of
        lists; the first and last have the same length.
    """
    images = helpers.load_patient_images(patient_name, traindata_path, "*_i.png")  # z,y,x
    mask_images = np.zeros(images.shape)
    lung_cube_py = []
    nodule_cube_mask_py = []
    negative_cube_py = []

    # step1: create image mask
    for index, row in nodule_info.iterrows():
        node_x = abs(int(round(row["coordX"] - origin[0])))  # need abs
        node_y = abs(int(round(row["coordY"] - origin[1])))
        node_z = abs(int(round(row["coordZ"] - origin[2])))
        image_coord = np.array([node_z, node_y, node_x])
        radius = int(round(row["diameter_mm"] / 2 + 1))
        # NOTE(review): indices are not bounds-checked; a nodule closer than
        # `radius` to the volume border would wrap around or raise -- confirm.
        for z in np.arange(-radius, radius + 1):
            for y in np.arange(-radius, radius + 1):
                for x in np.arange(-radius, radius + 1):
                    coord = np.array([z + node_z, y + node_y, x + node_x])
                    if (np.linalg.norm(coord - image_coord)) < radius:
                        mask_images[z + node_z, y + node_y, x + node_x] = int(1)

    # step2: get pos sample nodule
    for index, row in nodule_info.iterrows():
        node_x = abs(int(round(row["coordX"] - origin[0])))
        node_y = abs(int(round(row["coordY"] - origin[1])))
        node_z = abs(int(round(row["coordZ"] - origin[2])))
        radius = int(round(row["diameter_mm"] / 2 + 1))
        num_per_nodule = int(
            3 * math.sqrt(row["diameter_mm"] + 100))  # this parameter can be tuned
        print(patient_name, "num_per_nodule:", num_per_nodule)
        for j in range(num_per_nodule):
            # positive sample for the U-Net
            lung_cube, lung_cube_mask = get_lung_cube(mask_images, images,
                                                      node_x, node_y, node_z,
                                                      radius)
            if plot:
                for i in range(lung_cube.shape[0]):
                    # rjust(4, '0') keeps the files in slice order when sorted
                    cv2.imwrite(
                        LUNG_48X48_IMAGE_PATH + patient_name + "/" + "img_" +
                        str(index * 3 + j) + "_" + str(i).rjust(4, '0') +
                        "_i.png", lung_cube[i])
                    cv2.imwrite(
                        NODULE_48X48_MASK_PATH + patient_name + "/" + "img_" +
                        str(index * 3 + j) + "_" + str(i).rjust(4, '0') +
                        "_i.png", lung_cube_mask[i] * 255)
            if lung_cube.sum() > 2000:  # lung_cube pixel value: 0~255
                lung_cube_py.append(lung_cube)
                nodule_cube_mask_py.append(lung_cube_mask)

    # step3: get negative sample nodule
    print(patient_name, "pos+ nodule num:", len(lung_cube_py))
    lung_mask = helpers.load_patient_images(patient_name, traindata_path, "*_m.png")  # z,y,x
    lung_mask_shape = lung_mask.shape  # z,y,x
    for i in range(len(lung_cube_py)):
        ok = False
        while not ok:
            # get lung mask edge x,y,z
            coord_z = int(
                np.random.normal(lung_mask_shape[0] / 2, lung_mask_shape[0] / 6))
            coord_z = max(coord_z, 0)
            coord_z = min(coord_z, lung_mask_shape[0] - 1)
            candidate_map = lung_mask[coord_z]
            candidate_map = cv2.Canny(candidate_map.copy(), 100, 200)
            non_zero_indices = np.nonzero(candidate_map)
            if len(non_zero_indices[0]) == 0:
                continue
            # `high` is exclusive, so this covers every edge pixel and also
            # works when there is exactly one.
            nonzero_index = np.random.randint(0, len(non_zero_indices[0]))
            coord_y = non_zero_indices[0][nonzero_index]
            if coord_y > lung_mask_shape[1] * 0.85:
                continue
            coord_x = non_zero_indices[1][nonzero_index]
            real_candidate = True

            # xyz has enough distance to nodule
            for index, row in nodule_info.iterrows():
                node_x = abs(int(round(row["coordX"] - origin[0])))
                node_y = abs(int(round(row["coordY"] - origin[1])))
                node_z = abs(int(round(row["coordZ"] - origin[2])))
                image_coord = np.array([node_x, node_y, node_z])
                radius = int(round(row["diameter_mm"] / 2 + 1))
                # NOTE(review): the candidate is re-drawn between its current
                # position and the nodule on every pass -- kept as in the
                # original; confirm this is intended.
                if coord_x != node_x:
                    coord_x = np.random.randint(min(coord_x, node_x),
                                                max(coord_x, node_x))
                if coord_y != node_y:
                    coord_y = np.random.randint(min(node_y, coord_y),
                                                max(node_y, coord_y))
                if coord_z != node_z:
                    coord_z = np.random.randint(min(node_z, coord_z),
                                                max(node_z, coord_z))
                coord = np.array([coord_x, coord_y, coord_z])
                # The randomly drawn negative candidate must have its centre
                # more than radius + 24 away from the positive sample's
                # centre, where radius is the positive sample's radius and 24
                # is half the cube edge, so the negative cube does not
                # overlap the positive sample.
                if (np.linalg.norm(coord - image_coord) < radius + 24):
                    real_candidate = False
                    break
                else:
                    real_candidate = True

            if real_candidate:
                # coord_x/y/z is where the negative sample is taken
                start_x = max(coord_x - 24, 0)
                start_y = max(coord_y - 24, 0)
                start_z = max(coord_z - 24, 0)
                if (coord_x + 24 > lung_mask.shape[2] - 1):
                    start_x = lung_mask.shape[2] - 48
                if (coord_y + 24 > lung_mask.shape[1] - 1):
                    start_y = lung_mask.shape[1] - 48
                if (coord_z + 24 > lung_mask.shape[0] - 1):
                    start_z = lung_mask.shape[0] - 48
                if (lung_mask[start_z:start_z + 48, start_y:start_y + 48,
                              start_x:start_x + 48].sum() > 2000):
                    # the negative 48*48*48 cube must overlap the lung mask
                    neg_candidate = images[start_z:start_z + 48,
                                           start_y:start_y + 48,
                                           start_x:start_x + 48]
                    assert (neg_candidate.shape == (48, 48, 48))
                    if plot:
                        for j in range(len(neg_candidate)):
                            cv2.imwrite(
                                LUNG_48X48_IMAGE_NEG_PATH + patient_name + '/' +
                                "img_" + str(i).rjust(4, '0') + '_' + str(j) +
                                ".png", neg_candidate[j])
                    negative_cube_py.append(neg_candidate)
                    ok = True

    assert (len(lung_cube_py) == len(negative_cube_py))
    return lung_cube_py, nodule_cube_mask_py, negative_cube_py
def view_image(patient):
    """Load one patient's image slices and pass them to ``slice_images``.

    Args:
        patient: Key used to look up the patient id in the module-level
            ``patient_ids`` container; it is also forwarded unchanged to
            ``slice_images``.
    """
    patient_id = patient_ids[patient]
    # Only the "*_i.png" files found under ``src_dir`` are requested.
    slices = helpers.load_patient_images(patient_id, src_dir, "*_i.png")
    slice_images(patient, slices)
def predict_cubes(model_path, continue_job, only_patient_id=None, luna16=False,
                  magnification=1, flip=False, train_data=True, holdout_no=-1,
                  ext_name="", fold_count=2):
    """Slide a cube over every patient volume and write nodule predictions.

    Every directory under ``settings.NDSB3_EXTRACTED_IMAGE_DIR`` is treated
    as one patient. Its image ("*_i.png") and mask ("*_m.png") volumes are
    cut into ``CUBE_SIZE`` cubes on a ``PREDICT_STEP`` grid; cubes whose mask
    sum is at least 2000 are scored by the network in batches of 128. Cubes
    scoring above ``P_TH`` become rows of ``<dst_dir>/<patient_id>.csv``.

    Args:
        model_path: Weight file handed to ``get_net`` as ``load_weight_path``.
        continue_job: When true and ``only_patient_id`` is None, patients
            whose CSV already exists are skipped.
        only_patient_id: If not None, only this patient is processed.
        luna16: Selects ``settings.LUNA_NODULE_DETECTION_DIR`` as output root
            and disables the label-index filter.
        magnification: Rescale factor applied to image and mask when != 1.
        flip: Reverse the last axis of each cube before prediction.
        train_data: Selects the labels CSV and enables the hold-out filter.
        holdout_no: Fold that is processed when ``train_data`` is true.
        ext_name: Suffix of the predictions directory name.
        fold_count: Modulus applied to the value of ``get_patient_fold``.
    """
    if luna16:
        dst_dir = settings.LUNA_NODULE_DETECTION_DIR
    else:
        dst_dir = settings.NDSB3_NODULE_DETECTION_DIR

    holdout_ext = ""
    flip_ext = "_flip" if flip else ""
    dst_dir += ("predictions" + str(int(magnification * 10)) + holdout_ext
                + flip_ext + "_" + ext_name + "/")
    # makedirs also creates the base detection directory when it is missing.
    os.makedirs(dst_dir, exist_ok=True)

    sw = helpers.Stopwatch.start_new()
    model = step2_train_nodule_detector.get_net(
        input_shape=(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE, 1),
        load_weight_path=model_path)

    labels_df = None
    if not luna16:
        if train_data:
            labels_csv = "resources/stage1_labels.csv"
        else:
            labels_csv = "resources/tc_sample_submission.csv"
        labels_df = pandas.read_csv(labels_csv)
        labels_df.set_index(["id"], inplace=True)

    image_root = settings.NDSB3_EXTRACTED_IMAGE_DIR
    patient_ids = [
        name for name in os.listdir(image_root)
        if os.path.isdir(image_root + name)
    ]

    all_predictions_csv = []
    for patient_index, patient_id in enumerate(reversed(patient_ids)):
        if not luna16 and patient_id not in labels_df.index:
            continue
        if "metadata" in patient_id:
            continue
        if only_patient_id is not None and only_patient_id != patient_id:
            continue

        if holdout_no is not None and train_data:
            patient_fold = helpers.get_patient_fold(patient_id) % fold_count
            if patient_fold != holdout_no:
                continue

        print(patient_index, ": ", patient_id)
        csv_target_path = dst_dir + patient_id + ".csv"
        if continue_job and only_patient_id is None:
            if os.path.exists(csv_target_path):
                continue

        patient_img = helpers.load_patient_images(
            patient_id, image_root, "*_i.png", [])
        if magnification != 1:
            patient_img = helpers.rescale_patient_images(
                patient_img, (1, 1, 1), magnification)

        patient_mask = helpers.load_patient_images(
            patient_id, image_root, "*_m.png", [])
        if magnification != 1:
            patient_mask = helpers.rescale_patient_images(
                patient_mask, (1, 1, 1), magnification, is_mask_image=True)

        step = PREDICT_STEP
        CROP_SIZE = CUBE_SIZE

        # Number of cube positions per axis: positions k*step for which the
        # cube end stays strictly below the axis length.
        predict_volume_shape_list = [0, 0, 0]
        for dim in range(3):
            dim_indent = 0
            while dim_indent + CROP_SIZE < patient_img.shape[dim]:
                predict_volume_shape_list[dim] += 1
                dim_indent += step

        predict_volume_shape = tuple(predict_volume_shape_list)
        predict_volume = numpy.zeros(shape=predict_volume_shape, dtype=float)
        print("Predict volume shape: ", predict_volume.shape)

        total_cubes = predict_volume.size
        done_count = 0
        skipped_count = 0
        batch_size = 128
        batch_list = []
        batch_list_coords = []
        patient_predictions_csv = []
        annotation_index = 0

        for z in range(predict_volume_shape[0]):
            for y in range(predict_volume_shape[1]):
                for x in range(predict_volume_shape[2]):
                    z0, y0, x0 = z * step, y * step, x * step
                    cube_img = patient_img[z0:z0 + CROP_SIZE,
                                           y0:y0 + CROP_SIZE,
                                           x0:x0 + CROP_SIZE]
                    cube_mask = patient_mask[z0:z0 + CROP_SIZE,
                                             y0:y0 + CROP_SIZE,
                                             x0:x0 + CROP_SIZE]

                    if cube_mask.sum() < 2000:
                        skipped_count += 1
                    else:
                        if flip:
                            cube_img = cube_img[:, :, ::-1]
                        if CROP_SIZE != CUBE_SIZE:
                            cube_img = helpers.rescale_patient_images2(
                                cube_img, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))
                        batch_list.append(prepare_image_for_net3D(cube_img))
                        batch_list_coords.append((z, y, x))

                    # Predict when the batch is full, and also on the very
                    # last cube: previously a trailing partial batch was
                    # dropped, so its cubes were never scored.
                    is_last_cube = done_count + 1 == total_cubes
                    if batch_list and (len(batch_list) == batch_size
                                       or is_last_cube):
                        batch_data = numpy.vstack(batch_list)
                        p = model.predict(batch_data, batch_size=batch_size)
                        for i, (g_z, g_y, g_x) in enumerate(batch_list_coords):
                            nodule_chance = p[0][i][0]
                            predict_volume[g_z, g_y, g_x] = nodule_chance
                            if nodule_chance <= P_TH:
                                continue
                            # Cube centre in voxels of the loaded volume.
                            p_z = g_z * step + CROP_SIZE / 2
                            p_y = g_y * step + CROP_SIZE / 2
                            p_x = g_x * step + CROP_SIZE / 2
                            p_z_perc = round(p_z / patient_img.shape[0], 4)
                            p_y_perc = round(p_y / patient_img.shape[1], 4)
                            p_x_perc = round(p_x / patient_img.shape[2], 4)
                            diameter_mm = round(p[1][i][0], 4)
                            diameter_perc = round(
                                diameter_mm / patient_img.shape[2], 4)
                            nodule_chance = round(nodule_chance, 4)
                            patient_predictions_csv_line = [
                                annotation_index, p_x_perc, p_y_perc,
                                p_z_perc, diameter_perc, nodule_chance,
                                diameter_mm, p_x, p_y, p_z
                            ]
                            patient_predictions_csv.append(
                                patient_predictions_csv_line)
                            all_predictions_csv.append(
                                [patient_id] + patient_predictions_csv_line)
                            annotation_index += 1
                        batch_list = []
                        batch_list_coords = []

                    done_count += 1
                    if done_count % 10000 == 0:
                        print("Done: ", done_count,
                              " skipped:", skipped_count)

        df = pandas.DataFrame(patient_predictions_csv,
                              columns=[
                                  "anno_index", "coord_x", "coord_y",
                                  "coord_z", "diameter", "nodule_chance",
                                  "diameter_mm", "abs_x", "abs_y", "abs_z"
                              ])
        # The return value is ignored here, as in the original code.
        # NOTE(review): presumably the helper edits ``df`` in place - confirm.
        filter_patient_nodules_predictions(df, patient_id,
                                           CROP_SIZE * magnification)
        df.to_csv(csv_target_path, index=False)

        print(predict_volume.mean())
        print("Done in : ", sw.get_elapsed_seconds(), " seconds")
def _mark_nodule_sphere(mask_images, node_z, node_y, node_x, radius):
    """Set to 1 every in-bounds voxel strictly closer than ``radius`` to the centre."""
    bounds = []
    for centre, size in zip((node_z, node_y, node_x), mask_images.shape):
        lo = max(centre - radius, 0)
        hi = min(centre + radius + 1, size)
        if hi <= lo:
            # The sphere lies completely outside the volume on this axis.
            return
        bounds.append((lo, hi))
    (z0, z1), (y0, y1), (x0, x1) = bounds
    zz, yy, xx = np.ogrid[z0:z1, y0:y1, x0:x1]
    inside = ((zz - node_z) ** 2 + (yy - node_y) ** 2
              + (xx - node_x) ** 2) < radius ** 2
    # Basic slicing yields a view, so this writes into ``mask_images``.
    mask_images[z0:z1, y0:y1, x0:x1][inside] = 1


def get_48X48_cube(patient_name, cand_df, origin_df, plot=False):
    """Build positive cubes, their nodule masks and negative cubes for a patient.

    Args:
        patient_name: Patient id; its "*_i.png" slices are loaded from
            ``traindata_path``.
        cand_df: Negative-sample candidates with columns ``coordx``,
            ``coordy``, ``coordz``.
        origin_df: Nodule annotations with columns ``coordX``, ``coordY``,
            ``coordZ`` and ``diameter_mm``.
        plot: When true, the generated cubes are also written as PNG slices.

    Returns:
        Tuple ``(lung_cube_py, nodule_cube_mask_py, negative_cube_py)`` of
        lists of cubes returned by ``helpers.get_lung_cube``.
    """
    images = helpers.load_patient_images(patient_name, traindata_path,
                                         "*_i.png")  # z,y,x
    # Nodule mask with the same shape as the image volume.
    mask_images = np.zeros(images.shape)
    lung_cube_py = []
    nodule_cube_mask_py = []
    negative_cube_py = []

    # step1: paint a sphere for every annotated nodule into the mask.
    # Out-of-bounds voxels are skipped; the former per-voxel loop raised
    # IndexError past the upper edge and wrapped around for negative indices.
    for _, row in origin_df.iterrows():
        node_x = int(row["coordX"])
        node_y = int(row["coordY"])
        node_z = int(row["coordZ"])
        radius = int(round(row["diameter_mm"] / 2 + 1))
        _mark_nodule_sphere(mask_images, node_z, node_y, node_x, radius)

    # step2: positive samples. Each nodule yields ``num_per_nodule`` cubes
    # from ``helpers.get_lung_cube``.
    # ``num_per_nodule`` keeps the value of the last nodule for step3 and
    # stays 0 (no negatives) when ``origin_df`` is empty; previously that
    # case raised NameError.
    num_per_nodule = 0
    for index, row in origin_df.iterrows():
        node_x = int(row["coordX"])
        node_y = int(row["coordY"])
        node_z = int(row["coordZ"])
        radius = row["diameter_mm"] / 2
        num_per_nodule = int(
            TIMER * math.sqrt(row["diameter_mm"] + 100))  # tunable
        print(patient_name, "num_per_nodule:", num_per_nodule)
        for j in range(num_per_nodule):
            lung_cube, lung_cube_mask = helpers.get_lung_cube(
                mask_images, images, node_x, node_y, node_z, radius,
                radius / np.sqrt(3))
            if plot:
                for i in range(lung_cube.shape[0]):
                    # Zero-padded slice index keeps the files sorted.
                    slice_name = ("img_" + str(index * 3 + j) + "_"
                                  + str(i).rjust(4, '0') + "_i.png")
                    cv2.imwrite(
                        LUNG_48X48_IMAGE_PATH + patient_name + "/"
                        + slice_name, lung_cube[i])
                    cv2.imwrite(
                        NODULE_48X48_MASK_PATH + patient_name + "/"
                        + slice_name, lung_cube_mask[i] * 255)
            # Keep only cubes with enough signal (pixel values are 0~255).
            if lung_cube.sum() > 2000:
                lung_cube_py.append(lung_cube)
                nodule_cube_mask_py.append(lung_cube_mask)

    # step3: negative samples around candidates outside every nodule.
    print(patient_name, "pos+ nodule num:", len(lung_cube_py))
    for index, row in cand_df.iterrows():
        cand_orgin = np.array([row['coordx'], row['coordy'], row['coordz']])
        x = int(round(cand_orgin[0]))
        y = int(round(cand_orgin[1]))
        z = int(round(cand_orgin[2]))

        # Reject the candidate when its centre lies inside any nodule.
        inside_nodule = any(
            np.linalg.norm(
                np.array([
                    origin_row['coordX'], origin_row['coordY'],
                    origin_row['coordZ']
                ]) - cand_orgin) < origin_row['diameter_mm'] / 2
            for _, origin_row in origin_df.iterrows())
        if inside_nodule:
            continue

        # Negatives use a fixed radius of 5 and a quarter of the positive
        # sample count.
        for j in range(num_per_nodule // 4):
            lung_cube, _ = helpers.get_lung_cube(mask_images, images, x, y,
                                                 z, 5, 5 / np.sqrt(3))
            negative_cube_py.append(lung_cube)
            if plot:
                # The slice index has its own name: the old loop variable
                # ``z`` overwrote the candidate's z coordinate used by the
                # next ``get_lung_cube`` call.
                # NOTE(review): the file name has no slice index, so every
                # slice overwrites the same PNG - confirm this is intended.
                for slice_idx in range(len(lung_cube)):
                    cv2.imwrite(
                        LUNG_48X48_IMAGE_NEG_PATH + patient_name + '/'
                        + "img_" + str(index).rjust(4, '0') + '_' + str(j)
                        + ".png", lung_cube[slice_idx])

    print('lung_cube_py', len(lung_cube_py), 'negative_cube_py',
          len(negative_cube_py))
    return lung_cube_py, nodule_cube_mask_py, negative_cube_py
def predict(csv_file_path):
    """Score every candidate listed in a patient's ``*_candidate.csv``.

    Args:
        csv_file_path: Path to a CSV with columns ``coordz``, ``coordy`` and
            ``coordx``; the patient id is the file name without
            ``_candidate.csv``.

    Returns:
        Tuple ``(predict_csv, sub_csv)``. Each ``predict_csv`` row is
        ``[patient_id, x, y, z, nodule_chance]`` using the candidate
        coordinates; each ``sub_csv`` row holds the same chance with the
        coordinates shifted by the image origin.
    """
    patient_id = os.path.basename(csv_file_path).replace("_candidate.csv", "")
    print(patient_id + 'start predict ...')

    # Only origin and direction of the raw .mhd header are needed.
    itk_img = sitk.ReadImage(images_raw_mhd + patient_id + '.mhd')
    origin = np.array(itk_img.GetOrigin())  # x,y,z
    direction = np.array(itk_img.GetDirection())

    patient_img = helpers.load_patient_images(patient_id, images_path,
                                              "*_i.png", [])
    patient_img_mask = helpers.load_patient_images(patient_id, images_path,
                                                   "*_m.png", [])
    patient_unet_csv = pd.read_csv(csv_file_path)  # coordz,coordy,coordx

    predict_csv = []
    sub_csv = []
    # read_csv never returns None, so the old ``is None`` test could not
    # fire; test for an empty table instead.
    if len(patient_unet_csv) == 0:
        print(patient_id + "has no candidate")

    # The volume is indexed (z, y, x), so the y limit must come from axis 1;
    # the old code compared coord_y against the slice count (axis 0).
    y_limit = patient_img.shape[1] * 0.85

    for _, candidate_zyx in patient_unet_csv.iterrows():
        coord_z = candidate_zyx["coordz"]
        coord_y = candidate_zyx["coordy"]
        coord_x = candidate_zyx["coordx"]
        if coord_y >= y_limit:
            continue

        coord_z = round(coord_z, 4)
        coord_y = round(coord_y, 4)
        coord_x = round(coord_x, 4)

        submission_x = (direction[0] * coord_x + origin[0]) / direction[0]
        submission_y = (direction[4] * coord_y + origin[1]) / direction[4]
        submission_z = coord_z + origin[2]

        # Start indices adjusted so the cube stays inside the volume.
        start_z, start_y, start_x = modify_yxz(coord_z, coord_y, coord_x,
                                               patient_img.shape, cube_size)
        cube_img = patient_img[start_z:start_z + cube_size,
                               start_y:start_y + cube_size,
                               start_x:start_x + cube_size]
        cube_img_mask = patient_img_mask[start_z:start_z + cube_size,
                                         start_y:start_y + cube_size,
                                         start_x:start_x + cube_size]
        if cube_img_mask.sum() < 2000:
            continue

        img_prep = prepare_image_for_net3D(cube_img)
        p = model.predict(img_prep)
        nodule_chance = p[0][0]
        predict_csv.append(
            [patient_id, coord_x, coord_y, coord_z, nodule_chance])
        sub_csv.append([
            patient_id, submission_x, submission_y, submission_z,
            nodule_chance
        ])

    print(patient_id + 'predict over...')
    return predict_csv, sub_csv
def make_predicted_luna_nodules():
    """Suppress near-duplicate predictions and export per-patient candidates.

    For every prediction CSV in ``predictions10_luna16_fs/`` the rows are
    filtered, sorted by ``nodule_chance`` (descending) and compared pairwise.
    When two predictions are closer than 54 voxels (and more than 1 apart),
    the one with the lower chance is flagged by making its ``diameter_mm``
    negative. Two files are written per patient:

    * ``<id>_candidates_1.csv`` - all rows, including flagged ones;
    * ``<id>_candidates.csv`` - unflagged rows with ``nodule_chance >= 0.9``,
      without the ``diameter``/``diameter_mm``/``anno_index`` columns and
      with a ``seriesuid`` column added.
    """
    src_dir = settings.TEST_NODULE_DETECTION_DIR + 'predictions10_luna16_fs/'
    pos_labels_dir = settings.TEST_NODULE_DETECTION_DIR
    candidate_diameter = 6
    min_dist = candidate_diameter + 48  # required margin between candidates
    total_false_pos = 0

    for csv_path in glob.glob(src_dir + "*.csv"):
        patient_id = ntpath.basename(csv_path).replace(".csv", "")
        df_nodule_predictions = pandas.read_csv(csv_path)
        df_nodule_predictions = filter_patient_nodules_predictions(
            df_nodule_predictions, patient_id, CUBE_SIZE, luna16=False)
        patient_imgs = helpers.load_patient_images(
            patient_id, settings.TEST_EXTRACTED_IMAGE_DIR, "*_m.png")

        df_nodule_predictions = df_nodule_predictions.sort_values(
            by='nodule_chance', ascending=False)

        # NOTE(review): the outer loop reads a snapshot, so its
        # ``diameter_mm < 0`` check never sees flags set during this pass;
        # an already suppressed row can still suppress others. Kept as in
        # the original - confirm whether greedy suppression was intended.
        snapshot = df_nodule_predictions.copy()
        for _, nod_pred_row in snapshot.iterrows():
            if nod_pred_row["diameter_mm"] < 0:
                continue
            nx, ny, nz = helpers.percentage_to_pixels(
                nod_pred_row["coord_x"], nod_pred_row["coord_y"],
                nod_pred_row["coord_z"], patient_imgs)

            for index, row in df_nodule_predictions.iterrows():
                x, y, z = helpers.percentage_to_pixels(
                    row["coord_x"], row["coord_y"], row["coord_z"],
                    patient_imgs)
                dist = math.sqrt((nx - x) ** 2 + (ny - y) ** 2
                                 + (nz - z) ** 2)
                # ``dist > 1`` keeps a row from suppressing itself.
                if not 1 < dist < min_dist:
                    continue
                print("# Too close")
                if nod_pred_row['nodule_chance'] > row['nodule_chance']:
                    mal_score = row["diameter_mm"]
                    if mal_score > 0:
                        mal_score *= -1
                    df_nodule_predictions.loc[index,
                                              "diameter_mm"] = mal_score

        df_nodule_predictions.to_csv(pos_labels_dir + patient_id
                                     + "_candidates_1.csv", index=False)

        keep = ((df_nodule_predictions["diameter_mm"] >= 0)
                & (df_nodule_predictions["nodule_chance"] >= 0.9))
        # Work on an explicit copy: deleting and adding columns on a
        # filtered slice triggers pandas' SettingWithCopy warning.
        df_candidates = df_nodule_predictions.loc[keep].copy()
        del df_candidates['diameter']
        del df_candidates['diameter_mm']
        del df_candidates['anno_index']
        df_candidates['seriesuid'] = patient_id
        df_candidates.to_csv(pos_labels_dir + patient_id + "_candidates.csv",
                             index=False)
        total_false_pos += len(df_candidates)

    print("Total false pos:", total_false_pos)
def get_patient_xyz_do(src_path, patient_id, f_path):
    """Convert a patient's fractional annotations back to scan coordinates.

    Args:
        src_path: Image file read with ``SimpleITK.ReadImage``.
        patient_id: ``seriesuid`` whose rows are selected from the CSV.
        f_path: CSV with columns ``seriesuid``, ``coordX``, ``coordY``,
            ``coordZ`` and ``probability``.

    Returns:
        List of ``[patient_id, x, y, z, probability]`` rows with the
        coordinates rounded to four decimals.
    """
    all_nodes = pandas.read_csv(f_path)
    itk_img = SimpleITK.ReadImage(src_path)
    voxels = SimpleITK.GetArrayFromImage(itk_img)
    print("Img array: ", voxels.shape)

    patient_rows = all_nodes[all_nodes["seriesuid"] == patient_id]
    print("Annos: ", len(patient_rows))

    # Unpacking doubles as a check that the array is three-dimensional.
    num_z, height, width = voxels.shape
    origin = numpy.array(itk_img.GetOrigin())  # x,y,z
    print("Origin (x,y,z): ", origin)
    spacing = numpy.array(itk_img.GetSpacing())
    print("Spacing (x,y,z): ", spacing)
    rescale = spacing / settings.TARGET_VOXEL_MM
    print("Rescale: ", rescale)
    direction = numpy.array(itk_img.GetDirection())
    print("Direction: ", direction)

    mirror_x = round(direction[0]) == -1
    if mirror_x:
        print("Swappint x origin")
    mirror_y = round(direction[4]) == -1
    if mirror_y:
        print("Swappint y origin")
    print("Direction: ", direction)
    # NOTE(review): ``direction`` is not reset to +1 above, so this assert
    # fails for scans that take a mirror branch - confirm that is intended.
    assert abs(sum(direction) - 3) < 0.01

    patient_imgs = helpers.load_patient_images(
        patient_id, settings.TEST_EXTRACTED_IMAGE_DIR, "*_i.png")

    pos_annos = []
    for _, annotation in patient_rows.iterrows():
        fractions = numpy.array(
            [annotation["coordX"], annotation["coordY"],
             annotation["coordZ"]])
        # Fractions -> voxel units of the loaded volume -> shifted by origin.
        scaled = fractions * (patient_imgs.swapaxes(0, 2).shape)
        shifted = (scaled * settings.TARGET_VOXEL_MM) + origin

        node_x = shifted[0] * -1 if mirror_x else shifted[0]
        node_y = shifted[1] * -1 if mirror_y else shifted[1]
        node_z = shifted[2]
        print("Node org (x,y,z,diam): ",
              (round(node_x, 2), round(node_y, 2), round(node_z, 2)))
        pos_annos.append([
            patient_id,
            round(node_x, 4),
            round(node_y, 4),
            round(node_z, 4), annotation['probability']
        ])
    return pos_annos
def predict_cubes(path, model_path, magnification=1, holdout_no=-1,
                  ext_name="", fold_count=2):
    """Slide a cube over one pre-processed patient and write its predictions.

    Image ("*_i.png") and mask ("*_m.png") volumes are loaded from
    ``<path>_Preprocessed``, cut into ``CUBE_SIZE`` cubes on a
    ``PREDICT_STEP`` grid and scored by the network in batches of 128. Cubes
    scoring above ``P_TH`` become rows of ``<dst_dir>/<path>.csv``.

    Args:
        path: Patient id; also the prefix of the ``_Preprocessed`` directory.
        model_path: Weight file handed to ``get_net`` as ``load_weight_path``.
        magnification: Rescale factor applied to image and mask when != 1.
        holdout_no: Accepted for compatibility; not used.
        ext_name: Suffix of the predictions directory name.
        fold_count: Accepted for compatibility; not used.

    Raises:
        Exception: Whatever ``helpers.load_patient_images`` raises when the
            pre-processed images cannot be loaded (re-raised after a hint
            is printed).
    """
    holdout_ext = ""
    dst_dir = settings.LUNA_NODULE_DETECTION_DIR
    dst_dir += ("predictions" + str(int(magnification * 10)) + holdout_ext
                + "_" + ext_name + "/")
    # makedirs also creates the base detection directory when it is missing.
    os.makedirs(dst_dir, exist_ok=True)

    sw = helpers.Stopwatch.start_new()
    model = step2_train_nodule_detector.get_net(
        input_shape=(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE, 1),
        load_weight_path=model_path)
    patient_id = path
    all_predictions_csv = []

    print(": ", patient_id)
    csv_target_path = dst_dir + patient_id + ".csv"
    print(patient_id)

    try:
        patient_img = helpers.load_patient_images(
            patient_id + '_Preprocessed', '', "*_i.png", [])
    except Exception:
        # The old bare ``except`` printed this hint and then fell through to
        # code that used the unbound ``patient_img``; surface the real error.
        print('Please Re-Process the dicom file again')
        raise
    if magnification != 1:
        patient_img = helpers.rescale_patient_images(patient_img, (1, 1, 1),
                                                     magnification)

    patient_mask = helpers.load_patient_images(patient_id + '_Preprocessed',
                                               '', "*_m.png", [])
    if magnification != 1:
        patient_mask = helpers.rescale_patient_images(
            patient_mask, (1, 1, 1), magnification, is_mask_image=True)

    step = PREDICT_STEP
    CROP_SIZE = CUBE_SIZE

    # Number of cube positions per axis: positions k*step for which the cube
    # end stays strictly below the axis length.
    predict_volume_shape_list = [0, 0, 0]
    for dim in range(3):
        dim_indent = 0
        while dim_indent + CROP_SIZE < patient_img.shape[dim]:
            predict_volume_shape_list[dim] += 1
            dim_indent += step

    predict_volume_shape = tuple(predict_volume_shape_list)
    predict_volume = numpy.zeros(shape=predict_volume_shape, dtype=float)
    print("Predict volume shape: ", predict_volume.shape)

    total_cubes = predict_volume.size
    done_count = 0
    skipped_count = 0
    batch_size = 128
    batch_list = []
    batch_list_coords = []
    patient_predictions_csv = []
    annotation_index = 0

    for z in range(predict_volume_shape[0]):
        for y in range(predict_volume_shape[1]):
            for x in range(predict_volume_shape[2]):
                z0, y0, x0 = z * step, y * step, x * step
                cube_img = patient_img[z0:z0 + CROP_SIZE,
                                       y0:y0 + CROP_SIZE,
                                       x0:x0 + CROP_SIZE]
                cube_mask = patient_mask[z0:z0 + CROP_SIZE,
                                         y0:y0 + CROP_SIZE,
                                         x0:x0 + CROP_SIZE]

                if cube_mask.sum() < 2000:
                    # Counted as skipped and not sent to the network, as in
                    # the multi-patient predict_cubes variant in this file.
                    skipped_count += 1
                else:
                    if CROP_SIZE != CUBE_SIZE:
                        cube_img = helpers.rescale_patient_images2(
                            cube_img, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))
                    batch_list.append(prepare_image_for_net3D(cube_img))
                    batch_list_coords.append((z, y, x))

                # Predict when the batch is full, and also on the very last
                # cube: previously a trailing partial batch was dropped, so
                # its cubes were never scored.
                is_last_cube = done_count + 1 == total_cubes
                if batch_list and (len(batch_list) == batch_size
                                   or is_last_cube):
                    batch_data = numpy.vstack(batch_list)
                    p = model.predict(batch_data, batch_size=batch_size)
                    for i, (g_z, g_y, g_x) in enumerate(batch_list_coords):
                        nodule_chance = p[0][i][0]
                        predict_volume[g_z, g_y, g_x] = nodule_chance
                        if nodule_chance <= P_TH:
                            continue
                        # Cube centre in voxels of the loaded volume.
                        p_z = g_z * step + CROP_SIZE / 2
                        p_y = g_y * step + CROP_SIZE / 2
                        p_x = g_x * step + CROP_SIZE / 2
                        p_z_perc = round(p_z / patient_img.shape[0], 4)
                        p_y_perc = round(p_y / patient_img.shape[1], 4)
                        p_x_perc = round(p_x / patient_img.shape[2], 4)
                        diameter_mm = round(p[1][i][0], 4)
                        diameter_perc = round(
                            diameter_mm / patient_img.shape[2], 4)
                        nodule_chance = round(nodule_chance, 4)
                        patient_predictions_csv_line = [
                            annotation_index, p_x_perc, p_y_perc, p_z_perc,
                            diameter_perc, nodule_chance, diameter_mm
                        ]
                        patient_predictions_csv.append(
                            patient_predictions_csv_line)
                        all_predictions_csv.append(
                            [patient_id] + patient_predictions_csv_line)
                        annotation_index += 1
                    batch_list = []
                    batch_list_coords = []

                done_count += 1
                if done_count % 10000 == 0:
                    print("Done: ", done_count, " skipped:", skipped_count)

    df = pandas.DataFrame(patient_predictions_csv,
                          columns=[
                              "anno_index", "coord_x", "coord_y", "coord_z",
                              "diameter", "nodule_chance", "diameter_mm"
                          ])
    print("Started Filtering")
    print(all_predictions_csv)
    # The return value is ignored here, as in the original code.
    # NOTE(review): presumably the helper edits ``df`` in place - confirm.
    filter_patient_nodules_predictions(df, patient_id,
                                       CROP_SIZE * magnification)
    df.to_csv(csv_target_path, index=False)

    print(predict_volume.mean())
    print("Done in : ", sw.get_elapsed_seconds(), " seconds")
def process_luna_candidates_patient(src_path, patient_id):
    """Write a patient's annotations as fractional coordinates to a CSV.

    Rows of ``../../data/csv/train/annotations.csv`` whose ``seriesuid``
    equals ``patient_id`` are shifted by the image origin, divided by
    ``settings.TARGET_VOXEL_MM`` and by the extracted volume's (x, y, z)
    shape, then saved as ``<patient_id>_candidates_luna.csv`` in the
    ``_labels/`` directory.

    Args:
        src_path: Image file read with ``SimpleITK.ReadImage``; only its
            origin and direction are used.
        patient_id: ``seriesuid`` of the patient to export.

    Raises:
        AssertionError: If, after normalising mirrored x/y axes, the
            direction entries do not sum to 3.
    """
    dst_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"
    # exist_ok avoids the exists()/mkdir() race and creates missing parents.
    os.makedirs(dst_dir, exist_ok=True)

    # Header values only; the voxel array is not needed here.
    itk_img = SimpleITK.ReadImage(src_path)
    origin = numpy.array(itk_img.GetOrigin())  # x,y,z
    direction = numpy.array(itk_img.GetDirection())

    # A mirrored axis is normalised by negating the origin (and, in the
    # loop below, the annotation coordinate) on that axis.
    flip_direction_x = round(direction[0]) == -1
    if flip_direction_x:
        origin[0] *= -1
        direction[0] = 1
    flip_direction_y = round(direction[4]) == -1
    if flip_direction_y:
        origin[1] *= -1
        direction[4] = 1
    assert abs(sum(direction) - 3) < 0.01

    # The annotations file is read once (it used to be read twice).
    src_df = pandas.read_csv("../../data/csv/train/annotations.csv")
    src_df = src_df[src_df["seriesuid"] == patient_id]

    patient_imgs = helpers.load_patient_images(
        patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR, "*_i.png")
    # Axes 0 and 2 are swapped so the shape lines up with (x, y, z) values.
    volume_shape_xyz = patient_imgs.swapaxes(0, 2).shape

    candidate_list = []
    for _, candiate_row in src_df.iterrows():
        node_x = candiate_row["coordX"]
        if flip_direction_x:
            node_x *= -1
        node_y = candiate_row["coordY"]
        if flip_direction_y:
            node_y *= -1
        node_z = candiate_row["coordZ"]
        candidate_diameter = candiate_row["diameter_mm"]

        center_float = numpy.array([node_x, node_y, node_z])
        center_float_rescaled = ((center_float - origin)
                                 / settings.TARGET_VOXEL_MM)
        center_float_percent = center_float_rescaled / volume_shape_xyz

        # NOTE(review): the diameter is divided by axis 0 of the volume
        # while the coordinates use the swapped (x, y, z) shape - confirm
        # which axis readers of the "diameter" column expect.
        candidate_list.append([
            len(candidate_list),
            round(center_float_percent[0], 4),
            round(center_float_percent[1], 4),
            round(center_float_percent[2], 4),
            round(candidate_diameter / patient_imgs.shape[0], 4), 0
        ])

    df_candidates = pandas.DataFrame(candidate_list,
                                     columns=[
                                         "anno_index", "coord_x", "coord_y",
                                         "coord_z", "diameter", "malscore"
                                     ])
    df_candidates.to_csv(dst_dir + patient_id + "_candidates_luna.csv",
                         index=False)
def generate_fp_npy(fp_df_all, annotation):
    """Cut cubes around false-positive candidates and save them as .npz.

    Args:
        fp_df_all: Candidates with columns ``seriesuid``, ``class``,
            ``coordX``, ``coordY`` and ``coordZ``; only rows with
            ``class == 0`` are used.
        annotation: Not used by the current implementation.

    When ``traindata_path`` equals ``setting.LUNA_IMG`` the shuffled cubes
    are split into 30 ``trainImages_neg600_XXXX.npz`` files; otherwise a
    single ``trainImages_neg_val.npz`` is written. All files go to
    ``false_positive_path``.
    """
    negative_cube_py = []
    uids = sorted(fp_df_all['seriesuid'].unique())
    print('uid num:', len(uids))

    for uid in tqdm(uids):  # ``uid`` instead of shadowing builtin ``id``
        print(uid)
        images = helpers.load_patient_images(uid, traindata_path,
                                             "*_i.png")  # z,y,x
        fp_df = fp_df_all[fp_df_all['seriesuid'] == uid]
        for _, row in fp_df.iterrows():
            if row['class'] != 0:  # keep false positives only
                continue
            x = int(row['coordX'])
            y = int(row['coordY'])
            z = int(row['coordZ'])
            for _ in range(num_per_nodule):
                # NOTE(review): the result is stored as one cube; confirm
                # that get_lung_cube returns a single array when the mask
                # argument is None.
                lung_cube = helpers.get_lung_cube(None, images, x, y, z,
                                                  fp_radius,
                                                  fp_radius / np.sqrt(3))
                negative_cube_py.append(lung_cube)

    negative_cube_py = np.array(negative_cube_py, dtype=np.uint8)
    neg_shape = negative_cube_py.shape
    negative_cube_py = np.expand_dims(negative_cube_py, axis=-1)
    print(negative_cube_py.shape)

    # Shuffle along the sample axis.
    rand_ii = np.random.choice(range(neg_shape[0]),
                               size=neg_shape[0],
                               replace=False)
    negative_cube_py = negative_cube_py[rand_ii]

    if traindata_path == setting.LUNA_IMG:
        # 31 evenly spaced boundaries -> 30 chunks. The ``np.int`` alias
        # was removed in NumPy 1.24, so the builtin ``int`` is used.
        average_neg_index = np.array(np.linspace(0, neg_shape[0], 31),
                                     dtype=int)
        print(average_neg_index)
        for i in trange(30):
            start_neg = average_neg_index[i]
            end_neg = average_neg_index[i + 1]
            np.savez_compressed(
                false_positive_path + "trainImages_neg600_"
                + str(i).rjust(4, '0') + ".npz",
                arr_0=negative_cube_py[start_neg:end_neg])
    else:
        np.savez_compressed(false_positive_path + "trainImages_neg_val.npz",
                            arr_0=negative_cube_py)
def process_luna_candidates_patient(src_path, patient_id):
    """Export one patient's annotations as fractional volume coordinates.

    The patient's rows of ``../../data/csv/train/annotations.csv`` are
    shifted by the image origin, divided by ``settings.TARGET_VOXEL_MM``
    and by the extracted volume's (x, y, z) shape, and written to
    ``<patient_id>_candidates_luna.csv`` in the ``_labels/`` directory.

    Args:
        src_path: Image file read with ``SimpleITK.ReadImage``; only its
            origin and direction are used.
        patient_id: ``seriesuid`` selecting the rows to export.

    Raises:
        AssertionError: If, after normalising mirrored x/y axes, the
            direction entries do not sum to 3.
    """
    dst_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"
    # exist_ok avoids the exists()/mkdir() race and creates missing parents.
    os.makedirs(dst_dir, exist_ok=True)

    # Header values only; the voxel array is not needed here.
    itk_img = SimpleITK.ReadImage(src_path)
    origin = numpy.array(itk_img.GetOrigin())  # x,y,z
    direction = numpy.array(itk_img.GetDirection())

    # Normalise mirrored axes: negate the origin on that axis and remember
    # to negate the annotation coordinate as well.
    flip_direction_x = False
    flip_direction_y = False
    if round(direction[0]) == -1:
        origin[0] *= -1
        direction[0] = 1
        flip_direction_x = True
    if round(direction[4]) == -1:
        origin[1] *= -1
        direction[4] = 1
        flip_direction_y = True
    assert abs(sum(direction) - 3) < 0.01

    # Single read of the annotations file (the old code read it twice).
    annotations = pandas.read_csv("../../data/csv/train/annotations.csv")
    src_df = annotations[annotations["seriesuid"] == patient_id]

    patient_imgs = helpers.load_patient_images(
        patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR, "*_i.png")
    # Axes 0 and 2 are swapped so the shape lines up with (x, y, z) values.
    shape_xyz = patient_imgs.swapaxes(0, 2).shape

    candidate_list = []
    for _, candiate_row in src_df.iterrows():
        node_x = candiate_row["coordX"]
        node_y = candiate_row["coordY"]
        node_z = candiate_row["coordZ"]
        if flip_direction_x:
            node_x *= -1
        if flip_direction_y:
            node_y *= -1
        candidate_diameter = candiate_row["diameter_mm"]

        center_float = numpy.array([node_x, node_y, node_z])
        center_float_rescaled = ((center_float - origin)
                                 / settings.TARGET_VOXEL_MM)
        center_float_percent = center_float_rescaled / shape_xyz

        # NOTE(review): the diameter is divided by axis 0 of the volume
        # while the coordinates use the swapped (x, y, z) shape - confirm
        # which axis readers of the "diameter" column expect.
        candidate_list.append([
            len(candidate_list),
            round(center_float_percent[0], 4),
            round(center_float_percent[1], 4),
            round(center_float_percent[2], 4),
            round(candidate_diameter / patient_imgs.shape[0], 4),
            0,
        ])

    df_candidates = pandas.DataFrame(
        candidate_list,
        columns=["anno_index", "coord_x", "coord_y", "coord_z", "diameter",
                 "malscore"])
    df_candidates.to_csv(dst_dir + patient_id + "_candidates_luna.csv",
                         index=False)
def predict_cubes(model_path, continue_job, only_patient_id=None, luna16=False, magnification=1, flip=False, train_data=True, holdout_no=-1, ext_name="", fold_count=2):
    """Slide the nodule detector over each extracted patient volume and save the hits.

    For every patient directory below ``settings.NDSB3_EXTRACTED_IMAGE_DIR``
    that passes the filters, the image volume is scanned with cubes of
    ``CUBE_SIZE`` at a stride of ``PREDICT_STEP``. Cubes whose mask sum is
    below 2000 are skipped; the others are predicted in batches of 128. Each
    prediction above ``P_TH`` becomes one row of ``<dst_dir><patient_id>.csv``.

    Fix over the previous revision: cubes still queued when the scan ends
    (fewer than a full batch) are now predicted as well instead of being
    silently dropped.

    Args:
        model_path: Weight file passed to ``step2_train_nodule_detector.get_net``.
        continue_job: Skip patients whose CSV already exists (only when
            ``only_patient_id`` is None).
        only_patient_id: If given, process just this patient.
        luna16: Selects the LUNA output directory and disables the label
            file filter.
        magnification: Rescale factor applied to images and masks when != 1.
        flip: Mirror every cube along its last axis before prediction.
        train_data: Chooses the stage1 or stage2 label file and enables the
            hold-out filter.
        holdout_no: Fold to process when ``train_data`` is true; ``None``
            disables the fold filter.
        ext_name: Suffix of the predictions directory name.
        fold_count: Modulus applied to ``helpers.get_patient_fold``.
    """
    if luna16:
        dst_dir = settings.LUNA_NODULE_DETECTION_DIR
    else:
        dst_dir = settings.NDSB3_NODULE_DETECTION_DIR
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    holdout_ext = ""
    flip_ext = "_flip" if flip else ""
    dst_dir += "predictions" + str(int(magnification * 10)) + holdout_ext + flip_ext + "_" + ext_name + "/"
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    sw = helpers.Stopwatch.start_new()
    model = step2_train_nodule_detector.get_net(input_shape=(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE, 1), load_weight_path=model_path)

    labels_df = None
    if not luna16:
        if train_data:
            labels_df = pandas.read_csv("resources/stage1_labels.csv")
        else:
            labels_df = pandas.read_csv("resources/stage2_sample_submission.csv")
        labels_df.set_index(["id"], inplace=True)

    image_root = settings.NDSB3_EXTRACTED_IMAGE_DIR
    patient_ids = [name for name in os.listdir(image_root) if os.path.isdir(image_root + name)]

    for patient_index, patient_id in enumerate(reversed(patient_ids)):
        if not luna16 and patient_id not in labels_df.index:
            continue
        if "metadata" in patient_id:
            continue
        if only_patient_id is not None and only_patient_id != patient_id:
            continue

        if holdout_no is not None and train_data:
            patient_fold = helpers.get_patient_fold(patient_id)
            patient_fold %= fold_count
            if patient_fold != holdout_no:
                continue

        print(patient_index, ": ", patient_id)
        csv_target_path = dst_dir + patient_id + ".csv"
        if continue_job and only_patient_id is None:
            if os.path.exists(csv_target_path):
                continue

        patient_img = helpers.load_patient_images(patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR, "*_i.png", [])
        if magnification != 1:
            patient_img = helpers.rescale_patient_images(patient_img, (1, 1, 1), magnification)

        patient_mask = helpers.load_patient_images(patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR, "*_m.png", [])
        if magnification != 1:
            patient_mask = helpers.rescale_patient_images(patient_mask, (1, 1, 1), magnification, is_mask_image=True)

        step = PREDICT_STEP
        CROP_SIZE = CUBE_SIZE

        # Number of cube positions per axis: every offset k * step for which
        # the cube still ends strictly inside the volume.
        predict_volume_shape_list = [0, 0, 0]
        for dim in range(3):
            dim_indent = 0
            while dim_indent + CROP_SIZE < patient_img.shape[dim]:
                predict_volume_shape_list[dim] += 1
                dim_indent += step

        predict_volume_shape = tuple(predict_volume_shape_list)
        predict_volume = numpy.zeros(shape=predict_volume_shape, dtype=float)
        print("Predict volume shape: ", predict_volume.shape)

        done_count = 0
        skipped_count = 0
        batch_size = 128
        batch_list = []
        batch_list_coords = []
        patient_predictions_csv = []

        def flush_batch():
            # Predict the queued cubes, record every hit above P_TH and empty
            # the queue. Called for full batches and once more after the scan
            # so the trailing partial batch is not lost.
            if not batch_list:
                return
            batch_data = numpy.vstack(batch_list)
            p = model.predict(batch_data, batch_size=batch_size)
            for i in range(len(p[0])):
                p_z, p_y, p_x = batch_list_coords[i]
                nodule_chance = p[0][i][0]
                predict_volume[p_z, p_y, p_x] = nodule_chance
                if nodule_chance > P_TH:
                    # Grid index -> centre of the cube in volume coordinates.
                    p_z = p_z * step + CROP_SIZE / 2
                    p_y = p_y * step + CROP_SIZE / 2
                    p_x = p_x * step + CROP_SIZE / 2

                    p_z_perc = round(p_z / patient_img.shape[0], 4)
                    p_y_perc = round(p_y / patient_img.shape[1], 4)
                    p_x_perc = round(p_x / patient_img.shape[2], 4)
                    diameter_mm = round(p[1][i][0], 4)
                    diameter_perc = round(diameter_mm / patient_img.shape[2], 4)
                    nodule_chance = round(nodule_chance, 4)
                    # anno_index is the running row number for this patient.
                    patient_predictions_csv.append([len(patient_predictions_csv), p_x_perc, p_y_perc, p_z_perc, diameter_perc, nodule_chance, diameter_mm])
            del batch_list[:]
            del batch_list_coords[:]

        for z in range(0, predict_volume_shape[0]):
            for y in range(0, predict_volume_shape[1]):
                for x in range(0, predict_volume_shape[2]):
                    cube_img = patient_img[z * step:z * step + CROP_SIZE, y * step:y * step + CROP_SIZE, x * step:x * step + CROP_SIZE]
                    cube_mask = patient_mask[z * step:z * step + CROP_SIZE, y * step:y * step + CROP_SIZE, x * step:x * step + CROP_SIZE]

                    if cube_mask.sum() < 2000:
                        skipped_count += 1
                    else:
                        if flip:
                            cube_img = cube_img[:, :, ::-1]

                        if CROP_SIZE != CUBE_SIZE:
                            cube_img = helpers.rescale_patient_images2(cube_img, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))

                        img_prep = prepare_image_for_net3D(cube_img)
                        batch_list.append(img_prep)
                        batch_list_coords.append((z, y, x))
                        if len(batch_list) >= batch_size:
                            flush_batch()

                    done_count += 1
                    if done_count % 10000 == 0:
                        print("Done: ", done_count, " skipped:", skipped_count)

        # Predict whatever is left over from the last, incomplete batch.
        flush_batch()

        df = pandas.DataFrame(patient_predictions_csv, columns=["anno_index", "coord_x", "coord_y", "coord_z", "diameter", "nodule_chance", "diameter_mm"])
        # The return value is ignored; presumably df is edited in place - TODO confirm.
        filter_patient_nodules_predictions(df, patient_id, CROP_SIZE * magnification)
        df.to_csv(csv_target_path, index=False)

        print(predict_volume.mean())
        print("Done in : ", sw.get_elapsed_seconds(), " seconds")
def process_luna_candidates_patient(src_path, patient_id):
    """Write the class-0 LUNA candidates of one scan that lie far from every known nodule.

    Candidates come from ``resources/luna16_annotations/candidates_V2.csv``
    (rows of this ``seriesuid`` with ``class == 0``). A candidate is dropped
    when its distance to a row of ``<patient_id>_annos_pos_lidc.csv`` is below
    that row's diameter + 64, or, if a manual label file exists, below a
    manual row's diameter + 72. The survivors are stored with coordinates
    expressed as fractions of the extracted volume in
    ``<labels dir><patient_id>_candidates_luna.csv``.

    Args:
        src_path: Path handed to ``SimpleITK.ReadImage``.
        patient_id: Series UID used for filtering and for all file names.

    Raises:
        AssertionError: If the direction values do not sum to 3 once
            mirrored x/y axes have been normalised.
    """
    labels_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "/_labels/"
    # Read before the directory check, so a missing file raises before
    # anything is created on disk.
    lidc_annos = pandas.read_csv(labels_dir + patient_id + "_annos_pos_lidc.csv")
    if not os.path.exists(labels_dir):
        os.mkdir(labels_dir)

    manual_path = settings.EXTRA_DATA_DIR + "luna16_manual_labels/" + patient_id + ".csv"
    manual_annos = pandas.read_csv(manual_path) if os.path.exists(manual_path) else None

    scan = SimpleITK.ReadImage(src_path)
    voxels = SimpleITK.GetArrayFromImage(scan)
    print("Img array: ", voxels.shape)
    print("Pos annos: ", len(lidc_annos))
    _slices, _rows, _cols = voxels.shape  # unpacking fails unless the array is 3-D

    origin = numpy.array(scan.GetOrigin())  # x,y,z origin in world coordinates (mm)
    print("Origin (x,y,z): ", origin)
    spacing = numpy.array(scan.GetSpacing())  # voxel spacing in world coordinates (mm)
    print("Spacing (x,y,z): ", spacing)
    print("Rescale: ", spacing / settings.TARGET_VOXEL_MM)
    direction = numpy.array(scan.GetDirection())
    print("Direction: ", direction)

    # Mirrored axes: negate the origin here and the candidate coordinate below.
    mirror_x = round(direction[0]) == -1
    if mirror_x:
        origin[0] *= -1
        direction[0] = 1
        print("Swappint x origin")
    mirror_y = round(direction[4]) == -1
    if mirror_y:
        origin[1] *= -1
        direction[4] = 1
        print("Swappint y origin")
    print("Direction: ", direction)
    assert abs(sum(direction) - 3) < 0.01

    candidates = pandas.read_csv("resources/luna16_annotations/" + "candidates_V2.csv")
    candidates = candidates[candidates["seriesuid"] == patient_id]
    candidates = candidates[candidates["class"] == 0]

    patient_imgs = helpers.load_patient_images(patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR, "*_i.png")
    candidate_diameter = 6
    kept = []
    for _, cand in candidates.iterrows():
        world_x = -cand["coordX"] if mirror_x else cand["coordX"]
        world_y = -cand["coordY"] if mirror_y else cand["coordY"]
        world_xyz = numpy.array([world_x, world_y, cand["coordZ"]])

        center_float_rescaled = (world_xyz - origin) / settings.TARGET_VOXEL_MM
        fraction_xyz = center_float_rescaled / patient_imgs.swapaxes(0, 2).shape
        cand_x = center_float_rescaled[0]
        cand_y = center_float_rescaled[1]
        cand_z = center_float_rescaled[2]

        too_close = False
        for _, pos_row in lidc_annos.iterrows():
            pos_x = pos_row["coord_x"] * patient_imgs.shape[2]
            pos_y = pos_row["coord_y"] * patient_imgs.shape[1]
            pos_z = pos_row["coord_z"] * patient_imgs.shape[0]
            diameter = pos_row["diameter"] * patient_imgs.shape[2]
            dist = math.sqrt(math.pow(pos_x - cand_x, 2) + math.pow(pos_y - cand_y, 2) + math.pow(pos_z - cand_z, 2))
            if dist < (diameter + 64):  # make sure we have a big margin
                too_close = True
                print("################### Too close", (cand_x, cand_y, cand_z))
                break

        if not too_close and manual_annos is not None:
            for _, pos_row in manual_annos.iterrows():
                pos_x = pos_row["x"] * patient_imgs.shape[2]
                pos_y = pos_row["y"] * patient_imgs.shape[1]
                pos_z = pos_row["z"] * patient_imgs.shape[0]
                diameter = pos_row["d"] * patient_imgs.shape[2]
                print((pos_x, pos_y, pos_z))
                print(center_float_rescaled)
                dist = math.sqrt(math.pow(pos_x - center_float_rescaled[0], 2) + math.pow(pos_y - center_float_rescaled[1], 2) + math.pow(pos_z - center_float_rescaled[2], 2))
                if dist < (diameter + 72):  # make sure we have a big margin
                    too_close = True
                    print("################### Too close", center_float_rescaled)
                    break

        if too_close:
            continue

        kept.append([
            len(kept),
            round(fraction_xyz[0], 4),
            round(fraction_xyz[1], 4),
            round(fraction_xyz[2], 4),
            round(candidate_diameter / patient_imgs.shape[0], 4),
            0,
        ])

    df_candidates = pandas.DataFrame(kept, columns=["anno_index", "coord_x", "coord_y", "coord_z", "diameter", "malscore"])
    df_candidates.to_csv(labels_dir + patient_id + "_candidates_luna.csv", index=False)
def predict_cubes(patient_ids, z0, model_path, magnification=1, flip=False, holdout_no=-1, ext_name="", fold_count=2):
    """Scan a patient volume with the nodule detector and return the hits as a DataFrame.

    The volume is covered with cubes of ``CUBE_SIZE`` at a stride of
    ``PREDICT_STEP``. Cubes whose mask sum is below 2000 are skipped; the
    others are predicted by the module-level ``model`` in batches of 128.
    Every prediction above ``P_TH`` becomes one row of the result.

    Fixes over the previous revision:
      * cubes still queued when the scan ends (fewer than a full batch) are
        now predicted instead of being silently dropped;
      * the ``z0`` argument is no longer overwritten inside the patient loop,
        so "all slices" (``z0 < 0``) cannot degrade to "slice 0 only" for
        later patients.

    Args:
        patient_ids: Patient ids, visited in reverse order; ids containing
            "metadata" or "labels" are ignored.
        z0: Index of the single z grid layer to scan, or a negative value to
            scan all layers.
        model_path: Not used by this function (the global ``model`` is).
        magnification: Rescale factor applied to images and masks when != 1.
        flip: Mirror every cube along its last axis before prediction.
        holdout_no, ext_name, fold_count: Accepted for signature
            compatibility; not used here.

    Returns:
        DataFrame with columns anno_index, ax, ay, az, coord_x, coord_y,
        coord_z, diameter, nodule_chance, diameter_mm for the first patient
        processed; ``None`` if no patient id qualifies.
    """
    sw = helpers.Stopwatch.start_new()
    for patient_index, patient_id in enumerate(reversed(patient_ids)):
        if "metadata" in patient_id:
            continue
        if "labels" in patient_id:
            continue

        patient_img = helpers.load_patient_images(patient_id, LUNA16_EXTRACTED_IMAGE_DIR, "*_i.png", [])
        if magnification != 1:
            patient_img = helpers.rescale_patient_images(patient_img, (1, 1, 1), magnification)

        patient_mask = helpers.load_patient_images(patient_id, LUNA16_EXTRACTED_IMAGE_DIR, "*_m.png", [])
        if magnification != 1:
            patient_mask = helpers.rescale_patient_images(patient_mask, (1, 1, 1), magnification, is_mask_image=True)

        step = PREDICT_STEP
        CROP_SIZE = CUBE_SIZE

        # Number of cube positions per axis: every offset k * step for which
        # the cube still ends strictly inside the volume.
        predict_volume_shape_list = [0, 0, 0]
        for dim in range(3):
            dim_indent = 0
            while dim_indent + CROP_SIZE < patient_img.shape[dim]:
                predict_volume_shape_list[dim] += 1
                dim_indent += step

        predict_volume_shape = tuple(predict_volume_shape_list)
        predict_volume = numpy.zeros(shape=predict_volume_shape, dtype=float)

        skipped_count = 0
        batch_size = 128
        batch_list = []
        batch_list_coords = []
        patient_predictions_csv = []

        def flush_batch():
            # Predict the queued cubes, record every hit above P_TH and empty
            # the queue. Called for full batches and once more after the scan
            # so the trailing partial batch is not lost.
            if not batch_list:
                return
            batch_data = numpy.vstack(batch_list)
            p = model.predict(batch_data, batch_size=batch_size)
            for i in range(len(p[0])):
                p_z, p_y, p_x = batch_list_coords[i]
                nodule_chance = p[0][i][0]
                predict_volume[p_z, p_y, p_x] = nodule_chance
                if nodule_chance > P_TH:
                    # Grid index -> centre of the cube in volume coordinates.
                    p_z = p_z * step + CROP_SIZE / 2
                    p_y = p_y * step + CROP_SIZE / 2
                    p_x = p_x * step + CROP_SIZE / 2

                    p_z_perc = round(float(p_z) / patient_img.shape[0], 4)
                    p_y_perc = round(float(p_y) / patient_img.shape[1], 4)
                    p_x_perc = round(float(p_x) / patient_img.shape[2], 4)
                    diameter_mm = round(p[1][i][0], 4)
                    diameter_perc = round(diameter_mm / patient_img.shape[2], 4)
                    nodule_chance = round(nodule_chance, 4)
                    # anno_index is the running row number for this patient.
                    patient_predictions_csv.append([
                        len(patient_predictions_csv), p_x, p_y, p_z,
                        p_x_perc, p_y_perc, p_z_perc,
                        diameter_perc, nodule_chance, diameter_mm,
                    ])
            del batch_list[:]
            del batch_list_coords[:]

        # Work on locals so the caller's z0 request stays intact.
        if z0 < 0:
            z_start, z_stop = 0, predict_volume_shape[0]
        else:
            z_start, z_stop = z0, z0 + 1

        for z in range(z_start, z_stop):
            for y in range(0, predict_volume_shape[1]):
                for x in range(0, predict_volume_shape[2]):
                    cube_img = patient_img[z * step:z * step + CROP_SIZE, y * step:y * step + CROP_SIZE, x * step:x * step + CROP_SIZE]
                    cube_mask = patient_mask[z * step:z * step + CROP_SIZE, y * step:y * step + CROP_SIZE, x * step:x * step + CROP_SIZE]

                    if cube_mask.sum() < 2000:
                        skipped_count += 1
                        continue

                    if flip:
                        cube_img = cube_img[:, :, ::-1]

                    if CROP_SIZE != CUBE_SIZE:
                        cube_img = helpers.rescale_patient_images2(cube_img, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))

                    img_prep = prepare_image_for_net3D(cube_img)
                    batch_list.append(img_prep)
                    batch_list_coords.append((z, y, x))
                    if len(batch_list) >= batch_size:
                        flush_batch()

        # Predict whatever is left over from the last, incomplete batch.
        flush_batch()

        df = pandas.DataFrame(patient_predictions_csv, columns=[
            "anno_index", "ax", "ay", "az", "coord_x", "coord_y", "coord_z",
            "diameter", "nodule_chance", "diameter_mm"
        ])
        # The return value is ignored; presumably df is edited in place - TODO confirm.
        filter_patient_nodules_predictions(df, patient_id, CROP_SIZE * magnification)
        # NOTE(review): the result of the first processed patient is returned
        # immediately; confirm callers only ever pass a single patient id.
        return df
def process_excluded_annotations_patient(src_path, patient_id):
    """Write the excluded LUNA annotations of one scan that lie far from every positive one.

    Rows of ``resources/luna16_annotations/annotations_excluded.csv`` for
    this ``seriesuid`` are converted to fractions of the extracted volume.
    A row is dropped when its distance to a row of
    ``<patient_id>_annos_pos.csv`` is below that row's diameter + 64, or, if
    a manual label file exists, below a manual row's diameter + 72. The rest
    is saved as ``<labels dir><patient_id>_annos_excluded.csv``.

    Args:
        src_path: Path handed to ``SimpleITK.ReadImage``.
        patient_id: Series UID used for filtering and for all file names.

    Returns:
        ``[patient_id, spacing_x, spacing_y, spacing_z]`` with the spacing as
        reported by the ITK image.

    Raises:
        AssertionError: If the direction values do not sum to 3 once
            mirrored x/y axes have been normalised.
    """
    excluded_all = pandas.read_csv("resources/luna16_annotations/annotations_excluded.csv")

    labels_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"
    # Create the label directory and then the per-patient sub-directory.
    for folder in (labels_dir, labels_dir + patient_id + "/"):
        if not os.path.exists(folder):
            os.mkdir(folder)

    pos_annos_df = pandas.read_csv(settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/" + patient_id + "_annos_pos.csv")

    manual_annos = None
    manual_path = settings.EXTRA_DATA_DIR + "luna16_manual_labels/" + patient_id + ".csv"
    if os.path.exists(manual_path):
        manual_annos = pandas.read_csv(manual_path)
        manual_annos["dmm"]  # check: raises if the column is missing

    scan = SimpleITK.ReadImage(src_path)
    voxels = SimpleITK.GetArrayFromImage(scan)
    print("Img array: ", voxels.shape)

    patient_rows = excluded_all[excluded_all["seriesuid"] == patient_id]
    print("Annos: ", len(patient_rows))
    _slices, _rows, _cols = voxels.shape  # unpacking fails unless the array is 3-D

    origin = numpy.array(scan.GetOrigin())  # x,y,z origin in world coordinates (mm)
    print("Origin (x,y,z): ", origin)
    spacing = numpy.array(scan.GetSpacing())  # voxel spacing in world coordinates (mm)
    print("Spacing (x,y,z): ", spacing)
    print("Rescale: ", spacing / settings.TARGET_VOXEL_MM)
    direction = numpy.array(scan.GetDirection())
    print("Direction: ", direction)

    # Mirrored axes: negate the origin here and the annotation coordinate below.
    mirror_x = round(direction[0]) == -1
    if mirror_x:
        origin[0] *= -1
        direction[0] = 1
        print("Swappint x origin")
    mirror_y = round(direction[4]) == -1
    if mirror_y:
        origin[1] *= -1
        direction[4] = 1
        print("Swappint y origin")
    print("Direction: ", direction)
    assert abs(sum(direction) - 3) < 0.01

    patient_imgs = helpers.load_patient_images(patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR, "*_i.png")

    neg_annos = []
    for _, annotation in patient_rows.iterrows():
        world_x = -annotation["coordX"] if mirror_x else annotation["coordX"]
        world_y = -annotation["coordY"] if mirror_y else annotation["coordY"]
        world_xyz = numpy.array([world_x, world_y, annotation["coordZ"]])

        center_float_rescaled = (world_xyz - origin) / settings.TARGET_VOXEL_MM
        fraction_xyz = center_float_rescaled / patient_imgs.swapaxes(0, 2).shape

        # Excluded annotations get a fixed nominal size of 6.
        diameter_pixels = 6 / settings.TARGET_VOXEL_MM
        diameter_percent = diameter_pixels / float(patient_imgs.shape[1])

        too_close = False
        for _, pos_row in pos_annos_df.iterrows():
            pos_x = pos_row["coord_x"] * patient_imgs.shape[2]
            pos_y = pos_row["coord_y"] * patient_imgs.shape[1]
            pos_z = pos_row["coord_z"] * patient_imgs.shape[0]
            diameter = pos_row["diameter"] * patient_imgs.shape[2]
            print((pos_x, pos_y, pos_z))
            print(center_float_rescaled)
            dist = math.sqrt(math.pow(pos_x - center_float_rescaled[0], 2) + math.pow(pos_y - center_float_rescaled[1], 2) + math.pow(pos_z - center_float_rescaled[2], 2))
            if dist < (diameter + 64):  # make sure we have a big margin
                too_close = True
                print("################### Too close", center_float_rescaled)
                break

        if not too_close and manual_annos is not None:
            for _, pos_row in manual_annos.iterrows():
                pos_x = pos_row["x"] * patient_imgs.shape[2]
                pos_y = pos_row["y"] * patient_imgs.shape[1]
                pos_z = pos_row["z"] * patient_imgs.shape[0]
                diameter = pos_row["d"] * patient_imgs.shape[2]
                print((pos_x, pos_y, pos_z))
                print(center_float_rescaled)
                dist = math.sqrt(math.pow(pos_x - center_float_rescaled[0], 2) + math.pow(pos_y - center_float_rescaled[1], 2) + math.pow(pos_z - center_float_rescaled[2], 2))
                if dist < (diameter + 72):  # make sure we have a big margin
                    too_close = True
                    print("################### Too close", center_float_rescaled)
                    break

        if too_close:
            continue

        # The running index equals the number of rows kept so far.
        neg_annos.append([
            len(neg_annos),
            round(fraction_xyz[0], 4),
            round(fraction_xyz[1], 4),
            round(fraction_xyz[2], 4),
            round(diameter_percent, 4),
            1,
        ])

    df_annos = pandas.DataFrame(neg_annos, columns=["anno_index", "coord_x", "coord_y", "coord_z", "diameter", "malscore"])
    df_annos.to_csv(settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/" + patient_id + "_annos_excluded.csv", index=False)
    return [patient_id, spacing[0], spacing[1], spacing[2]]
def process_pos_annotations_patient(src_path, patient_id):
    """Convert the positive LUNA annotations of one scan to relative coordinates.

    Rows of ``resources/luna16_annotations/annotations.csv`` for this
    ``seriesuid`` are turned into fractions of the extracted image volume and
    saved as ``<labels dir><patient_id>_annos_pos.csv`` with ``malscore`` 1.

    Args:
        src_path: Path handed to ``SimpleITK.ReadImage``.
        patient_id: Series UID used for filtering and for all file names.

    Returns:
        ``[patient_id, spacing_x, spacing_y, spacing_z]`` with the spacing as
        reported by the ITK image.

    Raises:
        AssertionError: If the direction values do not sum to 3 once
            mirrored x/y axes have been normalised.
    """
    all_annos = pandas.read_csv("resources/luna16_annotations/annotations.csv")

    labels_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"
    # Create the label directory and then the per-patient sub-directory.
    for folder in (labels_dir, labels_dir + patient_id + "/"):
        if not os.path.exists(folder):
            os.mkdir(folder)

    scan = SimpleITK.ReadImage(src_path)
    voxels = SimpleITK.GetArrayFromImage(scan)
    print("Img array: ", voxels.shape)

    patient_rows = all_annos[all_annos["seriesuid"] == patient_id]
    print("Annos: ", len(patient_rows))
    _slices, _rows, _cols = voxels.shape  # unpacking fails unless the array is 3-D

    origin = numpy.array(scan.GetOrigin())  # x,y,z origin in world coordinates (mm)
    print("Origin (x,y,z): ", origin)
    spacing = numpy.array(scan.GetSpacing())  # voxel spacing in world coordinates (mm)
    print("Spacing (x,y,z): ", spacing)
    print("Rescale: ", spacing / settings.TARGET_VOXEL_MM)
    direction = numpy.array(scan.GetDirection())
    print("Direction: ", direction)

    # Mirrored axes: negate the origin here and the annotation coordinate below.
    mirror_x = round(direction[0]) == -1
    if mirror_x:
        origin[0] *= -1
        direction[0] = 1
        print("Swappint x origin")
    mirror_y = round(direction[4]) == -1
    if mirror_y:
        origin[1] *= -1
        direction[4] = 1
        print("Swappint y origin")
    print("Direction: ", direction)
    assert abs(sum(direction) - 3) < 0.01

    patient_imgs = helpers.load_patient_images(patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR, "*_i.png")

    pos_annos = []
    for _, annotation in patient_rows.iterrows():
        world_x = -annotation["coordX"] if mirror_x else annotation["coordX"]
        world_y = -annotation["coordY"] if mirror_y else annotation["coordY"]
        world_z = annotation["coordZ"]
        diam_mm = annotation["diameter_mm"]
        print("Node org (x,y,z,diam): ", (round(world_x, 2), round(world_y, 2), round(world_z, 2), round(diam_mm, 2)))

        world_xyz = numpy.array([world_x, world_y, world_z])
        # Index in the original (unresampled) voxel grid; only reported.
        source_index = numpy.rint((world_xyz - origin) / spacing)
        print("Node tra (x,y,z,diam): ", (source_index[0], source_index[1], source_index[2]))

        rescaled_xyz = (world_xyz - origin) / settings.TARGET_VOXEL_MM
        fraction_xyz = rescaled_xyz / patient_imgs.swapaxes(0, 2).shape
        print("Node sca (x,y,z,diam): ", (rescaled_xyz[0], rescaled_xyz[1], rescaled_xyz[2]))

        diameter_pixels = diam_mm / settings.TARGET_VOXEL_MM
        diameter_percent = diameter_pixels / float(patient_imgs.shape[1])

        # The running index equals the number of rows collected so far.
        pos_annos.append([
            len(pos_annos),
            round(fraction_xyz[0], 4),
            round(fraction_xyz[1], 4),
            round(fraction_xyz[2], 4),
            round(diameter_percent, 4),
            1,
        ])

    df_annos = pandas.DataFrame(pos_annos, columns=["anno_index", "coord_x", "coord_y", "coord_z", "diameter", "malscore"])
    df_annos.to_csv(settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/" + patient_id + "_annos_pos.csv", index=False)
    return [patient_id, spacing[0], spacing[1], spacing[2]]
def process_pos_annotations_patient(src_path, patient_id):
    """Convert the positive LUNA annotations of one scan to relative coordinates.

    Rows of ``resources/luna16_annotations/annotations.csv`` for this
    ``seriesuid`` are turned into fractions of the extracted image volume and
    saved as ``<labels dir><patient_id>_annos_pos.csv`` with ``malscore`` 1.

    Args:
        src_path: Path handed to ``SimpleITK.ReadImage``.
        patient_id: Series UID used for filtering and for all file names.

    Returns:
        ``[patient_id, spacing_x, spacing_y, spacing_z]`` with the spacing as
        reported by the ITK image.

    Raises:
        AssertionError: If the direction values do not sum to 3 once
            mirrored x/y axes have been normalised.
    """
    all_annos = pandas.read_csv("resources/luna16_annotations/annotations.csv")

    labels_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"
    # Create the label directory and then the per-patient sub-directory.
    for folder in (labels_dir, labels_dir + patient_id + "/"):
        if not os.path.exists(folder):
            os.mkdir(folder)

    scan = SimpleITK.ReadImage(src_path)
    voxels = SimpleITK.GetArrayFromImage(scan)
    print("Img array: ", voxels.shape)

    patient_rows = all_annos[all_annos["seriesuid"] == patient_id]
    print("Annos: ", len(patient_rows))
    _slices, _rows, _cols = voxels.shape  # unpacking fails unless the array is 3-D

    origin = numpy.array(scan.GetOrigin())  # x,y,z origin in world coordinates (mm)
    print("Origin (x,y,z): ", origin)
    spacing = numpy.array(scan.GetSpacing())  # voxel spacing in world coordinates (mm)
    print("Spacing (x,y,z): ", spacing)
    print("Rescale: ", spacing / settings.TARGET_VOXEL_MM)
    direction = numpy.array(scan.GetDirection())
    print("Direction: ", direction)

    # Mirrored axes: negate the origin here and the annotation coordinate below.
    mirror_x = round(direction[0]) == -1
    if mirror_x:
        origin[0] *= -1
        direction[0] = 1
        print("Swappint x origin")
    mirror_y = round(direction[4]) == -1
    if mirror_y:
        origin[1] *= -1
        direction[4] = 1
        print("Swappint y origin")
    print("Direction: ", direction)
    assert abs(sum(direction) - 3) < 0.01

    patient_imgs = helpers.load_patient_images(patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR, "*_i.png")

    pos_annos = []
    for _, annotation in patient_rows.iterrows():
        world_x = -annotation["coordX"] if mirror_x else annotation["coordX"]
        world_y = -annotation["coordY"] if mirror_y else annotation["coordY"]
        world_z = annotation["coordZ"]
        diam_mm = annotation["diameter_mm"]
        print("Node org (x,y,z,diam): ", (round(world_x, 2), round(world_y, 2), round(world_z, 2), round(diam_mm, 2)))

        world_xyz = numpy.array([world_x, world_y, world_z])
        # Index in the original (unresampled) voxel grid; only reported.
        source_index = numpy.rint((world_xyz - origin) / spacing)
        print("Node tra (x,y,z,diam): ", (source_index[0], source_index[1], source_index[2]))

        rescaled_xyz = (world_xyz - origin) / settings.TARGET_VOXEL_MM
        fraction_xyz = rescaled_xyz / patient_imgs.swapaxes(0, 2).shape
        print("Node sca (x,y,z,diam): ", (rescaled_xyz[0], rescaled_xyz[1], rescaled_xyz[2]))

        diameter_pixels = diam_mm / settings.TARGET_VOXEL_MM
        diameter_percent = diameter_pixels / float(patient_imgs.shape[1])

        # The running index equals the number of rows collected so far.
        pos_annos.append([
            len(pos_annos),
            round(fraction_xyz[0], 4),
            round(fraction_xyz[1], 4),
            round(fraction_xyz[2], 4),
            round(diameter_percent, 4),
            1,
        ])

    df_annos = pandas.DataFrame(pos_annos, columns=["anno_index", "coord_x", "coord_y", "coord_z", "diameter", "malscore"])
    df_annos.to_csv(settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/" + patient_id + "_annos_pos.csv", index=False)
    return [patient_id, spacing[0], spacing[1], spacing[2]]
def process_luna_candidates_patient(src_path, patient_id):
    """Write the class-0 LUNA candidates of one scan that lie far from every known nodule.

    Candidates come from ``resources/luna16_annotations/candidates_V2.csv``
    (rows of this ``seriesuid`` with ``class == 0``). A candidate is dropped
    when its distance to a row of ``<patient_id>_annos_pos_lidc.csv`` is below
    that row's diameter + 64, or, if a manual label file exists, below a
    manual row's diameter + 72. The survivors are stored with coordinates
    expressed as fractions of the extracted volume in
    ``<labels dir><patient_id>_candidates_luna.csv``.

    Args:
        src_path: Path handed to ``SimpleITK.ReadImage``.
        patient_id: Series UID used for filtering and for all file names.

    Raises:
        AssertionError: If the direction values do not sum to 3 once
            mirrored x/y axes have been normalised.
    """
    labels_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "/_labels/"
    # Read before the directory check, so a missing file raises before
    # anything is created on disk.
    lidc_annos = pandas.read_csv(labels_dir + patient_id + "_annos_pos_lidc.csv")
    if not os.path.exists(labels_dir):
        os.mkdir(labels_dir)

    manual_path = settings.EXTRA_DATA_DIR + "luna16_manual_labels/" + patient_id + ".csv"
    manual_annos = pandas.read_csv(manual_path) if os.path.exists(manual_path) else None

    scan = SimpleITK.ReadImage(src_path)
    voxels = SimpleITK.GetArrayFromImage(scan)
    print("Img array: ", voxels.shape)
    print("Pos annos: ", len(lidc_annos))
    _slices, _rows, _cols = voxels.shape  # unpacking fails unless the array is 3-D

    origin = numpy.array(scan.GetOrigin())  # x,y,z origin in world coordinates (mm)
    print("Origin (x,y,z): ", origin)
    spacing = numpy.array(scan.GetSpacing())  # voxel spacing in world coordinates (mm)
    print("Spacing (x,y,z): ", spacing)
    print("Rescale: ", spacing / settings.TARGET_VOXEL_MM)
    direction = numpy.array(scan.GetDirection())
    print("Direction: ", direction)

    # Mirrored axes: negate the origin here and the candidate coordinate below.
    mirror_x = round(direction[0]) == -1
    if mirror_x:
        origin[0] *= -1
        direction[0] = 1
        print("Swappint x origin")
    mirror_y = round(direction[4]) == -1
    if mirror_y:
        origin[1] *= -1
        direction[4] = 1
        print("Swappint y origin")
    print("Direction: ", direction)
    assert abs(sum(direction) - 3) < 0.01

    candidates = pandas.read_csv("resources/luna16_annotations/" + "candidates_V2.csv")
    candidates = candidates[candidates["seriesuid"] == patient_id]
    candidates = candidates[candidates["class"] == 0]

    patient_imgs = helpers.load_patient_images(patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR, "*_i.png")
    candidate_diameter = 6
    kept = []
    for _, cand in candidates.iterrows():
        world_x = -cand["coordX"] if mirror_x else cand["coordX"]
        world_y = -cand["coordY"] if mirror_y else cand["coordY"]
        world_xyz = numpy.array([world_x, world_y, cand["coordZ"]])

        center_float_rescaled = (world_xyz - origin) / settings.TARGET_VOXEL_MM
        fraction_xyz = center_float_rescaled / patient_imgs.swapaxes(0, 2).shape
        cand_x = center_float_rescaled[0]
        cand_y = center_float_rescaled[1]
        cand_z = center_float_rescaled[2]

        too_close = False
        for _, pos_row in lidc_annos.iterrows():
            pos_x = pos_row["coord_x"] * patient_imgs.shape[2]
            pos_y = pos_row["coord_y"] * patient_imgs.shape[1]
            pos_z = pos_row["coord_z"] * patient_imgs.shape[0]
            diameter = pos_row["diameter"] * patient_imgs.shape[2]
            dist = math.sqrt(math.pow(pos_x - cand_x, 2) + math.pow(pos_y - cand_y, 2) + math.pow(pos_z - cand_z, 2))
            if dist < (diameter + 64):  # make sure we have a big margin
                too_close = True
                print("################### Too close", (cand_x, cand_y, cand_z))
                break

        if not too_close and manual_annos is not None:
            for _, pos_row in manual_annos.iterrows():
                pos_x = pos_row["x"] * patient_imgs.shape[2]
                pos_y = pos_row["y"] * patient_imgs.shape[1]
                pos_z = pos_row["z"] * patient_imgs.shape[0]
                diameter = pos_row["d"] * patient_imgs.shape[2]
                print((pos_x, pos_y, pos_z))
                print(center_float_rescaled)
                dist = math.sqrt(math.pow(pos_x - center_float_rescaled[0], 2) + math.pow(pos_y - center_float_rescaled[1], 2) + math.pow(pos_z - center_float_rescaled[2], 2))
                if dist < (diameter + 72):  # make sure we have a big margin
                    too_close = True
                    print("################### Too close", center_float_rescaled)
                    break

        if too_close:
            continue

        kept.append([
            len(kept),
            round(fraction_xyz[0], 4),
            round(fraction_xyz[1], 4),
            round(fraction_xyz[2], 4),
            round(candidate_diameter / patient_imgs.shape[0], 4),
            0,
        ])

    df_candidates = pandas.DataFrame(kept, columns=["anno_index", "coord_x", "coord_y", "coord_z", "diameter", "malscore"])
    df_candidates.to_csv(labels_dir + patient_id + "_candidates_luna.csv", index=False)
def _is_too_close_to_annotations(center_xyz, annos_df, columns, volume_shape, margin):
    """Return True if ``center_xyz`` is within a safety margin of any row of ``annos_df``.

    Args:
        center_xyz: indexable (x, y, z) position, in the same voxel units as
            the annotations become once scaled by ``volume_shape``.
        annos_df: pandas DataFrame of annotations whose coordinates and
            diameter are stored as fractions of the volume size.
        columns: 4-tuple with the column names of (x, y, z, diameter).
        volume_shape: shape of the image volume, indexed as (z, y, x).
        margin: extra distance added to the annotation diameter; a candidate
            closer than ``diameter + margin`` counts as too close.

    Returns:
        True as soon as one annotation is too close, otherwise False.
    """
    col_x, col_y, col_z, col_diam = columns
    for _, row in annos_df.iterrows():
        # Annotations are stored as fractions; scale them back to voxels.
        pos_coord_x = row[col_x] * volume_shape[2]
        pos_coord_y = row[col_y] * volume_shape[1]
        pos_coord_z = row[col_z] * volume_shape[0]
        diameter = row[col_diam] * volume_shape[2]
        print((pos_coord_x, pos_coord_y, pos_coord_z))
        print(center_xyz)
        dist = math.sqrt(math.pow(pos_coord_x - center_xyz[0], 2) +
                         math.pow(pos_coord_y - center_xyz[1], 2) +
                         math.pow(pos_coord_z - center_xyz[2], 2))
        if dist < (diameter + margin):  # make sure we have a big margin
            print("################### Too close", center_xyz)
            return True
    return False


def process_excluded_annotations_patient(src_path, patient_id):
    """Write the "excluded" LUNA16 annotations of one patient as a labels CSV.

    Rows of ``resources/luna16_annotations/annotations_excluded.csv`` whose
    ``seriesuid`` equals ``patient_id`` are converted from world coordinates
    to positions relative to the extracted image volume. Annotations that lie
    too close to a known positive annotation (from ``<patient_id>_annos_pos.csv``
    or, when present, the manual label file) are dropped. The remaining rows
    are saved to
    ``settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/" + patient_id + "_annos_excluded.csv"``.

    Args:
        src_path: path of the image file read with ``SimpleITK.ReadImage``.
        patient_id: series uid used to select annotations and to build paths.

    Returns:
        ``[patient_id, spacing_x, spacing_y, spacing_z]`` where the spacing
        values come from ``itk_img.GetSpacing()``.

    Raises:
        KeyError: if a manual label file exists but has no ``dmm`` column.
        AssertionError: if the entries of the image direction do not sum to
            approximately 3 after the x/y flips.
    """
    margin_pos = 64      # margin around the generated positive annotations
    margin_manual = 72   # margin around the manual positive annotations

    df_node = pandas.read_csv("resources/luna16_annotations/annotations_excluded.csv")
    labels_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"
    # exist_ok avoids the check-then-create race when several patients are
    # processed concurrently; this also creates labels_dir itself if needed.
    os.makedirs(labels_dir + patient_id + "/", exist_ok=True)

    # pos_annos_df = pandas.read_csv(TRAIN_DIR + "metadata/" + patient_id + "_annos_pos_lidc.csv")
    pos_annos_df = pandas.read_csv(labels_dir + patient_id + "_annos_pos.csv")
    pos_annos_manual = None
    manual_path = settings.EXTRA_DATA_DIR + "luna16_manual_labels/" + patient_id + ".csv"
    if os.path.exists(manual_path):
        pos_annos_manual = pandas.read_csv(manual_path)
        # Schema check kept from the original code: fails fast with KeyError
        # when the manual label file lacks the "dmm" column.
        pos_annos_manual["dmm"]

    itk_img = SimpleITK.ReadImage(src_path)
    img_array = SimpleITK.GetArrayFromImage(itk_img)
    print("Img array: ", img_array.shape)
    df_patient = df_node[df_node["seriesuid"] == patient_id]
    print("Annos: ", len(df_patient))

    origin = numpy.array(itk_img.GetOrigin())  # x,y,z Origin in world coordinates (mm)
    print("Origin (x,y,z): ", origin)
    spacing = numpy.array(itk_img.GetSpacing())  # spacing of voxels in world coor. (mm)
    print("Spacing (x,y,z): ", spacing)
    rescale = spacing / settings.TARGET_VOXEL_MM
    print("Rescale: ", rescale)

    # Flattened direction entries; indices 0 and 4 are treated below as the
    # x and y axis entries (NOTE(review): presumably the diagonal of a
    # row-major 3x3 direction matrix - confirm against SimpleITK docs).
    direction = numpy.array(itk_img.GetDirection())
    print("Direction: ", direction)
    flip_direction_x = False
    flip_direction_y = False
    if round(direction[0]) == -1:
        origin[0] *= -1
        direction[0] = 1
        flip_direction_x = True
        print("Swappint x origin")
    if round(direction[4]) == -1:
        origin[1] *= -1
        direction[4] = 1
        flip_direction_y = True
        print("Swappint y origin")
    print("Direction: ", direction)
    # After the flips the direction entries are expected to sum to ~3.
    assert abs(sum(direction) - 3) < 0.01

    patient_imgs = helpers.load_patient_images(patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR, "*_i.png")
    volume_shape = patient_imgs.shape
    # Shape with the first and last axes swapped, used to turn the (x, y, z)
    # voxel position into a fraction of the volume size.
    shape_swapped = patient_imgs.swapaxes(0, 2).shape

    # Every excluded annotation gets the same fixed diameter (6 divided by the
    # target voxel size), stored as a fraction of patient_imgs.shape[1].
    diameter_pixels = 6 / settings.TARGET_VOXEL_MM
    diameter_percent = diameter_pixels / float(volume_shape[1])

    neg_annos = []
    for _, annotation in df_patient.iterrows():
        node_x = annotation["coordX"]
        if flip_direction_x:
            node_x *= -1
        node_y = annotation["coordY"]
        if flip_direction_y:
            node_y *= -1
        node_z = annotation["coordZ"]

        center_float = numpy.array([node_x, node_y, node_z])
        center_float_rescaled = (center_float - origin) / settings.TARGET_VOXEL_MM
        center_float_percent = center_float_rescaled / shape_swapped

        if _is_too_close_to_annotations(
                center_float_rescaled, pos_annos_df,
                ("coord_x", "coord_y", "coord_z", "diameter"),
                volume_shape, margin_pos):
            continue
        if pos_annos_manual is not None and _is_too_close_to_annotations(
                center_float_rescaled, pos_annos_manual,
                ("x", "y", "z", "d"),
                volume_shape, margin_manual):
            continue

        # anno_index is the running count of kept annotations.
        neg_annos.append([len(neg_annos),
                          round(center_float_percent[0], 4),
                          round(center_float_percent[1], 4),
                          round(center_float_percent[2], 4),
                          round(diameter_percent, 4),
                          1])

    df_annos = pandas.DataFrame(
        neg_annos,
        columns=["anno_index", "coord_x", "coord_y", "coord_z", "diameter", "malscore"])
    df_annos.to_csv(labels_dir + patient_id + "_annos_excluded.csv", index=False)
    return [patient_id, spacing[0], spacing[1], spacing[2]]