def make_pos_annotation_images():
    """Write one cube PNG for every row of each ``*_annos_pos.csv`` file.

    Every file matching ``*.*`` in the output directory is deleted first.
    For each annotation CSV under
    ``settings.LUNA_16_TRAIN_DIR2D2 + "metadata/"`` the patient's images are
    loaded and ``get_cube_from_img`` is asked for a size-64 cube around each
    annotated position. A cube whose sum is below 5 is skipped; a cube whose
    mean is below 10 is reported as suspicious but still written.

    Output names follow ``<patient_id>_<anno_index>_<diameter>_1_pos.png``.
    """
    csv_dir = settings.LUNA_16_TRAIN_DIR2D2 + "metadata/"
    out_dir = settings.BASE_DIR_SSD + "luna16_train_cubes_pos/"

    # Remove output left over from a previous run.
    for stale_path in glob.glob(out_dir + "*.*"):
        os.remove(stale_path)

    csv_suffix = "_annos_pos.csv"
    for patient_index, csv_path in enumerate(glob.glob(csv_dir + "*" + csv_suffix)):
        patient_id = ntpath.basename(csv_path).replace(csv_suffix, "")
        annotations = pandas.read_csv(csv_path)
        if len(annotations) == 0:
            continue

        volume = helpers.load_patient_images(
            patient_id,
            settings.LUNA_16_TRAIN_DIR2D2,
            "*" + CUBE_IMGTYPE_SRC + ".png",
        )

        for _, anno in annotations.iterrows():
            # Annotation values are multiplied by the matching axis length:
            # x -> shape[2], y -> shape[1], z -> shape[0].
            # (presumably the CSV stores them as fractions of the volume
            # size -- confirm against the code that writes the CSV)
            center_x = int(anno["coord_x"] * volume.shape[2])
            center_y = int(anno["coord_y"] * volume.shape[1])
            center_z = int(anno["coord_z"] * volume.shape[0])
            # The diameter is scaled by the x-axis length as well.
            diameter = int(anno["diameter"] * volume.shape[2])
            anno_index = int(anno["anno_index"])

            cube = get_cube_from_img(volume, center_x, center_y, center_z, 64)
            if cube.sum() < 5:
                print(" ***** Skipping ", center_x, center_y, center_z)
                continue
            if cube.mean() < 10:
                # Warn only; the cube is still written below.
                print(" ***** Suspicious ", center_x, center_y, center_z)

            file_name = "_".join(
                [patient_id, str(anno_index), str(diameter), "1", "pos.png"])
            save_cube_img(out_dir + file_name, cube, 8, 8)

        helpers.print_tabbed(
            [patient_index, patient_id, len(annotations)], [5, 64, 8])
def make_pos_annotation_images_manual():
    """Write cube PNGs for the manually labelled LUNA16 annotations.

    Reads every ``*.csv`` in ``resources/luna16_manual_labels/`` whose file
    name contains ``1.3.6.1.4``, loads that patient's images from
    ``settings.LUNA16_EXTRACTED_IMAGE_DIR`` and, for each CSV row, requests a
    size-64 cube from ``get_cube_from_img`` around the annotated position.

    A cube is dropped when its sum is below 5 or its shape is not
    ``(64, 64, 64)``; a mean below 10 only produces a warning. Patients whose
    images cannot be loaded are reported and skipped.

    Output names follow
    ``<patient_id>_<row index>_<malscore>_1_<pos|neg>.png`` where ``malscore``
    is the scaled ``d`` column clamped to ``[16, 25]`` and the label is
    ``pos`` when the ``id`` column is 0, ``neg`` otherwise.
    """
    src_dir = "resources/luna16_manual_labels/"
    dst_dir = settings.BASE_DIR_SSD + "generated_traindata/luna16_train_cubes_manual/"
    # makedirs (not mkdir) so a missing "generated_traindata/" parent does
    # not abort the run.
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    # NOTE(review): this pattern only matches names containing "_manual.",
    # while the files written below end in "_pos.png" / "_neg.png", so output
    # from an earlier run is not removed here -- confirm whether "*.*" was
    # intended.
    for file_path in glob.glob(dst_dir + "*_manual.*"):
        os.remove(file_path)

    for patient_index, csv_file in enumerate(glob.glob(src_dir + "*.csv")):
        patient_id = ntpath.basename(csv_file).replace(".csv", "")
        if "1.3.6.1.4" not in patient_id:
            continue

        print(patient_id)
        df_annos = pandas.read_csv(csv_file)

        try:
            images = helpers.load_patient_images(
                patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR,
                "*" + CUBE_IMGTYPE_SRC + ".png")
        except Exception as ex:
            # Best effort: a patient whose images cannot be loaded is skipped,
            # but the reason is reported, and KeyboardInterrupt / SystemExit
            # are no longer swallowed.
            print(" ***** Skipping patient ", patient_id,
                  " - could not load images: ", ex)
            continue

        for index, row in df_annos.iterrows():
            # Annotation values are multiplied by the matching axis length:
            # x -> shape[2], y -> shape[1], z -> shape[0].
            coord_x = int(row["x"] * images.shape[2])
            coord_y = int(row["y"] * images.shape[1])
            coord_z = int(row["z"] * images.shape[0])
            diameter = int(row["d"] * images.shape[2])
            node_type = int(row["id"])
            # The scaled diameter, clamped to [16, 25], is used as the score.
            malscore = max(16, min(25, diameter))
            anno_index = index

            cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 64)
            if cube_img.sum() < 5:
                print(" ***** Skipping ", coord_x, coord_y, coord_z)
                continue

            if cube_img.mean() < 10:
                # Warn only; the cube is still written below.
                print(" ***** Suspicious ", coord_x, coord_y, coord_z)

            if cube_img.shape != (64, 64, 64):
                print(" ***** incorrect shape !!! ", str(anno_index), " - ",
                      (coord_x, coord_y, coord_z))
                continue

            pos_neg = "pos" if node_type == 0 else "neg"
            save_cube_img(
                dst_dir + patient_id + "_" + str(anno_index) + "_" +
                str(malscore) + "_1_" + pos_neg + ".png",
                cube_img, 8, 8)

        helpers.print_tabbed([patient_index, patient_id, len(df_annos)],
                             [5, 64, 8])
def make_pos_annotation_images_manual_ndsb3():
    """Write cube PNGs for the manually labelled NDSB3 annotations.

    Reads every ``*.csv`` in ``resources/ndsb3_manual_labels/`` whose file
    name does not contain ``1.3.6.1.4.1``, looks up the patient's ``cancer``
    value in ``resources/stage1_labels.csv``, loads the patient's images from
    ``settings.NDSB3_EXTRACTED_IMAGE_DIR`` and requests a size-64 cube from
    ``get_cube_from_img`` around each annotated position.

    A cube is dropped when its sum is below 5 or its shape is not
    ``(64, 64, 64)``; a mean below 10 only produces a warning. Patients whose
    images cannot be loaded are reported and skipped.

    Output names follow
    ``ndsb3manual_<patient_id>_<n>_<pos|neg>_<cancer>_<malscore>_1_pn.png``
    where ``n`` is the 1-based position of the row in the CSV, the label is
    ``pos`` when the ``id`` column is 0, and ``malscore`` is the rounded
    ``dmm`` column.

    Raises:
        KeyError: if a patient id is missing from ``stage1_labels.csv``.
        AssertionError: if a ``pos`` row has a ``malscore`` that is not
            positive.
    """
    src_dir = "resources/ndsb3_manual_labels/"
    dst_dir = settings.BASE_DIR_SSD + "generated_traindata/ndsb3_train_cubes_manual/"
    # makedirs (not mkdir) so a missing "generated_traindata/" parent does
    # not abort the run.
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    train_label_df = pandas.read_csv("resources/stage1_labels.csv")
    train_label_df.set_index(["id"], inplace=True)

    # Remove output left over from a previous run.
    for file_path in glob.glob(dst_dir + "*.*"):
        os.remove(file_path)

    for patient_index, csv_file in enumerate(glob.glob(src_dir + "*.csv")):
        patient_id = ntpath.basename(csv_file).replace(".csv", "")
        if "1.3.6.1.4.1" in patient_id:
            continue

        cancer_label = train_label_df.loc[patient_id]["cancer"]
        df_annos = pandas.read_csv(csv_file)

        try:
            images = helpers.load_patient_images(
                patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR,
                "*" + CUBE_IMGTYPE_SRC + ".png")
        except Exception as ex:
            # Best effort: a patient whose images cannot be loaded is skipped,
            # but the reason is reported, and KeyboardInterrupt / SystemExit
            # are no longer swallowed.
            print(" ***** Skipping patient ", patient_id,
                  " - could not load images: ", ex)
            continue

        anno_index = 0
        for _, row in df_annos.iterrows():
            pos_neg = "pos" if row["id"] == 0 else "neg"
            # Annotation values are multiplied by the matching axis length:
            # x -> shape[2], y -> shape[1], z -> shape[0].
            coord_x = int(row["x"] * images.shape[2])
            coord_y = int(row["y"] * images.shape[1])
            coord_z = int(row["z"] * images.shape[0])
            malscore = int(round(row["dmm"]))
            # Incremented for every row, including rows skipped below, so the
            # number in the file name is the row's position in the CSV.
            anno_index += 1

            cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 64)
            if cube_img.sum() < 5:
                print(" ***** Skipping ", coord_x, coord_y, coord_z)
                continue

            if cube_img.mean() < 10:
                # Warn only; the cube is still written below.
                print(" ***** Suspicious ", coord_x, coord_y, coord_z)

            if cube_img.shape != (64, 64, 64):
                print(" ***** incorrect shape !!! ", str(anno_index), " - ",
                      (coord_x, coord_y, coord_z))
                continue

            print(patient_id)
            # A positive annotation must carry a positive score.
            assert malscore > 0 or pos_neg == "neg"
            save_cube_img(
                dst_dir + "ndsb3manual_" + patient_id + "_" +
                str(anno_index) + "_" + pos_neg + "_" + str(cancer_label) +
                "_" + str(malscore) + "_1_pn.png",
                cube_img, 8, 8)

        helpers.print_tabbed([patient_index, patient_id, len(df_annos)],
                             [5, 64, 8])
def make_annotation_images_lidc():
    """Write one cube PNG per row of every ``*_annos_pos_lidc.csv`` file.

    The destination directory is created when missing and every file
    matching ``*.*`` in it is deleted. For each annotation CSV under
    ``settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"`` the patient's images
    are loaded and a size-64 cube is requested from ``get_cube_from_img``
    around each annotated position.

    A cube is dropped when its sum is below 5 or its shape is not
    ``(64, 64, 64)``; a mean below 10 only produces a warning.
    """
    label_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"
    cube_dir = settings.BASE_DIR_SSD + "generated_traindata/luna16_train_cubes_lidc/"
    if not os.path.exists(cube_dir):
        os.mkdir(cube_dir)

    for old_file in glob.glob(cube_dir + "*.*"):
        os.remove(old_file)

    # Read the CSV files that hold the nodule coordinates and related values.
    csv_suffix = "_annos_pos_lidc.csv"
    for patient_index, csv_path in enumerate(glob.glob(label_dir + "*" + csv_suffix)):
        patient_id = ntpath.basename(csv_path).replace(csv_suffix, "")
        annotations = pandas.read_csv(csv_path)
        if len(annotations) == 0:
            continue

        # Load the patient's CT images.
        ct_images = helpers.load_patient_images(
            patient_id,
            settings.LUNA16_EXTRACTED_IMAGE_DIR,
            "*" + CUBE_IMGTYPE_SRC + ".png",
        )

        for _, anno in annotations.iterrows():
            # Coordinates: x -> shape[2], y -> shape[1], z -> shape[0].
            pos_x = int(anno["coord_x"] * ct_images.shape[2])
            pos_y = int(anno["coord_y"] * ct_images.shape[1])
            pos_z = int(anno["coord_z"] * ct_images.shape[0])

            # Malignancy score.
            malscore = int(anno["malscore"])

            # Replace characters that would clash with the "_"-separated
            # file name layout.
            anno_tag = str(anno["anno_index"])
            anno_tag = anno_tag.replace(" ", "xspacex")
            anno_tag = anno_tag.replace(".", "xpointx")
            anno_tag = anno_tag.replace("_", "xunderscorex")

            # Cut a 64x64x64 cube out of the image stack.
            cube = get_cube_from_img(ct_images, pos_x, pos_y, pos_z, 64)
            if cube.sum() < 5:
                print(" ***** Skipping ", pos_x, pos_y, pos_z)
                continue

            if cube.mean() < 10:
                print(" ***** Suspicious ", pos_x, pos_y, pos_z)

            if cube.shape != (64, 64, 64):
                print(" ***** incorrect shape !!! ", str(anno_tag), " - ",
                      (pos_x, pos_y, pos_z))
                continue

            # File name: patientId_annotationIndex_malscore^2_1_pos.png
            file_name = "_".join(
                [patient_id, str(anno_tag), str(malscore * malscore), "1", "pos.png"])
            save_cube_img(cube_dir + file_name, cube, 8, 8)

        helpers.print_tabbed(
            [patient_index, patient_id, len(annotations)], [5, 64, 8])
def make_annotation_images_lidc():
    """Export a cube image for each LIDC annotation row.

    Steps, in order:

    1. Create the destination directory if it does not exist and delete
       every file in it that matches ``*.*``.
    2. For each ``*_annos_pos_lidc.csv`` under
       ``settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"`` with at least one
       row, load the patient's images.
    3. For each row, request a size-64 cube from ``get_cube_from_img`` and
       save it as ``<patient_id>_<anno_index>_<malscore squared>_1_pos.png``
       unless its sum is below 5 or its shape is not ``(64, 64, 64)``.
    """
    # Related upstream discussion:
    # https://github.com/juliandewit/kaggle_ndsb2017/issues/2
    src_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"
    dst_dir = settings.BASE_DIR_SSD + "generated_traindata/luna16_train_cubes_lidc/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)

    for leftover in glob.glob(dst_dir + "*.*"):
        os.remove(leftover)

    # Characters in the annotation index that are spelled out so they cannot
    # be confused with the separators used in the file name.
    substitutions = ((" ", "xspacex"), (".", "xpointx"), ("_", "xunderscorex"))

    matching_csvs = glob.glob(src_dir + "*_annos_pos_lidc.csv")
    for patient_index, csv_file in enumerate(matching_csvs):
        patient_id = ntpath.basename(csv_file).replace("_annos_pos_lidc.csv", "")
        df_annos = pandas.read_csv(csv_file)
        if len(df_annos) == 0:
            continue

        images = helpers.load_patient_images(
            patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR,
            "*" + CUBE_IMGTYPE_SRC + ".png")

        for _, row in df_annos.iterrows():
            # x is scaled by shape[2], y by shape[1], z by shape[0].
            x = int(row["coord_x"] * images.shape[2])
            y = int(row["coord_y"] * images.shape[1])
            z = int(row["coord_z"] * images.shape[0])
            malscore = int(row["malscore"])

            anno_index = str(row["anno_index"])
            for raw, spelled_out in substitutions:
                anno_index = anno_index.replace(raw, spelled_out)

            cube_img = get_cube_from_img(images, x, y, z, 64)
            if cube_img.sum() < 5:
                print(" ***** Skipping ", x, y, z)
                continue
            if cube_img.mean() < 10:
                # Reported, but the cube is still saved.
                print(" ***** Suspicious ", x, y, z)
            if cube_img.shape != (64, 64, 64):
                print(" ***** incorrect shape !!! ", str(anno_index), " - ", (x, y, z))
                continue

            target = dst_dir + "%s_%s_%s_1_pos.png" % (
                patient_id, anno_index, malscore * malscore)
            save_cube_img(target, cube_img, 8, 8)

        helpers.print_tabbed([patient_index, patient_id, len(df_annos)], [5, 64, 8])
def make_pos_annotation_images_manual():
    """Export cube images for the manually labelled LUNA16 annotations.

    Only CSV files in ``resources/luna16_manual_labels/`` whose name contains
    ``1.3.6.1.4`` and that have at least one row are processed. For each row
    a size-64 cube is requested from ``get_cube_from_img``; it is saved
    unless its sum is below 5 or its shape is not ``(64, 64, 64)``.

    Output names follow
    ``<patient_id>_<row index>_<malscore>_1_<pos|neg>.png``; ``malscore`` is
    the scaled ``d`` column clamped to ``[16, 25]`` and the label is ``pos``
    when the ``id`` column is 0.
    """
    labels_dir = "resources/luna16_manual_labels/"
    cubes_dir = settings.BASE_DIR_SSD + "generated_traindata/luna16_train_cubes_manual/"
    if not os.path.exists(cubes_dir):
        os.mkdir(cubes_dir)

    # NOTE(review): only names containing "_manual." are removed, while the
    # files written below end in "_pos.png" / "_neg.png" -- confirm that
    # leaving earlier output in place is intended.
    for old_path in glob.glob(cubes_dir + "*_manual.*"):
        os.remove(old_path)

    for patient_index, csv_path in enumerate(glob.glob(labels_dir + "*.csv")):
        patient_id = ntpath.basename(csv_path).replace(".csv", "")
        if "1.3.6.1.4" not in patient_id:
            continue
        print(patient_id)

        annotations = pandas.read_csv(csv_path)
        if len(annotations) == 0:
            continue

        volume = helpers.load_patient_images(
            patient_id,
            settings.LUNA16_EXTRACTED_IMAGE_DIR,
            "*" + CUBE_IMGTYPE_SRC + ".png",
        )

        for anno_index, anno in annotations.iterrows():
            # x is scaled by shape[2], y by shape[1], z by shape[0]; the
            # diameter uses the x-axis length too.
            cx = int(anno["x"] * volume.shape[2])
            cy = int(anno["y"] * volume.shape[1])
            cz = int(anno["z"] * volume.shape[0])
            diameter = int(anno["d"] * volume.shape[2])
            node_type = int(anno["id"])
            # Clamp the scaled diameter to [16, 25].
            malscore = max(16, min(25, int(diameter)))

            cube = get_cube_from_img(volume, cx, cy, cz, 64)
            if cube.sum() < 5:
                print(" ***** Skipping ", cx, cy, cz)
                continue
            if cube.mean() < 10:
                # Reported, but the cube is still saved.
                print(" ***** Suspicious ", cx, cy, cz)
            if cube.shape != (64, 64, 64):
                print(" ***** incorrect shape !!! ", str(anno_index), " - ", (cx, cy, cz))
                continue

            if node_type == 0:
                label = "pos"
            else:
                label = "neg"
            file_name = "_".join(
                [patient_id, str(anno_index), str(malscore), "1", label + ".png"])
            save_cube_img(cubes_dir + file_name, cube, 8, 8)

        helpers.print_tabbed(
            [patient_index, patient_id, len(annotations)], [5, 64, 8])
def make_pos_annotation_images_manual_ndsb3():
    """Export cube images for the manually labelled NDSB3 annotations.

    CSV files in ``resources/ndsb3_manual_labels/`` whose name contains
    ``1.3.6.1.4.1`` are ignored, as are CSVs without rows. The patient's
    ``cancer`` value is read from ``resources/stage1_labels.csv`` (indexed by
    ``id``) and becomes part of the file name:

    ``ndsb3manual_<patient_id>_<n>_<pos|neg>_<cancer>_<malscore>_1_pn.png``

    ``n`` counts the CSV rows starting at 1, the label is ``pos`` when the
    ``id`` column is 0, and ``malscore`` is the rounded ``dmm`` column. A cube
    is saved unless its sum is below 5 or its shape is not ``(64, 64, 64)``.
    """
    labels_dir = "resources/ndsb3_manual_labels/"
    cubes_dir = settings.BASE_DIR_SSD + "generated_traindata/ndsb3_train_cubes_manual/"
    if not os.path.exists(cubes_dir):
        os.mkdir(cubes_dir)

    stage1_labels = pandas.read_csv("resources/stage1_labels.csv").set_index(["id"])

    # Remove output left over from a previous run.
    for old_path in glob.glob(cubes_dir + "*.*"):
        os.remove(old_path)

    for patient_index, csv_path in enumerate(glob.glob(labels_dir + "*.csv")):
        patient_id = ntpath.basename(csv_path).replace(".csv", "")
        if "1.3.6.1.4.1" in patient_id:
            continue

        cancer_label = stage1_labels.loc[patient_id]["cancer"]
        annotations = pandas.read_csv(csv_path)
        if len(annotations) == 0:
            continue

        volume = helpers.load_patient_images(
            patient_id,
            settings.NDSB3_EXTRACTED_IMAGE_DIR,
            "*" + CUBE_IMGTYPE_SRC + ".png",
        )

        # start=1 keeps the numbering tied to the row position, so rows that
        # are skipped below still consume a number.
        for anno_number, (_, anno) in enumerate(annotations.iterrows(), start=1):
            if anno["id"] == 0:
                pos_neg = "pos"
            else:
                pos_neg = "neg"
            # x is scaled by shape[2], y by shape[1], z by shape[0].
            cx = int(anno["x"] * volume.shape[2])
            cy = int(anno["y"] * volume.shape[1])
            cz = int(anno["z"] * volume.shape[0])
            malscore = int(round(anno["dmm"]))

            cube = get_cube_from_img(volume, cx, cy, cz, 64)
            if cube.sum() < 5:
                print(" ***** Skipping ", cx, cy, cz)
                continue
            if cube.mean() < 10:
                # Reported, but the cube is still saved.
                print(" ***** Suspicious ", cx, cy, cz)
            if cube.shape != (64, 64, 64):
                print(" ***** incorrect shape !!! ", str(anno_number), " - ", (cx, cy, cz))
                continue

            print(patient_id)
            # A positive annotation must carry a positive score.
            assert malscore > 0 or pos_neg == "neg"

            name_parts = [
                "ndsb3manual",
                patient_id,
                str(anno_number),
                pos_neg,
                str(cancer_label),
                str(malscore),
                "1",
                "pn.png",
            ]
            save_cube_img(cubes_dir + "_".join(name_parts), cube, 8, 8)

        helpers.print_tabbed(
            [patient_index, patient_id, len(annotations)], [5, 64, 8])
def make_annotation_images_lidc():
    """Save a cube image for every row of each ``*_annos_pos_lidc.csv``.

    Annotation CSVs are read from
    ``settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"``. The output directory
    is created when missing and emptied of files matching ``*.*`` before any
    cube is written. Each cube is requested from ``get_cube_from_img`` with
    size 64 and is saved as
    ``<patient_id>_<anno_index>_<malscore squared>_1_pos.png`` unless its sum
    is below 5 or its shape differs from ``(64, 64, 64)``.
    """
    annos_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"
    output_dir = settings.BASE_DIR_SSD + "generated_traindata/luna16_train_cubes_lidc/"
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    for existing in glob.glob(output_dir + "*.*"):
        os.remove(existing)

    for patient_index, annos_csv in enumerate(glob.glob(annos_dir + "*_annos_pos_lidc.csv")):
        patient_id = ntpath.basename(annos_csv).replace("_annos_pos_lidc.csv", "")
        annos = pandas.read_csv(annos_csv)
        if len(annos) == 0:
            continue

        images = helpers.load_patient_images(
            patient_id,
            settings.LUNA16_EXTRACTED_IMAGE_DIR,
            "*" + CUBE_IMGTYPE_SRC + ".png",
        )

        def to_voxel(value, axis):
            # Scale an annotation value by the length of the given image axis.
            return int(value * images.shape[axis])

        for _, anno in annos.iterrows():
            voxel_x = to_voxel(anno["coord_x"], 2)
            voxel_y = to_voxel(anno["coord_y"], 1)
            voxel_z = to_voxel(anno["coord_z"], 0)
            malscore = int(anno["malscore"])

            # Spell out characters that would clash with the "_"-separated
            # file name layout.
            safe_index = (
                str(anno["anno_index"])
                .replace(" ", "xspacex")
                .replace(".", "xpointx")
                .replace("_", "xunderscorex")
            )

            cube = get_cube_from_img(images, voxel_x, voxel_y, voxel_z, 64)
            if cube.sum() < 5:
                print(" ***** Skipping ", voxel_x, voxel_y, voxel_z)
                continue
            if cube.mean() < 10:
                # Reported, but the cube is still saved.
                print(" ***** Suspicious ", voxel_x, voxel_y, voxel_z)
            if cube.shape != (64, 64, 64):
                print(" ***** incorrect shape !!! ", str(safe_index), " - ",
                      (voxel_x, voxel_y, voxel_z))
                continue

            squared_score = malscore * malscore
            out_path = (output_dir + patient_id + "_" + str(safe_index) + "_"
                        + str(squared_score) + "_1_pos.png")
            save_cube_img(out_path, cube, 8, 8)

        helpers.print_tabbed([patient_index, patient_id, len(annos)], [5, 64, 8])