def data_generator(batch_size, record_list, train_set):
    """Yield an endless stream of ``(x, targets)`` batches of cubes.

    Every record is read as ``(cube_path, class_label, size_label)``.
    Records with ``class_label == 0`` are loaded with
    ``helpers.load_cube_img(path, 6, 8, 48)`` and cropped to ``CUBE_SIZE``
    (at a random offset when there is room); every other record is loaded
    with ``helpers.load_cube_img(path, 8, 8, 64)`` and cropped around the
    centre.

    Changes compared to the previous implementation:

    * Samples left over at the end of a pass over ``record_list`` are kept
      and completed by the next pass.  Previously the sample lists were
      cleared at the start of each pass while the sample counter was not,
      which dropped those samples and made the next batch short.
    * The running mean is tracked with a sum and a count instead of an
      ever-growing list.
    * An empty ``record_list`` raises instead of looping forever.

    Args:
        batch_size: Number of samples stacked into each yielded batch.
        record_list: Sequence of records; shuffled IN PLACE at the start of
            every pass when ``train_set`` is true.
        train_set: Enables shuffling, random flips and the periodic
            "Mean" printout.

    Yields:
        ``(x, {"out_class": y_class, "out_malignancy": y_size})`` where each
        value is the ``numpy.vstack`` of the per-sample entries.

    Raises:
        ValueError: If ``record_list`` is empty.
    """
    if len(record_list) == 0:
        raise ValueError("record_list is empty; nothing to generate")

    def random_flips(cube):
        """Apply four independent random flips/reversals to ``cube``."""
        if random.randint(0, 100) > 50:
            cube = numpy.fliplr(cube)
        if random.randint(0, 100) > 50:
            cube = numpy.flipud(cube)
        if random.randint(0, 100) > 50:
            cube = cube[:, :, ::-1]
        if random.randint(0, 100) > 50:
            cube = cube[:, ::-1, :]
        return cube

    crop_size = CUBE_SIZE
    target_shape = (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE)

    # Running statistics of the cube means (reported every 1,000,000 cubes).
    mean_sum = 0.0
    mean_count = 0

    # Batch buffers live outside the epoch loop so a partial batch carries
    # over into the next pass instead of being discarded.
    img_list = []
    class_list = []
    size_list = []

    while True:
        if train_set:
            random.shuffle(record_list)

        for record_item in record_list:
            class_label = record_item[1]
            # The size label is known data whether or not we train on it.
            size_label = record_item[2]

            if class_label == 0:
                # Negative cube.
                cube_image = helpers.load_cube_img(record_item[0], 6, 8, 48)
                wiggle = 48 - crop_size - 1
                indent_x = indent_y = indent_z = 0
                if wiggle > 0:
                    indent_x = random.randint(0, wiggle)
                    indent_y = random.randint(0, wiggle)
                    indent_z = random.randint(0, wiggle)
                cube_image = cube_image[indent_z:indent_z + crop_size,
                                        indent_y:indent_y + crop_size,
                                        indent_x:indent_x + crop_size]
                if train_set:
                    cube_image = random_flips(cube_image)
                if crop_size != CUBE_SIZE:
                    cube_image = helpers.rescale_patient_images2(
                        cube_image, target_shape)
                assert cube_image.shape == target_shape
            else:
                # Positive cube: always cropped around the centre.
                cube_image = helpers.load_cube_img(record_item[0], 8, 8, 64)
                indent = int((cube_image.shape[0] - crop_size) / 2)
                cube_image = cube_image[indent:indent + crop_size,
                                        indent:indent + crop_size,
                                        indent:indent + crop_size]
                if crop_size != CUBE_SIZE:
                    cube_image = helpers.rescale_patient_images2(
                        cube_image, target_shape)
                assert cube_image.shape == target_shape
                if train_set:
                    cube_image = random_flips(cube_image)

            mean_sum += cube_image.mean()
            mean_count += 1
            if train_set and mean_count % 1000000 == 0:
                print("Mean: ", mean_sum / mean_count)

            img_list.append(prepare_image_for_net3D(cube_image))
            class_list.append(class_label)
            size_list.append(size_label)

            if len(img_list) >= batch_size:
                x = numpy.vstack(img_list)
                y_class = numpy.vstack(class_list)
                y_size = numpy.vstack(size_list)
                yield x, {"out_class": y_class, "out_malignancy": y_size}
                img_list = []
                class_list = []
                size_list = []
def data_generator(batch_size, record_list, train_set):
    """Yield an endless stream of ``(x, targets)`` batches of cubes.

    Every record is read as ``(cube_path, class_label, size_label)``.
    Records with ``class_label == 0`` are loaded with
    ``helpers.load_cube_img(path, 6, 8, 48)``; every other record is loaded
    with ``helpers.load_cube_img(path, 8, 8, 64)``.  Both are cropped to
    ``CUBE_SIZE``; positive cubes get a random crop offset when
    ``train_set`` is true and a centred crop otherwise.

    Changes compared to the previous implementation:

    * Crop offsets use integer division.  ``CROP_SIZE / 4`` and
      ``CROP_SIZE / 2`` produced floats on Python 3, and
      ``random.randint`` rejects float bounds from Python 3.12 on.
    * Samples left over at the end of a pass over ``record_list`` are kept
      and completed by the next pass.  Previously the sample lists were
      cleared at the start of each pass while the sample counter was not,
      which dropped those samples and made the next batch short.
    * The running mean is tracked with a sum and a count instead of an
      ever-growing list.
    * An empty ``record_list`` raises instead of looping forever.

    Args:
        batch_size: Number of samples stacked into each yielded batch.
        record_list: Sequence of records; shuffled IN PLACE at the start of
            every pass when ``train_set`` is true.
        train_set: Enables shuffling, random crop offsets for positive
            cubes, random flips and the periodic "Mean" printout.

    Yields:
        ``(x, {"out_class": y_class, "out_malignancy": y_size})`` where each
        value is the ``numpy.vstack`` of the per-sample entries.

    Raises:
        ValueError: If ``record_list`` is empty.
    """
    if len(record_list) == 0:
        raise ValueError("record_list is empty; nothing to generate")

    def random_flips(cube):
        """Apply four independent random flips/reversals to ``cube``."""
        if random.randint(0, 100) > 50:
            cube = numpy.fliplr(cube)
        if random.randint(0, 100) > 50:
            cube = numpy.flipud(cube)
        if random.randint(0, 100) > 50:
            cube = cube[:, :, ::-1]
        if random.randint(0, 100) > 50:
            cube = cube[:, ::-1, :]
        return cube

    crop_size = CUBE_SIZE
    target_shape = (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE)

    # Running statistics of the cube means (reported every 1,000,000 cubes).
    mean_sum = 0.0
    mean_count = 0

    # Batch buffers live outside the epoch loop so a partial batch carries
    # over into the next pass instead of being discarded.
    img_list = []
    class_list = []
    size_list = []

    while True:
        if train_set:
            random.shuffle(record_list)

        for record_item in record_list:
            class_label = record_item[1]
            size_label = record_item[2]

            if class_label == 0:
                cube_image = helpers.load_cube_img(record_item[0], 6, 8, 48)
                wiggle = 48 - crop_size - 1
                indent_x = indent_y = indent_z = 0
                if wiggle > 0:
                    indent_x = random.randint(0, wiggle)
                    indent_y = random.randint(0, wiggle)
                    indent_z = random.randint(0, wiggle)
                cube_image = cube_image[indent_z:indent_z + crop_size,
                                        indent_y:indent_y + crop_size,
                                        indent_x:indent_x + crop_size]
                if train_set:
                    cube_image = random_flips(cube_image)
                if crop_size != CUBE_SIZE:
                    cube_image = helpers.rescale_patient_images2(
                        cube_image, target_shape)
                assert cube_image.shape == target_shape
            else:
                cube_image = helpers.load_cube_img(record_item[0], 8, 8, 64)
                current_cube_size = cube_image.shape[0]

                # Default (validation) crop is centred.
                centre = int((current_cube_size - crop_size) / 2)
                indent_x = indent_y = indent_z = centre

                wiggle_indent = 0
                wiggle = current_cube_size - crop_size - 1
                if wiggle > (crop_size / 2):
                    # Plenty of slack: restrict random offsets to the range
                    # [crop_size // 4, crop_size // 4 + wiggle].
                    wiggle_indent = crop_size // 4
                    wiggle = (current_cube_size - crop_size
                              - crop_size // 2 - 1)
                if train_set:
                    indent_x = wiggle_indent + random.randint(0, wiggle)
                    indent_y = wiggle_indent + random.randint(0, wiggle)
                    indent_z = wiggle_indent + random.randint(0, wiggle)

                cube_image = cube_image[indent_z:indent_z + crop_size,
                                        indent_y:indent_y + crop_size,
                                        indent_x:indent_x + crop_size]
                if crop_size != CUBE_SIZE:
                    cube_image = helpers.rescale_patient_images2(
                        cube_image, target_shape)
                assert cube_image.shape == target_shape
                if train_set:
                    cube_image = random_flips(cube_image)

            mean_sum += cube_image.mean()
            mean_count += 1
            img3d = prepare_image_for_net3D(cube_image)
            if train_set and mean_count % 1000000 == 0:
                print("Mean: ", mean_sum / mean_count)

            img_list.append(img3d)
            class_list.append(class_label)
            size_list.append(size_label)

            if len(img_list) >= batch_size:
                x = numpy.vstack(img_list)
                y_class = numpy.vstack(class_list)
                y_size = numpy.vstack(size_list)
                yield x, {"out_class": y_class, "out_malignancy": y_size}
                img_list = []
                class_list = []
                size_list = []
def predict_cubes(patient_ids, z0, model_path, magnification=1, flip=False,
                  holdout_no=-1, ext_name="", fold_count=2):
    """Slide a cube over each patient volume and collect nodule predictions.

    For every patient the image and mask stacks are loaded from
    ``LUNA16_EXTRACTED_IMAGE_DIR``, scanned on a grid with stride
    ``PREDICT_STEP``, and every cube whose mask sum is at least 2000 is fed
    to the module-level ``model``.  Predictions whose class output exceeds
    ``P_TH`` become one row of the returned frame.

    Changes compared to the previous implementation:

    * Cubes left in an incomplete final batch are now predicted; before,
      only full batches of 128 were ever sent to the model and the rest
      were silently dropped.
    * ``z0`` is no longer overwritten inside the patient loop.  Previously
      a negative ``z0`` became ``0`` after the first patient, so every
      later patient was scanned on slice 0 only.
    * The dead first ``diameter_perc`` computation and unused locals were
      removed.

    NOTE(review): ``model_path``, ``holdout_no``, ``ext_name`` and
    ``fold_count`` are not used by this variant; the network is taken from
    a module-level name ``model``.
    NOTE(review): the collapsed source does not show whether ``return df``
    sat inside or after the patient loop; it is placed after the loop here,
    so the frame of the last processed patient is returned — confirm.

    Args:
        patient_ids: Patient identifiers; iterated in reverse order.
            Ids containing "metadata" or "labels" are skipped.
        z0: Grid slice index to scan; a negative value scans all slices.
        model_path: Unused (see note above).
        magnification: When not 1, image and mask are rescaled with
            ``helpers.rescale_patient_images``.
        flip: When true, each cube's last axis is reversed before
            prediction.

    Returns:
        The ``pandas.DataFrame`` built for the last processed patient, after
        ``filter_patient_nodules_predictions`` was called on it, or ``None``
        when no patient was processed.
    """
    df = None
    for patient_id in reversed(patient_ids):
        if "metadata" in patient_id:
            continue
        if "labels" in patient_id:
            continue

        patient_img = helpers.load_patient_images(
            patient_id, LUNA16_EXTRACTED_IMAGE_DIR, "*_i.png", [])
        if magnification != 1:
            patient_img = helpers.rescale_patient_images(
                patient_img, (1, 1, 1), magnification)

        patient_mask = helpers.load_patient_images(
            patient_id, LUNA16_EXTRACTED_IMAGE_DIR, "*_m.png", [])
        if magnification != 1:
            patient_mask = helpers.rescale_patient_images(
                patient_mask, (1, 1, 1), magnification, is_mask_image=True)

        step = PREDICT_STEP
        crop_size = CUBE_SIZE

        # Number of grid positions k per axis with k*step + crop_size < size.
        predict_volume_shape = tuple(
            len(range(0, patient_img.shape[dim] - crop_size, step))
            for dim in range(3))

        batch_size = 128
        batch_list = []
        batch_list_coords = []
        patient_predictions_csv = []

        def flush_batch():
            """Predict the queued cubes and record every hit above P_TH."""
            batch_data = numpy.vstack(batch_list)
            p = model.predict(batch_data, batch_size=batch_size)
            for i, (grid_z, grid_y, grid_x) in enumerate(batch_list_coords):
                nodule_chance = p[0][i][0]
                if nodule_chance > P_TH:
                    # Centre of the cube in voxel coordinates.
                    p_z = grid_z * step + crop_size / 2
                    p_y = grid_y * step + crop_size / 2
                    p_x = grid_x * step + crop_size / 2
                    p_z_perc = round(float(p_z) / patient_img.shape[0], 4)
                    p_y_perc = round(float(p_y) / patient_img.shape[1], 4)
                    p_x_perc = round(float(p_x) / patient_img.shape[2], 4)
                    diameter_mm = round(p[1][i][0], 4)
                    diameter_perc = round(
                        diameter_mm / patient_img.shape[2], 4)
                    nodule_chance = round(nodule_chance, 4)
                    # The annotation index is the row's position in the list.
                    patient_predictions_csv.append([
                        len(patient_predictions_csv), p_x, p_y, p_z,
                        p_x_perc, p_y_perc, p_z_perc, diameter_perc,
                        nodule_chance, diameter_mm
                    ])
            del batch_list[:]
            del batch_list_coords[:]

        # Resolve the slice range locally so the argument stays untouched
        # for the next patient.
        if z0 < 0:
            z_start, z_stop = 0, predict_volume_shape[0]
        else:
            z_start, z_stop = z0, z0 + 1

        for z in range(z_start, z_stop):
            for y in range(predict_volume_shape[1]):
                for x in range(predict_volume_shape[2]):
                    cube_img = patient_img[z * step:z * step + crop_size,
                                           y * step:y * step + crop_size,
                                           x * step:x * step + crop_size]
                    cube_mask = patient_mask[z * step:z * step + crop_size,
                                             y * step:y * step + crop_size,
                                             x * step:x * step + crop_size]
                    if cube_mask.sum() < 2000:
                        continue
                    if flip:
                        cube_img = cube_img[:, :, ::-1]
                    if crop_size != CUBE_SIZE:
                        cube_img = helpers.rescale_patient_images2(
                            cube_img, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))
                    batch_list.append(prepare_image_for_net3D(cube_img))
                    batch_list_coords.append((z, y, x))
                    if len(batch_list) == batch_size:
                        flush_batch()

        # Predict whatever is left in the last, incomplete batch.
        if batch_list:
            flush_batch()

        df = pandas.DataFrame(patient_predictions_csv,
                              columns=[
                                  "anno_index", "ax", "ay", "az", "coord_x",
                                  "coord_y", "coord_z", "diameter",
                                  "nodule_chance", "diameter_mm"
                              ])
        # Return value is ignored, as before; presumably this updates df in
        # place — TODO confirm.
        filter_patient_nodules_predictions(df, patient_id,
                                           crop_size * magnification)
    return df
def data_generator(test_files):
    """Load, crop and normalise every test cube and return them as a list.

    Despite its name this function is not a generator: it processes all of
    ``test_files`` eagerly and returns a list.

    The cube layout is chosen from the file name: when
    ``analysis_filename(file_name)[1]`` is ``"neg"`` and the first
    ``_``-separated part of the name is not ``"ndsb3manual"``, the file is
    loaded with ``helpers.load_cube_img(path, 6, 8, 48)``; otherwise with
    ``helpers.load_cube_img(path, 8, 8, 64)``.  Crop offsets are drawn with
    ``random`` whenever there is room, so results are not deterministic
    unless the caller seeds ``random``.

    Change compared to the previous implementation: crop offsets use
    integer division.  ``CROP_SIZE / 4`` and ``CROP_SIZE / 2`` produced
    floats on Python 3, and ``random.randint`` rejects float bounds from
    Python 3.12 on.

    Args:
        test_files: Iterable of cube image paths.

    Returns:
        List of ``(img3d, file_name)`` tuples, where ``img3d`` is the result
        of ``prepare_image_for_net3D`` and ``file_name`` the base name of
        the path.
    """
    crop_size = CUBE_SIZE
    target_shape = (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE)
    img_list = []

    for test_item in test_files:
        file_name = ntpath.basename(test_item)
        parts = file_name.split("_")
        pn = analysis_filename(file_name)[1]

        if pn == "neg" and parts[0] != "ndsb3manual":
            # 6*8 case: every "neg" cube except the ndsb3manual ones.
            cube_image = helpers.load_cube_img(test_item, 6, 8, 48)
            wiggle = 48 - crop_size - 1
            indent_x = indent_y = indent_z = 0
            if wiggle > 0:
                indent_x = random.randint(0, wiggle)
                indent_y = random.randint(0, wiggle)
                indent_z = random.randint(0, wiggle)
        else:
            # 8*8 case: all "pos" cubes and the ndsb3manual "neg" cubes.
            cube_image = helpers.load_cube_img(test_item, 8, 8, 64)
            current_cube_size = cube_image.shape[0]
            wiggle_indent = 0
            wiggle = current_cube_size - crop_size - 1
            if wiggle > (crop_size / 2):
                # Plenty of slack: restrict random offsets to the range
                # [crop_size // 4, crop_size // 4 + wiggle].
                wiggle_indent = crop_size // 4
                wiggle = current_cube_size - crop_size - crop_size // 2 - 1
            indent_x = wiggle_indent + random.randint(0, wiggle)
            indent_y = wiggle_indent + random.randint(0, wiggle)
            indent_z = wiggle_indent + random.randint(0, wiggle)

        cube_image = cube_image[indent_z:indent_z + crop_size,
                                indent_y:indent_y + crop_size,
                                indent_x:indent_x + crop_size]
        if crop_size != CUBE_SIZE:
            cube_image = helpers.rescale_patient_images2(
                cube_image, target_shape)
        assert cube_image.shape == target_shape

        img3d = prepare_image_for_net3D(cube_image)
        img_list.append((img3d, file_name))

    return img_list
def predict_cubes(model_path, continue_job, only_patient_id=None,
                  luna16=False, magnification=1, flip=False, train_data=True,
                  holdout_no=-1, ext_name="", fold_count=2):
    """Scan every patient volume with the nodule detector and write CSVs.

    For each patient directory found in
    ``settings.NDSB3_EXTRACTED_IMAGE_DIR`` the image and mask stacks are
    loaded, scanned on a grid with stride ``PREDICT_STEP``, and every cube
    whose mask sum is at least 2000 is fed to the network.  Predictions
    whose class output exceeds ``P_TH`` are written, one row each, to
    ``<dst_dir>/<patient_id>.csv`` with both relative (``coord_*``) and
    voxel (``abs_*``) coordinates.

    Changes compared to the previous implementation:

    * Cubes left in an incomplete final batch are now predicted; before,
      only full batches of 128 were ever sent to the model and the rest
      of each patient's cubes were silently dropped.
    * The dead first ``diameter_perc`` computation and unused locals
      (``cube_img = None``, ``all_predictions_csv``) were removed.

    NOTE(review): patients are always listed from
    ``settings.NDSB3_EXTRACTED_IMAGE_DIR``, also when ``luna16`` is true.
    NOTE(review): with ``train_data=True`` and the default
    ``holdout_no=-1`` the fold test ``patient_fold % fold_count !=
    holdout_no`` presumably skips every patient (for an integer fold and a
    positive ``fold_count``); callers appear to be expected to pass
    ``holdout_no=None`` or a real fold number — confirm.

    Args:
        model_path: Weights path handed to
            ``step2_train_nodule_detector.get_net``.
        continue_job: When true (and ``only_patient_id`` is None), patients
            whose CSV already exists are skipped.
        only_patient_id: When given, only this patient is processed.
        luna16: Selects the output root directory; when false, patients
            missing from the labels CSV are skipped.
        magnification: When not 1, image and mask are rescaled with
            ``helpers.rescale_patient_images``; also part of the output
            directory name.
        flip: When true, each cube's last axis is reversed before
            prediction and ``_flip`` is added to the output directory name.
        train_data: Chooses the labels CSV and enables the hold-out filter.
        holdout_no: Fold number to keep, or None to disable the filter.
        ext_name: Suffix of the output directory name.
        fold_count: Modulus applied to the patient fold.

    Returns:
        None.
    """
    if luna16:
        dst_dir = settings.LUNA_NODULE_DETECTION_DIR
    else:
        dst_dir = settings.NDSB3_NODULE_DETECTION_DIR
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    holdout_ext = ""
    flip_ext = ""
    if flip:
        flip_ext = "_flip"

    dst_dir += "predictions" + str(int(
        magnification * 10)) + holdout_ext + flip_ext + "_" + ext_name + "/"
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    sw = helpers.Stopwatch.start_new()
    model = step2_train_nodule_detector.get_net(
        input_shape=(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE, 1),
        load_weight_path=model_path)

    if not luna16:
        if train_data:
            labels_df = pandas.read_csv("resources/stage1_labels.csv")
        else:
            labels_df = pandas.read_csv("resources/tc_sample_submission.csv")
        labels_df.set_index(["id"], inplace=True)

    patient_ids = []
    for file_name in os.listdir(settings.NDSB3_EXTRACTED_IMAGE_DIR):
        if not os.path.isdir(settings.NDSB3_EXTRACTED_IMAGE_DIR + file_name):
            continue
        patient_ids.append(file_name)

    for patient_index, patient_id in enumerate(reversed(patient_ids)):
        if not luna16:
            if patient_id not in labels_df.index:
                continue
        if "metadata" in patient_id:
            continue
        if only_patient_id is not None and only_patient_id != patient_id:
            continue

        if holdout_no is not None and train_data:
            patient_fold = helpers.get_patient_fold(patient_id)
            patient_fold %= fold_count
            if patient_fold != holdout_no:
                continue

        print(patient_index, ": ", patient_id)
        csv_target_path = dst_dir + patient_id + ".csv"
        if continue_job and only_patient_id is None:
            if os.path.exists(csv_target_path):
                continue

        patient_img = helpers.load_patient_images(
            patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR, "*_i.png", [])
        if magnification != 1:
            patient_img = helpers.rescale_patient_images(
                patient_img, (1, 1, 1), magnification)

        patient_mask = helpers.load_patient_images(
            patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR, "*_m.png", [])
        if magnification != 1:
            patient_mask = helpers.rescale_patient_images(
                patient_mask, (1, 1, 1), magnification, is_mask_image=True)

        step = PREDICT_STEP
        crop_size = CUBE_SIZE

        # Number of grid positions k per axis with k*step + crop_size < size.
        predict_volume_shape = tuple(
            len(range(0, patient_img.shape[dim] - crop_size, step))
            for dim in range(3))
        predict_volume = numpy.zeros(shape=predict_volume_shape, dtype=float)
        print("Predict volume shape: ", predict_volume.shape)

        done_count = 0
        skipped_count = 0
        batch_size = 128
        batch_list = []
        batch_list_coords = []
        patient_predictions_csv = []

        def flush_batch():
            """Predict the queued cubes and record every hit above P_TH."""
            batch_data = numpy.vstack(batch_list)
            p = model.predict(batch_data, batch_size=batch_size)
            for i, (grid_z, grid_y, grid_x) in enumerate(batch_list_coords):
                nodule_chance = p[0][i][0]
                predict_volume[grid_z, grid_y, grid_x] = nodule_chance
                if nodule_chance > P_TH:
                    # Centre of the cube in voxel coordinates.
                    p_z = grid_z * step + crop_size / 2
                    p_y = grid_y * step + crop_size / 2
                    p_x = grid_x * step + crop_size / 2
                    p_z_perc = round(p_z / patient_img.shape[0], 4)
                    p_y_perc = round(p_y / patient_img.shape[1], 4)
                    p_x_perc = round(p_x / patient_img.shape[2], 4)
                    diameter_mm = round(p[1][i][0], 4)
                    diameter_perc = round(
                        diameter_mm / patient_img.shape[2], 4)
                    nodule_chance = round(nodule_chance, 4)
                    # The annotation index is the row's position in the list.
                    patient_predictions_csv.append([
                        len(patient_predictions_csv), p_x_perc, p_y_perc,
                        p_z_perc, diameter_perc, nodule_chance, diameter_mm,
                        p_x, p_y, p_z
                    ])
            del batch_list[:]
            del batch_list_coords[:]

        for z in range(predict_volume_shape[0]):
            for y in range(predict_volume_shape[1]):
                for x in range(predict_volume_shape[2]):
                    cube_img = patient_img[z * step:z * step + crop_size,
                                           y * step:y * step + crop_size,
                                           x * step:x * step + crop_size]
                    cube_mask = patient_mask[z * step:z * step + crop_size,
                                             y * step:y * step + crop_size,
                                             x * step:x * step + crop_size]

                    if cube_mask.sum() < 2000:
                        skipped_count += 1
                    else:
                        if flip:
                            cube_img = cube_img[:, :, ::-1]
                        if crop_size != CUBE_SIZE:
                            cube_img = helpers.rescale_patient_images2(
                                cube_img, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))
                        batch_list.append(prepare_image_for_net3D(cube_img))
                        batch_list_coords.append((z, y, x))
                        if len(batch_list) == batch_size:
                            flush_batch()

                    done_count += 1
                    if done_count % 10000 == 0:
                        print("Done: ", done_count, " skipped:",
                              skipped_count)

        # Predict whatever is left in the last, incomplete batch.
        if batch_list:
            flush_batch()

        df = pandas.DataFrame(patient_predictions_csv,
                              columns=[
                                  "anno_index", "coord_x", "coord_y",
                                  "coord_z", "diameter", "nodule_chance",
                                  "diameter_mm", "abs_x", "abs_y", "abs_z"
                              ])
        # Return value is ignored, as before; presumably this updates df in
        # place — TODO confirm.
        filter_patient_nodules_predictions(df, patient_id,
                                           crop_size * magnification)
        df.to_csv(csv_target_path, index=False)

        print(predict_volume.mean())
        print("Done in : ", sw.get_elapsed_seconds(), " seconds")
def data_generator(batch_size, record_list, train_set):
    """Yield an endless stream of ``(x, targets)`` batches of cubes.

    Every record is read as ``(cube_path, class_label, size_label)``.
    Records with ``class_label == 0`` are loaded with
    ``helpers.load_cube_img(path, 6, 8, 48)``; every other record is loaded
    with ``helpers.load_cube_img(path, 8, 8, 64)``.  Both are cropped to
    ``CUBE_SIZE``; positive cubes get a random crop offset when
    ``train_set`` is true and a centred crop otherwise.

    Changes compared to the previous implementation:

    * Crop offsets use integer division.  ``CROP_SIZE / 4`` and
      ``CROP_SIZE / 2`` produced floats on Python 3, and
      ``random.randint`` rejects float bounds from Python 3.12 on.
    * Samples left over at the end of a pass over ``record_list`` are kept
      and completed by the next pass.  Previously the sample lists were
      cleared at the start of each pass while the sample counter was not,
      which dropped those samples and made the next batch short.
    * The running mean is tracked with a sum and a count instead of an
      ever-growing list.
    * An empty ``record_list`` raises instead of looping forever.

    Args:
        batch_size: Number of samples stacked into each yielded batch.
        record_list: Sequence of records; shuffled IN PLACE at the start of
            every pass when ``train_set`` is true.
        train_set: Enables shuffling, random crop offsets for positive
            cubes, random flips and the periodic "Mean" printout.

    Yields:
        ``(x, {"out_class": y_class, "out_malignancy": y_size})`` where each
        value is the ``numpy.vstack`` of the per-sample entries.

    Raises:
        ValueError: If ``record_list`` is empty.
    """
    if len(record_list) == 0:
        raise ValueError("record_list is empty; nothing to generate")

    def random_flips(cube):
        """Apply four independent random flips/reversals to ``cube``."""
        if random.randint(0, 100) > 50:
            cube = numpy.fliplr(cube)
        if random.randint(0, 100) > 50:
            cube = numpy.flipud(cube)
        if random.randint(0, 100) > 50:
            cube = cube[:, :, ::-1]
        if random.randint(0, 100) > 50:
            cube = cube[:, ::-1, :]
        return cube

    crop_size = CUBE_SIZE
    target_shape = (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE)

    # Running statistics of the cube means (reported every 1,000,000 cubes).
    mean_sum = 0.0
    mean_count = 0

    # Batch buffers live outside the epoch loop so a partial batch carries
    # over into the next pass instead of being discarded.
    img_list = []
    class_list = []
    size_list = []

    while True:
        if train_set:
            random.shuffle(record_list)

        for record_item in record_list:
            class_label = record_item[1]
            size_label = record_item[2]

            if class_label == 0:
                cube_image = helpers.load_cube_img(record_item[0], 6, 8, 48)
                wiggle = 48 - crop_size - 1
                indent_x = indent_y = indent_z = 0
                if wiggle > 0:
                    indent_x = random.randint(0, wiggle)
                    indent_y = random.randint(0, wiggle)
                    indent_z = random.randint(0, wiggle)
                cube_image = cube_image[indent_z:indent_z + crop_size,
                                        indent_y:indent_y + crop_size,
                                        indent_x:indent_x + crop_size]
                if train_set:
                    cube_image = random_flips(cube_image)
                if crop_size != CUBE_SIZE:
                    cube_image = helpers.rescale_patient_images2(
                        cube_image, target_shape)
                assert cube_image.shape == target_shape
            else:
                cube_image = helpers.load_cube_img(record_item[0], 8, 8, 64)
                current_cube_size = cube_image.shape[0]

                # Default (validation) crop is centred.
                centre = int((current_cube_size - crop_size) / 2)
                indent_x = indent_y = indent_z = centre

                wiggle_indent = 0
                wiggle = current_cube_size - crop_size - 1
                if wiggle > (crop_size / 2):
                    # Plenty of slack: restrict random offsets to the range
                    # [crop_size // 4, crop_size // 4 + wiggle].
                    wiggle_indent = crop_size // 4
                    wiggle = (current_cube_size - crop_size
                              - crop_size // 2 - 1)
                if train_set:
                    indent_x = wiggle_indent + random.randint(0, wiggle)
                    indent_y = wiggle_indent + random.randint(0, wiggle)
                    indent_z = wiggle_indent + random.randint(0, wiggle)

                cube_image = cube_image[indent_z:indent_z + crop_size,
                                        indent_y:indent_y + crop_size,
                                        indent_x:indent_x + crop_size]
                if crop_size != CUBE_SIZE:
                    cube_image = helpers.rescale_patient_images2(
                        cube_image, target_shape)
                assert cube_image.shape == target_shape
                if train_set:
                    cube_image = random_flips(cube_image)

            mean_sum += cube_image.mean()
            mean_count += 1
            img3d = prepare_image_for_net3D(cube_image)
            if train_set and mean_count % 1000000 == 0:
                print("Mean: ", mean_sum / mean_count)

            img_list.append(img3d)
            class_list.append(class_label)
            size_list.append(size_label)

            if len(img_list) >= batch_size:
                x = numpy.vstack(img_list)
                y_class = numpy.vstack(class_list)
                y_size = numpy.vstack(size_list)
                yield x, {"out_class": y_class, "out_malignancy": y_size}
                img_list = []
                class_list = []
                size_list = []
def predict_cubes(path, model_path, magnification=1, holdout_no=-1,
                  ext_name="", fold_count=2):
    """Scan one pre-processed patient volume and write its predictions CSV.

    The image and mask stacks are loaded from ``<path>_Preprocessed``,
    scanned on a grid with stride ``PREDICT_STEP``, and every cube whose
    mask sum is at least 2000 is fed to the network.  Predictions whose
    class output exceeds ``P_TH`` are written, one row each, to
    ``<dst_dir>/<path>.csv``.

    Changes compared to the previous implementation:

    * A failure to load the images printed a hint and then fell through to
      code that used the never-assigned ``patient_img``.  The hint is still
      printed, but the original exception is now re-raised, and the bare
      ``except:`` was narrowed to ``except Exception:``.
    * Cubes whose mask sum is below 2000 are skipped again, as in the
      sibling ``predict_cubes`` variants.  This variant incremented
      ``skipped_count`` but had lost the ``else:`` that actually skips the
      cube.  NOTE(review): confirm this was not a deliberate change.
    * Cubes left in an incomplete final batch are now predicted; before,
      only full batches of 128 were ever sent to the model.
    * The dead first ``diameter_perc`` computation was removed.

    Args:
        path: Patient identifier; used as CSV base name and, with the
            ``_Preprocessed`` suffix, as the image directory.
        model_path: Weights path handed to
            ``step2_train_nodule_detector.get_net``.
        magnification: When not 1, image and mask are rescaled with
            ``helpers.rescale_patient_images``; also part of the output
            directory name.
        holdout_no: Only checked against None (see note in the body).
        ext_name: Suffix of the output directory name.
        fold_count: Modulus applied to the patient fold.

    Returns:
        None.

    Raises:
        Exception: Whatever ``helpers.load_patient_images`` raised while
            loading the image stack.
    """
    dst_dir = settings.LUNA_NODULE_DETECTION_DIR
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    holdout_ext = ""
    dst_dir += "predictions" + str(int(
        magnification * 10)) + holdout_ext + "_" + ext_name + "/"
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    sw = helpers.Stopwatch.start_new()
    model = step2_train_nodule_detector.get_net(
        input_shape=(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE, 1),
        load_weight_path=model_path)
    patient_id = path

    all_predictions_csv = []

    if holdout_no is not None:
        # NOTE(review): the fold is computed but never used in this variant;
        # the call is kept in case it validates the patient id.
        patient_fold = helpers.get_patient_fold(patient_id)
        patient_fold %= fold_count

    print(": ", patient_id)
    csv_target_path = dst_dir + patient_id + ".csv"
    print(patient_id)

    try:
        patient_img = helpers.load_patient_images(
            patient_id + '_Preprocessed', '', "*_i.png", [])
    except Exception:
        print('Please Re-Process the dicom file again')
        raise
    if magnification != 1:
        patient_img = helpers.rescale_patient_images(
            patient_img, (1, 1, 1), magnification)

    patient_mask = helpers.load_patient_images(
        patient_id + '_Preprocessed', '', "*_m.png", [])
    if magnification != 1:
        patient_mask = helpers.rescale_patient_images(
            patient_mask, (1, 1, 1), magnification, is_mask_image=True)

    step = PREDICT_STEP
    crop_size = CUBE_SIZE

    # Number of grid positions k per axis with k*step + crop_size < size.
    predict_volume_shape = tuple(
        len(range(0, patient_img.shape[dim] - crop_size, step))
        for dim in range(3))
    predict_volume = numpy.zeros(shape=predict_volume_shape, dtype=float)
    print("Predict volume shape: ", predict_volume.shape)

    done_count = 0
    skipped_count = 0
    batch_size = 128
    batch_list = []
    batch_list_coords = []
    patient_predictions_csv = []

    def flush_batch():
        """Predict the queued cubes and record every hit above P_TH."""
        batch_data = numpy.vstack(batch_list)
        p = model.predict(batch_data, batch_size=batch_size)
        for i, (grid_z, grid_y, grid_x) in enumerate(batch_list_coords):
            nodule_chance = p[0][i][0]
            predict_volume[grid_z, grid_y, grid_x] = nodule_chance
            if nodule_chance > P_TH:
                # Centre of the cube in voxel coordinates.
                p_z = grid_z * step + crop_size / 2
                p_y = grid_y * step + crop_size / 2
                p_x = grid_x * step + crop_size / 2
                p_z_perc = round(p_z / patient_img.shape[0], 4)
                p_y_perc = round(p_y / patient_img.shape[1], 4)
                p_x_perc = round(p_x / patient_img.shape[2], 4)
                diameter_mm = round(p[1][i][0], 4)
                diameter_perc = round(diameter_mm / patient_img.shape[2], 4)
                nodule_chance = round(nodule_chance, 4)
                # The annotation index is the row's position in the list.
                patient_predictions_csv_line = [
                    len(patient_predictions_csv), p_x_perc, p_y_perc,
                    p_z_perc, diameter_perc, nodule_chance, diameter_mm
                ]
                patient_predictions_csv.append(patient_predictions_csv_line)
                all_predictions_csv.append(
                    [patient_id] + patient_predictions_csv_line)
        del batch_list[:]
        del batch_list_coords[:]

    for z in range(predict_volume_shape[0]):
        for y in range(predict_volume_shape[1]):
            for x in range(predict_volume_shape[2]):
                cube_img = patient_img[z * step:z * step + crop_size,
                                       y * step:y * step + crop_size,
                                       x * step:x * step + crop_size]
                cube_mask = patient_mask[z * step:z * step + crop_size,
                                         y * step:y * step + crop_size,
                                         x * step:x * step + crop_size]

                if cube_mask.sum() < 2000:
                    skipped_count += 1
                else:
                    if crop_size != CUBE_SIZE:
                        cube_img = helpers.rescale_patient_images2(
                            cube_img, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))
                    batch_list.append(prepare_image_for_net3D(cube_img))
                    batch_list_coords.append((z, y, x))
                    if len(batch_list) == batch_size:
                        flush_batch()

                done_count += 1
                if done_count % 10000 == 0:
                    print("Done: ", done_count, " skipped:", skipped_count)

    # Predict whatever is left in the last, incomplete batch.
    if batch_list:
        flush_batch()

    df = pandas.DataFrame(patient_predictions_csv,
                          columns=[
                              "anno_index", "coord_x", "coord_y", "coord_z",
                              "diameter", "nodule_chance", "diameter_mm"
                          ])
    print("Started Filtering")
    print(all_predictions_csv)
    # Return value is ignored, as before; presumably this updates df in
    # place — TODO confirm.
    filter_patient_nodules_predictions(df, patient_id,
                                       crop_size * magnification)
    df.to_csv(csv_target_path, index=False)

    print(predict_volume.mean())
    print("Done in : ", sw.get_elapsed_seconds(), " seconds")
def predict_cubes(model_path, continue_job, only_patient_id=None,
                  luna16=False, magnification=1, flip=False, train_data=True,
                  holdout_no=-1, ext_name="", fold_count=2):
    """Scan every patient volume with the nodule detector and write CSVs.

    For each patient directory found in
    ``settings.NDSB3_EXTRACTED_IMAGE_DIR`` the image and mask stacks are
    loaded, scanned on a grid with stride ``PREDICT_STEP``, and every cube
    whose mask sum is at least 2000 is fed to the network.  Predictions
    whose class output exceeds ``P_TH`` are written, one row each, to
    ``<dst_dir>/<patient_id>.csv``.

    Changes compared to the previous implementation:

    * Cubes left in an incomplete final batch are now predicted; before,
      only full batches of 128 were ever sent to the model and the rest
      of each patient's cubes were silently dropped.
    * The dead first ``diameter_perc`` computation and unused locals
      (``cube_img = None``, ``all_predictions_csv``) were removed.

    NOTE(review): patients are always listed from
    ``settings.NDSB3_EXTRACTED_IMAGE_DIR``, also when ``luna16`` is true.
    NOTE(review): with ``train_data=True`` and the default
    ``holdout_no=-1`` the fold test ``patient_fold % fold_count !=
    holdout_no`` presumably skips every patient (for an integer fold and a
    positive ``fold_count``); callers appear to be expected to pass
    ``holdout_no=None`` or a real fold number — confirm.

    Args:
        model_path: Weights path handed to
            ``step2_train_nodule_detector.get_net``.
        continue_job: When true (and ``only_patient_id`` is None), patients
            whose CSV already exists are skipped.
        only_patient_id: When given, only this patient is processed.
        luna16: Selects the output root directory; when false, patients
            missing from the labels CSV are skipped.
        magnification: When not 1, image and mask are rescaled with
            ``helpers.rescale_patient_images``; also part of the output
            directory name.
        flip: When true, each cube's last axis is reversed before
            prediction and ``_flip`` is added to the output directory name.
        train_data: Chooses the labels CSV and enables the hold-out filter.
        holdout_no: Fold number to keep, or None to disable the filter.
        ext_name: Suffix of the output directory name.
        fold_count: Modulus applied to the patient fold.

    Returns:
        None.
    """
    if luna16:
        dst_dir = settings.LUNA_NODULE_DETECTION_DIR
    else:
        dst_dir = settings.NDSB3_NODULE_DETECTION_DIR
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    holdout_ext = ""
    flip_ext = ""
    if flip:
        flip_ext = "_flip"

    dst_dir += "predictions" + str(int(
        magnification * 10)) + holdout_ext + flip_ext + "_" + ext_name + "/"
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    sw = helpers.Stopwatch.start_new()
    model = step2_train_nodule_detector.get_net(
        input_shape=(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE, 1),
        load_weight_path=model_path)

    if not luna16:
        if train_data:
            labels_df = pandas.read_csv("resources/stage1_labels.csv")
        else:
            labels_df = pandas.read_csv(
                "resources/stage2_sample_submission.csv")
        labels_df.set_index(["id"], inplace=True)

    patient_ids = []
    for file_name in os.listdir(settings.NDSB3_EXTRACTED_IMAGE_DIR):
        if not os.path.isdir(settings.NDSB3_EXTRACTED_IMAGE_DIR + file_name):
            continue
        patient_ids.append(file_name)

    for patient_index, patient_id in enumerate(reversed(patient_ids)):
        if not luna16:
            if patient_id not in labels_df.index:
                continue
        if "metadata" in patient_id:
            continue
        if only_patient_id is not None and only_patient_id != patient_id:
            continue

        if holdout_no is not None and train_data:
            patient_fold = helpers.get_patient_fold(patient_id)
            patient_fold %= fold_count
            if patient_fold != holdout_no:
                continue

        print(patient_index, ": ", patient_id)
        csv_target_path = dst_dir + patient_id + ".csv"
        if continue_job and only_patient_id is None:
            if os.path.exists(csv_target_path):
                continue

        patient_img = helpers.load_patient_images(
            patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR, "*_i.png", [])
        if magnification != 1:
            patient_img = helpers.rescale_patient_images(
                patient_img, (1, 1, 1), magnification)

        patient_mask = helpers.load_patient_images(
            patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR, "*_m.png", [])
        if magnification != 1:
            patient_mask = helpers.rescale_patient_images(
                patient_mask, (1, 1, 1), magnification, is_mask_image=True)

        step = PREDICT_STEP
        crop_size = CUBE_SIZE

        # Number of grid positions k per axis with k*step + crop_size < size.
        predict_volume_shape = tuple(
            len(range(0, patient_img.shape[dim] - crop_size, step))
            for dim in range(3))
        predict_volume = numpy.zeros(shape=predict_volume_shape, dtype=float)
        print("Predict volume shape: ", predict_volume.shape)

        done_count = 0
        skipped_count = 0
        batch_size = 128
        batch_list = []
        batch_list_coords = []
        patient_predictions_csv = []

        def flush_batch():
            """Predict the queued cubes and record every hit above P_TH."""
            batch_data = numpy.vstack(batch_list)
            p = model.predict(batch_data, batch_size=batch_size)
            for i, (grid_z, grid_y, grid_x) in enumerate(batch_list_coords):
                nodule_chance = p[0][i][0]
                predict_volume[grid_z, grid_y, grid_x] = nodule_chance
                if nodule_chance > P_TH:
                    # Centre of the cube in voxel coordinates.
                    p_z = grid_z * step + crop_size / 2
                    p_y = grid_y * step + crop_size / 2
                    p_x = grid_x * step + crop_size / 2
                    p_z_perc = round(p_z / patient_img.shape[0], 4)
                    p_y_perc = round(p_y / patient_img.shape[1], 4)
                    p_x_perc = round(p_x / patient_img.shape[2], 4)
                    diameter_mm = round(p[1][i][0], 4)
                    diameter_perc = round(
                        diameter_mm / patient_img.shape[2], 4)
                    nodule_chance = round(nodule_chance, 4)
                    # The annotation index is the row's position in the list.
                    patient_predictions_csv.append([
                        len(patient_predictions_csv), p_x_perc, p_y_perc,
                        p_z_perc, diameter_perc, nodule_chance, diameter_mm
                    ])
            del batch_list[:]
            del batch_list_coords[:]

        for z in range(predict_volume_shape[0]):
            for y in range(predict_volume_shape[1]):
                for x in range(predict_volume_shape[2]):
                    cube_img = patient_img[z * step:z * step + crop_size,
                                           y * step:y * step + crop_size,
                                           x * step:x * step + crop_size]
                    cube_mask = patient_mask[z * step:z * step + crop_size,
                                             y * step:y * step + crop_size,
                                             x * step:x * step + crop_size]

                    if cube_mask.sum() < 2000:
                        skipped_count += 1
                    else:
                        if flip:
                            cube_img = cube_img[:, :, ::-1]
                        if crop_size != CUBE_SIZE:
                            cube_img = helpers.rescale_patient_images2(
                                cube_img, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))
                        batch_list.append(prepare_image_for_net3D(cube_img))
                        batch_list_coords.append((z, y, x))
                        if len(batch_list) == batch_size:
                            flush_batch()

                    done_count += 1
                    if done_count % 10000 == 0:
                        print("Done: ", done_count, " skipped:",
                              skipped_count)

        # Predict whatever is left in the last, incomplete batch.
        if batch_list:
            flush_batch()

        df = pandas.DataFrame(patient_predictions_csv,
                              columns=["anno_index", "coord_x", "coord_y",
                                       "coord_z", "diameter", "nodule_chance",
                                       "diameter_mm"])
        # Return value is ignored, as before; presumably this updates df in
        # place — TODO confirm.
        filter_patient_nodules_predictions(df, patient_id,
                                           crop_size * magnification)
        df.to_csv(csv_target_path, index=False)

        print(predict_volume.mean())
        print("Done in : ", sw.get_elapsed_seconds(), " seconds")
def data_generator(batch_size, record_list, train_set):
    """Yield endless ``(images, labels)`` batches built from ``record_list``.

    For every record the cube image referenced by ``record_item[0]`` is loaded
    with ``helpers.load_cube_img(path, 8, 8, 64)``, cropped to ``CUBE_SIZE``
    voxels per axis, passed through ``prepare_image_for_net3D`` and collected
    together with the ten label values stored at record positions 1..10.

    Samples left over at the end of a pass over ``record_list`` are carried
    into the next pass, so every yielded batch holds ``batch_size`` samples.

    Args:
        batch_size: Number of samples per yielded batch.
        record_list: Sequence of records. It is shuffled in place at the start
            of every pass when ``train_set`` is true.
        train_set: When true, records are shuffled and each cube gets a random
            crop offset plus random flips. When false, the crop is centred and
            no flips are applied.

    Yields:
        A tuple ``(x, labels)`` where ``x`` is ``numpy.vstack`` of the prepared
        images and ``labels`` maps each output name to ``numpy.vstack`` of the
        corresponding label values.

    Raises:
        ValueError: If ``record_list`` is empty (the generator could never
            produce a batch and would otherwise loop forever).
    """
    # len() rather than truthiness so array-like inputs are accepted too.
    if len(record_list) == 0:
        raise ValueError("record_list must not be empty")

    # (output name, position inside a record), in the order the outputs are
    # placed into the yielded label dict.
    label_columns = (
        ("out_diamter", 1),
        ("out_malignancy", 2),
        ("out_sphericiy", 3),
        ("out_margin", 4),
        ("out_spiculation", 5),
        ("out_texture", 6),
        ("out_calcification", 7),
        ("out_internal_structure", 8),
        ("out_lobulation", 9),
        ("out_subtlety", 10),
    )
    crop_size = CUBE_SIZE

    # The pending batch lives outside the pass loop so that a partial batch
    # at the end of one pass is completed by the next pass.
    img_list = []
    label_lists = {key: [] for key, _ in label_columns}

    while True:
        if train_set:
            random.shuffle(record_list)

        # Walk over every record once per pass.
        for record_item in record_list:
            labels = {key: record_item[index] for key, index in label_columns}
            labels["out_diamter"] = round(labels["out_diamter"], 4)

            # Load the cube and work out where to crop it.
            cube_image = helpers.load_cube_img(record_item[0], 8, 8, 64)
            current_cube_size = cube_image.shape[0]
            if train_set:
                # Augmentation: random crop offset. Integer division keeps
                # the bounds ints; random.randint rejects floats on
                # Python 3.12+.
                wiggle_indent = crop_size // 4
                wiggle = current_cube_size - crop_size - crop_size // 2 - 1
                indent_x = wiggle_indent + random.randint(0, wiggle)
                indent_y = wiggle_indent + random.randint(0, wiggle)
                indent_z = wiggle_indent + random.randint(0, wiggle)
            else:
                # Evaluation: centred crop.
                indent_x = indent_y = indent_z = (
                    (current_cube_size - crop_size) // 2
                )

            # Cut the cube down to crop_size along every axis.
            cube_image = cube_image[indent_z:indent_z + crop_size,
                                    indent_y:indent_y + crop_size,
                                    indent_x:indent_x + crop_size]

            # No-op while crop_size equals CUBE_SIZE; kept so the crop size
            # can be changed independently of the network input size.
            if crop_size != CUBE_SIZE:
                cube_image = helpers.rescale_patient_images2(
                    cube_image, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))
            assert cube_image.shape == (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE)

            # Augmentation: four independent random flips.
            if train_set:
                if random.randint(0, 100) > 50:
                    cube_image = numpy.fliplr(cube_image)
                if random.randint(0, 100) > 50:
                    cube_image = numpy.flipud(cube_image)
                if random.randint(0, 100) > 50:
                    cube_image = cube_image[:, :, ::-1]
                if random.randint(0, 100) > 50:
                    cube_image = cube_image[:, ::-1, :]

            # Presumably normalises/reshapes the cube for the 3D network;
            # see prepare_image_for_net3D for the exact transformation.
            img3d = prepare_image_for_net3D(cube_image)

            # Queue the sample for the pending batch.
            img_list.append(img3d)
            for key, value in labels.items():
                label_lists[key].append(value)

            if len(img_list) >= batch_size:
                x = numpy.vstack(img_list)
                y = {key: numpy.vstack(values)
                     for key, values in label_lists.items()}
                yield x, y
                img_list = []
                label_lists = {key: [] for key, _ in label_columns}
def _load_test_cube(cube_path, file_name, data_source):
    """Load one test cube image and cut a randomly placed CUBE_SIZE crop.

    Args:
        cube_path: Path handed to ``helpers.load_cube_img``.
        file_name: Base name of ``cube_path``; the text before its first
            ``'_'`` selects the image layout.
        data_source: Name of the data set the file belongs to.

    Returns:
        The cropped cube with shape ``(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE)``.
    """
    crop_size = CUBE_SIZE
    source_prefix = file_name.split('_')[0]

    if data_source == "testdata_neg" and source_prefix != "ndsb3manual":
        # Negatives other than "ndsb3manual" are loaded as 6x8 images of
        # 48-voxel cubes.
        cube_image = helpers.load_cube_img(cube_path, 6, 8, 48)
        wiggle = 48 - crop_size - 1
        indent_x = 0
        indent_y = 0
        indent_z = 0
        if wiggle > 0:
            indent_x = random.randint(0, wiggle)
            indent_y = random.randint(0, wiggle)
            indent_z = random.randint(0, wiggle)
    else:
        # Positives, and "ndsb3manual" negatives, are loaded as 8x8 images
        # of 64-voxel cubes.
        cube_image = helpers.load_cube_img(cube_path, 8, 8, 64)
        current_cube_size = cube_image.shape[0]
        wiggle_indent = 0
        wiggle = current_cube_size - crop_size - 1
        if wiggle > (crop_size / 2):
            # Enough slack: keep the crop away from the cube border.
            # Integer division keeps the bounds ints; random.randint
            # rejects floats on Python 3.12+.
            wiggle_indent = crop_size // 4
            wiggle = current_cube_size - crop_size - crop_size // 2 - 1
        # A cube exactly crop_size wide leaves wiggle at -1; clamp it so the
        # crop falls back to offset 0 instead of randint(0, -1) raising.
        wiggle = max(wiggle, 0)
        indent_x = wiggle_indent + random.randint(0, wiggle)
        indent_y = wiggle_indent + random.randint(0, wiggle)
        indent_z = wiggle_indent + random.randint(0, wiggle)

    cube_image = cube_image[indent_z:indent_z + crop_size,
                            indent_y:indent_y + crop_size,
                            indent_x:indent_x + crop_size]

    # No-op while crop_size equals CUBE_SIZE; kept so the crop size can be
    # changed independently of the network input size.
    if crop_size != CUBE_SIZE:
        cube_image = helpers.rescale_patient_images2(
            cube_image, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))
    assert cube_image.shape == (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE)
    return cube_image


def data_generator(test_files, data_source):
    """Build the list of prepared test cubes for ``test_files``.

    Despite its name this function is not a generator: it processes every
    file eagerly and returns a list.

    Args:
        test_files: Iterable of cube image paths.
        data_source: Name of the data set. ``"testdata_neg"`` files whose
            name does not start with ``"ndsb3manual_"`` are read as 48-voxel
            cubes; everything else is read as 64-voxel cubes.

    Returns:
        A list of ``(img3d, file_name)`` tuples, where ``img3d`` is the
        result of ``prepare_image_for_net3D`` on the cropped cube and
        ``file_name`` is the base name of the source path.
    """
    img_list = []
    for test_item in test_files:
        file_name = ntpath.basename(test_item)
        cube_image = _load_test_cube(test_item, file_name, data_source)
        img3d = prepare_image_for_net3D(cube_image)
        img_list.append((img3d, file_name))
    return img_list