def label_detected_nucleus(nucleus_dir, ground_truth_dir):
  """Label each detected nucleus as mitosis or not, depending on whether it
  falls within 32 pixels of a ground truth location.

  Args:
    nucleus_dir: directory containing the detection csv files.
    ground_truth_dir: directory containing the ground truth csv files,
      organized one sub-directory deep.
  """
  ground_truth_csvs = [str(f) for f in Path(ground_truth_dir).glob('*/*.csv')]
  for ground_truth_csv in ground_truth_csvs:
    ground_truth_dir, base = os.path.split(ground_truth_csv)
    sub_dir = os.path.split(ground_truth_dir)[1]
    inference_csv = os.path.join(nucleus_dir, "{}-{}".format(sub_dir, base))
    label_csv = os.path.join(nucleus_dir, "label-{}-{}".format(sub_dir, base))
    ground_truth_locations = get_locations_from_csv(
        ground_truth_csv, hasHeader=False, hasProb=False)
    inference_locations = get_locations_from_csv(
        inference_csv, hasHeader=True, hasProb=True)

    # Mark each inferred location as mitosis (True) if it lies within 32
    # pixels of any ground truth location, otherwise as non-mitosis (False).
    label_output = []
    for (y1, x1, prob) in inference_locations:
      inside = False
      for (y2, x2) in ground_truth_locations:
        if is_inside(x1, y1, x2, y2, 32):
          inside = True
          label_output.append((y1, x1, prob, True))
          break
      if not inside:
        label_output.append((y1, x1, prob, False))

    print(len(label_output), len(inference_locations))
    assert len(label_output) == len(inference_locations)
    tuple_2_csv(label_output, label_csv, ['Y', 'X', 'prob', 'is_mitosis'])
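
# `is_inside` is defined elsewhere in this module; the labeling above only
# relies on it to answer "does point (x1, y1) lie within `radius` pixels of
# (x2, y2)?". A minimal sketch of that assumed behavior, under a Euclidean
# distance interpretation (the real helper may differ):
def _is_inside_sketch(x1, y1, x2, y2, radius):
  """Return True if (x1, y1) lies within `radius` pixels of (x2, y2)."""
  dx = float(x1) - float(x2)
  dy = float(y1) - float(y2)
  return dx * dx + dy * dy <= radius * radius
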
def check_nucleus_inference(inference_dir, ground_truth_dir):
  """Check how many ground truth points are covered by the nucleus inference
  results, where a point counts as found if an inferred location lies within
  32 pixels of it.

  Args:
    inference_dir: directory containing the inference csv files.
    ground_truth_dir: directory containing the ground truth csv files,
      organized one sub-directory deep.
  """
  ground_truth_csvs = [str(f) for f in Path(ground_truth_dir).glob('*/*.csv')]
  matched_count = 0
  total_count = 0
  for ground_truth_csv in ground_truth_csvs:
    ground_truth_dir, base = os.path.split(ground_truth_csv)
    sub_dir = os.path.split(ground_truth_dir)[1]
    inference_csv = os.path.join(inference_dir, "{}-{}".format(sub_dir, base))
    ground_truth_locations = get_locations_from_csv(
        ground_truth_csv, hasHeader=False, hasProb=False)
    inference_locations = get_locations_from_csv(
        inference_csv, hasHeader=True, hasProb=False)

    # Count a ground truth point as matched if any inferred location lies
    # within 32 pixels of it.
    for (x1, y1) in ground_truth_locations:
      total_count += 1
      for (x2, y2) in inference_locations:
        if is_inside(x2, y2, x1, y1, 32):
          matched_count += 1
          break

  print("There are {} ground truth points, found {} of them.".format(
      total_count, matched_count))
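
# Example usage (hypothetical paths): the ground truth csvs are expected to
# live one sub-directory deep (e.g. ".../ground_truth/01/01.csv"), while the
# matching detection csvs are flat files named "{sub_dir}-{base}"
# (e.g. ".../nucleus/01-01.csv").
#
#   label_detected_nucleus("data/nucleus", "data/ground_truth")
#   check_nucleus_inference("data/nucleus", "data/ground_truth")
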
def combine_csvs(input_dir, output_dir, hasHeader=True, hasProb=True,
                 clean_output_dir=False):
  """Combine the per-tile prediction csv files into one csv file per image,
  shifting each point by the tile offsets encoded in the file name
  ("{basename}_{y_offset}_{x_offset}.csv").

  Args:
    input_dir: directory of the input csv files, searched recursively.
    output_dir: directory for the combined output csv files.
    hasHeader: boolean value to indicate if the input csv files have a header.
    hasProb: boolean value to indicate if the input csv files have a
      probability column.
    clean_output_dir: boolean value to indicate if the output directory should
      be removed before writing the combined files.
  """
  if clean_output_dir and os.path.exists(output_dir):
    shutil.rmtree(output_dir)

  input_files = [str(f) for f in Path(input_dir).glob('**/**/*.csv')]
  combined_csvs = {}
  for input_file in input_files:
    points = get_locations_from_csv(input_file, hasHeader=hasHeader,
                                    hasProb=hasProb)
    # File names are expected to look like "{basename}_{y_offset}_{x_offset}.csv".
    basename, y_offset, x_offset = \
        os.path.basename(input_file).split('.')[0].split("_")
    if basename not in combined_csvs:
      combined_csvs[basename] = []
    y_offset = int(y_offset)
    x_offset = int(x_offset)
    # Shift each point from tile coordinates back to image coordinates.
    for i in range(len(points)):
      points[i] = (points[i][0] + y_offset, points[i][1] + x_offset,
                   points[i][2])
    combined_csvs[basename].extend(points)

  os.makedirs(output_dir, exist_ok=True)
  for combined_csv in combined_csvs:
    tuple_2_csv(combined_csvs[combined_csv],
                os.path.join(output_dir, combined_csv) + '.csv',
                columns=['Y', 'X', 'prob'])
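
# Example (hypothetical layout): with tile-level csvs such as
# "pred/01/01_0_0.csv" and "pred/01/01_0_768.csv", combine_csvs shifts each
# point by its tile offset and writes a single "combined/01.csv".
#
#   combine_csvs("pred", "combined", hasHeader=True, hasProb=True)
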
def cluster_prediction_result(pred_dir, eps, min_samples, hasHeader,
                              isWeightedAvg=False, prob_threshold=0):
  """Cluster the prediction results to avoid the duplicated predictions
  introduced by the small stride.

  Args:
    pred_dir: directory of the prediction results.
    eps: maximum distance between two samples for them to be considered as in
      the same neighborhood.
    min_samples: number of samples (or total weight) in a neighborhood for a
      point to be considered as a core point.
    hasHeader: boolean value to indicate if the csv files have a header.
    isWeightedAvg: boolean value to indicate whether to use the prediction
      probabilities as weights when computing the averaged coordinates of each
      cluster.
    prob_threshold: probability threshold over which a location is considered
      a positive prediction for the purposes of clustering.
  """
  pred_files = list_files(pred_dir, "*.csv")
  pred_files = get_file_id(pred_files, GROUND_TRUTH_FILE_ID_RE)

  clustered_dir = os.path.dirname(pred_dir + "/") + "_clustered/"
  if os.path.exists(clustered_dir):
    print(clustered_dir)
    shutil.rmtree(clustered_dir)

  for k, pred_file in pred_files.items():
    print(pred_file)
    pred_locations = get_locations_from_csv(pred_file, hasHeader=hasHeader,
                                            hasProb=True)
    # keep only the locations above the probability threshold
    pred_locations = [p for p in pred_locations
                      if float(p[2]) > prob_threshold]

    # apply dbscan clustering on each prediction file
    if len(pred_locations) > 0:
      clustered_pred_locations = dbscan_clustering(
          pred_locations, eps=eps, min_samples=min_samples,
          isWeightedAvg=isWeightedAvg)

      # save the clustered prediction results
      clustered_file_name = pred_file.replace(pred_dir, clustered_dir)
      tuple_2_csv(clustered_pred_locations, clustered_file_name,
                  columns=['row', 'col', 'avg_prob'])
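
# `dbscan_clustering` is defined elsewhere; a minimal sketch of one way it
# could be implemented with scikit-learn, assuming the input is a list of
# (row, col, prob) tuples and the output is one (row, col, avg_prob) tuple per
# cluster (the actual helper may differ, e.g. in how noise points are handled):
def _dbscan_clustering_sketch(pred_locations, eps, min_samples,
                              isWeightedAvg=False):
  from sklearn.cluster import DBSCAN
  points = np.asarray(pred_locations, dtype=np.float64)
  coords, probs = points[:, 0:2], points[:, 2]
  labels = DBSCAN(eps=eps, min_samples=min_samples).fit(coords).labels_
  clustered = []
  for label in set(labels):
    if label == -1:  # skip points labeled as noise
      continue
    members = labels == label
    # optionally weight the cluster centroid by the prediction probabilities
    weights = probs[members] if isWeightedAvg else None
    row, col = np.average(coords[members], axis=0, weights=weights)
    clustered.append((row, col, float(np.mean(probs[members]))))
  return clustered
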
def csv_2_arr(csv_file, h, w, hasHeader):
  """Convert a csv file with the columns (row, col, val) to a 2D array in
  which [row, col] = val.

  Args:
    csv_file: csv file path.
    h: number of rows in the resulting array.
    w: number of columns in the resulting array.
    hasHeader: boolean value to indicate if the input csv file has a header.

  Returns:
    A 2D NumPy array of shape (h, w) in which [row, col] = val.
  """
  points = get_locations_from_csv(csv_file, hasHeader=hasHeader, hasProb=True)
  points = np.asarray(points)
  return ijv_2_arr(points, h, w)
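
# `ijv_2_arr` is defined elsewhere; a minimal sketch of the behavior assumed
# by csv_2_arr: scatter (row, col, val) triplets into a zero-initialized
# (h, w) array.
def _ijv_2_arr_sketch(points, h, w):
  arr = np.zeros((h, w), dtype=np.float32)
  for row, col, val in points:
    arr[int(row), int(col)] = val
  return arr
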
def extract_patches(img_dir, location_csv_dir, output_patch_basedir,
                    patch_size=64):
  """Extract patches centered on the locations listed in the csv files.

  Args:
    img_dir: directory of the input images (tif files).
    location_csv_dir: directory of the csv files with the patch center
      locations.
    output_patch_basedir: base directory to which the extracted patches are
      saved.
    patch_size: height and width of the extracted square patches.
  """
  location_csv_files = [str(f) for f in Path(location_csv_dir).glob('*.csv')]
  if len(location_csv_files) == 0:
    raise ValueError("Please check the input dir for the location csv files.")

  for location_csv_file in location_csv_files:
    print("Processing {} ......".format(location_csv_file))
    points = get_locations_from_csv(location_csv_file, hasHeader=True,
                                    hasProb=False)

    # Get the image file name from the csv file name, e.g. "01-02.csv" maps
    # to "01/02.tif".
    subfolder = os.path.basename(location_csv_file) \
        .replace('-', '/') \
        .replace('.csv', '')
    img_file = os.path.join(img_dir, "{}.tif".format(subfolder))
    print("Processing {} ......".format(img_file))
    img = cv2.imread(img_file)
    img = np.asarray(img)[:, :, ::-1]  # convert OpenCV's BGR order to RGB

    output_patch_dir = os.path.join(output_patch_basedir, subfolder)
    if not os.path.exists(output_patch_dir):
      os.makedirs(output_patch_dir, exist_ok=True)

    for (row, col) in points:
      patch = extract_patch(img, row, col, patch_size)
      save_patch(patch, path=output_patch_dir, lab=0, case=0, region=0,
                 row=row, col=col, rotation=0, row_shift=0, col_shift=0,
                 suffix=0, ext="png")
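
# `extract_patch` and `save_patch` are defined elsewhere; a minimal sketch of
# what `extract_patch` is assumed to do here: crop a `size` x `size` window
# centered on (row, col), padding at the image borders so the output shape
# stays constant (the real helper may handle borders differently).
def _extract_patch_sketch(img, row, col, size):
  half = size // 2
  padded = np.pad(img, ((half, half), (half, half), (0, 0)), mode='reflect')
  row, col = int(row) + half, int(col) + half
  return padded[row - half:row - half + size, col - half:col - half + size]
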