def gen_slide_names(path):
    """Collect the distinct slide (big-image) names parsed from .bmp cell filenames.

    :param path: root directory scanned recursively for .bmp cell images
    :return: list of distinct slide names, in first-seen order
    :raises Exception: when a filename matches neither known naming scheme
    """
    files = FilesScanner(path, ['.bmp']).get_files()

    # 1-p0.6042_BD1607254-子宫内膜C_2018-10-09 16_42_03_x23043_y40485_w162_h218_2x.jpg
    # fix: '.' before 'bmp' is now escaped so it matches a literal dot only
    pattern00 = re.compile(
        r'1-p\d\.\d+_(.*?)_x(\d+)_y(\d+)_w(\d+)_h(\d+)(_\dx)?\.bmp')
    # 2018-03-22-11_26_58_x15789_y31806_w63_h61_s385.jpg
    pattern01 = re.compile(r'(.*?)_x(\d+)_y(\d+)_w(\d+)_h(\d+)(_s\d+)?\.bmp')

    names = []
    for item in files:
        # cell image file name; spaces normalised to '-'
        basename = os.path.basename(item).replace(' ', '-')

        items = re.findall(pattern00, basename)
        if not items:
            items = re.findall(pattern01, basename)

        if items:
            parent_name, x, y, w, h, _ = items[0]
            if parent_name not in names:
                names.append(parent_name)
        else:
            # fix: removed unreachable exit() that followed this raise
            raise Exception("%s IS NOT ACCEPTED!" % basename)

    return names
def find_abnormal():
    """Copy oversized (>500px wide or tall) cell crops into the abnormal collection dir.

    Bug fix: when the filename did not match the pattern, the original still
    fell through to ``items[0]`` and raised IndexError; it now logs the name
    and skips the file.
    """
    # 1-p0.1718_TC18050036_x34939_y52118_w107_h105_2x.jpg
    # 1-p0.5982_TC18053765_x46070_y20472_w26_h28_.jpg
    # fix: dots escaped so '.' matches literally
    pattern = re.compile(
        r'1-p0\.\d{4}_(.*?)_x(\d+)_y(\d+)_w(\d+)_h(\d+)_?(\dx)?\.jpg')

    src_path = "/home/cnn/Development/DATA/CELL_CLASSIFIED_JOB_20181022/CELLS/TIFFS_CHECKED"
    dst_path = "/home/cnn/Development/DATA/CELL_CLASSIFIED_JOB_20181022/CELLS/ABNORMAL_IMAGE_COLLECTIONS"

    images = FilesScanner(src_path, ['.jpg']).get_files()
    for path in images:
        basename = os.path.basename(path)
        items = re.findall(pattern, basename)
        if not items:
            # unmatched filename: report and skip instead of crashing
            print(basename)
            continue

        big_name, x, y, w, h, _ = items[0]
        if int(w) > 500 or int(h) > 500:
            shutil.copyfile(
                path,
                os.path.join(dst_path, "%s_%s_%s_%s_%s.jpg" % (big_name, x, y, w, h)))
def collect_tiff_ctype_collection(path):
    """For every big image (tiff), collect the cell-type directories its cell
    crops live under, then dump the mapping to tiff_children_distribution.txt."""
    dict_ = {}
    for image in FilesScanner(path).get_files():
        basename = os.path.basename(image)
        tiff_name, x, y, w, h, s = re.findall(PATTERN, basename)[0]
        ctype = os.path.basename(os.path.dirname(image))

        # register the cell type under its tiff, keeping first-seen order
        types = dict_.setdefault(tiff_name, [])
        if ctype not in types:
            types.append(ctype)

    with open('tiff_children_distribution.txt', 'w') as o:
        for key, lst in dict_.items():
            o.write("%s\t%s\n" % (key, "\t".join(lst)))
def do_similar_remove(path):
    """Move cell crops that heavily overlap (IOU > 0.6) an already-registered
    crop of the same big image into REPEAT_FILE_SAVE_PATH/<cell_type>.

    Bug fixes vs. original:
    - the unpacking ``x_, y_, w_, h = item[:-1]`` clobbered the current
      cell's ``h``; renamed to ``h_`` so two distinct boxes are compared;
    - a ``break``/for-else now ensures a crop is either moved once or
      registered once, instead of appending per non-matching item.

    :param path: root directory of cell .jpg crops
    :return: dict big_name -> list of kept (x, y, w, h, file) tuples
    """
    files = FilesScanner(path).get_files()
    pattern = re.compile(r'(.*?)_x(\d+)_y(\d+)_w(\d+)_h(\d+)_s(\d+).jpg')

    dict_ = {}
    total = len(files)
    for index, file in enumerate(files):
        print("%s / %s ..." % (index + 1, total))

        basename = os.path.basename(file)
        cell_type = os.path.basename(os.path.dirname(file))

        big_name, x, y, w, h, _ = re.findall(pattern, basename)[0]
        x, y, w, h = int(x), int(y), int(w), int(h)

        if big_name in dict_:
            for item in dict_[big_name]:
                x_, y_, w_, h_ = item[:-1]
                if cal_IOU((x, y, w, h), (x_, y_, w_, h_)) > 0.6:
                    save_path = os.path.join(REPEAT_FILE_SAVE_PATH, cell_type)
                    if not os.path.exists(save_path):
                        os.makedirs(save_path)
                    shutil.move(file, save_path)
                    break
            else:
                # no earlier crop overlaps this one: keep it
                dict_[big_name].append((x, y, w, h, file))
        else:
            dict_[big_name] = [(x, y, w, h, file)]

    return dict_
def tiff_readable_check(path):
    """Verify pathology slide files have unique basenames and are readable.

    :param path: root directory of the original slide images
    :raises Exception: on a duplicated basename (message lists both paths)
        or on a slide neither openslide nor TSlide can open
    """
    # NOTE(review): 'kfb' is missing its leading dot, unlike '.tif' —
    # confirm FilesScanner normalises postfixes.
    files = FilesScanner(path, ['.tif', 'kfb']).get_files()

    # duplicate detection via dict: O(n) instead of two parallel lists
    seen = {}
    for file in files:
        basename = os.path.basename(file)
        if basename in seen:
            raise Exception("%s\n%s" % (file, seen[basename]))
        seen[basename] = file

    for file in files:
        try:
            try:
                slide = openslide.OpenSlide(file)
            except Exception:
                # fall back to the proprietary-format reader
                slide = TSlide(file)
        except Exception as e:
            raise Exception("%s %s" % (file, str(e)))
def run(self):
    """Per-slide pipeline: slice TIFF -> darknet segmentation -> xception
    classification -> csv metadata -> review cell images.

    Reads self.tiff_lst, self.slice_dir_path, self.meta_files_path and
    self.cells_path; writes <name>_seg.csv, <name>_clas.csv and cell crops.
    """
    print("Initial DARKNET and XCEPTION model ...")

    total = len(self.tiff_lst)
    for index, tiff in enumerate(self.tiff_lst):
        # big-image basename without extension; spaces normalised to '-'
        tiff_basename, _ = os.path.splitext(os.path.basename(tiff))
        tiff_basename = tiff_basename.replace(" ", "-")
        print('Process %s / %s %s ...' % (index + 1, total, tiff_basename))

        # directory holding this slide's jpg tiles
        slice_save_path = os.path.join(self.slice_dir_path, tiff_basename)

        t0 = datetime.datetime.now()

        # slice the TIFF only when the tile directory is absent
        if not os.path.exists(slice_save_path):
            ImageSlice(tiff, self.slice_dir_path).get_slices()

        # collect the tile paths
        tif_images = FilesScanner(slice_save_path, ['.jpg']).get_files()

        t1 = datetime.datetime.now()
        print('TIFF SLICE COST: %s' % (t1 - t0))

        seg_results = DarknetPredict().predict(tif_images)

        t2 = datetime.datetime.now()
        print('TIFF DARKNET COST: %s' % (t2 - t1))

        # save segment result into csv
        xcep_pre = XceptionPreprocess(tiff)
        seg_csv = os.path.join(self.meta_files_path, tiff_basename + "_seg.csv")
        xcep_pre.write_csv(seg_results, seg_csv)

        # generate numpy array, it is the input of second stage classification algorithm
        cell_numpy, cell_index = xcep_pre.gen_np_array_csv(seg_csv=seg_csv)

        # run classification
        predictions = XceptionPredict().predict(np.asarray(cell_numpy))

        t3 = datetime.datetime.now()
        print('XCEPTION COST: %s' % (t3 - t2))

        # summarize two stages' result and generate a final csv
        clas = XceptionPostprocess()
        clas_dict = clas.convert_all(predictions=predictions, cell_index=cell_index)
        clas_csv = os.path.join(self.meta_files_path, tiff_basename + "_clas.csv")
        clas.write_csv(clas_dict, clas_csv)

        ############################### review images #####################################
        # GET VIEW CELL IMAGES
        clas.cut_cells_p_marked(tiff, clas_dict, self.cells_path, factor=0.2, N=2)

        t4 = datetime.datetime.now()
        print("TIFF %s TOTAL COST %s ..." % (tiff_basename, t4 - t0))
def generate_xml_path_dict(xml_path_lst):
    """Build a <basename without .xml: full path> dict for all xml files.

    :param xml_path_lst: directory scanned for .xml files
    :return: dict
    """
    return {
        os.path.basename(file).replace(".xml", ""): file
        for file in FilesScanner(xml_path_lst, ['.xml']).get_files()
    }
def collect(path):
    """Write the distinct tiff names of cells whose type folder is in
    SELECTED to work_tiff_list_20181102_SELECTED.txt, in first-seen order."""
    lst = []
    for image in FilesScanner(path).get_files():
        ctype = os.path.basename(os.path.dirname(image))
        if ctype not in SELECTED:
            continue

        basename = os.path.basename(image)
        tiff_name, x, y, w, h, s = re.findall(PATTERN, basename)[0]
        if tiff_name not in lst:
            lst.append(tiff_name)

    with open("work_tiff_list_20181102_SELECTED.txt", 'w') as o:
        o.write("%s" % ("\n".join(lst)))
def generate_csv_path_dict(csv_files_path):
    """Build a <name: path> dict for the model-generated *_clas.csv files.

    :param csv_files_path: directory scanned for .csv files
    :return: dict
    """
    csv_paths = FilesScanner(csv_files_path, ['.csv']).get_files()
    return {
        os.path.basename(item).replace("_clas.csv", ""): item
        for item in csv_paths
        if os.path.basename(item).endswith('_clas.csv')
    }
def get_abnormal_tiff_list():
    """Write the distinct tiff-name prefixes of the abnormal cell jpgs to
    4x_tiff_lst.txt, one per line.

    Improvement: order-preserving dedup via dict.fromkeys — O(n) instead of
    the original O(n^2) list-membership loop.
    """
    src_path = '/home/cnn/Development/DATA/CELL_CLASSIFIED_JOB_20181022/CELLS/ABNORMAL_IMAGE_COLLECTIONS'
    files = FilesScanner(src_path, ['.jpg']).get_files()

    # filename prefix before the first '_' is the tiff name
    keys = dict.fromkeys(os.path.basename(item).split('_')[0] for item in files)

    with open("4x_tiff_lst.txt", 'w') as o:
        o.write("\n".join(keys))
def collect(image_root_path, collect_files_save_path):
    """Validate that every cell image sits under a recognised pathology-type folder.

    Bug fix: the original scanned an undefined global ``path`` instead of the
    ``image_root_path`` parameter (NameError at call time).

    :param image_root_path: root directory of cell .jpg images
    :param collect_files_save_path: accepted but unused in the visible body —
        presumably consumed elsewhere; kept for interface compatibility
    :raises Exception: when a folder name does not normalise to a known class
    """
    images = FilesScanner(image_root_path, ['.jpg']).get_files()
    for image in images:
        cell_type = os.path.basename(os.path.dirname(image))

        # normalise folder names like "HSIL_xx" / "HSIL-xx" to the bare type
        if '_' in cell_type:
            cell_type = cell_type.split('_')[0]
        if '-' in cell_type:
            cell_type = cell_type.split('-')[0]

        if cell_type not in PATHOLOGY_TYPE_CLASSES:
            raise Exception("%s NOT CLASSIFIED" % image)
def find_size_over_608(path):
    """Copy images larger than 608px in either dimension to OUT_PUT_PATH/<ctype>.

    Fix: the image is opened with a context manager so its file handle is
    released immediately (the original left handles open until GC).

    :param path: root directory of .jpg images
    """
    images = FilesScanner(path, ['.jpg']).get_files()

    total = len(images)
    for index, image in enumerate(images):
        basename = os.path.basename(image)
        ctype = os.path.basename(os.path.dirname(image))
        print("%s / %s %s" % (index + 1, total, basename))

        with Image.open(image) as img:
            w, h = img.size

        if w > 608 or h > 608:
            save_path = os.path.join(OUT_PUT_PATH, ctype)
            if not os.path.exists(save_path):
                os.makedirs(save_path)
            shutil.copy(image, save_path)
def random_cells_cut_progress(
        in_dir,
        out_path,
        start,
        end,
        num,
        size,
):
    """Multi-process patch cutting over all .kfb slides under ``in_dir``.

    Fix: the pool is managed by a ``with`` block so worker processes are
    always shut down (the original never called ``shutdown``).

    :param in_dir: input TIFF/KFB directory
    :param out_path: output directory for the cut patches
    :param start: start of the cutting range
    :param end: end of the cutting range
    :param num: number of patches required per file
    :param size: patch side length in pixels
    :return:
    """
    kfbs = FilesScanner(in_dir, ['.kfb']).get_files()

    with ProcessPoolExecutor(max_workers=20) as executor:
        tasks = [
            executor.submit(worker, path, out_path, (start, end), num, (size, size))
            for path in kfbs
        ]

        job_count = len(tasks)

        # failed-task bookkeeping
        fail_task_collection = []
        for future in as_completed(tasks):
            status, _ = future.result()
            if status == 1:
                fail_task_collection.append(_)
            job_count -= 1
            print("LAST JOB NUM %s" % job_count)

    print('\n'.join(fail_task_collection))
def do_repeat_remove(path):
    """Detect cell crops whose coordinate key (filename minus the trailing
    _s score) repeats, and copy the first-seen duplicate into
    REPEAT_FILE_SAVE_PATH/<cell_type>.

    Bug fix: the original stored the coordinate tuple ``items[1:-1]`` under
    the key, then handed that tuple to ``shutil.copy`` as a source *path*
    (TypeError on the first duplicate). The first file's path is stored
    instead, which is what the copy needs.

    :param path: root directory of cell crops
    :return: dict key -> first file path seen for that key
    """
    files = FilesScanner(path).get_files()
    pattern = re.compile(r'(.*?)_x(\d+)_y(\d+)_w(\d+)_h(\d+)_s(\d+).jpg')

    dict_ = {}
    for file in files:
        basename = os.path.basename(file)
        cell_type = os.path.basename(os.path.dirname(file))

        items = re.findall(pattern, basename)[0]
        # identity = big name + coordinates, ignoring the trailing score
        key = "_".join(items[:-1])
        if key not in dict_:
            dict_[key] = file
        else:
            save_path = os.path.join(REPEAT_FILE_SAVE_PATH, cell_type)
            if not os.path.exists(save_path):
                os.makedirs(save_path)
            # shutil.move(file, save_path)
            shutil.copy(dict_[key], save_path)

    return dict_
def restore_tiff_children_lst(path):
    """Return a dict mapping each tiff name to its training cells.

    :param path: root directory of training .jpg images
    :return: dict tiff_name -> [(cell_type, image_path), ...]
    """
    images = FilesScanner(path, ['.jpg']).get_files()
    print("TRAIN_DATA IMAGE COUNT: %s" % len(images))

    # TC18053113_x54903_y33619_w465_h522_s95.jpg
    pattern = re.compile(r'(.*?)_x(\d+)_y(\d+)_w(\d+)_h(\d+)_s(\d+).jpg')

    dict_ = {}
    for image in images:
        basename = os.path.basename(image)
        ctype = os.path.basename(os.path.dirname(image))
        tiff_name, x, y, w, h, s = re.findall(pattern, basename)[0]
        dict_.setdefault(tiff_name, []).append((ctype, image))

    return dict_
dst = "/home/cnn/Development/DATA/RECHECK_DATA_IN_20181026" with open("names_lst.txt") as f: lines = f.readlines() already_exist_images = [ "_".join(line.replace("\n", '').split("_")[:-1]) for line in lines ] print(already_exist_images[:100]) pattern = re.compile( r'1-p0.\d{4}_(.*?)_x(\d+)_y(\d+)_w(\d+)_h(\d+)_?(\dx)?.jpg') for item in lst: images = FilesScanner(item, ['.jpg']).get_files() for name in images: basename = os.path.basename(name) big_name, x, y, w, h, _ = re.findall(pattern, basename)[0] basename = "%s_x%s_y%s_w%s_h%s" % (big_name, x, y, w, h) cell_type = os.path.basename(os.path.dirname(name)) if basename not in already_exist_images: print("WA %s is NEW " % basename) save_path = os.path.join(dst, 'NO_CHECK', big_name, cell_type) else: print("EEEE %s is EXIST!" % basename) save_path = os.path.join(dst, 'CHECKED', cell_type) if not os.path.exists(save_path):
def run(self):
    """Per-slide pipeline with multi-GPU yolo fan-out: slice TIFF ->
    parallel yolo segmentation -> crop each detected cell from the slide
    into self.cells_path/<slide>/<label>/.

    Reads self.tiff_lst, self.slice_dir_path, self.cells_path.
    """
    print("Initial DARKNET and XCEPTION model ...")

    total = len(self.tiff_lst)
    for index, tiff in enumerate(self.tiff_lst):
        # big-image basename without extension; spaces normalised to '-'
        tiff_basename, _ = os.path.splitext(os.path.basename(tiff))
        tiff_basename = tiff_basename.replace(" ", "-")
        print('Process %s / %s %s ...' % (index + 1, total, tiff_basename))

        # directory holding this slide's jpg tiles
        slice_save_path = os.path.join(self.slice_dir_path, tiff_basename)

        t0 = datetime.datetime.now()

        # slice the TIFF only when the tile directory is absent
        if not os.path.exists(slice_save_path):
            ImageSlice(tiff, self.slice_dir_path).get_slices()

        # collect the tile paths
        tif_images = FilesScanner(slice_save_path, ['.jpg']).get_files()

        t1 = datetime.datetime.now()
        print('TIFF SLICE COST: %s' % (t1 - t0))

        tasks = []

        # one worker process per GPU
        executor = ProcessPoolExecutor(max_workers=GPU_NUM)

        if len(tif_images) < cfg.darknet.min_job_length:
            # small job: run everything on GPU '0'
            tasks.append(executor.submit(yolo_predict, '0', tif_images))
        else:
            # split the tiles into GPU_NUM roughly equal chunks
            n = int((len(tif_images) / float(GPU_NUM)) + 0.5)
            patches = [tif_images[i: i + n] for i in range(0, len(tif_images), n)]

            for gpu_index, patch in enumerate(patches):
                tasks.append(executor.submit(yolo_predict, str(gpu_index), patch))

        # merge the per-worker segmentation dicts
        seg_results = {}
        for future in as_completed(tasks):
            result = future.result()
            seg_results.update(result)

        executor.shutdown(wait=True)

        try:
            slide = openslide.OpenSlide(tiff)
        except:
            # fall back to the proprietary-format reader
            slide = TSlide(tiff)

        # keys are "<x0>_<y0>" tile origins; boxes are tile-relative
        keys = list(seg_results.keys())
        for key in keys:
            lst = seg_results[key]
            x0, y0 = key.split('_')
            x0, y0 = int(x0), int(y0)

            for item in lst:
                label, accuracy, (x, y, w, h) = item
                accuracy, x, y, w, h = float(accuracy), int(x), int(y), int(w), int(h)
                # translate tile-relative coordinates to slide coordinates
                x, y = x0 + x, y0 + y

                save_path = os.path.join(self.cells_path, tiff_basename, label)
                if not os.path.exists(save_path):
                    os.makedirs(save_path)

                # "1-p<prob>" encodes 1 - accuracy in the crop filename
                image_name = "1-p{:.4f}_{}_x{}_y{}_w{}_h{}.jpg".format(1 - accuracy, tiff_basename, x, y, w, h)
                slide.read_region((x, y), 0, (w, h)).convert("RGB").save(os.path.join(save_path, image_name))
def get_cell_image(path, ctype, parent_pathes):
    """Collect cell-crop metadata (.bmp variant) grouped by parent big-image
    name, and dump one json-lines file per big image.

    :param path: root directory of cell images
    :param ctype: annotation category, MANUAL or AUTO
    :param parent_pathes: dict mapping big-image name -> original tiff path
    :return: dict parent_name -> list of per-cell info dicts
    """
    # Optional local cache of scanned file paths (disabled):
    # image_path_info_dict_path = ctype + '_IMAGES_PATH_DICT.txt'
    # check_name = os.path.join(METADATA_FILE_PATH, image_path_info_dict_path)
    # if os.path.exists(check_name):
    #     with open(os.path.join(METADATA_FILE_PATH, image_path_info_dict_path)) as f:
    #         files = [item.replace('\n', '') for item in f.readlines()]
    # else:
    files = FilesScanner(path, ['.bmp', '.jpg']).get_files()
    # with open(os.path.join(METADATA_FILE_PATH, image_path_info_dict_path), 'w') as o:
    #     o.writelines([item + '\n' for item in files])

    # cell coordinate info parsed from the file names
    cells_dict = {}

    # 1-p0.6042_BD1607254-子宫内膜C_2018-10-09 16_42_03_x23043_y40485_w162_h218_2x.jpg
    pattern00 = re.compile(
        r'1-p\d\.\d+_(.*?)_x(\d+)_y(\d+)_w(\d+)_h(\d+)(_\dx)?.(bmp|jpg)')
    # 2018-03-22-11_26_58_x15789_y31806_w63_h61_s385.jpg
    pattern01 = re.compile(
        r'(.*?)_x(\d+)_y(\d+)_w(\d+)_h(\d+)(_s\d+)?.(bmp|jpg)')

    for item in files:
        if item.endswith('.bmp'):
            # cell file name; spaces normalised to '-'
            basename = os.path.basename(item).replace(' ', '-')
            parent = os.path.dirname(item)

            # cell class = immediate parent directory
            clas_type = os.path.basename(parent)
            if "_NEW" in clas_type or "_2" in clas_type or "_1" in clas_type:
                # strip folder-name suffixes like FOO_NEW / FOO_2 / FOO_1
                clas_type = clas_type.split("_")[0]

            # step up to the big-image directory
            # NOTE(review): assumed to execute for every file (outside the if
            # above) so directory depth is suffix-independent — confirm layout.
            parent = os.path.dirname(parent)

            items = re.findall(pattern00, basename)
            if not items:
                items = re.findall(pattern01, basename)

            if items:
                parent_name, x, y, w, h, _ = items[0]
            else:
                raise Exception("%s IS NOT ACCEPTED!" % basename)
                exit()  # NOTE(review): unreachable after the raise above

            # parent_name = os.path.basename(parent).replace(' ', '-')
            parent = os.path.dirname(parent)

            # big-image class from the directory above the big-image folder
            parent_type = os.path.basename(parent)

            # original big-image path
            try:
                parent_path = parent_pathes[parent_name]
            except Exception as e:
                print("%s NOT FOUND" % parent_name)
                print("CANNOT FIND RELATIVE TIFF PATH INFO, %s\n%s" % (str(e), item))
                exit()

            # parse coordinate info from the file name
            point = get_location_from_filename(basename)
            assert point, "THIS JPG NAME IS NOT ACCEPTED => %s" % basename

            _, x, y, w, h, _ = point
            x, y, w, h = int(x), int(y), int(w), int(h)

            # fold AGC sub-classes into 'AGC'
            if clas_type in AGC_CLASSES:
                clas_type = 'AGC'

            if parent_type in AGC_CLASSES:
                parent_type = 'AGC'

            # if parent_type not in PATHOLOGY_TYPE_CLASSES:
            #     raise Exception(item + " PARENT_TYPE NOT FOUND")

            # cell position and class info
            info = {
                'name': basename,
                'cell_type': clas_type,
                'cell_path': item,
                'parent': parent_name,
                'parent_full_name': os.path.basename(parent_path),
                'parent_type': parent_type,
                'x': x,
                'y': y,
                'w': w,
                'h': h,
            }

            if parent_name in cells_dict:
                cells_dict[parent_name].append(info)
            else:
                cells_dict[parent_name] = [info]

    # write the parsed cell data to one json-lines file per big image
    for key, lines in cells_dict.items():
        # output directory per annotation category
        save_path = os.path.join(METADATA_FILE_PATH, ctype + '_IMAGES_PATH_DICT')
        os.makedirs(save_path, exist_ok=True)

        with open(os.path.join(save_path, key + '.txt'), 'w') as f:
            for line in lines:
                f.write(json.dumps(line) + '\n')

    return cells_dict
import re from concurrent.futures import ProcessPoolExecutor, as_completed from constants import SELECTED_CELL_XML_SAVE_PATH, MAX_CPU_WORKERS from utils import FilesScanner, generate_selected_level_xml if not os.path.exists(SELECTED_CELL_XML_SAVE_PATH): os.makedirs(SELECTED_CELL_XML_SAVE_PATH, exist_ok=True) if __name__ == '__main__': # 读取指定位置的算法人员筛选后的细胞文件路径 # cell_images_path = CELL_IMAGES_SAVE_PATH cell_images_path = '/home/cnn/Development/DATA/BATCH_4_TRAIN_DATA/CELLS/' print("SCANNING PATH %s..." % cell_images_path) cell_images_lst = FilesScanner(cell_images_path, ['.jpg']).get_files() print("CELLS COUNT: %s" % len(cell_images_lst)) # 2018-03-22-11_26_58_x15789_y31806_w63_h61_s385.jpg pattern = re.compile(r'(.*?)_x(\d+)_y(\d+)_w(\d+)_h(\d+)(_s\d+)?.jpg') print("COLLECT POINT INFO FROM JPG FILES...") tiff_cell_dict = {} for path in cell_images_lst: cell_type = os.path.basename(os.path.dirname(path)) jpg = os.path.basename(path) point = re.findall(pattern, jpg) if not point: print(path) try: tiff_name, x, y, w, h, _ = point[0]
def get_cell_image(path, ctype, parent_pathes):
    """Collect cell-crop metadata (.jpg variant) grouped by parent big-image
    name, and dump one json-lines file per big image.

    Directory layout assumed from the dirname chain below:
    .../<parent_type>/<parent_name>/<clas_type>/<cell>.jpg — TODO confirm.

    :param path: root directory of cell images
    :param ctype: annotation category, MANUAL or AUTO
    :param parent_pathes: dict mapping big-image name -> original tiff path
    :return: dict parent_name -> list of per-cell info dicts
    """
    # Local cache of scanned image paths: reuse when present, otherwise scan
    # with FilesScanner and persist the listing for next time.
    image_path_info_dict_path = ctype + '_IMAGES_PATH_DICT.txt'
    check_name = os.path.join(METADATA_FILE_PATH, image_path_info_dict_path)
    if os.path.exists(check_name):
        with open(os.path.join(METADATA_FILE_PATH, image_path_info_dict_path)) as f:
            files = [item.replace('\n', '') for item in f.readlines()]
    else:
        files = FilesScanner(path, ['.jpg']).get_files()
        with open(os.path.join(METADATA_FILE_PATH, image_path_info_dict_path), 'w') as o:
            o.writelines([item + '\n' for item in files])

    # cell coordinate info parsed from the file names
    cells_dict = {}
    for item in files:
        if item.endswith('.jpg'):
            # cell file name
            basename = os.path.basename(item)
            parent = os.path.dirname(item)

            # cell class = immediate parent directory
            clas_type = os.path.basename(parent)
            parent = os.path.dirname(parent)

            # big-image name = next directory up
            parent_name = os.path.basename(parent)
            parent = os.path.dirname(parent)

            # big-image class = next directory up again
            parent_type = os.path.basename(parent)

            # original big-image path
            try:
                parent_path = parent_pathes[parent_name]
            except Exception as e:
                print("CANNOT FIND RELATIVE TIFF PATH INFO, %s" % str(e))
                exit()

            # parse coordinate info from the file name
            point = get_location_from_filename(basename)
            assert point, "THIS JPG NAME IS NOT ACCEPTED => %s" % basename

            _, x, y, w, h, _ = point

            # fold AGC sub-classes into 'AGC'
            if clas_type in AGC_CLASSES:
                clas_type = 'AGC'

            # normalise the big-image class: keep the part after the last '_'
            if '_' in parent_type:
                parent_type = parent_type.split('_')[-1]
            if parent_type in AGC_CLASSES:
                parent_type = 'AGC'

            # cell position and class info
            info = {
                'name': basename,
                'cell_type': clas_type,
                'cell_path': item,
                'parent': parent_name,
                'parent_full_name': os.path.basename(parent_path),
                'parent_type': parent_type,
                'x': x,
                'y': y,
                'w': w,
                'h': h,
            }

            if parent_name in cells_dict:
                cells_dict[parent_name].append(info)
            else:
                cells_dict[parent_name] = [info]

    # write the parsed cell data to one json-lines file per big image
    for key, lines in cells_dict.items():
        # output directory per annotation category
        save_path = os.path.join(METADATA_FILE_PATH, ctype + '_IMAGES_PATH_DICT')
        os.makedirs(save_path, exist_ok=True)

        with open(os.path.join(save_path, key + '.txt'), 'w') as f:
            for line in lines:
                f.write(json.dumps(line) + '\n')

    return cells_dict
def generate_labelme_format_xml(csv_files_path, patch_dict, xml_save_path):
    """
    Write the cell boxes from each *_clas.csv into a labelme-format xml,
    alongside a copy of the corresponding patch image.

    :param csv_files_path: directory containing the csv files to read
    :param patch_dict: dict patch-key -> {'label': ..., 'path': ...}
    :param xml_save_path: output root for the generated xml files
    :return:
    """
    files = FilesScanner(csv_files_path, postfix=['.csv']).get_files()
    clas_files = [item for item in files if item.endswith('_clas.csv')]

    # number of csv files to process
    total = len(clas_files)
    for index, file in enumerate(clas_files):
        print("Processing %s / %s %s" % (index + 1, total, os.path.basename(file)))
        with open(file) as f:
            lines = csv.reader(f)

            dict_ = {}
            # skip the csv header row
            next(lines, None)
            for line in lines:
                key = line[0]
                # clamp negative coordinates to 0 and round to nearest int
                box = {
                    'name': line[3],
                    'xmin': 0 if float(line[5]) < 0 else int(float(line[5]) + 0.5),
                    'ymin': 0 if float(line[6]) < 0 else int(float(line[6]) + 0.5),
                    'xmax': 0 if float(line[7]) < 0 else int(float(line[7]) + 0.5),
                    'ymax': 0 if float(line[8]) < 0 else int(float(line[8]) + 0.5),
                }

                if key not in dict_:
                    dict_[key] = [box]
                else:
                    dict_[key].append(box)

        for key, lst in dict_.items():
            if key in patch_dict:
                patch = patch_dict[key]
                label = patch['label']
                image_path = patch['path']

                save_path = os.path.join(xml_save_path, label)
                if not os.path.exists(save_path):
                    os.makedirs(save_path)

                # remove duplicated cells
                lst_ = []
                for item in lst:
                    x, y, w, h = item['xmin'], item['ymin'], item['xmax'] - item['xmin'], item['ymax'] - item['ymin']
                    # for-else: keep the box only if no already-kept box
                    # overlaps it with IOU > 0.8
                    for item_ in lst_:
                        x_, y_, w_, h_ = item_['xmin'], item_['ymin'], item_['xmax'] - item_['xmin'], item_['ymax'] - item_['ymin']
                        if cal_IOU((x, y, w, h), (x_, y_, w_, h_)) > 0.8:
                            break
                    else:
                        lst_.append(item)

                write_to_labelme_xml(lst_, os.path.join(save_path, key + '.xml'))
                shutil.copy(image_path, save_path)
            else:
                raise Exception("%s NOT FOUND IN DICT" % file)
def run(self):
    """Per-slide pipeline with resume support and multi-GPU fan-out:
    slice TIFF -> yolo (darknet) segmentation -> xception classification
    -> csv metadata -> review cell images.

    Reads self.tiff_lst, self.slice_dir_path, self.meta_files_path and
    self.cells_path; writes <name>_seg.csv, <name>_clas.csv and cell crops.
    """
    print("Initial DARKNET and XCEPTION model ...")

    total = len(self.tiff_lst)
    for index, tiff in enumerate(self.tiff_lst):
        # big-image basename without extension; spaces normalised to '-'
        tiff_basename, _ = os.path.splitext(os.path.basename(tiff))
        tiff_basename = tiff_basename.replace(" ", "-")
        print('Process %s / %s %s ...' % (index + 1, total, tiff_basename))

        # resume: skip slides whose cell output directory already has content
        check_cell_path = os.path.join(self.cells_path, tiff_basename)
        if os.path.exists(check_cell_path):
            children = os.listdir(check_cell_path)
            if len(children) > 0:
                print("%s HAS BEEN PROCESSED!" % tiff_basename)
                continue

        # directory holding this slide's jpg tiles
        slice_save_path = os.path.join(self.slice_dir_path, tiff_basename)

        t0 = datetime.datetime.now()

        # slice the TIFF only when the tile directory is absent
        if not os.path.exists(slice_save_path):
            ImageSlice(tiff, self.slice_dir_path).get_slices()

        # collect the tile paths
        tif_images = FilesScanner(slice_save_path, ['.jpg']).get_files()

        t1 = datetime.datetime.now()
        print('TIFF SLICE COST: %s' % (t1 - t0))

        # CHECK IF ALREADY PROCESSED
        seg_csv = os.path.join(self.meta_files_path, tiff_basename + "_seg.csv")

        # segmentation results are persisted through this preprocessor
        xcep_pre = XceptionPreprocess(tiff)

        if not os.path.exists(seg_csv):
            # ------------------------------ YOLO stage ------------------------------
            tasks = []

            # one worker process per GPU
            executor = ProcessPoolExecutor(max_workers=GPU_NUM)

            if len(tif_images) < cfg.darknet.min_job_length:
                # small job: run everything on GPU '0'
                tasks.append(executor.submit(yolo_predict, '0', tif_images))
            else:
                # split the tiles into GPU_NUM roughly equal chunks
                n = int((len(tif_images) / float(GPU_NUM)) + 0.5)
                patches = [tif_images[i: i + n] for i in range(0, len(tif_images), n)]

                for gpu_index, patch in enumerate(patches):
                    tasks.append(executor.submit(yolo_predict, str(gpu_index), patch))

            # merge the per-worker segmentation dicts
            seg_results = {}
            for future in as_completed(tasks):
                result = future.result()
                seg_results.update(result)

            executor.shutdown(wait=True)

            # WRITE DATA TO CSV
            xcep_pre.write_csv(seg_results, seg_csv)

            t2 = datetime.datetime.now()
            print("DARKNET COST %s" % (t2 - t1))

        # XCEPTION preprocess
        # NOTE(review): when seg_csv already exists the branch above is
        # skipped and t2 is never bound, so the "t3 - t2" print below raises
        # NameError on that resume path — confirm intended behaviour.
        cell_lst, cell_index = xcep_pre.gen_np_array_csv(seg_csv=seg_csv)

        # ------------------------------ XCEPTION stage ------------------------------
        tasks = []

        # one worker process per GPU
        executor = ProcessPoolExecutor(max_workers=GPU_NUM)

        if len(cell_lst) < cfg.xception.min_job_length:
            tasks.append(executor.submit(xception_predict, '0', np.asarray(cell_lst)))
        else:
            # split the cells into GPU_NUM roughly equal chunks
            n = int((len(cell_lst) / float(GPU_NUM)) + 0.5)
            cell_patches = [cell_lst[i: i + n] for i in range(0, len(cell_lst), n)]

            for gpu_index, patch in enumerate(cell_patches):
                tasks.append(executor.submit(xception_predict, str(gpu_index), np.asarray(patch)))

        predictions_ = {}
        for future in as_completed(tasks):
            # NOTE(review): this rebinds the outer loop variable `index`;
            # harmless today because enumerate reassigns it, but fragile.
            index, result = future.result()
            predictions_[index] = result

        # reassemble worker outputs in submission order (keys are str gpu ids)
        predictions = []
        for i in range(len(predictions_)):
            predictions.extend(predictions_[str(i)])

        executor.shutdown(wait=True)

        t3 = datetime.datetime.now()
        print("XCEPTION COST %s" % (t3 - t2))

        # summarize the two stages and write the final csv
        clas = XceptionPostprocess()
        clas_dict = clas.convert_all(predictions=predictions, cell_index=cell_index)
        clas_csv = os.path.join(self.meta_files_path, tiff_basename + '_clas.csv')
        clas.write_csv(clas_dict, clas_csv)

        # ------------------------------ review images ------------------------------
        # GET VIEW CELL IMAGES
        clas.cut_cells_p_marked(tiff, clas_dict, self.cells_path, factor=0.2, N=1)

        t4 = datetime.datetime.now()
        print("GET VIEW IMAGES COST %s" % (t4 - t3))

        print("TIFF %s TOTAL COST %s ..." % (tiff_basename, t4 - t0))
# 中间文件存放目录 meta_files_path = os.path.join(resource_save_path, 'test', 'META') # 识别出的细胞存储路径 cells_save_path = os.path.join(resource_save_path, 'test', 'CELLS') else: # 切图文件存储路径 slice_dir_path = os.path.join(resource_save_path, 'SLICE') # 中间文件存放目录 meta_files_path = os.path.join(resource_save_path, 'META') # 识别出的细胞存储路径 cells_save_path = os.path.join(resource_save_path, 'CELLS') tiff_lst = FilesScanner(tiff_dir_path, ['.kfb', '.tif']).get_files() # 执行 TIFF 文件完整性校验 for tiff in tiff_lst: try: try: slide = openslide.OpenSlide(tiff) except: slide = TSlide(tiff) except Exception as e: raise Exception("%s %s" % (tiff, str(e))) for item in [slice_dir_path, meta_files_path, cells_save_path]: if not os.path.exists(item): os.makedirs(item)
# coding: utf-8
"""Scan a tiff resource directory and report duplicated slide basenames."""
import os

import openslide

from tslide import TSlide
from utils import FilesScanner

tiff_resource_path = ''

tiffs = FilesScanner(tiff_resource_path, ['.kfb', '.tif']).get_files()

# Group full paths by file basename.
# Bug fix: the original keyed the dict on the full path `item`, so every
# bucket held exactly one entry and no duplicate was ever reported.
files = {}
for item in tiffs:
    basename = os.path.basename(item)
    if basename in files:
        files[basename].append(item)
    else:
        files[basename] = [item]

# print every basename that appears under more than one path
for key, lst in files.items():
    if len(lst) > 1:
        print(lst)

# for tiff in tiffs:
#     try:
#         try:
#             slide = openslide.OpenSlide(tiff)
#         except:
#             slide = TSlide(tiff)
#     except:
#         print("TIFF OPEN FAILED => \n%s" % tiff)
def collect_cells_by_accuracy(path, accuracy, output):
    # NOTE(review): this function looks truncated/unfinished — `accuracy`
    # and `output` are unused, the scanner result is never consumed, and
    # every other FilesScanner call in this codebase chains `.get_files()`.
    # Confirm the intended body before relying on it.
    cell_images = FilesScanner(path, ['.jpg'])
patch.save(os.path.join(save_path, image_name)) #patch = cv2.cvtColor(np.asarray(patch), cv2.COLOR_RGBA2BGR) #cv2.imwrite(os.path.join(save_path, image_name), patch, [int(cv2.IMWRITE_JPEG_QUALITY), 95]) except Exception as e: print(e) print(x_, y_, w_, h_) print(slide.dimensions) continue return None if __name__ == '__main__': # xmls_path = TRAIN_DATA_SAVE_PATH # 获取 xml 文件路径列表 xmls = FilesScanner(CHECKED_CELL_XML_SAVE_PATH, ['.xml']).get_files() # xmls = FilesScanner(SELECTED_CELL_XML_SAVE_PATH, ['.xml']).get_files() size = len(xmls) executor = ProcessPoolExecutor(max_workers=10) tasks = [] tif_path = '/home/cnn/Development/DATA/TRAIN_DATA/TIFFS' os.makedirs(METADATA_FILE_PATH, exist_ok=True) tif_images_collections_path = os.path.join(METADATA_FILE_PATH, 'TIFF_IMAGES_PATH_DICT.txt') tiff_dict = generate_name_path_dict(tif_path, ['.tif', '.kfb'], tif_images_collections_path) # tiff_dict = generate_name_path_dict('', ['.tif', '.kfb'])