def get_coordinate(point_path_lst):
    """Extract the (x, y, w, h) box encoded in each entry's file name.

    :param point_path_lst: iterable of dicts, each with a ``'path'`` key
        holding the cell-image path.
    :return: list of ``(x, y, w, h)`` tuples in input order. The values are
        passed through exactly as ``get_location_from_filename`` yields them
        (no type conversion is done here).
    :raises SystemExit: with exit status 1 when a file name cannot be parsed;
        the offending base name is printed first.
    """
    coordinates = []
    for item in point_path_lst:
        basename = os.path.basename(item['path'])
        point = get_location_from_filename(basename)
        if not point:
            print(basename)
            # The bare ``exit()`` helper comes from the ``site`` module and
            # terminates with status 0; raise SystemExit directly so the
            # failure is always available and visible to the calling shell.
            raise SystemExit(1)
        # The parsed result is a 6-item sequence; only the middle four
        # items (the box) are kept.
        _, x, y, w, h, _ = point
        coordinates.append((x, y, w, h))
    return coordinates
def get_cell_image(path, ctype, parent_pathes):
    """Collect cell-image metadata under *path*, grouped by parent image name.

    The directory is always rescanned with ``FilesScanner``; no on-disk cache
    of the file listing is read or written by this function.

    Side effect: for every parent image name, the collected records are
    written as one JSON object per line to
    ``<METADATA_FILE_PATH>/<ctype>_IMAGES_PATH_DICT/<parent name>.txt``.

    :param path: root directory containing the cell images.
    :param ctype: annotation category (``MANUAL`` or ``AUTO``); used as the
        prefix of the output directory name.
    :param parent_pathes: dict mapping a parent (large) image name to that
        image's path.
    :return: dict mapping parent image name to a list of info dicts with the
        keys ``name``, ``cell_type``, ``cell_path``, ``parent``,
        ``parent_full_name``, ``parent_type``, ``x``, ``y``, ``w``, ``h``.
    :raises Exception: when a file name matches neither naming pattern.
    :raises AssertionError: when ``get_location_from_filename`` cannot parse
        the file name.
    :raises SystemExit: with status 1 when the parent image name is missing
        from *parent_pathes*.
    """
    files = FilesScanner(path, ['.bmp', '.jpg']).get_files()

    # e.g. 1-p0.6042_BD1607254-子宫内膜C_2018-10-09 16_42_03_x23043_y40485_w162_h218_2x.jpg
    pattern00 = re.compile(
        r'1-p\d\.\d+_(.*?)_x(\d+)_y(\d+)_w(\d+)_h(\d+)(_\dx)?.(bmp|jpg)')
    # e.g. 2018-03-22-11_26_58_x15789_y31806_w63_h61_s385.jpg
    pattern01 = re.compile(
        r'(.*?)_x(\d+)_y(\d+)_w(\d+)_h(\d+)(_s\d+)?.(bmp|jpg)')

    cells_dict = {}
    for item in files:
        # NOTE(review): '.jpg' files are scanned above but only '.bmp' files
        # are processed here - confirm whether '.jpg' should be included.
        if not item.endswith('.bmp'):
            continue

        # Cell image file name (spaces normalised to '-').
        basename = os.path.basename(item).replace(' ', '-')

        # The directory holding the file names the cell class; suffixed
        # variants such as "<class>_NEW", "<class>_1", "<class>_2" collapse
        # to the part before the first underscore.
        class_dir = os.path.dirname(item)
        clas_type = os.path.basename(class_dir)
        if "_NEW" in clas_type or "_2" in clas_type or "_1" in clas_type:
            clas_type = clas_type.split("_")[0]

        # The parent image name is the first capture group of whichever
        # pattern matches. Each pattern has seven groups, so index the tuple
        # instead of unpacking it into a fixed number of names.
        items = pattern00.findall(basename) or pattern01.findall(basename)
        if not items:
            raise Exception("%s IS NOT ACCEPTED!" % basename)
        parent_name = items[0][0]

        # The class of the parent image is the directory two levels above
        # the cell-class directory.
        parent_type = os.path.basename(
            os.path.dirname(os.path.dirname(class_dir)))

        # Original path of the parent image.
        try:
            parent_path = parent_pathes[parent_name]
        except KeyError as e:
            print("%s NOT FOUND" % parent_name)
            print("CANNOT FIND RELATIVE TIFF PATH INFO, %s\n%s" % (str(e), item))
            # Non-zero status so the failure is visible to the calling shell.
            raise SystemExit(1)

        # Parse the coordinates encoded in the file name. An explicit raise
        # (rather than ``assert``) keeps the check active under ``python -O``.
        point = get_location_from_filename(basename)
        if not point:
            raise AssertionError(
                "THIS JPG NAME IS NOT ACCEPTED => %s" % basename)
        _, x, y, w, h, _ = point
        x, y, w, h = int(x), int(y), int(w), int(h)

        # Fold every AGC sub-class into the single 'AGC' label.
        if clas_type in AGC_CLASSES:
            clas_type = 'AGC'
        if parent_type in AGC_CLASSES:
            parent_type = 'AGC'

        # Cell position and class information.
        info = {
            'name': basename,
            'cell_type': clas_type,
            'cell_path': item,
            'parent': parent_name,
            'parent_full_name': os.path.basename(parent_path),
            'parent_type': parent_type,
            'x': x,
            'y': y,
            'w': w,
            'h': h,
        }
        cells_dict.setdefault(parent_name, []).append(info)

    # Write the parsed records to one file per parent image name.
    save_path = os.path.join(METADATA_FILE_PATH, ctype + '_IMAGES_PATH_DICT')
    for key, lines in cells_dict.items():
        os.makedirs(save_path, exist_ok=True)
        with open(os.path.join(save_path, key + '.txt'), 'w') as f:
            for line in lines:
                f.write(json.dumps(line) + '\n')

    return cells_dict
# Copy every manually reviewed cell image into
# <merge_dir_path>/<parent_type>/<key>/<cell_type>/.
for item in manual_point_lst:
    path = item['path']
    cell_type = item['type']
    cell_save_path = os.path.join(merge_dir_path, parent_type, key, cell_type)
    # exist_ok avoids the check-then-create race and matches how the rest of
    # the file creates directories.
    os.makedirs(cell_save_path, exist_ok=True)
    shutil.copy(path, cell_save_path)

# Boxes of the reviewed images, used to detect algorithm-found cells that
# duplicate an already reviewed one.
manual_point_coordinate_lst = get_coordinate(manual_point_lst)

for point in auto_point_lst:
    # Reuse get_coordinate so an unparsable file name is reported the same
    # way as for the manual list instead of failing while unpacking the
    # parser's result.
    coordinate = get_coordinate([point])[0]

    # Skip the image when it overlaps any reviewed image (IOU above 0.8).
    if any(cal_IOU(coordinate, manual) > 0.8
           for manual in manual_point_coordinate_lst):
        continue

    # No corresponding reviewed image: copy it straight into the
    # "<cell_type>_NEW" folder.
    path = point['path']
    cell_type = point['type']
    cell_save_path = os.path.join(
        merge_dir_path, parent_type, key, cell_type + '_NEW')
    os.makedirs(cell_save_path, exist_ok=True)
    shutil.copy(path, cell_save_path)
def get_cell_image(path, ctype, parent_pathes):
    """Collect cell-image metadata under *path*, grouped by parent image name.

    The list of image paths is cached in
    ``<METADATA_FILE_PATH>/<ctype>_IMAGES_PATH_DICT.txt``. When that file
    exists it is read and *path* is not scanned; otherwise *path* is scanned
    with ``FilesScanner`` and the listing is written to the cache file.

    Side effect: for every parent image name, the collected records are
    written as one JSON object per line to
    ``<METADATA_FILE_PATH>/<ctype>_IMAGES_PATH_DICT/<parent name>.txt``.

    Expected layout of each image path, as read by the code:
    ``.../<parent type>/<parent name>/<cell class>/<file>.jpg``.

    :param path: root directory containing the cell images.
    :param ctype: annotation category (``MANUAL`` or ``AUTO``); used as the
        prefix of the cache file and output directory names.
    :param parent_pathes: dict mapping a parent (large) image name to that
        image's path.
    :return: dict mapping parent image name to a list of info dicts with the
        keys ``name``, ``cell_type``, ``cell_path``, ``parent``,
        ``parent_full_name``, ``parent_type``, ``x``, ``y``, ``w``, ``h``.
    :raises AssertionError: when ``get_location_from_filename`` cannot parse
        a file name.
    :raises SystemExit: with status 1 when a parent image name is missing
        from *parent_pathes*.
    """
    # Use the cached listing when present; otherwise scan and create it.
    cache_file = os.path.join(METADATA_FILE_PATH, ctype + '_IMAGES_PATH_DICT.txt')
    if os.path.exists(cache_file):
        with open(cache_file) as f:
            files = [line.replace('\n', '') for line in f]
    else:
        files = FilesScanner(path, ['.jpg']).get_files()
        # Make sure the metadata directory exists before the first write.
        cache_dir = os.path.dirname(cache_file)
        if cache_dir:
            os.makedirs(cache_dir, exist_ok=True)
        with open(cache_file, 'w') as o:
            o.writelines(item + '\n' for item in files)

    # Build the per-cell records from the file paths and names.
    cells_dict = {}
    for item in files:
        if not item.endswith('.jpg'):
            continue

        # Cell image file name.
        basename = os.path.basename(item)

        # Walk up the path: cell class, parent image name, parent image class.
        class_dir = os.path.dirname(item)
        clas_type = os.path.basename(class_dir)
        parent_dir = os.path.dirname(class_dir)
        parent_name = os.path.basename(parent_dir)
        parent_type = os.path.basename(os.path.dirname(parent_dir))

        # Original path of the parent image.
        try:
            parent_path = parent_pathes[parent_name]
        except KeyError as e:
            print("CANNOT FIND RELATIVE TIFF PATH INFO, %s" % str(e))
            # Non-zero status so the failure is visible to the calling shell.
            raise SystemExit(1)

        # Parse the coordinates encoded in the file name. An explicit raise
        # (rather than ``assert``) keeps the check active under ``python -O``.
        point = get_location_from_filename(basename)
        if not point:
            raise AssertionError(
                "THIS JPG NAME IS NOT ACCEPTED => %s" % basename)
        _, x, y, w, h, _ = point

        # Fold every AGC sub-class into the single 'AGC' label.
        if clas_type in AGC_CLASSES:
            clas_type = 'AGC'

        # The parent class is the text after the last underscore of its
        # directory name (the whole name when there is no underscore).
        parent_type = parent_type.split('_')[-1]
        if parent_type in AGC_CLASSES:
            parent_type = 'AGC'

        # Cell position and class information.
        info = {
            'name': basename,
            'cell_type': clas_type,
            'cell_path': item,
            'parent': parent_name,
            'parent_full_name': os.path.basename(parent_path),
            'parent_type': parent_type,
            'x': x,
            'y': y,
            'w': w,
            'h': h,
        }
        cells_dict.setdefault(parent_name, []).append(info)

    # Write the parsed records to one file per parent image name.
    save_path = os.path.join(METADATA_FILE_PATH, ctype + '_IMAGES_PATH_DICT')
    for key, lines in cells_dict.items():
        os.makedirs(save_path, exist_ok=True)
        with open(os.path.join(save_path, key + '.txt'), 'w') as f:
            for line in lines:
                f.write(json.dumps(line) + '\n')

    return cells_dict