def get_coordinate(point_path_lst):
    """
    Extract the (x, y, w, h) box encoded in each entry's file name.

    :param point_path_lst: iterable of dicts, each holding the image path
        under the 'path' key
    :return: list of (x, y, w, h) tuples, in the same order as the input

    If get_location_from_filename returns a falsy value for a file name,
    that name is printed and the process is terminated via exit().
    """
    coordinates = []

    for entry in point_path_lst:
        file_name = os.path.basename(entry['path'])
        location = get_location_from_filename(file_name)

        if location:
            # The parsed location has six fields; the first and last are
            # not needed here.
            _, x, y, w, h, _ = location
            coordinates.append((x, y, w, h))
            continue

        # Unparsable file name: report it and stop.
        print(file_name)
        exit()

    return coordinates
def get_cell_image(path, ctype, parent_pathes):
    """
    Collect metadata for the cell images under ``path``, grouped by slide.

    Every ``.bmp`` file returned by ``FilesScanner`` is described by a dict
    (file name, cell type, cell path, parent slide, parent type and the
    x/y/w/h box parsed from the file name). The records are grouped by
    parent slide name, written one JSON object per line to
    ``<METADATA_FILE_PATH>/<ctype>_IMAGES_PATH_DICT/<parent>.txt`` and
    returned.

    The path components are read as
    ``<parent_type>/<unused dir>/<cell_type>/<cell image>``; the parent
    slide name comes from the cell file name, not from a directory name.

    :param path: root directory of the cell images
    :param ctype: annotation category, MANUAL or AUTO; used as the prefix
        of the output directory name
    :param parent_pathes: dict mapping a parent slide name to its path
    :return: dict mapping each parent slide name to its list of cell
        info dicts
    :raises Exception: if a ``.bmp`` file name matches neither naming
        pattern
    :raises AssertionError: if get_location_from_filename returns a falsy
        value for a file name

    If a parent slide name is missing from ``parent_pathes`` the problem is
    printed and the process is terminated via exit().
    """
    files = FilesScanner(path, ['.bmp', '.jpg']).get_files()

    # NOTE(review): the '.' in front of (bmp|jpg) is unescaped in both
    # patterns, so it matches any character; left unchanged here.
    # e.g. 1-p0.6042_BD1607254-子宫内膜C_2018-10-09 16_42_03_x23043_y40485_w162_h218_2x.jpg
    pattern00 = re.compile(
        r'1-p\d\.\d+_(.*?)_x(\d+)_y(\d+)_w(\d+)_h(\d+)(_\dx)?.(bmp|jpg)')

    # e.g. 2018-03-22-11_26_58_x15789_y31806_w63_h61_s385.jpg
    pattern01 = re.compile(
        r'(.*?)_x(\d+)_y(\d+)_w(\d+)_h(\d+)(_s\d+)?.(bmp|jpg)')

    # Parent slide name -> list of cell info dicts.
    cells_dict = {}

    for item in files:
        # Only .bmp cell images are processed, although the scan above
        # also returns .jpg files.
        if not item.endswith('.bmp'):
            continue

        # Cell image file name, with spaces normalised to dashes.
        basename = os.path.basename(item).replace(' ', '-')

        # The directory holding the image names the cell class.
        class_dir = os.path.dirname(item)
        clas_type = os.path.basename(class_dir)
        if "_NEW" in clas_type or "_2" in clas_type or "_1" in clas_type:
            clas_type = clas_type.split("_")[0]

        matches = pattern00.findall(basename) or pattern01.findall(basename)
        if not matches:
            raise Exception("%s IS NOT ACCEPTED!" % basename)

        # findall yields one tuple per match with an entry for every
        # capture group (seven here); only the first group, the parent
        # slide name, is needed. The coordinates are parsed separately
        # below.
        parent_name = matches[0][0]

        # Two levels above the class directory names the slide category.
        parent_type = os.path.basename(
            os.path.dirname(os.path.dirname(class_dir)))

        # Original path of the parent slide.
        try:
            parent_path = parent_pathes[parent_name]
        except KeyError as e:
            print("%s NOT FOUND" % parent_name)
            print("CANNOT FIND RELATIVE TIFF PATH INFO, %s\n%s" %
                  (str(e), item))
            exit()

        # Parse the coordinates. Raised explicitly (rather than asserted)
        # so the check is not stripped when Python runs with -O.
        point = get_location_from_filename(basename)
        if not point:
            raise AssertionError(
                "THIS JPG NAME IS NOT ACCEPTED => %s" % basename)

        _, x, y, w, h, _ = point
        x, y, w, h = int(x), int(y), int(w), int(h)

        # Collapse every AGC sub-class into the single 'AGC' label.
        if clas_type in AGC_CLASSES:
            clas_type = 'AGC'

        if parent_type in AGC_CLASSES:
            parent_type = 'AGC'

        # Cell position and class information.
        info = {
            'name': basename,
            'cell_type': clas_type,
            'cell_path': item,
            'parent': parent_name,
            'parent_full_name': os.path.basename(parent_path),
            'parent_type': parent_type,
            'x': x,
            'y': y,
            'w': w,
            'h': h,
        }

        cells_dict.setdefault(parent_name, []).append(info)

    # Write the parsed cell records to one file per parent slide.
    save_path = os.path.join(METADATA_FILE_PATH, ctype + '_IMAGES_PATH_DICT')
    if cells_dict:
        os.makedirs(save_path, exist_ok=True)

    for key, lines in cells_dict.items():
        with open(os.path.join(save_path, key + '.txt'), 'w') as f:
            for line in lines:
                f.write(json.dumps(line) + '\n')

    return cells_dict
            for item in manual_point_lst:
                path = item['path']
                cell_type = item['type']

                cell_save_path = os.path.join(merge_dir_path, parent_type, key,
                                              cell_type)
                if not os.path.exists(cell_save_path):
                    os.makedirs(cell_save_path)

                shutil.copy(path, cell_save_path)

            # 检测算法识别细胞的坐标位置,进行重复性判断
            manual_point_coordinate_lst = get_coordinate(manual_point_lst)
            for point in auto_point_lst:
                basename = os.path.basename(point['path'])
                _, x, y, w, h, _ = get_location_from_filename(basename)

                # 与审核图像存在重复
                for item in manual_point_coordinate_lst:
                    if cal_IOU((x, y, w, h), item) > 0.8:
                        break
                else:
                    path = point['path']
                    cell_type = point['type']
                    cell_save_path = os.path.join(merge_dir_path, parent_type,
                                                  key, cell_type + '_NEW')
                    if not os.path.exists(cell_save_path):
                        os.makedirs(cell_save_path)

                    # 该图像不存在对应审核图像,直接拷贝图像至目标文件夹
                    shutil.copy(path, cell_save_path)
# Example #4
# 0
def get_cell_image(path, ctype, parent_pathes):
    """
    Collect metadata for the cell images under ``path``, grouped by slide.

    The list of ``.jpg`` files is read from the cache file
    ``<METADATA_FILE_PATH>/<ctype>_IMAGES_PATH_DICT.txt`` when it exists;
    otherwise it is obtained from ``FilesScanner`` and written to that
    file, one path per line.

    The path components are read as
    ``<parent_type>/<parent_name>/<cell_type>/<cell image>``. Each image is
    described by a dict (file name, cell type, cell path, parent slide,
    parent type and the x/y/w/h values parsed from the file name). The
    records are grouped by parent slide name, written one JSON object per
    line to ``<METADATA_FILE_PATH>/<ctype>_IMAGES_PATH_DICT/<parent>.txt``
    and returned.

    :param path: root directory of the cell images
    :param ctype: annotation category, MANUAL or AUTO; used as the prefix
        of the cache file and output directory names
    :param parent_pathes: dict mapping a parent slide name to its path
    :return: dict mapping each parent slide name to its list of cell
        info dicts
    :raises AssertionError: if get_location_from_filename returns a falsy
        value for a file name

    If a parent slide name is missing from ``parent_pathes`` the problem is
    printed and the process is terminated via exit().
    """
    # Use the locally cached file list when present; otherwise scan the
    # directory and store the result for next time.
    list_file = os.path.join(METADATA_FILE_PATH,
                             ctype + '_IMAGES_PATH_DICT.txt')

    if os.path.exists(list_file):
        with open(list_file) as f:
            files = [line.replace('\n', '') for line in f.readlines()]
    else:
        files = FilesScanner(path, ['.jpg']).get_files()
        with open(list_file, 'w') as o:
            o.writelines([item + '\n' for item in files])

    # Parent slide name -> list of cell info dicts.
    cells_dict = {}

    for item in files:
        if not item.endswith('.jpg'):
            continue

        # Cell image file name.
        basename = os.path.basename(item)

        # Walk up the path: cell class, parent slide name, slide category.
        class_dir = os.path.dirname(item)
        clas_type = os.path.basename(class_dir)

        slide_dir = os.path.dirname(class_dir)
        parent_name = os.path.basename(slide_dir)

        parent_type = os.path.basename(os.path.dirname(slide_dir))

        # Original path of the parent slide.
        try:
            parent_path = parent_pathes[parent_name]
        except KeyError as e:
            print("CANNOT FIND RELATIVE TIFF PATH INFO, %s" % str(e))
            exit()

        # Parse the coordinates. Raised explicitly (rather than asserted)
        # so the check is not stripped when Python runs with -O.
        point = get_location_from_filename(basename)
        if not point:
            raise AssertionError(
                "THIS JPG NAME IS NOT ACCEPTED => %s" % basename)

        _, x, y, w, h, _ = point

        # Collapse every AGC sub-class into the single 'AGC' label.
        if clas_type in AGC_CLASSES:
            clas_type = 'AGC'

        # Keep only the part of the slide category after the last '_',
        # then apply the same AGC correction.
        if '_' in parent_type:
            parent_type = parent_type.split('_')[-1]

        if parent_type in AGC_CLASSES:
            parent_type = 'AGC'

        # Cell position and class information.
        info = {
            'name': basename,
            'cell_type': clas_type,
            'cell_path': item,
            'parent': parent_name,
            'parent_full_name': os.path.basename(parent_path),
            'parent_type': parent_type,
            'x': x,
            'y': y,
            'w': w,
            'h': h,
        }

        cells_dict.setdefault(parent_name, []).append(info)

    # Write the parsed cell records to one file per parent slide.
    save_path = os.path.join(METADATA_FILE_PATH, ctype + '_IMAGES_PATH_DICT')
    if cells_dict:
        os.makedirs(save_path, exist_ok=True)

    for key, lines in cells_dict.items():
        with open(os.path.join(save_path, key + '.txt'), 'w') as f:
            for line in lines:
                f.write(json.dumps(line) + '\n')

    return cells_dict