Пример #1
0
def do_similar_remove(path):
    """Scan *path* for cell crops and move near-duplicates out of the way.

    Filenames are expected to match
    ``<big_name>_x<x>_y<y>_w<w>_h<h>_s<s>.jpg``.  A crop cut from the same
    big image whose bounding box overlaps an already-kept crop with
    IOU > 0.6 is moved to ``REPEAT_FILE_SAVE_PATH/<cell_type>``.

    :param path: directory tree to scan for .jpg crops
    :return: dict mapping big-image name -> list of kept (x, y, w, h, file)
    """
    files = FilesScanner(path).get_files()

    # note the escaped dot: the extension must literally be ".jpg"
    pattern = re.compile(r'(.*?)_x(\d+)_y(\d+)_w(\d+)_h(\d+)_s(\d+)\.jpg')

    dict_ = {}
    total = len(files)
    for index, file in enumerate(files):
        print("%s / %s ..." % (index + 1, total))
        basename = os.path.basename(file)
        # parent directory name encodes the cell type
        cell_type = os.path.basename(os.path.dirname(file))

        items = re.findall(pattern, basename)[0]
        big_name, x, y, w, h, _ = items

        x, y, w, h = int(x), int(y), int(w), int(h)
        if big_name in dict_:
            lst = dict_[big_name]
            for item in lst:
                # BUG FIX: the original unpacked into "h", clobbering the
                # current crop's height before the IOU comparison
                x_, y_, w_, h_ = item[:-1]
                if cal_IOU((x, y, w, h), (x_, y_, w_, h_)) > 0.6:
                    save_path = os.path.join(REPEAT_FILE_SAVE_PATH, cell_type)
                    os.makedirs(save_path, exist_ok=True)

                    shutil.move(file, save_path)
                    # BUG FIX: stop after the first hit — the source file is
                    # gone, and the for-else below must not append a
                    # duplicate entry for a file that was moved away
                    break
            else:
                dict_[big_name].append((x, y, w, h, file))
        else:
            dict_[big_name] = [(x, y, w, h, file)]

    return dict_
        def rm_duplicates(boxes):
            """Drop overlapping detection boxes, keeping the most confident.

            Each box is ``(label, accuracy, (x_center, y_center, w, h))``.
            A box whose IOU with an already-kept box exceeds
            ``cfg.darknet.min_overlap_ratio`` replaces it when at least as
            accurate; otherwise it is discarded.

            :param boxes: iterable of (label, accuracy, (cx, cy, w, h))
            :return: filtered list of boxes (original tuples, input order)
            """
            boxes_new = []

            # kept parallel to boxes_new; stores corner-based (x, y, w, h)
            # because cal_IOU expects top-left-corner boxes
            unique_point_collection = []
            for box in boxes:
                label, accuracy, (x_center, y_center, w, h) = box
                # convert centre coordinates to the top-left corner
                x = int(x_center - w / 2)
                y = int(y_center - h / 2)

                for index, item in enumerate(unique_point_collection):
                    ratio = cal_IOU(item[2], (x, y, w, h))
                    if ratio > cfg.darknet.min_overlap_ratio:
                        if accuracy >= item[1]:
                            # BUG FIX: store corner coords, matching the
                            # append below; the original stored centre coords
                            # here, corrupting every later IOU comparison
                            # against a replaced entry
                            unique_point_collection[index] = (label, accuracy,
                                                              (x, y, w, h))
                            boxes_new[index] = box
                        break
                else:
                    unique_point_collection.append((label, accuracy,
                                                    (x, y, w, h)))
                    boxes_new.append(box)
            return boxes_new
                path = item['path']
                cell_type = item['type']

                cell_save_path = os.path.join(merge_dir_path, parent_type, key,
                                              cell_type)
                if not os.path.exists(cell_save_path):
                    os.makedirs(cell_save_path)

                shutil.copy(path, cell_save_path)

            # 检测算法识别细胞的坐标位置,进行重复性判断
            manual_point_coordinate_lst = get_coordinate(manual_point_lst)
            for point in auto_point_lst:
                basename = os.path.basename(point['path'])
                _, x, y, w, h, _ = get_location_from_filename(basename)

                # 与审核图像存在重复
                for item in manual_point_coordinate_lst:
                    if cal_IOU((x, y, w, h), item) > 0.8:
                        break
                else:
                    path = point['path']
                    cell_type = point['type']
                    cell_save_path = os.path.join(merge_dir_path, parent_type,
                                                  key, cell_type + '_NEW')
                    if not os.path.exists(cell_save_path):
                        os.makedirs(cell_save_path)

                    # 该图像不存在对应审核图像,直接拷贝图像至目标文件夹
                    shutil.copy(path, cell_save_path)
Пример #4
0
def remove_repeat_cells(key, csv_file_path):
    """Cut cell patches for slide *key* from its TIFF and de-duplicate them.

    Every detection row is saved under
    ``TEST_IMAGE_SAVE_PATH/<key>/origin/<label>``; cells not duplicating a
    same-label kept cell (IOU <= 0.7) are additionally saved under
    ``.../removal/<label>``.

    :param key: slide identifier; must be a key of ``tiff_dict``
    :param csv_file_path: csv with header row and columns
        name, label01, accu01, label02, accu02, xmin, ymin, xmax, ymax
    :return: list of kept (label, x, y, w, h) tuples in slide coordinates
    :raises Exception: if *key* is unknown or the TIFF cannot be opened
    """
    if key not in tiff_dict:
        raise Exception("XCEPTION PREPROCESS %s NOT FOUND" % key)

    tiff_path = tiff_dict[key]

    # try openslide first, fall back to TSlide for unsupported formats
    try:
        try:
            slide = openslide.OpenSlide(tiff_path)
        except Exception:
            # BUG FIX: narrowed from a bare "except:" which also swallowed
            # KeyboardInterrupt / SystemExit
            slide = TSlide(tiff_path)
    except Exception:
        raise Exception('TIFF FILE OPEN FAILED => %s' % tiff_path)

    save_path = os.path.join(TEST_IMAGE_SAVE_PATH, key)

    with open(csv_file_path) as f:
        lines = csv.reader(f)

        unique_cells_collection = []

        # skip the header row
        next(lines, None)

        count = 0
        for line in lines:
            name, label01, accu01, label02, accu02, xmin, ymin, xmax, ymax = line
            xmin, ymin, xmax, ymax = float(xmin), float(ymin), float(
                xmax), float(ymax)
            # round width/height to the nearest integer pixel
            x, y, w, h = xmin, ymin, int(xmax - xmin + 0.5), int(ymax - ymin +
                                                                 0.5)

            # patch filenames encode the patch origin inside the slide
            tiff_name, start_x, start_y = re.findall(pattern, name)[0]
            start_x, start_y = int(start_x), int(start_y)

            # translate patch-local coords into absolute slide coords
            x, y = int(start_x + x), int(start_y + y)

            origin_save_path = os.path.join(save_path, "origin", label02)
            removal_save_path = os.path.join(save_path, "removal", label02)

            # exist_ok avoids the check-then-create race of the original
            os.makedirs(origin_save_path, exist_ok=True)
            os.makedirs(removal_save_path, exist_ok=True)

            patch = slide.read_region((x, y), 0, (w, h)).convert("RGB")
            image_name = "%s_x%s_y%s_w%s_h%s.jpg" % (key, x, y, w, h)

            patch.save(os.path.join(origin_save_path, image_name))
            for item in unique_cells_collection:
                label, x_, y_, w_, h_ = item

                ratio = cal_IOU((x, y, w, h), (x_, y_, w_, h_))

                # same label with large overlap -> duplicate, keep first only
                if ratio > 0.7 and label == label02:
                    break
            else:
                unique_cells_collection.append((label02, x, y, w, h))
                patch.save(os.path.join(removal_save_path, image_name))

            count += 1

        print("ORIGIN POINTS COLLECTION LENGTH: %s" % count)
        print("AFTER DUPLICATE REMOVAL COLLECTION LENGTH: %s" %
              len(unique_cells_collection))

        return unique_cells_collection
Пример #5
0
def cell_classification(xml_path_lst, csv_path_lst):
    """Compare model detections (csv) against manual annotations (xml).

    Detections are de-duplicated (cached on disk per slide), then matched
    against the manual boxes by IOU (> 0.8) and partitioned per slide into:

    - same:   model and manual agree on position and label
    - modify: positions match but labels differ
    - new:    detected by the model only
    - miss:   annotated manually but not detected by the model

    Each partition is written to ``<save_path>/<key>/<partition>.txt``.

    :param xml_path_lst: xml annotation file path list
    :param csv_path_lst: csv detection file path list
    :return: None
    """

    print("GET XML AND CSV <NAME: PATH> DICT...")
    xml_dict = generate_xml_path_dict(xml_path_lst)
    csv_dict = generate_csv_path_dict(csv_path_lst)

    # GET NEW MODEL OUTPUT POINTS COLLECTION
    csv_points_dict = {}

    removal_xml_save_path = '/home/tsimage/Development/DATA/removal_xmls'

    keys = list(csv_dict.keys())
    total = len(keys)

    print("GET CSV LABELLED POINTS COLLECTION ...")
    # read csv detections, caching the de-duplicated points per slide
    for index, key in enumerate(keys):
        print("GET CSV DATA %s / %s %s..." % (index + 1, total, key))
        removal_xml_data_path = os.path.join(removal_xml_save_path,
                                             key + ".txt")

        lst = []
        if os.path.exists(removal_xml_data_path):
            # cache hit: reuse the previously computed de-duplicated points
            with open(removal_xml_data_path) as f:
                for line in f:
                    label, x, y, w, h = line.replace("\n", "").split(',')
                    lst.append((label, int(x), int(y), int(w), int(h)))
        else:
            lst = remove_repeat_cells(key, csv_dict[key])
            write_to_txt(removal_xml_data_path, lst)

        csv_points_dict[key] = lst

    xml_points_dict = {}
    keys = list(xml_dict.keys())
    total = len(keys)

    print("GET XML LABELLED POINTS COLLECTION ...")
    # read manual xml annotations
    for index, key in enumerate(keys):
        print("GET XML DATA %s / %s %s..." % (index + 1, total, key))
        lst = read_data_from_xml(xml_dict[key])
        xml_points_dict[key] = lst

    print('CELL COMPARE AND CLASSIFICATION ...')
    # compare and classify
    keys = list(csv_points_dict.keys())
    total = len(keys)

    # cells where the model label disagrees with the manual label
    dict_modify = {}

    # cells detected by the model only
    dict_new = {}

    # cells where model and manual annotation fully agree
    # (the original comment wrongly duplicated the "new" description here)
    dict_same = {}

    for index, key in enumerate(keys):
        print("CLASSIFICATION %s / %s %s..." % (index + 1, total, key))

        same_lst = []
        new_lst = []
        modify_lst = []

        csv_lst = csv_points_dict[key]
        xml_lst = xml_points_dict[key]

        for csv_item in csv_lst:
            label01, x01, y01, w01, h01 = csv_item

            for xml_item in xml_lst:
                label02, x02, y02, w02, h02 = xml_item

                ratio = cal_IOU((x02, y02, w02, h02), (x01, y01, w01, h01))
                if ratio > 0.8:
                    if label01 == label02:
                        same_lst.append(csv_item)
                    else:
                        # store both labels: (manual, model, x, y, w, h)
                        modify_lst.append(
                            (label02, label01, x01, y01, w01, h01))

                    break
            else:
                # no manual box overlaps: newly detected cell
                new_lst.append(csv_item)

        dict_same[key] = same_lst
        dict_new[key] = new_lst
        dict_modify[key] = modify_lst

    # cells annotated manually but missed by the model
    dict_miss = {}

    keys = list(xml_points_dict.keys())
    total = len(keys)
    for index, key in enumerate(keys):
        if key not in csv_dict:
            continue

        miss_lst = []

        csv_lst = csv_points_dict[key]
        xml_lst = xml_points_dict[key]

        for xml_item in xml_lst:
            label01, x01, y01, w01, h01 = xml_item

            for csv_item in csv_lst:
                label02, x02, y02, w02, h02 = csv_item

                if cal_IOU((x02, y02, w02, h02), (x01, y01, w01, h01)) > 0.8:
                    break
            else:
                miss_lst.append(xml_item)

        dict_miss[key] = miss_lst

    data_after_classification_save_path = '/home/tsimage/Development/DATA/data_after_removal'
    # NOTE(review): this iterates the xml key list left over from the loop
    # above; assumes every csv key also appears among the xml keys — confirm
    for key in keys:
        save_path = os.path.join(data_after_classification_save_path, key)

        os.makedirs(save_path, exist_ok=True)

        if key in dict_same:
            write_to_txt(os.path.join(save_path, 'same.txt'), dict_same[key])

        if key in dict_new:
            write_to_txt(os.path.join(save_path, 'new.txt'), dict_new[key])

        if key in dict_modify:
            write_to_txt(os.path.join(save_path, 'modify.txt'),
                         dict_modify[key])

        if key in dict_miss:
            write_to_txt(os.path.join(save_path, 'miss.txt'), dict_miss[key])
Пример #6
0
        # 当两种标注数据同时存在时,进行去重处理
        # 以手工标注信息为准,对自动标注信息在手工标注信息中进行遍历
        # 若存在重合度 ratio > 0.5 的标注信息,直接丢弃;否则加入细胞信息列表
        if n == 2:
            # 以手工标注信息为基准
            # TO-DO 过滤 重叠率 > 0.5 图像
            available_points = deepcopy(manual)

            # 对自动标注信息进行遍历,存在手工标注信息中存在 重合度 > 0.5 的细胞信息,直接丢弃
            for item_ in auto:
                x_, y_, w_, h_ = item_['x'], item_['y'], item_['w'], item_['h']

                for item in manual:
                    x, y, w, h = item['x'], item['y'], item['w'], item['h']
                    ratio = cal_IOU((x, y, w, h), (x_, y_, w_, h_))

                    if ratio > ACCEPTED_OVERLAPPED_RATIO:
                        similar_count += 1
                        break
                else:
                    available_points.append(item_)

            points_collection[name] = available_points

        elif n == 1:
            if auto:
                points_collection[name] = auto

            if manual:
                points_collection[name] = manual
def generate_labelme_format_xml(csv_files_path, patch_dict, xml_save_path):
    """Convert ``*_clas.csv`` detection files into labelme-style xml files.

    Boxes are grouped by patch key, de-duplicated (IOU > 0.8 against an
    already-kept box), written as xml next to a copy of the patch image.

    :param csv_files_path: directory scanned for csv files
    :param patch_dict: maps patch key -> {'label': ..., 'path': ...}
    :param xml_save_path: output directory for the xml files
    :return: None
    :raises Exception: when a csv key has no entry in *patch_dict*
    """
    files = FilesScanner(csv_files_path, postfix=['.csv']).get_files()
    clas_files = [item for item in files if item.endswith('_clas.csv')]

    # total number of csv files to process
    total = len(clas_files)
    for index, file in enumerate(clas_files):
        print("Processing %s / %s %s" % (index + 1, total, os.path.basename(file)))

        with open(file) as f:
            lines = csv.reader(f)

            dict_ = {}
            # skip the header row
            next(lines, None)

            for line in lines:
                key = line[0]
                # clamp negative coordinates to 0, round to nearest int
                box = {
                    'name': line[3],
                    'xmin': 0 if float(line[5]) < 0 else int(float(line[5]) + 0.5),
                    'ymin': 0 if float(line[6]) < 0 else int(float(line[6]) + 0.5),
                    'xmax': 0 if float(line[7]) < 0 else int(float(line[7]) + 0.5),
                    'ymax': 0 if float(line[8]) < 0 else int(float(line[8]) + 0.5),
                }

                dict_.setdefault(key, []).append(box)

            for key, lst in dict_.items():
                if key not in patch_dict:
                    # BUG FIX: report the missing key — the original
                    # interpolated the csv path, which is not what was
                    # looked up in patch_dict
                    raise Exception("%s NOT FOUND IN DICT" % key)

                patch = patch_dict[key]
                label = patch['label']
                image_path = patch['path']

                save_path = os.path.join(xml_save_path, label)
                os.makedirs(save_path, exist_ok=True)

                # remove duplicated cells (IOU > 0.8 against a kept box)
                lst_ = []
                for item in lst:
                    x, y, w, h = item['xmin'], item['ymin'], item['xmax'] - item['xmin'], item['ymax'] - item['ymin']
                    for item_ in lst_:
                        x_, y_, w_, h_ = item_['xmin'], item_['ymin'], item_['xmax'] - item_['xmin'], item_['ymax'] - item_['ymin']
                        if cal_IOU((x, y, w, h), (x_, y_, w_, h_)) > 0.8:
                            break
                    else:
                        lst_.append(item)

                write_to_labelme_xml(lst_, os.path.join(save_path, key + '.xml'))
                shutil.copy(image_path, save_path)
        print("%s / %s %s ... " % (index + 1, total, key))

        lst01 = cell_dict[key]

        if key in train_dict:
            lst02 = train_dict[key]

            for ctype01, path01 in lst01:
                basename = os.path.basename(path01)
                x01, y01, w01, h01, _ = re.findall(pattern01, basename)[0]

                for ctype02, path02 in lst02:
                    basename = os.path.basename(path02)
                    x02, y02, w02, h02, _ = re.findall(pattern02, basename)[0]

                    ratio = cal_IOU((int(x01), int(y01), int(w01), int(h01)),
                                    (int(x02), int(y02), int(w02), int(h02)))
                    if ratio > 0.5:
                        cell_save_path = os.path.join(merge_dir_path, key,
                                                      ctype02)
                        if not os.path.exists(cell_save_path):
                            os.makedirs(cell_save_path)

                        shutil.copy(path01, cell_save_path)
                        break
                else:
                    cell_save_path = os.path.join(merge_dir_path, key,
                                                  ctype01 + '_NEW')
                    if not os.path.exists(cell_save_path):
                        os.makedirs(cell_save_path)

                    shutil.copy(path01, cell_save_path)