Пример #1
0
def worker(image, start_x, start_y, height, patch_width, patch_height, delta,
           patch_save_path):
    """
    Cut patches out of a slide at a fixed x and save those that pass the
    Laplacian-variance filter as JPEG files.

    Patches are read from level 0 at (start_x, start_y); start_y is advanced
    by ``delta`` until it reaches ``height``.

    :param image: path of the slide file
    :param start_x: x coordinate of every patch read by this worker
    :param start_y: y coordinate of the first patch
    :param height: y coordinate at which cutting stops (exclusive)
    :param patch_width: patch width
    :param patch_height: patch height
    :param delta: step between two consecutive patches
    :param patch_save_path: directory the patches are written to
    :return: list with the target path of every patch that passed the filter
    """
    # Fall back to the TSlide reader when OpenSlide cannot open the file.
    try:
        slide = openslide.OpenSlide(image)
    except Exception:
        slide = TSlide(image)

    saved_paths = []
    try:
        while start_y < height:
            # Read the patch and convert it from RGBA to BGR for OpenCV.
            patch = slide.read_region((start_x, start_y), 0,
                                      (patch_width, patch_height))
            patch = cv2.cvtColor(np.asarray(patch), cv2.COLOR_RGBA2BGR)

            # Keep the patch only when the variance of its Laplacian exceeds
            # the configured threshold.
            patch_gray = cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY)
            if cv2.Laplacian(patch_gray, cv2.CV_64F).var() > cfg.slice.THRESH:
                save_path = os.path.join(patch_save_path,
                                         "%s_%s.jpg" % (start_x, start_y))
                cv2.imwrite(save_path, patch)
                # NOTE(review): second copy goes to a hard-coded directory;
                # looks like a debugging leftover - confirm before removing.
                cv2.imwrite(
                    '/tmp/metadata/cells/1/%s_%s.jpg' % (start_x, start_y),
                    patch)

                saved_paths.append(save_path)

            start_y += delta
    finally:
        # Release the slide handle even when reading or writing fails.
        slide.close()

    return saved_paths
Пример #2
0
    def gen_np_array_mem(self,
                         results,
                         classes=cfg.darknet.classes,
                         det=cfg.xception.det1,
                         size=cfg.xception.size):
        """
        Read the detected cells from the slide and return them as arrays.

        :param results: dict generated after running darknet predict: {x_y: [(class_i, det, (x,y,w,h)),]}
        :param classes: [class_i,], only boxes of these classes are used
        :param det: the threshold of det to use certain box or not, from darknet prediction
        :param size: image size to cut, default to 299, which is used in Xception/inception
        :return:
            cell_list: list of numpy arrays, one per selected cell, in order
            cell_index: {index: [x_y, [class_i, det, (x,y,w,h)]]},
                        index is the position in cell_list,
                        x_y is jpg image name, it represents cell source,
                        box = [class_i, det, (x,y,w,h)] is cell info from darknet
        """
        def resize_img(img, size):
            # Crop box centred on the image; the original comment describes
            # this as zero-padding the short side. The result is then resized
            # to size x size.
            pad_x = (size - img.size[0]) / 2
            pad_y = (size - img.size[1]) / 2
            img_croped = img.crop(
                (-pad_x, -pad_y, img.size[0] + pad_x, img.size[1] + pad_y))
            return img_croped.resize((size, size))

        # Fall back to the TSlide reader when OpenSlide cannot open the file.
        try:
            slide = openslide.OpenSlide(self.input_file)
        except Exception:
            slide = TSlide(self.input_file)

        cell_list = []
        cell_index = {}
        index = 0
        try:
            for x_y, boxes in results.items():
                for box in boxes:
                    if box[0] in classes and box[1] > det:
                        # Box coordinates are relative to the tile whose
                        # origin is encoded in the "x_y" key.
                        origin = x_y.split('_')
                        x = int(origin[0]) + int(box[2][0])
                        y = int(origin[1]) + int(box[2][1])
                        w = int(box[2][2])
                        h = int(box[2][3])
                        cell = slide.read_region((x, y), 0,
                                                 (w, h)).convert("RGB")
                        cell_list.append(np.array(resize_img(cell, size)))
                        cell_index[index] = [x_y, list(box)]
                        index += 1
        finally:
            # Release the slide handle even when a read fails.
            slide.close()

        return cell_list, cell_index
def tiff_readable_check(path):
    """
    Check that the slides found under ``path`` have unique file names and
    can be opened.

    :param path: location of the original slides, passed to FilesScanner
    :return: None
    :raises Exception: when two files share a base name (the message holds
        both paths) or when a file cannot be opened by either reader
    """

    # '.kfb' with the leading dot, matching the other FilesScanner call sites.
    files = FilesScanner(path, ['.tif', '.kfb']).get_files()

    # base name -> first path seen with that name
    seen = {}
    for slide_path in files:
        basename = os.path.basename(slide_path)
        if basename in seen:
            raise Exception("%s\n%s" % (slide_path, seen[basename]))
        seen[basename] = slide_path

    for slide_path in files:
        try:
            # Fall back to the TSlide reader when OpenSlide fails.
            try:
                slide = openslide.OpenSlide(slide_path)
            except Exception:
                slide = TSlide(slide_path)
        except Exception as e:
            raise Exception("%s %s" % (slide_path, str(e))) from e

        # The slide was only opened to prove it is readable; release it so
        # handles do not pile up over a large directory.
        slide.close()
Пример #4
0
    def cut_cells(self, tifname, new_dict, save_path):
        """
        Cut every box of ``new_dict`` out of the slide and save it as a JPEG.

        :param tifname: full path name of .tif/.kfb file
        :param new_dict: {x_y: [[class_i, det, class_i, det, (x,y,w,h)],]}
        :param save_path: image saving path (images are saved under str(box[2]))
        :output format: save_path/<box[2]>/<tifbasename>_x{x}_y{y}_w{w}_h{h}.jpg
                        (x, y are the tile origin from the key plus the box offset)
        """
        # Fall back to the TSlide reader when OpenSlide cannot open the file.
        try:
            slide = openslide.OpenSlide(tifname)
        except Exception:
            slide = TSlide(tifname)

        basename = os.path.splitext(os.path.basename(tifname))[0]
        try:
            for x_y, boxes in new_dict.items():
                # Tile origin encoded in the key; box offsets are added to it.
                origin = x_y.split('_')
                for box in boxes:
                    x = int(origin[0]) + int(box[4][0])
                    y = int(origin[1]) + int(box[4][1])
                    w = int(box[4][2])
                    h = int(box[4][3])

                    image_name = "{}_x{}_y{}_w{}_h{}.jpg".format(
                        basename, x, y, w, h)
                    save_path_i = os.path.join(save_path, str(box[2]))
                    os.makedirs(save_path_i, exist_ok=True)
                    image_fullname = os.path.join(save_path_i, image_name)
                    slide.read_region(
                        (x, y), 0, (w, h)).convert("RGB").save(image_fullname)
        finally:
            # Release the slide handle even when a read or save fails.
            slide.close()
Пример #5
0
def cut_cells(filename, labels_csv, save_path, factor, N):
    """
    Cut the boxes listed in a labels CSV out of a slide and save them as JPEGs.

    Each saved region has the same centre as its box and is N times as wide
    and as high. Images go to save_path/<parent dir>/<slide name>/<label>/.
    When is_overlapped returns a truthy value for a box, that value is
    embedded in the file name after "markedAs_".

    :param filename: full path of the slide file; the annotation file is
        looked up at the same path with an ".xml" extension
    :param labels_csv: CSV file passed to read_labels_csv; each entry is
        unpacked as (x, y, w, h, p, label)
    :param save_path: root directory for the output images
    :param factor: threshold forwarded to is_overlapped
    :param N: enlargement factor of the saved region relative to the box
    """
    labels = read_labels_csv(labels_csv)
    marked_boxes = read_labels_xml(os.path.splitext(filename)[0] + ".xml")

    # Fall back to the TSlide reader when OpenSlide cannot open the file.
    try:
        slide = openslide.OpenSlide(filename)
    except Exception:
        slide = TSlide(filename)

    basename = os.path.splitext(os.path.basename(filename))[0]
    parent_d = os.path.basename(os.path.dirname(filename))
    save_path = os.path.join(save_path, parent_d, basename)
    try:
        for box in labels:
            x, y, w, h, p, label = box
            marked_class_i = is_overlapped(marked_boxes, box, factor)
            if marked_class_i:
                image_name = "1-p{:.4f}_markedAs_{}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                    p, marked_class_i, basename, x, y, w, h, N)
            else:
                image_name = "1-p{:.4f}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                    p, basename, x, y, w, h, N)
            save_path_i = os.path.join(save_path, label)
            os.makedirs(save_path_i, exist_ok=True)
            image_fullname = os.path.join(save_path_i, image_name)

            # Shift the origin so the enlarged region stays centred on the box.
            x_N, y_N = int(x + (1 - N) * w / 2), int(y + (1 - N) * h / 2)
            w_N, h_N = int(N * w), int(N * h)
            slide.read_region((x_N, y_N), 0,
                              (w_N, h_N)).convert("RGB").save(image_fullname)
    finally:
        # Release the slide handle even when a read or save fails.
        slide.close()
Пример #6
0
def worker_in_memory(image, start_x, start_y, height, patch_width,
                     patch_height, delta):
    """
    Cut patches out of a slide at a fixed x and return them in memory.

    Patches are read from level 0 at (start_x, start_y); start_y is advanced
    by ``delta`` until it reaches ``height``. Nothing is written to disk.

    :param image: path of the slide file
    :param start_x: x coordinate of every patch read by this worker
    :param start_y: y coordinate of the first patch
    :param height: y coordinate at which cutting stops (exclusive)
    :param patch_width: patch width
    :param patch_height: patch height
    :param delta: step between two consecutive patches
    :return: dict mapping "<x>_<y>" to the patch converted to BGR
    """
    # Fall back to the TSlide reader when OpenSlide cannot open the file.
    try:
        slide = openslide.OpenSlide(image)
    except Exception:
        slide = TSlide(image)

    patches = {}
    try:
        while start_y < height:
            patch = slide.read_region((start_x, start_y), 0,
                                      (patch_width, patch_height))

            # Convert from RGBA to BGR for OpenCV.
            key = '%s_%s' % (start_x, start_y)
            patches[key] = cv2.cvtColor(np.asarray(patch),
                                        cv2.COLOR_RGBA2BGR)

            start_y += delta
    finally:
        # Release the slide handle even when a read fails.
        slide.close()

    return patches
Пример #7
0
def worker(tiff_path, keys, points_dict, save_path, N):
    """
    Cut the boxes of the given tiles out of a slide and save them as JPEGs.

    Each saved region has the same centre as its box and is N times as wide
    and as high. Images are written to save_path/<box[2]>/.

    :param tiff_path: full path of the slide file
    :param keys: "x_y" tile keys this worker is responsible for
    :param points_dict: {x_y: [box, ...]}; box[2] names the output
        sub-directory, box[3] feeds the "1-p" value in the file name and
        box[4] is (x, y, w, h) relative to the tile origin
    :param save_path: root directory for the output images
    :param N: enlargement factor of the saved region relative to the box
    :return: number of cell images written
    """
    basename = os.path.splitext(os.path.basename(tiff_path))[0].replace(
        " ", "-")

    # Fall back to the TSlide reader when OpenSlide cannot open the file.
    try:
        slide = openslide.OpenSlide(tiff_path)
    except Exception:
        slide = TSlide(tiff_path)

    cell_count = 0
    try:
        for x_y in keys:
            # The tile origin is the same for every box of the tile.
            x0, y0 = x_y.split('_')
            x0, y0 = int(x0), int(y0)

            for box in points_dict[x_y]:
                x = x0 + int(box[4][0])
                y = y0 + int(box[4][1])
                w = int(box[4][2])
                h = int(box[4][3])

                cell_save_dir = os.path.join(save_path, box[2])
                os.makedirs(cell_save_dir, exist_ok=True)

                image_name = "1-p{:.4f}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                    1 - box[3], basename, x, y, w, h, N)
                cell_save_path = os.path.join(cell_save_dir, image_name)

                # Shift the origin so the enlarged region stays centred on
                # the box.
                slide.read_region(
                    (int(x + (1 - N) * w / 2), int(y + (1 - N) * h / 2)), 0,
                    (int(N * w), int(N * h))).convert("RGB").save(
                        cell_save_path)

                cell_count += 1
    finally:
        # Release the slide handle even when a read or save fails.
        slide.close()

    return cell_count
Пример #8
0
    def get_slices(self):
        """
        Cut the central area of a slide into patches that are kept in memory.

        Images are tried in order and the method returns as soon as one
        ``.tif`` or ``.kfb`` image has been handled, so later images are
        never processed.

        :return: ``(cfg.code.success, {"x_y": patch})`` for the processed
                 image, or ``(cfg.code.fail, message)`` when processing
                 raised or no image has a supported extension
        """

        for image in self.images:
            t0 = datetime.datetime.now()

            # Slide file name without extension; spaces are replaced by "_".
            img_name = os.path.basename(image).split(".")[0].replace(" ", "_")
            print("Image Process %s ..." % image)

            try:
                slide = None
                if image.endswith(".tif"):
                    slide = openslide.OpenSlide(image)
                elif image.endswith(".kfb"):
                    slide = TSlide(image)

                if not slide:
                    continue

                # Only the dimensions are needed here (every worker opens the
                # slide itself), so release the handle right away.
                try:
                    _width, _height = slide.dimensions
                finally:
                    slide.close()

                # Extent of the area covered by PATCH_NUM patches per axis.
                span_w = (cfg.center.PATCH_NUM -
                          1) * cfg.center.DELTA + cfg.center.PATCH_WIDTH
                span_h = (cfg.center.PATCH_NUM -
                          1) * cfg.center.DELTA + cfg.center.PATCH_HEIGHT

                # Top-left corner of that area, centred on the slide and
                # clamped to the slide origin.
                x = max(_width / 2 - span_w / 2, 0)
                y = max(_height / 2 - span_h / 2, 0)

                # End coordinates of the area.
                width = int(x + span_w)
                height = int(y + span_h)
                x, y = int(x), int(y)

                # The context manager shuts the pool down on success and on
                # failure, so worker processes are not leaked.
                with ProcessPoolExecutor(
                        max_workers=cfg.slice.SLICE_PROCESS_NUM) as executor:
                    t1 = datetime.datetime.now()
                    print("Adding Job to Pool...")
                    print(span_w, span_h)

                    # One job per x position.
                    tasks = []
                    while x < width:
                        tasks.append(
                            executor.submit(worker_in_memory, image, x, y,
                                            height, cfg.center.PATCH_WIDTH,
                                            cfg.center.PATCH_HEIGHT,
                                            cfg.center.DELTA))
                        x += cfg.center.DELTA

                    t2 = datetime.datetime.now()
                    job_count = len(tasks)
                    print(
                        "Done, cost: %s, Total Job Count: %s, Worker Count: %s"
                        % ((t2 - t1), job_count, cfg.slice.SLICE_PROCESS_NUM))

                    results = {}
                    patch_count = 0
                    for future in as_completed(tasks):
                        # The result was created in a worker process and is
                        # referenced only here, so no extra copy is needed.
                        queue = future.result()
                        results.update(queue)

                        count = len(queue)
                        patch_count += count
                        job_count -= 1
                        print(
                            "One Job Done, Got %s patches, last Job Count: %s"
                            % (count, job_count))

                t3 = datetime.datetime.now()
                print(
                    "File - %s, Size: (%s, %s), Got Patch Num %s, Total cost time: %s"
                    % (img_name, _width, _height, patch_count, t3 - t0))

                return cfg.code.success, results
            except Exception as e:
                return cfg.code.fail, str(e)

        # Reached only when no image had a supported extension; report a
        # failure instead of implicitly returning None.
        return cfg.code.fail, 'no supported image found'
Пример #9
0
    def get_slices(self):
        """
        Cut every image of ``self.images`` into patches saved on disk.

        Patches of one image are written by ``worker`` into
        ``self.output_path/<image name>``; an existing directory of that name
        is removed first. Only the part of the slide between the configured
        start and end ratios is cut.

        Exceptions raised while processing an image propagate to the caller;
        only images with an unsupported extension are recorded in ``fail``.

        :return: {'done': [output directory, ...],
                  'fail': [{'name': image name, 'err': reason}, ...]}
        """
        # Output directories of the images processed successfully.
        done = []
        # Images that could not be processed.
        fail = []

        for image in self.images:
            t0 = datetime.datetime.now()

            # Slide file name without extension; spaces are replaced by "_".
            img_name = os.path.basename(image).split(".")[0].replace(" ", "_")
            print("Image Process %s ..." % image)

            slide = None
            if image.endswith(".tif"):
                slide = openslide.OpenSlide(image)
            elif image.endswith(".kfb"):
                slide = TSlide(image)

            if not slide:
                fail.append({
                    'name': img_name,
                    'err': 'unsupported file format'
                })
                continue

            # Only the dimensions are needed here (every worker opens the
            # slide itself), so release the handle right away.
            try:
                _width, _height = slide.dimensions
            finally:
                slide.close()

            output_path = os.path.join(self.output_path, img_name)
            # Start from an empty directory when the target already exists.
            if os.path.exists(output_path):
                shutil.rmtree(output_path)

            os.makedirs(output_path, exist_ok=True)

            # The context manager shuts the pool down on success and on
            # failure, so worker processes are not leaked.
            with ProcessPoolExecutor(
                    max_workers=cfg.slice.SLICE_PROCESS_NUM) as executor:
                t1 = datetime.datetime.now()
                print("Adding Job to Pool...")

                # Restrict cutting to the configured share of the slide.
                x = int(_width * cfg.slice.AVAILABLE_PATCH_START_RATIO)
                y = int(_height * cfg.slice.AVAILABLE_PATCH_START_RATIO)
                width = int(_width * cfg.slice.AVAILABLE_PATCH_END_RATIO)
                height = int(_height * cfg.slice.AVAILABLE_PATCH_END_RATIO)

                # One job per x position.
                tasks = []
                while x < width:
                    tasks.append(
                        executor.submit(worker, image, x, y, height,
                                        cfg.slice.WIDTH, cfg.slice.HEIGHT,
                                        cfg.slice.DELTA, output_path))
                    x += cfg.slice.DELTA

                t2 = datetime.datetime.now()
                job_count = len(tasks)
                print(
                    "Done, cost: %s, Total Job Count: %s, Worker Count: %s"
                    % ((t2 - t1), job_count, cfg.slice.SLICE_PROCESS_NUM))

                patch_count = 0
                for future in as_completed(tasks):
                    count = len(future.result())

                    patch_count += count
                    job_count -= 1
                    print(
                        "One Job Done, Got %s patches, last Job Count: %s"
                        % (count, job_count))

            t3 = datetime.datetime.now()
            print(
                "File - %s, Size: (%s, %s), Got Patch Num %s, Total cost time: %s"
                % (img_name, _width, _height, patch_count, t3 - t0))
            print(".jpg files saved path: %s" % output_path)

            done.append(output_path)

        return {'done': done, 'fail': fail}
    else:
        # 切图文件存储路径
        slice_dir_path = os.path.join(resource_save_path, 'SLICE')

        # 中间文件存放目录
        meta_files_path = os.path.join(resource_save_path, 'META')

        # 识别出的细胞存储路径
        cells_save_path = os.path.join(resource_save_path, 'CELLS')

    tiff_lst = FilesScanner(tiff_dir_path, ['.kfb', '.tif']).get_files()

    # 执行 TIFF 文件完整性校验
    for tiff in tiff_lst:
        try:
            try:
                slide = openslide.OpenSlide(tiff)
            except:
                slide = TSlide(tiff)
        except Exception as e:
            raise Exception("%s %s" % (tiff, str(e)))

    for item in [slice_dir_path, meta_files_path, cells_save_path]:
        if not os.path.exists(item):
            os.makedirs(item)

    PCK(tiff_lst, slice_dir_path, meta_files_path, cells_save_path).run()
    print("PLEASE GET CELL IMAGES IN %s" % cells_save_path)

    t1 = datetime.datetime.now()
    print("TIFF NUM: %s, TOTAL COST %s ..." % (len(tiff_lst), (t1 - t0)))
Пример #11
0
    def cut_cells_p_marked_(self,
                            tifname,
                            new_dict,
                            save_path,
                            factor=0.3,
                            N=4):
        """
        Cut every box of ``new_dict`` out of the slide and save it as a JPEG,
        tagging the boxes that overlap an annotation of the slide's XML file.

        The slide path is looked up in get_tiff_dict() under ``tifname``.
        Each saved region has the same centre as its box and is N times as
        wide and as high.

        :param tifname: key into get_tiff_dict(); also used to derive the
            output sub-directories and the ".xml" annotation file name
        :param new_dict: {x_y: [[class_i, det, class_i, det, (x,y,w,h)],]}
        :param save_path: root directory for the output images
        :param factor: overlap threshold (intersection over union) above
            which a box counts as marked
        :param N: enlargement factor of the saved region relative to the box
        :output format: save_path/<parent dir>/<basename>/<box[2]>[/marked]/<image name>
        :raises Exception: when ``tifname`` is not in get_tiff_dict()
        """
        def get_labels(xmlname):
            # Collect [[class_i, [(x, y), ...]], ...] from the annotation
            # file; returns [] when the file does not exist.
            if not os.path.isfile(xmlname):
                return []

            classes = {
                "#aa0000": "HSIL",
                "#aa007f": "ASCH",
                "#005500": "LSIL",
                "#00557f": "ASCUS",
                "#0055ff": "SCC",
                "#aa55ff": "EC",
                "#ff5500": "AGC",
                "#00aa00": "FUNGI",
                "#00aa7f": "TRI",
                "#00aaff": "CC",
                "#55aa00": "ACTINO",
                "#55aa7f": "VIRUS",
                "#ffffff": "NORMAL",
                "#000000": "MC",
                "#aa00ff": "SC",
                "#ff0000": "RC",
                "#aa5500": "GEC"
            }
            DOMTree = xml.dom.minidom.parse(xmlname)
            collection = DOMTree.documentElement
            annotations = collection.getElementsByTagName("Annotation")
            marked_boxes = []
            for annotation in annotations:
                colorCode = annotation.getAttribute("Color")
                # Annotations with an unknown colour are ignored.
                if colorCode not in classes:
                    continue
                coordinates = annotation.getElementsByTagName("Coordinate")
                points = [(float(coordinate.getAttribute('X')),
                           float(coordinate.getAttribute('Y')))
                          for coordinate in coordinates]
                marked_boxes.append([classes[colorCode], points])
            return marked_boxes

        def is_overlapped(marked_boxes, predicted_box, factor):
            # Return the class of the first annotation whose intersection
            # over union with the predicted (x, y, w, h) box reaches
            # ``factor``, or "" when there is none.
            predicted_box_obj = geometry.box(
                predicted_box[0], predicted_box[1],
                predicted_box[0] + predicted_box[2],
                predicted_box[1] + predicted_box[3])
            for marked_box in marked_boxes:
                marked_box_obj = geometry.Polygon(marked_box[1])
                overlap = marked_box_obj.intersection(predicted_box_obj).area
                union = marked_box_obj.area + predicted_box_obj.area - overlap
                # A zero-area union cannot overlap anything; skipping it also
                # avoids a division by zero.
                if union > 0 and overlap / union >= factor:
                    return marked_box[0]
            return ""

        tiff_dict = get_tiff_dict()
        if tifname not in tiff_dict:
            raise Exception("XCEPTION POSTPROCESS %s NOT FOUND" % tifname)

        # Fall back to the TSlide reader when OpenSlide cannot open the file.
        try:
            slide = openslide.OpenSlide(tiff_dict[tifname])
        except Exception:
            slide = TSlide(tiff_dict[tifname])

        try:
            basename = os.path.splitext(os.path.basename(tifname))[0]
            parent_d = os.path.basename(os.path.dirname(tifname))
            save_path = os.path.join(save_path, parent_d, basename)
            marked_boxes = get_labels(os.path.splitext(tifname)[0] + ".xml")
            for x_y, boxes in new_dict.items():
                for box in boxes:
                    # NOTE(review): ``pattern`` is not defined in this method;
                    # presumably a module-level regex with three groups whose
                    # last two are the tile x and y - confirm.
                    _, x, y = re.findall(pattern, x_y)[0]
                    x = int(x) + int(box[4][0])
                    y = int(y) + int(box[4][1])
                    w = int(box[4][2])
                    h = int(box[4][3])

                    marked_class_i = is_overlapped(marked_boxes, (x, y, w, h),
                                                   factor)
                    if marked_class_i:
                        image_name = "1-p{:.4f}_markedAs_{}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                            1 - box[3], marked_class_i, basename, x, y, w, h, N)
                        save_path_i = os.path.join(save_path, box[2], "marked")
                    else:
                        image_name = "1-p{:.4f}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                            1 - box[3], basename, x, y, w, h, N)
                        save_path_i = os.path.join(save_path, box[2])

                    os.makedirs(save_path_i, exist_ok=True)
                    image_fullname = os.path.join(save_path_i, image_name)
                    # Shift the origin so the enlarged region stays centred
                    # on the box.
                    slide.read_region(
                        (int(x + (1 - N) * w / 2), int(y + (1 - N) * h / 2)), 0,
                        (int(N * w), int(
                            N * h))).convert("RGB").save(image_fullname)
        finally:
            # Release the slide handle even when a read or save fails.
            slide.close()
Пример #12
0
    def cut_cells_p_marked(self,
                           tifname,
                           new_dict,
                           save_path,
                           factor=0.3,
                           N=4):
        """
        Cut every box of ``new_dict`` out of the slide and save it as a JPEG,
        tagging the boxes that overlap an annotation of the slide's XML file.

        :param tifname: full path name of .tif/.kfb file
        :param new_dict: {x_y: [[class_i, det, class_i, det, (x,y,w,h)],]}
        :param save_path: image saving path (note: image is saved under class_i)
        :param factor: overlapping threshold, added marked info to image filename if overlapped
        :param N: enlargement factor of the saved region relative to the box
        :output format: save_path/<parent dir>/<tifbasename>/<box[2]>[/marked]/<image name>
                        (note: x, y in the image name are the tile origin plus the box offset.
                               the "1-p" value in the name is 1 - box[3].
                               the saved region is N times the box size, centred on the box.
                               overlapping boxes get "markedAs_<class>" in the
                               name and go to the "marked" sub-directory.)
        """

        # https://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely
        def get_labels(xmlname):
            """collect labeled boxes from asap xml
            :param xmlname: full path name of .xml file, got from .tif/.kfb file
            :output format: [[class_i, [(xi,yi),]],]
            """
            if not os.path.isfile(xmlname):
                return []
            classes = {
                "#aa0000": "HSIL",
                "#aa007f": "ASCH",
                "#005500": "LSIL",
                "#00557f": "ASCUS",
                "#0055ff": "SCC",
                "#aa557f": "ADC",
                "#aa55ff": "EC",
                "#ff5500": "AGC1",
                "#ff557f": "AGC2",
                "#ff55ff": "AGC3",
                "#00aa00": "FUNGI",
                "#00aa7f": "TRI",
                "#00aaff": "CC",
                "#55aa00": "ACTINO",
                "#55aa7f": "VIRUS",
                "#ffffff": "NORMAL",
                "#000000": "MC",
                "#aa00ff": "SC",
                "#ff0000": "RC",
                "#aa5500": "GEC"
            }
            DOMTree = xml.dom.minidom.parse(xmlname)
            collection = DOMTree.documentElement
            annotations = collection.getElementsByTagName("Annotation")
            marked_boxes = []
            for annotation in annotations:
                colorCode = annotation.getAttribute("Color")
                # Annotations with an unknown colour are ignored.
                if colorCode not in classes:
                    continue
                coordinates = annotation.getElementsByTagName("Coordinate")
                points = [(float(coordinate.getAttribute('X')),
                           float(coordinate.getAttribute('Y')))
                          for coordinate in coordinates]
                marked_boxes.append([classes[colorCode], points])
            return marked_boxes

        def is_overlapped(marked_boxes, predicted_box, factor):
            """check if predicted box is marked already
            :param marked_boxes: [[class_i, [(xi,yi),]],]
            :param predicted_box: (x, y, w, h)
            :param factor: overlapping threshold (intersection over union)
            :return: class of the first overlapping annotation, "" if none
            """
            predicted_box_obj = geometry.box(
                predicted_box[0], predicted_box[1],
                predicted_box[0] + predicted_box[2],
                predicted_box[1] + predicted_box[3])
            for marked_box in marked_boxes:
                marked_box_obj = geometry.Polygon(marked_box[1])
                overlap = marked_box_obj.intersection(predicted_box_obj).area
                union = marked_box_obj.area + predicted_box_obj.area - overlap
                # A zero-area union cannot overlap anything; skipping it also
                # avoids a division by zero.
                if union > 0 and overlap / union >= factor:
                    return marked_box[0]
            return ""

        # Fall back to the TSlide reader when OpenSlide cannot open the file.
        try:
            slide = openslide.OpenSlide(tifname)
        except Exception:
            slide = TSlide(tifname)

        try:
            basename = os.path.splitext(os.path.basename(tifname))[0]
            parent_d = os.path.basename(os.path.dirname(tifname))
            save_path = os.path.join(save_path, parent_d, basename)
            marked_boxes = get_labels(os.path.splitext(tifname)[0] + ".xml")
            for x_y, boxes in new_dict.items():
                for box in boxes:
                    # Box coordinates are relative to the tile whose origin
                    # is encoded in the "x_y" key.
                    origin = x_y.split('_')
                    x = int(origin[0]) + int(box[4][0])
                    y = int(origin[1]) + int(box[4][1])
                    w = int(box[4][2])
                    h = int(box[4][3])

                    marked_class_i = is_overlapped(marked_boxes, (x, y, w, h),
                                                   factor)
                    if marked_class_i:
                        image_name = "1-p{:.4f}_markedAs_{}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                            1 - box[3], marked_class_i, basename, x, y, w, h, N)
                        save_path_i = os.path.join(save_path, box[2], "marked")
                    else:
                        image_name = "1-p{:.4f}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                            1 - box[3], basename, x, y, w, h, N)
                        save_path_i = os.path.join(save_path, box[2])
                    os.makedirs(save_path_i, exist_ok=True)
                    image_fullname = os.path.join(save_path_i, image_name)
                    # Shift the origin so the enlarged region stays centred
                    # on the box.
                    slide.read_region(
                        (int(x + (1 - N) * w / 2), int(y + (1 - N) * h / 2)), 0,
                        (int(N * w), int(
                            N * h))).convert("RGB").save(image_fullname)
        finally:
            # Release the slide handle even when a read or save fails.
            slide.close()
Пример #13
0
    def run(self):
        """
        Process every slide of ``self.tiff_lst``: cut it into patches when no
        patch directory exists yet, run ``yolo_predict`` on the patches in a
        process pool and save every predicted box as a JPEG under
        ``self.cells_path/<slide name>/<label>/``.
        """
        print("Initial DARKNET and XCEPTION model ...")

        total = len(self.tiff_lst)
        for index, tiff in enumerate(self.tiff_lst):
            # Slide file name without extension; spaces are replaced by "-".
            tiff_basename, _ = os.path.splitext(os.path.basename(tiff))
            tiff_basename = tiff_basename.replace(" ", "-")
            print('Process %s / %s %s ...' % (index + 1, total, tiff_basename))

            # Directory holding the patches of this slide.
            slice_save_path = os.path.join(self.slice_dir_path, tiff_basename)

            t0 = datetime.datetime.now()
            # Cut the slide only when its patch directory does not exist yet.
            if not os.path.exists(slice_save_path):
                ImageSlice(tiff, self.slice_dir_path).get_slices()

            tif_images = FilesScanner(slice_save_path, ['.jpg']).get_files()
            t1 = datetime.datetime.now()
            print('TIFF SLICE COST: %s' % (t1 - t0))

            tasks = []
            seg_results = {}

            # The context manager shuts the pool down even when a prediction
            # job raises.
            with ProcessPoolExecutor(max_workers=GPU_NUM) as executor:
                if len(tif_images) < cfg.darknet.min_job_length:
                    tasks.append(executor.submit(yolo_predict, '0', tif_images))
                else:
                    # Ceiling division keeps the number of chunks at or below
                    # GPU_NUM, so gpu_index never exceeds GPU_NUM - 1; the
                    # floor of 1 avoids a zero slice step.
                    n = max(1, (len(tif_images) + GPU_NUM - 1) // GPU_NUM)
                    patches = [tif_images[i: i + n] for i in range(0, len(tif_images), n)]

                    for gpu_index, patch in enumerate(patches):
                        tasks.append(executor.submit(yolo_predict, str(gpu_index), patch))

                for future in as_completed(tasks):
                    seg_results.update(future.result())

            # Fall back to the TSlide reader when OpenSlide cannot open the
            # file.
            try:
                slide = openslide.OpenSlide(tiff)
            except Exception:
                slide = TSlide(tiff)

            try:
                for key, lst in seg_results.items():
                    # Tile origin encoded in the key; box offsets are added
                    # to it.
                    x0, y0 = key.split('_')
                    x0, y0 = int(x0), int(y0)

                    for item in lst:
                        label, accuracy, (x, y, w, h) = item
                        accuracy, x, y, w, h = float(accuracy), int(x), int(y), int(w), int(h)
                        x, y = x0 + x, y0 + y

                        save_path = os.path.join(self.cells_path, tiff_basename, label)
                        os.makedirs(save_path, exist_ok=True)

                        image_name = "1-p{:.4f}_{}_x{}_y{}_w{}_h{}.jpg".format(1 - accuracy, tiff_basename, x, y, w, h)
                        slide.read_region((x, y), 0, (w, h)).convert("RGB").save(os.path.join(save_path, image_name))
            finally:
                # Release the slide handle before moving on to the next slide.
                slide.close()
Пример #14
0
    def gen_np_array_mem_(self,
                          results,
                          classes=cfg.darknet.classes,
                          det=cfg.xception.det1,
                          size=cfg.xception.size):
        """
        Read the detected cells from the slide and return them as arrays.

        The slide path is looked up in get_tiff_dict() under
        ``self.input_file``; the tile origin of every key of ``results`` is
        extracted with ``self.pattern``.

        :param results: {x_y: [(class_i, det, (x,y,w,h)),]}
        :param classes: [class_i,], only boxes of these classes are used
        :param det: only boxes whose det value is greater than this are used
        :param size: edge length of the returned square images
        :return:
            cell_list: list of numpy arrays, one per selected cell, in order
            cell_index: {index: [x_y, [class_i, det, (x,y,w,h)]]},
                        index is the position in cell_list
        :raises Exception: when the slide is not in get_tiff_dict() or cannot
            be opened by either reader
        """
        def resize_img(img, size):
            # Crop box centred on the image, then resize to size x size.
            pad_x = (size - img.size[0]) / 2
            pad_y = (size - img.size[1]) / 2
            img_croped = img.crop(
                (-pad_x, -pad_y, img.size[0] + pad_x, img.size[1] + pad_y))
            return img_croped.resize((size, size))

        tiff_dict = get_tiff_dict()
        if self.input_file not in tiff_dict:
            raise Exception("XCEPTION PREPROCESS %s NOT FOUND" %
                            self.input_file)

        try:
            # Fall back to the TSlide reader when OpenSlide fails.
            try:
                slide = openslide.OpenSlide(tiff_dict[self.input_file])
            except Exception:
                slide = TSlide(tiff_dict[self.input_file])
        except Exception as e:
            # Chain the cause so the reader's own error stays visible.
            raise Exception('TIFF FILE OPEN FAILED => %s' %
                            self.input_file) from e

        cell_list = []
        cell_index = {}
        index = 0
        try:
            for x_y, boxes in results.items():
                for box in boxes:
                    if box[0] in classes and box[1] > det:
                        # The last two groups of the first match hold the
                        # tile origin; box offsets are added to it.
                        _, x, y = re.findall(self.pattern, x_y)[0]
                        x = int(x) + int(box[2][0])
                        y = int(y) + int(box[2][1])
                        w = int(box[2][2])
                        h = int(box[2][3])

                        cell = slide.read_region((x, y), 0,
                                                 (w, h)).convert("RGB")

                        cell_list.append(np.array(resize_img(cell, size)))
                        cell_index[index] = [x_y, list(box)]
                        index += 1
        finally:
            # Release the slide handle even when a read fails.
            slide.close()

        return cell_list, cell_index