Пример #1
0
    def test_polygon_merger_rtree(self):
        """Test Polygon_merger_v2.run() on contours parsed from annotations.

        Loads a slide annotation JSON file, parses it into a contours
        table, merges the polygons of every group except "roi", checks the
        shape and the groups of the merged table, and finally makes sure
        the merged contours convert into three annotation documents.
        """
        annotationPath = utilities.externaldata(
            'data/TCGA-A2-A0YE-01Z-00-DX1_GET_MergePolygons.svs_annotations.json.sha512')  # noqa
        # read through a context manager so the file handle is always closed
        with open(annotationPath) as annotation_file:
            slide_annotations = json.load(annotation_file)
        _, contours_df = parse_slide_annotations_into_tables(slide_annotations)

        # init & run polygon merger (the "roi" group is left out of the merge)
        pm = Polygon_merger_v2(contours_df, verbose=0)
        pm.unique_groups.remove("roi")
        pm.run()

        # make sure it is what we expect
        assert pm.new_contours.shape == (16, 16)
        assert set(pm.new_contours.loc[:, 'group']) == {
            'mostly_tumor', 'mostly_stroma', 'mostly_lymphocytic_infiltrate'}

        # add colors (aesthetic): reuse the first color found for each group
        for group in pm.unique_groups:
            cs = contours_df.loc[contours_df.loc[:, "group"] == group, "color"]
            pm.new_contours.loc[
                pm.new_contours.loc[:, "group"] == group, "color"] = cs.iloc[0]

        # get rid of nonenclosed stroma (aesthetic)
        pm.new_contours = _discard_nonenclosed_background_group(
            pm.new_contours, background_group="mostly_stroma")

        # get list of annotation documents
        annotation_docs = get_annotation_documents_from_contours(
            pm.new_contours.copy(), separate_docs_by_group=True,
            docnamePrefix='test',
            verbose=False, monitorPrefix="annotation docs")
        assert len(annotation_docs) == 3
Пример #2
0
    def test_polygon_merger(self):
        """Check Polygon_merger.run() on the masks grid and post the result.

        The merged contours table must have the expected shape and groups,
        and every annotation document built from it must post to the
        sample slide without errors.
        """
        # merge the polygons found in the grid of masks
        merger = Polygon_merger(
            maskpaths=maskpaths,
            GTCodes_df=GTCodes_df,
            discard_nonenclosed_background=True,
            verbose=1)
        contours_df = merger.run()

        # the merged table must look the way we expect
        expected_groups = {
            'roi', 'mostly_tumor', 'mostly_stroma',
            'mostly_lymphocytic_infiltrate'
        }
        self.assertTupleEqual(contours_df.shape, (17, 13))
        self.assertSetEqual(set(contours_df.loc[:, 'group']), expected_groups)

        # start from a clean slide: remove annotations it may already have
        delete_annotations_in_slide(gc, SAMPLE_SLIDE_ID)

        # turn the contours into annotation documents, one set per group
        doc_options = {
            'separate_docs_by_group': True,
            'docnamePrefix': 'test',
            'verbose': False,
            'monitorPrefix': SAMPLE_SLIDE_ID + ": annotation docs",
        }
        annotation_docs = get_annotation_documents_from_contours(
            contours_df.copy(), **doc_options)

        # every document must post and come back with an annotation
        post_url = "/annotation?itemId=" + SAMPLE_SLIDE_ID
        for annotation_doc in annotation_docs:
            resp = gc.post(post_url, json=annotation_doc)
            self.assertTrue('annotation' in resp.keys())
    def visualize_results(self):
        """Post the contours of the labeled mask to the slide as annotations.

        Contours are extracted from ``self.labeled``, converted into
        annotation documents separated by group (names prefixed with
        'cdt'), and each document is posted to the slide whose id is
        ``self.cdt.slide_id``.
        """
        # extract the contours (roi contour included) from the labeled mask
        contour_options = {
            'MASK': self.labeled,
            'GTCodes_df': self.cdt.GTcodes.copy(),
            'get_roi_contour': True,
            'roi_group': 'roi',
            'background_group': 'not_specified',
            'discard_nonenclosed_background': True,
            'MIN_SIZE': 15,
            'MAX_SIZE': None,
            'verbose': self.cdt.verbose == 3,
            'monitorPrefix': self.monitorPrefix + ": -- contours",
        }
        contours_df = get_contours_from_mask(**contour_options)

        # scale factor, offsets and drawing style of the annotation elements
        annprops = dict(
            F=self.cdt.slide_info['magnification'] / self.cdt.MAG,
            X_OFFSET=self.xmin,
            Y_OFFSET=self.ymin,
            opacity=self.cdt.opacity,
            lineWidth=self.cdt.lineWidth,
        )

        # build the annotation documents, separated by group
        doc_options = {
            'separate_docs_by_group': True,
            'docnamePrefix': 'cdt',
            'annprops': annprops,
            'verbose': self.cdt.verbose == 3,
            'monitorPrefix': self.monitorPrefix + ": -- annotation docs",
        }
        annotation_docs = get_annotation_documents_from_contours(
            contours_df.copy(), **doc_options)

        # post every document to the slide; the responses are not needed
        for annotation_doc in annotation_docs:
            self.cdt.gc.post(
                "/annotation?itemId=" + self.cdt.slide_id,
                json=annotation_doc)
def get_tissue_boundary_annotation_documents(
        gc, slide_id, labeled,
        color='rgb(0,0,0)', group='tissue', annprops=None):
    """Build annotation documents outlining tissue boundaries for DSA.

    Parameters
    -----------
    gc : object
        girder client to use. Only queried when annprops is None.
    slide_id : str
        girder ID of slide
    labeled : np array
        mask of tissue regions using slide thumbnail. This could either be
        a binary mask or a mask where each unique value corresponds to one
        tissue region. It is binarized (values > 0 become 1) before the
        contours are extracted. This can be obtained using
        get_tissue_mask().
    color : str
        color to assign to boundaries. format like rgb(0,0,0)
    group : str
        label for annotations
    annprops : dict
        properties of annotation elements. Contains the following keys
        F, X_OFFSET, Y_OFFSET, opacity, lineWidth. Refer to
        get_single_annotation_document_from_contours() for details.
        When None, F is the ratio of the slide width (sizeX of the slide
        tiles metadata) to the mask width, offsets and opacity are 0 and
        lineWidth is 4.0.

    Returns
    --------
    list of dicts
        each dict is an annotation document that you can post to DSA

    """
    # fall back to default annotation properties derived from the slide
    if annprops is None:
        slide_info = gc.get('item/%s/tiles' % slide_id)
        annprops = dict(
            F=slide_info['sizeX'] / labeled.shape[1],  # relative to base
            X_OFFSET=0,
            Y_OFFSET=0,
            opacity=0,
            lineWidth=4.0,
        )

    # ground truth codes table holding a single 'tissue' row
    GTCodes_df = DataFrame(columns=['group', 'GT_code', 'color'])
    for column, value in (
            ('group', group), ('GT_code', 1), ('color', color)):
        GTCodes_df.loc['tissue', column] = value

    # contours of the binarized mask
    binary_mask = 0 + (labeled > 0)
    contours_tissue = get_contours_from_mask(
        MASK=binary_mask,
        GTCodes_df=GTCodes_df,
        get_roi_contour=False,
        MIN_SIZE=0,
        MAX_SIZE=None,
        verbose=False,
        monitorPrefix="tissue: getting contours")

    # contours -> annotation documents
    return get_annotation_documents_from_contours(
        contours_tissue.copy(),
        docnamePrefix='test',
        annprops=annprops,
        verbose=False,
        monitorPrefix="tissue : annotation docs")
Пример #5
0
    def test_polygon_merger_tiled_masks(self, girderClient):  # noqa
        """Check Polygon_merger.run() on tiled roi masks and post the result.

        The merged contours table must have the expected shape and groups,
        and every annotation document built from it must post to the
        sample slide without errors.
        """
        # ground truth codes table, indexed by group name
        GTCodes_df = read_csv(getTestFilePath('sample_GTcodes.csv'))
        GTCodes_df.index = GTCodes_df.loc[:, 'group']

        # collect the png masks that were saved for the adjacent rois
        mask_loadpath = getTestFilePath(
            os.path.join('annotations_and_masks', 'polygon_merger_roi_masks'))
        maskpaths = []
        for fname in os.listdir(mask_loadpath):
            if fname.endswith('.png'):
                maskpaths.append(os.path.join(mask_loadpath, fname))

        # merge the polygons found in the grid of masks
        merger = Polygon_merger(
            maskpaths=maskpaths,
            GTCodes_df=GTCodes_df,
            discard_nonenclosed_background=True,
            verbose=1)
        contours_df = merger.run()

        # the merged table must look the way we expect
        expected_groups = {
            'roi', 'mostly_tumor', 'mostly_stroma',
            'mostly_lymphocytic_infiltrate'
        }
        assert contours_df.shape == (13, 13)
        assert set(contours_df.loc[:, 'group']) == expected_groups

        # start from a clean slide: remove annotations it may already have
        slide_item = girderClient.resourceLookup(
            '/user/admin/Public/TCGA-A2-A0YE-01Z-00-DX1.8A2E3094-5755-42BC-969D-7F0A2ECA0F39.svs'
        )  # noqa
        slide_id = str(slide_item['_id'])
        delete_annotations_in_slide(girderClient, slide_id)

        # turn the contours into annotation documents, one set per group
        doc_options = {
            'separate_docs_by_group': True,
            'docnamePrefix': 'test',
            'verbose': False,
            'monitorPrefix': slide_id + ": annotation docs",
        }
        annotation_docs = get_annotation_documents_from_contours(
            contours_df.copy(), **doc_options)

        # every document must post and come back with an annotation
        post_url = "/annotation?itemId=" + slide_id
        for annotation_doc in annotation_docs:
            resp = girderClient.post(post_url, json=annotation_doc)
            assert 'annotation' in resp.keys()
Пример #6
0
    def visualize_contiguous_superpixels(self):
        """Post contiguous superpixels to the slide, grouped by cellularity.

        Builds a mask in which every superpixel carries the value of its
        'cluster' column in ``self.fdata``, extracts the contours of that
        mask (one group per entry of ``self.cluster_props``), and posts
        the resulting annotation documents to ``self.cd.slide_id``.
        """
        # paint every superpixel with the cluster it belongs to
        cellularity_mask = np.zeros(self.spixel_mask.shape)
        for spixel_value, spixel_row in self.fdata.iterrows():
            in_spixel = self.spixel_mask == spixel_value
            cellularity_mask[in_spixel] = spixel_row['cluster']

        # one ground truth code per cluster, named after its cellularity
        GTCodes_df = DataFrame(columns=['group', 'GT_code', 'color'])
        for cluster_value, props in self.cluster_props.items():
            group_name = 'cellularity-%d' % (props['cellularity'])
            for column, value in (
                    ('group', group_name),
                    ('GT_code', cluster_value),
                    ('color', props['color'])):
                GTCodes_df.loc[group_name, column] = value

        # contours of the cluster membership mask
        contour_options = {
            'MASK': cellularity_mask,
            'GTCodes_df': GTCodes_df,
            'get_roi_contour': False,
            'MIN_SIZE': 0,
            'MAX_SIZE': None,
            'verbose': self.cd.verbose == 3,
            'monitorPrefix': self.monitorPrefix,
        }
        contours_df = get_contours_from_mask(**contour_options)

        # scale factor, offsets and drawing style of the annotation elements
        annprops = dict(
            F=(self.ymax - self.ymin) / self.tissue_rgb.shape[0],
            X_OFFSET=self.xmin,
            Y_OFFSET=self.ymin,
            opacity=self.cd.opacity_contig,
            lineWidth=self.cd.lineWidth,
        )

        # contours -> annotation documents (at most 1000 annotations each)
        doc_options = {
            'docnamePrefix': 'contig',
            'annprops': annprops,
            'annots_per_doc': 1000,
            'separate_docs_by_group': True,
            'verbose': self.cd.verbose == 3,
            'monitorPrefix': self.monitorPrefix,
        }
        annotation_docs = get_annotation_documents_from_contours(
            contours_df.copy(), **doc_options)

        # post the documents one by one, reporting progress as we go
        n_docs = len(annotation_docs)
        for doc_number, doc in enumerate(annotation_docs, start=1):
            progress = "%s: Posting doc %d of %d" % (
                self.monitorPrefix, doc_number, n_docs)
            self.cd._print2(progress)
            self.cd.gc.post(
                "/annotation?itemId=" + self.cd.slide_id, json=doc)
    def test_get_annotation_documents_from_contours(self, girderClient):  # noqa
        """Test get_annotation_documents_from_contours().

        Checks the number and the names of the annotation documents built
        from ``self.CONTOURS_DF`` and makes sure the first one posts to the
        sample slide without errors.
        """
        self._setup()
        slide_item = girderClient.resourceLookup(
            '/user/admin/Public/TCGA-A2-A0YE-01Z-00-DX1.8A2E3094-5755-42BC-969D-7F0A2ECA0F39.svs'
        )  # noqa
        slide_id = str(slide_item['_id'])

        # offsets and drawing style of the annotation elements
        annprops = dict(
            X_OFFSET=self.X_OFFSET,
            Y_OFFSET=self.Y_OFFSET,
            opacity=0.2,
            lineWidth=4.0,
        )

        # contours -> annotation documents (at most 10 annotations each)
        doc_options = {
            'separate_docs_by_group': True,
            'annots_per_doc': 10,
            'docnamePrefix': 'test',
            'annprops': annprops,
            'verbose': False,
            'monitorPrefix': self.MASKNAME[:12] + ": annotation docs",
        }
        annotation_docs = get_annotation_documents_from_contours(
            self.CONTOURS_DF.copy(), **doc_options)

        # the documents must be the ones we expect
        assert len(annotation_docs) == 8
        doc_names = {doc['name'] for doc in annotation_docs}
        assert doc_names == {
            'test_blood_vessel-0', 'test_exclude-0',
            'test_mostly_lymphocytic_infiltrate-0',
            'test_mostly_stroma-0', 'test_mostly_tumor-0',
            'test_mostly_tumor-1', 'test_normal_acinus_or_duct-0',
            'test_roi-0'
        }

        # start from a clean slide: remove annotations it may already have
        delete_annotations_in_slide(girderClient, slide_id)

        # the first document must post and come back with an annotation
        first_doc = annotation_docs[0]
        resp = girderClient.post(
            "/annotation?itemId=" + slide_id, json=first_doc)
        assert 'annotation' in resp.keys()
    def test_polygon_merger_v2(self):
        """Check Polygon_merger_v2.run() and post the merged contours.

        Every group except "roi" is merged; the merged table must have the
        expected shape and groups, and every annotation document built
        from it must post to the target slide without errors.
        """
        # merge the polygons of every group but the roi
        merger = Polygon_merger_v2(contours_df, verbose=1)
        merger.unique_groups.remove("roi")
        merger.run()

        # the merged table must look the way we expect
        expected_groups = {
            'mostly_tumor', 'mostly_stroma', 'mostly_lymphocytic_infiltrate'}
        self.assertTupleEqual(merger.new_contours.shape, (16, 13))
        self.assertSetEqual(
            set(merger.new_contours.loc[:, 'group']), expected_groups)

        # aesthetic: color each merged group with the first color that the
        # same group has in the original contours
        for group in merger.unique_groups:
            in_source = contours_df.loc[:, "group"] == group
            group_color = contours_df.loc[in_source, "color"].iloc[0]
            in_merged = merger.new_contours.loc[:, "group"] == group
            merger.new_contours.loc[in_merged, "color"] = group_color

        # aesthetic: drop stroma that is not enclosed
        merger.new_contours = _discard_nonenclosed_background_group(
            merger.new_contours, background_group="mostly_stroma")

        # start from a clean slide: remove annotations it may already have
        for ann in gc.get('/annotation/item/' + POST_SLIDE_ID):
            gc.delete('/annotation/%s' % ann['_id'])

        # turn the contours into annotation documents, one set per group
        doc_options = {
            'separate_docs_by_group': True,
            'docnamePrefix': 'test',
            'verbose': False,
            'monitorPrefix': POST_SLIDE_ID + ": annotation docs",
        }
        annotation_docs = get_annotation_documents_from_contours(
            merger.new_contours.copy(), **doc_options)

        # every document must post and come back with an annotation
        post_url = "/annotation?itemId=" + POST_SLIDE_ID
        for annotation_doc in annotation_docs:
            resp = gc.post(post_url, json=annotation_doc)
            self.assertTrue('annotation' in resp.keys())
    def test_get_annotation_documents_from_contours(self):
        """Test get_annotation_documents_from_contours().

        Checks the number and the names of the annotation documents built
        from CONTOURS_DF and makes sure the first one posts to the sample
        slide without errors.
        """
        # offsets and drawing style of the annotation elements
        annprops = dict(
            X_OFFSET=X_OFFSET,
            Y_OFFSET=Y_OFFSET,
            opacity=0.2,
            lineWidth=4.0,
        )

        # contours -> annotation documents (at most 10 annotations each)
        doc_options = {
            'separate_docs_by_group': True,
            'annots_per_doc': 10,
            'docnamePrefix': 'test',
            'annprops': annprops,
            'verbose': False,
            'monitorPrefix': MASKNAME[:12] + ": annotation docs",
        }
        annotation_docs = get_annotation_documents_from_contours(
            CONTOURS_DF.copy(), **doc_options)

        # the documents must be the ones we expect
        self.assertTrue(len(annotation_docs) == 8)
        doc_names = {doc['name'] for doc in annotation_docs}
        self.assertSetEqual(
            doc_names,
            {
                'test_blood_vessel-0', 'test_exclude-0',
                'test_mostly_lymphocytic_infiltrate-0', 'test_mostly_stroma-0',
                'test_mostly_tumor-0', 'test_mostly_tumor-1',
                'test_normal_acinus_or_duct-0', 'test_roi-0'
            })

        # start from a clean slide: remove annotations it may already have
        for ann in gc.get('/annotation/item/' + SAMPLE_SLIDE_ID):
            gc.delete('/annotation/%s' % ann['_id'])

        # the first document must post and come back with an annotation
        first_doc = annotation_docs[0]
        resp = gc.post("/annotation?itemId=" + SAMPLE_SLIDE_ID, json=first_doc)
        self.assertTrue('annotation' in resp.keys())
Пример #10
0
def create_review_galleries(
        tilepath_base, upload_results=True, gc=None,
        gallery_savepath=None, gallery_folderid=None,
        padding=25, tiles_per_row=2, tiles_per_column=5,
        annprops=None, url=None, nameprefix=''):
    """Create and/or post review galleries for rapid review.

    The '.png' tiles found in tilepath_base are sorted by path and laid out
    in mosaics ("galleries") of tiles_per_row x tiles_per_column tiles.
    Each gallery is saved as a tiled pyramidal tiff. If upload_results,
    the tiff is uploaded to DSA, removed from disk, and annotations that
    mark the location of every tile in the gallery are posted to the
    uploaded item.

    Parameters
    ----------
    tilepath_base : str
        directory containing the visualization tiles. Only files ending
        in '.png' are used. If upload_results, tile file names are parsed:
        the text before the first '_' is the slide name, and the values of
        'id-', 'left-', 'top-', 'bottom-' and 'right-' (each terminated by
        '_' or by the file extension) are read from the name.
    upload_results : bool
        upload results to DSA?
    gc : girder_client.Girder_Client
        authenticated girder client. Only needed if upload_results.
    gallery_savepath : str
        directory to save galleries to. If None, a temporary directory is
        created. If upload_results, each gallery file is removed after it
        has been uploaded.
    gallery_folderid : str
        girder ID of folder to post galleries. Only needed if
        upload_results.
    padding : int
        padding in pixels between tiles in same gallery.
    tiles_per_row : int
        how many visualization tiles per row in gallery.
    tiles_per_column : int
        how many visualization tiles per column in gallery.
    annprops : dict
        properties of the annotations to be posted to DSA. Passed directly
        as annprops to get_annotation_documents_from_contours()
    url : str
        url of the Digital Slide Archive Instance. Only needed if
        upload_results. It is concatenated as-is with "histomicstk#?..."
        to build the link of each tile. For example:
        http://candygram.neurology.emory.edu:8080/
    nameprefix : str
        prefix to prepend to gallery name

    Returns
    -------
    list
        if upload_results, each entry is the response returned by
        gc.uploadFileToFolder() for one gallery. Otherwise, each entry is
        the path one gallery was saved to.

    Raises
    ------
    Exception
        if upload_results and any of gc, gallery_folderid or url is None.

    """
    if upload_results:
        # these arguments are looked up by name among the local variables
        for par in ('gc', 'gallery_folderid', 'url'):
            if locals()[par] is None:
                raise Exception(
                    "%s cannot be None if upload_results!" % par)

    if gallery_savepath is None:
        gallery_savepath = tempfile.mkdtemp(prefix='gallery-')

    savepaths = []
    resps = []

    # all png tiles, in sorted path order
    tile_paths = [
        os.path.join(tilepath_base, j) for j in
        os.listdir(tilepath_base) if j.endswith('.png')]
    tile_paths.sort()

    def _parse_tilepath(tpath):
        """Parse slide name, item id, bounds and link from a tile path."""
        basename = os.path.basename(tpath)
        # strip the file extension
        basename = basename[:basename.rfind('.')]
        tileinfo = {'slide_name': basename.split('_')[0]}
        # each attribute is encoded in the name as "<attrib>-<value>"
        for attrib in ['id', 'left', 'top', 'bottom', 'right']:
            tileinfo[attrib] = basename.split(
                attrib + '-')[1].split('_')[0]

        # add URL in histomicsTK
        tileinfo['URL'] = url + \
            "histomicstk#?image=%s&bounds=%s%%2C%s%%2C%s%%2C%s%%2C0" % (
                tileinfo['id'],
                tileinfo['left'], tileinfo['top'],
                tileinfo['right'], tileinfo['bottom'])
        return tileinfo

    n_tiles = len(tile_paths)
    n_galleries = int(np.ceil(n_tiles / (tiles_per_row * tiles_per_column)))

    # index of the next tile to insert; shared by all galleries
    tileidx = 0

    for galno in range(n_galleries):

        # initialize the gallery as a 1x1 black image with 3 bands
        im = pyvips.Image.black(1, 1, bands=3)

        # this will store the roi contours
        contours = []

        for row in range(tiles_per_column):

            # vertical position of this row: below what was inserted so far
            rowpos = im.height + padding

            # initialize "row" strip image
            row_im = pyvips.Image.black(1, 1, bands=3)

            for col in range(tiles_per_row):

                # no tiles left. NOTE: only this inner loop is exited; the
                # remaining rows of the gallery are still inserted (empty)
                if tileidx == n_tiles:
                    break

                tilepath = tile_paths[tileidx]

                print("Inserting tile %d of %d: %s" % (
                    tileidx, n_tiles, tilepath))
                tileidx += 1

                # get tile from file
                tile = pyvips.Image.new_from_file(
                    tilepath, access="sequential")

                # insert tile into mosaic row
                colpos = row_im.width + padding
                row_im = row_im.insert(
                    tile[:3], colpos, 0, expand=True, background=255)

                if upload_results:

                    tileinfo = _parse_tilepath(tilepath)

                    # bounding box of the tile in gallery coordinates
                    xmin = colpos
                    ymin = rowpos
                    xmax = xmin + tile.width
                    ymax = ymin + tile.height
                    xmin, xmax, ymin, ymax = [
                        str(j) for j in (xmin, xmax, ymin, ymax)]
                    contours.append({
                        'group': tileinfo['slide_name'],
                        'label': tileinfo['URL'],
                        'color': 'rgb(0,0,0)',
                        'coords_x': ",".join([xmin, xmax, xmax, xmin, xmin]),
                        'coords_y': ",".join([ymin, ymin, ymax, ymax, ymin]),
                    })

                    # Add a small contour so that when the pathologist
                    # changes the label to approve or disapprove of the
                    # FOV, the URL in THIS contour (a link to the original
                    # FOV) can be used. We place it in the top right corner.
                    boxsize = 25
                    xmin = str(int(xmax) - boxsize)
                    ymax = str(int(ymin) + boxsize)
                    contours.append({
                        'group': tileinfo['slide_name'],
                        'label': tileinfo['URL'],
                        'color': 'rgb(0,0,0)',
                        'coords_x': ",".join([xmin, xmax, xmax, xmin, xmin]),
                        'coords_y': ",".join([ymin, ymin, ymax, ymax, ymin]),
                    })

            # insert row into main gallery
            im = im.insert(row_im, 0, rowpos, expand=True, background=255)

        # gallery numbers in file names start at 1
        filename = '%s_gallery-%d' % (nameprefix, galno + 1)
        savepath = os.path.join(gallery_savepath, filename + '.tiff')
        print("Saving gallery %d of %d to %s" % (
            galno + 1, n_galleries, savepath))

        # save to disk as a tiled pyramidal tiff (temporarily, if it is
        # going to be uploaded)
        im.tiffsave(
            savepath, tile=True, tile_width=256, tile_height=256, pyramid=True)

        if upload_results:
            # upload the gallery to DSA (the uploaded file name has no
            # '.tiff' extension), then remove the local copy
            resps.append(gc.uploadFileToFolder(
                folderId=gallery_folderid, filepath=savepath,
                filename=filename))
            os.remove(savepath)

            # get and post FOV location annotations
            annotation_docs = get_annotation_documents_from_contours(
                DataFrame(contours), separate_docs_by_group=True,
                annprops=annprops)
            for doc in annotation_docs:
                _ = gc.post(
                    "/annotation?itemId=" + resps[-1]['itemId'], json=doc)
        else:
            savepaths.append(savepath)

    return resps if upload_results else savepaths
Пример #11
0
            int(m['y']) + im_size[1],
            int(m['y']) + im_size[1]))

        prediction_data[item_id]['has_holes'].append(0.0)
        prediction_data[item_id]['touches_edge-top'].append(0.0)
        prediction_data[item_id]['touches_edge-left'].append(0.0)
        prediction_data[item_id]['touches_edge-bottom'].append(0.0)
        prediction_data[item_id]['touches_edge-right'].append(0.0)

    # loop through each item id and push to annotations
    for item_id, contour_rows in prediction_data.items():
        contours_df = DataFrame(contour_rows)
        annotation_docs = get_annotation_documents_from_contours(
            contours_df.copy(),
            separate_docs_by_group=False,
            annots_per_doc=100,
            docnamePrefix=DOCNAME_PREFIX,
            annprops=annprops,
            verbose=False,
            monitorPrefix='')

        # get current annotations documents from item
        existing_annotations = gc.get('/annotation/item/' + item_id)

        # delete annotation documents starting with the same prefix as about to be pushed
        for ann in existing_annotations:
            if 'name' in ann['annotation']:
                doc_name = ann['annotation']['name']
                if doc_name.startswith(DOCNAME_PREFIX):
                    gc.delete('/annotation/%s' % ann['_id'])

        # post the annotation documents you created
Пример #12
0
def push_annotations_as_doc(gc, item_id, contours, doc_name='Default', group_name='default', color='rgb(255,0,0)',
                            opacity=0.2):
    """Push a list of opencv contours as annotations to the DSA HistomicsUI viewer.

    Annotation documents of the item whose name starts with doc_name are deleted before the new documents are
    posted.

    Parameters
    ----------
    gc : girder_client.GirderClient
        authenticated client if private item
    item_id : str
        image item id
    contours : list
        list of contours in opencv format to push as annotations
    doc_name : str (optional)
        prefix of the names of the documents the annotations are pushed as. Multiple documents may be pushed if
        enough contours are present (at most 100 annotations go into a document).
    group_name : str (optional)
        group name to assign to annotations
    color : str (optional)
        rgb color to use for annotations
    opacity : float (optional)
        set the opacity to use for the annotation

    Return
    ------
    contours_df : pandas.DataFrame
        dataframe used to create the annotation document

    """
    # columns of the contours table, in the order they appear in the dataframe
    columns = (
        'group', 'color', 'ymin', 'ymax', 'xmin', 'xmax', 'has_holes',
        'touches_edge-top', 'touches_edge-left', 'touches_edge-bottom', 'touches_edge-right',
        'coords_x', 'coords_y',
    )
    data = {column: [] for column in columns}

    # values that are the same for every contour
    shared_values = {
        'color': color,
        'group': group_name,
        'has_holes': 0.0,
        'touches_edge-top': 0.0,
        'touches_edge-left': 0.0,
        'touches_edge-bottom': 0.0,
        'touches_edge-right': 0.0,
    }

    for contour in contours:
        for column, value in shared_values.items():
            data[column].append(value)

        # bounding box of the contour
        xmin, ymin = np.min(contour, axis=0)[0]
        xmax, ymax = np.max(contour, axis=0)[0]
        for column, value in (('xmin', xmin), ('ymin', ymin), ('xmax', xmax), ('ymax', ymax)):
            data[column].append(value)

        # coordinates as comma separated strings: 'x1,x2,...,xn' and 'y1,y2,...,yn'
        data['coords_x'].append(','.join([str(point[0, 0]) for point in contour]))
        data['coords_y'].append(','.join([str(point[0, 1]) for point in contour]))

    # build dataframe from dict
    contours_df = DataFrame(data)

    # convert dataframe to annotation docs
    annotation_docs = get_annotation_documents_from_contours(
        contours_df.copy(),
        separate_docs_by_group=False,
        annots_per_doc=100,
        docnamePrefix=doc_name,
        annprops={'X_OFFSET': 0, 'Y_OFFSET': 0, 'opacity': opacity, 'lineWidth': 4.0},
        verbose=False,
        monitorPrefix='',
    )

    # delete the documents of the item whose name starts with the prefix about to be pushed
    for ann in gc.get('/annotation/item/' + item_id):
        annotation = ann['annotation']
        # some annotations may not have a name, leave those alone
        if 'name' not in annotation:
            continue
        if annotation['name'].startswith(doc_name):
            gc.delete('/annotation/%s' % ann['_id'])

    # post the new annotation documents; the responses are not needed
    post_url = "/annotation?itemId=" + item_id
    for annotation_doc in annotation_docs:
        gc.post(post_url, json=annotation_doc)

    return contours_df
Пример #13
0
def dsa_predict(model, gc, item_id, group_name='Positive', ann_doc_name='Default', preprocess_input=None,
                tile_size=(224, 224), save_mag=10, mask_mag=1.25, tissue_threshold=0.3, batch_size=8,
                pred_threshold=0.5, color='rgb(255,153,0)'):
    """Predict on DSA image item, using a grid tiling approach given a binary trained model.

    Tiles that contain enough tissue are predicted on in batches. Tiles predicted as the positive class are pushed
    to the item as annotations, after deleting the item's annotation documents whose name starts with
    ann_doc_name.

    Parameters
    ----------
    model : tensorflow.keras.models.Model
        a trained keras model for binary classification
    gc : girder_client.GirderClient
        authenticated client, used to get the images
    item_id : str
        image item id
    group_name : str (optional)
        name of the positive class, will be used as the group name in annotation elements
    ann_doc_name : str (optional)
        prepend name of the annotation documents
    preprocess_input : function (optional)
        a function that is applied to the images to process them, works on a tensor-style image. If None, no
        preprocessing function is applied. In both cases the batch is divided by 255 before prediction.
    tile_size : tuple (optional)
        size to predict images at, (width, height)
    save_mag : float (optional)
        magnification to extract tiles at. If None, the native magnification of the image is used.
    mask_mag : float (optional)
        tissue mask is used to decide which tiles to predict on, this is the magnification of the tissue mask
    tissue_threshold : float (optional)
        fraction of tile that must contain tissue to be predicted on
    batch_size : int (optional)
        predictions are done on images in batches
    pred_threshold : float (optional)
        model predicts a probability from 0 to 1, predictions above pred_threshold are considered the positive class
        that will be pushed as annotations
    color : str (optional)
        rgb(###,###,###) color of element box in annotation element

    Return
    ------
    annotation_data : dict
        annotation data that was pushed as annotation. Its keys are COL_NAMES; the lists are empty when no tile
        was predicted as positive (nothing is pushed in that case).

    """
    # info about the source image
    im_info = gc.get('item/{}/tiles'.format(item_id))
    fr_mag = im_info['magnification']
    fr_width = im_info['sizeX']
    fr_height = im_info['sizeY']

    if save_mag is None:
        # save magnification will be native magnification
        save_mag = fr_mag

    fr_to_lr_factor = save_mag / fr_mag
    # tile size is determined by the save res
    fr_tile_size = int(tile_size[0] / fr_to_lr_factor), int(tile_size[1] / fr_to_lr_factor)  # (width, height)

    # get tissue mask
    lr_im = get_region_im(gc, item_id, {'magnification': mask_mag})[:, :, :3]
    tissue_mask = get_tissue_mask(lr_im)[0]

    # we will loop through image in batches, get the coordinates for batches
    coords = []
    for x in range(0, fr_width, fr_tile_size[0]):
        for y in range(0, fr_height, fr_tile_size[1]):
            # check that the tile won't go over the edge of image, if so skip
            if x + fr_tile_size[0] > fr_width or y + fr_tile_size[1] > fr_height:
                continue

            # check tile for tissue, using the binary mask for tissue
            tissue_tile = tissue_mask[
                          int(y * mask_mag / fr_mag):int((y + fr_tile_size[1]) * mask_mag / fr_mag),
                          int(x * mask_mag / fr_mag):int((x + fr_tile_size[0]) * mask_mag / fr_mag)
                          ]

            # an empty mask slice (e.g. the tile is smaller than one mask pixel or falls outside of the mask)
            # has no measurable tissue: skip the tile instead of dividing by zero
            if tissue_tile.size == 0:
                continue

            # skip if tile does not contain enough tissue
            if np.count_nonzero(tissue_tile) / tissue_tile.size < tissue_threshold:
                continue
            coords.append((x, y))

    # break the coords in batch size chunks
    coord_batches = [coords[i:i + batch_size] for i in range(0, len(coords), batch_size)]
    annotation_data = {col_name: [] for col_name in COL_NAMES}

    print('predicting in batches')
    print('*********************')
    for batch_num, coord_batch in enumerate(coord_batches):
        print('{} of {}'.format(batch_num + 1, len(coord_batches)))
        # get all the images in this batch
        batch_ims = []
        for coord in coord_batch:
            region = {'left': coord[0], 'top': coord[1], 'width': fr_tile_size[0], 'height': fr_tile_size[1],
                      'magnification': save_mag}
            batch_ims.append(get_region_im(gc, item_id, region)[:, :, :3])

        # convert to tensor shape
        batch_ims = np.array(batch_ims)

        # process the images before predicting on them; the preprocessing function is optional (it defaults to
        # None, which must not be called), the division by 255 is always applied
        if preprocess_input is not None:
            batch_ims = preprocess_input(batch_ims)
        batch_ims = batch_ims / 255.

        # predict on the batch
        predictions = model.predict(batch_ims)

        # identify predictions of the positive class
        for i, pred in enumerate(predictions):
            if pred[0] > pred_threshold:
                # add the data to annotation data
                annotation_data['group'].append(group_name)
                annotation_data['color'].append(color)
                annotation_data['has_holes'].append(0.0)
                annotation_data['touches_edge-top'].append(0.0)
                annotation_data['touches_edge-left'].append(0.0)
                annotation_data['touches_edge-bottom'].append(0.0)
                annotation_data['touches_edge-right'].append(0.0)
                # the annotation is the box of the tile, in full resolution coordinates
                xmin, ymin = coord_batch[i][0], coord_batch[i][1]
                annotation_data['xmin'].append(xmin)
                annotation_data['ymin'].append(ymin)
                xmax = xmin + fr_tile_size[0]
                ymax = ymin + fr_tile_size[1]
                annotation_data['xmax'].append(xmax)
                annotation_data['ymax'].append(ymax)
                annotation_data['coords_x'].append('{},{},{},{}'.format(xmin, xmax, xmax, xmin))
                annotation_data['coords_y'].append('{},{},{},{}'.format(ymin, ymin, ymax, ymax))

    # only push if annotation data is not empty
    n = len(annotation_data['group'])
    if n:
        print('number of tiles to push: {}'.format(n))
        contours_df = DataFrame(annotation_data)
        annotation_docs = get_annotation_documents_from_contours(
            contours_df.copy(), separate_docs_by_group=False, annots_per_doc=100, docnamePrefix=ann_doc_name,
            annprops=ANNPROPS, verbose=False, monitorPrefix=''
        )

        # get current annotations documents from item
        existing_annotations = gc.get('/annotation/item/' + item_id)

        # delete annotation documents starting with the same prefix as about to be pushed
        for ann in existing_annotations:
            if 'name' in ann['annotation']:
                doc_name = ann['annotation']['name']
                if doc_name.startswith(ann_doc_name):
                    gc.delete('/annotation/%s' % ann['_id'])

        # post the annotation documents you created
        for annotation_doc in annotation_docs:
            _ = gc.post(
                "/annotation?itemId=" + item_id, json=annotation_doc)
    else:
        print('no positive tiles to push..')
    return annotation_data