示例#1
0
def _load_tile_data(tx, ty):
    """Read the extract image for tile (tx, ty) and store it in ``db``.

    Nothing happens when a tile is already loaded and the coordinates held
    under ``('coords', 'tile')`` equal the requested ones. Otherwise the
    configured cycle and z plane are selected from the tile and the image,
    its channel labels and the tile coordinates are written to ``db``.

    Args:
        tx: Tile x coordinate passed to the extract path builder
        ty: Tile y coordinate passed to the extract path builder

    Raises:
        ValueError: If the selected image dtype is neither uint8 nor uint16
    """
    requested = (tx, ty)

    # Skip all I/O when the requested tile is the one currently held
    if _tile_loaded() and db.get('coords', 'tile') == requested:
        return

    relative_path = cytokit_io.get_extract_image_path(
        cfg.region_index, tx, ty, cfg.extract_name)
    tile, metadata = cytokit_io.read_tile(
        osp.join(cfg.exp_data_dir, relative_path), return_metadata=True)

    # Index both the pixel data and the label grid by the same (cycle, z) pair
    selection = (cfg.extract_cycle, cfg.extract_z)
    plane = tile[selection]
    channel_names = list(metadata['structured_labels'][selection])

    logger.info(
        'Loaded tile image for tile x = %s, tile y = %s, shape = %s, dtype = %s',
        tx, ty, plane.shape, plane.dtype)

    # Validate before anything is stored so an unsupported image never reaches db
    if not (plane.dtype == np.uint8 or plane.dtype == np.uint16):
        raise ValueError(
            'Only 8 or 16 bit images are supported (image type = {})'.format(
                plane.dtype))

    # Presumably (C, H, W) after the selection above -- confirm against read_tile
    db.put('images', 'tile', plane)
    db.put('channels', 'tile', channel_names)
    db.put('coords', 'tile', requested)
示例#2
0
文件: data.py 项目: wishgale/cytokit
    def add_cell_images(g):
        """Attach single cell images to the cytometry rows of one tile.

        Region and tile coordinates are taken from the first row of `g`; the
        cycle and z plane come from `kwargs` ('cycle' and 'z', each defaulting
        to 0). The result is `g` left-joined on 'id' with the generated cell
        image data.
        """
        # Cycle and z plane used to pick the 2D image out of the extract
        icyc = kwargs.get('cycle', 0)
        iz = kwargs.get('z', 0)

        # All rows are assumed to belong to one tile, so the first row locates the extract
        first_row = g.iloc[0]
        reg, tx, ty = first_row[['region_index', 'tile_x', 'tile_y']]
        relative_path = cytokit_io.get_extract_image_path(reg, tx, ty, extract)
        tile, metadata = cytokit_io.read_tile(
            osp.join(output_dir, relative_path), return_metadata=True)

        # The same image serves both cell object isolation and cell image display
        plane = tile[icyc, iz]
        channels = list(metadata['structured_labels'][icyc, iz])
        processor = cvproc.get_image_processor(
            channels, ranges=ranges, colors=colors)
        display_image = processor.run(plane)

        # Build the frame holding the original cell id, the cell image cut from
        # the processed image, and the associated cell image properties
        records = extract_single_cell_image_data(
            g, plane, display_image, channels, image_size=image_size)
        cell_data = pd.DataFrame(records)

        # Exactly one column (expected to be 'id') may be shared, otherwise the merge is ambiguous
        n_shared = g.columns.isin(cell_data.columns).sum()
        assert n_shared == 1, \
            'Cell data frame should only have one overlapping field with cytometry data frame;' \
            '\nCell fields = {}\nCytometry fields = {}'.format(cell_data.columns, g.columns)

        # Keep every cytometry row, even those without a generated cell image
        return pd.merge(g, cell_data, how='left', on='id')
示例#3
0
def create_montage(output_dir,
                   config,
                   extract,
                   name,
                   region_indexes,
                   prep_fn=None,
                   compress=6):
    """Build and save one montage image per region from extract tiles.

    For every requested region, all tiles of the given extract are read,
    combined with `montage`, optionally post-processed and written to the
    montage path for that region.

    Args:
        output_dir: Directory the extract tile paths are resolved against and
            under which montage files are written
        config: Experiment configuration; supplies `region_indexes` (when none
            are given), `n_tiles_per_region` and `get_tile_coordinates`, and is
            forwarded to `montage` and `cytokit_io.save_tile`
        extract: Name of the extract whose tiles are read
        name: Name used to build the montage file path
        region_indexes: Iterable of region indexes to process, or None to use
            `config.region_indexes` (presumably 0-based since the progress log
            prints index + 1 -- confirm against callers)
        prep_fn: Optional callable applied to each assembled montage before
            it is saved; its return value is what gets written
        compress: Compression setting forwarded to `cytokit_io.save_tile`
    """
    from cytokit.utils import ij_utils

    if region_indexes is None:
        region_indexes = config.region_indexes
    # Materialize once so the total is known even for one-shot iterables
    region_indexes = list(region_indexes)
    n_regions = len(region_indexes)

    montage_path = None
    for position, ireg in enumerate(region_indexes, start=1):
        # Report the region number and the progress count separately; a subset
        # such as [2, 5] would otherwise be logged as "region 3 of 2"
        logger.info('Generating montage for region %d (%d of %d)', ireg + 1,
                    position, n_regions)
        tiles = []
        labels = None
        for itile in range(config.n_tiles_per_region):
            tx, ty = config.get_tile_coordinates(itile)
            tile_path = osp.join(
                output_dir,
                cytokit_io.get_extract_image_path(ireg, tx, ty, extract))
            tile, meta = cytokit_io.read_tile(tile_path, return_metadata=True)
            # Labels are taken from the first tile read for the region
            if labels is None:
                labels = meta['labels']
            tiles.append(tile)

        reg_img_montage = montage(tiles, config)
        if prep_fn is not None:
            reg_img_montage = prep_fn(reg_img_montage)

        montage_path = osp.join(output_dir,
                                cytokit_io.get_montage_image_path(ireg, name))
        logger.info('Saving montage to file "%s"', montage_path)
        # No labels are available when the region has no tiles
        tags = [] if labels is None else ij_utils.get_slice_label_tags(labels)
        cytokit_io.save_tile(montage_path,
                             reg_img_montage,
                             config=config,
                             infer_labels=False,
                             extratags=tags,
                             compress=compress)

    # montage_path stays None only when there were no regions to process
    logger.info('Montage generation complete; results saved to "%s"',
                None if montage_path is None else osp.dirname(montage_path))
示例#4
0
    def add_cell_images(g):
        """Attach single cell images to the cytometry rows of one tile.

        Region and tile coordinates are taken from the first row of `g`. The z
        plane is read from that row's 'z' value when the enclosing `z` is None
        and is the enclosing `z` otherwise. Tiles are read through `tile_cache`
        keyed by file path. The result is `g` left-joined on 'id' with the
        generated cell image data.
        """
        first_row = g.iloc[0]
        reg, tx, ty = first_row[['region_index', 'tile_x', 'tile_y']]

        # A static z overrides whatever the data itself carries
        if z is None:
            iz = first_row['z']
        else:
            iz = z

        # Read each extract file once; later groups on the same tile reuse the cached result
        tile_path = osp.join(
            output_dir, cytokit_io.get_extract_image_path(reg, tx, ty, extract))
        if tile_path not in tile_cache:
            tile_cache[tile_path] = cytokit_io.read_tile(
                tile_path, return_metadata=True)
        tile, metadata = tile_cache[tile_path]

        # The same 2D image serves both cell object isolation and cell image display
        plane = tile[cycle, iz]
        channels = list(metadata['structured_labels'][cycle, iz])
        processor = cvproc.get_image_processor(
            channels, ranges=ranges, colors=colors)
        display_image = processor.run(plane)

        # Build the frame holding the original cell id, the cell image cut from
        # the processed image, and the associated cell image properties
        records = extract_single_cell_image_data(
            g, plane, display_image, channels, image_size=image_size, **kwargs)
        cell_data = pd.DataFrame(records)

        # Exactly one column (expected to be 'id') may be shared, otherwise the merge is ambiguous
        n_shared = g.columns.isin(cell_data.columns).sum()
        assert n_shared == 1, \
            'Cell data frame should only have one overlapping field with cytometry data frame;' \
            '\nCell fields = {}\nCytometry fields = {}'.format(cell_data.columns, g.columns)

        # Keep every cytometry row, even those without a generated cell image
        return pd.merge(g, cell_data, how='left', on='id')
示例#5
0
    def extract(self,
                name,
                channels,
                z='best',
                region_indexes=None,
                tile_indexes=None,
                raw_dir=None):
        """Create a new data extraction including raw, processed, and/or cytometric imaging data

        Args:
            name: Name of extraction to be created; this will be used to construct a result path like
                EXP_DIR/output/extract/`name`
            channels: List of strings indicating channel names (case-insensitive) prefixed by the source for that
                channel (e.g. proc_DAPI, raw_CD4, cyto_nucleus_boundary); available sources are:
                - "raw": Raw data images
                - "proc": Data generated as a result of preprocessing
                - "cyto": Cytometric object data (nuclei and cell boundaries)
            z: String or 1-based index selector for z indexes constructed as any of the following:
                - "best": Indicates that z slices should be inferred based on focal quality (default option)
                - "all": Indicates that a slice for all z-planes should be used
                - str or int: A single value will be interpreted as a single index
                - tuple: A 2-item or 3-item tuple forming the slice (start, stop[, step]); stop is inclusive
                - list: A list of integers will be used as is
            region_indexes: 1-based sequence of region indexes to process; can be specified as:
                - None: Region indexes will be inferred from experiment configuration
                - str or int: A single value will be interpreted as a single index
                - tuple: A 2-item or 3-item tuple forming the slice (start, stop[, step]); stop is inclusive
                - list: A list of integers will be used as is
            tile_indexes: 1-based sequence of tile indexes to process; has same semantics as `region_indexes`
            raw_dir: If using any channels sourced from raw data, this directory must be specified and should
                be equivalent to the same raw directory used during processing (i.e. nearly all operations like
                this are run relative to an `output_dir` -- the result of processing -- but in this case
                the original raw data path is needed as well)

        Raises:
            ValueError: If a channel sourced from raw data is requested and `raw_dir` is not provided
        """
        # Group the requested channels by source; sources are visited in sorted order
        channel_map = _map_channels(self.config, channels).groupby('source')
        channel_sources = sorted(channel_map.groups)

        z_slice_fn = _get_z_slice_fn(z, self.data_dir)
        region_indexes = cli.resolve_index_list_arg(region_indexes,
                                                    zero_based=True)
        tile_indexes = cli.resolve_index_list_arg(tile_indexes,
                                                  zero_based=True)

        logging.info('Creating extraction "%s"', name)

        tile_locations = _get_tile_locations(self.config, region_indexes,
                                             tile_indexes)

        extract_path = None
        for tile_number, loc in enumerate(tile_locations, start=1):
            logging.info('Extracting tile {} of {}'.format(
                tile_number, len(tile_locations)))

            # Selector used to crop out z-slices from the volumes loaded for this tile
            z_slice = z_slice_fn(loc.region_index, loc.tile_x, loc.tile_y)

            sub_tiles = []
            slice_labels = []
            for src in channel_sources:
                # Tile generators are configured identically for every source except
                # raw data, which has no pre-assembled tiles and lives in `raw_dir`
                is_raw = src == CH_SRC_RAW
                if is_raw and not raw_dir:
                    raise ValueError(
                        'When extracting raw data channels, the `raw_dir` argument must be provided'
                    )
                if is_raw:
                    tile_gen_dir, tile_gen_mode = raw_dir, 'raw'
                else:
                    tile_gen_dir, tile_gen_mode = self.data_dir, 'stack'

                tile = tile_generator.CytokitTileGenerator(
                    self.config,
                    tile_gen_dir,
                    loc.region_index,
                    loc.tile_index,
                    mode=tile_gen_mode,
                    path_fmt_name=PATH_FMT_MAP[src]).run(None)

                # Only raw images are passed through the crop step
                if is_raw:
                    tile = tile_crop.CytokitTileCrop(self.config).run(tile)

                # Sorting by channel name keeps the extract channel order deterministic
                source_channels = channel_map.get_group(src).sort_values(
                    'channel_name')
                for _, row in source_channels.iterrows():
                    cycle_index = row['cycle_index']
                    channel_index = row['channel_index']
                    channel_name = row['channel_name']

                    # Extract (z, h, w) subtile
                    sub_tile = tile[cycle_index, z_slice, channel_index]
                    logging.debug(
                        'Extraction for cycle %s, channel %s (%s), z slice %s, source "%s" complete (tile shape = %s)',
                        cycle_index, channel_index, channel_name, z_slice,
                        src, sub_tile.shape)
                    assert sub_tile.ndim == 3, \
                        'Expecting sub_tile to have 3 dimensions but got shape {}'.format(sub_tile.shape)
                    slice_labels.append('{}_{}'.format(src, channel_name))
                    sub_tiles.append(sub_tile)

            # Stacking on axis 1 gives (z, channels, h, w); the leading new axis then
            # yields the 5D layout (cycles, z, channels, h, w) with a single cycle
            extract_tile = np.stack(sub_tiles, axis=1)[np.newaxis]
            assert extract_tile.ndim == 5, \
                'Expecting extract tile to have 5 dimensions but got shape {}'.format(extract_tile.shape)

            extract_path = osp.join(
                self.data_dir,
                cytokit_io.get_extract_image_path(loc.region_index,
                                                  loc.tile_x, loc.tile_y,
                                                  name))
            logging.debug('Saving tile with shape %s (dtype = %s) to "%s"',
                          extract_tile.shape, extract_tile.dtype, extract_path)

            # Slice labels repeat across the z-dimension (there is only one time/cycle dimension)
            n_z = extract_tile.shape[1]
            slice_label_tags = ij_utils.get_channel_label_tags(slice_labels,
                                                               z=n_z,
                                                               t=1)
            cytokit_io.save_tile(extract_path,
                                 extract_tile,
                                 config=self.config,
                                 infer_labels=False,
                                 extratags=slice_label_tags)

        # extract_path is still None when no tile locations were processed
        logging.info('Extraction complete (results saved to %s)',
                     osp.dirname(extract_path) if extract_path else None)
示例#6
0
    def test_pipeline_01(self):
        """Run processing, operator and analysis steps on the small cellular marker
        experiment and check processor data, cytometry statistics, single cell
        images, segmentation masks and the montage."""
        out_dir = tempfile.mkdtemp(prefix='cytokit_test_pipeline_01_')
        print('Initialized output dir {} for pipeline test 01'.format(out_dir))

        exp_dir = osp.join(cytokit.test_data_dir, 'experiment',
                           'cellular-marker-small')
        raw_dir = osp.join(exp_dir, 'raw')
        val_dir = osp.join(exp_dir, 'validation')
        config_dir = osp.join(exp_dir, 'config')
        config = ck_config.load(config_dir)

        # Run processor first, then extractions/aggregations on its output
        processor.Processor(data_dir=raw_dir,
                            config_path=config_dir).run_all(output_dir=out_dir)
        post_args = dict(data_dir=out_dir, config_path=config_dir)
        operator.Operator(**post_args).run_all()
        analysis.Analysis(**post_args).run_all()

        # ##################### #
        # Processor Data Checks #
        # ##################### #
        drift_df = ck_fn.get_processor_data(out_dir)['drift_compensator']
        # Two cycles with one acting as the reference leaves a single drift comp record
        self.assertEqual(len(drift_df), 1)
        # The synthetic data was shifted by 12 rows and -3 columns
        self.assertEqual(drift_df.iloc[0]['translation'], [12, -3])

        focus_df = ck_fn.get_processor_data(out_dir)['focal_plane_selector']
        # Focal selection records are per-tile and the experiment has only 1 tile
        self.assertEqual(len(focus_df), 1)
        # The data was generated so that the second of 3 z planes has the best focus
        self.assertEqual(focus_df.iloc[0]['best_z'], 1)

        # ##################### #
        # Cytometry Stats Check #
        # ##################### #
        cyto_df = ck_fn.get_cytometry_data(out_dir, config, mode='best_z_plane')

        # Overall cell count and sizes must fall in the expected ranges
        n_cells = len(cyto_df)
        self.assertTrue(
            20 <= n_cells <= 25,
            'Expecting between 20 and 25 cells, found {} instead'.format(
                n_cells))
        nuc_diam = cyto_df['nucleus_diameter'].mean()
        cell_diam = cyto_df['cell_diameter'].mean()
        self.assertTrue(
            4 < nuc_diam < 6,
            'Expecting mean nucleus diameter in [4, 6] um, found {} instead'.
            format(nuc_diam))
        self.assertTrue(
            8 < cell_diam < 10,
            'Expecting mean cell diameter in [8, 10] um, found {} instead'.
            format(cell_diam))

        # Drift aligned DAPI channels should be nearly identical across cycles, but border cells
        # end up with dapi=0 for cval=0 in the drift compensation translation function, so the
        # check is made on a threshold (the ratio is < .5 with no drift compensation)
        dapi_cycle_1 = cyto_df['ni:DAPI1'].mean()
        dapi_cycle_2 = cyto_df['ni:DAPI2'].mean()
        dapi_ratio = dapi_cycle_2 / dapi_cycle_1
        self.assertTrue(
            .8 < dapi_ratio <= 1,
            'Expecting cycle 2 DAPI averages to be similar to cycle 1 DAPI after drift compensation, '
            'found ratio {} (not in (.8, 1])'.format(dapi_ratio))

        # All records must be for a single z plane (the one with known best focus)
        self.assertEqual(cyto_df['z'].nunique(), 1)
        self.assertEqual(int(cyto_df['z'].unique()[0]), 1)

        # Single cell image generation should give one RGB image per cell
        cell_img_df = ck_fn.get_single_cell_image_data(out_dir,
                                                       cyto_df,
                                                       'best_z_segm',
                                                       image_size=(64, 64))
        self.assertEqual(cell_img_df['image'].iloc[0].shape, (64, 64, 3))
        self.assertTrue(cell_img_df['image'].notnull().all())

        # ################## #
        # Segmentation Check #
        # ################## #
        # Load extract with object masks
        extract_path = osp.join(
            out_dir,
            ck_io.get_extract_image_path(ireg=0,
                                         tx=0,
                                         ty=0,
                                         name='best_z_segm'))
        img, meta = ck_io.read_tile(extract_path, return_metadata=True)
        labels = meta['labels']
        # The 8 channels set for extraction must show up in the resulting hyperstack
        self.assertEqual(len(labels), 8)

        # IoU for both nuclei and cell masks vs ground-truth must be > 80%
        img_seg_cell = img[0, 0, labels.index('cyto_cell_mask')]
        img_seg_nucl = img[0, 0, labels.index('cyto_nucleus_mask')]
        img_val_cell = sk_io.imread(osp.join(val_dir, 'cells.tif'))
        img_val_nucl = sk_io.imread(osp.join(val_dir, 'nuclei.tif'))

        def iou(im1, im2):
            # Any positive pixel counts as foreground
            fg1, fg2 = im1 > 0, im2 > 0
            intersection = (fg1 & fg2).sum()
            union = (fg1 | fg2).sum()
            return intersection / union

        self.assertGreater(iou(img_seg_cell, img_val_cell), .8)
        self.assertGreater(iou(img_seg_nucl, img_val_nucl), .8)

        # ############# #
        # Montage Check #
        # ############# #
        # With only one tile, the montage must match the extract image loaded
        # above in both dimensions and type
        montage_path = osp.join(
            out_dir, ck_io.get_montage_image_path(ireg=0, name='best_z_segm'))
        img_mntg = ck_io.read_tile(montage_path)
        self.assertEqual(img.shape, img_mntg.shape)
        self.assertEqual(img.dtype, img_mntg.dtype)
示例#7
0
文件: data.py 项目: wishgale/cytokit
def get_extract_image_meta(output_dir, extract):
    """Return the metadata read from one tile of the named extract.

    The tile at region index 0 and tile coordinates (0, 0) is read; only its
    metadata is returned and the image data is discarded.

    Args:
        output_dir: Directory the extract image path is resolved against
        extract: Name of the extract to inspect
    """
    relative_path = cytokit_io.get_extract_image_path(0, 0, 0, extract)
    _image, metadata = cytokit_io.read_tile(
        osp.join(output_dir, relative_path), return_metadata=True)
    return metadata