Example #1
File: utils.py Project: bzrry/imc
def get_panorama_images(mcd_file: Path,
                        output_file_prefix: Optional[Path] = None,
                        overwrite: bool = False) -> Optional[List[Array]]:
    import imageio

    byteoffset = 161

    mcd = McdParser(mcd_file)

    imgs = list()
    for slide in mcd.session.metadata["Panorama"]:
        start, end = (
            int(slide["ImageStartOffset"]),
            int(slide["ImageEndOffset"]),
        )
        img = mcd._get_buffer(start + byteoffset, end + byteoffset)
        if len(img) == 0:  # empty image
            continue
        if output_file_prefix is not None:
            output_file = output_file_prefix + f"_{slide['ID']}.png"
            if overwrite or (not output_file.exists()):
                with open(output_file, "wb") as f:
                    f.write(img)
        else:
            try:
                imgs.append(imageio.imread(img))
            except ValueError:
                continue
    mcd.close()
    if output_file_prefix is None:
        return imgs
    else:
        return None
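A minimal usage sketch for the helper above (the .mcd path is hypothetical): with no `output_file_prefix` the panoramas come back as in-memory arrays; with a prefix they would be written out as PNG files instead.

panoramas = get_panorama_images("example_acquisition.mcd")  # hypothetical file
if panoramas is not None:
    for i, panorama in enumerate(panoramas):
        print(i, panorama.shape)  # each entry is an image array decoded by imageio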
Example #2
class McdFileReader(FileReaderBase):
    def __init__(self, path: Union[str, Path]):
        super(McdFileReader, self).__init__(path)
        self._mcd_parser: Optional[McdParser] = None

    def _get_imc_file_panoramas(self, imc_file: IMCFileModel) -> List[IMCFilePanoramaModel]:
        return [
            IMCFilePanoramaModel(imc_file, panorama.id, panorama.image_type, panorama.description)
            for panorama in self._mcd_parser.session.panoramas.values() if panorama.image_type != 'Default'
        ]

    def _get_imc_file_acquisitions(self, imc_file: IMCFileModel) -> List[IMCFileAcquisitionModel]:
        return [
            IMCFileAcquisitionModel(imc_file, acquisition.id, acquisition.description, acquisition.channel_labels)
            for acquisition in self._mcd_parser.session.acquisitions.values() if acquisition.is_valid
        ]

    def read_panorama(self, panorama_id: int) -> Tuple[ImageDimensions, np.ndarray]:
        panorama = self._mcd_parser.session.panoramas[panorama_id]
        xs_physical = [panorama.x1, panorama.x2, panorama.x3, panorama.x4]
        ys_physical = [panorama.y1, panorama.y2, panorama.y3, panorama.y4]
        x_physical, y_physical = min(xs_physical), min(ys_physical)
        w_physical, h_physical = max(xs_physical) - x_physical, max(ys_physical) - y_physical
        data = imread(self._mcd_parser.get_panorama_image(panorama_id))
        if x_physical != panorama.x1:
            data = data[:, ::-1, :]
        if y_physical != panorama.y1:
            data = data[::-1, :, :]
        return (x_physical, y_physical, w_physical, h_physical), data

    def read_acquisition(self, acquisition_id: int, channel_label: str) -> Tuple[ImageDimensions, np.ndarray]:
        acquisition = self._mcd_parser.session.acquisitions[acquisition_id]
        xs_physical = [acquisition.roi_start_x_pos_um, acquisition.roi_end_x_pos_um]
        ys_physical = [acquisition.roi_start_y_pos_um, acquisition.roi_end_y_pos_um]
        x_physical, y_physical = min(xs_physical), min(ys_physical)
        w_physical, h_physical = max(xs_physical) - x_physical, max(ys_physical) - y_physical
        data = self._mcd_parser.get_acquisition_data(acquisition.id).get_image_by_label(channel_label)
        if x_physical != acquisition.roi_start_x_pos_um:
            data = data[:, ::-1]
        if y_physical != acquisition.roi_start_y_pos_um:
            data = data[::-1, :]
        return (x_physical, y_physical, w_physical, h_physical), data

    def __enter__(self) -> 'FileReaderBase':
        self._mcd_parser = McdParser(self._path)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self._mcd_parser.close()

    @classmethod
    def accepts(cls, path: Union[str, Path]) -> bool:
        return Path(path).suffix.lower() == '.mcd'
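A short sketch of how this reader might be used; the file name, panorama/acquisition IDs, and the channel label 'DNA1' are all hypothetical.

if McdFileReader.accepts("example.mcd"):
    with McdFileReader("example.mcd") as reader:
        # physical origin/size plus the panorama RGB array
        (px, py, pw, ph), panorama_img = reader.read_panorama(1)
        # single channel selected by its label
        (ax, ay, aw, ah), channel_img = reader.read_acquisition(1, "DNA1")
        print(panorama_img.shape, channel_img.shape)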
Example #3
 def test_read_imc_mcd(self, raw_path: Path):
     mcd_file_path = raw_path / '20210305_NE_mockData1' / '20210305_NE_mockData1.mcd'
     parser = McdParser(mcd_file_path)
     ac_data = parser.get_acquisition_data(1)
     assert parser.origin == "mcd"
     assert ac_data.is_valid is True
     assert ac_data.image_data.shape == (5, 60, 60)
     assert ac_data.n_channels == 5
     assert ac_data.channel_names == [
         'Ag107', 'Pr141', 'Sm147', 'Eu153', 'Yb172'
     ]
     assert ac_data.channel_labels == [
         '107Ag', 'Cytoker_651((3356))Pr141', 'Laminin_681((851))Sm147',
         'YBX1_2987((3532))Eu153', 'H3K27Ac_1977((2242))Yb172'
     ]
     assert ac_data.channel_masses == ['107', '141', '147', '153', '172']
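Building on the same test data, individual channel planes can be pulled from the AcquisitionData object as well (a sketch; the accessor names mirror those used in the other examples on this page).

plane_by_name = ac_data.get_image_by_name('Pr141')    # select a channel by metal name
plane_by_label = ac_data.get_image_by_label('107Ag')  # or by its label
assert plane_by_name.shape == (60, 60)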
Example #4
def mcdfile2ome(mcdfile: File) -> Path:
    mcdfile = Path(mcdfile)
    ome_folder = mcdfile.parent / Path(mcdfile.stem + '-ome')

    mcd_parser = McdParser(mcdfile)
    imc_writer = ImcWriter(ome_folder, mcd_parser)
    imc_writer.write_imc_folder(create_zip=False)

    return ome_folder
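A usage sketch (the input path is hypothetical); the "-ome" folder is created next to the input file and its path is returned.

ome_folder = mcdfile2ome("/data/runs/sample01.mcd")
print(ome_folder)  # /data/runs/sample01-ome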
Example #5
    def read_imc_mcd(
            path: Union[str, Path],
            acquisition_id: int,
            channel_names_attr: str = 'channel_names') -> 'MultichannelImage':
        """Creates a new :class:`MultichannelImage` from the specified Fluidigm(TM) MCD file

        Uses :class:`imctools.io.txt.mcdparser.McdParser` for reading .mcd files.

        :param path: path to the .mcd file
        :param acquisition_id: acquisition ID to read (unique across slides)
        :param channel_names_attr: :class:`imctools.data.AcquisitionData` attribute from which the channel names will be
            taken, e.g. ``'channel_labels'``
        :return: a new :class:`MultichannelImage` instance
        """
        path = Path(path)
        parser = McdParser(path)
        acquisition_data = parser.get_acquisition_data(acquisition_id)
        img_data = xr.DataArray(data=acquisition_data.image_data,
                                dims=('c', 'y', 'x'),
                                name=path.name)
        return MultichannelImage(img_data,
                                 channel_names=getattr(acquisition_data,
                                                       channel_names_attr))
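A usage sketch, assuming `read_imc_mcd` is exposed as a static/class-level constructor on `MultichannelImage` (file path and acquisition ID are hypothetical).

img = MultichannelImage.read_imc_mcd("example.mcd", acquisition_id=1,
                                     channel_names_attr="channel_labels")
# img wraps an xarray DataArray with dims ('c', 'y', 'x')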
Example #6
 def test_read_invalid_suffix(self):
     with pytest.raises(FileNotFoundError):
         McdParser('file.unsupported_suffix')
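Consistent with the test above, parser construction can be guarded so an unsupported or missing file does not abort a batch run (a defensive sketch; the file name is hypothetical).

try:
    parser = McdParser('file.unsupported_suffix')
except FileNotFoundError as error:
    print(f"Skipping file: {error}")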
Example #7
def mcd2ome(mcdfile: File,
            export: File,
            min_height: int = 10,
            min_width: int = 10,
            metadata: bool = False,
            slide: bool = False,
            channels: Optional[List[str]] = None,
            verbose: bool = False
            ):
    """Convert `.mcd` to `.ome.tiff` file

    Args:
        mcdfile:
        export:
        min_height:
        min_width:
        metadata:
        slide:
    """
    mcdfile = Path(mcdfile)
    export = Path(export)

    mcd_parser = McdParser(mcdfile)
    session = mcd_parser.session

    if export.is_file():
        raise NotADirectoryError(f"Cannot export to {export}, not a directory.")
    create_folder(export)

    mcd_xml = mcd_parser.get_mcd_xml()
    if metadata:
        meta_folder = create_folder(export / 'metadata')
        # Save XML metadata if available
        if mcd_xml is not None:
            with open(meta_folder / (session.metaname + "_schema.xml"), "wt") as f:
                f.write(mcd_xml)
        # Save session data in json
        session.save(meta_folder / (session.metaname + "_session.json"))

    if slide:
        slide_folder = create_folder(export / 'slide')
        for key in session.slides.keys():
            mcd_parser.save_slide_image(key, slide_folder)
        for key in session.panoramas.keys():
            mcd_parser.save_panorama_image(key, slide_folder)

    # Save acquisition images in OME-TIFF format
    for acquisition in session.acquisitions.values():
        if not ((acquisition.max_x < min_width) | (acquisition.max_y < min_height)):
            acquisition_data = mcd_parser.get_acquisition_data(acquisition.id)
            if acquisition_data.is_valid:
                # Calculate channels intensity range
                valid_channels = []
                for ch in acquisition.channels.values():
                    img = acquisition_data.get_image_by_name(ch.name)

                    if img is not None:
                        valid_channels.append(ch.name)
                #         ch.min_intensity = round(float(img.min()), 4)
                #         ch.max_intensity = round(float(img.max()), 4)

                export_name = f"{session.name}_slide{acquisition.slide.id}_ROI{acquisition.id}"
                export_name = export_name.replace(" ", "_")
                export_name = export_name.replace(".", "_")
                export_name += ".ome.tiff"
                if verbose:
                    print(f"{len(valid_channels)} channels in {export_name}", ", ".join(valid_channels))

                acquisition_data.save_ome_tiff(
                    export / export_name,
                    xml_metadata=mcd_xml,
                    names=channels,
                )
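A usage sketch for the converter above (paths are hypothetical).

mcd2ome(
    "raw/sample01.mcd",
    "export/sample01",
    min_height=10,
    min_width=10,
    metadata=True,
    slide=True,
    channels=None,   # forwarded as names= to save_ome_tiff
    verbose=True,
)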
Example #8
def inspect_mcd(mcd_file: Path, args: Args) -> Tuple[DataFrame, DataFrame]:
    cols = [
        "Target",
        "Metal_Tag",
        "Atom",
        "full",
        "ilastik",
    ]
    exclude_channels = ["EMPTY", "190BCKG", "80Ar", "89Y", "127I", "124Xe"]

    print(f"Started analyzing '{mcd_file}'!")

    mcd = McdParser(mcd_file)
    session = mcd.session

    # get channel labels
    ac_ids = session.acquisition_ids
    labels = pd.DataFrame({
        ac_id:
        cleanup_channel_names(session.acquisitions[ac_id].channel_labels)
        for ac_id in ac_ids
    })
    metals = pd.DataFrame(
        {ac_id: session.acquisitions[ac_id].channel_names
         for ac_id in ac_ids})
    channel_names = labels.replace({None: "<EMPTY>"}) + "(" + metals + ")"

    same_channels = bool(channel_names.nunique(1).replace(
        0, 1).all())  # np.bool is not serializable

    if same_channels:
        print("\t * All ROIs have the same markers/metals.")
        ch = channel_names.iloc[:, 0].rename("channel")
        ids = ch.str.extract(r"(?P<Target>.*)\((?P<Metal_Tag>.*)\)")
        ids.index = ch

        annot = pd.DataFrame(ids, columns=cols)
        annot["Atom"] = annot["Metal_Tag"].str.extract(r"(\d+)")[0]
        annot["full"] = (
            ~annot.index.str.contains("|".join(exclude_channels))).astype(int)
        annot["ilastik"] = (annot.index.str.contains("DNA")
                            | annot.index.str.startswith("CD")).astype(int)
        if not args.no_write:
            annot.to_csv(mcd_file.replace_(".mcd", ".channel_labels.csv"))
    else:
        annot = pd.DataFrame(columns=cols)
        print("\t * ROIs have different markers/metals.")

    # Save some metadata
    meta = session.get_csv_dict()
    meta["n_slides"] = len(session.slides)
    print(f"\t * Contains {meta['n_slides']} slides.")
    meta["n_panoramas"] = len(session.panoramas)
    print(f"\t * Contains {meta['n_panoramas']} panoramas.")
    meta["n_ROIs"] = len(session.acquisition_ids)
    print(f"\t * Contains {meta['n_ROIs']} ROIs.")
    meta["ROI_numbers"] = session.acquisition_ids
    meta["all_ROIs_same_channels"] = same_channels
    meta["consensus_channels"] = (channel_names.iloc[:, 0].to_dict()
                                  if same_channels else None)
    meta["panoramas"] = {
        p: v.get_csv_dict()
        for p, v in session.panoramas.items()
    }
    meta["acquisitions"] = {
        a: ac.get_csv_dict()
        for a, ac in session.acquisitions.items()
    }
    meta.update(session.metadata)
    if not args.no_write:
        yaml.dump(
            encode(meta),
            open(mcd_file.replace_(".mcd", ".session_metadata.yaml"), "w"),
            indent=4,
            default_flow_style=False,
            sort_keys=False,
        )

    mcd.close()
    print(f"Finished with '{mcd_file}'!")
    return meta, annot
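A usage sketch (the path is hypothetical; `Args` is assumed to be a simple namespace carrying at least the `no_write` flag used above).

from argparse import Namespace
from pathlib import Path

# with no_write=True nothing is written to disk, so a plain pathlib.Path is enough here
meta, annot = inspect_mcd(Path("panel1/sample01.mcd"), Namespace(no_write=True))
print(annot.head())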
Example #9
 def __enter__(self) -> 'FileReaderBase':
     self._mcd_parser = McdParser(self._path)
     return self
Example #10
        if create_zip:
            with zipfile.ZipFile(
                self.root_output_folder / (self.folder_name + IMC_ZIP_SUFFIX),
                "w",
                compression=zipfile.ZIP_DEFLATED,
                allowZip64=True,
            ) as imc_zip:
                for root, d, files in os.walk(str(output_folder)):
                    for fn in files:
                        imc_zip.write(os.path.join(root, fn), fn)
                        if remove_folder:
                            os.remove(os.path.join(root, fn))

        if remove_folder:
            os.removedirs(output_folder)


if __name__ == "__main__":
    import timeit

    tic = timeit.default_timer()

    with McdParser(
            "/home/anton/Documents/IMC Workshop 2019/Data/iMC_workshop_2019/20190919_FluidigmBrCa_SE/20190919_FluidigmBrCa_SE.mcd"
    ) as parser:
        imc_writer = ImcWriter("/home/anton/Downloads/imc_from_mcd", parser)
        imc_writer.write_imc_folder()

    print(timeit.default_timer() - tic)
Example #11
def mcdfolder_to_imcfolder(input: Union[str, Path],
                           output_folder: Union[str, Path],
                           create_zip: bool = False,
                           parse_txt: bool = False):
    """Converts folder (or zipped folder) containing raw acquisition data (mcd and txt files) to IMC folder containing standardized files.

    Parameters
    ----------
    input
        Input folder or .zip file with raw .mcd/.txt acquisition data files.
    output_folder
        Path to the output folder.
    create_zip
        Whether to create an output as .zip file.
    parse_txt
        Always use TXT files if present to get acquisition image data.
    """
    if isinstance(input, str):
        input = Path(input)
    tmpdir = None
    if input.is_file() and input.suffix == ZIP_FILENDING:
        tmpdir = TemporaryDirectory()
        with zipfile.ZipFile(input, allowZip64=True) as zip:
            zip.extractall(tmpdir.name)
        input_folder = Path(tmpdir.name)
    else:
        input_folder = input

    mcd_parser = None
    try:
        mcd_files = list(input_folder.rglob(f"*{MCD_FILENDING}"))
        mcd_files = [f for f in mcd_files if not f.name.startswith(".")]
        assert len(mcd_files) == 1
        input_folder = mcd_files[0].parent
        schema_files = glob.glob(str(input_folder / f"*{SCHEMA_FILENDING}"))
        schema_file = schema_files[0] if len(schema_files) > 0 else None
        try:
            mcd_parser = McdParser(mcd_files[0])
        except:
            if schema_file is not None:
                logging.error(
                    "MCD file is corrupted, trying to rescue with schema file")
                mcd_parser = McdParser(mcd_files[0],
                                       xml_metadata_filepath=schema_file)
            else:
                raise

        txt_files = glob.glob(str(input_folder /
                                  f"*[0-9]{TXT_FILE_EXTENSION}"))
        txt_acquisitions_map = {
            TxtParser.extract_acquisition_id(f): f
            for f in txt_files
        }

        imc_writer = ImcWriter(output_folder, mcd_parser, txt_acquisitions_map,
                               parse_txt)
        imc_writer.write_imc_folder(create_zip=create_zip)
    finally:
        if mcd_parser is not None:
            mcd_parser.close()
        if tmpdir is not None:
            tmpdir.cleanup()
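A usage sketch (paths are hypothetical); the input may be a plain folder or a .zip archive, as long as it contains exactly one .mcd file.

mcdfolder_to_imcfolder(
    "raw/20190919_FluidigmBrCa_SE.zip",
    "processed/20190919_FluidigmBrCa_SE",
    create_zip=False,
    parse_txt=False,
)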
Example #12
 def test_read_imc_mcd(self, raw_path: Path):
     mcd_file_path = raw_path / '20210305_NE_mockData1' / '20210305_NE_mockData1.mcd'
     mcd_parser = McdParser(mcd_file_path)
     xml = mcd_parser.get_mcd_xml()
     mcd_xml_parser = McdXmlParser(xml, str(mcd_file_path))
     assert mcd_xml_parser.session.name == "20210305_NE_mockData1"
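As a small follow-on sketch, the raw XML returned by get_mcd_xml() can also be written out directly, much as the mcd2ome converter above does (the output file name is hypothetical).

if xml is not None:
    with open('20210305_NE_mockData1_schema.xml', 'wt') as f:
        f.write(xml)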
Example #13
File: utils.py Project: bzrry/imc
def mcd_to_dir(
    mcd_file: Path,
    pannel_csv: Optional[Path] = None,
    ilastik_output: bool = True,
    ilastik_channels: Optional[List[str]] = None,
    output_dir: Optional[Path] = None,
    output_format: str = "tiff",
    overwrite: bool = False,
    sample_name: Optional[str] = None,
    partition_panels: bool = False,
    filter_full: bool = True,
    export_panoramas: bool = True,
    keep_original_roi_names: bool = False,
    allow_empty_rois: bool = True,
    only_crops: bool = False,
    n_crops: int = 5,
    crop_width: int = 500,
    crop_height: int = 500,
) -> None:
    def get_dataframe_from_channels(mcd):
        return pd.DataFrame(
            [mcd.get_acquisition_channels(x) for x in session.acquisition_ids],
            index=session.acquisition_ids,
        )

    def all_channels_equal(mcd):
        chs = get_dataframe_from_channels(mcd)
        return all([(chs[c].value_counts() == mcd.n_acquisitions).all()
                    for c in chs.columns])

    def get_panel_partitions(mcd):
        chs = get_dataframe_from_channels(mcd)

        partitions = {k: {k} for k in chs.drop_duplicates().index}
        for p in partitions:
            for _, row in chs.iterrows():
                print(p, row.name)
                if (row == chs.loc[list(partitions[p])[0]]).all():
                    partitions[p] = partitions[p].union(set([row.name]))
        return partitions.values()

    def clip_hot_pixels(img, hp_filter_shape=(3, 3), hp_threshold=0.0001):
        if hp_filter_shape[0] % 2 != 1 or hp_filter_shape[1] % 2 != 1:
            raise ValueError("Invalid hot pixel filter shape: %s" %
                             str(hp_filter_shape))
        hp_filter_footprint = np.ones(hp_filter_shape)
        hp_filter_footprint[int(hp_filter_shape[0] / 2),
                            int(hp_filter_shape[1] / 2)] = 0
        max_img = ndi.maximum_filter(img,
                                     footprint=hp_filter_footprint,
                                     mode="reflect")
        hp_mask = img - max_img > hp_threshold
        img = img.copy()
        img[hp_mask] = max_img[hp_mask]
        return img

    if partition_panels:
        raise NotImplementedError(
            "Partitioning sample per panel is not implemented yet.")

    if pannel_csv is None and ilastik_channels is None:
        raise ValueError(
            "One of `pannel_csv` or `ilastik_channels` must be given!")
    if ilastik_channels is None and pannel_csv is not None:
        panel = pd.read_csv(pannel_csv, index_col=0)
        ilastik_channels = panel.query("ilastik == 1").index.tolist()

    H5_YXC_AXISTAG = json.dumps({
        "axes": [
            {
                "key": "y",
                "typeFlags": 2,
                "resolution": 0,
                "description": "",
            },
            {
                "key": "x",
                "typeFlags": 2,
                "resolution": 0,
                "description": "",
            },
            {
                "key": "c",
                "typeFlags": 1,
                "resolution": 0,
                "description": "",
            },
        ]
    })

    if output_dir is None:
        output_dir = mcd_file.parent / "imc_dir"
    output_dir.mkdir(exist_ok=True, parents=True)
    dirs = ["tiffs"] + (["ilastik"] if ilastik_output else [])
    for _dir in dirs:
        (output_dir / _dir).mkdir(exist_ok=True)

    # Export panoramas
    if export_panoramas:
        get_panorama_images(
            mcd_file,
            output_file_prefix=output_dir / "Panorama",
            overwrite=overwrite,
        )

    # Parse MCD
    mcd = McdParser(mcd_file)
    session = mcd.session

    if sample_name is None:
        sample_name = session.name

    for i, ac_id in enumerate(session.acquisition_ids):
        print(ac_id, end="\t")
        try:
            ac = mcd.get_acquisition_data(ac_id)
        except Exception as e:  # imctools.io.abstractparserbase.AcquisitionError
            if allow_empty_rois:
                print(e)
                continue
            raise e

        # Get output prefix
        if keep_original_roi_names:
            prefix = (output_dir / "tiffs" /
                      (session.name.replace(" ", "_") + "_ac"))
        else:
            prefix = (output_dir / "tiffs" /
                      (sample_name + "-" + str(i + 1).zfill(2)))

        # Skip if not overwrite
        file_ending = "ome.tiff" if output_format == "ome-tiff" else "tiff"
        if (prefix + "_full." + file_ending).exists() and not overwrite:
            print(
                "TIFF images exist and overwrite is set to `False`. Continuing."
            )
            continue

        # Filter channels
        channel_labels = build_channel_name(ac.channel_labels,
                                            ac.channel_names)
        to_exp = channel_labels[channel_labels.isin(ilastik_channels)]
        to_exp_ind = [
            ac.channel_masses.index(y)
            for y in to_exp.str.extract(r".*\(..(\d+)\)")[0]
        ]
        assert to_exp_ind == to_exp.index.tolist()

        if filter_full:
            # remove background and empty channels
            # TODO: find way to do this more systematically
            channel_labels = channel_labels[~(
                channel_labels.str.contains(r"^\d")
                | channel_labels.str.contains("<EMPTY>"))].reset_index(
                    drop=True)

        # Filter hot pixels
        ac._image_data = np.asarray(
            [clip_hot_pixels(x) for x in ac.image_data])

        # Output file prefix for the full stack (also used for the channel-label CSV below)
        p = prefix + "_full."

        # Save full image
        if not only_crops:
            if output_format == "tiff":
                if (overwrite) or not (p + file_ending).exists():
                    ac.save_tiff(
                        p + file_ending,
                        names=channel_labels.str.extract(r"\((.*)\)")[0],
                    )
            elif output_format == "ome-tiff":
                if (overwrite) or not (p + file_ending).exists():
                    ac.save_ome_tiff(
                        p + file_ending,
                        names=channel_labels.str.extract(r"\((.*)\)")[0],
                        xml_metadata=mcd.get_mcd_xml(),
                    )
        # Save channel labels for the stack
        if (overwrite) or not (p + "csv").exists():
            channel_labels.to_csv(p + "csv")

        if not ilastik_output:
            continue

        # Make input for ilastik training
        # # zoom 2x
        s = tuple(x * 2 for x in ac.image_data.shape[1:])
        full = np.moveaxis(
            np.asarray([resize(x, s) for x in ac.image_data[to_exp_ind]]), 0,
            -1)

        # # Save input for ilastik prediction
        with h5py.File(prefix + "_ilastik_s2.h5", mode="w") as handle:
            d = handle.create_dataset("stacked_channels", data=full)
            d.attrs["axistags"] = H5_YXC_AXISTAG

        # # random crops
        iprefix = (output_dir / "ilastik" /
                   (sample_name.replace(" ", "_") + "_ac"))
        # # # make sure height/width are smaller or equal to acquisition dimensions
        if (full.shape[1] < crop_width) or (full.shape[0] < crop_height):
            msg = "Image is smaller than the requested crop size for ilastik training."
            print(msg)
            continue
        for _ in range(n_crops):
            x = np.random.choice(range(s[0] - crop_width))
            y = np.random.choice(range(s[1] - crop_height))
            crop = full[x:(x + crop_width), y:(y + crop_height), :]
            assert crop.shape == (crop_width, crop_height, len(to_exp))
            with h5py.File(
                    iprefix +
                    f"_ilastik_x{x}_y{y}_w{crop_width}_h{crop_height}.h5",
                    mode="w",
            ) as handle:
                d = handle.create_dataset("stacked_channels", data=crop)
                d.attrs["axistags"] = H5_YXC_AXISTAG

    print("")  # add a newline to the tabs
    mcd.close()
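A usage sketch for the pipeline above. Paths and the ilastik channel list are hypothetical, and either `pannel_csv` or `ilastik_channels` must be given; note that the `prefix + "_full."` style concatenation in the body implies the project's own `Path` variant rather than plain `pathlib.Path`, so the arguments are assumed to be of that type.

mcd_to_dir(
    Path("raw/sample01.mcd"),
    ilastik_channels=["DNA1(Ir191)", "DNA2(Ir193)"],  # hypothetical channel names
    output_dir=Path("processed/sample01"),
    output_format="tiff",
    overwrite=False,
)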