Python McdParser.close示例，imctools.io.mcd.mcdparser.McdParser.close Python示例

示例#1

0

显示文件

文件： utils.py 项目： bzrry/imc

def get_panorama_images(mcd_file: Path,
                        output_file_prefix: Path = None,
                        overwrite: bool = False) -> Optional[List[Array]]:
    """Extract the panorama images stored in an MCD file.

    Parameters
    ----------
    mcd_file
        Path of the MCD file to read.
    output_file_prefix
        If given, each non-empty panorama buffer is written verbatim to
        ``<output_file_prefix>_<panorama ID>.png`` and nothing is returned.
        If ``None``, the buffers are decoded with ``imageio.imread`` and
        returned instead.
    overwrite
        Whether existing output files are rewritten.

    Returns
    -------
    A list of decoded images when ``output_file_prefix`` is ``None``
    (buffers that fail to decode with ``ValueError`` are skipped),
    otherwise ``None``.
    """
    import imageio

    # Constant shift applied to both offsets recorded in the metadata.
    # NOTE(review): presumably the size of a header preceding each image
    # buffer - confirm against the MCD format.
    byteoffset = 161

    mcd = McdParser(mcd_file)

    imgs = list()
    # try/finally guarantees the parser is closed even if reading,
    # decoding or writing a panorama raises.
    try:
        for slide in mcd.session.metadata["Panorama"]:
            start, end = (
                int(slide["ImageStartOffset"]),
                int(slide["ImageEndOffset"]),
            )
            img = mcd._get_buffer(start + byteoffset, end + byteoffset)
            if len(img) == 0:  # empty image
                continue
            if output_file_prefix is not None:
                # NOTE(review): relies on the project's Path type supporting
                # `+` with a string (pathlib.Path does not) - confirm.
                output_file = output_file_prefix + f"_{slide['ID']}.png"
                if overwrite or (not output_file.exists()):
                    with open(output_file, "wb") as f:
                        f.write(img)
            else:
                try:
                    imgs.append(imageio.imread(img))
                except ValueError:
                    # Buffer could not be decoded as an image; skip it.
                    continue
    finally:
        mcd.close()

    if output_file_prefix is None:
        return imgs
    return None

示例#2

0

显示文件

文件： mcd.py 项目： BodenmillerGroup/napari-imc

class McdFileReader(FileReaderBase):
    """File reader backed by an ``McdParser``.

    The parser is created in ``__enter__`` and closed in ``__exit__``, so the
    reading methods are meant to be called inside a ``with`` block.
    """

    def __init__(self, path: Union[str, Path]):
        super().__init__(path)
        # Populated by __enter__.
        self._mcd_parser: Optional[McdParser] = None

    def _get_imc_file_panoramas(self, imc_file: IMCFileModel) -> List[IMCFilePanoramaModel]:
        """Build a model for every panorama whose image type is not 'Default'."""
        models = []
        for panorama in self._mcd_parser.session.panoramas.values():
            if panorama.image_type != 'Default':
                models.append(
                    IMCFilePanoramaModel(imc_file, panorama.id, panorama.image_type, panorama.description)
                )
        return models

    def _get_imc_file_acquisitions(self, imc_file: IMCFileModel) -> List[IMCFileAcquisitionModel]:
        """Build a model for every acquisition flagged as valid."""
        models = []
        for acquisition in self._mcd_parser.session.acquisitions.values():
            if acquisition.is_valid:
                models.append(
                    IMCFileAcquisitionModel(
                        imc_file, acquisition.id, acquisition.description, acquisition.channel_labels
                    )
                )
        return models

    def read_panorama(self, panorama_id: int) -> Tuple[ImageDimensions, np.ndarray]:
        """Return ``(x, y, width, height)`` and the image data of a panorama.

        The bounding box is the min/max over the four stored corner points.
        The image is mirrored along an axis whenever the first corner is not
        the minimum along that axis.
        """
        pano = self._mcd_parser.session.panoramas[panorama_id]
        corner_xs = [pano.x1, pano.x2, pano.x3, pano.x4]
        corner_ys = [pano.y1, pano.y2, pano.y3, pano.y4]
        left = min(corner_xs)
        top = min(corner_ys)
        width = max(corner_xs) - left
        height = max(corner_ys) - top
        image = imread(self._mcd_parser.get_panorama_image(panorama_id))
        if left != pano.x1:
            image = image[:, ::-1, :]
        if top != pano.y1:
            image = image[::-1, :, :]
        return (left, top, width, height), image

    def read_acquisition(self, acquisition_id: int, channel_label: str) -> Tuple[ImageDimensions, np.ndarray]:
        """Return ``(x, y, width, height)`` and one channel image of an acquisition.

        The bounding box spans the ROI start/end positions. The image is
        mirrored along an axis whenever the ROI start is not the minimum
        along that axis.
        """
        acq = self._mcd_parser.session.acquisitions[acquisition_id]
        roi_xs = [acq.roi_start_x_pos_um, acq.roi_end_x_pos_um]
        roi_ys = [acq.roi_start_y_pos_um, acq.roi_end_y_pos_um]
        left = min(roi_xs)
        top = min(roi_ys)
        width = max(roi_xs) - left
        height = max(roi_ys) - top
        acquisition_data = self._mcd_parser.get_acquisition_data(acq.id)
        image = acquisition_data.get_image_by_label(channel_label)
        if left != acq.roi_start_x_pos_um:
            image = image[:, ::-1]
        if top != acq.roi_start_y_pos_um:
            image = image[::-1, :]
        return (left, top, width, height), image

    def __enter__(self) -> 'FileReaderBase':
        """Open the MCD parser for the configured path."""
        self._mcd_parser = McdParser(self._path)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the MCD parser opened by ``__enter__``."""
        self._mcd_parser.close()

    @classmethod
    def accepts(cls, path: Union[str, Path]) -> bool:
        """Tell whether ``path`` has a (case-insensitive) ``.mcd`` suffix."""
        suffix = Path(path).suffix
        return suffix.lower() == '.mcd'

示例#3

0

显示文件

文件： inspect_mcds.py 项目： bzrry/imc

def inspect_mcd(mcd_file: Path, args: Args) -> Tuple[DataFrame, DataFrame]:
    """Summarise the channels and session metadata of one MCD file.

    Unless ``args.no_write`` is set, two files are written next to the MCD
    file: ``*.channel_labels.csv`` (only when all ROIs share the same
    channels) and ``*.session_metadata.yaml``.

    Parameters
    ----------
    mcd_file
        Path of the MCD file to inspect.
    args
        Options object; only ``args.no_write`` is read here.

    Returns
    -------
    ``(meta, annot)`` where ``meta`` is the session metadata mapping
    extended with summary fields, and ``annot`` is the channel annotation
    table (empty when ROIs have different channels).
    NOTE(review): the signature annotates ``meta`` as a DataFrame, but it is
    whatever ``session.get_csv_dict()`` returns - confirm.
    """
    cols = [
        "Target",
        "Metal_Tag",
        "Atom",
        "full",
        "ilastik",
    ]
    exclude_channels = ["EMPTY", "190BCKG", "80Ar", "89Y", "127I", "124Xe"]

    print(f"Started analyzing '{mcd_file}'!")

    mcd = McdParser(mcd_file)
    # try/finally guarantees the parser is closed even when inspection or
    # writing fails part-way through.
    try:
        session = mcd.session

        # get channel labels
        ac_ids = session.acquisition_ids
        labels = pd.DataFrame({
            ac_id:
            cleanup_channel_names(session.acquisitions[ac_id].channel_labels)
            for ac_id in ac_ids
        })
        metals = pd.DataFrame(
            {ac_id: session.acquisitions[ac_id].channel_names
             for ac_id in ac_ids})
        channel_names = labels.replace({None: "<EMPTY>"}) + "(" + metals + ")"

        # A row with a single distinct value means every ROI agrees on that
        # channel; rows counting 0 are mapped to 1 so they do not break the
        # check.
        same_channels = bool(channel_names.nunique(axis=1).replace(
            0, 1).all())  # np.bool is not serializable

        if same_channels:
            print("\t * All ROIs have the same markers/metals.")
            ch = channel_names.iloc[:, 0].rename("channel")
            ids = ch.str.extract(r"(?P<Target>.*)\((?P<Metal_Tag>.*)\)")
            ids.index = ch

            annot = pd.DataFrame(ids, columns=cols)
            annot["Atom"] = annot["Metal_Tag"].str.extract(r"(\d+)")[0]
            annot["full"] = (
                ~annot.index.str.contains("|".join(exclude_channels))
            ).astype(int)
            annot["ilastik"] = (annot.index.str.contains("DNA")
                                | annot.index.str.startswith("CD")).astype(int)
            if not args.no_write:
                annot.to_csv(mcd_file.replace_(".mcd", ".channel_labels.csv"))
        else:
            annot = pd.DataFrame(columns=cols)
            print("\t * ROIs have different markers/metals.")

        # Save some metadata
        meta = session.get_csv_dict()
        meta["n_slides"] = len(session.slides)
        print(f"\t * Contains {meta['n_slides']} slides.")
        meta["n_panoramas"] = len(session.panoramas)
        print(f"\t * Contains {meta['n_panoramas']} panoramas.")
        meta["n_ROIs"] = len(session.acquisition_ids)
        print(f"\t * Contains {meta['n_ROIs']} ROIs.")
        meta["ROI_numbers"] = session.acquisition_ids
        meta["all_ROIs_same_channels"] = same_channels
        meta["consensus_channels"] = (channel_names.iloc[:, 0].to_dict()
                                      if same_channels else None)
        meta["panoramas"] = {
            p: v.get_csv_dict()
            for p, v in session.panoramas.items()
        }
        meta["acquisitions"] = {
            a: ac.get_csv_dict()
            for a, ac in session.acquisitions.items()
        }
        meta.update(session.metadata)
        if not args.no_write:
            # Encode before opening so a failure here does not leave an
            # empty output file behind.
            encoded = encode(meta)
            yaml_file = mcd_file.replace_(".mcd", ".session_metadata.yaml")
            # Context manager closes (and flushes) the handle, which the
            # previous bare open() never did.
            with open(yaml_file, "w") as handle:
                yaml.dump(
                    encoded,
                    handle,
                    indent=4,
                    default_flow_style=False,
                    sort_keys=False,
                )
    finally:
        mcd.close()

    print(f"Finished with '{mcd_file}'!")
    return meta, annot

示例#4

0

显示文件

文件： mcdfolder2imcfolder.py 项目： BodenmillerGroup/imctools

def mcdfolder_to_imcfolder(input: Union[str, Path],
                           output_folder: Union[str, Path],
                           create_zip: bool = False,
                           parse_txt: bool = False):
    """Converts folder (or zipped folder) containing raw acquisition data (mcd and txt files) to IMC folder containing standardized files.

    Parameters
    ----------
    input
        Input folder / .zip file with  raw .mcd/.txt acquisition data files.
    output_folder
        Path to the output folder.
    create_zip
        Whether to create an output as .zip file.
    parse_txt
        Always use TXT files if present to get acquisition image data.

    Raises
    ------
    AssertionError
        If the input does not contain exactly one (non-hidden) MCD file.
    """
    if isinstance(input, str):
        input = Path(input)
    tmpdir = None
    mcd_parser = None
    try:
        # Extraction happens inside the try block so the temporary directory
        # is cleaned up even when the archive cannot be extracted.
        if input.is_file() and input.suffix == ZIP_FILENDING:
            tmpdir = TemporaryDirectory()
            with zipfile.ZipFile(input, allowZip64=True) as archive:
                archive.extractall(tmpdir.name)
            input_folder = Path(tmpdir.name)
        else:
            input_folder = input

        mcd_files = list(input_folder.rglob(f"*{MCD_FILENDING}"))
        # Ignore hidden files.
        mcd_files = [f for f in mcd_files if not f.name.startswith(".")]
        # Explicit raise instead of `assert` so the check survives `python -O`;
        # the exception type is kept for callers that rely on it.
        if len(mcd_files) != 1:
            raise AssertionError(
                f"Expected exactly one MCD file in '{input_folder}', "
                f"found {len(mcd_files)}")
        # Sibling files are looked up next to the MCD file itself.
        input_folder = mcd_files[0].parent
        schema_files = glob.glob(str(input_folder / f"*{SCHEMA_FILENDING}"))
        schema_file = schema_files[0] if len(schema_files) > 0 else None
        try:
            mcd_parser = McdParser(mcd_files[0])
        except Exception:
            # Only real errors trigger the rescue path; KeyboardInterrupt and
            # SystemExit propagate instead of being retried.
            if schema_file is not None:
                logging.error(
                    "MCD file is corrupted, trying to rescue with schema file")
                mcd_parser = McdParser(mcd_files[0],
                                       xml_metadata_filepath=schema_file)
            else:
                raise

        txt_files = glob.glob(str(input_folder /
                                  f"*[0-9]{TXT_FILE_EXTENSION}"))
        txt_acquisitions_map = {
            TxtParser.extract_acquisition_id(f): f
            for f in txt_files
        }

        imc_writer = ImcWriter(output_folder, mcd_parser, txt_acquisitions_map,
                               parse_txt)
        imc_writer.write_imc_folder(create_zip=create_zip)
    finally:
        if mcd_parser is not None:
            mcd_parser.close()
        if tmpdir is not None:
            tmpdir.cleanup()

示例#5

0

显示文件

文件： utils.py 项目： bzrry/imc

def mcd_to_dir(
    mcd_file: Path,
    pannel_csv: Path = None,
    ilastik_output: bool = True,
    ilastik_channels: List[str] = None,
    output_dir: Path = None,
    output_format: str = "tiff",
    overwrite: bool = False,
    sample_name: str = None,
    partition_panels: bool = False,
    filter_full: bool = True,
    export_panoramas: bool = True,
    keep_original_roi_names: bool = False,
    allow_empty_rois: bool = True,
    only_crops: bool = False,
    n_crops: int = 5,
    crop_width: int = 500,
    crop_height: int = 500,
) -> None:
    """Export the acquisitions of an MCD file to a directory.

    For every acquisition this writes, under ``output_dir``:

    * ``tiffs/<prefix>_full.tiff`` (or ``.ome.tiff``) unless ``only_crops``,
    * ``tiffs/<prefix>_full.csv`` with the channel labels,
    * when ``ilastik_output`` is set, a 2x-upscaled HDF5 stack of the ilastik
      channels plus ``n_crops`` random crops of it under ``ilastik/``.

    Parameters
    ----------
    mcd_file
        MCD file to convert.
    pannel_csv
        CSV whose rows with ``ilastik == 1`` define the ilastik channels.
        Only read when ``ilastik_channels`` is not given.
    ilastik_output
        Whether to write the ilastik inputs.
    ilastik_channels
        Channel names to export for ilastik.
    output_dir
        Output directory; defaults to ``<mcd_file parent>/imc_dir``.
    output_format
        ``"tiff"`` or ``"ome-tiff"``.
    overwrite
        Whether existing outputs are rewritten.
    sample_name
        Name used in output prefixes; defaults to the session name.
    partition_panels
        Not implemented.
    filter_full
        Drop channels whose label starts with a digit or contains
        ``<EMPTY>`` from the channel labels.
    export_panoramas
        Whether to also export the panorama images.
    keep_original_roi_names
        Use the session name rather than ``sample_name`` plus a running
        number as output prefix.
    allow_empty_rois
        Skip (instead of failing on) acquisitions that cannot be read.
    only_crops
        Skip writing the full TIFF image.
    n_crops, crop_width, crop_height
        Number and size of the random crops written for ilastik training.

    Raises
    ------
    NotImplementedError
        If ``partition_panels`` is true.
    ValueError
        If neither ``pannel_csv`` nor ``ilastik_channels`` is given.
    """
    # NOTE: the three helpers below read `session` from the enclosing scope
    # and are not called anywhere in this function at the moment.
    def get_dataframe_from_channels(mcd):
        return pd.DataFrame(
            [mcd.get_acquisition_channels(x) for x in session.acquisition_ids],
            index=session.acquisition_ids,
        )

    def all_channels_equal(mcd):
        chs = get_dataframe_from_channels(mcd)
        return all([(chs[c].value_counts() == mcd.n_acquisitions).all()
                    for c in chs.columns])

    def get_panel_partitions(mcd):
        chs = get_dataframe_from_channels(mcd)

        partitions = {k: set(k) for k in chs.drop_duplicates().index}
        for p in partitions:
            for _, row in chs.iterrows():
                print(p, row.name)
                if (row == chs.loc[list(partitions[p])[0]]).all():
                    partitions[p] = partitions[p].union(set([row.name]))
        return partitions.values()

    def clip_hot_pixels(img, hp_filter_shape=(3, 3), hp_threshold=0.0001):
        """Clip pixels exceeding the maximum of their neighbours.

        A pixel that is more than ``hp_threshold`` above the maximum of its
        neighbourhood (centre excluded) is replaced by that maximum.
        """
        if hp_filter_shape[0] % 2 != 1 or hp_filter_shape[1] % 2 != 1:
            raise ValueError("Invalid hot pixel filter shape: %s" %
                             str(hp_filter_shape))
        hp_filter_footprint = np.ones(hp_filter_shape)
        # Exclude the centre pixel from its own neighbourhood.
        hp_filter_footprint[int(hp_filter_shape[0] / 2),
                            int(hp_filter_shape[1] / 2)] = 0
        max_img = ndi.maximum_filter(img,
                                     footprint=hp_filter_footprint,
                                     mode="reflect")
        hp_mask = img - max_img > hp_threshold
        img = img.copy()
        img[hp_mask] = max_img[hp_mask]
        return img

    if partition_panels:
        raise NotImplementedError(
            "Partitioning sample per panel is not implemented yet.")

    if pannel_csv is None and ilastik_channels is None:
        raise ValueError(
            "One of `pannel_csv` or `ilastik_channels` must be given!")
    if ilastik_channels is None and pannel_csv is not None:
        panel = pd.read_csv(pannel_csv, index_col=0)
        ilastik_channels = panel.query("ilastik == 1").index.tolist()

    # Axis description stored with every HDF5 dataset written below.
    H5_YXC_AXISTAG = json.dumps({
        "axes": [
            {
                "key": "y",
                "typeFlags": 2,
                "resolution": 0,
                "description": "",
            },
            {
                "key": "x",
                "typeFlags": 2,
                "resolution": 0,
                "description": "",
            },
            {
                "key": "c",
                "typeFlags": 1,
                "resolution": 0,
                "description": "",
            },
        ]
    })

    if output_dir is None:
        output_dir = mcd_file.parent / "imc_dir"
    output_dir.mkdir(exist_ok=True, parents=True)
    dirs = ["tiffs"] + (["ilastik"] if ilastik_output else [])
    for _dir in dirs:
        (output_dir / _dir).mkdir(exist_ok=True)

    # Export panoramas
    if export_panoramas:
        get_panorama_images(
            mcd_file,
            output_file_prefix=output_dir / "Panorama",
            overwrite=overwrite,
        )

    # Parse MCD
    mcd = McdParser(mcd_file)
    # try/finally guarantees the parser is closed even if an acquisition
    # fails to export.
    try:
        session = mcd.session

        if sample_name is None:
            sample_name = session.name

        for i, ac_id in enumerate(session.acquisition_ids):
            print(ac_id, end="\t")
            try:
                ac = mcd.get_acquisition_data(ac_id)
            except Exception as e:  # imctools.io.abstractparserbase.AcquisitionError
                if allow_empty_rois:
                    print(e)
                    continue
                raise

            # Get output prefix
            # NOTE(review): with `keep_original_roi_names` the prefix does not
            # depend on the acquisition, so every ROI maps to the same file
            # names - confirm this is intended.
            if keep_original_roi_names:
                prefix = (output_dir / "tiffs" /
                          (session.name.replace(" ", "_") + "_ac"))
            else:
                prefix = (output_dir / "tiffs" /
                          (sample_name + "-" + str(i + 1).zfill(2)))

            # Defined unconditionally: it is needed for the channel-label CSV
            # below even when `only_crops` skips the full image.
            p = prefix + "_full."

            # Skip if not overwrite
            file_ending = "ome.tiff" if output_format == "ome-tiff" else "tiff"
            if (p + file_ending).exists() and not overwrite:
                print(
                    "TIFF images exist and overwrite is set to `False`. Continuing."
                )
                continue

            # Filter channels
            channel_labels = build_channel_name(ac.channel_labels,
                                                ac.channel_names)
            to_exp = channel_labels[channel_labels.isin(ilastik_channels)]
            to_exp_ind = [
                ac.channel_masses.index(y)
                for y in to_exp.str.extract(r".*\(..(\d+)\)")[0]
            ]
            assert to_exp_ind == to_exp.index.tolist()

            if filter_full:
                # remove background and empty channels
                # TODO: find way to do this more systematically
                channel_labels = channel_labels[~(
                    channel_labels.str.contains(r"^\d")
                    | channel_labels.str.contains("<EMPTY>"))].reset_index(
                        drop=True)

            # Filter hot pixels
            ac._image_data = np.asarray(
                [clip_hot_pixels(x) for x in ac.image_data])

            # Save full image
            if not only_crops:
                if output_format == "tiff":
                    if (overwrite) or not (p + file_ending).exists():
                        ac.save_tiff(
                            p + file_ending,
                            names=channel_labels.str.extract(r"\((.*)\)")[0],
                        )
                elif output_format == "ome-tiff":
                    if (overwrite) or not (p + file_ending).exists():
                        ac.save_ome_tiff(
                            p + file_ending,
                            names=channel_labels.str.extract(r"\((.*)\)")[0],
                            xml_metadata=mcd.get_mcd_xml(),
                        )
            # Save channel labels for the stack
            if (overwrite) or not (p + "csv").exists():
                channel_labels.to_csv(p + "csv")

            if not ilastik_output:
                continue

            # Make input for ilastik training
            # # zoom 2x
            s = tuple(x * 2 for x in ac.image_data.shape[1:])
            full = np.moveaxis(
                np.asarray([resize(x, s) for x in ac.image_data[to_exp_ind]]),
                0, -1)

            # # Save input for ilastik prediction
            with h5py.File(prefix + "_ilastik_s2.h5", mode="w") as handle:
                d = handle.create_dataset("stacked_channels", data=full)
                d.attrs["axistags"] = H5_YXC_AXISTAG

            # # random crops
            iprefix = (output_dir / "ilastik" /
                       (sample_name.replace(" ", "_") + "_ac"))
            # # # make sure height/width are smaller or equal to acquisition dimensions
            # The crop below takes `crop_width` along axis 0 and `crop_height`
            # along axis 1, so the guard checks those same axes.
            if (full.shape[0] < crop_width) or (full.shape[1] < crop_height):
                msg = "Image is smaller than the requested crop size for ilastik training."
                print(msg)
                continue
            for _ in range(n_crops):
                # `+ 1` keeps the last valid offset and avoids an empty range
                # when the image is exactly the crop size.
                x = np.random.choice(range(s[0] - crop_width + 1))
                y = np.random.choice(range(s[1] - crop_height + 1))
                crop = full[x:(x + crop_width), y:(y + crop_height), :]
                assert crop.shape == (crop_width, crop_height, len(to_exp))
                with h5py.File(
                        iprefix +
                        f"_ilastik_x{x}_y{y}_w{crop_width}_h{crop_height}.h5",
                        mode="w",
                ) as handle:
                    d = handle.create_dataset("stacked_channels", data=crop)
                    d.attrs["axistags"] = H5_YXC_AXISTAG

        print("")  # add a newline to the tabs
    finally:
        mcd.close()