def test_read_imc_mcd(self, raw_path: Path):
    """Parse acquisition 1 of the mock MCD file and check its data and channel metadata."""
    dataset = '20210305_NE_mockData1'
    parser = McdParser(raw_path / dataset / (dataset + '.mcd'))
    acquisition = parser.get_acquisition_data(1)

    # Parser-level metadata
    assert parser.origin == "mcd"

    # Image stack: one 60x60 plane per channel
    assert acquisition.is_valid is True
    assert acquisition.image_data.shape == (5, 60, 60)
    assert acquisition.n_channels == 5

    # Per-channel metadata
    expected_names = ['Ag107', 'Pr141', 'Sm147', 'Eu153', 'Yb172']
    expected_labels = [
        '107Ag',
        'Cytoker_651((3356))Pr141',
        'Laminin_681((851))Sm147',
        'YBX1_2987((3532))Eu153',
        'H3K27Ac_1977((2242))Yb172',
    ]
    expected_masses = ['107', '141', '147', '153', '172']
    assert acquisition.channel_names == expected_names
    assert acquisition.channel_labels == expected_labels
    assert acquisition.channel_masses == expected_masses
def read_imc_mcd(
        path: Union[str, Path],
        acquisition_id: int,
        channel_names_attr: str = 'channel_names') -> 'MultichannelImage':
    """Read a single acquisition of a Fluidigm(TM) MCD file into a :class:`MultichannelImage`

    The file is parsed with :class:`McdParser`; the acquisition's image data is
    wrapped in an :class:`xarray.DataArray` with dimensions ``('c', 'y', 'x')``
    that is named after the file.

    :param path: path to the .mcd file
    :param acquisition_id: acquisition ID to read (unique across slides)
    :param channel_names_attr: name of the acquisition data attribute that
        provides the channel names, e.g. ``'channel_labels'``
    :return: a new :class:`MultichannelImage` instance
    """
    mcd_path = Path(path)
    acquisition = McdParser(mcd_path).get_acquisition_data(acquisition_id)
    image = xr.DataArray(
        data=acquisition.image_data,
        dims=('c', 'y', 'x'),
        name=mcd_path.name,
    )
    channel_names = getattr(acquisition, channel_names_attr)
    return MultichannelImage(image, channel_names=channel_names)
def mcd2ome(mcdfile: File,
            export: File,
            min_height: int = 10,
            min_width: int = 10,
            metadata: bool = False,
            slide: bool = False,
            channels: Optional[List[str]] = None,
            verbose: bool = False
            ):
    """Convert an `.mcd` file into one `.ome.tiff` file per acquisition.

    Args:
        mcdfile: path to the `.mcd` file to read.
        export: output directory; it is created if needed.
        min_height: acquisitions whose ``max_y`` is below this are skipped.
        min_width: acquisitions whose ``max_x`` is below this are skipped.
        metadata: if true, write the MCD XML (when available) and the session
            JSON into ``export / 'metadata'``.
        slide: if true, write slide and panorama images into
            ``export / 'slide'``.
        channels: passed as ``names`` to ``save_ome_tiff`` for every
            acquisition.
        verbose: if true, print the channels found for each exported file.

    Raises:
        NotADirectoryError: if `export` points to an existing file.
    """
    mcdfile = Path(mcdfile)
    export = Path(export)
    mcd_parser = McdParser(mcdfile)
    # The parser keeps the MCD file open; release it on every exit path.
    try:
        session = mcd_parser.session

        if export.is_file():
            raise NotADirectoryError(f"Cannot export to {export}, not a directory.")
        create_folder(export)

        mcd_xml = mcd_parser.get_mcd_xml()

        if metadata:
            meta_folder = create_folder(export / 'metadata')
            # Save XML metadata if available
            if mcd_xml is not None:
                with open(meta_folder / (session.metaname + "_schema.xml"), "wt") as f:
                    f.write(mcd_xml)
            # Save session data in json
            session.save(meta_folder / (session.metaname + "_session.json"))

        if slide:
            slide_folder = create_folder(export / 'slide')
            for key in session.slides.keys():
                mcd_parser.save_slide_image(key, slide_folder)
            for key in session.panoramas.keys():
                mcd_parser.save_panorama_image(key, slide_folder)

        # Save acquisition images in OME-TIFF format
        for acquisition in session.acquisitions.values():
            # Skip acquisitions that are smaller than the requested minimum size
            if acquisition.max_x < min_width or acquisition.max_y < min_height:
                continue

            acquisition_data = mcd_parser.get_acquisition_data(acquisition.id)
            if not acquisition_data.is_valid:
                continue

            # Collect the channels for which image data is available
            # (only reported when `verbose` is set)
            valid_channels = []
            for ch in acquisition.channels.values():
                img = acquisition_data.get_image_by_name(ch.name)
                if img is not None:
                    valid_channels.append(ch.name)

            # Build a file name without spaces or dots before the extension
            export_name = f"{session.name}_slide{acquisition.slide.id}_ROI{acquisition.id}"
            export_name = export_name.replace(" ", "_")
            export_name = export_name.replace(".", "_")
            export_name += ".ome.tiff"

            if verbose:
                print(f"{len(valid_channels)} channels in {export_name}", ", ".join(valid_channels))

            acquisition_data.save_ome_tiff(
                export / export_name,
                xml_metadata=mcd_xml,
                names=channels,
            )
    finally:
        mcd_parser.close()
def mcd_to_dir(
    mcd_file: Path,
    pannel_csv: Path = None,
    ilastik_output: bool = True,
    ilastik_channels: List[str] = None,
    output_dir: Path = None,
    output_format: str = "tiff",
    overwrite: bool = False,
    sample_name: str = None,
    partition_panels: bool = False,
    filter_full: bool = True,
    export_panoramas: bool = True,
    keep_original_roi_names: bool = False,
    allow_empty_rois: bool = True,
    only_crops: bool = False,
    n_crops: int = 5,
    crop_width: int = 500,
    crop_height: int = 500,
) -> None:
    """Export the acquisitions of an MCD file as image stacks and ilastik inputs.

    For every acquisition the following is written below ``output_dir``:

    - ``tiffs/<prefix>_full.<ending>`` with the image stack and
      ``tiffs/<prefix>_full.csv`` with the channel labels
      (unless ``only_crops`` is set);
    - if ``ilastik_output`` is set, ``tiffs/<prefix>_ilastik_s2.h5`` with the
      selected channels upscaled 2x, and ``n_crops`` random crops of it in
      ``ilastik/``.

    :param mcd_file: path to the MCD file.
    :param pannel_csv: CSV file read with its first column as index; rows
        where the ``ilastik`` column equals 1 select the ilastik channels.
        Only used when ``ilastik_channels`` is not given.
    :param ilastik_output: whether to write the HDF5 inputs for ilastik.
    :param ilastik_channels: channel labels to export for ilastik.
    :param output_dir: output directory; defaults to ``imc_dir`` next to
        ``mcd_file``.
    :param output_format: ``"tiff"`` or ``"ome-tiff"``. Other values write no
        image stack.
    :param overwrite: whether to overwrite existing output files.
    :param sample_name: name used in output file names; defaults to the
        session name of the MCD file.
    :param partition_panels: not implemented.
    :param filter_full: drop channels whose label starts with a digit or
        contains ``<EMPTY>`` from the exported channel labels.
    :param export_panoramas: whether to export panorama images.
    :param keep_original_roi_names: build the file prefix from the session
        name instead of ``sample_name`` and the acquisition's position.
    :param allow_empty_rois: print the error and skip acquisitions whose data
        cannot be read instead of raising.
    :param only_crops: skip writing the full image stack and its CSV.
    :param n_crops: number of random crops per acquisition.
    :param crop_width: crop extent along the first image axis.
    :param crop_height: crop extent along the second image axis.
    :raises NotImplementedError: if ``partition_panels`` is set.
    :raises ValueError: if neither ``pannel_csv`` nor ``ilastik_channels``
        is given.
    """

    # NOTE: the three panel helpers below are currently unused because
    # `partition_panels` raises NotImplementedError; they read `session`
    # from the enclosing scope.
    def get_dataframe_from_channels(mcd):
        return pd.DataFrame(
            [mcd.get_acquisition_channels(x) for x in session.acquisition_ids],
            index=session.acquisition_ids,
        )

    def all_channels_equal(mcd):
        chs = get_dataframe_from_channels(mcd)
        return all([(chs[c].value_counts() == mcd.n_acquisitions).all() for c in chs.columns])

    def get_panel_partitions(mcd):
        chs = get_dataframe_from_channels(mcd)
        partitions = {k: set(k) for k in chs.drop_duplicates().index}
        for p in partitions:
            for _, row in chs.iterrows():
                print(p, row.name)
                if (row == chs.loc[list(partitions[p])[0]]).all():
                    partitions[p] = partitions[p].union(set([row.name]))
        return partitions.values()

    def clip_hot_pixels(img, hp_filter_shape=(3, 3), hp_threshold=0.0001):
        """Clip pixels that exceed the maximum of their neighbours by more
        than ``hp_threshold`` down to that maximum."""
        if hp_filter_shape[0] % 2 != 1 or hp_filter_shape[1] % 2 != 1:
            raise ValueError("Invalid hot pixel filter shape: %s" % str(hp_filter_shape))
        # Neighbourhood footprint with the centre pixel excluded
        hp_filter_footprint = np.ones(hp_filter_shape)
        hp_filter_footprint[hp_filter_shape[0] // 2, hp_filter_shape[1] // 2] = 0
        max_img = ndi.maximum_filter(img, footprint=hp_filter_footprint, mode="reflect")
        hp_mask = img - max_img > hp_threshold
        img = img.copy()
        img[hp_mask] = max_img[hp_mask]
        return img

    def append_to_name(path, text):
        """Append ``text`` to the last component of ``path``.

        ``pathlib`` paths do not support ``path + str``, so the new name is
        built explicitly.
        """
        return path.with_name(path.name + text)

    if partition_panels:
        raise NotImplementedError(
            "Partitioning sample per panel is not implemented yet.")
    if pannel_csv is None and ilastik_channels is None:
        raise ValueError(
            "One of `pannel_csv` or `ilastik_channels` must be given!")
    if ilastik_channels is None:
        # `pannel_csv` is guaranteed to be given at this point
        panel = pd.read_csv(pannel_csv, index_col=0)
        ilastik_channels = panel.query("ilastik == 1").index.tolist()

    # Axis tags stored with every HDF5 dataset (axis order: y, x, c)
    H5_YXC_AXISTAG = json.dumps({
        "axes": [
            {
                "key": "y",
                "typeFlags": 2,
                "resolution": 0,
                "description": "",
            },
            {
                "key": "x",
                "typeFlags": 2,
                "resolution": 0,
                "description": "",
            },
            {
                "key": "c",
                "typeFlags": 1,
                "resolution": 0,
                "description": "",
            },
        ]
    })

    if output_dir is None:
        output_dir = mcd_file.parent / "imc_dir"
    output_dir.mkdir(exist_ok=True, parents=True)
    dirs = ["tiffs"] + (["ilastik"] if ilastik_output else [])
    for _dir in dirs:
        (output_dir / _dir).mkdir(exist_ok=True)

    # Export panoramas
    if export_panoramas:
        get_panorama_images(
            mcd_file,
            output_file_prefix=output_dir / "Panorama",
            overwrite=overwrite,
        )

    file_ending = "ome.tiff" if output_format == "ome-tiff" else "tiff"

    # Parse MCD
    mcd = McdParser(mcd_file)
    # Close the parser on every exit path, not only after a clean run.
    try:
        session = mcd.session
        if sample_name is None:
            sample_name = session.name

        for i, ac_id in enumerate(session.acquisition_ids):
            print(ac_id, end="\t")
            try:
                ac = mcd.get_acquisition_data(ac_id)
            except Exception as e:  # imctools.io.abstractparserbase.AcquisitionError
                if allow_empty_rois:
                    print(e)
                    continue
                raise

            # Get output prefix
            if keep_original_roi_names:
                # NOTE(review): this prefix does not contain the acquisition
                # id, so every acquisition shares it - confirm this is intended.
                prefix = (output_dir / "tiffs" /
                          (session.name.replace(" ", "_") + "_ac"))
            else:
                prefix = (output_dir / "tiffs" /
                          (sample_name + "-" + str(i + 1).zfill(2)))

            # Skip if not overwrite
            full_path = append_to_name(prefix, "_full." + file_ending)
            if full_path.exists() and not overwrite:
                print(
                    "TIFF images exist and overwrite is set to `False`. Continuing."
                )
                continue

            # Filter channels
            channel_labels = build_channel_name(ac.channel_labels,
                                                ac.channel_names)
            to_exp = channel_labels[channel_labels.isin(ilastik_channels)]
            # Look up each selected channel by the digits captured from its label
            to_exp_ind = [
                ac.channel_masses.index(y)
                for y in to_exp.str.extract(r".*\(..(\d+)\)")[0]
            ]
            assert to_exp_ind == to_exp.index.tolist()

            if filter_full:
                # remove background and empty channels
                # TODO: find way to do this more systematically
                channel_labels = channel_labels[~(
                    channel_labels.str.contains(r"^\d")
                    | channel_labels.str.contains("<EMPTY>"))].reset_index(
                        drop=True)

            # Filter hot pixels
            ac._image_data = np.asarray(
                [clip_hot_pixels(x) for x in ac.image_data])

            # Save full image
            if not only_crops:
                if overwrite or not full_path.exists():
                    if output_format == "tiff":
                        ac.save_tiff(
                            full_path,
                            names=channel_labels.str.extract(r"\((.*)\)")[0],
                        )
                    elif output_format == "ome-tiff":
                        ac.save_ome_tiff(
                            full_path,
                            names=channel_labels.str.extract(r"\((.*)\)")[0],
                            xml_metadata=mcd.get_mcd_xml(),
                        )
                # Save channel labels for the stack
                labels_path = append_to_name(prefix, "_full.csv")
                if overwrite or not labels_path.exists():
                    channel_labels.to_csv(labels_path)

            if not ilastik_output:
                continue

            # Make input for ilastik training
            # # zoom 2x
            s = tuple(x * 2 for x in ac.image_data.shape[1:])
            full = np.moveaxis(
                np.asarray([resize(x, s) for x in ac.image_data[to_exp_ind]]),
                0, -1)

            # # Save input for ilastik prediction
            with h5py.File(append_to_name(prefix, "_ilastik_s2.h5"),
                           mode="w") as handle:
                d = handle.create_dataset("stacked_channels", data=full)
                d.attrs["axistags"] = H5_YXC_AXISTAG

            # # random crops
            iprefix = (output_dir / "ilastik" /
                       (sample_name.replace(" ", "_") + "_ac"))
            # # # make sure the crop fits along the axes it is taken from:
            # # # `crop_width` is applied to axis 0 and `crop_height` to axis 1
            if (full.shape[0] < crop_width) or (full.shape[1] < crop_height):
                msg = "Image is smaller than the requested crop size for ilastik training."
                print(msg)
                continue

            for _ in range(n_crops):
                # `+ 1` keeps the range non-empty when the image is exactly
                # as large as the crop (the only valid offset is then 0)
                x = np.random.choice(range(full.shape[0] - crop_width + 1))
                y = np.random.choice(range(full.shape[1] - crop_height + 1))
                crop = full[x:(x + crop_width), y:(y + crop_height), :]
                assert crop.shape == (crop_width, crop_height, len(to_exp))
                crop_path = append_to_name(
                    iprefix,
                    f"_ilastik_x{x}_y{y}_w{crop_width}_h{crop_height}.h5",
                )
                with h5py.File(crop_path, mode="w") as handle:
                    d = handle.create_dataset("stacked_channels", data=crop)
                    d.attrs["axistags"] = H5_YXC_AXISTAG

            print("")  # add a newline to the tabs
    finally:
        mcd.close()