Пример #1
0
 def check_image() -> bool:
     """
     Tell whether ``DOCKER_IMAGE`` shows up in the output of ``docker images``.

     The first line of the listing (the column header) is skipped; for every
     remaining line the text before the first space is compared with
     ``DOCKER_IMAGE``.

     Returns True on the first match, False otherwise.

     Raises FileNotFoundError (after logging an error) when the ``docker``
     executable cannot be started.
     """
     try:
         listing = subprocess.check_output(["docker", "images"])
         rows = listing.decode().strip().split("\n")
         # rows[0] is the header line, so only the rest is inspected
         return any(row.split(" ")[0] == DOCKER_IMAGE for row in rows[1:])
     except FileNotFoundError:
         log.error("Docker installation not detected.")
         raise
     except IndexError:
         return False
Пример #2
0
def run_shell_command(cmd) -> int:
    """
    Run a system command.

    Will detect whether a separate shell is required: the command is handed
    to a shell when it contains ``&`` or ``|`` or when it starts with
    ``source``. Otherwise it is split on whitespace (after removing
    backslash-newline continuations) and executed directly.

    When ``cfg.args.dry_run`` is set, the command is only logged and not
    executed.

    Parameters
    ----------
    cmd : str
        The command line to run. May span several lines joined with
        backslash-newline continuations.

    Returns
    -------
    int
        The exit code of the command, which is always 0 on return because a
        non-zero exit code terminates the program. 0 is also returned for a
        dry run.

    Raises
    ------
    SystemExit
        With the exit code of the command if it is non-zero.
    """
    # in case the command has unix pipes or bash builtins,
    # the subprocess call must have its own shell
    # this should only occur if cellprofiler is being run uncontainerized
    # and needs a command to be called prior such as conda activate, etc
    # ("&&" needs no separate check since it already contains "&")
    symbol = any(x in cmd for x in ("&", "|"))
    source = cmd.startswith("source")
    shell = bool(symbol or source)
    log.debug(
        "Running command%s:\n%s",
        " in shell" if shell else "",
        textwrap.dedent(cmd) + "\n",
    )

    # Default exit code, so a dry run has something to return instead of
    # hitting an unbound local variable.
    code = 0
    if cfg.args.dry_run:
        return code

    if shell:
        log.debug("Running command in shell.")
        code = subprocess.call(cmd, shell=shell)
    else:
        # drop line continuations and split into an argument vector
        c = re.findall(r"\S+", cmd.replace("\\\n", ""))
        code = subprocess.call(c, shell=shell)
    if code != 0:
        log.error(
            "Process for command below failed with error:\n'%s'\nTerminating pipeline.\n",
            textwrap.dedent(cmd),
        )
        sys.exit(code)
    if not shell:
        # NOTE(review): RUSAGE_SELF reports on this process, not on the
        # child that was just run - confirm this is the intended figure.
        usage = resource.getrusage(resource.RUSAGE_SELF)
        log.debug("Maximum used memory so far: {:.2f}Gb".format(
            usage.ru_maxrss / 1e6))
    return code
Пример #3
0
def prepare() -> int:
    """
    Extract MCD files and prepare input for ilastik.

    Three stages are run in order. Each one is skipped when
    ``cfg.args.overwrite`` is false and its output is already present:

    1. export of the MCD files found in the input directories
       (skipped if ``acquisition_metadata.csv`` exists in the "cp" directory);
    2. joining the pannel CSV with the acquired channels and building the
       histocat/analysis stacks (skipped if any ``*_full.tiff`` exists in the
       "analysis" directory);
    3. running the CellProfiler "prepare ilastik" pipeline (skipped if any
       ``*_w500_h500.h5`` exists in the "ilastik" directory).

    Finally, spaces in the names of all files and directories below the
    "base" directory are replaced with underscores.

    Returns 0.
    """
    def export_acquisition() -> None:
        """Convert matching input files to the "ome" directory and export the acquisition CSV."""
        re_fn = re.compile(cfg.args.file_regexp)

        for fol in cfg.args.dirs["input"]:
            for fln in os.listdir(fol):
                if re_fn.match(fln):
                    fn_full = pjoin(fol, fln)
                    log.info("Extracting MCD file '%s'.", fn_full)
                    if cfg.args.dry_run:
                        continue
                    convertfolder2imcfolder.convert_folder2imcfolder(
                        fn_full, out_folder=cfg.args.dirs["ome"], dozip=False)
        if cfg.args.dry_run:
            return
        exportacquisitioncsv.export_acquisition_csv(
            cfg.args.dirs["ome"], fol_out=cfg.args.dirs["cp"])

    def join_pannel_with_acquired_channels(directory=None) -> None:
        """
        Join the reference pannel with the channels that were acquired.

        The result is written to ``cfg.args.parsed_csv_pannel``.

        If `directory` is None, the acquisition metadata file is searched
        for in the sub-directories of the "ome" directory, otherwise directly
        in `directory`.

        Raises ValueError if there is not exactly one metadata file or if
        the pannel lacks one of the acquired channels.
        """
        to_replace = [
            ("-", ""),
            ("_", ""),
            (" ", ""),
        ]
        # read pannel
        pannel = pd.read_csv(cfg.args.csv_pannel, index_col=0)
        # read acquisition metadata
        if directory is None:
            pattern = pjoin(cfg.args.dirs["ome"], "*",
                            "*_AcquisitionChannel_meta.csv")
        else:
            pattern = pjoin(directory, "*_AcquisitionChannel_meta.csv")
        metas = glob(pattern)
        if not metas:
            raise ValueError(f"No '{pattern}' files  found!")
        if len(metas) != 1:
            raise ValueError(f"More than one '{pattern}' files found!")

        acquired = pd.read_csv(metas[0])
        acquired = acquired[["ChannelLabel", "ChannelName", "OrderNumber"]]

        # remove parenthesis from metal column
        # (regex=False: "(" and ")" are literals, not regular expressions)
        acquired["ChannelName"] = (acquired["ChannelName"].str.replace(
            "(", "", regex=False).str.replace(")", "", regex=False))
        # clean up the channel name
        for __k, __v in to_replace:
            acquired["ChannelLabel"] = acquired["ChannelLabel"].str.replace(
                __k, __v, regex=False)
        acquired["ChannelLabel"] = acquired["ChannelLabel"].fillna("<EMPTY>")
        acquired = acquired.loc[~acquired["ChannelLabel"].
                                isin(["X", "Y", "Z"]), :].drop_duplicates()
        # index has the form "<label>(<name>)", to be matched against the
        # index of the pannel
        acquired.index = (acquired["ChannelLabel"] + "(" +
                          acquired["ChannelName"] + ")")

        # Check matches, report missing
        __c = acquired.index.isin(pannel.index)
        if not __c.all():
            miss = "\n - ".join(acquired.loc[~__c, "ChannelLabel"])
            raise ValueError(
                f"Given reference pannel '{cfg.args.csv_pannel}'"
                f" is missing the following channels: \n - {miss}")

        # align and sort by acquisition
        joint_pannel = acquired.join(pannel).sort_values("OrderNumber")

        # make sure order of ilastik channels is same as the original pannel
        # this important in order for the channels to always be the same
        # and the ilastik models to be reusable
        # NOTE(review): an AssertionError is not caught by the caller below,
        # which only handles ValueError - confirm that this is intended.
        assert all(
            pannel.query("ilastik == True").index == joint_pannel.query(
                "ilastik == True").index)

        # If all is fine, save annotation with acquired channels and their order
        joint_pannel.to_csv(cfg.args.parsed_csv_pannel, index=True)

    def prepare_histocat() -> None:
        """Build the histocat folders and the analysis stacks from the OME-tiff files."""
        os.makedirs(cfg.args.dirs["histocat"], exist_ok=True)
        for fol in os.listdir(cfg.args.dirs["ome"]):
            if cfg.args.dry_run:
                continue
            ome2micat.omefolder2micatfolder(
                pjoin(cfg.args.dirs["ome"], fol),
                cfg.args.dirs["histocat"],
                dtype="uint16",
            )

        # prefer the parsed pannel if it was produced, else the original one
        pannel = (cfg.args.parsed_csv_pannel if os.path.exists(
            cfg.args.parsed_csv_pannel) else cfg.args.csv_pannel)

        ome_suffix = ".ome.tiff"
        for fol in os.listdir(cfg.args.dirs["ome"]):
            sub_fol = pjoin(cfg.args.dirs["ome"], fol)
            for img in os.listdir(sub_fol):
                if not img.endswith(ome_suffix):
                    continue
                # Cut off the suffix by length. str.rstrip() would remove any
                # trailing run of the characters ".ometif" and thereby eat
                # into names such as "sample_time.ome.tiff".
                basename = img[:-len(ome_suffix)]
                log.info("Preparing OME-tiff directory '%s'.", img)
                for (col, suffix, addsum) in cfg.args.list_analysis_stacks:
                    if cfg.args.dry_run:
                        continue
                    ometiff2analysis.ometiff_2_analysis(
                        pjoin(sub_fol, img),
                        cfg.args.dirs["analysis"],
                        basename + suffix,
                        pannelcsv=pannel,
                        metalcolumn=cfg.args.csv_pannel_metal,
                        usedcolumn=col,
                        addsum=addsum,
                        bigtiff=False,
                        pixeltype="uint16",
                    )

    @check_requirements
    def prepare_ilastik() -> None:
        """Run the CellProfiler "1_prepare_ilastik" pipeline, in a container if so configured."""
        if cfg.args.containerized:
            extra = ("--name cellprofiler_prepare_ilastik --rm"
                     if cfg.args.containerized == "docker" else "")
            cmd = f"""
        {cfg.args.containerized} run \\
        {extra} \\
            {cfg.args.dirbind} {cfg.args.dirs['base']}:/data:rw \\
            {cfg.args.dirbind} {cfg.args.cellprofiler_plugin_path}:/ImcPluginsCP:ro \\
            {cfg.args.dirbind} {cfg.args.cellprofiler_pipeline_path}:/ImcSegmentationPipeline:ro \\
            {cfg.args.container_image} \\
                --run-headless --run \\
                --plugins-directory /ImcPluginsCP/plugins/ \\
                --pipeline /ImcSegmentationPipeline/cp3_pipelines/1_prepare_ilastik.cppipe \\
                -i /{cfg.args.dirs['analysis'].replace(cfg.args.dirs['base'], 'data')}/ \\
                -o /{cfg.args.dirs['ilastik'].replace(cfg.args.dirs['base'], 'data')}/"""
        else:
            cmd = f"""
            {cfg.args.cellprofiler_exec} \\
                --run-headless --run \\
                --plugins-directory {cfg.args.cellprofiler_plugin_path}/plugins/ \\
                --pipeline {cfg.args.cellprofiler_pipeline_path}/cp3_pipelines/1_prepare_ilastik.cppipe \\
                -i {cfg.args.dirs['analysis']}/ \\
                -o {cfg.args.dirs['ilastik']}/"""

        # {cfg.args.dirbind} /tmp/.X11-unix:/tmp/.X11-unix:ro \\
        # -e DISPLAY=$DISPLAY \\
        run_shell_command(cmd)

    def fix_spaces_in_folders_files(directory):
        """Replace spaces with underscores in all file and directory names below `directory`."""
        for path, folders, files in os.walk(directory):
            for f in files:
                if " " in f:
                    os.rename(
                        pjoin(path, f),
                        pjoin(path, f.replace(" ", "_")),
                    )
            for i, folder in enumerate(folders):
                if " " not in folder:
                    continue
                new_name = folder.replace(" ", "_")
                os.rename(pjoin(path, folder), pjoin(path, new_name))
                # update in place so that os.walk descends into the new name
                folders[i] = new_name

    e = os.path.exists(pjoin(cfg.args.dirs["cp"], "acquisition_metadata.csv"))
    if cfg.args.overwrite or not e:
        log.info("Expanding directories from MCD files.")
        export_acquisition()
    else:
        log.info(
            "Overwrite is false and files exist. Skipping export from MCD.")

    e = len(glob(pjoin(cfg.args.dirs["analysis"], "*_full.tiff"))) > 0
    if cfg.args.overwrite or not e:
        if not cfg.args.dry_run:
            try:
                join_pannel_with_acquired_channels()
            except ValueError as err:
                log.error(
                    "Failed formatting channel names with provided pannel CSV metadata."
                )
                # report the reason as well, it would be lost otherwise
                log.error("%s", err)
        prepare_histocat()
    else:
        log.info(
            "Overwrite is false and files exist. Skipping conversion to OME-tiff."
        )
    e = len(glob(pjoin(cfg.args.dirs["ilastik"], "*_w500_h500.h5"))) > 0
    if cfg.args.overwrite or not e:
        prepare_ilastik()
    else:
        log.info(
            "Overwrite is false and files exist. Skipping preparing ilastik files."
        )

    fix_spaces_in_folders_files(cfg.args.dirs["base"])
    return 0