def check_image() -> bool:
    """
    Report whether ``DOCKER_IMAGE`` is listed by ``docker images``.

    The first line of the listing (the header) is skipped; every remaining
    line is matched on its first space-delimited field.

    Returns
    -------
    bool
        True if a listed line starts with ``DOCKER_IMAGE``, False otherwise.

    Raises
    ------
    FileNotFoundError
        Re-raised, after logging an error, when the ``docker`` executable
        cannot be found.
    """
    try:
        raw = subprocess.check_output(["docker", "images"])
        # drop the header row of the listing
        rows = raw.decode().strip().split("\n")[1:]
        if any(row.split(" ")[0] == DOCKER_IMAGE for row in rows):
            return True
    except FileNotFoundError:
        log.error("Docker installation not detected.")
        raise
    except IndexError:
        pass
    return False
def run_shell_command(cmd) -> int:
    """
    Run a system command.

    Will detect whether a separate shell is required.

    Parameters
    ----------
    cmd : str
        Command line to run. Lines may be continued with a trailing
        backslash followed by a newline.

    Returns
    -------
    int
        Exit code of the command. In a dry run (``cfg.args.dry_run``)
        nothing is executed and 0 is returned.

    Notes
    -----
    A non-zero exit code is logged and terminates the interpreter through
    ``sys.exit`` with that code.
    """
    # in case the command has unix pipes or bash builtins,
    # the subprocess call must have its own shell
    # this should only occur if cellprofiler is being run uncontainerized
    # and needs a command to be called prior such as conda activate, etc
    # ("&" also covers "&&", so a separate check for the latter is not needed)
    shell = cmd.startswith("source") or any(x in cmd for x in ("&", "|"))
    log.debug(
        "Running command%s:\n%s",
        " in shell" if shell else "",
        textwrap.dedent(cmd) + "\n",
    )

    # Nothing is executed in a dry run; return a definite success code
    # rather than falling through to an unassigned variable.
    if cfg.args.dry_run:
        return 0

    if shell:
        log.debug("Running command in shell.")
        code = subprocess.call(cmd, shell=True)
    else:
        # join continued lines, then split on whitespace into an argv list
        argv = re.findall(r"\S+", cmd.replace("\\\n", ""))
        code = subprocess.call(argv, shell=False)

    if code != 0:
        log.error(
            "Process for command below failed with error:\n'%s'\nTerminating pipeline.\n",
            textwrap.dedent(cmd),
        )
        sys.exit(code)

    if not shell:
        # RUSAGE_SELF reports the calling (Python) process
        usage = resource.getrusage(resource.RUSAGE_SELF)
        log.debug("Maximum used memory so far: %.2fGb", usage.ru_maxrss / 1e6)
    return code
def prepare() -> int:
    """
    Extract MCD files and prepare input for ilastik.

    Three stages run in order. Each one is skipped (with an info message)
    when ``cfg.args.overwrite`` is false and its output already exists:

    1. Export from MCD, skipped if ``acquisition_metadata.csv`` exists in
       the "cp" directory.
    2. Panel/channel matching and conversion to analysis stacks, skipped if
       any ``*_full.tiff`` exists in the "analysis" directory.
    3. Running the CellProfiler "prepare ilastik" pipeline, skipped if any
       ``*_w500_h500.h5`` exists in the "ilastik" directory.

    Afterwards, spaces in file and folder names under the "base" directory
    are replaced by underscores.

    Returns
    -------
    int
        Always 0.
    """

    def export_acquisition() -> None:
        """Convert matching input files to OME folders and export the acquisition CSV."""
        name_pattern = re.compile(cfg.args.file_regexp)
        for fol in cfg.args.dirs["input"]:
            for fln in os.listdir(fol):
                if not name_pattern.match(fln):
                    continue
                fn_full = pjoin(fol, fln)
                log.info("Extracting MCD file '%s'.", fn_full)
                if cfg.args.dry_run:
                    continue
                convertfolder2imcfolder.convert_folder2imcfolder(
                    fn_full, out_folder=cfg.args.dirs["ome"], dozip=False)
        if cfg.args.dry_run:
            return
        exportacquisitioncsv.export_acquisition_csv(
            cfg.args.dirs["ome"], fol_out=cfg.args.dirs["cp"])

    def join_pannel_with_acquired_channels(directory=None) -> None:
        """
        Match the reference pannel CSV against the acquired channels and
        write the joined table to ``cfg.args.parsed_csv_pannel``.

        Raises ValueError when zero or several acquisition metadata files
        match, or when acquired channels are absent from the pannel.
        """
        # characters removed from channel labels
        to_strip = ("-", "_", " ")

        # read pannel
        pannel = pd.read_csv(cfg.args.csv_pannel, index_col=0)

        # read acquisition metadata
        if directory is None:
            pattern = pjoin(cfg.args.dirs["ome"], "*",
                            "*_AcquisitionChannel_meta.csv")
        else:
            pattern = pjoin(directory, "*_AcquisitionChannel_meta.csv")
        metas = glob(pattern)
        if not metas:
            raise ValueError(f"No '{pattern}' files found!")
        if len(metas) != 1:
            raise ValueError(f"More than one '{pattern}' files found!")

        acquired = pd.read_csv(metas[0])
        acquired = acquired[["ChannelLabel", "ChannelName", "OrderNumber"]]

        # remove parenthesis from metal column
        # (regex=False: "(" and ")" are meant literally, independent of the
        # pandas default for `regex`)
        acquired["ChannelName"] = (
            acquired["ChannelName"]
            .str.replace("(", "", regex=False)
            .str.replace(")", "", regex=False))

        # clean up the channel name
        for char in to_strip:
            acquired["ChannelLabel"] = acquired["ChannelLabel"].str.replace(
                char, "", regex=False)
        acquired["ChannelLabel"] = acquired["ChannelLabel"].fillna("<EMPTY>")
        acquired = acquired.loc[
            ~acquired["ChannelLabel"].isin(["X", "Y", "Z"]), :
        ].drop_duplicates()
        acquired.index = (
            acquired["ChannelLabel"] + "(" + acquired["ChannelName"] + ")")

        # Check matches, report missing
        in_pannel = acquired.index.isin(pannel.index)
        if not in_pannel.all():
            miss = "\n - ".join(acquired.loc[~in_pannel, "ChannelLabel"])
            raise ValueError(
                f"Given reference pannel '{cfg.args.csv_pannel}'"
                f" is missing the following channels: \n - {miss}")

        # align and sort by acquisition
        joint_pannel = acquired.join(pannel).sort_values("OrderNumber")

        # make sure order of ilastik channels is same as the original pannel
        # this important in order for the channels to always be the same
        # and the ilastik models to be reusable
        assert all(
            pannel.query("ilastik == True").index
            == joint_pannel.query("ilastik == True").index)

        # If all is fine, save annotation with acquired channels and their order
        joint_pannel.to_csv(cfg.args.parsed_csv_pannel, index=True)

    def prepare_histocat() -> None:
        """Export OME folders to the histocat directory and build the analysis stacks."""
        ome_suffix = ".ome.tiff"

        if not os.path.exists(cfg.args.dirs["histocat"]):
            os.makedirs(cfg.args.dirs["histocat"])
        for fol in os.listdir(cfg.args.dirs["ome"]):
            if cfg.args.dry_run:
                continue
            ome2micat.omefolder2micatfolder(
                pjoin(cfg.args.dirs["ome"], fol),
                cfg.args.dirs["histocat"],
                dtype="uint16",
            )

        # prefer the parsed pannel when it has been written
        pannel = (
            cfg.args.parsed_csv_pannel
            if os.path.exists(cfg.args.parsed_csv_pannel)
            else cfg.args.csv_pannel)

        for fol in os.listdir(cfg.args.dirs["ome"]):
            sub_fol = pjoin(cfg.args.dirs["ome"], fol)
            for img in os.listdir(sub_fol):
                if not img.endswith(ome_suffix):
                    continue
                # Remove exactly the suffix. `str.rstrip` would strip any
                # trailing run of the characters ". o m e t i f" and so
                # truncate stems ending in one of those letters.
                basename = img[: -len(ome_suffix)]
                log.info("Preparing OME-tiff directory '%s'.", img)
                for (col, suffix, addsum) in cfg.args.list_analysis_stacks:
                    if cfg.args.dry_run:
                        continue
                    ometiff2analysis.ometiff_2_analysis(
                        pjoin(sub_fol, img),
                        cfg.args.dirs["analysis"],
                        basename + suffix,
                        pannelcsv=pannel,
                        metalcolumn=cfg.args.csv_pannel_metal,
                        usedcolumn=col,
                        addsum=addsum,
                        bigtiff=False,
                        pixeltype="uint16",
                    )

    @check_requirements
    def prepare_ilastik() -> None:
        """Build and run the CellProfiler command for the "1_prepare_ilastik" pipeline."""
        # Each line of the command ends in backslash + newline; this is the
        # continuation form that `run_shell_command` joins before splitting.
        if cfg.args.containerized:
            extra = (
                "--name cellprofiler_prepare_ilastik --rm"
                if cfg.args.containerized == "docker"
                else "")
            cmd = f"""
        {cfg.args.containerized} run \\
        {extra} \\
            {cfg.args.dirbind} {cfg.args.dirs['base']}:/data:rw \\
            {cfg.args.dirbind} {cfg.args.cellprofiler_plugin_path}:/ImcPluginsCP:ro \\
            {cfg.args.dirbind} {cfg.args.cellprofiler_pipeline_path}:/ImcSegmentationPipeline:ro \\
            {cfg.args.container_image} \\
                --run-headless --run \\
                --plugins-directory /ImcPluginsCP/plugins/ \\
                --pipeline /ImcSegmentationPipeline/cp3_pipelines/1_prepare_ilastik.cppipe \\
                -i /{cfg.args.dirs['analysis'].replace(cfg.args.dirs['base'], 'data')}/ \\
                -o /{cfg.args.dirs['ilastik'].replace(cfg.args.dirs['base'], 'data')}/"""
        else:
            cmd = f"""
            {cfg.args.cellprofiler_exec} \\
                --run-headless --run \\
                --plugins-directory {cfg.args.cellprofiler_plugin_path}/plugins/ \\
                --pipeline {cfg.args.cellprofiler_pipeline_path}/cp3_pipelines/1_prepare_ilastik.cppipe \\
                -i {cfg.args.dirs['analysis']}/ \\
                -o {cfg.args.dirs['ilastik']}/"""

        # Disabled container flags, kept for reference:
        # {cfg.args.dirbind} /tmp/.X11-unix:/tmp/.X11-unix:ro \\
        # -e DISPLAY=$DISPLAY \\
        run_shell_command(cmd)

    def fix_spaces_in_folders_files(directory):
        """Replace spaces with underscores in all file and folder names below `directory`."""
        for path, folders, files in os.walk(directory):
            for name in files:
                if " " not in name:
                    continue  # nothing to rename
                os.rename(pjoin(path, name),
                          pjoin(path, name.replace(" ", "_")))
            for i, name in enumerate(folders):
                if " " not in name:
                    continue
                new_name = name.replace(" ", "_")
                os.rename(pjoin(path, name), pjoin(path, new_name))
                # update in place so os.walk descends into the renamed folder
                folders[i] = new_name

    # Stage 1: export from MCD
    has_metadata = os.path.exists(
        pjoin(cfg.args.dirs["cp"], "acquisition_metadata.csv"))
    if cfg.args.overwrite or not has_metadata:
        log.info("Expanding directories from MCD files.")
        export_acquisition()
    else:
        log.info(
            "Overwrite is false and files exist. Skipping export from MCD.")

    # Stage 2: match pannel to acquired channels and build analysis stacks
    has_stacks = len(glob(pjoin(cfg.args.dirs["analysis"], "*_full.tiff"))) > 0
    if cfg.args.overwrite or not has_stacks:
        if not cfg.args.dry_run:
            try:
                join_pannel_with_acquired_channels()
            except ValueError:
                # best effort: the unparsed pannel is used further on
                log.error(
                    "Failed formatting channel names with provided pannel CSV metadata."
                )
        prepare_histocat()
    else:
        log.info(
            "Overwrite is false and files exist. Skipping conversion to OME-tiff."
        )

    # Stage 3: prepare ilastik input
    has_ilastik = len(
        glob(pjoin(cfg.args.dirs["ilastik"], "*_w500_h500.h5"))) > 0
    if cfg.args.overwrite or not has_ilastik:
        prepare_ilastik()
    else:
        log.info(
            "Overwrite is false and files exist. Skipping preparing ilastik files."
        )

    fix_spaces_in_folders_files(cfg.args.dirs["base"])
    return 0