def setup_output_wranglers(self, wranglers):
    """Compile the output-wrangler specification into ``self._wranglers``.

    wranglers : mapping of regular-expression string -> action, or list of
                actions. An action is either ``"replace:<text>"`` or a key
                of the module-level ``_actions`` table. A falsy value
                (``None``, ``{}``) leaves ``self._wranglers`` empty.

    On return ``self._wranglers`` holds one
    ``(compiled_pattern, replacement_or_None, [resolved actions])`` tuple per
    mapping entry, in mapping order. If several ``replace:`` actions are
    given for one entry, the last one wins. A ``replace:`` action is never
    looked up in ``_actions``.

    Raises utils.StimelaCabRuntimeError if ``wranglers`` is not a dict, if an
    entry is neither a string nor a list of strings, or if an action is
    unknown.
    """
    self._wranglers = []
    if not wranglers:
        return

    # isinstance rather than an exact type comparison, so that dict
    # subclasses (e.g. OrderedDict) are accepted as well.
    if not isinstance(wranglers, dict):
        raise utils.StimelaCabRuntimeError("wranglers: dict expected")

    for match, actions in wranglers.items():
        if isinstance(actions, str):
            actions = [actions]
        if not isinstance(actions, list):
            raise utils.StimelaCabRuntimeError(
                f"wrangler entry {match}: expected action or list of action")

        replace = None
        resolved = []
        for action in actions:
            # A non-string action would otherwise fail with AttributeError
            # on .startswith() instead of the intended cab error.
            if not isinstance(action, str):
                raise utils.StimelaCabRuntimeError(
                    f"wrangler entry {match}: expected action or list of action")
            if action.startswith("replace:"):
                replace = action.split(":", 1)[1]
            elif action in _actions:
                resolved.append(_actions[action])
            else:
                raise utils.StimelaCabRuntimeError(
                    f"wrangler entry {match}: unknown action '{action}'")

        self._wranglers.append((re.compile(match), replace, resolved))
def python_job(self, function, parameters=None):
    """
    Register a python callable as this job

    function    :   Python callable to execute
    parameters  :   Parameters to pass to the function (stored as given)

    If ``self.name`` is None it is set to the callable's ``__name__``;
    callables without one (e.g. ``functools.partial`` objects or callable
    instances) fall back to the name of their type.

    Stores ``{'function': ..., 'parameters': ...}`` in ``self.job`` and
    returns 0. Raises utils.StimelaCabRuntimeError if ``function`` is not
    callable.
    """
    if not callable(function):
        raise utils.StimelaCabRuntimeError(
            'Object given as function is not callable')

    if self.name is None:
        # Not every callable carries __name__; avoid an AttributeError here.
        self.name = getattr(function, "__name__", None) or type(function).__name__

    self.job = {
        'function': function,
        'parameters': parameters,
    }

    return 0
def setup_job(self, image, config, indir=None, outdir=None, msdir=None, singularity_image_dir=None):
    """
    Setup job

    When ``self.jtype`` is "python", ``image`` is the callable to run and
    ``config`` its parameters; both are stored in ``self.job`` as a dict.
    For any other job type a container object is created through
    ``CONT_MOD[self.jtype]``, given its volumes and environment variables,
    and stored in ``self.job``.

    image   :   stimela cab name, e.g. 'cab/simms' (the component after the
                first '/' selects the cab directory), or a callable for
                python jobs
    config  :   Dictionary of options to pass to the task. This will modify
                the parameters in the default parameter file, which can be
                viewed by running 'stimela cabs -i <cab name>', e.g
                'stimela cabs -i simms'. Values that are ``dismissable``
                instances are called; the option is dropped when the result
                is None and replaced by the result otherwise (the dict is
                modified in place).
    indir   :   input directory for cab (mounted read-only)
    outdir  :   output directory for cab (created if missing; a 'tmp'
                sub-directory is created inside it)
    msdir   :   MS directory for cab (created if missing)
    singularity_image_dir : directory holding the singularity images; only
                used when ``self.jtype`` is "singularity"

    Returns 0.

    Raises utils.StimelaCabRuntimeError (python job given a non-callable),
    StimelaCabParameterError (invalid characters in ``self.name``),
    ValueError (unknown cab version), StimelaBaseImageError (unverified tag
    without force) and OSError (``stimela_runscript`` not found on $PATH).
    """
    if self.jtype == "python":
        # Validate before touching __name__, so a non-callable produces the
        # intended cab error rather than an AttributeError.
        if not callable(image):
            raise utils.StimelaCabRuntimeError(
                'Object given as function is not callable')
        # Callables such as functools.partial objects have no __name__.
        func_name = getattr(image, "__name__", None) or type(image).__name__
        self.image = func_name
        if self.name is None:
            self.name = func_name

        self.job = {
            'function': image,
            'parameters': config,
        }
        self.setup_job_log()

        return 0

    # check if name has any offending characters
    if re.search(r'[^\w .-]', self.name):
        raise StimelaCabParameterError('The cab name \'{:s}\' contains invalid characters.'
                                       ' Allowed charcaters are alphanumeric, plus [-_. ].'.format(self.name))

    self.setup_job_log()

    # make name palatable as container name
    pausterized_name = re.sub(r"[\W]", "_", self.name)
    name = '{0}-{1}{2}'.format(pausterized_name, id(image),
                               str(time.time()).replace('.', ''))

    cont = getattr(CONT_MOD[self.jtype], "Container")(
        image, name, logger=self.log,
        workdir=CONT_IO["output"], time_out=self.time_out)

    # A custom cab path specified by the user takes precedence over CAB_PATH
    cabpath = os.path.join(self.cabpath or CAB_PATH, image.split("/")[1])

    parameter_file = os.path.join(cabpath, 'parameters.json')
    _cab = cab.CabDefinition(indir=indir, outdir=outdir,
                             msdir=msdir, parameter_file=parameter_file)

    self.setup_output_wranglers(_cab.wranglers)
    cont.IODEST = CONT_IO
    cont.cabname = _cab.task

    # Example
    # ----------------
    # casa_listobs:
    #   tag: <tag>          ## optional
    #   version: <version>  ## optional. If version is a dict, then ignore tag and priority and use <tag>:<version> pairs in dict
    #   force: true         ## Continue even if tag is specified in the parameters.json file

    # NOTE(review): the nesting of the tag/version logic below was
    # reconstructed from a whitespace-collapsed source -- confirm against the
    # original file.
    if self.tag or self.version:
        tvi = None
        if self.tag:
            try:
                tvi = _cab.tag.index(self.tag)
            except ValueError:
                # Unknown tag: fall through to the last entry below
                pass
        elif self.version:
            try:
                tvi = _cab.version.index(self.version)
            except ValueError:
                msg = f"The version, {self.version}, specified for cab '{_cab.task}' is unknown. Available versions are {_cab.version}"
                self.log.error(msg)
                raise ValueError(msg)
        if tvi is None:
            tvi = -1
        self.tag = _cab.tag[tvi]
        self.version = _cab.version[tvi]
    else:
        # Nothing requested: use the last tag/version pair listed by the cab
        self.tag = _cab.tag[-1]
        self.version = _cab.version[-1]

    cabspecs = self.recipe.cabspecs.get(cont.cabname, None)
    if cabspecs:
        _tag = cabspecs.get("tag", None)
        _version = cabspecs.get("version", None)
        _force_tag = cabspecs.get("force", False)
        if isinstance(_version, dict):
            # version -> tag mapping
            if self.version in _version:
                self.tag = _version[self.version]
        elif _version:
            self.version = _version
        else:
            self.tag = _tag
        if self.version and self.version not in _cab.version:
            msg = f"The version, {self.version}, specified for cab '{_cab.task}' is unknown. Available versions are {_cab.version}"
            self.log.error(msg)
            raise ValueError(msg)
        if not _tag:
            # No explicit tag in the cab specs: use the tag paired with the version
            idx = _cab.version.index(self.version)
            self.tag = _cab.tag[idx]
        self.force_tag = _force_tag

    if self.tag not in _cab.tag:
        # presumably self.force_tag is initialised elsewhere when no cabspecs
        # entry exists -- verify against the class constructor
        if self.force_tag:
            self.log.warning(f"You have chosen to use an unverified base image '{_cab.base}:{self.tag}'. May the force be with you.")
        else:
            raise StimelaBaseImageError(f"The base image '{_cab.base}' with tag '{self.tag}' has not been verified. If you wish to continue with it, please add the 'force_tag' when adding it to your recipe")

    if self.jtype == "singularity":
        simage = _cab.base.replace("/", "_")
        cont.image = '{0:s}/{1:s}_{2:s}{3:s}'.format(singularity_image_dir,
                                                     simage, self.tag, singularity.suffix)
        cont.image = os.path.abspath(cont.image)
        if not os.path.exists(cont.image):
            singularity.pull(":".join([_cab.base, self.tag]),
                             os.path.basename(cont.image), directory=singularity_image_dir)
    else:
        cont.image = ":".join([_cab.base, self.tag])

    # Container parameter file will be updated and validated before the container is executed
    cont._cab = _cab
    cont.parameter_file_name = '{0}/{1}.json'.format(
        self.recipe.parameter_file_dir, name)

    self.image = str(cont.image)

    # Remove dismissable kw arguments. Keys are collected first because the
    # dict must not change size while it is being iterated.
    ops_to_pop = [op for op in config if isinstance(config[op], dismissable)]
    for op in ops_to_pop:
        arg = config.pop(op)()
        if arg is not None:
            config[op] = arg
    cont.config = config

    # These are standard volumes and
    # environmental variables. These will
    # always exist in a cab container
    cont.add_volume(cont.parameter_file_name,
                    f'{cab.MOUNT}/configfile', perm='ro', noverify=True)
    cont.add_volume(os.path.join(cabpath, "src"), f"{cab.MOUNT}/code", "ro")

    cont.add_volume(os.path.join(self.workdir, "passwd"), "/etc/passwd")
    cont.add_volume(os.path.join(self.workdir, "group"), "/etc/group")

    cont.RUNSCRIPT = f"/{self.jtype}_run"
    if self.jtype == "singularity":
        cont.RUNSCRIPT = f"/{self.jtype}"
        if _cab.base.startswith(("stimela/casa", "stimela/simms")):
            cont.add_environ("LANGUAGE", "en_US.UTF-8")
            cont.add_environ("LANG", "en_US.UTF-8")
            cont.add_environ("LC_ALL", "en_US.UTF-8")
        # NOTE(review): in the flattened source it is ambiguous whether this
        # assignment applies to every singularity job or only to the
        # casa/simms bases above -- confirm.
        cont.execdir = self.workdir

    runscript = shutil.which("stimela_runscript")
    if runscript:
        cont.add_volume(runscript, cont.RUNSCRIPT, perm="ro")
    else:
        msg = ("Stimela container runscript could not be found. "
               "This may be due to conflicting python or stimela installations in your $PATH.")
        self.log.error(msg)
        raise OSError(msg)

    cont.add_environ('CONFIG', f'{cab.MOUNT}/configfile')
    cont.add_environ('HOME', cont.IODEST["output"])
    cont.add_environ('STIMELA_MOUNT', cab.MOUNT)

    if msdir:
        md = cont.IODEST["msfile"]
        os.makedirs(msdir, exist_ok=True)
        cont.add_volume(msdir, md)
        cont.add_environ("MSDIR", md)
        # Keep a record of the content of the
        # volume
        dirname, dirs, files = next(os.walk(msdir))
        cont.msdir_content = {
            "volume": dirname,
            "dirs": dirs,
            "files": files,
        }

        self.log.debug(
            'Mounting volume \'{0}\' from local file system to \'{1}\' in the container'.format(msdir, md))

    if indir:
        cont.add_volume(indir, cont.IODEST["input"], perm='ro')
        cont.add_environ("INPUT", cont.IODEST["input"])
        # Keep a record of the content of the
        # volume
        dirname, dirs, files = next(os.walk(indir))
        cont.input_content = {
            "volume": dirname,
            "dirs": dirs,
            "files": files,
        }

        self.log.debug('Mounting volume \'{0}\' from local file system to \'{1}\' in the container'.format(
            indir, cont.IODEST["input"]))

    os.makedirs(outdir, exist_ok=True)

    od = cont.IODEST["output"]

    cont.logfile = self.logfile
    cont.add_volume(outdir, od, "rw")
    cont.add_environ("OUTPUT", od)

    # temp files go into output
    tmpfol = os.path.join(outdir, "tmp")
    os.makedirs(tmpfol, exist_ok=True)
    cont.add_volume(tmpfol, cont.IODEST["tmp"], "rw")
    cont.add_environ("TMPDIR", cont.IODEST["tmp"])

    self.log.debug(
        'Mounting volume \'{0}\' from local file system to \'{1}\' in the container'.format(outdir, od))

    # Added and ready for execution
    self.job = cont

    return 0
def setup_job(self, image, config, indir=None, outdir=None, msdir=None,
              build_label=None, singularity_image_dir=None, **kw):
    """
    Setup job

    When ``self.jtype`` is "python", ``image`` is the callable to run and
    ``config`` its parameters; both are stored in ``self.job`` as a dict.
    For any other job type a container object is created through
    ``CONT_MOD[self.jtype]``, given its volumes and environment variables,
    and stored in ``self.job``.

    image   :   stimela cab name, e.g. 'cab/simms' (the component after the
                first '/' selects the cab directory), or a callable for
                python jobs
    config  :   Dictionary of options to pass to the task. This will modify
                the parameters in the default parameter file, which can be
                viewed by running 'stimela cabs -i <cab name>', e.g
                'stimela cabs -i simms'. Values that are ``dismissable``
                instances are called; the option is dropped when the result
                is None and replaced by the result otherwise (the dict is
                modified in place).
    indir   :   input directory for cab (mounted read-only)
    outdir  :   output directory for cab (created if missing; a 'tmp'
                sub-directory is created inside it)
    msdir   :   MS directory for cab
    build_label : label used to locate the docker cab log file and to name
                the docker image
    singularity_image_dir : directory holding the singularity images;
                defaults to './stimela_singularity_images'
    **kw    :   accepted but not used by this method

    ``indir``, ``outdir``, ``msdir`` and ``build_label`` are overridden by the
    '_STIMELA_INPUT', '_STIMELA_OUTPUT', '_STIMELA_MSDIR' and
    '_STIMELA_BUILD_LABEL' entries of ``self.recipe.stimela_context`` when
    those are set.

    Returns 0.

    Raises utils.StimelaCabRuntimeError (python job given a non-callable) and
    StimelaCabParameterError (invalid characters in ``self.name``).
    """
    if self.jtype == "python":
        # Validate before touching __name__, so a non-callable produces the
        # intended cab error rather than an AttributeError.
        if not callable(image):
            raise utils.StimelaCabRuntimeError(
                'Object given as function is not callable')
        # Callables such as functools.partial objects have no __name__.
        func_name = getattr(image, "__name__", None) or type(image).__name__
        self.image = func_name
        if self.name is None:
            self.name = func_name

        self.job = {
            'function': image,
            'parameters': config,
        }

        return 0

    # check if name has any offending characters
    if re.search(r'[^\w .-]', self.name):
        raise StimelaCabParameterError('The cab name \'{:s}\' contains invalid characters.'
                                       ' Allowed charcaters are alphanumeric, plus [-_. ].'.format(self.name))

    # Update I/O with values specified on command line
    script_context = self.recipe.stimela_context
    indir = script_context.get('_STIMELA_INPUT', None) or indir
    outdir = script_context.get('_STIMELA_OUTPUT', None) or outdir
    msdir = script_context.get('_STIMELA_MSDIR', None) or msdir
    build_label = script_context.get(
        '_STIMELA_BUILD_LABEL', None) or build_label

    self.setup_job_log()

    # make name palatable as container name
    pausterized_name = re.sub(r"[\W]", "_", self.name)
    name = '{0}-{1}{2}'.format(pausterized_name, id(image),
                               str(time.time()).replace('.', ''))

    cont = getattr(CONT_MOD[self.jtype], "Container")(
        image, name, logger=self.log,
        workdir=WORKDIR, time_out=self.time_out)

    if self.jtype == "docker":
        # Get location of template parameters file
        cabs_loc = f"{stimela.LOG_HOME}/{build_label}_stimela_logfile.json"
        cabs_logger = get_cabs(cabs_loc)
        try:
            cabpath = cabs_logger[f'{build_label}_{cont.image}']['DIR']
        except KeyError:
            # Cab is not in the log yet: build it, then look it up again
            main.build(["--us-only", cont.image.split("/")[-1],
                        "--no-cache", "--build-label", build_label])
            cabs_logger = get_cabs(cabs_loc)
            cabpath = cabs_logger[f'{build_label}_{cont.image}']['DIR']
    else:
        cabpath = os.path.join(CAB_PATH, image.split("/")[1])

    # In case the user specified a custom cab
    if self.cabpath:
        cabpath = os.path.join(self.cabpath, image.split("/")[1])

    parameter_file = os.path.join(cabpath, 'parameters.json')
    _cab = cab.CabDefinition(indir=indir, outdir=outdir,
                             msdir=msdir, parameter_file=parameter_file)
    cont.IODEST = CONT_IO
    cont.cabname = _cab.task

    if self.jtype == "docker":
        cont.image = '{0}_{1}'.format(build_label, image)
    elif self.jtype == "singularity":
        simage = _cab.base.replace("/", "_")
        if singularity_image_dir is None:
            singularity_image_dir = os.path.join(".", "stimela_singularity_images")
        cont.image = '{0:s}/{1:s}_{2:s}{3:s}'.format(singularity_image_dir,
                                                     simage, _cab.tag, singularity.suffix)
        if not os.path.exists(cont.image):
            # Pass the arguments as a list: formatting them into one string
            # and splitting it would break directory paths containing
            # whitespace.
            main.pull(["-s", "-cb", cont.cabname, "-pf", singularity_image_dir])
    else:
        cont.image = ":".join([_cab.base, _cab.tag])

    # Container parameter file will be updated and validated before the container is executed
    cont._cab = _cab
    cont.parameter_file_name = '{0}/{1}.json'.format(
        self.recipe.parameter_file_dir, name)

    self.image = str(cont.image)

    # Remove dismissable kw arguments. Keys are collected first because the
    # dict must not change size while it is being iterated.
    ops_to_pop = [op for op in config if isinstance(config[op], dismissable)]
    for op in ops_to_pop:
        arg = config.pop(op)()
        if arg is not None:
            config[op] = arg
    cont.config = config

    # These are standard volumes and
    # environmental variables. These will
    # always exist in a cab container
    cont.add_volume(self.recipe.stimela_path,
                    '/scratch/stimela', perm='ro')
    cont.add_volume(cont.parameter_file_name,
                    '/scratch/configfile', perm='ro', noverify=True)
    cont.add_volume(os.path.join(cabpath, "src"), "/scratch/code", "ro")

    if self.jtype == "singularity":
        cont.RUNSCRIPT = f"/{self.jtype}"
    else:
        cont.RUNSCRIPT = f"/{self.jtype}_run"

    # NOTE(review): in the flattened source it is ambiguous whether this
    # mount belongs to the else-branch above or applies to every job type;
    # it is kept unconditional here -- confirm.
    cont.add_volume(f"{BIN}/stimela_runscript", cont.RUNSCRIPT, perm="ro")

    cont.add_environ('CONFIG', '/scratch/configfile')
    cont.add_environ('HOME', WORKDIR)

    if msdir:
        md = cont.IODEST["msfile"]
        cont.add_volume(msdir, md)
        cont.add_environ("MSDIR", md)
        # Keep a record of the content of the
        # volume
        dirname, dirs, files = next(os.walk(msdir))
        cont.msdir_content = {
            "volume": dirname,
            "dirs": dirs,
            "files": files,
        }

        self.log.debug(
            'Mounting volume \'{0}\' from local file system to \'{1}\' in the container'.format(msdir, md))

    if indir:
        cont.add_volume(indir, cont.IODEST["input"], perm='ro')
        cont.add_environ("INPUT", cont.IODEST["input"])
        # Keep a record of the content of the
        # volume
        dirname, dirs, files = next(os.walk(indir))
        cont.input_content = {
            "volume": dirname,
            "dirs": dirs,
            "files": files,
        }

        self.log.debug('Mounting volume \'{0}\' from local file system to \'{1}\' in the container'.format(
            indir, cont.IODEST["input"]))

    # makedirs copes with nested output paths and with the directory being
    # created concurrently, unlike a check followed by os.mkdir.
    os.makedirs(outdir, exist_ok=True)

    od = cont.IODEST["output"]

    cont.logfile = self.logfile
    cont.add_volume(outdir, od, "rw")
    cont.add_environ("OUTPUT", od)

    # temp files go into output
    tmpfol = os.path.join(outdir, "tmp")
    os.makedirs(tmpfol, exist_ok=True)
    cont.add_volume(tmpfol, cont.IODEST["tmp"], "rw")
    cont.add_environ("TMPDIR", cont.IODEST["tmp"])

    self.log.debug(
        'Mounting volume \'{0}\' from local file system to \'{1}\' in the container'.format(outdir, od))

    # Added and ready for execution
    self.job = cont

    return 0