def __init__(
    self,
    recipes: dict = None,
    paths: Union[str, List[str]] = None,
    directories: Union[str, List[str]] = None,
):
    """
    Store the recipe sources: inline recipes, yaml files and yaml directories.

    :param recipes: A dictionary of input values, keyed by the recipe name.
    :type recipes: dict
    :param paths: a path (or list of paths) to ``*.yaml`` files, where each file contains
        a dictionary of input values, keyed by the recipe name, similar to ``recipes``.
    :type paths: Union[str, List[str]]
    :param directories: a directory (or list of directories) of ``*.yaml`` files,
        where the ``*`` is the recipe name.
    :type directories: Union[str, List[str]]
    """
    self.recipes = recipes or {}
    self.paths: Optional[List[str]] = None
    self.directories: Optional[List[str]] = None
    self._files_by_key = None
    self._loaded_recipes = False

    if paths:
        # A lone string is wrapped so the attribute always honours its declared
        # Optional[List[str]] type instead of sometimes being a bare str.
        path_values = paths if isinstance(paths, list) else [paths]
        self.paths = [fully_qualify_filename(p) for p in path_values]

    if directories:
        # Same normalisation as for ``paths``.
        directory_values = (
            directories if isinstance(directories, list) else [directories]
        )
        self.directories = [fully_qualify_filename(d) for d in directory_values]
def create_task_base(self, wf: Workflow, job: PreparedJob):
    """
    Allocate a new submission id for ``job``, prepare its directories and
    (optionally) record it in the central task database.

    :param wf: the workflow being submitted (only forwarded on the retry call)
    :param job: prepared job; its ``execution_dir`` is filled in / fully qualified in place
    :return: the TaskRow describing the new submission
    """
    taken_ids = set()
    if job.store_in_central_db:
        try:
            with self.with_cursor() as cursor:
                id_rows = cursor.execute("SELECT id FROM tasks").fetchall()
                taken_ids = {r[0] for r in id_rows}
        except sqlite3.OperationalError as e:
            if "no such column: id" not in repr(e):
                raise

            # The existing database has no 'id' column: move it aside and retry
            # this method after dropping the cached task DB handle.
            from shutil import move

            stamp = datetime.utcnow().strftime("%Y%m%d")
            backup_path = f"{job.db_path}.original-{stamp}"
            Logger.warn(f"Moving old janis-db to '{backup_path}'")
            move(job.db_path, backup_path)
            self._taskDB = None
            return self.create_task_base(wf=wf, job=job)

    submission_id = generate_new_id(taken_ids)
    output_dir = fully_qualify_filename(job.output_dir)

    if not job.execution_dir:
        # Fall back to a 'janis' folder inside the output directory
        job.execution_dir = os.path.join(output_dir, "janis")
        Logger.debug(
            f"No execution-dir was provided, constructed one from the output-dir: {job.execution_dir}"
        )
    job.execution_dir = fully_qualify_filename(job.execution_dir)

    Logger.info(
        f"Starting task with id = '{submission_id}' | output dir: {job.output_dir} | execution dir: {job.execution_dir}"
    )

    row = TaskRow(
        submission_id, execution_dir=job.execution_dir, output_dir=output_dir
    )
    WorkflowManager.create_dir_structure(job.execution_dir)

    if not job.store_in_central_db:
        Logger.info(
            f"Not storing task '{submission_id}' in database. To watch, use: 'janis watch {output_dir}'"
        )
    else:
        self.get_lazy_db_connection().insert_task(row)

    # Flush and release any open connection, and forget the cached handles
    if self._connection:
        self._connection.commit()
        self._connection.close()
        self._taskDB = None
        self._connection = None

    return row
def __init__(
    self, outdir: str, wid: str, environment: Environment = None, readonly=False
):
    """
    Set up the manager's state for a task rooted at ``outdir``.

    :param outdir: task directory; it is fully qualified and its directory structure created
    :param wid: workflow id; when falsy it is replaced by ``self.get_engine_wid()``
    :param environment: optional environment, stored as-is
    :param readonly: forwarded to the WorkflowDbManager
    """
    # Plain state first
    self.wid = wid
    self._failed_engine_attempts = None
    # hydrate from here if required
    self._engine_wid = None

    # The path has to exist before the database manager is constructed
    self.path = fully_qualify_filename(outdir)
    self.create_dir_structure(self.path)

    task_path = self.get_task_path_safe()
    self.database = WorkflowDbManager(wid, task_path, readonly=readonly)

    self.environment = environment
    self.dbcontainer: MySql = None
    self.main_queue = queue.Queue()
    self._prev_status = None
    self._engine: Optional[Engine] = None

    # No id was supplied: ask for the engine's one
    if not self.wid:
        self.wid = self.get_engine_wid()
def process_container_dir(container_dir):
    """
    Resolve the directory used to cache singularity containers.

    An explicit ``container_dir`` wins; otherwise the first non-empty value of the
    environment variables CWL_SINGULARITY_CACHE and SINGULARITY_TMPDIR is used.

    :param container_dir: explicit directory, or None to search the environment
    :return: the fully qualified directory
    :raises Exception: if no directory was given and neither variable is set
    """
    from os import environ

    if container_dir is not None:
        return fully_qualify_filename(container_dir)

    envs_to_search = ["CWL_SINGULARITY_CACHE", "SINGULARITY_TMPDIR"]
    # Keep only the variables that are set to a non-empty value, in search order
    found_values = [v for v in map(environ.get, envs_to_search) if v]
    if found_values:
        return fully_qualify_filename(found_values[0])

    raise Exception(
        "Couldn't find a directory to cache singularity containers, please provide a "
        "'container_dir' to your template, or set one of the following env variables: "
        + ", ".join(envs_to_search)
    )
def fully_qualify_filename_array_or_single(value: Union[str, List[str], List[List[str]]]):
    """
    Fully qualify a filename, or every filename in an arbitrarily nested list.

    :param value: a single filename, or a (possibly nested) list of filenames
    :return: the qualified filename, or a list with the same nesting as ``value``
    """
    if not isinstance(value, list):
        return fully_qualify_filename(value)

    qualified = []
    for element in value:
        # Recurse so that nested lists keep their structure
        qualified.append(
            InputFileQualifierModifier.fully_qualify_filename_array_or_single(element)
        )
    return qualified
def from_wid(self, wid, readonly=False):
    """
    Build a WorkflowManager from a task id, falling back to treating ``wid`` as a path.

    :param wid: id looked up in the ``tasks`` table; if absent, it is tried as a directory
    :param readonly: stored on ``self`` and forwarded to the WorkflowManager
    :raises Exception: if no task row matches and no such path exists
    """
    self.readonly = readonly

    with self.with_cursor() as cursor:
        lookup = cursor.execute("SELECT outputdir FROM tasks where wid=?", (wid,))
        row = lookup.fetchone()

    if row:
        return WorkflowManager.from_path_with_wid(row[0], wid=wid, readonly=readonly)

    # Nothing in the database: maybe the caller handed us a directory instead
    candidate = fully_qualify_filename(wid)
    if not os.path.exists(candidate):
        raise Exception(f"Couldn't find task with id='{wid}'")
    return WorkflowManager.from_path_get_latest(candidate, readonly=readonly)
def __init__(self, db_path: Optional[str], readonly=False):
    """
    Record the location of the janis database and make sure its folder exists.

    :param db_path: path of the sqlite database; when falsy, ``janis.db`` inside the
        resolved config directory is used
    :param readonly: stored on the instance as ``self.readonly``
    """
    self.readonly = readonly
    if not db_path:
        config_dir = EnvVariables.config_dir.resolve(True)
        Logger.log(
            f"db_path wasn't provided to config manager, using config_dir: '{config_dir}/janis.db'"
        )
        db_path = fully_qualify_filename(os.path.join(config_dir, "janis.db"))
    self.db_path = db_path
    # Evaluated before the parent directory is created
    self.is_new = not os.path.exists(db_path)

    parent_dir = os.path.dirname(db_path)
    # A bare filename (e.g. "janis.db") has an empty dirname, and
    # os.makedirs("") raises FileNotFoundError, so only create real parents.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    self._connection: Optional[sqlite3.Connection] = None
    self._taskDB: Optional[TasksDbProvider] = None
def generate_output_dir_from(wf_id, output_dir, jc_output_dir):
    """
    Work out the fully qualified output directory for a run.

    :param wf_id: workflow id, used as a sub-folder of ``jc_output_dir``
    :param output_dir: explicit output directory; used as-is when provided
    :param jc_output_dir: configured base output directory, used when ``output_dir`` is not given
    :return: the fully qualified output directory
    :raises Exception: if neither ``output_dir`` nor ``jc_output_dir`` is provided
    """
    if not (output_dir or jc_output_dir):
        raise Exception(
            "You must specify an output directory (or specify an 'output_dir' "
            "in your configuration)"
        )

    fallback_root = os.path.join(jc_output_dir, wf_id) if jc_output_dir else None

    if not output_dir:
        # No explicit directory: <configured dir>/<workflow id>/<timestamp>
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_dir = os.path.join(fallback_root, timestamp)

    return fully_qualify_filename(output_dir)
def get_from_path_or_submission_lazy(
    submission_id,
    readonly: bool,
    db_path: Optional[str] = None,
):
    """
    Get a WorkflowManager from a submission id or a path.

    If ``submission_id`` resolves to an existing path, the manager is built from that
    path directly; otherwise a ConfigManager is created (read-only) and asked to
    resolve the id, with its own path check skipped.

    :return: WorkflowManager for ``submission_id`` (or raises)
    """
    candidate_path = fully_qualify_filename(submission_id)

    if not os.path.exists(candidate_path):
        config_manager = ConfigManager(db_path=db_path, readonly=True)
        return config_manager.get_from_path_or_submission(
            submission_id=submission_id, readonly=readonly, perform_path_check=False
        )

    return WorkflowManager.from_path_get_latest_manager(
        candidate_path, readonly=readonly
    )
def get_row_for_submission_id_or_path(self, submission_id) -> TaskRow:
    """
    Return the TaskRow for a submission id, or synthesise one from a directory.

    The database is consulted first; if it has no match and ``submission_id`` is an
    existing path, a row is built from the latest submission found at that path.

    :raises Exception: if neither the database nor the filesystem has a match
    """
    stored_row = self.get_lazy_db_connection().get_by_id(submission_id)
    if stored_row:
        return stored_row

    candidate_path = fully_qualify_filename(submission_id)
    if not os.path.exists(candidate_path):
        raise Exception(
            f"Couldn't find task with id='{submission_id}', and no directory was found."
        )

    execpath, sid = WorkflowManager.from_path_get_latest_submission_id(candidate_path)
    return TaskRow(
        execution_dir=execpath,
        submission_id=sid,
        output_dir=None,
        timestamp=None,
    )
def get_from_path_or_submission(
    self, submission_id, readonly: bool, perform_path_check=True
):
    """
    Resolve ``submission_id`` to a WorkflowManager.

    :param submission_id: a path to a task directory, or an id stored in the database
    :param readonly: forwarded to the WorkflowManager constructors
    :param perform_path_check: when True, first try ``submission_id`` as a path
    :raises Exception: if nothing matches
    """
    if perform_path_check:
        candidate_path = fully_qualify_filename(submission_id)
        path_exists = os.path.exists(candidate_path)
        if path_exists:
            return WorkflowManager.from_path_get_latest_manager(
                candidate_path, readonly=readonly
            )

    stored_row = self.get_lazy_db_connection().get_by_id(submission_id)
    if not stored_row:
        raise Exception(
            f"Couldn't find task with id='{submission_id}', and no directory was found "
        )

    return WorkflowManager.from_path_with_submission_id(
        stored_row.execution_dir,
        submission_id=submission_id,
        readonly=readonly,
    )
def init_template(
    templatename,
    stream=None,
    unparsed_init_args=None,
    output_location=None,
    force=False,
):
    """
    Write a Janis configuration (optionally based on a template) to disk.

    :param templatename: name of the template to base the config on; when falsy the
        default configuration is written without a template section
    :param stream: when truthy, the configuration is also dumped to ``sys.stdout``
    :param unparsed_init_args: extra command line arguments handed to the template's
        argument parser; may be None
    :param output_location: where to write the config; defaults to the resolved config path
    :param force: overwrite an existing file at the output location
    :return: None
    """
    import ruamel.yaml

    outpath = fully_qualify_filename(
        output_location or EnvVariables.config_path.resolve(True)
    )

    cached_outd = None

    def get_config():
        """
        This is here to lazily instantiate the config
        """
        nonlocal cached_outd
        if not cached_outd:
            outd = JanisConfiguration.default()

            if templatename:
                tmpl = janistemplates.get_template(templatename)
                schema = janistemplates.get_schema_for_template(tmpl)

                mapped_schema_to_default = {
                    s.identifier: s.default for s in schema if s.default is not None
                }

                # parse extra params
                description = dedent(tmpl.__doc__) if tmpl.__doc__ else None

                parser = InitArgParser(templatename, schema, description=description)
                parsed = parser.parse_args(unparsed_init_args)

                try:
                    # "easier to ask for forgiveness than permission" https://stackoverflow.com/a/610923
                    keys_to_skip = set(tmpl.ignore_init_keys)
                except AttributeError:
                    Logger.log(
                        f"Template '{templatename}' didn't have 'ignore_init_keys'"
                    )
                    keys_to_skip = set()

                outd["engine"] = EngineType.cromwell
                # Keep explicitly provided values, plus schema defaults that
                # the template has not asked to be skipped.
                outd["template"] = {
                    s.id(): parsed.get(s.id(), mapped_schema_to_default.get(s.id()))
                    for s in schema
                    if (s.identifier in parsed)
                    or (
                        s.identifier in mapped_schema_to_default
                        and s.identifier not in keys_to_skip
                    )
                }
                outd["template"]["id"] = templatename

            cached_outd = stringify_dict_keys_or_return_value(outd)
        return cached_outd

    # ``unparsed_init_args`` defaults to None, and ``k in None`` raises TypeError,
    # so only look for help flags when arguments were actually supplied.
    # Building the config runs the template's argument parser, which is
    # presumably where the help request gets handled -- TODO confirm.
    if unparsed_init_args and any(
        k in unparsed_init_args for k in ("-h", "--help")
    ):
        get_config()

    does_exist = os.path.exists(outpath)
    if does_exist and not force:
        Logger.info(f"Janis will skip writing config as file exists at: '{outpath}'")
    else:
        if does_exist:
            Logger.info(f"Overwriting template at '{outpath}'")
        else:
            Logger.info(f"Saving Janis config to '{outpath}'")

        os.makedirs(os.path.dirname(outpath), exist_ok=True)
        val = get_config()
        with open(outpath, "w+") as configpath:
            ruamel.yaml.dump(val, configpath, default_flow_style=False)

    if stream:
        ruamel.yaml.dump(get_config(), sys.stdout, default_flow_style=False)
def create_task_base(self, wf: Workflow, outdir=None, store_in_centraldb=True):
    """
    Allocate a new task id and task directory for ``wf`` and optionally store it.

    :param wf: workflow whose ``id()`` names the sub-folder of the configured output dir
    :param outdir: explicit task directory; when omitted one is generated as
        ``<configured outputdir>/<wf id>/<timestamp>_<task id>/``
    :param store_in_centraldb: whether to insert the task into the central database
    :return: the TaskRow of the new task
    :raises Exception: if neither ``outdir`` nor a configured output directory exists
    """
    config = JanisConfiguration.manager()

    if not (outdir or config.outputdir):
        raise Exception(
            f"You must specify an output directory (or specify an '{JanisConfiguration.Keys.OutputDir.value}' "
            f"in your configuration)"
        )

    default_outdir = None
    if config.outputdir:
        default_outdir = os.path.join(config.outputdir, wf.id())

    taken_ids = set()
    if store_in_centraldb:
        with self.with_cursor() as cursor:
            wid_rows = cursor.execute("SELECT wid FROM tasks").fetchall()
            taken_ids = {r[0] for r in wid_rows}

    # Entries already present in the target directory are treated as taken ids.
    # (This should theoretically scoop through all the ones in the taskDB too,
    # though this might cause more issues for now.)
    scan_dir = outdir if outdir else default_outdir
    if os.path.exists(scan_dir):
        taken_ids = taken_ids | set(os.listdir(scan_dir))

    wid = generate_new_id(taken_ids)

    task_path = outdir
    if not task_path:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        task_path = os.path.join(default_outdir, f"{timestamp}_{wid}/")
    task_path = fully_qualify_filename(task_path)

    Logger.info(f"Starting task with id = '{wid}'")

    row = TaskRow(wid, task_path)
    WorkflowManager.create_dir_structure(task_path)

    if not store_in_centraldb:
        Logger.info(
            f"Not storing task '{wid}' in database. To watch, use: 'janis watch {task_path}'"
        )
    else:
        self.get_lazy_db_connection().insert_task(row)

    # Flush and release any open connection, and forget the cached handles
    if self._connection:
        self._connection.commit()
        self._connection.close()
        self._taskDB = None
        self._connection = None

    return row
def test_ignore_gcs(self):
    # A gcs:// location must come back unchanged
    remote_uri = "gcs://janis/readthedocs/io.txt"
    qualified = fully_qualify_filename(remote_uri)
    self.assertEqual(remote_uri, qualified)
def test_ignore_http(self):
    # An http:// location must come back unchanged
    url = "http://janis.readthedocs.io"
    qualified = fully_qualify_filename(url)
    self.assertEqual(url, qualified)
def test_nonrelative(self):
    # An absolute path must come back unchanged
    absolute_path = "/test/my.txt"
    qualified = fully_qualify_filename(absolute_path)
    self.assertEqual(absolute_path, qualified)
def test_user_expand(self):
    # A leading "~" must be expanded exactly like os.path.expanduser does
    from os.path import expanduser

    home_relative = "~/my.txt"
    expected = expanduser(home_relative)
    actual = fully_qualify_filename(home_relative)
    self.assertEqual(expected, actual)
def test_dot_relative(self):
    # "./name" must resolve to <self.cwd>/name
    filename = "my.txt"
    expected = ospathjoin(self.cwd, filename)
    actual = fully_qualify_filename("./" + filename)
    self.assertEqual(expected, actual)
def __init__(
    self,
    output_dir: str = EnvVariables.output_dir.resolve(False),
    execution_dir: str = EnvVariables.exec_dir.resolve(False),
    call_caching_enabled: bool = True,
    engine: str = EngineType.cromwell.value,
    cromwell: Union[JanisConfigurationCromwell, dict] = None,
    template: Union[JanisConfigurationTemplate, dict] = None,
    recipes: Union[JanisConfigurationRecipes, dict] = None,
    notifications: Union[JanisConfigurationNotifications, dict] = None,
    environment: Union[JanisConfigurationEnvironment, dict] = None,
    run_in_background: bool = None,
    digest_cache_location: str = None,
    container: Union[str, Container] = None,
    search_paths: List[str] = None,
):
    """
    :param engine: Default engine to use
    :type engine: "cromwell" | "cwltool"
    :param template: Specify options for a Janis template for configuring an execution environment
    :type template: JanisConfigurationTemplate
    :param cromwell: A dictionary for how to configure Cromwell for Janis
    :type cromwell: JanisConfigurationCromwell
    :param recipes: Configure recipes in Janis
    :type recipes: JanisConfigurationRecipes
    :param notifications: Configure Janis notifications
    :type notifications: JanisConfigurationNotifications
    :param environment: Additional ways to configure the execution environment for Janis
    :type environment: JanisConfigurationEnvironment
    :param output_dir: A directory that Janis will use to generate a new output directory for each janis-run
    :param execution_dir: Move all execution to a static directory outside the regular output directory.
    :param call_caching_enabled: (default: true) call-caching is enabled for subsequent runs, on the SAME output directory
    :param run_in_background: By default, run workflows as a background process. In a SLURM environment, this might submit Janis as a SLURM job.
    :type run_in_background: bool
    :param digest_cache_location: A cache of docker tags to their digests; Janis replaces your docker tag with its `digest <https://docs.docker.com/engine/reference/commandline/pull/#pull-an-image-by-digest-immutable-identifier>`_.
    :type digest_cache_location: str
    :param container: Container technology to use, important for checking if container environment is available and running mysql instance.
    :type container: "docker" | "singularity"
    :param search_paths: A list of paths to check when looking for python files and input files
    :type search_paths: List[str]
    """
    # Locations derived from the resolved config directory
    config_dir = EnvVariables.config_dir.resolve(True)
    self.config_dir = config_dir
    self.db_path = fully_qualify_filename(os.path.join(config_dir, "janis.db"))
    self.digest_cache_location = digest_cache_location or os.path.join(
        config_dir, "digest_cache"
    )

    # Plain values
    self.output_dir = output_dir
    self.execution_dir = execution_dir
    if search_paths:
        self.search_paths = search_paths
    else:
        self.search_paths = [os.path.expanduser("~/janis/")]
    self.engine = engine
    self.call_caching_enabled = call_caching_enabled
    self.run_in_background = run_in_background

    # Sub-configurations: each one goes through parse_if_dict, with an empty
    # dict standing in when nothing was supplied
    self.recipes = parse_if_dict(
        JanisConfigurationRecipes, recipes or {}, "recipes", skip_if_empty=False
    )
    self.template = parse_if_dict(
        JanisConfigurationTemplate, template or {}, "template", skip_if_empty=False
    )
    self.cromwell: JanisConfigurationCromwell = parse_if_dict(
        JanisConfigurationCromwell, cromwell or {}, "cromwell", skip_if_empty=False
    )
    self.notifications: JanisConfigurationNotifications = parse_if_dict(
        JanisConfigurationNotifications,
        notifications or {},
        "notifications",
        skip_if_empty=False,
    )
    self.environment: JanisConfigurationEnvironment = parse_if_dict(
        JanisConfigurationEnvironment,
        environment or {},
        "environment",
        skip_if_empty=False,
    )

    # Gets set by the template for now, but eventually we should be able to look it up
    self.container = None
    if container:
        self.container = (
            container
            if isinstance(container, Container)
            else get_container_by_name(container)
        )

    # Last step: the template's hook receives the fully populated configuration
    if self.template and self.template.template:
        self.template.template.post_configuration_hook(self)