def check_running_jobs(self):
    """
    Check the status/progress of all map-reduce items and update _map status.

    Args:
        self: class instance.

    Returns:
        True.

    """
    # check if procs are running, finished, or failed
    for map_item in self._map:
        try:
            if ShellWrapper.is_running(
                    map_item['run'][map_item['attempt']]['proc']
            ):
                map_item['status'] = 'RUNNING'
            else:
                if map_item['run'][map_item['attempt']]['proc'].returncode:
                    map_item['status'] = 'FAILED'
                else:
                    map_item['status'] = 'FINISHED'

            map_item['run'][map_item['attempt']]['status']\
                = map_item['status']

        except (OSError, AttributeError) as err:
            Log.a().warning(
                'process polling failed for map item "%s" [%s]',
                map_item['filename'], str(err)
            )
            map_item['status'] = 'FAILED'

    self._update_status_db(self._status, '')

    return True
def apply_job_modifiers(jobs_dict, job_mods):
    """Update the jobs_dict with the given modifiers."""
    for mod in job_mods:
        # split at =
        try:
            parts = mod.split('=')
        except ValueError as err:
            Log.a().warning('job mod "%s" is malformed [%s]', mod, str(err))
            continue  # skip mod

        key = parts[0]
        if not key:
            Log.a().warning('empty job mod')
            continue  # skip mod

        val = None
        if len(parts) == 1:
            # only one key, treat as bool switch
            val = True
        elif len(parts) == 2:
            # two parts, key & value
            val = parts[1]
        else:
            # multiple '=', include '=' in value
            val = '='.join(parts[1:])

        # split key at .
        keys = key.split('.')

        # apply to all jobs
        for job in jobs_dict.values():
            set_dict_key_list(job, keys, val)
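# Illustrative usage sketch (not part of the original module): shows how
# apply_job_modifiers() fans modifier strings out to every job in jobs_dict.
# set_dict_key_list() below is a hypothetical minimal stand-in for the real
# GeneFlow helper, assumed to walk nested dict keys and set the final value.
def set_dict_key_list(d, keys, val):
    for k in keys[:-1]:
        d = d.setdefault(k, {})
    d[keys[-1]] = val

jobs = {'job': {'name': 'old', 'inputs': {}}}
apply_job_modifiers(
    jobs,
    ['name=my-job', 'inputs.reads=/data/reads.fq', 'no_output_hash']
)
# jobs['job'] is now:
# {'name': 'my-job', 'inputs': {'reads': '/data/reads.fq'}, 'no_output_hash': True}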
def _init_data_uri(self): """ Create output data URI for the source context (local). Args: self: class instance. Returns: On success: True. On failure: False. """ # make sure the source data URI has a compatible scheme (local) if self._parsed_data_uris[self._source_context]['scheme'] != 'local': msg = 'invalid data uri scheme for this step: {}'.format( self._parsed_data_uris[self._source_context]['scheme'] ) Log.an().error(msg) return self._fatal(msg) # delete old folder if it exists and clean==True if ( DataManager.exists( parsed_uri=self._parsed_data_uris[self._source_context] ) and self._clean ): if not DataManager.delete( parsed_uri=self._parsed_data_uris[self._source_context] ): Log.a().warning( 'cannot delete existing data uri: %s', self._parsed_data_uris[self._source_context]['chopped_uri'] ) # create folder if not DataManager.mkdir( parsed_uri=self._parsed_data_uris[self._source_context], recursive=True ): msg = 'cannot create data uri: {}'.format( self._parsed_data_uris[self._source_context]['chopped_uri'] ) Log.an().error(msg) return self._fatal(msg) # create _log folder if not DataManager.mkdir( uri='{}/_log'.format( self._parsed_data_uris[self._source_context]['chopped_uri'] ), recursive=True ): msg = 'cannot create _log folder in data uri: {}/_log'.format( self._parsed_data_uris[self._source_context]['chopped_uri'] ) Log.an().error(msg) return self._fatal(msg) return True
def _copy_asset(self, asset):
    """
    Copy app assets.

    Args:
        self: class instance
        asset: what to copy

    Returns:
        On success: True.
        On failure: False.

    """
    if not self._copy_prefix:
        Log.a().warning(
            'copy prefix must be specified when copying app assets'
        )
        return False

    if not asset.get('dst'):
        Log.a().warning('asset dst required for app %s', self._app['name'])
        return False

    if not asset.get('src'):
        Log.a().warning('asset src required for app %s', self._app['name'])
        return False

    # create asset destination
    asset_path = Path(self._path / asset['dst'])
    asset_path.mkdir(exist_ok=True)

    if 'zip' in asset:
        # create a tar.gz of src
        cmd = 'tar -czf "{}" --directory="{}" .'.format(
            str(Path(asset_path / '{}.tar.gz'.format(asset['zip']))),
            str(Path(self._copy_prefix) / asset['src'])
        )
        Log.some().info('zipping: %s', cmd)
        cmd_result = ShellWrapper.invoke(cmd)
        if cmd_result is False:
            Log.a().warning('cannot zip asset src: %s', cmd)
            return False

        Log.some().info('tar stdout: %s', cmd_result)

    else:
        # copy without creating tar.gz
        cmd = 'cp -R "{}" "{}"'.format(
            str(Path(self._copy_prefix) / asset['src']),
            str(asset_path)
        )
        Log.some().info('copying: %s', cmd)
        cmd_result = ShellWrapper.invoke(cmd)
        if cmd_result is False:
            Log.a().warning('cannot copy asset src: %s', cmd)
            return False

        Log.some().info('copy stdout: %s', cmd_result)

    return True
def _send_notifications(self, status):
    """Send a notification to each configured endpoint when job status changes."""
    # construct message
    msg_data = {
        'to': '',
        'from': '*****@*****.**',
        'subject': 'GeneFlow Job "{}": {}'.format(
            self._job['name'], status
        ),
        'content': (
            'Your GeneFlow job status has changed to {}'
            '\nJob Name: {}'
            '\nJob ID: {}'
        ).format(status, self._job['name'], self._job_id)
    }

    # use agave token as header if available
    if 'agave' in self._workflow_context:
        msg_headers = {
            'Authorization': 'Bearer {}'.format(
                self._workflow_context['agave']\
                    .get_context_options()['agave_wrapper']\
                    ._agave.token.token_info.get('access_token')
            )
        }
    else:
        msg_headers = {}

    Log.some().info('message headers: %s', str(msg_headers))

    for notify in self._job['notifications']:
        Log.some().info(
            'sending notification(s) to %s @ %s',
            str(notify['to']), notify['url'],
        )

        to_list = notify['to']
        if isinstance(notify['to'], str):
            to_list = [notify['to']]

        for to_item in to_list:
            msg_data['to'] = to_item

            try:
                response = requests.post(
                    notify['url'], data=msg_data, headers=msg_headers
                )
            except requests.exceptions.RequestException as err:
                Log.a().warning(
                    'cannot send notification to %s @ %s: %s',
                    to_item, notify['url'], str(err)
                )
                # skip status check; response is not defined on error
                continue

            if response.status_code != 201:
                Log.a().warning(
                    'cannot send notification to %s @ %s: %s',
                    to_item, notify['url'], response.text
                )
def retry_failed(self, map_item):
    """
    Retry a failed or stopped job.

    Args:
        self: class instance.
        map_item: map item object (item of self._map) to retry.

    Returns:
        True if failed/stopped job restarted successfully.
        False if failed/stopped job not restarted due to error.

    """
    # retry job
    Log.some().info(
        '[step.%s]: retrying agave job (%s), attempt number %s',
        self._step['name'],
        map_item['template']['output'],
        map_item['attempt']+1
    )

    # add another run to list
    map_item['attempt'] += 1
    map_item['run'].append({})
    if not self._run_map(map_item):
        Log.a().warning(
            '[step.%s]: cannot retry agave job (%s), attempt number %s',
            self._step['name'],
            map_item['template']['output'],
            map_item['attempt']
        )
        return False

    return True
def compile_template(template_path, template_name, compiled_name, **kwargs):
    """
    Compile a GeneFlow template file.

    Args:
        template_path: search path for templates. If omitted, the GeneFlow
            package path of data/templates is used.
        template_name: name of the template file, must be stored in
            data/templates of the GeneFlow source package.
        compiled_name: full path of the compiled target file.
        kwargs: data to populate the template.

    Returns:
        On success: True.
        On failure: False.

    """
    # set default template path
    if not template_path:
        template_path = GF_PACKAGE_PATH / 'data/templates'

    # load template
    try:
        template_loader = jinja2.FileSystemLoader(
            searchpath=str(template_path)
        )
        template_env = jinja2.Environment(
            loader=template_loader, trim_blocks=True, lstrip_blocks=True
        )
        template = template_env.get_template(template_name)
    except jinja2.TemplateSyntaxError as err:
        Log.an().warning(
            'cannot load template, syntax error: %s [%s, %s]',
            template_name, str(err), str(err.lineno)
        )
        return False
    except jinja2.TemplateError as err:
        Log.an().warning(
            'cannot load template: %s [%s]', template_name, str(err)
        )
        return False

    # compile and write
    try:
        with open(str(compiled_name), 'w') as compiled_file:
            compiled_file.write(template.render(**kwargs))
    except IOError as err:
        Log.an().warning(
            'cannot write compiled template file: %s [%s]',
            compiled_name, str(err)
        )
        return False
    except jinja2.TemplateError as err:
        Log.a().warning(
            'cannot compile template: %s [%s]', template_name, str(err)
        )
        return False

    return True
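# Illustrative usage sketch (not part of the original module): renders a
# Jinja2 template into a target file. The template name matches the one used
# by register_agave_app() later in this listing; the output path and kwarg
# values are hypothetical examples.
if compile_template(
        None,                       # fall back to GF_PACKAGE_PATH/data/templates
        'agave-app-def.json.j2',    # template name
        '/tmp/agave-app-def.json',  # compiled output path
        version='0.1',
        agave={'appsDir': '/apps'}
):
    print('template compiled')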
def __del__(self):
    """
    Disconnect from drmaa session when workflow class is deleted.

    Args:
        None.

    Returns:
        Nothing.

    """
    try:
        self._drmaa_session.exit()
    except drmaa.errors.DrmaaException as err:
        Log.a().warning('cannot exit drmaa session: [%s]', str(err))
def _re_init(self):
    """Reinitialize the drmaa session."""
    # exit existing session
    try:
        self._drmaa_session.exit()
    except drmaa.errors.DrmaaException as err:
        Log.a().warning('cannot exit drmaa session: [%s]', str(err))

    # initialize session again
    try:
        self._drmaa_session.initialize()
    except drmaa.errors.DrmaaException as err:
        Log.an().error('cannot initialize drmaa session: [%s]', str(err))
        return False

    return True
def apply_job_modifiers(jobs_dict, job_mods):
    """Update the jobs_dict with the given modifiers."""
    for mod in job_mods:
        # split at =
        try:
            key, val = mod.split('=')
        except ValueError as err:
            Log.a().warning('job mod "%s" is malformed [%s]', mod, str(err))
            continue  # skip mod

        # split key at .
        keys = key.split('.')

        # apply to all jobs
        for job in jobs_dict.values():
            set_dict_key_list(job, keys, val)
def check_running_jobs(self): """ Check the status/progress of all map-reduce items and update _map status. Args: self: class instance. Returns: True. """ # check if jobs are running, finished, or failed for map_item in self._map: if map_item['status'] != 'FINISHED' and map_item[ 'status'] != 'FAILED': # can only get job status if it has not already been disposed with "wait" status = self._gridengine['drmaa_session'].jobStatus( map_item['run'][map_item['attempt']]['hpc_job_id']) map_item['status'] = self._job_status_map[status] if map_item['status'] == 'FINISHED' or map_item[ 'status'] == 'FAILED': # check exit status job_info = self._gridengine['drmaa_session'].wait( map_item['run'][map_item['attempt']]['hpc_job_id'], self._gridengine['drmaa_session'].TIMEOUT_NO_WAIT) Log.a().debug('[step.%s]: exit status: %s -> %s', self._step['name'], map_item['template']['output'], job_info.exitStatus) if job_info.exitStatus > 0: # job actually failed map_item['status'] = 'FAILED' map_item['run'][map_item['attempt']]['status'] = map_item['status'] if map_item['status'] == 'FAILED' and map_item['attempt'] < 5: # retry job if not at limit if not self.retry_failed(map_item): Log.a().warning( '[step.%s]: cannot retry failed gridengine job (%s)', self._step['name'], map_item['template']['output']) self._update_status_db(self._status, '') return True
def check_running_jobs(self):
    """
    Check the status/progress of all map-reduce items and update _map status.

    Args:
        self: class instance.

    Returns:
        True.

    """
    # check if procs are running, finished, or failed
    for map_item in self._map:

        if map_item['status'] in ['RUNNING', 'UNKNOWN']:
            try:
                if not ShellWrapper.is_running(
                        map_item['run'][map_item['attempt']]['proc']
                ):
                    returncode = map_item['run'][
                        map_item['attempt']]['proc'].returncode
                    if returncode:
                        map_item['status'] = 'FAILED'
                    else:
                        map_item['status'] = 'FINISHED'

                    Log.a().debug(
                        '[step.%s]: exit status: %s -> %s',
                        self._step['name'],
                        map_item['template']['output'],
                        returncode
                    )

                    # decrease num running procs
                    if self._num_running > 0:
                        self._num_running -= 1

            except (OSError, AttributeError) as err:
                Log.a().warning(
                    'process polling failed for map item "%s" [%s]',
                    map_item['filename'], str(err)
                )
                map_item['status'] = 'UNKNOWN'

            map_item['run'][map_item['attempt']]['status']\
                = map_item['status']

    self._update_status_db(self._status, '')

    return True
def check_running_jobs(self):
    """
    Check the status/progress of all map-reduce items and update _map status.

    Args:
        self: class instance.

    Returns:
        True.

    """
    # check if jobs are running, finished, or failed
    for map_item in self._map:

        if map_item['status'] not in ['FINISHED', 'FAILED', 'PENDING']:
            try:
                # can only get job status if it has not already been
                # disposed with "wait"
                status = self._slurm['drmaa_session'].jobStatus(
                    map_item['run'][map_item['attempt']]['hpc_job_id'])
                map_item['status'] = self._job_status_map[status]

            except drmaa.DrmCommunicationException as err:
                msg = 'cannot get job status for step "{}" [{}]'\
                    .format(self._step['name'], str(err))
                Log.a().warning(msg)
                map_item['status'] = 'UNKNOWN'

            if map_item['status'] in ['FINISHED', 'FAILED']:
                # check exit status
                job_info = self._slurm['drmaa_session'].wait(
                    map_item['run'][map_item['attempt']]['hpc_job_id'],
                    self._slurm['drmaa_session'].TIMEOUT_NO_WAIT)
                Log.a().debug(
                    '[step.%s]: exit status: %s -> %s',
                    self._step['name'],
                    map_item['template']['output'],
                    job_info.exitStatus
                )
                if job_info.exitStatus > 0:
                    # job actually failed
                    map_item['status'] = 'FAILED'

                # decrease num running procs
                if self._num_running > 0:
                    self._num_running -= 1

        map_item['run'][map_item['attempt']]['status'] = map_item['status']

        if map_item['status'] == 'FAILED' and map_item['attempt'] < 5:
            if self._throttle_limit == 0 or self._num_running < self._throttle_limit:
                # retry job if not at retry or throttle limit
                if not self.retry_failed(map_item):
                    Log.a().warning(
                        '[step.%s]: cannot retry failed slurm job (%s)',
                        self._step['name'], map_item['template']['output'])
                else:
                    self._num_running += 1

    self._update_status_db(self._status, '')

    return True
def upload_agave_test_data(self): """ Upload Agave test data from workflow package. Args: self: class instance. Returns: None """ if (not self._agave or not self._agave_params or not self._agave_params.get('agave')): Log.a().warning( 'must provide agave parameters to upload test data') return False # create main test data URI parsed_base_test_uri = URIParser.parse('agave://{}/{}'.format( self._agave_params['agave']['deploymentSystem'], self._agave_params['agave']['testDataDir'])) Log.some().info('creating base test data uri: %s', parsed_base_test_uri['chopped_uri']) if not DataManager.mkdir(parsed_uri=parsed_base_test_uri, recursive=True, agave={ 'agave': self._agave, 'agave_config': self._config['agave'] }): Log.a().warning('cannot create base test data uri: %s', parsed_base_test_uri['chopped_uri']) return False # upload test data parsed_local_test_uri = URIParser.parse(str(Path(self._path) / 'data')) parsed_agave_test_uri = URIParser.parse('{}/{}'.format( parsed_base_test_uri['chopped_uri'], Path(self._path).name)) Log.some().info('copying test data from %s to %s', parsed_local_test_uri['chopped_uri'], parsed_agave_test_uri['chopped_uri']) if not DataManager.copy(parsed_src_uri=parsed_local_test_uri, parsed_dest_uri=parsed_agave_test_uri, local={}, agave={ 'agave': self._agave, 'agave_config': self._config['agave'] }): Log.a().warning('cannot copy test data from %s to %s', parsed_local_test_uri['chopped_uri'], parsed_agave_test_uri['chopped_uri']) return False return True
def _update_status_db(self, status, msg):
    """
    Update workflow status in DB.

    Args:
        self: class instance
        status: Workflow status
        msg: Success, error or warning message

    Returns:
        On success: True.
        On failure: False.

    """
    try:
        data_source = DataSource(self._config['database'])
    except DataSourceException as err:
        msg = 'data source initialization error [{}]'.format(str(err))
        Log.an().error(msg)
        return False

    # set start time (if started, or errored immediately)
    if (
            status in ['RUNNING', 'ERROR']
            and self._status == 'PENDING'
    ):
        if not data_source.set_job_started(self._job_id):
            Log.a().warning('cannot set job start time in data source')
            data_source.rollback()

    # set finished time (even on error)
    if status in ['FINISHED', 'ERROR']:
        if not data_source.set_job_finished(self._job_id):
            Log.a().warning('cannot set job finish time in data source')
            data_source.rollback()

    # if state change, contact notification endpoint
    if status != self._status:
        if self._job['notifications']:
            self._send_notifications(status)

    # update database
    self._status = status
    if not data_source.update_job_status(self._job_id, status, msg):
        Log.a().warning('cannot update job status in data source')
        data_source.rollback()

    data_source.commit()

    return True
def switch_context(cls, uri, new_base_uri):
    """
    Change the context of uri to the new_base.

    new_base can have a different scheme and base URL. If uri has no 'name'
    (e.g., ends with /), then the new context URI is identical to the
    normalized new_base_uri.

    Args:
        uri: URI to change context.
        new_base_uri: base URI of the new context.

    Returns:
        On success: parsed URI in new context.
        On failure: False.

    """
    # validate URIs
    parsed_uri = cls.parse(uri)
    if not parsed_uri:
        Log.a().debug('invalid uri: %s', uri)
        return False

    parsed_new_base_uri = cls.parse(new_base_uri)
    if not parsed_new_base_uri:
        Log.a().debug('invalid new base uri: %s', new_base_uri)
        return False

    # construct URI in new context
    new_uri = '{}:{}{}{}'.format(
        parsed_new_base_uri['scheme'],
        (
            '//{}'.format(parsed_new_base_uri['authority'])
            if parsed_new_base_uri['authority'] else ''
        ),
        parsed_new_base_uri['chopped_path'],
        (
            '{}' if parsed_new_base_uri['chopped_path'] == '/' else '/{}'
        ).format(parsed_uri['name'])
    )

    # parse the new URI to validate
    parsed_new_uri = cls.parse(new_uri)
    if not parsed_new_uri:
        Log.a().debug('invalid new uri: %s', new_uri)
        return False

    return parsed_new_uri
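# Illustrative usage sketch (not part of the original module), assuming
# switch_context() and parse() are classmethods of the URIParser class shown
# in this listing; the URIs are hypothetical examples.
parsed = URIParser.switch_context(
    'local:/data/sample1/reads.fq',         # URI whose "name" is kept
    'agave://data.storage.example/work/'    # new base context
)
# expected: parsed['chopped_uri'] == 'agave://data.storage.example/work/reads.fq'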
def _run_map(self, map_item):
    """
    Run a job for each map item and store the job ID.

    Args:
        self: class instance.
        map_item: map item object (item of self._map).

    Returns:
        On success: True.
        On failure: False.

    """
    # load default app inputs, overwrite with template inputs
    inputs = {}
    for input_key in self._app['inputs']:
        if input_key in map_item['template']:
            if map_item['template'][input_key]:
                # only include an input if the value is a non-empty string
                inputs[input_key] = urllib.parse.quote(
                    str(map_item['template'][input_key]),
                    safe='/:'
                )
        else:
            if self._app['inputs'][input_key]['default']:
                # only include an input if the value is a non-empty string
                inputs[input_key] = urllib.parse.quote(
                    str(self._app['inputs'][input_key]['default']),
                    safe='/:'
                )

    # load default app parameters, overwrite with template parameters
    parameters = {}
    for param_key in self._app['parameters']:
        if param_key in map_item['template']:
            if self._app['parameters'][param_key]['type'] in ['int', 'long']:
                parameters[param_key] = int(map_item['template'][param_key])
            elif self._app['parameters'][param_key]['type'] in ['float', 'double']:
                parameters[param_key] = float(map_item['template'][param_key])
            else:
                parameters[param_key] = str(map_item['template'][param_key])
        else:
            if self._app['parameters'][param_key]['default'] not in [None, '']:
                parameters[param_key] \
                    = self._app['parameters'][param_key]['default']

    # add execution method as parameter
    parameters['exec_method'] = self._step['execution']['method']

    # add execution init commands if 'init' param given
    if 'init' in self._step['execution']['parameters']:
        parameters['exec_init'] = self._step['execution']['parameters']['init']

    # construct agave app template
    name = 'gf-{}-{}-{}'.format(
        str(map_item['attempt']),
        slugify(self._step['name'], regex_pattern=r'[^-a-z0-9_]+'),
        slugify(map_item['template']['output'], regex_pattern=r'[^-a-z0-9_]+')
    )
    name = name[:62]+'..' if len(name) > 64 else name
    archive_path = '{}/{}'.format(
        self._agave['parsed_archive_uri']['chopped_path'],
        name
    )
    app_template = {
        'name': name,
        'appId': self._app['implementation']['agave']['agave_app_id'],
        'archive': True,
        'inputs': inputs,
        'parameters': parameters,
        'archiveSystem': self._agave['parsed_archive_uri']['authority'],
        'archivePath': archive_path
    }

    # specify processors if 'slots' param given
    if 'slots' in self._step['execution']['parameters']:
        app_template['processorsPerNode'] = int(
            self._step['execution']['parameters']['slots']
        )

    # specify memory if 'mem' param given
    if 'mem' in self._step['execution']['parameters']:
        app_template['memoryPerNode'] = '{}'.format(
            self._step['execution']['parameters']['mem']
        )

    Log.some().debug(
        "[step.%s]: agave app template:\n%s",
        self._step['name'],
        pprint.pformat(app_template)
    )

    # delete archive path if it exists
    if DataManager.exists(
            uri=self._agave['parsed_archive_uri']['chopped_uri']+'/'+name,
            agave=self._agave
    ):
        if not DataManager.delete(
                uri=self._agave['parsed_archive_uri']['chopped_uri']+'/'+name,
                agave=self._agave
        ):
            Log.a().warning(
                'cannot delete archive uri: %s/%s',
                self._agave['parsed_archive_uri']['chopped_uri'], name
            )

    # submit job
    job = self._agave['agave_wrapper'].jobs_submit(app_template)
    if not job:
        msg = 'agave jobs submit failed for "{}"'.format(
            app_template['name']
        )
        Log.an().error(msg)
        return self._fatal(msg)

    # log agave job id
    Log.some().debug(
        '[step.%s]: agave job id: %s -> %s',
        self._step['name'],
        map_item['template']['output'],
        job['id']
    )

    # record job info
    map_item['run'][map_item['attempt']]['agave_job_id'] = job['id']
    map_item['run'][map_item['attempt']]['archive_uri'] = '{}/{}'\
        .format(
            self._agave['parsed_archive_uri']['chopped_uri'],
            name
        )
    map_item['run'][map_item['attempt']]['hpc_job_id'] = ''

    # set status of process
    map_item['status'] = 'PENDING'
    map_item['run'][map_item['attempt']]['status'] = 'PENDING'

    return True
def _run_map(self, map_item): """ Run a job for each map item and store the job ID. Args: self: class instance. map_item: map item object (item of self._map) Returns: On success: True. On failure: False. """ # load default app inputs overwrite with template inputs inputs = {} for input_key in self._app['inputs']: if input_key in map_item['template']: inputs[input_key] = urllib.parse.quote(str( map_item['template'][input_key] or ''), safe='/:') else: inputs[input_key] = urllib.parse.quote(str( self._app['inputs'][input_key]['default'] or ''), safe='/:') # load default app parameters, overwrite with template parameters parameters = {} for param_key in self._app['parameters']: if param_key in map_item['template']: parameters[param_key] = map_item['template'][param_key] else: parameters[param_key] \ = self._app['parameters'][param_key]['default'] # add execution method as parameter parameters['exec_method'] = self._step['execution']['method'] # construct agave app template name = 'gf-{}-{}-{}'.format(str(map_item['attempt']), slugify(self._step['name']), slugify(map_item['template']['output'])) name = name[:62] + '..' if len(name) > 64 else name archive_path = '{}/{}'.format( self._agave['parsed_archive_uri']['chopped_path'], name) app_template = { 'name': name, 'appId': self._app['definition']['agave']['agave_app_id'], 'archive': True, 'inputs': inputs, 'parameters': parameters, 'archiveSystem': self._agave['parsed_archive_uri']['authority'], 'archivePath': archive_path } Log.some().debug("agave app template:\n%s", pprint.pformat(app_template)) # delete archive path if it exists if DataManager.exists( uri=self._agave['parsed_archive_uri']['chopped_uri'] + '/' + name, agave=self._agave): if not DataManager.delete( uri=self._agave['parsed_archive_uri']['chopped_uri'] + '/' + name, agave=self._agave): Log.a().warning( 'cannot delete archive uri: %s/%s', self._agave['parsed_archive_uri']['chopped_uri'], name) # submit job job = self._agave['agave_wrapper'].jobs_submit(app_template) if not job: msg = 'agave jobs submit failed for "{}"'.format( app_template['name']) Log.an().error(msg) return self._fatal(msg) # log agave job id Log.some().debug('agave job id: %s -> %s', map_item['template']['output'], job['id']) # record job info map_item['run'][map_item['attempt']]['agave_job_id'] = job['id'] map_item['run'][map_item['attempt']]['archive_uri'] = '{}/{}'\ .format( self._agave['parsed_archive_uri']['chopped_uri'], name ) map_item['run'][map_item['attempt']]['hpc_job_id'] = '' # set status of process map_item['status'] = 'PENDING' map_item['run'][map_item['attempt']]['status'] = 'PENDING' return True
def parse(cls, uri):
    """
    Parse a URI and return components.

    If the scheme is missing, it defaults to "local".

    Args:
        uri: a generic URI string.

    Returns:
        On success: a dict that contains "uri", "scheme", "authority",
        "path", etc.:
        {
            "uri": original URI,
            "chopped_uri": normalized URI,
            "scheme": URI scheme (e.g., local, agave, http),
            "authority": URI authority (e.g., server or storage system),
            "path": full path,
            "chopped_path": normalized path,
            "folder": folder part of path (up to, but not including, the
                last slash),
            "name": folder/file name, part of path after last slash
        }
        On failure: False.

    """
    matched = re.match(cls.uri_regex, str(uri))
    if not matched:
        Log.a().debug('invalid uri: %s', uri)
        return False

    # extract scheme, e.g., local, agave, http, etc.
    scheme = matched.group(2)
    if not scheme:
        scheme = 'local'

    # authority can be '' (e.g., server, or storage system)
    authority = matched.group(4) if matched.group(4) else ''

    path = matched.group(5) if matched.group(5) else '/'
    # replace one or more consecutive slashes with single slash
    path = re.sub('/+', '/', path)

    # get folder and name from path
    matched = re.match(cls.path_regex, path)
    if not matched:
        Log.a().debug('invalid path of uri: %s', path)
        return False

    folder = matched.group(1) if matched.group(1) else matched.group(2)
    name = matched.group(3) if matched.group(3) else ''

    # "normalized" path without extra slashes
    chopped_path = (
        folder + name if folder == '/' or folder == ''
        else folder + '/' + name
    ) if name else folder

    # "normalized" URI without extra slashes and with scheme
    chopped_uri = '{}{}{}'.format(
        '{}:'.format(scheme),
        ('//{}'.format(authority) if authority else ''),
        chopped_path
    )

    return {
        'uri': uri,  # original URI
        'chopped_uri': chopped_uri,
        'scheme': scheme,
        'authority': authority,
        'path': path,
        'chopped_path': chopped_path,
        'folder': folder,
        'name': name
    }
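# Illustrative usage sketch (not part of the original module), assuming
# parse() is a classmethod of the URIParser class shown in this listing and
# using a hypothetical URI; exact values depend on uri_regex/path_regex.
parsed = URIParser.parse('agave://data.storage.example//work//sample1/')
# expected components per the docstring above:
#   parsed['scheme']       -> 'agave'
#   parsed['authority']    -> 'data.storage.example'
#   parsed['chopped_path'] -> '/work/sample1'
#   parsed['name']         -> ''   (URI ends with a slash)
# A URI without a scheme, e.g. URIParser.parse('/tmp/output'),
# defaults to the 'local' scheme.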
def install_assets(self): """ Install app assets. Args: self: class instance Returns: On success: True. On failure: False. """ # set asset type default_asset = self._app_asset # if not set on CLI, use asset type specified in workflow apps-repo if not default_asset: default_asset = self._app.get('asset') # if not set in workflow apps-repo, use app default if not default_asset: default_asset = self._config.get('default_asset') Log.some().info('installing app asset type: %s', str(default_asset)) if not default_asset: # no asset type specified, nothing left to do return True if 'assets' not in self._config: # app is not configured with any assets return True if default_asset not in self._config['assets']: # if asset type is not listed in config, display warning and # continue Log.a().warning( 'unconfigured asset type specified: %s', str(default_asset) ) return True assets = self._config['assets'][default_asset] # install all components for asset for asset in assets: Log.some().info('app asset:\n%s', pprint.pformat(asset)) if 'type' not in asset: Log.a().warning('asset type missing for app "%s"', self._app['name']) continue if asset['type'] == 'copy': if not self._copy_asset(asset): Log.a().warning( 'cannot copy assets for app "%s"', self._app['name'] ) continue elif asset['type'] == 'build': if not self._build_asset(asset): Log.a().warning( 'cannot build assets for app "%s"', self._app['name'] ) continue else: Log.a().warning( 'invalid asset type "%s" for app "%s"', asset['type'], self._app['name'] ) return True
def _build_asset(self, asset): """ Build app assets. Args: self: class instance asset: what to build Returns: On success: True. On failure: False. """ # make sure the build path exists build_path = self._path / 'build' build_path.mkdir(exist_ok=True) build_repo_path = None if not asset.get('folder'): Log.a().warning( 'repo folder must be set when specifying a build asset' ) return False # clone build repo build_repo_path = build_path / asset['folder'] if asset.get('repo'): # if repo is set, clone and build it try: if asset.get('tag'): Repo.clone_from( asset['repo'], str(build_repo_path), branch=asset['tag'], config='http.sslVerify=false' ) else: Repo.clone_from( asset['repo'], str(build_repo_path), config='http.sslVerify=false' ) except GitError as err: Log.an().error( 'cannot clone git repo for build: %s [%s]', asset['repo'], str(err) ) return False # if repo is not set, packaged build scripts are included with the # workflow in the build_repo_path # build cmd = 'make -C "{}"'.format(str(build_repo_path)) Log.some().info('build command: %s', cmd) cmd_result = ShellWrapper.invoke(cmd) if cmd_result is False: Log.a().warning('cannot build app: %s', cmd) return False Log.some().info('make stdout: %s', cmd_result) # move built assets # make sure asset folder exists if not asset.get('dst'): Log.a().warning('asset dst required for app %s', self._app['name']) return False if not asset.get('src'): Log.a().warning('asset src required for app %s', self._app['name']) return False # create asset destination asset_path = self._path / asset['dst'] asset_path.mkdir(exist_ok=True) # set src path src_path = self._path / asset['src'] if 'zip' in asset: # create a tar.gz of src cmd = 'tar -czf "{}" --directory="{}" .'.format( str(asset_path / '{}.tar.gz'.format(asset['zip'])), str(src_path) ) Log.some().info('zipping: %s', cmd) cmd_result = ShellWrapper.invoke(cmd) if cmd_result is False: Log.a().warning('cannot zip asset src: %s', cmd) return False Log.some().info('tar stdout: %s', cmd_result) else: # move without creating tar.gz cmd = 'mv "{}" "{}"'.format(str(src_path), str(asset_path)) Log.some().info('moving: %s', cmd) cmd_result = ShellWrapper.invoke(cmd) if cmd_result is False: Log.a().warning('cannot move asset src: %s', cmd) return False Log.some().info('mv stdout: %s', cmd_result) return True
def load(self, yaml_path): """ Load and validate GeneFlow definition from a multi-doc YAML file. Read a GeneFlow definition file, which can contain apps, workflows, and jobs. Loaded docs are appended to the _apps, _workflows, and _jobs arrays. Load may be called multiple times. Docs are only added if successfully validated. Args: yaml_path: path to GeneFlow YAML definition file. Returns: On success: True On failure: False. """ # load multi-doc yaml file gf_def = self.load_yaml(yaml_path) if gf_def is False: Log.an().error('cannot load yaml file: %s', yaml_path) return False # iterate through yaml docs for gf_doc in gf_def: # class must be specified, either app or workflow if 'class' not in gf_doc: Log.a().error('unspecified document class') return False if gf_doc['class'] == 'app': if 'apps' in gf_doc: # this is a list of apps for app in gf_doc['apps']: if not self.add_app(app): Log.an().error('invalid app in definition: %s', yaml_path) return False else: # only one app if not self.add_app(gf_doc): Log.an().error('invalid app in definition: %s', yaml_path) return False elif gf_doc['class'] == 'workflow': # only one workflow per yaml file allowed if not self.add_workflow(gf_doc): Log.an().error('invalid workflow in definition: %s', yaml_path) return False elif gf_doc['class'] == 'job': if 'jobs' in gf_doc: # this is a list of jobs for job in gf_doc['jobs']: if not self.add_job(job): Log.an().error('invalid job in definition: %s', yaml_path) return False else: # only one job if not self.add_job(gf_doc): Log.an().error('invalid job in definition: %s', yaml_path) return False else: Log.a().error('invalid document class: %s', gf_doc['class']) return False return True
def _run_map(self, map_item):
    """
    Run a job for each map item and store the proc and PID.

    Args:
        self: class instance.
        map_item: map item object (item of self._map).

    Returns:
        On success: True.
        On failure: False.

    """
    # load default app inputs, overwrite with template inputs
    inputs = {}
    for input_key in self._app['inputs']:
        if input_key in map_item['template']:
            inputs[input_key] = map_item['template'][input_key]
        else:
            if self._app['inputs'][input_key]['default']:
                inputs[input_key] = self._app['inputs'][input_key]['default']

    # load default app parameters, overwrite with template parameters
    parameters = {}
    for param_key in self._app['parameters']:
        if param_key in map_item['template']:
            parameters[param_key] = map_item['template'][param_key]
        else:
            if self._app['parameters'][param_key]['default'] not in [None, '']:
                parameters[param_key] \
                    = self._app['parameters'][param_key]['default']

    # construct shell command
    cmd = self._app['implementation']['local']['script']
    for input_key in inputs:
        if inputs[input_key]:
            cmd += ' --{}="{}"'.format(
                input_key,
                URIParser.parse(inputs[input_key])['chopped_path']
            )
    for param_key in parameters:
        if param_key == 'output':
            cmd += ' --output="{}/{}"'.format(
                self._parsed_data_uris[self._source_context]\
                    ['chopped_path'],
                parameters['output']
            )
        else:
            cmd += ' --{}="{}"'.format(
                param_key, parameters[param_key]
            )

    # add execution method
    cmd += ' --exec_method="{}"'.format(self._step['execution']['method'])

    # specify execution init commands if 'init' param given
    if 'init' in self._step['execution']['parameters']:
        cmd += ' --exec_init="{}"'.format(
            self._step['execution']['parameters']['init']
        )

    # add stdout and stderr
    log_path = '{}/_log/gf-{}-{}-{}'.format(
        self._parsed_data_uris[self._source_context]['chopped_path'],
        map_item['attempt'],
        slugify(self._step['name'], regex_pattern=r'[^-a-z0-9_]+'),
        slugify(map_item['template']['output'], regex_pattern=r'[^-a-z0-9_]+')
    )
    cmd += ' > "{}.out" 2> "{}.err"'.format(log_path, log_path)

    Log.a().debug('command: %s', cmd)

    # launch process
    proc = ShellWrapper.spawn(cmd)
    if proc is False:
        msg = 'shell process error: {}'.format(cmd)
        Log.an().error(msg)
        return self._fatal(msg)

    # record job info
    map_item['run'][map_item['attempt']]['proc'] = proc
    map_item['run'][map_item['attempt']]['pid'] = proc.pid

    # set status of process
    map_item['status'] = 'RUNNING'
    map_item['run'][map_item['attempt']]['status'] = 'RUNNING'

    return True
def register_agave_app(self, agave, agave_config, agave_params, agave_publish): """ Register app in Agave. Args: self: class instance Returns: On success: True. On failure: False. """ Log.some().info('registering agave app %s', str(self._path)) Log.some().info('app version: %s', self._config['version']) # compile agave app template if not TemplateCompiler.compile_template( self._path, 'agave-app-def.json.j2', self._path / 'agave-app-def.json', version=self._config['version'], agave=agave_params['agave'] ): Log.a().warning( 'cannot compile agave app "%s" definition from template', self._app['name'] ) return False # create main apps URI parsed_agave_apps_uri = URIParser.parse( 'agave://{}/{}'.format( agave_params['agave']['deploymentSystem'], agave_params['agave']['appsDir'] ) ) Log.some().info( 'creating main apps uri: %s', parsed_agave_apps_uri['chopped_uri'] ) if not DataManager.mkdir( parsed_uri=parsed_agave_apps_uri, recursive=True, agave={ 'agave': agave, 'agave_config': agave_config } ): Log.a().warning('cannot create main agave apps uri') return False # delete app uri if it exists parsed_app_uri = URIParser.parse( 'agave://{}/{}/{}'.format( agave_params['agave']['deploymentSystem'], agave_params['agave']['appsDir'], self._app['folder'] ) ) Log.some().info( 'deleting app uri if it exists: %s', parsed_app_uri['chopped_uri'] ) if not DataManager.delete( parsed_uri=parsed_app_uri, agave={ 'agave': agave, 'agave_config': agave_config } ): # log warning, but ignore.. deleting non-existant uri returns False Log.a().warning( 'cannot delete app uri: %s', parsed_app_uri['chopped_uri'] ) # upload app assets parsed_assets_uri = URIParser.parse(str(self._path / 'assets')) Log.some().info( 'copying app assets from %s to %s', parsed_assets_uri['chopped_uri'], parsed_app_uri['chopped_uri'] ) if not DataManager.copy( parsed_src_uri=parsed_assets_uri, parsed_dest_uri=parsed_app_uri, local={}, agave={ 'agave': agave, 'agave_config': agave_config } ): Log.a().warning( 'cannot copy app assets from %s to %s', parsed_assets_uri['chopped_uri'], parsed_app_uri['chopped_uri'] ) return False # upload test script parsed_test_uri = URIParser.parse( '{}/{}'.format( parsed_app_uri['chopped_uri'], 'test' ) ) Log.some().info( 'creating test uri: %s', parsed_test_uri['chopped_uri'] ) if not DataManager.mkdir( parsed_uri=parsed_test_uri, recursive=True, agave={ 'agave': agave, 'agave_config': agave_config } ): Log.a().warning( 'cannot create test uri: %s', parsed_test_uri['chopped_uri'] ) return False parsed_local_test_script = URIParser.parse( str(self._path / 'test' / 'test.sh') ) parsed_agave_test_script = URIParser.parse( '{}/{}'.format(parsed_test_uri['chopped_uri'], 'test.sh') ) Log.some().info( 'copying test script from %s to %s', parsed_local_test_script['chopped_uri'], parsed_agave_test_script['chopped_uri'] ) if not DataManager.copy( parsed_src_uri=parsed_local_test_script, parsed_dest_uri=parsed_agave_test_script, local={}, agave={ 'agave': agave, 'agave_config': agave_config } ): Log.a().warning( 'cannot copy test script from %s to %s', parsed_local_test_script['chopped_uri'], parsed_agave_test_script['chopped_uri'] ) return False # update existing app, or register new app Log.some().info('registering agave app') app_definition = self._yaml_to_dict( str(self._path / 'agave-app-def.json') ) if not app_definition: Log.a().warning( 'cannot load agave app definition: %s', str(self._path / 'agave-app-def.json') ) return False agwrap = AgaveAppsAddUpdate( agave, agave_config ) app_add_result = 
agwrap.call(app_definition) if not app_add_result: Log.a().warning( 'cannot register agave app:\n%s', pprint.pformat(app_definition) ) return False register_result = {} # publish app if agave_publish: Log.some().info('publishing agave app') agwrap = AgaveAppsPublish( agave, agave_config ) app_publish_result = agwrap.call(app_add_result['id']) if not app_publish_result: Log.a().warning( 'cannot publish agave app: %s', app_add_result['id'] ) return False # return published id and revision register_result = { 'id': app_publish_result['id'], 'version': self._config['version'], 'revision': 'u{}'.format(app_publish_result['revision']) } else: # return un-published id and blank revision register_result = { 'id': app_add_result['id'], 'version': self._config['version'], 'revision': '' } return register_result
def install_apps(self): """ Install apps for the workflow package. Args: self: class instance. Returns: None """ apps_path = Path(self._path) / 'workflow' / 'apps' if self._clean: # remove apps folder if apps_path.is_dir(): shutil.rmtree(str(apps_path)) # create apps folder if not already there apps_path.mkdir(exist_ok=True) for app in self._apps_repo['apps']: if self._app_name == app['name'] or not self._app_name: Log.some().info('app:\n%s', pprint.pformat(app)) repo_path = apps_path / app['folder'] # create AppInstaller instance app_installer = AppInstaller(str(repo_path), app, self._app_asset, self._copy_prefix) # clone app into install location if not app_installer.clone_git_repo(): Log.an().error('cannot clone app to %s', str(repo_path)) # skip app continue if not app_installer.load_config(): Log.an().error('cannot load app config.yaml') # skip app continue if self._make_apps: if not app_installer.make(): Log.an().error('cannot compile app templates') # skip app continue if not app_installer.install_assets(): Log.an().error('cannot install app assets') # skip app continue # register in Agave if (self._agave and self._agave_params and self._agave_params.get('agave')): register_result = app_installer.register_agave_app( self._agave, self._config['agave'], self._agave_params, self._agave_publish) if not register_result: Log.a().warning('cannot register app "%s" in agave', app['name']) # skip app continue Log.some().info('registered agave app:\n%s', pprint.pformat(register_result)) # compile jinja template for published app definition if not TemplateCompiler.compile_template( repo_path, 'app.yaml.j2', repo_path / 'app.yaml', agave=self._agave_params['agave'], version=register_result['version'], revision=register_result['revision']): Log.a().warning( 'cannot compile app "%s" definition from template', app['name']) # skip app continue else: # compile jinja template for app definition if not TemplateCompiler.compile_template( repo_path, 'app.yaml.j2', repo_path / 'app.yaml'): Log.a().warning( 'cannot compile app "%s" definition from template', app['name']) # skip app continue return True
def _run_map(self, map_item):
    """
    Run a job for each map item and store the job ID.

    Args:
        self: class instance.
        map_item: map item object (item of self._map).

    Returns:
        On success: True.
        On failure: False.

    """
    # load default app inputs, overwrite with template inputs
    inputs = {}
    for input_key in self._app['inputs']:
        if input_key in map_item['template']:
            inputs[input_key] = map_item['template'][input_key]
        else:
            if self._app['inputs'][input_key]['default']:
                inputs[input_key] = self._app['inputs'][input_key][
                    'default']

    # load default app parameters, overwrite with template parameters
    parameters = {}
    for param_key in self._app['parameters']:
        if param_key in map_item['template']:
            parameters[param_key] = map_item['template'][param_key]
        else:
            if self._app['parameters'][param_key]['default'] not in [
                    None, ''
            ]:
                parameters[param_key] \
                    = self._app['parameters'][param_key]['default']

    # get full path of wrapper script
    path = shutil.which(self._app['implementation']['local']['script'])
    if not path:
        msg = 'wrapper script not found in path: {}'.format(
            self._app['implementation']['local']['script'])
        Log.an().error(msg)
        return self._fatal(msg)

    # construct argument list for wrapper script
    args = [path]
    for input_key in inputs:
        if inputs[input_key]:
            args.append('--{}={}'.format(
                input_key,
                URIParser.parse(inputs[input_key])['chopped_path']))
    for param_key in parameters:
        if param_key == 'output':
            args.append('--output={}/{}'.format(
                self._parsed_data_uris[self._source_context][0]\
                    ['chopped_path'],
                parameters['output']
            ))
        else:
            args.append('--{}={}'.format(param_key, parameters[param_key]))

    # add execution method
    args.append('--exec_method={}'.format(
        self._step['execution']['method']))

    # specify execution init commands if 'init' param given
    if 'init' in self._step['execution']['parameters']:
        args.append('--exec_init={}'.format(
            self._step['execution']['parameters']['init']))

    Log.a().debug(
        '[step.%s]: command: %s -> %s',
        self._step['name'],
        map_item['template']['output'],
        ' '.join(args)
    )

    # construct job name
    name = 'gf-{}-{}-{}'.format(
        map_item['attempt'],
        slugify(self._step['name'], regex_pattern=r'[^-a-z0-9_]+'),
        slugify(map_item['template']['output'], regex_pattern=r'[^-a-z0-9_]+'))

    # construct paths for logging stdout and stderr
    log_path = '{}/_log/{}'.format(
        self._parsed_data_uris[self._source_context][0]['chopped_path'],
        name)

    # create and populate job template
    jt = self._slurm['drmaa_session'].createJobTemplate()
    jt.remoteCommand = '/bin/bash'
    jt.args = args
    jt.jobName = name
    jt.errorPath = ':{}.err'.format(log_path)
    jt.outputPath = ':{}.out'.format(log_path)

    # pass execution parameters to job template
    native_spec = ' --nodes=1 --ntasks=1'
    if 'queue' in self._step['execution']['parameters']:
        native_spec += ' -p {}'.format(
            self._step['execution']['parameters']['queue'])
    if 'slots' in self._step['execution']['parameters']:
        native_spec += ' --cpus-per-task={}'.format(
            self._step['execution']['parameters']['slots'])
    if 'other' in self._step['execution']['parameters']:
        native_spec += ' {}'.format(
            self._step['execution']['parameters']['other'])
    jt.nativeSpecification = native_spec

    # submit hpc job using drmaa library
    try:
        job_id = self._slurm['drmaa_session'].runJob(jt)
    except drmaa.DrmCommunicationException as err:
        msg = 'cannot submit slurm job for step "{}" [{}]'\
            .format(self._step['name'], str(err))
        Log.a().warning(msg)
        # set to failed, but return True so that it's retried
        map_item['status'] = 'FAILED'
        map_item['run'][map_item['attempt']]['status'] = 'FAILED'
        return True

    self._slurm['drmaa_session'].deleteJobTemplate(jt)

    Log.a().debug(
        '[step.%s]: hpc job id: %s -> %s',
        self._step['name'],
        map_item['template']['output'],
        job_id
    )

    # record job info
    map_item['run'][map_item['attempt']]['hpc_job_id'] = job_id

    # set status of process
    map_item['status'] = 'QUEUED'
    map_item['run'][map_item['attempt']]['status'] = 'QUEUED'

    return True
def check_running_jobs(self):
    """
    Check the status/progress of all map-reduce items and update _map status.

    Args:
        self: class instance.

    Returns:
        True.

    """
    # check if jobs are still running
    for map_item in self._map:

        map_item['status'] = self._agave['agave_wrapper'].jobs_get_status(
            map_item['run'][map_item['attempt']]['agave_job_id']
        )
        # for status failures, set to 'UNKNOWN'
        if not map_item['status']:
            msg = 'cannot get job status for step "{}"'\
                .format(self._step['name'])
            Log.a().warning(msg)
            map_item['status'] = 'UNKNOWN'

        # set status of run-attempt
        map_item['run'][map_item['attempt']]['status'] = map_item['status']

        # check hpc job ids
        if map_item['run'][map_item['attempt']]['hpc_job_id']:
            # already have it
            continue

        # job id listed in history
        response = self._agave['agave_wrapper'].jobs_get_history(
            map_item['run'][map_item['attempt']]['agave_job_id']
        )
        if not response:
            msg = 'cannot get hpc job id for job: agave_job_id={}'.format(
                map_item['run'][map_item['attempt']]['agave_job_id']
            )
            Log.a().warning(msg)
            continue

        for item in response:
            if item['status'] == 'QUEUED':
                match = re.match(
                    r'^HPC.*local job (\d*)$', item['description']
                )
                if match:
                    map_item['run'][map_item['attempt']]['hpc_job_id'] \
                        = match.group(1)

                    # log hpc job id
                    Log.some().debug(
                        '[step.%s]: hpc job id: %s -> %s',
                        self._step['name'],
                        map_item['template']['output'],
                        match.group(1)
                    )

                    break

        if map_item['status'] == 'FAILED' and map_item['attempt'] < 5:
            # retry job if not at limit
            if not self.retry_failed(map_item):
                Log.a().warning(
                    '[step.%s]: cannot retry failed agave job (%s)',
                    self._step['name'], map_item['template']['output']
                )

    self._update_status_db(self._status, '')

    return True
def check_running_jobs(self):
    """
    Check the status/progress of all map-reduce items and update _map status.

    Args:
        self: class instance.

    Returns:
        True.

    """
    # check if jobs are still running
    for map_item in self._map:

        if map_item['status'] not in ['FINISHED', 'FAILED', 'PENDING']:
            map_item['status'] = self._agave['agave_wrapper'].jobs_get_status(
                map_item['run'][map_item['attempt']]['agave_job_id']
            )

            # for status failures, set to 'UNKNOWN'
            if not map_item['status']:
                msg = 'cannot get job status for step "{}"'\
                    .format(self._step['name'])
                Log.a().warning(msg)
                map_item['status'] = 'UNKNOWN'

            if map_item['status'] in ['FINISHED', 'FAILED']:
                # status changed to finished or failed
                Log.a().debug(
                    '[step.%s]: exit status: %s -> %s',
                    self._step['name'],
                    map_item['template']['output'],
                    map_item['status']
                )

                # decrease num running procs
                if self._num_running > 0:
                    self._num_running -= 1

        # check hpc job ids
        if (
                map_item['status'] != 'PENDING'
                and not map_item['run'][map_item['attempt']].get('hpc_job_id', '')
        ):
            # job id listed in history
            response = self._agave['agave_wrapper'].jobs_get_history(
                map_item['run'][map_item['attempt']]['agave_job_id']
            )
            if not response:
                msg = 'cannot get hpc job id for job: agave_job_id={}'.format(
                    map_item['run'][map_item['attempt']]['agave_job_id']
                )
                Log.a().warning(msg)

            else:
                for item in response:
                    if item['status'] == 'QUEUED':
                        match = re.match(
                            r'^HPC.*local job (\d*)$', item['description']
                        )
                        if match:
                            map_item['run'][map_item['attempt']]['hpc_job_id'] \
                                = match.group(1)

                            # log hpc job id
                            Log.some().debug(
                                '[step.%s]: hpc job id: %s -> %s',
                                self._step['name'],
                                map_item['template']['output'],
                                match.group(1)
                            )

                            break

        map_item['run'][map_item['attempt']]['status'] = map_item['status']

        if map_item['status'] == 'FAILED' and map_item['attempt'] < 5:
            if self._throttle_limit == 0 or self._num_running < self._throttle_limit:
                # retry job if not at retry or throttle limit
                if not self.retry_failed(map_item):
                    Log.a().warning(
                        '[step.%s]: cannot retry failed agave job (%s)',
                        self._step['name'], map_item['template']['output']
                    )
                else:
                    self._num_running += 1

    self._update_status_db(self._status, '')

    return True
def run(args, other_args, subparser): """ Run GeneFlow workflow engine. Args: args.workflow_path: workflow definition or package directory. args.job: path to job definition Returns: On success: True. On failure: False. """ # get absolute path to workflow workflow_path = resolve_workflow_path(args.workflow_path) if workflow_path: Log.some().info('workflow definition found: %s', workflow_path) else: Log.an().error('cannot find workflow definition: %s', args.workflow_path) return False # setup environment env = Environment(workflow_path=workflow_path) if not env.initialize(): Log.an().error('cannot initialize geneflow environment') return False # create default config file and SQLite db cfg = Config() cfg.default(env.get_sqlite_db_path()) cfg.write(env.get_config_path()) config_dict = cfg.config('local') # load workflow into db try: data_source = DataSource(config_dict['database']) except DataSourceException as err: Log.an().error('data source initialization error [%s]', str(err)) return False defs = data_source.import_definition(workflow_path) if not defs: Log.an().error('workflow definition load failed: %s', workflow_path) return False if not defs['workflows']: Log.an().error('workflow definition load failed: %s', workflow_path) return False data_source.commit() for workflow in defs['workflows']: Log.some().info('workflow loaded: %s -> %s', workflow, defs['workflows'][workflow]) # get workflow definition back from database to ensure # that it's a valid definition workflow_id = next(iter(defs['workflows'].values())) workflow_dict = data_source.get_workflow_def_by_id(workflow_id) if not workflow_dict: Log.an().error( 'cannot get workflow definition from data source: workflow_id=%s', workflow_id) return False ### define arg parsing methods def parse_dynamic_args(workflow_dict): """ Parse dynamic args based on workflow dictionary as well as some static args. Args: other_args: List of remaining args from initial parse of workflow path. workflow_dict: Workflow dictionary Returns: On success: List of parsed arguments. On failure: False. """ # parse dynamic args. 
these are determined from workflow definition dynamic_parser = argparse.ArgumentParser() dynamic_parser.add_argument('-j', '--job', type=str, default=None, dest='job_path', help='Job Definition(s)') for input_key in workflow_dict['inputs']: dynamic_parser.add_argument( '--in.{}'.format(input_key), dest='inputs.{}'.format(input_key), required=False, default=workflow_dict['inputs'][input_key]['default'], help=workflow_dict['inputs'][input_key]['label']) for param_key in workflow_dict['parameters']: dynamic_parser.add_argument( '--param.{}'.format(param_key), dest='parameters.{}'.format(param_key), required=False, default=workflow_dict['parameters'][param_key]['default'], help=workflow_dict['parameters'][param_key]['label']) dynamic_parser.add_argument('-o', '--output', type=str, default='~/geneflow-output', help='Output Folder') dynamic_parser.add_argument('-n', '--name', type=str, default='geneflow-job', help='Name of Job') dynamic_parser.add_argument('-w', '--work', nargs='+', type=str, default=[], help='Work Directory') dynamic_parser.add_argument('--exec-context', '--ec', nargs='+', type=str, dest='exec_context', default=[], help='Execution Contexts') dynamic_parser.add_argument('--exec-method', '--em', nargs='+', type=str, dest='exec_method', default=[], help='Execution Methods') dynamic_parser.add_argument('--exec-param', '--ep', nargs='+', type=str, dest='exec_param', default=[], help='Execution Parameters') dynamic_args = dynamic_parser.parse_known_args(other_args) return dynamic_args[0] if 'gooey' in sys.modules: @Gooey(program_name='GeneFlow: {}'.format(workflow_dict['name']), program_description=workflow_dict['description'], target='gf --log-level={} run {}'.format( args.log_level, args.workflow_path), monospace_display=True) def parse_dynamic_args_gui(workflow_dict): """ Parse dynamic args based on workflow dictionary as well as some static args. Display a GUI interface. Args: other_args: List of remaining args from initial parse of workflow path. workflow_dict: Workflow dictionary Returns: On success: List of parsed arguments. On failure: False. """ # parse dynamic args. 
these are determined from workflow definition dynamic_parser = GooeyParser() input_group = dynamic_parser.add_argument_group( "Workflow Inputs", "Files or folders to be passed to the workflow") for input_key in workflow_dict['inputs']: widget = 'FileChooser' if workflow_dict['inputs'][input_key]['type'] == 'Directory': widget = 'DirChooser' input_group.add_argument( '--in.{}'.format(input_key), dest='inputs.{}'.format(input_key), required=False, default=workflow_dict['inputs'][input_key]['default'], help=workflow_dict['inputs'][input_key]['label'], widget=widget) param_group = dynamic_parser.add_argument_group( "Workflow Parameters", "Number or string parameters to be passed to the workflow") for param_key in workflow_dict['parameters']: param_group.add_argument( '--param.{}'.format(param_key), dest='parameters.{}'.format(param_key), required=False, default=workflow_dict['parameters'][param_key]['default'], help=workflow_dict['parameters'][param_key]['label']) job_group = dynamic_parser.add_argument_group( "Job Options", "Output/intermediate folders and job name") job_group.add_argument('-o', '--output', type=str, default='~/geneflow-output', help='Output Folder', widget='DirChooser') job_group.add_argument('-n', '--name', type=str, default='geneflow-job', help='Name of Job') job_group.add_argument('-w', '--work', nargs='+', type=str, default=[], help='Work Directory') exec_group = dynamic_parser.add_argument_group( "Execution Options", "Customize workflow execution") exec_group.add_argument('--exec-context', '--ec', nargs='+', type=str, dest='exec_context', default=[], help='Execution Contexts') exec_group.add_argument('--exec-method', '--em', nargs='+', type=str, dest='exec_method', default=[], help='Execution Methods') exec_group.add_argument('--exec-param', '--ep', nargs='+', type=str, dest='exec_param', default=[], help='Execution Parameters') dynamic_args = dynamic_parser.parse_args(other_args) return dynamic_args # get dynamic args if args.gui and 'gooey' in sys.modules: dynamic_args = parse_dynamic_args_gui(workflow_dict) else: dynamic_args = parse_dynamic_args(workflow_dict) # get absolute path to job file if provided job_path = None if dynamic_args.job_path: job_path = Path(dynamic_args.job_path).absolute() # load job definition if provided jobs_dict = {} gf_def = Definition() if job_path: if not gf_def.load(job_path): Log.an().error('Job definition load failed') return False jobs_dict = gf_def.jobs() else: # create default definition jobs_dict = { 'job': { 'name': 'GeneFlow job', 'output_uri': 'geneflow_output', 'work_uri': { 'local': '~/.geneflow/work' } } } # override with known cli parameters apply_job_modifiers(jobs_dict, [ 'name={}'.format(dynamic_args.name), 'output_uri={}'.format( dynamic_args.output) ]) # insert workflow name into job, if not provided workflow_name = next(iter(defs['workflows'])) for job in jobs_dict.values(): if 'workflow_name' not in job: job['workflow_name'] = workflow_name # add inputs and parameters to job definition apply_job_modifiers( jobs_dict, [ '{}={}'.format(dynamic_arg, getattr(dynamic_args, dynamic_arg)) for dynamic_arg in vars(dynamic_args) \ if dynamic_arg.startswith('inputs.') or dynamic_arg.startswith('parameters.') ] ) # add work URIs to job definition work_uris = {} for work_arg in dynamic_args.work: parsed_work_uri = URIParser.parse(work_arg) if not parsed_work_uri: # skip if invalid URI Log.a().warning('invalid work uri: %s', work_arg) else: work_uris[ parsed_work_uri['scheme']] = parsed_work_uri['chopped_uri'] 
apply_job_modifiers(jobs_dict, [ 'work_uri.{}={}'.format(context, work_uris[context]) for context in work_uris ]) # add execution options to job definition apply_job_modifiers(jobs_dict, [ 'execution.context.{}={}'.format(*exec_arg.split(':', 1)[0:2]) for exec_arg in dynamic_args.exec_context ] + [ 'execution.method.{}={}'.format(*exec_arg.split(':', 1)[0:2]) for exec_arg in dynamic_args.exec_method ] + [ 'execution.parameters.{}={}'.format(*exec_arg.split(':', 1)[0:2]) for exec_arg in dynamic_args.exec_param ]) # get default values from workflow definition for job in jobs_dict.values(): if 'inputs' not in job: job['inputs'] = {} if 'parameters' not in job: job['parameters'] = {} for input_key in workflow_dict['inputs']: if input_key not in job['inputs']: job['inputs'][input_key]\ = workflow_dict['inputs'][input_key]['default'] for param_key in workflow_dict['parameters']: if param_key not in job['parameters']: job['parameters'][param_key]\ = workflow_dict['parameters'][param_key]['default'] # expand URIs for job in jobs_dict.values(): # output URI parsed_uri = URIParser.parse(job['output_uri']) if not parsed_uri: Log.an().error('invalid output uri: %s', job['output_uri']) return False # expand relative path if local if parsed_uri['scheme'] == 'local': job['output_uri'] = str( Path(parsed_uri['chopped_path']).expanduser().resolve()) # work URIs for context in job['work_uri']: parsed_uri = URIParser.parse(job['work_uri'][context]) if not parsed_uri: Log.an().error('invalid work uri: %s', job['work_uri']) return False # expand relative path if local if parsed_uri['scheme'] == 'local': job['work_uri'][context] = str( Path(parsed_uri['chopped_path']).expanduser().resolve()) # input URIs for input_key in job['inputs']: parsed_uri = URIParser.parse(job['inputs'][input_key]) if not parsed_uri: Log.an().error('invalid input uri: %s', job['inputs'][input_key]) return False # expand relative path if local if parsed_uri['scheme'] == 'local': job['inputs'][input_key] = str( Path(parsed_uri['chopped_path']).expanduser().resolve()) # import jobs into database job_ids = data_source.import_jobs_from_dict(jobs_dict) if job_ids is False: Log.an().error('cannot import jobs') return False data_source.commit() # create process pool to run workflows in parallel pool = Pool(min(5, len(job_ids))) jobs = [{'name': job, 'id': job_ids[job], 'log': None} for job in job_ids] result = pool.map( partial(geneflow.cli.common.run_workflow, config=config_dict, log_level=args.log_level), jobs) pool.close() pool.join() if not all(result): Log.an().error('some jobs failed') return result
def run_pending(args):
    """
    Run any jobs in database in the PENDING state.

    Args:
        args.config_file: GeneFlow config file path.
        args.environment: Config environment.

    Returns:
        On success: True.
        On failure: False.

    """
    config_file = args.config_file
    environment = args.environment
    log_location = args.log_location

    # load config file
    cfg = Config()
    if not cfg.load(config_file):
        Log.an().error('cannot load config file: %s', config_file)
        return False

    config_dict = cfg.config(environment)
    if not config_dict:
        Log.an().error('invalid config environment: %s', environment)
        return False

    # connect to data source
    try:
        data_source = DataSource(config_dict['database'])
    except DataSourceException as err:
        Log.an().error('data source initialization error [%s]', str(err))
        return False

    # get pending jobs from database
    pending_jobs = data_source.get_pending_jobs()
    if pending_jobs is False:
        Log.an().error('cannot query for pending jobs')
        return False

    if not pending_jobs:
        # no jobs found
        return True

    Log.some().info('pending jobs found:\n%s', pprint.pformat(pending_jobs))

    # set job status to QUEUED to minimize the chance that another
    # process will try to run it
    for job in pending_jobs:
        if not data_source.update_job_status(job['id'], 'QUEUED', ''):
            Log.a().warning('cannot update job status in data source')
            data_source.rollback()
    data_source.commit()

    # create a process pool to run at most 5 jobs concurrently
    pool = Pool(min(5, len(pending_jobs)))
    jobs = [
        {
            'name': job['name'],
            'id': job['id'],
            'log': str(Path(log_location) / (job['id'] + '.log'))
        }
        for job in pending_jobs
    ]

    result = pool.map(
        partial(
            geneflow.cli.common.run_workflow,
            config=config_dict,
            log_level=args.log_level
        ),
        jobs
    )

    pool.close()
    pool.join()

    if not all(result):
        Log.an().error('some jobs failed')

    return result