def clean_up(self):
    """
    Copy data from Agave archive location to step output location (data URI).

    For each map item, the step output is imported from the archive URI of
    the current attempt into the data URI of the source context. Agave log
    files at the top level of the archive (gf-*.out / gf-*.err) and all
    items inside the archive's _log folder are imported into the _log
    folder of the data URI. Once every map item has been processed, the
    step status is updated to FINISHED.

    Args:
        self: class instance.

    Returns:
        On success: True.
        On failure: the value returned by self._fatal(msg) (False,
            according to the original documentation of this method).

    """
    data_uri = self._parsed_data_uris[self._source_context]

    # destination _log directory, common for all map items
    dest_log_dir = '{}/{}'.format(data_uri['chopped_uri'], '_log')

    # agave log files are named gf-{}-{}-{}.out or .err; the raw string
    # keeps \d and \. as regex escapes instead of invalid string escapes,
    # and the pattern is compiled once for all map items
    agave_log_pattern = re.compile(r'^gf-\d*-.*\.(out|err)$')

    # copy data for each map item
    for map_item in self._map:
        import_file = self._agave['agave_wrapper'].files_import_from_agave
        authority = data_uri['authority']
        dest_path = data_uri['chopped_path']
        dest_log_path = '{}/{}'.format(dest_path, '_log')
        output_name = map_item['template']['output']
        archive_uri = map_item['run'][map_item['attempt']]['archive_uri']

        # copy step output
        if not import_file(
                authority,
                dest_path,
                output_name,
                '{}/{}'.format(archive_uri, output_name)
        ):
            msg = 'agave import failed for step "{}"'\
                .format(self._step['name'])
            Log.an().error(msg)
            return self._fatal(msg)

        # check for any agave log files (*.out and *.err files)
        # NOTE(review): an empty listing is treated the same as a failed
        # listing here -- confirm against DataManager.list semantics
        agave_log_list = DataManager.list(uri=archive_uri, agave=self._agave)
        if not agave_log_list:
            msg = 'cannot get agave log list for step "{}"'\
                .format(self._step['name'])
            Log.an().error(msg)
            return self._fatal(msg)

        # copy each agave log file into the destination _log folder
        for item in agave_log_list:
            if not agave_log_pattern.match(item):
                continue
            if not import_file(
                    authority,
                    dest_log_path,
                    item,
                    '{}/{}'.format(archive_uri, item)
            ):
                msg = 'cannot copy agave log item "{}"'.format(item)
                Log.an().error(msg)
                return self._fatal(msg)

        # check if anything is in the _log directory
        src_log_dir = '{}/{}'.format(archive_uri, '_log')
        if not DataManager.exists(uri=src_log_dir, agave=self._agave):
            continue

        # create dest _log dir if it doesn't exist
        if not (
                DataManager.exists(uri=dest_log_dir, agave=self._agave)
                or DataManager.mkdir(uri=dest_log_dir, agave=self._agave)
        ):
            msg = 'cannot create _log directory for step "{}"'\
                .format(self._step['name'])
            Log.an().error(msg)
            return self._fatal(msg)

        # get list of all items in src_log_dir
        # NOTE(review): as above, an existing but empty _log folder is
        # reported as a failure -- confirm this is intended
        log_list = DataManager.list(uri=src_log_dir, agave=self._agave)
        if not log_list:
            msg = 'cannot get _log list for step "{}"'\
                .format(self._step['name'])
            Log.an().error(msg)
            return self._fatal(msg)

        # copy each list item
        for item in log_list:
            if not import_file(
                    authority,
                    dest_log_path,
                    item,
                    '{}/{}/{}'.format(archive_uri, '_log', item)
            ):
                msg = 'cannot copy log item "{}"'.format(item)
                Log.an().error(msg)
                return self._fatal(msg)

    self._update_status_db('FINISHED', '')

    return True
def _init_data_uri(self):
    """
    Create output data URI for the source context (agave).

    The first parsed data URI of the source context must use the agave
    scheme. If the folder already exists and a clean run was requested,
    it is deleted first (a failed delete is only logged as a warning).
    The folder and its _log sub-folder are then created.

    Args:
        self: class instance.

    Returns:
        On success: True.
        On failure: the value returned by self._fatal(msg) (False,
            according to the original documentation of this method).

    """
    parsed_uri = self._parsed_data_uris[self._source_context][0]

    # make sure the source data URI has a compatible scheme (agave)
    if parsed_uri['scheme'] != 'agave':
        msg = 'invalid data uri scheme for this step: {}'.format(
            parsed_uri['scheme']
        )
        Log.an().error(msg)
        return self._fatal(msg)

    # delete folder if it already exists and clean==True
    already_there = DataManager.exists(
        parsed_uri=parsed_uri,
        agave=self._agave
    )
    if already_there and self._clean:
        removed = DataManager.delete(
            parsed_uri=parsed_uri,
            agave=self._agave
        )
        if not removed:
            # best effort only: creation below is still attempted
            Log.a().warning(
                'cannot delete existing data uri: %s',
                parsed_uri['chopped_uri']
            )

    # create folder
    folder_created = DataManager.mkdir(
        parsed_uri=parsed_uri,
        recursive=True,
        agave=self._agave
    )
    if not folder_created:
        msg = 'cannot create data uri: {}'.format(parsed_uri['chopped_uri'])
        Log.an().error(msg)
        return self._fatal(msg)

    # create _log folder
    log_created = DataManager.mkdir(
        uri='{}/_log'.format(parsed_uri['chopped_uri']),
        recursive=True,
        agave=self._agave
    )
    if not log_created:
        msg = 'cannot create _log folder in data uri: {}/_log'.format(
            parsed_uri['chopped_uri']
        )
        Log.an().error(msg)
        return self._fatal(msg)

    return True
def upload_agave_test_data(self):
    """
    Upload Agave test data from workflow package.

    Creates the base test data folder on the Agave deployment system
    ("agave://<deploymentSystem>/<testDataDir>") and copies the local
    "data" folder of the package into a sub-folder named after the
    package directory.

    Args:
        self: class instance.

    Returns:
        On success: True.
        On failure: False (a warning is logged).

    """
    have_agave = (
        self._agave_wrapper
        and self._agave_params
        and self._agave_params.get('agave')
    )
    if not have_agave:
        Log.a().warning('must provide agave parameters to upload test data')
        return False

    # create main test data URI
    base_uri_str = 'agave://{}/{}'.format(
        self._agave_params['agave']['deploymentSystem'],
        self._agave_params['agave']['testDataDir']
    )
    parsed_base_test_uri = URIParser.parse(base_uri_str)
    Log.some().info(
        'creating base test data uri: %s',
        parsed_base_test_uri['chopped_uri']
    )
    base_created = DataManager.mkdir(
        parsed_uri=parsed_base_test_uri,
        recursive=True,
        agave={'agave_wrapper': self._agave_wrapper}
    )
    if not base_created:
        Log.a().warning(
            'cannot create base test data uri: %s',
            parsed_base_test_uri['chopped_uri']
        )
        return False

    # upload test data
    local_data_dir = str(Path(self._path) / 'data')
    parsed_local_test_uri = URIParser.parse(local_data_dir)
    remote_data_dir = '{}/{}'.format(
        parsed_base_test_uri['chopped_uri'],
        Path(self._path).name
    )
    parsed_agave_test_uri = URIParser.parse(remote_data_dir)
    Log.some().info(
        'copying test data from %s to %s',
        parsed_local_test_uri['chopped_uri'],
        parsed_agave_test_uri['chopped_uri']
    )
    copied = DataManager.copy(
        parsed_src_uri=parsed_local_test_uri,
        parsed_dest_uri=parsed_agave_test_uri,
        local={},
        agave={'agave_wrapper': self._agave_wrapper}
    )
    if not copied:
        Log.a().warning(
            'cannot copy test data from %s to %s',
            parsed_local_test_uri['chopped_uri'],
            parsed_agave_test_uri['chopped_uri']
        )
        return False

    return True
def _init_context_uris(self):
    """
    Generate all context URIs for this workflow run.

    Input URIs are generated for every data context (the data schemes of
    the execution contexts plus self._data_contexts). Step URIs are
    generated for the data scheme of each execution context, based on
    _parsed_job_work_uri, and for the "final" context, based on
    _parsed_job_output_uri.

    Args:
        self: class instance.

    Returns:
        None.

    Raises:
        WorkflowDAGException: if an input URI is invalid, a staging
            folder cannot be created, or an input URI cannot be switched
            to its new base URI.

    """
    # characters outside this set are replaced when slugifying node names
    slug_pattern = r'[^-a-z0-9_]+'

    self._context_uris['inputs'] = {}
    self._context_uris['steps'] = {'final': {}}
    self._parsed_context_uris['inputs'] = {}
    self._parsed_context_uris['steps'] = {'final': {}}

    # init all data contexts
    input_contexts = {
        Contexts.get_data_scheme_of_exec_context(exec_context)
        for exec_context in self._exec_contexts
    } | self._data_contexts
    for context in input_contexts:
        input_uris = {}
        parsed_input_uris = {}
        self._context_uris['inputs'][context] = input_uris
        self._parsed_context_uris['inputs'][context] = parsed_input_uris

        for node_name in self._topo_sort:
            node = self._graph.nodes[node_name]
            if node['type'] != 'input':
                continue

            if node['source_context'] == context:
                # use original input URI
                parsed_uri = URIParser.parse(
                    self._workflow['inputs'][node['name']]['value']
                )
                if not parsed_uri:
                    raise WorkflowDAGException(
                        'invalid input uri: {}'.format(
                            self._workflow['inputs'][node['name']]['value']
                        )
                    )
                input_uris[node['name']] = parsed_uri['chopped_uri']
                parsed_input_uris[node['name']] = parsed_uri
                continue

            # skip if _parsed_job_work_uri is not defined for this
            # context: this implies that there is no execution defined
            # for that context, so no need to setup the data staging
            # location at the work_uri
            if context not in self._parsed_job_work_uri:
                continue

            # switch context of input URI
            new_base_uri = '{}/_input-{}'.format(
                self._parsed_job_work_uri[context]['chopped_uri'],
                slugify(node['name'], regex_pattern=slug_pattern)
            )

            # create new base URI
            base_created = DataManager.mkdir(
                uri=new_base_uri,
                recursive=True,
                **{context: self._context_options[context]}
            )
            if not base_created:
                msg = 'cannot create new base uri for input: {}'\
                    .format(new_base_uri)
                Log.an().error(msg)
                raise WorkflowDAGException(msg)

            # switch input URI base
            switched_uri = URIParser.switch_context(
                self._workflow['inputs'][node['name']]['value'],
                new_base_uri
            )
            if not switched_uri:
                msg = (
                    'cannot switch input uri context to '
                    'new base URI: {}->{}'
                ).format(
                    self._workflow['inputs'][node['name']]['value'],
                    new_base_uri
                )
                Log.an().error(msg)
                raise WorkflowDAGException(msg)

            input_uris[node['name']] = switched_uri['chopped_uri']
            parsed_input_uris[node['name']] = switched_uri

    # init step URIs for the data scheme of each execution context
    step_contexts = {
        Contexts.get_data_scheme_of_exec_context(exec_context)
        for exec_context in self._exec_contexts
    }
    for context in step_contexts:
        step_uris = {}
        parsed_step_uris = {}
        self._context_uris['steps'][context] = step_uris
        self._parsed_context_uris['steps'][context] = parsed_step_uris

        for node_name in self._topo_sort:
            node = self._graph.nodes[node_name]
            if node['type'] != 'step':
                continue
            step_uris[node['name']] = '{}/{}'.format(
                self._parsed_job_work_uri[context]['chopped_uri'],
                slugify(node['name'], regex_pattern=slug_pattern)
            )
            parsed_step_uris[node['name']] = URIParser.parse(
                step_uris[node['name']]
            )

    # init final contexts for steps
    final_uris = self._context_uris['steps']['final']
    parsed_final_uris = self._parsed_context_uris['steps']['final']
    for node_name in self._topo_sort:
        node = self._graph.nodes[node_name]
        if node['type'] != 'step':
            continue
        final_uris[node['name']] = '{}/{}'.format(
            self._parsed_job_output_uri['chopped_uri'],
            slugify(node['name'], regex_pattern=slug_pattern)
        )
        parsed_final_uris[node['name']] = URIParser.parse(
            final_uris[node['name']]
        )
def register_agave_app(self, agave, agave_config, agave_params, agave_publish):
    """
    Register app in Agave.

    Compiles the Agave app definition from its template, uploads the app
    assets and the test script to the apps folder of the deployment
    system, registers (or updates) the app, and optionally publishes it.

    Args:
        self: class instance.
        agave: Agave connection object, passed through to DataManager
            and to the Agave wrapper classes.
        agave_config: Agave configuration, passed through likewise.
        agave_params: dict whose 'agave' entry provides
            'deploymentSystem' and 'appsDir'; the 'agave' entry is also
            handed to the template compiler.
        agave_publish: if truthy, publish the app after registering it.

    Returns:
        On success: dict with keys 'id', 'version' and 'revision'
            ('revision' is 'u<revision>' for a published app and an
            empty string otherwise).
        On failure: False.

    """
    def agave_context():
        # build a fresh option dict for every DataManager call
        return {
            'agave': agave,
            'agave_config': agave_config
        }

    Log.some().info('registering agave app %s', str(self._path))
    Log.some().info('app version: %s', self._config['version'])

    # compile agave app template
    compiled = TemplateCompiler.compile_template(
        self._path,
        'agave-app-def.json.j2',
        self._path / 'agave-app-def.json',
        version=self._config['version'],
        agave=agave_params['agave']
    )
    if not compiled:
        Log.a().warning(
            'cannot compile agave app "%s" definition from template',
            self._app['name']
        )
        return False

    # create main apps URI
    apps_uri_str = 'agave://{}/{}'.format(
        agave_params['agave']['deploymentSystem'],
        agave_params['agave']['appsDir']
    )
    parsed_agave_apps_uri = URIParser.parse(apps_uri_str)
    Log.some().info(
        'creating main apps uri: %s',
        parsed_agave_apps_uri['chopped_uri']
    )
    apps_dir_created = DataManager.mkdir(
        parsed_uri=parsed_agave_apps_uri,
        recursive=True,
        agave=agave_context()
    )
    if not apps_dir_created:
        Log.a().warning('cannot create main agave apps uri')
        return False

    # delete app uri if it exists
    app_uri_str = 'agave://{}/{}/{}'.format(
        agave_params['agave']['deploymentSystem'],
        agave_params['agave']['appsDir'],
        self._app['folder']
    )
    parsed_app_uri = URIParser.parse(app_uri_str)
    Log.some().info(
        'deleting app uri if it exists: %s',
        parsed_app_uri['chopped_uri']
    )
    deleted = DataManager.delete(
        parsed_uri=parsed_app_uri,
        agave=agave_context()
    )
    if not deleted:
        # log warning, but ignore: deleting a non-existent uri
        # returns False
        Log.a().warning(
            'cannot delete app uri: %s',
            parsed_app_uri['chopped_uri']
        )

    # upload app assets
    parsed_assets_uri = URIParser.parse(str(self._path / 'assets'))
    Log.some().info(
        'copying app assets from %s to %s',
        parsed_assets_uri['chopped_uri'],
        parsed_app_uri['chopped_uri']
    )
    assets_copied = DataManager.copy(
        parsed_src_uri=parsed_assets_uri,
        parsed_dest_uri=parsed_app_uri,
        local={},
        agave=agave_context()
    )
    if not assets_copied:
        Log.a().warning(
            'cannot copy app assets from %s to %s',
            parsed_assets_uri['chopped_uri'],
            parsed_app_uri['chopped_uri']
        )
        return False

    # upload test script
    parsed_test_uri = URIParser.parse(
        '{}/{}'.format(parsed_app_uri['chopped_uri'], 'test')
    )
    Log.some().info(
        'creating test uri: %s',
        parsed_test_uri['chopped_uri']
    )
    test_dir_created = DataManager.mkdir(
        parsed_uri=parsed_test_uri,
        recursive=True,
        agave=agave_context()
    )
    if not test_dir_created:
        Log.a().warning(
            'cannot create test uri: %s',
            parsed_test_uri['chopped_uri']
        )
        return False

    parsed_local_test_script = URIParser.parse(
        str(self._path / 'test' / 'test.sh')
    )
    parsed_agave_test_script = URIParser.parse(
        '{}/{}'.format(parsed_test_uri['chopped_uri'], 'test.sh')
    )
    Log.some().info(
        'copying test script from %s to %s',
        parsed_local_test_script['chopped_uri'],
        parsed_agave_test_script['chopped_uri']
    )
    script_copied = DataManager.copy(
        parsed_src_uri=parsed_local_test_script,
        parsed_dest_uri=parsed_agave_test_script,
        local={},
        agave=agave_context()
    )
    if not script_copied:
        Log.a().warning(
            'cannot copy test script from %s to %s',
            parsed_local_test_script['chopped_uri'],
            parsed_agave_test_script['chopped_uri']
        )
        return False

    # update existing app, or register new app
    Log.some().info('registering agave app')
    app_definition = self._yaml_to_dict(
        str(self._path / 'agave-app-def.json')
    )
    if not app_definition:
        Log.a().warning(
            'cannot load agave app definition: %s',
            str(self._path / 'agave-app-def.json')
        )
        return False

    app_add_result = AgaveAppsAddUpdate(agave, agave_config).call(
        app_definition
    )
    if not app_add_result:
        Log.a().warning(
            'cannot register agave app:\n%s',
            pprint.pformat(app_definition)
        )
        return False

    if not agave_publish:
        # return un-published id and blank revision
        return {
            'id': app_add_result['id'],
            'version': self._config['version'],
            'revision': ''
        }

    # publish app
    Log.some().info('publishing agave app')
    app_publish_result = AgaveAppsPublish(agave, agave_config).call(
        app_add_result['id']
    )
    if not app_publish_result:
        Log.a().warning(
            'cannot publish agave app: %s', app_add_result['id']
        )
        return False

    # return published id and revision
    return {
        'id': app_publish_result['id'],
        'version': self._config['version'],
        'revision': 'u{}'.format(app_publish_result['revision'])
    }
def _init_context_uris(self):
    """
    Generate all context URIs for this workflow run.

    Input and step URIs are generated for every context present in
    _parsed_job_work_uri, and "final" step URIs are generated from
    _parsed_job_output_uri.

    Args:
        self: class instance.

    Returns:
        None.

    Raises:
        WorkflowDAGException: if an input URI is invalid, a staging
            folder cannot be created, or an input URI cannot be switched
            to its new base URI.

    """
    self._context_uris['inputs'] = {}
    self._context_uris['steps'] = {'final': {}}
    self._parsed_context_uris['inputs'] = {}
    self._parsed_context_uris['steps'] = {'final': {}}

    # init contexts in parsed_job_work_uri for inputs and steps
    for context in self._parsed_job_work_uri:
        input_uris = {}
        step_uris = {}
        parsed_input_uris = {}
        parsed_step_uris = {}
        self._context_uris['inputs'][context] = input_uris
        self._context_uris['steps'][context] = step_uris
        self._parsed_context_uris['inputs'][context] = parsed_input_uris
        self._parsed_context_uris['steps'][context] = parsed_step_uris

        for node_name in self._topo_sort:
            node = self._graph.nodes[node_name]

            if node['type'] != 'input':
                # any non-input node is handled as a step
                step_uris[node['name']] = '{}/{}'.format(
                    self._parsed_job_work_uri[context]['chopped_uri'],
                    slugify(node['name'])
                )
                parsed_step_uris[node['name']] = URIParser.parse(
                    step_uris[node['name']]
                )
                continue

            if node['source_context'] == context:
                # use original input URI
                parsed_uri = URIParser.parse(
                    self._workflow['inputs'][node['name']]['value']
                )
                if not parsed_uri:
                    raise WorkflowDAGException(
                        'invalid input uri: {}'.format(
                            self._workflow['inputs'][node['name']]['value']
                        )
                    )
                input_uris[node['name']] = parsed_uri['chopped_uri']
                parsed_input_uris[node['name']] = parsed_uri
                continue

            # switch context of input URI
            new_base_uri = '{}/_input-{}'.format(
                self._parsed_job_work_uri[context]['chopped_uri'],
                slugify(node['name'])
            )

            # create new base URI
            base_created = DataManager.mkdir(
                uri=new_base_uri,
                recursive=True,
                **{context: self._context_options[context]}
            )
            if not base_created:
                msg = 'cannot create new base uri for input: {}'\
                    .format(new_base_uri)
                Log.an().error(msg)
                raise WorkflowDAGException(msg)

            # switch input URI base
            switched_uri = URIParser.switch_context(
                self._workflow['inputs'][node['name']]['value'],
                new_base_uri
            )
            if not switched_uri:
                msg = (
                    'cannot switch input uri context to '
                    'new base URI: {}->{}'
                ).format(
                    self._workflow['inputs'][node['name']]['value'],
                    new_base_uri
                )
                Log.an().error(msg)
                raise WorkflowDAGException(msg)

            input_uris[node['name']] = switched_uri['chopped_uri']
            parsed_input_uris[node['name']] = switched_uri

    # init final contexts for steps
    final_uris = self._context_uris['steps']['final']
    parsed_final_uris = self._parsed_context_uris['steps']['final']
    for node_name in self._topo_sort:
        node = self._graph.nodes[node_name]
        if node['type'] != 'step':
            continue
        final_uris[node['name']] = '{}/{}'.format(
            self._parsed_job_output_uri['chopped_uri'],
            slugify(node['name'])
        )
        parsed_final_uris[node['name']] = URIParser.parse(
            final_uris[node['name']]
        )
def clean_up(self):
    """
    Copy data from Agave archive location to step output location (data URI).

    For each map item, the step output is imported from the archive URI
    of the current attempt into the data URI of the source context, and
    every item found in the archive's _log folder is imported into the
    _log folder of the data URI. The step status is updated to FINISHED
    once all map items have been processed.

    Args:
        self: class instance.

    Returns:
        On success: True.
        On failure: the value returned by self._fatal(msg) (False,
            according to the original documentation of this method).

    """
    def agave_context():
        # build a fresh option dict for every DataManager call
        return {
            'agave': self._agave['agave'],
            'agave_config': self._config['agave']
        }

    # destination _log directory, common for all map items
    dest_log_dir = '{}/{}'.format(
        self._parsed_data_uris[self._source_context]['chopped_uri'],
        '_log'
    )

    # create instance of agave wrapper class for data import
    importer = AgaveFilesImportDataFromAgave(
        self._agave['agave'],
        self._config['agave']
    )

    # copy data for each map item
    for map_item in self._map:

        # copy step output
        output_copied = importer.call(
            self._parsed_data_uris[self._source_context]['authority'],
            self._parsed_data_uris[self._source_context]['chopped_path'],
            map_item['template']['output'],
            '{}/{}'.format(
                map_item['run'][map_item['attempt']]['archive_uri'],
                map_item['template']['output']
            )
        )
        if not output_copied:
            msg = 'agave import failed for step "{}"'\
                .format(self._step['name'])
            Log.an().error(msg)
            return self._fatal(msg)

        # nothing more to do unless the archive has a _log directory
        src_log_dir = '{}/{}'.format(
            map_item['run'][map_item['attempt']]['archive_uri'],
            '_log'
        )
        if not DataManager.exists(uri=src_log_dir, agave=agave_context()):
            continue

        # create dest _log dir if it doesn't exist
        dest_ready = (
            DataManager.exists(uri=dest_log_dir, agave=agave_context())
            or DataManager.mkdir(uri=dest_log_dir, agave=agave_context())
        )
        if not dest_ready:
            msg = 'cannot create _log directory for step "{}"'\
                .format(self._step['name'])
            Log.an().error(msg)
            return self._fatal(msg)

        # get list of all items in src_log_dir
        log_list = DataManager.list(uri=src_log_dir, agave=agave_context())
        if not log_list:
            msg = 'cannot get _log list for step "{}"'\
                .format(self._step['name'])
            Log.an().error(msg)
            return self._fatal(msg)

        # copy each list item
        for item in log_list:
            item_copied = importer.call(
                self._parsed_data_uris[self._source_context]['authority'],
                '{}/{}'.format(
                    self._parsed_data_uris[self._source_context]
                    ['chopped_path'],
                    '_log'
                ),
                item,
                '{}/{}/{}'.format(
                    map_item['run'][map_item['attempt']]['archive_uri'],
                    '_log',
                    item
                )
            )
            if not item_copied:
                msg = 'cannot copy log item "{}"'.format(item)
                Log.an().error(msg)
                return self._fatal(msg)

    self._update_status_db('FINISHED', '')

    return True