def get_extra_file_format(event):
    '''If the file extension matches the regular file format, returns None.
    If it matches one of the formats of an extra file, returns that format
    (e.g. 'pairs_px2').
    '''
    # guess env from bucket name
    bucket = event['Records'][0]['s3']['bucket']['name']
    env = '-'.join(bucket.split('-')[1:3])
    upload_key = event['Records'][0]['s3']['object']['key']
    uuid, object_key = upload_key.split('/')
    accession = object_key.split('.')[0]
    extension = object_key.replace(accession, '')
    tibanna = Tibanna(env=env)
    meta = get_metadata(accession,
                        key=tibanna.ff_keys,
                        ff_env=env,
                        add_on='frame=object',
                        check_queue=True)
    if meta:
        file_format = meta.get('file_format')
        fe_map = get_format_extension_map(tibanna.ff_keys)
        file_extension = fe_map.get(file_format)
        if extension == file_extension:
            return None
        else:
            for extra in meta.get('extra_files', []):
                extra_format = extra.get('file_format')
                extra_extension = fe_map.get(extra_format)
                if extension == extra_extension:
                    return extra_format
            raise Exception("file extension not matching")
    else:
        raise Exception("Cannot get input metadata")
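# Illustration (not from the source): a minimal sketch of the S3 trigger event
# that get_extra_file_format() and the other handlers below index into.
# Bucket and key values are hypothetical; only the nesting mirrors the
# event['Records'][0]['s3'] accesses used above.
_example_s3_event = {
    'Records': [{
        's3': {
            'bucket': {'name': 'elasticbeanstalk-fourfront-webdev-files'},
            # upload key layout: '<uuid>/<accession>.<extension>'
            'object': {'key': 'some-uuid/4DNFIXXXXXXX.pairs.gz.px2'},
        }
    }]
}
# env guess: '-'.join('elasticbeanstalk-fourfront-webdev-files'.split('-')[1:3])
# -> 'fourfront-webdev'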
def get_status_for_extra_file(event, extra_format):
    if not extra_format:
        return None
    upload_key = event['Records'][0]['s3']['object']['key']
    if upload_key.endswith('html'):
        return False
    uuid, object_key = upload_key.split('/')
    accession = object_key.split('.')[0]
    # guess env from bucket name
    bucket = event['Records'][0]['s3']['bucket']['name']
    env = '-'.join(bucket.split('-')[1:3])
    try:
        tibanna = Tibanna(env=env)
    except Exception as e:
        raise TibannaStartException("%s" % e)
    meta = get_metadata(accession,
                        key=tibanna.ff_keys,
                        ff_env=env,
                        add_on='frame=object',
                        check_queue=True)
    if meta and 'extra_files' in meta:
        for exf in meta['extra_files']:
            if parse_formatstr(exf['file_format']) == extra_format:
                return exf.get('status', None)
    return None
def test_proc_file_for_arg_name(run_awsem_event_data_processed_files,
                                proc_file_in_webdev):
    of = [{"workflow_argument_name": "output_file1",
           "uuid": proc_file_in_webdev['uuid']},
          {"workflow_argument_name": "output_file2",
           "uuid": "f4864029-a8ad-4bb8-93e7-5108f46bbbbb"}]
    tibanna_settings = run_awsem_event_data_processed_files.get('_tibanna', {})
    # if they don't pass in env guess it from output_bucket
    env = tibanna_settings.get('env')
    # tibanna provides access to keys based on env and stuff like that
    tibanna = Tibanna(env,
                      ff_keys=run_awsem_event_data_processed_files.get('ff_keys'),
                      settings=tibanna_settings)
    file_with_type = proc_file_in_webdev.copy()
    file_with_type['@type'] = ['FileProcessed', 'Item', 'whatever']
    with mock.patch('core.pony_utils.get_metadata', return_value=file_with_type):
        pf, resp = proc_file_for_arg_name(of, 'output_file1', tibanna)
        assert type(pf) == ProcessedFileMetadata
        assert pf.__dict__ == proc_file_in_webdev
def test_handle_processed_files(run_awsem_event_data_secondary_files):
    data = run_awsem_event_data_secondary_files
    tibanna_settings = data.get('_tibanna', {})
    # if they don't pass in env guess it from output_bucket
    env = tibanna_settings.get('env')
    # tibanna provides access to keys based on env and stuff like that
    tibanna = Tibanna(env, ff_keys=data.get('ff_keys'), settings=tibanna_settings)
    workflow_uuid = data['workflow_uuid']
    wf_meta = ff_utils.get_metadata(workflow_uuid,
                                    key=tibanna.ff_keys,
                                    ff_env=tibanna.env,
                                    add_on='frame=object')
    output_files, pf_meta = create_wfr_output_files_and_processed_files(wf_meta, tibanna)
    assert output_files
    assert len(output_files) == 3
    for of in output_files:
        if of['format'] == 'pairs':
            assert of['secondary_file_formats'] == ['pairs_px2']
            assert of['extra_files']
        else:
            assert 'secondary_files_formats' not in of
    assert pf_meta
    assert len(pf_meta) == 3
    for pf in pf_meta:
        pdict = pf.__dict__
        if pdict['file_format'] == 'pairs':
            assert pdict['extra_files'] == [{'file_format': 'pairs_px2'}]
        else:
            assert 'extra_files' not in pdict
def test_md5_updater_newmd5(update_ffmeta_event_data_newmd5):
    event = update_ffmeta_event_data_newmd5
    tibanna_settings = event.get('_tibanna', {})
    tibanna = Tibanna(**tibanna_settings)
    awsem = Awsem(update_ffmeta_event_data_newmd5)
    ouf = awsem.output_files()[0]
    md5_updater('uploaded', ouf, None, tibanna)
def test_handle_processed_files(run_awsem_event_data_secondary_files):
    data = run_awsem_event_data_secondary_files
    tibanna_settings = data.get('_tibanna', {})
    # if they don't pass in env guess it from output_bucket
    env = tibanna_settings.get('env')
    # tibanna provides access to keys based on env and stuff like that
    tibanna = Tibanna(env, ff_keys=data.get('ff_keys'), settings=tibanna_settings)
    workflow_uuid = data['workflow_uuid']
    workflow_info = ff_utils.get_metadata(workflow_uuid, key=tibanna.ff_keys)
    with mock.patch('core.pony_utils.post_metadata') as mock_request:
        output_files, pf_meta = handle_processed_files(workflow_info, tibanna)
        assert mock_request.call_count == 3
    assert output_files
    assert len(output_files) == 3
    for of in output_files:
        if of['extension'] == '.pairs.gz':
            assert of['secondary_file_extensions'] == ['.pairs.gz.px2']
            assert of['secondary_file_formats'] == ['pairs_px2']
            assert of['extra_files']
        else:
            assert 'secondary_files_extension' not in of
            assert 'secondary_files_formats' not in of
    assert pf_meta
    assert len(pf_meta) == 3
    for pf in pf_meta:
        pdict = pf.__dict__
        if pdict['file_format'] == 'pairs':
            assert pdict['extra_files'] == [{'file_format': 'pairs_px2'}]
        else:
            assert 'extra_files' not in pdict
def test_add_secondary_files_to_args(run_awsem_event_data):
    input_file = {
        "bucket_name": "elasticbeanstalk-fourfront-webdev-wfoutput",
        "workflow_argument_name": "input_pairs",
        "uuid": ["d2c897ec-bdb2-47ce-b1b1-845daccaa571",
                 "d2c897ec-bdb2-47ce-b1b1-845daccaa571"],
        "object_key": ["4DNFI25JXLLI.pairs.gz", "4DNFI25JXLLI.pairs.gz"]
    }
    args = {
        'input_files': {
            'input_pairs': {
                'bucket': 'elasticbeanstalk-fourfront-webdev-wfoutput',
                'object_key': [
                    'd2c897ec-bdb2-47ce-b1b1-845daccaa571/4DNFI25JXLLI.pairs.gz',
                    'd2c897ec-bdb2-47ce-b1b1-845daccaa571/4DNFI25JXLLI.pairs.gz'
                ]
            }
        }
    }
    data = run_awsem_event_data
    tibanna_settings = data.get('_tibanna', {})
    # if they don't pass in env guess it from output_bucket
    env = tibanna_settings.get('env')
    # tibanna provides access to keys based on env and stuff like that
    tibanna = Tibanna(env, ff_keys=data.get('ff_keys'), settings=tibanna_settings)
    add_secondary_files_to_args(input_file, tibanna.ff_keys, tibanna.env, args)
def get_status(event):
    print("is status uploading: %s" % event)
    upload_key = event['Records'][0]['s3']['object']['key']
    if upload_key.endswith('html'):
        return False
    uuid, object_key = upload_key.split('/')
    accession = object_key.split('.')[0]
    # guess env from bucket name
    bucket = event['Records'][0]['s3']['bucket']['name']
    env = '-'.join(bucket.split('-')[1:3])
    try:
        tibanna = Tibanna(env=env)
    except Exception as e:
        raise TibannaStartException("%s" % e)
    meta = get_metadata(accession,
                        key=tibanna.ff_keys,
                        ff_env=env,
                        add_on='frame=object',
                        check_queue=True)
    if meta:
        return meta.get('status', '')
    else:
        return ''
def test_register_to_higlass3(used_env):
    bucket = 'elasticbeanstalk-fourfront-webdev-wfoutput'
    bigbed_key = 'a34d5ea5-eada-4def-a4a7-c227b0d32395/4DNFIC624FKJ.bb'
    tibanna = Tibanna(used_env)
    with mock.patch('requests.post') as mock_request:
        res = register_to_higlass(tibanna, bucket, bigbed_key, 'bigwig', 'vector')
        mock_request.assert_called_once()
        printlog(res)
        assert res
def test_register_to_higlass2(used_env):
    bucket = 'elasticbeanstalk-fourfront-webdev-wfoutput'
    bigwig_key = 'a940cf00-6001-473e-80d1-1e4a43866863/4DNFI75GAT6T.bw'
    tibanna = Tibanna(used_env)
    with mock.patch('requests.post') as mock_request:
        res = register_to_higlass(tibanna, bucket, bigwig_key, 'bigwig', 'vector')
        mock_request.assert_called_once()
        printlog(res)
        assert res
def test_format_extension_map(run_awsem_event_data):
    tibanna_settings = run_awsem_event_data.get('_tibanna', {})
    # if they don't pass in env guess it from output_bucket
    env = tibanna_settings.get('env')
    # tibanna provides access to keys based on env and stuff like that
    tibanna = Tibanna(env,
                      ff_keys=run_awsem_event_data.get('ff_keys'),
                      settings=tibanna_settings)
    fe_map = FormatExtensionMap(tibanna.ff_keys)
    assert fe_map
    assert 'pairs' in fe_map.fe_dict.keys()
def test_get_extra_file_key(run_awsem_event_data):
    tibanna_settings = run_awsem_event_data.get('_tibanna', {})
    # if they don't pass in env guess it from output_bucket
    env = tibanna_settings.get('env')
    # tibanna provides access to keys based on env and stuff like that
    tibanna = Tibanna(env,
                      ff_keys=run_awsem_event_data.get('ff_keys'),
                      settings=tibanna_settings)
    fe_map = FormatExtensionMap(tibanna.ff_keys)
    infile_key = 'hahaha/lalala.bedGraph.gz'
    infile_format = 'bg'
    extra_file_format = 'bw'
    extra_file_key = get_extra_file_key(infile_format, infile_key,
                                        extra_file_format, fe_map)
    assert extra_file_key == 'hahaha/lalala.bw'
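# A minimal sketch (an assumption, not the repo's get_extra_file_key) of the
# derivation the test above exercises: replace the input format's extension
# with the extra format's extension while keeping the '<prefix>/<basename>'.
# 'swap_extension' and its arguments are hypothetical names.
def swap_extension(infile_key, infile_extension, extra_extension):
    assert infile_key.endswith(infile_extension)
    return infile_key[:-len(infile_extension)] + extra_extension

assert swap_extension('hahaha/lalala.bedGraph.gz', 'bedGraph.gz', 'bw') \
    == 'hahaha/lalala.bw'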
def test_process_input_file_info(run_awsem_event_data):
    input_file = {
        "bucket_name": "elasticbeanstalk-fourfront-webdev-wfoutput",
        "workflow_argument_name": "input_pairs",
        "uuid": ["d2c897ec-bdb2-47ce-b1b1-845daccaa571",
                 "d2c897ec-bdb2-47ce-b1b1-845daccaa571"],
        "object_key": ["4DNFI25JXLLI.pairs.gz", "4DNFI25JXLLI.pairs.gz"]
    }
    args = {'input_files': {"some_input": {}, "some_other_input": {}}}
    data = run_awsem_event_data
    tibanna_settings = data.get('_tibanna', {})
    # if they don't pass in env guess it from output_bucket
    env = tibanna_settings.get('env')
    # tibanna provides access to keys based on env and stuff like that
    tibanna = Tibanna(env, ff_keys=data.get('ff_keys'), settings=tibanna_settings)
    process_input_file_info(input_file, tibanna.ff_keys, tibanna.env, args)
    assert len(args['input_files']) == 3
    assert 'secondary_files' in args
def test_handle_processed_files2(run_awsem_event_data_processed_files2):
    data = run_awsem_event_data_processed_files2
    tibanna_settings = data.get('_tibanna', {})
    # if they don't pass in env guess it from output_bucket
    env = tibanna_settings.get('env')
    # tibanna provides access to keys based on env and stuff like that
    tibanna = Tibanna(env, ff_keys=data.get('ff_keys'), settings=tibanna_settings)
    workflow_uuid = data['workflow_uuid']
    workflow_info = ff_utils.get_metadata(workflow_uuid, key=tibanna.ff_keys)
    output_files, pf_meta = handle_processed_files(
        workflow_info, tibanna, custom_fields=data.get('custom_pf_fields'))
    assert pf_meta
    assert output_files
    for pf in pf_meta:
        pdict = pf.__dict__
        assert 'genome_assembly' in pdict
        assert pdict['genome_assembly'] == 'GRCh38'
def test_output_target_for_input_extra():
    tibanna = Tibanna('fourfront-webdev',
                      settings={"run_type": "bedGraphToBigWig",
                                "env": "fourfront-webdev"})
    target_inf = {'workflow_argument_name': 'bgfile',
                  'value': '83a80cf8-ca2c-421a-bee9-118bd0572424'}
    of = {'format': 'bw'}
    ff_utils.patch_metadata({'extra_files': []},
                            '83a80cf8-ca2c-421a-bee9-118bd0572424',
                            key=tibanna.ff_keys)
    time.sleep(10)
    target_key = output_target_for_input_extra(target_inf, of, tibanna)
    assert target_key == '83a80cf8-ca2c-421a-bee9-118bd0572424/4DNFIF14KRAK.bw'
    with pytest.raises(Exception) as expinfo:
        target_key = output_target_for_input_extra(target_inf, of, tibanna)
    assert "input already has extra: 'User overwrite_input_extra'" in str(expinfo.value)
    target_key = output_target_for_input_extra(target_inf, of, tibanna, True)
    assert target_key == '83a80cf8-ca2c-421a-bee9-118bd0572424/4DNFIF14KRAK.bw'
def get_file_format(event):
    '''If the file extension matches the regular file format, returns (format, None).
    If it matches one of the formats of an extra file, returns
    (format (e.g. 'pairs_px2'), 'extra').
    '''
    # guess env from bucket name
    bucket = event['Records'][0]['s3']['bucket']['name']
    env = '-'.join(bucket.split('-')[1:3])
    if env == 'fourfront-webprod':
        env = 'data'
    upload_key = event['Records'][0]['s3']['object']['key']
    uuid, object_key = upload_key.split('/')
    accession = object_key.split('.')[0]
    extension = object_key.replace(accession + '.', '')
    try:
        tibanna = Tibanna(env=env)
    except Exception as e:
        raise TibannaStartException("%s" % e)
    file_format, extra_formats = get_fileformats_for_accession(
        accession, tibanna.ff_keys, env)
    if file_format:
        fe_map = FormatExtensionMap(tibanna.ff_keys)
        printlog(fe_map)
        if extension == fe_map.get_extension(file_format):
            return (file_format, None)
        elif extension in fe_map.get_other_extensions(file_format):
            return (file_format, None)
        else:
            for extra_format in extra_formats:
                if extension == fe_map.get_extension(extra_format):
                    return (extra_format, 'extra')
                elif extension in fe_map.get_other_extensions(extra_format):
                    return (extra_format, 'extra')
            raise Exception("file extension not matching: %s vs %s (%s)"
                            % (extension, fe_map.get_extension(file_format), file_format))
    else:
        raise Exception("Cannot get input metadata")
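# Worked example (hypothetical values) of the key parsing in get_file_format():
upload_key = 'some-uuid/4DNFI25JXLLI.pairs.gz.px2'
uuid, object_key = upload_key.split('/')
accession = object_key.split('.')[0]                  # '4DNFI25JXLLI'
extension = object_key.replace(accession + '.', '')   # 'pairs.gz.px2' (no leading dot)
# Contrast with get_extra_file_format() above, which strips only the accession
# and therefore keeps the leading dot in its extension.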
def test__input_extra_updater():
    tibanna = Tibanna('fourfront-webdev',
                      settings={"run_type": "bedGraphToBigWig",
                                "env": "fourfront-webdev"})
    accession = '4DNFIF14KRAK'
    _input_extra_updater('uploaded', tibanna, accession, 'bw',
                         'some_md5', 1234, 'some_higlass_uid')
    res = ff_utils.get_metadata(accession, tibanna.ff_keys, tibanna.env,
                                add_on='frame=object', check_queue=True)
    assert res['extra_files'][0]['file_format'] == '/file-formats/bw/'
    assert res['extra_files'][0]['status'] == 'uploaded'
    assert res['extra_files'][0]['md5sum'] == 'some_md5'
    assert res['extra_files'][0]['file_size'] == 1234
    assert res['higlass_uid'] == 'some_higlass_uid'
    _input_extra_updater('upload failed', tibanna, '4DNFIF14KRAK', 'bw',
                         'some_other_md5', 5678)
    res = ff_utils.get_metadata(accession, tibanna.ff_keys, tibanna.env,
                                add_on='frame=object', check_queue=True)
    assert res['extra_files'][0]['file_format'] == '/file-formats/bw/'
    assert res['extra_files'][0]['status'] == 'upload failed'
    # md5sum and file_size keep their previous values on 'upload failed'
    assert res['extra_files'][0]['md5sum'] == 'some_md5'
    assert res['extra_files'][0]['file_size'] == 1234
    with pytest.raises(Exception) as expinfo:
        _input_extra_updater('uploaded', tibanna, accession, 'lalala')
    assert "inconsistency - extra file metadata deleted during workflow run?" \
        in str(expinfo.value)
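# Shape of an 'extra_files' entry implied by the assertions above; field names
# come from the test, values are the test's own placeholders.
_example_extra_file_entry = {
    'file_format': '/file-formats/bw/',
    'status': 'uploaded',
    'md5sum': 'some_md5',
    'file_size': 1234,
}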
def test_merge_source_experiment(run_awsem_event_data):
    input_file = {
        "bucket_name": "elasticbeanstalk-fourfront-webdev-wfoutput",
        "workflow_argument_name": "input_pairs",
        "uuid": ["d2c897ec-bdb2-47ce-b1b1-845daccaa571",
                 "d2c897ec-bdb2-47ce-b1b1-845daccaa571"],
        "object_key": ["4DNFI25JXLLI.pairs.gz", "4DNFI25JXLLI.pairs.gz"]
    }
    data = run_awsem_event_data
    tibanna_settings = data.get('_tibanna', {})
    # if they don't pass in env guess it from output_bucket
    env = tibanna_settings.get('env')
    # tibanna provides access to keys based on env and stuff like that
    tibanna = Tibanna(env, ff_keys=data.get('ff_keys'), settings=tibanna_settings)
    res = merge_source_experiments(input_file['uuid'], tibanna.ff_keys, tibanna.env)
    printlog(res)
    assert 'fake_source_experiment' in res
def test_tibanna():
    data = {'env': 'fourfront-webdev', 'settings': {'1': '1'}}
    tibanna = Tibanna(**data)
    assert tibanna
    assert tibanna.as_dict() == data
def real_handler(event, context):
    '''
    this is a generic function to run an awsem workflow
    based on the data passed in

    workflow_uuid : for now, pass this on. Later we can add code to automatically
    retrieve this from app_name.
    Note multiple workflow_uuids can be available for an app_name
    (different versions of the same app could have a different uuid)
    '''
    # get incoming data
    input_file_list = event.get('input_files')
    for infile in input_file_list:
        if not infile:
            raise Exception("malformed input, check your input_files")
    app_name = event.get('app_name')
    print(app_name)
    workflow_uuid = event.get('workflow_uuid')
    output_bucket = event.get('output_bucket')
    parameters = ff_utils.convert_param(event.get('parameters'), True)
    tibanna_settings = event.get('_tibanna', {})
    tag = event.get('tag')
    # if they don't pass in env, guess it from output_bucket
    try:
        env = tibanna_settings.get('env', '-'.join(output_bucket.split('-')[1:-1]))
        # tibanna provides access to keys based on env and stuff like that
        tibanna = Tibanna(env, ff_keys=event.get('ff_keys'), settings=tibanna_settings)
    except Exception as e:
        raise TibannaStartException("%s" % e)
    args = dict()
    # get argument format & type info from workflow
    workflow_info = ff_utils.get_metadata(workflow_uuid,
                                          key=tibanna.ff_keys,
                                          ff_env=tibanna.env,
                                          add_on='frame=object')
    print("workflow info %s" % workflow_info)
    LOG.info("workflow info %s" % workflow_info)
    if 'error' in workflow_info.get('@type', []):
        raise Exception("FATAL, can't lookup workflow info for %s fourfront" % workflow_uuid)
    # get cwl info from workflow_info
    for k in ['app_name', 'app_version', 'cwl_directory_url',
              'cwl_main_filename', 'cwl_child_filenames']:
        print(workflow_info.get(k))
        LOG.info(workflow_info.get(k))
        args[k] = workflow_info.get(k)
    if not args['cwl_child_filenames']:
        args['cwl_child_filenames'] = []
    # switch to v1 if available
    if 'cwl_directory_url_v1' in workflow_info:  # use CWL v1
        args['cwl_directory_url'] = workflow_info['cwl_directory_url_v1']
        args['cwl_version'] = 'v1'
    else:
        args['cwl_version'] = 'draft3'
    # input file args for awsem
    for input_file in input_file_list:
        process_input_file_info(input_file, tibanna.ff_keys, tibanna.env, args)
    # create the ff_meta output info
    input_files_for_ffmeta = create_ffmeta_input_files_from_pony_input_file_list(
        input_file_list)
    # source experiments
    input_file_uuids = [_['uuid'] for _ in input_file_list]
    pf_source_experiments = merge_source_experiments(input_file_uuids,
                                                     tibanna.ff_keys,
                                                     tibanna.env)
    # processed file metadata
    output_files, pf_meta = handle_processed_files(
        workflow_info, tibanna, pf_source_experiments,
        custom_fields=event.get('custom_pf_fields'),
        user_supplied_output_files=event.get('output_files'))
    print("output files= %s" % str(output_files))
    # 4DN dcic award and lab are used here, unless provided in wfr_meta
    ff_meta = create_ffmeta_awsem(
        workflow_uuid, app_name, input_files_for_ffmeta, tag=tag,
        run_url=tibanna.settings.get('url', ''),
        output_files=output_files, parameters=parameters,
        extra_meta=event.get('wfr_meta'),
    )
    print("ff_meta is %s" % ff_meta.__dict__)
    LOG.info("ff_meta is %s" % ff_meta.__dict__)
    # store metadata so we know the run has started
    ff_meta.post(key=tibanna.ff_keys)
    # parameters
    args['input_parameters'] = event.get('parameters')
    # output target
    args['output_target'] = dict()
    args['secondary_output_target'] = dict()
    for of in ff_meta.output_files:
        arg_name = of.get('workflow_argument_name')
        if of.get('type') == 'Output processed file':
            args['output_target'][arg_name] = of.get('upload_key')
        else:
            random_tag = str(int(random.random() * 1000000000000))
            # add a random tag at the end for a non-processed file, e.g. md5 report,
            # so that if two or more wfrs are triggered (e.g. one with parent file,
            # one with extra file) it will create a different output. Not implemented
            # for processed files - it's tricky because processed files must have
            # a specific name.
            args['output_target'][arg_name] = ff_meta.uuid + '/' + arg_name + random_tag
        if 'secondary_file_formats' in of:
            # takes only the first secondary file.
            args['secondary_output_target'][arg_name] = \
                [_.get('upload_key') for _ in of.get('extra_files', [{}, ])]
    # output bucket
    args['output_S3_bucket'] = event.get('output_bucket')
    # dependencies
    if 'dependency' in event:
        args['dependency'] = event['dependency']
    # initialize config parameters as null for benchmarking
    config = event['config']
    if 'instance_type' not in config:
        config['instance_type'] = ''
    if 'EBS_optimized' not in config:
        config['EBS_optimized'] = ''
    if 'ebs_size' not in config:
        config['ebs_size'] = 0
    if 'public_postrun_json' not in config:
        config['public_postrun_json'] = True
    event.update({"ff_meta": ff_meta.as_dict(),
                  'pf_meta': [meta.as_dict() for meta in pf_meta],
                  "_tibanna": tibanna.as_dict(),
                  "args": args})
    return event
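# Sketch of the random-tag scheme above for non-processed outputs (e.g. md5
# reports): a numeric tag (up to 12 digits) appended to the target key so that
# concurrent workflow runs on the same file write to distinct outputs.
import random

random_tag = str(int(random.random() * 1000000000000))
# target key becomes '<wfr-uuid>/<arg_name><random_tag>', e.g.
# '.../report734210598113' -- the example value is hypothetical.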
def real_handler(event, context):
    '''
    this is a generic function to run an awsem workflow
    based on the data passed in

    workflow_uuid : for now, pass this on. Later we can add code to automatically
    retrieve this from app_name.
    Note multiple workflow_uuids can be available for an app_name
    (different versions of the same app could have a different uuid)
    '''
    # keep the input json on s3
    logbucket = event.get('config', {}).get('log_bucket', '')
    jobid = event.get('jobid', '')
    if logbucket and jobid:
        boto3.client('s3').put_object(Body=json.dumps(event, indent=4).encode('ascii'),
                                      Key=jobid + '.input.json',
                                      Bucket=logbucket)
    # get incoming data
    input_file_list = event.get('input_files')
    for infile in input_file_list:
        if not infile:
            raise Exception("malformed input, check your input_files")
    workflow_uuid = event.get('workflow_uuid')
    output_bucket = event.get('output_bucket')
    parameters = ff_utils.convert_param(event.get('parameters'), True)
    tibanna_settings = event.get('_tibanna', {})
    if 'overwrite_input_extra' in event.get('config'):
        overwrite_input_extra = event.get('config')['overwrite_input_extra']
    else:
        overwrite_input_extra = event.get('overwrite_input_extra', False)
    tag = event.get('tag')
    # if they don't pass in env, guess it from output_bucket
    try:
        env = tibanna_settings.get('env', '-'.join(output_bucket.split('-')[1:-1]))
        printlog("Tibanna setting : env= " + env)
        # tibanna provides access to keys based on env and stuff like that
        tibanna = Tibanna(env, ff_keys=event.get('ff_keys'), settings=tibanna_settings)
        printlog("Tibanna ff_keys url : " + tibanna.ff_keys['server'])
        printlog("Tibanna.s3.url: " + tibanna.s3.url)
    except Exception as e:
        raise TibannaStartException("%s" % e)
    args = dict()
    # get argument format & type info from workflow
    wf_meta = ff_utils.get_metadata(workflow_uuid,
                                    key=tibanna.ff_keys,
                                    ff_env=tibanna.env,
                                    add_on='frame=object')
    printlog("workflow info %s" % wf_meta)
    if 'error' in wf_meta.get('@type', []):
        raise Exception("FATAL, can't lookup workflow info for %s fourfront" % workflow_uuid)
    # get cwl info from wf_meta
    for k in ['app_name', 'app_version',
              'cwl_directory_url', 'cwl_main_filename', 'cwl_child_filenames',
              'wdl_directory_url', 'wdl_main_filename', 'wdl_child_filenames']:
        printlog(wf_meta.get(k))
        args[k] = wf_meta.get(k, '')
    if not args['cwl_child_filenames']:
        args['cwl_child_filenames'] = []
    if not args['wdl_child_filenames']:
        args['wdl_child_filenames'] = []
    if 'workflow_language' in wf_meta and wf_meta['workflow_language'] == 'WDL':
        args['language'] = 'wdl'
    else:
        # switch to v1 if available
        if 'cwl_directory_url_v1' in wf_meta:  # use CWL v1
            args['cwl_directory_url'] = wf_meta['cwl_directory_url_v1']
            args['cwl_version'] = 'v1'
        else:
            args['cwl_version'] = 'draft3'
    # input file args for awsem
    for input_file in input_file_list:
        process_input_file_info(input_file, tibanna.ff_keys, tibanna.env, args)
    # create the ff_meta output info
    input_files_for_ffmeta = create_ffmeta_input_files_from_pony_input_file_list(
        input_file_list)
    # source experiments
    input_file_uuids = [_['uuid'] for _ in input_file_list]
    pf_source_experiments = merge_source_experiments(input_file_uuids,
                                                     tibanna.ff_keys,
                                                     tibanna.env)
    # processed file metadata
    output_files, pf_meta = \
        create_wfr_output_files_and_processed_files(
            wf_meta, tibanna, pf_source_experiments,
            custom_fields=event.get('custom_pf_fields'),
            user_supplied_output_files=event.get('output_files'))
    print("output files= %s" % str(output_files))
    # 4DN dcic award and lab are used here, unless provided in wfr_meta
    ff_meta = create_ffmeta_awsem(
        workflow_uuid, args['app_name'], args['app_version'],
        input_files_for_ffmeta, tag=tag,
        run_url=tibanna.settings.get('url', ''),
        output_files=output_files, parameters=parameters,
        extra_meta=event.get('wfr_meta'), jobid=jobid)
    printlog("ff_meta is %s" % ff_meta.__dict__)
    # store metadata so we know the run has started
    ff_meta.post(key=tibanna.ff_keys)
    # parameters
    args['input_parameters'] = event.get('parameters')
    # output target
    args['output_target'] = dict()
    args['secondary_output_target'] = dict()
    for of in ff_meta.output_files:
        arg_name = of.get('workflow_argument_name')
        if of.get('type') == 'Output processed file':
            args['output_target'][arg_name] = of.get('upload_key')
        elif of.get('type') == 'Output to-be-extra-input file':
            target_inf = input_files_for_ffmeta[0]  # assume only one input for now
            target_key = output_target_for_input_extra(target_inf, of, tibanna,
                                                       overwrite_input_extra)
            args['output_target'][arg_name] = target_key
        else:
            random_tag = str(int(random.random() * 1000000000000))
            # add a random tag at the end for a non-processed file, e.g. md5 report,
            # so that if two or more wfrs are triggered (e.g. one with parent file,
            # one with extra file) it will create a different output. Not implemented
            # for processed files - it's tricky because processed files must have
            # a specific name.
            args['output_target'][arg_name] = ff_meta.uuid + '/' + arg_name + random_tag
        if 'secondary_file_formats' in of and 'extra_files' in of and of['extra_files']:
            for ext in of.get('extra_files'):
                if arg_name not in args['secondary_output_target']:
                    args['secondary_output_target'][arg_name] = [ext.get('upload_key')]
                else:
                    args['secondary_output_target'][arg_name].append(ext.get('upload_key'))
    # output bucket
    args['output_S3_bucket'] = event.get('output_bucket')
    # dependencies
    if 'dependency' in event:
        args['dependency'] = event['dependency']
    # initialize config parameters as null for benchmarking
    config = event['config']
    if 'instance_type' not in config:
        config['instance_type'] = ''
    if 'EBS_optimized' not in config:
        config['EBS_optimized'] = ''
    if 'ebs_size' not in config:
        config['ebs_size'] = 0
    if 'public_postrun_json' not in config:
        config['public_postrun_json'] = True
    event.update({"ff_meta": ff_meta.as_dict(),
                  'pf_meta': [meta.as_dict() for meta in pf_meta],
                  "_tibanna": tibanna.as_dict(),
                  "args": args})
    return event
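# Design note (illustrative, not from the source): the accumulate-into-a-list
# pattern used for args['secondary_output_target'] above can also be written
# with dict.setdefault, which drops the explicit membership test. Names and
# values below are hypothetical stand-ins for the variables above.
secondary_output_target = {}
for arg_name, upload_key in [('out_pairs', 'uuid/acc.pairs.gz.px2')]:
    secondary_output_target.setdefault(arg_name, []).append(upload_key)
assert secondary_output_target == {'out_pairs': ['uuid/acc.pairs.gz.px2']}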
def real_handler(event, context):
    # check the status and other details of import
    '''
    this is to check if the task run is done:
    http://docs.sevenbridges.com/reference#get-task-execution-details
    '''
    # get data
    # used to automatically determine the environment
    tibanna_settings = event.get('_tibanna', {})
    try:
        tibanna = Tibanna(tibanna_settings['env'], settings=tibanna_settings)
    except Exception as e:
        raise TibannaStartException("%s" % e)
    ff_meta = create_ffmeta_awsem(app_name=event.get('ff_meta').get('awsem_app_name'),
                                  **event.get('ff_meta'))
    if event.get('error', False):
        ff_meta.run_status = 'error'
        ff_meta.description = event.get('error')
        patch_res = ff_meta.patch(key=tibanna.ff_keys)
        printlog("patch response: " + str(patch_res))
        # sending a notification email before throwing error
        if 'email' in event['config'] and event['config']['email']:
            try:
                send_notification_email(event['_tibanna']['settings']['run_name'],
                                        event['jobid'],
                                        ff_meta.run_status,
                                        event['_tibanna']['settings']['url'])
            except Exception as e:
                printlog("Cannot send email: %s" % e)
        raise Exception(event.get('error'))
    metadata_only = event.get('metadata_only', False)
    pf_meta = [ProcessedFileMetadata(**pf) for pf in event.get('pf_meta')]
    custom_qc_fields = event.get('custom_qc_fields', None)
    # ensure this bad boy is always initialized
    awsem = Awsem(event)
    # go through this and replace awsemfile_report with awsf format
    # actually the interface should be: look through ff_meta files and ask the
    # runner for the status of each thing, and runner.output_files.length
    # so we just build a runner with an interface to sbg and awsem
    # runner.output_files.length()
    # runner.output_files.file.status
    # runner.output_files.file.loc
    # runner.output_files.file.get
    awsem_output = awsem.output_files()
    awsem_output_extra = awsem.secondary_output_files()
    ff_output = len(ff_meta.output_files)
    if len(awsem_output) != ff_output:
        ff_meta.run_status = 'error'
        ff_meta.description = "%d files output expected %s" % (ff_output, len(awsem_output))
        ff_meta.patch(key=tibanna.ff_keys)
        raise Exception("Failing the workflow because output files = %d and ffmeta = %d"
                        % (len(awsem_output), ff_output))

    def update_metadata_from_awsemfile_list(awsemfile_list):
        patch_meta = False
        for awsemfile in awsemfile_list:
            patch_meta = update_ffmeta_from_awsemfile(awsemfile, ff_meta, tibanna,
                                                      custom_qc_fields)
            if not metadata_only:
                update_pfmeta_from_awsemfile(awsemfile, pf_meta, tibanna)
        # allow for a simple way for updater to add appropriate meta_data
        if patch_meta:
            ff_meta.__dict__.update(patch_meta)

    update_metadata_from_awsemfile_list(awsem_output)
    update_metadata_from_awsemfile_list(awsem_output_extra)
    # if we got all the awsemfiles, let's go ahead and update our ff_metadata object
    ff_meta.run_status = "complete"
    # add postrunjson log file to ff_meta as a url
    ff_meta.awsem_postrun_json = get_postrunjson_url(event)
    # make all the file awsemfile meta-data stuff here
    # TODO: fix bugs with ff_meta mapping for output and input file
    try:
        ff_meta.patch(key=tibanna.ff_keys)
    except Exception as e:
        raise Exception("Failed to update run_status %s" % str(e))
    # patch processed files - update only status, extra_files, md5sum and file_size
    if pf_meta:
        patch_fields = ['uuid', 'status', 'extra_files', 'md5sum', 'file_size',
                        'higlass_uid']
        try:
            for pf in pf_meta:
                printlog(pf.as_dict())
                pf.patch(key=tibanna.ff_keys, fields=patch_fields)
        except Exception as e:
            raise Exception("Failed to update processed metadata %s" % str(e))
    event['ff_meta'] = ff_meta.as_dict()
    event['pf_meta'] = [_.as_dict() for _ in pf_meta]
    # sending a notification email after the job finishes
    if 'email' in event['config'] and event['config']['email']:
        try:
            send_notification_email(event['_tibanna']['settings']['run_name'],
                                    event['jobid'],
                                    event['ff_meta']['run_status'],
                                    event['_tibanna']['settings']['url'])
        except Exception as e:
            printlog("Cannot send email: %s" % e)
    return event