Example #1
def get_extra_file_format(event):
    '''if the file extension matches the regular file format,
    returns None;
    if it matches one of the formats of an extra file,
    returns that format (e.g. 'pairs_px2')
    '''
    # guess env from bucket name
    bucket = event['Records'][0]['s3']['bucket']['name']
    env = '-'.join(bucket.split('-')[1:3])
    upload_key = event['Records'][0]['s3']['object']['key']
    uuid, object_key = upload_key.split('/')
    accession = object_key.split('.')[0]
    extension = object_key.replace(accession, '')

    tibanna = Tibanna(env=env)
    meta = get_metadata(accession,
                        key=tibanna.ff_keys,
                        ff_env=env,
                        add_on='frame=object',
                        check_queue=True)
    if meta:
        file_format = meta.get('file_format')
        fe_map = get_format_extension_map(tibanna.ff_keys)
        file_extension = fe_map.get(file_format)
        if extension == file_extension:
            return None
        else:
            for extra in meta.get('extra_files', []):
                extra_format = extra.get('file_format')
                extra_extension = fe_map.get(extra_format)
                if extension == extra_extension:
                    return extra_format
        raise Exception("file extension not matching")
    else:
        raise Exception("Cannot get input metadata")
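
For context, a minimal sketch of the S3 notification event shape this handler assumes (object keys laid out as '<uuid>/<accession>.<extension>', env guessed from the bucket name). The uuid and accession below are hypothetical:

# hypothetical event; only the fields read above are included
event = {
    'Records': [{
        's3': {
            'bucket': {'name': 'elasticbeanstalk-fourfront-webdev-files'},
            'object': {'key': 'some-uuid/4DNFIXXXXXXX.pairs.gz.px2'},
        }
    }]
}
bucket = event['Records'][0]['s3']['bucket']['name']
env = '-'.join(bucket.split('-')[1:3])           # 'fourfront-webdev'
uuid, object_key = event['Records'][0]['s3']['object']['key'].split('/')
accession = object_key.split('.')[0]             # '4DNFIXXXXXXX'
extension = object_key.replace(accession, '')    # '.pairs.gz.px2'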
Example #2
def get_status_for_extra_file(event, extra_format):
    if not extra_format:
        return None
    upload_key = event['Records'][0]['s3']['object']['key']
    if upload_key.endswith('html'):
        return False

    uuid, object_key = upload_key.split('/')
    accession = object_key.split('.')[0]

    # guess env from bucket name
    bucket = event['Records'][0]['s3']['bucket']['name']
    env = '-'.join(bucket.split('-')[1:3])

    try:
        tibanna = Tibanna(env=env)
    except Exception as e:
        raise TibannaStartException("%s" % e)
    meta = get_metadata(accession,
                        key=tibanna.ff_keys,
                        ff_env=env,
                        add_on='frame=object',
                        check_queue=True)
    if meta and 'extra_files' in meta:
        for exf in meta['extra_files']:
            if parse_formatstr(exf['file_format']) == extra_format:
                return exf.get('status', None)
    return None
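
parse_formatstr is not shown here; judging from test__input_extra_updater (Example #17 below), where file_format round-trips as '/file-formats/bw/', it presumably normalizes such a path to the bare format name. A minimal sketch under that assumption, not necessarily the real implementation:

def parse_formatstr(file_format_str):
    # assumption: normalize '/file-formats/bw/' (or a bare 'bw') to 'bw'
    # so it can be compared against a plain format name
    if not file_format_str:
        return file_format_str
    return file_format_str.replace('/file-formats/', '').strip('/')

assert parse_formatstr('/file-formats/bw/') == 'bw'
assert parse_formatstr('bw') == 'bw'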
Example #3
def test_proc_file_for_arg_name(run_awsem_event_data_processed_files,
                                proc_file_in_webdev):
    of = [{
        "workflow_argument_name": "output_file1",
        "uuid": proc_file_in_webdev['uuid']
    }, {
        "workflow_argument_name": "output_file2",
        "uuid": "f4864029-a8ad-4bb8-93e7-5108f46bbbbb"
    }]

    tibanna_settings = run_awsem_event_data_processed_files.get('_tibanna', {})
    # if they don't pass in env guess it from output_bucket
    env = tibanna_settings.get('env')
    # tibanna provides access to keys based on env and stuff like that
    tibanna = Tibanna(
        env,
        ff_keys=run_awsem_event_data_processed_files.get('ff_keys'),
        settings=tibanna_settings)

    file_with_type = proc_file_in_webdev.copy()
    file_with_type['@type'] = ['FileProcessed', 'Item', 'whatever']
    with mock.patch('core.pony_utils.get_metadata',
                    return_value=file_with_type):
        pf, resp = proc_file_for_arg_name(of, 'output_file1', tibanna)
        assert type(pf) == ProcessedFileMetadata
        assert pf.__dict__ == proc_file_in_webdev
Example #4
def test_handle_processed_files(run_awsem_event_data_secondary_files):
    data = run_awsem_event_data_secondary_files
    tibanna_settings = data.get('_tibanna', {})
    # if they don't pass in env guess it from output_bucket
    env = tibanna_settings.get('env')
    # tibanna provides access to keys based on env and stuff like that
    tibanna = Tibanna(env, ff_keys=data.get('ff_keys'),
                      settings=tibanna_settings)
    workflow_uuid = data['workflow_uuid']
    wf_meta = ff_utils.get_metadata(workflow_uuid, key=tibanna.ff_keys, ff_env=tibanna.env, add_on='frame=object')

    output_files, pf_meta = create_wfr_output_files_and_processed_files(wf_meta, tibanna)
    assert output_files
    assert len(output_files) == 3
    for of in output_files:
        if of['format'] == 'pairs':
            assert of['secondary_file_formats'] == ['pairs_px2']
            assert of['extra_files']
        else:
            assert 'secondary_file_formats' not in of

    assert pf_meta
    assert len(pf_meta) == 3
    for pf in pf_meta:
        pdict = pf.__dict__
        if pdict['file_format'] == 'pairs':
            assert pdict['extra_files'] == [{'file_format': 'pairs_px2'}]
        else:
            assert 'extra_files' not in pdict
Example #5
def test_md5_updater_newmd5(update_ffmeta_event_data_newmd5):
    event = update_ffmeta_event_data_newmd5
    tibanna_settings = event.get('_tibanna', {})
    tibanna = Tibanna(**tibanna_settings)
    awsem = Awsem(update_ffmeta_event_data_newmd5)
    ouf = awsem.output_files()[0]
    md5_updater('uploaded', ouf, None, tibanna)
Example #6
def test_handle_processed_files(run_awsem_event_data_secondary_files):
    data = run_awsem_event_data_secondary_files
    tibanna_settings = data.get('_tibanna', {})
    # if they don't pass in env guess it from output_bucket
    env = tibanna_settings.get('env')
    # tibanna provides access to keys based on env and stuff like that
    tibanna = Tibanna(env,
                      ff_keys=data.get('ff_keys'),
                      settings=tibanna_settings)
    workflow_uuid = data['workflow_uuid']
    workflow_info = ff_utils.get_metadata(workflow_uuid, key=tibanna.ff_keys)

    with mock.patch('core.pony_utils.post_metadata') as mock_request:
        output_files, pf_meta = handle_processed_files(workflow_info, tibanna)
        assert mock_request.call_count == 3
    assert output_files
    assert len(output_files) == 3
    for of in output_files:
        if of['extension'] == '.pairs.gz':
            assert of['secondary_file_extensions'] == ['.pairs.gz.px2']
            assert of['secondary_file_formats'] == ['pairs_px2']
            assert of['extra_files']
        else:
            assert 'secondary_file_extensions' not in of
            assert 'secondary_file_formats' not in of

    assert pf_meta
    assert len(pf_meta) == 3
    for pf in pf_meta:
        pdict = pf.__dict__
        if pdict['file_format'] == 'pairs':
            assert pdict['extra_files'] == [{'file_format': 'pairs_px2'}]
        else:
            assert 'extra_files' not in pdict
Example #7
def test_add_secondary_files_to_args(run_awsem_event_data):
    input_file = {
        "bucket_name": "elasticbeanstalk-fourfront-webdev-wfoutput",
        "workflow_argument_name": "input_pairs",
        "uuid": ["d2c897ec-bdb2-47ce-b1b1-845daccaa571", "d2c897ec-bdb2-47ce-b1b1-845daccaa571"],
        "object_key": ["4DNFI25JXLLI.pairs.gz", "4DNFI25JXLLI.pairs.gz"]
    }
    args = {
        'input_files': {
            'input_pairs': {
                'bucket': 'elasticbeanstalk-fourfront-webdev-wfoutput',
                'object_key': [
                    'd2c897ec-bdb2-47ce-b1b1-845daccaa571/4DNFI25JXLLI.pairs.gz',
                    'd2c897ec-bdb2-47ce-b1b1-845daccaa571/4DNFI25JXLLI.pairs.gz'
                ]
            }
        }
    }
    data = run_awsem_event_data
    tibanna_settings = data.get('_tibanna', {})
    # if they don't pass in env guess it from output_bucket
    env = tibanna_settings.get('env')
    # tibanna provides access to keys based on env and stuff like that
    tibanna = Tibanna(env, ff_keys=data.get('ff_keys'),
                      settings=tibanna_settings)
    add_secondary_files_to_args(input_file, tibanna.ff_keys, tibanna.env, args)
Example #8
def get_status(event):
    print("is status uploading: %s" % event)
    upload_key = event['Records'][0]['s3']['object']['key']
    if upload_key.endswith('html'):
        return False

    uuid, object_key = upload_key.split('/')
    accession = object_key.split('.')[0]

    # guess env from bucket name
    bucket = event['Records'][0]['s3']['bucket']['name']
    env = '-'.join(bucket.split('-')[1:3])

    try:
        tibanna = Tibanna(env=env)
    except Exception as e:
        raise TibannaStartException("%s" % e)
    meta = get_metadata(accession,
                        key=tibanna.ff_keys,
                        ff_env=env,
                        add_on='frame=object',
                        check_queue=True)
    if meta:
        return meta.get('status', '')
    else:
        return ''
Example #9
def test_register_to_higlass3(used_env):
    bucket = 'elasticbeanstalk-fourfront-webdev-wfoutput'
    bigbed_key = 'a34d5ea5-eada-4def-a4a7-c227b0d32395/4DNFIC624FKJ.bb'
    tibanna = Tibanna(used_env)
    with mock.patch('requests.post') as mock_request:
        res = register_to_higlass(tibanna, bucket, bigbed_key, 'bigwig', 'vector')
        mock_request.assert_called_once()
    printlog(res)
    assert res
Example #10
def test_register_to_higlass2(used_env):
    bucket = 'elasticbeanstalk-fourfront-webdev-wfoutput'
    bigwig_key = 'a940cf00-6001-473e-80d1-1e4a43866863/4DNFI75GAT6T.bw'
    tibanna = Tibanna(used_env)
    with mock.patch('requests.post') as mock_request:
        res = register_to_higlass(tibanna, bucket, bigwig_key, 'bigwig', 'vector')
        mock_request.assert_called_once()
        printlog(res)
        assert res
Example #11
def test_format_extension_map(run_awsem_event_data):
    tibanna_settings = run_awsem_event_data.get('_tibanna', {})
    # if they don't pass in env guess it from output_bucket
    env = tibanna_settings.get('env')
    # tibanna provides access to keys based on env and stuff like that
    tibanna = Tibanna(env,
                      ff_keys=run_awsem_event_data.get('ff_keys'),
                      settings=tibanna_settings)

    fe_map = FormatExtensionMap(tibanna.ff_keys)
    assert fe_map
    assert 'pairs' in fe_map.fe_dict
Example #12
def test_get_extra_file_key(run_awsem_event_data):
    tibanna_settings = run_awsem_event_data.get('_tibanna', {})
    # if they don't pass in env guess it from output_bucket
    env = tibanna_settings.get('env')
    # tibanna provides access to keys based on env and stuff like that
    tibanna = Tibanna(env,
                      ff_keys=run_awsem_event_data.get('ff_keys'),
                      settings=tibanna_settings)
    fe_map = FormatExtensionMap(tibanna.ff_keys)
    infile_key = 'hahaha/lalala.bedGraph.gz'
    infile_format = 'bg'
    extra_file_format = 'bw'
    extra_file_key = get_extra_file_key(infile_format, infile_key,
                                        extra_file_format, fe_map)
    assert extra_file_key == 'hahaha/lalala.bw'
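
get_extra_file_key itself is not shown; the assertion above implies it swaps the primary format's extension for the extra format's extension. A minimal sketch under that assumption (extensions stored without a leading dot, fe_map.get_extension as used in Example #16), with a hypothetical stand-in for FormatExtensionMap:

def get_extra_file_key_sketch(infile_format, infile_key, extra_file_format, fe_map):
    # assumption: replace the primary extension with the extra file's extension
    infile_extension = fe_map.get_extension(infile_format)      # e.g. 'bedGraph.gz'
    extra_extension = fe_map.get_extension(extra_file_format)   # e.g. 'bw'
    return infile_key.replace(infile_extension, extra_extension)

class FEMapStub:
    '''hypothetical stand-in for FormatExtensionMap, for illustration only'''
    _extensions = {'bg': 'bedGraph.gz', 'bw': 'bw'}

    def get_extension(self, file_format):
        return self._extensions[file_format]

assert get_extra_file_key_sketch('bg', 'hahaha/lalala.bedGraph.gz', 'bw',
                                 FEMapStub()) == 'hahaha/lalala.bw'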
Example #13
def test_process_input_file_info(run_awsem_event_data):
    input_file = {
        "bucket_name": "elasticbeanstalk-fourfront-webdev-wfoutput",
        "workflow_argument_name": "input_pairs",
        "uuid": ["d2c897ec-bdb2-47ce-b1b1-845daccaa571", "d2c897ec-bdb2-47ce-b1b1-845daccaa571"],
        "object_key": ["4DNFI25JXLLI.pairs.gz", "4DNFI25JXLLI.pairs.gz"]
    }
    args = {'input_files': {"some_input": {}, "some_other_input": {}}}
    data = run_awsem_event_data
    tibanna_settings = data.get('_tibanna', {})
    # if they don't pass in env guess it from output_bucket
    env = tibanna_settings.get('env')
    # tibanna provides access to keys based on env and stuff like that
    tibanna = Tibanna(env, ff_keys=data.get('ff_keys'),
                      settings=tibanna_settings)
    process_input_file_info(input_file, tibanna.ff_keys, tibanna.env, args)
    assert len(args['input_files']) == 3
    assert 'secondary_files' in args
Example #14
def test_handle_processed_files2(run_awsem_event_data_processed_files2):
    data = run_awsem_event_data_processed_files2
    tibanna_settings = data.get('_tibanna', {})
    # if they don't pass in env guess it from output_bucket
    env = tibanna_settings.get('env')
    # tibanna provides access to keys based on env and stuff like that
    tibanna = Tibanna(env, ff_keys=data.get('ff_keys'),
                      settings=tibanna_settings)
    workflow_uuid = data['workflow_uuid']
    workflow_info = ff_utils.get_metadata(workflow_uuid, key=tibanna.ff_keys)

    output_files, pf_meta = handle_processed_files(workflow_info, tibanna,
                                                   custom_fields=data.get('custom_pf_fields'))
    assert pf_meta
    assert output_files
    for pf in pf_meta:
        pdict = pf.__dict__
        assert 'genome_assembly' in pdict
        assert pdict['genome_assembly'] == 'GRCh38'
Example #15
def test_output_target_for_input_extra():
    tibanna = Tibanna('fourfront-webdev',
                      settings={"run_type": "bedGraphToBigWig", "env": "fourfront-webdev"})
    target_inf = {'workflow_argument_name': 'bgfile', 'value': '83a80cf8-ca2c-421a-bee9-118bd0572424'}
    of = {'format': 'bw'}

    ff_utils.patch_metadata({'extra_files': []},
                            '83a80cf8-ca2c-421a-bee9-118bd0572424',
                            key=tibanna.ff_keys)
    time.sleep(10)
    target_key = output_target_for_input_extra(target_inf, of, tibanna)
    assert target_key == '83a80cf8-ca2c-421a-bee9-118bd0572424/4DNFIF14KRAK.bw'

    with pytest.raises(Exception) as expinfo:
        output_target_for_input_extra(target_inf, of, tibanna)
    assert "input already has extra: 'User overwrite_input_extra'" in str(expinfo.value)

    target_key = output_target_for_input_extra(target_inf, of, tibanna, True)
    assert target_key == '83a80cf8-ca2c-421a-bee9-118bd0572424/4DNFIF14KRAK.bw'
Example #16
def get_file_format(event):
    '''if the file extension matches the regular file format,
    returns (format, None);
    if it matches one of the formats of an extra file,
    returns (format (e.g. 'pairs_px2'), 'extra')
    '''
    # guess env from bucket name
    bucket = event['Records'][0]['s3']['bucket']['name']
    env = '-'.join(bucket.split('-')[1:3])
    if env == 'fourfront-webprod':
        env = 'data'
    upload_key = event['Records'][0]['s3']['object']['key']
    uuid, object_key = upload_key.split('/')
    accession = object_key.split('.')[0]
    extension = object_key.replace(accession + '.', '')

    try:
        tibanna = Tibanna(env=env)
    except Exception as e:
        raise TibannaStartException("%s" % e)
    file_format, extra_formats = get_fileformats_for_accession(
        accession, tibanna.ff_keys, env)
    if file_format:
        fe_map = FormatExtensionMap(tibanna.ff_keys)
        printlog(fe_map)
        if extension == fe_map.get_extension(file_format) or \
                extension in fe_map.get_other_extensions(file_format):
            return (file_format, None)
        else:
            for extra_format in extra_formats:
                if extension == fe_map.get_extension(extra_format) or \
                        extension in fe_map.get_other_extensions(extra_format):
                    return (extra_format, 'extra')
        raise Exception(
            "file extension not matching: %s vs %s (%s)" %
            (extension, fe_map.get_extension(file_format), file_format))
    else:
        raise Exception("Cannot get input metadata")
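
Note two small differences from get_extra_file_format (Example #1): the fourfront-webprod bucket is mapped to the 'data' env alias, and the extension is parsed without its leading dot (accession + '.'). A quick illustration with a hypothetical accession:

object_key = '4DNFIXXXXXXX.pairs.gz'
accession = object_key.split('.')[0]
assert object_key.replace(accession + '.', '') == 'pairs.gz'   # this version: no leading dot
assert object_key.replace(accession, '') == '.pairs.gz'        # Example #1 keeps the dot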
Example #17
def test__input_extra_updater():
    tibanna = Tibanna('fourfront-webdev',
                      settings={"run_type": "bedGraphToBigWig",
                                "env": "fourfront-webdev"})
    accession = '4DNFIF14KRAK'
    _input_extra_updater('uploaded', tibanna, accession, 'bw', 'some_md5', 1234, 'some_higlass_uid')
    res = ff_utils.get_metadata(accession, tibanna.ff_keys, tibanna.env,
                                add_on='frame=object', check_queue=True)
    assert res['extra_files'][0]['file_format'] == '/file-formats/bw/'
    assert res['extra_files'][0]['status'] == 'uploaded'
    assert res['extra_files'][0]['md5sum'] == 'some_md5'
    assert res['extra_files'][0]['file_size'] == 1234
    assert res['higlass_uid'] == 'some_higlass_uid'
    _input_extra_updater('upload failed', tibanna, '4DNFIF14KRAK', 'bw', 'some_other_md5', 5678)
    res = ff_utils.get_metadata(accession, tibanna.ff_keys, tibanna.env,
                                add_on='frame=object', check_queue=True)
    assert res['extra_files'][0]['file_format'] == '/file-formats/bw/'
    assert res['extra_files'][0]['status'] == 'upload failed'
    assert res['extra_files'][0]['md5sum'] == 'some_md5'
    assert res['extra_files'][0]['file_size'] == 1234
    with pytest.raises(Exception) as expinfo:
        _input_extra_updater('uploaded', tibanna, accession, 'lalala')
    assert "inconsistency - extra file metadata deleted during workflow run?" in str(expinfo.value)
Example #18
def test_merge_source_experiment(run_awsem_event_data):
    input_file = {
        "bucket_name": "elasticbeanstalk-fourfront-webdev-wfoutput",
        "workflow_argument_name": "input_pairs",
        "uuid": [
            "d2c897ec-bdb2-47ce-b1b1-845daccaa571",
            "d2c897ec-bdb2-47ce-b1b1-845daccaa571"
        ],
        "object_key": ["4DNFI25JXLLI.pairs.gz", "4DNFI25JXLLI.pairs.gz"]
    }
    data = run_awsem_event_data
    tibanna_settings = data.get('_tibanna', {})
    # if they don't pass in env guess it from output_bucket
    env = tibanna_settings.get('env')
    # tibanna provides access to keys based on env and stuff like that
    tibanna = Tibanna(env,
                      ff_keys=data.get('ff_keys'),
                      settings=tibanna_settings)
    res = merge_source_experiments(input_file['uuid'], tibanna.ff_keys,
                                   tibanna.env)
    printlog(res)
    assert 'fake_source_experiment' in res
Example #19
def test_tibanna():
    data = {'env': 'fourfront-webdev', 'settings': {'1': '1'}}
    tibanna = Tibanna(**data)
    assert tibanna
    assert tibanna.as_dict() == data
Example #20
def real_handler(event, context):
    '''
    this is a generic function to run an awsem workflow
    based on the data passed in

    workflow_uuid : for now, pass this in. Later we can add code to automatically retrieve it from app_name.
    Note that multiple workflow_uuids can be available for an app_name
    (different versions of the same app could have a different uuid)
    '''
    # get incoming data
    input_file_list = event.get('input_files')
    for infile in input_file_list:
        if not infile:
            raise Exception("malformed input, check your input_files")
    app_name = event.get('app_name')
    print(app_name)
    workflow_uuid = event.get('workflow_uuid')
    output_bucket = event.get('output_bucket')
    parameters = ff_utils.convert_param(event.get('parameters'), True)
    tibanna_settings = event.get('_tibanna', {})
    tag = event.get('tag')
    # if they don't pass in env guess it from output_bucket
    try:
        env = tibanna_settings.get('env',
                                   '-'.join(output_bucket.split('-')[1:-1]))
        # tibanna provides access to keys based on env and stuff like that
        tibanna = Tibanna(env,
                          ff_keys=event.get('ff_keys'),
                          settings=tibanna_settings)
    except Exception as e:
        raise TibannaStartException("%s" % e)

    args = dict()

    # get argument format & type info from workflow
    workflow_info = ff_utils.get_metadata(workflow_uuid,
                                          key=tibanna.ff_keys,
                                          ff_env=tibanna.env,
                                          add_on='frame=object')
    print("workflow info  %s" % workflow_info)
    LOG.info("workflow info  %s" % workflow_info)
    if 'error' in workflow_info.get('@type', []):
        raise Exception("FATAL, can't lookup workflow info for %s fourfront" %
                        workflow_uuid)

    # get cwl info from workflow_info
    for k in [
            'app_name', 'app_version', 'cwl_directory_url',
            'cwl_main_filename', 'cwl_child_filenames'
    ]:
        print(workflow_info.get(k))
        LOG.info(workflow_info.get(k))
        args[k] = workflow_info.get(k)
    if not args['cwl_child_filenames']:
        args['cwl_child_filenames'] = []

    # switch to v1 if available
    if 'cwl_directory_url_v1' in workflow_info:  # use CWL v1
        args['cwl_directory_url'] = workflow_info['cwl_directory_url_v1']
        args['cwl_version'] = 'v1'
    else:
        args['cwl_version'] = 'draft3'

    # input file args for awsem
    for input_file in input_file_list:
        process_input_file_info(input_file, tibanna.ff_keys, tibanna.env, args)

    # create the ff_meta output info
    input_files_for_ffmeta = create_ffmeta_input_files_from_pony_input_file_list(
        input_file_list)

    # source experiments
    input_file_uuids = [_['uuid'] for _ in input_file_list]
    pf_source_experiments = merge_source_experiments(input_file_uuids,
                                                     tibanna.ff_keys,
                                                     tibanna.env)

    # processed file metadata
    output_files, pf_meta = handle_processed_files(
        workflow_info,
        tibanna,
        pf_source_experiments,
        custom_fields=event.get('custom_pf_fields'),
        user_supplied_output_files=event.get('output_files'))
    print("output files= %s" % str(output_files))

    # 4DN dcic award and lab are used here, unless provided in wfr_meta
    ff_meta = create_ffmeta_awsem(
        workflow_uuid,
        app_name,
        input_files_for_ffmeta,
        tag=tag,
        run_url=tibanna.settings.get('url', ''),
        output_files=output_files,
        parameters=parameters,
        extra_meta=event.get('wfr_meta'),
    )

    print("ff_meta is %s" % ff_meta.__dict__)
    LOG.info("ff_meta is %s" % ff_meta.__dict__)

    # store metadata so we know the run has started
    ff_meta.post(key=tibanna.ff_keys)

    # parameters
    args['input_parameters'] = event.get('parameters')

    # output target
    args['output_target'] = dict()
    args['secondary_output_target'] = dict()
    for of in ff_meta.output_files:
        arg_name = of.get('workflow_argument_name')
        if of.get('type') == 'Output processed file':
            args['output_target'][arg_name] = of.get('upload_key')
        else:
            random_tag = str(int(random.random() * 1000000000000))
            # add a random tag at the end for a non-processed file (e.g. an md5
            # report), so that if two or more wfrs are triggered (e.g. one with
            # the parent file, one with an extra file) they create different
            # outputs. Not implemented for processed files - it's tricky because
            # processed files must have a specific name.
            args['output_target'][arg_name] = ff_meta.uuid + '/' + arg_name + random_tag
        if 'secondary_file_formats' in of:
            # collect the upload keys of the extra files
            args['secondary_output_target'][arg_name] \
                = [_.get('upload_key') for _ in of.get('extra_files', [{}, ])]

    # output bucket
    args['output_S3_bucket'] = event.get('output_bucket')

    # dependencies
    if 'dependency' in event:
        args['dependency'] = event['dependency']

    # initialize config parameters as null for benchmarking
    config = event['config']
    if 'instance_type' not in config:
        config['instance_type'] = ''
    if 'EBS_optimized' not in config:
        config['EBS_optimized'] = ''
    if 'ebs_size' not in config:
        config['ebs_size'] = 0
    if 'public_postrun_json' not in config:
        config['public_postrun_json'] = True

    event.update({
        "ff_meta": ff_meta.as_dict(),
        'pf_meta': [meta.as_dict() for meta in pf_meta],
        "_tibanna": tibanna.as_dict(),
        "args": args
    })
    return event
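
To make the non-processed-file target naming above concrete, a small illustration (the uuid and argument name are hypothetical):

import random

# a random numeric tag is appended so that concurrent workflow runs writing the
# same non-processed output (e.g. an md5 report) do not collide on one S3 key
ff_meta_uuid = 'a1b2c3d4-0000-0000-0000-000000000000'
arg_name = 'report'
random_tag = str(int(random.random() * 1000000000000))
output_target = ff_meta_uuid + '/' + arg_name + random_tag
# e.g. 'a1b2c3d4-0000-0000-0000-000000000000/report123456789012'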
Example #21
def real_handler(event, context):
    '''
    this is a generic function to run an awsem workflow
    based on the data passed in

    workflow_uuid : for now, pass this in. Later we can add code to automatically retrieve it from app_name.
    Note that multiple workflow_uuids can be available for an app_name
    (different versions of the same app could have a different uuid)
    '''
    # keep the input json on s3
    logbucket = event.get('config', {}).get('log_bucket', '')
    jobid = event.get('jobid', '')
    if logbucket and jobid:
        boto3.client('s3').put_object(
            Body=json.dumps(event, indent=4).encode('ascii'),
            Key=jobid + '.input.json',
            Bucket=logbucket)

    # get incoming data
    input_file_list = event.get('input_files')
    for infile in input_file_list:
        if not infile:
            raise Exception("malformed input, check your input_files")
    workflow_uuid = event.get('workflow_uuid')
    output_bucket = event.get('output_bucket')
    parameters = ff_utils.convert_param(event.get('parameters'), True)
    tibanna_settings = event.get('_tibanna', {})
    if 'overwrite_input_extra' in event.get('config'):
        overwrite_input_extra = event.get('config')['overwrite_input_extra']
    else:
        overwrite_input_extra = event.get('overwrite_input_extra', False)
    tag = event.get('tag')
    # if they don't pass in env guess it from output_bucket
    try:
        env = tibanna_settings.get('env',
                                   '-'.join(output_bucket.split('-')[1:-1]))
        printlog("Tibanna setting : env= " + env)
        # tibanna provides access to keys based on env and stuff like that
        tibanna = Tibanna(env,
                          ff_keys=event.get('ff_keys'),
                          settings=tibanna_settings)
        printlog("Tibanna ff_keys url : " + tibanna.ff_keys['server'])
        printlog("Tibanna.s3.url: " + tibanna.s3.url)
    except Exception as e:
        raise TibannaStartException("%s" % e)

    args = dict()

    # get argument format & type info from workflow
    wf_meta = ff_utils.get_metadata(workflow_uuid,
                                    key=tibanna.ff_keys,
                                    ff_env=tibanna.env,
                                    add_on='frame=object')
    printlog("workflow info  %s" % wf_meta)
    if 'error' in wf_meta.get('@type', []):
        raise Exception("FATAL, can't lookup workflow info for %s fourfront" %
                        workflow_uuid)

    # get cwl info from wf_meta
    for k in [
            'app_name', 'app_version', 'cwl_directory_url',
            'cwl_main_filename', 'cwl_child_filenames', 'wdl_directory_url',
            'wdl_main_filename', 'wdl_child_filenames'
    ]:
        printlog(wf_meta.get(k))
        args[k] = wf_meta.get(k, '')
    if not args['cwl_child_filenames']:
        args['cwl_child_filenames'] = []
    if not args['wdl_child_filenames']:
        args['wdl_child_filenames'] = []

    if 'workflow_language' in wf_meta and wf_meta['workflow_language'] == 'WDL':
        args['language'] = 'wdl'
    else:
        # switch to v1 if available
        if 'cwl_directory_url_v1' in wf_meta:  # use CWL v1
            args['cwl_directory_url'] = wf_meta['cwl_directory_url_v1']
            args['cwl_version'] = 'v1'
        else:
            args['cwl_version'] = 'draft3'

    # input file args for awsem
    for input_file in input_file_list:
        process_input_file_info(input_file, tibanna.ff_keys, tibanna.env, args)

    # create the ff_meta output info
    input_files_for_ffmeta = create_ffmeta_input_files_from_pony_input_file_list(
        input_file_list)

    # source experiments
    input_file_uuids = [_['uuid'] for _ in input_file_list]
    pf_source_experiments = merge_source_experiments(input_file_uuids,
                                                     tibanna.ff_keys,
                                                     tibanna.env)

    # processed file metadata
    output_files, pf_meta = \
        create_wfr_output_files_and_processed_files(wf_meta, tibanna,
                                                    pf_source_experiments,
                                                    custom_fields=event.get('custom_pf_fields'),
                                                    user_supplied_output_files=event.get('output_files'))
    print("output files= %s" % str(output_files))

    # 4DN dcic award and lab are used here, unless provided in wfr_meta
    ff_meta = create_ffmeta_awsem(workflow_uuid,
                                  args['app_name'],
                                  args['app_version'],
                                  input_files_for_ffmeta,
                                  tag=tag,
                                  run_url=tibanna.settings.get('url', ''),
                                  output_files=output_files,
                                  parameters=parameters,
                                  extra_meta=event.get('wfr_meta'),
                                  jobid=jobid)

    printlog("ff_meta is %s" % ff_meta.__dict__)

    # store metadata so we know the run has started
    ff_meta.post(key=tibanna.ff_keys)

    # parameters
    args['input_parameters'] = event.get('parameters')

    # output target
    args['output_target'] = dict()
    args['secondary_output_target'] = dict()
    for of in ff_meta.output_files:
        arg_name = of.get('workflow_argument_name')
        if of.get('type') == 'Output processed file':
            args['output_target'][arg_name] = of.get('upload_key')
        elif of.get('type') == 'Output to-be-extra-input file':
            # assume only one input for now
            target_inf = input_files_for_ffmeta[0]
            target_key = output_target_for_input_extra(target_inf, of, tibanna,
                                                       overwrite_input_extra)
            args['output_target'][arg_name] = target_key
        else:
            random_tag = str(int(random.random() * 1000000000000))
            # add a random tag at the end for a non-processed file (e.g. an md5
            # report), so that if two or more wfrs are triggered (e.g. one with
            # the parent file, one with an extra file) they create different
            # outputs. Not implemented for processed files - it's tricky because
            # processed files must have a specific name.
            args['output_target'][arg_name] = ff_meta.uuid + '/' + arg_name + random_tag
        if 'secondary_file_formats' in of and of.get('extra_files'):
            for ext in of['extra_files']:
                if arg_name not in args['secondary_output_target']:
                    args['secondary_output_target'][arg_name] = [ext.get('upload_key')]
                else:
                    args['secondary_output_target'][arg_name].append(
                        ext.get('upload_key'))

    # output bucket
    args['output_S3_bucket'] = event.get('output_bucket')

    # dependencies
    if 'dependency' in event:
        args['dependency'] = event['dependency']

    # initialize config parameters as null for benchmarking
    config = event['config']
    if 'instance_type' not in config:
        config['instance_type'] = ''
    if 'EBS_optimized' not in config:
        config['EBS_optimized'] = ''
    if 'ebs_size' not in config:
        config['ebs_size'] = 0
    if 'public_postrun_json' not in config:
        config['public_postrun_json'] = True

    event.update({
        "ff_meta": ff_meta.as_dict(),
        'pf_meta': [meta.as_dict() for meta in pf_meta],
        "_tibanna": tibanna.as_dict(),
        "args": args
    })
    return event
Example #22
def real_handler(event, context):
    # check the status and other details of import
    '''
    this is to check if the task run is done:
    http://docs.sevenbridges.com/reference#get-task-execution-details
    '''
    # get data
    # used to automatically determine the environment
    tibanna_settings = event.get('_tibanna', {})
    try:
        tibanna = Tibanna(tibanna_settings['env'], settings=tibanna_settings)
    except Exception as e:
        raise TibannaStartException("%s" % e)
    ff_meta = create_ffmeta_awsem(
        app_name=event.get('ff_meta').get('awsem_app_name'),
        **event.get('ff_meta'))

    if event.get('error', False):
        ff_meta.run_status = 'error'
        ff_meta.description = event.get('error')
        patch_res = ff_meta.patch(key=tibanna.ff_keys)
        printlog("patch response: " + str(patch_res))
        # sending a notification email before throwing error
        if 'email' in event['config'] and event['config']['email']:
            try:
                send_notification_email(
                    event['_tibanna']['settings']['run_name'], event['jobid'],
                    ff_meta.run_status, event['_tibanna']['settings']['url'])
            except Exception as e:
                printlog("Cannot send email: %s" % e)
        raise Exception(event.get('error'))

    metadata_only = event.get('metadata_only', False)

    pf_meta = [ProcessedFileMetadata(**pf) for pf in event.get('pf_meta')]
    custom_qc_fields = event.get('custom_qc_fields', None)

    # ensure this bad boy is always initialized
    awsem = Awsem(event)
    # go through this and replace awsemfile_report with the awsf format.
    # Actually the interface should look through the ff_meta files and ask the
    # runner for the status of each one, so we just build a runner with an
    # interface to sbg and awsem:
    # runner.output_files.length()
    # runner.output_files.file.status
    # runner.output_files.file.loc
    # runner.output_files.file.get

    awsem_output = awsem.output_files()
    awsem_output_extra = awsem.secondary_output_files()
    ff_output = len(ff_meta.output_files)
    if len(awsem_output) != ff_output:
        ff_meta.run_status = 'error'
        ff_meta.description = "%d files output, but %d expected" % (
            len(awsem_output), ff_output)
        ff_meta.patch(key=tibanna.ff_keys)
        raise Exception(
            "Failing the workflow because output files = %d and ffmeta = %d"
            % (len(awsem_output), ff_output))

    def update_metadata_from_awsemfile_list(awsemfile_list):
        patch_meta = False
        for awsemfile in awsemfile_list:
            patch_meta = update_ffmeta_from_awsemfile(awsemfile, ff_meta,
                                                      tibanna,
                                                      custom_qc_fields)
            if not metadata_only:
                update_pfmeta_from_awsemfile(awsemfile, pf_meta, tibanna)
        # allow for a simple way for updater to add appropriate meta_data
        if patch_meta:
            ff_meta.__dict__.update(patch_meta)

    update_metadata_from_awsemfile_list(awsem_output)
    update_metadata_from_awsemfile_list(awsem_output_extra)

    # if we got all the awsemfiles let's go ahead and update our ff_metadata object
    ff_meta.run_status = "complete"

    # add postrunjson log file to ff_meta as a url
    ff_meta.awsem_postrun_json = get_postrunjson_url(event)

    # make all the file awsemfile meta-data stuff here
    # TODO: fix bugs with ff_meta mapping for output and input file
    try:
        ff_meta.patch(key=tibanna.ff_keys)
    except Exception as e:
        raise Exception("Failed to update run_status %s" % str(e))
    # patch processed files - update only the fields listed in patch_fields below
    if pf_meta:
        patch_fields = [
            'uuid', 'status', 'extra_files', 'md5sum', 'file_size',
            'higlass_uid'
        ]
        try:
            for pf in pf_meta:
                printlog(pf.as_dict())
                pf.patch(key=tibanna.ff_keys, fields=patch_fields)
        except Exception as e:
            raise Exception("Failed to update processed metadata %s" % str(e))

    event['ff_meta'] = ff_meta.as_dict()
    event['pf_meta'] = [_.as_dict() for _ in pf_meta]

    # sending a notification email after the job finishes
    if 'email' in event['config'] and event['config']['email']:
        try:
            send_notification_email(event['_tibanna']['settings']['run_name'],
                                    event['jobid'],
                                    event['ff_meta']['run_status'],
                                    event['_tibanna']['settings']['url'])
        except Exception as e:
            printlog("Cannot send email: %s" % e)

    return event