Example #1
def main():  # pragma: no cover
    # initial set up
    args = get_args(sys.argv[1:])
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)

    # bucket addresses
    ff_health = get_metadata('/health', auth)
    source_bucket = ff_health['file_upload_bucket']
    target_bucket = ff_health['processed_file_bucket']
    s3 = boto3.resource('s3')

    # get the uuids for the files
    query = 'type=FileVistrack'
    uids = scu.get_item_ids_from_args([query], auth, True)
    files2copy = [get_metadata(uid, auth).get('upload_key') for uid in uids]

    for file_key in files2copy:
        copy_source = {'Bucket': source_bucket, 'Key': file_key}
        try:
            # print(file_key + ' cp from ' + source_bucket + ' to ' + target_bucket)
            s3.meta.client.copy(copy_source, target_bucket, file_key)
        except Exception:
            print('Cannot find file in source bucket:', file_key)
            continue
        print('{} file copied'.format(file_key))
Example #2
def main():
    args = get_args(sys.argv[1:])
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    dryrun = not args.dbupdate

    file_list = scu.get_item_ids_from_args(args.input, auth, args.search)
    wf_data = get_metadata(args.workflow, auth)
    for f in file_list:
        file_info = get_metadata(f, auth)
        parents = file_info.get('produced_from')
        if parents:
            inputs = []
            for p in parents:
                inputs.append(get_metadata(p, auth))
            wfr_json = create_wfr_meta_only_json(auth, wf_data, inputs,
                                                 [file_info])
            if dryrun:
                print('DRY RUN -- will post')
                print(wfr_json)
            else:
                res = post_metadata(wfr_json, 'workflow_run_awsem', auth)
                # and add a notes_to_tsv to the file
                patchstatus = add_notes_to_tsv(file_info, auth)
                print(res)
                print(patchstatus)
Example #3
 def get_source_sample(self, input_file_uuid):
     """
     Connects to fourfront and get source experiment info as a unique list
     Takes a single input file uuid.
     """
     pf_source_samples_set = set()
     inf_uuids = aslist(flatten(input_file_uuid))
     for inf_uuid in inf_uuids:
         infile_meta = get_metadata(
             inf_uuid,
             key=self.tbn.ff_keys,
             ff_env=self.tbn.env,
             add_on='frame=object&datastore=database')
         if infile_meta.get('samples'):
             for exp in infile_meta.get('samples'):
                 exp_obj = get_metadata(
                     exp,
                     key=self.tbn.ff_keys,
                     ff_env=self.tbn.env,
                     add_on='frame=raw&datastore=database')
                 pf_source_samples_set.add(exp_obj['uuid'])
         if infile_meta.get('source_samples'):
             # this field is an array of strings, not linkTo's
             pf_source_samples_set.update(infile_meta.get('source_samples'))
     return list(pf_source_samples_set)
Example #4
def test_fastqc():
    key = dev_key()
    data = get_test_json('fastqc.json')
    fq_uuid = post_new_fastqfile(key=key,
                                 upload_file=os.path.join(
                                     FILE_DIR, 'fastq/A.R2.fastq.gz'))
    data['input_files'][0]['uuid'] = fq_uuid
    api = API()
    res = api.run_workflow(data, sfn=DEV_SFN)
    assert 'jobid' in res
    assert 'exec_arn' in res['_tibanna']
    time.sleep(420)  # wait 7 minutes for the workflow run to finish
    assert api.check_status(res['_tibanna']['exec_arn']) == 'SUCCEEDED'
    outjson = api.check_output(res['_tibanna']['exec_arn'])
    postrunjson = json.loads(api.log(job_id=res['jobid'], postrunjson=True))
    assert 'status' in postrunjson['Job']
    assert postrunjson['Job']['status'] == '0'
    res = ff_utils.get_metadata(fq_uuid,
                                key=key,
                                ff_env=DEV_ENV,
                                check_queue=True)
    ff_utils.patch_metadata({'status': 'deleted'}, fq_uuid, key=key)
    assert 'quality_metric' in res
    assert 'ff_meta' in outjson
    assert 'uuid' in outjson['ff_meta']
    wfr_uuid = outjson['ff_meta']['uuid']
    res = ff_utils.get_metadata(wfr_uuid,
                                key=key,
                                ff_env=DEV_ENV,
                                check_queue=True)
    assert res['run_status'] == 'complete'
    assert 'quality_metric' in res
Example #5
def extract_file_info(obj_id, arg_name, env, rename=[]):
    """Creates the formatted dictionary for files."""
    auth = ff_utils.get_authentication_with_server({}, ff_env=env)
    my_s3_util = s3Utils(env=env)

    raw_bucket = my_s3_util.raw_file_bucket
    out_bucket = my_s3_util.outfile_bucket

    # start a dictionary
    template = {"workflow_argument_name": arg_name}
    if rename:
        change_from = rename[0]
        change_to = rename[1]
    # if it is list of items, change the structure
    if isinstance(obj_id, list):
        object_key = []
        uuid = []
        buckets = []
        for obj in obj_id:
            metadata = ff_utils.get_metadata(obj, key=auth)
            object_key.append(metadata['display_title'])
            uuid.append(metadata['uuid'])
            # get the bucket
            if 'FileProcessed' in metadata['@type']:
                my_bucket = out_bucket
            else:  # covers cases of FileFastq, FileReference, FileMicroscopy
                my_bucket = raw_bucket
            buckets.append(my_bucket)
        # check bucket consistency
        try:
            assert len(list(set(buckets))) == 1
        except AssertionError:
            print('Files from different buckets', obj_id)
            return
        template['object_key'] = object_key
        template['uuid'] = uuid
        template['bucket_name'] = buckets[0]
        if rename:
            template['rename'] = [
                i.replace(change_from, change_to)
                for i in template['object_key']
            ]

    # if obj_id is a string
    else:
        metadata = ff_utils.get_metadata(obj_id, key=auth)
        template['object_key'] = metadata['display_title']
        template['uuid'] = metadata['uuid']
        # get the bucket
        if 'FileProcessed' in metadata['@type']:
            my_bucket = out_bucket
        else:  # covers cases of FileFastq, FileReference, FileMicroscopy
            my_bucket = raw_bucket
        template['bucket_name'] = my_bucket
        if rename:
            template['rename'] = template['object_key'].replace(
                change_from, change_to)
    return template
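A minimal invocation sketch for the helper above; the environment name, file identifiers, and rename pair are illustrative placeholders, not values taken from the example. The same function accepts either a single file id or a list of ids for one workflow argument.

# Hypothetical usage of extract_file_info (placeholder ids and env name).
single = extract_file_info('some-file-uuid', 'input_bam', 'fourfront-webdev')
grouped = extract_file_info(['file-uuid-1', 'file-uuid-2'], 'input_fastqs',
                            'fourfront-webdev', rename=['.fastq.gz', '.fq.gz'])
# grouped['object_key'] and grouped['uuid'] are lists, and grouped['bucket_name']
# is the single bucket shared by all listed files.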
Example #6
def add_preliminary_processed_files(item_id, list_pc, auth, run_type="hic"):
    titles = {
        "hic": "HiC Processing Pipeline - Preliminary Files",
        "repliseq": "Repli-Seq Pipeline - Preliminary Files",
        'chip': "ENCODE ChIP-Seq Pipeline - Preliminary Files",
        'atac': "ENCODE ATAC-Seq Pipeline - Preliminary Files"
    }
    if run_type in titles:
        pc_set_title = titles[run_type]
    else:
        pc_set_title = run_type
    resp = ff_utils.get_metadata(item_id, key=auth)

    # check if these items are already in the processed_files field
    # extract the essentials for comparison, unfold all possible ids into a list, and compare list_pc to it
    ex_pc = resp.get('processed_files')
    if ex_pc:
        ex_pc_ids = [[a['@id'], a['uuid'], a['@id'].split('/')[2]]
                     for a in ex_pc]
        ex_pc_ids = [a for i in ex_pc_ids for a in i]
        for i in list_pc:
            if i in ex_pc_ids:
                print('Error - Cannot add files to pc')
                print(i, 'is already in processed files')
                return

    # extract essential for comparison, unfold all possible ids into a list, and compare list_pc to that one
    ex_opc = resp.get('other_processed_files')
    if ex_opc:
        # check the titles
        all_existing_titles = [a['title'] for a in ex_opc]
        if pc_set_title in all_existing_titles:
            print('Error - Cannot add files to opc')
            print('The same title already in other processed files')
            return
        # check the individual files
        ex_opc_ids = [[a['@id'], a['uuid'], a['@id'].split('/')[2]]
                      for i in ex_opc for a in i['files']]
        ex_opc_ids = [a for i in ex_opc_ids for a in i]
        for i in list_pc:
            if i in ex_opc_ids:
                print('Error - Cannot add files to opc')
                print(i, 'is already in other processed files')
                return

    # we need raw to get the existing piece, to patch back with the new ones
    patch_data = ff_utils.get_metadata(
        item_id, key=auth, add_on='frame=raw').get('other_processed_files')
    if not patch_data:
        patch_data = []

    new_data = {'title': pc_set_title, 'type': 'preliminary', 'files': list_pc}
    patch_data.append(new_data)
    patch = {'other_processed_files': patch_data}
    ff_utils.patch_metadata(patch, obj_id=item_id, key=auth)
Example #7
def get_item_if_you_can(auth, value, itype=None):
    try:
        value.get('uuid')
        return value
    except AttributeError:
        svalue = str(value)
        item = get_metadata(svalue, auth)
        try:
            item.get('uuid')
            return item
        except AttributeError:
            if itype is not None:
                svalue = '/' + itype + svalue + '/?datastore=database'
                return get_metadata(svalue, auth)
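A brief usage sketch; the auth dict and the identifier string are assumed placeholders (auth obtained as in the other examples in this listing). Dict inputs are returned unchanged, while strings fall through to a get_metadata lookup.

# Hypothetical usage of get_item_if_you_can (placeholder auth and identifier).
auth = get_authentication_with_server(None, 'fourfront-webdev')
item = get_item_if_you_can(auth, 'some-accession-or-uuid')
if item is not None:
    print(item.get('uuid'))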
Example #8
def get_format_extension_map(ff_keys):
    try:
        fp_schema = get_metadata("profiles/file_processed.json", key=ff_keys)
        fe_map = fp_schema.get('file_format_file_extension')
        fp_schema2 = get_metadata("profiles/file_fastq.json", key=ff_keys)
        fe_map2 = fp_schema2.get('file_format_file_extension')
        fp_schema3 = get_metadata("profiles/file_reference.json", key=ff_keys)
        fe_map3 = fp_schema3.get('file_format_file_extension')
        fe_map.update(fe_map2)
        fe_map.update(fe_map3)
    except Exception as e:
        raise Exception(
            "Can't get format-extension map from file_processed schema. %s\n" %
            e)
    return fe_map
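The merged result is a plain dict keyed by file format name. A lookup sketch follows; ff_keys and the 'pairs' format name are illustrative assumptions, not part of the example.

# Hypothetical lookup against the merged format-extension map.
fe_map = get_format_extension_map(ff_keys)  # ff_keys assumed from a Fourfront/Tibanna key helper
pairs_extension = fe_map.get('pairs')       # e.g. 'pairs.gz' if the schema defines it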
Example #9
def get_extra_file_format(event):
    '''If the file extension matches the regular file format,
    returns None.
    If it matches the format of one of the extra files,
    returns that format (e.g. 'pairs_px2').
    '''
    # guess env from bucket name
    bucket = event['Records'][0]['s3']['bucket']['name']
    env = '-'.join(bucket.split('-')[1:3])
    upload_key = event['Records'][0]['s3']['object']['key']
    uuid, object_key = upload_key.split('/')
    accession = object_key.split('.')[0]
    extension = object_key.replace(accession, '')

    tibanna = Tibanna(env=env)
    meta = get_metadata(accession,
                        key=tibanna.ff_keys,
                        ff_env=env,
                        add_on='frame=object',
                        check_queue=True)
    if meta:
        file_format = meta.get('file_format')
        fe_map = get_format_extension_map(tibanna.ff_keys)
        file_extension = fe_map.get(file_format)
        if extension == file_extension:
            return None
        else:
            for extra in meta.get('extra_files', []):
                extra_format = extra.get('file_format')
                extra_extension = fe_map.get(extra_format)
                if extension == extra_extension:
                    return extra_format
        raise Exception("file extension not matching")
    else:
        raise Exception("Cannot get input metadata")
Example #10
def run(keypairs_file, schema_name):

    assert os.path.isfile(str(keypairs_file))

    try:
        key = FDN_Key(keypairs_file, "default")
    except Exception as e:
        print(e)
        print("key error")
        raise e
    try:
        connection = FDN_Connection(key)
    except Exception as e:
        print(e)
        print("connection error")
        raise e
    try:
        response = ff_utils.get_metadata("/" + schema_name,
                                         key=connection.key,
                                         frame=None)
    except Exception as e:
        print(e)
        print("post error")
        raise e

    print(response)
Example #11
 def get(cls, uuid, key, ff_env=None, check_queue=False, file_format=None):
     data = get_metadata(uuid,
                         key=key,
                         ff_env=ff_env,
                         add_on='frame=object',
                         check_queue=check_queue)
     if type(data) is not dict:
         raise Exception("unable to find object with unique key of %s" %
                         uuid)
     if 'FileProcessed' not in data.get('@type', {}):
         raise Exception(
             "you can only load ProcessedFiles into this object")
     if 'extra_files' not in data:
         return None
     if len(data['extra_files']) == 1:
         if not file_format or file_format == data['extra_files'][0][
                 'file_format']:
             return ExtraFileMetadata(**data['extra_files'][0])
         else:
             raise Exception("extra file format not matching")
     elif not file_format:
         raise Exception("Two or more extra files - specify file format")
     for ef in data['extra_files']:
         if ef['file_format'] == file_format:
             return ExtraFileMetadata(**ef)
     raise Exception("no matching format for extra file")
Example #12
def test_bed2beddb():
    key = dev_key()
    # prep new File
    data = get_test_json('bedtobeddb.json')
    bed_content = b'chr1\t1000000\t2000000\tregion1'
    gzipped_content = gzip.compress(bed_content)
    bed_uuid = post_new_processedfile(file_format='bed',
                                      key=key,
                                      upload_content=gzipped_content,
                                      extension='bed.gz')
    data['input_files'][0]['uuid'] = bed_uuid
    api = API()
    res = api.run_workflow(data, sfn=DEV_SFN)
    assert 'jobid' in res
    assert 'exec_arn' in res['_tibanna']
    time.sleep(420)  # wait 7 minutes for the workflow run to finish
    assert api.check_status(res['_tibanna']['exec_arn']) == 'SUCCEEDED'
    outjson = api.check_output(res['_tibanna']['exec_arn'])
    postrunjson = json.loads(api.log(job_id=res['jobid'], postrunjson=True))
    assert 'status' in postrunjson['Job']
    assert postrunjson['Job']['status'] == '0'
    assert 'ff_meta' in outjson
    assert 'uuid' in outjson['ff_meta']
    wfr_uuid = outjson['ff_meta']['uuid']
    res = ff_utils.get_metadata(bed_uuid,
                                key=key,
                                ff_env=DEV_ENV,
                                check_queue=True)
    assert res['extra_files']
    assert len(res['extra_files']) == 1
    extra = res['extra_files'][0]
    assert extra['file_format']['display_title'] == 'beddb'
    ff_utils.patch_metadata({'status': 'deleted'}, bed_uuid, key=key)
    ff_utils.patch_metadata({'status': 'deleted'}, wfr_uuid, key=key)
Example #13
def get_status(event):
    print("is status uploading: %s" % event)
    upload_key = event['Records'][0]['s3']['object']['key']
    if upload_key.endswith('html'):
        return False

    uuid, object_key = upload_key.split('/')
    accession = object_key.split('.')[0]

    # guess env from bucket name
    bucket = event['Records'][0]['s3']['bucket']['name']
    env = '-'.join(bucket.split('-')[1:3])

    try:
        tibanna = Tibanna(env=env)
    except Exception as e:
        raise TibannaStartException("%s" % e)
    meta = get_metadata(accession,
                        key=tibanna.ff_keys,
                        ff_env=env,
                        add_on='frame=object',
                        check_queue=True)
    if meta:
        return meta.get('status', '')
    else:
        return ''
Example #14
def test_handle_processed_files(run_awsem_event_data_secondary_files):
    data = run_awsem_event_data_secondary_files
    tibanna_settings = data.get('_tibanna', {})
    # if they don't pass in env guess it from output_bucket
    env = tibanna_settings.get('env')
    # tibanna provides access to keys based on env and stuff like that
    tibanna = Tibanna(env,
                      ff_keys=data.get('ff_keys'),
                      settings=tibanna_settings)
    workflow_uuid = data['workflow_uuid']
    workflow_info = ff_utils.get_metadata(workflow_uuid, key=tibanna.ff_keys)

    output_files, pf_meta = handle_processed_files(workflow_info, tibanna)
    assert (output_files)
    assert len(output_files) == 3
    for of in output_files:
        if of['extension'] == '.pairs.gz':
            assert of['secondary_file_extensions'] == ['.pairs.gz.px2']
            assert of['secondary_file_formats'] == ['pairs_px2']
            assert of['extra_files']
        else:
            assert 'secondary_files_extension' not in of
            assert 'secondary_files_formats' not in of

    assert (pf_meta)
    assert len(pf_meta) == 3
    for pf in pf_meta:
        pdict = pf.__dict__
        if pdict['file_format'] == 'pairs':
            assert pdict['extra_files'] == [{'file_format': 'pairs_px2'}]
        else:
            assert 'extra_files' not in pdict
Example #15
def parse_fdn_xls(fdn_xls):
    biosamples = read_sheet(fdn_xls, 'Biosample', Biosample_4dn,
                            ['aliases', 'treatments', 'modifications', '*biosource'])
    org_dict = {'dmelanogaster': 'Drosophila melanogaster',
                'mouse': 'Mus musculus',
                'human': 'Homo sapiens'}
    # start with BioSample
    # get list of biosources
    biosource_ids = [sample.biosource for sample in biosamples]
    # while doing this either take info from Biosource sheet, or look up biosource on portal
    biosources = []
    for item in biosource_ids:
        # source name: get biosource cell line
        result = ff.get_metadata(item, ff_env="data", frame="embedded")
        source_name, cell_line = get_source_name(result)
        alias = item
        indiv = result['individual']
        org = result.get('individual').get('organism').get('display_title')
        # check for modifications (None if absent)
        mods = result.get('modifications') or None
        biosources.append(Biosource_4dn(alias, source_name, cell_line, org_dict[org], mods))
    # parse treatments

    # next parse FileFastq sheet

    # next parse Experiment sheet(s)
    book = xlrd.open_workbook(fdn_xls)
    sheetnames = [name for name in book.sheet_names() if name.startswith('Experiment') and
                  'Set' not in name and 'Mic' not in name]
Example #16
 def __init__(self, key4dn):
     # passed key object stores the key dict in con_key
     self.check = False
     self.key = key4dn.con_key
     # check connection and find user uuid
     # TODO: we should not need try/except, since if the 'me' page fails there is
     # no need to progress, but the tests are failing without this part;
     # make mocked connections and remove try/except
     # is public connection using submit4dn a realistic case?
     try:
         me_page = ff_utils.get_metadata('me', key=self.key)
         self.user = me_page['@id']
         self.email = me_page['email']
         self.check = True
     except Exception:
         print('Cannot establish connection, please check your keys')
         me_page = {}
     if not me_page:
         sys.exit(1)
     if me_page.get('submits_for') is not None:
         # get all the labs that the user making the connection submits_for
         self.labs = [l['@id'] for l in me_page['submits_for']]
         # take the first one as default value for the connection - reset in
         # import_data if needed by calling set_lab_award
         self.lab = self.labs[0]
         self.set_award(self.lab)  # set as default first
     else:
         self.labs = None
         self.lab = None
         self.award = None
Example #17
def extract_nz_file(acc, auth):
    mapping = {"HindIII": "6", "DpnII": "4", "MboI": "4", "NcoI": "6"}
    exp_resp = ff_utils.get_metadata(acc, key=auth)
    exp_type = exp_resp.get('experiment_type')
    # get enzyme
    nz_num = ""
    nz = exp_resp.get('digestion_enzyme')
    if nz:
        nz_num = mapping.get(nz['display_title'])
    if nz_num:
        pass
    # Soo suggested assigning 6 for Chiapet
    # Burak asked for running all without an NZ with parameter 6
    elif exp_type in [
            'CHIA-pet', 'ChIA-PET', 'micro-C', 'DNase Hi-C', 'TrAC-loop'
    ]:
        nz_num = '6'
    else:
        return (None, None)
    # get organism
    biosample = exp_resp['biosample']
    organisms = list(
        set([
            bs['individual']['organism']['name']
            for bs in biosample['biosource']
        ]))
    chrsize = ''
    if len(organisms) == 1:
        chrsize = chr_size.get(organisms[0])
    # if organism is not available return empty
    if not chrsize:
        print(organisms[0], 'not covered')
        return (None, None)
    # return result if both exist
    return nz_num, chrsize
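A hypothetical call sketch; the accession and auth key are placeholders. The helper returns a (restriction site length, chromosome sizes file) pair, or (None, None) when the enzyme or organism cannot be resolved.

# Hypothetical usage of extract_nz_file (placeholder accession and auth).
nz_num, chrsize = extract_nz_file('4DNEXAMPLE001', auth)
if nz_num is None:
    print('skipping run: enzyme or organism not covered')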
Example #18
def get_status_for_extra_file(event, extra_format):
    if not extra_format:
        return None
    upload_key = event['Records'][0]['s3']['object']['key']
    if upload_key.endswith('html'):
        return False

    uuid, object_key = upload_key.split('/')
    accession = object_key.split('.')[0]

    # guess env from bucket name
    bucket = event['Records'][0]['s3']['bucket']['name']
    env = '-'.join(bucket.split('-')[1:3])

    try:
        tibanna = Tibanna(env=env)
    except Exception as e:
        raise TibannaStartException("%s" % e)
    meta = get_metadata(accession,
                        key=tibanna.ff_keys,
                        ff_env=env,
                        add_on='frame=object',
                        check_queue=True)
    if meta and 'extra_files' in meta:
        for exf in meta['extra_files']:
            if parse_formatstr(exf['file_format']) == extra_format:
                return exf.get('status', None)
    return None
Example #19
def finalize_user_content(spawner):
    """
    This function is called after the singleuser notebook stops.
    Responsible for:
    - adding date_culled to the TrackingItem given by FF_TRACKING_ID
    """
    # grab this info fresh every time
    err_output = []
    ff_keys = recompute_ff_keys(err_output)

    if not os.environ.get('FF_TRACKING_ID'):
        return
    # get current item
    track_id = os.environ['FF_TRACKING_ID']
    try:
        track_res = ff_utils.get_metadata(track_id, key=ff_keys)
    except Exception:
        pass  # Nothing to do here
    else:
        session = track_res.get('jupyterhub_session')
        if session and isinstance(session, dict):
            session['date_culled'] = datetime.datetime.utcnow().isoformat(
            ) + '+00:00'
            try:
                ff_utils.patch_metadata({'jupyterhub_session': session},
                                        track_id,
                                        key=ff_keys)
            except Exception:
                pass
Example #20
 def set_award(self, lab, dontPrompt=True):
     '''Sets the award for the connection for use in import_data.
        If dontPrompt is False, the user is asked to choose when there
        is more than one award for the connection's lab; otherwise
        the first award for the lab is used.
     '''
     self.award = None
     labjson = ff_utils.get_metadata(lab, key=self.key)
     if labjson.get('awards') is not None:
         awards = labjson.get('awards')
         # if dontPrompt is active take the first award
         if dontPrompt:
             self.award = awards[0]['@id']
             return
         # if there is only one award use it
         if len(awards) == 1:
             self.award = awards[0]['@id']
             return
         # if there are multiple awards
         achoices = []
         print("Multiple awards for {labname}:".format(labname=lab))
         for i, awd in enumerate(awards):
             ch = str(i + 1)
             achoices.append(ch)
             print("  ({choice}) {awdname}".format(choice=ch,
                                                   awdname=awd['@id']))
         # retry the input until a valid choice is entered
         awd_resp = ''
         while awd_resp not in achoices:
             awd_resp = str(
                 input(
                     "Select the award for this session {choices}: ".format(
                         choices=achoices)))
         self.award = awards[int(awd_resp) - 1]['@id']
         return
Example #21
def testrun_md5(workflow_name='tibanna_pony', env='webdev'):
    """Creates a random file object with no md5sum/content_md5sum and run md5 workflow.
    It waits for 6 mintues till the workflow run finishes and checks the input file object
    has been updated.
    """
    bucket = "elasticbeanstalk-fourfront-" + env + "-wfoutput"
    ff_key = get_authentication_with_server(ff_env='fourfront-' + env)
    newfile = post_random_file(bucket, ff_key)
    uuid = newfile['uuid']
    accession = newfile['accession']
    input_json = {
        "config": {
            "ebs_type": "io1",
            "ebs_iops": 500,
            "s3_access_arn":
            "arn:aws:iam::643366669028:instance-profile/S3_access",
            "ami_id": "ami-cfb14bb5",
            "json_bucket": "4dn-aws-pipeline-run-json",
            "shutdown_min": 30,
            "copy_to_s3": True,
            "launch_instance": True,
            "log_bucket": "tibanna-output",
            "script_url":
            "https://raw.githubusercontent.com/4dn-dcic/tibanna/master/awsf/",
            "key_name": "4dn-encode",
            "password": ""
        },
        "_tibanna": {
            "env": "fourfront-webdev",
            "run_type": "md5"
        },
        "parameters": {},
        "app_name":
        "md5",
        "workflow_uuid":
        "c77a117b-9a58-477e-aaa5-291a109a99f6",
        "input_files": [{
            "workflow_argument_name": "input_file",
            "bucket_name": bucket,
            "uuid": uuid,
            "object_key": accession + '.pairs.gz'
        }],
        "output_bucket":
        bucket
    }
    resp = run_workflow(input_json, workflow=workflow_name)
    print(resp)

    # check result
    time.sleep(6 * 60)  # wait for 6 minutes
    filemeta = get_metadata(uuid, key=ff_key, add_on='?datastore=database')
    content_md5sum = filemeta.get('content_md5sum')
    md5sum = filemeta.get('md5sum')
    if content_md5sum and md5sum:
        print(content_md5sum)
        print(md5sum)
        patch_metadata({'status': 'deleted'}, uuid, key=ff_key)
    else:
        raise Exception('md5 step function run failed')
Example #22
def release_files(set_id, list_items, auth, status=None):
    if status:
        item_status = status
    else:
        item_status = ff_utils.get_metadata(set_id, key=auth)['status']
    # bring files to same status as experiments and sets
    if item_status in ['released', 'released to project', 'pre-release']:
        for a_file in list_items:
            it_resp = ff_utils.get_metadata(a_file, key=auth)
            workflow = it_resp.get('workflow_run_outputs')
            # release the wfr that produced the file
            if workflow:
                ff_utils.patch_metadata({"status": item_status},
                                        obj_id=workflow[0]['uuid'],
                                        key=auth)
            ff_utils.patch_metadata({"status": item_status},
                                    obj_id=a_file,
                                    key=auth)
Example #23
def get_types_that_can_have_field(auth, field):
    """find items that have the passed in fieldname in their properties
        even if there is currently no value for that field"""
    profiles = get_metadata('/profiles/', auth, add_on='frame=raw')
    types_w_field = []
    for t, j in profiles.items():
        if j['properties'].get(field):
            types_w_field.append(t)
    return types_w_field
Example #24
def get_schema_names(con_key):
    schema_name = {}
    profiles = ff_utils.get_metadata('/profiles/', key=con_key, add_on='frame=raw')
    for key, value in profiles.items():
        try:
            schema_name[key] = value['id'].split('/')[-1][:-5]
        except Exception:
            continue
    return schema_name
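A usage sketch for the profile-name mapping above; con_key is an assumed Fourfront access-key dict and the profile key shown is illustrative.

# Hypothetical usage of get_schema_names.
schema_names = get_schema_names(con_key)
print(schema_names.get('FileFastq'))  # expected to print something like 'file_fastq'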
Example #25
def get_format_extension_map(ff_keys):
    try:
        fp_schema = get_metadata("profiles/file_processed.json", key=ff_keys)
        fe_map = fp_schema.get('file_format_file_extension')
    except Exception as e:
        raise Exception(
            "Can't get format-extension map from file_processed schema. %s\n" %
            e)
    return fe_map
Example #26
def output_target_for_input_extra(target_inf,
                                  of,
                                  tibanna,
                                  overwrite_input_extra=False):
    extrafileexists = False
    printlog("target_inf = %s" % str(target_inf))  # debugging
    target_inf_meta = ff_utils.get_metadata(target_inf.get('value'),
                                            key=tibanna.ff_keys,
                                            ff_env=tibanna.env,
                                            add_on='frame=object',
                                            check_queue=True)
    target_format = parse_formatstr(of.get('format'))
    if target_inf_meta.get('extra_files'):
        for exf in target_inf_meta.get('extra_files'):
            if parse_formatstr(exf.get('file_format')) == target_format:
                extrafileexists = True
                if overwrite_input_extra:
                    exf['status'] = 'to be uploaded by workflow'
                break
        if not extrafileexists:
            new_extra = {
                'file_format': target_format,
                'status': 'to be uploaded by workflow'
            }
            target_inf_meta['extra_files'].append(new_extra)
    else:
        new_extra = {
            'file_format': target_format,
            'status': 'to be uploaded by workflow'
        }
        target_inf_meta['extra_files'] = [new_extra]
    if overwrite_input_extra or not extrafileexists:
        # first patch metadata
        printlog("extra_files_to_patch: %s" %
                 str(target_inf_meta.get('extra_files')))  # debugging
        ff_utils.patch_metadata(
            {'extra_files': target_inf_meta.get('extra_files')},
            target_inf.get('value'),
            key=tibanna.ff_keys,
            ff_env=tibanna.env)
        # target key
        # NOTE: the target bucket is assumed to be the same as the output bucket
        # i.e. the bucket for the input file should be the same as the output bucket.
        # which is true if both input and output are processed files.
        orgfile_key = target_inf_meta.get('upload_key')
        orgfile_format = parse_formatstr(target_inf_meta.get('file_format'))
        fe_map = FormatExtensionMap(tibanna.ff_keys)
        printlog("orgfile_key = %s" % orgfile_key)
        printlog("orgfile_format = %s" % orgfile_format)
        printlog("target_format = %s" % target_format)
        target_key = get_extra_file_key(orgfile_format, orgfile_key,
                                        target_format, fe_map)
        return target_key
    else:
        raise Exception(
            "input already has extra: use 'overwrite_input_extra': true")
Example #27
def get_chip_files(exp_resp, my_auth):
    files = []
    obj_key = []
    paired = ""
    exp_files = exp_resp['files']
    for a_file in exp_files:
        f_t = []
        o_t = []
        file_resp = ff_utils.get_metadata(a_file['uuid'], my_auth)
        # get the paired end number
        pair_end = file_resp.get('paired_end')
        if pair_end == '2':
            paired = 'paired'
            continue
        # get paired file
        paired_with = ""
        relations = file_resp.get('related_files')
        if not relations:
            pass
        else:
            for relation in relations:
                if relation['relationship_type'] == 'paired with':
                    paired = 'paired'
                    paired_with = relation['file']['uuid']
        # if there is no mate, decide whether the data is single-end
        if not paired_with:
            if not paired:
                paired = 'single'
            else:
                if paired != 'single':
                    print('inconsistent fastq pair info')
                    continue
            f_t.append(file_resp['uuid'])
            o_t.append(file_resp['display_title'])
        else:
            f2 = ff_utils.get_metadata(paired_with, my_auth)
            f_t.append(file_resp['uuid'])
            o_t.append(file_resp['display_title'])
            f_t.append(f2['uuid'])
            o_t.append(f2['display_title'])
        files.append(f_t)
        obj_key.append(o_t)
    return files, obj_key, paired
Example #28
def test_post_patch(update_ffmeta_event_data_fastqc2):
    updater = FourfrontUpdater(**update_ffmeta_event_data_fastqc2)
    item = next(updater.qc_template_generator())
    item_uuid = item['uuid']
    updater.update_post_items(item_uuid, item, 'quality_metric_fastqc')
    assert 'uuid' in updater.post_items['quality_metric_fastqc'][item_uuid]
    assert updater.post_items['quality_metric_fastqc'][item_uuid]['uuid'] == item_uuid
    updater.create_wfr_qc()
    wfr_qc_uuid = updater.ff_meta.quality_metric
    assert updater.post_items['QualityMetricWorkflowrun'][wfr_qc_uuid]['lab'] == '4dn-dcic-lab'
    updater.post_all()
    updater.update_patch_items(item_uuid, {'Per base sequence content': 'PASS'})
    updater.patch_all()
    res = ff_utils.get_metadata(item_uuid, key=updater.tibanna_settings.ff_keys)
    assert res['Per base sequence content'] == 'PASS'
    updater.update_patch_items(item_uuid, {'status': 'deleted'})
    updater.patch_all()
    res = ff_utils.get_metadata(item_uuid, key=updater.tibanna_settings.ff_keys)
    assert res['status'] == 'deleted'
Example #29
def get_item_type(auth, item):
    try:
        return item['@type'].pop(0)
    except (KeyError, TypeError):
        res = get_metadata(item, auth)
        try:
            return res['@type'][0]
        except (AttributeError, KeyError):  # noqa: E722
            print("Can't find a type for item %s" % item)
    return None
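A small usage sketch; note that passing an item dict mutates its '@type' list via pop, and the auth argument is only used when a lookup is needed. The item dict below is a made-up example.

# Hypothetical usage of get_item_type with an embedded item dict.
item = {'@type': ['ExperimentHiC', 'Experiment', 'Item']}
itype = get_item_type(None, item)  # auth is not needed when '@type' is present
print(itype)                       # 'ExperimentHiC'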
Example #30
def run_md5(env, accession, uuid):
    tibanna = Tibanna(env=env)
    meta_data = get_metadata(accession, key=tibanna.ff_keys)
    file_name = meta_data['upload_key'].split('/')[-1]

    input_json = make_input(env=env,
                            workflow='md5',
                            object_key=file_name,
                            uuid=uuid)
    return _run_workflow(input_json, accession)
Example #31
def get_ontologies(connection, ont_list):
    '''return list of ontology jsons retrieved from server
        ontology jsons are now fully embedded
    '''
    ontologies = []
    if ont_list == 'all':
        ontologies = search_metadata('search/?type=Ontology', connection)
    else:
        ontologies = [get_metadata('ontologys/' + ontology, connection) for ontology in ont_list]
    # remove item-not-found cases, with reporting
    if not isinstance(ontologies, (list, tuple)):
        print("we must not have got ontologies... bailing")
        import sys
        sys.exit()
    # popping while iterating would skip entries, so rebuild the list
    # keeping only Ontology items
    ontologies = [o for o in ontologies if 'Ontology' in o['@type']]
    return ontologies
Example #32
def main():
    """
    Load a given JSON file with ontology terms inserts to a server using
    the `load_data` endpoint defined in loadxl.
    """
    logging.basicConfig()
    # The loading app will have been configured from the config file. Reconfigure here:
    logging.getLogger('encoded').setLevel(logging.INFO)

    parser = argparse.ArgumentParser(
        description="Load Ontology Term Data", epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('json_file', help="File containing terms to load")
    parser.add_argument('--env', default='local',
                        help='FF environment to update from. Defaults to local')
    parser.add_argument('--local-key', help='Access key ID if using local')
    parser.add_argument('--local-secret', help='Access key secret if using local')
    args = parser.parse_args()

    # authentication with Fourfront
    if args.env == 'local':
        # prompt access key ID and secret from user
        config_uri = 'development.ini'
        local_id = args.local_key if args.local_key else input('[local access key ID] ')
        local_secret = args.local_secret if args.local_secret else input('[local access key secret] ')
        auth = {'key': local_id, 'secret': local_secret, 'server': 'http://localhost:8000'}
    else:
        config_uri = 'production.ini'
        auth = ff_utils.get_authentication_with_server(None, args.env)

    load_endpoint = '/'.join([auth['server'], 'load_data'])
    logger.info('load_ontology_terms: Starting POST to %s' % load_endpoint)
    json_data = {'config_uri': config_uri, 'itype': 'ontology_term',
                 'overwrite': True, 'iter_response': True}
    with open(args.json_file) as infile:
        json_data['store'] = {'ontology_term': json.load(infile)}
    num_to_load = len(json_data['store']['ontology_term'])
    logger.info('Will attempt to load %s ontology terms to %s'
                % (num_to_load, auth['server']))
    start = datetime.now()
    try:
        # sustained by returning Response.app_iter from loadxl.load_data
        res = ff_utils.authorized_request(load_endpoint, auth=auth, verb='POST',
                                          timeout=None, json=json_data)
    except Exception as exc:
        logger.error('Error on POST: %s' % str(exc))
    else:
        # process the individual item responses from the generator.
        # each item should be "POST: <uuid>,", "PATCH: <uuid>,", or "SKIP: <uuid>"
        load_res = {'POST': [], 'PATCH': [], 'SKIP': [], 'ERROR': []}
        for val in res.text.split('\n'):
            if val.startswith('POST') or val.startswith('SKIP'):
                prefix_len = 4  # 'POST' or 'SKIP'
            else:
                prefix_len = 5  # 'PATCH' or 'ERROR'
            # this is a bit weird, but we want to split out the POST/PATCH...
            # and also remove ': ' from the value for each message
            cat, msg = val[:prefix_len], val[prefix_len + 2:]
            if not msg:
                continue
            if cat in load_res:
                load_res[cat].append(msg)
        logger.info("Success! Attempted to load %s items. Result: POSTed %s, PATCHed %s, skipped %s"
                    % (num_to_load, len(load_res['POST']), len(load_res['PATCH']), len(load_res['SKIP'])))
        if load_res['ERROR']:
            logger.error("ERROR encountered during load_data! Error: %s" % load_res['ERROR'])
        if (len(load_res['POST']) + len(load_res['SKIP'])) > len(load_res['PATCH']):
            logger.error("The following items passed round I (POST/skip) but not round II (PATCH): %s"
                         % (set(load_res['POST'] + load_res['SKIP']) - set(load_res['PATCH'])))
    logger.info("Finished request in %s" % str(datetime.now() - start))

    # update sysinfo. Don't worry about doing this on local
    if args.env != 'local':
        data = {"name": "ffsysinfo", "ontology_updated": datetime.today().isoformat()}
        try:
            found_info = ff_utils.get_metadata('/sysinfos/' + data['name'], key=auth)
        except Exception:
            found_info = None

        if found_info:
            ff_utils.patch_metadata(data, found_info['uuid'], key=auth)
        else:
            ff_utils.post_metadata(data, 'sysinfos', key=auth)
        logger.info("Updated sysinfo with name %s" % data['name'])
    logger.info("DONE!")