def main():  # pragma: no cover
    # initial set up
    args = get_args(sys.argv[1:])
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    # bucket addresses
    ff_health = get_metadata('/health', auth)
    source_bucket = ff_health['file_upload_bucket']
    target_bucket = ff_health['processed_file_bucket']
    s3 = boto3.resource('s3')
    # get the uuids for the files
    query = 'type=FileVistrack'
    uids = scu.get_item_ids_from_args([query], auth, True)
    files2copy = [get_metadata(uid, auth).get('upload_key') for uid in uids]
    for file_key in files2copy:
        copy_source = {'Bucket': source_bucket, 'Key': file_key}
        try:
            # print(file_key + ' cp from ' + source_bucket + ' to ' + target_bucket)
            s3.meta.client.copy(copy_source, target_bucket, file_key)
        except Exception:
            print('Cannot find file on source', file_key)
            continue
        print('{} file copied'.format(file_key))
def main():
    args = get_args(sys.argv[1:])
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    dryrun = not args.dbupdate
    file_list = scu.get_item_ids_from_args(args.input, auth, args.search)
    wf_data = get_metadata(args.workflow, auth)
    for f in file_list:
        file_info = get_metadata(f, auth)
        parents = file_info.get('produced_from')
        if parents:
            inputs = []
            for p in parents:
                inputs.append(get_metadata(p, auth))
            wfr_json = create_wfr_meta_only_json(auth, wf_data, inputs, [file_info])
            if dryrun:
                print('DRY RUN -- would post')
                print(wfr_json)
            else:
                res = post_metadata(wfr_json, 'workflow_run_awsem', auth)
                # and add a notes_to_tsv to the file
                patchstatus = add_notes_to_tsv(file_info, auth)
                print(res)
                print(patchstatus)
def get_source_sample(self, input_file_uuid):
    """Connects to fourfront and gets source sample info as a unique list.

    Takes a single input file uuid.
    """
    pf_source_samples_set = set()
    inf_uuids = aslist(flatten(input_file_uuid))
    for inf_uuid in inf_uuids:
        infile_meta = get_metadata(inf_uuid,
                                   key=self.tbn.ff_keys,
                                   ff_env=self.tbn.env,
                                   add_on='frame=object&datastore=database')
        if infile_meta.get('samples'):
            for exp in infile_meta.get('samples'):
                exp_obj = get_metadata(exp,
                                       key=self.tbn.ff_keys,
                                       ff_env=self.tbn.env,
                                       add_on='frame=raw&datastore=database')
                pf_source_samples_set.add(exp_obj['uuid'])
        if infile_meta.get('source_samples'):
            # this field is an array of strings, not linkTos
            pf_source_samples_set.update(infile_meta.get('source_samples'))
    return list(pf_source_samples_set)
def test_fastqc():
    key = dev_key()
    data = get_test_json('fastqc.json')
    fq_uuid = post_new_fastqfile(key=key,
                                 upload_file=os.path.join(FILE_DIR, 'fastq/A.R2.fastq.gz'))
    data['input_files'][0]['uuid'] = fq_uuid
    api = API()
    res = api.run_workflow(data, sfn=DEV_SFN)
    assert 'jobid' in res
    assert 'exec_arn' in res['_tibanna']
    time.sleep(420)
    assert api.check_status(res['_tibanna']['exec_arn']) == 'SUCCEEDED'
    outjson = api.check_output(res['_tibanna']['exec_arn'])
    postrunjson = json.loads(api.log(job_id=res['jobid'], postrunjson=True))
    assert 'status' in postrunjson['Job']
    assert postrunjson['Job']['status'] == '0'
    res = ff_utils.get_metadata(fq_uuid, key=key, ff_env=DEV_ENV, check_queue=True)
    ff_utils.patch_metadata({'status': 'deleted'}, fq_uuid, key=key)
    assert 'quality_metric' in res
    assert 'ff_meta' in outjson
    assert 'uuid' in outjson['ff_meta']
    wfr_uuid = outjson['ff_meta']['uuid']
    res = ff_utils.get_metadata(wfr_uuid, key=key, ff_env=DEV_ENV, check_queue=True)
    assert res['run_status'] == 'complete'
    assert 'quality_metric' in res
def extract_file_info(obj_id, arg_name, env, rename=None):
    """Creates the formatted dictionary for files."""
    auth = ff_utils.get_authentication_with_server({}, ff_env=env)
    my_s3_util = s3Utils(env=env)
    raw_bucket = my_s3_util.raw_file_bucket
    out_bucket = my_s3_util.outfile_bucket
    # start a dictionary
    template = {"workflow_argument_name": arg_name}
    if rename:
        change_from = rename[0]
        change_to = rename[1]
    # if it is a list of items, change the structure
    if isinstance(obj_id, list):
        object_key = []
        uuid = []
        buckets = []
        for obj in obj_id:
            metadata = ff_utils.get_metadata(obj, key=auth)
            object_key.append(metadata['display_title'])
            uuid.append(metadata['uuid'])
            # get the bucket
            if 'FileProcessed' in metadata['@type']:
                my_bucket = out_bucket
            else:  # covers cases of FileFastq, FileReference, FileMicroscopy
                my_bucket = raw_bucket
            buckets.append(my_bucket)
        # check bucket consistency
        try:
            assert len(list(set(buckets))) == 1
        except AssertionError:
            print('Files from different buckets', obj_id)
            return
        template['object_key'] = object_key
        template['uuid'] = uuid
        template['bucket_name'] = buckets[0]
        if rename:
            template['rename'] = [i.replace(change_from, change_to)
                                  for i in template['object_key']]
    # if obj_id is a string
    else:
        metadata = ff_utils.get_metadata(obj_id, key=auth)
        template['object_key'] = metadata['display_title']
        template['uuid'] = metadata['uuid']
        # get the bucket
        if 'FileProcessed' in metadata['@type']:
            my_bucket = out_bucket
        else:  # covers cases of FileFastq, FileReference, FileMicroscopy
            my_bucket = raw_bucket
        template['bucket_name'] = my_bucket
        if rename:
            template['rename'] = template['object_key'].replace(change_from, change_to)
    return template
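# Example of the template this returns for a single (non-list) obj_id, with
# hypothetical values:
#
#     {'workflow_argument_name': 'chromsize',
#      'object_key': '4DNFIXYZ1234.chrom.sizes',
#      'uuid': '7f9eb396-...',
#      'bucket_name': 'elasticbeanstalk-fourfront-webdev-files'}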
def add_preliminary_processed_files(item_id, list_pc, auth, run_type="hic"):
    titles = {"hic": "HiC Processing Pipeline - Preliminary Files",
              "repliseq": "Repli-Seq Pipeline - Preliminary Files",
              'chip': "ENCODE ChIP-Seq Pipeline - Preliminary Files",
              'atac': "ENCODE ATAC-Seq Pipeline - Preliminary Files"}
    if run_type in titles:
        pc_set_title = titles[run_type]
    else:
        pc_set_title = run_type
    resp = ff_utils.get_metadata(item_id, key=auth)

    # check if these items are in the processed_files field
    # extract the essentials for comparison, unfold all possible ids into a list,
    # and compare list_pc to that
    ex_pc = resp.get('processed_files')
    if ex_pc:
        ex_pc_ids = [[a['@id'], a['uuid'], a['@id'].split('/')[2]] for a in ex_pc]
        ex_pc_ids = [a for i in ex_pc_ids for a in i]
        for i in list_pc:
            if i in ex_pc_ids:
                print('Error - Cannot add files to pc')
                print(i, 'is already in processed files')
                return

    # same check against the other_processed_files field
    ex_opc = resp.get('other_processed_files')
    if ex_opc:
        # check the titles
        all_existing_titles = [a['title'] for a in ex_opc]
        if pc_set_title in all_existing_titles:
            print('Error - Cannot add files to opc')
            print('The same title is already in other processed files')
            return
        # check the individual files
        ex_opc_ids = [[a['@id'], a['uuid'], a['@id'].split('/')[2]]
                      for i in ex_opc for a in i['files']]
        ex_opc_ids = [a for i in ex_opc_ids for a in i]
        for i in list_pc:
            if i in ex_opc_ids:
                print('Error - Cannot add files to opc')
                print(i, 'is already in other processed files')
                return

    # we need the raw frame to get the existing piece, to patch back with the new ones
    patch_data = ff_utils.get_metadata(item_id, key=auth,
                                       add_on='frame=raw').get('other_processed_files')
    if not patch_data:
        patch_data = []
    new_data = {'title': pc_set_title, 'type': 'preliminary', 'files': list_pc}
    patch_data.append(new_data)
    patch = {'other_processed_files': patch_data}
    ff_utils.patch_metadata(patch, obj_id=item_id, key=auth)
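# The patch assembled above appends one entry to other_processed_files; the
# resulting shape (uuids hypothetical) looks like:
#
#     {'other_processed_files': [
#         {'title': 'HiC Processing Pipeline - Preliminary Files',
#          'type': 'preliminary',
#          'files': ['b3c5d6a1-...', '0f1e2d3c-...']}]}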
def get_item_if_you_can(auth, value, itype=None):
    try:
        value.get('uuid')
        return value
    except AttributeError:
        svalue = str(value)
        item = get_metadata(svalue, auth)
        try:
            item.get('uuid')
            return item
        except AttributeError:
            if itype is not None:
                svalue = '/' + itype + svalue + '/?datastore=database'
                return get_metadata(svalue, auth)
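# Illustrative usage (item id is hypothetical): a dict input is returned
# as-is; a string is looked up on the portal, optionally retried with the
# itype prefix if the first lookup does not yield an item.
#
#     item = get_item_if_you_can(auth, '4DNSRXXXXXXX')
#     same = get_item_if_you_can(auth, item)  # already a dict, returned unchanged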
def get_format_extension_map(ff_keys):
    try:
        fp_schema = get_metadata("profiles/file_processed.json", key=ff_keys)
        fe_map = fp_schema.get('file_format_file_extension')
        fp_schema2 = get_metadata("profiles/file_fastq.json", key=ff_keys)
        fe_map2 = fp_schema2.get('file_format_file_extension')
        fp_schema3 = get_metadata("profiles/file_reference.json", key=ff_keys)
        fe_map3 = fp_schema3.get('file_format_file_extension')
        fe_map.update(fe_map2)
        fe_map.update(fe_map3)
    except Exception as e:
        raise Exception(
            "Can't get format-extension map from file schemas. %s\n" % e)
    return fe_map
def get_extra_file_format(event):
    '''If the file extension matches the regular file format, returns None.
    If it matches the format of one of the extra files, returns that format
    (e.g. 'pairs_px2').
    '''
    # guess env from bucket name
    bucket = event['Records'][0]['s3']['bucket']['name']
    env = '-'.join(bucket.split('-')[1:3])
    upload_key = event['Records'][0]['s3']['object']['key']
    uuid, object_key = upload_key.split('/')
    accession = object_key.split('.')[0]
    extension = object_key.replace(accession, '')
    tibanna = Tibanna(env=env)
    meta = get_metadata(accession,
                        key=tibanna.ff_keys,
                        ff_env=env,
                        add_on='frame=object',
                        check_queue=True)
    if meta:
        file_format = meta.get('file_format')
        fe_map = get_format_extension_map(tibanna.ff_keys)
        file_extension = fe_map.get(file_format)
        if extension == file_extension:
            return None
        else:
            for extra in meta.get('extra_files', []):
                extra_format = extra.get('file_format')
                extra_extension = fe_map.get(extra_format)
                if extension == extra_extension:
                    return extra_format
        raise Exception("file extension not matching")
    else:
        raise Exception("Cannot get input metadata")
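# Worked example of the parsing above, using a hypothetical S3 event:
#
#     bucket     = 'elasticbeanstalk-fourfront-webdev-files'
#     env        = '-'.join(bucket.split('-')[1:3])  # -> 'fourfront-webdev'
#     upload_key = '7f9eb396-.../4DNFIXYZ1234.pairs.gz.px2'
#     accession  = '4DNFIXYZ1234'
#     extension  = '.pairs.gz.px2'
#
# If the format-extension map ties 'pairs_px2' to '.pairs.gz.px2', the
# function returns 'pairs_px2'; if the extension were '.pairs.gz' (the
# regular format), it would return None.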
def run(keypairs_file, schema_name):
    assert os.path.isfile(str(keypairs_file))
    try:
        key = FDN_Key(keypairs_file, "default")
    except Exception as e:
        print(e)
        print("key error")
        raise
    try:
        connection = FDN_Connection(key)
    except Exception as e:
        print(e)
        print("connection error")
        raise
    try:
        response = ff_utils.get_metadata("/" + schema_name, key=connection.key, frame=None)
    except Exception as e:
        print(e)
        print("get error")
        raise
    print(response)
def get(cls, uuid, key, ff_env=None, check_queue=False, file_format=None):
    data = get_metadata(uuid, key=key, ff_env=ff_env,
                        add_on='frame=object', check_queue=check_queue)
    if type(data) is not dict:
        raise Exception("unable to find object with unique key of %s" % uuid)
    if 'FileProcessed' not in data.get('@type', []):
        raise Exception("you can only load ProcessedFiles into this object")
    if 'extra_files' not in data:
        return None
    if len(data['extra_files']) == 1:
        if not file_format or file_format == data['extra_files'][0]['file_format']:
            return ExtraFileMetadata(**data['extra_files'][0])
        else:
            raise Exception("extra file format not matching")
    elif not file_format:
        raise Exception("Two or more extra files - specify file format")
    for ef in data['extra_files']:
        if ef['file_format'] == file_format:
            return ExtraFileMetadata(**ef)
    raise Exception("no matching format for extra file")
def test_bed2beddb():
    key = dev_key()
    # prep new File
    data = get_test_json('bedtobeddb.json')
    bed_content = b'chr1\t1000000\t2000000\tregion1'
    gzipped_content = gzip.compress(bed_content)
    bed_uuid = post_new_processedfile(file_format='bed', key=key,
                                      upload_content=gzipped_content,
                                      extension='bed.gz')
    data['input_files'][0]['uuid'] = bed_uuid
    api = API()
    res = api.run_workflow(data, sfn=DEV_SFN)
    assert 'jobid' in res
    assert 'exec_arn' in res['_tibanna']
    time.sleep(420)
    assert api.check_status(res['_tibanna']['exec_arn']) == 'SUCCEEDED'
    outjson = api.check_output(res['_tibanna']['exec_arn'])
    postrunjson = json.loads(api.log(job_id=res['jobid'], postrunjson=True))
    assert 'status' in postrunjson['Job']
    assert postrunjson['Job']['status'] == '0'
    assert 'ff_meta' in outjson
    assert 'uuid' in outjson['ff_meta']
    wfr_uuid = outjson['ff_meta']['uuid']
    res = ff_utils.get_metadata(bed_uuid, key=key, ff_env=DEV_ENV, check_queue=True)
    assert res['extra_files']
    assert len(res['extra_files']) == 1
    extra = res['extra_files'][0]
    assert extra['file_format']['display_title'] == 'beddb'
    ff_utils.patch_metadata({'status': 'deleted'}, bed_uuid, key=key)
    ff_utils.patch_metadata({'status': 'deleted'}, wfr_uuid, key=key)
def get_status(event):
    print("is status uploading: %s" % event)
    upload_key = event['Records'][0]['s3']['object']['key']
    if upload_key.endswith('html'):
        return False
    uuid, object_key = upload_key.split('/')
    accession = object_key.split('.')[0]
    # guess env from bucket name
    bucket = event['Records'][0]['s3']['bucket']['name']
    env = '-'.join(bucket.split('-')[1:3])
    try:
        tibanna = Tibanna(env=env)
    except Exception as e:
        raise TibannaStartException("%s" % e)
    meta = get_metadata(accession,
                        key=tibanna.ff_keys,
                        ff_env=env,
                        add_on='frame=object',
                        check_queue=True)
    if meta:
        return meta.get('status', '')
    else:
        return ''
def test_handle_processed_files(run_awsem_event_data_secondary_files):
    data = run_awsem_event_data_secondary_files
    tibanna_settings = data.get('_tibanna', {})
    # if they don't pass in env guess it from output_bucket
    env = tibanna_settings.get('env')
    # tibanna provides access to keys based on env and stuff like that
    tibanna = Tibanna(env, ff_keys=data.get('ff_keys'), settings=tibanna_settings)
    workflow_uuid = data['workflow_uuid']
    workflow_info = ff_utils.get_metadata(workflow_uuid, key=tibanna.ff_keys)
    output_files, pf_meta = handle_processed_files(workflow_info, tibanna)
    assert output_files
    assert len(output_files) == 3
    for of in output_files:
        if of['extension'] == '.pairs.gz':
            assert of['secondary_file_extensions'] == ['.pairs.gz.px2']
            assert of['secondary_file_formats'] == ['pairs_px2']
            assert of['extra_files']
        else:
            assert 'secondary_files_extension' not in of
            assert 'secondary_files_formats' not in of
    assert pf_meta
    assert len(pf_meta) == 3
    for pf in pf_meta:
        pdict = pf.__dict__
        if pdict['file_format'] == 'pairs':
            assert pdict['extra_files'] == [{'file_format': 'pairs_px2'}]
        else:
            assert 'extra_files' not in pdict
def parse_fdn_xls(fdn_xls):
    biosamples = read_sheet(fdn_xls, 'Biosample', Biosample_4dn,
                            ['aliases', 'treatments', 'modifications', '*biosource'])
    org_dict = {'dmelanogaster': 'Drosophila melanogaster',
                'mouse': 'Mus musculus',
                'human': 'Homo sapiens'}
    # start with Biosample
    # get list of biosources
    biosource_ids = [sample.biosource for sample in biosamples]
    # while doing this either take info from Biosource sheet, or look up biosource on portal
    biosources = []
    for item in biosource_ids:
        # source name: get biosource cell line
        result = ff.get_metadata(item, ff_env="data", frame="embedded")
        source_name, cell_line = get_source_name(result)
        alias = item
        indiv = result['individual']
        org = result.get('individual').get('organism').get('display_title')
        # check for modifications
        if result.get('modifications'):
            mods = result.get('modifications')
        else:
            mods = None
        biosources.append(Biosource_4dn(alias, source_name, cell_line,
                                        org_dict[org], mods))
    # parse treatments
    # next parse FileFastq sheet
    # next parse Experiment sheet(s)
    book = xlrd.open_workbook(fdn_xls)
    sheetnames = [name for name in book.sheet_names()
                  if name.startswith('Experiment')
                  and 'Set' not in name and 'Mic' not in name]
def __init__(self, key4dn):
    # passed key object stores the key dict in con_key
    self.check = False
    self.key = key4dn.con_key
    # check connection and find user uuid
    # TODO: we should not need try/except, since if the me page fails there is
    # no need to progress, but the tests fail without this part.
    # Make mocked connections and remove the try/except.
    # Is a public connection using Submit4DN a realistic case?
    try:
        me_page = ff_utils.get_metadata('me', key=self.key)
        self.user = me_page['@id']
        self.email = me_page['email']
        self.check = True
    except Exception:
        print('Cannot establish connection, please check your keys')
        me_page = {}
    if not me_page:
        sys.exit(1)
    if me_page.get('submits_for') is not None:
        # get all the labs that the user making the connection submits_for
        self.labs = [lab['@id'] for lab in me_page['submits_for']]
        # take the first one as the default value for the connection - reset in
        # import_data if needed by calling set_lab_award
        self.lab = self.labs[0]
        self.set_award(self.lab)  # set as default first
    else:
        self.labs = None
        self.lab = None
        self.award = None
def extract_nz_file(acc, auth):
    mapping = {"HindIII": "6", "DpnII": "4", "MboI": "4", "NcoI": "6"}
    exp_resp = ff_utils.get_metadata(acc, key=auth)
    exp_type = exp_resp.get('experiment_type')
    # get enzyme
    nz_num = ""
    nz = exp_resp.get('digestion_enzyme')
    if nz:
        nz_num = mapping.get(nz['display_title'])
    if nz_num:
        pass
    # Soo suggested assigning 6 for ChIA-PET
    # Burak asked for running all experiments without an enzyme with parameter 6
    elif exp_type in ['CHIA-pet', 'ChIA-PET', 'micro-C', 'DNase Hi-C', 'TrAC-loop']:
        nz_num = '6'
    else:
        return (None, None)
    # get organism
    biosample = exp_resp['biosample']
    organisms = list(set([bs['individual']['organism']['name']
                          for bs in biosample['biosource']]))
    chrsize = ''
    if len(organisms) == 1:
        chrsize = chr_size.get(organisms[0])
    # if organism is not available return empty
    if not chrsize:
        print(organisms[0], 'not covered')
        return (None, None)
    # return result if both exist
    return nz_num, chrsize
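# Illustrative usage (accession and auth are hypothetical): a HindIII Hi-C
# experiment on human samples returns ('6', <human entry from chr_size>);
# an experiment with neither a mapped enzyme nor a listed exp_type returns
# (None, None).
#
#     nz_num, chrsize = extract_nz_file('4DNEXXXXXXXX', auth)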
def get_status_for_extra_file(event, extra_format):
    if not extra_format:
        return None
    upload_key = event['Records'][0]['s3']['object']['key']
    if upload_key.endswith('html'):
        return False
    uuid, object_key = upload_key.split('/')
    accession = object_key.split('.')[0]
    # guess env from bucket name
    bucket = event['Records'][0]['s3']['bucket']['name']
    env = '-'.join(bucket.split('-')[1:3])
    try:
        tibanna = Tibanna(env=env)
    except Exception as e:
        raise TibannaStartException("%s" % e)
    meta = get_metadata(accession,
                        key=tibanna.ff_keys,
                        ff_env=env,
                        add_on='frame=object',
                        check_queue=True)
    if meta and 'extra_files' in meta:
        for exf in meta['extra_files']:
            if parse_formatstr(exf['file_format']) == extra_format:
                return exf.get('status', None)
    return None
def finalize_user_content(spawner):
    """
    This function is called after the singleuser notebook stops.
    Responsible for:
    - adding date_culled to the TrackingItem given by FF_TRACKING_ID
    """
    # grab this info fresh every time
    err_output = []
    ff_keys = recompute_ff_keys(err_output)
    if not os.environ.get('FF_TRACKING_ID'):
        return
    # get current item
    track_id = os.environ['FF_TRACKING_ID']
    try:
        track_res = ff_utils.get_metadata(track_id, key=ff_keys)
    except Exception:
        pass  # nothing to do here
    else:
        session = track_res.get('jupyterhub_session')
        if session and isinstance(session, dict):
            session['date_culled'] = datetime.datetime.utcnow().isoformat() + '+00:00'
            try:
                ff_utils.patch_metadata({'jupyterhub_session': session}, track_id, key=ff_keys)
            except Exception:
                pass
def set_award(self, lab, dontPrompt=True):
    '''Sets the award for the connection for use in import_data.
    If dontPrompt is False, asks the user to choose when there is more
    than one award for the connection's lab; otherwise the first award
    for the lab is used.
    '''
    self.award = None
    labjson = ff_utils.get_metadata(lab, key=self.key)
    if labjson.get('awards') is not None:
        awards = labjson.get('awards')
        # if dontPrompt is active take the first award
        if dontPrompt:
            self.award = awards[0]['@id']
            return
        # if there is only one award use it
        if len(awards) == 1:
            self.award = awards[0]['@id']
            return
        # if there are multiple awards
        achoices = []
        print("Multiple awards for {labname}:".format(labname=lab))
        for i, awd in enumerate(awards):
            ch = str(i + 1)
            achoices.append(ch)
            print("  ({choice}) {awdname}".format(choice=ch, awdname=awd['@id']))
        # retry the input until a valid choice is entered
        awd_resp = ''
        while awd_resp not in achoices:
            awd_resp = str(input("Select the award for this session {choices}: ".format(choices=achoices)))
        self.award = awards[int(awd_resp) - 1]['@id']
    return
def testrun_md5(workflow_name='tibanna_pony', env='webdev'):
    """Creates a random file object with no md5sum/content_md5sum and runs the
    md5 workflow. Waits 6 minutes for the workflow run to finish, then checks
    that the input file object has been updated.
    """
    bucket = "elasticbeanstalk-fourfront-" + env + "-wfoutput"
    ff_key = get_authentication_with_server(ff_env='fourfront-' + env)
    newfile = post_random_file(bucket, ff_key)
    uuid = newfile['uuid']
    accession = newfile['accession']
    input_json = {
        "config": {
            "ebs_type": "io1",
            "ebs_iops": 500,
            "s3_access_arn": "arn:aws:iam::643366669028:instance-profile/S3_access",
            "ami_id": "ami-cfb14bb5",
            "json_bucket": "4dn-aws-pipeline-run-json",
            "shutdown_min": 30,
            "copy_to_s3": True,
            "launch_instance": True,
            "log_bucket": "tibanna-output",
            "script_url": "https://raw.githubusercontent.com/4dn-dcic/tibanna/master/awsf/",
            "key_name": "4dn-encode",
            "password": ""
        },
        "_tibanna": {
            "env": "fourfront-webdev",
            "run_type": "md5"
        },
        "parameters": {},
        "app_name": "md5",
        "workflow_uuid": "c77a117b-9a58-477e-aaa5-291a109a99f6",
        "input_files": [{
            "workflow_argument_name": "input_file",
            "bucket_name": bucket,
            "uuid": uuid,
            "object_key": accession + '.pairs.gz'
        }],
        "output_bucket": bucket
    }
    resp = run_workflow(input_json, workflow=workflow_name)
    print(resp)
    # check result
    time.sleep(6 * 60)  # wait for 6 minutes
    filemeta = get_metadata(uuid, key=ff_key, add_on='?datastore=database')
    content_md5sum = filemeta.get('content_md5sum')
    md5sum = filemeta.get('md5sum')
    if content_md5sum and md5sum:
        print(content_md5sum)
        print(md5sum)
        patch_metadata({'status': 'deleted'}, uuid, key=ff_key)
    else:
        raise Exception('md5 step function run failed')
def release_files(set_id, list_items, auth, status=None):
    if status:
        item_status = status
    else:
        item_status = ff_utils.get_metadata(set_id, key=auth)['status']
    # bring files to the same status as experiments and sets
    if item_status in ['released', 'released to project', 'pre-release']:
        for a_file in list_items:
            it_resp = ff_utils.get_metadata(a_file, key=auth)
            workflow = it_resp.get('workflow_run_outputs')
            # release the wfr that produced the file
            if workflow:
                ff_utils.patch_metadata({"status": item_status},
                                        obj_id=workflow[0]['uuid'], key=auth)
            ff_utils.patch_metadata({"status": item_status}, obj_id=a_file, key=auth)
def get_types_that_can_have_field(auth, field):
    """Find item types that have the passed-in fieldname in their properties,
    even if there is currently no value for that field."""
    profiles = get_metadata('/profiles/', auth, add_on='frame=raw')
    types_w_field = []
    for t, j in profiles.items():
        if j['properties'].get(field):
            types_w_field.append(t)
    return types_w_field
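# Illustrative usage (field name is hypothetical): returns every item type
# whose schema defines the property, whether or not any item has a value.
#
#     types = get_types_that_can_have_field(auth, 'documents')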
def get_schema_names(con_key):
    schema_name = {}
    profiles = ff_utils.get_metadata('/profiles/', key=con_key, add_on='frame=raw')
    for key, value in profiles.items():
        try:
            # strip the path and the '.json' suffix from the schema id
            schema_name[key] = value['id'].split('/')[-1][:-5]
        except Exception:
            continue
    return schema_name
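# Minimal usage sketch (assumes a dcicutils-style auth dict; the env name is
# hypothetical). Maps profile keys to schema file stems, e.g.
# 'ExperimentHiC' -> 'experiment_hi_c'.
#
#     auth = ff_utils.get_authentication_with_server({}, ff_env='data')
#     names = get_schema_names(auth)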
def get_format_extension_map(ff_keys):
    try:
        fp_schema = get_metadata("profiles/file_processed.json", key=ff_keys)
        fe_map = fp_schema.get('file_format_file_extension')
    except Exception as e:
        raise Exception(
            "Can't get format-extension map from file_processed schema. %s\n" % e)
    return fe_map
def output_target_for_input_extra(target_inf, of, tibanna, overwrite_input_extra=False):
    extrafileexists = False
    printlog("target_inf = %s" % str(target_inf))  # debugging
    target_inf_meta = ff_utils.get_metadata(target_inf.get('value'),
                                            key=tibanna.ff_keys,
                                            ff_env=tibanna.env,
                                            add_on='frame=object',
                                            check_queue=True)
    target_format = parse_formatstr(of.get('format'))
    if target_inf_meta.get('extra_files'):
        for exf in target_inf_meta.get('extra_files'):
            if parse_formatstr(exf.get('file_format')) == target_format:
                extrafileexists = True
                if overwrite_input_extra:
                    exf['status'] = 'to be uploaded by workflow'
                break
        if not extrafileexists:
            new_extra = {'file_format': target_format,
                         'status': 'to be uploaded by workflow'}
            target_inf_meta['extra_files'].append(new_extra)
    else:
        new_extra = {'file_format': target_format,
                     'status': 'to be uploaded by workflow'}
        target_inf_meta['extra_files'] = [new_extra]
    if overwrite_input_extra or not extrafileexists:
        # first patch metadata
        printlog("extra_files_to_patch: %s" % str(target_inf_meta.get('extra_files')))  # debugging
        ff_utils.patch_metadata({'extra_files': target_inf_meta.get('extra_files')},
                                target_inf.get('value'),
                                key=tibanna.ff_keys,
                                ff_env=tibanna.env)
        # target key
        # NOTE: The target bucket is assumed to be the same as the output bucket,
        # i.e. the bucket for the input file should be the same as the output bucket,
        # which is true if both input and output are processed files.
        orgfile_key = target_inf_meta.get('upload_key')
        orgfile_format = parse_formatstr(target_inf_meta.get('file_format'))
        fe_map = FormatExtensionMap(tibanna.ff_keys)
        printlog("orgfile_key = %s" % orgfile_key)
        printlog("orgfile_format = %s" % orgfile_format)
        printlog("target_format = %s" % target_format)
        target_key = get_extra_file_key(orgfile_format, orgfile_key, target_format, fe_map)
        return target_key
    else:
        raise Exception("input already has extra file - use 'overwrite_input_extra': true to overwrite")
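# Sketch of the key derivation above (values hypothetical): if the input's
# upload_key is '<uuid>/4DNFIXYZ1234.pairs.gz' with file format 'pairs', and
# the target format 'pairs_px2' maps to extension '.pairs.gz.px2', then
# get_extra_file_key would be expected to return
# '<uuid>/4DNFIXYZ1234.pairs.gz.px2'.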
def get_chip_files(exp_resp, my_auth):
    files = []
    obj_key = []
    paired = ""
    exp_files = exp_resp['files']
    for a_file in exp_files:
        f_t = []
        o_t = []
        file_resp = ff_utils.get_metadata(a_file['uuid'], my_auth)
        # get paired end number
        pair_end = file_resp.get('paired_end')
        if pair_end == '2':
            paired = 'paired'
            continue
        # get paired file
        paired_with = ""
        relations = file_resp.get('related_files')
        if not relations:
            pass
        else:
            for relation in relations:
                if relation['relationship_type'] == 'paired with':
                    paired = 'paired'
                    paired_with = relation['file']['uuid']
        # decide if data is not paired end reads
        if not paired_with:
            if not paired:
                paired = 'single'
            else:
                if paired != 'single':
                    print('inconsistent fastq pair info')
                    continue
            f_t.append(file_resp['uuid'])
            o_t.append(file_resp['display_title'])
        else:
            f2 = ff_utils.get_metadata(paired_with, my_auth)
            f_t.append(file_resp['uuid'])
            o_t.append(file_resp['display_title'])
            f_t.append(f2['uuid'])
            o_t.append(f2['display_title'])
        files.append(f_t)
        obj_key.append(o_t)
    return files, obj_key, paired
def test_post_patch(update_ffmeta_event_data_fastqc2):
    updater = FourfrontUpdater(**update_ffmeta_event_data_fastqc2)
    item = next(updater.qc_template_generator())
    item_uuid = item['uuid']
    updater.update_post_items(item_uuid, item, 'quality_metric_fastqc')
    assert 'uuid' in updater.post_items['quality_metric_fastqc'][item_uuid]
    assert updater.post_items['quality_metric_fastqc'][item_uuid]['uuid'] == item_uuid
    updater.create_wfr_qc()
    wfr_qc_uuid = updater.ff_meta.quality_metric
    assert updater.post_items['QualityMetricWorkflowrun'][wfr_qc_uuid]['lab'] == '4dn-dcic-lab'
    updater.post_all()
    updater.update_patch_items(item_uuid, {'Per base sequence content': 'PASS'})
    updater.patch_all()
    res = ff_utils.get_metadata(item_uuid, key=updater.tibanna_settings.ff_keys)
    assert res['Per base sequence content'] == 'PASS'
    updater.update_patch_items(item_uuid, {'status': 'deleted'})
    updater.patch_all()
    res = ff_utils.get_metadata(item_uuid, key=updater.tibanna_settings.ff_keys)
    assert res['status'] == 'deleted'
def get_item_type(auth, item):
    try:
        return item['@type'].pop(0)
    except (KeyError, TypeError):
        res = get_metadata(item, auth)
        try:
            return res['@type'][0]
        except (AttributeError, KeyError):
            print("Can't find a type for item %s" % item)
            return None
def run_md5(env, accession, uuid):
    tibanna = Tibanna(env=env)
    meta_data = get_metadata(accession, key=tibanna.ff_keys)
    file_name = meta_data['upload_key'].split('/')[-1]
    input_json = make_input(env=env, workflow='md5', object_key=file_name, uuid=uuid)
    return _run_workflow(input_json, accession)
def get_ontologies(connection, ont_list):
    '''Return a list of ontology jsons retrieved from the server.
       Ontology jsons are now fully embedded.
    '''
    ontologies = []
    if ont_list == 'all':
        ontologies = search_metadata('search/?type=Ontology', connection)
    else:
        ontologies = [get_metadata('ontologys/' + ontology, connection)
                      for ontology in ont_list]
    if not isinstance(ontologies, (list, tuple)):
        print("we must not have gotten ontologies... bailing")
        import sys
        sys.exit()
    # remove item-not-found cases (results that are not Ontology items);
    # filter rather than popping mid-iteration, which would skip elements
    ontologies = [o for o in ontologies if 'Ontology' in o['@type']]
    return ontologies
def main(): """ Load a given JSON file with ontology terms inserts to a server using the `load_data` endpoint defined in loadxl. """ logging.basicConfig() # Loading app will have configured from config file. Reconfigure here: logging.getLogger('encoded').setLevel(logging.INFO) parser = argparse.ArgumentParser( description="Load Ontology Term Data", epilog=EPILOG, formatter_class=argparse.RawDescriptionHelpFormatter, ) parser.add_argument('json_file', help="File containing terms to load") parser.add_argument('--env', default='local', help='FF environment to update from. Defaults to local') parser.add_argument('--local-key', help='Access key ID if using local') parser.add_argument('--local-secret', help='Access key secret if using local') args = parser.parse_args() # authentication with Fourfront if args.env == 'local': # prompt access key ID and secret from user config_uri = 'development.ini' local_id = args.local_key if args.local_key else input('[local access key ID] ') local_secret = args.local_secret if args.local_secret else input('[local access key secret] ') auth = {'key': local_id, 'secret': local_secret, 'server': 'http://localhost:8000'} else: config_uri = 'production.ini' auth = ff_utils.get_authentication_with_server(None, args.env) load_endpoint = '/'.join([auth['server'], 'load_data']) logger.info('load_ontology_terms: Starting POST to %s' % load_endpoint) json_data = {'config_uri': config_uri, 'itype': 'ontology_term', 'overwrite': True, 'iter_response': True} with open(args.json_file) as infile: json_data['store'] = {'ontology_term': json.load(infile)} num_to_load = len(json_data['store']['ontology_term']) logger.info('Will attempt to load %s ontology terms to %s' % (num_to_load, auth['server'])) start = datetime.now() try: # sustained by returning Response.app_iter from loadxl.load_data res = ff_utils.authorized_request(load_endpoint, auth=auth, verb='POST', timeout=None, json=json_data) except Exception as exc: logger.error('Error on POST: %s' % str(exc)) else: # process the individual item responses from the generator. # each item should be "POST: <uuid>,", "PATCH: <uuid>,", or "SKIP: <uuid>" load_res = {'POST': [], 'PATCH': [], 'SKIP': [], 'ERROR': []} for val in res.text.split('\n'): if val.startswith('POST') or val.startswith('SKIP'): prefix_len = 4 # 'POST' or 'SKIP' else: prefix_len = 5 # 'PATCH' or 'ERROR' # this is a bit weird, but we want to split out the POST/PATCH... # and also remove ': ' from the value for each message cat, msg = val[:prefix_len], val[prefix_len + 2:] if not msg: continue if cat in load_res: load_res[cat].append(msg) logger.info("Success! Attempted to load %s items. Result: POSTed %s, PATCHed %s, skipped %s" % (num_to_load, len(load_res['POST']), len(load_res['PATCH']), len(load_res['SKIP']))) if load_res['ERROR']: logger.error("ERROR encountered during load_data! Error: %s" % load_res['ERROR']) if (len(load_res['POST']) + len(load_res['SKIP'])) > len(load_res['PATCH']): logger.error("The following items passed round I (POST/skip) but not round II (PATCH): %s" % (set(load_res['POST'] + load_res['SKIP']) - set(load_res['PATCH']))) logger.info("Finished request in %s" % str(datetime.now() - start)) # update sysinfo. 
Don't worry about doing this on local if args.env != 'local': data = {"name": "ffsysinfo", "ontology_updated": datetime.today().isoformat()} try: found_info = ff_utils.get_metadata('/sysinfos/' + data['name'], key=auth) except Exception: found_info = None if found_info: ff_utils.patch_metadata(data, found_info['uuid'], key=auth) else: ff_utils.post_metadata(data, 'sysinfos', key=auth) logger.info("Updated sysinfo with name %s" % data['name']) logger.info("DONE!")