def finalize_user_content(spawner):
    """
    This function is called after the single-user notebook stops.
    Responsible for:
    - adding date_culled to the TrackingItem given by FF_TRACKING_ID
    """
    # grab this info fresh every time
    err_output = []
    ff_keys = recompute_ff_keys(err_output)
    if not os.environ.get('FF_TRACKING_ID'):
        return
    # get current item
    track_id = os.environ['FF_TRACKING_ID']
    try:
        track_res = ff_utils.get_metadata(track_id, key=ff_keys)
    except Exception:
        pass  # Nothing to do here
    else:
        session = track_res.get('jupyterhub_session')
        if session and isinstance(session, dict):
            session['date_culled'] = datetime.datetime.utcnow().isoformat() + '+00:00'
            try:
                ff_utils.patch_metadata({'jupyterhub_session': session}, track_id, key=ff_keys)
            except Exception:
                pass
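# Hedged usage sketch (not part of the original source): this hook is written to be
# registered as a JupyterHub post-stop hook, e.g. in jupyterhub_config.py via
# `c.Spawner.post_stop_hook = finalize_user_content`. For a direct smoke test it only
# needs FF_TRACKING_ID in the environment and ignores its spawner argument; the uuid
# below is a hypothetical placeholder.
def example_finalize_user_content():
    os.environ['FF_TRACKING_ID'] = 'tttttttt-1111-2222-3333-444444444444'  # hypothetical TrackingItem uuid
    finalize_user_content(spawner=None)  # the spawner argument is unused by the hook body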
def test_fastqc():
    key = dev_key()
    data = get_test_json('fastqc.json')
    fq_uuid = post_new_fastqfile(key=key,
                                 upload_file=os.path.join(FILE_DIR, 'fastq/A.R2.fastq.gz'))
    data['input_files'][0]['uuid'] = fq_uuid
    api = API()
    res = api.run_workflow(data, sfn=DEV_SFN)
    assert 'jobid' in res
    assert 'exec_arn' in res['_tibanna']
    time.sleep(420)
    assert api.check_status(res['_tibanna']['exec_arn']) == 'SUCCEEDED'
    outjson = api.check_output(res['_tibanna']['exec_arn'])
    postrunjson = json.loads(api.log(job_id=res['jobid'], postrunjson=True))
    assert 'status' in postrunjson['Job']
    assert postrunjson['Job']['status'] == '0'
    res = ff_utils.get_metadata(fq_uuid, key=key, ff_env=DEV_ENV, check_queue=True)
    ff_utils.patch_metadata({'status': 'deleted'}, fq_uuid, key=key)
    assert 'quality_metric' in res
    assert 'ff_meta' in outjson
    assert 'uuid' in outjson['ff_meta']
    wfr_uuid = outjson['ff_meta']['uuid']
    res = ff_utils.get_metadata(wfr_uuid, key=key, ff_env=DEV_ENV, check_queue=True)
    assert res['run_status'] == 'complete'
    assert 'quality_metric' in res
def patch_jsons(auth, to_patch):
    for item in to_patch:
        uid = item.get('uuid')
        try:
            patch_metadata(item, uid, auth)
        except Exception as e:
            print(e)
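# Hedged usage sketch (not part of the original source): patch_jsons expects an auth
# object accepted by patch_metadata and a list of patch dicts that each carry their own
# 'uuid'. The environment name and uuids below are placeholders, and it is assumed that
# get_authentication_with_server is importable alongside patch_metadata.
def example_patch_jsons():
    auth = get_authentication_with_server(ff_env='fourfront-webdev')  # assumed helper
    to_patch = [
        {'uuid': '11111111-aaaa-bbbb-cccc-222222222222', 'description': 'updated description'},
        {'uuid': '33333333-dddd-eeee-ffff-444444444444', 'status': 'released'},
    ]
    patch_jsons(auth, to_patch)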
def test_bed2beddb():
    key = dev_key()
    # prep new File
    data = get_test_json('bedtobeddb.json')
    bed_content = b'chr1\t1000000\t2000000\tregion1'
    gzipped_content = gzip.compress(bed_content)
    bed_uuid = post_new_processedfile(file_format='bed', key=key,
                                      upload_content=gzipped_content, extension='bed.gz')
    data['input_files'][0]['uuid'] = bed_uuid
    api = API()
    res = api.run_workflow(data, sfn=DEV_SFN)
    assert 'jobid' in res
    assert 'exec_arn' in res['_tibanna']
    time.sleep(420)
    assert api.check_status(res['_tibanna']['exec_arn']) == 'SUCCEEDED'
    outjson = api.check_output(res['_tibanna']['exec_arn'])
    postrunjson = json.loads(api.log(job_id=res['jobid'], postrunjson=True))
    assert 'status' in postrunjson['Job']
    assert postrunjson['Job']['status'] == '0'
    assert 'ff_meta' in outjson
    assert 'uuid' in outjson['ff_meta']
    wfr_uuid = outjson['ff_meta']['uuid']
    res = ff_utils.get_metadata(bed_uuid, key=key, ff_env=DEV_ENV, check_queue=True)
    assert res['extra_files']
    assert len(res['extra_files']) == 1
    extra = res['extra_files'][0]
    assert extra['file_format']['display_title'] == 'beddb'
    ff_utils.patch_metadata({'status': 'deleted'}, bed_uuid, key=key)
    ff_utils.patch_metadata({'status': 'deleted'}, wfr_uuid, key=key)
def testrun_md5(workflow_name='tibanna_pony', env='webdev'):
    """Creates a random file object with no md5sum/content_md5sum and runs the md5 workflow.
    It waits 6 minutes until the workflow run finishes and then checks that the input file
    object has been updated.
    """
    bucket = "elasticbeanstalk-fourfront-" + env + "-wfoutput"
    ff_key = get_authentication_with_server(ff_env='fourfront-' + env)
    newfile = post_random_file(bucket, ff_key)
    uuid = newfile['uuid']
    accession = newfile['accession']
    input_json = {
        "config": {
            "ebs_type": "io1",
            "ebs_iops": 500,
            "s3_access_arn": "arn:aws:iam::643366669028:instance-profile/S3_access",
            "ami_id": "ami-cfb14bb5",
            "json_bucket": "4dn-aws-pipeline-run-json",
            "shutdown_min": 30,
            "copy_to_s3": True,
            "launch_instance": True,
            "log_bucket": "tibanna-output",
            "script_url": "https://raw.githubusercontent.com/4dn-dcic/tibanna/master/awsf/",
            "key_name": "4dn-encode",
            "password": ""
        },
        "_tibanna": {
            "env": "fourfront-webdev",
            "run_type": "md5"
        },
        "parameters": {},
        "app_name": "md5",
        "workflow_uuid": "c77a117b-9a58-477e-aaa5-291a109a99f6",
        "input_files": [{
            "workflow_argument_name": "input_file",
            "bucket_name": bucket,
            "uuid": uuid,
            "object_key": accession + '.pairs.gz'
        }],
        "output_bucket": bucket
    }
    resp = run_workflow(input_json, workflow=workflow_name)
    print(resp)
    # check result
    time.sleep(6 * 60)  # wait for 6 minutes
    filemeta = get_metadata(uuid, key=ff_key, add_on='?datastore=database')
    content_md5sum = filemeta.get('content_md5sum')
    md5sum = filemeta.get('md5sum')
    if content_md5sum and md5sum:
        print(content_md5sum)
        print(md5sum)
        patch_metadata({'status': 'deleted'}, uuid, key=ff_key)
    else:
        raise Exception('md5 step function run failed')
def output_target_for_input_extra(target_inf, of, tibanna, overwrite_input_extra=False):
    extrafileexists = False
    printlog("target_inf = %s" % str(target_inf))  # debugging
    target_inf_meta = ff_utils.get_metadata(target_inf.get('value'),
                                            key=tibanna.ff_keys,
                                            ff_env=tibanna.env,
                                            add_on='frame=object',
                                            check_queue=True)
    target_format = parse_formatstr(of.get('format'))
    if target_inf_meta.get('extra_files'):
        for exf in target_inf_meta.get('extra_files'):
            if parse_formatstr(exf.get('file_format')) == target_format:
                extrafileexists = True
                if overwrite_input_extra:
                    exf['status'] = 'to be uploaded by workflow'
                break
        if not extrafileexists:
            new_extra = {'file_format': target_format, 'status': 'to be uploaded by workflow'}
            target_inf_meta['extra_files'].append(new_extra)
    else:
        new_extra = {'file_format': target_format, 'status': 'to be uploaded by workflow'}
        target_inf_meta['extra_files'] = [new_extra]
    if overwrite_input_extra or not extrafileexists:
        # first patch metadata
        printlog("extra_files_to_patch: %s" % str(target_inf_meta.get('extra_files')))  # debugging
        ff_utils.patch_metadata({'extra_files': target_inf_meta.get('extra_files')},
                                target_inf.get('value'),
                                key=tibanna.ff_keys,
                                ff_env=tibanna.env)
        # target key
        # NOTE: The target bucket is assumed to be the same as the output bucket,
        # i.e. the bucket for the input file should be the same as the output bucket,
        # which is true if both input and output are processed files.
        orgfile_key = target_inf_meta.get('upload_key')
        orgfile_format = parse_formatstr(target_inf_meta.get('file_format'))
        fe_map = FormatExtensionMap(tibanna.ff_keys)
        printlog("orgfile_key = %s" % orgfile_key)
        printlog("orgfile_format = %s" % orgfile_format)
        printlog("target_format = %s" % target_format)
        target_key = get_extra_file_key(orgfile_format, orgfile_key, target_format, fe_map)
        return target_key
    else:
        raise Exception("input already has extra: 'User overwrite_input_extra': true")
def test_pseudo_run(testapp, input_json):
    # this test can be problematic; uncomment the following line to disable it
    # assert False
    res = testapp.post_json('/WorkflowRun/pseudo-run', input_json)
    assert res
    # cleanup
    output = json.loads(res.json['output'])
    patch_metadata({'status': 'deleted'}, output['ff_meta']['uuid'], ff_env='fourfront-webdev')
def add_preliminary_processed_files(item_id, list_pc, auth, run_type="hic"):
    titles = {
        "hic": "HiC Processing Pipeline - Preliminary Files",
        "repliseq": "Repli-Seq Pipeline - Preliminary Files",
        'chip': "ENCODE ChIP-Seq Pipeline - Preliminary Files",
        'atac': "ENCODE ATAC-Seq Pipeline - Preliminary Files"
    }
    if run_type in titles:
        pc_set_title = titles[run_type]
    else:
        pc_set_title = run_type
    resp = ff_utils.get_metadata(item_id, key=auth)

    # check if these items are already in the processed_files field
    # extract the essentials for comparison, unfold all possible ids into a list, and compare list_pc to it
    ex_pc = resp.get('processed_files')
    if ex_pc:
        ex_pc_ids = [[a['@id'], a['uuid'], a['@id'].split('/')[2]] for a in ex_pc]
        ex_pc_ids = [a for i in ex_pc_ids for a in i]
        for i in list_pc:
            if i in ex_pc_ids:
                print('Error - Cannot add files to pc')
                print(i, 'is already in processed files')
                return

    # extract the essentials for comparison, unfold all possible ids into a list, and compare list_pc to it
    ex_opc = resp.get('other_processed_files')
    if ex_opc:
        # check the titles
        all_existing_titles = [a['title'] for a in ex_opc]
        if pc_set_title in all_existing_titles:
            print('Error - Cannot add files to opc')
            print('The same title already in other processed files')
            return
        # check the individual files
        ex_opc_ids = [[a['@id'], a['uuid'], a['@id'].split('/')[2]] for i in ex_opc for a in i['files']]
        ex_opc_ids = [a for i in ex_opc_ids for a in i]
        for i in list_pc:
            if i in ex_opc_ids:
                print('Error - Cannot add files to opc')
                print(i, 'is already in other processed files')
                return

    # we need the raw frame to get the existing piece, to patch back with the new ones
    patch_data = ff_utils.get_metadata(item_id, key=auth, add_on='frame=raw').get('other_processed_files')
    if not patch_data:
        patch_data = []
    new_data = {'title': pc_set_title, 'type': 'preliminary', 'files': list_pc}
    patch_data.append(new_data)
    patch = {'other_processed_files': patch_data}
    ff_utils.patch_metadata(patch, obj_id=item_id, key=auth)
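# Hedged usage sketch (not part of the original source): attach preliminary processed
# files to an experiment set under the canned HiC title. The item id and uuids below are
# hypothetical placeholders; `auth` is the same key object the other ff_utils calls use.
def example_add_preliminary_processed_files(auth):
    exp_set_id = '4DNESAAAAAAA'  # hypothetical ExperimentSet accession
    prelim_files = [
        'aaaaaaaa-1111-2222-3333-bbbbbbbbbbbb',  # hypothetical processed file uuids
        'cccccccc-4444-5555-6666-dddddddddddd',
    ]
    add_preliminary_processed_files(exp_set_id, prelim_files, auth, run_type='hic')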
def patch_and_report(auth, patch_d, skipped, uuid2patch, dryrun):
    # report and patch
    if dryrun:
        print('DRY RUN - nothing will be patched to database')
    if skipped:
        print('WARNING! - SKIPPING for', uuid2patch)
        for f, v in skipped.items():
            print('Field: %s\tHAS: %s\tNOT ADDED: %s' % (f, v['new'], v['old']))
    if not patch_d:
        print('NOTHING TO PATCH - ALL DONE!')
    else:
        print('PATCHING -', uuid2patch)
        for f, v in patch_d.items():
            print(f, '\t', v)
        if not dryrun:
            # do the patch
            res = patch_metadata(patch_d, uuid2patch, auth)
            if res['status'] == 'success':
                print("SUCCESS!")
                return True
            else:
                print("FAILED TO PATCH", uuid2patch, "RESPONSE STATUS", res['status'], res['description'])
                return False
    return True
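# Hedged usage sketch (not part of the original source): report what would be patched
# for one item as a dry run. The field names, values, and uuid are illustrative only;
# `skipped` entries are expected to carry 'new' and 'old' keys as used above.
def example_patch_and_report(auth):
    patch_d = {'description': 'new description'}  # fields that would be patched
    skipped = {'notes': {'new': 'candidate note', 'old': 'existing note'}}  # fields left alone
    uuid2patch = 'eeeeeeee-7777-8888-9999-ffffffffffff'  # hypothetical item uuid
    return patch_and_report(auth, patch_d, skipped, uuid2patch, dryrun=True)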
def main():  # pragma: no cover
    start = datetime.now()
    print(str(start))
    args = get_args()
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    # assumes a single line corresponds to json for a single term
    if not args.dbupdate:
        print("DRY RUN - use --dbupdate to update the database")
    with open(args.infile) as items:
        for i in items:
            [iid, payload] = [t.strip() for t in i.split('\t')]
            payload = json.loads(payload)
            if args.dbupdate:
                e = patch_metadata(payload, iid, auth)
            else:
                print("DRY RUN\n\tPATCH: ", iid, " TO\n", payload)
                e = {'status': 'success'}
            status = e.get('status')
            if status and status == 'success':
                print(status)
            else:
                print('FAILED', e)
    end = datetime.now()
    print("FINISHED - START: ", str(start), "\tEND: ", str(end))
def main():
    args = get_args()
    try:
        auth = ff.get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    print("Working on {}".format(auth.get('server')))
    itemids = scu.get_item_ids_from_args(args.input, auth, args.search)
    seen = []
    failed = []
    for itemid in itemids:
        print("Touching ", itemid)
        if args.dbupdate:
            try:
                res = ff.patch_metadata({}, itemid, auth)
                print(res.get('status'))
                if res.get('status') == 'success':
                    seen.append(itemid)
            except Exception:
                print(itemid, ' failed to patch')
                failed.append(itemid)
                continue
        else:
            print('dry run!')
    for i in seen:
        print(i)
    print("Failures")
    for f in failed:
        print(f)
def release_files(set_id, list_items, auth, status=None):
    if status:
        item_status = status
    else:
        item_status = ff_utils.get_metadata(set_id, key=auth)['status']
    # bring files to the same status as experiments and sets
    if item_status in ['released', 'released to project', 'pre-release']:
        for a_file in list_items:
            it_resp = ff_utils.get_metadata(a_file, key=auth)
            workflow = it_resp.get('workflow_run_outputs')
            # release the wfr that produced the file
            if workflow:
                ff_utils.patch_metadata({"status": item_status}, obj_id=workflow[0]['uuid'], key=auth)
            ff_utils.patch_metadata({"status": item_status}, obj_id=a_file, key=auth)
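# Hedged usage sketch (not part of the original source): propagate the status of a
# released experiment set to two of its files (and the workflow runs that produced
# them). The accessions below are hypothetical placeholders.
def example_release_files(auth):
    exp_set_id = '4DNESBBBBBBB'  # hypothetical ExperimentSet accession
    file_ids = ['4DNFICCCCCCC', '4DNFIDDDDDDD']  # hypothetical file accessions
    # status=None means the set's own status is looked up and reused for the files
    release_files(exp_set_id, file_ids, auth, status=None)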
def patch(self, key, fields=None):
    if fields:
        patch_json = {k: v for k, v in self.as_dict().items() if k in fields}
    else:
        patch_json = self.as_dict()
    print(patch_json)
    return patch_metadata(patch_json, key=key, add_on='force_md5')
def test_output_target_for_input_extra():
    tibanna = Tibanna('fourfront-webdev',
                      settings={"run_type": "bedGraphToBigWig", "env": "fourfront-webdev"})
    target_inf = {'workflow_argument_name': 'bgfile',
                  'value': '83a80cf8-ca2c-421a-bee9-118bd0572424'}
    of = {'format': 'bw'}
    ff_utils.patch_metadata({'extra_files': []},
                            '83a80cf8-ca2c-421a-bee9-118bd0572424',
                            key=tibanna.ff_keys)
    time.sleep(10)
    target_key = output_target_for_input_extra(target_inf, of, tibanna)
    assert target_key == '83a80cf8-ca2c-421a-bee9-118bd0572424/4DNFIF14KRAK.bw'
    with pytest.raises(Exception) as expinfo:
        target_key = output_target_for_input_extra(target_inf, of, tibanna)
    assert "input already has extra: 'User overwrite_input_extra'" in str(expinfo.value)
    target_key = output_target_for_input_extra(target_inf, of, tibanna, True)
    assert target_key == '83a80cf8-ca2c-421a-bee9-118bd0572424/4DNFIF14KRAK.bw'
def test_bed2beddb_opendata():
    key = dev_key()
    # prep new File
    data = get_test_json('bedtobeddb_opendata.json')
    # bed_content = b'chr1\t1000000\t2000000\tregion1'
    # gzipped_content = gzip.compress(bed_content)
    # bed_uuid = post_new_processedfile(file_format='bed', key=key, upload_content=gzipped_content, extension='bed.gz')
    # data['input_files'][0]['uuid'] = bed_uuid
    bed_uuid = data['input_files'][0]['uuid']
    # first delete the extra file from s3 so that we can check it is newly created.
    boto3.client('s3').delete_object(
        Bucket='elasticbeanstalk-fourfront-webdev-wfoutput',
        Key='614d119e-9330-41a3-a7c9-d149d0456c8e/4DNFI1664939.beddb')
    api = API()
    res = api.run_workflow(data, sfn=DEV_SFN)
    assert 'jobid' in res
    assert 'exec_arn' in res['_tibanna']
    time.sleep(420)
    assert api.check_status(res['_tibanna']['exec_arn']) == 'SUCCEEDED'
    outjson = api.check_output(res['_tibanna']['exec_arn'])
    postrunjson = json.loads(api.log(job_id=res['jobid'], postrunjson=True))
    assert 'status' in postrunjson['Job']
    assert postrunjson['Job']['status'] == '0'
    assert 'ff_meta' in outjson
    assert 'uuid' in outjson['ff_meta']
    wfr_uuid = outjson['ff_meta']['uuid']
    res = ff_utils.get_metadata(bed_uuid, key=key, ff_env=DEV_ENV, check_queue=True)
    assert res['extra_files']
    assert len(res['extra_files']) == 1
    extra = res['extra_files'][0]
    assert extra['upload_key'] == '614d119e-9330-41a3-a7c9-d149d0456c8e/4DNFI1664939.beddb'
    # check the extra file is created in the right bucket.
    head = boto3.client('s3').head_object(
        Bucket='elasticbeanstalk-fourfront-webdev-wfoutput',
        Key='614d119e-9330-41a3-a7c9-d149d0456c8e/4DNFI1664939.beddb')
    assert head
    assert extra['file_format']['display_title'] == 'beddb'
    # ff_utils.patch_metadata({'status': 'deleted'}, bed_uuid, key=key)
    ff_utils.patch_metadata({'status': 'deleted'}, wfr_uuid, key=key)
def test_md5():
    key = dev_key()
    # prep new File
    data = get_test_json('md5.json')
    content = bytes(str(uuid.uuid4()), 'utf-8')
    gzipped_content = gzip.compress(content)
    fq_uuid = post_new_fastqfile(key=key, upload_content=gzipped_content)  # upload random content to avoid md5 conflict
    # prep input json
    data['input_files'][0]['uuid'] = fq_uuid
    # run workflow
    api = API()
    res = api.run_workflow(data, sfn=DEV_SFN)
    assert 'jobid' in res
    assert 'exec_arn' in res['_tibanna']
    time.sleep(420)
    # check step function status
    assert api.check_status(res['_tibanna']['exec_arn']) == 'SUCCEEDED'
    outjson = api.check_output(res['_tibanna']['exec_arn'])
    # check postrun json
    postrunjson = json.loads(api.log(job_id=res['jobid'], postrunjson=True))
    assert 'status' in postrunjson['Job']
    assert postrunjson['Job']['status'] == '0'
    # check metadata update
    res = ff_utils.get_metadata(fq_uuid, key=key, ff_env=DEV_ENV, check_queue=True)
    ff_utils.patch_metadata({'status': 'deleted'}, fq_uuid, key=key)
    assert res['md5sum'] == hashlib.md5(gzipped_content).hexdigest()
    assert res['content_md5sum'] == hashlib.md5(content).hexdigest()
    assert res['file_size'] == len(gzipped_content)
    assert 'ff_meta' in outjson
    assert 'uuid' in outjson['ff_meta']
    wfr_uuid = outjson['ff_meta']['uuid']
    res = ff_utils.get_metadata(wfr_uuid, key=key, ff_env=DEV_ENV, check_queue=True)
    assert res['run_status'] == 'complete'
    assert 'quality_metric' in res
def _input_extra_updater(status, tibanna, accession, extra_file_format,
                         md5=None, filesize=None, higlass_uid=None):
    try:
        original_file = ff_utils.get_metadata(accession,
                                              key=tibanna.ff_keys,
                                              ff_env=tibanna.env,
                                              add_on='frame=object',
                                              check_queue=True)
    except Exception as e:
        raise Exception("Can't get metadata for input file %s" % e)
    if 'extra_files' not in original_file:
        raise Exception("inconsistency - extra file metadata deleted during workflow run?")
    matching_exf_found = False
    for exf in original_file['extra_files']:
        if parse_formatstr(exf['file_format']) == extra_file_format:
            matching_exf_found = True
            exf['status'] = status
            if status == 'uploaded':
                if md5:
                    exf['md5sum'] = md5
                if filesize:
                    exf['file_size'] = filesize
    if not matching_exf_found:
        raise Exception("inconsistency - extra file metadata deleted during workflow run?")
    try:
        patch_file = {'extra_files': original_file['extra_files']}
        if higlass_uid:
            patch_file['higlass_uid'] = higlass_uid
        ff_utils.patch_metadata(patch_file, original_file['uuid'], key=tibanna.ff_keys)
    except Exception as e:
        raise Exception("patch_metadata failed in extra_updater." + str(e) +
                        "original_file ={}\n".format(str(original_file)))
def patch_items_with_headers(connection, action, kwargs):
    """
    Arguments are:
    - the connection (FS connection)
    - the action (from ActionResult)
    - kwargs (from the action function)
    Takes care of patching info on Fourfront and also populating fields on the action.
    """
    action_logs = {'patch_failure': [], 'patch_success': []}
    # get latest results from prepare_static_headers
    headers_check_result = action.get_associated_check_result(kwargs)
    # the dictionaries can be combined
    total_patches = headers_check_result['full_output']['to_add']
    total_patches.update(headers_check_result['full_output']['to_remove'])
    for item, headers in total_patches.items():
        # if all headers are deleted, use ff_utils.delete_field
        if headers == []:
            try:
                ff_utils.delete_field(item, 'static_headers', key=connection.ff_keys)
            except Exception as e:
                patch_error = '\n'.join([item, str(e)])
                action_logs['patch_failure'].append(patch_error)
            else:
                action_logs['patch_success'].append(item)
        else:
            patch_data = {'static_headers': headers}
            try:
                ff_utils.patch_metadata(patch_data, obj_id=item, key=connection.ff_keys)
            except Exception as e:
                patch_error = '\n'.join([item, str(e)])
                action_logs['patch_failure'].append(patch_error)
            else:
                action_logs['patch_success'].append(item)
    action.status = 'DONE'
    action.output = action_logs
def main():  # pragma: no cover
    args = get_args()
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    itemids = scu.get_item_ids_from_args(args.input, auth, args.search)
    taggable = scu.get_types_that_can_have_field(auth, 'tags')
    if args.types2exclude is not None:
        # remove explicitly provided types that should not be tagged
        taggable = [t for t in taggable if t not in args.types2exclude]

    seen = []  # only need to add the tag once, so this keeps track of what's been seen
    to_patch = {}  # keep track of those to patch
    # main loop through the top level item ids
    for itemid in itemids:
        items2tag = {}
        if args.taglinked:
            # need to get linked items and tag them
            linked = scu.get_linked_items(auth, itemid, {})
            items2tag = scu.filter_dict_by_value(linked, taggable, include=True)
        else:
            # only want to tag provided items
            itype = scu.get_item_type(auth, itemid)
            if itype in taggable:
                items2tag = {itemid: itype}
        for i, t in items2tag.items():
            if i not in seen:
                seen.append(i)
                item = get_metadata(i, auth)
                if not scu.has_field_value(item, 'tags', args.tag):
                    # not already tagged with this tag, so make a patch and add it to the dict
                    to_patch[i] = make_tag_patch(item, args.tag)

    # now do the patching or reporting
    for pid, patch in to_patch.items():
        if args.dbupdate:
            pres = patch_metadata(patch, pid, auth)
            print(pres['status'])
        else:
            print("DRY RUN: patch ", pid, " with ", patch)
def do_patch(uid, type, patch, auth, dbupdate, cnts):
    if not dbupdate:
        print('DRY RUN - will update %s of type %s with %s' % (uid, type, patch))
        cnts['not_patched'] += 1
        return
    # import pdb; pdb.set_trace()
    res = patch_metadata(patch, uid, auth)
    # res = {'status': 'testing'}
    print('UPDATING - %s of type %s with %s' % (uid, type, patch))
    rs = res['status']
    print(rs)
    if rs == 'success':
        cnts['patched'] += 1
    else:
        cnts['errors'] += 1
        print(res)
    return
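# Hedged usage sketch (not part of the original source): do_patch mutates the shared
# counter dict, so a caller would typically accumulate results across a loop like this.
# `items` is assumed to be a list of (uuid, item_type, patch_dict) tuples.
def example_do_patch(auth, items, dbupdate=False):
    cnts = {'patched': 0, 'not_patched': 0, 'errors': 0}
    for uid, itype, patch in items:
        do_patch(uid, itype, patch, auth, dbupdate, cnts)
    print(cnts)
    return cnts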
def add_notes_to_tsv(file_meta, auth):
    """Adds a notes_to_tsv entry with the canned value below to the processed file.
    Returns SUCCESS, ERROR, or SKIP if the value already exists.
    """
    note_txt = "This file contains processed results performed outside of the 4DN-DCIC standardized pipelines. The file and the information about its provenance, i.e. which files were used as input to generate this output was provided by or done in collaboration with the lab that did the experiments to generate the raw data. For more information about the specific analysis performed, please contact the submitting lab or refer to the relevant publication if available."
    n2tsv = file_meta.get('notes_to_tsv', [])
    for note in n2tsv:
        if note_txt in note:
            return "SKIP"
    n2tsv.append(note_txt)
    patch = {'notes_to_tsv': n2tsv}
    try:
        pres = patch_metadata(patch, file_meta.get('uuid'), auth)
    except Exception as e:
        print(e)
        return "ERROR"
    if pres.get('status') == 'success':
        return "SUCCESS"
    return "ERROR"
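# Hedged usage sketch (not part of the original source): fetch a processed file's
# metadata and add the canned notes_to_tsv entry, skipping files that already have it.
# The accession is a hypothetical placeholder, and it is assumed that get_metadata is
# importable alongside patch_metadata.
def example_add_notes_to_tsv(auth):
    file_meta = get_metadata('4DNFIEEEEEEE', key=auth)  # hypothetical file accession
    result = add_notes_to_tsv(file_meta, auth)
    print(result)  # one of "SUCCESS", "SKIP", or "ERROR"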
def main():  # pragma: no cover
    args = get_args(sys.argv[1:])
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    print('#', auth.get('server'))
    id_list = scu.get_item_ids_from_args(args.input, auth, args.search)
    for itemid in id_list:
        # get the existing data in other p
        item_data = get_metadata(itemid, auth, add_on='frame=raw')
        pfiles = item_data.get('processed_files')
        if not pfiles:
            continue
        patch_data = item_data.get('other_processed_files', [])
        if patch_data:
            # does the same title exist
            if args.title in [i['title'] for i in patch_data]:
                print(itemid, 'already has preliminary results')
                continue
        patch_data.append({'title': args.title, 'type': 'preliminary', 'files': pfiles})
        if patch_data:
            patch = {'other_processed_files': patch_data}
            if args.dbupdate:
                res = patch_metadata(patch, obj_id=itemid, key=auth, add_on='delete_fields=processed_files')
                print(res.get('status'))
            else:
                print("DRY RUN -- will patch")
                print(patch)
                print('and delete processed_files field value')
def main(ff_env='fourfront-cgapwolf',
         skip_software=False, skip_file_format=False, skip_workflow=False):
    """post / patch contents from portal_objects to the portal"""
    keycgap = ff_utils.get_authentication_with_server(ff_env=ff_env)

    # software
    if not skip_software:
        print("Processing software...")
        with open('portal_objects/software.json') as f:
            d = json.load(f)
            for dd in d:
                print("  processing uuid %s" % dd['uuid'])
                try:
                    ff_utils.post_metadata(dd, 'Software', key=keycgap)
                except Exception:
                    ff_utils.patch_metadata(dd, dd['uuid'], key=keycgap)

    # file formats
    if not skip_file_format:
        print("Processing file format...")
        with open('portal_objects/file_format.json') as f:
            d = json.load(f)
            for dd in d:
                print("  processing uuid %s" % dd['uuid'])
                try:
                    ff_utils.post_metadata(dd, 'FileFormat', key=keycgap)
                except Exception:
                    ff_utils.patch_metadata(dd, dd['uuid'], key=keycgap)

    # workflows
    if not skip_workflow:
        print("Processing workflow...")
        wf_dir = "portal_objects/workflows"
        files = os.listdir(wf_dir)
        for fn in files:
            if fn.endswith('.json'):
                print("  processing file %s" % fn)
                with open(os.path.join(wf_dir, fn), 'r') as f:
                    d = json.load(f)
                    try:
                        ff_utils.post_metadata(d, 'Workflow', key=keycgap)
                    except Exception:
                        ff_utils.patch_metadata(d, d['uuid'], key=keycgap)
def md5_updater(status, wf_file, ff_meta, tibanna):
    # get key
    ff_key = tibanna.ff_keys
    # get metadata about original input file
    accession = wf_file.runner.inputfile_accessions['input_file']
    original_file = ff_utils.get_metadata(accession,
                                          key=ff_key,
                                          ff_env=tibanna.env,
                                          add_on='frame=object',
                                          check_queue=True)
    if status.lower() == 'uploaded':
        md5_array = wf_file.read().split('\n')
        if not md5_array:
            print("report has no content")
            return md5_updater("upload failed", wf_file, ff_meta, tibanna)
        if len(md5_array) == 1:
            md5 = None
            content_md5 = md5_array[0]
        elif len(md5_array) > 1:
            md5 = md5_array[0]
            content_md5 = md5_array[1]
        new_file = _md5_updater(original_file, md5, content_md5)
        if new_file and new_file != "Failed":
            try:
                ff_utils.patch_metadata(new_file, accession, key=ff_key)
            except Exception as e:
                # TODO: specific exception
                # if the patch fails, try to patch the workflow status as failed
                new_file = {}
                new_file['status'] = 'upload failed'
                new_file['description'] = str(e)
                ff_utils.patch_metadata(new_file, original_file['uuid'], key=ff_key)
        elif new_file == "Failed":
            # we may not have to update the file, because it already has the correct info;
            # we return Failed when we know the upload failed
            md5_updater("upload failed", wf_file, ff_meta, tibanna)
    elif status == 'upload failed':
        new_file = {}
        new_file['status'] = 'upload failed'
        ff_utils.patch_metadata(new_file, original_file['uuid'], key=ff_key)
    # nothing to patch to ff_meta
    return None
def md5_updater(status, awsemfile, ff_meta, tibanna, **kwargs):
    # get key
    ff_key = tibanna.ff_keys
    # get metadata about original input file
    accession = awsemfile.runner.get_file_accessions('input_file')[0]
    format_if_extras = awsemfile.runner.get_format_if_extras('input_file')
    original_file = ff_utils.get_metadata(accession,
                                          key=ff_key,
                                          ff_env=tibanna.env,
                                          add_on='frame=object',
                                          check_queue=True)
    if status.lower() == 'uploaded':  # md5 report file is uploaded
        md5, content_md5 = parse_md5_report(awsemfile.read())
        # add file size to input file metadata
        input_file = awsemfile.runner.input_files()[0]
        file_size = boto3.client('s3').head_object(Bucket=input_file.bucket,
                                                   Key=input_file.key).get('ContentLength', '')
        for format_if_extra in format_if_extras:
            printlog("format_if_extra : %s" % format_if_extra)
            new_file = _md5_updater(original_file, md5, content_md5, format_if_extra, file_size)
            if new_file:
                break
        printlog("new_file = %s" % str(new_file))
        if new_file:
            try:
                resp = ff_utils.patch_metadata(new_file, accession, key=ff_key)
                printlog(resp)
            except Exception as e:
                # TODO: specific exception
                # if the patch fails, try to patch the workflow status as failed
                raise e
    else:
        pass
    # nothing to patch to ff_meta
    return None
def main():
    args = get_args(sys.argv[1:])
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    print("Working on {}".format(auth.get('server')))
    itemids = scu.get_item_ids_from_args(args.input, auth, args.search)
    field = args.field
    val = args.value
    if val == 'True':
        val = True
    elif val == 'False':
        val = False
    if args.isarray:
        val = [v for v in val.split("'") if v]
    ntype = args.numtype
    if ntype:
        if ntype == 'i':
            val = int(val)
        elif ntype == 'f':
            val = float(val)
    for iid in itemids:
        print("PATCHING", iid, "to", field, "=", val)
        if args.dbupdate:
            # do the patch
            if val == '*delete*':
                res = delete_field(iid, field, auth)
            else:
                res = patch_metadata({args.field: val}, iid, auth)
            if res['status'] == 'success':
                print("SUCCESS!")
            else:
                print("FAILED TO PATCH", iid, "RESPONSE STATUS", res['status'], res['description'])
def md5_updater(status, awsemfile, ff_meta, tibanna):
    # get key
    ff_key = tibanna.ff_keys
    # get metadata about original input file
    accession = awsemfile.runner.inputfile_accessions['input_file']
    format_if_extra = awsemfile.runner.inputfile_format_if_extra['input_file']
    original_file = ff_utils.get_metadata(accession,
                                          key=ff_key,
                                          ff_env=tibanna.env,
                                          add_on='frame=object',
                                          check_queue=True)
    if status.lower() == 'uploaded':  # md5 report file is uploaded
        md5_array = awsemfile.read().split('\n')
        if not md5_array:
            raise Exception("md5 report has no content")
        if len(md5_array) == 1:
            md5 = None
            content_md5 = md5_array[0]
        elif len(md5_array) > 1:
            md5 = md5_array[0]
            content_md5 = md5_array[1]
        new_file = _md5_updater(original_file, md5, content_md5, format_if_extra)
        print("new_file = %s" % str(new_file))
        if new_file:
            try:
                resp = ff_utils.patch_metadata(new_file, accession, key=ff_key)
                print(resp)
            except Exception as e:
                # TODO: specific exception
                # if the patch fails, try to patch the workflow status as failed
                raise e
    else:
        pass
    # nothing to patch to ff_meta
    return None
def upload_file_to_uuid(filename, uuid, auth):
    """
    Upload file to a target environment.

    :param filename: the name of a file to upload.
    :param uuid: the item into which the filename is to be uploaded.
    :param auth: auth info in the form of a dictionary containing 'key', 'secret', and 'server'.
    """
    # filename here should not include path
    patch_data = {'filename': os.path.basename(filename)}
    response = ff_utils.patch_metadata(patch_data, uuid, key=auth)
    try:
        [metadata] = response['@graph']
        upload_credentials = metadata['upload_credentials']
    except Exception:
        raise RuntimeError("Unable to obtain upload credentials for file %s." % filename)
    execute_prearranged_upload(filename, upload_credentials=upload_credentials, auth=auth)
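# Hedged usage sketch (not part of the original source): push a local file to the File
# item that should hold it. The path and uuid are hypothetical placeholders; auth must
# contain 'key', 'secret', and 'server' as the docstring above describes.
def example_upload_file_to_uuid(auth):
    upload_file_to_uuid('/tmp/my_file.fastq.gz',                 # hypothetical local path
                        'ffffffff-0000-1111-2222-333333333333',  # hypothetical File item uuid
                        auth)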
def clean_for_reupload(file_acc, key, clean_release_dates=False, delete_runs=True):
    """In rare cases we want to reupload the file, and this needs some cleanup.
    If you want to delete release dates too, set 'clean_release_dates' to True."""
    resp = ff_utils.get_metadata(file_acc, key=key)
    clean_fields = ['extra_files', 'md5sum', 'content_md5sum', 'file_size', 'filename', 'quality_metric']
    if clean_release_dates:
        clean_fields.extend(['public_release', 'project_release'])
    if delete_runs:
        runs = resp.get('workflow_run_inputs', [])
        if runs:
            for a_run in runs:
                ff_utils.patch_metadata({'status': 'deleted'}, obj_id=a_run['uuid'], key=key)
    if resp.get('quality_metric'):
        ff_utils.patch_metadata({'status': 'deleted'}, obj_id=resp['quality_metric']['uuid'], key=key)
    del_f = []
    for field in clean_fields:
        if field in resp:
            del_f.append(field)
    del_add_on = 'delete_fields=' + ','.join(del_f)
    ff_utils.patch_metadata({'status': 'uploading'}, obj_id=resp['uuid'], key=key, add_on=del_add_on)
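# Hedged usage sketch (not part of the original source): reset a file so it can be
# re-uploaded, also deleting the workflow runs it fed into while keeping its release
# dates. The accession is a hypothetical placeholder.
def example_clean_for_reupload(key):
    clean_for_reupload('4DNFIGGGGGGG', key, clean_release_dates=False, delete_runs=True)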
def main():
    """
    Load a given JSON file with ontology term inserts to a server using
    the `load_data` endpoint defined in loadxl.
    """
    logging.basicConfig()
    # Loading app will have configured from config file. Reconfigure here:
    logging.getLogger('encoded').setLevel(logging.INFO)

    parser = argparse.ArgumentParser(
        description="Load Ontology Term Data",
        epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('json_file', help="File containing terms to load")
    parser.add_argument('--env', default='local',
                        help='FF environment to update from. Defaults to local')
    parser.add_argument('--local-key', help='Access key ID if using local')
    parser.add_argument('--local-secret', help='Access key secret if using local')
    args = parser.parse_args()

    # authentication with Fourfront
    if args.env == 'local':
        # prompt access key ID and secret from user
        config_uri = 'development.ini'
        local_id = args.local_key if args.local_key else input('[local access key ID] ')
        local_secret = args.local_secret if args.local_secret else input('[local access key secret] ')
        auth = {'key': local_id, 'secret': local_secret, 'server': 'http://localhost:8000'}
    else:
        config_uri = 'production.ini'
        auth = ff_utils.get_authentication_with_server(None, args.env)

    load_endpoint = '/'.join([auth['server'], 'load_data'])
    logger.info('load_ontology_terms: Starting POST to %s' % load_endpoint)
    json_data = {'config_uri': config_uri, 'itype': 'ontology_term',
                 'overwrite': True, 'iter_response': True}
    with open(args.json_file) as infile:
        json_data['store'] = {'ontology_term': json.load(infile)}
    num_to_load = len(json_data['store']['ontology_term'])
    logger.info('Will attempt to load %s ontology terms to %s' % (num_to_load, auth['server']))
    start = datetime.now()
    try:
        # sustained by returning Response.app_iter from loadxl.load_data
        res = ff_utils.authorized_request(load_endpoint, auth=auth, verb='POST',
                                          timeout=None, json=json_data)
    except Exception as exc:
        logger.error('Error on POST: %s' % str(exc))
    else:
        # process the individual item responses from the generator.
        # each item should be "POST: <uuid>,", "PATCH: <uuid>,", or "SKIP: <uuid>"
        load_res = {'POST': [], 'PATCH': [], 'SKIP': [], 'ERROR': []}
        for val in res.text.split('\n'):
            if val.startswith('POST') or val.startswith('SKIP'):
                prefix_len = 4  # 'POST' or 'SKIP'
            else:
                prefix_len = 5  # 'PATCH' or 'ERROR'
            # this is a bit weird, but we want to split out the POST/PATCH...
            # and also remove ': ' from the value for each message
            cat, msg = val[:prefix_len], val[prefix_len + 2:]
            if not msg:
                continue
            if cat in load_res:
                load_res[cat].append(msg)
        logger.info("Success! Attempted to load %s items. Result: POSTed %s, PATCHed %s, skipped %s"
                    % (num_to_load, len(load_res['POST']), len(load_res['PATCH']), len(load_res['SKIP'])))
        if load_res['ERROR']:
            logger.error("ERROR encountered during load_data! Error: %s" % load_res['ERROR'])
        if (len(load_res['POST']) + len(load_res['SKIP'])) > len(load_res['PATCH']):
            logger.error("The following items passed round I (POST/skip) but not round II (PATCH): %s"
                         % (set(load_res['POST'] + load_res['SKIP']) - set(load_res['PATCH'])))
    logger.info("Finished request in %s" % str(datetime.now() - start))

    # update sysinfo. Don't worry about doing this on local
    if args.env != 'local':
        data = {"name": "ffsysinfo", "ontology_updated": datetime.today().isoformat()}
        try:
            found_info = ff_utils.get_metadata('/sysinfos/' + data['name'], key=auth)
        except Exception:
            found_info = None
        if found_info:
            ff_utils.patch_metadata(data, found_info['uuid'], key=auth)
        else:
            ff_utils.post_metadata(data, 'sysinfos', key=auth)
        logger.info("Updated sysinfo with name %s" % data['name'])
    logger.info("DONE!")
def patch(self, key, type_name=None):
    return patch_metadata(self.as_dict(), key=key)
def add_processed_files(item_id, list_pc, auth):
    # patch the exp or set
    patch_data = {'processed_files': list_pc}
    ff_utils.patch_metadata(patch_data, obj_id=item_id, key=auth)
    return
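# Hedged usage sketch (not part of the original source): overwrite the processed_files
# list on an experiment or set with a new list of file uuids. The accession and uuid
# below are hypothetical placeholders.
def example_add_processed_files(auth):
    add_processed_files('4DNEXHHHHHHH',                            # hypothetical Experiment accession
                        ['aaaabbbb-cccc-dddd-eeee-ffff00001111'],  # hypothetical processed file uuid
                        auth)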