Example #1
def get_metadata(keypairs_file, schema_name=None, schema_class_name=None, uuid=None):

    assert os.path.isfile(str(keypairs_file))

    try:
        key = fdnDCIC.FDN_Key(keypairs_file, "default")
    except Exception as e:
        print(e)
        print("key error")
        raise e

    try:
        connection = fdnDCIC.FDN_Connection(key)
    except Exception as e:
        print(e)
        print("connection error")
        raise e

    try:
        if schema_name is not None:
            response = fdnDCIC.get_FDN(schema_name, connection)
            return(response)
        if schema_class_name is not None:
            response = fdnDCIC.get_FDN("search/?type=" + schema_class_name, connection)
            return(response)
        if uuid is not None:
            response = fdnDCIC.get_FDN(uuid, connection)
            return(response)

    except Exception as e:
        print(e)
        print("get error")
        raise e
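A minimal usage sketch for the three lookup modes above; the keypairs path and the uuid are hypothetical placeholders:

# assumes fdnDCIC is importable and ./keypairs.json holds a "default" key entry
items_of_type = get_metadata("./keypairs.json", schema_class_name="WorkflowRun")
single_item = get_metadata("./keypairs.json", uuid="some-item-uuid")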
Example #2
def get_species_from_expr(expr, connection):
    """get species for a given experiment"""
    if isinstance(expr, dict):
        sep_resp = expr
    else:
        sep_resp = fdnDCIC.get_FDN(expr, connection)
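    # walk experiment -> biosample -> biosource[0] -> individual -> organism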
    sep_resp2 = fdnDCIC.get_FDN(sep_resp["biosample"], connection)["biosource"]
    indv = fdnDCIC.get_FDN(sep_resp2[0], connection)["individual"]
    return(str(fdnDCIC.get_FDN(indv, connection)['organism']))
Example #3
def get_metadata(obj_id, key='', connection=None, frame="object"):
    connection = fdn_connection(key, connection)
    res = fdnDCIC.get_FDN(obj_id, connection, frame=frame)
    retry = 1
    sleep = [2, 4, 12]
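    # retry starts at 1, so only sleep[1] (4s) and sleep[2] (12s) are actually used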
    while 'error' in res.get('@type', []) and retry < 3:
        time.sleep(sleep[retry])
        retry += 1
        res = fdnDCIC.get_FDN(obj_id, connection, frame=frame)

    return res
Example #4
File: app.py Project: j1z0/tibanna
def patch_to_metadata(keypairs_file,
                      patch_item,
                      schema_class_name=None,
                      accession=None,
                      uuid=None):

    assert os.path.isfile(keypairs_file)

    try:
        key = fdnDCIC.FDN_Key(keypairs_file, "default")
    except Exception as e:
        print(e)
        print("key error")
        raise e

    try:
        connection = fdnDCIC.FDN_Connection(key)
    except Exception as e:
        print(e)
        print("connection error")
        raise e

    try:
        if schema_class_name is not None:
            resp = fdnDCIC.get_FDN("/search/?type=" + schema_class_name,
                                   connection)
            items_uuids = [i['uuid'] for i in resp['@graph']]
        elif accession is not None:
            resp = fdnDCIC.get_FDN("/" + accession, connection)
            item_uuid = resp.get('uuid')
            items_uuids = [item_uuid]
        elif uuid is not None:
            items_uuids = [uuid]
        else:
            items_uuids = []

    except Exception as e:
        print(e)
        print("get error")
        raise e

    try:
        for item_uuid in items_uuids:
            response = fdnDCIC.patch_FDN(item_uuid, connection, patch_item)
            return (response)

    except Exception as e:
        print(e)
        print("get error")
        raise e
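A hedged usage sketch; the patch body and the uuid are placeholders. Note that the final loop returns after the first patch, so only the first item's response comes back when patching a whole type:

resp = patch_to_metadata("./keypairs.json", {"status": "released to project"},
                         uuid="some-item-uuid")
print(resp)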
Example #5
def delete_field(post_json, del_field, connection=None):
    """Does a put to delete the given field."""
    my_uuid = post_json.get("uuid")
    my_accession = post_json.get("accession")
    raw_json = fdnDCIC.get_FDN(my_uuid, connection, frame="raw")
    # check if the uuid is in the raw_json
    if not raw_json.get("uuid"):
        raw_json["uuid"] = my_uuid
    # if there is an accession, add it to raw so it does not get created again
    if my_accession:
        if not raw_json.get("accession"):
            raw_json["accession"] = my_accession
    # remove field from the raw_json
    if raw_json.get(del_field):
        del raw_json[del_field]
    # Do the put with raw_json
    try:
        response = fdnDCIC.put_FDN(my_uuid, connection, raw_json)
        if response.get('status') == 'error':
            raise Exception("error %s \n unable to delete field: %s \n of  item: %s" %
                            (response, del_field, my_uuid))
    except Exception as e:
        raise Exception("error %s \n unable to delete field: %s \n of  item: %s" %
                        (e, del_field, my_uuid))
    return response
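A minimal sketch of calling delete_field, assuming ./keypairs.json holds a "default" key entry; the uuid and the 'notes' field name are hypothetical:

key = fdnDCIC.FDN_Key("./keypairs.json", "default")
connection = fdnDCIC.FDN_Connection(key)
# re-puts the raw item without the (hypothetical) 'notes' field
response = delete_field({"uuid": "some-item-uuid"}, "notes", connection=connection)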
Example #6
def delete_wfr_many(wf_uuid, keypairs_file, run_status_filter=['error'], input_source_experiment_filter=None,
                    delete=True):
    """delete the wfr metadata for all wfr with a specific wf
    if run_status_filter is set, only those with the specific run_status is deleted
    run_status_filter : list of run_statuses e.g. ['started', 'error']
    if run_status_filter is None, it deletes everything
    if input_source_experiment_filter is set (an array, e.g. ['some_uuid', 'some_other_uuid', ...]),
    only wfr whose input source experiment is one of these specified are deleted.
    """
    connection = get_connection(keypairs_file)
    wfrsearch_resp = fdnDCIC.get_FDN('search/?workflow.uuid=' + wf_uuid + '&type=WorkflowRun', connection)
    for entry in wfrsearch_resp['@graph']:
        # skip entries that are already deleted
        if entry['status'] == 'deleted':
            continue
        # run_status filter
        if run_status_filter:
            if 'run_status' not in entry or entry['run_status'] not in run_status_filter:
                continue
        # input_source_experiment_filter
        if input_source_experiment_filter:
            sexp = get_wfr_input_source_experiment(entry, connection)
            if not set(sexp).intersection(input_source_experiment_filter):
                continue
        print('\n\ntobedeleted: ' + entry['uuid'] + ':' + str(entry))
        if delete:
            delete_wfr(entry, connection)
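A hedged sketch of the filters described in the docstring; the uuids are placeholders. Passing delete=False only prints the candidates, which makes for a safe dry run:

delete_wfr_many("some-workflow-uuid", "./keypairs.json",
                run_status_filter=['error', 'started'],
                input_source_experiment_filter=['some-experiment-uuid'],
                delete=False)  # print what would be deleted, without deleting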
Example #7
def testrun_md5(keypairs_file, workflow_name='tibanna_pony', env='webdev'):
    """Creates a random file object with no md5sum/content_md5sum and run md5 workflow.
    It waits for 6 mintues till the workflow run finishes and checks the input file object
    has been updated.
    """
    bucket = "elasticbeanstalk-fourfront-" + env + "-wfoutput"
    newfile = post_random_file(bucket, keypairs_file)
    uuid = newfile['uuid']
    accession = newfile['accession']
    input_json = {
        "config": {
            "ebs_type": "io1",
            "ebs_iops": 500,
            "s3_access_arn": "arn:aws:iam::643366669028:instance-profile/S3_access",
            "ami_id": "ami-cfb14bb5",
            "json_bucket": "4dn-aws-pipeline-run-json",
            "shutdown_min": 30,
            "copy_to_s3": True,
            "launch_instance": True,
            "log_bucket": "tibanna-output",
            "script_url": "https://raw.githubusercontent.com/4dn-dcic/tibanna/master/awsf/",
            "key_name": "4dn-encode",
            "password": ""
        },
        "_tibanna": {
            "env": "fourfront-webdev",
            "run_type": "md5"
        },
        "parameters": {},
        "app_name": "md5",
        "workflow_uuid": "c77a117b-9a58-477e-aaa5-291a109a99f6",
        "input_files": [{
            "workflow_argument_name": "input_file",
            "bucket_name": bucket,
            "uuid": uuid,
            "object_key": accession + '.pairs.gz'
        }],
        "output_bucket": bucket
    }
    resp = run_workflow(input_json, workflow=workflow_name)
    print(resp)

    # check result
    key = fdnDCIC.FDN_Key(keypairs_file, "default")
    connection = fdnDCIC.FDN_Connection(key)
    time.sleep(6 * 60)  # wait for 6 minutes
    filemeta = fdnDCIC.get_FDN(uuid, connection)
    content_md5sum = filemeta.get('content_md5sum')
    md5sum = filemeta.get('md5sum')
    if content_md5sum and md5sum:
        print(content_md5sum)
        print(md5sum)
    else:
        raise Exception('md5 step function run failed')
Example #8
def get_datatype_for_expr(expr, connection):
    """get experiment type (e.g. 'in situ Hi-C') given an experiment id (or uuid)"""
    if isinstance(expr, dict):
        exp_resp = expr
    else:
        exp_resp = fdnDCIC.get_FDN(expr, connection)
    datatype = exp_resp['experiment_type']
    return(datatype)
Example #9
def patch_to_metadata(keypairs_file, patch_item, schema_class_name=None, accession=None, uuid=None):

    assert os.path.isfile(keypairs_file)

    try:
        key = fdnDCIC.FDN_Key(keypairs_file, "default")
    except Exception as e:
        print(e)
        print("key error")
        raise e

    try:
        connection = fdnDCIC.FDN_Connection(key)
    except Exception as e:
        print(e)
        print("connection error")
        raise e

    try:
        if schema_class_name is not None:
            resp = fdnDCIC.get_FDN("/search/?type=" + schema_class_name, connection)
            items_uuids = [i['uuid'] for i in resp['@graph']]
        elif accession is not None:
            resp = fdnDCIC.get_FDN("/" + accession, connection)
            item_uuid = resp.get('uuid')
            items_uuids = [item_uuid]
        elif uuid is not None:
            items_uuids = [uuid]
        else:
            items_uuids = []

    except Exception as e:
        print(e)
        print("get error")
        raise e

    try:
        for item_uuid in items_uuids:
            response = fdnDCIC.patch_FDN(item_uuid, connection, patch_item)
            return(response)

    except Exception as e:
        print(e)
        print("get error")
        raise e
Example #10
def prep_input_file_entry_list_for_single_exp(input_argname, prev_workflow_uuid, prev_output_argument_name, connection,
                                              addon=None, wfuuid=None, datatype_filter=None, single=True):
    schema_name = 'search/?type=WorkflowRunAwsem&workflow.uuid=' + prev_workflow_uuid + '&run_status=complete'
    schema_name = schema_name + '&datastore=database'
    response = fdnDCIC.get_FDN(schema_name, connection)
    files_for_ep = map_exp_to_inputfile_entry(response, input_argname, prev_output_argument_name, connection,
                                              addon=addon, wfuuid=wfuuid, datatype_filter=datatype_filter,
                                              single=single)
    return(files_for_ep)
Example #11
def release_all_wfr(keypairs_file,
                    searchterm='?run_status=complete&type=WorkflowRunAwsem&status=in+review+by+lab',
                    releaseterm='released to project'):
    connection = get_connection(keypairs_file)
    wfrsearch_resp = fdnDCIC.get_FDN(searchterm, connection)
    for entry in wfrsearch_resp['@graph']:
        patch_json = {'uuid': entry['uuid'], 'status': releaseterm}
        patch_resp = fdnDCIC.patch_FDN(entry['uuid'], connection, patch_json)
        print(patch_resp)
Example #12
def get_wfr_input_source_experiment(wfr_dict, connection):
    "returns all the input source experiments in a nonredundant list"
    if 'input_files' not in wfr_dict:
        return(None)
    sexp = []
    for if_id in [_['value'] for _ in wfr_dict['input_files']]:
        if_dict = fdnDCIC.get_FDN(if_id, connection)
        if 'source_experiments' in if_dict:
            sexp.extend(if_dict['source_experiments'])
    return(list(set(sexp)))
Example #13
def get_digestion_enzyme_for_expr(expr, connection):
    """get species for a given experiment
    Returns enzyme name (e.g. HindIII)
    """
    if isinstance(expr, dict):
        exp_resp = expr
    else:
        exp_resp = fdnDCIC.get_FDN(expr, connection)
    if 'digestion_enzyme' not in exp_resp:
        return(None)
    enzyme = exp_resp['digestion_enzyme'].replace('/enzymes/', '').replace('/', '')
    return(enzyme)
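The enzyme name is recovered by stripping the /enzymes/ prefix and the slashes from the @id; a one-line illustration using the HindIII example from the docstring:

print('/enzymes/HindIII/'.replace('/enzymes/', '').replace('/', ''))  # -> HindIII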
Example #14
def create_inputfile_entry(fileId, input_argname, connection, addon=None, wfr_input_filter=None,
                           datatype_filter=None):
    """create an input file entry (uuid, accession, object_key)
    addon : list of following strings (currently only 're' is available to add restriction enzyme info)
    wfr_input_filter : workflow_uuid, return None if specified and has a completed or
    started run of the specified workflow
    assumes file is a processed file (has source_experiments field)
    assumes single source_experiments
    """
    file_dict = fdnDCIC.get_FDN(fileId + '?datastore=database', connection)
    if 'uuid' not in file_dict:
        raise Exception("key error uuid: " + str(file_dict))
    file_uuid = file_dict['uuid']
    entry = {'uuid': file_uuid, 'accession': file_dict['accession'],
             'object_key': file_dict['upload_key'].replace(file_uuid + '/', ''),
             'workflow_argument_name': input_argname}

    # add source experiment if exists
    if 'source_experiments' in file_dict:
        if file_dict['source_experiments']:
            sep = file_dict['source_experiments'][0]
            sep_dict = fdnDCIC.get_FDN(sep, connection)
            sep_id = sep_dict['@id']
            entry['source_experiments'] = [sep_id]
            if datatype_filter:
                # would be faster if it takes sep_dict. Leave it for now
                datatype = get_datatype_for_expr(sep_dict, connection)
                if datatype not in datatype_filter:
                    return(None)
            if addon:
                if 're' in addon:
                    entry['RE'] = get_digestion_enzyme_for_expr(sep_dict, connection)
    if wfr_input_filter:
        wfr_info = get_info_on_workflowrun_as_input(file_dict, connection)
        if wfr_input_filter in wfr_info:
            if 'complete' in wfr_info[wfr_input_filter]:
                return(None)
            # if 'started' in wfr_info[wfr_input_filter]:
            #    return(None)
    return(entry)
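A hedged usage sketch; the file uuid, the argument name, and the workflow uuid are hypothetical placeholders:

key = fdnDCIC.FDN_Key("./keypairs.json", "default")
connection = fdnDCIC.FDN_Connection(key)
# skip files that already have a completed run of the given workflow,
# and attach restriction enzyme info via the 're' addon
entry = create_inputfile_entry("some-file-uuid", "input_pairs", connection,
                               addon=['re'], wfr_input_filter="some-workflow-uuid")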
Example #15
def delete_wfr(wfr_dict, connection):
    # delete all the output files first
    if 'output_files' in wfr_dict:
        outputfile_ids = [_['value'] for _ in wfr_dict['output_files']]
        for of_id in outputfile_ids:
            of_uuid = fdnDCIC.get_FDN(of_id, connection)['uuid']
            output_patch_json = {'uuid': of_uuid, 'status': 'deleted'}
            patch_resp = fdnDCIC.patch_FDN(of_uuid, connection, output_patch_json)
            print(patch_resp)

    # then delete the wfr itself
    patch_json = {'uuid': wfr_dict['uuid'], 'status': 'deleted'}
    patch_resp = fdnDCIC.patch_FDN(wfr_dict['uuid'], connection, patch_json)
    print(patch_resp)
Example #16
File: app.py Project: j1z0/tibanna
def get_metadata(keypairs_file,
                 schema_name=None,
                 schema_class_name=None,
                 uuid=None):

    assert os.path.isfile(str(keypairs_file))

    try:
        key = fdnDCIC.FDN_Key(keypairs_file, "default")
    except Exception as e:
        print(e)
        print("key error")
        raise e

    try:
        connection = fdnDCIC.FDN_Connection(key)
    except Exception as e:
        print(e)
        print("connection error")
        raise e

    try:
        if schema_name is not None:
            response = fdnDCIC.get_FDN(schema_name, connection)
            return (response)
        if schema_class_name is not None:
            response = fdnDCIC.get_FDN("search/?type=" + schema_class_name,
                                       connection)
            return (response)
        if uuid is not None:
            response = fdnDCIC.get_FDN(uuid, connection)
            return (response)

    except Exception as e:
        print(e)
        print("get error")
        raise e
Example #17
def get_info_on_workflowrun_as_input(file_dict, connection):
    """given a json for file, returns a dictionary with workflow uuids as keys.
    dictionary structure : dict{wf_uuid}{run_status} = [wfr_id1, wfr_id2, ... ]
    These workflow uuids are the the ones in the workflow runs
    that has the given file as input
    """
    wfr_info = dict()
    if 'workflow_run_inputs' in file_dict:
        wfr_list = file_dict.get("workflow_run_inputs")
        if wfr_list:
            for wfr in wfr_list:
                wfr_dict = fdnDCIC.get_FDN(wfr, connection)
                wf = wfr_dict['workflow'].replace('/workflows/', '').replace('/', '')
                run_status = wfr_dict['run_status']
                if wf not in wfr_info:
                    wfr_info[wf] = dict()
                if run_status not in wfr_info[wf]:
                    wfr_info[wf][run_status] = []
                wfr_info[wf][run_status].append(wfr)
    return(wfr_info)
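The returned dict{wf_uuid}{run_status} structure from the docstring, sketched with hypothetical uuids and the run statuses used elsewhere on this page:

# wfr_info might look like:
# {'some-workflow-uuid': {'complete': ['/workflow-runs-awsem/one-wfr-id/'],
#                         'error': ['/workflow-runs-awsem/another-wfr-id/']}}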
Example #18
def get_expset_from_exp(expr, connection):
    """getting the experiment sets of an experiment
    """
    sep_dict = fdnDCIC.get_FDN(expr, connection)
    seps = sep_dict['experiment_sets']
    return(seps)
Example #19
def get_metadata(obj_id, key='', connection=None):
    connection = fdn_connection(key, connection)
    return fdnDCIC.get_FDN(obj_id, connection)
Example #20
def get_nrawfiles_from_exp(expr, connection):
    """getting the number of raw files of an experiment
    """
    sep_dict = fdnDCIC.get_FDN(expr, connection)
    nfiles = len(sep_dict['files'])
    return(nfiles)
Example #21
def get_allexp_from_expset(expset, connection):
    """getting all the experiments from an experiment set
    """
    seps_dict = fdnDCIC.get_FDN(expset, connection)
    return(seps_dict['experiments_in_set'])