Exemplo n.º 1
0
def get_documents_by_method(solr_api_url, auth_object, q="*:*", method="BET"):
    """Fetch the study documents of one method from Solr as a DataFrame.

    Builds a ``client_solr.StudyDocuments`` query whose study filter accepts
    any ``topcategory_s`` / ``endpointcategory_s`` and restricts
    ``E.method_s`` to *method*, posts it to *solr_api_url* and parses the
    documents of the response.

    Args:
        solr_api_url: URL the query is posted to.
        auth_object: Passed through as ``auth`` to ``client_solr.post``.
        q: Text filter handed to ``getQuery``; defaults to ``"*:*"``.
        method: Value matched against ``E.method_s``; defaults to ``"BET"``.

    Returns:
        A ``pandas.DataFrame`` built from the parsed documents, or ``None``
        when the response status is not 200 or the response body could not
        be parsed.
    """
    docs_query = client_solr.StudyDocuments()
    docs_query.settings['endpointfilter'] = None
    docs_query.settings['query_guidance'] = None
    docs_query.settings['query_organism'] = None
    docs_query.setStudyFilter({
        "topcategory_s": "*",
        "endpointcategory_s": "*",
        "E.method_s": "({})".format(method)
    })
    docs_query.settings['fields'] = "*"
    query = docs_query.getQuery(textfilter=q,
                                facets=None,
                                fq=None,
                                rows=10000,
                                _params=True,
                                _conditions=True,
                                _composition=True)
    r = client_solr.post(solr_api_url, query=query, auth=auth_object)

    if r.status_code != 200:
        logger.info(r.status_code)
        return None

    try:
        rows = docs_query.parse(r.json()['response']['docs'], process=None)
        return pd.DataFrame(rows)
    except Exception:
        # Best effort: a malformed response yields None instead of raising.
        # ``Exception`` (not a bare ``except``) keeps KeyboardInterrupt and
        # SystemExit propagating; the traceback is logged so the failure is
        # diagnosable, and the raw body is still printed as before.
        logger.exception("Could not parse Solr response")
        print(r.text)
        return None
Exemplo n.º 2
0
def get_docs(query):
    """Run a text query against Solr and return the study documents.

    Builds a ``client_solr.StudyDocuments`` query with no endpoint, guidance
    or organism filter and no study filter, prints it, and posts it using the
    module-level ``solr_api_url`` and ``auth_object``.

    Args:
        query: Text filter handed to ``getQuery``.

    Returns:
        A ``pandas.DataFrame`` built from the parsed documents, or ``None``
        (after printing the status code) when the response status is not 200.
    """
    docs_query = client_solr.StudyDocuments()
    docs_query.settings['endpointfilter'] = None
    docs_query.settings['query_guidance'] = None
    docs_query.settings['query_organism'] = None
    docs_query.settings['fields'] = "*"
    _query = docs_query.getQuery(textfilter=query,
                                 facets=None,
                                 fq=None,
                                 rows=10000,
                                 _params=True,
                                 _conditions=True,
                                 _composition=False)
    print(_query)
    r = client_solr.post(solr_api_url, query=_query, auth=auth_object)

    # Guard clause: bail out early on any non-OK response.
    if r.status_code != 200:
        print(r.status_code)
        return None

    rows = docs_query.parse(r.json()['response']['docs'], process=None)
    return pd.DataFrame(rows)
Exemplo n.º 3
0
def dose_response_overview(query):
    """Plot dose-response data for each method/endpoint found by *query*.

    A facet summary is requested for *query* (``"*:*"`` when *query* is the
    empty string). For every row of that summary the studies matching the
    row's method and endpoint and having a concentration value are fetched,
    then split by cell type, result unit and concentration unit; each
    non-empty subset is printed and handed to ``plot_dose_response``, which
    receives an image path under ``<folder_output>/data``.

    Relies on module-level names: ``client_solr``, ``solr_api_url``,
    ``auth_object``, ``annotation_folder``, ``log_query``, ``beautify``,
    ``folder_output`` and ``plot_dose_response``.

    Args:
        query: Solr query string; ``""`` selects everything.

    Returns:
        None.
    """
    _query = "*:*" if query == "" else query
    facets = client_solr.Facets()
    facets.set_annotation_folder(annotation_folder)
    _tag_method = "E.method_s"
    _tag_endpoint = "effectendpoint_s"
    _tag_cell = "E.cell_type_s"
    _tag_unit = "unit_s"
    _tag_conc_unit = '_CONDITION_concentration_UNIT_s'
    _tag_conc = '_CONDITION_concentration_d'
    _tag_time_unit = '_CONDITION_exposure_time_UNIT_s'
    _tag_time = '_CONDITION_exposure_time_d'
    df = facets.summary(solr_api_url, auth_object, query=_query, fields=[
        "topcategory_s", "endpointcategory_s", _tag_method, _tag_endpoint,
        "effectendpoint_type_s",
        _tag_conc, _tag_conc_unit,
        _tag_time, _tag_time_unit,
    ], log_query=log_query, log_result=beautify)

    for _, row in df.iterrows():
        method = row[_tag_method]
        endpoint = row[_tag_endpoint]

        # Separate name so the ``query`` parameter is not clobbered.
        study_query = "{}:{} {}:{} {}:[* TO *] ".format(
            _tag_method, method, _tag_endpoint, endpoint, _tag_conc)
        params = {'q': study_query, 'fq': 'type_s:study', 'wt': 'json',
                  'rows': 10000}
        r = client_solr.post(solr_api_url, query=params, auth=auth_object)
        try:
            tmp = pd.DataFrame(r.json()['response']['docs'])
        finally:
            # Release the response on every path, including the early
            # ``continue`` below and a failed JSON decode.
            r.close()

        if _tag_conc not in tmp.columns:
            continue

        # "time" is a display label (value + unit) used as the facet row.
        tmp["time"] = ""
        if _tag_time in tmp.columns:
            # Cast to str: appending the unit to a numeric column raises
            # TypeError.
            tmp["time"] = tmp[_tag_time].astype(str)
        if _tag_time_unit in tmp.columns:
            # NOTE(review): the unit comes from the facet row, not from the
            # fetched documents - confirm this is intended.
            tmp["time"] = tmp["time"] + " " + str(row[_tag_time_unit])

        for cell in tmp[_tag_cell].unique():
            tmp_cell = tmp.loc[tmp[_tag_cell] == cell]
            for unit in tmp_cell[_tag_unit].unique():
                # Filter the per-cell subset (not the full frame) so each
                # plot only contains the current cell type.
                tmp_unit = tmp_cell.loc[tmp_cell[_tag_unit] == unit]
                for cu in tmp_unit[_tag_conc_unit].unique():
                    tmp_cu = tmp_unit.loc[tmp_unit[_tag_conc_unit] == cu]
                    if tmp_cu.empty:
                        continue
                    print(method, endpoint, cell, unit, cu)
                    # Format only the filename so braces in folder_output
                    # cannot break the template; "/" is not valid in a
                    # filename component.
                    filename = "{}_{}_{}_{}_{}.png".format(
                        method, endpoint, unit.replace("/", "_"), cell,
                        cu.replace("/", "_"))
                    imgfile = os.path.join(folder_output, "data", filename)
                    plot_dose_response(tmp_cu, method, endpoint, unit, cu,
                                       facet_row="time",
                                       facet_col="publicname_s",
                                       color="reference_owner_s",
                                       imgfile=imgfile)
Exemplo n.º 4
0
# Script fragment (appears to be exported from a notebook): fetches all study
# documents for the material "NRCWE-006" and prints a quick overview of them.
# `client_solr`, `annotation_folder`, `query`, `solr_api_url` and
# `auth_object` are defined outside this fragment.
facets = client_solr.Facets()
facets.set_annotation_folder(annotation_folder)

# FAIR - Findable

## Data model

import pandas as pd

import pandas as pd

# Extend the existing `query` with a required match on the material's public name.
docs_query = "{}  +publicname_s:({})".format(query, "NRCWE-006")
print(docs_query)

# Restrict results to study documents and request up to 10000 rows as JSON.
_query = {'q': docs_query, 'fq': 'type_s:study', 'wt': 'json', 'rows': 10000}
r = client_solr.post(solr_api_url, query=_query, auth=auth_object)
# One DataFrame row per returned document.
tmp = pd.DataFrame(r.json()['response']['docs'])
print(tmp.shape)
print(tmp.columns)
# Bare expression: only shows output when run as the last line of a notebook cell.
tmp.head()

# Distinct data owners, document identifiers and methods present in the result.
print(tmp["owner_name_s"].unique())
doc_uuids = tmp["document_uuid_s"].unique()
print(tmp["E.method_s"].unique())
#for doc_uuid in doc_uuids:
#    doc = tmp.loc[tmp["document_uuid_s"]==doc_uuid]
# Column names selected for later use - presumably by the treemap that follows.
cols = [
    "publicname_s", "document_uuid_s", "effectendpoint_s", "loValue_d",
    "E.method_s", "unit_s"
]
fig = px.treemap(