示例#1
0
# Load the spreadsheet that lists the metabolites to look up.
excel_file_name = 'my_metabolites.xlsx'
input_excel = pd.read_excel(excel_file_name)

# 0-based position of the column holding metabolite names or aliases.
# NOTE(review): index 1 selects the SECOND spreadsheet column, although the
# original comment said the names must be in the first column -- confirm
# which one is intended before changing this value.
metabolite_column = 1

# Unique names in first-seen order. dict.fromkeys de-duplicates in a single
# pass instead of re-scanning the growing list for every cell, and dropna()
# skips blank cells so that NaN never ends up in the list of names.
input_metabolite_names = list(
    dict.fromkeys(input_excel[input_excel.columns[metabolite_column]].dropna())
)
# To try the script on a small sample, uncomment the next line:
#input_metabolite_names = input_metabolite_names[0:3]

# Open a session with the database. Data retrieved through ps_api is loaded
# into APISession.Graph, which is derived from Networkx:MultiDiGraph.
# api_config is the path to your APIconfig file; according to the original
# note it defaults to ./ElsevierAPI/APIconfig.json.
api_config = 'path2apiconfig.json'
ps_api = open_api_session(api_config)

# Aliases have to be retrieved from the database as well, in case an input
# metabolite is found by its Alias.
ps_api.add_ent_props(['Alias'])
ps_api.PageSize = 10000

# The dump file contains references for all relations retrieved from the
# database. Do not use it unless the references are needed in the report,
# hence the list of dump files is emptied here.
ps_api.DumpFiles.clear()

# Retrieve all ChemicalReaction relations linked to the metabolites in excel_file_name from the database as a ResnetGraph:
step = 1000
for i in range(0, len(input_metabolite_names), step):
    name_list = input_metabolite_names[i:i + step]
    my_goql_query = OQL.expand_entity(name_list, ['Name', 'Alias'],
示例#2
0
from ElsevierAPI import open_api_session

ps_api = open_api_session()
import csv

# Tab-separated input file. The loop below reads column 0 of every row as a
# disease name and column 1 as a gene name.
OMIMpairsFile = "OMIM/OMIMDisease-Gene_Demo.txt"

OmimProp = "OMIM relation"
ENTITY_PROPS = ['Name']
REL_PROPS = ['Name', 'Mechanism', 'RelationNumberOfReferences']

# csv.reader pulls rows lazily, so the file has to stay open for the whole
# loop; the with-block closes it afterwards. newline="" is what the csv
# module documentation asks for when opening a file for csv.reader.
with open(OMIMpairsFile, newline="") as tsv_file:
    OMIMpairs = csv.reader(tsv_file, delimiter="\t")

    for pair in OMIMpairs:
        if len(pair) < 2:
            # Blank or incomplete row: there is no disease/gene pair to look up.
            continue
        disease, gene = pair[0], pair[1]

        # Spelling variants of the disease name used for matching:
        # the name itself, dashes replaced by spaces, possessive "'s" removed.
        diseaseAliases = {
            disease,
            disease.replace('-', ' '),
            disease.replace('\'s', ''),
        }
        # Names ending in " type" or " form" are also tried without that
        # 5-character suffix.
        if disease.endswith((" type", " form")):
            diseaseAliases.add(disease[:-5])
        diseaseAliases = list(diseaseAliases)

        # First attempt: match the gene by Name.
        FoundRelations = ps_api.connect_entities(
            [gene], ["Name"], ['Protein', 'Complex', 'FunctionalClass'],
            diseaseAliases, ["Name", "Alias"], ['Disease'],
            REL_PROPS=REL_PROPS)
        if FoundRelations is None:
            # Second attempt: match the gene by Alias. The result must go
            # into FoundRelations; the original assigned it to a misspelled
            # name (FoundRelation), so the retry had no effect.
            FoundRelations = ps_api.connect_entities(
                [gene], ["Alias"], ['Protein', 'Complex', 'FunctionalClass'],
                diseaseAliases, ["Name", "Alias"], ['Disease'],
                REL_PROPS=REL_PROPS)
示例#3
0
from ElsevierAPI import open_api_session
from ElsevierAPI.ETM_API.references import PS_ID_TYPES,PS_BIBLIO_PROPS,SENTENCE_PROPS,CLINTRIAL_PROPS,RELATION_PROPS

# Open the database session. Data retrieved through ps_api is loaded into
# APISession.Graph, which is derived from Networkx:MultiDiGraph.
# Put the path to your APIconfig file here; if api_config_file is not
# specified, the default APIConfig from __init__.py is used.
ps_api = open_api_session(api_config_file=None)

# All property names exported by the references module, gathered in one list.
all_relation_properties = [
    *PS_ID_TYPES,
    *PS_BIBLIO_PROPS,
    *SENTENCE_PROPS,
    *CLINTRIAL_PROPS,
    *RELATION_PROPS,
]

# add_rel_props specifies which attributes to retrieve for relations.
# The order of the list defines the column order in the dump file.
ps_api.add_rel_props([
    'Name', 'Effect', 'Mechanism', 'ChangeType', 'BiomarkerType',
    'QuantitativeType', 'Sentence', 'Title', 'PMID', 'DOI',
])

# add_ent_props specifies which attributes to retrieve for nodes (entities).
# The order of the list defines the column order in the dump file.
ps_api.add_ent_props(['Name', 'Description', 'URN'])

# False (the default) keeps all downloaded data in ps_api.Graph;
# set it to True for large downloads.
ps_api.clear_graph_cache = False

# Wildcard character for LIKE clauses; only the alternative query below uses it.
pcnt = '%'
# Alternative query:
#my_goql_query = 'SELECT Relation WHERE objectType=StateChange AND CellType LIKE \'' + pcnt + 'hepatocyte' + pcnt + '\''
my_goql_query = (
    'SELECT Relation WHERE '
    'NeighborOf (SELECT Entity WHERE Alias = NSCLC) AND '
    'NeighborOf (SELECT Entity WHERE objectType = GeneticVariant)'
)
# NOTE(review): this label mentions hepatocytes, which matches the
# commented-out alternative query rather than the active one -- confirm.
request_name = 'Find relations reported in hepatocytes'

# False (the default) prints one row per reference in every relation.
# If True, ResnetAPIsessionDump.tsv has only one row per relation, with the
# reference properties concatenated into one string per property.
ps_api.print_rel21row = False

if __name__ == "__main__":
    # Position to start the download from, given as the number of relations
    # (or entities) downloaded previously. Change it to resume a download
    # that was interrupted.
    resume_position = 0
    ps_api.start_download_from(resume_position)

    # process_oql retrieves data by iterations; the iteration size is
    # controlled by ps_api.PageSize.
    my_graph = ps_api.process_oql(
        my_goql_query,
        request_name,
        debug=False,
        flush_dump=True,
    )