# Script fragment (truncated at both ends in this view): reads metabolite names/aliases from one column of an Excel file, de-duplicates them, opens an API session and builds OQL queries for the names in batches of `step`.
excel_file_name = 'my_metabolites.xlsx' input_excel = pd.read_excel(excel_file_name) metabolite_column = 1 #metabolite names or aliases must be in the first column in Excel file (NOTE(review): index 1 selects the second column under 0-based indexing - confirm which is intended). input_metabolite_names = [] [ input_metabolite_names.append(x) for x in input_excel[input_excel.columns[metabolite_column]] if x not in input_metabolite_names ] # making alias list unique #input_metabolite_names = input_metabolite_names[0:3] # ps_api retrieves data from the database and loads it into APISession.Graph derived from Networkx:MultiDiGraph api_config = 'path2apiconfig.json' ps_api = open_api_session( api_config ) # specify here path to your APIconfig file. Defaults to ./ElsevierAPI/APIconfig.json ps_api.add_ent_props( ['Alias'] ) # need to retrieve aliases from the database in case input metabolites are found by Alias ps_api.PageSize = 10000 # dump file contains references for all relations retrieved from database # do not use dump file unless you need to include references into report: ps_api.DumpFiles.clear() # retrieve all ChemicalReaction linked to metabolites in excel_file_name as ResnetGraph from the database: step = 1000 for i in range(0, len(input_metabolite_names), step): name_list = input_metabolite_names[i:i + step] my_goql_query = OQL.expand_entity(name_list, ['Name', 'Alias'],
# For every disease/gene pair listed in a tab-separated OMIM file, look up the
# relations connecting the gene to the disease. The gene is matched by Name
# first and, when that finds nothing, by Alias.
import csv

from ElsevierAPI import open_api_session

ps_api = open_api_session()

OMIMpairsFile = "OMIM/OMIMDisease-Gene_Demo.txt"
OmimProp = "OMIM relation"
ENTITY_PROPS = ['Name']
REL_PROPS = ['Name', 'Mechanism', 'RelationNumberOfReferences']


def _disease_name_variants(disease):
    """Return the distinct spellings of *disease* used for the lookup.

    The variants are: the name as given, the name with dashes replaced by
    spaces, the name without a trailing " type" / " form", and the name
    with every "'s" removed. Duplicates are dropped and first-seen order is
    kept, so the query arguments are reproducible from run to run.
    """
    variants = [disease, disease.replace('-', ' ')]
    if disease.endswith((" type", " form")):
        variants.append(disease[:-5])  # both suffixes are 5 characters long
    variants.append(disease.replace('\'s', ''))
    return list(dict.fromkeys(variants))


# newline='' is what the csv module expects from the file object; the `with`
# block guarantees the handle is closed even if a lookup raises.
with open(OMIMpairsFile, newline='') as tsv_file:
    OMIMpairs = csv.reader(tsv_file, delimiter="\t")
    for pair in OMIMpairs:
        if len(pair) < 2:
            # Blank or malformed row: there is no disease/gene pair to query.
            continue

        diseaseAliases = _disease_name_variants(pair[0])
        gene = pair[1]

        FoundRelations = ps_api.connect_entities(
            [gene], ["Name"], ['Protein', 'Complex', 'FunctionalClass'],
            diseaseAliases, ["Name", "Alias"], ['Disease'],
            REL_PROPS=REL_PROPS)

        if FoundRelations is None:
            # Nothing found by gene name: retry by gene alias. The result is
            # stored in FoundRelations; the original assigned it to a
            # misspelled variable, so the fallback result was lost.
            FoundRelations = ps_api.connect_entities(
                [gene], ["Alias"], ['Protein', 'Complex', 'FunctionalClass'],
                diseaseAliases, ["Name", "Alias"], ['Disease'],
                REL_PROPS=REL_PROPS)
from ElsevierAPI import open_api_session
from ElsevierAPI.ETM_API.references import (
    PS_ID_TYPES,
    PS_BIBLIO_PROPS,
    SENTENCE_PROPS,
    CLINTRIAL_PROPS,
    RELATION_PROPS,
)

# ps_api retrieves data from the database and loads it into APISession.Graph
# (derived from Networkx MultiDiGraph).
# Pass the path to your APIconfig file here; when api_config_file is not
# specified, the default APIConfig from __init__.py is used.
ps_api = open_api_session(api_config_file=None)

# Every known relation property name, gathered into one flat list.
all_relation_properties = [
    *PS_ID_TYPES,
    *PS_BIBLIO_PROPS,
    *SENTENCE_PROPS,
    *CLINTRIAL_PROPS,
    *RELATION_PROPS,
]

# Relation attributes to retrieve from the database.
# The list order defines the column order in the dump file.
ps_api.add_rel_props([
    'Name',
    'Effect',
    'Mechanism',
    'ChangeType',
    'BiomarkerType',
    'QuantitativeType',
    'Sentence',
    'Title',
    'PMID',
    'DOI',
])

# Node (entity) attributes to retrieve from the database.
# The list order defines the column order in the dump file.
ps_api.add_ent_props(['Name', 'Description', 'URN'])

# False (the default) keeps all downloaded data in ps_api.Graph;
# set it to True for large downloads.
ps_api.clear_graph_cache = False

pcnt = '%'
# Alternative example query (uses pcnt as the LIKE wildcard):
#my_goql_query = 'SELECT Relation WHERE objectType=StateChange AND CellType LIKE \'' + pcnt + 'hepatocyte' + pcnt + '\''
my_goql_query = (
    'SELECT Relation WHERE NeighborOf (SELECT Entity WHERE Alias = NSCLC) '
    'AND NeighborOf (SELECT Entity WHERE objectType = GeneticVariant)'
)
# NOTE(review): this label mentions hepatocytes, which matches the
# commented-out query above rather than the active NSCLC query - confirm.
request_name = 'Find relations reported in hepatocytes'

# False (the default) prints 1 row per reference in every relation.
# If True, ResnetAPIsessionDump.tsv will have only 1 row per relation, with
# reference properties concatenated into 1 string per property.
ps_api.print_rel21row = False

if __name__ == "__main__":
    # If a download was interrupted, change this parameter to resume from a
    # certain position. The position must be given as the number of relations
    # (or entities) downloaded previously.
    ps_api.start_download_from(0)

    # process_oql retrieves data in iterations; the iteration size is
    # controlled by ps_api.PageSize.
    my_graph = ps_api.process_oql(
        my_goql_query, request_name, debug=False, flush_dump=True)