def GOQLtoFindDrugs(TargetIds: list, TargetType='Protein', drugEffect=None):
    """Build an OQL query that finds drugs acting on the given targets.

    :param TargetIds: database ids of the target entities
    :param TargetType: 'Protein', 'Small Molecule', or any other object type
    :param drugEffect: Effect values used to filter relations for non-protein
        targets; defaults to ['negative']. Not applied when TargetType == 'Protein'.
    :return: OQL query string
    """
    # Avoid the shared-mutable-default pitfall while keeping the same default value.
    if drugEffect is None:
        drugEffect = ['negative']

    if TargetType == 'Protein':
        # Protein targets use the dedicated drug-target query; no Effect filter here.
        return OQL.get_drugs(for_targets_with_ids=TargetIds)

    # The original elif/else branches were identical except for the relation types.
    if TargetType == 'Small Molecule':
        rel_types = ['Regulation', 'MolSynthesis']
    else:
        rel_types = ['Regulation']

    oql_query = OQL.expand_entity(PropertyValues=TargetIds,
                                  SearchByProperties=['id'],
                                  expand_by_rel_types=rel_types,
                                  expand2neighbors=['Small Molecule'],
                                  direction='upstream')
    oql_query += ' AND Effect = (' + ','.join(drugEffect) + ')'
    return oql_query
def retreive_clinical_trials(drugs_name_file=None):
    """Retrieve completed clinical trials, optionally limited to drugs from a file.

    :param drugs_name_file: optional path to a text file with one drug name per
        line; when given, trials are restricted to those drugs (matched by Name
        or Alias)
    :return: result of ps_api.process_oql for the assembled query
    """
    request_name = 'Find all completed clinical trials'
    oql_query = 'SELECT Relation WHERE objectType = ClinicalTrial AND TrialStatus = Completed AND NeighborOf (SELECT Entity WHERE objectType = SmallMol)'
    if isinstance(drugs_name_file, str):
        with open(drugs_name_file) as f:
            drug_names = [line.rstrip('\n') for line in f]
        print('Read %s with %d drug names' % (drugs_name_file, len(drug_names)))
        # get_search_strings returns (property string, value string); only the
        # value string is used in the query, so the first element is discarded.
        _, drug_names_str = OQL.get_search_strings(['Name', 'Alias'], drug_names)
        oql_query += ' AND NeighborOf (SELECT Entity WHERE (Name,Alias)= ({names}))'.format(names=drug_names_str)
        request_name = 'Find clinical trials for drugs in {fname}'.format(fname=drugs_name_file)
    return ps_api.process_oql(oql_query, request_name, debug=False)
args.infile_has_header, use_cache=False, map2type=entity_types)  # NOTE(review): tail of a call that begins before this chunk

if len(args.pathways) > 0:
    print("Begin linking entities mapped from infile to pathways")
    # pathways arrive as a single comma-separated command-line argument
    LinkToPathways = str(args.pathways).split(",")
    start_time = time.time()
    for PathwayName in LinkToPathways:
        # fetch ids of pathway members restricted to the listed entity types
        PathwayMembersId2Entity = search.get_pathway_member_ids(
            [PathwayName],
            search_pathways_by=['Name'],
            only_entities=['Protein', 'FunctionalClass', 'Complex'],
            with_properties=['objectType'])
        pathway_components = set(PathwayMembersId2Entity.keys())
        # expand the member set with their ontology children
        QueryOntology = OQL.get_childs(list(pathway_components), ['id'])
        pathway_components.update(search._obj_id_by_oql(QueryOntology))
        if len(pathway_components) == 0:
            print('No entity for %s found in the database' % PathwayName)
        else:
            # semantically link the input entities to this pathway's members
            search.link2concept(PathwayName, list(pathway_components))
            exec_time = search.execution_time(start_time)
            print("Entities in file %s were linked to %s pathway in %s" % (EntityListFile, PathwayName, exec_time))
else:
    print('No pathways were specified for semantic linking with entities from \"%s\"' % (EntityListFile))
# Relation properties to retrieve. If properties from NetworkxObjects.REF_ID_TYPES
# or NetworkxObjects.REF_PROPS are added to REL_PROPs, output size may increase
# dramatically because it will contain one reference per row.
REL_PROPs = ['Name', 'Effect', 'Mechanism', 'ChangeType']
ENT_PROPs = ['Name', 'Description', 'Cell Localization']

# Fixed accidental double assignment: was `ps_api = ps_api = open_api_session()`.
ps_api = open_api_session()
ps_api.PageSize = 10000
ps_api.add_rel_props(list(set(REL_PROPs) | PS_ID_TYPES))
ps_api.add_ent_props(ENT_PROPs)

# this dump file will list all proteins in the database with connectivity > 0:
ps_api.add_dump_file('Proteins from database.tsv', replace_main_dump=True)
print('Fetching all proteins from the database')
ProteinsOnyGraph = ps_api.process_oql(
    "Select Entity WHERE objectType = Protein AND Connectivity > 0 AND Name LIKE 'A%'",
    flush_dump=True)

# dump file accumulates all data in one big file
ps_api.add_dump_file("Protein neighbors dump.tsv", replace_main_dump=True)
out_dir = 'csv'
total_proteins = ProteinsOnyGraph.number_of_nodes()  # invariant, hoisted out of the loop
for counter, (node_id, psObj) in enumerate(ProteinsOnyGraph.nodes(data=True), start=1):
    protein_name = psObj['Name'][0]
    print('Finding neighbors for \"%s\", node #%d from %d total' % (protein_name, counter, total_proteins))
    oql_query = GOQL.expand_entity([node_id], SearchByProperties=['id'])
    ProteinNeighborsGraph = ps_api.process_oql(oql_query)
    protein_neighbors_file = out_dir + '/' + protein_name + '_neighbors.csv'
    ps_api.to_csv(protein_neighbors_file)
    ps_api.Graph.clear()  # need to release memory when performing large dumps
import ElsevierAPI.ResnetAPI.PathwayStudioGOQL as OQL
from ElsevierAPI.ResnetAPI.NetworkxObjects import Reference
from ElsevierAPI import load_api_config
from ElsevierAPI.ResnetAPI.ResnetAPISession import APISession
import pandas as pd

# Open a Pathway Studio API session using credentials from the config file.
APIconfig = load_api_config()
ps_api = APISession(APIconfig['ResnetURL'], APIconfig['PSuserName'], APIconfig['PSpassword'])

fileIn = 'Drugs for Regulators in 4 patients.txt'
InDir = 'D:\\Python\\PBTA\\PNOC003\\4 patients analysis\\'
# input file: one drug name per line
with open(InDir + fileIn) as f:
    drugs = [line.rstrip('\n') for line in f]

print('Finding drugs in %s in Resnet' % (fileIn))
OQLquery = OQL.get_entities_by_props(drugs, ['Name', 'Alias'], only_object_types=['Small Molecule'])
ps_api.add_ent_props(['Name', 'PharmaPendium ID'])
resnet_drugs = ps_api.process_oql(OQLquery, 'Find all drugs')
print('Found %d drugs in Resnet' % len(resnet_drugs))

# removing duplicates with no PharmaPendium ID
resnet2pharmapendium_map = dict()
for i, drug in resnet_drugs.nodes(data=True):
    try:
        resnet2pharmapendium_map[str(drug['Name'][0]).lower()] = drug['PharmaPendium ID'][0]
    except KeyError:
        # drug node has no 'PharmaPendium ID' property — skip it
        continue

all_drugs = list(resnet_drugs.nodes(data=True))
for i, drug in all_drugs:
    # NOTE(review): loop body continues beyond this chunk — not visible here
api_config)  # NOTE(review): tail of a call begun before this chunk; specify here path to your APIconfig file. Defaults to ./ElsevierAPI/APIconfig.json
# need to retrieve aliases from the database in case input metabolites are found by Alias
ps_api.add_ent_props(['Alias'])
ps_api.PageSize = 10000
# dump file contains references for all relations retrieved from database
# do not use dump file unless you need to include references into report:
ps_api.DumpFiles.clear()

# retrieve all ChemicalReaction linked to metabolites in excel_file_name as
# ResnetGraph from the database, querying in batches of `step` names:
step = 1000
for i in range(0, len(input_metabolite_names), step):
    name_list = input_metabolite_names[i:i + step]
    my_goql_query = OQL.expand_entity(name_list, ['Name', 'Alias'], expand_by_rel_types=['ChemicalReaction'])
    request_name = 'Retrieve metabolic reactions graph for {count} metabolites'.format(count=len(name_list))
    ps_api.process_oql(my_goql_query, request_name)

reactions_graph = ps_api.Graph
# map input names to graph objects by Name and by Alias, then merge the maps;
# objid2input_names = {obj_id:[input_names]} - allows for duplicates when mapping by name+alias
input_name2objs, objid2input_names = reactions_graph.get_prop2obj_dic('Name', input_metabolite_names)
aliasinput_2objs, objid2input_alias = reactions_graph.get_prop2obj_dic('Alias', input_metabolite_names)
objid2input_names.update(objid2input_alias)
metabolite_ids = list(objid2input_names.keys())
# find enzymes linked to ChemicalReactions and retrieve their ontology children (proteins)
enzymes = reactions_graph.get_objects(PROTEIN_TYPES)
InputDiseaseNames = ','.join(SearchEntitiesBy)

# Dump-file locations for graph data written by ps_api in tab-delimited format.
myDir = ''  # 'D:\\Python\\PS_API\\'

def _dump_path(prefix):
    # Compose a .tsv dump-file path for the current disease name set.
    return myDir + prefix + InputDiseaseNames + '.tsv'

foutDiseaseSNPs = _dump_path("Gene variants linked to ")
foutDiseaseProteins = _dump_path("Genes with SNPs linked to ")
foutDrugsForDiseaseProteins = _dump_path("Druggable targets for ")

ps_api.add_rel_props(REL_PROPs)
ps_api.add_ent_props(ENT_PROP_Neo4j)

# Step 1: GeneticVariants linked to the input diseases.
print(f"Finding GeneticVariants linked to {InputDiseaseNames}")
ps_api.add_dump_file(foutDiseaseSNPs, replace_main_dump=True)
variant_query = GOQL.expand_entity(PropertyValues=SearchEntitiesBy,
                                   SearchByProperties=['Name', 'Alias'],
                                   expand_by_rel_types=[],
                                   expand2neighbors=['GeneticVariant'])
ps_api.process_oql(variant_query)
SNPIds = list(set(ps_api.Graph.get_entity_ids(['GeneticVariant'])))

# Step 2: Proteins carrying those GeneticVariants.
print(f"Finding Proteins containing GeneticVariants linked to {InputDiseaseNames}")
ps_api.add_dump_file(foutDiseaseProteins, replace_main_dump=True)
protein_query = GOQL.expand_entity(PropertyValues=SNPIds,
                                   SearchByProperties=['id'],
                                   expand_by_rel_types=['GeneticChange'],
                                   expand2neighbors=['Protein'])
ps_api.process_oql(protein_query, flush_dump=True)

# Step 3: PPI network between the disease-linked genes.
foutDiseasePPI = _dump_path("\\PPIs between genes linked to ")
PPIgraph = ps_api.get_ppi_graph(foutDiseasePPI)
# calculating centrality