def load_syngo_genes():
    syngo = Ontology.from_table('../prev_databases/SynGO_BP.txt')
    syngo_bp_genes = syngo.genes
    syngo = Ontology.from_table('../prev_databases/SynGO_CC.txt')
    syngo_cc_genes = syngo.genes
    syngo_genes = list(set(syngo_bp_genes + syngo_cc_genes))
    return syngo_genes
def load_syngo_genes():
    syngo = Ontology.from_table('/Users/karenmei/Documents/Synapse_Ontology/NetworkClass/Metrics/SynGO_BP.txt')
    syngo_bp_genes = syngo.genes
    syngo = Ontology.from_table('/Users/karenmei/Documents/Synapse_Ontology/NetworkClass/Metrics/SynGO_CC.txt')
    syngo_cc_genes = syngo.genes
    syngo_genes = list(set(syngo_bp_genes + syngo_cc_genes))
    training = load_training_genes()
    syngo_genes = list(set(syngo_genes) - set(training))
    return syngo_genes
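# load_training_genes() is referenced above but not shown in these snippets.
# A minimal sketch, assuming the training genes live in a plain one-gene-per-line
# text file; the path below is a placeholder, not the original file.
def load_training_genes():
    with open('../prev_databases/training_genes.txt') as f:  # hypothetical path
        return [line.strip() for line in f if line.strip()]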
def compare(fn):
    # Generate custom ontology 1 from the synapse branch of human GO
    # ontology_file = Generate_Ontology_File('GO:0045202', 'ont1.txt')
    ont1 = Ontology.from_table('../../analyses/Histogram_ont/synapse.txt')
    # print('num synapse ontology terms', len(ont1.terms))
    # Note: the original signature was compare(ont) but the body read from 'fn';
    # the parameter is a filename, so it is named fn here.
    ont2 = Ontology.from_table(fn)
    # print('num custom ontology terms', len(ont2.terms))
    return Num_Enriched_Modules(ont1, ont2)
def compare_to_go(ont_fn):
    # Generate custom ontology 1 from the synapse branch of human GO
    # ontology_file = Generate_Ontology_File('GO:0045202', 'ont1.txt')
    synapse_ont = '/home/joreyna/projects/BNFO286/synapse_ontology/analyses/Histogram_ont/synapse.txt'
    ont1 = Ontology.from_table(synapse_ont)
    # print('num synapse ontology terms', len(ont1.terms))
    ont2 = Ontology.from_table(ont_fn)
    # print('num custom ontology terms', len(ont2.terms))
    return Num_Enriched_Modules(ont1, ont2)
def create_ddot_object(self, filename="ddot_output.txt", **kwargs):
    from ddot import Ontology
    self.ontology = Ontology.from_table(filename, clixo_format=True, **kwargs)
    return self
def load_pheno_hpo():
    hpo = Ontology.from_table('/Users/karenmei/Documents/Synapse_Ontology/HPO/making_HPO/HPO_parent_child.txt')
    # print(hpo)
    hpo = hpo.propagate(direction='forward', gene_term=True, term_term=False)
    hpo = hpo.focus(branches=['Phenotypic abnormality'])
    return hpo
def Generate_Ontology_File(Term):
    ndex_server = 'http://public.ndexbio.org'
    go_human = Ontology.from_ndex('http://public.ndexbio.org/v2/network/16565851-4bfc-11e9-9f06-0ac135e8bacf')
    ont = go_human.focus(Term)
    ont.to_table('custom_ontology.txt')
    return ont
def run_ddot(theargs):
    try:
        (e_code, c_out, c_err) = run_clixo(theargs.clixopath, theargs.input,
                                           theargs.alpha, theargs.beta)
        df = pd.read_csv(io.StringIO(c_out.decode('utf-8')), sep='\t',
                         engine='python', header=None, comment='#')
        if theargs.output is not None:
            try:
                with open(theargs.output, 'wb') as f:
                    f.write(c_out)
                outputdir = os.path.dirname(theargs.output)
                statres = os.stat(outputdir)
                os.chown(theargs.output, statres[stat.ST_UID], statres[stat.ST_GID])
            except Exception as ex:
                sys.stderr.write('Caught exception trying to write file or '
                                 'change permission: ' + str(ex))
        ont1 = Ontology.from_table(df, clixo_format=True, parent=0, child=1)
        if theargs.ndexserver.startswith('http://'):
            server = theargs.ndexserver
        else:
            server = 'http://' + theargs.ndexserver
        idf = pd.read_csv(theargs.input, sep='\t', engine='python',
                          header=None, comment='#')
        idf.rename(columns={0: 'Gene1', 1: 'Gene2', 2: 'has_edge'}, inplace=True)
        # Note: ndex_user/ndex_pass were swapped in the original call.
        ont_url, G = ont1.to_ndex(name=theargs.ndexname,
                                  network=idf,
                                  main_feature='has_edge',
                                  ndex_server=server,
                                  ndex_user=theargs.ndexuser,
                                  ndex_pass=theargs.ndexpass,
                                  layout=theargs.ndexlayout,
                                  visibility=theargs.ndexvisibility)
        return 'RESULT:' + ont_url.strip().replace('/v2/network/', '/#/network/') + '\n'
    except OverflowError as ofe:
        logger.exception('Error running clixo')
        return 'ERROR:' + str(ofe) + '\n'
    except Exception as e:
        logger.exception('Some other error')
        return 'ERROR:' + str(e) + '\n'
    return {'error': 'unknown error'}
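# run_clixo() above is a local subprocess wrapper, not ddot.Ontology.run_clixo.
# A minimal sketch, assuming the CliXO binary accepts the input file, alpha, and
# beta as positional arguments; the exact CLI of the installed build may differ.
import subprocess

def run_clixo(clixopath, inputfile, alpha, beta):
    # Run the CliXO binary and return (exit code, stdout bytes, stderr bytes),
    # matching how run_ddot() consumes the result.
    p = subprocess.Popen([clixopath, inputfile, str(alpha), str(beta)],
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = p.communicate()
    return p.returncode, out, err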
def load_syngo_presynapse():
    syngo = Ontology.from_table('/Users/karenmei/Documents/Synapse_Ontology/NetworkClass/Metrics/SynGO_CC.txt')
    syngo = syngo.propagate(direction='forward', gene_term=True, term_term=False)
    pre = syngo.focus(branches=['presynapse'])
    pre_genes = pre.genes
    return pre_genes
def find_GO_score_matrix():
    ndex_server = 'http://public.ndexbio.org'
    ndex_user, ndex_pass = '******', 'Synapse'
    go_human = Ontology.from_ndex('http://public.ndexbio.org/v2/network/16565851-4bfc-11e9-9f06-0ac135e8bacf')
    print(go_human)
    sim, genes = go_human.flatten()
    sim_df = pd.DataFrame(sim, index=genes, columns=genes)
    return sim_df
def GO_Focus(Term):
    ndex_server = 'http://public.ndexbio.org'
    ndex_user, ndex_pass = '******', 'Synapse'
    go_human = Ontology.from_ndex('http://public.ndexbio.org/v2/network/16565851-4bfc-11e9-9f06-0ac135e8bacf')
    go_genes = go_human.genes
    subont = go_human.focus(Term)
    subont_genes = subont.genes
    return go_genes, subont_genes
def upload_file():
    # =================
    # default values
    # =================
    alpha = 0.05
    beta = 0.5
    try:
        data = request.files.get('file')
    except Exception as e:
        raise HTTPError(500, e)
    if data and data.file:
        if request.query.alpha:
            alpha = request.query.alpha
        if request.query.beta:
            beta = request.query.beta
        with tempfile.NamedTemporaryFile('w', delete=False) as f:
            f.write(data.file.read())
            f_name = f.name
        try:
            clixo_file = generate_clixo_file(f_name, alpha, beta)
            with open(clixo_file, 'r') as f_saved:
                df = pd.read_csv(f_saved, sep='\t', engine='python',
                                 header=None, comment='#')
            print(df.columns)
            ont1 = Ontology.from_table(df, clixo_format=True, parent=0, child=1)
            ont_url, G = ont1.to_ndex(name='MODY',
                                      ndex_server='http://test.ndexbio.org',
                                      ndex_user='******',
                                      ndex_pass='******',
                                      layout='bubble-collect',
                                      visibility='PUBLIC')
            if ont_url is not None and len(ont_url) > 0 and 'http' in ont_url:
                uuid = ont_url.split('/')[-1]
                return 'File has been processed. UUID: %s \n' % uuid
            else:
                return 'File has been processed. UUID: %s \n' % ont_url
        except OverflowError as ofe:
            print('Error with running clixo')
    else:
        raise HTTPError(422, '**** FILE IS MISSING ****')
    return "Unable to complete process. See stack message above."
def test_from_sim(self):
    """Test that the run_clixo function can run."""
    sim, genes = self.ont.flatten()
    genes = list(genes)
    print(genes)
    sim = pd.DataFrame(sim, columns=genes, index=genes)
    ddo = Ontology.run_clixo(
        sim,
        'test_df_output.txt',
        'test_clixo_output.txt',
        'test_output.txt',
        square=True,
        square_names=genes,
    )
    print('ddo:\n', ddo)
class TestDdot(unittest.TestCase):
    def setUp(self):
        self.ont = Ontology(hierarchy, mapping)

    def test_from_sim(self):
        """Test that the run_clixo function can run."""
        sim, genes = self.ont.flatten()
        genes = list(genes)
        print(genes)
        sim = pd.DataFrame(sim, columns=genes, index=genes)
        ddo = Ontology.run_clixo(
            sim,
            'test_df_output.txt',
            'test_clixo_output.txt',
            'test_output.txt',
            square=True,
            square_names=genes,
        )
        print('ddo:\n', ddo)
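# The test class assumes 'hierarchy' and 'mapping' are defined at module scope
# (for example, the toy hierarchy from the DDOT tutorial). A minimal way to run it:
import unittest

if __name__ == '__main__':
    unittest.main()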
def analyze_gene_enrichment(ont_fn, disease_gene_fn):
    # Read in our ontology
    # Generate custom ontology from the Chromatin branch of human GO
    # ontology_file = Generate_Ontology_File('GO:0000785')
    ont = Ontology.from_table(ont_fn)
    translated = Find_GO_Focus_GeneDict(ont)
    # Test genes: autism
    text_file = open(disease_gene_fn, "r")
    test_gene_list = text_file.read().splitlines()
    text_file.close()
    # print("Number of autism genes in our ontology:",
    #       len(set(ont.genes).intersection(set(test_gene_list))))
    num_ont_disease_genes = len(set(ont.genes).intersection(set(test_gene_list)))
    # Find number of test genes in enriched modules:
    num_enriched_disease_genes = Find_num_genes_in_enriched(ont, translated, test_gene_list)
    return (num_ont_disease_genes, num_enriched_disease_genes)
def load_pheno_hpo():
    hpo = Ontology.from_table('../other_resources/HPO_parent_child.txt')
    # print(hpo)
    hpo = hpo.propagate(direction='forward', gene_term=True, term_term=False)
    hpo = hpo.focus(branches=['Phenotypic abnormality'])
    return hpo
    # Fragment: apparently the tail of a Find_num_genes_in_enriched(ont, chr_ont,
    # test_gene_list) function; its def line is not shown.
    true_terms = Find_Enrichment(ont, chr_ont, test_gene_list)
    genes_in_true_terms = []
    for item in true_terms:
        term = item[0]
        genes = chr_ont[term]
        genes_in_true_terms.append(genes)
    print('genes in true terms:', genes_in_true_terms)
    overlap = list(set(genes_in_true_terms[0]) & set(test_gene_list))
    overlap_num = len(overlap)
    print(overlap_num)
    return overlap_num


# Test genes: a list of some genes involved in transcriptionally active chromatin
test_gene_list = ['AFF4', 'PSIP1', 'H2AFB1', 'H2AFB2', 'H2AFB3', 'TTC37', 'WDR61',
                  'KMT2E', 'BCAS3', 'HIST1H1C', 'ZC3H8', 'CTR9', 'PADI2', 'PAF1',
                  'SUPT6H', 'EXOSC4', 'ELL', 'PCID2', 'EXOSC5', 'PELP1', 'ESR1',
                  'EXOSC10', 'EXOSC3', 'ICE1', 'ICE2']
# Note: this second assignment overwrites the chromatin list with a synapse gene list.
test_gene_list = 'AAK1 ABCC8 ABHD17A ABHD17B ABHD17C ABHD6 ABI1 ABI2 ABL1'.split()

# Generate custom ontology from the Chromatin branch of human GO
# ontology_file = Generate_Ontology_File('GO:0000785')
ont = Ontology.from_table('./ont2.txt')

# Make dictionary of terms and genes within custom ontology
chr_ont = Find_GO_Focus_GeneDict(ont)

# Find number of test genes in enriched modules:
Find_num_genes_in_enriched(ont, chr_ont, test_gene_list)
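# Find_Enrichment() is not included in these fragments. A minimal sketch of one
# plausible implementation: a hypergeometric over-representation test of the query
# genes against each term's gene set. The 0.05 cutoff and the (term, p_value)
# return format are assumptions inferred from how true_terms is indexed above.
from scipy.stats import hypergeom

def Find_Enrichment(ont, term_gene_dict, query_genes, p_cutoff=0.05):
    background = set(ont.genes)
    query = set(query_genes) & background
    enriched = []
    for term, term_genes in term_gene_dict.items():
        term_genes = set(term_genes) & background
        hits = len(term_genes & query)
        if hits == 0:
            continue
        # P(X >= hits) when drawing len(query) genes from the background
        p = hypergeom.sf(hits - 1, len(background), len(term_genes), len(query))
        if p < p_cutoff:
            enriched.append((term, p))
    return enriched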
def setUp(self):
    self.ont = Ontology(hierarchy, mapping)
    # Fragment: tail of a Find_num_genes_in_enriched-style function; 'true_terms',
    # 'chr_ont', and 'test_gene_list' are defined earlier in the original file.
    genes_in_true_terms = []
    for item in true_terms:
        term = item[0]
        genes = chr_ont[term]
        genes_in_true_terms.append(genes)
    print('genes in true terms:', genes_in_true_terms)
    overlap = list(set(genes_in_true_terms[0]) & set(test_gene_list))
    overlap_num = len(overlap)
    print(overlap_num)
    return overlap_num


# Test genes: a list of some genes involved in transcriptionally active chromatin
test_gene_list = [
    'AFF4', 'PSIP1', 'H2AFB1', 'H2AFB2', 'H2AFB3', 'TTC37', 'WDR61', 'KMT2E',
    'BCAS3', 'HIST1H1C', 'ZC3H8', 'CTR9', 'PADI2', 'PAF1', 'SUPT6H', 'EXOSC4',
    'ELL', 'PCID2', 'EXOSC5', 'PELP1', 'ESR1', 'EXOSC10', 'EXOSC3', 'ICE1', 'ICE2'
]

# Generate custom ontology from the Chromatin branch of human GO
ontology_file = Generate_Ontology_File('GO:0000785')
ont = Ontology.from_table('custom_ontology.txt')

# Make dictionary of terms and genes within custom ontology
chr_ont = Find_GO_Focus_GeneDict(ont)

# Find number of test genes in enriched modules:
Find_num_genes_in_enriched(ont, chr_ont, test_gene_list)
def metric_1(ont_file, test_gene_list):
    ont1 = Ontology.from_table(ont_file)
    ont1_genes = ont1.genes
    test_recovery = jaccard(ont1_genes, test_gene_list)
    print('recovery of test genes:', test_recovery)
    return test_recovery
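# jaccard() is not defined in this snippet; a minimal sketch of the usual
# Jaccard index over two gene collections.
def jaccard(a, b):
    a, b = set(a), set(b)
    # |A ∩ B| / |A ∪ B|, guarding against two empty sets
    return len(a & b) / len(a | b) if (a or b) else 0.0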
    # Fragment: end of a normalize(x) helper (its def line is not shown),
    # followed by the CliXO / NDEx steps.
    # print(x)
    return x

cooc_df = cooc_df.apply(normalize, axis=1)
# print(cooc_df)
namemap_df = pd.read_csv(nnm, sep="|", header=None).set_index(0)
namemap_df.columns = ["Label"]
# print(namemap_df)
ont = Ontology.run_clixo(
    cooc_df,
    alpha,
    beta,
    verbose=False,
)
ont.update_node_attr(namemap_df)
# print(ont.to_table(output, clixo_format=True))
# nwx = ont.to_networkx()
# nx.write_graphml(nwx, 'clixotable.graphml')
grph = ont.to_igraph(include_genes=True)
grph.write_graphml(output + '.graphml')
if upload:
    ndex_url, ont_ndexgraph = ont.to_ndex(
        name="Pitch 15 Asthma Ontology",
        # (call truncated in the original snippet)
def infer_hierarchical_model(
    self,
    method='clixo-api',
    method_kwargs=None,
    edge_attr='has_edge',
    ddot_api_location=None,
    get_ontology_object=False,
):
    if method_kwargs is None:
        method_kwargs = {}

    if method == 'clixo-api':
        cols = self.edge_table.columns
        table = self.edge_table[[cols[0], cols[1], edge_attr]]
        self.ddot_client = DDOT_Client.from_dataframe(
            table, ddot_api_location=ddot_api_location)
        (self.ddot_client.call(**method_kwargs).wait_for_hiview_url())
        self.hiview_url = self.ddot_client.hiview_url
        if get_ontology_object:
            self.ddot_client.get_output_file(create_object=True)
            self.ontology = self.ddot_client.ontology

    elif method == 'clixo':
        from ddot import Ontology
        self.ontology = Ontology.run_clixo(
            self.edge_table,
            method_kwargs['alpha'],
            method_kwargs['beta'],
            clixo_cmd=method_kwargs['clixo_path'],
            clixo_version=method_kwargs['clixo_version'],
        )
        if len(self.edge_table.columns) != 3:
            # TODO
            print("future warning about edge table columns")
        if method_kwargs.get("upload_to_ndex", True):
            self.upload_ontology_to_ndex(
                method_kwargs['ndex_username'],
                method_kwargs['ndex_password'],
                ndex_server=method_kwargs.get('ndex_server', "http://test.ndexbio.org"),
                main_network=self.edge_table,
                main_feature=self.edge_table.columns[-1])
            ndex_url, _ = self.ontology.to_ndex(
                method_kwargs['ndex_username'],
                method_kwargs['ndex_password'],
                method_kwargs.get('ndex_server', 'http://test.ndexbio.org'),
                network=self.edge_table,
                main_feature=self.edge_table.columns[-1])
            uuid = ndex_url.split('/')[-1]
            self.hiview_url = (
                f"http://hiview-test.ucsd.edu/{uuid}"
                f"?type=test&server=http://dev2.ndexbio.org")
    else:
        raise ValueError("Invalid method!")

    return self
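# The class enclosing infer_hierarchical_model() is not shown. Assuming it wraps
# a gene-gene edge_table DataFrame, a hypothetical call of the local-CliXO path
# might look like this; the class name, path, and parameter values are placeholders.
import pandas as pd

edges = pd.DataFrame({'GeneA': ['A', 'A', 'B'],
                      'GeneB': ['B', 'C', 'C'],
                      'has_edge': [0.9, 0.4, 0.7]})
analysis = NetworkAnalysis(edge_table=edges)  # hypothetical wrapper class
analysis.infer_hierarchical_model(
    method='clixo',
    method_kwargs={
        'alpha': 0.05,
        'beta': 0.5,
        'clixo_path': '/path/to/clixo',  # placeholder
        'clixo_version': '0.3',          # placeholder
        'upload_to_ndex': False,         # skip the NDEx upload branch
    },
)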
        # Fragment: apparently the tail of Num_Enriched_Modules(ont1, ont2);
        # the enclosing def and loop lines are not shown.
        if len(test_gene_list) != 0:
            enriched_terms = Find_Enrichment(ont1, chr_ont, test_gene_list)
            enriched_modules.append(enriched_terms)
        else:
            continue
    enriched_modules = [x for x in enriched_modules if x != []]
    enriched_modules = [item for sublist in enriched_modules for item in sublist]
    enriched_term_names = []
    for item in enriched_modules:
        enriched_term_name = item[0]
        enriched_term_names.append(enriched_term_name)
    unique_enriched_term_names = set(enriched_term_names)
    # print(unique_enriched_term_names)
    print('Num of Enriched Modules', len(unique_enriched_term_names))
    return


# Generate custom ontology 1 from the Chromatin branch of human GO
ontology_file = Generate_Ontology_File('GO:0000785', 'ont1.txt')
ont1 = Ontology.from_table('ont1.txt')
# print(ont1)

# Generate custom ontology 2 from the Chromosome branch of human GO
ont2_file = Generate_Ontology_File('GO:0005694', 'ont2.txt')
ont2 = Ontology.from_table('ont2.txt')
# print(ont2)

Num_Enriched_Modules(ont1, ont2)
# In[7]:

pc = pd.DataFrame(columns=['alpha', 'precision', 'recall'])
index = 0
alphas = [0.5, 1, 0.05, 2, 1.5, 3, 0.2]
for alpha in alphas:
    try:
        # run CliXO with specified alpha
        CX.RunCliXO(CX_File, a_=alpha, b_=0.5, M_=0.0001, z_=0.05)
        t1 = timeit.default_timer()
        print('time: {:.1f}'.format(t1 - t0))
        # generate ontology object from generated CliXO ontology
        hierarchy, mapping = CX.CliXO_Parser(CX_ONT)
        ontCX_synapse = Ontology(hierarchy, mapping, ignore_orphan_terms=True)
        # calculate precision and recall
        precision, recall = precision_recall(ontCX_synapse, ontGO_synapse)
        row = [alpha, precision, recall]
        pc.loc[index, :] = row
        pc.to_csv(path + 'precision_recall_draft_1.csv', sep='\t')
        index += 1
    except:
        pass
# Input file is a tab-separated adjacency list with edge weights as the final column

# In[2]:

# path = os.path.dirname(os.getcwd()) + '/data/'  # path to network file
file = 'EXAMPLE.txt'
# print(path)

# If the edge weights are not greater than 0

# In[3]:

CX_File = CX.ProcessCliXO(file)

# Run CliXO

# In[4]:

CX.RunCliXO(CX_File, a_=0.01, b_=0.5, M_=0.0001, z_=0.05)

# DDOT Ont object

# In[6]:

CX_ONT = 'RUN_' + CX_File[0:-4] + 'ONT'

# In[8]:

hierarchy, mapping = CX.CliXO_Parser(CX_ONT)
ont = Ontology(hierarchy, mapping, ignore_orphan_terms=True)
def StreamElements(self, element_iterator, context):
    try:
        params = {
            'name': 'Data-Driven Ontology',
            'ont1_ndex_uuid': None,
            'ont2_ndex_uuid': None,
            'iterations': 3,
            'threads': 4
        }
        params.update(default_params)
        G, params, errors = self.read_element_stream(element_iterator, params)
        self.format_params(params)
        if verbose:
            print('Parameters:', params)

        # Read graphs using NDEx client
        hier1, hier2 = self.read_hierarchies(params)

        if True:
            hier1_ont = Ontology.from_NdexGraph(hier1)
            hier2_ont = Ontology.from_NdexGraph(hier2)
            print('Summary of hier1_ont:', hier1_ont.summary())
            print('Summary of hier2_ont:', hier2_ont.summary())

            hier1_collapsed, hier2_collapsed = Ontology.mutual_collapse(
                hier1_ont, hier2_ont, verbose=True)
            assert len(hier1_collapsed.terms) < 3000, len(hier1_collapsed.terms)
            assert len(hier2_collapsed.terms) < 3000, len(hier2_collapsed.terms)

            if verbose:
                print('Aligning hierarchies')
            alignment = align_hierarchies(hier1_collapsed,
                                          hier2_collapsed,
                                          params['iterations'],
                                          params['threads'])
            if verbose:
                print('One-to-one term alignments:', alignment.shape[0])
                print(alignment.iloc[:30, :])

            update_nx_with_alignment(hier1, alignment)
            ont_url = hier1.upload_to(params['ndex_server'],
                                      params['ndex_user'],
                                      params['ndex_pass'])
            if verbose:
                print('ontology_url:', ont_url)
            for elt in yield_ndex(ont_url):
                yield elt
        else:
            for caught_error in errors:
                error = self.create_internal_crash_error(caught_error.message, 500)
                log_error(error)
                yield error
    except Exception as e:
        message = "Unexpected error: " + str(e)
        error = self.create_internal_crash_error(message, 500)
        log_error(error)
        import traceback
        traceback.print_exc()
        yield error
# Download gene-term annotations for human
r = requests.get('http://geneontology.org/gene-associations/goa_human.gaf.gz')
with open(path + 'goa_human.gaf.gz', 'wb') as f:
    f.write(r.content)

hierarchy = pd.read_table('go.tab',
                          sep='\t',
                          header=None,
                          names=['Parent', 'Child', 'Relation', 'Namespace'])

with gzip.open(path + 'goa_human.gaf.gz', 'rb') as f:
    mapping = ddot.parse_gaf(f)

ontGO = Ontology.from_table(table=hierarchy,
                            parent='Parent',
                            child='Child',
                            mapping=mapping,
                            mapping_child='DB Object ID',
                            mapping_parent='GO ID',
                            add_root_name='GO:00SUPER',
                            ignore_orphan_terms=True)
ontGO.clear_node_attr()
ontGO.clear_edge_attr()

go_descriptions = pd.read_table('goID_2_name.tab',
                                header=None,
                                names=['Term', 'Term_Description'],
                                index_col=0)
ontGO.update_node_attr(go_descriptions)

ontGO = ontGO.collapse_ontology(method='mhkramer')
if 'GO:00SUPER' not in ontGO.terms:
    ontGO.add_root('GO:00SUPER', inplace=True)
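# Once collapsed, the processed GO hierarchy can be written back out with
# to_table() so the Ontology.from_table() loaders above can reuse it;
# the output filename here is arbitrary.
ontGO.to_table(path + 'collapsed_go.txt', clixo_format=True)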
# Fragment: write a '<parent>\t<parent>' line for every unique parent term in the
# CliXO table, then run enrichment on the modified ontology file.
uniq_parents = sorted(clixo_ont['Parent'].unique())
for parent in uniq_parents:
    fw.write('{}\t{}\n'.format(parent, parent))

# for line in f:
#     if line.startswith('#'):
#         continue
#
# header = line.split()[0:3]
# parents = set()
# for line in f:
#     line = line.strip().split('\t')
#     parents.add(line[0])
#
# for parent in sorted(parents):
#     fw.write('{}\t{}'.format(parent, parent))

with open(genes_fn) as f:
    test_gene_list = [x.strip() for x in f.readlines()]

ont = Ontology.from_table(mod_ont_fn)

# Make dictionary of terms and genes within custom ontology
chr_ont = Find_GO_Focus_GeneDict(ont)
# print(chr_ont)

# Find number of test genes in enriched modules:
Find_num_genes_in_enriched(ont, chr_ont, test_gene_list)
def StreamElements(self, element_iterator, context):
    try:
        params = {
            'similarity': 'Similarity',
            'name': 'Data-Driven Ontology',
            'min_dt': -100000,
            'features': '',
            'all_features': False,
            'input_fmt': 'cx',
            'timeout': 100,
            'ndex_uuid': None
        }
        params.update(default_params)
        G, params, errors = self.read_element_stream(element_iterator, params)
        self.format_params(params)
        if verbose:
            print('Parameters', params)

        input_fmt = params['input_fmt'].replace('ndex', 'cx')
        G_df, nodes_attr = ndex_to_sim_matrix(
            params['ndex_uuid'],
            params['ndex_server'],
            params['ndex_user'],
            params['ndex_pass'],
            similarity=params['similarity'],
            input_fmt=input_fmt,
            output_fmt='sparse')
        print('Similarity')
        print(G_df.head())

        features = [params['similarity']]
        if params['all_features']:
            features.extend(G_df.columns.values.tolist())

        gene_names = np.unique(np.append(G_df['Gene1'].values, G_df['Gene2'].values))
        if verbose:
            print('Gene names:', gene_names)

        if params['features']:
            to_concat = []
            for f in params['features']:
                if f in G_df.columns:
                    # Feature already present in the similarity table
                    # (the original called 'in_G.append(f)' on an undefined list).
                    features.append(f)
                else:
                    tmp = ndex_to_sim_matrix(
                        params['ndex_uuid'],
                        params['ndex_server'],
                        params['ndex_user'],
                        params['ndex_pass'],
                        similarity=params['similarity'],
                        input_fmt=input_fmt,
                        output_fmt='sparse',
                        subset=None)
                    tmp.set_index(['Gene1', 'Gene2'], inplace=True)
                    to_concat.append(tmp)
            to_concat.append(G_df)
            G_df = pd.concat(to_concat, axis=1)
        if verbose:
            print('Features:', features)

        errors = [e for e in errors
                  if e.message != "Error decoding token from stream, EOF"]
        if len(errors) == 0:
            # Run CLIXO
            graph = G_df[['Gene1', 'Gene2', params['similarity']]]
            ont = Ontology.run_clixo(
                graph,
                params['alpha'],
                params['beta'],
                min_dt=params['min_dt'],
                timeout=params['timeout'])

            # Create an NDEx network for every term's subnetwork
            term_2_uuid = ont.upload_subnets_ndex(
                G_df,
                features,
                params['ndex_server'],
                params['ndex_user'],
                params['ndex_pass'],
                params['name'],
                propagate=True)

            if params['output_fmt'] == 'cx':
                # Use CXmate to stream to NDEX
                for elt in self.stream_ontology(ont, term_sizes, term_2_uuid, tree_edges):
                    yield elt
            elif params['output_fmt'] == 'ndex':
                description = (
                    'Data-driven ontology created by CLIXO '
                    '(parameters: alpha={alpha}, beta={beta}). '
                    'Created from similarity network '
                    'at {ndex_server}/{ndex_uuid}').format(**params)
                # nx_nodes_to_pandas(G)
                ont_ndex = ont.to_NdexGraph(
                    name=params['name'],
                    description=description,
                    term_2_uuid=term_2_uuid,
                    gene_attr=nodes_attr)
                ont_url = ont_ndex.upload_to(
                    params['ndex_server'],
                    params['ndex_user'],
                    params['ndex_pass'])
                print('ontology_url:', ont_url)
                for elt in yield_ndex(ont_url):
                    yield elt
        else:
            for caught_error in errors:
                error = self.create_internal_crash_error(caught_error.message, 500)
                log_error(error)
                yield error
    except Exception as e:
        message = "Unexpected error: " + str(e)
        error = self.create_internal_crash_error(message, 500)
        log_error(error)
        import traceback
        traceback.print_exc()
        yield error
    # Fragment: body of a normalize(x) helper (its def line is not shown),
    # followed by the CliXO / NDEx steps.
    concept1 = x[0]
    concept2 = x[1]
    x[0] = x[0].replace(' ', '_')
    x[1] = x[1].replace(' ', '_')
    x[2] = float(x[2]) / (frequency_dict[concept1] * frequency_dict[concept2])
    # print(x)
    return x

cooc_df = cooc_df.apply(normalize, axis=1)
print(cooc_df)
print(isinstance(cooc_df, pd.DataFrame))
ont = Ontology.run_clixo(cooc_df, alpha, beta)
print(ont.to_table(output, clixo_format=True))
# nwx = ont.to_networkx()
# nx.write_graphml(nwx, 'clixotable.graphml')
grph = ont.to_igraph(include_genes=True)
grph.write_graphml(output + '.graphml')
if upload:
    ndex_url, ont_ndexgraph = ont.to_ndex(
        name="Pitch 15 Asthma Ontology",
        ndex_server="http://test.ndexbio.org",
        ndex_pass='******',
        ndex_user='******',
        layout='bubble-collect',
        # (call truncated in the original snippet)