def load_syngo_genes():
    """Return the de-duplicated genes found in the SynGO BP and CC tables.

    Each table is loaded with ``Ontology.from_table`` and its ``genes``
    attribute is read; the two gene lists are concatenated and passed
    through a set, so the order of the returned list is arbitrary.
    """
    table_paths = (
        '../prev_databases/SynGO_BP.txt',
        '../prev_databases/SynGO_CC.txt',
    )
    # The generator is consumed left to right, so BP is loaded before CC.
    bp_genes, cc_genes = (Ontology.from_table(p).genes for p in table_paths)
    return list(set(bp_genes + cc_genes))
def load_syngo_genes():
    """Return SynGO (BP + CC) genes that are not in the training gene set.

    The BP and CC tables are loaded with ``Ontology.from_table``, their
    ``genes`` lists are merged and de-duplicated, and every gene returned by
    ``load_training_genes()`` is then removed. The result is a list in
    arbitrary (set) order.
    """
    bp_genes = Ontology.from_table(
        '/Users/karenmei/Documents/Synapse_Ontology/NetworkClass/Metrics/SynGO_BP.txt'
    ).genes
    cc_genes = Ontology.from_table(
        '/Users/karenmei/Documents/Synapse_Ontology/NetworkClass/Metrics/SynGO_CC.txt'
    ).genes

    all_syngo = set(bp_genes + cc_genes)
    held_out = set(load_training_genes())
    return list(all_syngo - held_out)
def compare(ont):
    """Compare a custom ontology against the pre-generated synapse ontology.

    The original body ignored its ``ont`` argument and read an undefined
    name ``fn`` instead; the argument is now what gets compared.

    :param ont: the custom ontology, given either as something
        ``Ontology.from_table`` accepts (e.g. a table path) or as an
        already-constructed ``Ontology``.
    :return: whatever ``Num_Enriched_Modules(reference, custom)`` returns.
    """
    # Reference ontology: synapse branch table generated beforehand.
    reference = Ontology.from_table('../../analyses/Histogram_ont/synapse.txt')

    # Accept a ready-made Ontology as well as a table to load, since the
    # parameter name does not say which one callers pass.
    custom = ont if isinstance(ont, Ontology) else Ontology.from_table(ont)

    return Num_Enriched_Modules(reference, custom)
def compare_to_go(ont_fn):
    """Compare the ontology stored in *ont_fn* against the synapse ontology.

    :param ont_fn: table file of the custom ontology, loaded with
        ``Ontology.from_table``.
    :return: the result of ``Num_Enriched_Modules(reference, custom)``.
    """
    # Reference: pre-generated synapse ontology table.
    reference = Ontology.from_table(
        '/home/joreyna/projects/BNFO286/synapse_ontology/analyses/Histogram_ont/synapse.txt'
    )
    custom = Ontology.from_table(ont_fn)
    return Num_Enriched_Modules(reference, custom)
def create_ddot_object(self, filename="ddot_output.txt", **kwargs):
    """Load a CLIXO-format table into ``self.ontology`` and return ``self``.

    :param filename: table to read; defaults to ``"ddot_output.txt"``.
    :param kwargs: forwarded unchanged to ``Ontology.from_table``.
    :return: this object, so calls can be chained.
    """
    # Imported lazily so ddot is only required when this method is used.
    import ddot

    loaded = ddot.Ontology.from_table(filename, clixo_format=True, **kwargs)
    self.ontology = loaded
    return self
def load_pheno_hpo():
    """Return the 'Phenotypic abnormality' branch of the HPO parent/child table.

    The table is loaded, gene-term annotations are propagated in the
    forward direction (term-term relations are left untouched), and the
    result is restricted to the 'Phenotypic abnormality' branch.
    """
    hpo_table = (
        '/Users/karenmei/Documents/Synapse_Ontology/HPO/making_HPO/HPO_parent_child.txt'
    )
    return (
        Ontology.from_table(hpo_table)
        .propagate(direction='forward', gene_term=True, term_term=False)
        .focus(branches=['Phenotypic abnormality'])
    )
def run_ddot(theargs):
    """Run CLIXO on an edge list and upload the resulting ontology to NDEx.

    Fixes relative to the previous version: the NDEx user name and password
    are no longer passed in each other's place, an ``https://`` server is
    not prefixed with ``http://``, the ownership fix-up works when the output
    path has no directory component, and the stderr message has its missing
    space.

    :param theargs: parsed arguments. Attributes read here: ``clixopath``,
        ``input``, ``alpha``, ``beta``, ``output``, ``ndexserver``,
        ``ndexname``, ``ndexuser``, ``ndexpass``, ``ndexlayout`` and
        ``ndexvisibility``.
    :return: ``'RESULT:<url>'`` followed by a newline on success, or
        ``'ERROR:<message>'`` followed by a newline if anything raised.
    """
    try:
        # Only the captured standard output (bytes) of CLIXO is used.
        _, c_out, _ = run_clixo(theargs.clixopath, theargs.input,
                                theargs.alpha, theargs.beta)

        df = pd.read_csv(io.StringIO(c_out.decode('utf-8')),
                         sep='\t',
                         engine='python',
                         header=None,
                         comment='#')

        if theargs.output is not None:
            # Best effort: keep a raw copy of the CLIXO output and give it
            # the same owner as its directory. Failure here must not abort
            # the upload, so it is only reported on stderr.
            try:
                with open(theargs.output, 'wb') as f:
                    f.write(c_out)
                # dirname() is '' for a bare file name; stat the cwd then.
                outputdir = os.path.dirname(theargs.output) or '.'
                statres = os.stat(outputdir)
                os.chown(theargs.output, statres[stat.ST_UID],
                         statres[stat.ST_GID])
            except Exception as ex:
                sys.stderr.write('Caught exception trying to write file or '
                                 'change permission: ' + str(ex))

        ont1 = Ontology.from_table(df,
                                   clixo_format=True,
                                   parent=0,
                                   child=1)

        # Add a scheme only when the caller did not supply one.
        server = theargs.ndexserver
        if not server.startswith(('http://', 'https://')):
            server = 'http://' + server

        # The original edge list is attached to the upload as the network.
        idf = pd.read_csv(theargs.input,
                          sep='\t',
                          engine='python',
                          header=None,
                          comment='#')
        idf.rename(columns={
            0: 'Gene1',
            1: 'Gene2',
            2: 'has_edge'
        }, inplace=True)

        ont_url, _ = ont1.to_ndex(name=theargs.ndexname,
                                  network=idf,
                                  main_feature='has_edge',
                                  ndex_server=server,
                                  ndex_user=theargs.ndexuser,
                                  ndex_pass=theargs.ndexpass,
                                  layout=theargs.ndexlayout,
                                  visibility=theargs.ndexvisibility)

        # Turn the REST URL into the browser-facing one.
        return 'RESULT:' + ont_url.strip().replace('/v2/network/',
                                                   '/#/network/') + '\n'
    except OverflowError as ofe:
        logger.exception('Error running clixo')
        return 'ERROR:' + str(ofe) + '\n'
    except Exception as e:
        logger.exception('Some other error')
        return 'ERROR:' + str(e) + '\n'
def load_syngo_presynapse():
    """Return the genes of the 'presynapse' branch of the SynGO CC table.

    The table is loaded, gene-term annotations are propagated in the
    forward direction, the ontology is focused on the 'presynapse' branch,
    and that branch's ``genes`` attribute is returned.
    """
    cc_ontology = Ontology.from_table(
        '/Users/karenmei/Documents/Synapse_Ontology/NetworkClass/Metrics/SynGO_CC.txt'
    )
    propagated = cc_ontology.propagate(direction='forward',
                                       gene_term=True,
                                       term_term=False)
    return propagated.focus(branches=['presynapse']).genes
def upload_file():
    """Handle an uploaded edge list: run CLIXO on it and push the result to NDEx.

    Reads the upload from ``request.files['file']`` and the optional
    ``alpha`` / ``beta`` query parameters (defaults 0.05 and 0.5; when
    supplied they are forwarded exactly as received from the query string).

    :return: a status message containing the NDEx UUID (or the raw value
        returned by ``to_ndex`` if it does not look like a URL), or a generic
        failure message when CLIXO raised ``OverflowError``.
    :raises HTTPError: 500 if the upload cannot be read from the request,
        422 if no file was supplied.
    """
    import os  # local import: only needed to clean up the temporary upload

    # =================
    # default values
    # =================
    alpha = 0.05
    beta = 0.5

    try:
        data = request.files.get('file')
    except Exception as e:
        raise HTTPError(500, e)

    if not (data and data.file):
        raise HTTPError(422, '**** FILE IS MISSING ****')

    if request.query.alpha:
        alpha = request.query.alpha
    if request.query.beta:
        beta = request.query.beta

    # The upload is spooled to disk because generate_clixo_file takes a file
    # name. Binary mode: the upload's file object yields bytes.
    with tempfile.NamedTemporaryFile('wb', delete=False) as f:
        f.write(data.file.read())
        f_name = f.name

    try:
        clixo_file = generate_clixo_file(f_name, alpha, beta)

        with open(clixo_file, 'r') as f_saved:
            df = pd.read_csv(f_saved,
                             sep='\t',
                             engine='python',
                             header=None,
                             comment='#')
        print(df.columns)

        ont1 = Ontology.from_table(df,
                                   clixo_format=True,
                                   parent=0,
                                   child=1)

        ont_url, _ = ont1.to_ndex(name='MODY',
                                  ndex_server='http://test.ndexbio.org',
                                  ndex_pass='******',
                                  ndex_user='******',
                                  layout='bubble-collect',
                                  visibility='PUBLIC')

        if ont_url is not None and len(ont_url) > 0 and 'http' in ont_url:
            # The UUID is the last path component of the network URL.
            network_uuid = ont_url.split('/')[-1]
            return 'File has been processed. UUID:: %s \n' % network_uuid
        return 'File has been processed. UUID: %s \n' % ont_url
    except OverflowError:
        print('Error with running clixo')
    finally:
        # delete=False above means nobody else removes the spooled upload;
        # drop it here so repeated requests do not accumulate temp files.
        try:
            os.remove(f_name)
        except OSError:
            # Cleanup is best effort and must not mask the real outcome.
            pass

    return "Unable to complete process. See stack message above."
def analyze_gene_enrichment(ont_fn, disease_gene_fn):
    """Count disease genes in an ontology and in its enriched modules.

    :param ont_fn: ontology table, loaded with ``Ontology.from_table``.
    :param disease_gene_fn: text file with one gene per line.
    :return: tuple ``(num_ont_disease_genes, num_enriched_disease_genes)``:
        the number of listed genes present in ``ont.genes``, and the value
        returned by ``Find_num_genes_in_enriched`` for the same gene list.
    """
    ont = Ontology.from_table(ont_fn)
    translated = Find_GO_Focus_GeneDict(ont)

    # Context manager so the file is closed even if reading fails.
    with open(disease_gene_fn, "r") as gene_file:
        test_gene_list = gene_file.read().splitlines()

    num_ont_disease_genes = len(set(ont.genes).intersection(test_gene_list))

    # Find number of test genes in enriched modules.
    num_enriched_disease_genes = Find_num_genes_in_enriched(
        ont, translated, test_gene_list)

    return (num_ont_disease_genes, num_enriched_disease_genes)
def metric_1(ont_file, test_gene_list):
    """Print and return ``jaccard(ontology genes, test_gene_list)``.

    :param ont_file: ontology table, loaded with ``Ontology.from_table``.
    :param test_gene_list: genes to compare against the ontology's genes.
    :return: the value produced by ``jaccard``.
    """
    ontology_genes = Ontology.from_table(ont_file).genes
    recovery = jaccard(ontology_genes, test_gene_list)
    print('recovery of test genes:', recovery)
    return recovery
def load_pheno_hpo():
    """Return the 'Phenotypic abnormality' branch of the HPO table.

    Loads ``../other_resources/HPO_parent_child.txt``, propagates gene-term
    annotations in the forward direction (term-term relations untouched),
    and focuses the result on the 'Phenotypic abnormality' branch.
    """
    full_hpo = Ontology.from_table('../other_resources/HPO_parent_child.txt')
    propagated = full_hpo.propagate(direction='forward',
                                    gene_term=True,
                                    term_term=False)
    phenotypic = propagated.focus(branches=['Phenotypic abnormality'])
    return phenotypic
true_terms=Find_Enrichment(ont, chr_ont, test_gene_list) genes_in_true_terms=[] for item in true_terms: term=item[0] genes=chr_ont[term] genes_in_true_terms.append(genes) print ('genes in true terms:', genes_in_true_terms) overlap=list(set(genes_in_true_terms[0])&set(test_gene_list)) overlap_num=len(overlap) print (overlap_num) return overlap_num #Test genes: a list of some genes involved in transcriptionally active chromatin test_gene_list=['AFF4', 'PSIP1', 'H2AFB1', 'H2AFB2', 'H2AFB3', 'TTC37', 'WDR61', 'KMT2E', 'BCAS3', 'HIST1H1C', 'ZC3H8', 'CTR9', 'PADI2', 'PAF1', 'SUPT6H', 'EXOSC4', 'ELL', 'PCID2', 'EXOSC5', 'PELP1', 'ESR1', 'EXOSC10', 'EXOSC3', 'ICE1', 'ICE2'] test_gene_list = 'AAK1 ABCC8 ABHD17A ABHD17B ABHD17C ABHD6 ABI1 ABI2 ABL1'.split() #Generate custom ontology from Chromatin branch from human GO #ontology_file=Generate_Ontology_File('GO:0000785') ont=Ontology.from_table('./ont2.txt') #Make dictionary of terms and genes within custom ontology chr_ont=Find_GO_Focus_GeneDict(ont) #Find number of test genes in enriched modules: Find_num_genes_in_enriched(ont, chr_ont, test_gene_list)
if len(test_gene_list) != 0: enriched_terms = Find_Enrichment(ont1, chr_ont, test_gene_list) enriched_modules.append(enriched_terms) else: continue enriched_modules = [x for x in enriched_modules if x != []] enriched_modules = [ item for sublist in enriched_modules for item in sublist ] enriched_term_names = [] for item in enriched_modules: enriched_term_name = item[0] enriched_term_names.append(enriched_term_name) unique_enriched_term_names = set(enriched_term_names) #print (unique_enriched_term_names) print('Num of Enriched Modules', len(unique_enriched_term_names)) return #Generate custom ontology 1 from Chromatin branch from human GO ontology_file = Generate_Ontology_File('GO:0000785', 'ont1.txt') ont1 = Ontology.from_table('ont1.txt') #print (ont1) #Generate custom ontology 2 from Chromosome branch from human GO ont2_file = Generate_Ontology_File('GO:0005694', 'ont2.txt') ont2 = Ontology.from_table('ont2.txt') #print (ont2) Num_Enriched_Modules(ont1, ont2)
# Download gene-term annotations for human
r = requests.get('http://geneontology.org/gene-associations/goa_human.gaf.gz')
# Fail here on an HTTP error instead of saving an error page as the .gz file
# and only finding out when gzip tries to read it below.
r.raise_for_status()
with open(path + 'goa_human.gaf.gz', 'wb') as f:
    f.write(r.content)

# Term-term hierarchy: one Parent/Child row per relation.
hierarchy = pd.read_table('go.tab',
                          sep='\t',
                          header=None,
                          names=['Parent', 'Child', 'Relation', 'Namespace'])

# Gene-term mapping parsed from the annotation file just downloaded.
with gzip.open(path + 'goa_human.gaf.gz', 'rb') as f:
    mapping = ddot.parse_gaf(f)

ontGO = Ontology.from_table(table=hierarchy,
                            parent='Parent',
                            child='Child',
                            mapping=mapping,
                            mapping_child='DB Object ID',
                            mapping_parent='GO ID',
                            add_root_name='GO:00SUPER',
                            ignore_orphan_terms=True)

# Drop the attributes picked up from the tables, then attach term names only.
ontGO.clear_node_attr()
ontGO.clear_edge_attr()

go_descriptions = pd.read_table('goID_2_name.tab',
                                header=None,
                                names=['Term', 'Term_Description'],
                                index_col=0)
ontGO.update_node_attr(go_descriptions)

ontGO = ontGO.collapse_ontology(method='mhkramer')

# Re-add the artificial root if it is no longer among the terms.
if 'GO:00SUPER' not in ontGO.terms:
    ontGO.add_root('GO:00SUPER', inplace=True)
genes_in_true_terms = [] for item in true_terms: term = item[0] genes = chr_ont[term] genes_in_true_terms.append(genes) print('genes in true terms:', genes_in_true_terms) overlap = list(set(genes_in_true_terms[0]) & set(test_gene_list)) overlap_num = len(overlap) print(overlap_num) return overlap_num #Test genes: a list of some genes involved in transcriptionally active chromatin test_gene_list = [ 'AFF4', 'PSIP1', 'H2AFB1', 'H2AFB2', 'H2AFB3', 'TTC37', 'WDR61', 'KMT2E', 'BCAS3', 'HIST1H1C', 'ZC3H8', 'CTR9', 'PADI2', 'PAF1', 'SUPT6H', 'EXOSC4', 'ELL', 'PCID2', 'EXOSC5', 'PELP1', 'ESR1', 'EXOSC10', 'EXOSC3', 'ICE1', 'ICE2' ] #Generate custom ontology from Chromatin branch from human GO ontology_file = Generate_Ontology_File('GO:0000785') ont = Ontology.from_table('custom_ontology.txt') #Make dictionary of terms and genes within custom ontology chr_ont = Find_GO_Focus_GeneDict(ont) #Find number of test genes in enriched modules: Find_num_genes_in_enriched(ont, chr_ont, test_gene_list)
# Append one self-mapping row ("<term>\t<term>") per distinct parent term.
uniq_parents = sorted(clixo_ont['Parent'].unique())
for parent in uniq_parents:
    row = '{}\t{}\n'.format(parent, parent)
    fw.write(row)

# Test genes: one per line, surrounding whitespace removed.
with open(genes_fn) as f:
    test_gene_list = [entry.strip() for entry in f]

ont = Ontology.from_table(mod_ont_fn)

# Make dictionary of terms and genes within custom ontology
chr_ont = Find_GO_Focus_GeneDict(ont)

# Find number of test genes in enriched modules:
Find_num_genes_in_enriched(ont, chr_ont, test_gene_list)