def returnDirectories(sub_dir):
    """Return whatever ``unique.returnDirectories`` yields for *sub_dir*.

    This is a direct pass-through; the result is handed back to the caller
    unmodified.
    """
    return unique.returnDirectories(sub_dir)
def _writeRow(handle, fields):
    """Write *fields* to *handle* as one tab-delimited, newline-terminated row."""
    handle.write('\t'.join(fields) + '\n')


def _openTable(path, opened):
    """Open *path* with ``export.ExportFile`` and record the handle in *opened*.

    Recording every handle lets the caller close all of them in one place,
    including when an error interrupts the export.
    """
    handle = export.ExportFile(path)
    opened.append(handle)
    return handle


def exportTables(metabolite_list):
    """Write the HMDB metabolite tables into every species database directory.

    For each entry returned by ``unique.returnDirectories('/Databases')`` the
    following files are written beneath ``Databases/<species_code>/``:

    * ``gene/HMDB.txt`` -- one annotation row per metabolite, with a header.
    * ``uid-gene/HMDB-CAS.txt``, ``HMDB-ChEBI.txt``, ``HMDB-PubChem.txt`` and
      ``HMDB-KeggCompound.txt`` -- HMDB ID to external ID pairs, written only
      when the external ID is non-empty.
    * ``gene-mapp/HMDB-MAPP.txt`` -- HMDB ID, the system code ``'Ch'`` and a
      pathway name.
    * ``gene-mapp/denominator/CAS.txt`` -- CAS number and the code ``'Ca'``.
    * ``gene-go/HMDB-GeneOntology.txt`` -- only when enzyme-based inference is
      switched on (it is hard-coded off below).

    Args:
        metabolite_list: iterable of metabolite records exposing the accessor
            methods ``HMDB``, ``Name``, ``Description``, ``SecondaryIDs``,
            ``IUPAC``, ``CAS``, ``CheBI``, ``PubChem``, ``PathwaysStr``,
            ``ProteinNamesStr`` and ``KEGGCompoundID`` (each value is joined
            into a tab-delimited row, so they must be strings), plus
            ``Pathways`` and ``ProteinNames`` (iterated over).

    Returns:
        None. All results are written to disk.
    """
    infer_enzyme_to_metabolite_pathway_data = 'no'
    infer_from_enzymes = infer_enzyme_to_metabolite_pathway_data == 'yes'
    current_species_dirs = unique.returnDirectories('/Databases')

    ### Save results to all species directories
    for species_code in current_species_dirs:
        # Single-argument print(...) emits the same text as the original
        # print statement and parses on both Python 2 and Python 3.
        print('Exporting metabolite data for: %s' % (species_code,))

        species_root = 'Databases/' + species_code
        gene_dir = species_root + '/gene/HMDB.txt'
        hmdb_cas_dir = species_root + '/uid-gene/HMDB-CAS.txt'
        hmdb_chebi_dir = species_root + '/uid-gene/HMDB-ChEBI.txt'
        hmdb_pubchem_dir = species_root + '/uid-gene/HMDB-PubChem.txt'
        hmdb_keggcomp_dir = species_root + '/uid-gene/HMDB-KeggCompound.txt'
        hmdb_mapp_dir = species_root + '/gene-mapp/HMDB-MAPP.txt'
        cas_denom_dir = species_root + '/gene-mapp/denominator/CAS.txt'
        hmdb_go_dir = species_root + '/gene-go/HMDB-GeneOntology.txt'

        # Every handle opened for this species is registered here so that the
        # finally-clause can close all of them. Previously the KEGG compound
        # and CAS denominator files were never closed, and nothing was closed
        # if an exception interrupted the export.
        opened = []
        try:
            gene_data = _openTable(gene_dir, opened)
            hmdb_cas_data = _openTable(hmdb_cas_dir, opened)
            hmdb_chebi_data = _openTable(hmdb_chebi_dir, opened)
            hmdb_pubchem_data = _openTable(hmdb_pubchem_dir, opened)
            hmdb_keggcomp_data = _openTable(hmdb_keggcomp_dir, opened)
            hmdb_mapp_data = _openTable(hmdb_mapp_dir, opened)
            cas_denom_data = _openTable(cas_denom_dir, opened)
            if infer_from_enzymes:
                hmdb_go_data = _openTable(hmdb_go_dir, opened)

            _writeRow(gene_data, [
                'hmdb_id', 'name', 'description', 'secondary_id', 'iupac',
                'cas_number', 'chebi_id', 'pubchem_compound_id', 'Pathways',
                'ProteinNames',
            ])

            ### Attempt to add GO and pathway data from database based on associated protein IDs (simple translation from human)
            # Best-effort lookups: try Ensembl first, fall back to EntrezGene,
            # and continue with empty tables when neither can be loaded.
            mod = 'Ensembl'
            try:
                gene_annotations = gene_associations.importGeneData(species_code, mod)
            except Exception:
                mod = 'EntrezGene'
                try:
                    gene_annotations = gene_associations.importGeneData(species_code, mod)
                except Exception:
                    gene_annotations = {}

            # Lower-cased gene symbol -> gene ID.
            symbol_associations = {}
            for geneid in gene_annotations:
                symbol_associations[gene_annotations[geneid].SymbolLower()] = geneid

            gotype = 'null'
            try:
                gene_to_go = gene_associations.importGeneGOData(species_code, mod, gotype)
            except Exception:
                gene_to_go = {}
            try:
                gene_to_mapp = gene_associations.importGeneMAPPData(species_code, mod)
            except Exception:
                gene_to_mapp = {}

            for ed in metabolite_list:
                hmdb_id = ed.HMDB()
                cas = ed.CAS()
                chebi = ed.CheBI()
                pubchem = ed.PubChem()
                kegg_compound = ed.KEGGCompoundID()
                pathways = ed.Pathways()

                _writeRow(gene_data, [
                    hmdb_id, ed.Name(), ed.Description(), ed.SecondaryIDs(),
                    ed.IUPAC(), cas, chebi, pubchem, ed.PathwaysStr(),
                    ed.ProteinNamesStr(),
                ])

                # NOTE(review): "> 1" skips metabolites that have exactly one
                # pathway. Kept as-is because the intent cannot be confirmed
                # from this file -- verify whether "> 0" was meant.
                if len(pathways) > 1:
                    for pathway in pathways:
                        _writeRow(hmdb_mapp_data, [hmdb_id, 'Ch', pathway])
                if cas:
                    _writeRow(hmdb_cas_data, [hmdb_id, cas])
                    _writeRow(cas_denom_data, [cas, 'Ca'])
                if chebi:
                    _writeRow(hmdb_chebi_data, [hmdb_id, chebi])
                if pubchem:
                    _writeRow(hmdb_pubchem_data, [hmdb_id, pubchem])
                if kegg_compound:
                    _writeRow(hmdb_keggcomp_data, [hmdb_id, kegg_compound])

                if infer_from_enzymes:
                    ### If associated enzyme annotated, use the gene symbol to find GO terms associated with the gene symbol for the metabolite
                    ### Not sure if this is a bad idea or not
                    # Dicts are used as de-duplicating collections, as in the
                    # original, so the emitted row order is unchanged.
                    temp_go = {}
                    temp_mapp = {}
                    for protein_name in ed.ProteinNames():
                        protein_name = protein_name.lower()
                        if protein_name in symbol_associations:
                            geneid = symbol_associations[protein_name]
                            if geneid in gene_to_go:
                                for goid in gene_to_go[geneid]:
                                    temp_go[goid] = []
                            if geneid in gene_to_mapp:
                                for mapp in gene_to_mapp[geneid]:
                                    temp_mapp[mapp] = []
                    for goid in temp_go:
                        _writeRow(hmdb_go_data, [hmdb_id, 'GO:' + goid])
                    for mapp in temp_mapp:
                        _writeRow(hmdb_mapp_data, [hmdb_id, 'Ch', mapp])
        finally:
            for handle in opened:
                handle.close()

        print('File: %s exported.' % (gene_dir,))