def formatPhenoFile(fl):
    """Write a tab-delimited phenotype table built from the groups and batch files.

    The groups file path comes from ``fl.GroupsFile()``; the batch and
    phenotype paths are derived from it by replacing ``'groups.'`` with
    ``'batch.'`` and ``'pheno.'`` respectively. One row is written per sample
    in the groups table: the sample, the last column of its groups entry, and
    the first column of its batch entry.

    Args:
        fl: object exposing ``GroupsFile()``, which returns the groups file path.

    Returns:
        The path of the phenotype file that was written.

    Note:
        Lookup failures (e.g. a sample present in the groups table but absent
        from the batch table) propagate to the caller; the output file is
        still closed in that case.
    """
    # NOTE(review): a second definition of formatPhenoFile follows this one in
    # the file and replaces it when the module is loaded.
    expr_group_dir = fl.GroupsFile()
    # str methods instead of the deprecated string-module functions.
    expr_batch_dir = expr_group_dir.replace('groups.', 'batch.')
    pheno_dir = expr_group_dir.replace('groups.', 'pheno.')

    import gene_associations
    group_db = gene_associations.importGeneric(expr_group_dir)
    batch_db = gene_associations.importGeneric(expr_batch_dir)

    pheno_obj = export.ExportFile(pheno_dir)
    try:
        pheno_obj.write('sample\tgroup\tbatch\n')
        for sample in group_db:
            group_name = group_db[sample][-1]
            batch_number = batch_db[sample][0]
            pheno_obj.write('\t'.join([sample, group_name, batch_number]) + '\n')
    finally:
        # Release the handle even when a lookup above raises.
        pheno_obj.close()
    return pheno_dir
def formatPhenoFile(fl):
    """Write a tab-delimited phenotype table built from the groups and batch files.

    The groups file path comes from ``fl.GroupsFile()``; the batch and
    phenotype paths are derived from it by replacing ``"groups."`` with
    ``"batch."`` and ``"pheno."`` respectively. One row is written per sample
    in the groups table: the sample, the last column of its groups entry, and
    the first column of its batch entry.

    Args:
        fl: object exposing ``GroupsFile()``, which returns the groups file path.

    Returns:
        The path of the phenotype file that was written.

    Note:
        Lookup failures (e.g. a sample present in the groups table but absent
        from the batch table) propagate to the caller; the output file is
        still closed in that case.
    """
    # NOTE(review): another definition of formatPhenoFile precedes this one in
    # the file; this later definition is the one the module ends up binding.
    expr_group_dir = fl.GroupsFile()
    # str methods instead of the deprecated string-module functions.
    expr_batch_dir = expr_group_dir.replace("groups.", "batch.")
    pheno_dir = expr_group_dir.replace("groups.", "pheno.")

    import gene_associations
    group_db = gene_associations.importGeneric(expr_group_dir)
    batch_db = gene_associations.importGeneric(expr_batch_dir)

    pheno_obj = export.ExportFile(pheno_dir)
    try:
        pheno_obj.write("sample\tgroup\tbatch\n")
        for sample in group_db:
            group_name = group_db[sample][-1]
            batch_number = batch_db[sample][0]
            pheno_obj.write("\t".join([sample, group_name, batch_number]) + "\n")
    finally:
        # Release the handle even when a lookup above raises.
        pheno_obj.close()
    return pheno_dir
def importHMDBMetaboCardFlatFile():
    """Parse the HMDB MetaboCards flat file and export the collected entries.

    Reads ``BuildDBs/HMDB/metabocards.txt`` (resolved through ``filepath``)
    line by line. A non-empty line starting with ``#`` sets the current field
    name; any other non-empty line is appended to the current field's data.
    An empty line commits the accumulated data to the current entry when the
    field is one of the stored fields, then resets the field state. A line
    containing ``END_METABOCARD`` wraps the current entry in ``HMBDInfo`` and
    starts a new one. The resulting list is handed to ``exportTables``.

    Side effects:
        Sets the module-level ``kegg_pathways`` from
        ``BuildDBs/HMDB/map_title.tab`` and prints the number of entries read.
    """
    global kegg_pathways

    filename = 'BuildDBs/HMDB/metabocards.txt'
    # Only these card fields are kept; a frozenset gives constant-time lookup.
    fields_to_store = frozenset([
        'hmdb_id', 'description', 'name', 'secondary_id', 'iupac',
        'biocyc_id', 'cas_number', 'chebi_id', 'kegg_compound_id',
        'pubchem_compound_id', 'pathway_1_kegg_id',
        'metabolic_enzyme_1_gene_name', 'metabolic_enzyme_1_swissprot_id',
        'metabolic_enzyme_2_gene_name', 'metabolic_enzyme_2_swissprot_id',
        'pathway_1_smpdb_id', 'pathway_2_smpdb_id', 'pathway_3_smpdb_id',
        'pathway_1_name', 'pathway_2_name', 'pathway_3_name',
        'pathway_2_kegg_id', 'pathway_3_kegg_id',
    ])
    fn = filepath(filename)
    kegg_pathways = gene_associations.importGeneric(
        'BuildDBs/HMDB/map_title.tab')

    field_name = ''
    field_data = ''
    entry_data = {}
    hmdb = []

    # The with-block closes the input file even if parsing raises.
    with open(fn, 'rU') as metabocards:
        for line in metabocards:
            data = cleanUpLine(line)
            if data:
                if data[0] == '#':
                    # Drop the two leading characters and the final one
                    # (presumably '# ' and a trailing ':' -- confirm against
                    # the file format).
                    field_name = data[2:-1]
                else:
                    # Continuation lines are concatenated without a separator.
                    field_data += data
            else:
                # An empty line terminates the current field.
                if field_name in fields_to_store:
                    entry_data[field_name] = field_data
                field_name = ''
                field_data = ''
            # Checked for every line, not only empty ones: the marker line
            # itself is non-empty.
            if 'END_METABOCARD' in data:
                hmdb.append(HMBDInfo(entry_data))
                entry_data = {}

    # Single-argument form prints the same text under both the print
    # statement and the print function.
    print('%d HMDB entries obtained' % len(hmdb))
    exportTables(hmdb)