def process_output(output_file,source='S', sink='T', species_name='',debug=False,de_file=None,mcf=False):
    '''
    Run the standard post-processing steps for responseNet.

    Steps, in order:
      1. read output_file+'.txt' and hand its lines to calculate_node_flow
      2. call write_sif_file to obtain the phenotype/protein/TF/mRNA sets
      3. if an identifier mapping exists for species_name, run the
         identifier_matching parsers to produce '_symbol' copies of the
         sif/eda/noa files that share the output_file prefix

    Arguments:
      output_file  -- path prefix of the result files (without extension)
      source, sink -- forwarded to write_sif_file
      species_name -- 'human' or 'mouseiref' (case-insensitive) selects a
                      pickled mapping below id_directory; any other value
                      skips identifier matching
      debug        -- forwarded to write_sif_file
      de_file      -- forwarded to write_sif_file; when non-empty the
                      '_DiffExpr.noa' file is converted as well
      mcf          -- forwarded to calculate_node_flow and write_sif_file

    Returns the 7-tuple (total,node_flow,comm_flow,phens,prots,tfs,mrnas)
    on every path.  When the output file is missing or carries no flow the
    four trailing sets are empty (and total is 0.0 / the dicts are empty in
    the missing-file case).

    NOTE(review): this file appears to define process_output a second time
    further down (taking idfname instead of species_name); if both live at
    module level the later definition replaces this one.
    '''
    if not os.path.exists(output_file+'.txt'):
        print('Output file missing')
        # Same shape and order as the regular return value so callers can
        # always unpack seven items.
        return 0.0,dict(),dict(),set(),set(),set(),set()

    ##Calculate node flow for ranking of signaling proteins
    with open(output_file+'.txt','r') as flow_file:
        flow_lines=flow_file.readlines()
    (node_flow,comm_flow,total)=calculate_node_flow(flow_lines,mcf)#returns a dictionary of node flow

    #calculate enrichment statistic if mRNA are used?

    #visualize
    if total==0.0:
        print('No flow')
        return total,node_flow,comm_flow,set(),set(),set(),set()

    phens,prots,tfs,mrnas=write_sif_file(output_file, source, sink,node_flow,comm_flow,debug,de_file,mcf)

    ##MODIFIED by SGOSLINE: added this to do identifer matching for the sif files
    # Earlier STRING-alias mappings ('mouse', 'yeast', 'humaniref') are
    # disabled; only the two species below have a mapping file.
    species=species_name.lower()
    if species=='':
        print('No identifier matching, moving on...')
        mapping_path=''
    elif species=='human':
        mapping_path=id_directory+'/humanUniprotHugoEntryMapping.pkl'
    elif species=='mouseiref':
        mapping_path=id_directory+'/mouse_genename_to_9606mitabiref.pkl'
    else:
        mapping_path=''

    if mapping_path!='':
        # NOTE: unpickling runs code stored in the file, so the mapping
        # files under id_directory must come from a trusted source.
        with open(mapping_path,'r') as mapping_file:
            idfile=pickle.load(mapping_file)
    else:
        idfile=''

    if idfile!='':
        print("Matching identifiers")

        def convert(parser,in_suffix,out_suffix,*extra):
            # Open the input, run one identifier_matching parser on it and
            # close the handle again instead of leaking it.
            with open(output_file+in_suffix,'r') as handle:
                parser(handle,output_file+out_suffix,idfile,*extra)

        for stem in ('_all','_mcfs','_no_mrna'):
            convert(identifier_matching.parseSifFileFromStringToGeneName,stem+'.sif',stem+'_symbol.sif')

        #also for the edge attribute files
        for stem in ('_ppi_attributes','_edge_commodity','_edge_type'):
            convert(identifier_matching.parseAttrFileFromStringToGeneName,stem+'.eda',stem+'_symbol.eda')

        convert(identifier_matching.parseTabFileFromStringToGeneName,'_node_comm_flow.noa','_node_comm_flow_symbol.noa')

        ##created new function for node attributes
        convert(identifier_matching.parseNodeAttrFileFromStringToGeneName,'_node_type.noa','_node_type_symbol.noa',True)
        convert(identifier_matching.parseNodeAttrFileFromStringToGeneName,'_node_flow.noa','_node_flow_symbol.noa',False)

        # de_file defaults to None, so test truthiness rather than len().
        if de_file:
            # NOTE(review): input and output paths are identical here (no
            # '_symbol' suffix), unlike every other call -- confirm that
            # overwriting the file in place is intended.
            convert(identifier_matching.parseNodeAttrFileFromStringToGeneName,'_DiffExpr.noa','_DiffExpr.noa',False)

    return total,node_flow,comm_flow,phens,prots,tfs,mrnas
def process_output(output_file,source='S', sink='T', idfname='',debug=False,de_file=None,mcf=False):
    '''
    Run the standard post-processing steps for responseNet.

    Steps, in order:
      1. read output_file+'.txt' and hand its lines to calculate_node_flow
      2. call write_sif_file to obtain the phenotype/protein/TF/mRNA sets
      3. if idfname is given, unpickle it and run the identifier_matching
         parsers to produce '_symbol' copies of the sif/eda/noa files that
         share the output_file prefix

    Arguments:
      output_file  -- path prefix of the result files (without extension)
      source, sink -- forwarded to write_sif_file
      idfname      -- path of a pickled identifier mapping; '' skips
                      identifier matching
      debug        -- forwarded to write_sif_file
      de_file      -- forwarded to write_sif_file; when non-empty the
                      '_DiffExpr.noa' file is converted as well
      mcf          -- forwarded to calculate_node_flow and write_sif_file

    Returns the 7-tuple (total,node_flow,comm_flow,phens,prots,tfs,mrnas).
    When the output file is missing the result is 0.0, two empty dicts and
    four empty sets; when there is no flow the four sets are empty.
    '''
    if not os.path.exists(output_file+'.txt'):
        print('Output file missing')
        return 0.0,dict(),dict(),set(),set(),set(),set()

    ##Calculate node flow for ranking of signaling proteins
    with open(output_file+'.txt','r') as flow_file:
        flow_lines=flow_file.readlines()
    (node_flow,comm_flow,total)=calculate_node_flow(flow_lines,mcf)#returns a dictionary of node flow

    #calculate enrichment statistic if mRNA are used?

    #visualize
    if total==0.0:
        print('No flow')
        return total,node_flow,comm_flow,set(),set(),set(),set()

    phens,prots,tfs,mrnas=write_sif_file(output_file, source, sink,node_flow,comm_flow,debug,de_file,mcf)

    ##MODIFIED by SGOSLINE: added this to do identifer matching for the sif files
    if idfname=='':
        print('No identifier matching, moving on...')
    else:
        print(idfname)
        # NOTE: unpickling runs code stored in the file, so idfname must
        # point at a trusted mapping file.
        with open(idfname,'r') as mapping_file:
            idfile=pickle.load(mapping_file)

        print("Matching identifiers with "+idfname)

        def convert(parser,in_suffix,out_suffix,*extra):
            # Open the input, run one identifier_matching parser on it and
            # close the handle again instead of leaking it.
            with open(output_file+in_suffix,'r') as handle:
                parser(handle,output_file+out_suffix,idfile,*extra)

        for stem in ('_all','_mcfs','_no_mrna'):
            convert(identifier_matching.parseSifFileFromStringToGeneName,stem+'.sif',stem+'_symbol.sif')

        #also for the edge attribute files
        for stem in ('_ppi_attributes','_edge_commodity','_edge_type'):
            convert(identifier_matching.parseAttrFileFromStringToGeneName,stem+'.eda',stem+'_symbol.eda')

        convert(identifier_matching.parseTabFileFromStringToGeneName,'_node_comm_flow.noa','_node_comm_flow_symbol.noa')

        ##created new function for node attributes
        convert(identifier_matching.parseNodeAttrFileFromStringToGeneName,'_node_type.noa','_node_type_symbol.noa',True)
        convert(identifier_matching.parseNodeAttrFileFromStringToGeneName,'_node_flow.noa','_node_flow_symbol.noa',False)

        # de_file defaults to None, so test truthiness rather than len().
        if de_file:
            # NOTE(review): input and output paths are identical here (no
            # '_symbol' suffix), unlike every other call -- confirm that
            # overwriting the file in place is intended.
            convert(identifier_matching.parseNodeAttrFileFromStringToGeneName,'_DiffExpr.noa','_DiffExpr.noa',False)

    return total,node_flow,comm_flow,phens,prots,tfs,mrnas