def process_output(output_file, source='S', sink='T', species_name='',
                   debug=False, de_file=None, mcf=False):
    '''
    Run the standard post-processing steps for responseNet.

    Reads the flow solution from output_file + '.txt', computes node flow,
    writes the sif/attribute files through write_sif_file and, when an
    identifier mapping exists for species_name, writes '*_symbol' copies of
    those files with identifiers translated by identifier_matching.

    NOTE(review): this file defines process_output more than once; a later
    definition (taking idfname instead of species_name) replaces this one
    when the module is loaded.

    Parameters:
      output_file  -- path prefix shared by the input and every output file
      source, sink -- passed through to write_sif_file
      species_name -- 'human' or 'mouseiref' (case-insensitive) selects a
                      pickled mapping under id_directory; anything else
                      disables identifier matching
      debug        -- passed through to write_sif_file
      de_file      -- passed through to write_sif_file; when non-empty the
                      '_DiffExpr.noa' file is translated as well
      mcf          -- passed through to calculate_node_flow/write_sif_file

    Returns the tuple (total, node_flow, comm_flow, phens, prots, tfs,
    mrnas). When the output file is missing or carries no flow, the last
    four entries are empty sets (and, for a missing file, total is 0.0 and
    both flow mappings are empty dicts).
    '''
    flow_path = output_file + '.txt'
    if not os.path.exists(flow_path):
        print('Output file missing')
        # Same arity and order as the normal return so callers can always
        # unpack seven values.
        return 0.0, dict(), dict(), set(), set(), set(), set()

    ##Calculate node flow for ranking of signaling proteins
    with open(flow_path, 'r') as flow_handle:
        flow_lines = flow_handle.readlines()
    # returns a dictionary of node flow
    node_flow, comm_flow, total = calculate_node_flow(flow_lines, mcf)

    # TODO: calculate enrichment statistic if mRNA are used?
    # TODO: visualize
    if total == 0.0:
        print('No flow')
        return total, node_flow, comm_flow, set(), set(), set(), set()

    phens, prots, tfs, mrnas = write_sif_file(output_file, source, sink,
                                              node_flow, comm_flow, debug,
                                              de_file, mcf)

    ##MODIFIED by SGOSLINE: added this to do identifer matching for the
    ##sif files
    # Earlier species-specific alias pickles (mouse, human, yeast,
    # humaniref) are no longer loaded; only the mappings below are used.
    mapping_files = {
        'human': 'humanUniprotHugoEntryMapping.pkl',
        'mouseiref': 'mouse_genename_to_9606mitabiref.pkl',
    }
    species = (species_name or '').lower()
    idfile = None
    if species == '':
        print('No identifier matching, moving on...')
    elif species in mapping_files:
        # NOTE(review): text mode 'r' is kept from the original; confirm it
        # matches the mode the pickles were written with.
        with open(id_directory + '/' + mapping_files[species], 'r') as pkl:
            idfile = pickle.load(pkl)

    if idfile is not None:
        print("Matching identifiers")
        im = identifier_matching
        # (converter, input suffix, output suffix, extra positional args),
        # executed in this order.
        conversions = [
            (im.parseSifFileFromStringToGeneName,
             '_all.sif', '_all_symbol.sif', ()),
            (im.parseSifFileFromStringToGeneName,
             '_mcfs.sif', '_mcfs_symbol.sif', ()),
            (im.parseSifFileFromStringToGeneName,
             '_no_mrna.sif', '_no_mrna_symbol.sif', ()),
            #also for the edge attribute files
            (im.parseAttrFileFromStringToGeneName,
             '_ppi_attributes.eda', '_ppi_attributes_symbol.eda', ()),
            (im.parseAttrFileFromStringToGeneName,
             '_edge_commodity.eda', '_edge_commodity_symbol.eda', ()),
            (im.parseAttrFileFromStringToGeneName,
             '_edge_type.eda', '_edge_type_symbol.eda', ()),
            (im.parseTabFileFromStringToGeneName,
             '_node_comm_flow.noa', '_node_comm_flow_symbol.noa', ()),
            ##created new function for node attributes
            (im.parseNodeAttrFileFromStringToGeneName,
             '_node_type.noa', '_node_type_symbol.noa', (True,)),
            (im.parseNodeAttrFileFromStringToGeneName,
             '_node_flow.noa', '_node_flow_symbol.noa', (False,)),
        ]
        # de_file defaults to None, so test truthiness instead of len().
        if de_file:
            # NOTE(review): input and output paths are identical here
            # (no '_symbol' suffix) -- kept as in the original; confirm
            # this overwrite is intended.
            conversions.append((im.parseNodeAttrFileFromStringToGeneName,
                                '_DiffExpr.noa', '_DiffExpr.noa', (False,)))

        for convert, in_suffix, out_suffix, extra in conversions:
            with open(output_file + in_suffix, 'r') as handle:
                convert(handle, output_file + out_suffix, idfile, *extra)

    return total, node_flow, comm_flow, phens, prots, tfs, mrnas
def process_output(output_file, source='S', sink='T', idfname='',
                   debug=False, de_file=None, mcf=False):
    '''
    Run the standard post-processing steps for responseNet.

    Reads the flow solution from output_file + '.txt', computes node flow,
    writes the sif/attribute files through write_sif_file and, when idfname
    is given, writes '*_symbol' copies of those files with identifiers
    translated by identifier_matching.

    NOTE(review): this file defines process_output more than once; a later
    definition with the same signature replaces this one when the module
    is loaded.

    Parameters:
      output_file  -- path prefix shared by the input and every output file
      source, sink -- passed through to write_sif_file
      idfname      -- path of a pickled identifier mapping; empty (or None)
                      disables identifier matching
      debug        -- passed through to write_sif_file
      de_file      -- passed through to write_sif_file; when non-empty the
                      '_DiffExpr.noa' file is translated as well
      mcf          -- passed through to calculate_node_flow/write_sif_file

    Returns the tuple (total, node_flow, comm_flow, phens, prots, tfs,
    mrnas). When the output file is missing or carries no flow, the last
    four entries are empty sets (and, for a missing file, total is 0.0 and
    both flow mappings are empty dicts).
    '''
    flow_path = output_file + '.txt'
    if not os.path.exists(flow_path):
        print('Output file missing')
        return 0.0, dict(), dict(), set(), set(), set(), set()

    ##Calculate node flow for ranking of signaling proteins
    with open(flow_path, 'r') as flow_handle:
        flow_lines = flow_handle.readlines()
    # returns a dictionary of node flow
    node_flow, comm_flow, total = calculate_node_flow(flow_lines, mcf)

    # TODO: calculate enrichment statistic if mRNA are used?
    # TODO: visualize
    if total == 0.0:
        print('No flow')
        return total, node_flow, comm_flow, set(), set(), set(), set()

    phens, prots, tfs, mrnas = write_sif_file(output_file, source, sink,
                                              node_flow, comm_flow, debug,
                                              de_file, mcf)

    ##MODIFIED by SGOSLINE: added this to do identifer matching for the
    ##sif files
    if not idfname:
        print('No identifier matching, moving on...')
        return total, node_flow, comm_flow, phens, prots, tfs, mrnas

    print(idfname)
    # SECURITY: unpickling can execute arbitrary code; idfname must point
    # to a trusted mapping file.
    # NOTE(review): text mode 'r' is kept from the original; confirm it
    # matches the mode the pickle was written with.
    with open(idfname, 'r') as pkl:
        idfile = pickle.load(pkl)

    print("Matching identifiers with " + idfname)
    im = identifier_matching
    # (converter, input suffix, output suffix, extra positional args),
    # executed in this order.
    conversions = [
        (im.parseSifFileFromStringToGeneName,
         '_all.sif', '_all_symbol.sif', ()),
        (im.parseSifFileFromStringToGeneName,
         '_mcfs.sif', '_mcfs_symbol.sif', ()),
        (im.parseSifFileFromStringToGeneName,
         '_no_mrna.sif', '_no_mrna_symbol.sif', ()),
        #also for the edge attribute files
        (im.parseAttrFileFromStringToGeneName,
         '_ppi_attributes.eda', '_ppi_attributes_symbol.eda', ()),
        (im.parseAttrFileFromStringToGeneName,
         '_edge_commodity.eda', '_edge_commodity_symbol.eda', ()),
        (im.parseAttrFileFromStringToGeneName,
         '_edge_type.eda', '_edge_type_symbol.eda', ()),
        (im.parseTabFileFromStringToGeneName,
         '_node_comm_flow.noa', '_node_comm_flow_symbol.noa', ()),
        ##created new function for node attributes
        (im.parseNodeAttrFileFromStringToGeneName,
         '_node_type.noa', '_node_type_symbol.noa', (True,)),
        (im.parseNodeAttrFileFromStringToGeneName,
         '_node_flow.noa', '_node_flow_symbol.noa', (False,)),
    ]
    # de_file defaults to None, so test truthiness instead of len().
    if de_file:
        # NOTE(review): input and output paths are identical here (no
        # '_symbol' suffix) -- kept as in the original; confirm this
        # overwrite is intended.
        conversions.append((im.parseNodeAttrFileFromStringToGeneName,
                            '_DiffExpr.noa', '_DiffExpr.noa', (False,)))

    for convert, in_suffix, out_suffix, extra in conversions:
        with open(output_file + in_suffix, 'r') as handle:
            convert(handle, output_file + out_suffix, idfile, *extra)

    return total, node_flow, comm_flow, phens, prots, tfs, mrnas
def process_output(output_file, source='S', sink='T', idfname='',
                   debug=False, de_file=None, mcf=False):
    '''
    Run the standard post-processing steps for responseNet.

    Reads the flow solution from output_file + '.txt', computes node flow,
    writes the sif/attribute files through write_sif_file and, when idfname
    is given, writes '*_symbol' copies of those files with identifiers
    translated by identifier_matching.

    Parameters:
      output_file  -- path prefix shared by the input and every output file
      source, sink -- passed through to write_sif_file
      idfname      -- path of a pickled identifier mapping; empty (or None)
                      disables identifier matching
      debug        -- passed through to write_sif_file
      de_file      -- passed through to write_sif_file; when non-empty the
                      '_DiffExpr.noa' file is translated as well
      mcf          -- passed through to calculate_node_flow/write_sif_file

    Returns the tuple (total, node_flow, comm_flow, phens, prots, tfs,
    mrnas). When the output file is missing or carries no flow, the last
    four entries are empty sets (and, for a missing file, total is 0.0 and
    both flow mappings are empty dicts).
    '''
    flow_path = output_file + '.txt'
    if not os.path.exists(flow_path):
        print('Output file missing')
        return 0.0, dict(), dict(), set(), set(), set(), set()

    ##Calculate node flow for ranking of signaling proteins
    with open(flow_path, 'r') as flow_handle:
        flow_lines = flow_handle.readlines()
    # returns a dictionary of node flow
    node_flow, comm_flow, total = calculate_node_flow(flow_lines, mcf)

    # TODO: calculate enrichment statistic if mRNA are used?
    # TODO: visualize
    if total == 0.0:
        print('No flow')
        return total, node_flow, comm_flow, set(), set(), set(), set()

    phens, prots, tfs, mrnas = write_sif_file(output_file, source, sink,
                                              node_flow, comm_flow, debug,
                                              de_file, mcf)

    ##MODIFIED by SGOSLINE: added this to do identifer matching for the
    ##sif files
    if not idfname:
        print('No identifier matching, moving on...')
        return total, node_flow, comm_flow, phens, prots, tfs, mrnas

    print(idfname)
    # SECURITY: unpickling can execute arbitrary code; idfname must point
    # to a trusted mapping file.
    # NOTE(review): text mode 'r' is kept from the original; confirm it
    # matches the mode the pickle was written with.
    with open(idfname, 'r') as pkl:
        idfile = pickle.load(pkl)

    print("Matching identifiers with " + idfname)
    im = identifier_matching
    # (converter, input suffix, output suffix, extra positional args),
    # executed in this order.
    conversions = [
        (im.parseSifFileFromStringToGeneName,
         '_all.sif', '_all_symbol.sif', ()),
        (im.parseSifFileFromStringToGeneName,
         '_mcfs.sif', '_mcfs_symbol.sif', ()),
        (im.parseSifFileFromStringToGeneName,
         '_no_mrna.sif', '_no_mrna_symbol.sif', ()),
        #also for the edge attribute files
        (im.parseAttrFileFromStringToGeneName,
         '_ppi_attributes.eda', '_ppi_attributes_symbol.eda', ()),
        (im.parseAttrFileFromStringToGeneName,
         '_edge_commodity.eda', '_edge_commodity_symbol.eda', ()),
        (im.parseAttrFileFromStringToGeneName,
         '_edge_type.eda', '_edge_type_symbol.eda', ()),
        (im.parseTabFileFromStringToGeneName,
         '_node_comm_flow.noa', '_node_comm_flow_symbol.noa', ()),
        ##created new function for node attributes
        (im.parseNodeAttrFileFromStringToGeneName,
         '_node_type.noa', '_node_type_symbol.noa', (True,)),
        (im.parseNodeAttrFileFromStringToGeneName,
         '_node_flow.noa', '_node_flow_symbol.noa', (False,)),
    ]
    # de_file defaults to None, so test truthiness instead of len().
    if de_file:
        # NOTE(review): input and output paths are identical here (no
        # '_symbol' suffix) -- kept as in the original; confirm this
        # overwrite is intended.
        conversions.append((im.parseNodeAttrFileFromStringToGeneName,
                            '_DiffExpr.noa', '_DiffExpr.noa', (False,)))

    for convert, in_suffix, out_suffix, extra in conversions:
        with open(output_file + in_suffix, 'r') as handle:
            convert(handle, output_file + out_suffix, idfile, *extra)

    return total, node_flow, comm_flow, phens, prots, tfs, mrnas