Пример #1
0
def process_output(output_file,source='S', sink='T', species_name='',debug=False,de_file=None,mcf=False):
    '''
    Run the standard post-processing steps for responseNet.

    Reads ``output_file + '.txt'``, computes node flow, writes the SIF and
    attribute files via ``write_sif_file`` and, when a known species is
    given, writes ``*_symbol`` copies of those files with identifiers
    translated through a pickled mapping found in ``id_directory``.

    Parameters:
        output_file  -- path prefix (no extension) of the solver output.
        source, sink -- passed through to ``write_sif_file``.
        species_name -- 'human' or 'mouseiref' (case-insensitive) selects an
                        identifier mapping; '' or any other value skips
                        identifier matching.
        debug        -- passed through to ``write_sif_file``.
        de_file      -- passed through to ``write_sif_file``; when non-empty
                        the ``_DiffExpr.noa`` file is translated as well.
        mcf          -- passed through to ``calculate_node_flow`` and
                        ``write_sif_file``.

    Returns:
        The 7-tuple ``(total, node_flow, comm_flow, phens, prots, tfs, mrnas)``.
        The last four values are whatever ``write_sif_file`` returns; when
        the output file is missing or carries no flow they are empty sets
        (and, for a missing file, total is 0.0 and the two flows are empty
        dicts).
    '''
    flow_path=output_file+'.txt'
    if not os.path.exists(flow_path):
        print('Output file missing')
        # Same shape and order as the normal return so callers can always
        # unpack seven values.
        return 0.0,dict(),dict(),set(),set(),set(),set()

    ##Calculate node flow for ranking of signaling proteins
    with open(flow_path,'r') as flow_handle:
        flow_lines=flow_handle.readlines()
    (node_flow,comm_flow,total)=calculate_node_flow(flow_lines,mcf)#returns a dictionary of node flow

    if total==0.0:
        print('No flow')
        return total,node_flow,comm_flow,set(),set(),set(),set()

    phens,prots,tfs,mrnas=write_sif_file(output_file, source, sink,node_flow,comm_flow,debug,de_file,mcf)

    ##MODIFIED by SGOSLINE: added this to do identifier matching for the sif files
    # Pickled identifier mappings, keyed by lower-cased species name.
    mapping_files={
        'human':'/humanUniprotHugoEntryMapping.pkl',
        'mouseiref':'/mouse_genename_to_9606mitabiref.pkl',
    }
    species=species_name.lower()
    idfile=''   # '' means "no mapping loaded, skip identifier matching"
    if species=='':
        print('No identifier matching, moving on...')
    elif species in mapping_files:
        # NOTE: mode 'r' is kept from the original (Python 2); Python 3
        # requires 'rb' for pickle files.
        with open(id_directory+mapping_files[species],'r') as mapping_handle:
            idfile=pickle.load(mapping_handle)

    if idfile!='':
        print("Matching identifiers")
        parse_sif=identifier_matching.parseSifFileFromStringToGeneName
        parse_attr=identifier_matching.parseAttrFileFromStringToGeneName
        parse_tab=identifier_matching.parseTabFileFromStringToGeneName
        parse_node_attr=identifier_matching.parseNodeAttrFileFromStringToGeneName

        # (parser, input suffix, output suffix, extra positional arguments),
        # processed in the same order as before.
        conversions=[
            (parse_sif,'_all.sif','_all_symbol.sif',()),
            (parse_sif,'_mcfs.sif','_mcfs_symbol.sif',()),
            (parse_sif,'_no_mrna.sif','_no_mrna_symbol.sif',()),
            #also for the edge attribute files
            (parse_attr,'_ppi_attributes.eda','_ppi_attributes_symbol.eda',()),
            (parse_attr,'_edge_commodity.eda','_edge_commodity_symbol.eda',()),
            (parse_attr,'_edge_type.eda','_edge_type_symbol.eda',()),
            (parse_tab,'_node_comm_flow.noa','_node_comm_flow_symbol.noa',()),
            ##created new function for node attributes
            (parse_node_attr,'_node_type.noa','_node_type_symbol.noa',(True,)),
            (parse_node_attr,'_node_flow.noa','_node_flow_symbol.noa',(False,)),
        ]
        # de_file defaults to None, so test truthiness rather than len().
        if de_file:
            # NOTE(review): input and output are the same path here, unlike
            # the '_symbol' outputs above -- confirm this is intended.
            conversions.append((parse_node_attr,'_DiffExpr.noa','_DiffExpr.noa',(False,)))

        for parser,in_suffix,out_suffix,extra in conversions:
            with open(output_file+in_suffix,'r') as in_handle:
                parser(in_handle,output_file+out_suffix,idfile,*extra)
    return total,node_flow,comm_flow,phens,prots,tfs,mrnas
Пример #2
0
def process_output(output_file,source='S', sink='T', idfname='',debug=False,de_file=None,mcf=False):
    '''
    Run the standard post-processing steps for responseNet.

    Reads ``output_file + '.txt'``, computes node flow, writes the SIF and
    attribute files via ``write_sif_file`` and, when ``idfname`` is given,
    writes ``*_symbol`` copies of those files with identifiers translated
    through the mapping unpickled from ``idfname``.

    Parameters:
        output_file  -- path prefix (no extension) of the solver output.
        source, sink -- passed through to ``write_sif_file``.
        idfname      -- path of a pickled identifier mapping; '' skips
                        identifier matching.
        debug        -- passed through to ``write_sif_file``.
        de_file      -- passed through to ``write_sif_file``; when non-empty
                        the ``_DiffExpr.noa`` file is translated as well.
        mcf          -- passed through to ``calculate_node_flow`` and
                        ``write_sif_file``.

    Returns:
        The 7-tuple ``(total, node_flow, comm_flow, phens, prots, tfs, mrnas)``.
        The last four values are whatever ``write_sif_file`` returns; when
        the output file is missing or carries no flow they are empty sets
        (and, for a missing file, total is 0.0 and the two flows are empty
        dicts).
    '''
    flow_path=output_file+'.txt'
    if not os.path.exists(flow_path):
        print('Output file missing')
        return 0.0,dict(),dict(),set(),set(),set(),set()

    ##Calculate node flow for ranking of signaling proteins
    with open(flow_path,'r') as flow_handle:
        flow_lines=flow_handle.readlines()
    (node_flow,comm_flow,total)=calculate_node_flow(flow_lines,mcf)#returns a dictionary of node flow

    if total==0.0:
        print('No flow')
        return total,node_flow,comm_flow,set(),set(),set(),set()

    phens,prots,tfs,mrnas=write_sif_file(output_file, source, sink,node_flow,comm_flow,debug,de_file,mcf)

    ##MODIFIED by SGOSLINE: added this to do identifier matching for the sif files
    if idfname=='':
        print('No identifier matching, moving on...')
        return total,node_flow,comm_flow,phens,prots,tfs,mrnas

    print(idfname)
    # SECURITY NOTE: unpickling executes arbitrary code; idfname must point
    # to a trusted file. Mode 'r' is kept from the original (Python 2);
    # Python 3 requires 'rb' for pickle files.
    with open(idfname,'r') as mapping_handle:
        idfile=pickle.load(mapping_handle)

    print("Matching identifiers with "+idfname)
    parse_sif=identifier_matching.parseSifFileFromStringToGeneName
    parse_attr=identifier_matching.parseAttrFileFromStringToGeneName
    parse_tab=identifier_matching.parseTabFileFromStringToGeneName
    parse_node_attr=identifier_matching.parseNodeAttrFileFromStringToGeneName

    # (parser, input suffix, output suffix, extra positional arguments),
    # processed in the same order as before.
    conversions=[
        (parse_sif,'_all.sif','_all_symbol.sif',()),
        (parse_sif,'_mcfs.sif','_mcfs_symbol.sif',()),
        (parse_sif,'_no_mrna.sif','_no_mrna_symbol.sif',()),
        #also for the edge attribute files
        (parse_attr,'_ppi_attributes.eda','_ppi_attributes_symbol.eda',()),
        (parse_attr,'_edge_commodity.eda','_edge_commodity_symbol.eda',()),
        (parse_attr,'_edge_type.eda','_edge_type_symbol.eda',()),
        (parse_tab,'_node_comm_flow.noa','_node_comm_flow_symbol.noa',()),
        ##created new function for node attributes
        (parse_node_attr,'_node_type.noa','_node_type_symbol.noa',(True,)),
        (parse_node_attr,'_node_flow.noa','_node_flow_symbol.noa',(False,)),
    ]
    # de_file defaults to None, so test truthiness rather than len().
    if de_file:
        # NOTE(review): input and output are the same path here, unlike the
        # '_symbol' outputs above -- confirm this is intended.
        conversions.append((parse_node_attr,'_DiffExpr.noa','_DiffExpr.noa',(False,)))

    for parser,in_suffix,out_suffix,extra in conversions:
        with open(output_file+in_suffix,'r') as in_handle:
            parser(in_handle,output_file+out_suffix,idfile,*extra)
    return total,node_flow,comm_flow,phens,prots,tfs,mrnas