Пример #1
0
def process_output(output_file,source='S', sink='T', species_name='',debug=False,de_file=None,mcf=False):
    '''
    Run the standard post-processing steps for responseNet.

    Steps, in order:
      1. read output_file + '.txt' and compute flows with
         calculate_node_flow;
      2. write the network/attribute files with write_sif_file;
      3. if species_name selects an identifier mapping, run the
         identifier_matching parsers over the generated files, writing
         '*_symbol.*' copies.

    Parameters:
      output_file  -- path prefix; every file read or written here is
                      output_file plus a fixed suffix.
      source, sink -- forwarded unchanged to write_sif_file.
      species_name -- 'human' or 'mouseiref' (case-insensitive) picks a
                      pickled mapping under id_directory; any other value,
                      including '', skips identifier matching.
      debug        -- forwarded unchanged to write_sif_file.
      de_file      -- forwarded to write_sif_file; when truthy the
                      '_DiffExpr.noa' file is translated as well.
      mcf          -- forwarded to calculate_node_flow and write_sif_file.

    Returns:
      A 7-tuple (total, node_flow, comm_flow, phens, prots, tfs, mrnas).
      When the '.txt' file is missing the result is
      (0.0, {}, {}, set(), set(), set(), set()); when total is 0.0 the
      last four entries are empty sets.
    '''
    if not os.path.exists(output_file + '.txt'):
        print('Output file missing')
        # Same arity and ordering as every other return so callers can
        # always unpack seven values.
        return 0.0, dict(), dict(), set(), set(), set(), set()

    ## Calculate node flow for ranking of signaling proteins
    with open(output_file + '.txt', 'r') as flow_handle:
        flow_lines = flow_handle.readlines()
    node_flow, comm_flow, total = calculate_node_flow(flow_lines, mcf)

    if total == 0.0:
        print('No flow')
        return total, node_flow, comm_flow, set(), set(), set(), set()

    phens, prots, tfs, mrnas = write_sif_file(output_file, source, sink,
                                              node_flow, comm_flow, debug,
                                              de_file, mcf)

    ## MODIFIED by SGOSLINE: identifier matching for the sif files
    mapping_files = {
        'human': 'humanUniprotHugoEntryMapping.pkl',
        'mouseiref': 'mouse_genename_to_9606mitabiref.pkl',
    }
    species = species_name.lower()
    idfile = None  # None means "no identifier matching"
    if species == '':
        print('No identifier matching, moving on...')
    elif species in mapping_files:
        # NOTE: pickle.load can execute arbitrary code; the mapping files
        # under id_directory must come from a trusted source.
        with open(id_directory + '/' + mapping_files[species], 'r') as mapping_handle:
            idfile = pickle.load(mapping_handle)

    if idfile is not None:
        print("Matching identifiers")
        # (parser, input suffix, output suffix, extra positional args),
        # listed in the order the files are processed.
        translations = [
            (identifier_matching.parseSifFileFromStringToGeneName,
             '_all.sif', '_all_symbol.sif', ()),
            (identifier_matching.parseSifFileFromStringToGeneName,
             '_mcfs.sif', '_mcfs_symbol.sif', ()),
            (identifier_matching.parseSifFileFromStringToGeneName,
             '_no_mrna.sif', '_no_mrna_symbol.sif', ()),
            # also for the edge attribute files
            (identifier_matching.parseAttrFileFromStringToGeneName,
             '_ppi_attributes.eda', '_ppi_attributes_symbol.eda', ()),
            (identifier_matching.parseAttrFileFromStringToGeneName,
             '_edge_commodity.eda', '_edge_commodity_symbol.eda', ()),
            (identifier_matching.parseAttrFileFromStringToGeneName,
             '_edge_type.eda', '_edge_type_symbol.eda', ()),
            (identifier_matching.parseTabFileFromStringToGeneName,
             '_node_comm_flow.noa', '_node_comm_flow_symbol.noa', ()),
            # node attribute files use a dedicated parser
            (identifier_matching.parseNodeAttrFileFromStringToGeneName,
             '_node_type.noa', '_node_type_symbol.noa', (True,)),
            (identifier_matching.parseNodeAttrFileFromStringToGeneName,
             '_node_flow.noa', '_node_flow_symbol.noa', (False,)),
        ]
        # de_file defaults to None, so test truthiness instead of len().
        if de_file:
            # NOTE(review): input and output are the same path here,
            # unlike the other files -- confirm this is intended.
            translations.append(
                (identifier_matching.parseNodeAttrFileFromStringToGeneName,
                 '_DiffExpr.noa', '_DiffExpr.noa', (False,)))

        for parser, in_suffix, out_suffix, extra_args in translations:
            with open(output_file + in_suffix, 'r') as in_handle:
                parser(in_handle, output_file + out_suffix, idfile, *extra_args)

    return total, node_flow, comm_flow, phens, prots, tfs, mrnas
Пример #2
0
def process_output(output_file,source='S', sink='T', idfname='',debug=False,de_file=None,mcf=False):
    '''
    Run the standard post-processing steps for responseNet.

    Steps, in order:
      1. read output_file + '.txt' and compute flows with
         calculate_node_flow;
      2. write the network/attribute files with write_sif_file;
      3. if idfname is given, unpickle it and run the identifier_matching
         parsers over the generated files, writing '*_symbol.*' copies.

    Parameters:
      output_file  -- path prefix; every file read or written here is
                      output_file plus a fixed suffix.
      source, sink -- forwarded unchanged to write_sif_file.
      idfname      -- path of a pickled identifier mapping; '' skips
                      identifier matching.
      debug        -- forwarded unchanged to write_sif_file.
      de_file      -- forwarded to write_sif_file; when truthy the
                      '_DiffExpr.noa' file is translated as well.
      mcf          -- forwarded to calculate_node_flow and write_sif_file.

    Returns:
      A 7-tuple (total, node_flow, comm_flow, phens, prots, tfs, mrnas).
      When the '.txt' file is missing the result is
      (0.0, {}, {}, set(), set(), set(), set()); when total is 0.0 the
      last four entries are empty sets.
    '''
    if not os.path.exists(output_file+'.txt'):
        print('Output file missing')
        return 0.0,dict(),dict(),set(),set(),set(),set()

    ##Calculate node flow for ranking of signaling proteins
    with open(output_file+'.txt','r') as flow_handle:
        flow_lines=flow_handle.readlines()
    (node_flow,comm_flow,total)=calculate_node_flow(flow_lines,mcf)

    if total==0.0:
        print('No flow')
        return total,node_flow,comm_flow,set(),set(),set(),set()

    phens,prots,tfs,mrnas=write_sif_file(output_file, source, sink,node_flow,comm_flow,debug,de_file,mcf)

    ##MODIFIED by SGOSLINE: identifier matching for the sif files
    if idfname=='':
        print('No identifier matching, moving on...')
        return total,node_flow,comm_flow,phens,prots,tfs,mrnas

    print(idfname)
    # NOTE: pickle.load can execute arbitrary code; idfname must point to
    # a trusted file.
    with open(idfname,'r') as mapping_handle:
        idfile=pickle.load(mapping_handle)

    print("Matching identifiers with "+idfname)
    # (parser, input suffix, output suffix, extra positional args),
    # listed in the order the files are processed.
    translations=[
        (identifier_matching.parseSifFileFromStringToGeneName,'_all.sif','_all_symbol.sif',()),
        (identifier_matching.parseSifFileFromStringToGeneName,'_mcfs.sif','_mcfs_symbol.sif',()),
        (identifier_matching.parseSifFileFromStringToGeneName,'_no_mrna.sif','_no_mrna_symbol.sif',()),
        #also for the edge attribute files
        (identifier_matching.parseAttrFileFromStringToGeneName,'_ppi_attributes.eda','_ppi_attributes_symbol.eda',()),
        (identifier_matching.parseAttrFileFromStringToGeneName,'_edge_commodity.eda','_edge_commodity_symbol.eda',()),
        (identifier_matching.parseAttrFileFromStringToGeneName,'_edge_type.eda','_edge_type_symbol.eda',()),
        (identifier_matching.parseTabFileFromStringToGeneName,'_node_comm_flow.noa','_node_comm_flow_symbol.noa',()),
        #node attribute files use a dedicated parser
        (identifier_matching.parseNodeAttrFileFromStringToGeneName,'_node_type.noa','_node_type_symbol.noa',(True,)),
        (identifier_matching.parseNodeAttrFileFromStringToGeneName,'_node_flow.noa','_node_flow_symbol.noa',(False,)),
    ]
    # de_file defaults to None, so test truthiness instead of len().
    if de_file:
        # NOTE(review): input and output are the same path here, unlike
        # the other files -- confirm this is intended.
        translations.append(
            (identifier_matching.parseNodeAttrFileFromStringToGeneName,'_DiffExpr.noa','_DiffExpr.noa',(False,)))

    for parser,in_suffix,out_suffix,extra_args in translations:
        with open(output_file+in_suffix,'r') as in_handle:
            parser(in_handle,output_file+out_suffix,idfile,*extra_args)

    return total,node_flow,comm_flow,phens,prots,tfs,mrnas
Пример #3
0
def process_output(output_file,
                   source='S',
                   sink='T',
                   idfname='',
                   debug=False,
                   de_file=None,
                   mcf=False):
    '''
    Run the standard post-processing steps for responseNet.

    The function reads ``output_file + '.txt'``, derives the flows with
    calculate_node_flow, lets write_sif_file produce the network and
    attribute files, and finally (only when idfname is non-empty) loads
    the pickled identifier mapping and runs the identifier_matching
    parsers over the generated files to write '*_symbol.*' copies.

    Parameters:
      output_file  -- path prefix; every file read or written here is
                      output_file plus a fixed suffix.
      source, sink -- forwarded unchanged to write_sif_file.
      idfname      -- path of a pickled identifier mapping; '' skips
                      identifier matching.
      debug        -- forwarded unchanged to write_sif_file.
      de_file      -- forwarded to write_sif_file; when truthy the
                      '_DiffExpr.noa' file is translated as well.
      mcf          -- forwarded to calculate_node_flow and write_sif_file.

    Returns:
      A 7-tuple (total, node_flow, comm_flow, phens, prots, tfs, mrnas).
      When the '.txt' file is missing the result is
      (0.0, {}, {}, set(), set(), set(), set()); when total is 0.0 the
      last four entries are empty sets.
    '''
    flow_path = output_file + '.txt'
    if not os.path.exists(flow_path):
        print('Output file missing')
        return 0.0, dict(), dict(), set(), set(), set(), set()

    ##Calculate node flow for ranking of signaling proteins
    with open(flow_path, 'r') as flow_handle:
        flow_lines = flow_handle.readlines()
    (node_flow, comm_flow, total) = calculate_node_flow(flow_lines, mcf)

    if total == 0.0:
        print('No flow')
        return total, node_flow, comm_flow, set(), set(), set(), set()

    phens, prots, tfs, mrnas = write_sif_file(output_file, source, sink,
                                              node_flow, comm_flow, debug,
                                              de_file, mcf)

    ##MODIFIED by SGOSLINE: identifier matching for the sif files
    if idfname == '':
        print('No identifier matching, moving on...')
        return total, node_flow, comm_flow, phens, prots, tfs, mrnas

    print(idfname)
    # NOTE: pickle.load can execute arbitrary code; idfname must point to
    # a trusted file.
    with open(idfname, 'r') as mapping_handle:
        idfile = pickle.load(mapping_handle)

    print("Matching identifiers with " + idfname)

    sif_parser = identifier_matching.parseSifFileFromStringToGeneName
    attr_parser = identifier_matching.parseAttrFileFromStringToGeneName
    tab_parser = identifier_matching.parseTabFileFromStringToGeneName
    node_attr_parser = identifier_matching.parseNodeAttrFileFromStringToGeneName

    # (parser, input suffix, output suffix, extra positional args),
    # listed in the order the files are processed.
    translations = [
        (sif_parser, '_all.sif', '_all_symbol.sif', ()),
        (sif_parser, '_mcfs.sif', '_mcfs_symbol.sif', ()),
        (sif_parser, '_no_mrna.sif', '_no_mrna_symbol.sif', ()),
        #also for the edge attribute files
        (attr_parser, '_ppi_attributes.eda', '_ppi_attributes_symbol.eda', ()),
        (attr_parser, '_edge_commodity.eda', '_edge_commodity_symbol.eda', ()),
        (attr_parser, '_edge_type.eda', '_edge_type_symbol.eda', ()),
        (tab_parser, '_node_comm_flow.noa', '_node_comm_flow_symbol.noa', ()),
        #node attribute files use a dedicated parser
        (node_attr_parser, '_node_type.noa', '_node_type_symbol.noa', (True, )),
        (node_attr_parser, '_node_flow.noa', '_node_flow_symbol.noa', (False, )),
    ]
    # de_file defaults to None, so test truthiness instead of len().
    if de_file:
        # NOTE(review): input and output are the same path here, unlike
        # the other files -- confirm this is intended.
        translations.append(
            (node_attr_parser, '_DiffExpr.noa', '_DiffExpr.noa', (False, )))

    for parser, in_suffix, out_suffix, extra_args in translations:
        with open(output_file + in_suffix, 'r') as in_handle:
            parser(in_handle, output_file + out_suffix, idfile, *extra_args)

    return total, node_flow, comm_flow, phens, prots, tfs, mrnas