示例#1
0
文件: rp_adapt.py 项目: bxlab/esperr
def run( pos_file, neg_file, out_dir, format, align_count, mapping ):
    """
    Greedily collapse `mapping`, one pair of symbols per pass.

    On every pass each pair returned by `all_pairs` is tentatively collapsed
    and scored with `calc_merit` against the positive and negative strings;
    the collapse with the highest merit replaces `mapping`. Passes repeat
    while the symbol count is greater than `stop_size` (not a parameter; it
    is looked up outside this function).

    pos_file, neg_file -- handed to `io.get_reader` together with `format`
    out_dir -- directory receiving 'merits.txt' plus one '<NNN>.mapping'
               file per pass (NNN is the symbol count after that pass)
    align_count -- passed to `rp.mapping.DNA.reverse_map` when writing a table
    mapping -- starting mapping; must provide `get_out_size`, `collapse`
               and `get_table`

    Raises ValueError when no candidate collapse in a pass has a merit
    greater than 0, since there is then no mapping to continue with.
    """

    # Open merit output
    merit_out = open( os.path.join( out_dir, 'merits.txt' ), 'w' )

    # try/finally (rather than `with`) keeps this usable on old Python 2
    # interpreters while still guaranteeing the merit file gets closed
    try:

        # Read integer sequences
        pos_strings = list( io.get_reader( pos_file, format, None ) )
        neg_strings = list( io.get_reader( neg_file, format, None ) )

        symbol_count = mapping.get_out_size()

        # Collapse
        while symbol_count > stop_size:

            print >> sys.stderr, "Collapsing from:", symbol_count

            best_mapping = None
            best_merit = 0

            # Number of unordered pairs; floor division keeps the bound an
            # integer even if true division is in effect
            pb = ProgressBar( 0, symbol_count * ( symbol_count - 1 ) // 2, 78 )

            count = 0

            pairs = all_pairs( symbol_count )

            for i, j in pairs:

                collapsed = mapping.collapse( i, j )

                merit = calc_merit( pos_strings, neg_strings, collapsed )

                # Strict comparison: on ties the earliest pair wins
                if merit > best_merit:
                    best_merit = merit
                    best_mapping = collapsed

                count += 1
                pb.update_and_print( count, sys.stderr )

            # Without this check `mapping` would silently become None and
            # fail later with an unrelated looking AttributeError
            if best_mapping is None:
                raise ValueError( "No collapse of %d symbols had merit greater than 0" % symbol_count )

            mapping = best_mapping
            symbol_count -= 1

            # Append merit to merit output
            print >>merit_out, symbol_count, best_merit

            print >>sys.stderr, "\nBest Merit %d." % best_merit,

            # Write best mapping to a file
            mapping_out = open( os.path.join( out_dir, "%03d.mapping" % symbol_count ), 'w' )
            try:
                for i, symbol in enumerate( mapping.get_table() ):
                    print >>mapping_out, str.join( '', rp.mapping.DNA.reverse_map( i, align_count ) ), symbol
            finally:
                mapping_out.close()

    finally:
        merit_out.close()
示例#2
0
def run( pos_file, neg_file, out_dir, format, align_count, mapping ):

    # Open merit output
    merit_out = open( os.path.join( out_dir, 'merits.txt' ), 'w' )

    # Read integer sequences
    pos_strings = list( rp.io.get_reader( pos_file, format, None ) )
    neg_strings = list( rp.io.get_reader( neg_file, format, None ) )

    symbol_count = mapping.get_out_size()

    # Collapse
    while symbol_count > stop_size:

        # Sync nodes on each pass, may not be required
        pypar.barrier()

        if node_id == 0:
            print "Collapsing from:", symbol_count

        pairs = all_pairs( symbol_count )

        # Decide which subrange of all pairs this node will handle
        lo, hi = pypar.balance( len( pairs ), nodes, node_id )

        # Find best collapsed mapping in interval
        best_i, best_j, best_merit = None, None, 0
        for i, j in pairs[lo:hi]:
            merit = calc_merit( pos_strings, neg_strings, mapping.collapse( i, j )  ) 
            if merit > best_merit:
                best_i, best_j, best_merit = i, j, merit
            
        # Aggregate results
        if node_id != 0:
            # Send best i, j, merit to the master
            pypar.send( ( best_i, best_j, merit ), 0 )
        else:
            # I am the master, get results from all other nodes and determine
            # which had the best merit
            for other_node_id in range( 1, nodes ):
                i, j, merit = pypar.receive( other_node_id )
                if merit > best_merit:
                    best_i, best_j, best_merit = i, j, merit
  
        # Collapse the two symbols that resulted in the best merit   
        mapping = mapping.collapse( best_i, best_j )
        symbol_count -= 1
        
        # Ensure only the master writes files
        if node_id == 0:
        
            # Append merit to merit output        
            print >>merit_out, symbol_count, best_merit
        
            print "\nBest Merit %d." % best_merit,
        
            # Write best mapping to a file
            mapping_out = open( os.path.join( out_dir, "%03d.mapping" % symbol_count ), 'w' )
            for i, symbol in enumerate( mapping.get_table() ): 
                print >>mapping_out, str.join( '', rp.mapping.DNA.reverse_map( i, align_count ) ), symbol
            mapping_out.close()
示例#3
0
#!/usr/bin/env python2.3

"""
Make an identity mapping for some number of species
(each column is mapped to a unique symbol)
"""

import sys
import rp.mapping

align_count = int(sys.argv[1])

mapping = rp.mapping.identity_mapping(rp.mapping.DNA.get_out_size() ** align_count)

for i, symbol in enumerate(mapping.get_table()):
    print str.join("", rp.mapping.DNA.reverse_map(i, align_count)), symbol