def run( pos_file, neg_file, out_dir, format, align_count, mapping ):
    """
    Greedily collapse the symbols of `mapping` until `stop_size` remain.

    On each pass every pair produced by `all_pairs( symbol_count )` is tried
    with `mapping.collapse( i, j )` and scored with `calc_merit` against the
    positive and negative sequences. The collapsed mapping with the highest
    merit (strictly greater than 0) replaces `mapping`. After each pass the
    new symbol count and its merit are appended to `merits.txt` in `out_dir`
    and the mapping table is written to `NNN.mapping` (symbol count zero
    padded to three digits) in the same directory.

    pos_file, neg_file -- inputs handed to `io.get_reader` along with `format`
    out_dir            -- directory that receives `merits.txt` and the
                          per-pass mapping files
    format             -- passed through to `io.get_reader`
    align_count        -- passed to `rp.mapping.DNA.reverse_map` when the
                          mapping table is written
    mapping            -- starting mapping; must provide `get_out_size`,
                          `collapse` and `get_table`

    `stop_size` is read from module scope. If no pair yields a merit above 0
    the loop stops early instead of continuing with no mapping.
    """
    # Open merit output
    merit_out = open( os.path.join( out_dir, 'merits.txt' ), 'w' )
    # try/finally (not `with`) so the file is closed on every exit path while
    # staying compatible with the old interpreters this code targets
    try:
        # Read integer sequences
        pos_strings = list( io.get_reader( pos_file, format, None ) )
        neg_strings = list( io.get_reader( neg_file, format, None ) )
        symbol_count = mapping.get_out_size()
        # Collapse
        while symbol_count > stop_size:
            print >> sys.stderr, "Collapsing from:", symbol_count
            best_mapping = None
            best_merit = 0
            pb = ProgressBar( 0, symbol_count * ( symbol_count - 1 ) / 2, 78 )
            count = 0
            for i, j in all_pairs( symbol_count ):
                collapsed = mapping.collapse( i, j )
                merit = calc_merit( pos_strings, neg_strings, collapsed )
                if merit > best_merit:
                    best_merit = merit
                    best_mapping = collapsed
                count += 1
                pb.update_and_print( count, sys.stderr )
            if best_mapping is None:
                # No candidate beat the initial merit of 0. Previously this
                # fell through with mapping = None, wrote a bogus merit line
                # and then failed on the None mapping.
                print >>sys.stderr, "\nNo collapse from %d symbols had merit above 0, stopping." % symbol_count
                break
            mapping = best_mapping
            symbol_count -= 1
            # Append merit to merit output
            print >>merit_out, symbol_count, best_merit
            print >>sys.stderr, "\nBest Merit %d." % best_merit,
            # Write best mapping to a file
            mapping_out = open( os.path.join( out_dir, "%03d.mapping" % symbol_count ), 'w' )
            try:
                for i, symbol in enumerate( mapping.get_table() ):
                    print >>mapping_out, str.join( '', rp.mapping.DNA.reverse_map( i, align_count ) ), symbol
            finally:
                mapping_out.close()
    finally:
        merit_out.close()
def run( pos_file, neg_file, out_dir, format, align_count, mapping ): # Open merit output merit_out = open( os.path.join( out_dir, 'merits.txt' ), 'w' ) # Read integer sequences pos_strings = list( rp.io.get_reader( pos_file, format, None ) ) neg_strings = list( rp.io.get_reader( neg_file, format, None ) ) symbol_count = mapping.get_out_size() # Collapse while symbol_count > stop_size: # Sync nodes on each pass, may not be required pypar.barrier() if node_id == 0: print "Collapsing from:", symbol_count pairs = all_pairs( symbol_count ) # Decide which subrange of all pairs this node will handle lo, hi = pypar.balance( len( pairs ), nodes, node_id ) # Find best collapsed mapping in interval best_i, best_j, best_merit = None, None, 0 for i, j in pairs[lo:hi]: merit = calc_merit( pos_strings, neg_strings, mapping.collapse( i, j ) ) if merit > best_merit: best_i, best_j, best_merit = i, j, merit # Aggregate results if node_id != 0: # Send best i, j, merit to the master pypar.send( ( best_i, best_j, merit ), 0 ) else: # I am the master, get results from all other nodes and determine # which had the best merit for other_node_id in range( 1, nodes ): i, j, merit = pypar.receive( other_node_id ) if merit > best_merit: best_i, best_j, best_merit = i, j, merit # Collapse the two symbols that resulted in the best merit mapping = mapping.collapse( best_i, best_j ) symbol_count -= 1 # Ensure only the master writes files if node_id == 0: # Append merit to merit output print >>merit_out, symbol_count, best_merit print "\nBest Merit %d." % best_merit, # Write best mapping to a file mapping_out = open( os.path.join( out_dir, "%03d.mapping" % symbol_count ), 'w' ) for i, symbol in enumerate( mapping.get_table() ): print >>mapping_out, str.join( '', rp.mapping.DNA.reverse_map( i, align_count ) ), symbol mapping_out.close()
#!/usr/bin/env python2.3 """ Make an identity mapping for some number of species (each column mapped to unique symbol) """ import sys import rp.mapping align_count = int(sys.argv[1]) mapping = rp.mapping.identity_mapping(rp.mapping.DNA.get_out_size() ** align_count) for i, symbol in enumerate(mapping.get_table()): print str.join("", rp.mapping.DNA.reverse_map(i, align_count)), symbol