def main():
    """Split a sequence data file into partitions and their complements.

    The first positional argument names the input file, which is loaded with
    ``SequenceIndex.from_file``. Its sequences are divided into
    ``--partitions`` parts. For each part ``i``:

    * with ``--ids``, the ids of the sequences in the part are printed to
      stdout on one space-separated line and no files are written;
    * otherwise the part is saved to ``<in-file>.part<i>`` and the matching
      result of ``holdout_partition`` (its complement, per the original
      comments) to ``<in-file>.heldout_part<i>``, and a confirmation is
      written to stderr.

    Exits with status 1 if no input file is given.
    """
    usage = "%prog [options] <in-file>"
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-p", "--partitions", dest="partitions", action="store", type="int",
        default=DEFAULT_PARTITIONS,
        help="the number of partitions to use (default: %d)" % DEFAULT_PARTITIONS)
    parser.add_option(
        "--ids", dest="ids", action="store_true",
        help="don't output any files - just print out a list of the ids "
             "of the sequences in each partition")
    options, arguments = parser.parse_args()

    if not arguments:
        sys.stderr.write("%s\n" % "You must specify an input data file")
        sys.exit(1)
    filename = os.path.abspath(arguments[0])

    # Read in the data file
    seqs = SequenceIndex.from_file(filename)

    # Divide the data up into partitions, with their complements
    parts = zip(partition(seqs.sequences, options.partitions),
                holdout_partition(seqs.sequences, options.partitions))

    # Save each partition and its complement
    for i, (part, heldout) in enumerate(parts):
        if options.ids:
            # Just print out a list of the ids in the partition.
            # A single parenthesised argument prints the same text whether
            # ``print`` is the Python 2 statement or the function.
            print(" ".join(["%d" % s.id for s in part]))
        else:
            # Format each name in ONE step. Building a "%s.part%%d" pattern
            # from the path and applying "% i" to it later treats any '%'
            # in the path as a format directive, which corrupts the name or
            # raises.
            part_filename = "%s.part%d" % (filename, i)
            heldout_filename = "%s.heldout_part%d" % (filename, i)
            save_sequences(part_filename, part)
            save_sequences(heldout_filename, heldout)
            # Only reported when files were actually written.
            sys.stderr.write(
                "Wrote partition %d to %s and %s\n"
                % (i, part_filename, heldout_filename))
def main():
    """Partition a sequence data file, writing each part and its held-out set.

    Loads the file named by the first positional argument through
    ``SequenceIndex.from_file`` and pairs up the results of ``partition`` and
    ``holdout_partition`` over its sequences, using ``--partitions`` as the
    number of parts.

    With ``--ids`` nothing is written to disk: one line of space-separated
    sequence ids is printed to stdout per partition. Without it, partition
    ``i`` goes to ``<in-file>.part<i>`` and its held-out counterpart to
    ``<in-file>.heldout_part<i>``, with a progress line on stderr.

    Exits with status 1 when the input file argument is missing.
    """
    parser = OptionParser(usage="%prog [options] <in-file>")
    parser.add_option(
        "-p",
        "--partitions",
        dest="partitions",
        action="store",
        type="int",
        default=DEFAULT_PARTITIONS,
        help="the number of partitions to use (default: %d)" % DEFAULT_PARTITIONS)
    parser.add_option(
        "--ids",
        dest="ids",
        action="store_true",
        help="don't output any files - just print out a list of the ids of the sequences in each partition")
    options, arguments = parser.parse_args()

    if not arguments:
        sys.stderr.write("%s\n" % "You must specify an input data file")
        sys.exit(1)
    filename = os.path.abspath(arguments[0])

    # Read in the data file
    seqs = SequenceIndex.from_file(filename)

    # Divide the data up into partitions, with their complements
    partitions = partition(seqs.sequences, options.partitions)
    complements = holdout_partition(seqs.sequences, options.partitions)

    for i, (part, heldout) in enumerate(zip(partitions, complements)):
        if options.ids:
            # Just print out a list of the ids in the partition. The
            # single-argument form prints identically under the Python 2
            # print statement and the print function.
            print(" ".join(["%d" % s.id for s in part]))
            continue

        # Interpolate the path and the index together. The previous
        # two-stage approach ("%s.part%%d" % filename, then "% i") re-parsed
        # the path as a format string, so a '%' anywhere in the path broke
        # the output names.
        part_filename = "%s.part%d" % (filename, i)
        heldout_filename = "%s.heldout_part%d" % (filename, i)

        # Save the partition and its complement
        save_sequences(part_filename, part)
        save_sequences(heldout_filename, heldout)
        sys.stderr.write(
            "Wrote partition %d to %s and %s\n"
            % (i, part_filename, heldout_filename))
def main():
    """Drop sequences that are not fully annotated from a data file, in place.

    The file named by the first positional argument is loaded with
    ``SequenceIndex.from_file``. Every sequence whose ``fully_annotated``
    attribute is true is kept, and the kept sequences are saved back to the
    same path. The number of removed sequences is reported on stderr.

    Exits with status 1 if no input file is given.
    """
    parser = OptionParser(
        usage="%prog [options] <in-file>",
        description=("Filter a sequence data file to remove any sequences "
                     "that are not fully annotated and write the result back to the file."))
    _options, positional = parser.parse_args()

    if not positional:
        sys.stderr.write("%s\n" % "You must specify an input data file")
        sys.exit(1)
    path = os.path.abspath(positional[0])

    # Load the sequence index from the data file
    index = SequenceIndex.from_file(path)

    # Keep only the fully annotated sequences
    kept = []
    for candidate in index.sequences:
        if candidate.fully_annotated:
            kept.append(candidate)

    # Overwrite the input file with the filtered set
    save_sequences(path, kept)

    removed = len(index.sequences) - len(kept)
    sys.stderr.write("%s\n" % ("Removed %d sequences" % removed))
def main():
    """Filter a sequence data file down to its fully annotated sequences.

    Reads the file given as the first positional argument via
    ``SequenceIndex.from_file``, discards every sequence whose
    ``fully_annotated`` attribute is false, and saves the remainder back to
    the same file with ``save_sequences``. A count of the discarded sequences
    is written to stderr.

    Exits with status 1 when the input file argument is missing.
    """
    summary = ("Filter a sequence data file to remove any sequences "
               "that are not fully annotated and write the result back to the file.")
    parser = OptionParser(usage="%prog [options] <in-file>", description=summary)
    parsed = parser.parse_args()
    cli_args = parsed[1]  # parsed[0] holds the option values, which are unused

    # Guard clause: nothing to do without an input file
    if len(cli_args) < 1:
        sys.stderr.write("%s\n" % "You must specify an input data file")
        sys.exit(1)

    target = os.path.abspath(cli_args[0])

    # Read in the data file
    loaded = SequenceIndex.from_file(target)
    annotated = [item for item in loaded.sequences if item.fully_annotated]

    # The filtered list replaces the contents of the input file
    save_sequences(target, annotated)

    dropped = len(loaded.sequences) - len(annotated)
    report = "Removed %d sequences" % dropped
    sys.stderr.write("%s\n" % report)