Exemplo n.º 1
0
def main():
    """Command-line entry point: write decoy sequences for one input file.

    Parses the command line, selects a decoy maker (Markov, reverse, or
    shuffle), passes every locus read from the single input file to
    write_locus(), and finally reports to stderr how many distinct 6-mers
    were collected for the real set only, the decoy set only, and both.

    Prints the option help and exits with status 1 unless exactly one
    positional argument is given, or if --markov is combined with
    --reverse, or if the --markov length is negative.
    """
    parser = optparse.OptionParser(usage="usage: %prog [options] <file>",
                                   description=__doc__)
    parser.add_option("-r", "--reverse", action="store_true",
                      dest="reverse",
                      help="reverse the sequences, instead of shuffling")
    parser.add_option("-m", "--markov", type="int", dest="markov_length",
                      help="generate Markov sequences, with memory of length"
                      " N, instead of shuffling", default=None, metavar="N")
    parser.add_option("-s", "--seed", type="int", dest="seed",
                      help="seed for randomness [default 0]",
                      default=0, metavar="N")
    parser.add_option("-n", "--no-original", action="store_true",
                      dest="no_original",
                      help="don't output original sequences")
    parser.add_option("-v", "--verbose", action="store_true",
                      dest="verbose", help="be verbose")
    DEFAULT_WRAP = 80
    parser.add_option("-w", "--wrap", dest="wrap", type="int",
                      default=DEFAULT_WRAP,
                      help="wrap sequence to specified width"
                      " [default %s, 0 means don't wrap at all]" % DEFAULT_WRAP,
                      metavar="COLUMNS")
    parser.add_option("--copyright", action="store_true", dest="copyright",
                      help="print copyright and exit")
    (options, args) = parser.parse_args()

    # NOTE(review): options.copyright is parsed above but never examined in
    # this function, so "--copyright" without a file argument falls into the
    # usage error below -- confirm whether it is handled elsewhere.

    markov_length = options.markov_length
    use_markov = markov_length is not None

    # Markov mode excludes --reverse and needs a non-negative memory length.
    if (len(args) != 1
        or use_markov and (options.reverse or markov_length < 0)):
        parser.print_help()
        sys.exit(1)

    fasta_file = args[0]

    if use_markov:
        # The Markov maker is also given the input file name.
        decoy_maker = markov_decoy_maker(options.seed, markov_length,
                                         fasta_file)
    elif options.reverse:
        decoy_maker = reverse_decoy_maker()
    else:
        decoy_maker = shuffle_decoy_maker(options.seed)

    # locus id -> (defline, hash of sequence)
    seen = {}

    # real and decoy 6-mers seen
    sixmers = (set(), set())

    for locusname, defline, sequence, _filename \
            in greylag.read_fasta_files([fasta_file]):
        write_locus(options, decoy_maker, seen, sixmers,
                    locusname, defline, sequence)

    real_sixmers, decoy_sixmers = sixmers
    common_sixmers = real_sixmers & decoy_sixmers
    # sys.stderr.write emits the same line the Python 2 print statement did,
    # and also works unchanged under Python 3.
    sys.stderr.write("six-mers: %s real %s decoy %s both"
                     % (len(real_sixmers) - len(common_sixmers),
                        len(decoy_sixmers) - len(common_sixmers),
                        len(common_sixmers))
                     + "\n")
Exemplo n.º 2
0
    def __init__(self, random_seed, length, original_sequence_file):
        """Seed the random module and tabulate residue transition counts.

        Every sequence yielded by greylag.read_fasta_files() for
        original_sequence_file is scanned once per order (0 through length),
        counting how often each residue follows each context made of the
        preceding `order` characters.

        random_seed -- passed to random.seed(); this seeds the module-level
                       generator, not a private one
        length -- maximum context length; tables are built for every order
                  from 0 to length inclusive
        original_sequence_file -- handed to greylag.read_fasta_files()
                                  (presumably a FASTA file path -- confirm)
        """
        random.seed(random_seed)
        self.length = length

        # for order 0 through self.length:
        # [ length-mer -> subsequent residues -> count, ... ]
        self.transition = [defaultdict(lambda: defaultdict(int))
                           for _ in range(length + 1)]
        for _locusname, _defline, sequence, _filename \
                in greylag.read_fasta_files([original_sequence_file]):
            for order, counts in enumerate(self.transition):
                # Left-pad with '-' so the first residues of the sequence
                # still have a context of exactly `order` characters.
                padded = '-' * order + sequence
                for i, residue in enumerate(sequence):
                    # padded[i:i+order] holds the `order` characters that
                    # immediately precede sequence[i] (== padded[i+order]).
                    counts[padded[i:i + order]][residue] += 1