示例#1
0
 def test_dump(self):
     """Check that a BWT written with dump() can be rebuilt from the file.

     Builds an index from a short DNA string, dumps it to a temporary
     file, constructs a second BWT from that file, and asserts that the
     `seq`, `sa` and `alphabet` attributes of both objects match.
     """
     s = 'ACGTACGTAGCAT'
     bwt = BWT(s)
     # The context manager closes the temporary file even when dump() or
     # the reloading constructor raises, so a failing test leaks no handle.
     with TemporaryFile() as t:
         bwt.dump(t)
         # Rewind so the second BWT reads from the start of the dump.
         t.seek(0)
         # Empty sequence plus a file object: presumably the constructor
         # loads its state from the file in this case.
         bwt2 = BWT('', t)
     assert_array_equal(bwt.seq, bwt2.seq)
     assert_array_equal(bwt.sa, bwt2.sa)
     self.assertEqual(bwt.alphabet, bwt2.alphabet)
示例#2
0
    #Parse some arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('command', help="Command [index, search]")
    parser.add_argument('files', metavar='file', nargs='+', help='files to index/search (FASTA)')
    parser.add_argument('--fastq', help='fastq file containing reads to map')
    parser.add_argument('--fasta', help='fasta file containing reads to map')
    parser.add_argument('--ed', type=int, help='maximum edit distance to search for')
    args = parser.parse_args()
    print args

    #Index goes through all the input files, creates an FM-index for each and dumps it to a JSON file
    if args.command == 'index':
        for fasta in args.files:
            bwt = BWT(SeqIO.read(fasta, 'fasta').seq.tostring())
            with open(fasta + '.index', 'w') as out:
                bwt.dump(out)
            del bwt

    #For the meat:
    elif args.command == 'search':
        #Pull all of our genomes into memory
        indices = {}
        #Try to grab existing indices, otherwise create them
        for fasta in args.files:
            try:
                with open(fasta + '.index') as f:
                    indices[fasta] = BWT('', f)
            except IOError:
                indices[fasta] = BWT(SeqIO.read(fasta, 'fasta').seq.tostring())
            #Check if reads are in fastq or fasta format, and in a file or from stdin
        #Parse using BioPython