def test_dump(self):
    """Round-trip a BWT through dump() and check nothing is lost.

    Builds a BWT from a short DNA string, dumps it to a temporary file,
    rebuilds a second BWT from that file, and asserts that ``seq``,
    ``sa`` and ``alphabet`` match between the two objects.
    """
    s = 'ACGTACGTAGCAT'
    bwt = BWT(s)
    # The context manager guarantees the temp file is closed even when
    # dump() or the reload raises, so a failing test leaks no handle.
    with TemporaryFile() as t:
        bwt.dump(t)
        # Rewind so the reload starts reading at the beginning of the dump.
        t.seek(0)
        bwt2 = BWT('', t)
    # Comparisons run after the file is closed, as in the original test.
    assert_array_equal(bwt.seq, bwt2.seq)
    assert_array_equal(bwt.sa, bwt2.sa)
    self.assertEqual(bwt.alphabet, bwt2.alphabet)
# Parse the command line: a sub-command followed by one or more FASTA files.
parser = argparse.ArgumentParser()
parser.add_argument('command', help="Command [index, search]")
parser.add_argument('files', metavar='file', nargs='+',
                    help='files to index/search (FASTA)')
parser.add_argument('--fastq', help='fastq file containing reads to map')
parser.add_argument('--fasta', help='fasta file containing reads to map')
parser.add_argument('--ed', type=int,
                    help='maximum edit distance to search for')
args = parser.parse_args()
# Parenthesised form prints the same on Python 2 and also parses on Python 3.
print(args)

# "index": go through all the input files, create an FM-index for each and
# dump it to "<fasta>.index" (a JSON file, per the original note).
if args.command == 'index':
    for fasta in args.files:
        # str(seq) replaces the deprecated Seq.tostring(), which newer
        # Biopython releases no longer provide.
        bwt = BWT(str(SeqIO.read(fasta, 'fasta').seq))
        with open(fasta + '.index', 'w') as out:
            bwt.dump(out)
        # Drop the index before building the next one to limit peak memory.
        del bwt
# For the meat:
elif args.command == 'search':
    # Pull all of our genomes into memory, keyed by FASTA path.
    indices = {}
    # Try to grab existing indices, otherwise create them.
    for fasta in args.files:
        try:
            with open(fasta + '.index') as f:
                indices[fasta] = BWT('', f)
        except IOError:
            # No readable "<fasta>.index": build the index from the FASTA.
            indices[fasta] = BWT(str(SeqIO.read(fasta, 'fasta').seq))
    # Check if reads are in fastq or fasta format, and in a file or from stdin
    # Parse using BioPython