示例#1
0
def initiate_case(batchSize):
    reference = decoder("data/KR233687.fasta")
    sequence = decoder("data/ERR1293055_first100.fastq")
    refKmer = kmer_maker(13, reference, True)
    seqKmer = kmer_maker(13, sequence, False)
    reference_trie = Trie()
    sternum = mapper(refKmer, seqKmer, reference_trie, batchSize)
    sternum.filter_matching()
    return sternum
示例#2
0
def run():
    reference = decoder(args.reference)
    sequence = decoder(args.sequence)
    if int(args.method) == 3:
        spine = BWT(reference)
        refKmer = reference
    else:
        refKmer = kmer_maker(int(args.ksize), reference, True)
    seqKmer = kmer_maker(int(args.ksize), sequence, False)
    if int(args.method) == 1:  # mapping through Suffix Trie
        spine = Trie()
    elif int(args.method) == 2:  # mapping through Suffix Array
        spine = SA(reference)
    sternum = mapper(refKmer, seqKmer, spine, int(args.batchSize))
    sternum.filter_matching(int(args.minKcount), int(args.minPercentage))
    reporter(sternum, args.outputPrefix + "_" + str(args.method) + "_")
示例#3
0
 def test_zclear(self):
     """
     Test deleting kmers' files from disk
     """
     fileName = "data/ERR1293055_first100.fastq"
     fastaFile = decoder(fileName)
     kmer = kmer_maker(13, fastaFile, False)
     kmer.dump()
     kmer.clear()
     files = glob.glob(kmer.filePrefix + "_*" + kmer.fileExten)
     self.assertEqual(dict(), kmer.kmers)
     self.assertEqual(files, [])
示例#4
0
 def test_splice_fasta_overlapping(self):
     """
     Test splicing fasta overlapping kmers
     """
     fileName = "data/KR233687.fasta"
     fastaFile = decoder(fileName)
     kmer = kmer_maker(13, fastaFile, True)
     self.assertIn(["GAGATCTAATGTC", 0], kmer.kmers["KR233687.2.1"])
     self.assertIn(["TAATGGTGGCATA", 579], kmer.kmers["KR233687.2.1"])
     self.assertIn(["ATTCAGTTGATAG", 1], kmer.kmers["KR233687.2.2"])
     self.assertIn(["ATGGTCATCAATT", 1123], kmer.kmers["KR233687.2.2"])
     self.assertEqual(2, kmer.seqCount)
示例#5
0
 def test_dump(self):
     """
     Test storing kmers to disk
     """
     fileName = "data/ERR1293055_first100.fastq"
     fastaFile = decoder(fileName)
     kmer = kmer_maker(13, fastaFile, False)
     kmer.dump()
     file = open("_39_ERR1293055.40.kmers")
     lines = file.read()
     self.assertIn("GTTGGGATCAATA", lines)
     file.close()
示例#6
0
 def test_splice_fastq_Nonoverlapping(self):
     """
     Test splicing fastq nonoverlapping kmers
     """
     fileName = "data/ERR1293055_first100.fastq"
     fastaFile = decoder(fileName)
     kmer = kmer_maker(13, fastaFile, False)
     self.assertIn(["CTCTTCTACTTCT", 0], kmer.kmers["ERR1293055.1"])
     self.assertIn(["GTTGGGATCAATA", 0], kmer.kmers["ERR1293055.40"])
     self.assertIn(["ATTCAAATGTTCC", 286], kmer.kmers["ERR1293055.100"])
     self.assertNotIn("TCCACTTCACTTT", kmer.kmers["ERR1293055.90"])
     self.assertEqual(100, kmer.seqCount)
示例#7
0
 def test_splice_fasta_Nonoverlapping(self):
     """
     Test splicing fasta nonoverlapping kmers
     """
     fileName = "data/KR233687.fasta"
     fastaFile = decoder(fileName)
     kmer = kmer_maker(13, fastaFile, False)
     self.assertIn(["GAGATCTAATGTC", 0], kmer.kmers["KR233687.2.1"])
     self.assertIn(["TCAATCCCGCACT", 13], kmer.kmers["KR233687.2.1"])
     self.assertIn(["TTCGGATGGTCAT", 1118], kmer.kmers["KR233687.2.2"])
     self.assertNotIn(["AGATCTAATGTCT", 1], kmer.kmers["KR233687.2.1"])
     self.assertEqual(2, kmer.seqCount)
示例#8
0
 def test_load(self):
     """
     Test storing kmers to disk
     """
     fileName = "data/ERR1293055_first100.fastq"
     fastaFile = decoder(fileName)
     kmer = kmer_maker(13, fastaFile, False)
     kmer.dump()
     kmer.load("", 3)
     self.assertIn("ERR1293055.3", kmer.kmers)
     kmer.dump()
     kmer.load("", 4)
     self.assertIn("ERR1293055.7", kmer.kmers)
     kmer.load("", -1)
     self.assertIn("ERR1293055.100", kmer.kmers)