Exemplo n.º 1
0
 def test_chunk_sizes(self):
     """Check the chunked FFT matcher against Boyer-Moore on a short text.

     Runs ``fftmatch.fft_match_index_n_log_m`` with ``chunk_size='m'`` and
     requires its result to equal the indices returned by
     ``boyermoore.boyer_moore_match_index`` for the same text and pattern.
     """
     text = "AAACCCAAA"
     pattern = "CC"
     # NOTE(review): 'm' presumably selects a chunk size derived from the
     # pattern length -- confirm against fft_match_index_n_log_m.
     chunk_size = 'm'

     found = fftmatch.fft_match_index_n_log_m(text, pattern, chunk_size)
     expected = np.array(boyermoore.boyer_moore_match_index(text, pattern))

     # np.array_equal requires identical shape as well as identical
     # elements, so a length mismatch fails the assertion instead of being
     # broadcast or raising on a scalar comparison result.
     self.assertTrue(np.array_equal(found, expected))
Exemplo n.º 2
0
 def test_chunk_sizes(self):
     """Verify that the chunked FFT matcher agrees with Boyer-Moore.

     The result of ``fftmatch.fft_match_index_n_log_m`` (called with
     ``chunk_size='m'``) must equal the match indices produced by
     ``boyermoore.boyer_moore_match_index`` on the same inputs.
     """
     text = "AAACCCAAA"
     pattern = "CC"
     # NOTE(review): the meaning of the 'm' chunk-size token is defined by
     # fft_match_index_n_log_m and is not visible here -- confirm.
     chunk_size = 'm'

     fft_indices = fftmatch.fft_match_index_n_log_m(text, pattern, chunk_size)
     bm_indices = np.array(boyermoore.boyer_moore_match_index(text, pattern))

     # Compare with np.array_equal rather than ``(a == b).all()``: the
     # latter can broadcast arrays of different lengths or yield a plain
     # scalar, whereas array_equal simply returns False on a shape mismatch.
     self.assertTrue(np.array_equal(fft_indices, bm_indices))
Exemplo n.º 3
0
    def test_long_stream(self, func):
        """Check ``func`` against Boyer-Moore on a long random text.

        A fixed seed makes the 100000-character text and the 3-character
        pattern reproducible. ``func`` is called with ``text`` and
        ``pattern`` keyword arguments and must return the same indices as
        ``boyermoore.boyer_moore_match_index``.
        """
        np.random.seed(67 + 2)
        text = ''.join(np.random.choice(list('AGCT'), size=100000))
        pattern = ''.join(np.random.choice(list('AGTC'), size=3))

        found = func(text=text, pattern=pattern)
        expected = np.array(
            boyermoore.boyer_moore_match_index(text=text, pattern=pattern))

        # np.array_equal also checks the shapes, so a result with the wrong
        # number of matches fails cleanly instead of being broadcast or
        # producing a scalar that has no ``.all()``.
        self.assertTrue(np.array_equal(found, expected),
                        msg=format_error_message(func))
Exemplo n.º 4
0
    def test_long_stream(self, func):
        """Compare ``func`` with Boyer-Moore on one long random sequence.

        The random generator is seeded so the generated text (100000
        characters) and pattern (3 characters) are the same on every run.
        The indices returned by ``func(text=..., pattern=...)`` must equal
        those from ``boyermoore.boyer_moore_match_index``.
        """
        np.random.seed(67 + 2)
        text = ''.join(np.random.choice(list('AGCT'), size=100000))
        pattern = ''.join(np.random.choice(list('AGTC'), size=3))

        actual = func(text=text, pattern=pattern)
        reference = np.array(
            boyermoore.boyer_moore_match_index(text=text, pattern=pattern))

        # Use np.array_equal instead of ``(a == b).all()`` so that arrays of
        # different lengths are reported as unequal rather than broadcast or
        # reduced to a scalar comparison.
        self.assertTrue(np.array_equal(actual, reference),
                        msg=format_error_message(func))
Exemplo n.º 5
0
    def test_long_stream(self, func):
        """Check ``func`` against Boyer-Moore on a batch of random texts.

        Builds 10000 random texts of 31 characters each and one 3-character
        pattern from a fixed seed, computes the Boyer-Moore match indices for
        every text, and requires ``func(texts=..., pattern=...)`` to produce
        an equal result according to ``ndarrays_equal``.
        """
        # NOTE: a skip for fftmatch.fft_match_index_n_sq_log_n_naive used to
        # sit here; it is disabled, so every ``func`` runs the full test.
        np.random.seed(67 + 2)
        letter_rows = np.random.choice(list('AGCT'), size=(10000, 31))
        texts = [''.join(row) for row in letter_rows.tolist()]
        pattern = ''.join(np.random.choice(list('AGTC'), size=3))

        # Reference answer: one Boyer-Moore run per text.
        reference = np.array([
            boyermoore.boyer_moore_match_index(text=sample, pattern=pattern)
            for sample in texts
        ])

        result = func(texts=texts, pattern=pattern)

        self.assertTrue(ndarrays_equal(result, reference),
                        msg=format_error_message(func))
Exemplo n.º 6
0
    def test_long_stream(self, func):
        """Compare a batched matcher with Boyer-Moore on many random texts.

        With a fixed seed, 10000 texts of length 31 and a pattern of length 3
        are drawn at random. The per-text Boyer-Moore indices form the
        expected output, and ``func(texts=..., pattern=...)`` must match it
        under ``ndarrays_equal``.
        """
        # NOTE: an early return for
        # fftmatch.fft_match_index_n_sq_log_n_naive was previously placed
        # here and is currently disabled.
        np.random.seed(67 + 2)
        sampled = np.random.choice(list('AGCT'), size=(10000, 31)).tolist()
        texts = [''.join(chars) for chars in sampled]
        pattern = ''.join(np.random.choice(list('AGTC'), size=3))

        per_text_matches = []
        for candidate in texts:
            per_text_matches.append(
                boyermoore.boyer_moore_match_index(text=candidate,
                                                   pattern=pattern))
        expected = np.array(per_text_matches)

        observed = func(texts=texts, pattern=pattern)

        self.assertTrue(ndarrays_equal(observed, expected),
                        msg=format_error_message(func))
Exemplo n.º 7
0
    title = title + str(count[title])
    genomes[title] = genome

sorted_genomes = collections.OrderedDict(sorted(genomes.items(),
                                      key=lambda t: t[0]))
genome_strings = sorted_genomes.values()
genome_titles = sorted_genomes.keys()

# Parse args
if args.algorithm == 'nlogn':
    for gn in genomes:
        matches = fft.fft_match_index_n_log_n(genomes[gn], args.pattern)
        print gn, ': Found matches at indices', matches.tolist()
elif args.algorithm == 'nlogm':
    if len(genomes) > 1:
        matches = fft.fft_match_index_n_sq_log_m(genomes.values(),\
        args.pattern[0], args.b)
        print 'found matches at', matches.tolist()
    else:
        for gn in genomes:
            matches = fft.fft_match_index_n_log_m(genomes[gn], args.pattern[0],args.b)
            print gn, ': Found matches at indices', matches.tolist()
elif args.algorithm == 'boyermoore':
    for gn in genomes:
        matches = bm.boyer_moore_match_index(genomes[gn], args.pattern)
        print gn, ': Found matches at indices', matches.tolist()
elif args.algorithm == 'opencv':
    matches = cvmatch.cv_match_index_chunk(genomes.values(), args.pattern[0], args.b)
    print genomes[genomes.keys()[0]]
    print genomes.keys(), ': Found matches at indices', matches.tolist()
Exemplo n.º 8
0
sorted_genomes = collections.OrderedDict(
    sorted(genomes.items(), key=lambda t: t[0]))
genome_strings = sorted_genomes.values()
genome_titles = sorted_genomes.keys()

# Parse args
if args.algorithm == 'nlogn':
    for gn in genomes:
        matches = fft.fft_match_index_n_log_n(genomes[gn], args.pattern)
        print gn, ': Found matches at indices', matches.tolist()
elif args.algorithm == 'nlogm':
    if len(genomes) > 1:
        matches = fft.fft_match_index_n_sq_log_m(genomes.values(),\
        args.pattern[0], args.b)
        print 'found matches at', matches.tolist()
    else:
        for gn in genomes:
            matches = fft.fft_match_index_n_log_m(genomes[gn], args.pattern[0],
                                                  args.b)
            print gn, ': Found matches at indices', matches.tolist()
elif args.algorithm == 'boyermoore':
    for gn in genomes:
        matches = bm.boyer_moore_match_index(genomes[gn], args.pattern)
        print gn, ': Found matches at indices', matches.tolist()
elif args.algorithm == 'opencv':
    matches = cvmatch.cv_match_index_chunk(genomes.values(), args.pattern[0],
                                           args.b)
    print genomes[genomes.keys()[0]]
    print genomes.keys(), ': Found matches at indices', matches.tolist()