def nlogm_chunk_analysis(genomes, chunk_max, total_length):
    # analysis dictionary holds all data about the algorithms
    analysis = {
        'substring_length': len(args.pattern),
        'substring': args.pattern,
        'text_length': total_length
    }

    # Time Boyer-Moore once; it serves as the accuracy baseline.
    boyermoore_data = {'name': 'boyermoore'}
    nlogm_data = {'name': 'nlogm'}

    with Timer() as t:
        bm_matches = bm.boyer_moore_mult_match_index(genomes, args.pattern)
    boyermoore_data['time'] = t.msecs
    boyermoore_data['accuracy'] = 1

    # Re-run the chunked FFT matcher with increasing chunk sizes.
    for chunk_size in range(3, chunk_max, 3):
        nlogm_matches = []
        with Timer() as t:
            for g in genomes:
                nlogm_matches.append(
                    fft.fft_match_index_n_log_m(g, args.pattern,
                                                chunk_size=chunk_size))
        nlogm_data['time'] = t.msecs
        nlogm_data['chunk_size'] = chunk_size

        # Accuracy: per-genome ratio of match counts relative to the
        # Boyer-Moore baseline, averaged over all genomes.
        accuracy = 0
        for j in range(len(nlogm_matches)):
            accuracy += float(len(nlogm_matches[j])) / len(bm_matches[j])
        nlogm_data['accuracy'] = accuracy / len(bm_matches)

        algorithms = [boyermoore_data, nlogm_data]
        analysis['algorithms'] = algorithms

        # Emit a JSON snapshot for this chunk size.
        print json.dumps(analysis)
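
These analysis helpers time each matcher with a Timer context manager and read the elapsed time from its .msecs attribute; that class does not appear in any of the excerpts and is not part of the standard library. A minimal sketch of the behaviour the snippets assume:

import time


class Timer(object):
    # Minimal stand-in: records elapsed wall-clock time in milliseconds.
    # Only the .msecs attribute used by the examples is provided; the
    # project's real Timer may do more.
    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.msecs = (time.time() - self.start) * 1000.0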
Example #2
def test_chunk_sizes(self):
    text = "AAACCCAAA"
    chunk_size = 'm'
    pattern = "CC"
    self.assertTrue((
        fftmatch.fft_match_index_n_log_m(text, pattern, chunk_size) ==
        np.array(boyermoore.boyer_moore_match_index(text, pattern))).all())

def time_analysis(genomes, total_length, chunk_size='m'):
    # analysis dictionary holds all data about the algorithms
    analysis = {
        'substring_length': len(args.pattern),
        'substring': args.pattern,
        'text_length': total_length
    }

    # Time each algorithm over the same genomes; Boyer-Moore serves as the
    # accuracy baseline.
    boyermoore_data = {'name': 'boyermoore'}
    nlogn_data = {'name': 'nlogn'}
    nlogm_data = {'name': 'nlogm'}
    opencv_data = {'name': 'opencv'}

    with Timer() as t:
        bm_matches = bm.boyer_moore_mult_match_index(genomes, args.pattern)
    boyermoore_data['time'] = t.msecs
    boyermoore_data['accuracy'] = 1

    with Timer() as t:
        nlogn_matches = fft.fft_match_index_n_sq_log_n(genomes, args.pattern)
    nlogn_data['time'] = t.msecs
    # Accuracy: per-genome ratio of match counts relative to the
    # Boyer-Moore baseline, averaged over all genomes.
    accuracy = 0
    for i in range(len(nlogn_matches)):
        accuracy += float(len(nlogn_matches[i])) / len(bm_matches[i])
    nlogn_data['accuracy'] = accuracy / len(bm_matches)

    nlogm_matches = []
    with Timer() as t:
        for g in genomes:
            nlogm_matches.append(
                fft.fft_match_index_n_log_m(g, args.pattern, chunk_size))
    nlogm_data['time'] = t.msecs

    accuracy = 0
    for i in range(len(nlogm_matches)):
        accuracy += float(len(nlogm_matches[i])) / len(bm_matches[i])
    nlogm_data['accuracy'] = accuracy / len(bm_matches)

    with Timer() as t:
        # Assumed, like the other matchers, to return one index list per genome.
        cv_matches = cvmatch.cv_match_index(genomes, args.pattern)
    opencv_data['time'] = t.msecs

    accuracy = 0
    for i in range(len(cv_matches)):
        accuracy += float(len(cv_matches[i])) / len(bm_matches[i])
    opencv_data['accuracy'] = accuracy / len(bm_matches)

    algorithms = [boyermoore_data, nlogn_data, nlogm_data, opencv_data]
    analysis['algorithms'] = algorithms

    # Emit the collected results as one JSON record.
    print json.dumps(analysis)
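
time_analysis only prints the JSON record; none of the excerpts read it back. As a rough sketch (the analysis.json filename is hypothetical, assuming the printed record was redirected to that file), the output could be tabulated afterwards like this:

import json

with open('analysis.json') as f:  # hypothetical file holding one printed record
    analysis = json.load(f)

print('pattern %r searched over %d characters' %
      (analysis['substring'], analysis['text_length']))
for algo in analysis['algorithms']:
    # 'time' comes from Timer.msecs; 'accuracy' is the match-count ratio
    # against the Boyer-Moore baseline.
    print('%-10s %10.2f ms   accuracy %.3f' %
          (algo['name'], algo['time'], algo['accuracy']))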
Example #6
    title = title + str(count[title])
    genomes[title] = genome

sorted_genomes = collections.OrderedDict(
    sorted(genomes.items(), key=lambda t: t[0]))
genome_strings = sorted_genomes.values()
genome_titles = sorted_genomes.keys()

# Dispatch on the selected algorithm
if args.algorithm == 'nlogn':
    for gn in genomes:
        matches = fft.fft_match_index_n_log_n(genomes[gn], args.pattern)
        print gn, ': Found matches at indices', matches.tolist()
elif args.algorithm == 'nlogm':
    if len(genomes) > 1:
        matches = fft.fft_match_index_n_sq_log_m(genomes.values(),
                                                 args.pattern[0], args.b)
        print 'Found matches at indices', matches.tolist()
    else:
        for gn in genomes:
            matches = fft.fft_match_index_n_log_m(genomes[gn], args.pattern[0],
                                                  args.b)
            print gn, ': Found matches at indices', matches.tolist()
elif args.algorithm == 'boyermoore':
    for gn in genomes:
        matches = bm.boyer_moore_match_index(genomes[gn], args.pattern)
        print gn, ': Found matches at indices', matches.tolist()
elif args.algorithm == 'opencv':
    matches = cvmatch.cv_match_index_chunk(genomes.values(), args.pattern[0],
                                           args.b)
    print genomes[genomes.keys()[0]]
    print genomes.keys(), ': Found matches at indices', matches.tolist()
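
Every snippet above reads a module-level args object, but the argparse setup itself never appears in these excerpts. The sketch below only reflects the attribute names they depend on (pattern, algorithm, b); the flag spellings, types, and defaults are assumptions, and the real parser may declare pattern with nargs, since some branches index args.pattern[0] while others pass args.pattern directly.

import argparse

# Hypothetical parser: only the attribute names pattern, algorithm and b are
# taken from the snippets above; everything else here is guessed.
parser = argparse.ArgumentParser(
    description='Search genome strings for a pattern with several matchers')
parser.add_argument('pattern',
                    help='substring to search for')
parser.add_argument('--algorithm', default='boyermoore',
                    choices=['nlogn', 'nlogm', 'boyermoore', 'opencv'],
                    help='which matcher to run')
parser.add_argument('-b', type=int, default=0,
                    help='chunk size used by the chunked matchers')
args = parser.parse_args()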