Exemplo n.º 1
0
def construct_tree_inline(root_table, key_list):
    """
    Iteratively builds a BSTTree from a table of subtree roots.

    root_table[i][j] is read as the index into key_list of the key that
    roots the subtree covering key indices i..j (inclusive); the complete
    tree covers indices 0..len(root_table) - 1. A child is hung on the left
    or right of its parent by comparing node values, so key_list is
    presumably sorted ascending (TODO confirm against the caller).

    Returns the root BSTTree node.
    """
    first, last = 0, len(root_table) - 1
    top = root_table[first][last]
    root = BSTTree(key_list[top])

    # Explicit stack of (low, high, parent) index ranges still to expand.
    # The right-hand range is pushed first, so the left-hand one pops first.
    pending = []
    if top < last:
        pending.append((top + 1, last, root))
    if first < top:
        pending.append((first, top - 1, root))

    while pending:
        low, high, parent = pending.pop()
        split = root_table[low][high]
        child = BSTTree(key_list[split])

        if child.value < parent.value:
            parent.left = child
        else:
            parent.right = child

        # Only non-empty ranges on either side of the split are queued.
        if split < high:
            pending.append((split + 1, high, child))
        if low < split:
            pending.append((low, split - 1, child))

    return root
Exemplo n.º 2
0
def find_optimal_tree_ordering(beta_list, alpha_list, beta_length, key_list):
    """
    Returns binary search tree ordered according to Rule 1: within every
    range of keys, the key with the largest probability becomes the root of
    that range's subtree.

    beta_list   -- key probabilities; beta_list[i] belongs to key_list[i]
    alpha_list  -- gap probabilities; not consulted by this rule
    beta_length -- number of keys to place in the tree
    key_list    -- keys; children are attached by comparing key values, so
                   the keys are presumably sorted ascending (TODO confirm)

    When several keys of a range tie for the largest probability, the middle
    one of the tied keys (the upper middle for an even count) is chosen.

    Returns the root BSTTree node, or None when beta_length is 0 or less.
    """
    root = None

    # holds subdivisions of the beta value array, each paired with the node
    # that the subdivision's root has to be attached to
    section_queue = Queue()
    section_queue.put((0, beta_length - 1, root))

    # Iteratively find the best root for each subtree
    while not section_queue.empty():
        (left_index, right_index, parent) = section_queue.get()

        if (left_index < 0 or right_index >= beta_length
                or left_index > right_index):
            continue

        # Every index attaining the maximum is listed exactly once, so the
        # middle element really is the median of the tied candidates.
        max_prob = max(beta_list[i]
                       for i in range(left_index, right_index + 1))
        best_splits = [i for i in range(left_index, right_index + 1)
                       if beta_list[i] == max_prob]

        # Floor division keeps the index an integer on Python 2 and 3 alike.
        best_split = best_splits[len(best_splits) // 2]

        key = key_list[best_split]
        node = BSTTree(key)
        if parent is None:
            root = node
        elif key < parent.value:
            # Compare keys, never indices: parent.value holds a key.
            parent.left = node
        else:
            parent.right = node

        # Only non-empty sub-ranges need further processing.
        if left_index < best_split:
            section_queue.put((left_index, best_split - 1, node))
        if best_split < right_index:
            section_queue.put((best_split + 1, right_index, node))

    return root
Exemplo n.º 3
0
def _select_dataset(test_type, corpus, num_words):
    """Return the (name, probabilities, search keys) triple for test_type, or None if it is unknown."""
    small_words = 10000
    big_words = 160000
    even_dist_keys = 3000
    standard_corp = corpus[:num_words]

    if test_type == "small":
        return ("small dataset", generate_probs(corpus[:small_words]),
                corpus[:small_words])
    if test_type == "medium":
        return ("medium dataset", generate_probs(standard_corp),
                standard_corp)
    if test_type == "large":
        return ("large dataset", generate_probs(corpus[:big_words]),
                corpus[:big_words])
    if test_type == "leaf":
        # high leaf probabilities
        return ("high leaf dataset", generate_probs_high_leaf(standard_corp),
                standard_corp)
    if test_type == "key":
        # high key probabilities, built from the first half of the words
        return ("high key ds",
                generate_probs_high_key(standard_corp[:num_words // 2]),
                standard_corp[:num_words // 2])
    if test_type == "uniform":
        return ("uniform ds", generate_probs_uniform(even_dist_keys),
                list(range(even_dist_keys)))
    return None


def _measure_searches(tree, searches, repeats):
    """Look up every key `repeats` times; return (seconds per lookup, mean depth)."""
    depths = []
    start = time.time()
    for _ in range(repeats):
        for k in searches:
            # find() results are indexed with [1] to obtain the depth
            depths.append(tree.find(k)[1])
    end = time.time()
    return ((end - start) / (len(searches) * repeats),
            float(sum(depths)) / len(depths))


def _measure_repeated(build, searches, corpus_repeats, random_repeats):
    """Run build() + lookups random_repeats times; return averaged (build time, search time, depth)."""
    total_build = 0.0
    total_search = 0.0
    total_depth = 0.0
    for _ in range(random_repeats):
        start = time.time()
        tree = build()
        total_build += time.time() - start

        search_time, depth = _measure_searches(tree, searches, corpus_repeats)
        total_search += search_time
        total_depth += depth
    return (total_build / random_repeats, total_search / random_repeats,
            total_depth / random_repeats)


def _print_heading(title):
    """Print a blank line followed by the section title."""
    print("")
    print(title)


def _print_result(build_time, search_time, depth):
    """Print one result line in the benchmark's fixed format."""
    print("BUILD TIME: %s AVG SEARCH TIME: %s AVG DEPTH: %s" %
          (build_time, search_time, depth))


def test():
    """
    Test function to build trees based on corpus specified

    Command line: python benchmarks.py corpus.txt num_words test_type
      corpus.txt -- text file whose whitespace-separated words form the corpus
      num_words  -- number of leading corpus words used by the "medium",
                    "leaf" and "key" datasets
      test_type  -- one of small, medium, large, leaf, key, uniform

    For the selected dataset the function builds each tree variant, times
    the construction, looks up every search key corpus_repeats times and
    prints build time, average time per lookup and average lookup depth.
    The AVL and naive BST variants are built random_repeats times and all
    three figures are averaged over those repeats.

    Prints the usage line and exits with status 1 when the argument count is
    wrong; prints an error and exits with status 1 for an unknown test_type.
    """
    if len(sys.argv) != 4:
        print("Usage: python benchmarks.py corpus.txt num_words test_type")
        sys.exit(1)

    corpusfile = sys.argv[1]
    num_words = int(sys.argv[2])
    test_type = sys.argv[3]

    # number of rebuilds averaged for the insertion-order dependent trees
    random_repeats = 3
    # number of passes over the search keys per measurement
    corpus_repeats = 2

    # Convert text document of English words into Python list of strings/words
    with open(corpusfile, 'r') as corpus_handle:
        corpus = [word for line in corpus_handle for word in line.split()]

    dataset = _select_dataset(test_type, corpus, num_words)
    if dataset is None:
        print("Unknown test_type '%s' (expected small, medium, large, leaf, "
              "key or uniform)" % test_type)
        sys.exit(1)
    name, (alphas, betas, beta_values), searches = dataset

    print("========================================")
    print("running %s" % name)
    print(len(beta_values))

    # NOTE(review): the order is shuffled once, so every AVL / naive BST
    # repeat inserts the keys in the same order — confirm this is intended.
    insert_indices = list(range(len(beta_values)))
    shuffle(insert_indices)

    # MEHLHORN
    _print_heading("MEHLHORN KNUTH")
    start = time.time()
    nlogntree = Nlogn_build(betas, alphas, len(betas), beta_values,
                            min(betas) / 2)
    cons_time = time.time() - start
    search_time, depth = _measure_searches(nlogntree, searches,
                                           corpus_repeats)
    _print_result(cons_time, search_time, depth)

    # KNUTH OPTION 1
    _print_heading("KNUTH ROOT METHOD")
    start = time.time()
    root_tree = Knuth_Rule1(betas, alphas, len(betas), beta_values)
    cons_time = time.time() - start
    search_time, depth = _measure_searches(root_tree, searches,
                                           corpus_repeats)
    _print_result(cons_time, search_time, depth)

    # AVL
    def build_avl():
        avl_tree = AVLTree(None)
        for v in insert_indices:
            # insert() hands back the tree to keep using, so rebind it
            avl_tree = avl_tree.insert(beta_values[v])
        return avl_tree

    _print_heading("AVL")
    _print_result(*_measure_repeated(build_avl, searches, corpus_repeats,
                                     random_repeats))

    # NAIVE BST
    def build_bst():
        bst_tree = BSTTree(None)
        for v in insert_indices:
            bst_tree.insert(beta_values[v])
        return bst_tree

    _print_heading("NAIVE BST")
    _print_result(*_measure_repeated(build_bst, searches, corpus_repeats,
                                     random_repeats))

    # KNUTH
    _print_heading("OPTIMAL KNUTH")
    start = time.time()
    (_, root) = Knuth_find(betas, alphas, len(betas))
    Knuth_tree = Knuth_build(root, beta_values)
    cons_time = time.time() - start
    search_time, depth = _measure_searches(Knuth_tree, searches,
                                           corpus_repeats)
    _print_result(cons_time, search_time, depth)

    # NOTE(review): ru_maxrss units are platform-dependent (kilobytes on
    # Linux, bytes on macOS); dividing by 1000000 yields megabytes only for a
    # value in bytes — confirm the target platform.
    peak_memory = float(
        resource.getrusage(resource.RUSAGE_SELF).ru_maxrss) / 1000000
    print("\nMemory: %s megabytes used" % peak_memory)
Exemplo n.º 4
0
 def __init__(self, value):
     """Run BSTTree's initialiser with value, then start with balance 0 and no parent."""
     BSTTree.__init__(self, value)
     self.balance, self.parent = 0, None
Exemplo n.º 5
0
# bsttests.py
# COMP 150
# Created by Alex King
# 3/31/2017

# Testing the functionality, correctness, and general runtime of BSTTree

from BSTTree import BSTTree
from random import randint
import time
import sys

def print_value(x):
    """Write str(x) followed by a newline to standard output."""
    line = str(x) + "\n"
    sys.stdout.write(line)

# Fill a fresh tree with 30 random integers from 0 to 500, remembering each
# inserted value so it can be looked up again below
myTree = BSTTree(None)
values = []
for _ in range(30):
    sample = randint(0, 500)
    myTree.insert(sample)
    values.append(sample)

print(myTree)


# Ensure insertion works as expected
# NOTE(review): this relies on find() returning a falsy value for a missing
# key; if find() returns a non-empty tuple the check can never fire — confirm.
for v in values:
    if myTree.find(v):
        continue
    print("Inserted value %d not found in tree!" % v)
    exit(1)
def find_optimal_tree_ordering(beta_list, alpha_list, beta_length, key_list,
                               EPSILON):
    """
    Returns nearly optimal binary search tree given key probabilities
    (beta_list) and gap probabilities (alpha_list)

    Every range of keys is split at a root chosen by scanning candidates
    from both ends of the range inwards. For a candidate, the "difference"
    is |weight to its left - weight to its right|, where the right weight is
    the range total minus the left weight minus the candidate's own beta.
    A scan stops at the first candidate that does not lower its side's
    difference by more than EPSILON; the previous candidate on that side
    becomes the root and the two remaining sub-ranges are queued.

    beta_list   -- key probabilities; beta_list[i] belongs to key_list[i]
    alpha_list  -- gap probabilities; indices up to right_index + 1 are
                   read, so it needs at least beta_length + 1 entries
    beta_length -- number of keys to place in the tree
    key_list    -- keys; children are attached by comparing key values, so
                   the keys are presumably sorted ascending (TODO confirm)
    EPSILON     -- minimum drop in difference that counts as an improvement

    Returns the root BSTTree node, or None when beta_length is 0 or less.
    """
    root = None

    # holds subdivisions of beta value array
    section_queue = Queue()

    # each entry holds the beginning and ending indices of a range, the
    # probability sum of that range, and the parent node to attach to;
    # the sum of the full range is taken to be 1
    section_queue.put((0, beta_length - 1, 1, root))

    # Iteratively find the best root for each subtree
    while not section_queue.empty():

        # find best split
        (left_index, right_index, prob_sum, parent) = section_queue.get()

        # skip empty or out-of-bounds ranges
        if (left_index < 0 or right_index >= beta_length
                or left_index > right_index):
            continue

        # a single key needs no search: it becomes a leaf under parent
        if left_index == right_index:
            node = BSTTree(key_list[left_index])

            if parent is None:
                root = node
            elif key_list[left_index] < parent.value:
                parent.left = node
            else:
                parent.right = node
            continue

        # weight strictly left of candidate left_index / strictly right of
        # candidate right_index: just the outermost gap on each side
        left_prob_sum = alpha_list[left_index]
        right_prob_sum = alpha_list[right_index + 1]

        # difference if the leftmost key were the root
        left_last_diff = abs(left_prob_sum - (prob_sum - left_prob_sum -
                                              beta_list[left_index]))

        # difference if the rightmost key were the root
        right_last_diff = abs(right_prob_sum - (prob_sum - right_prob_sum -
                                                beta_list[right_index]))

        # NOTE(review): if neither scan ever stops, the loop ends without
        # creating a node for this range — presumably unreachable for valid
        # probabilities; confirm.
        for i in xrange(1, (right_index - left_index + 1)):

            # Move lefthand pointer inwards and calculate new split
            # (left_prob_sum becomes the weight left of key left_index + i)
            left_prob_sum += beta_list[left_index + i -
                                       1] + alpha_list[left_index + i]

            new_diff = abs(left_prob_sum - (prob_sum - left_prob_sum -
                                            beta_list[left_index + i]))

            # keep scanning only while the difference drops by more than
            # EPSILON
            if (new_diff < left_last_diff) and (abs(new_diff - left_last_diff)
                                                > EPSILON):

                left_last_diff = new_diff

            else:
                # no improvement: the previous left candidate is the root
                best_split = left_index + i - 1
                node = BSTTree(key_list[best_split])
                if parent is None:
                    root = node
                elif key_list[best_split] < parent.value:
                    parent.left = node
                else:
                    parent.right = node

                # undo the last advance to get the weight left of best_split
                prev_left_prob_sum = left_prob_sum - beta_list[
                    left_index + i - 1] - alpha_list[left_index + i]

                # queue the left sub-range with its weight, and the right
                # sub-range with whatever remains after removing the root
                section_queue.put(
                    (left_index, best_split - 1, prev_left_prob_sum, node))
                section_queue.put(
                    (best_split + 1, right_index,
                     (prob_sum - prev_left_prob_sum - beta_list[best_split]),
                     node))

                break

            # Move righthand pointer inwards and calculate new split
            # (right_prob_sum becomes the weight right of key right_index - i)
            right_prob_sum += (beta_list[right_index + 1 - i] +
                               alpha_list[right_index + 1 - i])

            new_diff = abs(right_prob_sum - (prob_sum - right_prob_sum -
                                             beta_list[right_index - i]))

            if new_diff < right_last_diff and abs(new_diff -
                                                  right_last_diff) > EPSILON:
                right_last_diff = new_diff
            else:

                # no improvement: the previous right candidate is the root
                best_split = right_index - i + 1
                node = BSTTree(key_list[best_split])
                if parent is None:
                    root = node
                elif key_list[best_split] < parent.value:
                    parent.left = node
                else:
                    parent.right = node

                # undo the last advance to get the weight right of best_split
                prev_right_prob_sum = right_prob_sum - beta_list[
                    right_index + 1 - i] - alpha_list[right_index + 1 - i]

                # queue the right sub-range with its weight, and the left
                # sub-range with whatever remains after removing the root
                section_queue.put(
                    (best_split + 1, right_index, prev_right_prob_sum, node))
                section_queue.put(
                    (left_index, best_split - 1,
                     (prob_sum - prev_right_prob_sum - beta_list[best_split]),
                     node))

                break

    return root