示例#1
0
 def __init__(self, objs=None, hash_type=HashType.QUADRATIC, size=10, c1=1, c2=1):
     """Build an empty hash table and insert the initial objects.

     Args:
         objs: Optional iterable of initial items. It is materialised once,
             so one-shot iterators are supported. Items whose ``str()`` is
             empty are skipped.
         hash_type: Collision strategy. ``HashType.CHAIN`` gives every slot
             its own list; any other value sets up open addressing, where
             slots start as ``EmptyType.EMPTY_SINCE_START`` and an ``AVL``
             is kept in ``self.index``.
         size: Requested slot count. For ``HashType.QUADRATIC`` it is
             rounded up to the next power of two; otherwise it is used
             unchanged.
         c1: Stored as ``self.c1``. Presumably a probing coefficient --
             TODO confirm against the probing code.
         c2: Stored as ``self.c2``. Presumably a probing coefficient --
             TODO confirm against the probing code.

     Raises:
         ValueError: If ``hash_type`` is ``HashType.QUADRATIC`` and ``size``
             is smaller than 1.
     """
     # Snapshot the input so it can be validated and then inserted even when
     # the caller hands in a generator; also avoids a shared mutable default.
     items = [] if objs is None else list(objs)

     if hash_type is HashType.QUADRATIC:
         from math import ceil
         if size < 1:
             raise ValueError("size must be at least 1")
         # Exact integer round-up to a power of two. The previous
         # ceil(log(size) / log(2)) relied on float division and could land
         # one power too high for sizes that already are powers of two.
         self.size = 1 << (int(ceil(size)) - 1).bit_length()
     else:
         self.size = size

     self.item_count = 0
     self.hash_type = hash_type
     self.c1 = c1
     self.c2 = c2

     if hash_type is HashType.CHAIN:
         # Chained tables keep an independent bucket list in every slot.
         self.table = [[] for _ in range(self.size)]
     else:
         # Tracks which slots hold items; only open addressing needs it.
         self.index = AVL()
         self.table = [EmptyType.EMPTY_SINCE_START] * self.size

     # Validate only after the table is fully set up, so a rejected input
     # still leaves a usable empty table instead of a half-built object.
     # NOTE: the message says "positive", but zero is accepted as well.
     if any(HashTable.to_value(item) < 0 for item in items):
         print("HashTable only takes positive values.")
         return

     for item in items:
         if len(str(item)) > 0:
             self.insert(item)
示例#2
0
def plot_times(filename="English.txt", start=500, stop=5500, step=500):
    """Plot build and search times for a LinkedList, a BST, and an AVL.

    The word list is read once with create_word_list(filename). For every
    sample size n between 'start' and 'stop' (both included), the first n
    words of that list form the data set.

    For each data set, time (separately) how long it takes to load a
    LinkedList, a BST, and an AVL with it.

    Choose 5 random words from the data set. Time how long it takes to
    find them in each object and record the average time per word.

    Create one plot with two subplots. In the first subplot, plot the
    number of words in each dataset against the build time for each object.
    In the second subplot, plot the number of words against the average
    search time for each object.

    Inputs:
        filename (str): the file to use in creating the data sets.
        start (int): the lower bound on the sample interval.
        stop (int): the upper bound on the sample interval.
        step (int): the space between points in the sample interval.

    Returns:
        Show the plot, but do not return any values.
    """
    # Floor division keeps the point count an integer; a float count is
    # rejected by np.linspace on current NumPy versions.
    num_points = int((stop - start) // step) + 1
    # A platform-sized integer avoids the wrap-around np.int16 suffers for
    # sample sizes above 32767; rounding guards against values like 999.9999.
    n_list = np.rint(np.linspace(start, stop, num_points)).astype(int)

    word_list = create_word_list(filename)

    load_list = []
    load_BST = []
    load_AVL = []

    find_list = []
    find_BST = []
    find_AVL = []

    for n in n_list:
        sample = word_list[:n]
        # Index into the actual sample so a short word file cannot cause an
        # out-of-range lookup.
        picks = np.random.randint(0, len(sample), size=5)
        words_to_find = [sample[i] for i in picks]

        linked = LinkedList()
        bst = BST()
        avl = AVL()

        # --- build times (timer names no longer overwrite 'start') ---
        began = time()
        for word in sample:
            linked.add(word)
        load_list.append(time() - began)

        began = time()
        for word in sample:
            bst.insert(word)
        load_BST.append(time() - began)

        began = time()
        for word in sample:
            avl.insert(word)
        load_AVL.append(time() - began)

        # --- search times, averaged over the words looked up ---
        lookups = float(len(words_to_find))

        began = time()
        for word in words_to_find:
            iterative_search(linked, word)
        find_list.append((time() - began) / lookups)

        began = time()
        for word in words_to_find:
            bst.find(word)
        find_BST.append((time() - began) / lookups)

        began = time()
        for word in words_to_find:
            avl.find(word)
        find_AVL.append((time() - began) / lookups)

    plt.subplot(121)
    plt.plot(n_list, load_list, label='Singly-Linked List')
    plt.plot(n_list, load_BST, label='Binary Search Tree')
    plt.plot(n_list, load_AVL, label='AVL Tree')
    plt.legend()
    plt.xlabel('Data Points')
    plt.ylabel('Seconds')
    plt.title('Build Times')

    plt.subplot(122)
    plt.plot(n_list, find_list, label='Singly-Linked List')
    plt.plot(n_list, find_BST, label='Binary Search Tree')
    plt.plot(n_list, find_AVL, label='AVL Tree')
    plt.legend()
    plt.xlabel('Data Points')
    plt.ylabel('Seconds')
    plt.title('Search Times')

    plt.show()
示例#3
0
def plot_times(filename="English.txt", start=500, stop=5500, step=500):
    """Plot build and search times for a LinkedList, a BST, and an AVL.

    The word list is read once with create_word_list(filename). For every
    sample size n from 'start' to 'stop' (both included), incrementing by
    'step', the first n words of that list form the data set.

    For each data set, time (separately) how long it takes to load a
    LinkedList, a BST, and an AVL with it.

    Choose 5 random words from the data set. Time how long it takes to
    find each word in each object and record the average search time for
    each object.

    Create one plot with two subplots. In the first subplot, plot the
    number of words in each dataset against the build time for each object.
    In the second subplot, plot the number of words against the search time
    for each object.

    Inputs:
        filename (str): the file to use in creating the data sets.
        start (int): the lower bound on the sample interval.
        stop (int): the upper bound on the sample interval.
        step (int): the space between points in the sample interval.

    Raises:
        ValueError: if 'step' is not positive, or if stepping from 'start'
            does not land exactly on 'stop'.

    Returns:
        Show the plot, but do not return any values.
    """

    def average_search_time(search, words):
        """Return the mean seconds per call of search(word) over 'words'."""
        if len(words) == 0:
            # Nothing to look up; avoid dividing by zero.
            return 0.0
        total = 0
        for word in words:
            began = time.time()
            search(word)
            total += time.time() - began
        return total / len(words)

    # Validate the interval before doing any file work.
    if step <= 0:
        # A non-positive step would never reach 'stop'.
        raise ValueError("step must be a positive number")
    if (stop - start) % step != 0:
        raise ValueError("Your steps won't get you from start to stop")

    word_list = create_word_list(filename)

    time_linked_list = []
    time_BST_list = []
    time_AVL_list = []

    time_linked_list_search = []
    time_BST_list_search = []
    time_AVL_list_search = []

    set_size = []

    # 'stop + 1' makes the upper bound inclusive; the old 'current < stop'
    # loop silently dropped the last sample size.
    for current in range(start, stop + 1, step):
        linked = LinkedList()
        bst = BST()
        avl = AVL()
        sample = word_list[:current]
        # Five random words, as the contract above states; searching every
        # word made the linked-list timing quadratic in the sample size.
        to_search = np.random.permutation(sample)[:5]

        began = time.time()
        for word in sample:
            linked.add(word)
        time_linked_list.append(time.time() - began)

        began = time.time()
        for word in sample:
            bst.insert(word)
        time_BST_list.append(time.time() - began)

        began = time.time()
        for word in sample:
            avl.insert(word)
        time_AVL_list.append(time.time() - began)

        time_linked_list_search.append(
            average_search_time(lambda w: iterative_search(linked, w), to_search))
        time_BST_list_search.append(average_search_time(bst.find, to_search))
        time_AVL_list_search.append(average_search_time(avl.find, to_search))

        set_size.append(current)

    plt.subplot(2, 1, 1)
    plt.title('Building Data Structures')
    plt.plot(set_size, time_linked_list, label='Linked List', linewidth=3)
    plt.plot(set_size, time_BST_list, label="BST", linewidth=3)
    plt.plot(set_size, time_AVL_list, label="AVL", linewidth=3)
    plt.legend(loc=2)

    plt.subplot(2, 1, 2)
    plt.title("Searching Data Structures")
    plt.plot(set_size, time_linked_list_search, label='Linked list', linewidth=3)
    plt.plot(set_size, time_BST_list_search, label='BST', linewidth=3)
    plt.plot(set_size, time_AVL_list_search, label='AVL', linewidth=3)
    plt.legend(loc=2)
    plt.show()