def test_binary_tree_search_noright_or_left_node(self):
    """Searching a single-node tree for absent keys returns None."""
    root = binary_tree.insert(None, key=10, value=10)
    # One probe is larger and one is smaller than the only stored key.
    for missing_key in (11, 9):
        self.assertEqual(binary_tree.search(root, missing_key), None)
def test_binary_search(self):
    """
    Check bt.search against the fixture tree held in self.root.
    """
    probe_keys = (6, 2, 8, 7.5)
    expected_values = ('Root', 'E', 'B', None)
    # Run every lookup first, then compare, mirroring search-then-assert.
    found_values = [bt.search(self.root, key) for key in probe_keys]
    for found, expected in zip(found_values, expected_values):
        self.assertEqual(found, expected)
def test_binary_tree_search(self):
    '''
    Insert three pairs and check that each key maps back to its value,
    and that a key which was never inserted yields None.
    '''
    pairs = ((5, 'val1'), (6, 'val2'), (4, 'val3'))
    tree = None
    for key, value in pairs:
        tree = bt.insert(tree, key, value)
    for key, value in pairs:
        self.assertEqual(bt.search(tree, key), value)
    self.assertEqual(bt.search(tree, 0), None)
def test_search(self):
    """Stored string keys are found with their values; an unknown key is not."""
    self.tree_root = None
    entries = (('Cindy', 1), ('Fu', 2), ('Computer', 5), ('Science', 10))
    for key, value in entries:
        self.tree_root = binary_tree.insert(self.tree_root, key, value)

    # search hands back an object exposing the stored value as `.value`.
    for key, expected in entries[:2]:
        match = binary_tree.search(self.tree_root, key)
        self.assertEqual(match.value, expected)

    missing = binary_tree.search(self.tree_root, 'aldksfjlk')
    self.assertEqual(missing, None)
def test_correct_search(self):
    """Random key/value pairs inserted into a tree are found again.

    Runs 100 rounds; each round builds a fresh three-node tree. The keys
    within a round are drawn until they are distinct: two equal keys with
    different values would make the expected search result ambiguous and
    the test fail at random.
    """
    for _ in range(100):
        distinct_keys = set()
        while len(distinct_keys) < 3:
            distinct_keys.add(rdm.randint(1, 1000))
        pairs = [(key, rdm.randint(1, 1000)) for key in distinct_keys]

        # Only the first insert's return value becomes the root; the later
        # inserts are applied to that root and their results are ignored.
        (first_key, first_value), *later_pairs = pairs
        root = bt.insert(None, first_key, first_value)
        for key, value in later_pairs:
            bt.insert(root, key, value)

        for key, value in pairs:
            self.assertEqual(bt.search(root, key), value)
def test_can_search(self):
    """A key inserted into a three-node tree is found with its value."""
    root = None
    for key, value in ((11, 1), (5, 6), (42, 21)):
        root = bt.insert(root, key, value)
    found = bt.search(root, 5)
    self.assertEqual(found.value, 6)
def test_avl_search(self):
    """
    Insert one extra key into a freshly built AVL tree and check that
    search returns the value stored under that key.
    """
    self.n = 10
    built = self.make_avl_tree(self.n)
    self.avl, self.keys = built
    probe_key = 50.5
    probe_value = "Successful search"
    insert(self.avl.root, probe_key, probe_value)
    self.assertEqual(search(self.avl.root, probe_key), probe_value)
def test_bt_searches(self):
    """Test that binary tree search works for a randomly chosen key/value pair.

    Reads tab-separated rows from rand.txt, picks one row at random, stores
    its first two fields as key and value in a new tree, and checks that
    searching for the key returns the value.
    """
    with open("rand.txt", "r") as f:
        result = [line.rstrip().split('\t') for line in f]

    # Use the randomly chosen row; previously whole rows result[0] and
    # result[1] were passed as key and value and the choice was ignored.
    choice = random.choice(result)
    key, value = choice[0], choice[1]

    tree = bt.add(None, key, value)
    self.assertEqual(bt.search(tree, key), value)
def test_search_in_tree(self):
    """A key present in the test data file is found with its value."""
    datatree = binary_tree.create_tree('tabsep_testdata.txt')
    found = binary_tree.search(datatree, '30068')
    self.assertEqual(found, '22623')
def test_search_not_in_tree(self):
    """Searching the test data tree for the key '2' is expected to give None."""
    datatree = binary_tree.create_tree('tabsep_testdata.txt')
    found = binary_tree.search(datatree, '2')
    self.assertEqual(found, None)
""" ########## binary tree ########## """ if args.struc == 'tree': # initializing binary tree Tree = None t0 = time.time() for num in result: Tree = bt.add(Tree, num[0], value=num[1]) t1 = time.time() for i in range(0, 100): choice = random.choice(result) # choosing rand from file key = choice[0] # extracting key val = bt.search(Tree, key) t2 = time.time() print('Insertion time ' + str(t1-t0)) print('Search time ' + str(t2-t1)) """ ########## AVL tree ########## """ if args.struc == 'AVL': # initializing avl tree tree = avl.AVL() # print(tree) t3 = time.time()
def main():
    """Benchmark insertion and search on the selected data structure(s).

    Reads the command-line options through initialize(), loads the dataset,
    and for each selected structure (hash table, AVL tree, binary tree, or
    all of them) prints the time needed to insert the first
    ``number_pairs`` pairs, to search for those keys, and to search for the
    same number of keys that were never inserted. When ``all`` is selected
    a grouped bar chart is saved to a PNG file and shown.

    Exits with status 1 when ``number_pairs`` is outside 2..10000 or the
    dataset file does not exist.
    """
    args = initialize()
    if args.number_pairs <= 1 or args.number_pairs > 10000:
        print('The number of key/value pairs should be in the range of 2 to '
              '10000.')
        sys.exit(1)

    if not os.path.exists(args.dataset):
        print('Input dataset not found.')
        sys.exit(1)
    with open(args.dataset, 'r') as f:
        lines = f.readlines()

    # Only these lines are inserted, so only their keys are searched later.
    selected = lines[:args.number_pairs]

    t_insert, t_search, t_search_non = [], [], []  # just for plotting

    def benchmark(label, insert, search):
        """Time insertion, hits and misses for one structure and report them."""
        key_list = []
        start = time.time()
        for line in selected:
            fields = line.split(' ')
            key_list.append(fields[0])
            insert(fields[0], fields[1])
        elapsed = time.time() - start
        t_insert.append(elapsed)
        print('It requires %8.7f seconds to insert %s keys to the %s.'
              % (elapsed, args.number_pairs, label))

        # searching existing keys
        start = time.time()
        for key in key_list:
            search(key)
        elapsed = time.time() - start
        t_search.append(elapsed)
        print('It requires %8.7f seconds to search for all the %s keys '
              'inserted just now in the %s.'
              % (elapsed, args.number_pairs, label))

        # searching non-existing keys
        start = time.time()
        for key in key_list:
            search(key + '_non')
        elapsed = time.time() - start
        t_search_non.append(elapsed)
        print('It requires %8.7f seconds to search for %s non-existing keys '
              'in the %s.\n' % (elapsed, args.number_pairs, label))

    if args.data_structure == 'hash' or args.data_structure == 'all':
        print('\nResults of the hash table')
        print('=========================')
        table = hash_tables.ChainedHash(10 * int(args.number_pairs),
                                        hash_functions.h_rolling)
        benchmark('hash table', table.add, table.search)

    if args.data_structure == 'AVL' or args.data_structure == 'all':
        print('Results of the AVL tree')
        print('=======================')
        avl_tree = avl.AVLTree()
        benchmark('AVL tree', avl_tree.insert, avl_tree.search)

    if args.data_structure == 'tree' or args.data_structure == 'all':
        print('Results of the binary tree')
        print('==========================')
        tree = {'root': None}

        def tree_insert(key, value):
            # The first pair becomes the root node; later pairs are inserted
            # below it and the return value of bt.insert is not used.
            if tree['root'] is None:
                tree['root'] = bt.Node(key, value)
            else:
                bt.insert(tree['root'], key, value)

        def tree_search(key):
            return bt.search(tree['root'], key)

        benchmark('binary tree', tree_insert, tree_search)

    # Plot a bar chart if "all" is selected
    if args.data_structure == 'all':
        rc('font', **{
            'family': 'sans-serif',
            'sans-serif': ['DejaVu Sans'],
            'size': 10
        })
        # Set the font used for MathJax - more on this later
        rc('mathtext', **{'default': 'regular'})
        plt.rc('font', family='serif')

        n_groups = 3  # 3 different data structures
        plt.subplots()
        index = np.arange(n_groups)
        bar_width = 0.25

        plt.bar(index, t_insert, bar_width, alpha=0.8, label='Insertion')
        plt.bar(index + bar_width, t_search, bar_width, alpha=0.8,
                label='Searching\n existing keys')
        plt.bar(index + 2 * bar_width, t_search_non, bar_width, alpha=0.8,
                label='Searching\n non-existing keys')

        if 'rand' in args.dataset:
            keyword = args.dataset.split('.')[0] + 'om'
        else:
            keyword = args.dataset.split('.')[0]

        plt.title('Manipulation of %s %s key-value pairs'
                  % (args.number_pairs, keyword), weight='semibold')
        plt.xlabel('Data structures', weight='semibold')
        plt.ylabel('Time required (s)', weight='semibold')
        plt.xticks(index + bar_width,
                   ('Hash table', 'AVL tree', 'Binary tree'))
        plt.legend()
        plt.tight_layout()
        plt.grid(True)
        plt.savefig('Benchmark_%s_%s.png' % (keyword, args.number_pairs))
        plt.show()
print('Insert time: {} secs'.format(insert_end_time - insert_start_time)) print('Search existed key time: {} secs'.format(search_end_time - search_start_time)) print('Search not existed key time: {} secs'.format( search_not_exist_end_time - search_not_exist_start_time)) else: tree = None insert_start_time = time.time() for word in data: tree = binary_tree.insert(tree, word, None) insert_end_time = time.time() search_start_time = time.time() for word in data: res = binary_tree.search(tree, word) search_end_time = time.time() search_not_exist_start_time = time.time() for word in data: res = binary_tree.search(tree, word + '__bad') search_not_exist_end_time = time.time() print('Insert time: {} secs'.format(insert_end_time - insert_start_time)) print('Search existed key time: {} secs'.format(search_end_time - search_start_time)) print('Search not existed key time: {} secs'.format( search_not_exist_end_time - search_not_exist_start_time))
def test_binary_tree_search_rightnode(self):
    """A larger key ends up in root.right and search returns its value."""
    root = None
    for number in (10, 11):
        root = binary_tree.insert(root, key=number, value=number)
    found = binary_tree.search(root, 11)
    right_child = root.right
    self.assertEqual((right_child.key, right_child.value), (11, found))
def test_binary_tree_search_first(self):
    """Searching for the root's own key returns the root's value."""
    root = binary_tree.insert(None, key=1, value=10)
    found = binary_tree.search(root, 1)
    actual = (root.key, root.value)
    self.assertEqual(actual, (1, found))
def test_binary_tree_search_leftnode(self):
    """A smaller key ends up in root.left and search returns its value."""
    root = None
    for number in (10, 9):
        root = binary_tree.insert(root, key=number, value=number)
    found = binary_tree.search(root, 9)
    left_child = root.left
    self.assertEqual((left_child.key, left_child.value), (9, found))
""" norm_time = np.zeros((3, len(range(0, N)))) non_time = np.zeros((3, len(range(0, N)))) insert_time = np.zeros((3, len(range(0, N)))) for i in range(0, N): # searching benchmarking choice = random.choice(result) # choosing rand from file key = choice[0] # extracting key non_choice = random.choice(non_result) # choosing rand from file non_key = non_choice[0] # extracting key t1 = time.time() for j in range(0, i): val = bt.search(BT_tree, key) t2 = time.time() for j in range(0, i): val = bt.search(BT_tree, non_key) t3 = time.time() for j in range(0, i): AVL_tree.find(choice[1]) t4 = time.time() for j in range(0, i): AVL_tree.find(non_choice[1]) t5 = time.time() for j in range(0, i): Table.search(key) t6 = time.time()
def test_binary_tree_search_None(self):
    """Searching an empty tree (None) gives back None."""
    empty_tree = None
    outcome = binary_tree.search(empty_tree, 1)
    self.assertEqual(empty_tree, outcome)
def main(data_struct, data_set, num_points):
    """
    Time insertion and lookups for one data structure and plot the timings.

    The selected experiment is repeated num_points times; every repetition
    rebuilds the structure from all loaded pairs, looks up the first tenth
    of the keys, and then looks up the integers from num_points up to
    1.1 * num_points as (presumably absent) keys.

    Parameters:
    - data_struct(str): "avl", "hash" or "tree". Any other value runs no
                        experiment and produces no plot.
    - data_set(str): Passed to get_data to choose the data set
    - num_points(int): Passed to get_data; also the number of repetitions

    Returns:
    - None, the figure is written by make_plot
    """
    key_values = get_data(data_set, num_points)
    add_times, hit_times, miss_times = [], [], []

    if data_struct == "avl":
        for _repeat in range(num_points):
            started = time.perf_counter()
            avl_tree = avl.AVL()
            for key, _value in key_values:
                avl_tree.insert(key)
            add_times.append(time.perf_counter() - started)

            started = time.perf_counter()
            for key, _value in key_values[0:int(.1*num_points)]:
                avl_tree.find(str(key))
            hit_times.append(time.perf_counter() - started)

            started = time.perf_counter()
            for absent in range(num_points, int(num_points*1.1)):
                avl_tree.find(str(absent))
            miss_times.append(time.perf_counter() - started)

        make_plot(add_times, hit_times, miss_times,
                  "AVL performance", "avl_test.png")
        return

    if data_struct == "hash":
        for _repeat in range(num_points):
            started = time.perf_counter()
            table = LinearProbe(5*num_points, h_rolling)
            for key, value in key_values:
                table.add(str(key), value)
            add_times.append(time.perf_counter() - started)

            started = time.perf_counter()
            for key, _value in key_values[0:int(.1*num_points)]:
                table.search(str(key))
            hit_times.append(time.perf_counter() - started)

            started = time.perf_counter()
            for absent in range(num_points, int(num_points*1.1)):
                table.search(str(absent))
            miss_times.append(time.perf_counter() - started)

        make_plot(add_times, hit_times, miss_times,
                  "Hashing performance", "hash_test.png")
        return

    if data_struct == "tree":
        for _repeat in range(num_points):
            started = time.perf_counter()
            # The first pair seeds the root; the rest are inserted below it.
            first_key, first_value = key_values[0][0], key_values[0][1]
            root = Node(first_key, first_value)
            for key, value in key_values[1:]:
                insert(root, key, value)
            add_times.append(time.perf_counter() - started)

            started = time.perf_counter()
            for key, _value in key_values[0:int(.1*num_points)]:
                search(root, key)
            hit_times.append(time.perf_counter() - started)

            started = time.perf_counter()
            for absent in range(num_points, int(num_points*1.1)):
                search(root, str(absent))
            miss_times.append(time.perf_counter() - started)

        make_plot(add_times, hit_times, miss_times,
                  "BST performance", "bst_test.png")
def main():
    """Time inserting and searching key, value pairs in a data structure.

    Command-line arguments (all required):
      --datastructure  'hash', 'binary_tree' or 'avl_tree'
      --dataset        tab-separated text file of key, value lines
      --number_keys    number of lines of the dataset to use

    Prints the elapsed time to insert the keys and the elapsed time to
    search for them. An unrecognized data structure name only prints a
    message. Returns None.
    """
    parser = argparse.ArgumentParser(description='Store key, value pairs in '
                                     'data structures',
                                     prog='insert_key_value_pairs')
    parser.add_argument('--datastructure', type=str, help='Name of '
                        "datastructure to use. Choose from 'hash', "
                        "'binary_tree', or 'avl_tree'", required=True)
    parser.add_argument('--dataset', type=str, help='Name of txt file with key'
                        ', value pairs', required=True)
    parser.add_argument('--number_keys', type=int, help='Number of keys from '
                        'dataset to read in', required=True)
    args = parser.parse_args()

    datastructure = args.datastructure
    filename = args.dataset
    N = args.number_keys

    def first_rows():
        """Yield the tab-split fields of the first N lines, then close the file."""
        with open(filename, 'r') as handle:
            for count, line in enumerate(handle, start=1):
                yield line.rstrip().split('\t')
                # Stop after N lines; when N is never reached the whole
                # file is read.
                if count == N:
                    break

    if datastructure == 'hash':
        print('initializing')
        hashtable = ht.ChainedHash(10000000, ht.hash_functions.h_rolling)
        insert_t0 = time.time()
        for data in first_rows():
            hashtable.add(data[0], data[1])
        insert_t1 = time.time()
        search_key = hashtable.search
    elif datastructure == 'binary_tree':
        print('initialize binary tree')
        insert_t0 = time.time()
        datatree = binary_tree.create_tree(filename, N)
        insert_t1 = time.time()

        def search_key(key):
            return binary_tree.search(datatree, key)
    elif datastructure == 'avl_tree':
        print('initialize AVL tree')
        insert_t0 = time.time()
        datatree = avl_tree.create_AVLtree(filename, N)
        insert_t1 = time.time()

        def search_key(key):
            return avl_tree.search(datatree, key)
    else:
        print('does not recognize datastructure name')
        return

    # The search timing includes re-reading the file, as the insert timing does.
    search_t0 = time.time()
    for data in first_rows():
        search_key(data[0])
    search_t1 = time.time()

    print('time to insert: ' + str(insert_t1 - insert_t0))
    print('time to search: ' + str(search_t1 - search_t0))
def test_search_cannot_find_value(self):
    """Searching for an absent key is expected to give 'key not found'."""
    root = bt.insert(None, 102, 129)
    outcome = bt.search(root, 1000)
    self.assertEqual(outcome, 'key not found')
def main():
    """Benchmark one data structure and print a CSV line of timings.

    Command-line arguments (all required):
      --data_structure  'avl', 'binary' or 'hash'
      --dataset         data file; each line is used as both key and value
      --data_size       number of lines to insert, between 30 and 10000

    The first data_size lines are inserted, then searched for, then
    searched for again with '_nonexistant' appended. One line is printed:
    data_structure,data_size,dataset,add_time,search_time,nonexistant_time.

    Exits with status 0 on success and 1 when the data file is missing,
    data_size is out of range, or the data structure is not recognized.
    """
    parser = argparse.ArgumentParser(
        description="creates plots comparing search method efficiency "
                    "across multiple data structures")
    parser.add_argument("--data_structure", type=str,
                        help="specify a data structure "
                             "('avl', 'binary', or 'hash')",
                        required=True)
    parser.add_argument("--dataset", type=str,
                        help="data file to add to the data structure",
                        required=True)
    # Required: the range checks below compare this value with integers.
    parser.add_argument("--data_size", type=int,
                        help="size of the data structure (10,000 or less)",
                        required=True)
    args = parser.parse_args()

    if not path.exists(args.dataset):
        print("data file doesn't exist!")
        sys.exit(1)
    if args.data_size > 10000:
        print("data size too large! Specify between 30 and 10000")
        sys.exit(1)
    if args.data_size < 30:
        print("data size too small! Specify between 30 and 10000")
        sys.exit(1)

    if args.data_structure == "hash":
        structure = hash_tables.ChainedHash(20000, hash_functions.h_rolling)

        def insert_one(key):
            # for simplicity, the key and value are the same
            structure.add(key, key)

        search_one = structure.search
    elif args.data_structure == "binary":
        tree = {"root": None}

        def insert_one(key):
            tree["root"] = binary_tree.insert(tree["root"], key=key, value=key)

        def search_one(key):
            return binary_tree.search(tree["root"], key)
    elif args.data_structure == "avl":
        structure = avl.AVL()
        insert_one = structure.insert
        search_one = structure.find
    else:
        print("data structure not recognized!")
        sys.exit(1)

    # Insert the first data_size lines; only inserted keys are remembered so
    # the "existing key" search does not include keys that were never added.
    keys = []
    start = time.time()
    with open(args.dataset) as handle:
        for index, line in enumerate(handle):
            if index >= args.data_size:
                break
            keys.append(line)
            insert_one(line)
    add_time = str(time.time() - start)

    # search the keys inserted above
    start = time.time()
    for key in keys:
        search_one(key)
    search_time = str(time.time() - start)

    # search nonexisting keys
    start = time.time()
    for key in keys:
        search_one(key + '_nonexistant')
    nonexistant_search = str(time.time() - start)

    print(args.data_structure + "," + str(args.data_size) + "," +
          args.dataset + "," + add_time + "," + search_time + "," +
          nonexistant_search)
    sys.exit(0)
def main():
    """
    test data structures for storing key, value pairs

    Arguments
    ---------
    --datastructure: the datastructure to build storing desired key, value
    pairs. Choose from 'hash', 'binary_tree', or 'avl_tree'.

    --dataset: a tab-separated txt file containing lines of key, value pairs
    to store

    --number_keys: the number of keys from dataset to read in

    Returns
    -------
    None. Prints the elapsed time to insert all keys and the elapsed time to
    search for all keys. An unrecognized datastructure name only prints a
    message.
    """
    parser = argparse.ArgumentParser(description='Store key, value pairs in '
                                     'data structures',
                                     prog='insert_key_value_pairs')
    parser.add_argument('--datastructure', type=str, help='Name of '
                        "datastructure to use. Choose from 'hash', "
                        "'binary_tree', or 'avl_tree'", required=True)
    parser.add_argument('--dataset', type=str, help='Name of txt file with key'
                        ', value pairs', required=True)
    parser.add_argument('--number_keys', type=int, help='Number of keys from '
                        'dataset to read in', required=True)
    args = parser.parse_args()

    datastructure = args.datastructure
    filename = args.dataset
    N = args.number_keys

    def first_rows():
        """Yield the tab-split fields of the first N lines, then close the file."""
        with open(filename, 'r') as handle:
            for count, line in enumerate(handle, start=1):
                yield line.rstrip().split('\t')
                # Stop after N lines; when N is never reached the whole
                # file is read.
                if count == N:
                    break

    if datastructure == 'hash':
        # call hash tables submodule
        print('initialize hash table')
        hashtable = ht.ChainedHash(10000000, ht.hash_functions.h_rolling)
        # measure time to insert all keys in file
        insert_t0 = time.time()
        for data in first_rows():
            hashtable.add(data[0], data[1])
        insert_t1 = time.time()
        search_key = hashtable.search
    elif datastructure == 'binary_tree':
        # call binary_tree tree function
        print('initialize binary tree')
        # measure time to insert all keys in file
        insert_t0 = time.time()
        datatree = binary_tree.create_tree(filename, N)
        insert_t1 = time.time()

        def search_key(key):
            return binary_tree.search(datatree, key)
    elif datastructure == 'avl_tree':
        # call avl_tree tree function
        print('initialize AVL tree')
        # measure time to insert all keys in file
        insert_t0 = time.time()
        datatree = avl_tree.create_AVLtree(filename, N)
        insert_t1 = time.time()

        def search_key(key):
            return avl_tree.search(datatree, key)
    else:
        print('does not recognize datastructure name')
        return

    # measure time to search for all keys (includes re-reading the file)
    search_t0 = time.time()
    for data in first_rows():
        search_key(data[0])
    search_t1 = time.time()

    print('time to insert: ' + str(insert_t1 - insert_t0))
    print('time to search: ' + str(search_t1 - search_t0))
def test_incorrect_search(self):
    """bt.search raises ValueError when the root is an int, a float or a list."""
    invalid_roots = (7, 420.69, [])
    for bad_root in invalid_roots:
        # assertRaises calls bt.search(bad_root, 5) itself.
        self.assertRaises(ValueError, bt.search, bad_root, 5)
root = None for k in keys: try: k = int(k) root = bt.insert(root, k) except: print('your keys are not all numeric') raise TypeError t1_insert = time.time() insert_time = t1_insert - t0_insert print('insert time', insert_time) '''find all keys / values to an avl tree''' t0_search = time.time() for i in keys: n = bt.search(root, 11) t1_search= time.time() total_time = t1_search - t0_insert print('search time', t1_search-t0_search) print('total time', total_time) if args.struct == 'AVL': tree = avl.avltree() '''insert''' t0_insert = time.time() for k in keys: tree.insert(key) t1_insert = time.time() ''' search