f.close()

duplicates = []  # Return the list of duplicates in this data structure

# The nested-loop comparison this replaces had O(n^2) runtime complexity:
# for name_1 in names_1:
#     for name_2 in names_2:
#         if name_1 == name_2:
#             duplicates.append(name_1)

# Root the tree at the first name; every remaining name is inserted from a
# separate list that leaves that first entry out.
tree = BSTNode(names_1[0])
names_1_checker = names_1[1:]
for name in names_1_checker:
    tree.insert(name)

# Any name from the second list that the tree already holds is a duplicate.
duplicates.extend(name for name in names_2 if tree.contains(name))

end_time = time.time()
print (f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print (f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this problem
# What's the best time you can accomplish?  There are no restrictions on techniques or data
# structures, but you may not import any additional libraries that you did not write yourself.
duplicates = []  # Return the list of duplicates in this data structure

# The nested loops this replaces were O(n^2) overall:
# for name_1 in names_1:                   O(n)
#     for name_2 in names_2:               O(n)
#         if name_1 == name_2:             O(1)
#             duplicates.append(name_1)    O(1)

# Build the search tree from the first list.  The root is created lazily from
# the first name seen; every later name is inserted below it.
# NOTE(review): the original author estimated O(log n) per insert/lookup and
# O(n log n) per loop; that presumably requires BSTNode to stay reasonably
# balanced -- confirm against the BSTNode implementation.
bst = None
for name_1 in names_1:
    if bst is not None:
        bst.insert(name_1)
    else:
        bst = BSTNode(name_1)

# Probe the tree with each name from the second list and keep the hits.
for name_2 in names_2:
    if not bst.contains(name_2):
        continue
    duplicates.append(name_2)

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this problem
# What's the best time you can accomplish?  There are no restrictions on techniques or data
# structures, but you may not import any additional libraries that you did not write yourself.
start_time = time.time()

# Read both name lists.  The context managers close each file even if the
# read raises; `f` is left bound to the (closed) second file, as before.
with open('names_1.txt', 'r') as f:
    names_1 = f.read().split("\n")  # List containing 10000 names

with open('names_2.txt', 'r') as f:
    names_2 = f.read().split("\n")  # List containing 10000 names

duplicates = []  # Return the list of duplicates in this data structure

# Build a binary search tree from the first list.  The root is seeded with a
# real name rather than a placeholder string: a placeholder would itself be
# stored in the tree and be reported as a duplicate if it ever appeared in
# names_2.  str.split with an explicit separator always returns at least one
# element, so names_1[0] is safe here.
bst = BSTNode(names_1[0])
for name_1 in names_1[1:]:
    bst.insert(name_1)

# Every name from the second list that the tree contains is a duplicate.
for name_2 in names_2:
    if bst.contains(name_2):
        duplicates.append(name_2)

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this problem
# What's the best time you can accomplish?  There are no restrictions on techniques or data
# structures, but you may not import any additional libraries that you did not write yourself.
#     for name_2 in names_2:
#         if name_1 == name_2:
#             duplicates.append(name_1)

# Using BST to cut runtime.
# The first name becomes the root and the remaining names are inserted under
# it.  names_1 itself is deliberately left untouched (the slice below is a
# temporary copy) so that code after this section still sees the full list.
head = names_1[0]
bst = BSTNode(head)
for name in names_1[1:]:
    bst.insert(name)

# Every name from the second list that the tree contains is a duplicate.
for name in names_2:
    if bst.contains(name):
        duplicates.append(name)

# RUNTIME : 0.16530179977416992 seconds , 64 Duplicates
end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this problem
# What's the best time you can accomplish?  There are no restrictions on techniques or data
# structures, but you may not import any additional libraries that you did not write yourself.

# Restart the timer for the stretch-goal attempt.
start_time = time.time()
names_1 = f.read().split("\n")  # List containing 10000 names
f.close()

# The context manager closes the file even if the read raises; `f` is left
# bound to the (closed) file afterwards, as before.
with open('names_2.txt', 'r') as f:
    names_2 = f.read().split("\n")  # List containing 10000 names

duplicates = []  # Return the list of duplicates in this data structure

# Build a binary search tree from the first list.  The first name seeds the
# root, so only the remaining names are inserted; inserting the whole list
# would store the first name twice.
list_one = BSTNode(names_1[0])
for name in names_1[1:]:
    list_one.insert(name)

# Every name from the second list that the tree contains is a duplicate.
for name in names_2:
    if list_one.contains(name):
        duplicates.append(name)

# Earlier list-membership approach, kept for reference:
# for name_1 in names_1:
#     if name_1 in names_2:
#         duplicates.append(name_1)

end_time = time.time()
print (f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print (f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this problem
# What's the best time you can accomplish?  There are no restrictions on techniques or data
# structures, but you may not import any additional libraries that you did not write yourself.
# The context manager closes the file even if the read raises; `f` is left
# bound to the (closed) file afterwards, as before.
with open('names_2.txt', 'r') as f:
    bst_names_2 = f.read().split("\n")  # List containing 10000 names

# Initialize an empty list for the duplicates.
bst_duplicates = []

# Build the binary search tree from the first list.  The root is created from
# the first real name instead of a placeholder string: a placeholder would be
# stored in the tree and reported as a duplicate if it appeared in the second
# list.  names_bst stays None only if bst_names_1 is empty.
names_bst = None
for name in bst_names_1:
    if names_bst is None:
        names_bst = BSTNode(name)
    else:
        names_bst.insert(name)

# For every name in the second list, check whether the tree already contains
# it; if it does, record it as a duplicate.
for name in bst_names_2:
    if names_bst is not None and names_bst.contains(name):
        bst_duplicates.append(name)

fin_end_time = time.time()

print(f"{len(bst_duplicates)} duplicates: \n\n{','.join(bst_duplicates)}\n\n")
# print (f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
# print (f"initial runtime: {end_time - start_time} seconds")
# With the nested loops implementation it took 15.864028692245483 seconds to run.
# The message below had typos ("tuntime", "secones") which are corrected here.
print(f"final runtime: {fin_end_time - fin_start_time} seconds")
print(
    f"\ninitial runtime complexity: {initial_runtime_complexity}, final runtime complexity: {final_runtime_complexity}\n"
)

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this problem