def test_pivot_unique_counts(self):
    """Partition five nodes with distinct counts and check both sides.

    After partition_last_pivot(c_list, 0, 4) returns the pivot index, every
    node after the pivot must have a count <= the pivot's count and every
    node before it a count >= the pivot's count.
    """
    fixture = (('b', 10), ('d', 20), ('a', 30), ('e', 40), ('c', 50))
    c_list = CounterList()
    for word, count in fixture:
        c_list.append(CounterNode(word, count))

    pivot_index = partition_last_pivot(c_list, 0, 4)

    # Nodes to the right of the pivot: counts no larger than the pivot's.
    for position in range(pivot_index + 1, len(c_list)):
        self.assertLessEqual(c_list[position].count,
                             c_list[pivot_index].count)

    # Nodes to the left of the pivot: counts no smaller than the pivot's.
    for position in range(pivot_index):
        self.assertGreaterEqual(c_list[position].count,
                                c_list[pivot_index].count)

    self.add_mark(2)
def test_pivot_counts_same(self):
    """Partition five nodes that all share one count and check word order.

    With every count equal to 20, the test checks the words instead: nodes
    before the pivot must have a word <= the pivot's word and nodes after
    it a word >= the pivot's word.
    """
    c_list = CounterList()
    for word in ('b', 'd', 'a', 'e', 'c'):
        c_list.append(CounterNode(word, 20))

    pivot_index = partition_last_pivot(c_list, 0, 4)

    # Left of the pivot: words that compare <= the pivot word.
    for position in range(pivot_index):
        self.assertLessEqual(c_list[position].word,
                             c_list[pivot_index].word)

    # Right of the pivot: words that compare >= the pivot word.
    for position in range(pivot_index + 1, len(c_list)):
        self.assertGreaterEqual(c_list[position].word,
                                c_list[pivot_index].word)

    self.add_mark(2)
def find_unique_words_in_list1(list1, list2):
    """Collect the nodes of list1 whose words do not occur in list2.

    Both counter lists are assumed to be in alphabetical order by word, so
    one merge-style pass over the two lists is enough to classify every
    word of list1.

    Returns a tuple ``(unique, comparisons)``: a new CounterList holding
    the list1 nodes unique to list1, and the comparison tally kept by this
    function.
    """
    CounterList.reset_comparisons()

    unique = CounterList()
    comparisons = 0
    size1 = len(list1)
    size2 = len(list2)
    i = 0
    j = 0

    while i < size1 and j < size2:
        word1 = list1[i].word
        word2 = list2[j].word
        if word1 > word2:
            # list2 is behind: word1 may still turn up later in list2.
            comparisons += 1
            j += 1
        elif word1 < word2:
            # list2 has already moved past word1, so word1 is unique.
            comparisons += 1
            unique.append(list1[i])
            i += 1
        else:
            # Same word in both lists: not unique. This case is tallied
            # as two comparisons.
            comparisons += 2
            i += 1
            j += 1

    # list2 is exhausted (or was empty): whatever is left in list1 is unique.
    for leftover in range(i, size1):
        unique.append(list1[leftover])

    return unique, comparisons
def word_counter_bin(words_list):
    """Count how often each string occurs, keeping the result ordered by word.

    Each word is located with a binary search over the CounterList built so
    far. A word that is already present has its count incremented;
    otherwise a new CounterNode is inserted at the position the search
    ended on, which keeps the list sorted by word (string ``<`` ordering).

    The words are used exactly as given: unlike word_counter_seq and
    word_counter_freq, no lower-casing or punctuation stripping happens
    here.

    Args:
        words_list: iterable of strings to count.

    Returns:
        A tuple ``(counter_list, comparisons)``. ``comparisons`` is the
        number of word comparisons made: one per binary-search probe, plus
        one equality check whenever the search ends on an existing node.
        An empty input yields an empty CounterList and 0.
    """
    comparisons = 0
    counter_list = CounterList()

    for word in words_list:
        # Lower-bound binary search: afterwards ``low`` is the first index
        # whose word is not less than ``word`` (or len(counter_list)).
        low = 0
        high = len(counter_list)
        while low < high:
            mid = (low + high) // 2
            comparisons += 1
            if counter_list[mid].word < word:
                low = mid + 1
            else:
                high = mid

        # Only compare when the search landed on an existing node. Landing
        # past the end means the word is new; comparing there would either
        # index out of range or, after inserting first, match the freshly
        # inserted node and bump its count a second time.
        if low < len(counter_list):
            comparisons += 1
            if counter_list[low].word == word:
                counter_list[low].count += 1
                continue

        # First occurrence: relies on CounterNode's default count
        # (presumably 1 -- confirm against the class definition).
        counter_list.insert(low, CounterNode(word))

    return counter_list, comparisons
def word_counter_seq(words_list):
    """Count how often each string occurs using a sequential search.

    Every input item is first normalised: converted with ``str()``,
    lower-cased, and stripped of the characters ``! . , ' " : ;``. Each
    normalised word is then searched for from the front of the CounterList
    built so far. A match has its count incremented; a word that is not
    found is appended with a count of 1, so the result is in
    first-occurrence order.

    Args:
        words_list: iterable of items to count.

    Returns:
        A tuple ``(counter_list, comparisons)`` where ``comparisons`` is
        the number of word equality checks made during the searches. An
        empty input yields an empty CounterList and 0.
    """
    # One pass per word to delete the punctuation characters.
    strip_table = str.maketrans('', '', '!.,\'":;')
    cleaned = [str(item).lower().translate(strip_table)
               for item in words_list]

    comparisons = 0
    counter_list = CounterList()

    for word in cleaned:
        # No placeholder node is needed: on an empty list the search loop
        # simply does not run and the word is appended. This keeps empty
        # input empty and treats a word that normalises to '' like any
        # other word.
        for index in range(len(counter_list)):
            comparisons += 1
            if word == counter_list[index].word:
                counter_list[index].count += 1
                break
        else:
            # Searched the whole list without a match: new word.
            counter_list.append(CounterNode(word, 1))

    return counter_list, comparisons
def setUp(self):
    """Call CounterList.reset_comparisons() before each test case runs."""
    CounterList.reset_comparisons()
def word_counter_freq(words_list):
    """Count how often each string occurs, keeping frequent words in front.

    Every input item is first normalised: converted with ``str()``,
    lower-cased, and stripped of the characters ``! . , ' " : ;``. Each
    normalised word is searched for sequentially from the front of the
    CounterList built so far. A match has its count incremented and is then
    swapped towards the front past every node with a smaller count, so the
    list stays in descending order of count. A word that is not found is
    appended with a count of 1.

    Args:
        words_list: iterable of items to count.

    Returns:
        A tuple ``(counter_list, comparisons)`` where ``comparisons`` is
        the number of word equality checks made during the searches (count
        comparisons used for reordering are not included). An empty input
        yields an empty CounterList and 0.
    """
    # One pass per word to delete the punctuation characters.
    strip_table = str.maketrans('', '', '!.,\'":;')
    cleaned = [str(item).lower().translate(strip_table)
               for item in words_list]

    comparisons = 0
    counter_list = CounterList()

    for word in cleaned:
        # No placeholder node is needed: on an empty list the search loop
        # does not run and the word is appended, so empty input stays empty
        # and a word that normalises to '' is counted like any other.
        for index in range(len(counter_list)):
            comparisons += 1
            if word != counter_list[index].word:
                continue

            counter_list[index].count += 1
            new_count = counter_list[index].count

            # Find the left-most slot such that every node from there up
            # to index - 1 has a smaller count. The bounds check comes
            # first so the front of the list never reads index -1.
            target = index
            while target > 0 and counter_list[target - 1].count < new_count:
                target -= 1

            # A single swap is enough: the nodes in between all share the
            # displaced node's count, so descending order is preserved.
            if target != index:
                counter_list[target], counter_list[index] = \
                    counter_list[index], counter_list[target]
            break
        else:
            # Searched the whole list without a match: new word.
            counter_list.append(CounterNode(word, 1))

    return counter_list, comparisons
def run_my_tests():
    """Ad-hoc demonstration runs for find_unique_words_in_list1.

    Prints the result for one hand-built pair of lists (with its expected
    answer), three pairs built through word_counter_bin, and finally the
    pair built from the two text files.
    """
    # A simple hand-built example with a known answer.
    list1 = CounterList()
    for word in ('apple', 'orange'):
        list1.append(CounterNode(word))
    list2 = CounterList()
    for word in ('apple', 'pear'):
        list2.append(CounterNode(word))
    print("Expected (['orange': 1], 3)")
    print('My answer', find_unique_words_in_list1(list1, list2))
    print()

    # Word-list pairs that are counted with word_counter_bin first.
    word_list_pairs = (
        (
            ["oranges", "apples", "pears", "strawberries", "pie"],
            ["oranges", "apricot", "pears", "pie", "cat", "dog", "eggplant"],
        ),
        (
            [
                "oranges", "apricot", "pears", "pie", "lettuce", "pumpkin",
                "oranges", "apples", "pears", "strawberries", "spinach",
                "cabbage", "zucchini", "cauliflower", "onion", "pie",
                "broccoli", "banana", "banana", "grape", "plum", "apples",
                "pineapple",
            ],
            [
                "lettuce", "pumpkin", "spinach", "broccoli", "cauliflower",
                "onion", "cabbage", "zucchini",
            ],
        ),
        ([], []),
    )
    for first_words, second_words in word_list_pairs:
        list1, _ = word_counter_bin(first_words)
        list2, _ = word_counter_bin(second_words)
        print('My answer', find_unique_words_in_list1(list1, list2))
        print()

    # Finally the two text files.
    cactus, comparisons = word_counter_bin(
        load_file('text_looking_glass.txt'))
    doggie, comparisons = word_counter_bin(load_file('text_sherlock.txt'))
    print(find_unique_words_in_list1(cactus, doggie))