def test_pivot_unique_counts(self):
     # Partition five nodes whose counts are all different and check that the
     # result is split around the pivot in descending-count order.
     fixture = (('b', 10), ('d', 20), ('a', 30), ('e', 40), ('c', 50))
     nodes = CounterList()
     for word, count in fixture:
         nodes.append(CounterNode(word, count))
     pivot = partition_last_pivot(nodes, 0, 4)
     # Nothing to the right of the pivot may have a larger count than it.
     for right in range(pivot + 1, len(nodes)):
         self.assertLessEqual(nodes[right].count, nodes[pivot].count)
     # Nothing to the left of the pivot may have a smaller count than it.
     for left in range(pivot):
         self.assertGreaterEqual(nodes[left].count, nodes[pivot].count)
     self.add_mark(2)
 def test_pivot_counts_same(self):
     # Every node has the same count, so the assertions below look at the
     # words instead: they must end up in ascending order around the pivot.
     nodes = CounterList()
     for word in ('b', 'd', 'a', 'e', 'c'):
         nodes.append(CounterNode(word, 20))
     pivot = partition_last_pivot(nodes, 0, 4)
     # Words to the left of the pivot must not sort after the pivot's word.
     for left in range(pivot):
         self.assertLessEqual(nodes[left].word, nodes[pivot].word)
     # Words to the right of the pivot must not sort before the pivot's word.
     for right in range(pivot + 1, len(nodes)):
         self.assertGreaterEqual(nodes[right].word, nodes[pivot].word)
     self.add_mark(2)
Пример #3
0
def find_unique_words_in_list1(list1, list2):
    """Collect the words of list1 that do not occur in list2.

    Both arguments are counter lists whose nodes expose a ``word``
    attribute, and both are assumed to already be in alphabetical order.
    That ordering lets the two lists be walked in step, merge style, so
    each node is visited at most once.

    Returns a tuple ``(unique, comparisons)`` where ``unique`` is a new
    CounterList holding the list1 nodes whose word is absent from list2,
    and ``comparisons`` is the tally kept by this function: one per step
    when the words differ, two per step when they are equal.
    """
    CounterList.reset_comparisons()

    unique = CounterList()
    comparisons = 0
    size1 = len(list1)
    size2 = len(list2)
    i = 0
    j = 0

    # Walk both lists until either one is exhausted.
    while i < size1 and j < size2:
        node = list1[i]
        other_word = list2[j].word
        comparisons += 1

        if node.word > other_word:
            # The list1 word may still appear further along list2.
            j += 1
        elif node.word < other_word:
            # list2 has already moved past where this word would sit, so
            # the word cannot appear in list2 at all.
            unique.append(node)
            i += 1
        else:
            # Same word in both lists: not unique.  The tally records a
            # second comparison for this case before both cursors advance.
            comparisons += 1
            i += 1
            j += 1

    # Anything left in list1 once list2 has run out is necessarily unique.
    for k in range(i, size1):
        unique.append(list1[k])

    return unique, comparisons
def word_counter_bin(words_list):
    """Count word occurrences, keeping the result alphabetically ordered.

    Each string in ``words_list`` is located in the growing CounterList
    with a binary search.  A word that is already present has its count
    incremented; a new word is inserted at its sorted position.  Words are
    used exactly as given: no lower-casing or punctuation stripping is
    applied here.

    Returns a tuple ``(counter_list, comparisons)`` where ``counter_list``
    is the alphabetically ordered CounterList and ``comparisons`` is the
    number of string comparisons performed: one per binary-search step,
    plus one equality check whenever the insertion point holds a node.
    """
    comparisons = 0
    counter_list = CounterList()

    for word in words_list:
        # Binary search for the left-most position whose word is >= word.
        low = 0
        high = len(counter_list)
        while low < high:
            mid = (low + high) // 2
            comparisons += 1
            if counter_list[mid].word < word:
                low = mid + 1
            else:
                high = mid

        # Only look at the node at the insertion point when one exists.
        # If the search ran off the end, the word is new and there is
        # nothing to compare against; testing the freshly inserted node
        # would match itself and double the new word's starting count.
        if low < len(counter_list):
            comparisons += 1
            if counter_list[low].word == word:
                counter_list[low].count += 1
                continue

        # New word: relies on CounterNode's default count for a first
        # occurrence (presumably 1 -- confirm against CounterNode).
        counter_list.insert(low, CounterNode(word))

    return counter_list, comparisons
def word_counter_seq(words_list):
    """Count word occurrences using a sequential search.

    Every item of ``words_list`` is converted to ``str``, lower-cased and
    stripped of the characters ``! . , ' " : ;`` before counting.  Each
    cleaned word is then looked up by scanning the CounterList from the
    front; a match has its count incremented, otherwise a new node with
    count 1 is appended.

    Returns a tuple ``(counter_list, comparisons)`` where ``counter_list``
    holds the words in order of first appearance and ``comparisons`` is
    the number of word-to-word comparisons made during the scans.  An
    empty ``words_list`` yields an empty CounterList and 0 comparisons.
    """
    # One translation table removes all the unwanted punctuation per word.
    strip_table = str.maketrans('', '', "!.,'\":;")
    cleaned_words = [
        str(word).lower().translate(strip_table) for word in words_list
    ]

    comparisons = 0
    # Start from an empty list rather than a blank placeholder node: a
    # placeholder leaks into the result for empty input and swallows any
    # leading words that clean down to the empty string.
    counter_list = CounterList()

    for word in cleaned_words:
        # Indexed access is used because CounterList is only known here to
        # support len(), indexing and append().
        for index in range(len(counter_list)):
            comparisons += 1
            if counter_list[index].word == word:
                counter_list[index].count += 1
                break
        else:
            # Scanned the whole list without a match: first occurrence.
            counter_list.append(CounterNode(word, 1))

    return counter_list, comparisons
 def setUp(self):
     """Run before each test case.

     Calls ``CounterList.reset_comparisons()`` so that every test starts
     from a freshly reset comparison tally (presumably a class-level
     counter -- confirm against the CounterList implementation).
     """
     CounterList.reset_comparisons()
Пример #7
0
def word_counter_freq(words_list):
    """Count word occurrences, keeping the most frequent words first.

    Every item of ``words_list`` is converted to ``str``, lower-cased and
    stripped of the characters ``! . , ' " : ;`` before counting.  Each
    cleaned word is looked up by scanning the CounterList from the front.
    When a match is found its count is incremented and the node is swapped
    forward past any nodes that now have a smaller count, so the list
    stays in descending-count order.  Unseen words are appended with
    count 1.

    Returns a tuple ``(counter_list, comparisons)`` where ``comparisons``
    is the number of word-to-word comparisons made during the scans
    (count comparisons made while reordering are not included).  An empty
    ``words_list`` yields an empty CounterList and 0 comparisons.
    """
    # One translation table removes all the unwanted punctuation per word.
    strip_table = str.maketrans('', '', "!.,'\":;")
    cleaned_words = [
        str(word).lower().translate(strip_table) for word in words_list
    ]

    comparisons = 0
    # Start from an empty list rather than a blank placeholder node: a
    # placeholder leaks into the result for empty input and swallows any
    # leading words that clean down to the empty string.
    counter_list = CounterList()

    for word in cleaned_words:
        # Indexed access is used because CounterList is only known here to
        # support len(), item get/set and append().
        for index in range(len(counter_list)):
            comparisons += 1
            if counter_list[index].word != word:
                continue

            counter_list[index].count += 1

            # Find the front-most slot whose node now has a smaller count.
            # The bounds check comes first so index -1 is never read when
            # the node is already at the head of the list.
            target = index
            while target > 0 and \
                    counter_list[target - 1].count < counter_list[index].count:
                target -= 1

            # Every node between target and index had the same count as
            # this node did before the increment, so a single swap is
            # enough to restore descending order.
            if target != index:
                counter_list[target], counter_list[index] = \
                    counter_list[index], counter_list[target]
            break
        else:
            # Scanned the whole list without a match: first occurrence.
            counter_list.append(CounterNode(word, 1))

    return counter_list, comparisons
Пример #8
0
def run_my_tests():
    """Print a handful of ad-hoc checks for find_unique_words_in_list1.

    Kept in its own function so none of this output is produced unless the
    function is called explicitly.  The last check reads
    'text_looking_glass.txt' and 'text_sherlock.txt' through load_file.
    """
    def show_unique(first, second):
        # Print the result for one pair of lists, then a blank separator.
        print('My answer', find_unique_words_in_list1(first, second))
        print()

    # A simple hand-built example with a known answer.
    first = CounterList()
    for word in ('apple', 'orange'):
        first.append(CounterNode(word))
    second = CounterList()
    for word in ('apple', 'pear'):
        second.append(CounterNode(word))
    print("Expected  (['orange': 1], 3)")
    show_unique(first, second)

    # Pairs of raw word lists, each counted with word_counter_bin first.
    word_list_pairs = (
        (
            ["oranges", "apples", "pears", "strawberries", "pie"],
            ["oranges", "apricot", "pears", "pie", "cat", "dog", "eggplant"],
        ),
        (
            [
                "oranges", "apricot", "pears", "pie", "lettuce", "pumpkin",
                "oranges", "apples", "pears", "strawberries", "spinach",
                "cabbage", "zucchini", "cauliflower", "onion", "pie",
                "broccoli", "banana", "banana", "grape", "plum", "apples",
                "pineapple"
            ],
            [
                "lettuce", "pumpkin", "spinach", "broccoli", "cauliflower",
                "onion", "cabbage", "zucchini"
            ],
        ),
        # Both lists empty.
        ([], []),
    )
    for first_words, second_words in word_list_pairs:
        first, _ = word_counter_bin(first_words)
        second, _ = word_counter_bin(second_words)
        show_unique(first, second)

    # Larger check against two text files; the comparison counts returned
    # by word_counter_bin are not needed here.
    looking_glass, _ = word_counter_bin(load_file('text_looking_glass.txt'))
    sherlock, _ = word_counter_bin(load_file('text_sherlock.txt'))
    print(find_unique_words_in_list1(looking_glass, sherlock))