Exemplo n.º 1
0
def time_bothcomplete_new(mylist, mytrie, prefix, k,
                          batches=10, runs_per_batch=1000):
    """Time batches of slowcomplete and autocomplete calls.

    With fixed inputs (data, prefix and k), each function is called
    ``runs_per_batch`` times per batch for ``batches`` batches (10 x 1000 =
    10,000 calls per function by default). The elapsed time of every batch
    is recorded, giving one list of length ``batches`` per function, and the
    mean and standard deviation of each list are returned.

    Args:
        mylist: First argument handed to ``autocomplete_me.slowcomplete``.
        mytrie: First argument handed to ``autocomplete_me.autocomplete``.
        prefix: Prefix passed to both functions.
        k: Third argument passed to both functions.
        batches: Number of timed batches per function.
        runs_per_batch: Number of calls made inside each timed batch.

    Returns:
        A 4-tuple ``(mean_slow, stdev_slow, mean_auto, stdev_auto)`` of
        per-batch timings in seconds.
    """
    def timed_batches(func, data):
        # One timing sample per batch; every sample covers runs_per_batch calls.
        samples = []
        for _ in range(batches):
            # perf_counter is monotonic and high resolution, unlike time.time.
            started = time.perf_counter()
            for _ in range(runs_per_batch):
                func(data, prefix, k)
            samples.append(time.perf_counter() - started)
        return samples

    record_slow = timed_batches(autocomplete_me.slowcomplete, mylist)
    record_auto = timed_batches(autocomplete_me.autocomplete, mytrie)

    # NOTE(review): `stat` is presumably the `statistics` module, whose stdev
    # needs at least two samples (batches >= 2) -- confirm against the imports.
    return (stat.mean(record_slow), stat.stdev(record_slow),
            stat.mean(record_auto), stat.stdev(record_auto))
Exemplo n.º 2
0
def test_prune_and_rescale():
    """Check prune_trie and rescale_weight on three module-level tries.

    Each section prunes a shared trie, checks autocomplete output, rescales
    the weights with a small helper and checks the output again. The calls
    are made for their effect on the shared tries (return values are not
    used), so the expected values depend on the tries' state on entry.
    """
    complete = autocomplete_me.autocomplete

    def tenth(weight):
        return (weight // 10)

    def minus_one(weight):
        return (weight - 1)

    def halved(weight):
        return (weight // 2)

    # test2_auto: prune with 20, then divide every weight by ten.
    autocomplete_me.prune_trie(test2_auto, 20)
    after_prune = complete(test2_auto, "", 5)
    assert after_prune == [(920, "hermione"), (67, "her")]
    autocomplete_me.rescale_weight(test2_auto, tenth)
    after_rescale = complete(test2_auto, "", 5)
    assert after_rescale == [(92, "hermione"), (6, "her")]
    her_node = autocomplete_me.Trie(test2_auto).searchTrie("her")
    assert her_node.maxweight == 92

    # babies_auto: prune with 300, then subtract one from every weight.
    autocomplete_me.prune_trie(babies_auto, 300)
    assert complete(babies_auto, "Gio", 5) == [
        (3086, "Giovanni"), (372, "Giovani")]
    assert complete(babies_auto, "Ayl", 5) == [
        (1284, "Ayla"), (673, "Aylin"), (379, "Ayleen")]
    autocomplete_me.rescale_weight(babies_auto, minus_one)
    assert complete(babies_auto, "Sab", 5) == [(1174, "Sabrina")]
    assert complete(babies_auto, "Sel", 5) == [
        (1043, "Selena"), (570, "Selah")]

    # mandarin_auto: prune with 4000, then halve every weight.
    autocomplete_me.prune_trie(mandarin_auto, 4000)
    assert complete(mandarin_auto, "不", 5) == [
        (15666, "不是"), (7167, "不要"), (6942, "不能"),
        (6892, "不知道"), (4256, "不起")]
    assert complete(mandarin_auto, "好", 5) == [
        (6171, "好了"), (5391, "好吧"), (5170, "好的")]
    autocomplete_me.rescale_weight(mandarin_auto, halved)
    # A prefix without matches yields a message string rather than a list.
    no_match = "No words match with the given prefix."
    assert complete(mandarin_auto, "意", 5) == no_match
    assert complete(mandarin_auto, "我", 5) == [(20865, "我的")]
Exemplo n.º 3
0
def time_bothcomplete(mylist, mytrie, prefix, k, iterations=1000):
    """Time individual slowcomplete and autocomplete calls and summarise them.

    With fixed inputs (data, prefix and k), each function is called
    ``iterations`` times and the elapsed time of every single call is
    recorded, giving one list of length ``iterations`` per function. The
    25th, 50th and 75th percentiles of each list are returned so the two
    functions can be compared.

    Args:
        mylist: First argument handed to ``autocomplete_me.slowcomplete``.
        mytrie: First argument handed to ``autocomplete_me.autocomplete``.
        prefix: Prefix passed to both functions.
        k: Third argument passed to both functions.
        iterations: Number of timed calls per function (default 1000).

    Returns:
        A 6-tuple ``(slow_q25, slow_q50, slow_q75, auto_q25, auto_q50,
        auto_q75)`` of per-call timings in seconds.
    """
    def time_each_call(func, data):
        samples = []
        for _ in range(iterations):
            # perf_counter has the resolution needed to time a single call;
            # time.time can be too coarse on some platforms.
            started = time.perf_counter()
            func(data, prefix, k)
            samples.append(time.perf_counter() - started)
        return samples

    record_slow = time_each_call(autocomplete_me.slowcomplete, mylist)
    record_auto = time_each_call(autocomplete_me.autocomplete, mytrie)

    # One vectorised quantile call per list instead of three separate ones.
    quartiles = (0.25, 0.50, 0.75)
    slow_q = numpy.quantile(record_slow, quartiles)
    auto_q = numpy.quantile(record_auto, quartiles)
    return (*slow_q, *auto_q)
 def test_random(self):
     """Compare trie autocomplete with the slow reference on random text.

     For each prefix, the top-5 result from the trie built over the
     randomized data must equal what slow_autocomplete returns for the
     randomized file.
     """
     for prefix in ("a", "ac", "act"):
         trie_result = autocomplete_me.autocomplete(prefix, randTrie, 5)
         reference = slow_autocomplete(prefix, randFile, 5)
         self.assertEqual(trie_result, reference)
Exemplo n.º 5
0
def test_add_term():
    """Check add_term on the shared test2_auto and babies_auto tries.

    After adding a term, the test inspects the node reached by searchTrie
    (fullword, weight, maxweight) and the ranked autocomplete output.
    """
    complete = autocomplete_me.autocomplete

    def find(trie, key):
        # Wrap the trie and look up the node for `key`.
        return autocomplete_me.Trie(trie).searchTrie(key)

    # test2_auto
    autocomplete_me.add_term(test2_auto, "hermione", 919)
    last_letter = find(test2_auto, "hermion").children["e"]
    assert last_letter.fullword == "hermione"
    assert find(test2_auto, "hermione").weight == 919
    # The "her" prefix node reports the new maximum but keeps its own weight.
    assert find(test2_auto, "her").maxweight == 919
    assert find(test2_auto, "her").weight == 67
    assert complete(test2_auto, "he", 3) == [(919, 'hermione'), (67, 'her')]

    # babies_auto
    autocomplete_me.add_term(babies_auto, "Ron", 19800301)
    expected_ro = [(19800301, 'Ron'), (6882, 'Robert'), (2562, 'Roman')]
    assert complete(babies_auto, "Ro", 3) == expected_ro
    assert find(babies_auto, "Ron").weight == 19800301
    assert find(babies_auto, "Ro").maxweight == 19800301
    # "Ro" itself is expected to carry weight -1 (presumably the marker for
    # a node that is not a stored word -- confirm in autocomplete_me).
    assert find(babies_auto, "Ro").weight == -1
Exemplo n.º 6
0
 def test_autocomplete_for_wiki(self):
     """Top-5 completions of "the" from wiktionary.txt match known values."""
     top_five = [
         (5627187200, 'the'),
         (334039800, 'they'),
         (282026500, 'their'),
         (250991700, 'them'),
         (196120000, 'there'),
     ]
     trie = ac.read_terms("wiktionary.txt")
     self.assertEqual(top_five, ac.autocomplete("the", trie, 5))
Exemplo n.º 7
0
    def test_for_random_file(self):
        """Compare ac.autocomplete with a brute-force ranking of random words.

        Builds a Trie from 600-2500 attempts at generating random lowercase
        words (2-9 letters, duplicates skipped) with random weights in
        1..100000, then checks that the five heaviest words starting with
        't' equal ``ac.autocomplete('t', T, 5)``.
        """
        trie = Trie()
        weighted_words = []
        seen = set()  # words already added, so each word appears only once
        for _ in range(random.randint(600, 2500)):
            word = ''.join(
                random.choice(string.ascii_lowercase)
                for _ in range(random.randint(2, 9)))
            weight = random.randint(1, 100000)
            if word in seen:
                continue
            seen.add(word)
            weighted_words.append((weight, word))
            trie.addWord(wordStr=word, weight=weight)

        # Heaviest first. NOTE(review): equal weights are ordered by word,
        # descending; if ac.autocomplete breaks ties differently this test
        # could fail intermittently -- confirm.
        weighted_words.sort(reverse=True)
        search = 't'
        expected = [
            pair for pair in weighted_words if pair[1].startswith(search)
        ][:5]

        # Brute-force ranking and trie lookup must agree.
        self.assertEqual(expected, ac.autocomplete(search, trie, 5))
Exemplo n.º 8
0
def test_delete_term():
    """Check delete_term on the shared test2_auto and pokemon_auto tries.

    Covers deleting a term whose node has no children ("inn") and deleting
    terms beneath a node that still has other entries ("Porygon2",
    "Porygon-Z").
    """
    complete = autocomplete_me.autocomplete
    remove = autocomplete_me.delete_term

    def find(trie, key):
        # Wrap the trie and look up the node for `key`.
        return autocomplete_me.Trie(trie).searchTrie(key)

    # test2_auto -- case 1: node has no children
    assert find(test2_auto, "in").maxweight == 20
    remove(test2_auto, "inn")
    assert complete(test2_auto, "i", 50) == [(5, 'in')]
    # A lookup for the deleted term is expected to compare equal to False.
    assert find(test2_auto, "inn") == False
    assert find(test2_auto, "in").maxweight == 5

    # pokemon_auto -- case 2: node has children
    remove(pokemon_auto, "Porygon2")
    assert complete(pokemon_auto, "Pory", 2) == [
        (83878, "Porygon-Z"), (533, "Porygon")]
    assert find(pokemon_auto, "Porygon2") == False
    assert find(pokemon_auto, "Porygon").maxweight == 83878
    remove(pokemon_auto, "Porygon-Z")
    assert complete(pokemon_auto, "Pory", 2) == [(533, "Porygon")]
Exemplo n.º 9
0
 def test_autocomplete_for_movies(self):
     """Top-5 completions of "The" from movies.txt match known values."""
     top_five = [
         (623357910, 'The Avengers (2012)'),
         (534858444, 'The Dark Knight (2008)'),
         (448139099, 'The Dark Knight Rises (2012)'),
         (422783777, 'The Lion King (1994)'),
         (408010692, 'The Hunger Games (2012)'),
     ]
     trie = ac.read_terms('movies.txt')
     self.assertEqual(top_five, ac.autocomplete("The", trie, 5))
    def test_wordList(self):
        """With fewer than k matches, autocomplete returns all of them.

        Four words sharing the prefix "apple" are inserted and k is 6, so
        the result is expected to contain exactly four entries.
        """
        entries = (
            (123, "apple"),
            (234, "apples"),
            (67, "applet"),
            (88, "appletree"),
        )
        trie = autocomplete_me.Trie()
        for weight, word in entries:
            trie.insert(Node(weight=weight, word=word))

        matches = autocomplete_me.autocomplete("apple", trie, 6)
        self.assertEqual(len(matches), 4)
Exemplo n.º 11
0
def test_insert_or_update():
    """Check insert_or_update on the shared test2_auto and pokemon_auto tries.

    The first half targets words the test treats as already present; the
    second half targets words it treats as new. Results are verified through
    node attributes (weight, maxweight) and autocomplete output.

    NOTE(review): several expected values (16, 17, 920) differ from the
    weights passed to insert_or_update; the function's update rule is not
    visible here -- confirm against autocomplete_me.
    """
    upsert = autocomplete_me.insert_or_update
    complete = autocomplete_me.autocomplete

    def find(trie, key):
        # Wrap the trie and look up the node for `key`.
        return autocomplete_me.Trie(trie).searchTrie(key)

    # --- updating existing words ---
    upsert(test2_auto, "can", 23)
    assert find(test2_auto, "can").weight == 16
    assert find(test2_auto, "ca").maxweight == 16
    for new_weight in (100, 1000):
        upsert(test2_auto, "cat", new_weight)
    # the maxweight of the prefix node must follow the word's weight
    assert find(test2_auto, "cat").weight == 17
    assert find(test2_auto, "ca").maxweight == 17

    # --- inserting new words ---
    # test2_auto
    upsert(test2_auto, "hermione", 919)
    assert complete(test2_auto, "her", 1) == [(920, 'hermione')]
    assert complete(test2_auto, "he", 3) == [(920, 'hermione'), (67, 'her')]
    assert find(test2_auto, "her").maxweight == 920
    assert find(test2_auto, "her").weight == 67

    # pokemon_auto
    upsert(pokemon_auto, "jinhowchong", 19960308)
    assert complete(pokemon_auto, "jin", 5) == [(19960308, 'jinhowchong')]
    assert find(pokemon_auto, "jinh").maxweight == 19960308
    assert find(pokemon_auto, "jinhowchong").weight == 19960308
Exemplo n.º 12
0
class graph:
    """Benchmark trie loading and matching against input size, then plot it.

    All of the work happens in the class body, i.e. when the class statement
    is executed: 30 random subsets of movies.txt are written to disk, the
    time to build a trie from each and to run one autocomplete query is
    measured, and both timings are scattered against the subset size.
    """

    def random_file(newfile, filename="movies.txt", encoding='UTF-8'):
        '''Write a random subset of a file's lines to a new text file.

        filename is the original file's name
        newfile is the name of the random subset

        The first line of the original is never sampled (presumably a
        header line -- confirm against the data files). Between 10000 and
        99999 lines are requested, capped at the number of lines available
        so that a short input file does not make random.sample raise.

        Returns [newfile, fileSize], where fileSize is the number of lines
        actually written.
        '''
        with open(filename, "r", encoding=encoding) as file:
            allLines = file.readlines()
        candidates = allLines[1:]
        fileSize = min(random.randrange(10000, 100000), len(candidates))
        lines = random.sample(candidates, fileSize)

        with open(newfile, "w", encoding=encoding) as file:
            file.writelines(lines)
        return [newfile, fileSize]

    time1, time2, size = [], [], []
    for i in range(30):
        # NOTE(review): "rand_i" is a literal name, so every iteration
        # overwrites the same file; harmless here since each file is
        # consumed right away.
        [randFile, fileSize] = random_file("rand_i", "movies.txt")
        # times building trie and records time to list
        # (time.clock was removed in Python 3.8; perf_counter replaces it)
        start_i = time.perf_counter()
        trie = ac.read_terms(randFile)
        running1 = time.perf_counter() - start_i
        time1.append(running1)
        # adds filesize to size list (kept outside the timed region)
        size_i = fileSize
        size.append(size_i)
        # times matcher and records time to list
        newStart_i = time.perf_counter()
        ac.autocomplete('T', trie, 5)
        running2 = time.perf_counter() - newStart_i
        time2.append(running2)

    plt.title('Performance by Input Size')
    plt.xlabel('Input Size')
    plt.ylabel('Execution Time (s)')
    plt.grid(True)
    plt.xlim(10000, 100000)
    plt.ylim(0, 10)

    # plot filesize against two time lists with legends
    plt.scatter(size, time1, color="red", label="Loading data")
    plt.scatter(size, time2, color="blue", label="Matcher")
    plt.legend()

    plt.show()
    def test_auto(self):
        """Check autocomplete on the wiktionary, pokemon, baby and movie tries.

        For every trie the ranked top-5 results of a few prefixes are
        compared with known values; then an unmatched prefix ("xxx") must
        raise LookupError and a whitespace-only prefix must raise ValueError.
        """
        complete = autocomplete_me.autocomplete

        def verify(trie, ranked_cases):
            # Ranked results first, then the two error cases, per trie.
            for prefix, expected in ranked_cases:
                self.assertEqual(complete(prefix, trie, 5), expected)
            self.assertRaises(LookupError, complete, "xxx", trie, 5)
            self.assertRaises(ValueError, complete, "  ", trie, 5)

        verify(wikTrie, [
            ("t", [(5627187200, 'the'), (2595609600, 'to'),
                   (1107331800, 'that'), (401542500, 'this'),
                   (334039800, 'they')]),
            ("th", [(5627187200, 'the'), (1107331800, 'that'),
                    (401542500, 'this'), (334039800, 'they'),
                    (282026500, 'their')]),
        ])

        verify(pokTrie, [
            ("S", [(2194440, 'Scizor'), (1211390, 'Starmie'),
                   (993018, 'Skarmory'), (981131, 'Salamence'),
                   (232622, 'Sableye')]),
            ("Sh", [(81075, 'Sharpedo'), (55024, 'Shedinja'),
                    (43597, 'Shaymin'), (42367, 'Shuckle'),
                    (31091, 'Shiftry')]),
        ])

        verify(babTrie, [
            ("L", [(16709, 'Liam'), (13066, 'Logan'), (10623, 'Lucas'),
                   (9319, 'Landon'), (8930, 'Luke')]),
            ("Li", [(16709, 'Liam'), (7899, 'Lily'), (7105, 'Lillian'),
                    (2915, 'Lincoln'), (2759, 'Lilly')]),
        ])

        verify(movTrie, [
            ("T", [(658672302, 'Titanic (1997)'),
                   (623357910, 'The Avengers (2012)'),
                   (534858444, 'The Dark Knight (2008)'),
                   (448139099, 'The Dark Knight Rises (2012)'),
                   (422783777, 'The Lion King (1994)')]),
            ("The", [(623357910, 'The Avengers (2012)'),
                     (534858444, 'The Dark Knight (2008)'),
                     (448139099, 'The Dark Knight Rises (2012)'),
                     (422783777, 'The Lion King (1994)'),
                     (408010692, 'The Hunger Games (2012)')]),
            ("Star Wars", [
                (460935665, 'Star Wars (1977)'),
                (380262555,
                 'Star Wars: Episode III - Revenge of the Sith (2005)'),
                (310675583,
                 'Star Wars: Episode II - Attack of the Clones (2002)'),
                (309125409,
                 'Star Wars: Episode VI - Return of the Jedi (1983)'),
                (290475067,
                 'Star Wars: Episode V - The Empire Strikes Back (1980)'),
            ]),
        ])
Exemplo n.º 14
0
 def test_autocomplete_for_babynames(self):
     """Top-5 completions of "L" from baby-names.txt match known values."""
     top_five = [
         (16709, 'Liam'),
         (13066, 'Logan'),
         (10623, 'Lucas'),
         (9319, 'Landon'),
         (8930, 'Luke'),
     ]
     trie = ac.read_terms('baby-names.txt')
     self.assertEqual(top_five, ac.autocomplete("L", trie, 5))
Exemplo n.º 15
0
 def test_autocomplete_for_input_not_in_trie(self):
     """A prefix with no matches in baby-names.txt yields an empty list."""
     trie = ac.read_terms('baby-names.txt')
     matches = ac.autocomplete("XXX", trie, 5)
     self.assertEqual([], matches)
Exemplo n.º 16
0
 def test_autocomplete_for_pokemon(self):
     """Top-5 completions of "Sh" from pokemon.txt match known values."""
     top_five = [
         (81075, 'Sharpedo'),
         (55024, 'Shedinja'),
         (43597, 'Shaymin'),
         (42367, 'Shuckle'),
         (31091, 'Shiftry'),
     ]
     trie = ac.read_terms("pokemon.txt")
     self.assertEqual(top_five, ac.autocomplete("Sh", trie, 5))
Exemplo n.º 17
0
def test_autocomplete():
    """Check that slowcomplete and autocomplete agree on every dataset.

    For each pair of module-level fixtures (list for slowcomplete, trie for
    autocomplete) the two functions must return equal results for the same
    prefix and k. The one exception is the first trademarks check, which
    uses differently-cased prefixes ("HOME" vs "Home") and requires the
    results to differ.
    """
    slow = autocomplete_me.slowcomplete
    fast = autocomplete_me.autocomplete

    for prefix, k in (("sab", 1), ("c", 3), ("invalidprefix", 5)):
        assert slow(test_slow, prefix, k) == fast(test_auto, prefix, k)

    for prefix, k in (("Z", 1), ("Po", 3), ("  BLAH", 5)):
        assert slow(pokemon_slow, prefix, k) == fast(pokemon_auto, prefix, k)

    for prefix, k in (("H", 1), ("Her", 3), ("Sabrin", 5)):
        assert slow(babies_slow, prefix, k) == fast(babies_auto, prefix, k)

    for prefix, k in (("", 1), ("永久", 3), ("Hermione", 5)):
        assert slow(mandarin_slow, prefix, k) == fast(mandarin_auto, prefix, k)

    # Differently-cased prefixes must not produce the same result.
    assert slow(trademarks_slow, "HOME", 1) != fast(trademarks_auto, "Home", 1)
    for prefix, k in (("", 3), ("invalidprefix", 5)):
        assert (slow(trademarks_slow, prefix, k)
                == fast(trademarks_auto, prefix, k))
Exemplo n.º 18
0
import autocomplete_me as ac
import time
import cProfile

# Time how long read_terms() takes to build the trie from movies.txt.
# time.clock() was removed in Python 3.8; time.perf_counter() is the
# documented replacement for measuring elapsed time.
begin1 = time.perf_counter()
words = ac.read_terms("movies.txt")
stop1 = time.perf_counter()
diff1 = stop1 - begin1
print("Building the Trie takes: {}".format(diff1))
print("\n")

# Time a single autocomplete() query against the trie built above.
begin2 = time.perf_counter()
ac.autocomplete("The", words, 5)
stop2 = time.perf_counter()
diff2 = stop2 - begin2
print("Finishing autocomplete for 'The' takes: {}".format(diff2))
print("\n")

# Profile every function call separately, with "The" as the search string
# and "movies.txt" as the text input (the trie is rebuilt inside the
# profiled statement so that loading is included in the profile).
cProfile.run('ac.autocomplete("The", ac.read_terms("movies.txt"), 5)')
Exemplo n.º 19
0
 def test_autocomplete_for_empty_input(self):
     """An empty prefix against pokemon.txt yields an empty list."""
     trie = ac.read_terms("pokemon.txt")
     matches = ac.autocomplete("", trie, 5)
     self.assertEqual([], matches)