def initialTestMaxMatchAlgo():
    """Run maxMatch on four hand-written hashtags against a tiny word list.

    Each case calls werthman_assgn1.maxMatch with the raw hashtag, the
    in-function word list and an empty string, then hands the comparison
    with the expected segmentation to assertion() together with a
    failure message.
    """
    vocabulary = ['moon', 'man', 'over', 'the', 'help', 'me',
                  't', 'fi']

    #(raw hashtag, expected segmentation, failure message), checked in this order
    cases = (
        #hashtag made up entirely of words in the word list
        ("manoverthemoon", "man over the moon",
         "manoverthemoon should be changed to man over the moon."),
        #hashtag made up of some words from the word list
        ("helpmeoverthere", "help me over the r e",
         "helpmeoverthere should be changed to help me over the r e."),
        #hashtag where only 'fi' and 'me' come from the word list
        ("findmefood", "fi n d me f o o d",
         "findmefood should be changed to fi n d me f o o d."),
        #hashtag made up of none of the words in the word list
        ("whaaaaat", "w h a a a a a t",
         "whaaaaat should be changed to w h a a a a a t."),
    )

    for rawTag, expectedSplit, failureMessage in cases:
        segmented = werthman_assgn1.maxMatch(rawTag, vocabulary, "")
        assertion(segmented == expectedSplit, failureMessage)
def testMinEditDistanceAlgo():
    """Check minEditDist and the derived word error rate (WER) on the test fixtures.

    Reads the word list and the hashtags through
    werthman_assgn1.readWordsFromFile, segments every hashtag with
    werthman_assgn1.maxMatch, and pairs each result with the matching line
    of "realtesthashtags.txt". For the three known reference segmentations
    the word-level edit distance and the WER (edit distance divided by the
    number of reference words) are checked through assertion(). Finally the
    WER total is averaged over every reference line and checked as well.
    """
    #retrieve the wordlist and hashtag list from the file system
    wordlist = werthman_assgn1.readWordsFromFile("testwordlist.txt", True, 75000)
    hashtags = werthman_assgn1.readWordsFromFile("testhashtaglist.txt", False, 0)

    #segment every hashtag with the maxmatch algo
    maxmatchHashtags = [werthman_assgn1.maxMatch(hashtag, wordlist, "") for hashtag in hashtags]

    #read in the list of what the hashtags should really look like
    with open("realtesthashtags.txt", "r") as f:
        correctHashtags = [line.strip() for line in f]

    #reference segmentation -> (expected distance, expected WER, distance message, WER message)
    #NOTE(review): the expected distances 7 and 3 look consistent with maxMatch
    #emitting single letters for unmatched text ("fi n d me f o o d",
    #"help me over the r e", as asserted in initialTestMaxMatchAlgo) and unit-cost
    #edits; earlier comments here described "fi ndmefood" / "help me over the re".
    #Confirm against testwordlist.txt and minEditDist's cost model.
    expectations = {
        "man over the moon": (
            0, 0,
            "Man over the moon should have a min edit distance of 0",
            "Man over the moon should have a WER of 0"),
        "find me food": (
            7, 7.0/3,
            "Find me food should have a min edit distance of 7",
            "Find me food should have a should have a WER of 7/3"),
        "help me over there": (
            3, 3.0/4,
            "help me over there should have a min edit distance of 3",
            "help me over there should have a WER of 3/4"),
    }

    #compare each maxmatchHashtag to its correctHashtag word by word
    totalWER = 0.0
    for maxmatchHashtag, correctHashtag in zip(maxmatchHashtags, correctHashtags):
        if correctHashtag not in expectations:
            #only the three known references are checked and counted in the total
            continue
        expectedDistance, expectedWER, distanceMessage, werMessage = expectations[correctHashtag]

        #convert each string to a list of the words in the string
        correctHashtagAsList = correctHashtag.split()
        maxmatchHashtagAsList = maxmatchHashtag.split()

        #compute the distance once and reuse it for both checks and the running total
        distance = werthman_assgn1.minEditDist(correctHashtagAsList, maxmatchHashtagAsList)
        #float() keeps the ratio from being truncated by integer division on Python 2
        wer = float(distance) / len(correctHashtagAsList)

        assertion(distance == expectedDistance, distanceMessage)
        assertion(wer == expectedWER, werMessage)
        totalWER += wer

    #average the WER across all of the reference hashtags; an empty reference file
    #is reported as a failed check instead of raising ZeroDivisionError
    averageWER = totalWER / len(correctHashtags) if correctHashtags else 0.0
    assertion(averageWER == ((7.0/3 + 3.0/4)/3.0), "Average WER across test set should be (7/3 + 3/4)/3, about 1.03")
def finalTestMaxMatchAlgo():
    """Compare maxMatch output on the training hashtags with the provided expected output.

    Loads the word list and hashtags through werthman_assgn1.readWordsFromFile,
    segments each hashtag with werthman_assgn1.maxMatch, and checks every
    result against the corresponding stripped line of
    'hashtags-train-maxmatch.txt' through assertion().
    """
    mismatchTemplate = "My maxmatch algo hashtag: {0} should be the same as the professor's maxmatch algo: {1}."

    #load the word list and the raw hashtags from the file system
    dictionary = werthman_assgn1.readWordsFromFile('bigwordlist.txt', True, 75000)
    rawHashtags = werthman_assgn1.readWordsFromFile('hashtags-train.txt', False, 0)

    #run the maxmatch algo over every hashtag
    segmented = [werthman_assgn1.maxMatch(tag, dictionary, "") for tag in rawHashtags]

    #expected maxmatch output from the professor's provided file,
    #with surrounding whitespace such as newlines stripped
    with open('hashtags-train-maxmatch.txt', 'r') as expectedFile:
        reference = [row.strip() for row in expectedFile]

    #pairwise comparison; zip stops at the shorter of the two lists
    for actual, expected in zip(segmented, reference):
        assertion(actual == expected, mismatchTemplate.format(actual, expected))