def getUnigramPerplexity():
    '''
    Controller method to find the perplexity of all the test books
    against the unigram model of every genre.

    Loads the unigram models with loadUnigramModels(), tokenizes every file
    found under test_path + genre for each genre in genres, and scores each
    book's tokens against each model with computeUnigramPerplexity(),
    printing progress as it goes.

    Returns a defaultdict(dict) where result[book][genre] holds whatever
    computeUnigramPerplexity(model, tokens) returned for that pair; `book`
    is the file name as listed by os.listdir.
    '''
    unigram_model = loadUnigramModels()

    #Reads in the test files one at a time and stores the tokens by book name
    #NOTE(review): the key is the bare file name, so two test files with the
    #same name in different genre folders overwrite each other - confirm
    #that file names are unique across genres
    book_tokens = {}
    for genre in genres:
        print("\nReading test files for genre {0}".format(genre))
        genre_dir = test_path + genre
        for path in os.listdir(genre_dir):
            book_tokens[path] = getTokensForFile(genre_dir + '/' + path)

    #Computes the perplexity of every test corpus against each unigram model
    #items() is used instead of iteritems() so this runs on Python 2 and 3
    book_perplexity = defaultdict(dict)
    for book, unigrams in book_tokens.items():
        print('')
        for genre, model in unigram_model.items():
            perplexity = computeUnigramPerplexity(model, unigrams)
            book_perplexity[book][genre] = perplexity
            print("Perplexity of '{0}' book on {1} genre model: {2}".format(book,genre,perplexity))

    return book_perplexity
def main():
    '''
    Controller method: loads the unigram and bigram models and runs the
    random sentence generation methods on them
    '''
    #Per-genre seed words for the seeded bigram run below
    #NOTE(review): the original comment says genres without a seed fall back
    #to the <START> character; that happens inside
    #generateRandomSentenceFromBigram and is not visible here - confirm
    custom_seeds = {
        'children':'sjbdsabdoisabdoisbdoias',
        'crime':'killed'
    }

    #Unigram pass: load the models, then generate sentences from them
    unigram_models = loadUnigramModels()
    generateRandomSentenceFromUnigram(unigram_models)

    #Bigram pass: load the models stored under 'BigramSentenceModel'
    bigram_models = loadBigramModels('BigramSentenceModel')

    #First with the default seed and n=200
    generateRandomSentenceFromBigram(bigram_models, n=200)

    #Then with the custom seeds, again with n=200
    generateRandomSentenceFromBigram(bigram_models, seed=custom_seeds, n=200)