def runModel():
    """Interactively score a small batch of freshly collected tweets.

    Fetches 10 tweets through ``tweetstreamer.getTweets`` into
    ``data.testTweets``, asks whether learning should be used, and then
    lets the user evaluate the tweets one at a time.  Each evaluated tweet
    is tokenized into lowercase words, scored with ``probOfFlu`` and
    compared against ``data.THRESHOLD``; tweets above the threshold bump
    ``data.numFluTweets``.  With learning enabled, the words and the verdict
    are passed to ``learner.updateProbs``.

    Returns early when the user picks the exit option; otherwise returns
    once every collected tweet has been evaluated.
    """
    print("Model running mode!\n")
    print("Collecting 10 tweets to iterate through!")
    data.testTweets = tweetstreamer.getTweets(10)

    # Keep asking until we get an unambiguous yes/no answer.
    print("Use learning?")
    while True:
        answer = raw_input("(y/n): ")
        if answer == "y":
            useLearning = True
            break
        if answer == "n":
            useLearning = False
            break
        print("Please enter y or n")

    # Main menu loop: runs until the user exits or the tweets run out.
    while len(data.testTweets) > 0:
        print("\n1: Evaluate new tweet\n2: Exit running mode")
        choice = raw_input("Enter option: ")

        if choice == "2":
            return
        if choice != "1":
            print("\nThat is not an option!\n")
            continue

        newTweet = data.testTweets.pop()
        lowered = newTweet.lower()
        wordsInTweet = re.findall(r'\w+', lowered)
        probTweetFlu = probOfFlu(wordsInTweet)
        isTweetFlu = probTweetFlu > data.THRESHOLD
        if isTweetFlu:
            data.numFluTweets += 1

        # Displaying the tweet can fail on characters the codec cannot
        # encode; the tweet is still counted and learned from.
        try:
            print("\nNew tweet: " + str(newTweet))
            print("Probability the user has flu: " + str(probTweetFlu))
        except UnicodeEncodeError:
            print("Codec cannot encode characters in this tweet!")

        if useLearning:
            learner.updateProbs(wordsInTweet, isTweetFlu)

    print("All test tweets evaluated!!!")
def _loadStoredTweets():
    """Load tweets.txt and, when present, its categorization data into ``data``."""
    if not os.path.exists("./tweets.txt"):
        return

    # The first line of tweets.txt is read as the number of stored tweets.
    with open("./tweets.txt", "r") as tweetsFile:
        data.tweetsStored = int(tweetsFile.readline())
    print("Found tweets.txt with " + str(data.tweetsStored) + " tweets in it!")

    # Load tweets into memory
    readTweetsFromFile()

    # Check if training data already exists for these tweets.
    # NOTE(review): this check is assumed to apply only when tweets.txt
    # exists (the original indentation was lost) -- confirm.
    if not os.path.exists("./trainingdata.txt"):
        print("No categorization data exists for these tweets!\n")
        return

    # The first line of trainingdata.txt is read as the categorization count.
    with open("./trainingdata.txt") as trainingDataFile:
        data.tweetsCategorized = int(trainingDataFile.readline())
    print("Found trainingdata.txt with " + str(data.tweetsCategorized) + " categorizations in it!")

    if data.tweetsStored < data.tweetsCategorized:
        # We have more categorized tweets than tweets themselves. Error
        print("Mismatch: more tweets are categorized than exist! \nDisregarding categorization data.")
        data.tweetsCategorized = 0
    else:
        readCategorizationFromFile()


def _askTweetsWanted():
    """Prompt until the user enters a non-negative whole number and return it."""
    print("Please enter the number of tweets you would like in the training set")
    while True:
        inputstring = raw_input(">> ")
        try:
            tweetsWanted = int(inputstring)
        except ValueError:
            # Re-prompt instead of crashing on non-numeric input.
            print("Please enter a whole number")
            continue
        if tweetsWanted < 0:
            print("Please enter a non-negative number")
            continue
        return tweetsWanted


def _collectNewTweets(tweetsWanted):
    """Discard stored tweets/categorizations, then fetch and categorize a fresh set."""
    data.tweetsStored = 0
    data.tweetsCategorized = 0
    data.tweets = tweetstreamer.getTweets(tweetsWanted, data.searchterms)
    trainer.categorizeTweets()


def trainModel():
    """Interactively build the training set and compute the model.

    Steps:
      1. If ``data.probabilities`` is non-empty, offer to reuse the saved
         model (``y`` returns immediately, ``n`` clears it, ``q`` quits).
      2. Load stored tweets and categorizations from ``./tweets.txt`` and
         ``./trainingdata.txt`` when those files exist.
      3. Ask how many tweets the training set should contain.
      4. Depending on whether enough tweets are stored, let the user append
         more tweets, reuse the stored ones, or overwrite them with a newly
         collected set.
      5. Update ``data.tweetsStored`` and call ``trainer.calculateProbs()``.

    The process exits through ``quit()`` whenever the user answers ``q``.
    """
    # Check if probabilities already exist and ask user if they want to use these probs
    if len(data.probabilities) != 0:
        print("Saved model already exists.\nUse this model?\n")
        while True:
            inputstring = raw_input("(y/n): ")
            if inputstring == "y":
                print("Model trained!")
                return
            elif inputstring == "n":
                data.probabilities.clear()
                break
            elif inputstring == "q":
                print("Good bye!")
                quit()
            else:
                print("Please enter y, n, or q to quit")

    # Read tweets and categorization data from files
    _loadStoredTweets()

    # Ask the user how many tweets they would like in the training set
    tweetsWanted = _askTweetsWanted()

    if data.tweetsStored < tweetsWanted:
        # We do not have as many tweets as the user requested
        print(str(data.tweetsStored) + " tweets already stored.")
        print(str(tweetsWanted - data.tweetsStored) + " more tweets needed.")
        print("Append more tweets or overwrite and collect new tweets?")
        while True:
            inputstring = raw_input("(a/o): ")
            if inputstring == "a":
                # extend, not append: getTweets' result is assigned directly
                # to data.tweets elsewhere, so it is a collection of tweets
                # and must be merged element-wise rather than nested.
                data.tweets.extend(tweetstreamer.getTweets(tweetsWanted - data.tweetsStored, data.searchterms))
                trainer.categorizeTweets()
                break
            elif inputstring == "o":
                _collectNewTweets(tweetsWanted)
                break
            elif inputstring == "q":
                print("Good bye!")
                # Actually exit; previously this only printed and re-prompted.
                quit()
            else:
                print("Please enter a to append, o to overwrite, or q to quit")
    else:
        # We have more than enough tweets
        print(str(data.tweetsStored) + " tweets already stored.")
        print("Use these tweets or overwrite and collect new tweets?")
        while True:
            inputstring = raw_input("(u/o): ")
            if inputstring == "u":
                readCategorizationFromFile()
                break
            elif inputstring == "o":
                _collectNewTweets(tweetsWanted)
                break
            elif inputstring == "q":
                print("Good bye!")
                quit()
            else:
                print("Please enter u to use, o to overwrite, or q to quit")

    # Now we have the tweets and some categorization or not.
    # Train the model; per the original notes it is stored in probabilities.
    data.tweetsStored = len(data.tweets)
    trainer.calculateProbs()