def TextBlobCleanEmoji():
    """
    TextBlob model with emoticon scoring.

    Reads "raw_twitter.txt" one line (tweet) at a time, runs each line
    through the tweetCleaner steps, pulls an emoticon score out of the text
    with tweetProcesser.emoticon_score, wraps the cleaned text in a TextBlob
    and hands both to tweetProcesser.sentimentClassifier.

    One tab-delimited row per input line is written to
    "results_textblob_emoji.txt":
        [normalized_score, sentiment_label, cleaned_tweet]
    If any step fails for a line, the placeholder row
        ["0", "neutral", "ERROR"]
    is written instead so the output stays aligned with the input.
    """
    # newline="" is what the csv module asks for on files it writes to;
    # without it the writer's "\r\n" terminator becomes "\r\r\n" on Windows.
    with open("results_textblob_emoji.txt", "w", encoding="utf-8",
              newline="") as preresults:
        newWriter = csv.writer(preresults, delimiter='\t', quotechar='|',
                               quoting=csv.QUOTE_MINIMAL)
        with open("raw_twitter.txt", "r", encoding="utf-8") as preproccessed:
            # Iterate the file object directly so the whole input is never
            # held in memory; the counter is 1-based for the progress output.
            for tweet_counter, line in enumerate(preproccessed, 1):
                try:
                    print("Processing tweet: {}".format(tweet_counter))
                    tweet = tweetCleaner.lowercase(line)
                    tweet = tweetCleaner.StopWordRemover(tweet)
                    tweet = tweetCleaner.removeSpecialChars(tweet)
                    # Emoticons are scored before non-alpha characters are
                    # stripped by removeAllNonAlpha below.
                    tweet, score = tweetProcesser.emoticon_score(tweet)
                    tweet = tweetCleaner.removeAllNonAlpha(tweet)
                    tweet = tweetCleaner.lemmatizer(tweet)
                    wiki = TextBlob(tweet)
                    normalized_score, sentiment_label = \
                        tweetProcesser.sentimentClassifier(wiki, score)
                    newWriter.writerow(
                        [normalized_score, sentiment_label, tweet])
                except Exception:
                    # Best-effort per tweet: record a neutral placeholder and
                    # keep going. Exception (not a bare except) so Ctrl-C and
                    # SystemExit still stop the run.
                    newWriter.writerow(["0", "neutral", "ERROR"])
                    print("ERROR processing tweet: {}".format(tweet_counter))
def NLTKCleanAbbrevEmoji():
    """
    NLTK model with extended abbreviations AND emoticon scoring.

    Calls tweetProcesser.abbreviation_extender() first (presumably this is
    what produces "abbreviations_twitter.txt" -- confirm against that
    helper), then reads "abbreviations_twitter.txt" one line (tweet) at a
    time. Each line is cleaned with the tweetCleaner steps and an emoticon
    score is extracted with tweetProcesser.emoticon_score. The cleaned text
    is split into sentences, and for every sentence the "pos" value from
    sentiment.polarity_scores is added to, and the "neg" value subtracted
    from, the emoticon score.

    One tab-delimited row per input line is written to
    "results_nltk_abbrev_emoji.txt":
        [0, "neutral"]               when the rounded total is exactly 0
        [total_score, "positive"]    when it is above 0
        [total_score, "negative"]    when it is below 0
    If any step fails for a line, [0, "neutral"] is written instead.
    """
    tweetProcesser.abbreviation_extender()
    # newline="" is what the csv module asks for on files it writes to;
    # without it the writer's "\r\n" terminator becomes "\r\r\n" on Windows.
    with open("results_nltk_abbrev_emoji.txt", "w", encoding="utf-8",
              newline="") as postresults:
        newWriter = csv.writer(postresults, delimiter='\t', quotechar='|',
                               quoting=csv.QUOTE_MINIMAL)
        with open("abbreviations_twitter.txt", "r",
                  encoding="utf-8") as postprocessed:
            # Iterate the file object directly so the whole input is never
            # held in memory; the counter is 1-based for the progress output.
            for tweet_counter, line in enumerate(postprocessed, 1):
                try:
                    print("Processing tweet: {}".format(tweet_counter))
                    tweet = tweetCleaner.lowercase(line)
                    tweet = tweetCleaner.StopWordRemover(tweet)
                    tweet = tweetCleaner.removeSpecialChars(tweet)
                    # The emoticon score seeds the running total that the
                    # per-sentence scores are accumulated onto.
                    tweet, total_score = tweetProcesser.emoticon_score(tweet)
                    tweet = tweetCleaner.removeAllNonAlpha(tweet)
                    tweet = tweetCleaner.lemmatizer(tweet)
                    # "sentence" rather than "line" so the outer loop
                    # variable is not shadowed.
                    for sentence in tokenize.sent_tokenize(tweet):
                        ss = sentiment.polarity_scores(sentence)
                        total_score -= ss["neg"]
                        total_score += ss["pos"]
                    total_score = round(total_score, 3)
                    if total_score == 0:
                        newWriter.writerow([0, "neutral"])
                    elif total_score > 0:
                        newWriter.writerow([total_score, "positive"])
                    else:
                        newWriter.writerow([total_score, "negative"])
                except Exception:
                    # Best-effort per tweet: record a neutral placeholder and
                    # keep going. Exception (not a bare except) so Ctrl-C and
                    # SystemExit still stop the run.
                    newWriter.writerow([0, "neutral"])
                    print("ERROR processing tweet: {}".format(tweet_counter))
def NLTKCleanRaw():
    """
    Raw NLTK model (no abbreviation extension, no emoticon scoring).

    Reads "raw_twitter.txt" one line (tweet) at a time, cleans each line
    with the tweetCleaner steps and splits the result into sentences. For
    every sentence the "pos" value from sentiment.polarity_scores is added
    to, and the "neg" value subtracted from, a total that starts at 0.

    One tab-delimited row per input line is written to
    "results_nltk_raw.txt":
        [0, "neutral"]               when the rounded total is exactly 0
        [total_score, "positive"]    when it is above 0
        [total_score, "negative"]    when it is below 0
    If any step fails for a line, [0, "neutral"] is written instead.
    """
    # newline="" is what the csv module asks for on files it writes to;
    # without it the writer's "\r\n" terminator becomes "\r\r\n" on Windows.
    with open("results_nltk_raw.txt", "w", encoding="utf-8",
              newline="") as postresults:
        newWriter = csv.writer(postresults, delimiter='\t', quotechar='|',
                               quoting=csv.QUOTE_MINIMAL)
        with open("raw_twitter.txt", "r", encoding="utf-8") as postprocessed:
            # Iterate the file object directly so the whole input is never
            # held in memory; the counter is 1-based for the progress output.
            for tweet_counter, line in enumerate(postprocessed, 1):
                total_score = 0
                try:
                    print("Processing tweet: {}".format(tweet_counter))
                    tweet = tweetCleaner.lowercase(line)
                    tweet = tweetCleaner.StopWordRemover(tweet)
                    tweet = tweetCleaner.removeSpecialChars(tweet)
                    tweet = tweetCleaner.removeAllNonAlpha(tweet)
                    tweet = tweetCleaner.lemmatizer(tweet)
                    for sentence in tokenize.sent_tokenize(tweet):
                        ss = sentiment.polarity_scores(sentence)
                        total_score -= ss["neg"]
                        total_score += ss["pos"]
                    total_score = round(total_score, 3)
                    if total_score == 0:
                        newWriter.writerow([0, "neutral"])
                    elif total_score > 0:
                        newWriter.writerow([total_score, "positive"])
                    else:
                        newWriter.writerow([total_score, "negative"])
                except Exception:
                    # Best-effort per tweet: record a neutral placeholder and
                    # keep going. Exception (not a bare except) so Ctrl-C and
                    # SystemExit still stop the run.
                    newWriter.writerow([0, "neutral"])
                    print("ERROR processing tweet: {}".format(tweet_counter))