def main():
    # read the reviews, tokenize them, and strip stop words
    text = read_file_2(3)
    review_tokens = [get_words(review) for review in text]
    stopped_sent = [stopword_rem(sentence) for sentence in review_tokens]
    sents = [' '.join(tokens) for tokens in stopped_sent]

    # score each review with VADER
    sid = SentimentIntensityAnalyzer()
    sentiment_scores = [sid.polarity_scores(sent) for sent in sents]

    # pair each positive score with the negated negative score and sort the pairs
    polarity_pairs = sorted([score['pos'], -1 * score['neg']] for score in sentiment_scores)
    axis = list(range(len(polarity_pairs)))

    # plot the sorted scores as a 3D surface with plotly
    data = [go.Surface(z=polarity_pairs, x=axis, y=axis)]
    layout = go.Layout(title='Sentiment Analysis', autosize=False, width=500, height=500,
                       margin=dict(l=65, r=50, b=65, t=90))
    fig = go.Figure(data=data, layout=layout)
    py.plot(fig, filename='Sentiment Analysis')
def demo_vader_instance(text):
    """
    Output polarity scores for a text using Vader approach.

    :param text: a text whose polarity has to be evaluated.
    """
    from vader import SentimentIntensityAnalyzer
    vader_analyzer = SentimentIntensityAnalyzer()
    print(vader_analyzer.polarity_scores(text))
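# A minimal usage sketch for demo_vader_instance; the sample sentence below is
# hypothetical and not taken from the project's data. With the standard VADER scorer
# this prints a dict with 'neg', 'neu', 'pos' and 'compound' fields; the local `vader`
# module used here may expose additional fields.
if __name__ == '__main__':
    demo_vader_instance("The keyboard feels great, but the battery life is terrible.")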
def main():
    # read the input text and tokenize it into words and sentences
    text = read_file_1()
    para_tokens = get_words(text)
    sents = get_sentences(text)
    sent_tokens = [get_words(tokens) for tokens in sents]
    stopped_sent = [stopword_rem(sentence) for sentence in sent_tokens]

    # extract candidate keyphrases with TextRank and count how often each word occurs
    text_score = score_keyphrases_by_textrank(text)
    freq = []
    for words, score in text_score:
        for w in words.split():
            freq.append(w)
    freq_uniq = get_frequency(freq, freq)

    aspect_list = [
        'laptop', 'keyboard', 'keys', 'screen', 'graphics', 'processor',
        'display', 'body', 'size', 'mouse', 'trackpad', 'track',
        'battery', 'sensors'
    ]

    # keep the aspects that actually occur and rank them by frequency
    scores = []
    words = []
    for word1 in aspect_list:
        for word2 in freq_uniq.keys():
            if word1 == word2:
                words.append(word1)
                scores.append(freq_uniq[word1])
    top10 = sorted(zip(words, scores), key=itemgetter(1), reverse=True)

    # score each review sentence with VADER
    sents = read_file_1()
    sid = SentimentIntensityAnalyzer()
    sentiment_scores = [sid.polarity_scores(sent) for sent in sents]

    # pair positive and negated negative scores, sort them, and plot a 3D surface
    polarity_pairs = sorted([score['pos'], -1 * score['neg']] for score in sentiment_scores)
    axis = list(range(len(polarity_pairs)))
    data = [go.Surface(z=polarity_pairs, x=axis, y=axis)]
    layout = go.Layout(title='Canon G3', autosize=False, width=500, height=500,
                       margin=dict(l=65, r=50, b=65, t=90))
    fig = go.Figure(data=data, layout=layout)
    py.plot(fig, filename='elevations-3d-surface')
def collectAllSentiments(bbcNewFldr):
    sentimentScoresForAnArticle = []
    for filename in os.listdir(bbcNewFldr):
        # tokenize each article, lower-case it, and remove stop words
        text = readFromSingleFile(bbcNewFldr, filename)
        newsTokens = [getWords(line.lower()) for line in text]
        stopWordRmdLst = [stopwordRemove(sentence) for sentence in newsTokens]
        sents = [' '.join(tokens) for tokens in stopWordRmdLst]

        # score the whole article at once with VADER
        sid = SentimentIntensityAnalyzer()
        allReviews = ' '.join(sents)
        sentiment_scores = sid.polarity_scores(allReviews)
        sentimentScoresForAnArticle.append(sentiment_scores)
    return sentimentScoresForAnArticle
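# A hypothetical usage sketch for collectAllSentiments; 'bbc_news/' stands in for the
# real article folder, whose actual path is not shown in this snippet. Each element of
# the returned list is the polarity-score dict for one article:
#
#     articleScores = collectAllSentiments('bbc_news/')
#     for scores in articleScores:
#         print(scores)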
def full_labeled(texts, keywords, outfile=None):
    labeled_tweets = []
    sia = SentimentIntensityAnalyzer()
    for text in texts:
        # keep only tweets that match at least one keyword label
        labels = tweet_classify(text, keywords)
        sentiment = sia.polarity_scores(text)['compound']
        if len(labels) > 0:
            labeled_tweets.append([labels, text, sentiment])
    if outfile is not None:
        # write the labeled tweets to the CSV file requested by the caller
        num = len(labeled_tweets)
        with open(outfile, 'w', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerows(labeled_tweets)
        print('Wrote {} lines to {}'.format(num, outfile))
    else:
        return labeled_tweets
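# A minimal usage sketch for full_labeled with hypothetical tweets and keywords (not
# from the project's data). With no outfile the labeled list is returned instead of
# being written to CSV; each entry is [labels, text, compound score].
if __name__ == '__main__':
    sample_tweets = [
        "The new camera on this phone is amazing",
        "Battery drains way too fast, very disappointed",
    ]
    sample_keywords = ['camera', 'battery']
    for labels, text, compound in full_labeled(sample_tweets, sample_keywords):
        print(labels, compound, text)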
def main():
    # read input text from json file
    # text = read_file(0)
    # tokenize the paragraph
    # para_tokens = get_words(text)
    # get sentences from the paragraph
    # sents = get_sentences(text)
    for i in range(len(files)):
        sents = read_file_1(i)
        sid = SentimentIntensityAnalyzer()
        sentiment_scores = [sid.polarity_scores(sent) for sent in sents]
        polarity_pairs = sorted([score['pos'], -1 * score['neg']] for score in sentiment_scores)
        axis = list(range(len(polarity_pairs)))
        data = [go.Surface(z=polarity_pairs, x=axis, y=axis)]
        layout = go.Layout(title=str(files[i]), autosize=False, width=500, height=500,
                           margin=dict(l=65, r=50, b=65, t=90))
        fig = go.Figure(data=data, layout=layout)
        py.plot(fig, filename=files[i])
lineIn = '\n'.join(text)
tagged = getTagsForWords(lineIn)
NNP = getNounPositions('NNP', tagged)
top3Noun = getTop3NounAndFreq(NNP)

# perform the summarization multiple times and compute sentiment information each time
for pi in percentageOfSummary:
    reducedSummaryWithReplc = mainSummaryCalling(lineIn, pi)
    review_tokens = [getWords(line.lower()) for line in reducedSummaryWithReplc]
    stopped_sent = [stopwordRemove(sentence) for sentence in review_tokens]
    sents = [' '.join(tokens) for tokens in stopped_sent]

    sid = SentimentIntensityAnalyzer()
    allReviews = ' '.join(sents)
    sentiment_scores = sid.polarity_scores(allReviews)
    positiveScores.append(sentiment_scores['posScore'])
    negativeScores.append(sentiment_scores['negScore'])
    compoundScores.append(sentiment_scores['compoundScore'])
    sentiment_scoresForAnArticle.append([top3Noun, sentiment_scores])
    print(sentiment_scores['posScore'])

# plot the 3D column chart of noun vs positive/negative score
plotMatrix3DColumnNounPosiNegiScore(sentiment_scoresForAnArticle)
# plot the 3D column chart of noun vs compound score and occurrence
plotMatrix3DColumnNounCompundScoreOccurence(sentiment_scoresForAnArticle)
filenames = collectAllFileNms(amazonRevFldr)
reviewContent = collectAllReviews(filenames, amazonRevFldr)

# preprocessing the customer reviews
reviewTokens = [getWords(review) for review in reviewContent if review is not None]
stopWordRmdLst = [stopwordRemove(sentence) for sentence in reviewTokens]
sents = [' '.join(tokens) for tokens in stopWordRmdLst]

sid = SentimentIntensityAnalyzer()
sentimentScores = []

# choose valid sentiment output (skip empty sentences)
for sent in sents:
    if len(sent) > 0:
        sentimentScores.append(sid.polarity_scores(sent))

# label the sentiment classes as positive (1) and negative (0)
documents = []
for si in sentimentScores:
    if si['compoundScore'] >= 0:
        category = 1  # 'pos'
    else:
        category = 0  # 'neg'
    documents.append((list(si['wordsWithEmotion']), category))
print(filenm)
filenames.append(filenm)

product = []
reviewTitle = []
reviewContent = []
for i in range(len(filenames)):
    print(filenames[i])
    with open(path + '/' + filenames[i]) as dataFile:
        data = json.load(dataFile)
        product.append(data['ProductInfo'])
        for reviews in data['Reviews']:
            reviewTitle.append(reviews['Title'])
            reviewContent.append(reviews['Content'])

text = reviewContent
# text = read_file_2(3)
reviewTokens = [getWords(review) for review in text if review is not None]
stoppWrdRmdSentence = [stopwordsRemove(sentence) for sentence in reviewTokens]
sents = [' '.join(tokens) for tokens in stoppWrdRmdSentence]

# perform sentiment analysis on reviews
sid = SentimentIntensityAnalyzer()
sentiment_scores = [sid.polarity_scores(sent) for sent in sents]

# represent sentiment scores as a matrix
(matrix, numOfEle) = sentimentScoresToMatrix(sentiment_scores)

# perform 3D surface plotting of the matrix of sentiments
plotMatrix3DSurfByMatPlotLib(matrix, numOfEle, numOfEle)
product.append(data['ProductInfo'])
for reviews in data['Reviews']:
    reviewTitle.append(reviews['Title'])
    reviewContent.append(reviews['Content'])

text = reviewContent
# text = read_file_2(3)
reviewTokens = [getWords(review) for review in text if review is not None]
stoppWrdRmdSentence = [stopwordsRemove(sentence) for sentence in reviewTokens]
sents = [' '.join(tokens) for tokens in stoppWrdRmdSentence]

# perform sentiment analysis on reviews
sid = SentimentIntensityAnalyzer()
sentiment_scores = [sid.polarity_scores(sent) for sent in sents]

# represent sentiment scores as a matrix
(matrix, numOfEle) = sentimentScoresToMatrix(sentiment_scores)

# write reviews and sentiments to excel
writeToExclReviewSentiStat(sents, sentiment_scores)

# perform sentiment analysis on all review comments at once
allReviews = ' '.join(sents)
sentiment_scoresAllRevs = sid.polarity_scores(allReviews)  # for all reviews

# compute word frequency
wordsAndFreqs = Counter(sentiment_scoresAllRevs['wordsWithEmotion'])

# compute word frequency and score
wordsFreqsScore = getWordFreqAndScore(sentiment_scoresAllRevs, wordsAndFreqs)

# sort words based on their sentiment score in descending order
def demo_vader_tweets(n_instances=None, output=None):
    """
    Classify 10000 positive and negative tweets using Vader approach.

    :param n_instances: the number of total tweets that have to be classified.
    :param output: the output file where results have to be reported.
    """
    from collections import defaultdict
    from nltk.corpus import twitter_samples
    from vader import SentimentIntensityAnalyzer
    from nltk.metrics import (accuracy as eval_accuracy, precision as eval_precision,
                              recall as eval_recall, f_measure as eval_f_measure)

    if n_instances is not None:
        n_instances = int(n_instances / 2)

    fields = ['id', 'text']
    positive_json = twitter_samples.abspath("positive_tweets.json")
    positive_csv = 'positive_tweets.csv'
    json2csv_preprocess(positive_json, positive_csv, fields,
                        strip_off_emoticons=False, limit=n_instances)

    negative_json = twitter_samples.abspath("negative_tweets.json")
    negative_csv = 'negative_tweets.csv'
    json2csv_preprocess(negative_json, negative_csv, fields,
                        strip_off_emoticons=False, limit=n_instances)

    pos_docs = parse_tweets_set(positive_csv, label='pos')
    neg_docs = parse_tweets_set(negative_csv, label='neg')

    # We separately split subjective and objective instances to keep a balanced
    # uniform class distribution in both train and test sets.
    train_pos_docs, test_pos_docs = split_train_test(pos_docs)
    train_neg_docs, test_neg_docs = split_train_test(neg_docs)

    training_tweets = train_pos_docs + train_neg_docs
    testing_tweets = test_pos_docs + test_neg_docs

    vader_analyzer = SentimentIntensityAnalyzer()

    gold_results = defaultdict(set)
    test_results = defaultdict(set)
    acc_gold_results = []
    acc_test_results = []
    labels = set()
    num = 0
    for i, (text, label) in enumerate(testing_tweets):
        labels.add(label)
        gold_results[label].add(i)
        acc_gold_results.append(label)
        score = vader_analyzer.polarity_scores(text)['compound']
        if score > 0:
            observed = 'pos'
        else:
            observed = 'neg'
        num += 1
        acc_test_results.append(observed)
        test_results[observed].add(i)

    metrics_results = {}
    for label in labels:
        accuracy_score = eval_accuracy(acc_gold_results, acc_test_results)
        metrics_results['Accuracy'] = accuracy_score
        precision_score = eval_precision(gold_results[label], test_results[label])
        metrics_results['Precision [{0}]'.format(label)] = precision_score
        recall_score = eval_recall(gold_results[label], test_results[label])
        metrics_results['Recall [{0}]'.format(label)] = recall_score
        f_measure_score = eval_f_measure(gold_results[label], test_results[label])
        metrics_results['F-measure [{0}]'.format(label)] = f_measure_score

    for result in sorted(metrics_results):
        print('{0}: {1}'.format(result, metrics_results[result]))

    if output:
        output_markdown(output, Approach='Vader', Dataset='labeled_tweets',
                        Instances=n_instances, Results=metrics_results)
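# A hypothetical invocation of the tweets demo, mirroring the docstring: classify a
# balanced sample of positive and negative tweets and write the metrics to a markdown
# report. The file name 'vader_tweets_results.md' is illustrative only.
if __name__ == '__main__':
    demo_vader_tweets(n_instances=10000, output='vader_tweets_results.md')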
def main():
    # read input text from json file
    text = read_file(0)

    # tokenize the paragraph
    para_tokens = get_words(text)

    # get sentences from the paragraph
    sents = get_sentences(text)

    # tokenize each sentence
    sent_tokens = [get_words(tokens) for tokens in sents]

    # remove stopwords from each sentence
    stopped_sent = [stopword_rem(sentence) for sentence in sent_tokens]

    # extract aspects from the sample text
    text_score = score_keyphrases_by_textrank(text)

    # collect the words that make up the extracted keyphrases
    freq = []
    for words, score in text_score:
        for w in words.split():
            freq.append(w)

    # calculate the frequency of each word
    freq_uniq = get_frequency(freq, freq)

    aspect_list = [
        'laptop', 'keyboard', 'keys', 'screen', 'graphics', 'processor',
        'display', 'body', 'size', 'mouse', 'trackpad', 'track',
        'battery', 'sensors'
    ]

    # get top 10 aspects based on frequency
    scores = []
    words = []
    for word1 in aspect_list:
        for word2 in freq_uniq.keys():
            if word1 == word2:
                words.append(word1)
                scores.append(freq_uniq[word1])
    top10 = sorted(zip(words, scores), key=itemgetter(1))
    top10.reverse()

    # get the indices of the sentences that the aspects occur in
    i = 0
    aspect_sent = []
    aspect_topic = []
    for sentence in sents:
        for top in top10:
            for word in sentence.split():
                if top[0] == word:
                    aspect_sent.append(i)
                    aspect_topic.append(top[0])
        i = i + 1
    # aspect_sent = zip(aspect_topic, aspect_sent)
    # aspect1 = ['graphics', 'screen', 'size']
    # aspect2 = ['keyboard', 'keys', 'key']
    aspect_sent_uniq = list(set(aspect_sent))

    # score each aspect-bearing sentence with VADER
    sid = SentimentIntensityAnalyzer()
    sentiment_scores = [sid.polarity_scores(sents[i]) for i in aspect_sent_uniq]

    # print up to 10 mostly-positive sentences and their combined polarity
    pos_sents = ""
    neg_sents = ""
    j = 0
    print("\n\n Positive: \n")
    for i in range(0, len(aspect_sent_uniq)):
        if sentiment_scores[i]['pos'] > sentiment_scores[i]['neg'] and j < 10:
            print(sents[aspect_sent_uniq[i]])
            pos_sents += sents[aspect_sent_uniq[i]] + " "
            j = j + 1
    print("\n\nPositive Sentences Polarity:")
    print(sid.polarity_scores(pos_sents))

    # print up to 10 mostly-negative sentences and their combined polarity
    j = 0
    print("\n\n Negative: \n")
    for i in range(0, len(aspect_sent_uniq)):
        if sentiment_scores[i]['neg'] > sentiment_scores[i]['pos'] and j < 10:
            print(sents[aspect_sent_uniq[i]])
            neg_sents += sents[aspect_sent_uniq[i]] + " "
            j = j + 1
    print("\n\nNegative Sentences Polarity:")
    print(sid.polarity_scores(neg_sents))