def getting_sentiment(word, pos):
    flag = 0
    if 'NN' in pos:
        tag = 'n'
    elif 'JJ' in pos:
        tag = 'a'
        if pos == 'JJS':
            flag = 1
    elif 'VB' in pos:
        tag = 'v'
    elif 'RB' in pos:
        tag = 'r'
    else:
        tag = ''
    stemmer = WordNetLemmatizer()
    if tag != '':
        x = stemmer.lemmatize(word, tag)
    else:
        x = stemmer.lemmatize(word)
    try:
        score = float(score_dic[x])  # * float(m1)
    except KeyError:
        synsets = list(swn.senti_synsets(x, tag))  # fix: senti_synsets returns a generator in NLTK 3
        if len(synsets) > 0:
            score = synsets[0].pos_score() * 5
        else:
            score = 100
    if flag == 1 and score != -100 and score < 4:
        score = score + 1
    elif flag == 1 and score != -100 and score > -4 and score < 0:
        score = score - 1
    print(word + '--->' + str(score))
    return score
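# Added note: many snippets in this collection take len() of, index into, or compare
# swn.senti_synsets() against []. On NLTK 3.x that call returns a lazy generator, so
# those idioms fail or succeed vacuously; the list() wrappers added throughout
# materialize it first. A minimal sketch of the safe pattern, assuming only NLTK
# with the 'sentiwordnet' and 'wordnet' corpora downloaded; first_sense_scores and
# the input 'happy' are illustrative, not taken from any snippet here.
from nltk.corpus import sentiwordnet as swn

def first_sense_scores(word, tag=None):
    # Materialize the generator once, then guard before indexing.
    synsets = list(swn.senti_synsets(word, tag))
    if not synsets:
        return None
    first = synsets[0]
    return first.pos_score(), first.neg_score(), first.obj_score()

# e.g. first_sense_scores('happy', 'a') returns the scores of happy.a.01.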
def sentiwordnetSentimentWordsPresenceFeatures(wordsTagged):
    features = {}
    for word, tag in wordsTagged:
        wordnetTag = translateFromNltkToWordnetTag(tag)
        wordNegated = isWordNegated(word)
        word = stripNegation(word)
        if wordnetTag:
            synsets = list(sentiwordnet.senti_synsets(word, wordnetTag))
            if not synsets:
                synsets = list(sentiwordnet.senti_synsets(word))
        else:
            synsets = list(sentiwordnet.senti_synsets(word))
        if len(synsets) > 0:
            synset = synsets[0]
            if synset.pos_score() > 0.5:
                if wordNegated:
                    features["neg_word_presence"] = True
                else:
                    features["pos_word_presence"] = True
            if synset.neg_score() > 0.5:
                if wordNegated:
                    features["pos_word_presence"] = True
                else:
                    features["neg_word_presence"] = True
    return features
def get_score(adjective):
    if adjective not in d:
        scores = list(swn.senti_synsets(adjective))  # fix: materialize; the generator cannot be reused or len()'d
        if len(scores) == 0:
            d[adjective] = (-1, -1, -1, -1, -1, -1)
            return (-1, -1, -1, -1, -1, -1)
        pos_score = maxi([i.pos_score() for i in scores])
        neg_score = maxi([i.neg_score() for i in scores])
        obj_score = maxi([i.obj_score() for i in scores])
        # restrict to adjective/adverb senses
        scores_ad = list(swn.senti_synsets(adjective, pos='ar'))
        pos_score_ad = maxi([i.pos_score() for i in scores_ad])
        neg_score_ad = maxi([i.neg_score() for i in scores_ad])
        obj_score_ad = maxi([i.obj_score() for i in scores_ad])
        d[adjective] = (pos_score, neg_score, obj_score,
                        pos_score_ad, neg_score_ad, obj_score_ad)
    else:
        (pos_score, neg_score, obj_score,
         pos_score_ad, neg_score_ad, obj_score_ad) = d[adjective]
    if pos_score == -1:
        return -100
    if pos_score_ad > neg_score_ad:
        return pos_score_ad
    elif pos_score_ad < neg_score_ad:
        return -neg_score_ad
    elif pos_score > neg_score:
        return pos_score
    elif pos_score < neg_score:
        return -neg_score
    else:
        return 0
def processoFeatures(resposta):
    frases = tokenizerFrases.tokenize(resposta["corpo"])
    palavras = []
    palavrasTexto = {}
    for frase in frases:
        palavrasTemp = tokenizerPalavras.tokenize(frase)
        for palavra in palavrasTemp:
            palavrasTexto[palavra] = True
            palavras.append(palavra)  # fix: palavras was never filled, so pos_tag saw an empty list
    posTags = pos_tag(palavras)
    positivo = 0
    negativo = 0
    for palavra, tag in posTags:
        if tag.startswith("J"):
            synsets = sentiwordnet.senti_synsets(palavra, wordnet.ADJ)
        elif tag.startswith("V"):
            synsets = sentiwordnet.senti_synsets(palavra, wordnet.VERB)
        elif tag.startswith("N"):
            synsets = sentiwordnet.senti_synsets(palavra, wordnet.NOUN)
        elif tag.startswith("R"):
            synsets = sentiwordnet.senti_synsets(palavra, wordnet.ADV)
        else:
            synsets = sentiwordnet.senti_synsets(palavra)  # fix: pos="" matches nothing
        synsets = list(synsets)
        if len(synsets) > 0:
            synset = synsets[0]
            positivo = positivo + synset.pos_score()
            negativo = negativo + synset.neg_score()
    if positivo > negativo:
        return (palavrasTexto, "positivo")
    elif negativo > positivo:
        return (palavrasTexto, "negativo")
    else:
        return (palavrasTexto, "neutro")
def analiseSentimento(resposta):
    texto = resposta['corpo']
    frases = sentencesTokenizer.tokenize(texto)
    palavras = []
    for frase in frases:
        palavras.extend(wordsTokenizer.tokenize(frase))
    posTags = pos_tag(palavras)
    positivo = 0
    negativo = 0
    for palavra, tag in posTags:
        if tag.startswith('J'):
            synsets = sentiwordnet.senti_synsets(palavra, wordnet.ADJ)
        elif tag.startswith('V'):
            synsets = sentiwordnet.senti_synsets(palavra, wordnet.VERB)
        elif tag.startswith('N'):
            synsets = sentiwordnet.senti_synsets(palavra, wordnet.NOUN)
        elif tag.startswith('R'):
            synsets = sentiwordnet.senti_synsets(palavra, wordnet.ADV)
        else:
            synsets = sentiwordnet.senti_synsets(palavra)  # fix: pos='' matches nothing
        synsets = list(synsets)
        if len(synsets) > 0:
            synset = synsets[0]
            positivo = positivo + synset.pos_score()
            negativo = negativo + synset.neg_score()
    if positivo > negativo:
        return (resposta, 'positivo')
    elif negativo > positivo:
        return (resposta, 'negativo')
    else:
        return (resposta, 'neutro')
def senti_analisys(tokens):
    scorePosTot = 0
    scoreNegTot = 0
    scoreObjTot = 0
    scoreObjNorm = scoreNegNorm = scorePosNorm = 0
    count = 0
    for token, part in tokens:
        if part.startswith("JJ") or part.startswith("NN") or part.startswith("VB"):
            scorePos = 0
            scoreNeg = 0
            scoreObj = 0
            list_synset = list(swn.senti_synsets(token))  # fix: a generator never equals []
            if list_synset:
                dim_synset = len(list_synset)
                for i in list_synset:
                    scorePos += i.pos_score()
                    scoreNeg += i.neg_score()
                    scoreObj += i.obj_score()
                scorePos = scorePos / dim_synset
                scoreNeg = scoreNeg / dim_synset
                scoreObj = scoreObj / dim_synset
                scorePosTot += scorePos
                scoreNegTot += scoreNeg
                scoreObjTot += scoreObj
                count += 1
    if count != 0:
        scorePosNorm = scorePosTot / count
        scoreNegNorm = scoreNegTot / count
        scoreObjNorm = scoreObjTot / count
    if scoreNegNorm < scorePosNorm:
        return 1, scorePosNorm    # positive
    elif scoreNegNorm > scorePosNorm:
        return -1, -scoreNegNorm  # negative
    else:
        return 0, 0               # objective
def get_net_pos_neg(word):
    netPos = 0
    netNeg = 0
    sentisyn = list(swn.senti_synsets(word))
    for item in sentisyn:
        netPos += item.pos_score()
        netNeg += item.neg_score()
    return netPos, netNeg
def strip_proppers_POS(text, search):
    text = text.decode('utf-8', 'ignore')
    tokens = nltk.word_tokenize(text.lower())
    tagged = nltk.tag._pos_tag(tokens, tagset, tagger)
    res = []
    search_index = [i for i, val in enumerate(tokens)
                    if (p.singular_noun(val) == search
                        or (not p.singular_noun(val) and val == search))]
    words = [(word, pos) for word, pos in tagged
             if pos[0] == "J" and len(word) > 2 and word not in stop
             and not p.singular_noun(word) and eng_check.check(word)
             and not any(ccc.isdigit() for ccc in word)]
    adj_count = 0
    for a in range(0, len(tagged)):
        if tagged[a] in words:
            if tagged[a][1][0] == "J":
                adj = tagged[a][0]
                dist = min([abs(a - s) for s in search_index])
                score = 0
                adj_synset = list(swn.senti_synsets(adj, 'a'))  # fix: materialize before len()/indexing
                if len(adj_synset) <= 0:
                    adj_synset = list(swn.senti_synsets(adj, 'v'))
                if len(adj_synset) <= 0:
                    synonyms = []
                    for ss in wn.synsets(adj):
                        for j in ss.lemma_names():
                            synonyms.append(j)
                    if len(synonyms) > 1:
                        synonym_count = 0
                        for s in range(0, len(synonyms)):
                            if synonym_count < 2 and synonyms[s] != adj:
                                w1 = synonyms[s]
                                adj_synset1 = list(swn.senti_synsets(w1, 'a'))
                                if len(adj_synset1) > 0:
                                    score += adj_synset1[0].pos_score() \
                                        - adj_synset1[0].neg_score()
                                    synonym_count += 1
                        score = score / 2
                else:
                    score = adj_synset[0].pos_score() \
                        - adj_synset[0].neg_score()
                try:
                    res.append((adj, score / (pow(dist, 2))))
                    adj_count += 1
                except:
                    pass
    return (res, adj_count)
def getSentimentOfWord(self, word):
    try:
        sentSet = list(swn.senti_synsets(word))
    except:
        return 0
    # if not found, assume objective word
    if len(sentSet) == 0:
        return 0
    totalPos = 0
    totalNeg = 0
    totalObj = 0
    for sentiword in sentSet:
        totalPos += sentiword.pos_score()
        totalNeg += sentiword.neg_score()
        totalObj += sentiword.obj_score()
    totalPos = totalPos / len(sentSet)
    totalNeg = totalNeg / len(sentSet)
    totalObj = totalObj / len(sentSet)
    # determine sentiment
    if (totalPos >= totalObj) and (totalPos >= totalNeg):
        return 1
    if (totalNeg >= totalObj) and (totalNeg >= totalPos):
        return -1
    if (totalObj >= totalPos) and (totalObj >= totalNeg):
        return 0
def sentiwordnetSentimentScoreFeatures(wordsTagged):
    posScoreSum = 0.0
    negScoreSum = 0.0
    for word, tag in wordsTagged:
        wordnetTag = translateFromNltkToWordnetTag(tag)
        word = stripNegation(word)
        if wordnetTag:
            synsets = list(sentiwordnet.senti_synsets(word, wordnetTag))
        else:
            synsets = list(sentiwordnet.senti_synsets(word))
        if len(synsets) > 0:
            synset = synsets[0]
            posScoreSum += synset.pos_score()  # fix: accumulate instead of overwriting the sums
            negScoreSum += synset.neg_score()
    return {"pos_neg_score": posScoreSum - negScoreSum}
def sentiment(word):
    posScore = 0
    negScore = 0
    if word[:1] == "~" and len(getAntonyms(word[1:])) != 0:
        word = list(getAntonyms(word[1:]).keys())[0]  # fix: dict keys are not indexable in Python 3
    opinions = swn.senti_synsets(word)
    for o in list(opinions):
        negScore += o.neg_score()
        posScore += o.pos_score()
    negWords = ['rude', 'arrogant', 'boring', 'difficult', 'terrible', 'hard',
                'dull', 'long', 'tricky', 'impossible', 'intimidating',
                'ridiculous', 'tough', 'challenging']
    posWords = ['exciting', 'cool', 'smart', 'incredible', 'super', 'great',
                'good', 'excellent', 'engaging', 'clear', 'entertaining',
                'interesting', 'easy', 'straightforward', 'helpful', 'amazing',
                'awesome', 'related', 'funny', 'doable']
    if word.lower() in negWords:
        return 'neg'
    elif word.lower() in posWords:
        return 'pos'
    if word[:1] == "~":
        if word[1:].lower() in negWords:
            return 'pos'
        elif word[1:].lower() in posWords:
            return 'neg'
    if posScore > negScore:
        return 'pos'
    elif posScore < negScore:
        return 'neg'
    else:
        return 'neut'
def score(self, tokens):
    pos_value = 0.0
    neg_value = 0.0
    obj_value = 0.0
    # TODO disambiguation via POS tagging using nps_chat or Brown Corpus
    nltk_tagged = nltk.pos_tag(tokens)
    lengthOfData = 0
    for word in nltk_tagged:
        meanings = list(swn.senti_synsets(word[0], self.get_wordnet_pos(word[1])))
        if len(meanings) > 0:
            wordSynset0 = meanings[0]
            pos_value += wordSynset0.pos_score()
            neg_value += wordSynset0.neg_score()
            obj_value += wordSynset0.obj_score()
            lengthOfData += 1
    if lengthOfData > 0:
        # fix: the original normalized only obj_value and left pos/neg as
        # no-op self-assignments; normalize all three consistently
        pos_value = pos_value / lengthOfData
        neg_value = neg_value / lengthOfData
        obj_value = obj_value / lengthOfData
    return [pos_value, neg_value, obj_value]
def main():
    i = raw_input("Length:")  # fix: this prompt was commented out, leaving int(i) below undefined
    str = raw_input("Letter sequence:")
    f = open("combinations.txt", 'w')
    k = list(itertools.permutations(str, int(i)))
    for i in k:
        for l in i:
            f.write(l)
        f.write('\n')
    f.close()
    h = []
    with open('combinations.txt') as hai:
        h = [word.lower().strip() for word in hai]
    dic = {}
    for o in h:
        if o not in dic:
            dic[o] = 0
        else:
            dic[o] += 1
    m = open("out.txt", 'w')
    for l in dic:
        v = list(swn.senti_synsets(l))
        if v:
            m.write(l)
            m.write('\n')
    m.close()  # fix: close the output file
def getSentimentFeatures(feats, text, prefix):
    pos_sum = 0
    neg_sum = 0
    most_positive = 0
    most_negative = 0
    for string in text.lower().split(' '):
        if len(string) > 0 and string[0] == '#':
            string = string[1:]
        senti_synset = list(swn.senti_synsets(string))
        if len(senti_synset) > 0:
            senti_synset = senti_synset[0]  # just use the 1st one for now
            pos_score = senti_synset.pos_score()
            if pos_score > most_positive:
                most_positive = pos_score
            pos_sum += pos_score
            neg_score = senti_synset.neg_score()
            if neg_score > most_negative:
                most_negative = neg_score
            neg_sum += neg_score
    feats[prefix + 'POS_SUM'] = pos_sum
    feats[prefix + 'NEG_SUM'] = neg_sum
    feats[prefix + 'MEAN_POS_NEG'] = (pos_sum + neg_sum) / 2.0
    feats[prefix + 'POS_NEG_GAP'] = pos_sum - neg_sum
    feats[prefix + 'SINGLE_POS_GAP'] = most_positive - (pos_sum + neg_sum) / 2.0
    feats[prefix + 'SINGLE_NEG_GAP'] = most_negative - (pos_sum + neg_sum) / 2.0
def main():
    args = sys.argv
    if len(args) != 2:
        print('usage: python proj filename')
        return -1
    lines = []
    values = []
    data = open(args[1], 'r')
    for line in data:
        temp = line.split('.')
        for sen in temp:
            tokens = word_tokenize(sen.strip('\n').strip(',').strip('-'))
            if tokens != []:
                lines.append(tokens)
    total_pos = 0
    total_neg = 0
    for line in lines:
        pos = 0.0
        neg = 0.0
        pcount = 0
        ncount = 0
        for word in line:
            sp = 0
            sn = 0
            sub_pos = 0
            sub_neg = 0
            x = swn.senti_synsets(word)
            for a in x:
                if a.pos_score() > 0:
                    sub_pos += 1
                    sp += a.pos_score()
                if a.neg_score() > 0:
                    sub_neg += 1
                    sn += a.neg_score()
            pos += sp
            neg += sn
            if sp > 0:
                pcount += 1
            if sn > 0:
                ncount += 1
        if (pos == 0) or (neg == 0):
            values.append((pos, neg))
            total_pos += pos
            total_neg += neg
        else:
            values.append((pos / (pos + neg), neg / (pos + neg)))
            total_pos += (pos / (pos + neg))
            total_neg += (neg / (pos + neg))
    print(str(total_pos / len(values)) + ',' + str(total_neg / len(values)))
    for x in range(0, len(lines)):
        print(str(values[x][0]) + ',' + str(values[x][1]))
def getNumberOfAppearances(self, tokens):
    total = 0
    for token in tokens:
        if len(list(s.senti_synsets(token))) > 0:  # fix: len() needs a list, not a generator
            total += 1
    return total
def get_sentiment_score(ls):
    '''
    Input: a sentence object.
    Estimates a score for the sentence based on the SentiWordNet model.
    '''
    from nltk.tokenize import word_tokenize
    import re
    word_list = word_tokenize(ls.content)
    punctuation = re.compile(r'[-.?!,":;()|0-9]')  # strip these punctuation marks and digits
    word_list = [punctuation.sub("", word) for word in word_list]
    word_list = list(filter(None, word_list))  # filter out empty strings
    ls.tokens = word_list
    pos_score = 0.0
    neg_score = 0.0
    num_valid_word = 0.1  # small offset so the division below never hits zero
    for w in word_list:
        try:
            res = list(swn.senti_synsets(w))  # fix: the generator is not indexable
            pos_score += res[0].pos_score()
            neg_score += res[0].neg_score()
            num_valid_word += 1
        except:
            pass
    ls.score = (pos_score - neg_score) / num_valid_word
def SentimentAnalysis_RGO_Belief_Propagation(nxg):
    # Bayesian Pearl Belief Propagation is done by
    # assuming the senti scores as probabilities with positive
    # and negative signs and the Recursive Gloss Overlap
    # definition graph being the graphical model.
    # Sentiment as a belief potential is passed through
    # the DFS tree of this graph.
    dfs_positive_belief_propagated = 1.0
    core_positive_belief_propagated = 1.0
    dfs_negative_belief_propagated = 1.0
    core_negative_belief_propagated = 1.0
    core_xnegscore = core_xposscore = 1.0
    dfs_knegscore = dfs_kposscore = dfs_vposscore = dfs_vnegscore = 1.0
    sorted_core_nxg = sorted(nx.core_number(nxg).items(),
                             key=operator.itemgetter(1), reverse=True)
    kcore_nxg = nx.k_core(nxg, 6, nx.core_number(nxg))
    for x in sorted_core_nxg:
        xsset = list(swn.senti_synsets(x[0]))  # fix: materialize before len()/indexing
        if len(xsset) > 2:
            core_xnegscore = float(xsset[0].neg_score()) * 10.0
            core_xposscore = float(xsset[0].pos_score()) * 10.0
        if core_xnegscore == 0.0:
            core_xnegscore = 1.0
        if core_xposscore == 0.0:
            core_xposscore = 1.0
        core_positive_belief_propagated *= float(core_xposscore)
        core_negative_belief_propagated *= float(core_xnegscore)
    print("Core Number: RGO_sentiment_analysis_belief_propagation: %f, %f" %
          (float(core_positive_belief_propagated), float(core_negative_belief_propagated)))
    for k, v in nx.dfs_edges(kcore_nxg):
        ksynset = list(swn.senti_synsets(k))
        vsynset = list(swn.senti_synsets(v))
        if len(ksynset) > 2:
            dfs_knegscore = float(ksynset[0].neg_score()) * 10.0
            dfs_kposscore = float(ksynset[0].pos_score()) * 10.0
        if len(vsynset) > 2:
            dfs_vnegscore = float(vsynset[0].neg_score()) * 10.0
            dfs_vposscore = float(vsynset[0].pos_score()) * 10.0
        dfs_kposscore_vposscore = float(dfs_kposscore * dfs_vposscore)
        dfs_knegscore_vnegscore = float(dfs_knegscore * dfs_vnegscore)
        if dfs_kposscore_vposscore == 0.0:
            dfs_kposscore_vposscore = 1.0
        if dfs_knegscore_vnegscore == 0.0:
            dfs_knegscore_vnegscore = 1.0
        dfs_positive_belief_propagated *= float(dfs_kposscore_vposscore)
        dfs_negative_belief_propagated *= float(dfs_knegscore_vnegscore)
    print("K-Core DFS: RGO_sentiment_analysis_belief_propagation: %f, %f" %
          (float(dfs_positive_belief_propagated), float(dfs_negative_belief_propagated)))
    return (dfs_positive_belief_propagated, dfs_negative_belief_propagated,
            core_positive_belief_propagated, core_negative_belief_propagated)
def __getitem__(self, k):
    synsets = list(swn.senti_synsets(k))
    if synsets:
        p, n = synsets[0].pos_score(), synsets[0].neg_score()
        return (float(p) - float(n), float(p) + float(n))
    else:
        return None
def sentiWordNetProc(self, sentence):
    if sentence == "":
        return [0, 0, 0, 0, 0]
    s = nltk.word_tokenize(sentence)
    l = len(s)
    num_pos_tokens = 0
    num_neg_tokens = 0
    maximal_sentiment = 0
    sentimentScores = list()
    posSentimentSum = 0
    negSentimentSum = 0
    for i in range(l):
        senti_type = list(swn.senti_synsets(s[i]))  # fix: the generator is always truthy and not indexable
        if senti_type:
            n = senti_type[0].neg_score()
            p = senti_type[0].pos_score()
            # crude negation handling: shift the scores toward each other when a
            # negation word appears one or two tokens back (the original's second
            # branch was unreachable because it only fired when i == 0)
            if (i - 1) >= 0 and s[i - 1] in negation_list:
                p = float(n - p) / 2 + p
                n = float(p - n) / 2 + n
            elif (i - 2) >= 0 and s[i - 2] in negation_list:
                p = float(n - p) / 2 + p
                n = float(p - n) / 2 + n
            posSentimentSum += float(p)
            negSentimentSum += float(n)
            if float(p) > float(n):
                num_pos_tokens += 1
                sentimentScores.append(float(p))
            else:
                num_neg_tokens += 1
                sentimentScores.append(float(n))
    if len(sentimentScores) > 0:
        maximal_sentiment = max(sentimentScores)
    else:
        maximal_sentiment = 0
    return [num_pos_tokens, num_neg_tokens, maximal_sentiment,
            posSentimentSum, negSentimentSum]
def remove_nonSentiWord(word):
    shouldInclude = False
    synSet = list(swn.senti_synsets(word))
    if len(synSet) == 0:
        # not in SentiWordNet; fall back to plain WordNet membership
        shouldInclude = len(list(wn.synsets(word))) > 0
    else:
        for item in synSet:
            if item.pos_score() > sentiWordNet_ThreshHold or item.neg_score() > sentiWordNet_ThreshHold:
                shouldInclude = True
                break
    return shouldInclude
def rem_nonevaluative(self):
    # iterate over a copy, since entries are deleted during the loop
    for key, value in list(self.wdict.items()):
        scores = list(swn.senti_synsets(key, 'a'))  # fix: the generator was consumed twice
        if not scores:
            del self.wdict[key]
            continue
        objectivity = scores[0].obj_score()
        if objectivity >= 0.5:
            del self.wdict[key]
def sentianalysis_sentences(text):
    # Takes a string, scores the sentiment of each word, averages the word
    # scores within each sentence, then averages the sentence scores.
    # Returns the sentence count and the average sentiment score.
    try:
        if type(text) == str:
            text_lower = text.lower()
            sentences = nltk.Text(sent_tokenize(text_lower))
            tokens_sentence = [nltk.word_tokenize(sentence) for sentence in sentences]
            taggedlist = []
            for token in tokens_sentence:
                taggedlist.append(nltk.pos_tag(token))
            wnl = nltk.WordNetLemmatizer()
            score_list = []
            for index1, taggedsentence in enumerate(taggedlist):
                score_list.append([])
                for index2, taggedtoken in enumerate(taggedsentence):
                    # map the Penn Treebank tags to the tags SentiWordNet recognizes
                    lemmatized_token = wnl.lemmatize(taggedtoken[0])
                    if taggedtoken[1].startswith('NN'):
                        newtag = 'n'
                    elif taggedtoken[1].startswith('JJ'):
                        newtag = 'a'
                    elif taggedtoken[1].startswith('V'):
                        newtag = 'v'
                    elif taggedtoken[1].startswith('R'):
                        newtag = 'r'
                    else:
                        newtag = ''
                    if newtag != '':
                        synsets = list(swn.senti_synsets(lemmatized_token, newtag))
                        # average over all possible senses
                        score = 0
                        if len(synsets) > 0:
                            for syn in synsets:
                                score += syn.pos_score() - syn.neg_score()
                            score_list[index1].append(score / len(synsets))
            sentence_sentiment = []
            for score_sentence in score_list:
                if len(score_sentence) != 0:
                    sentence_sentiment.append(sum(score_sentence) / len(score_sentence))
            return (len(sentence_sentiment),
                    sum(sentence_sentiment) / len(sentence_sentiment))
        else:
            print("The input text is not a string, please check again!")
            return (0, 0)
    except:
        return (0, 0)
def word_score(word, tag):
    if word in dick:  # fix: dict.has_key() was removed in Python 3
        score = dick[word]
    else:
        if tag == "k":
            all_words = list(swn.senti_synsets(word))  # fix: len() below needs a list
        else:
            all_words = list(swn.senti_synsets(word, tag))
        score = [0.0, 0.0, 0.0]
        for words in all_words:
            score[0] = score[0] + words.pos_score()
            score[1] = score[1] + words.neg_score()
            score[2] = score[2] + words.obj_score()
        if len(all_words) > 0:
            score[0] = score[0] / len(all_words)
            score[1] = score[1] / len(all_words)
            score[2] = score[2] / len(all_words)
        dick[word] = score
    return score
def calc(self):
    keys = []
    for key in self.all_words:
        scores = list(swn.senti_synsets(key, 'a'))  # fix: the generator was consumed twice
        if not scores:
            continue
        objectivity = scores[0].obj_score()
        if objectivity < 0.5:
            keys.append(key)
    self.keys = keys
def sentiwordnet_values(word):
    value_list = list(swn.senti_synsets(word))
    Positive_Average = 0
    Negative_Average = 0
    if len(value_list) > 0:
        Positive_Average += value_list[0].pos_score()
        Negative_Average += value_list[0].neg_score()
    averages = list()
    averages.append(Positive_Average + 1)
    averages.append(Negative_Average + 1)
    return averages
def get_word_primary_score(cls, word_pair):
    # first check the special words
    special_words = cls.read_special_word()
    if word_pair[0] in special_words:
        return special_words[word_pair[0]]
    # not in SentiWordNet, then return 0
    synsets = list(swn.senti_synsets(word_pair[0], word_pair[1]))  # fix: len() needs a list
    if not len(synsets):
        return 0
    score_list = [synset.pos_score() - synset.neg_score() for synset in synsets]
    return sum(score_list) / float(len(score_list))
def get_avg_sentiment(word, objectivity=None):
    syns = list(swn.senti_synsets(word))  # fix: a generator is always truthy and is exhausted after one pass
    pos = 0.0
    neg = 0.0
    if syns:
        if objectivity == 1:
            pos = np.mean([x.pos_score() * x.obj_score() for x in syns])
            neg = np.mean([x.neg_score() * x.obj_score() for x in syns])
        else:
            pos = np.mean([x.pos_score() for x in syns])
            neg = np.mean([x.neg_score() for x in syns])
    return (pos, neg)
def analyze_sentiment_sentiwordnet_lexicon(review, verbose=False):
    # pre-process text
    review = normalize_accented_characters(review)
    review = html_parser.unescape(review)
    review = strip_html(review)
    # tokenize and POS tag text tokens
    text_tokens = nltk.word_tokenize(review)
    tagged_text = nltk.pos_tag(text_tokens)
    pos_score = neg_score = token_count = obj_score = 0
    # get wordnet synsets based on POS tags
    # get sentiment scores if synsets are found
    for word, tag in tagged_text:
        ss_set = None
        if 'NN' in tag:
            synsets = list(swn.senti_synsets(word, 'n'))  # fix: look up once and materialize
        elif 'VB' in tag:
            synsets = list(swn.senti_synsets(word, 'v'))
        elif 'JJ' in tag:
            synsets = list(swn.senti_synsets(word, 'a'))
        elif 'RB' in tag:
            synsets = list(swn.senti_synsets(word, 'r'))
        else:
            synsets = []
        if synsets:
            ss_set = synsets[0]
        # if senti-synset is found
        if ss_set:
            # add scores for all found synsets
            pos_score += ss_set.pos_score()
            neg_score += ss_set.neg_score()
            obj_score += ss_set.obj_score()
            token_count += 1
    # aggregate final scores
    final_score = pos_score - neg_score
    norm_final_score = round(float(final_score) / token_count, 2)
    final_sentiment = 'positive' if norm_final_score >= 0 else 'negative'
    if verbose:
        norm_obj_score = round(float(obj_score) / token_count, 2)
        norm_pos_score = round(float(pos_score) / token_count, 2)
        norm_neg_score = round(float(neg_score) / token_count, 2)
        # display the results in a table
        sentiment_frame = pd.DataFrame(
            [[final_sentiment, norm_obj_score, norm_pos_score,
              norm_neg_score, norm_final_score]],
            columns=pd.MultiIndex(
                levels=[['SENTIMENT STATS:'],
                        ['Predicted Sentiment', 'Objectivity', 'Positive',
                         'Negative', 'Overall']],
                labels=[[0, 0, 0, 0, 0], [0, 1, 2, 3, 4]]))
        print(sentiment_frame)
    return final_sentiment
def assess_polarity1(tokens):
    """
    Sum polarities and divide by the number of tokens.
    :param tokens: list of word tokens
    :return: average score
    """
    polarity = 0.0
    for token in tokens:
        syn = list(swn.senti_synsets(token))
        if syn:
            syn = syn[0]
            polarity += syn.neg_score()
            polarity += syn.pos_score()
    return polarity / len(tokens)
def extract_senti_wordnet(text):
    overall_score = 0
    for word in text:
        # assumes word is already a str; the original Python 2 code decoded bytes here
        synsets = swn.senti_synsets(word)
        for synonym in synsets:
            overall_score += (synonym.pos_score() - synonym.neg_score())
    overall_score /= len(text)
    return {'senti_score': overall_score}
def tag_sentence(self, sentence, tag_with_lemmas=False):
    tag_sentence = []
    for (word, lemma, postag) in sentence:
        tag = self.wordnet_pos_code(postag[0])
        senti_word = swn.senti_synsets(word)
        pos_score = 0.0
        neg_score = 0.0
        for sw in senti_word:
            pos_score += sw.pos_score()
            neg_score += sw.neg_score()
        s = (word, lemma, postag, pos_score, neg_score)
        tag_sentence.append(s)
    return tag_sentence
def get_synsets(self, token):
    stopwords = nltk.corpus.stopwords.words('english')
    lemmatizer = Lemmatizer(LEMMA_INDEX, LEMMA_EXC, LEMMA_RULES)
    stop_poss = ['PUNCT', 'X', 'SYM', 'CONJ', 'ADP', 'PART', 'SPACE', 'INTJ']
    if token.text in stopwords or token.pos_ in stop_poss:
        return []
    swn_pos = self.get_swn_pos(token.pos_)
    lemma = lemmatizer(token.text, token.pos_)
    return list(swn.senti_synsets(lemma[0], pos=swn_pos))[:5]
def forth_feature(vocab_of_total_words, X_set):
    # 4th feature: per-word polarity weights from the first (most common) sense
    words_polarity_standard = []
    for word in vocab_of_total_words:
        # a try block, since the word may not be present in SentiWordNet
        try:
            synset = list(swn_word.senti_synsets(word))
            common_meaning = synset[0]
            if common_meaning.pos_score() > common_meaning.neg_score():
                weight = common_meaning.pos_score()
            elif common_meaning.pos_score() < common_meaning.neg_score():
                weight = -common_meaning.neg_score()
            else:
                weight = 0
        except:
            weight = 0
        words_polarity_standard.append(weight)
    no_of_pos_and_neg = np.zeros((len(X_set), 2), dtype=int)
    count = 0
    words_polarity_standard_array = np.array(words_polarity_standard)
    for row in X_set:
        positive_word_count = 0
        negative_word_count = 0
        weights_of_all_words_in_review = np.multiply(row, words_polarity_standard_array)
        for a_word in weights_of_all_words_in_review:
            if a_word > 0:
                positive_word_count += 1
            if a_word < 0:
                negative_word_count += 1
        no_of_pos_and_neg[count][0] = positive_word_count
        no_of_pos_and_neg[count][1] = negative_word_count
        count += 1
    return no_of_pos_and_neg
def _initialize_(self, reviews, saveAs=None, saveOverride=False):
    """
    wordOccuranceMatrix: numDocs x vocabSize matrix encoding the
    bag-of-words representation of each document
    """
    self.wordOccuranceMatrix = self.processReviews(reviews, saveAs, saveOverride)
    numDocs, vocabSize = self.wordOccuranceMatrix.shape
    # Pseudocounts
    self.n_dt = np.zeros((numDocs, self.numTopics))
    self.n_dts = np.zeros((numDocs, self.numTopics, self.numSentiments))
    self.n_d = np.zeros((numDocs))
    self.n_vts = np.zeros((vocabSize, self.numTopics, self.numSentiments))
    self.n_ts = np.zeros((self.numTopics, self.numSentiments))
    self.topics = {}
    self.sentiments = {}
    self.priorSentiment = {}
    alphaVec = self.alpha * np.ones(self.numTopics)
    gammaVec = self.gamma * np.ones(self.numSentiments)
    for i, word in enumerate(self.vectorizer.get_feature_names()):
        synsets = list(swn.senti_synsets(word))  # fix: the generator was exhausted by the first mean
        if not synsets:
            continue
        posScore = np.mean([s.pos_score() for s in synsets])
        negScore = np.mean([s.neg_score() for s in synsets])
        if posScore >= 0.1 and posScore > negScore:
            self.priorSentiment[i] = 1
        elif negScore >= 0.1 and negScore > posScore:
            self.priorSentiment[i] = 0
    for d in range(numDocs):
        topicDistribution = sampleFromDirichlet(alphaVec)
        sentimentDistribution = np.zeros((self.numTopics, self.numSentiments))
        for t in range(self.numTopics):
            sentimentDistribution[t, :] = sampleFromDirichlet(gammaVec)
        for i, w in enumerate(word_indices(self.wordOccuranceMatrix[d, :])):
            t = sampleFromCategorical(topicDistribution)
            s = sampleFromCategorical(sentimentDistribution[t, :])
            self.topics[(d, i)] = t
            self.sentiments[(d, i)] = s
            self.n_dt[d, t] += 1
            self.n_dts[d, t, s] += 1
            self.n_d[d] += 1
            self.n_vts[w, t, s] += 1
            self.n_ts[t, s] += 1
def analyze_one_word(word):
    """
    Find the pos/neg score of a word. The part of speech is ignored
    for now; just pick the first synset found.
    """
    pos_score = 0.0
    neg_score = 0.0
    try:
        synset = list(swn.senti_synsets(word))[0]  # fix: the generator is not indexable
        pos_score = synset.pos_score()
        neg_score = synset.neg_score()
    except:
        pass
    return pos_score, neg_score
def findSentiment(self, token, pos_tag):
    if self.average:
        # average score over all POS tags
        synsets = s.senti_synsets(token)
    else:
        # score for the specific POS tag
        try:
            synsets = s.senti_synsets(token, pos_tag.lower())
        except:
            # fall back if there is no such POS tag
            synsets = s.senti_synsets(token)
    synsets = list(synsets)  # fix: len() needs a list
    if len(synsets) > 0:
        # calculate a score for every sentiment (neutral, positive, negative)
        neutral = 0
        positive = 0
        negative = 0
        # average the scores of all synsets (change if a better approach is found)
        for synset in synsets:
            neutral += synset.obj_score()
            positive += synset.pos_score()
            negative += synset.neg_score()
        neutral = neutral / float(len(synsets))
        positive = positive / float(len(synsets))
        negative = negative / float(len(synsets))
        # return the sentiment with the highest score
        if max(neutral, positive, negative) == neutral:
            return neutral
        elif max(neutral, positive, negative) == positive:
            return positive
        else:
            return negative
    else:
        return 0
def get_sent_max_pos_neg(corpus):
    for curr_sentence in corpus:
        curr_sentence = re.sub(r'^https?:\/\/.*[\r\n]*', ' ', curr_sentence)
        curr_sentence = re.sub(r'@[a-zA-Z0-9_]+', ' ', curr_sentence)
        sentence_tokenized = tokenizer(curr_sentence)
        sent_list = []
        for i in range(len(sentence_tokenized) - 2):
            curr_word = sentence_tokenized[i]
            next_word = sentence_tokenized[i + 1]
            next_next = sentence_tokenized[i + 2]
            if curr_word.startswith('#'):
                curr_word = curr_word[1:]
            if next_word.startswith('#'):
                next_word = next_word[1:]
            if next_next.startswith('#'):
                next_next = next_next[1:]
            # fix: materialize the generators before len()/indexing
            curr_senti_synsets = list(swn.senti_synsets(curr_word))
            next_senti_synsets = list(swn.senti_synsets(next_word))
            next_next_senti_synsets = list(swn.senti_synsets(next_next))
            if len(curr_senti_synsets) > 0 and len(next_senti_synsets) > 0 and len(next_next_senti_synsets) > 0:
                curr_pos = curr_senti_synsets[0].pos_score()
                curr_neg = curr_senti_synsets[0].neg_score()
                next_pos = next_senti_synsets[0].pos_score()
                next_neg = next_senti_synsets[0].neg_score()
                next_next_pos = next_next_senti_synsets[0].pos_score()
                next_next_neg = next_next_senti_synsets[0].neg_score()
                max_pos = max(curr_pos, next_pos, next_next_pos)
                max_neg = max(curr_neg, next_neg, next_next_neg)
                curr_sent = max_pos - max_neg
                if curr_sent != 0:
                    sent_list.append(curr_sent)
        if len(sent_list) > MAX_LEN:
            sent_list = sent_list[:MAX_LEN]
        for i in range(MAX_LEN - len(sent_list)):
            sent_list.append(0.0)
        inp_X.append(sent_list)
    return inp_X
def get_score(sentense_list):
    for sentence in sentense_list:
        filtered_words = []
        sentence = sentence.lower()  # lowercase
        retokenize = RegexpTokenizer("[\w]+")
        split_sentence = retokenize.tokenize(sentence)  # split into tokens
        filtered_sentence = [w for w in split_sentence if not w in stop_words]  # filter stop words
        wnl = nltk.WordNetLemmatizer()  # lemmatizer
        for f in filtered_sentence:
            filtered_words.append(wnl.lemmatize(str(f)))  # restore each word to its lemma
        pos_tagged = nltk.pos_tag(filtered_words)
        positive_score = 0.0
        negative_score = 0.0
        for pos_word in pos_tagged:
            if pos_word[1].startswith('N'):    # noun
                newtag = 'n'
            elif pos_word[1].startswith('J'):  # adjective
                newtag = 'a'
            elif pos_word[1].startswith('V'):  # verb
                newtag = 'v'
            elif pos_word[1].startswith('R'):  # adverb
                newtag = 'r'
            else:
                newtag = ''
            if newtag != '':  # a usable POS exists
                synsets = list(swn.senti_synsets(pos_word[0], newtag))
                if len(synsets) > 0:
                    positive_score += sum([s.pos_score() for s in synsets]) / len(synsets)
                    negative_score += sum([s.neg_score() for s in synsets]) / len(synsets)
        if len(pos_tagged) > 0:
            positive_score /= len(pos_tagged)
            negative_score /= len(pos_tagged)
            if positive_score > negative_score:
                print("positive")
            elif positive_score < negative_score:
                print("negative")
            else:
                print("objective")
        else:
            print("objective")
def SentimentAnalysis_RGO_Belief_Propagation_MarkovRandomFields(nxg):
    # The RGO undirected graph is factored into maximal cliques and
    # sum potentials of clique nodes are construed as probabilities
    # which are productized
    empath = []
    nxg_undirected = nxg.to_undirected(nxg)
    clique = list(nx.find_cliques(nxg_undirected))
    clique_potential_product_pos = 1.0
    clique_potential_product_neg = 1.0
    clique_potential_product_obj = 1.0
    for c in clique:
        clique_potential_pos = 0.0
        clique_potential_neg = 0.0
        clique_potential_obj = 0.0
        print("clique:", c)
        for v in c:
            pos = neg = obj = 0.0  # fix: these were undefined when no synset was found
            if SentimentScoringAlgorithm == "Empath":
                pos = 0.0001
                neg = 0.0001
                empath_dict = lexicon.analyze(v)
                empath_list = sorted(empath_dict.items(),
                                     key=operator.itemgetter(1), reverse=True)
                obj = empath_list[0][1]
                if obj == 0.0:
                    obj = 0.00001
            else:
                sset = list(swn.senti_synsets(v))  # fix: materialize before len()/indexing
                if len(sset) > 0:
                    pos = float(sset[0].pos_score())
                    neg = float(sset[0].neg_score())
                    obj = float(sset[0].obj_score())
                if pos == 0.0:
                    pos = 0.00001
                if neg == 0.0:
                    neg = 0.00001
                if obj == 0.0:
                    obj = 0.00001
            clique_potential_pos += pos
            clique_potential_neg += neg
            clique_potential_obj += obj
        clique_potential_product_pos *= float(clique_potential_pos)
        clique_potential_product_neg *= float(clique_potential_neg)
        clique_potential_product_obj *= float(clique_potential_obj)
    lenclique = len(clique)
    if lenclique == 0:
        lenclique = 1
    return (clique_potential_product_pos / lenclique,
            clique_potential_product_neg / lenclique,
            clique_potential_product_obj / lenclique)
def get_sentiment_wordnet(word):
    list_senti = list(swn.senti_synsets(word))
    sum_pos = 0
    sum_neg = 0
    for senti in list_senti:
        sum_pos += senti.pos_score()
        sum_neg += senti.neg_score()
    avg_pos = sum_pos * 1.0 / len(list_senti) if len(list_senti) > 0 else 0
    avg_neg = sum_neg * 1.0 / len(list_senti) if len(list_senti) > 0 else 0
    print(''.join([word, ' (', str(round(avg_pos, 3)), ', ', str(round(avg_neg, 3)), ')']))
    return avg_pos, avg_neg
def average_polarity(input_word):
    words = list(swn.senti_synsets(input_word))
    output_polarity = []
    if len(words) == 1:
        this_word = words[0]
        return this_word.pos_score() - this_word.neg_score()
    for this_word in words:
        # the sense number is the last two characters of the synset name, e.g. 'good.a.01'
        usage = int(this_word.synset.name()[-2:])
        if usage < 3:
            output_polarity.append(this_word.pos_score() - this_word.neg_score())
    if len(output_polarity) == 0:
        return 0
    else:
        return np.mean(output_polarity)
def polarity_score_2(word, pos):
    pos_arr = []
    neg_arr = []
    neut_arr = []
    synsets = list(swn.senti_synsets(word, pos))  # fix: look up once; len() needs a list
    if len(synsets) == 0:
        return 0.0
    for s in synsets:
        pos_arr.append(s.pos_score())
        neg_arr.append(s.neg_score())
        neut_arr.append(s.obj_score())
    pos = round(np.mean(np.array(pos_arr)), 2)
    neg = round(np.mean(np.array(neg_arr)), 2)
    subj = round(np.mean(np.array(neut_arr)), 2)
    if pos > neg:
        return pos
    elif neg > pos:
        return neg * -1.0
    else:
        return 0.0
def get_positive_negative_feature(tokens, normalization_function):
    features_count_by_sentiment = {0: 0, 1: 0}
    for token in tokens:
        token = normalization_function(token)
        list_token_analyzed = list(swn.senti_synsets(token))
        if len(list_token_analyzed) > 0:
            token_sentiments = list_token_analyzed[0]
            if token_sentiments.pos_score() > token_sentiments.neg_score():
                features_count_by_sentiment[1] += 1
            elif token_sentiments.pos_score() < token_sentiments.neg_score():
                features_count_by_sentiment[0] += 1
    return features_count_by_sentiment
def LexiconScore(bow, postag):
    # fix: 'VB' was missing from the list, so the verb branch below was unreachable
    lexicalPostag = ['NN', 'NNP', 'NND', 'VB', 'MD', 'JJ', 'RB']
    lexiconScore = {}
    translator = Translator()
    allWord = set(wn.all_lemma_names())
    for i in range(len(bow)):
        if bow[i] not in lexiconScore:
            englishWord = translator.translate(bow[i], dest='en')
            word = englishWord.text
            score = 0
            if word in allWord:
                if postag[i] in lexicalPostag:
                    if postag[i] == 'NN' or postag[i] == 'NNP' or postag[i] == 'NND':
                        allsyn = list(swn.senti_synsets(word, 'n'))
                        if len(allsyn) != 0:
                            lexicon = swn.senti_synset(word + '.n.01')
                            score = lexicon.pos_score() - lexicon.neg_score()
                    elif postag[i] == 'VB' or postag[i] == 'MD':
                        allsyn = list(swn.senti_synsets(word, 'v'))
                        if len(allsyn) != 0:
                            lexicon = swn.senti_synset(word + '.v.01')
                            score = lexicon.pos_score() - lexicon.neg_score()
                    elif postag[i] == 'JJ':
                        allsyn = list(swn.senti_synsets(word, 'a'))
                        if len(allsyn) != 0:
                            lexicon = swn.senti_synset(word + '.a.01')
                            score = lexicon.pos_score() - lexicon.neg_score()
                    elif postag[i] == 'RB':
                        allsyn = list(swn.senti_synsets(word, 'r'))
                        if len(allsyn) != 0:
                            lexicon = swn.senti_synset(word + '.r.01')
                            score = lexicon.pos_score() - lexicon.neg_score()
                lexiconScore[bow[i]] = score
            else:
                lexiconScore[bow[i]] = 0
    return lexiconScore
def superNaiveSentimentAnalysis(review):
    reviewpolarity = 0.0
    numExceptions = 0.0
    for word in review.lower().split():
        weight = 0.0
        try:
            common_meaning = list(swn.senti_synsets(word))[0]
            if common_meaning.pos_score() > common_meaning.neg_score():
                weight = weight + common_meaning.pos_score()
            elif common_meaning.pos_score() < common_meaning.neg_score():
                weight = weight - common_meaning.neg_score()
        except:
            numExceptions = numExceptions + 1
        reviewpolarity = reviewpolarity + weight
    return reviewpolarity
def tweetToSWNVector(word):
    vec = np.zeros(3)
    pos_score, neg_score, obj_score = 0, 0, 0
    # fix: the original looked up the literal string '21' instead of word
    l = list(swn.senti_synsets(word))
    try:
        pos_score += l[0].pos_score()
        neg_score -= l[0].neg_score()
        obj_score += l[0].obj_score()
    except IndexError:
        pass  # no synsets found; leave the scores at zero
    vec[0], vec[1], vec[2] = pos_score, neg_score, obj_score
    return vec
def findscore(unimportant_tokens):
    tokens = find_imp_words(unimportant_tokens)
    valence = [0.0 for i in range(len(tokens))]
    i = 0
    for word in tokens:
        positive_word_score = 0
        negative_word_score = 0
        synsets = list(swn.senti_synsets(word))  # look the word up once
        for item in synsets:
            positive_word_score = positive_word_score + item.pos_score()
            negative_word_score = negative_word_score + item.neg_score()
        if len(synsets) != 0:
            positive_word_score = positive_word_score / len(synsets)
            negative_word_score = negative_word_score / len(synsets)
        valence[i] = positive_word_score - negative_word_score
        i = i + 1
    return tokens, valence
def SentimentAnalysis_SentiWordNet(text):
    tokens = text.split()  # fix: no need to round-trip through bytes in Python 3
    sumposscore = 0.0
    sumnegscore = 0.0
    sumobjscore = 0.0
    for t in tokens:
        sset = list(swn.senti_synsets(t))  # fix: len() needs a list
        if len(sset) > 0:
            sumposscore += sset[0].pos_score()
            sumnegscore += sset[0].neg_score()
            sumobjscore += sset[0].obj_score()
    return (sumposscore, sumnegscore, sumobjscore)
def english_sentiments(word):
    pscore = 0
    nscore = 0
    ll = 0
    for wt in ['n', 'v', 'r', 'a']:
        z = list(swn.senti_synsets(word, wt))
        if len(z) > 0:
            p, n = z[0].pos_score(), z[0].neg_score()
            pscore += p
            nscore += n
            ll += 1  # fix: 'll + 1' discarded the increment, so the averaging below never ran
    if ll > 0:
        pscore = float(pscore) / ll
        nscore = float(nscore) / ll
    return pscore, nscore
def compute_sub_posneg(X):
    print("sub & posneg")
    x_subjectivity = []
    x_posneg = []
    for sentence in X:
        taggedsentence = []
        obj_score = 0.0
        p_count = 0.0
        n_count = 0.0
        taggedsentence.append(tagger.tag(sentence.split()))
        wnl = nltk.WordNetLemmatizer()
        for idx, words in enumerate(taggedsentence):
            for idx2, t in enumerate(words):
                lemmatizedsent = wnl.lemmatize(t[0])
                if t[1].startswith('NN'):
                    newtag = 'n'
                elif t[1].startswith('JJ'):
                    newtag = 'a'
                elif t[1].startswith('V'):
                    newtag = 'v'
                elif t[1].startswith('R'):
                    newtag = 'r'
                else:
                    newtag = ''
                if newtag != '':
                    synsets = list(swn.senti_synsets(lemmatizedsent, newtag))
                    score = 0.0
                    obj_wordscore = 0.0
                    if len(synsets) > 0:
                        for syn in synsets:
                            score += syn.pos_score() - syn.neg_score()
                            obj_wordscore += syn.obj_score()
                        score = score / len(synsets)
                        if score >= 0:
                            p_count += 1
                        else:
                            n_count += 1
                        obj_score += obj_wordscore / len(synsets)
        if n_count == 0:
            n_count = 1
        x_subjectivity.append(p_count / n_count)
        x_posneg.append(obj_score)
    x_subjectivity = [float(i) / max(x_subjectivity) for i in x_subjectivity]
    x_posneg = [float(i) / max(x_posneg) for i in x_posneg]
    return x_subjectivity, x_posneg
def main():
    args = sys.argv
    if len(args) != 2:
        print('usage: python proj filename')
        return -1
    lines = []
    values = []
    data = open(args[1], 'r')
    for line in data:
        temp = line.split('.')
        for sen in temp:
            tokens = pos_tag(word_tokenize(sen.strip('\n').strip(',').strip('-')))
            if tokens != []:
                lines.append(tokens)
    total_pos = 0
    total_neg = 0
    for line in lines:
        pos = 0.0
        neg = 0.0
        count = 0
        for word in line:
            tag = 'n'
            if word[1] == 'VB':
                tag = 'v'
            if word[1] == 'JJ':
                tag = 'a'
            if word[1] == 'RB':
                tag = 'r'
            x = list(swn.senti_synsets(word[0], tag))  # fix: a generator never equals []
            for a in x:
                pos += a.pos_score()
                neg += a.neg_score()
                count += 1
        if (pos + neg) > 0:
            values.append((pos / (pos + neg), neg / (pos + neg)))
            total_pos += pos
            total_neg += neg
        else:
            values.append((0, 0))
    print(str(total_pos / (total_pos + total_neg)) + ',' +
          str(total_neg / (total_pos + total_neg)))
    for x in range(0, len(lines)):
        print(str(values[x][0]) + ',' + str(values[x][1]))
def CrearSolucion(request):
    if request.method == 'POST':
        length = request.POST.get('length')
        letras = request.POST.get('letters')
        print(letras)
        f = open("combinations.txt", 'w')
        k = list(itertools.permutations(letras, int(length)))
        for i in k:
            for l in i:
                f.write(l)
            f.write('\n')
        f.close()
        h = []
        with open('combinations.txt') as hai:
            h = [word.lower().strip() for word in hai]
        dicionario = {}
        for o in h:
            if o not in dicionario:
                dicionario[o] = 0
            else:
                dicionario[o] += 1
        lista = []
        for l in dicionario:
            v = list(swn.senti_synsets(l))
            if v:
                lista.append(l)
        print(lista)
        datos = {}
        datos['length'] = length
        datos['palabras'] = letras
        datos['lista'] = lista
        return HttpResponse(json.dumps(datos), content_type='application/json')
    else:
        return HttpResponse(json.dumps({'error': 'error'}),
                            content_type='application/json')
def runLexicalAnalysis(self, file):
    lemmatizer = WordNetLemmatizer()
    individual_scores = []
    multiple_scores = []
    # warning = False
    for tag in file:
        # if ("not" or "n't" in tag[0].lower()) and (not warning):
        #     self.output += "*** WARNING NEGATION DETECTED ***" + "\n" + \
        #         "Lexical approach may not handle negation well. " + \
        #         "As a result, this sentiment score may not be accurate" + "\n\n"
        #     warning = True
        lemma = lemmatizer.lemmatize(tag[0])
        if tag[1].startswith('NN'):
            syntag = 'n'
        elif tag[1].startswith('JJ'):
            syntag = 'a'
        elif tag[1].startswith('V'):
            syntag = 'v'
        elif tag[1].startswith('RB'):
            syntag = 'r'
        else:
            syntag = ''
        if syntag:
            try:
                synset = sentiwordnet.senti_synset(lemma + "." + syntag + ".01")
                score = synset.pos_score() - synset.neg_score()
                individual_scores.append(score)
            except WordNetError:
                pass
            score = 0
            synsets = list(sentiwordnet.senti_synsets(lemma, syntag))
            if len(synsets) > 0:
                for syn in synsets:
                    score += syn.pos_score() - syn.neg_score()
                multiple_scores.append(score / len(synsets))
    individual_score = self.standardizeScores(sum(individual_scores))
    multiple_score = self.standardizeScores(sum(multiple_scores))
    self.output += "Individual Synset Score: " + "{0:.4f}".format(individual_score) + "\n" + \
        "Multiple Synset Score: " + "{0:.4f}".format(multiple_score) + "\n" + \
        "-------------------------------------------------------------\n"
def superNaiveSentiment(self, review):
    reviewPolarity = 0.0
    exc = 0
    for word in review.lower().split():
        weight = 0.0
        try:
            common_meaning = list(swn.senti_synsets(word))[0]
            if common_meaning.pos_score() > common_meaning.neg_score():
                weight = weight + common_meaning.pos_score()
            # fix: pos_score was compared without being called, so this branch never fired
            elif common_meaning.pos_score() < common_meaning.neg_score():
                weight = weight - common_meaning.neg_score()
        except:
            exc = exc + 1
        reviewPolarity = reviewPolarity + weight
    return reviewPolarity
def GetWordSentimentScore(taggedword):
    wordtype = GetWordType(taggedword)
    if not wordtype:
        # the word type is not covered by SentiWordNet,
        # so there exists no sentiment score
        return False
    wordscore = swn.senti_synsets(taggedword[0], wordtype)
    pos, neg = 0.0, 0.0
    count = 0
    for val in wordscore:
        pos = pos + val.pos_score()
        neg = neg + val.neg_score()
        count += 1
    if pos == 0.0 and neg == 0.0:
        return False  # there exists no sentiment score
    else:
        return {'pos': pos, 'neg': neg}
def superNaiveSentiment(review):
    reviewPolarity = 0.0
    numExceptions = 0
    for word in review.lower().split():
        weight = 0.0
        try:
            common_meaning = list(swn.senti_synsets(word))[0]  # fix: the generator is not indexable
            if common_meaning.pos_score() > common_meaning.neg_score():
                weight = weight + common_meaning.pos_score()
            elif common_meaning.pos_score() < common_meaning.neg_score():
                weight = weight - common_meaning.neg_score()
        except:
            numExceptions = numExceptions + 1
        reviewPolarity = reviewPolarity + weight
    return reviewPolarity
def check_sentiment(doc):
    for token in doc:
        senti_texts = list(swn.senti_synsets(token.text))
        if senti_texts:
            if any([
                sum(st.pos_score() for st in senti_texts[:5]) / len(senti_texts[:5]) > 0.5,
                sum(st.neg_score() for st in senti_texts[:5]) / len(senti_texts[:5]) > 0.5,
            ]):
                return 1
    return 0
def get_sentiment_value(self, word, partofspeech):
    try:
        return self.exception_sentim_values[word]
    except KeyError:
        sets_count = 0
        sentiment_value = 0
        for sentisynset in swn.senti_synsets(word, partofspeech):
            sets_count += 1
            sentiment_value += sentisynset.pos_score()  # * sentisynset.obj_score()
            sentiment_value -= sentisynset.neg_score()  # * sentisynset.obj_score()
        if sets_count == 0:
            return 0
        else:
            return sentiment_value / sets_count
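# Added usage sketch: the helpers in this collection split between two scoring
# conventions, using only the first (most frequent) sense versus averaging
# pos-minus-neg over all senses. This hypothetical driver contrasts the two on
# POS-tagged tokens; score_both_ways, PENN_TO_SWN, and the sample sentence are
# illustrative assumptions, not taken from any snippet above.
from nltk import pos_tag, word_tokenize
from nltk.corpus import sentiwordnet as swn

PENN_TO_SWN = {'J': 'a', 'N': 'n', 'V': 'v', 'R': 'r'}

def score_both_ways(sentence):
    for word, tag in pos_tag(word_tokenize(sentence)):
        swn_pos = PENN_TO_SWN.get(tag[:1])
        if swn_pos is None:
            continue
        synsets = list(swn.senti_synsets(word, swn_pos))
        if not synsets:
            continue
        first = synsets[0].pos_score() - synsets[0].neg_score()
        averaged = sum(s.pos_score() - s.neg_score() for s in synsets) / len(synsets)
        print(word, round(first, 3), round(averaged, 3))

# Example: score_both_ways("The movie was surprisingly good")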