def getting_sentiment(word,pos):
    flag = 0
    if 'NN' in pos:
        tag = 'n'
    elif 'JJ' in pos:
        tag = 'a'
        if pos == 'JJS':
            flag = 1
    elif 'VB' in pos:
        tag = 'v'
    elif 'RB' in pos:
        tag = 'r'
    else:
        tag = ''
    stemmer = WordNetLemmatizer()
    if tag != '':
        x = stemmer.lemmatize(word,tag)
    else:
        x = stemmer.lemmatize(word)

    try:
        score = float(score_dic[x]) #* float(m1)
    except KeyError:
        synsets = list(swn.senti_synsets(x, tag))  # senti_synsets returns a generator in NLTK 3.x
        if len(synsets) > 0:
            score = synsets[0].pos_score() * 5
        else:
            score = 100

    if flag == 1 and score != -100 and score < 4:
        score = score + 1
    elif flag == 1 and score != -100 and score > -4 and score < 0:
        score = score - 1
    print(word + '--->' + str(score))
    return score
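A minimal usage sketch for the function above. In NLTK 3.x, swn.senti_synsets returns a generator, which is why the lookup is wrapped in list(...); score_dic below is a hypothetical stand-in for the external lexicon the function reads:

# Usage sketch -- assumes the WordNet/SentiWordNet corpora are installed and that
# score_dic is a hypothetical stand-in for the module-level lexicon.
import nltk
nltk.download('wordnet', quiet=True)
nltk.download('sentiwordnet', quiet=True)

score_dic = {'good': 3.0}                # known word: served from the lexicon
print(getting_sentiment('good', 'JJ'))   # -> 3.0
print(getting_sentiment('awful', 'JJ'))  # unknown word: falls back to SentiWordNet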
Example #2
def sentiwordnetSentimentWordsPresenceFeatures(wordsTagged):
    features = {}
    for word, tag in wordsTagged:
        wordnetTag = translateFromNltkToWordnetTag(tag)
        wordNegated = isWordNegated(word)
        word = stripNegation(word)
        if wordnetTag:
            synsets = list(sentiwordnet.senti_synsets(word, wordnetTag))
            if not synsets:
                synsets = list(sentiwordnet.senti_synsets(word))
        else:
            synsets = list(sentiwordnet.senti_synsets(word))
        if len(synsets) > 0:
            synset = synsets[0]
            if synset.pos_score() > 0.5:
                if wordNegated:
                    features["neg_word_presence"] = True
                else:
                    features["pos_word_presence"] = True
            if synset.neg_score() > 0.5:
                if wordNegated:
                    features["pos_word_presence"] = True
                else:
                    features["neg_word_presence"] = True
    return features
def get_score(adjective):
    if adjective not in d:
        scores = list(swn.senti_synsets(adjective))  # materialize the NLTK 3.x generator
        if len(scores) == 0:  # checked before maxi() so empty lists never reach it
            d[adjective] = (-1, -1, -1, -1, -1, -1)
            return (-1, -1, -1, -1, -1, -1)
        pos_scores = [i.pos_score() for i in scores]
        neg_scores = [i.neg_score() for i in scores]
        obj_scores = [i.obj_score() for i in scores]
        pos_score = maxi(pos_scores)
        neg_score = maxi(neg_scores)
        obj_score = maxi(obj_scores)

        scores_ad = list(swn.senti_synsets(adjective, pos='ar'))
        pos_scores_ad = [i.pos_score() for i in scores_ad]
        neg_scores_ad = [i.neg_score() for i in scores_ad]
        obj_scores_ad = [i.obj_score() for i in scores_ad]
        pos_score_ad = maxi(pos_scores_ad)
        neg_score_ad = maxi(neg_scores_ad)
        obj_score_ad = maxi(obj_scores_ad)
        d[adjective] = (pos_score, neg_score, obj_score,
                        pos_score_ad, neg_score_ad, obj_score_ad)
    else:
        (pos_score, neg_score, obj_score, pos_score_ad, neg_score_ad,
         obj_score_ad) = d[adjective]
    if pos_score == -1:
        return -100
    if pos_score_ad > neg_score_ad:
        return pos_score_ad
    elif pos_score_ad < neg_score_ad:
        return -neg_score_ad
    elif pos_score > neg_score:
        return pos_score
    elif pos_score < neg_score:
        return -neg_score
    else:
        return 0
def processoFeatures(resposta):
    frases = tokenizerFrases.tokenize(resposta["corpo"])
    palavras = []
    palavrasTexto = {}
    for frase in frases:
        palavrasTemp = tokenizerPalavras.tokenize(frase)
        for palavra in palavrasTemp:
            palavras.append(palavra)  # keep the token list: it was never filled, so pos_tag got an empty list
            palavrasTexto[palavra] = True
    posTags = pos_tag(palavras)
    positivo = 0
    negativo = 0
    for palavra, tag in posTags:
        synsets = None
        if tag.startswith("J"):
            synsets = sentiwordnet.senti_synsets(palavra, wordnet.ADJ)
        elif tag.startswith("V"):
            synsets = sentiwordnet.senti_synsets(palavra, wordnet.VERB)
        elif tag.startswith("N"):
            synsets = sentiwordnet.senti_synsets(palavra, wordnet.NOUN)
        elif tag.startswith("R"):
            synsets = sentiwordnet.senti_synsets(palavra, wordnet.ADV)
        else:
            synsets = sentiwordnet.senti_synsets(palavra)  # no POS restriction; an empty-string POS matches nothing
        if synsets != None:
            synsets = list(synsets)
            if len(synsets) > 0:
                synset = synsets[0]
                positivo = positivo + synset.pos_score()
                negativo = negativo + synset.neg_score()
    if positivo > negativo:
        return (palavrasTexto, "positivo")
    elif negativo > positivo:
        return (palavrasTexto, "negativo")
    else:
        return (palavrasTexto, "neutro")
def analiseSentimento(resposta):
	texto = resposta['corpo']
	frases = sentencesTokenizer.tokenize(texto)
	palavras = []
	for frase in frases:
		palavras.extend(wordsTokenizer.tokenize(frase))
	posTags = pos_tag(palavras)
	positivo = 0
	negativo = 0
	for palavra, tag in posTags:
		synsets = None
		if tag.startswith('J'):
			synsets = sentiwordnet.senti_synsets(palavra, wordnet.ADJ)
		elif tag.startswith('V'):
			synsets = sentiwordnet.senti_synsets(palavra, wordnet.VERB)
		elif tag.startswith('N'):
			synsets = sentiwordnet.senti_synsets(palavra, wordnet.NOUN)
		elif tag.startswith('R'):
			synsets = sentiwordnet.senti_synsets(palavra, wordnet.ADV)
		else:
			synsets = sentiwordnet.senti_synsets(palavra)  # no POS restriction; an empty-string POS matches nothing
		if synsets != None:
			synsets = list(synsets)
			if len(synsets) > 0:
				synset = synsets[0]
				positivo = positivo + synset.pos_score()
				negativo = negativo + synset.neg_score()
	if positivo > negativo:
		return (resposta, 'positivo')
	elif negativo > positivo:
		return (resposta, 'negativo')
	else:
		return (resposta, 'neutro')
def senti_analisys(tokens):
    #print tokens
    scorePosTot = 0
    scoreNegTot = 0
    scoreObjTot = 0
    scoreObjNorm = scoreNegNorm = scorePosNorm = 0
    count = 0
    for token,part in tokens:

        if part.startswith("JJ") or part.startswith("NN") or part.startswith("VB"):

            scorePos = 0
            scoreNeg = 0
            scoreObj = 0
            #print swn.senti_synsets(token)
            #if token == "wonderful":
             #   print "i'm "+token,swn.senti_synsets(token)
            list_synset = list(swn.senti_synsets(token))  # generator in NLTK 3.x
            if list_synset:  # the raw generator always compared unequal to []
                dim_synset = len(list_synset)

                for i in list_synset:
                    scorePos += i.pos_score()
                    scoreNeg += i.neg_score()
                    scoreObj += i.obj_score()

                scorePos = scorePos / dim_synset
                scoreNeg = scoreNeg / dim_synset
                scoreObj = scoreObj / dim_synset

                #print "The token is: "+token + "\n\tscore pos: "+str(round(scorePos,2)) + "\n\tscore neg: "+str(round(scoreNeg,2))+\
                      #"\n\tscore obj: " + str(round(scoreObj,2))


                scorePosTot += scorePos
                scoreNegTot += scoreNeg
                scoreObjTot += scoreObj
            count += 1

    if count != 0:
        scorePosNorm = scorePosTot / count
        scoreNegNorm = scoreNegTot / count
        scoreObjNorm = scoreObjTot / count

    #print "NORM: "+ "\n\tscorePOS: "+str(round(scorePosNorm,2)) + "\n\tscoreNEG: "+str(round(scoreNegNorm,2)) \
          #+ "\n\tscoreOBJ: "+str(round(scoreObjNorm,2))
    if scoreNegNorm < scorePosNorm :
        #print "POSITIVE"
        return 1,scorePosNorm
    elif scoreNegNorm > scorePosNorm:
        #print "NEGATIVE"
        return -1,-scoreNegNorm
    else:
        #print "OBJECTIVE"
        return 0,0
def get_net_pos_neg(word):
    netPos = 0
    netNeg = 0

    if len(list(swn.senti_synsets(word))) != 0:
        sentisyn = list(swn.senti_synsets(word))
        for item in sentisyn:
            netPos += item.pos_score()
            netNeg += item.neg_score()

    return netPos, netNeg
Example #8
File: ranks.py  Project: Zephrys/Bro
def strip_proppers_POS(text, search):
    text = text.decode('utf-8', 'ignore')
    tokens = nltk.word_tokenize(text.lower())
    tagged = nltk.tag._pos_tag(tokens, tagset, tagger)
    res = []

    search_index = [i for i, val in enumerate(tokens)
                    if (p.singular_noun(val) == search or
                    (not p.singular_noun(val) and val == search))
                    ]

    words = [(word, pos) for word, pos in tagged if (pos[0] == "J") and
             len(word) > 2 and
             word not in stop and
             not p.singular_noun(word) and
             eng_check.check(word) and
             not any(ccc.isdigit() for ccc in word)]

    adj_count = 0
    for a in range(0, len(tagged)):
        if tagged[a] in words:
            if tagged[a][1][0] == "J":
                adj = tagged[a][0]
                dist = min([abs(a-s) for s in search_index])
                score = 0
                adj_synset = list(swn.senti_synsets(adj, 'a'))  # materialize the generator before len()
                if len(adj_synset) <= 0:
                    adj_synset = list(swn.senti_synsets(adj, 'v'))
                if len(adj_synset) <= 0:
                    synonyms = []
                    for ss in wn.synsets(adj):
                        for j in ss.lemma_names():
                            synonyms.append(j)
                    if len(synonyms) > 1:
                        synonym_count = 0
                        for s in range(0, len(synonyms)):
                            if synonym_count < 2 and synonyms[s] != adj:
                                w1 = synonyms[s]
                                adj_synset1 = list(swn.senti_synsets(w1, 'a'))
                                if len(adj_synset1) > 0:
                                    score += adj_synset1[0].pos_score()\
                                        - adj_synset1[0].neg_score()
                                    synonym_count += 1
                        score = score/2
                else:
                    score = adj_synset[0].pos_score() \
                        - adj_synset[0].neg_score()
                try:
                    res.append((adj, score/(pow(dist, 2))))
                    adj_count += 1
                except:
                    pass
    return (res, adj_count)
 def getSentimentOfWord(self, word):
     try:
         sentSet = list(swn.senti_synsets(word))
     except:
         #print("swn.senti_synsets(word) threw an error")
         return 0
     
     #if not found, assume objective word
     if len(sentSet) == 0:
         #print('empty sentSet for word '+word)
         return 0
     #else:
         #print('non empty sentSet for word '+word)
         
     totalPos = 0
     totalNeg = 0
     totalObj = 0
     for sentiword in sentSet:
         totalPos += sentiword.pos_score()
         totalNeg += sentiword.neg_score()
         totalObj += sentiword.obj_score()
     
     totalPos = totalPos / len(sentSet)
     totalNeg = totalNeg / len(sentSet)
     totalObj = totalObj / len(sentSet)
         
     #determine sentiment
     if (totalPos >= totalObj) and (totalPos >= totalNeg):
         return 1
     if (totalNeg >= totalObj) and (totalNeg >= totalPos):
         return -1
     if (totalObj >= totalPos) and (totalObj >= totalNeg):
         return 0
Example #10
def sentiwordnetSentimentScoreFeatures(wordsTagged):
    posScoreSum = 0.0
    negScoreSum = 0.0
    for word, tag in wordsTagged:
        wordnetTag = translateFromNltkToWordnetTag(tag)
        word = stripNegation(word)
        if wordnetTag:
            synsets = list(sentiwordnet.senti_synsets(word, wordnetTag))
        else:
            synsets = list(sentiwordnet.senti_synsets(word))
        if len(synsets) > 0:
            synset = synsets[0]
            posScoreSum += synset.pos_score()  # accumulate (the sums were overwritten on every word)
            negScoreSum += synset.neg_score()

    return {"pos_neg_score": posScoreSum - negScoreSum}
Example #11
def sentiment(word):
    # print word
    posScore = 0
    negScore = 0
    if word[:1] == "~" and len(getAntonyms(word[1:])) != 0:
        word = list(getAntonyms(word[1:]).keys())[0]  # dict views cannot be indexed in Python 3
    opinions = swn.senti_synsets(word)
    for o in list(opinions):
        negScore += o.neg_score()
        posScore += o.pos_score()
    # print "POS " + str(posScore)
    # print "NEG " + str(negScore)
    negWords = ['rude','arrogant','boring','difficult','terrible','hard','dull','long','tricky','impossible','intimidating','ridiculous','tough','challenging']
    posWords = ['exciting', 'cool','smart','incredible','super','great','good','excellent','engaging','clear','entertaining','interesting','easy','straightforward','helpful','amazing','awesome','related','funny','doable']
    if word.lower() in negWords:
        return 'neg' 
    elif word.lower() in posWords:
        return 'pos'
    if word[:1] == "~":
        if word[1:].lower() in negWords:
            return 'pos'
        elif word[1:].lower() in posWords:
            return 'neg'

    if posScore > negScore:
        return 'pos'
    elif posScore < negScore:
        return 'neg'
    else:
        return 'neut'
 def score(self, tokens):
     pos_value = 0.0
     neg_value = 0.0
     obj_value = 0.0
     
     #TODO disambiguation via POS tagging using nps_chat or Brown Corpus
    
         
     nltk_tagged = nltk.pos_tag(tokens)
     
     
     #print(nltk_tagged)
     
     lengthOfData = 0
     for word in nltk_tagged:
         
         meanings = list(swn.senti_synsets(word[0], self.get_wordnet_pos(word[1])))
         
         if len(meanings) > 0:
             wordSynset0 = meanings[0]
             pos_value += wordSynset0.pos_score()
             neg_value += wordSynset0.neg_score()
             obj_value += wordSynset0.obj_score()
             lengthOfData += 1
             
     if lengthOfData > 0:
         obj_value = obj_value/lengthOfData  # only objectivity is averaged; pos/neg stay raw sums
         
     return [ pos_value , neg_value, obj_value]
Example #13
def main():
    length = input("Length:")
    letters = input("Letter sequence:")
    f = open("combinations.txt",'w')
    k = list(itertools.permutations(letters, int(length)))

    for i in k:
        for l in i:
            f.write(l)
        f.write('\n')
    f.close()

    h = []
    with open('combinations.txt') as hai:
        h = [word.lower().strip() for word in hai ]

    dic = {}
    for o in h:
        dic[o] = dic.get(o, 0) + 1  # occurrence counts; only the unique keys are used below

    m = open("out.txt",'w')

    for l in dic:
        v= list(swn.senti_synsets(l))
        if v:
            m.write(l)
            m.write('\n')
Example #14
def getSentimentFeatures(feats, text, prefix):
	pos_sum = 0
	neg_sum = 0
	most_positive = 0
	most_negative = 0

	for string in text.lower().split(' '):
		if len(string) > 0 and string[0] == '#':
			string = string[1:]
		senti_synset = list(swn.senti_synsets(string))
		if len(senti_synset) > 0:
			senti_synset = senti_synset[0] #just use the 1st one for now
			pos_score = senti_synset.pos_score()
			if pos_score > most_positive: most_positive = pos_score
			pos_sum += pos_score
			neg_score = senti_synset.neg_score()
			if neg_score > most_negative: most_negative = neg_score
			neg_sum += neg_score

	feats[prefix + 'POS_SUM'] = pos_sum
	feats[prefix + 'NEG_SUM'] = neg_sum
	feats[prefix + 'MEAN_POS_NEG'] = (pos_sum + neg_sum) / 2.0
	feats[prefix + 'POS_NEG_GAP'] = pos_sum - neg_sum
	feats[prefix + 'SINGLE_POS_GAP'] = most_positive - (pos_sum + neg_sum) / 2.0
	feats[prefix + 'SINGLE_NEG_GAP'] = most_negative - (pos_sum + neg_sum) / 2.0
def main():
	args = sys.argv
	if(len(args) != 2):
		print('usage: python proj filename')
		return -1
	lines = []
	values = []
	data = open(args[1], 'r')
	for line in data:
		temp = line.split('.')
		for sen in temp:
			tokens = word_tokenize(sen.strip('\n').strip(',').strip('-'))
			if tokens != []:
				lines.append(tokens)
	total_pos = 0
	total_neg = 0

	for line in lines:
		pos = 0.0
		neg = 0.0
		pcount = 0
		ncount = 0
		for word in line:
			sp = 0
			sn = 0
			sub_pos = 0
			sub_neg = 0
			x = swn.senti_synsets(word)
			
			for a in x:
				if(a.pos_score() > 0):
					sub_pos += 1
					sp += a.pos_score()
				if a.neg_score() > 0:
					sub_neg += 1
					sn += a.neg_score()
	#		if(sub_pos != 0):
	#			sp /= sub_pos
	#		if(sub_neg != 0):
	#			sn /= sub_neg
			pos += sp
			neg += sn
			if sp > 0:
				pcount += 1
			if sn > 0:
				ncount += 1
		
		if(pos == 0) or (neg == 0):
			values.append((pos, neg))
			total_pos += pos
			total_neg += neg

		else:	
			values.append((pos/(pos+neg), neg/(pos+neg)))
			total_pos += (pos/(pos+neg))
			total_neg += (neg/(pos+neg))
	print(str(total_pos / len(values)) + ',' + str(total_neg / len(values)))
	for x in range(0, len(lines)):
	#	print('sentence: ' + str(lines[x]))
		print(str(values[x][0]) + ',' + str(values[x][1]))
    def getNumberOfAppearances(self,tokens):
        total = 0
        for token in tokens:
            if len(list(s.senti_synsets(token))) > 0:  # materialize the generator before len()
                total += 1

        return total
def get_sentiment_score(ls):
    '''
    Input: a sentence-like object with a `content` attribute.
    Estimates a score for the sentence based on the swn model:
    (sum of positive scores - sum of negative scores) / number of scored words.
    '''
    from nltk.tokenize import word_tokenize
    import re
    word_list = word_tokenize(ls.content)
    punctuation = re.compile(r'[-.?!,":;()|0-9]') # remove these punctuations and number 
    word_list = [punctuation.sub("", word) for word in word_list]
    word_list = [w for w in word_list if w]  # drop empties; filter() returns an iterator in Python 3
    ls.tokens = word_list

    pos_score = 0.0
    neg_score = 0.0
    num_valid_word = 0.1
    for w in word_list:
        res = list(swn.senti_synsets(w))  # generator in NLTK 3.x; bare indexing raised and was silently swallowed
        if res:
            pos_score += res[0].pos_score()
            neg_score += res[0].neg_score()
            num_valid_word += 1
            
    ls.score = (pos_score - neg_score)/num_valid_word
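A usage sketch for get_sentiment_score; ls can be any object with a content attribute (a SimpleNamespace stands in here), and the punkt tokenizer data is assumed to be installed:

# Usage sketch -- SimpleNamespace is a hypothetical stand-in for the real sentence class.
from types import SimpleNamespace

ls = SimpleNamespace(content="The movie was surprisingly good, not boring at all.")
get_sentiment_score(ls)
print(ls.tokens)  # cleaned token list
print(ls.score)   # (pos - neg) / count, smoothed by the 0.1 starting count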
    
    """
def SentimentAnalysis_RGO_Belief_Propagation(nxg):
	#Bayesian Pearl Belief Propagation is done by
	#assuming the senti scores as probabilities with positive
	#and negative signs and the Recursive Gloss Overlap
	#definition graph being the graphical model.
	#Sentiment as a belief potential is passed through 
	#the DFS tree of this graph.  
	dfs_positive_belief_propagated=1.0
	core_positive_belief_propagated=1.0
	dfs_negative_belief_propagated=1.0
	core_negative_belief_propagated=1.0
	core_xnegscore=core_xposscore=1.0
	dfs_knegscore=dfs_kposscore=dfs_vposscore=dfs_vnegscore=1.0
	sorted_core_nxg=sorted(nx.core_number(nxg).items(),key=operator.itemgetter(1), reverse=True)
	kcore_nxg=nx.k_core(nxg,6,nx.core_number(nxg))
	for x in sorted_core_nxg:
		xsset = list(swn.senti_synsets(x[0]))  # generator in NLTK 3.x
		if len(xsset) > 2:
			core_xnegscore = float(xsset[0].neg_score())*10.0
			core_xposscore = float(xsset[0].pos_score())*10.0
		if core_xnegscore == 0.0:
			core_xnegscore = 1.0
		if core_xposscore == 0.0:
			core_xposscore = 1.0
		core_positive_belief_propagated *= float(core_xposscore)
		core_negative_belief_propagated *= float(core_xnegscore)
	print("Core Number: RGO_sentiment_analysis_belief_propagation: %f, %f" % (float(core_positive_belief_propagated), float(core_negative_belief_propagated)))
	#for k,v in nx.dfs_edges(nxg):
	for k,v in nx.dfs_edges(kcore_nxg):
		ksynset = list(swn.senti_synsets(k))
		vsynset = list(swn.senti_synsets(v))
		if len(ksynset) > 2:
			dfs_knegscore = float(ksynset[0].neg_score())*10.0
			dfs_kposscore = float(ksynset[0].pos_score())*10.0
		if len(vsynset) > 2:
			dfs_vnegscore = float(vsynset[0].neg_score())*10.0
			dfs_vposscore = float(vsynset[0].pos_score())*10.0
		dfs_kposscore_vposscore = float(dfs_kposscore*dfs_vposscore)
		dfs_knegscore_vnegscore = float(dfs_knegscore*dfs_vnegscore)
		if dfs_kposscore_vposscore == 0.0:
			dfs_kposscore_vposscore = 1.0
		if dfs_knegscore_vnegscore == 0.0:
			dfs_knegscore_vnegscore = 1.0
		dfs_positive_belief_propagated *= float(dfs_kposscore_vposscore)
		dfs_negative_belief_propagated *= float(dfs_knegscore_vnegscore)
	print("K-Core DFS: RGO_sentiment_analysis_belief_propagation: %f, %f" % (float(dfs_positive_belief_propagated), float(dfs_negative_belief_propagated)))
	return (dfs_positive_belief_propagated, dfs_negative_belief_propagated, core_positive_belief_propagated, core_negative_belief_propagated)
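A toy driver for the routine above; a small hand-built word graph stands in for the real Recursive Gloss Overlap definition graph, whose construction is not shown here:

# Toy driver (assumption: a tiny word graph replaces the real RGO graph)
import networkx as nx

nxg = nx.Graph()
nxg.add_edges_from([("good", "excellent"), ("excellent", "movie"), ("movie", "plot")])
print(SentimentAnalysis_RGO_Belief_Propagation(nxg))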
Example #19
File: __init__.py  Project: clips/pattern
 def __getitem__(self, k):
     synsets = list(swn.senti_synsets(k))
     if synsets:
         p, n = synsets[0].pos_score(), synsets[0].neg_score()
         v = (float(p) - float(n), float(p) + float(n))
         return v
     else:
         return None
    def sentiWordNetProc(self, sentence):

        if sentence == "":
            return [0, 0, 0, 0, 0]

        s = nltk.word_tokenize(sentence)
        l = len(s)

        num_pos_tokens = 0
        num_neg_tokens = 0
        maximal_sentiment = 0
        sentimentScores = list()
        posSentimentSum = 0
        negSentimentSum = 0

        for i in range(l):
            senti_type = list(swn.senti_synsets(s[i]))  # generator in NLTK 3.x
            if senti_type:
                n = senti_type[0].neg_score()
                p = senti_type[0].pos_score()

                # if the previous word (or the one before it) is a negation,
                # pull both scores toward their mean; the original updated p
                # first and then reused the new p when updating n
                if (i - 1 >= 0 and s[i - 1] in negation_list) or \
                   (i - 2 >= 0 and s[i - 2] in negation_list):
                    p0, n0 = p, n
                    p = float(n0 - p0) / 2 + p0
                    n = float(p0 - n0) / 2 + n0

                posSentimentSum += float(p)
                negSentimentSum += float(n)

                if float(p) > float(n):
                    num_pos_tokens += 1
                    sentimentScores.append(float(p))
                else:
                    num_neg_tokens += 1
                    sentimentScores.append(float(n))

        if len(sentimentScores) > 0:
            maximal_sentiment = max(sentimentScores)

        return [num_pos_tokens, num_neg_tokens, maximal_sentiment,
                posSentimentSum, negSentimentSum]
def remove_nonSentiWord(word):
    shouldInclude = False
    if len(list(swn.senti_synsets(word))) == 0:
        # print word, " not in sentiWordNet"
        if len(list(wn.synsets(word))) == 0:
            # print word, " not in wordNet"
            shouldInclude = False
        else:
            shouldInclude = True
    else:
        synSet = list(swn.senti_synsets(word))
        # print "Word: ", word
        for item in synSet:
            # print "+ ", item.pos_score(), " - ", item.neg_score(), " Neutral ", item.obj_score()
            if item.pos_score() > sentiWordNet_ThreshHold or item.neg_score() > sentiWordNet_ThreshHold:
                shouldInclude = True
                break
    return shouldInclude
 def rem_nonevaluative(self):
     for key, value in list(self.wdict.items()):  # copy, since entries are deleted while iterating
         scores = list(swn.senti_synsets(key, 'a'))  # materialize once; a generator cannot be re-read
         if not scores:
             del self.wdict[key]
             continue
         objectivity = scores[0].obj_score()
         if objectivity >= 0.5:
             del self.wdict[key]
Example #23
def sentianalysis_sentences(text):
    # This function takes a string, scores each word in each sentence with
    # SentiWordNet, averages the word scores per sentence, then averages the
    # sentence scores; it returns the sentence count and that overall average.
    try:
        if type(text) == str:
            text_lower = text.lower()
            sentences = sent_tokenize(text_lower)
            tokens_sentence = [nltk.word_tokenize(sentence) for sentence in sentences]
            taggedlist=[]
            for token in tokens_sentence:
                taggedlist.append(nltk.pos_tag(token))

            wnl = nltk.WordNetLemmatizer()
            score_list=[]

            for index1,taggedsentence in enumerate(taggedlist):
                score_list.append([])
                for index2,taggedtoken in enumerate(taggedsentence):
                    # Correct the tags to the form which sentiwordnet recognizes
                    newtag=''
                    lemmatized_token=wnl.lemmatize(taggedtoken[0])
                    if taggedtoken[1].startswith('NN'):
                        newtag='n'
                    elif taggedtoken[1].startswith('JJ'):
                        newtag='a'
                    elif taggedtoken[1].startswith('V'):
                        newtag='v'
                    elif taggedtoken[1].startswith('R'):
                        newtag='r'
                    else:
                        newtag=''       
                    if(newtag!=''):    
                        synsets = list(swn.senti_synsets(lemmatized_token, newtag))

                        #Getting average of all possible sentiments
                        score=0
                        if(len(synsets)>0):
                            for syn in synsets:
                                score+=syn.pos_score()-syn.neg_score()
                            score_list[index1].append(score/len(synsets))

            sentence_sentiment=[]
            for score_sentence in score_list:
                if len(score_sentence) != 0:
                    sentence_sentiment.append(sum([word_score for word_score in score_sentence])/len(score_sentence))
            
            return(len(sentence_sentiment), sum(score_sentence for score_sentence in sentence_sentiment)/len(sentence_sentiment))
        else:
            print("The input text is not a string, please check agian!")
            return(0,0)
    except:
        return(0,0)
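A quick call of the function above; it assumes the punkt tokenizer, the POS tagger, and the WordNet/SentiWordNet corpora are installed:

# Returns (number_of_scored_sentences, average_sentence_sentiment)
text = "The plot was wonderful. The acting felt dull and lifeless."
print(sentianalysis_sentences(text))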
Example #24
def word_score(word,tag):
	if word in dick:  # dict.has_key() was removed in Python 3
		score = dick[word]
	else:
		if tag == "k":
			all_words = list(swn.senti_synsets(word))  # materialize so len() works below
		else:
			all_words = list(swn.senti_synsets(word,tag))
		score = [0.0, 0.0, 0.0]
		for words in all_words:
			score[0] = score[0] + words.pos_score()
			score[1] = score[1] + words.neg_score()
			score[2] = score[2] + words.obj_score()
		if(len(all_words) > 0):
			score[0] = score[0] / len(all_words)
			score[1] = score[1] / len(all_words)
			score[2] = score[2] / len(all_words)
		dick[word] = score
	#print (word, " : ", score)
	return score
 def calc(self):
     keys = []
     for key in self.all_words:
         scores = list(swn.senti_synsets(key, 'a'))  # materialize once; 'if not list(scores)' exhausted the generator
         if not scores:
             continue
         objectivity = scores[0].obj_score()
         if objectivity < 0.5:
             keys.append(key)
     self.keys = keys
def sentiwordnet_values(word):
    value = (swn.senti_synsets(word))
    value_list = list(value)
    Positive_Average = 0
    Negative_Average = 0
    if (len(value_list) > 0):
        Positive_Average += value_list[0].pos_score()
        Negative_Average += value_list[0].neg_score()
    averages = list()
    averages.append(Positive_Average+1)
    averages.append(Negative_Average+1)
    return averages
    def get_word_primary_score(cls, word_pair):
        # firstly checking the special words
        special_words = cls.read_special_word()
        if word_pair[0] in special_words:
            return special_words[word_pair[0]]
        # not in sentiwordnet then return 0
        synsets = list(swn.senti_synsets(word_pair[0], word_pair[1]))  # materialize the generator
        if not len(synsets):
            return 0

        score_list = [synset.pos_score() - synset.neg_score() for synset in synsets]
        return sum(score_list) / float(len(score_list))
def get_avg_sentiment(word,objectivity=None):
    syns=list(swn.senti_synsets(word))  # materialize: a generator would be exhausted after the first np.mean
    pos=0.0
    neg=0.0
    if syns:
        if objectivity ==1:
            pos=np.mean([x._pos_score*x._obj_score for x in syns])
            neg=np.mean([x._neg_score*x._obj_score for x in syns])
        else:
            pos=np.mean([x._pos_score for x in syns])
            neg=np.mean([x._neg_score for x in syns])
    return (pos,neg)
def analyze_sentiment_sentiwordnet_lexicon(review,
                                           verbose=False):
    # pre-process text
    review = normalize_accented_characters(review)
    review = html_parser.unescape(review)
    review = strip_html(review)
    # tokenize and POS tag text tokens
    text_tokens = nltk.word_tokenize(review)
    tagged_text = nltk.pos_tag(text_tokens)
    pos_score = neg_score = token_count = obj_score = 0
    # get wordnet synsets based on POS tags
    # get sentiment scores if synsets are found
    for word, tag in tagged_text:
        if 'NN' in tag:
            synsets = list(swn.senti_synsets(word, 'n'))
        elif 'VB' in tag:
            synsets = list(swn.senti_synsets(word, 'v'))
        elif 'JJ' in tag:
            synsets = list(swn.senti_synsets(word, 'a'))
        elif 'RB' in tag:
            synsets = list(swn.senti_synsets(word, 'r'))
        else:
            synsets = []
        # if a senti-synset is found (indexing the raw generator raised TypeError)
        if synsets:
            ss_set = synsets[0]
            # add scores for all found synsets
            pos_score += ss_set.pos_score()
            neg_score += ss_set.neg_score()
            obj_score += ss_set.obj_score()
            token_count += 1
    
    # aggregate final scores (token_count guarded against empty reviews)
    token_count = max(token_count, 1)
    final_score = pos_score - neg_score
    norm_final_score = round(float(final_score) / token_count, 2)
    final_sentiment = 'positive' if norm_final_score >= 0 else 'negative'
    if verbose:
        norm_obj_score = round(float(obj_score) / token_count, 2)
        norm_pos_score = round(float(pos_score) / token_count, 2)
        norm_neg_score = round(float(neg_score) / token_count, 2)
        # to display results in a nice table
        sentiment_frame = pd.DataFrame([[final_sentiment, norm_obj_score,
                                         norm_pos_score, norm_neg_score,
                                         norm_final_score]],
                                         columns=pd.MultiIndex(levels=[['SENTIMENT STATS:'],
                                                                       ['Predicted Sentiment', 'Objectivity',
                                                                        'Positive', 'Negative', 'Overall']],
                                                               codes=[[0,0,0,0,0],[0,1,2,3,4]]))  # 'codes' replaced 'labels' in pandas 0.24+
        print(sentiment_frame)
        
    return final_sentiment
Example #30
def assess_polarity1(tokens):
    """
    sum polarities and divide by number of tokens
    :param tokens:
    :return: scores
    """
    polarity = 0.0
    for token in tokens:
        syn = list(swn.senti_synsets(token))
        if syn:
            syn = syn[0]
            polarity += syn.neg_score()
            polarity += syn.pos_score()
    return polarity / len(tokens)
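A usage sketch; note that the loop adds both pos_score and neg_score of each token's first synset, so the result behaves like sentiment intensity rather than signed polarity:

# Usage sketch (assumes the SentiWordNet corpus is installed)
tokens = ["great", "terrible", "table"]
print(assess_polarity1(tokens))  # average pos+neg mass per token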
Example #31
def extract_senti_wordnet(text):
	overall_score = 0
	for word in text:
		synsets = swn.senti_synsets(word)  # tokens assumed to be str; the old .decode() was a Python 2 leftover
		for synonym in synsets:
			overall_score += (synonym.pos_score() - synonym.neg_score())
	overall_score /= max(len(text), 1)  # guard against an empty token list
	return {'senti_score': overall_score}
    def tag_sentence(self, sentence, tag_with_lemmas=False):
        tag_sentence = []

        for (word, lemma, postag) in sentence:
            tag = self.wordnet_pos_code(postag[0])
            senti_word = swn.senti_synsets(word)
            pos_score = 0.0
            neg_score = 0.0
            for sw in senti_word:
                pos_score += sw.pos_score()
                neg_score += sw.neg_score()
            s = (word, lemma, postag, pos_score, neg_score)
            tag_sentence.append(s)
        return tag_sentence
Example #33
    def get_synsets(self, token):
        stopwords = nltk.corpus.stopwords.words('english')
        lemmatizer = Lemmatizer(LEMMA_INDEX, LEMMA_EXC, LEMMA_RULES)
        stop_poss = [
            'PUNCT', 'X', 'SYM', 'CONJ', 'ADP', 'PART', 'SPACE', 'INTJ'
        ]

        if token.text in stopwords or token.pos_ in stop_poss:
            return []

        swn_pos = self.get_swn_pos(token.pos_)
        lemma = lemmatizer(token.text, token.pos_)

        return list(swn.senti_synsets(lemma[0], pos=swn_pos))[:5]
Example #34
def forth_feature(vocab_of_total_words, X_set):  # 4th feature
    words_polarity_standard = []
    for word in vocab_of_total_words:  # for each word in the vocabulary
        # the word may be missing from SentiWordNet, so guard the lookup
        try:
            synset = list(swn_word.senti_synsets(word))
            common_meaning = synset[0]
            if common_meaning.pos_score() > common_meaning.neg_score():
                weight = common_meaning.pos_score()
            elif common_meaning.pos_score() < common_meaning.neg_score():
                weight = -common_meaning.neg_score()
            else:
                weight = 0
        except IndexError:
            weight = 0
        words_polarity_standard.append(weight)

    no_of_pos_and_neg = np.zeros((len(X_set), 2), dtype=int)
    words_polarity_standard_array = np.array(words_polarity_standard)  # invariant: hoisted out of the loop
    count = 0

    for row in X_set:
        positive_word_count = 0
        negative_word_count = 0
        weights_of_all_words_in_review = np.multiply(row, words_polarity_standard_array)
        for a_word in weights_of_all_words_in_review:
            if a_word > 0:
                positive_word_count = positive_word_count + 1
            if a_word < 0:
                negative_word_count = negative_word_count + 1
        no_of_pos_and_neg[count][0] = positive_word_count
        no_of_pos_and_neg[count][1] = negative_word_count
        count = count + 1

    return no_of_pos_and_neg
Example #35
    def _initialize_(self, reviews, saveAs=None, saveOverride=False):
        """
        wordOccuranceMatrix: numDocs x vocabSize matrix encoding the
        bag of words representation of each document
        """
        self.wordOccuranceMatrix = self.processReviews(reviews, saveAs,
                                                       saveOverride)
        numDocs, vocabSize = self.wordOccuranceMatrix.shape

        # Pseudocounts
        self.n_dt = np.zeros((numDocs, self.numTopics))
        self.n_dts = np.zeros((numDocs, self.numTopics, self.numSentiments))
        self.n_d = np.zeros((numDocs))
        self.n_vts = np.zeros((vocabSize, self.numTopics, self.numSentiments))
        self.n_ts = np.zeros((self.numTopics, self.numSentiments))
        self.topics = {}
        self.sentiments = {}
        self.priorSentiment = {}

        alphaVec = self.alpha * np.ones(self.numTopics)
        gammaVec = self.gamma * np.ones(self.numSentiments)

        for i, word in enumerate(self.vectorizer.get_feature_names()):
            synsets = list(swn.senti_synsets(word))  # materialize: np.mean would otherwise see an exhausted generator
            if not synsets:
                continue
            posScore = np.mean([s.pos_score() for s in synsets])
            negScore = np.mean([s.neg_score() for s in synsets])
            if posScore >= 0.1 and posScore > negScore:
                self.priorSentiment[i] = 1
            elif negScore >= 0.1 and negScore > posScore:
                self.priorSentiment[i] = 0

        for d in range(numDocs):

            topicDistribution = sampleFromDirichlet(alphaVec)
            sentimentDistribution = np.zeros(
                (self.numTopics, self.numSentiments))
            for t in range(self.numTopics):
                sentimentDistribution[t, :] = sampleFromDirichlet(gammaVec)
            for i, w in enumerate(word_indices(
                    self.wordOccuranceMatrix[d, :])):
                t = sampleFromCategorical(topicDistribution)
                s = sampleFromCategorical(sentimentDistribution[t, :])

                self.topics[(d, i)] = t
                self.sentiments[(d, i)] = s
                self.n_dt[d, t] += 1
                self.n_dts[d, t, s] += 1
                self.n_d[d] += 1
                self.n_vts[w, t, s] += 1
                self.n_ts[t, s] += 1
def analyze_one_word(word):
    """ Findout the pos/neg score of a word. Don't care about the part
        of speech for now. Just pick the first synset found.
    """
    pos_score = 0.0
    neg_score = 0.0
    synsets = list(swn.senti_synsets(word))  # generator in NLTK 3.x, so index via a list
    if synsets:
        pos_score = synsets[0].pos_score()
        neg_score = synsets[0].neg_score()

    return pos_score, neg_score
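A usage sketch for analyze_one_word; unknown words fall through to (0.0, 0.0):

# Usage sketch
print(analyze_one_word("happy"))   # scores of the first synset, e.g. happy.a.01
print(analyze_one_word("zzzxy"))   # -> (0.0, 0.0): no synsets found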
    def findSentiment(self,token,pos_tag):
        #find average score over all pos tags
        if self.average:
            synsets = list(s.senti_synsets(token))
        else: #find score for a specific pos tag
            try:
                synsets = list(s.senti_synsets(token,pos_tag.lower()))
            except:
                #fall back when no such pos tag exists
                synsets = list(s.senti_synsets(token))

        if len(synsets)>0 :
            #calculate score for all sentiments (neutral, positive, negative)
            neutral = 0
            positive = 0
            negative = 0

            #average score of all synsets (change if a better approach is found)
            for synset in synsets:
                neutral += synset.obj_score()
                positive += synset.pos_score()
                negative += synset.neg_score()

            neutral = neutral/float(len(synsets))
            positive = positive/float(len(synsets))
            negative = negative/float(len(synsets))

            #return sentiment with max score
            if max(neutral,positive,negative) == neutral :
                return neutral
            elif max(neutral,positive,negative) == positive :
                return positive
            else:
                return negative

        else:
            return 0
Example #38
def get_sent_max_pos_neg(corpus):
	for curr_sentence in corpus:
		curr_sentence = re.sub(r'^https?:\/\/.*[\r\n]*', ' ', curr_sentence)
		curr_sentence = re.sub(r'@[a-zA-Z0-9_]+', ' ', curr_sentence)
		sentence_tokenized = tokenizer(curr_sentence)
		sent_list = []
		for i in range(len(sentence_tokenized)-2):
			curr_word = sentence_tokenized[i]
			next_word = sentence_tokenized[i+1]
			next_next = sentence_tokenized[i+2]
			if curr_word.startswith('#'):
				curr_word = curr_word[1:]
			if next_word.startswith('#'):
				next_word = next_word[1:]
			if next_next.startswith('#'):
				next_next = next_next[1:]
			curr_senti_synsets = list(swn.senti_synsets(curr_word))
			next_senti_synsets = list(swn.senti_synsets(next_word))
			next_next_senti_synsets = list(swn.senti_synsets(next_next))
			if len(curr_senti_synsets) > 0 and len(next_senti_synsets) > 0 and len(next_next_senti_synsets) > 0:
				curr_pos = curr_senti_synsets[0].pos_score()
				curr_neg = curr_senti_synsets[0].neg_score()
				next_pos = next_senti_synsets[0].pos_score()
				next_neg = next_senti_synsets[0].neg_score()
				next_next_pos = next_next_senti_synsets[0].pos_score()
				next_next_neg = next_next_senti_synsets[0].neg_score()
				max_pos = max(curr_pos, next_pos, next_next_pos)
				max_neg = max(curr_neg, next_neg, next_next_neg)
				curr_sent = max_pos - max_neg
				if curr_sent != 0:
					sent_list.append(curr_sent)
		if len(sent_list) > MAX_LEN:
			sent_list = sent_list[:MAX_LEN]
		for i in range(MAX_LEN - len(sent_list)):
			sent_list.append(0.0)
		inp_X.append(sent_list)
	return inp_X
def get_score(adjective):
    if adjective not in d:
        scores = list(swn.senti_synsets(adjective))  # materialize the NLTK 3.x generator
        if len(scores) == 0:  # checked before maxi() so empty lists never reach it
            d[adjective] = (-1, -1, -1, -1, -1, -1)
            return (-1, -1, -1, -1, -1, -1)
        pos_scores = [i.pos_score() for i in scores]
        neg_scores = [i.neg_score() for i in scores]
        obj_scores = [i.obj_score() for i in scores]
        pos_score = maxi(pos_scores)
        neg_score = maxi(neg_scores)
        obj_score = maxi(obj_scores)

        scores_ad = list(swn.senti_synsets(adjective, pos='ar'))
        pos_scores_ad = [i.pos_score() for i in scores_ad]
        neg_scores_ad = [i.neg_score() for i in scores_ad]
        obj_scores_ad = [i.obj_score() for i in scores_ad]
        pos_score_ad = maxi(pos_scores_ad)
        neg_score_ad = maxi(neg_scores_ad)
        obj_score_ad = maxi(obj_scores_ad)
        d[adjective] = (pos_score, neg_score, obj_score, pos_score_ad,
                        neg_score_ad, obj_score_ad)
    else:
        (pos_score, neg_score, obj_score, pos_score_ad, neg_score_ad,
         obj_score_ad) = d[adjective]
    if pos_score == -1:
        return -100
    if pos_score_ad > neg_score_ad:
        return pos_score_ad
    elif pos_score_ad < neg_score_ad:
        return -neg_score_ad
    elif pos_score > neg_score:
        return pos_score
    elif pos_score < neg_score:
        return -neg_score
    else:
        return 0
Example #40
def get_score(sentense_list):
    for sentence in sentense_list:
        filtered_words = []
        sentence = sentence.lower()  # lowercase
        retokenize = RegexpTokenizer("[\w]+")
        split_sentence = retokenize.tokenize(sentence)  # split into tokens
        filtered_sentence = [w for w in split_sentence
                             if not w in stop_words]  # filter out stopwords
        wnl = nltk.WordNetLemmatizer()  # lemmatizer (restores base forms)

        for f in filtered_sentence:
            filtered_words.append(wnl.lemmatize(str(f)))  # lemmatize
        pos_tagged = nltk.pos_tag(filtered_words)

        positive_score = 0.0
        negative_score = 0.0
        synonym_avg = []

        for pos_word in pos_tagged:
            if pos_word[1].startswith('N'):  # noun
                newtag = 'n'
            elif pos_word[1].startswith('J'):  # adjective
                newtag = 'a'
            elif pos_word[1].startswith('V'):  # verb
                newtag = 'v'
            elif pos_word[1].startswith('R'):
                newtag = 'r'
            else:
                newtag = ''
            if newtag != '':  # a usable POS exists
                synsets = list(swn.senti_synsets(pos_word[0], newtag))
                if (len(synsets) > 0):
                    positive_score += sum([s.pos_score()
                                           for s in synsets]) / len(synsets)
                    negative_score += sum([s.neg_score()
                                           for s in synsets]) / len(synsets)

        if (len(pos_tagged) > 0):
            positive_score /= len(pos_tagged)
            negative_score /= len(pos_tagged)

            if positive_score > negative_score:
                print("positive")
            elif positive_score < negative_score:
                print("negative")
            else:
                print("objective")
        else:
            print("objective")
def SentimentAnalysis_RGO_Belief_Propagation_MarkovRandomFields(nxg):
    #The RGO undirected graph is factored into maximal cliques and
    #sum potentials of clique nodes are construed as probabilities
    #which are productized
    empath = []
    nxg_undirected = nxg.to_undirected(nxg)
    clique = list(nx.find_cliques(nxg_undirected))
    clique_potential_product_pos = 1.0
    clique_potential_product_neg = 1.0
    clique_potential_product_obj = 1.0
    for c in clique:
        clique_potential_pos = 0.0
        clique_potential_neg = 0.0
        clique_potential_obj = 0.0
        print "clique:", c
        for v in c:
            if SentimentScoringAlgorithm == "Empath":
                pos = 0.0001
                neg = 0.0001
                empath_dict = lexicon.analyze(v.decode("utf-8"))
                empath_list = sorted(empath_dict.items(),
                                     key=operator.itemgetter(1),
                                     reverse=True)
                #print "empath_dict:",empath_dict
                obj = empath_list[0][1]
                if obj == 0.0:
                    obj = 0.00001
            else:
                sset = swn.senti_synsets(v.decode("utf-8"))
                if len(sset) > 0:
                    pos = float(sset[0].pos_score())
                    neg = float(sset[0].neg_score())
                    obj = float(sset[0].obj_score())
                    if pos == 0.0:
                        pos = 0.00001
                    if neg == 0.0:
                        neg = 0.00001
                    if obj == 0.0:
                        obj = 0.00001
            clique_potential_pos += pos
            clique_potential_neg += neg
            clique_potential_obj += obj
        clique_potential_product_pos *= float(clique_potential_pos)
        clique_potential_product_neg *= float(clique_potential_neg)
        clique_potential_product_obj *= float(clique_potential_obj)
    lenclique = len(clique)
    if lenclique == 0:
        lenclique = 1
    return clique_potential_product_pos / lenclique, clique_potential_product_neg / lenclique, clique_potential_product_obj / lenclique
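A toy driver for the Markov Random Fields variant, assuming the SentiWordNet path (the module-level SentimentScoringAlgorithm flag set to anything other than "Empath", so the Empath lexicon is not required):

# Toy driver (assumption: SentimentScoringAlgorithm is the module-level flag read above)
import networkx as nx

SentimentScoringAlgorithm = "SentiWordNet"
nxg = nx.Graph()
nxg.add_edges_from([("good", "excellent"), ("excellent", "movie")])
print(SentimentAnalysis_RGO_Belief_Propagation_MarkovRandomFields(nxg))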
def get_sentiment_wordnet(word):
#     word = 'crazy'
    list_senti = list(swn.senti_synsets(word))
    sum_pos = 0
    sum_neg = 0

    for senti in list_senti:
        sum_pos += senti.pos_score()
        sum_neg += senti.neg_score()

    avg_pos = sum_pos * 1.0 / len(list_senti) if len(list_senti) > 0 else 0
    avg_neg = sum_neg * 1.0 / len(list_senti) if len(list_senti) > 0 else 0

    print(''.join([word, ' (', str(round(avg_pos, 3)), ', ', str(round(avg_neg, 3)), ')']))

    return avg_pos, avg_neg
Example #43
def average_polarity(input_word):
    words = list(swn.senti_synsets(input_word))
    output_polarity = []
    if len(words) == 1:
        this_word = words[0]
        return this_word.pos_score() - this_word.neg_score()
    for this_word in words:
        usage = int(this_word.synset.name()[-2:])
        if usage < 3:
            output_polarity.append(this_word.pos_score() -
                                   this_word.neg_score())
    if len(output_polarity) == 0:
        return 0
    else:
        return np.mean(output_polarity)
def polarity_score_2(word, pos):
    pos_arr = []
    neg_arr = []
    neut_arr = []
    synsets = list(swn.senti_synsets(word, pos))  # materialize once instead of two generator calls
    if len(synsets) == 0:
        return 0.0
    for s in synsets:
        pos_arr.append(s.pos_score())
        neg_arr.append(s.neg_score())
        neut_arr.append(s.obj_score())
    pos = round(np.mean(np.array(pos_arr)), 2)
    neg = round(np.mean(np.array(neg_arr)), 2)
    subj = round(np.mean(np.array(neut_arr)), 2)
    #return "%s,%s,%s,%s" %(word,pos,neg,subj)
    if pos > neg:
        return pos
    elif neg > pos:
        return neg * -1.0
    else:
        return 0.0
Example #45
def get_positive_negative_feature(tokens, normalization_function):
    features_count_by_sentiment = {}
    features_count_by_sentiment[0] = 0
    features_count_by_sentiment[1] = 0
    for token in tokens:
        token = normalization_function(token)
        token_analyzed = swn.senti_synsets(token)
        list_token_analyzed = list(token_analyzed)
        if len(list_token_analyzed) > 0:  # the raw generator is always truthy, so test the list
            token_sentiments = list_token_analyzed[0]
            if token_sentiments._pos_score > token_sentiments._neg_score:
                features_count_by_sentiment[1] = features_count_by_sentiment[1] + 1
            elif token_sentiments._pos_score < token_sentiments._neg_score:
                features_count_by_sentiment[0] = features_count_by_sentiment[0] + 1
    return features_count_by_sentiment
def LexiconScore(bow,postag):
    lexicalPostag = ['NN','NNP','NND','MD','JJ','RB']
    lexiconScore = {}
    translator = Translator()
    allWord = set(wn.all_lemma_names())
    for i in range(len(bow)):
        if bow[i] not in lexiconScore:
            englishWord = translator.translate(bow[i], dest='en')
            word = englishWord.text
            score = 0
            if word in allWord:
                if postag[i] in lexicalPostag:
                    if (postag[i] == 'NN' or postag[i] == 'NNP' or postag[i] == 'NND'):
                        allsyn = list(swn.senti_synsets(word, 'n'))
                        if len(allsyn) != 0:
                            lexicon = allsyn[0]  # first sense; word+'.n.01' is not guaranteed to exist as a synset name
                            score = lexicon.pos_score() - lexicon.neg_score()
                    elif postag[i] == 'VB' or postag[i] == 'MD':
                        allsyn = list(swn.senti_synsets(word, 'v'))
                        if len(allsyn) != 0:
                            lexicon = allsyn[0]
                            score = lexicon.pos_score() - lexicon.neg_score()
                    elif postag[i] == 'JJ':
                        allsyn = list(swn.senti_synsets(word, 'a'))
                        if len(allsyn) != 0:
                            lexicon = allsyn[0]
                            score = lexicon.pos_score() - lexicon.neg_score()
                    elif postag[i] == 'RB':
                        allsyn = list(swn.senti_synsets(word, 'r'))
                        if len(allsyn) != 0:
                            lexicon = allsyn[0]
                            score = lexicon.pos_score() - lexicon.neg_score()
                lexiconScore[bow[i]] = score
            else:
                lexiconScore[bow[i]] = 0
    return lexiconScore
Example #47
def superNaiveSentimentAnalysis(review):
    reviewpolarity = 0.0
    numExceptions = 0.0
    for word in review.lower().split():
        weight = 0.0
        try:
            common_meaning = list(swn.senti_synsets(word))[0]
            if common_meaning.pos_score() > common_meaning.neg_score():
                weight = weight + common_meaning.pos_score()
            elif common_meaning.pos_score() < common_meaning.neg_score():
                weight = weight - common_meaning.neg_score()
        except:
            numExceptions = numExceptions + 1
        reviewpolarity = reviewpolarity + weight
    return reviewpolarity
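Usage sketch; the polarity is the sum over words of the first synset's dominant score, positive added and negative subtracted:

# Usage sketch (assumes the SentiWordNet corpus is installed)
print(superNaiveSentimentAnalysis("This film is great and the cast is brilliant"))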
Example #48
def tweetToSWNVector(word):
    vec = np.zeros(3)
    pos_score, neg_score, obj_score = 0, 0, 0
    l = list(swn.senti_synsets(word))  # was hardcoded to look up the literal '21' instead of `word`
    try:
        pos_score += l[0].pos_score()
        neg_score -= l[0].neg_score()
        obj_score += l[0].obj_score()              
    except IndexError:
        pos_score += 0.0
        neg_score -= 0.0
        obj_score += 0.0                       
    
    vec[0], vec[1], vec[2] = pos_score, neg_score, obj_score
    return vec
def findscore(unimportant_tokens):
    tokens = find_imp_words(unimportant_tokens)
    valence = [0.0 for i in range(len(tokens))]
    i = 0
    for word in tokens:
        positive_word_score = 0
        negative_word_score = 0
        synsets = list(swn.senti_synsets(word))  # look each word up once, not four times
        for item in synsets:
            positive_word_score = positive_word_score + (item.pos_score())
            negative_word_score = negative_word_score + (item.neg_score())

        if len(synsets) != 0:
            positive_word_score = (positive_word_score / len(synsets))
            negative_word_score = (negative_word_score / len(synsets))

        valence[i] = (positive_word_score - negative_word_score)
        i = i + 1

    return tokens, valence
def SentimentAnalysis_SentiWordNet(text):
    tokens = text.split()  # Python 3 str: the old encode/decode round-trip is unnecessary
    sumposscore = 0.0
    sumnegscore = 0.0
    sumobjscore = 0.0
    for t in tokens:
        sset = list(swn.senti_synsets(t))  # materialize the generator before len()
        if len(sset) > 0:
            negscore = sset[0].neg_score()
            posscore = sset[0].pos_score()
            objscore = sset[0].obj_score()
            sumposscore += posscore
            sumnegscore += negscore
            sumobjscore += objscore
    return (sumposscore, sumnegscore, sumobjscore)
Example #51
def english_sentiments(word):
    pscore = 0
    nscore = 0
    ll = 0
    for wt in ['n', 'v', 'r', 'a']:
        z = list(swn.senti_synsets(word, wt))
        if len(z) > 0:
            p, n = z[0].pos_score(), z[0].neg_score()
            pscore += p
            nscore += n
            ll += 1  # was 'll + 1', a no-op that left the averages unnormalized
    if ll > 0:
        pscore = float(pscore) / ll
        nscore = float(nscore) / ll
    return pscore, nscore
def compute_sub_posneg(X):
    print("sub & posneg")
    x_subjectivity=[]
    x_posneg=[]
    for sentence in X:
        taggedsentence = []
        obj_score = 0.0
        p_count = 0.0
        n_count = 0.0
        taggedsentence.append(tagger.tag(sentence.split()))
        wnl = nltk.WordNetLemmatizer()
        for idx, words in enumerate(taggedsentence):
            for idx2, t in enumerate(words):
                newtag = ''
                lemmatizedsent = wnl.lemmatize(t[0])
                if t[1].startswith('NN'):
                    newtag = 'n'
                elif t[1].startswith('JJ'):
                    newtag = 'a'
                elif t[1].startswith('V'):
                    newtag = 'v'
                elif t[1].startswith('R'):
                    newtag = 'r'
                else:
                    newtag = ''
                if (newtag != ''):
                    synsets = list(swn.senti_synsets(lemmatizedsent, newtag))
                    score = 0.0
                    obj_wordscore = 0.0
                    if (len(synsets) > 0):
                        for syn in synsets:
                            score += syn.pos_score() - syn.neg_score()
                            obj_wordscore +=syn.obj_score()
                           # print(syn.pos_score, syn.neg_score())
                        score = score / len(synsets)
                        if(score>=0):
                            p_count +=1
                        else:
                            n_count +=1
                        obj_score += obj_wordscore / len(synsets)
                        #print(t, p_count, n_count, obj_score)
        if(n_count==0):
            n_count=1
        x_subjectivity.append(p_count/n_count)
        x_posneg.append(obj_score)
    x_subjectivity = [float(i)/max(x_subjectivity) for i in x_subjectivity]
    x_posneg = [float(i)/max(x_posneg) for i in x_posneg]
    return x_subjectivity, x_posneg
def main():
    args = sys.argv
    if (len(args) != 2):
        print('usage: python proj filename')
        return -1
    lines = []
    values = []
    data = open(args[1], 'r')
    for line in data:
        temp = line.split('.')
        for sen in temp:
            tokens = pos_tag(
                word_tokenize(sen.strip('\n').strip(',').strip('-')))
            if tokens != []:
                lines.append(tokens)
    total_pos = 0
    total_neg = 0

    for line in lines:
        pos = 0.0
        neg = 0.0
        count = 0
        for word in line:
            tag = 'n'
            if (word[1].startswith('VB')):  # startswith covers VBD, VBZ, ... (exact match missed them)
                tag = 'v'
            if (word[1].startswith('JJ')):
                tag = 'a'
            if (word[1].startswith('RB')):
                tag = 'r'
            x = list(swn.senti_synsets(word[0], tag))
            if x:  # the generator compared unequal to [] even when empty
                for a in x:
                    pos += a.pos_score()
                    neg += a.neg_score()
                count += 1
        if (pos + neg > 0):
            values.append((pos / (pos + neg), neg / (pos + neg)))
            total_pos += pos
            total_neg += neg
        else:
            values.append((0, 0))

    print(
        str(total_pos / (total_pos + total_neg)) + ',' +
        str(total_neg / (total_pos + total_neg)))
    for x in range(0, len(lines)):
        print(str(values[x][0]) + ',' + str(values[x][1]))
Example #54
def CrearSolucion(request):

    if request.method == 'POST':

        length = request.POST.get('length')
        letras = request.POST.get('letters')
        print(letras)

        f = open("combinations.txt", 'w')
        k = list(itertools.permutations(letras, int(length)))

        for i in k:
            for l in i:
                f.write(l)
            f.write('\n')
        f.close()

        h = []
        with open('combinations.txt') as hai:
            h = [word.lower().strip() for word in hai]

        dicionario = {}
        for o in h:
            if o not in dicionario:
                dicionario[o] = 0
            else:
                dicionario[o] += 1

        lista = []

        for l in dicionario:
            v = list(swn.senti_synsets(l))
            if v:
                lista.append(l)

        print(lista)

        datos = {}

        datos['length'] = length
        datos['palabras'] = letras
        datos['lista'] = lista

        return HttpResponse(json.dumps(datos), content_type='application/json')

    else:
        return HttpResponse(json.dumps({'error': 'error'}),
                            content_type='application/json')
    def runLexicalAnalysis(self, file):
        lemmatizer = WordNetLemmatizer()
        individual_scores = []
        multiple_scores = []
        # warning = False

        for tag in file:
            # if ("not" or "n't" in tag[0].lower()) and (not warning):
            #   self.output += "*** WARNING NEGATION DETECTED ***" + "\n" + \
            #                  "Lexical approach may not handle negation well. " + \
            #                  "As a result, this sentiment score may not be accurate" + "\n\n"
            #   warning = True

            lemma = lemmatizer.lemmatize(tag[0])
            if tag[1].startswith('NN'):
                syntag = 'n'
            elif tag[1].startswith('JJ'):
                syntag = 'a'
            elif tag[1].startswith('V'):
                syntag = 'v'
            elif tag[1].startswith('RB'):
                syntag = 'r'
            else:
                syntag = ''

            if syntag:  # non-empty check; the extra != '' comparison was redundant
                try:
                    synset = sentiwordnet.senti_synset(lemma + "." + syntag +
                                                       ".01")
                    score = synset.pos_score() - synset.neg_score()
                    individual_scores.append(score)
                except WordNetError:
                    pass

                score = 0
                synsets = list(sentiwordnet.senti_synsets(lemma, syntag))
                if len(synsets) > 0:
                    for syn in synsets:
                        score += syn.pos_score() - syn.neg_score()

                    multiple_scores.append(score / len(synsets))

        individual_score = self.standardizeScores(sum(individual_scores))
        multiple_score = self.standardizeScores(sum(multiple_scores))

        self.output += "Individual Synset Score: " + "{0:.4f}".format(individual_score) + "\n" + \
                       "Multiple Synset Score: " "{0:.4f}".format(multiple_score) + "\n" + \
                       "-------------------------------------------------------------\n"
Example #56
    def superNaiveSentiment(self, review):
        reviewPolarity = 0.0
        exc = 0

        for word in review.lower().split():
            weight = 0.0
            try:
                common_meaning = list(swn.senti_synsets(word))[0]
                if common_meaning.pos_score() > common_meaning.neg_score():
                    weight = weight + common_meaning.pos_score()
                elif common_meaning.pos_score() < common_meaning.neg_score():  # missing () compared the method object itself
                    weight = weight - common_meaning.neg_score()
            except:
                exc = exc + 1
            reviewPolarity = reviewPolarity + weight
        return reviewPolarity
Example #57
def GetWordSentimentScore(taggedword):
    wordtype = GetWordType(taggedword)
    if not wordtype: #if the word type is not included in SentiSynSet
        return False #there exists no sentiment score
    wordscore = swn.senti_synsets(taggedword[0], wordtype)
    pos, neg = 0.0, 0.0
    count = 0
    for val in wordscore:
        pos = pos + val.pos_score()
        neg = neg + val.neg_score() 
        count += 1
    if(pos == 0.0 and neg == 0.0):
        return False #there exists no sentiment score
    else:
        #print('%10s' % taggedword[0],'\t', wordtype, '\t\t', (pos, neg))
        return {'pos':pos, 'neg':neg}   
Example #58
def superNaiveSentiment(review):
    reviewPolarity = 0.0
    numExceptions = 0
    for word in review.lower().split():
        weight = 0.0
        try:
            common_meaning = list(swn.senti_synsets(word))[0]  # generator in NLTK 3.x; IndexError is caught below
            if common_meaning.pos_score() > common_meaning.neg_score():
                weight = weight + common_meaning.pos_score()
            elif common_meaning.pos_score() < common_meaning.neg_score():
                weight = weight - common_meaning.neg_score()
        except:
            numExceptions = numExceptions + 1
        #print "Word: " + word + " weight: " + str(weight)
        reviewPolarity = reviewPolarity + weight
    return reviewPolarity
Example #59
def check_sentiment(doc):
    for token in doc:
        senti_texts = list(swn.senti_synsets(token.text))
        if senti_texts:
            if any([
                    sum([st.pos_score()
                         for st in senti_texts[:5]]) / len(senti_texts[:5]) >
                    0.5,
                    sum([st.neg_score()
                         for st in senti_texts[:5]]) / len(senti_texts[:5]) >
                    0.5,
            ]):

                return 1

    return 0
Example #60
 def get_sentiment_value(self, word, partofspeech):
     try:
         return self.exception_sentim_values[word]
     except KeyError:
         sets_count = 0
         sentiment_value = 0
         for sentisynset in swn.senti_synsets(word, partofspeech):
             sets_count += 1
             sentiment_value += sentisynset.pos_score()  # * sentisynset.obj_score()
             sentiment_value -= sentisynset.neg_score()  # * sentisynset.obj_score()
         if sets_count == 0:
             return 0
         else:
             return sentiment_value / sets_count