Example #1
def objectivity(text):
    objectivity_score = 0  # running positive-minus-negative score for the text
    pairs = pos_tag(text)  # list of (word, Penn tag) tuples for the token list
    pairs = dict(pairs)  # word -> tag lookup (duplicate words keep one tag)
    for item in text:
        # Adjective tags start with J in Penn tags and are 'a' in synsets:
        if pairs[item][0].lower() == 'j':
            synset = wn.synsets(item, 'a')
            # In case the synset does exist
            if synset:
                synset = synset[0]
                sentiSynset = swn.senti_synset(synset.name())
                objectivity_score += sentiSynset.pos_score() - sentiSynset.neg_score()

        # Adverb tags start with R in Penn tags and are 'r' in synsets:
        elif pairs[item][0].lower() == 'r':
            synset = wn.synsets(item, 'r')
            # In case the synset does exist
            if synset:
                synset = synset[0]
                sentiSynset = swn.senti_synset(synset.name())
                objectivity_score += sentiSynset.pos_score() - sentiSynset.neg_score()
    return objectivity_score
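The snippets in this listing omit their imports; a minimal, assumed setup that makes Example #1 runnable is sketched below (note the function, despite its name, accumulates a positive-minus-negative polarity sum):

# Assumed setup for the snippets in this listing; the originals do not show
# their imports. Requires: pip install nltk
import nltk
from nltk import pos_tag
from nltk.corpus import wordnet as wn
from nltk.corpus import sentiwordnet as swn

# one-time corpus downloads
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')
nltk.download('sentiwordnet')

print(objectivity(['happy', 'terrible', 'table']))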
Example #2
def getScore(word, pos):
    # Return (pos_score, neg_score) for the first sense of `word` with POS `pos`
    temp = word + '.' + pos + '.01'
    try:
        sense = swn.senti_synset(temp)  # look the sense up once
        return (sense.pos_score(), sense.neg_score())
    except Exception:  # no such synset for this word/POS
        return (0, 0)
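A quick, assumed usage check of Example #2: the hard-coded '.01' suffix pins the first sense, and any missing word/POS combination falls back to (0, 0):

# hypothetical usage; exact values depend on the SentiWordNet release
print(getScore('good', 'a'))    # scores of the first adjective sense of "good"
print(getScore('xyzzy', 'n'))   # no such synset -> (0, 0)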
Example #3
def swn_score(text):
    """ Calculate score with sentiwordnet library.
        Return score for sentence.
    """
    score = 0.0

    if text is not None:
        # mark negation
        words = mark_negation(text.split())

        # remove stopwords (build the set once instead of once per token)
        stop_words = set(stopwords.words('english'))
        words = [t for t in words if t not in stop_words]

        # select sense for each word
        words_sense = {}
        for word in words:
            clean_word = word.replace('_NEG', '')
            if wn.synsets(clean_word):
                words_sense[word] = wn.synsets(clean_word)[0]

        # calculate score
        for word, sense in words_sense.items():
            pos_score = swn.senti_synset(sense.name()).pos_score()
            neg_score = swn.senti_synset(sense.name()).neg_score()
            if '_NEG' in word:
                pos_score, neg_score = neg_score, pos_score
            score += (pos_score - neg_score)
        if len(words_sense) != 0:
            score /= len(words_sense)
    return score
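Example #3 depends on mark_negation from nltk.sentiment.util, which appends _NEG to every token that follows a negation word; that suffix is why the positive and negative scores are swapped above. A minimal sketch of the assumed imports and the helper's effect:

# assumed imports for swn_score (not shown in the original)
from nltk.sentiment.util import mark_negation
from nltk.corpus import stopwords
from nltk.corpus import wordnet as wn, sentiwordnet as swn

print(mark_negation("this is not a good movie".split()))
# -> ['this', 'is', 'not', 'a_NEG', 'good_NEG', 'movie_NEG']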
def get_senti_score(sentence):
    token = nltk.word_tokenize(sentence)
    tagged = nltk.pos_tag(token)

    avg_senti_score = 0
    num_senti_words = 0

    for pair in tagged:
        word = pair[0]
        tag = convert_tag(pair[1])
        if tag != '':
            senti_word_1 = word + '.' + tag + '.01'
            senti_word_2 = word + '.' + tag + '.02'
            senti_word_3 = word + '.' + tag + '.03'
            try:
                senti_score_1 = sentiwordnet.senti_synset(senti_word_1)
                senti_score_2 = sentiwordnet.senti_synset(senti_word_2)
                senti_score_3 = sentiwordnet.senti_synset(senti_word_3)
                senti_score_pos = (senti_score_1.pos_score() + senti_score_2.pos_score() + senti_score_3.pos_score()) / 3
                senti_score_neg = (senti_score_1.neg_score() + senti_score_2.neg_score() + senti_score_3.neg_score()) / 3
                avg_senti_score += (senti_score_pos - senti_score_neg)
                num_senti_words += 1
            except Exception:
                pass  # one of the senses .01-.03 is missing; skip this word

    if num_senti_words > 0:
        avg_senti_score /= num_senti_words

    if avg_senti_score >= 0:
        adjusted_score = math.ceil(avg_senti_score * 100)
    else:
        adjusted_score = math.floor(avg_senti_score * 100)

    return adjusted_score
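convert_tag is defined elsewhere; a plausible, hypothetical Penn-Treebank-to-WordNet mapping that matches the guard above (returning '' for unmapped tags) would be:

def convert_tag(penn_tag):
    # hypothetical helper: first letter of a Penn tag -> WordNet POS letter
    return {'N': 'n', 'V': 'v', 'J': 'a', 'R': 'r'}.get(penn_tag[:1], '')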
Example #5
import itertools

def getSimilarityScore(word1, word2):
    word1synsets = wn.synsets(word1)
    word2synsets = wn.synsets(word2)
    pathsimlist = []
    for sset1, sset2 in itertools.product(word1synsets, word2synsets):
        pathsim = sset1.path_similarity(sset2)
        #wupsim = sset1.wup_similarity(sset2)

        sense1 = swn.senti_synset(sset1.name())
        pol1 = 1 if sense1.pos_score() > sense1.neg_score() else -1

        sense2 = swn.senti_synset(sset2.name())
        pol2 = 1 if sense2.pos_score() > sense2.neg_score() else -1

        # note: bails out with the scores collected so far as soon as a
        # sense pair disagrees in polarity
        if pol1 != pol2:
            return pathsimlist

        if pathsim is not None:
            pathsimlist.append(pathsim)
            #print("Path Sim Score:", pathsim, "WUP Sim Score:", wupsim,
            #      sset1, sset2, sset1.definition(), sset2.definition())

    return pathsimlist
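A small, assumed driver for Example #5; because the function stops collecting as soon as a sense pair disagrees in polarity, the returned list may be cut short or empty:

# hypothetical usage
sims = getSimilarityScore('happy', 'glad')
print(len(sims), sims[:3])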
Example #6
def sentiment_weight(tokens_pos):
    pos_senti = []
    neg_senti = []

    # (Penn-tag group, WordNet POS letter, whether to stem the token first)
    pos_groups = [(nouns, 'n', False), (adjs, 'a', False),
                  (verbs, 'v', True), (adverbs, 'r', True)]

    for token, pos in tokens_pos:
        logging.debug("Senti: {} {}".format(token, pos))
        for group, wn_pos, stem in pos_groups:
            if pos in group and len(token) > 1:
                term = stemmer.stem(token) if stem else token
                for n in range(7, 0, -1):  # try senses .7 down to .1
                    try:
                        senti_term = swn.senti_synset('{}.{}.{}'.format(term, wn_pos, n))
                        sentiment_score(pos_senti, neg_senti, senti_term)
                        logging.debug("Sentiment {} {} {} {} {}".format(
                            wn_pos, senti_term, senti_term.pos_score(),
                            senti_term.neg_score(), senti_term.obj_score()))
                        #break
                    except Exception:  # sense does not exist
                        pass

    positive = 0
    negative = 0
    pos_sum = np.sum(pos_senti)
    neg_sum = np.sum(neg_senti)

    # normalizing intensity of polarity
    if not np.isnan(pos_sum) and not np.isnan(neg_sum) and pos_sum + neg_sum > 0:
        positive = pos_sum / (pos_sum + neg_sum)
        negative = neg_sum / (pos_sum + neg_sum)

    logging.debug("Weights:  Positive Feedbacks: {} Negative Feedbacks: {} "
                  "Positive Weight: {} Negative Weight: {}".format(
                      pos_senti, neg_senti, positive, negative))
    # note: only the positive weight is returned; `negative` is logged above
    return positive
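Example #6 references several names defined elsewhere in its project; plausible, hypothetical stand-ins so the snippet can run:

import logging
import numpy as np
from nltk.stem import PorterStemmer
from nltk.corpus import sentiwordnet as swn

# hypothetical Penn-tag groupings and helpers assumed by sentiment_weight
nouns = {'NN', 'NNS', 'NNP', 'NNPS'}
adjs = {'JJ', 'JJR', 'JJS'}
verbs = {'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'}
adverbs = {'RB', 'RBR', 'RBS'}
stemmer = PorterStemmer()

def sentiment_score(pos_senti, neg_senti, senti_term):
    # hypothetical: collect each sense's scores into the running lists
    pos_senti.append(senti_term.pos_score())
    neg_senti.append(senti_term.neg_score())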
Example #7
def calc_sentiment_polarity(word_list):
    # tags_dict = {"N": "n", "V": "v", "J": "a", "R": "r"}
    offsets = ["01", "02", "03", "04", "05", "06", "07", "08", "09"]
    tags = ["n", "v", "a", "r"]

    pos_scores = []
    neg_scores = []

    # lemmatizer = WordNetLemmatizer()

    for i, word in enumerate(word_list):
        try:
            word, tag = tuple(word.split("."))
            s = None
            for offset in offsets:
                query = word + "." + tag + "." + offset
                try:
                    s = swn.senti_synset(query)
                    break
                except:
                    continue

            if s is None:
                _tags = list(tags)
                _tags.remove(tag)
                for tag in _tags:
                    for offset in offsets:
                        query = word + "." + tag + "." + offset
                        try:
                            s = swn.senti_synset(query)
                            break
                        except:
                            continue

                    if s is not None:
                        break

            if s is None:
                neg_scores.append(0)
                pos_scores.append(0)
            elif i == 0 or word_list[i - 1] != "not":
                neg_scores.append(s.neg_score())
                pos_scores.append(s.pos_score())
            else:
                neg_scores.append(-s.neg_score())
                pos_scores.append(-s.pos_score())
        except (KeyError, ValueError):  # ValueError: token without a ".tag" suffix
            pass

    if len(pos_scores) > 0:
        sentiment = (np.sum(pos_scores) - np.sum(neg_scores)) / len(pos_scores)
        # negate the scores explicitly: `neg_scores * -1` on a Python list is
        # an empty list, which silently dropped them from the spread
        polarity = np.std(np.append(pos_scores, np.negative(neg_scores)))
    else:
        sentiment = 0
        polarity = 0
    return sentiment, polarity
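Example #7 expects tokens already suffixed with a WordNet POS letter, with a bare "not" flipping the sign of the following word's scores; an assumed call:

# hypothetical usage: tokens carry a '.pos' suffix
sentiment, polarity = calc_sentiment_polarity(['not', 'good.a', 'movie.n'])
print(sentiment, polarity)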
def polarity_score_2(text_syn_set_list):
    scores = []
    for s in text_syn_set_list:
        sense = swn.senti_synset(s.name())  # look the sense up once
        scores.append(sense.pos_score() - sense.neg_score())
    arr = np.array(scores)
    return round(arr.mean(), 2)
Example #9
def attachToCausalVerbDict(causalVerb, causalVerbDict):  # helper for attaching
    # a particular causal verb to our dictionary
    givenVerbDictionary = {}
    # extract the synset name of the causal verb
    causalVerb = causalVerb.name()
    # add positive, negative, and objective scores to this
    sense = swn.senti_synset(causalVerb)  # look the sense up once
    givenVerbDictionary["posScore"] = sense.pos_score()
    givenVerbDictionary["negScore"] = sense.neg_score()
    givenVerbDictionary["objScore"] = sense.obj_score()
    # then attach that dictionary to the causal verb dict
    causalVerbDict[causalVerb] = givenVerbDictionary
Example #10
def search_swn(word):
    try:
        # note: the noun sense is commented out of the positive sum but not
        # the negative one, so the two sums are asymmetric as written
        sum_positive = (swn.senti_synset(word + ".a.01")).pos_score() # + (swn.senti_synset(word + ".n.01")).pos_score()
        sum_negative = (swn.senti_synset(word + ".a.01")).neg_score() + (swn.senti_synset(word + ".n.01")).neg_score()

        if sum_positive > sum_negative:
            return sum_positive
        elif sum_positive < sum_negative:
            return sum_negative * (-1)
        return 0
    except Exception:  # word lacks one of the required senses
        return 0
Example #11
    def get_sentiment_of_word(word, lemma, pos):

        synsets = wordnet.synsets(word, pos=pos)

        if len(synsets) != 0:

            memorized_synset_01 = None
            check_boolean_01 = False

            memorized_synset_rest = None
            check_boolean_rest = False

            list_of_numbers = [
                '04', '02', '03', '05', '06', '07', '08', '09', '10', '11',
                '12'
            ]

            for synset in synsets:
                synset_split = synset.name().split(".")
                if synset_split[0] == lemma:
                    swn_synset = sentiwordnet.senti_synset(synset.name())
                    pos_score = swn_synset.pos_score()
                    neg_score = swn_synset.neg_score()

                    if pos_score > neg_score:
                        return [1, 0, 0]
                    elif neg_score > pos_score:
                        return [0, 1, 0]
                    else:
                        return [0, 0, 1]
                if synset_split[2] == '01' and not check_boolean_01:
                    memorized_synset_01 = synset
                    check_boolean_01 = True
                elif (synset_split[2] in list_of_numbers
                      and not check_boolean_rest):
                    memorized_synset_rest = synset
                    check_boolean_rest = True
            if check_boolean_01:
                synset = memorized_synset_01
            else:
                synset = memorized_synset_rest

            if synset is None:  # no sense numbered 01-12 was found
                return [0, 0, 1]

            swn_synset = sentiwordnet.senti_synset(synset.name())
            pos_score = swn_synset.pos_score()
            neg_score = swn_synset.neg_score()

            if pos_score > neg_score:
                return [1, 0, 0]
            elif neg_score > pos_score:
                return [0, 1, 0]
            else:
                return [0, 0, 1]
        return [0, 0, 1]
def polarity_score_1(s):
    sense = swn.senti_synset(s.name())  # look the sense up once
    pos = sense.pos_score()
    neg = sense.neg_score()
    #print("%s,%s,%s,%s" % (s.name(), pos, neg, sense.obj_score()))
    if pos > neg:
        return pos
    elif neg > pos:
        return neg * -1.0
    return 0.0
Example #13
    def swn_polarity(text):
        # Return a sentiment: 0 = negative, 1 = positive
        sentiment = 0.0
        tokens_count = 0
        str_text = str(text)
        raw_sentences = sent_tokenize(str_text)
        for raw_sentence in raw_sentences:
            tagged_sentence = pos_tag(word_tokenize(raw_sentence))

            for word, tag in tagged_sentence:
                wn_tag = WordNet.penn_to_word_net(tag)
                if wn_tag not in (wn.NOUN, wn.ADJ, wn.ADV):
                    continue

                lemma = WordNet.lemmatizer.lemmatize(word, pos=wn_tag)
                if not lemma:
                    continue

                synsets = wn.synsets(lemma, pos=wn_tag)
                if not synsets:
                    continue

                synset = synsets[0]
                swn_synset = swn.senti_synset(synset.name())

                sentiment += swn_synset.pos_score() - swn_synset.neg_score()
                tokens_count += 1

        if not tokens_count:
            return 0

        if sentiment >= 0:
            return 1

        return 0
Example #14
def get_syn(line: list = None, o: bool = False, s: bool = False) -> float:
    """Return affect, polarity of line.
       Modified via: condor.depaul.edu/ntomuro/courses/NLP594s18
    """
    line = line or []  # avoid a mutable default argument
    subsum, ssum = 0.0, 0.0
    synset = None
    to_wnt = (lambda t: t[0].lower() if t[0] != "J" else "a")
    for (w, t) in gramarye.tag_list(line):
        if t[0] in "RVNJ":  # Technically Middleton...
            w = "not" if w == "n't" else w.lower()
            lemma = wn.morphy(w, to_wnt(t))
            if not lemma:
                continue
            synsets = wn.synsets(lemma, to_wnt(t))
            if not synsets:  # guard: morphy may return a lemma with no synsets
                continue
            synset = synsets[0]
            swn_synset = swn.senti_synset(synset.name())
            subsum += (1 - swn_synset.obj_score())  # Subjectivity
            ssum += (swn_synset.pos_score() - swn_synset.neg_score())
    if o:  # orientation: positive or negative
        return ssum
    elif s:  # note: returns the last synset's name (a str), not a float
        return synset.name() if synset else ""
    else:  # 'subjectivity'
        return subsum / (len(line) or 1.0)
Example #15
def filter_sentiment_words(data, stopwords, senti_words):
    """
    Input: single document (string)
    return string contain sentiment word separated with space
    """

    if isinstance(data, list) or isinstance(data, tuple):
        raise TypeError('Must be string')

    collect = []

    for sentence in nt.sent_tokenize(data):
        for word_tag in nt.pos_tag(nt.word_tokenize(sentence)):
            word, tag = word_tag
            if tag in constants.POS_LIST.keys() and (word not in stopwords):
                if word in senti_words:
                    collect.append(stemming(word))
                else:
                    sen_sets = wn.synsets(word,
                                          pos=constants.POS_LIST.get(tag))
                    if sen_sets:
                        a = swn.senti_synset(sen_sets[0].name())
                        if a:
                            if a.obj_score() <= 0.7:
                                collect.append(stemming(word))
    return ' '.join(list(set(collect)))
def swn_polarity(text):

    sentiment = 0.0

    stop_words = set(stopwords.words("english"))
    raw_sentences = sent_tokenize(text)
    for raw_sentence in raw_sentences:
        tagged_sentence = pos_tag(word_tokenize(raw_sentence))

        for word, tag in tagged_sentence:

            if word not in stop_words:

                wn_tag = penn_to_wn(tag)
                if wn_tag not in (wn.NOUN, wn.ADJ, wn.ADV, wn.VERB):
                    continue

                lemma = lemmatizer.lemmatize(word, pos=wn_tag)
                if not lemma:
                    continue

                synsets = wn.synsets(lemma, pos=wn_tag)
                if not synsets:
                    continue

                # Take the first sense, the most common
                synset = synsets[0]
                swn_synset = swn.senti_synset(synset.name())
                sentiment += swn_synset.pos_score() - swn_synset.neg_score()

    return sentiment
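Several examples in this listing call a penn_to_wn (or similarly named) helper without showing it; the usual, assumed implementation maps Penn tag prefixes onto the WordNet constants used in the membership tests:

from nltk.corpus import wordnet as wn

def penn_to_wn(tag):
    # assumed helper: Penn Treebank tag prefix -> WordNet POS constant
    if tag.startswith('J'):
        return wn.ADJ
    if tag.startswith('N'):
        return wn.NOUN
    if tag.startswith('R'):
        return wn.ADV
    if tag.startswith('V'):
        return wn.VERB
    return None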
Example #17
	def makesense(self,word):
		if self.cache.get(word,-1)==-1:
			x1=wn.synsets(word)			#finding the similar wordnet synsets
			lenx=len(x1)
			pos1=0
			neg1=0
			sc=0
			res=[]

			if(lenx>5):
				lenx=5
			for i in range(lenx):		#loop to add the positive and negative scores for each synset
				x2=x1[i].name()
				y1=swn.senti_synset(x2)
				try:
					pos2=y1.pos_score()
					neg2=y1.neg_score()
					sc=sc+1
					pos1=pos1+pos2
					neg1=neg1+neg2

				except AttributeError:
					continue
			if (sc!=0):
				positive=pos1/sc
				negative=neg1/sc
			else:
				positive=0
				negative=0
			res.append(positive)
			res.append(negative)
			mytup=tuple(res)
			self.cache[word]=mytup		#store the result so the cache is actually used
			return mytup
		else:
			return self.cache[word]
Example #18
def analyze_headline(doc):

    prominence = 0
    for name in doc.ents:
        if name.label_ in ('PERSON', 'ORG'):
            # print('prominence:', name.text)
            prominence += 1

    def match_synset(pos, lemma, synset):
        if SYNSET_POS[pos] != synset.pos():
            return False
        if lemma not in set(synset.lemma_names()):
            return False
        return True

    def average_score(scores):
        n = len(scores)
        return (
            sum(s[0] for s in scores) / n,
            sum(s[1] for s in scores) / n,
            sum(s[2] for s in scores) / n,
        )

    sentiments = []
    for token in doc:
        if token.pos_ not in ('NOUN', 'VERB', 'ADJ', 'ADV'):
            continue
        synsets = token._.wordnet.synsets()
        synsets = [
            s for s in synsets if match_synset(token.pos_, token.lemma_, s)
        ]
        sentis = [sentiwordnet.senti_synset(s.name()) for s in synsets]
        scores = [(s.pos_score(), s.obj_score(), s.neg_score())
                  for s in sentis]
        if scores:
            sentiments.append(average_score(scores))

    superlativeness = 0
    for token in doc:
        if token.pos_ == 'ADJ':
            if token.tag_ in ('JJR', 'JJS'):
                # print('superlativeness:', token.text)
                superlativeness += 1
        if token.pos_ == 'ADV':
            if token.tag_ in ('RBR', 'RBS'):
                # print('superlativeness:', token.text)
                superlativeness += 1

    def aggregate_sentiments(sentiments):
        result = 0
        for pos, obj, neg in sentiments:
            # a) pos + obj + neg == 1
            # b) we are interested only in degree in of emotion
            # c) more emotional words => more emotional degree => let it be additive
            result += pos + neg
        return result

    sentiment = aggregate_sentiments(sentiments)

    return (prominence, sentiment, superlativeness)
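SYNSET_POS in Example #18 is defined elsewhere; a plausible, hypothetical spaCy-to-WordNet mapping is below. One caveat: satellite adjectives report synset.pos() == 's', so they would fail the equality check in match_synset under this mapping.

SYNSET_POS = {'NOUN': 'n', 'VERB': 'v', 'ADJ': 'a', 'ADV': 'r'}  # hypothetical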
Example #19
def swn_process_tweet(tweet):
    tokens = word_tokenize(tweet)
    # tag words with noun, adj, adv, or verb
    tagged_words = pos_tag(tokens)
    score = 0.0

    for word, tag in tagged_words:
        # convert tag to sentiwordnet
        swn_tag = convert_tag(tag)
        if swn_tag not in (wn.NOUN, wn.ADJ, wn.ADV):
            continue  # skip unscorable words instead of aborting the tweet

        lemma = lemmatizer.lemmatize(word, pos=swn_tag)
        if not lemma:
            continue

        synsets = wn.synsets(lemma, pos=swn_tag)
        if not synsets:
            continue

        # use the first, most common sense
        synset = synsets[0]
        swn_synset = swn.senti_synset(synset.name())
        # calculate the score
        score += swn_synset.pos_score() - swn_synset.neg_score()

    return score  # moved out of the loop so every word contributes
Example #20
def swn_polarity(text):
    """
    Return a sentiment polarity: 0 = negative, 1 = positive
    """
    sentiment = 0.0
    tokens_count = 0
    raw_sentences = sent_tokenize(text)
    for raw_sentence in raw_sentences:
        tagged_sentence = pos_tag(word_tokenize(raw_sentence))
        for word, tag in tagged_sentence:
            wn_tag = penn_to_wn(tag)
            if wn_tag not in (wn.NOUN, wn.ADJ, wn.ADV):
                continue
            lemma = lemmatizer.lemmatize(word, pos=wn_tag)
            #print('lemma: '+lemma)
            if not lemma:
                continue
            synsets = wn.synsets(lemma, pos=wn_tag)
            #print('synsets: '+str(synsets))
            if not synsets:
                continue
            # Take the first sense, the most common
            synset = synsets[0]
            swn_synset = swn.senti_synset(synset.name())
            sentiment += swn_synset.pos_score() - swn_synset.neg_score()
            tokens_count += 1
    # judgment call ? Default to positive or negative
    if not tokens_count:
        return 0
    # sum greater than 0 => positive sentiment
    return sentiment
Example #21
def is_sentiment(tokens):
    """
    Check if the headline is sentiment
    Access to sentiWord using nltk
    :param tokens: Spacy Doc object: list of tokens
    :return: flag if header is sentiment
    """
    for token in tokens:
        token_pos = token.pos_
        if token_pos in SENTI_WORD_POS:
            # prepare string to nltk format (e.g 'word.a.01')
            word_pos = '{}.{}.0'.format(token.text, SENTI_WORD_POS[token_pos])
            sent_scores = []
            for i in range(5):
                # get the scores of first 5 values if exists
                try:
                    breakdown = swn.senti_synset('{}{}'.format(
                        word_pos, i + 1))
                    # nltk response: positive, negative, objective
                    # objective = 1 - (positive + negative)
                    # the word is sentiment if it is not objective
                    sent_scores.append(1.0 - breakdown.obj_score())
                except WordNetError:
                    break
            if sent_scores:
                score = sum(sent_scores) / len(sent_scores)
                if score > 0.5:
                    return True
    return False
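SENTI_WORD_POS in Example #21 is likewise external; it presumably maps spaCy POS labels to the one-letter codes used in the 'word.a.01' query format (hypothetical):

SENTI_WORD_POS = {'ADJ': 'a', 'ADV': 'r', 'NOUN': 'n', 'VERB': 'v'}  # hypothetical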
def swn_polarity(text):
    """
    Code inspiré d'un lien internet https://nlpforhackers.io/sentiment-analysis-intro/
    Calcul le nombre de mots positifs et négatif dans un text ainsi que le score de sentiment total du text
    """
    sentiment = 0.0
    nombre_mot_negatif = 0
    nombre_mot_positif = 0

    tokens = nltk.word_tokenize(text)
    words_tags = nltk.pos_tag(tokens)
    for word, tag in words_tags:
        wn_tag = penn_to_wn(tag)
        if wn_tag not in (wn.NOUN, wn.ADJ, wn.ADV):
            continue

        synsets = wn.synsets(word, pos=wn_tag)
        if not synsets:
            continue

        # Take the first sense, the most common
        synset = synsets[0]
        swn_synset = swn.senti_synset(synset.name())

        sentiment += swn_synset.pos_score() - swn_synset.neg_score()
        if swn_synset.pos_score() - swn_synset.neg_score() > 0:
            nombre_mot_positif += 1

        elif swn_synset.pos_score() - swn_synset.neg_score() < 0:
            nombre_mot_negatif += 1

    return sentiment, nombre_mot_positif, nombre_mot_negatif
Example #23
def printSWNresult(synsetName):
    swn_synset = swn.senti_synset(synsetName)
    word_sent = swn_synset.pos_score() - swn_synset.neg_score()
    print("---SWN results----")
    print("Positive score = " + str(swn_synset.pos_score()))
    print("Negative score = " + str(swn_synset.neg_score()))
    print("Sentiment = " + str(word_sent))
Example #24
def get_scores(word, postag):
    count = 0
    pos_score = 0
    neg_score = 0
    obj_score = 0
    tag = postag_to_sentiwordnet_tag_restricted(postag)
    if tag is None:
        return -1,-1,-1
    try:
        for i in range(1,4):
            word_scores = swn.senti_synset(word+'.'+tag+'.0'+str(i))
            if word_scores is None:
                return -1,-1,-1
            # print word_scores
            # print word_scores.obj_score()
            pos_score += word_scores.pos_score()
            neg_score += word_scores.neg_score()
            obj_score += word_scores.obj_score()
            count += 1
    except WordNetError:
        print("oops")  # one of the senses .01-.03 does not exist
    if count > 0:
        pos_score /= count
        neg_score /= count
        obj_score /= count
    return pos_score,neg_score,obj_score
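An assumed check of Example #24, taking postag_to_sentiwordnet_tag_restricted to map 'JJ' to 'a' (that helper is not shown):

# hypothetical usage
print(get_scores('good', 'JJ'))  # averaged over adjective senses .01-.03
print(get_scores('good', 'CC'))  # unmapped tag -> (-1, -1, -1)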
def polarity_score(text_syn_set_list):
    scores = []
    for s in text_syn_set_list:
        sense = swn.senti_synset(s.name())  # look the sense up once
        pos_score = sense.pos_score()
        neg_score = sense.neg_score()
        obj_score = sense.obj_score()
        if pos_score > neg_score and pos_score > obj_score:
            scores.append(1)
        elif neg_score > pos_score and neg_score > obj_score:
            scores.append(-1)
        else:
            scores.append(0)
    arr = np.array(scores)
    return round(arr.mean(), 1)
    def filter_sentiment_words(self, data):
        '''
        Input:
        -----
        data: single document (string)

        Output:
        ------
        return string containing sentiment words separated with spaces
        '''

        if isinstance(data, list) or isinstance(data, tuple):
            raise CustomError('Must be string')
        collect = []
        for sentence in nt.sent_tokenize(data):
            for word_tag in nt.pos_tag(nt.word_tokenize(sentence)):
                word, tag = word_tag
                if tag in POS_LIST.keys() and (word not in self.stopwords):
                    if word in self.senti_m_words:
                        collect.append(self._stemming(word))
                    else:
                        sen_sets = wn.synsets(word, pos=POS_LIST.get(tag))
                        if sen_sets:
                            a = swn.senti_synset(sen_sets[0].name())
                            if a:
                                if a.obj_score() <= 0.7:
                                    collect.append(self._stemming(word))
        return ' '.join(list(set(collect)))
def makesense(word):
	x1=wn.synsets(word)			#finding the similar wordnet synsets
	lenx=len(x1)
	pos1=0
	neg1=0
	sc=0
	res=[]
	
	if(lenx>5):
		lenx=5
	for i in range(lenx):		#loop to add the positive and negative scores for each synset
		x2=x1[i].name()		
		y1=swn.senti_synset(x2)
		try:
			pos2=y1.pos_score()
			neg2=y1.neg_score()
			sc=sc+1			
			pos1=pos1+pos2
			neg1=neg1+neg2

		except AttributeError:
			continue
	if (sc!=0):
		positive=pos1/sc
		negative=neg1/sc
	else:	
		positive=0
		negative=0
	res.append(positive)
	res.append(negative)
	mytup=tuple(res)
	return mytup
    def get_sentiment(word, tag):
        """
        From : https://stackoverflow.com/questions/38263039/sentiwordnet-scoring-with-python
        returns list of pos neg and objective score. But returns empty list if not present in senti wordnet.
        """
        wn_tag = penn_to_wn(tag)
        if wn_tag not in (wn.NOUN, wn.ADJ, wn.ADV):
            return []
        # Lemmatization : Canonical lexical form (better -> good, walking -> walk, was -> be)
        lemmatizer = WordNetLemmatizer()
        lemma = lemmatizer.lemmatize(word, pos=wn_tag)
        if not lemma:
            return []
        synsets = wn.synsets(word, pos=wn_tag)
        if not synsets:
            return []
        # Take the first sense, the most common
        synset = synsets[0]
        swn_synset = swn.senti_synset(synset.name())

        return [
            swn_synset.pos_score(),
            swn_synset.neg_score(),
            swn_synset.obj_score()
        ]
def swn_polarity(sentence):

    sentiment = 0.0
    tokens_count = 0

    tagged_sentence = pos_tag(word_tokenize(sentence))
    for word, tag in tagged_sentence:
        # include all in denominator for comparison
        tokens_count += 1
        wn_tag = penn_to_wn(tag)
        if wn_tag not in (wn.NOUN, wn.ADJ, wn.ADV):
            continue
        lemma = lemmatizer.lemmatize(word, pos=wn_tag)
        if not lemma:
            continue
        synsets = wn.synsets(lemma, pos=wn_tag)
        if not synsets:
            continue
        # Take the first sense, the most common
        synset = synsets[0]
        swn_synset = swn.senti_synset(synset.name())
        sentiment += swn_synset.pos_score() - swn_synset.neg_score()

    if tokens_count > 0:
        # average sentiment
        return sentiment / tokens_count
    else:
        return np.nan
def get_sentiwordnet(pos_text, pos):
    import nltk
    from nltk.corpus import wordnet as wn
    from nltk.corpus import sentiwordnet as swn
    feat = []
    for key, val in enumerate(pos):
        pos, neg, pos_neg, neg_neg, POS_pos, POS_neg = 0,0,0,0, {'A':0, 'V':0, 'R':0, 'N':0}, {'A':0, 'V':0, 'R':0, 'N':0}
        for key3, val3 in enumerate(val):
            if val3 in 'AVRN':
                text = pos_text[key][key3].strip("_NEG")
                synsets = wn.synsets(text, val3.lower())  # (dropped the Python 2 .decode call)
                try:
                    sense = synsets[0]
                except IndexError:  # word has no synsets
                    continue
                k = swn.senti_synset(sense.name())
                if k is not None:
                    if pos_text[key][key3].endswith("_NEG"):
                        pos_neg += k.pos_score()
                        neg_neg += k.neg_score()
                        POS_neg[val3] += 1
                    else:
                        pos += k.pos_score()
                        neg += k.neg_score()
                        POS_pos[val3] += 1
        feat.append([pos, neg, pos_neg, neg_neg, pos + neg + pos_neg + neg_neg,
                     sum(POS_pos.values()) + sum(POS_neg.values())]
                    + list(POS_pos.values()) + list(POS_neg.values()))  # list() for Python 3
    return np.array(feat)
Example #31
def get_sentiment(word, tag):
    wn_tag = penn_to_wn(tag)

    if wn_tag not in (wn.NOUN, wn.ADJ, wn.ADV):
        return []

    # Lemmatization
    lemma = lemmatizer.lemmatize(word, pos=wn_tag)
    if not lemma:
        return []

    # Synset is a special kind of a simple interface that is present in NLTK to look up words in WordNet.
    # Synset instances are the groupings of synonymous words that express the same concept.
    # Some of the words have only one Synset and some have several.
    synsets = wn.synsets(word, pos=wn_tag)
    if not synsets:
        return []

    # Take the first sense, the most common
    synset = synsets[0]
    swn_synset = swn.senti_synset(synset.name())

    return [
        synset.name(),
        swn_synset.pos_score(),
        swn_synset.neg_score(),
        swn_synset.obj_score()
    ]

Example #32
def sentence_score(text, threshold=0.75, wsd=word_sense_cdf):
    '''Classifies a phrase according to sentiment analysis based
    on WordNet and SentiWordNet. It also computes a thresholded 
    score by ignoring strongly objective words.'''
    tagged_words = pos_tag(text)

    obj_score = 0  # objectivity score
    pos_score = 0  # positive score
    neg_score = 0  # negative score
    pos_score_thr = 0
    neg_score_thr = 0

    for word in tagged_words:
        #     print word
        if 'punct' not in word:
            sense = wsd(word['word'], text, wordnet_pos_code(word['pos']))
            if sense is not None:
                sent = swn.senti_synset(sense.name())
                if sent is not None and sent.obj_score() != 1:
                    obj_score = obj_score + float(sent.obj_score())
                    pos_score = pos_score + float(sent.pos_score())
                    neg_score = neg_score + float(sent.neg_score())
                    if sent.obj_score() < threshold:
                        pos_score_thr = pos_score_thr + float(sent.pos_score())
                        neg_score_thr = neg_score_thr + float(sent.neg_score())

    return (pos_score - neg_score, pos_score_thr - neg_score_thr)
Example #33
def get_sentiment(word, tag, mode):

    if mode == 1:
        wn_tag = penn_to_wn_adj(tag)
        if wn_tag not in (wn.ADJ, wn.ADV):
            return []
    else:
        wn_tag = penn_to_wn_noun(tag)
        # note: this branch still filters for ADJ/ADV even though the tag came
        # from the noun mapping; that looks suspicious but is kept as written
        if wn_tag not in (wn.ADJ, wn.ADV):
            return []

    lemma = lemmatizer.lemmatize(word, pos=wn_tag)
    if not lemma:
        return []

    synsets = wn.synsets(word, pos=wn_tag)
    if not synsets:
        return []

    synset = synsets[0]
    swn_synset = swn.senti_synset(synset.name())

    return [
        swn_synset.pos_score(),
        swn_synset.neg_score(),
        swn_synset.obj_score()
    ]
Example #34
def sentence_score(text, threshold = 0.75, wsd = word_sense_cdf):
    '''Classifies a phrase according to sentiment analysis based
    on WordNet and SentiWordNet. It also computes a thresholded 
    score by ignoring strongly objective words.'''
    tagged_words = pos_tag(text)

    obj_score = 0  # objectivity score
    pos_score = 0  # positive score
    neg_score = 0  # negative score
    pos_score_thr = 0
    neg_score_thr = 0

    for word in tagged_words:
    #     print word
        if 'punct' not in word :
            sense = wsd(word['word'], text, wordnet_pos_code(word['pos']))
            if sense is not None:
                sent = swn.senti_synset(sense.name())
                if sent is not None and sent.obj_score() != 1:
                    obj_score = obj_score + float(sent.obj_score())
                    pos_score = pos_score + float(sent.pos_score())
                    neg_score = neg_score + float(sent.neg_score())
                    if sent.obj_score() < threshold:
                        pos_score_thr = pos_score_thr + float(sent.pos_score())
                        neg_score_thr = neg_score_thr + float(sent.neg_score())

    return (pos_score - neg_score, pos_score_thr - neg_score_thr)
Example #35
def swn_polaridade(text):
    text = str(text)  # (the .encode().decode() round-trip was a no-op)
    sentiment = 0.0
    tokens_c = 0.0
    text = clean_text(text)
    raw_texts = sent_tokenize(text)
    for raw_text in raw_texts:
        tag_text = pos_tag(word_tokenize(raw_text))
        # print(tag_text)
        for word, tag in tag_text:
            wn_tag = swn_pos(tag)
            if wn_tag not in (wn.NOUN, wn.ADJ, wn.ADV):
                continue
            lemma = lemmatizer.lemmatize(word, pos=wn_tag)
            if not lemma:
                continue
            synsets = wn.synsets(lemma, pos=wn_tag)
            if not synsets:
                continue
            synset = synsets[0]
            swn_synset = swn.senti_synset(synset.name())
            sentiment += swn_synset.pos_score() - swn_synset.neg_score()
            tokens_c += 1
    # print(sentiment)
    # moved out of the sentence loop so every sentence contributes
    if not tokens_c:
        return 0
    if sentiment > 0:
        return 'Positivo, Score: ', sentiment
    if sentiment < 0:
        return 'Negativo, Score: ', sentiment
    return 'Neutro, Score: ', sentiment
Example #36
    def swn_polarity(self, text):
        sentiment = 0.0
        tokens = 0

        tagged_sentence = pos_tag(word_tokenize(text))

        for word, tag in tagged_sentence:
            wn_tag = self.penn_to_wn(tag)

            if wn_tag not in (wn.NOUN, wn.ADJ, wn.ADV):
                continue

            lemma = self.lemmatizer.lemmatize(word, pos=wn_tag)
            if not lemma:
                continue
            
            synsets = wn.synsets(lemma, pos=wn_tag)
            if not synsets:
                continue
            
            synset = synsets[0]
            swn_synset = swn.senti_synset(synset.name())

            sentiment += swn_synset.pos_score() - swn_synset.neg_score()
            tokens += 1
        
        if not tokens:
            return 0
        
        if sentiment >= 0:
            return 1

        return 0
Example #37
def get_polarity_type(synset_name):
    swn_synset = swn.senti_synset(synset_name)
    if not swn_synset:
        return None
    elif swn_synset.pos_score() > swn_synset.neg_score() and swn_synset.pos_score() > swn_synset.obj_score():
        return 1
    elif swn_synset.neg_score() > swn_synset.pos_score() and swn_synset.neg_score() > swn_synset.obj_score():
        return -1
    else:
        return 0
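A quick check of Example #37; since obj_score() = 1 - (pos + neg), a sense only counts as polar when its subjective mass outweighs its objectivity:

# hypothetical usage; exact scores depend on the SentiWordNet release
print(get_polarity_type('good.a.01'))   # 1 when the positive score dominates
print(get_polarity_type('table.n.01'))  # 0 for a purely objective sense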
Example #38
    def run_nltk_guess(self, text):
        tokens = tokenize(text)
        doc_counts = {}
        doc_inverse_counts = {}
        scores = {}

        for word in tokens:
            # note: senti_synset expects a 'word.pos.nn' name, and `label` /
            # `doc_counts` must be populated before this loop can run
            word_swn = swn.senti_synset(word)
            word_prob = self.stem_label_count(label, word) / doc_counts[label]
            print(word_prob, word_swn)
Example #39
def sentimentClassify(message):
	message = cleanMessage(message) # Clean.

	# Tokenize. Get meanings.
	tokens = nltk.word_tokenize(message)
	meanings = [nltk.wsd.lesk(tokens,t) for t in tokens]

	# Aggregate score containers.
	aggScorePos = 0
	aggScoreNeg = 0
	aggScoreObj = 0

	# Container for scoring information.
	scoreInfo = { 'tokens': [] }

	# Score.
	for t, m in zip(tokens,meanings):
		# Only process words for which we have meanings.
		if m is None:
			continue
		# Score.
		swnEntry = swn.senti_synset(m.name())
		if swnEntry is None:
			continue
		scoreInfo['tokens'].append((
			t, m.name(),
			swnEntry.pos_score(),								# Positivity
			swnEntry.neg_score(),								# Negativity
			1 - (swnEntry.pos_score() + swnEntry.neg_score())	# Objectivity
		))
		# Aggregates.
		aggScorePos += swnEntry.pos_score()
		aggScoreNeg += swnEntry.neg_score()
		aggScoreObj += 1 - (swnEntry.pos_score() + swnEntry.neg_score())

	# Calculate means.
	scoreInfo['scorePos'] = aggScorePos / max(1,len(scoreInfo['tokens']))
	scoreInfo['scoreNeg'] = aggScoreNeg / max(1,len(scoreInfo['tokens']))
	scoreInfo['scoreObj'] = aggScoreObj / max(1,len(scoreInfo['tokens'])) if len(scoreInfo['tokens']) else 1

	# Perform final classification.
	if (scoreInfo['scorePos'] > scoreInfo['scoreNeg']):
		scoreInfo['class'] = 'Positive'
	elif (scoreInfo['scorePos'] < scoreInfo['scoreNeg']):
		scoreInfo['class'] = 'Negative'
	else:
		scoreInfo['class'] = 'Neutral'

	return scoreInfo, len(tokens)
Example #40
def define_synset(nick,word,synset,target,lang=None,sentiment=False):
		if sentiment:
			sent=swn.senti_synset(synset.name())
			mb.tell("positive: "+str(sent.pos_score())+" negative: "+str(sent.neg_score())+" objective: "+str(sent.obj_score()),target)
		else:
			if lang:
				words=", ".join(synset.lemma_names(lang))
				if words!="":
					mb.tell(nick+', '+word+" in "+lang.upper()+": "+words,target)
				else:
					mb.tell(nick+": no idea",target)
			else:
				mb.tell(nick+', '+word+": "+synset.definition(),target)
		
		return
Example #41
    def getWordSentimentTuple(self, word, pos, wordlist):
        if wordlist != "sentiwordnet":
            raise InvalidDictionaryException("Invalid dictionary " + wordlist + \
                    "please use sentiwordnet")
        else:
            simplePOS = convertPOSTagToSimplePOS(pos)
            if pos:
                wordSense = lesk(self.tokens, word, simplePOS)
                if wordSense: 
                   sentiSynsetWord = swn.senti_synset(wordSense.name())
                   if sentiSynsetWord:
                       return (sentiSynsetWord.pos_score(), 
                               sentiSynsetWord.neg_score(),
                               sentiSynsetWord.obj_score())

        return (0, 0, 0)
    def get_scores_from_senti_synset(self, string_name_of_synset, return_format=tuple):
        """
        Args:
            string_name_of_synset: The string name of the synset that want scores for
            return_format: What kind of object to return. Allowed values are tuple, dict
        Returns:
            On default of tuple returns (positiveScore, negativeScore, objScore)
        """
        breakdown = swn.senti_synset(string_name_of_synset)

        if return_format is tuple:
            return (breakdown.pos_score(), breakdown.neg_score(), breakdown.obj_score())
        elif return_format is dict:
            return {
                'posScore': breakdown.pos_score(),
                'negScore': breakdown.neg_score(),
                'objScore': breakdown.obj_score()
                }
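An assumed usage of the method above, on an instance (here called helper) of its enclosing class:

# hypothetical usage
print(helper.get_scores_from_senti_synset('happy.a.01'))        # tuple form
print(helper.get_scores_from_senti_synset('happy.a.01', dict))  # dict form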
def proccess_sentence(sentencedata,chapter_register):
    wordngrams=chapter_register["word_ngrams"]
    for n in wordngrams:
        if len(sentencedata.words)>n:
            ngrams(wordngrams[n],n,sentencedata.words)
    for x in sentencedata.words:
        words=chapter_register["word_freq_dict"]
        words.plusplus(x,1)
        try:
            sets = wn.synsets(x)
            if len(sets) <= 0:
                break  # note: skips the rest of the sentence; `continue` may be intended
            chapter_register["validwords"]=chapter_register["validwords"]+1
            usage=chapter_register["usage_freq_dict"]
            usages = [inner for outer in sets for inner in outer.usage_domains()]
            for use in usages:
                usage.plusplus(use._name,float(1)/len(usages))
            topic=chapter_register["topic_freq_dict"]
            topics = [inner for outer in sets for inner in outer.topic_domains()]
            for top in topics:
                topic.plusplus(top._name,float(1)/len(topics))
            region=chapter_register["region_freq_dict"]
            regions = [inner for outer in sets for inner in outer.region_domains()]
            for reg in regions:
                region.plusplus(reg._name,float(1)/len(regions))
            sense=chapter_register["sense_dist_dict"]
            sentis = [swn.senti_synset(synset._name) for synset in sets]
            if None in sentis:
                continue
            for sen in sentis:
                sense.plusplus("pos",float(sen.pos_score())/len(sentis))
                sense.plusplus("neg",float(sen.neg_score())/len(sentis))
                sense.plusplus("obj",float(sen.obj_score())/len(sentis))
            sense.plusplus("total",1)
            charngrams = chapter_register["character_ngrams"]
            for n in charngrams:  # was `character_ngrams`, an undefined name
                if len(x) > n:
                    ngrams(charngrams[n], n, "^" + x + "*")
        except BaseException as e:
            print(e)
            print("problem finding word:"+x)
Example #44
forms = ['.a.01', '.a.02']

# initialize subject_words with subjective words from the vocab list
lemmatizer = WordNetLemmatizer()  # build once instead of once per word
for w in words:
	try:
		root = lemmatizer.lemmatize(w)
	except Exception:
		root = w  # fall back to the raw word instead of reusing a stale root
	root = root.strip()  # (dropped the Python 2 unicode normalization step)
	for f in forms:
		j = root + f
		try:
			if (swn.senti_synset(j).obj_score() < 0.5):
				subject_words.append(root)
				break
		except Exception:  # no such adjective sense
			pass

out = open('sword.set', 'w')
dump(subject_words, out)
out.close()
print(len(subject_words))
Example #45
def index_sentiwordnet(request):
    res = HttpResponse(content_type='text/csv')
    res['Content-Disposition'] = 'attachment; filename=listado.csv'
    writer = csv.writer(res)
    #writer.writerow(['id','Tweets','Usuario','Numero Favoritos','Numero Retweets','Nombre'])
    # 'POS','ROOT','Positivity score','Negativity score','Objectivity score'
    writer.writerow(['Tweet Original','Tweet Traducido','POS','ROOT','Positivity Score','Negativity Score','Objectivity Score'])

    if request.POST and request.FILES:
        csvfile = request.FILES['csv_file'].open()  # http://stackoverflow.com/questions/10617286/getting-type-error-while-opening-an-uploaded-csv-file
        #portfolio = csv.DictReader(paramFile)
        portfolio = csv.DictReader(request.FILES['csv_file'].file)

        #print(gs.translate('hello world', 'de'))
        for i in portfolio:
            twett = ""
            twett= twett.join(i['Tweets']).decode('utf8')
            #translation = translator.translate(twett)
            print twett

            b = TextBlob(twett)
            traduccion = ""
            traduccion = traduccion.join(b.translate(to="en"))
            tokens = nltk.word_tokenize(traduccion)
            print traduccion
            print type(traduccion)
            tagged = nltk.pos_tag(tokens)
            #print tagged

            stemmer = SnowballStemmer("english")
            # Tokenize
            tknzr = TweetTokenizer()
            text_token = tknzr.tokenize(traduccion)
            text_token2 = []

            # Stem of each word goes into text_token2
            for i in text_token:
                aux = stemmer.stem(i)
                text_token2.append(aux)

            print(text_token2)
            print(tokens)

            cont = 0
            pos_score = 0
            neg_score = 0
            obj_score = 0
            for i in text_token2:
                # if the stem exists in the dictionary
                n = (lesk(text_token2, i, 'n'))
                if n:
                    x = n.name()
                    #print(wn.synset(x).definition())
                    breakdown = swn.senti_synset(x)
                    pos_score = pos_score + breakdown.pos_score()
                    neg_score = neg_score + breakdown.neg_score()
                    obj_score = obj_score + breakdown.obj_score()
                    cont = cont + 1
                elif n is None:
                    # look up the original word in the dictionary
                    try:
                        n = (lesk(text_token2, text_token[cont], 'n'))
                        x = n.name()
                        breakdown = swn.senti_synset(x)
                        pos_score = pos_score + breakdown.pos_score()
                        neg_score = neg_score + breakdown.neg_score()
                        obj_score = obj_score + breakdown.obj_score()

                        cont = cont + 1
                    except AttributeError:
                        cont = cont + 1
                else:
                    # the word does not exist in the dictionary
                    cont = cont + 1

            print("Positivity: %f" % pos_score)
            print("Negativity: %f" % neg_score)
            print("Objectivity: %f" % obj_score)
            # ,json.dumps(tagged),json.dumps(text_token2),pos_score,neg_score,obj_score
            writer.writerow([twett.encode('utf-8'),traduccion.encode('utf-8'), json.dumps(tagged),json.dumps(text_token2),pos_score,neg_score,obj_score])
        return res

    return render(request, "index2.html", locals())
Example #46
def sentimentClassify(message):
    message = cleanMessage(message)  # Clean.

    # Tokenize. Get meanings.
    sentences = nltk.sent_tokenize(message)
    sentenceTokens = [nltk.word_tokenize(s) for s in sentences]
    posTags = [nltk.pos_tag(st) for st in sentenceTokens]

    # Translate to Wordnet POS tags.
    wnPosTags = []
    for s in posTags:
        wnPosTags.append([(token, posTagToWn(pos)) for (token, pos) in s if posTagToWn(pos) is not None])

        # Get sense. Use lesk() to start; if failed, just take first sense.

    def wsd(tokens, token, pos):
        leskWsd = nltk.wsd.lesk(tokens, token, pos)
        if leskWsd:
            return leskWsd
        defaultWsd = wn.synsets(token, pos)
        if defaultWsd:
            return defaultWsd[0]
        return None

        # This is a big design decision. Do we collapse the sentences into one long set of tokens for WSD?
        # It might be good to do so, as each set of text is about a focused topic and we can understand more by
        # considering it as a whole.
        # On the other hand, it might cause confusion among some words.
        # Decision: combine all text here. If we wanted to classify separately, the following code would have worked:
        # senses = []
        # for s in wnPosTags:
        # 	sentenceSenses = [(token, wsd(tokens,token,pos)) for (token,pos) in s]
        # 	sentenceSenses = [t for t in sentenceSenses if t[1] is not None]
        # 	senses.append(sentenceSenses)

        # Collapse list.

    wnPosTagsFlat = [item for sublist in wnPosTags for item in sublist]
    allTokens = [token for (token, _) in wnPosTagsFlat]

    # Calculate senses.
    senses = [(token, wsd(allTokens, token, pos)) for (token, pos) in wnPosTagsFlat]
    senses = [s for s in senses if s[1] is not None]

    # Aggregate score containers.
    aggScorePos = 0
    aggScoreNeg = 0

    # Score containers for "significant" (obj != 1) tokens.
    significantTokens = 0
    aggSigScorePos = 0
    aggSigScoreNeg = 0

    # Container for scoring information.
    scoreInfo = {"tokens": []}

    # Score.
    for (token, sense) in senses:
        # Score.
        swnEntry = swn.senti_synset(sense.name())
        if swnEntry is None:
            continue
        scoreInfo["tokens"].append((token, sense.name(), swnEntry.pos_score(), swnEntry.neg_score()))
        # Aggregates.
        aggScorePos += swnEntry.pos_score()
        aggScoreNeg += swnEntry.neg_score()
        # Significants.
        if swnEntry.pos_score() > 0 or swnEntry.neg_score() > 0:
            significantTokens += 1
            aggSigScorePos += swnEntry.pos_score()
            aggSigScoreNeg += swnEntry.neg_score()

            # Calculate means.
            # Aggregates.
    scoreInfo["aggScorePos"] = aggScorePos
    scoreInfo["aggScoreNeg"] = aggScoreNeg
    # Means.
    scoreInfo["meanScorePos"] = aggScorePos / max(1, len(scoreInfo["tokens"]))
    scoreInfo["meanScoreNeg"] = aggScoreNeg / max(1, len(scoreInfo["tokens"]))
    # Significants.
    scoreInfo["meanScorePosSig"] = aggSigScorePos / (significantTokens or 1)
    scoreInfo["meanScoreNegSig"] = aggSigScoreNeg / (significantTokens or 1)

    # Perform final classification.
    if scoreInfo["aggScorePos"] > scoreInfo["aggScoreNeg"]:
        scoreInfo["class"] = "Positive"
    elif scoreInfo["aggScorePos"] < scoreInfo["aggScoreNeg"]:
        scoreInfo["class"] = "Negative"
    else:
        scoreInfo["class"] = "Neutral"

    return scoreInfo, len(allTokens)
 def calculate_sentiment(self):
   sentence_positive = 0
   sentence_negative = 0
   sentence_objective = 0
   sentence_subjective = 0
   sentence_total = 0
   sentence_sentiments = []
   entity_sentiments = {}
   for entity in self.entities:
     entity_sentiments[entity] = 0
   #build sentiment score
   past_words = []
   article_sentences = textutility.sentence_tokenize(self.text)
   pos_tokens = textutility.pos_tag(article_sentences)
   for idx, sentence_token in enumerate(pos_tokens):
     sentence_words = textutility.get_lowercase(textutility.get_words(article_sentences[idx]))
     # The pattern.en sentiment() function returns a (polarity, subjectivity)-tuple for the given sentence, based on the adjectives it contains, where polarity is a value between -1.0 and +1.0 and subjectivity between 0.0 and 1.0.
     patternsent(article_sentences[idx])  # note: the returned (polarity, subjectivity) tuple is discarded
     for token in sentence_token:
       word = token[0].strip()
       if word in self.english_stops:
         continue
       sanitized = self.wordnet_sanitize(token[0], token[1])
       if(sanitized[1] is None):
         word_synset = wn.synsets(sanitized[0])
       else:
         word_synset = wn.synsets(sanitized[0], sanitized[1])
       if word_synset:
         word_synset = word_synset[0]
         sentiment_synset = swn.senti_synset(word_synset.name())
         if sentiment_synset:
           sentence_total = ((sentiment_synset.pos_score() - sentiment_synset.neg_score()) * (1 - sentiment_synset.obj_score()))  # weight subjective words; note: '=' overwrites per word, '+=' may be intended
           if(True in past_words):
             sentence_positive += sentiment_synset.neg_score()
             sentence_negative += sentiment_synset.pos_score()
             sentence_total = sentence_total*-1
           else:
             sentence_positive += sentiment_synset.pos_score()
             sentence_negative += sentiment_synset.neg_score()
           sentence_objective += sentiment_synset.obj_score()
           sentence_subjective += (1 - sentiment_synset.obj_score())
       negation = False    
       if(word in self.negation_words or (len(word) > 2 and word[-3:] == "n't")):
         negation = True
       past_words.append(negation)
       if(len(past_words) > 3):
         past_words.pop(0)
     self.total_scores['positive'] += sentence_positive
     self.total_scores['negative'] += sentence_negative
     self.total_scores['objective'] += sentence_objective
     self.total_scores['subjective'] += sentence_subjective
     self.total_scores['aggregate'] += sentence_total
     sentence_sentiments.append(sentence_total)
     for entity in self.entities:
       if entity in sentence_words:
         entity_sentiments[entity] += sentence_total
     sentence_positive = 0
     sentence_negative = 0
     sentence_objective = 0
     sentence_subjective = 0
     sentence_total = 0
   self.total_scores['by_sentences'] = sentence_sentiments
Example #48
def swn_word_score(tup: "tuple or list", special={}, recur=1, count_nouns=True):
    """attempt to find the sentiment value of the given wordnet formatted word
    Sometimes will have to consult synonyms of word

    Parameters
    ----------    
    tup: indexable iterable of length 3
        format: (word, part-of-speech, version)
    
    DEFAULTS:
        special:dict special lexicon dictionary that overrides swn's dictionary
        recur:int how many synonyms are we willing to look at
        count_nouns=true: determines whether or not to include nouns in the scoring
    
    Returns
    -------
    tuple(int, int): 
        (positive_score, negative_score) 
        Will return (None, None) if no scores found
    """
    # type checking
    if not isinstance(recur, int):
        raise TypeError("Expected 'recur' to be of type int but was of type {}".format(type(recur)))
    if recur < 0:
        raise ValueError("'recur' must be >= 0")

    if not count_nouns and tup[1] == "n":
        return (0.0, 0.0)

    # first checks if word is in specialized lexicon
    if tup[0] in special:
        return special[tup[0]]

    # second: looks if have a sentiment for word in sentiwordnet
    wordnet_pos = "nvasr"  # all wordnet pos labels
    ans_obj = None  # senti object with pos and neg scores
    current_pos = tup[1]  # part of speech (used to change weights)
    try:  # see if given tuple as sentiscore
        ans_obj = swn.senti_synset("{}.{}.{}".format(tup[0], tup[1], tup[2]))
    except:
        pass
    if ans_obj is None:  # if not has sentiscore do following process
        senti_list = None
        if tup[1] in wordnet_pos:  # if valid part-of-speech
            senti_list = list(swn.senti_synsets(tup[0], tup[1]))
        if senti_list is None or len(senti_list) == 0:  # if part-of-speech not help
            senti_list = list(swn.senti_synsets(tup[0]))
        if len(senti_list) != 0:
            index = int(tup[2])  # version is the index of the list version of synsets
            ans_obj = senti_list[index] if len(senti_list) > index else senti_list[0]
            current_pos = ans_obj.synset.name()[-4]

    # if not have a sentiment for word, try to find a synonym to score
    if ans_obj is None:
        temp_list = wn.synsets(tup[0])
        if len(temp_list) != 0:
            wn_tup = temp_list[0].name().split(".")
            if wn_tup[0] != tup[0] and recur != 0:
                return swn_word_score(wn_tup, special=special, recur=recur - 1)
    # if all fails, return None, None
    if ans_obj is None:
        return None, None
    if current_pos == "n":
        return ans_obj.pos_score() * gv.noun_weight, ans_obj.neg_score() * gv.noun_weight

    return ans_obj.pos_score(), ans_obj.neg_score()
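An assumed driver for Example #48; gv.noun_weight is module state from the original project:

# hypothetical usage: (word, part-of-speech, version) tuples
print(swn_word_score(('good', 'a', '01')))
print(swn_word_score(('good', 'n', '01'), special={'good': (1.0, 0.0)}))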
Example #49
def sentiScores(word):      # takes a plain word string, returns both + & - values
    # senti_synset() expects a full 'word.pos.nn' name; for a bare word we
    # take the first entry from senti_synsets() instead
    word = list(swn.senti_synsets(word))[0]
    values = []
    values.append(word.pos_score())      # positive value
    values.append(word.neg_score())     # negative value
    return values
Example #50
print "transitivity"
for transitivity_type in transitivity_vector:
    dtm = base.cbind(dtm,transitivitytype=transitivity_type)
dtm_transitivity = base.cbind(dtm,class_label=problem_class_labels)
waikatoWriteArff(base.data_frame(dtm_transitivity),file="problem_transitivity.arff",class_col="class_label")

print "polarity"
polarityInfo = []
problem_none=0
non_problem_none=0
for i,sentence in enumerate(problem_strings+non_problem_strings):
    heads = problem_heads+non_problem_heads
    head,pos = heads[i]
    synset = lesk(sentence.split(),head,penn2morphy(pos))
    if synset:
        scores = swn.senti_synset(synset.name())
        if scores is not None:
            polarityInfo.append([scores.pos_score(), scores.neg_score()])
        else:
            if i<500: problem_none+=1
            elif i>=500: non_problem_none+=1
            polarityInfo.append([0,0])
    else:
        if i<500: problem_none+=1
        elif i>=500: non_problem_none+=1
        polarityInfo.append([0,0])
positive = robjects.Vector([p for p,n in polarityInfo])
negative = robjects.Vector([n for p,n in polarityInfo])
dtm = base.cbind(dtm,positive_sentiment=positive)
dtm = base.cbind(dtm,negative_sentiment=negative)
print "polarity stuff:",problem_none,non_problem_none
Example #51
    obj_score = 0  # objectivity score
    pos_score = 0  # positive score
    neg_score = 0  # negative score
    for word in wordnet_definitions(tag_tweet(line)):
 #   if 'punct' not in word :
        #print word
        #sense = word_sense_disambiguate(word['word'], wordnet_pos_code(word['pos']), review)
        #lesk(sent, word, pos))
        sense = lesk (line,word['word'], wordnet_pos_code(word['pos']))
        #print "1",sense
        if sense is None:
            sense = lesk (line,word['word'])
            #print "2",sense
        if sense is not None:
            #sent = sentiment.senti_synset(sense.name)
            sent = swn.senti_synset(sense.name())
            #print "3" , sent
        
            # Extraction of the scores
            if sent is not None and sent.obj_score() != 1:
                obj_score = obj_score + float(sent.obj_score())
                pos_score = pos_score + float(sent.pos_score())
                neg_score = neg_score + float(sent.neg_score())
                count=count+1
                #print "1", str(sent.pos_score())+ " - "+str(sent.neg_score())+ " - "+ str(sent.obj_score())+" - "+sent.synset.name()
                if sent.obj_score() < threshold:
                    pos_score_tre = pos_score_tre + float(sent.pos_score())
                    neg_score_tre = neg_score_tre + float(sent.neg_score())
                    count_tre=count_tre+1