def getSentics(self, word):
        """Return [pleasantness, attention, sensitivity, aptitude, polarity_intensity] for a word, or zeros if it is not in SenticNet."""
        senticsAndIntensity = []
        sn = SenticNet('en')
        try:
            sentics = sn.sentics(word)
            polarity_intensity = sn.polarity_intense(word)

            senticsAndIntensity.append(float(sentics['pleasantness']))
            senticsAndIntensity.append(float(sentics['attention']))
            senticsAndIntensity.append(float(sentics['sensitivity']))
            senticsAndIntensity.append(float(sentics['aptitude']))
            senticsAndIntensity.append(float(polarity_intensity))

            return senticsAndIntensity

        except Exception:
            # Words missing from SenticNet fall back to a neutral vector.
            return [0.0, 0.0, 0.0, 0.0, 0.0]


# ##TESTING AREA
# yas = SenticValuer()
# print(yas.getSentics("awkward"))
def senticnet(text):
    """
    Returns a list of four SenticNet features averaged over all the words in the text: [pleasantness_value, attention_value, sensitivity_value, aptitude_value]

    :param text: input text pre-processed by spaCy
    :return: a list with the SenticNet features averaged over all the words in text
    """
    list_features = [0] * 4
    sn = SenticNet()
    count_words = 0

    for token in text:
        try:
            concept_info = sn.concept(token)
            list_features[0] += float(concept_info['sentics']['pleasantness'])
            list_features[1] += float(concept_info['sentics']['attention'])
            list_features[2] += float(concept_info['sentics']['sensitivity'])
            list_features[3] += float(concept_info['sentics']['aptitude'])
            count_words += 1
        except KeyError:
            pass

    if count_words != 0:
        list_features = [feature / count_words for feature in list_features]

    return list_features
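# Usage sketch (not from the original source): sn.concept() expects plain strings, so a list of
# lowercased token texts is passed rather than a spaCy Doc; spaCy and its en_core_web_sm model
# are assumed to be installed, along with the SenticNet import used by the function above.
import spacy

nlp = spacy.load("en_core_web_sm")
tokens = [tok.text.lower() for tok in nlp("The battery life is a joy")]
print(senticnet(tokens))  # [avg pleasantness, avg attention, avg sensitivity, avg aptitude]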
def getMaxSum_senti(text):
    """Return (average sentence sentiment, maximum sentence sentiment) for the text, both rounded to 6 decimal places."""
    wnl = WordNetLemmatizer()
    sn = SenticNet()
    sentences = nltk.sent_tokenize(text)

    text_sentiAvg = 0
    sentence_maxSenti = 0

    for index in range(len(sentences)):
        sentence = sentences[index].strip()
        sentence = sentence[0:-1]

        assert '.' not in sentence
        words = nltk.word_tokenize(sentence.lower())
        pos_tags = nltk.pos_tag(words)
        sentence_sentiSum = getSentenceSentiSum(pos_tags, wnl, sn)
        # print sentence_sentiSum,

        if sentence_sentiSum > sentence_maxSenti:
            sentence_maxSenti = sentence_sentiSum

        text_sentiAvg += sentence_sentiSum

    text_sentiAvg = text_sentiAvg / len(sentences)
    text_sentiAvg = round(text_sentiAvg, 6)
    sentence_maxSenti = round(sentence_maxSenti, 6)

    return text_sentiAvg, sentence_maxSenti
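# getSentenceSentiSum is referenced above but not defined in this snippet. The stand-in below is
# a hypothetical sketch of its likely intent (summing SenticNet polarity intensity over
# lemmatized tokens); it is not the original helper.
def getSentenceSentiSum(pos_tags, wnl, sn):
    senti_sum = 0.0
    for word, tag in pos_tags:
        lemma = wnl.lemmatize(word)
        try:
            senti_sum += float(sn.polarity_intense(lemma))
        except KeyError:
            continue  # word not present in SenticNet
    return senti_sum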
Example #4
def doc_sentiment(data):
    """
    Return 1 if the average SenticNet polarity of the words in `data` is non-negative, else 0.
    """
    # Call SenticNet module
    sn = SenticNet()

    # Running total of word polarities
    total_sentiment = 0

    # Calculate sentiment for all words in the document
    for i in range(len(data)):
        # Words missing from the SenticNet vocabulary raise an error;
        # we treat them as having a sentiment of 0.
        try:
            # Look up the polarity of the word
            sentiment = sn.polarity_value(data[i])
            # Update total sentiment
            total_sentiment += float(sentiment)

        except Exception:
            pass

    try:
        # Guard against division by zero for empty documents
        avg_sentiment = total_sentiment / len(data)
    except ZeroDivisionError:
        avg_sentiment = 0

    if avg_sentiment >= 0:
        output = 1
    else:
        output = 0

    return output
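# Illustrative call (not from the original source): doc_sentiment expects a pre-tokenized list
# of words; any word whose polarity cannot be looked up or parsed as a number is skipped.
print(doc_sentiment(["love", "this", "phone"]))  # 1 whenever the average polarity is non-negative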
def sem(d):
    """Return True if `d` has an entry (semantics) in SenticNet, False otherwise."""
    try:
        sn = SenticNet()
        sn.semantics(d)
        return True
    except KeyError:
        return False
Example #6
def Terms_Chooser(data, n_of_words, polarity_threshold):
    sn = SenticNet()

    # Tokenize each review and flatten into a single word list
    data["Content"] = data["Content"].apply(lambda row: nltk.word_tokenize(row))
    lista = list(itertools.chain.from_iterable(data["Content"].values.tolist()))

    # Keep the n_of_words most frequent tokens
    FD = FreqDist(lista)
    MC = FD.most_common(n_of_words)
    common_words = [word for word, _ in MC]

    # Keep only words whose SenticNet polarity intensity exceeds the threshold in absolute value
    polarity = list()
    words = list()

    for x in common_words:
        try:
            temp = sn.polarity_intense(x)
            if abs(float(temp)) > polarity_threshold:
                polarity.append(temp)
                words.append(x)
        except Exception:
            continue

    return words
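# Illustrative call (not from the original source). A toy DataFrame stands in for the real data;
# it assumes pandas, nltk (with the 'punkt' tokenizer data), FreqDist and itertools are available,
# as the function above already relies on them. Terms_Chooser tokenizes data["Content"] in place
# and returns the frequent words whose absolute SenticNet polarity exceeds the threshold.
import pandas as pd

toy = pd.DataFrame({"Content": ["i love this phone", "i hate the battery", "love the camera"]})
print(Terms_Chooser(toy, n_of_words=5, polarity_threshold=0.5))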
Example #7
def fun1(d):
    try:
        from senticnet.senticnet import SenticNet
        sn = SenticNet()
        sn.semantics(d)
        return True
    except KeyError:
        return False
def get_emotions(tokens):
    from senticnet.senticnet import SenticNet
    result = {}
    sn = SenticNet()
    for token in tokens:
        if token in sn.data:
            moodtags = sn.moodtags(token)
            print(token, moodtags)
    # TODO: populate `result` with the per-token mood tags
    return result
 def __init__(self):
     print("Start SenticNet - Sentiment Analysis")
     self.sp = Support()
     self.sn = SenticNet()
     self.corpus = self.sp.import_corpus_bank()
     self.terminology = self.sp.import_bank_terminology(filename='bank_terminology')
     self.data, self.label = self.sp.process_data(filename='bank_message',
                                             size_msg=3,
                                             clean=True,
                                             replace_text=True,
                                             stemmed=None,
                                             lemmatize=None,
                                             spelling=None)
 def __init__(self, *args, **kwargs):
     # loading necessaries
     self.nlp = spacy.load("en_core_web_sm")
     self.senti = SenticNet()
     self.sid = SentimentIntensityAnalyzer()
     #loading dataset
     self.df = pd.read_json("./Sarcasm_Headlines_Dataset.json", lines=True)
     self.df = self.df[:15000]
     self.df.drop(columns="article_link",
                  inplace=True)  # dropping unnecessary attribute
     #storing nlp data in headlines variable
     self.headlines = []
     self.uni_gram = set()
     self.uni_feature = []
     self.y_ = []
     for i in self.df['headline']:
         self.headlines.append(self.nlp(i))
Example #11
    def sentiment_avg(self, text):
        sn = SenticNet('pt')
        list_polarity = []
        temp = text.split()
        qtd_words = len(temp)  # count words, not characters
        avg_n = 0
        for i in range(len(temp)):
            try:
                polarity_value = sn.polarity_value(
                    self.treatment_string(temp[i]))
                list_polarity.append(polarity_value)
            except Exception:
                # word not in SenticNet: exclude it from the average
                qtd_words -= 1

        avg_n = self.avg(list_polarity, qtd_words)
        if avg_n > 0.003 or avg_n < -0.003:
            return True
        else:
            return False
Example #12
 def __init__(self, vocab):
     self.vocab = vocab
     self.X_width = len(vocab)
     self.sn = SenticNet()
def get_clues(text):
    print("*--------(%s)-------------*" % (text))
    print(type(text))
    nlp = StanfordCoreNLP('http://localhost:9001')
    stop_words = set(stopwords.words('english'))
    '''
		Occurrence numbers are appended to each word below; index_to_word maps them back to the bare word.
	'''
    dep_parse = nlp.annotate(text,
                             properties={
                                 'annotators': 'depparse',
                                 'outputFormat': 'json',
                                 'timeout': 10000,
                             })

    pos = nlp.annotate(text,
                       properties={
                           'annotators': 'lemma',
                           'outputFormat': 'json',
                           'timeout': 10000,
                       })

    sn = SenticNet()
    word_to_dep = [{} for i in range(len(dep_parse['sentences']))]
    word_to_par = [{} for i in range(len(dep_parse['sentences']))]
    word_to_pos = [{} for i in range(len(dep_parse['sentences']))]
    word_to_lemma = [{} for i in range(len(dep_parse['sentences']))]
    word_to_child = [{} for i in range(len(dep_parse['sentences']))]
    sents = [[] for i in range(len(dep_parse['sentences']))]
    index_to_word = {}
    '''
		Constructing dicts for maintaining the dependencies among words. 
	'''
    '''
		Appending each word's occurrence number to keep repeated words distinct
	'''
    #print(dep_parse['sentences'])
    print("********")
    for i, sent in enumerate(dep_parse['sentences']):
        for dep in sent['basicDependencies']:
            word_to_dep[i][dep['dependentGloss'] +
                           str(dep['dependent'])] = dep['dep']
            word_to_par[i][dep['dependentGloss'] +
                           str(dep['dependent'])] = dep['governorGloss'] + str(
                               dep['governor'])
            index_to_word[dep['dependentGloss'] +
                          str(dep['dependent'])] = dep['dependentGloss']

            if (dep['governorGloss'] + str(dep['governor'])
                    not in word_to_child[i]):
                word_to_child[i][dep['governorGloss'] +
                                 str(dep['governor'])] = []
            if (dep['dependentGloss'] + str(dep['dependent'])
                    not in word_to_child[i]):
                word_to_child[i][dep['dependentGloss'] +
                                 str(dep['dependent'])] = []
            word_to_child[i][dep['governorGloss'] +
                             str(dep['governor'])].append(
                                 dep['dependentGloss'] + str(dep['dependent']))
            sents[i].append(dep['dependentGloss'] + str(dep['dependent']))
        word_to_dep[i]['ROOT0'] = 'root'
        word_to_par[i]['ROOT0'] = 'root'

    for i, sent in enumerate(pos['sentences']):
        for pos_tagger in sent['tokens']:
            word_to_pos[i][pos_tagger['word']] = pos_tagger['pos']
            word_to_lemma[i][pos_tagger['word']] = pos_tagger['lemma']
        word_to_pos[i]['ROOT'] = 'root'
        word_to_lemma[i]['ROOT'] = 'root'
    '''
		Displaying the deps
	'''

    ## Implementing rules to extract aspects
    for i, sent in enumerate(sents):
        if (__name__ == '__main__'):
            print(word_to_dep[i], word_to_par[i], word_to_pos[i])
            print("Children==>")
            print(word_to_child[i])

    aspects = []
    for i, sent in enumerate(sents):
        for word in sent:
            '''
				Rule 0
			'''
            if ('subj' in word_to_dep[i][word]):
                for child in word_to_child[i][word_to_par[i][word]]:
                    if ('amod' in word_to_dep[i][child]
                            or 'advmod' in word_to_dep[i][child]):
                        aspects.append(word_to_par[i][word])
                        if (__name__ == '__main__'):
                            print("Rule 0 triggered.")
            '''
				Rule 1 (without sub): Very big to hold.
			'''
            if (word_to_dep[i][word] == 'xcomp' and
                ('JJ' in word_to_pos[i][index_to_word[word_to_par[i][word]]] or
                 'RB' in word_to_pos[i][index_to_word[word_to_par[i][word]]])):
                if (__name__ == '__main__'):
                    print("Rule 1 triggered")
                aspects.append(word_to_par[i][word])
            '''
				Rule 2 (without subj): Not to mention the price of the phone
			'''
            if (word_to_dep[i][word] == 'dobj' and 'VB'
                    in word_to_pos[i][index_to_word[(word_to_par[i][word])]]
                    and ('NN' in word_to_pos[i][index_to_word[(word)]]
                         or 'JJ' in word_to_pos[i][index_to_word[(word)]])):
                aspects.append(word)
                if (__name__ == '__main__'):
                    print("Rule 2 triggered")
                    print(word)
            '''
				Rule 3 (without subj): Love the sleekness of the player
			'''

            if ('NN' in word_to_pos[i][index_to_word[(word)]]
                    and word_to_dep[i][word] == 'nmod'):
                aspects.append(word_to_par[i][word])
                if (__name__ == '__main__'):
                    print("Rule 3 triggered")
                    print(word_to_par[i][word])
                '''
				Rule 4 (with sub): The battery lasts little 
				two aspects 
			'''
            if (word_to_dep[i][word] == 'advmod'
                    or word_to_dep[i][word] == 'amod' or word_to_dep[i][word]
                    == 'advcl') and ('VB' in word_to_pos[i][index_to_word[(
                        word_to_par[i][word])]]):
                aspects.append(word_to_par[i][word])
                for word2 in sent:
                    if (word2 != word and word_to_dep[i][word2] == 'nsubj'
                            and word_to_par[i][word2] == word_to_par[i][word]
                            and
                        ('NN' in word_to_pos[i][index_to_word[word2]]
                         or 'JJ' in word_to_pos[i][index_to_word[word2]])):
                        aspects.append(word2)
                        if (__name__ == '__main__'):
                            print("Rule 4 triggered")
                            print(word2)
                '''
				Rule 5 (with sub): I like the lens of this camera
			'''
            if ('NN' in word_to_pos[i][index_to_word[(word)]]
                    and word_to_dep[i][word] == 'dobj'):
                if (__name__ == '__main__'):
                    print("Rule 5 triggered")
                    print(word)
                try:
                    # look up the bare word (occurrence indices were appended for uniqueness)
                    concept_info = sn.concept(index_to_word[word])
                    print("present in senticnet")
                except KeyError:
                    # not in SenticNet: treat it as an implicit aspect candidate
                    aspects.append(word)
            '''
				Rule 6 : I like the beauty of the screen.
				Check if senticnet condition should be added
			'''
            if ('NN' in word_to_pos[i][index_to_word[(word)]]
                    and word_to_dep[i][word] == 'dobj'):
                try:
                    concept_info = sn.concept(index_to_word[word])
                    aspects.append(word)
                    print("present in SenticNet")
                except KeyError:
                    print("not in SenticNet")
                for word2 in sent:
                    if (word2 != word and word_to_par[i][word2] == word and
                            'NN' in word_to_pos[i][index_to_word[(word2)]]):
                        aspects.append(word2)
                        if (__name__ == '__main__'):
                            print("Rule 6 triggered.")
                            print(word2)
            '''
				Rule 7 : I would like to comment on the camera of this phone. 
			
			'''
            if (word_to_dep[i][word] == 'xcomp'):
                try:
                    concept_info = sn.concept(index_to_word[word])
                    aspects.append(word)
                    print("present in SenticNet")
                except KeyError:
                    print("not in SenticNet")
                for child in word_to_child[i][word]:
                    if ('NN' in word_to_pos[i][index_to_word[child]]):
                        aspects.append(child)
                        if (__name__ == '__main__'):
                            print("Rule 7 triggered.")
                            print(word)
                            print(child)
            '''
				Rule 8 : The car is expensive.
			'''
            if (word_to_dep[i][word] == 'nsubj'):
                for word2 in sent:
                    if (word2 != word and word_to_dep[i][word2] == 'cop'
                            and word_to_par[i][word2] == word_to_par[i][word]):
                        aspects.append(word_to_par[i][word])
                        if (__name__ == '__main__'):
                            print("Rule 8 triggered")
                            print(word_to_par[i][word])
            '''			
				Rule 9 : The camera is nice.
			'''
            if (word_to_dep[i][word] == 'nsubj'
                    and 'NN' in word_to_pos[i][index_to_word[(word)]]):
                for word2 in sent:
                    if (word2 != word and word_to_dep[i][word2] == 'cop'
                            and word_to_par[i][word2] == word_to_par[i][word]):
                        aspects.append(word)
                        if (__name__ == '__main__'):
                            print("Rule 9 triggered")
                            print(word)
            '''
				Rule 10 : The phone is very lightweight to carry.
			'''
            if (word_to_dep[i][word] == 'cop'):
                for word2 in sent:
                    if (word2 != word
                            and 'VB' in word_to_pos[i][index_to_word[(word2)]]
                            and word_to_par[i][word] == word_to_par[i][word2]):
                        aspects.append(word2)
                        if (__name__ == '__main__'):
                            print("Rule 10 triggered.")
                            print(word2)
            '''
				Extracting mods of dobjs

			'''
            if (word_to_dep[i][word] == 'dobj'):
                for child in word_to_child[i][word]:
                    if ('mod' in word_to_dep[i][child] and 'JJ'
                            in word_to_pos[i][index_to_word[(child)]]):
                        aspects.append(child)
            '''
				Rule 11 : Checking for conjunctions
			'''
        for asp in aspects:
            for word in sent:
                if (word_to_dep[i][word] == 'conj'
                        and word_to_par[i][word] == asp):
                    aspects.append(word)
                    if (__name__ == '__main__'):
                        print("Rule conj triggered.")
                        print(word)

    finalIAC = set(aspects)
    finalIAC = [index_to_word[f] for f in finalIAC]
    finalIAC = [w for w in finalIAC if not w in stop_words]

    finalSenti = []
    for iac in finalIAC:
        try:
            concept_info = sn.concept((iac))
            finalSenti.append(iac)
        except KeyError:
            print("No word available for " + iac)

    return finalIAC, finalSenti
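# Illustrative call (not from the original source): it assumes a Stanford CoreNLP server is
# already listening on localhost:9001, e.g. started with
#   java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9001
if __name__ == '__main__':
    iac, senti = get_clues("The camera is nice but the battery lasts little.")
    print("Aspect candidates:", iac)
    print("Candidates found in SenticNet:", senti)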
# Text classification on the five SenticNet scales, scored with XGBoost

import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from senticnet.senticnet import SenticNet
from nltk.stem import WordNetLemmatizer
from sklearn import metrics
import xgboost
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

df = pd.read_csv('preprocessed.csv')

lemmatizer = WordNetLemmatizer()    # reduce words to their base form
sn = SenticNet()   # knowledge base classifying words and expressions by meaning and sentiment


# Function returning scores for the input text on the five SenticNet scales: polarity intensity,
# pleasantness, attention, sensitivity and aptitude. Each score is the sum of the scores of all
# words and expressions from the text found in the base, normalized by the number of words in the text.
def SN(data):
    # Vectorize the text, building a vocabulary of words and phrases up to 3 words long
    vectorizer = CountVectorizer(analyzer='word', ngram_range=(1,3))
    vec = vectorizer.fit_transform([data]).todense()
    k = 0
    polarity_intense = sentics_pleasant = sentics_attention = sentics_sense = sentics_aptitude = 0
    for i in vectorizer.vocabulary_.keys():
        try:  # Try to look up the i-th word/phrase in the base
            num_repetitions = vec[0, vectorizer.vocabulary_[i]]
            polarity_intense += (float(sn.polarity_intense(i)) * num_repetitions)
            sentics_pleasant += (float(sn.sentics(i)['pleasantness']) * num_repetitions)
Example #15
 def __init__(self):
     self.sn = SenticNet()
def get_clues(text):
    print("*--------(%s)-------------*" % (text))
    print(type(text))
    nlp = StanfordCoreNLP('http://localhost:9001')
    stop_words = set(stopwords.words('english'))
    '''
		Occurrence numbers are appended to each word below; index_to_word maps them back to the bare word.
	'''
    dep_parse = nlp.annotate(text,
                             properties={
                                 'annotators': 'depparse',
                                 'outputFormat': 'json',
                                 'timeout': 10000,
                             })

    pos = nlp.annotate(text,
                       properties={
                           'annotators': 'lemma',
                           'outputFormat': 'json',
                           'timeout': 10000,
                       })

    sn = SenticNet()
    word_to_dep = [{} for i in range(len(dep_parse['sentences']))]
    word_to_par = [{} for i in range(len(dep_parse['sentences']))]
    word_to_pos = [{} for i in range(len(dep_parse['sentences']))]
    word_to_lemma = [{} for i in range(len(dep_parse['sentences']))]
    word_to_child = [{} for i in range(len(dep_parse['sentences']))]
    sents = [[] for i in range(len(dep_parse['sentences']))]
    index_to_word = {}
    aspect_result = [[] for i in range(len(dep_parse['sentences']))]
    '''
		Constructing dicts for maintaining the dependencies among words. 
	'''
    '''
		Appending each word's occurrence number to keep repeated words distinct
	'''
    print("********")
    for i, sent in enumerate(dep_parse['sentences']):
        for dep in sent['basicDependencies']:
            word_to_dep[i][dep['dependentGloss'] +
                           str(dep['dependent'])] = dep['dep']
            word_to_par[i][dep['dependentGloss'] +
                           str(dep['dependent'])] = dep['governorGloss'] + str(
                               dep['governor'])
            index_to_word[dep['dependentGloss'] +
                          str(dep['dependent'])] = dep['dependentGloss']

            if (dep['governorGloss'] + str(dep['governor'])
                    not in word_to_child[i]):
                word_to_child[i][dep['governorGloss'] +
                                 str(dep['governor'])] = []
            if (dep['dependentGloss'] + str(dep['dependent'])
                    not in word_to_child[i]):
                word_to_child[i][dep['dependentGloss'] +
                                 str(dep['dependent'])] = []
            word_to_child[i][dep['governorGloss'] +
                             str(dep['governor'])].append(
                                 dep['dependentGloss'] + str(dep['dependent']))
            sents[i].append(dep['dependentGloss'] + str(dep['dependent']))
        word_to_dep[i]['ROOT0'] = 'root'
        word_to_par[i]['ROOT0'] = 'root'

    for i, sent in enumerate(pos['sentences']):
        for pos_tagger in sent['tokens']:
            word_to_pos[i][pos_tagger['word']] = pos_tagger['pos']
            word_to_lemma[i][pos_tagger['word']] = pos_tagger['lemma']
        word_to_pos[i]['ROOT'] = 'root'
        word_to_lemma[i]['ROOT'] = 'root'
    '''
		Displaying the deps
	'''

    ## Implementing rules to extract aspects
    for i, sent in enumerate(sents):
        if (__name__ == '__main__'):
            print(word_to_dep[i], word_to_par[i], word_to_pos[i],
                  word_to_lemma[i])
            print("Children==>")
            print(word_to_child[i])

    for i, sent in enumerate(sents):
        token_t = word_to_child[i]['ROOT0'][0]
        is_sub = False
        token_h = None
        for child in word_to_child[i][token_t]:
            if 'subj' in word_to_dep[i][child]:
                is_sub = True
                token_h = child

        #If subject noun relationship present
        if is_sub:
            """
				Rule 0: if any adv or adj modifies the token t.

			"""
            for child in word_to_child[i][token_t]:
                if ('amod' in word_to_dep[i][child]
                        or 'advmod' in word_to_dep[i][child]):
                    try:
                        concept_info = sn.concept(index_to_word[child])
                        aspect_result[i].append(token_t)
                        if __name__ == '__main__':
                            print("Rule 0 triggered.")
                            print("present in senticnet")
                    except KeyError:
                        print("not in SenticNet")
            """
				Rule 1: The battery lasts little.

			"""
            for child in word_to_child[i][token_t]:
                if (word_to_dep[i][child] == 'advmod' or word_to_dep[i][child]
                        == 'amod' or word_to_dep[i][child] == 'advcl') and (
                            'VB' in word_to_pos[i][index_to_word[token_t]]):
                    aspect_result[i].append(token_t)
                    aspect_result[i].append(token_h)
                    if __name__ == '__main__':
                        print("Rule 1 triggered.")
                        print(token_t)
                        print(token_h)
            """
				Rule 2: I like the beauty of the screen (and I like the lens of this camera). 

			"""
            for child in word_to_child[i][token_t]:
                if (word_to_dep[i][child] == 'dobj'
                        and 'NN' in word_to_pos[i][index_to_word[child]]):
                    aspect_result[i].append(child)
                    if __name__ == '__main__':
                        print(child)
                    try:
                        concept_info = sn.concept(index_to_word[child])
                        if __name__ == '__main__':
                            print("Rule 2 triggered")
                        for grandchild in word_to_child[i][child]:
                            if ('NN' in word_to_pos[i][
                                    index_to_word[grandchild]]):
                                aspect_result[i].append(grandchild)
                                print(grandchild)
                    except KeyError:
                        print("OOps")
            """
				Rule 3: I would like to comment on the camera of this phone.
	
			"""
            for child in word_to_child[i][token_t]:
                if (word_to_dep[i][child] == 'xcomp'):
                    try:
                        sn.concept(index_to_word[child])
                        aspect_result[i].append(child)
                        if __name__ == '__main__':
                            print(child)
                    except KeyError:
                        print("OOps")
                    for grandchild in word_to_child[i][child]:
                        if ('NN' in word_to_pos[i][index_to_word[grandchild]]):
                            aspect_result[i].append(grandchild)
                            if __name__ == '__main__':
                                print(grandchild)
                                print("Rule 3 triggered.")
            """
				Rule 4: The car is expensive.

			"""
            for child in word_to_child[i][token_t]:
                if (word_to_dep[i][child] == 'cop'):
                    try:
                        sn.concept(word_to_lemma[i][index_to_word[token_t]])
                        aspect_result[i].append(token_t)
                        if __name__ == '__main__':
                            print("Rule 4 triggered")
                            print(token_t)
                    except KeyError:
                        pass
            """
				Rule 5: The camera is nice

			"""
            for child in word_to_child[i][token_t]:
                if (word_to_dep[i][child] == 'cop'
                        and 'NN' in word_to_pos[i][index_to_word[token_h]]):
                    aspect_result[i].append(token_h)
                    if __name__ == '__main__':
                        print("Rule 5 triggered.")
                        print(token_h)
            """
				Rule 6: 

			"""
            for child in word_to_child[i][token_t]:
                if (word_to_dep[i][child] == 'cop'):
                    for child2 in word_to_child[i][token_t]:
                        if (child != child2 and 'VB'
                                in word_to_pos[i][index_to_word[child2]]):
                            try:
                                sn.concept(index_to_word[token_t])
                                sn.concept(index_to_word[child2])
                                aspect_result[i].append(token_t)
                                aspect_result[i].append(child2)
                                if __name__ == '__main__':
                                    print("rule 6 trigg")
                                    print(token_t)
                                    print(child2)
                            except KeyError:
                                pass
        else:
            """
				Rule 7:Very big to hold.

			"""
            for word in sent:
                if ('RB' in word_to_pos[i][index_to_word[word]]
                        or 'JJ' in word_to_pos[i][index_to_word[word]]):

                    for child in word_to_child[i][word]:
                        if (word_to_dep[i][child] == 'xcomp'
                                or word_to_dep[i][child] == 'ccomp'):
                            aspect_result[i].append(word)
                            if __name__ == '__main__':
                                print("Rule 7 triggered")
                                print(word)
            """
				Rule 8: Love the sleekness of the player.
			"""
            for word in sent:
                for child in word_to_child[i][word]:
                    if ('NN' in word_to_pos[i][index_to_word[child]]
                            and word_to_dep[i][child] == 'nmod'):
                        for grandchild in word_to_child[i][child]:
                            if ('IN' in word_to_pos[i][
                                    index_to_word[grandchild]]):
                                aspect_result[i].append(word)
                                aspect_result[i].append(child)
                                if __name__ == '__main__':
                                    print(word)
                                    print(child)
                                    print("Rule 8 triggered.")
            """
				Rule 9: Not to mention the price of the phone.

			"""
            for word in sent:
                for child in word_to_child[i][word]:
                    if (word_to_dep[i][child] == 'dobj'):
                        aspect_result[i].append(child)
                        if __name__ == '__main__':
                            print(child)
                            print("Rule 9 triggered")
            '''
				Rule 11 : Checking for conjunctions
			'''
        for asp in aspect_result[i]:
            for word in sent:
                if (word_to_dep[i][word] == 'conj'
                        and word_to_par[i][word] == asp):
                    aspect_result[i].append(word)
                    if (__name__ == '__main__'):
                        print("Rule conj triggered.")
                        print(word)

    finalIAC = [set(aspect_result[i]) for i in range(len(sents))]
    finalIAC = [[index_to_word[w] for w in finalIAC[i]]
                for i in range(len(sents))]

    print(finalIAC)
    singleFinalIAC = []
    for i in range(len(sents)):
        for w in finalIAC[i]:
            if w not in stop_words:
                singleFinalIAC.append(w)
    print(singleFinalIAC)

    finalSenti = []
    for iac in singleFinalIAC:
        try:
            concept_info = sn.concept((iac))
            finalSenti.append(iac)
        except KeyError:
            print("No word available for " + iac)

    return singleFinalIAC, finalSenti
Example #17
from senticnet.senticnet import SenticNet

sn = SenticNet('ru')

# Prompt (in Russian): 'Enter your comment (e.g. "how are you"): '
word = input('Введите ваш комментарий(например "как дела"): ')

lst = word.split()

#concept_info = sn.concept(word)
#polarity_value = sn.polarity_value(word)
#polarity_intense = sn.polarity_intense(word)
#moodtags = sn.moodtags(word)
#semantics = sn.semantics(word)

print(list(map(lambda x: sn.sentics(x), lst)))
pop = input(" ")
Example #18
from senticnet.senticnet import SenticNet

sn = SenticNet()
print("polarity value:", sn.polarity_value("love"))
print("polarity intense:", sn.polarity_intense("love"))
print("moodtags:", ", ".join(sn.moodtags("love")))
print("semantics:", ", ".join(sn.semantics("love")))
print("\n".join([key + ": " + str(value) for key, value in sn.sentics("love").items()]))
Example #19
def data_Preprocessing(data, data_test, n_of_words, polarity_threshold):
    Reviews = data["Content"]
    # Strip any non-ASCII characters from each review
    # If another encode/decode method is needed, string.printable is an option
    for i in range(0, len(Reviews)):
        Reviews.iloc[i] = Reviews.iloc[i].encode('ascii', errors='ignore').decode()

    # Set all the content to lower case
    Reviews = Reviews.apply(lambda row: row.lower())

    # Add to the follow variable the characters that you want to delete
    chars_to_del = "[" + string.punctuation + string.digits + "]"
    # Delete all the chars in "chars_to_del" from each row of the dataframe
    Reviews = Reviews.apply(lambda row: re.sub(chars_to_del, '', row))
    # Tokenize every single words of the data content
    Token_Reviews = Reviews.apply(lambda row: nltk.word_tokenize(row))

    # Generating the list "stop" of element TO BE REMOVED from the sentences (stopwords, numbers and punctuations)
    stop = stopwords.words("english")
    # Remove all the words in the variable "stop"
    Filtered_Review = Token_Reviews.apply(
        lambda row: [w for w in row if not w in stop])

    # Stemming the data's content
    # Stemming the Filtered sentence, some stemmed words:
    # http://snowball.tartarus.org/algorithms/english/stemmer.html
    ps = PorterStemmer()
    for idx in range(0, len(Filtered_Review)):
        Stemmed_Review_temp = []
        for word in Filtered_Review.iloc[idx]:
            Stemmed_Review_temp.append(ps.stem(word))
        Filtered_Review.iloc[idx] = Stemmed_Review_temp

    # Terms choosing: most common word
    sn = SenticNet()

    Filtered_Review_List = list(itertools.chain.from_iterable(Filtered_Review))
    Words_Frequency = FreqDist(Filtered_Review_List)
    Most_Common_Words_Frequency = Words_Frequency.most_common(n_of_words)

    Most_Common_Words = []
    for i in range(0, n_of_words):
        Most_Common_Words.append(Most_Common_Words_Frequency[i][0])

    index = 1
    words_and_polarity = pd.DataFrame(columns=["Word", "Polarity"])
    Selected_Words = []
    # Terms polarity
    for word in Most_Common_Words:
        try:
            temp = sn.polarity_intense(word)
            if (float(temp) > polarity_threshold
                    or float(temp) < -(polarity_threshold)):
                words_and_polarity.loc[index] = [word, float(temp)]
                index = index + 1
                Selected_Words.append(word)
        except Exception:
            continue
    # Uncomment to recompute the selected words and their polarity
    #words_and_polarity.to_csv("Words_and_Polarity.csv", sep=",")

    return data, data_test
Example #20
def pre_process_and_predict(sentence):
    wordnet_lemmatizer = WordNetLemmatizer()
    # # Replacing double quotes with single, within a string
    sentence = sentence.replace("\"", "\'")
    # # Removing unnecessary special characters, keeping only ,  ! ?
    sentence = re.sub(r"[^!?,a-zA-Z0-9\ ]+", '', sentence)
    # # Lemmatization on verbs
    sentence = ' '.join([
        wordnet_lemmatizer.lemmatize(word, pos='v')
        for word in word_tokenize(sentence)
    ])

    sn = SenticNet()
    senti = PySentiStr()
    senti.setSentiStrengthPath(CODE_PATH + '/sentistrength/SentiStrength.jar')
    senti.setSentiStrengthLanguageFolderPath(
        CODE_PATH + '/sentistrength/SentStrength_Data/')

    sentiment_score = []

    for sen in sent_tokenize(sentence):
        senti_pos, senti_neg = senti.getSentiment(sen, score='dual')[0]
        senti_pos -= 1
        if senti_neg == -1:
            senti_neg = 0
        sum_pos_score = 0
        sum_neg_score = 0
        for word in word_tokenize(sen):
            try:
                w_score = float(sn.polarity_intense(word)) * 5
            except KeyError:
                w_score = 0
            if w_score > 0:
                sum_pos_score = sum_pos_score + w_score
            elif w_score < 0:
                sum_neg_score = sum_neg_score + w_score
        sum_pos_score = (sum_pos_score + senti_pos) / 2
        sum_neg_score = (sum_neg_score + senti_neg) / 2
        sentiment_score.append((sum_pos_score, sum_neg_score))
    additional_features_s = []
    additional_features_ns = []

    contra = []
    pos_low = []
    pos_medium = []
    pos_high = []
    neg_low = []
    neg_medium = []
    neg_high = []

    for sum_pos_score, sum_neg_score in sentiment_score:
        contra.append(int(sum_pos_score > 0 and abs(sum_neg_score) > 0))
        pos_low.append(int(sum_pos_score < 0))
        pos_medium.append(int(sum_pos_score >= 0 and sum_pos_score <= 1))
        pos_high.append(int(sum_pos_score >= 2))
        neg_low.append(int(sum_neg_score < 0))
        neg_medium.append(int(sum_neg_score >= 0 and sum_neg_score <= 1))
        neg_high.append(int(sum_neg_score >= 2))
    additional_features_s = additional_features_s + [
        max(pos_medium),
        max(pos_high),
        max(neg_medium),
        max(neg_high)
    ]
    additional_features_ns = additional_features_ns + [
        max(pos_low), max(neg_low)
    ]

    tweet = sentence
    punctuation_count = SequencePunctuationCount(tweet)
    character_count = SequenceCharacterCount(tweet)
    capitalized_count = CapitalizedCount(tweet)
    exclamation_count = ExclamationCount(tweet)
    #     emoji_count       = EmojiCount(tweet)
    f_count = [
        punctuation_count, character_count, capitalized_count,
        exclamation_count
    ]
    for count in f_count:
        f_low = int(count == 0)
        f_medium = int(count >= 1 and count <= 3)
        f_high = int(count >= 4)
        additional_features_s = additional_features_s + [f_medium, f_high]
        additional_features_ns = additional_features_ns + [f_low]
    X = [sentence]

    in_file = open(os.path.join(PICKLES_PATH, "vocab.pickle"), "rb")
    vocab = pickle.load(in_file)
    in_file.close()

    in_file = open(os.path.join(PICKLES_PATH, "model.pickle"), "rb")
    model = pickle.load(in_file)
    in_file.close()

    vectorizer = TfidfVectorizer(vocabulary=vocab)
    X = vectorizer.fit_transform(X)
    ans = int(sum(model.predict(X)))
    print('Sentence : ', sentence)
    print('Sarcastic features : ', additional_features_s)
    print('Not Sarcastic features : ', additional_features_ns)
    print('Contradict : ', max(contra))
    print('Model Predict : ', ans)
    print(
        'My obs : ',
        int((sum(additional_features_s) >= sum(additional_features_ns))
            and max(contra) == 1))
    print('Final Prd : ', end='')

    if ans == 1 or ((sum(additional_features_s) >= sum(additional_features_ns))
                    and max(contra) == 1):
        return True
    else:
        return False
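# Illustrative call (not from the original source). It assumes the pickled vocab/model files under
# PICKLES_PATH, the SentiStrength jar and data under CODE_PATH, and the counting helpers referenced
# above (SequencePunctuationCount, SequenceCharacterCount, CapitalizedCount, ExclamationCount)
# are all available in the surrounding project.
if __name__ == '__main__':
    print(pre_process_and_predict("Oh great, another Monday!!!"))  # True if judged sarcastic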
 def __init__(self):
     self.col = ['Name', 'Brand', 'Price', 'Title', 'Score', 'Time', 'Text']
     self.sn = SenticNet('en')
     self.wordnet_lemmatizer = WordNetLemmatizer()
from senticnet.senticnet import SenticNet

teste = []
sn = SenticNet('pt')
concept_info = sn.concept('amor')
polarity_value = sn.polarity_value('amor')
polarity_intense = sn.polarity_intense('amor')
moodtags = sn.moodtags('amor')
semantics = sn.semantics('amor')
sentics = sn.sentics('amor')

teste.append(concept_info)

print(teste)