# Assumed imports for the two snippets below (the scrape omits them).
import csv
import pickle

from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import sent_tokenize
from openpyxl import load_workbook
from senticnet.senticnet import SenticNet
from stanfordcorenlp import StanfordCoreNLP
from tqdm import tqdm


def senticnet(text):
    """
    Returns a list obtained from SenticNet with the following four features
    normalized: [pleasantness_value, attention_value, sensitivity_value, aptitude_value]

    :param text: input text pre-processed by Spacy
    :return: a list with the SenticNet features averaged for all the words in text
    """
    list_features = [0] * 4
    sn = SenticNet()
    count_words = 0

    for token in text:
        try:
            concept_info = sn.concept(str(token))  # str() handles spaCy tokens and plain strings alike
            list_features[0] += float(concept_info['sentics']['pleasantness'])
            list_features[1] += float(concept_info['sentics']['attention'])
            list_features[2] += float(concept_info['sentics']['sensitivity'])
            list_features[3] += float(concept_info['sentics']['aptitude'])
            count_words += 1
        except KeyError:
            pass  # word not in SenticNet; skip it

    if count_words != 0:
        list_features = [feature / count_words for feature in list_features]

    return list_features
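
# A minimal usage sketch for senticnet(): plain word strings work, since each
# token is looked up by its string form, e.g.
#   senticnet(['love', 'beautiful', 'camera'])
# returns the four averaged sentic values for the words found in the lexicon.
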
class Get_IAC:
    def __init__(self):
        self.col = ['Name', 'Brand', 'Price', 'Title', 'Score', 'Time', 'Text']
        self.sn = SenticNet('en')
        self.wordnet_lemmatizer = WordNetLemmatizer()

    def review_to_sentences(self, review):
        # Ensure a space after each period so sent_tokenize splits reliably.
        review = review.replace('.', '. ')
        raw_sentences = sent_tokenize(review)
        return raw_sentences

    def InputData(self, input_path):
        self.dict_list = []
        if '.csv' in input_path:
            with open(input_path, 'r', encoding='utf8') as f:
                reader = csv.DictReader(f)
                for row in reader:
                    d = {i: row[i] for i in self.col}
                    self.dict_list.append(d)
        elif '.xlsx' in input_path:
            wb = load_workbook(input_path)
            sheet = wb.active
            for row_idx, row in enumerate(sheet.rows):
                if row_idx == 0:  # skip the header row
                    continue
                d = {name: cell.value for name, cell in zip(self.col, row)}
                self.dict_list.append(d)

        self.dict_list = [
            x for x in self.dict_list if x['Text'] not in ('', None)
        ]
        self.sentences = []
        for record in self.dict_list:
            self.sentences.extend(self.review_to_sentences(record['Text']))
        self.sentences = [x for x in self.sentences if len(x) >= 5]

    def GetIAC(self):
        self.nlp = StanfordCoreNLP(r'stanford-corenlp-full-2018-10-05')
        self.IAC = []
        for i in tqdm(self.sentences):
            dependency = self.nlp.dependency_parse(i)
            token = self.nlp.word_tokenize(i)
            compounds = [x for x in dependency if 'compound' in x]
            if compounds:
                # Merge each compound pair into one hyphenated token.
                for j in compounds:
                    token[j[2] - 1] = token[j[2] - 1] + '-' + token[j[1] - 1]
                    token[j[1] - 1] = ''
                i = ' '.join(token)

            parse = self.nlp.parse(i)
            dependency = self.nlp.dependency_parse(i)
            pos = self.nlp.pos_tag(i)
            token = []
            for j in pos:
                wordnet_pos = self.get_wordnet_pos(j[1])
                token.append(
                    self.wordnet_lemmatizer.lemmatize(j[0].lower(),
                                                      pos=wordnet_pos))

            # subject noun relation
            if any('nsubj' in x for x in dependency):
                self.IAC.extend(self.Subject_Noun_Rule(parse, dependency, token, pos))
            else:  # non-subject noun relation
                self.IAC.extend(self.Non_Subject_Noun_Rule(parse, dependency, token, pos))
        self.nlp.close()
        self.IAC = list(set(self.IAC))

    def get_wordnet_pos(self, treebank_tag):
        if treebank_tag.startswith('J'):
            return wn.ADJ
        elif treebank_tag.startswith('V'):
            return wn.VERB
        elif treebank_tag.startswith('N'):
            return wn.NOUN
        elif treebank_tag.startswith('R'):
            return wn.ADV
        else:
            return wn.NOUN

    # Additional rule: coordinating conjunctions
    def Conj(self, index, dependency, token):
        IAC = []
        index = list(set(index))
        conj = [x for x in dependency if 'conj' in x]
        for j in conj:
            if j[1] in index or j[2] in index:
                if j[1] not in index:
                    IAC.append(token[j[1] - 1])
                    index.append(j[1])
                if j[2] not in index:
                    IAC.append(token[j[2] - 1])
                    index.append(j[2])
        return IAC

    def Subject_Noun_Rule(self, parse, dependency, token, pos):
        be = ['is', 'was', 'am', 'are', 'were']
        adv_mod = [x for x in dependency if 'advmod' in x]
        adj_mod = [x for x in dependency if 'amod' in x]
        active_token = token[[x for x in dependency if 'nsubj' in x][0][2] - 1]  # the subject token

        result = []
        index = []
        if adv_mod or adj_mod:
            A, B = self.Rule1(adv_mod, adj_mod, active_token, token)
            result += A
            index += B

        # does not have an auxiliary verb
        if not any(k in token for k in be) and not any('MD' in x for x in pos):
            A, B = self.Rule2(token, pos, dependency, active_token, adv_mod,
                              adj_mod)
            result += A
            index += B

            if any('dobj' in x for x in dependency):
                A, B = self.Rule3(dependency, token, pos)
                result += A
                index += B

            if any('xcomp' in x for x in dependency):
                A, B = self.Rule4(dependency, token, pos)
                result += A
                index += B

        if any('cop' in x for x in dependency):
            A, B = self.Rule5(dependency, pos, active_token, token)
            result += A
            index += B

        result += self.Conj(index, dependency, token)
        return list(set(result))

    # 3.3.3 Rule 1
    def Rule1(self, adv_mod, adj_mod, active_token, token):
        IAC = []
        index = []
        # Keep modifier tokens only when they exist as SenticNet concepts.
        for j in adv_mod + adj_mod:
            try:
                self.sn.concept(token[j[2] - 1])
                IAC.append(token[j[2] - 1])
                index.append(j[2])
            except KeyError:
                pass  # token[j[2]-1] not in SenticNet
        return IAC, index

    # 3.3.3 Rule 2-1

    def Rule2(self, token, pos, dependency, active_token, adv_mod, adj_mod):
        IAC = []
        index = []
        advcl = [x for x in dependency if 'advcl' in x]  # adverbial clause modifier
        subj_index = [x for x in dependency if 'nsubj' in x][0][2]
        for j in advcl + adv_mod + adj_mod:
            IAC.append(token[j[1] - 1])
            index.append(j[1])
            IAC.append(active_token)
            index.append(subj_index)
        return IAC, index

    # 3.3.3 Rule 2-2 & 2-3
    def Rule3(self, dependency, token, pos):
        IAC = []
        index = []
        dobj = [x for x in dependency if 'dobj' in x]  # direct object relation
        for j in dobj:
            if pos[j[2] - 1][1] == 'NN':
                try:
                    # Rule 2-3
                    concept = self.sn.concept(token[j[2] - 1])
                    IAC.append(token[j[2] - 1])
                    index.append(j[2])
                    conj = [j[2]]
                    for i in [x for x in dependency if 'conj' in x and j[2] in x]:
                        conj.append(i[1])
                        conj.append(i[2])
                    conj = list(set(conj))
                    for t1 in conj:
                        connect = [x for x in dependency if t1 in x]
                        for k in connect:
                            for idx in (k[1], k[2]):
                                if idx != t1 and pos[idx - 1][1] == 'NN':
                                    IAC.append(token[idx - 1])
                                    index.append(idx)
                except KeyError:
                    # Rule 2-2: keep the direct object even when it is not in SenticNet
                    IAC.append(token[j[2] - 1])
                    index.append(j[2])
        return IAC, index

    # 3.3.3 Rule 2-4

    def Rule4(self, dependency, token, pos):
        IAC = []
        index = []
        xcomp = [x for x in dependency
                 if 'xcomp' in x]  #  open clausal complement
        for j in xcomp:
            try:
                concept = self.sn.concept(token[j[1] - 1] + '-' +
                                          token[j[2] - 1])
                IAC.append(token[j[1] - 1] + '-' + token[j[2] - 1])
            except KeyError:
                pass  # compound concept not in SenticNet
            t1 = j[2]
            connect = [x for x in dependency if t1 in x]
            for k in connect:
                if pos[k[2] - 1][1] == 'NN':
                    IAC.append(token[k[2] - 1])
                    index.append(k[2])
        return IAC, index

    # 3.3.3 Rule 3 & 3.3.3 Rule 4 & 3.3.3 Rule 5

    def Rule5(self, dependency, pos, active_token, token):
        IAC = []
        index = []
        cop = [x for x in dependency if 'cop' in x]  # copula
        subj_index = [x for x in dependency if 'nsubj' in x][0][2]
        # Rule 4
        if pos[subj_index - 1][1] == 'NN':
            IAC.append(active_token)
            index.append(subj_index)

        # Rule 3 & Rule 5
        for j in cop:
            # Rule 3
            IAC.append(token[j[1] - 1])
            index.append(j[1])
            conj = [j[1]]
            for i in [x for x in dependency if 'conj' in x and j[1] in x]:
                conj.append(i[1])
                conj.append(i[2])

            # Rule 5
            conj = list(set(conj))
            for t1 in conj:
                connect = [x for x in dependency if t1 in x]
                for k in connect:
                    for idx in (k[1], k[2]):
                        if idx != t1 and pos[idx - 1][1] in ('VB', 'VV'):
                            IAC.append(token[idx - 1])
                            index.append(idx)
                            if token[t1 - 1] not in IAC:
                                IAC.append(token[t1 - 1])
                                index.append(t1)
        return IAC, index

    def Non_Subject_Noun_Rule(self, parse, dependency, token, pos):
        result = []
        index = []
        if any('xcomp' in x for x in dependency):
            A, B = self.Rule6(dependency, token)
            result += A
            index += B

        if any('case' in x for x in dependency):
            A, B = self.Rule7(dependency, pos, token)
            result += A
            index += B

        if any('dobj' in x for x in dependency):
            A, B = self.Rule8(dependency, token)
            result += A
            index += B

        result += self.Conj(index, dependency, token)
        return list(set(result))

    # 3.3.4 Rule 1
    def Rule6(self, dependency, token):
        IAC = []
        index = []
        xcomp = [x for x in dependency if 'xcomp' in x]  # open clausal complement
        for j in xcomp:
            IAC.append(token[j[1] - 1])
            index.append(j[1])
        return IAC, index

    # 3.3.4 Rule 2
    def Rule7(self, dependency, pos, token):
        IAC = []
        index = []
        case = [x for x in dependency if 'case' in x]  # case marking (prepositions etc.)
        for j in case:
            if pos[j[1] - 1][1] == 'NN':
                connect = [
                    x for x in dependency if j[1] in x and 'mod' in x[0]
                ]
                for i in connect:
                    IAC.append(token[i[1] - 1])
                    IAC.append(token[i[2] - 1])
                    index.append(i[1])
                    index.append(i[2])
        return list(set(IAC)), list(set(index))

    # 3.3.4 Rule 3
    def Rule8(self, dependency, token):
        IAC = []
        index = []
        dobj = [x for x in dependency
                if 'dobj' in x]  #  a direct object relation
        for j in dobj:
            IAC.append(token[j[2] - 1])
            index.append(j[2])
        return IAC, index

    def Save(self, output_path):
        with open(output_path, 'wb') as f:
            pickle.dump(self.IAC, f)
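

# A minimal driver sketch for Get_IAC; the paths are hypothetical, and GetIAC
# expects the Stanford CoreNLP distribution unpacked at the path hard-coded above.
if __name__ == '__main__':
    extractor = Get_IAC()
    extractor.InputData('reviews.xlsx')  # or a .csv with the same columns
    extractor.GetIAC()
    extractor.Save('IAC.pkl')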
Example #3
from itertools import groupby
from functools import reduce
from operator import itemgetter, add


# The snippet was truncated here; the helper below is a plausible
# reconstruction inferred from the surviving comprehension and the usage below.
def merge_list_of_records_by(key, combine):
    """Return a function that groups records by `key` and merges each
    group, combining every other field pairwise with `combine`."""
    keyprop = itemgetter(key)

    def merge_two(a, b):
        return {k: a[k] if k == key else combine(a[k], b[k]) for k in a}

    return lambda lst: [
        reduce(merge_two, records)
        for _, records in groupby(sorted(lst, key=keyprop), keyprop)
    ]


if __name__ == '__main__':
    a = [{'time': '25 APR', 'total': 10, 'high': 10},
         {'time': '26 APR', 'total': 5, 'high': 5}]

    b = [{'time': '24 APR', 'total': 10, 'high': 10},
         {'time': '26 APR', 'total': 15, 'high': 5}]
    merger = merge_list_of_records_by('time', add)
    hasil_merge = merger(a+b)
    print(hasil_merge)
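    # With the reconstructed helper above, only '26 APR' occurs twice, so the
    # expected merge is:
    # [{'time': '24 APR', 'total': 10, 'high': 10},
    #  {'time': '25 APR', 'total': 10, 'high': 10},
    #  {'time': '26 APR', 'total': 20, 'high': 10}]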

    print("sinonim with thesaurus==================================================================")
    # from PyDictionary import PyDictionary
    #
    # dictionary = PyDictionary()
    # print(dictionary.synonym("good"))

    from thesaurus import Word

    w = Word('suicidal')
    syn = w.synonyms()
    print(syn)

    sn = SenticNet()
    try:
        concept_info_sinonim = sn.concept("suicidal")
        print(concept_info_sinonim)
    except Exception as e:
        print(e)
Example #4
# -*- coding: utf-8 -*-
"""
Spyder Editor

This is a temporary script file.
"""
import nltk
# nltk.download() with no arguments opens the interactive downloader;
# 'wordnet' is the only corpus the lookups below need.
nltk.download('wordnet')

from senticnet.senticnet import SenticNet
sn = SenticNet()
sn.concept('love')  # any concept present in SenticNet; an empty string raises KeyError


def fun1(d):
    try:
        from senticnet.senticnet import SenticNet
        sn = SenticNet()
        sn.semantics(d)
        return True
    except KeyError as error:
        return False


fun1('day')
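
# For reference: a successful sn.concept() lookup returns a dict; the snippets
# in this file rely on keys such as 'polarity_value', 'polarity_intense',
# 'moodtags', 'semantics', and a 'sentics' sub-dict with 'pleasantness',
# 'attention', 'sensitivity', and 'aptitude'.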

from nltk.corpus import wordnet
sk = wordnet.synsets('ssd')  # synsets() takes a plain word; synset() would need e.g. 'dog.n.01'
# Assumed imports for this snippet: a pycorenlp-style client (the .annotate()
# calls below match its API) plus NLTK stopwords and SenticNet.
from pycorenlp import StanfordCoreNLP
from nltk.corpus import stopwords
from senticnet.senticnet import SenticNet


def get_clues(text):
    print("*--------(%s)-------------*" % (text))
    print(type(text))
    nlp = StanfordCoreNLP('http://localhost:9001')
    stop_words = set(stopwords.words('english'))
    '''
		index_to_word (built below) maps each numbered word back to its plain form
	'''
    dep_parse = nlp.annotate(text,
                             properties={
                                 'annotators': 'depparse',
                                 'outputFormat': 'json',
                                 'timeout': 10000,
                             })

    pos = nlp.annotate(text,
                       properties={
                           'annotators': 'lemma',
                           'outputFormat': 'json',
                           'timeout': 10000,
                       })

    sn = SenticNet()
    word_to_dep = [{} for i in range(len(dep_parse['sentences']))]
    word_to_par = [{} for i in range(len(dep_parse['sentences']))]
    word_to_pos = [{} for i in range(len(dep_parse['sentences']))]
    word_to_lemma = [{} for i in range(len(dep_parse['sentences']))]
    word_to_child = [{} for i in range(len(dep_parse['sentences']))]
    sents = [[] for i in range(len(dep_parse['sentences']))]
    index_to_word = {}
    '''
		Constructing dicts for maintaining the dependencies among words. 
	'''
    '''
		Appending each word by occurrence number to maintain a distinct word count
	'''
    #print(dep_parse['sentences'])
    print("********")
    for i, sent in enumerate(dep_parse['sentences']):
        for dep in sent['basicDependencies']:
            word_to_dep[i][dep['dependentGloss'] +
                           str(dep['dependent'])] = dep['dep']
            word_to_par[i][dep['dependentGloss'] +
                           str(dep['dependent'])] = dep['governorGloss'] + str(
                               dep['governor'])
            index_to_word[dep['dependentGloss'] +
                          str(dep['dependent'])] = dep['dependentGloss']

            if (dep['governorGloss'] + str(dep['governor'])
                    not in word_to_child[i]):
                word_to_child[i][dep['governorGloss'] +
                                 str(dep['governor'])] = []
            if (dep['dependentGloss'] + str(dep['dependent'])
                    not in word_to_child[i]):
                word_to_child[i][dep['dependentGloss'] +
                                 str(dep['dependent'])] = []
            word_to_child[i][dep['governorGloss'] +
                             str(dep['governor'])].append(
                                 dep['dependentGloss'] + str(dep['dependent']))
            sents[i].append(dep['dependentGloss'] + str(dep['dependent']))
        word_to_dep[i]['ROOT0'] = 'root'
        word_to_par[i]['ROOT0'] = 'root'

    for i, sent in enumerate(pos['sentences']):
        for pos_tagger in sent['tokens']:
            word_to_pos[i][pos_tagger['word']] = pos_tagger['pos']
            word_to_lemma[i][pos_tagger['word']] = pos_tagger['lemma']
        word_to_pos[i]['ROOT'] = 'root'
        word_to_lemma[i]['ROOT'] = 'root'
    '''
		Displaying the deps
	'''

    ## Implementing rules to extract aspects
    for i, sent in enumerate(sents):
        if (__name__ == '__main__'):
            print(word_to_dep[i], word_to_par[i], word_to_pos[i])
            print("Children==>")
            print(word_to_child[i])

    aspects = []
    for i, sent in enumerate(sents):
        for word in sent:
            '''
				Rule 0
			'''
            if ('subj' in word_to_dep[i][word]):
                for child in word_to_child[i][word_to_par[i][word]]:
                    if ('amod' in word_to_dep[i][child]
                            or 'advmod' in word_to_dep[i][child]):
                        aspects.append(word_to_par[i][word])
                        if (__name__ == '__main__'):
                            print("Rule 0 triggered.")
            '''
				Rule 1 (without sub): Very big to hold.
			'''
            if (word_to_dep[i][word] == 'xcomp' and
                ('JJ' in word_to_pos[i][index_to_word[word_to_par[i][word]]] or
                 'RB' in word_to_pos[i][index_to_word[word_to_par[i][word]]])):
                if (__name__ == '__main__'):
                    print("Rule 1 triggered")
                aspects.append(word_to_par[i][word])
            '''
				Rule 2 (without subj): Not to mention the price of the phone
			'''
            if (word_to_dep[i][word] == 'dobj' and 'VB'
                    in word_to_pos[i][index_to_word[(word_to_par[i][word])]]
                    and ('NN' in word_to_pos[i][index_to_word[(word)]]
                         or 'JJ' in word_to_pos[i][index_to_word[(word)]])):
                aspects.append(word)
                if (__name__ == '__main__'):
                    print("Rule 2 triggered")
                    print(word)
            '''
				Rule 3 (without subj): Love the sleekness of the player
			'''

            if ('NN' in word_to_pos[i][index_to_word[(word)]]
                    and word_to_dep[i][word] == 'nmod'):
                aspects.append(word_to_par[i][word])
                if (__name__ == '__main__'):
                    print("Rule 3 triggered")
                    print(word_to_par[i][word])
                '''
				Rule 4 (with sub): The battery lasts little 
				two aspects 
			'''
            if (word_to_dep[i][word] == 'advmod'
                    or word_to_dep[i][word] == 'amod' or word_to_dep[i][word]
                    == 'advcl') and ('VB' in word_to_pos[i][index_to_word[(
                        word_to_par[i][word])]]):
                aspects.append(word_to_par[i][word])
                for word2 in sent:
                    if (word2 != word and word_to_dep[i][word2] == 'nsubj'
                            and word_to_par[i][word2] == word_to_par[i][word]
                            and
                        ('NN' in word_to_pos[i][index_to_word[word2]]
                         or 'JJ' in word_to_pos[i][index_to_word[word2]])):
                        aspects.append(word2)
                        if (__name__ == '__main__'):
                            print("Rule 4 triggered")
                            print(word2)
                '''
				Rule 5 (with sub): I like the lens of this camera
			'''
            if ('NN' in word_to_pos[i][index_to_word[(word)]]
                    and word_to_dep[i][word] == 'dobj'):
                if (__name__ == '__main__'):
                    print("Rule 5 triggered")
                    print(word)
                try:
                    # strip the appended occurrence number before the lookup
                    concept_info = sn.concept(index_to_word[word])
                    print("present in senticnet")
                except KeyError:
                    print("Yay")
                    aspects.append(word)
            '''
				Rule 6 : I like the beauty of the screen.
				Check if senticnet condition should be added
			'''
            if ('NN' in word_to_pos[i][index_to_word[(word)]]
                    and word_to_dep[i][word] == 'dobj'):
                try:
                    concept_info = sn.concept(index_to_word[word])
                    aspects.append(word)
                    print("yay!")
                except KeyError:
                    print("oops, not there in SenticNet")
                for word2 in sent:
                    if (word2 != word and word_to_par[i][word2] == word and
                            'NN' in word_to_pos[i][index_to_word[(word2)]]):
                        aspects.append(word2)
                        if (__name__ == '__main__'):
                            print("Rule 6 triggered.")
                            print(word2)
            '''
				Rule 7 : I would like to comment on the camera of this phone. 
			
			'''
            if (word_to_dep[i][word] == 'xcomp'):
                try:
                    concept_info = sn.concept(index_to_word[word])
                    aspects.append(word)
                    print("yay!")
                except KeyError:
                    print("oops, not there in SenticNet")
                for child in word_to_child[i][word]:
                    if ('NN' in word_to_pos[i][index_to_word[child]]):
                        aspects.append(child)
                        if (__name__ == '__main__'):
                            print("Rule 7 triggered.")
                            print(word)
                            print(child)
            '''
				Rule 8 : The car is expensive.
			'''
            if (word_to_dep[i][word] == 'nsubj'):
                for word2 in sent:
                    if (word2 != word and word_to_dep[i][word2] == 'cop'
                            and word_to_par[i][word2] == word_to_par[i][word]):
                        aspects.append(word_to_par[i][word])
                        if (__name__ == '__main__'):
                            print("Rule 8 triggered")
                            print(word_to_par[i][word])
            '''			
				Rule 9 : The camera is nice.
			'''
            if (word_to_dep[i][word] == 'nsubj'
                    and 'NN' in word_to_pos[i][index_to_word[(word)]]):
                for word2 in sent:
                    if (word2 != word and word_to_dep[i][word2] == 'cop'
                            and word_to_par[i][word2] == word_to_par[i][word]):
                        aspects.append(word)
                        if (__name__ == '__main__'):
                            print("Rule 9 triggered")
                            print(word)
            '''
				Rule 10 : The phone is very lightweight to carry.
			'''
            if (word_to_dep[i][word] == 'cop'):
                for word2 in sent:
                    if (word2 != word
                            and 'VB' in word_to_pos[i][index_to_word[(word2)]]
                            and word_to_par[i][word] == word_to_par[i][word2]):
                        aspects.append(word2)
                        if (__name__ == '__main__'):
                            print("Rule 10 triggered.")
                            print(word2)
            '''
				Extracting mods of dobjs

			'''
            if (word_to_dep[i][word] == 'dobj'):
                for child in word_to_child[i][word]:
                    if ('mod' in word_to_dep[i][child] and 'JJ'
                            in word_to_pos[i][index_to_word[(child)]]):
                        aspects.append(child)
            '''
				Rule 11 : Checking for conjunctions
			'''
        for asp in aspects:  # aspects grows during iteration, chaining conjuncts
            for word in sent:
                if (word_to_dep[i][word] == 'conj'
                        and word_to_par[i][word] == asp):
                    aspects.append(word)
                    if (__name__ == '__main__'):
                        print("Rule conj triggered.")
                        print(word)

    finalIAC = set(aspects)
    finalIAC = [index_to_word[f] for f in finalIAC]
    finalIAC = [w for w in finalIAC if w not in stop_words]

    finalSenti = []
    for iac in finalIAC:
        try:
            concept_info = sn.concept((iac))
            finalSenti.append(iac)
        except KeyError:
            print("No word available for " + iac)

    return finalIAC, finalSenti
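
# A minimal call sketch (assumes a CoreNLP server is already listening on
# http://localhost:9001, as constructed inside get_clues):
if __name__ == '__main__':
    iacs, senti = get_clues('The camera is nice but the battery lasts little.')
    print(iacs)   # candidate aspect words
    print(senti)  # the subset also present in SenticNet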
Example #6
# Assumed import for this snippet: the Bag-of-Concepts package exposed as `boc`.
import boc

# Each line of the corpus file is treated as one document.
#boc_model=boc.BOCModel(doc_path="input corpus path")
boc_model = boc.BOCModel('text.txt')

#boc_model.context = text

# output can be saved with save_path parameter
boc_matrix, word2concept_list, idx2word_converter = boc_model.fit()

# SenticNet lexicon lookup
from senticnet.senticnet import SenticNet

sn = SenticNet()

text = 'love'  # `text` was undefined in this snippet; any SenticNet concept works here
concept_info = sn.concept(text)
polarity_value = sn.polarity_value(text)
polarity_intense = sn.polarity_intense(text)
moodtags = sn.moodtags(text)
semantics = sn.semantics(text)
sentics = sn.sentics(text)

print('==================================')
print('test: ', text)
print('concept_info: ', concept_info)
print('polarity_value: ', polarity_value)
print('polarity_intense: ', polarity_intense)
print('moodtags: ', moodtags)
print('semantics: ', semantics)
print('sentics: ', sentics)
print('==================================')
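
# Note: every lookup above raises KeyError for a concept missing from the
# lexicon, so a guarded variant like this sketch is usually safer:
try:
    print(sn.concept('love'))
except KeyError:
    print('not in SenticNet')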
def get_clues(text):
    print("*--------(%s)-------------*" % (text))
    print(type(text))
    nlp = StanfordCoreNLP('http://localhost:9001')
    stop_words = set(stopwords.words('english'))
    '''
		index_to_word (built below) maps each numbered word back to its plain form
	'''
    dep_parse = nlp.annotate(text,
                             properties={
                                 'annotators': 'depparse',
                                 'outputFormat': 'json',
                                 'timeout': 10000,
                             })

    pos = nlp.annotate(text,
                       properties={
                           'annotators': 'lemma',
                           'outputFormat': 'json',
                           'timeout': 10000,
                       })

    sn = SenticNet()
    word_to_dep = [{} for i in range(len(dep_parse['sentences']))]
    word_to_par = [{} for i in range(len(dep_parse['sentences']))]
    word_to_pos = [{} for i in range(len(dep_parse['sentences']))]
    word_to_lemma = [{} for i in range(len(dep_parse['sentences']))]
    word_to_child = [{} for i in range(len(dep_parse['sentences']))]
    sents = [[] for i in range(len(dep_parse['sentences']))]
    index_to_word = {}
    aspect_result = [[] for i in range(len(dep_parse['sentences']))]
    '''
		Constructing dicts for maintaining the dependencies among words. 
	'''
    '''
		Appending each word by occurrence number to maintain a distinct word count
	'''
    print("********")
    for i, sent in enumerate(dep_parse['sentences']):
        for dep in sent['basicDependencies']:
            word_to_dep[i][dep['dependentGloss'] +
                           str(dep['dependent'])] = dep['dep']
            word_to_par[i][dep['dependentGloss'] +
                           str(dep['dependent'])] = dep['governorGloss'] + str(
                               dep['governor'])
            index_to_word[dep['dependentGloss'] +
                          str(dep['dependent'])] = dep['dependentGloss']

            if (dep['governorGloss'] + str(dep['governor'])
                    not in word_to_child[i]):
                word_to_child[i][dep['governorGloss'] +
                                 str(dep['governor'])] = []
            if (dep['dependentGloss'] + str(dep['dependent'])
                    not in word_to_child[i]):
                word_to_child[i][dep['dependentGloss'] +
                                 str(dep['dependent'])] = []
            word_to_child[i][dep['governorGloss'] +
                             str(dep['governor'])].append(
                                 dep['dependentGloss'] + str(dep['dependent']))
            sents[i].append(dep['dependentGloss'] + str(dep['dependent']))
        word_to_dep[i]['ROOT0'] = 'root'
        word_to_par[i]['ROOT0'] = 'root'

    for i, sent in enumerate(pos['sentences']):
        for pos_tagger in sent['tokens']:
            word_to_pos[i][pos_tagger['word']] = pos_tagger['pos']
            word_to_lemma[i][pos_tagger['word']] = pos_tagger['lemma']
        word_to_pos[i]['ROOT'] = 'root'
        word_to_lemma[i]['ROOT'] = 'root'
    '''
		Displaying the deps
	'''

    ## Implementing rules to extract aspects
    for i, sent in enumerate(sents):
        if (__name__ == '__main__'):
            print(word_to_dep[i], word_to_par[i], word_to_pos[i],
                  word_to_lemma[i])
            print("Children==>")
            print(word_to_child[i])

    for i, sent in enumerate(sents):
        token_t = word_to_child[i]['ROOT0'][0]
        is_sub = False
        token_h = None
        for child in word_to_child[i][token_t]:
            if 'subj' in word_to_dep[i][child]:
                is_sub = True
                token_h = child

        # If a subject-noun relationship is present
        if is_sub:
            """
				Rule 0: if any adv or adj modifies the token t.

			"""
            for child in word_to_child[i][token_t]:
                if ('amod' in word_to_dep[i][child]
                        or 'advmod' in word_to_dep[i][child]):
                    try:
                        concept_info = sn.concept(index_to_word[child])
                        aspect_result[i].append(token_t)
                        if __name__ == '__main__':
                            print("Rule 1 triggered.")
                            print("present in senticnet")
                    except KeyError:
                        print("OOps")
            """
				Rule 1: The battery lasts little.

			"""
            for child in word_to_child[i][token_t]:
                if (word_to_dep[i][child] == 'advmod' or word_to_dep[i][child]
                        == 'amod' or word_to_dep[i][child] == 'advcl') and (
                            'VB' in word_to_pos[i][index_to_word[token_t]]):
                    aspect_result[i].append(token_t)
                    aspect_result[i].append(token_h)
                    if __name__ == '__main__':
                        print("Rule 1 triggered.")
                        print(token_t)
                        print(token_h)
            """
				Rule 2: I like the beauty of the screen (and I like the lens of this camera). 

			"""
            for child in word_to_child[i][token_t]:
                if (word_to_dep[i][child] == 'dobj'
                        and 'NN' in word_to_pos[i][index_to_word[child]]):
                    aspect_result[i].append(child)
                    if __name__ == '__main__':
                        print(child)
                    try:
                        concept_info = sn.concept(index_to_word[child])
                        if __name__ == '__main__':
                            print("Rule 2 triggered")
                        for grandchild in word_to_child[i][child]:
                            if ('NN' in word_to_pos[i][
                                    index_to_word[grandchild]]):
                                aspect_result[i].append(grandchild)
                                print(grandchild)
                    except KeyError:
                        print("OOps")
            """
				Rule 3: I would like to comment on the camera of this phone.
	
			"""
            for child in word_to_child[i][token_t]:
                if (word_to_dep[i][child] == 'xcomp'):
                    try:
                        sn.concept(index_to_word[child])
                        aspect_result[i].append(child)
                        if __name__ == '__main__':
                            print(child)
                    except KeyError:
                        print("OOps")
                    for grandchild in word_to_child[i][child]:
                        if ('NN' in word_to_pos[i][index_to_word[grandchild]]):
                            aspect_result[i].append(grandchild)
                            if __name__ == '__main__':
                                print(grandchild)
                                print("Rule 3 triggered.")
            """
				Rule 4: The car is expensive.

			"""
            for child in word_to_child[i][token_t]:
                if (word_to_dep[i][child] == 'cop'):
                    try:
                        sn.concept(word_to_lemma[i][index_to_word[token_t]])
                        aspect_result[i].append(token_t)
                        if __name__ == '__main__':
                            print("Rule 4 triggered")
                            print(token_t)
                    except KeyError:
                        pass
            """
				Rule 5: The camera is nice

			"""
            for child in word_to_child[i][token_t]:
                if (word_to_dep[i][child] == 'cop'
                        and 'NN' in word_to_pos[i][index_to_word[token_h]]):
                    aspect_result[i].append(token_h)
                    if __name__ == '__main__':
                        print("Rule 5 triggered.")
                        print(token_h)
            """
				Rule 6: 

			"""
            for child in word_to_child[i][token_t]:
                if (word_to_dep[i][child] == 'cop'):
                    for child2 in word_to_child[i][token_t]:
                        if (child != child2 and 'VB'
                                in word_to_pos[i][index_to_word[child2]]):
                            try:
                                sn.concept(index_to_word[token_t])
                                sn.concept(index_to_word[child2])
                                aspect_result[i].append(token_t)
                                aspect_result[i].append(child2)
                                if __name__ == '__main__':
                                    print("rule 6 trigg")
                                    print(token_t)
                                    print(child2)
                            except KeyError:
                                pass
        else:
            """
				Rule 7:Very big to hold.

			"""
            for word in sent:
                if ('RB' in word_to_pos[i][index_to_word[word]]
                        or 'JJ' in word_to_pos[i][index_to_word[word]]):

                    for child in word_to_child[i][word]:
                        if (word_to_dep[i][child] == 'xcomp'
                                or word_to_dep[i][child] == 'ccomp'):
                            aspect_result[i].append(word)
                            if __name__ == '__main__':
                                print("Rule 7 triggered")
                                print(word)
            """
				Rule 8: Love the sleekness of the player.
			"""
            for word in sent:
                for child in word_to_child[i][word]:
                    if ('NN' in word_to_pos[i][index_to_word[child]]
                            and word_to_dep[i][child] == 'nmod'):
                        for grandchild in word_to_child[i][child]:
                            if ('IN' in word_to_pos[i][
                                    index_to_word[grandchild]]):
                                aspect_result[i].append(word)
                                aspect_result[i].append(child)
                                if __name__ == '__main__':
                                    print(word)
                                    print(child)
                                    print("Rule 8 triggered.")
            """
				Rule 9: Not to mention the price of the phone.

			"""
            for word in sent:
                for child in word_to_child[i][word]:
                    if (word_to_dep[i][child] == 'dobj'):
                        aspect_result[i].append(child)
                        if __name__ == '__main__':
                            print(child)
                            print("Rule 9 triggered")
            '''
				Rule 11 : Checking for conjunctions
			'''
        for asp in aspect_result[i]:  # grows during iteration, chaining conjuncts
            for word in sent:
                if (word_to_dep[i][word] == 'conj'
                        and word_to_par[i][word] == asp):
                    aspect_result[i].append(word)
                    if (__name__ == '__main__'):
                        print("Rule conj triggered.")
                        print(word)

    finalIAC = [set(aspect_result[i]) for i in range(len(sents))]
    finalIAC = [[index_to_word[w] for w in finalIAC[i]]
                for i in range(len(sents))]

    print(finalIAC)
    singleFinalIAC = []
    for i in range(len(sents)):
        for w in finalIAC[i]:
            if w not in stop_words:
                singleFinalIAC.append(w)
    print(singleFinalIAC)

    finalSenti = []
    for iac in singleFinalIAC:
        try:
            concept_info = sn.concept((iac))
            finalSenti.append(iac)
        except KeyError:
            print("No word available for " + iac)

    return singleFinalIAC, finalSenti
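
# A quick smoke test for this variant, under the same assumption of a CoreNLP
# server on http://localhost:9001:
if __name__ == '__main__':
    print(get_clues('I like the lens of this camera.'))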
Example #8
    sisa = 0

    # print("Jumlah existing wordlist : {} ".format(len(existWordlistDepression)))
    #here it is
    for wordExist in existWordlistDepression:
        word = wordExist['word']
        tokens = word_tokenize(word)

        for token, tag in pos_tag(tokens):
            lemma = wn_lemmater.lemmatize(token, tag_map[tag[0]])
            print("lemma asli : {}".format(lemma))
            if lemma not in lemmaOfExistWordlist:
                lemmaOfExistWordlist.append(lemma)  # 685 unique lemmas found in the DB
                try:
                    syns = wordnet.synsets(lemma)
                    concept_info = sn.concept(lemma)
                    concept_info_origin = concept_info

                    conceptExist.append(concept_info)
                    # wrap the word in an object (the original word object)
                    senticwordObj = WordList_sentic(None, lemma, concept_info)

                    if (float(concept_info['polarity_intense']) < 0):  # proven negative
                        # print(concept_info['polarity_intense'])
                        # conceptExistNegative.append(concept_info)
                        objsConceptExistNegative.append(senticwordObj)
                    elif (float(concept_info['polarity_intense']) > 0):  # not negative in SenticNet
                        # SYNONYMS
                        # check WordNet synonyms when the word itself is not marked negative
                        print("positive lemma entering the negative check : {}".format(lemma))
from senticnet.senticnet import SenticNet

teste = []
sn = SenticNet('pt')
concept_info = sn.concept('amor')
polarity_value = sn.polarity_value('amor')
polarity_intense = sn.polarity_intense('amor')
moodtags = sn.moodtags('amor')
semantics = sn.semantics('amor')
sentics = sn.sentics('amor')

teste.append(concept_info)

print(teste)