import pprint

# Assumed client: the usage below (server URL + annotate(properties=...))
# matches the pycorenlp package, which talks to a CoreNLP server over HTTP.
from pycorenlp import StanfordCoreNLP

if __name__ == '__main__':
    nlp = StanfordCoreNLP('http://localhost:9000')
    # text = "Non tolerance was gandhijis weapon."
    # text = ("We went to pitapit,it can be expensive but not hygienic.")
    # text = ("The dishes at Alkareem are highly recommended.")
    text = ("The sitting which is mostly outdoor is the prettiest you can come across in CP")
    # text = ('I loved The Crispy Vegetables but found the Wontons to be devoid of any flavor')
    # text = ("delicious veg manchurian.")
    # text = ('London is good at studies but bad at sports.')
    # text = ("The tiger prawns here,it doesn't get better.")
    # text = ('Check out the pics to find out who greeted me on my first visit to Bercos CP branch, it can be expensive but not hygienic.')
    
    output = nlp.annotate(text, properties={
        'annotators': 'tokenize,ssplit,pos,depparse,parse,ner',
        'outputFormat': 'json'
    })
    # pprint.pprint(output)
    tree = output['sentences'][0]['parse']
    print(tree)
    x = output['sentences'][0]['collapsed-dependencies']
    # pprint.pprint(x)
    print('-------------------------------------------------')
    for dep in x:
        print(dep['dep'] + '-->' + dep['governorGloss'] + '-' +
              str(dep['governor']) + ' ' + dep['dependentGloss'] + '-' +
              str(dep['dependent']))
    # print(output['sentences'][0]['parse'])
    # output = nlp.tokensregex(text, pattern='/Pusheen|Smitha/', filter=False)
    # print(output)
    # output = nlp.semgrex(text, pattern='{tag: VBD}', filter=False)
    # print(output)
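
# Both REST examples in this file assume a CoreNLP server is already
# listening on localhost:9000, typically started from the CoreNLP
# distribution directory with:
#   java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000
# A minimal reachability check (the helper name is illustrative, not part of
# the original script):
import requests

def corenlp_server_is_up(url='http://localhost:9000', timeout=5):
    """Return True if the CoreNLP server answers at its root URL."""
    try:
        return requests.get(url, timeout=timeout).status_code == 200
    except requests.RequestException:
        return False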


def typedependencies(sent_list, neg_words, compound_word_list):
    """Parse each sentence with CoreNLP and collect dependency information.

    Returns (depend_dict, pos_dict, proper_names). neg_words and
    compound_word_list are filled in place; stopwords, neg_prefix, and
    compound_word() are expected to be defined at module level.
    """
    pos_dict = {}
    depend_dict = {}
    depend_list = []
    proper_names = []
    # neg_words = []
    compound_dic = {}

    nlp = StanfordCoreNLP('http://localhost:9000')
    for i in range(len(sent_list)):
        compound_list = []
        print(sent_list[i])
        output = nlp.annotate(sent_list[i],
                              properties={
                                  'annotators':
                                  'tokenize,ssplit,pos,depparse,parse,ner',
                                  'outputFormat': 'json'
                              })
        # pprint.pprint(output)
        x = output['sentences'][0]['basic-dependencies']
        # pprint.pprint(output['sentences'][0]['parse'])
        # pprint.pprint(x)
        # print '-------------------------------------------------'
        for j in range(len(x)):

            # Record both halves of a compound relation, keyed by token
            # index so they can later be sorted back into surface order.
            if 'compound' in x[j]['dep']:
                compound_dic[x[j]['governor']] = x[j]['governorGloss']
                compound_dic[x[j]['dependent']] = x[j]['dependentGloss']

            d = [
                x[j]['dep'], x[j]['governorGloss'],
                str(x[j]['governor']), x[j]['dependentGloss'],
                str(x[j]['dependent'])
            ]
            depend_list.append(d)

            # Collect negated words: keep whichever side of the 'neg'
            # relation is not a stop word (stopwords is module-level).
            if 'neg' in x[j]['dep']:
                x1 = x[j]['governorGloss'].lower()
                x2 = x[j]['dependentGloss'].lower()
                if x1 not in stopwords:
                    neg_words.append([x1, x[j]['governor']])
                else:
                    neg_words.append([x2, x[j]['dependent']])

            # A conjunction with a negation marker (e.g. "but not X")
            # negates the conjoined word (neg_prefix is module-level).
            if 'conj' in x[j]['dep']:
                x1 = x[j]['governorGloss'].lower()
                x2 = x[j]['dependentGloss'].lower()
                if x1 in neg_prefix:
                    neg_words.append([x2, x[j]['dependent']])
                elif x2 in neg_prefix:
                    neg_words.append([x1, x[j]['governor']])

            print(x[j]['dep'] + '-->' + x[j]['governorGloss'] + '-' +
                  str(x[j]['governor']) + ' ' + x[j]['dependentGloss'] + '-' +
                  str(x[j]['dependent']))
        print('===================================')

        for key, value in sorted(compound_dic.items()):
            compound_list.append([key, value])
        # print compound_word(compound_list)
        compound_dic.clear()

        # Map each token to its POS tag and collect proper nouns (NNP/NNPS).
        y = output['sentences'][0]['tokens']
        for k in range(len(y)):
            pos_dict[y[k]['word']] = y[k]['pos']
            if 'NNP' in y[k]['pos']:
                proper_names.append(y[k]['word'])

        depend_dict[i] = depend_list
        depend_list = []

        # compound_word() (defined elsewhere in the project) merges the
        # sorted (index, word) pairs into multi-word terms.
        if len(compound_list) > 0:
            w = compound_word(compound_list)
        else:
            w = []
        for jj in range(len(w)):
            if w[jj] != '':
                print(w[jj])
                compound_word_list.append(w[jj])

    print('--------NAMES------' + str(proper_names))
    print('--------NEGATIVE----' + str(neg_words))
    return depend_dict, pos_dict, proper_names
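
# A hypothetical driver for typedependencies(). The names stopwords,
# neg_prefix, and compound_word are expected at module level by the
# function; their real definitions live elsewhere in the project, so
# placeholders stand in here.
stopwords = {'it', 'this', 'that', 'the'}        # placeholder stop list
neg_prefix = {'not', 'nor', 'non', 'neither'}    # placeholder negation markers

def compound_word(pairs):
    # Placeholder: join the sorted (index, word) pairs into a single phrase.
    return [' '.join(word for _, word in pairs)]

sents = ["I loved The Crispy Vegetables but found the Wontons to be bland."]
negs, compounds = [], []
deps, pos_tags, names = typedependencies(sents, negs, compounds)
print(deps[0])    # dependency 5-tuples for the first sentence
print(names)      # proper nouns seen across all sentences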
Example #5
import time

# This example uses the stanfordcorenlp wrapper, which launches its own
# CoreNLP backend from a local distribution; the path below is illustrative
# and should point at your unzipped CoreNLP directory.
from stanfordcorenlp import StanfordCoreNLP

local_corenlp_path = r'./stanford-corenlp-full-2018-02-27'  # illustrative path

time.sleep(10)  # give any previously launched backend time to shut down
# Other human languages support, e.g. Chinese
nlp = StanfordCoreNLP(local_corenlp_path, lang='zh', quiet=False)

sentence = '清华大学位于北京。'
print(nlp.word_tokenize(sentence))
print(nlp.pos_tag(sentence))
print(nlp.ner(sentence))
print(nlp.parse(sentence))
print(nlp.dependency_parse(sentence))

del nlp
time.sleep(10)
# General Stanford CoreNLP API
nlp = StanfordCoreNLP(local_corenlp_path, memory='8g', lang='zh')
print(nlp.annotate(sentence))

del nlp
time.sleep(10)
nlp = StanfordCoreNLP(local_corenlp_path)

text = 'Guangdong University of Foreign Studies is located in Guangzhou. ' \
       'GDUFS is active in a full range of international cooperation and exchanges in education. '
print(
    nlp.annotate(text,
                 properties={
                     'annotators': 'tokenize,ssplit,pos',
                     'pipelineLanguage': 'en',
                     'outputFormat': 'xml'
                 }))
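
# The stanfordcorenlp wrapper keeps the Java backend running between calls;
# its README recommends releasing it explicitly when done, since the server
# holds a lot of memory:
nlp.close()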