import pprint

# Client assumed: the pycorenlp wrapper (pip install pycorenlp), which talks
# to a running CoreNLP server over HTTP.
from pycorenlp import StanfordCoreNLP

if __name__ == '__main__':
    nlp = StanfordCoreNLP('http://localhost:9000')
    # text = "Non tolerance was gandhijis weapon."
    # text = ("We went to pitapit,it can be expensive but not hygienic.")
    # text = ("The dishes at Alkareem are highly recommended.")
    text = ("The sitting which is mostly outdoor is the prettiest you can come across in CP")
    # text = ('I loved The Crispy Vegetables but found the Wontons to be devoid of any flavor')
    # text = ("delicious veg manchurian.")
    # text = ('London is good at studies but bad at sports.')
    # text = ("The tiger prawns here,it doesn't get better.")
    # text = ('Check out the pics to find out who greeted me on my first visit to Bercos CP branch, it can be expensive but not hygienic.')
    output = nlp.annotate(text, properties={
        'annotators': 'tokenize,ssplit,pos,depparse,parse,ner',
        'outputFormat': 'json'
    })
    # pprint.pprint(output)

    # Constituency parse tree of the first sentence.
    tree = output['sentences'][0]['parse']
    print tree

    # Collapsed dependencies: one dict per relation, with governor/dependent
    # token indices and glosses.
    x = output['sentences'][0]['collapsed-dependencies']
    # pprint.pprint(x)
    print '-------------------------------------------------'
    for i in range(len(x)):
        print (x[i]['dep'] + '-->' + x[i]['governorGloss'] + '-' +
               str(x[i]['governor']) + ' ' + x[i]['dependentGloss'] + '-' +
               str(x[i]['dependent']))
    # print(output['sentences'][0]['parse'])
    # output = nlp.tokensregex(text, pattern='/Pusheen|Smitha/', filter=False)
    # print(output)
    # output = nlp.semgrex(text, pattern='{tag: VBD}', filter=False)
    # print(output)
    print '-------------------------------------------------'
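# Both the script above and typedependencies() below assume a CoreNLP server
# already listening on localhost:9000. A typical way to start one, run from
# inside the unpacked CoreNLP distribution (memory/timeout values here are
# illustrative, not taken from this project):
#
#   java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer \
#       -port 9000 -timeout 15000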
def typedependencies(sent_list, neg_words, compound_word_list):
    """Run CoreNLP over each sentence in sent_list.

    Returns a dict mapping sentence index to its dependency tuples, a
    word -> POS-tag dict, and a list of proper nouns (NNP*). Negated
    terms are appended to neg_words and merged compound words to
    compound_word_list in place.
    """
    pos_dict = {}
    depend_dict = {}
    depend_list = []
    proper_names = []
    # neg_words = []
    compound_dic = {}
    nlp = StanfordCoreNLP('http://localhost:9000')
    for i in range(len(sent_list)):
        compound_list = []
        print sent_list[i]
        output = nlp.annotate(sent_list[i], properties={
            'annotators': 'tokenize,ssplit,pos,depparse,parse,ner',
            'outputFormat': 'json'
        })
        # pprint.pprint(output)
        x = output['sentences'][0]['basic-dependencies']
        # pprint.pprint(output['sentences'][0]['parse'])
        # pprint.pprint(x)
        # print '-------------------------------------------------'
        for j in range(len(x)):
            # Collect the tokens taking part in compound relations.
            if 'compound' in x[j]['dep']:
                # compound_word(x[j])
                ll = [x[j]['governorGloss'], x[j]['governor'],
                      x[j]['dependentGloss'], x[j]['dependent']]
                compound_dic[x[j]['governor']] = x[j]['governorGloss']
                compound_dic[x[j]['dependent']] = x[j]['dependentGloss']
                # compound_list.append(ll)
            d = [x[j]['dep'], x[j]['governorGloss'], str(x[j]['governor']),
                 x[j]['dependentGloss'], str(x[j]['dependent'])]
            depend_list.append(d)
            # Getting the negated words.
            if 'neg' in x[j]['dep']:
                x1 = x[j]['governorGloss'].lower()
                x2 = x[j]['dependentGloss'].lower()
                if x1 not in stopwords:
                    neg_words.append([x1, x[j]['governor']])
                else:
                    neg_words.append([x2, x[j]['dependent']])
            if 'conj' in x[j]['dep']:
                x1 = x[j]['governorGloss'].lower()
                x2 = x[j]['dependentGloss'].lower()
                if x1 in neg_prefix:
                    neg_words.append([x2, x[j]['dependent']])
                # elif (x2 == 'not' or x2 == 'nor' or x2 == 'non'):
                #     neg_words.append(x1)
                elif x2 in neg_prefix:
                    neg_words.append([x1, x[j]['governor']])
            print (x[j]['dep'] + '-->' + x[j]['governorGloss'] + '-' +
                   str(x[j]['governor']) + ' ' + x[j]['dependentGloss'] +
                   '-' + str(x[j]['dependent']))
        print '==================================='
        for key, value in sorted(compound_dic.items()):
            compound_list.append([key, value])
        # print compound_word(compound_list)
        compound_dic.clear()
        # Word -> POS tag map; NNP/NNPS tokens are treated as proper names.
        y = output['sentences'][0]['tokens']
        for k in range(len(y)):
            pos_dict[y[k]['word']] = y[k]['pos']
            if 'NNP' in y[k]['pos']:
                proper_names.append(y[k]['word'])
        depend_dict[i] = depend_list
        depend_list = []
        if len(compound_list) > 0:
            w = compound_word(compound_list)
        else:
            w = []
        for jj in range(len(w)):
            if w[jj] != '':
                print w[jj]
                compound_word_list.append(w[jj])
    print '--------NAMES------' + str(proper_names)
    print '--------NEGATIVE----' + str(neg_words)
    return depend_dict, pos_dict, proper_names
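# Example call (a minimal sketch, not from the original code). The function
# also relies on module-level `stopwords`, `neg_prefix`, and `compound_word`
# being defined elsewhere in this project:
#
#   neg_words, compound_word_list = [], []
#   depend_dict, pos_dict, proper_names = typedependencies(
#       ["The dishes at Alkareem are highly recommended."],
#       neg_words, compound_word_list)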
# Demo of the `stanfordcorenlp` wrapper, which manages a local CoreNLP
# install directly (a different client from the pycorenlp one used above;
# both packages export a class named StanfordCoreNLP, so this fragment reads
# as a separate script). `local_corenlp_path` is assumed to point at an
# unpacked CoreNLP distribution; it is defined earlier in the wrapper's
# original demo.
import time

from stanfordcorenlp import StanfordCoreNLP

time.sleep(10)

# Other human languages are supported, e.g. Chinese.
nlp = StanfordCoreNLP(local_corenlp_path, lang='zh', quiet=False)
sentence = '清华大学位于北京。'  # "Tsinghua University is located in Beijing."
print(nlp.word_tokenize(sentence))
print(nlp.pos_tag(sentence))
print(nlp.ner(sentence))
print(nlp.parse(sentence))
print(nlp.dependency_parse(sentence))
del nlp
time.sleep(10)

# General Stanford CoreNLP API.
nlp = StanfordCoreNLP(local_corenlp_path, memory='8g', lang='zh')
print(nlp.annotate(sentence))
del nlp
time.sleep(10)

nlp = StanfordCoreNLP(local_corenlp_path)
text = 'Guangdong University of Foreign Studies is located in Guangzhou. ' \
       'GDUFS is active in a full range of international cooperation and exchanges in education. '
print(nlp.annotate(text, properties={
    'annotators': 'tokenize,ssplit,pos',
    'pipelineLanguage': 'en',
    'outputFormat': 'xml'
}))