def Class_ification(sentence):  # classify a piece of text
    plot.rcParams['font.sans-serif'] = ['SimHei']
    plot.rcParams['axes.unicode_minus'] = False
    nlp = BosonNLP('TPDuivpZ.27572.rVuPCI9-kUlN')
    result = nlp.classify(sentence)
    info = {
        0: "体育", 1: "教育", 2: "财经", 3: "社会", 4: "娱乐",
        5: "军事", 6: "国内", 7: "科技", 8: "互联网", 9: "房产",
        10: "国际",  # BosonNLP's published category table has 国际 at index 10
        11: "女人", 12: "汽车", 13: "游戏",
    }
    DG = nx.DiGraph()
    plot.figure(figsize=(3, 3))
    plot.subplot(1, 1, 1)
    plot.title('文本分类', color='red', fontsize=15)
    DG.add_node(info[result[0]])
    nx.draw(DG, with_labels=True, node_size=6000, node_color='lightblue')
    plot.show()
def getAnswerKeys(text_set, api_key):
    keys = []
    nlp = BosonNLP(api_key)
    result = nlp.extract_keywords(text_set, top_k=30)
    for weight, word in result:
        keys.append((word, int(weight * 10)))
    return keys
def sentiment(contents):
    sentiments = {}
    nlp = BosonNLP(boson_token)
    for model in models:
        result = nlp.sentiment(contents, model=model)
        sentiments[model_to_name[model]] = result
    return sentiments
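# sentiment() above relies on module-level configuration the snippet does not
# show. A minimal sketch under assumed names (the token is a placeholder;
# 'weibo' is a model name used elsewhere in this collection, 'general' is
# assumed as the default model):
boson_token = 'YOUR_BOSON_API_TOKEN'
models = ['general', 'weibo']
model_to_name = {'general': 'General model', 'weibo': 'Weibo model'}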
def maintain():
    apitoken = "ZUKLt9tO.24611.KI1wUPXknGRP"
    nlp = BosonNLP(apitoken)
    conn = pymysql.connect(host='123.206.68.192', port=3306, user='******',
                           passwd='', db='news', charset='utf8')
    cur = conn.cursor()
    cur.execute("DELETE FROM `data` WHERE context = 'error'")
    conn.commit()
    cur.execute("SELECT * FROM `data` WHERE abstract = 'error'")
    data = cur.fetchall()
    for entry in data:
        result = nlp.summary('', entry[3], 50).replace('\n', '')
        if result == 'error':
            print '[Deleted] wrong entry: ' + str(entry)
            cur.execute("DELETE FROM `data` WHERE ID = %s", (entry[0],))
        else:
            cur.execute("UPDATE `data` SET abstract = %s WHERE ID = %s",
                        (result, entry[0]))
    cur.close()
    conn.commit()
    conn.close()
def classify(contents):
    nlp = BosonNLP(boson_token)
    result = nlp.classify(contents)
    topics = []
    for topic in result:
        topics.append(topic_to_id[topic])
    return topics
def execute(news, vectors, statements):
    nlp = BosonNLP('3KJW0U-I.24870.1PdhvJB30HgY')
    # print("\nsentiment analysis")
    for i in range(len(news)):
        combine = ""
        for s in news[i].Sentences:
            combine += s
        # print(news[i].PressName, "\n", combine, "\n", nlp.sentiment(combine)[0][0])
    # print("")
    for new in news:
        new.Coordinate = np.zeros(len(statements))
        for i in range(len(statements)):
            if statements[i] in new.Sentences:
                for j in range(len(vectors)):
                    new.Coordinate[j] += vectors[i][j]
        # normalize the coordinate vector
        nomalization(new.Coordinate)
        # append a sentiment-analysis dimension to the news coordinates
        combine = ""
        for s in new.Sentences:
            combine += s
        sentiment = nlp.sentiment(combine)
        new.Sentiment = sentiment[0][0]
        # print(new.PressName, "\n", combine, "\n", sentiment[0][0])
        # new.Coordinate.append(sentiment[0][0])
        new.Coordinate = np.append(new.Coordinate, sentiment[0][0] * len(vectors) / 2)
        print("%s %s" % (new.PressName, new.Coordinate))
    distance_matrix = generate_distance_matrix(news)
    print("\nnews distance matrix\n", distance_matrix)
    return distance_matrix
def ConfirmMuseum(text, museum, textid):
    # nlp = BosonNLP('SeJUopMY.24669.6kCKU4ruI3ss')
    # nlp = BosonNLP('lMdMTyuV.24544.0VHv6klp6Pk6')
    nlp = BosonNLP('sjWBhf9i.24699.rQmsCad9c3Jv')
    try:
        flag = 0
        text = text[0:1000]
        result = nlp.ner(text)[0]
        words = result['word']
        entities = result['entity']
        for entity in entities:
            if entity[2] == 'org_name':
                org_name = ''.join(words[entity[0]:entity[1]])
                if museum in org_name:
                    flag = 1
                    break
            elif entity[2] == 'location':
                location = ''.join(words[entity[0]:entity[1]])
                if museum in location:
                    flag = 1
                    break
        if flag:
            print('Confirm!')
            return 1
        else:
            print('Not!')
            return 0
    except KeyError as e:
        print('exit in %s' % textid)
        print(e)
def getKeyWords(command):
    nlp = BosonNLP("IrtCRUKX.4360.giOuq49VR3V-")
    r = nlp.extract_keywords(command, top_k=3)
    l = []
    for k, v in r:
        v = v.encode('utf8')
        l.append(v)
    return l
def dict_mining():
    nlp = BosonNLP('6cfIzKI1.27567.fLaZOvRXwl8f')
    s = ['整流级逆变级滤波器负载三相检测abcdq双SVM控制dqabcADRCADRCaubucu*du*quotωotωinvTrecTuqud图3基于ADRC的TSMC闭环控制系统框图Fig.3Closed-loopcontroldiagramofTSMCbasedonADRCADRC采用图1结构。',
         '但励磁绕组时间常数较大,闭环控制系统的截止频率较低,影响发电机输出端电压的响应速度。']
    data = nlp.depparser(s)
    nouns = extract_noun(data)
    print(nouns)
def __init__(self, api_token=None):
    assert api_token is not None, "Please provide an API token"
    self.token = api_token
    self.nlp = BosonNLP(self.token)
def bosonnlp_segmentation(str_list):
    nlp = BosonNLP('NBSC61pl.10304.Fnwc_rUz9fyw')
    result = nlp.tag(str_list)
    for tag_map in result:
        word_tokens = tag_map['word']
        for word in word_tokens:
            print word.encode("utf-8") + "|",
        print "\n"
def getKeyWords(command):
    nlp = BosonNLP("ofW2OZMI.4712.UzT0VvLGGkdi")
    r = nlp.extract_keywords(command, top_k=3)
    l = []
    for k, v in r:
        v = v.encode('utf8')
        l.append(v)
    return l
def extract_keywords(text, top_num=10):
    """Extract keywords."""
    # NOTE: replace with your own API token when testing
    nlp = BosonNLP('')
    result = nlp.extract_keywords(text, top_k=top_num)
    result_dict = {k: v for (v, k) in result}
    return result_dict
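# A usage sketch for extract_keywords() above: the API yields (weight, word)
# pairs, which the comprehension flips into a word -> weight dict. This
# assumes a valid token has been filled in inside the function; the sample
# text is a placeholder.
keywords = extract_keywords('这家味道还不错', top_num=5)
for word, weight in keywords.items():
    print(word, weight)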
def Entity_extraction(text):
    nlp = BosonNLP("x-gOGutn.27554.G6_6QvdJafES")
    rest = nlp.ner(text)[0]
    print(rest)
    words = rest['word']
    entities = rest['entity']
    tags = rest['tag']
    for entity in entities:
        print(" ".join(words[entity[0]:entity[1]]), entity[2])
def test_custom_requests_session():
    import requests
    session = requests.session()
    nlp = BosonNLP(os.environ['BOSON_API_TOKEN'], session=session)
    assert nlp.session == session
    result = nlp.sentiment(['再也不来了', '美好的世界'])
    assert result[0][1] > result[0][0]
    assert result[1][0] > result[1][1]
class YoNLP:
    def __init__(self, boson_api_token):
        self._nlp = BosonNLP(boson_api_token)

    def sentiment(self, contents):
        return self._nlp.sentiment(contents)

    def tag(self, contents):
        return self._nlp.tag(contents)
class CNSegment:
    """Wrapper around a word-segmentation tool, using the API provided by bosonnlp."""

    # stop-word list
    stopwords = []

    def __init__(self):
        self.nlp = BosonNLP(bosonkey)

    def get_tags(self, sentences):
        """Run segmentation.

        :param sentences: a sentence or list of sentences to segment
        :return: list of segmentation results
        """
        result = self.nlp.tag(sentences)
        return result

    def denoisingOne(self, tagdict, uTag=None, useStopWord=False):
        """Remove noise via POS tags and stop words.

        :param tagdict: one result dict produced by segmentation
        :param uTag: POS tags to filter out, defaults to ('w', 'o', 'y', 'u')
        :return: list of remaining words
        """
        if uTag:
            uselessTag = uTag
        else:
            uselessTag = ('w', 'o', 'y', 'u')
        word_list = []
        for index, it in enumerate(tagdict['tag']):
            if it[0] not in uselessTag:
                if not useStopWord:
                    word_list.append(tagdict['word'][index])
                elif tagdict['word'][index] not in self.stopwords:
                    word_list.append(tagdict['word'][index])
        return word_list

    def cut(self, sentences):
        """Segment a corpus and remove noise.

        :param sentences: the corpus to segment
        :return: list of denoised word lists
        """
        tags = self.get_tags(sentences)
        cutedSentences = []
        for sentence in tags:
            cutedSentences.append(self.denoisingOne(sentence))
        return cutedSentences

    def depenPars(self, sentences):
        return self.nlp.depparser(sentences)
def segment_tag(text):
    nlp = BosonNLP('2DgGSC-8.33497.8yeNchBP6L9n')
    result = nlp.tag(text)
    words = result[0]['word']
    tags = result[0]['tag']
    assert len(words) == len(tags)
    return words, tags
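# Usage sketch for segment_tag() above: tag() returns parallel 'word' and
# 'tag' lists, so the pairs can be zipped for display. The sample sentence is
# borrowed from another snippet in this collection.
words, tags = segment_tag('这家味道还不错')
print(' '.join('%s/%s' % pair for pair in zip(words, tags)))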
def getAnswerNounKeys(text_set, api_key):
    nlp = BosonNLP(api_key)
    result = nlp.tag(text_set)
    words = ''
    for d in result:
        for it in zip(d['word'], d['tag']):
            if it[1] == 'n':
                words += it[0]
                # print(' '.join(['%s/%s' % it]))
    return getAnswerKeys(words, api_key)
def senti_by_sdk(self):
    nlp_obj = BosonNLP(self.__TOKEN)
    senti_results = nlp_obj.sentiment(self.texts, model=self.model)
    print(senti_results, flush=True)
    # check how many API calls remain
    limit_remain = self.senti_limits_remaining()
    print("BosonNLP remaining calls: {}".format(limit_remain), flush=True)
    return senti_results
def __init__(self, opt):
    self.opt = opt
    self.sep = opt.seprator + " "
    if opt.cuda:
        torch.cuda.set_device(opt.gpu)
    self.bpe = BPE(codecs.open(self.opt.bpe_codes, 'r', encoding="UTF-8"),
                   self.opt.seprator, None, None)
    self.translator = onmt.Translator(opt)
    self.nlp = BosonNLP("NGhNiav2.16134.DvyEDmGzYd2S")
def getAbstract(allContext):
    nlp = BosonNLP(apitoken)
    ret = []
    for i, text in enumerate(allContext):
        try:
            print("handling %dth abstract from buaa" % (i + 1))
            result = nlp.summary('', text, 50)
            ret.append(result.replace('\n', ''))
        except Exception:
            print("error when handling %dth abstract from buaa" % (i + 1))
            ret.append('error')
            traceback.print_exc()
    return ret
class QueryParser(object):
    def __init__(self):
        self.nlp = BosonNLP(bosonnlp_token)

    def parse(self, query_string):
        """
        input: 7月22号 北京到上海的高铁票
        output:
        [{'entity': [[0, 3, 'time'], [3, 4, 'location'], [5, 6, 'location']],
          # entity patterns depend on the surrounding context
          'tag': ['t', 'm', 'q', 'ns', 'p', 'ns', 'ude', 'n', 'n'],
          'word': ['7月', '22', '号', '北京', '到', '上海', '的', '高铁', '票']}]
        """
        result = self.nlp.ner(query_string)[0]
        words = result['word']
        tags = result['tag']
        entities = result['entity']
        return (words, entities, tags)

    def get_entity(self, parsed_words, index_tuple):
        """Fetch a recognized entity by slicing the parsed words.

        input:
            parsed_words: the segmented words
            index_tuple: (begin, end) pair for the entity span
        """
        return parsed_words[index_tuple[0]:index_tuple[1]]

    def format_entities(self, entities):
        """Give the entity tuples named fields."""
        namedentity = collections.namedtuple('namedentity', 'index_begin index_end entity_name')
        return [namedentity(entity[0], entity[1], entity[2]) for entity in entities]

    def get_format_time(self, time_entity):
        """
        output {'timestamp': '2013-02-28 16:30:29', 'type': 'timestamp'}
        """
        basetime = datetime.datetime.today()
        result = self.nlp.convert_time(time_entity, basetime)
        # print(result)
        timestamp = result["timestamp"]
        return timestamp.split(" ")[0]
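# A usage sketch for QueryParser above, assuming `bosonnlp_token` is defined
# at module level; the flow follows the docstrings: parse, name the entity
# tuples, then slice out each recognized span.
parser = QueryParser()
words, entities, tags = parser.parse('7月22号 北京到上海的高铁票')
for ne in parser.format_entities(entities):
    span = parser.get_entity(words, (ne.index_begin, ne.index_end))
    print(ne.entity_name, ''.join(span))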
def bosonnlpNER(news):
    from bosonnlp import BosonNLP
    nlp = BosonNLP('cKWUytiR.34676.f5F2YbS_EyX2')
    ner = nlp.ner(news)[0]
    print(ner)
    words = ner['word']
    entity = ner['entity']
    N = []
    # record where each entity starts and ends; k:v = start : end
    entity_start = {}
    for e in entity:
        if e[2] in {'org_name', 'person_name'}:
            entity_start[e[0]] = e[1]
            N.append([''.join(words[e[0]:e[1]]), e[2]])
    return N, entity_start, words
def getAbstract(self, allContext):
    apitoken = "XB2l3mQj.14588.GJCICyNoqghJ"
    nlp = BosonNLP(apitoken)
    ret = []
    for i, text in enumerate(allContext):
        try:
            print("handling %dth abstract from %s" % (i + 1, self._school))
            result = nlp.summary('', text, 50)
            ret.append(result.replace('\n', ''))
        except Exception:
            print("error when handling %dth abstract from %s" % (i + 1, self._school))
            ret.append('error')
            traceback.print_exc()
    return ret
def _boson_seg(self, text):
    nlp = BosonNLP('g8lQg9Mv.25818.fAbbwt6TYhh8')
    if type(text) == str:
        text = [text]
    corpus_len = len(text)
    word, tag = [], []
    # call the tag API in chunks of at most 100 documents
    for idx in range(corpus_len // 100 + 1):
        curr_idx = idx * 100
        result = nlp.tag(text[curr_idx:min(curr_idx + 100, corpus_len)])
        for seg in result:
            word.append(seg['word'])
            tag.append(seg['tag'])
    return word
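# The same chunking pattern as a small reusable helper (a sketch; the
# 100-document batch size is taken from _boson_seg above, not from any
# documented API limit):
def batched(seq, size=100):
    """Yield consecutive slices of `seq`, each at most `size` items long."""
    for start in range(0, len(seq), size):
        yield seq[start:start + size]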
def __init__(self):
    args = Arguments('RSJ')
    self.rsj = RSJ(args)
    self.rsj.restore_model()
    APP_ID = '14465679'
    API_KEY = 'DDNA68lRaVxKCUHP13t79acC'
    SECRET_KEY = 'RisCmApExjn5hcSH0KHul71Uldza8vDe'
    self.feature_maps = {}
    with open('../data/feature_maps.txt', encoding='utf8') as r:
        for line in r:
            features = line.split(' ')
            self.feature_maps[features[0]] = features
    self.client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
    self.boson = BosonNLP('m9YSebz-.27886.-Jh0KNhk__Q2')
def ScentenceSimilar(str1, str2):
    """Compute the similarity of str1 and str2 using cosine similarity.

    Uses bosonnlp for segmentation; requires network access.
    """
    nlp = BosonNLP('wx3Ua05Y.21658.Ch876jBfuqIH')
    # fetch the segmentation results
    tags1 = nlp.tag(str1.lower())
    tags2 = nlp.tag(str2.lower())
    tfdict1 = getTFdict(Denoising(tags1[0]))
    tfdict2 = getTFdict(Denoising(tags2[0]))
    return getSimilar_by_cos(tfdict1, tfdict2)
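# ScentenceSimilar() depends on helpers that are not shown. A minimal sketch
# of what getTFdict and getSimilar_by_cos might look like (hypothetical
# implementations, assuming Denoising() yields a list of words):
import math

def getTFdict(words):
    # term-frequency dictionary: word -> count
    tf = {}
    for w in words:
        tf[w] = tf.get(w, 0) + 1
    return tf

def getSimilar_by_cos(tf1, tf2):
    # cosine similarity between two sparse term-frequency vectors
    dot = sum(tf1[w] * tf2[w] for w in tf1 if w in tf2)
    norm1 = math.sqrt(sum(v * v for v in tf1.values()))
    norm2 = math.sqrt(sum(v * v for v in tf2.values()))
    if norm1 == 0 or norm2 == 0:
        return 0.0
    return dot / (norm1 * norm2)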
def words_cut(txt_lines, isJieba=True):
    # segment the text, returning a list
    text_cut = []
    if isJieba:
        for line in txt_lines:
            line = line.strip()  # strip whitespace
            seg_line = cut(line)  # returns a generator, which can only be traversed once
            line_str = " ".join(seg_line) + "\n"
            text_cut.append(line_str)
        return text_cut
    nlp = BosonNLP('QhCMB7FS.33943.0OYvhfw0JCx8')
    for line in txt_lines:
        # tag() returns a nested list shaped like [{'word': [segmented words], ...}]
        line_list = nlp.tag(line)[0]['word']
        line_str = " ".join(line_list) + '\n'  # join the list into a string
        text_cut.append(line_str)
    return text_cut
def Text_Segmentation_5_1():
    # Some files are GBK-encoded and need the extra encoding='utf-8' argument when read;
    # some Notepad files are ANSI-encoded, and passing encoding='utf-8' instead raises an error.
    input_txt = open('static/files/方滨兴_互动百科.txt', 'r', encoding='utf-8')
    lines = input_txt.readlines()
    input_txt.close()
    nlp = BosonNLP('QhCMB7FS.33943.0OYvhfw0JCx8')  # create the client once, outside the loop
    for line in lines:
        result = nlp.tag(line)[0]['word']
        output_txt = open('static/files/方滨兴_互动百科_split_unattributed.txt',
                          mode='a', encoding='utf-8')
        # output_txt.write('{}\n'.format(result))  # write as a list literal
        output_txt.write('{}\n'.format(' '.join(result)))  # write as plain text
        output_txt.close()
def __init__(self, nername, phrase_list, groupid=0):
    multiprocessing.Process.__init__(self)
    self.nername = nername
    self.phrase_list = phrase_list
    self.numofphrase = len(phrase_list)
    # batch ID, and will be used for file name
    self.group_id = str(groupid)
    # load NER modules
    self.boson_ner = BosonNLP("bJ0hvqpK.21947.dpf19nyJfNHp")
    # self.conn = self.boson_ner.connect()
    # self.ltp_ner = LTPNer()
    self.jsonData = {}
    print "creating subprocess : " + self.nername + ":" + self.group_id + ", number of phrase: " + str(self.numofphrase)
def getAnswerEntities(text_set, api_key, level):
    def f(x):
        return {
            '0': 'location',
            '1': 'person_name',
            '2': 'product_name',
            '3': ('org_name', 'company_name'),
            '4': ('product_name', 'org_name', 'company_name'),
        }[str(x)]

    nlp = BosonNLP(api_key)
    result = nlp.ner(text_set)[0]
    words = result['word']
    entities = result['entity']
    ul = []
    for entity in entities:
        if entity[2] == f(level) or entity[2] in f(level):
            ul.append(''.join(words[entity[0]:entity[1]]))
    keys = sortList(ul)
    return keys
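# A usage sketch for getAnswerEntities() above: `level` selects which entity
# types survive the filter (e.g. 3 keeps org_name/company_name spans), and
# `sortList` is an external helper, presumably ranking or deduplicating the
# matches. The token and sample text below are placeholders:
orgs = getAnswerEntities('百度和清华大学都在北京', 'YOUR_BOSON_API_TOKEN', 3)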
def __init__(self):
    self.nlp = BosonNLP(bosonnlp_token)
def getAttitude(comment):
    nlp = BosonNLP('ofW2OZMI.4712.UzT0VvLGGkdi')
    # s = ['他是个傻逼', '美好的世界']
    result = nlp.sentiment(comment)
    print result
    return result
@author: JieJ
"""
from bosonnlp import BosonNLP

if __name__ == '__main__':
    # # look up the usage/rate-limit status
    # HEADERS = {'X-Token': 'RvfFdvC_.4154.f2IbbrWgZrP8'}
    # RATE_LIMIT_URL = 'http://api.bosonnlp.com/application/rate_limit_status.json'
    # import requests
    # result = requests.get(RATE_LIMIT_URL, headers=HEADERS).json()
    # for key, val in result['limits'].iteritems():
    #     print key, '\t', val

    nlp = BosonNLP('RvfFdvC_.4154.f2IbbrWgZrP8')
    nlp = BosonNLP('vQdBA8k_.4176.hUiXrb6354i2')  # LiYB's token
    nlp = BosonNLP('6pcRO9QY.4254.H0BK-v3mB5Cv')  # WangLY's token

    # s = ['对于该小孩是不是郑尚金的孩子,目前已做亲子鉴定,结果还没出来,'
    #      '纪检部门仍在调查之中。成都商报记者 姚永忠']
    # result = nlp.ner(s)
    # print result
    # print ' '.join([x for x in result[0]['word']])

    fname = 'D:\\Github\\Sentiment-Analysis\\data\\nlpcc_emotion\\train\\neg_raw'
    all_texts = [x.strip() for x in open(fname).readlines()]
    for i in range(7000):
        print "handling " + str(i + 1) + "th 100 documents....."
        start = i * 100
        end = start + 100
def main():
    global last_extrect_tag_time
    token = json.load(open('./config/token.json', 'r'))
    if DEBUG:
        client = EvernoteClient(token=token['en_token'], sandbox=True)
    else:
        client = EvernoteClient(token=token['en_token'])
        client.service_host = 'app.yinxiang.com'
    print 'current server:', client._get_endpoint()
    # BosonNLP
    nlp = BosonNLP(token['boson_nlp_token'])
    note_store = client.get_note_store()
    # load the previous sync state
    last_update_count = 0  # default when no previous sync state exists
    if os.path.exists(data_file('sync_state')):
        last_sync_state = json.load(open(data_file('sync_state'), 'r'))
        last_update_count = last_sync_state['updateCount']
        last_extrect_tag_time = last_sync_state['currentTime']
    # fetch the current sync state
    currnet_sync_state = get_current_sync_state(note_store)
    if currnet_sync_state.updateCount > last_update_count:
        new_updated_count = currnet_sync_state.updateCount - last_update_count
        print currnet_sync_state.__dict__
        new_note_filter = NoteStore.NoteFilter()
        new_note_filter.order = Types.NoteSortOrder.CREATED
        new_notes = note_store.findNotes(new_note_filter, 0, new_updated_count)
        print 'totalNumber:%d\tNoteListNum:%d' % (new_notes.totalNotes, len(new_notes.notes))
    else:
        print('no new updates...')
        exit(1)
    # fetch all of the user's tags
    tags = Tags(note_store=note_store)
    alltags = tags.tags
    print 'tag cloud:\n'
    print '\t'.join(alltags.keys())
    # process the new notes
    for note in new_notes.notes:
        # skip notes created before the last tag-extraction sync
        if note.created <= last_extrect_tag_time:
            continue
        print '\n' + '*' * 120
        content = note_store.getNoteContent(note.guid)
        print "guid:%s\ntitle:%s\ncreated:%s\nauthor:%s" % (note.guid, note.title, note.created, note.attributes.author)
        print 'author:%s\nsource:%s\nsourceURL:%s\nsourceApplication:%s' % (note.attributes.author, note.attributes.source, note.attributes.sourceURL, note.attributes.sourceApplication)
        if not note.attributes.sourceURL:
            continue
        print "existing tags: %s" % (",".join(note_store.getNoteTagNames(note.guid)))
        print '-' * 120
        # print "content(%d):created:%s,\n%s" % (note.contentLength, note.created, content)
        # parse the note XML and extract all of the text
        try:
            parser = ET.XMLParser()
            parser.entity['nbsp'] = ''
            parser.entity['ldquo'] = ''
            parser.entity['rdquo'] = ''
            parser.entity['hellip'] = ''
            tree = ET.parse(StringIO(content), parser=parser)
        except Exception, data:
            print 'ElementTree parser error'
            print content
            print 'errorData:'
            print data
            print 'exception:'
            print Exception
            exit(1)
        en_note = tree.findall('.')[0]
        content_string = ''.join(en_note.itertext())
        # write the text to a file
        with codecs.open(note_file(note.guid), 'w+', encoding='utf-8') as f:
            f.write(content_string)
        # get the article's named entities via BosonNLP
        ner_tag_guid_list = []
        ner_tag_name_list = []
        ner = Ner(content_string).process(nlp)
        entites = ner.collect_type_entity(count=1)
        for entity in entites:
            tag = tags.add(entity)
            ner_tag_guid_list.append(tag.guid)
            ner_tag_name_list.append(tag.name)
        # get the article's keywords via BosonNLP
        extract_keywords = nlp.extract_keywords(content_string, top_k=20)
        keywords = [item[1].upper() for item in extract_keywords]
        print 'top 20 keywords for the article via BosonNLP extract_keywords:'
        for keyword in extract_keywords:
            print '%s \t %s' % (keyword[1], keyword[0])
        print '-' * 120
        # compare and collect the guids of tags that intersect the keywords
        keywords_tag_guid_list = []
        newKeyWords = []
        for keyword in keywords:
            if tags.exist(keyword):
                existTag = tags.get(keyword)
                keywords_tag_guid_list.append(existTag.guid)
                newKeyWords.append(existTag.name)
        print '\nintersection of extract_keywords with the existing tags:'
        print '\t'.join(newKeyWords)
        # append the new tags to the note
        new_tag_guid_list = list(set(keywords_tag_guid_list).union(set(ner_tag_guid_list)))
        print 'tags from extract_keywords + ner:'
        newKeyWords.extend(ner_tag_name_list)
        print '\t'.join(newKeyWords)
        if note.tagGuids:
            note.tagGuids.extend(new_tag_guid_list)
        else:
            note.tagGuids = new_tag_guid_list
        note_store.updateNote(note)
def bosonNer(text, sensitivity):
    nlp = BosonNLP('qJWJc-f3.4334.MamzfHZ-9wUL')
    return nlp.ner(text, sensitivity)
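# Usage sketch for bosonNer() above: the second argument is forwarded to
# nlp.ner() as its sensitivity setting, a BosonNLP NER option controlling how
# aggressively entity boundaries are split (the value 3 here is assumed, and
# the sample text is borrowed from another snippet in this collection):
result = bosonNer('成都商报记者 姚永忠', 3)
print(result[0]['entity'])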
# -*- coding: utf-8 -*-
from __future__ import print_function, unicode_literals
from bosonnlp import BosonNLP

nlp = BosonNLP("WXH_K9If.5750.ZIsluNnG8bpA")
print(nlp.sentiment("大众深陷断轴门事件"))
def test_invalid_token_raises_HTTPError():
    nlp = BosonNLP('invalid token')
    pytest.raises(HTTPError, lambda: nlp.sentiment('美好的世界'))
# -*- coding: utf-8 -*-
import os
import sys
import datetime
import time
from bosonnlp import BosonNLP

myApiToken = "X0njNWj2.5612.pYnhvqV02Kgn"
nlp = BosonNLP(myApiToken)

for eachLine in open("simple.txt"):
    # print eachLine, type(eachLine)
    # break
    # print nlp.extract_keywords(eachLine)
    result = nlp.tag(eachLine)
    print result

# print nlp.sentiment("这家味道还不错")
# print nlp.extract_keywords("instructor.txt")
# -*- encoding: utf-8 -*-
from __future__ import print_function, unicode_literals
from bosonnlp import BosonNLP

# NOTE: replace with your own API token when testing
nlp = BosonNLP('VaUKhf7X.7870.xbHiGWB_gx49')

s = ['中新网周口9月15日电(刘鹏) 15日,针对媒体报道的河南省太康县女子在当地一家KTV遭3名协警暴力殴打一事,太康县警方向记者回复称,3名打人者中两名为协警身份,其中一名协警未参与打架,但目前两名协警均被辞退。而当晚一同前往KTV娱乐的一名正式女民警被关禁闭。 据之前媒体报道,今年9月4日晚11时左右,太康县一家KTV内,一名姜姓女士在送走一位朋友后正返回KTV时,在门口碰到正从里面出来的三名男子。其中一名男子对姜女士动手动脚,另一男子则说姜女士为“小姐”。 受到羞辱的姜女士要求对方赔礼道歉。没想到竟遭到了三名男子拳脚相加。据姜女士反映,事发当晚黑衣男子对她一番推搡致其头部撞到门上;绿衣男子则直接拽着她的头发将其摁倒在地,随后又遭到了拳头打脸、脚踹并拉着衣服将其头往门上撞。姜女士试图报警,结果三名男子将其手机夺走摔到地上。为了阻止围观群众报警,白衣男子直接拿出“警官证”,称自己是刑警队人员,若是报警,不把录像删了,就把KTV店给砸了。 15日上午,太康县公安局发布对此事件的调查处理通报。通报称,9月4日晚,葛某(太康县人,无业)、师某(协警)等人到盛世年华夜总会唱歌,当晚23时结束后,师某、葛某与姜某发生争执吵骂,并引起厮打,致使姜某轻微伤。目前双方已达成调解协议,姜某对师某、葛某达成谅解。 太康县公安局负责处理此事的王姓警官透露,事发当晚,和打人者葛某、师某一同前往KTV娱乐的还有该局一名刚入职不久的女民警李某某及协警司某等人,但他们并未参与打架。后经太康县公安局党委研究决定,对违规进入娱乐场所的民警李某某先行禁闭,待调查结束后再做处理;对违规进入娱乐场所的协警师某、司某予以辞退。'
     '纪检部门仍在调查之中。成都商报记者 姚永']

result = nlp.ner(s)[0]
words = result['word']
entities = result['entity']
for entity in entities:
    print(''.join(words[entity[0]:entity[1]]), entity[2])

print(s)
result = nlp.sentiment(s)
print(result)
# -*- coding: utf-8 -*-
from __future__ import print_function, unicode_literals
from bosonnlp import BosonNLP
import json

file_path = r"C:\workspace\Taikor_NLP_service\Thirdparty_NLP_WebAPI\Bosson\corpos\msr_test.txt"
with open(file_path, "r", encoding="utf8") as f:
    s = f.read()

nlp = BosonNLP("2ZmFSLeL.3212.Y6W7eOViuyZZ")
pos = nlp.tag(s)

dump = json.dumps(pos)
with open("pos", "w") as f:
    f.write(dump)
def emotion_analysis(context):
    nlp = BosonNLP('GrrNaeVG.6417.dsK_xHt0qE6-')
    return nlp.sentiment(context, model='weibo')
def bosonNer(text, sensitivity):
    nlp = BosonNLP('O8M_j1Nd.4200.wIlhsL46w9-C')
    return nlp.ner(text, sensitivity)