def sentiment(contents):
    """Run BosonNLP sentiment analysis on *contents* under every configured model.

    Relies on module-level ``boson_token``, ``models`` and ``model_to_name``.
    Returns a dict mapping the human-readable model name to the raw API result.
    """
    client = BosonNLP(boson_token)
    results = {}
    for model in models:
        results[model_to_name[model]] = client.sentiment(contents, model=model)
    return results
def maintain():
    """Clean up the ``data`` table.

    Deletes rows whose page download failed (``context = 'error'``) and retries
    abstract generation for rows whose earlier summarisation failed
    (``abstract = 'error'``).

    NOTE(review): API token and DB credentials are hard-coded; move to config.
    """
    apitoken = "ZUKLt9tO.24611.KI1wUPXknGRP"
    nlp = BosonNLP(apitoken)
    conn = pymysql.connect(host='123.206.68.192', port=3306, user='******',
                           passwd='', db='news', charset='utf8')
    try:
        cur = conn.cursor()
        # Rows with no usable page content are unrecoverable - drop them.
        cur.execute("DELETE FROM `data` WHERE context = 'error'")
        conn.commit()
        # Retry summarisation for rows whose abstract failed previously.
        cur.execute("SELECT * FROM `data` WHERE abstract = 'error'")
        for entry in cur.fetchall():
            result = nlp.summary('', entry[3], 50).replace('\n', '')
            if result == 'error':
                # BUG FIX: the original did `'...' + entry`, concatenating a
                # str with a tuple, which raises TypeError. Also switched the
                # Python-2-only print statement to the call form (works on
                # both interpreters for a single argument).
                print('[Deleted]wrong entry: %s' % (entry,))
                cur.execute("DELETE FROM `data` WHERE ID = %s", (entry[0],))
            else:
                cur.execute("UPDATE `data` SET abstract = %s WHERE ID = %s",
                            (result, entry[0]))
        cur.close()
        conn.commit()
    finally:
        # Robustness: ensure the connection is released even if a query fails.
        conn.close()
def classify(contents):
    """Classify *contents* with BosonNLP and map each returned topic through
    the module-level ``topic_to_id`` table."""
    nlp = BosonNLP(boson_token)
    return [topic_to_id[topic] for topic in nlp.classify(contents)]
def getAnswerKeys(text_set, api_key):
    """Extract the top-30 keywords from *text_set* and scale each weight to an
    integer score (weight * 10, truncated). Returns [(word, score), ...]."""
    nlp = BosonNLP(api_key)
    ranked = nlp.extract_keywords(text_set, top_k=30)
    return [(word, int(weight * 10)) for weight, word in ranked]
def ConfirmMuseum(text, museum, textid):
    """Return 1 if NER finds an org_name or location entity in *text* that
    contains the string *museum*, else 0.

    *textid* is only used for the error message. Only the first 1000 chars of
    *text* are analysed (presumably an API payload guard - TODO confirm).
    """
    nlp = BosonNLP('sjWBhf9i.24699.rQmsCad9c3Jv')
    try:
        text = text[0:1000]
        result = nlp.ner(text)[0]
        words = result['word']
        entities = result['entity']
        found = False
        # Each entity is (start_token, end_token, type).
        for start, end, kind in entities:
            if kind in ('org_name', 'location'):
                if museum in ''.join(words[start:end]):
                    found = True
                    break
        if found:
            print('Confirm!')
            return 1
        else:
            print('Not!')
            return 0
    except KeyError as e:
        print('exit in %s' % textid)
        print(e)
        # BUG FIX: the original fell through and implicitly returned None on
        # KeyError; return 0 explicitly (both are falsy, so callers agree).
        return 0
def Class_ification(sentence):
    """Classify *sentence* into one of 14 news categories with BosonNLP and
    display the category label as a single networkx node via matplotlib."""
    # Use a CJK-capable font and keep the minus sign rendering correct.
    plot.rcParams['font.sans-serif'] = ['SimHei']
    plot.rcParams['axes.unicode_minus'] = False
    nlp = BosonNLP('TPDuivpZ.27572.rVuPCI9-kUlN')
    category_id = nlp.classify(sentence)[0]
    labels = {
        0: "体育",
        1: "教育",
        2: "财经",
        3: "社会",
        4: "娱乐",
        5: "军事",
        6: "国内",
        7: "科技",
        8: "互联网",
        9: "房产",
        10: "科技",
        11: "女人",
        12: "汽车",
        13: "游戏",
    }
    graph = nx.DiGraph()
    plot.figure(figsize=(3, 3))
    plot.subplot(1, 1, 1)
    plot.title('文本分类', color='red', fontsize=15)
    graph.add_node(labels[category_id])
    nx.draw(graph, with_labels=True, node_size=6000, node_color='lightblue')
    plot.show()
def execute(news, vectors, statements):
    """Build a coordinate vector per news item (statement-vector sums plus one
    sentiment dimension), then return the pairwise distance matrix.

    Mutates each item's ``Coordinate`` and ``Sentiment`` attributes.
    """
    nlp = BosonNLP('3KJW0U-I.24870.1PdhvJB30HgY')
    for item in news:
        item.Coordinate = np.zeros(len(statements))
        for i, statement in enumerate(statements):
            if statement in item.Sentences:
                for j in range(len(vectors)):
                    item.Coordinate[j] += vectors[i][j]
        # Normalise the coordinate vector in place.
        nomalization(item.Coordinate)
        # Append a sentiment dimension, scaled to the coordinate magnitude.
        full_text = "".join(item.Sentences)
        senti = nlp.sentiment(full_text)
        item.Sentiment = senti[0][0]
        item.Coordinate = np.append(item.Coordinate,
                                    senti[0][0] * len(vectors) / 2)
        print("%s %s" % (item.PressName, item.Coordinate))
    distance_matrix = generate_distance_matrix(news)
    print("\n新闻距离矩阵\n", distance_matrix)
    return distance_matrix
def __init__(self, api_token=None):
    """Store *api_token* and construct the BosonNLP client.

    Raises:
        AssertionError: if no token is supplied (exception type preserved for
            backward compatibility with existing callers).
    """
    # BUG FIX: the original validated with a bare `assert` (wrapped in a
    # pointless try/except that just re-raised). Asserts are stripped under
    # `python -O`, silently disabling the check - raise explicitly instead.
    if api_token is None:
        raise AssertionError("Please provide an API token")
    self.token = api_token
    self.nlp = BosonNLP(self.token)
def dict_mining():
    """Dependency-parse two hard-coded sample sentences with BosonNLP and
    print the nouns extracted from the parse."""
    nlp = BosonNLP('6cfIzKI1.27567.fLaZOvRXwl8f')
    samples = [
        '整流级逆变级滤波器负载三相检测abcdq双SVM控制dqabcADRCADRCaubucu*du*quotωotωinvTrecTuqud图3基于ADRC的TSMC闭环控制系统框图Fig.3Closed-loopcontroldiagramofTSMCbasedonADRCADRC采用图1结构。',
        '但励磁绕组时间常数较大,闭环控制系统的截止频率较低,影响发电机输出端电压的响应速度。',
    ]
    parses = nlp.depparser(samples)
    print(extract_noun(parses))
def bosonnlp_segmentation(str_list):
    # Python 2 code (print statements). Segments each string in *str_list*
    # with BosonNLP and prints the tokens of each input as a single
    # "word|word|..." line, followed by a blank line.
    nlp = BosonNLP('NBSC61pl.10304.Fnwc_rUz9fyw')
    # nlp.tag returns one dict per input string; the 'word' key holds tokens.
    result = nlp.tag(str_list)
    for tag_map in result:
        word_tokens = tag_map['word']
        for word in word_tokens:
            # Trailing comma keeps all tokens of one input on the same line.
            print word.encode("utf-8") + "|",
        print "\n"
def getKeyWords(command):
    """Return the top-3 BosonNLP keywords of *command*, each UTF-8 encoded."""
    nlp = BosonNLP("IrtCRUKX.4360.giOuq49VR3V-")
    ranked = nlp.extract_keywords(command, top_k=3)
    # extract_keywords yields (weight, word) pairs; keep only encoded words.
    return [word.encode('utf8') for _weight, word in ranked]
def Entity_extraction(text):
    """Run BosonNLP NER on *text*; print the raw result, then each entity's
    token span and its entity type."""
    nlp = BosonNLP("x-gOGutn.27554.G6_6QvdJafES")
    parsed = nlp.ner(text)[0]
    print(parsed)
    tokens = parsed['word']
    # Each entity is (start_token, end_token, type).
    for start, end, kind in parsed['entity']:
        print(" ".join(tokens[start:end]), kind)
def extract_keywords(text, top_num=10):
    """Extract keywords from *text* as a {word: weight} dict.

    NOTE(review): the API token is empty - replace it with a valid BosonNLP
    token before use.
    """
    nlp = BosonNLP('')
    ranked = nlp.extract_keywords(text, top_k=top_num)
    # The API yields (weight, word); flip into word -> weight.
    return dict((word, weight) for weight, word in ranked)
def test_custom_requests_session():
    """BosonNLP should reuse a caller-supplied requests session object."""
    import requests
    # FIX: `requests.Session()` is the documented constructor;
    # `requests.session()` is a legacy lowercase alias kept only for
    # backwards compatibility.
    session = requests.Session()
    nlp = BosonNLP(os.environ['BOSON_API_TOKEN'], session=session)
    assert nlp.session == session
    result = nlp.sentiment(['再也不来了', '美好的世界'])
    # First text is negative (index 1 dominates), second positive (index 0).
    assert result[0][1] > result[0][0]
    assert result[1][0] > result[1][1]
def senti_by_sdk(self):
    """Run sentiment analysis on ``self.texts`` via the BosonNLP SDK using
    ``self.model``, print the results and the remaining API quota, and return
    the raw sentiment results."""
    client = BosonNLP(self.__TOKEN)
    results = client.sentiment(self.texts, model=self.model)
    print(results, flush=True)
    # Report how many API calls remain on this token.
    remaining = self.senti_limits_remaining()
    print("BosonNLP 剩余调用次数:{}".format(remaining), flush=True)
    return results
def getAnswerNounKeys(text_set, api_key):
    """Concatenate every noun token ('n' tag) found in *text_set* and score
    the resulting string through getAnswerKeys."""
    nlp = BosonNLP(api_key)
    tagged = nlp.tag(text_set)
    nouns = "".join(
        word
        for entry in tagged
        for word, tag in zip(entry['word'], entry['tag'])
        if tag == 'n'
    )
    return getAnswerKeys(nouns, api_key)
def segment_tag(text):
    """Segment and POS-tag *text* with BosonNLP.

    Returns (words, tags), two parallel lists of equal length.
    """
    nlp = BosonNLP('2DgGSC-8.33497.8yeNchBP6L9n')
    first = nlp.tag(text)[0]
    words, tags = first['word'], first['tag']
    assert len(words) == len(tags)
    return words, tags
def __init__(self, opt):
    """Set up a translation pipeline: BPE segmenter, ONMT translator, and a
    BosonNLP client (presumably for Chinese preprocessing - TODO confirm).

    :param opt: parsed options object; must provide ``seprator`` [sic],
        ``cuda``, ``gpu`` and ``bpe_codes`` attributes.
    """
    self.opt = opt
    # Separator padded with a space ('seprator' is the option's own spelling).
    self.sep = opt.seprator + " "
    if opt.cuda:
        # Select the GPU before constructing the Translator, presumably so it
        # allocates on the right device - TODO confirm.
        torch.cuda.set_device(opt.gpu)
    # NOTE(review): this codecs.open handle is never explicitly closed; BPE
    # owns it for the object's lifetime.
    self.bpe = BPE(codecs.open(self.opt.bpe_codes, 'r', encoding="UTF-8"), self.opt.seprator, None, None)
    self.translator = onmt.Translator(opt)
    self.nlp = BosonNLP("NGhNiav2.16134.DvyEDmGzYd2S")
def getAbstract(allContext):
    """Summarise each text in *allContext* (50-word BosonNLP summaries).

    Best-effort: a failed item yields the placeholder string 'error' so the
    returned list stays aligned with the input. Uses the module-level
    ``apitoken``.
    """
    nlp = BosonNLP(apitoken)
    ret = []
    for i, text in enumerate(allContext):
        try:
            print("handling %dth abstract from buaa" % (i + 1))
            summary = nlp.summary('', text, 50)
            ret.append(summary.replace('\n', ''))
        except Exception:
            # FIX: narrowed from a bare `except:` so KeyboardInterrupt /
            # SystemExit still propagate; the best-effort placeholder stays.
            print("error when handling %dth abstract from buaa" % (i + 1))
            ret.append('error')
            # FIX: print_exc() already writes the traceback and returns None;
            # the original `print(traceback.print_exc())` printed a stray "None".
            traceback.print_exc()
    return ret
def getAbstract(self, allContext):
    """Summarise each text in *allContext* (50-word BosonNLP summaries).

    Best-effort: a failed item yields the placeholder string 'error' so the
    returned list stays aligned with the input. ``self._school`` is only used
    in the progress messages.
    """
    apitoken = "XB2l3mQj.14588.GJCICyNoqghJ"
    nlp = BosonNLP(apitoken)
    ret = []
    for i, text in enumerate(allContext):
        try:
            print("handling %dth abstract from %s" % (i + 1, self._school))
            summary = nlp.summary('', text, 50)
            ret.append(summary.replace('\n', ''))
        except Exception:
            # FIX: narrowed from a bare `except:` so KeyboardInterrupt /
            # SystemExit still propagate; the best-effort placeholder stays.
            print("error when handling %dth abstract from %s" % (i + 1, self._school))
            ret.append('error')
            # FIX: print_exc() already writes the traceback and returns None;
            # the original `print(traceback.print_exc())` printed a stray "None".
            traceback.print_exc()
    return ret
def __init__(self):
    """Restore the RSJ model, load the feature map table, and construct the
    Baidu AipNlp and BosonNLP clients.

    NOTE(review): credentials are hard-coded; move to configuration.
    """
    self.rsj = RSJ(Arguments('RSJ'))
    self.rsj.restore_model()
    # feature_maps.txt: space-separated rows, first field is the key and the
    # whole row (fields list) is the value.
    self.feature_maps = {}
    with open('../data/feature_maps.txt', encoding='utf8') as reader:
        for raw_line in reader:
            fields = raw_line.split(' ')
            self.feature_maps[fields[0]] = fields
    app_id = '14465679'
    api_key = 'DDNA68lRaVxKCUHP13t79acC'
    secret_key = 'RisCmApExjn5hcSH0KHul71Uldza8vDe'
    self.client = AipNlp(app_id, api_key, secret_key)
    self.boson = BosonNLP('m9YSebz-.27886.-Jh0KNhk__Q2')
def bosonnlpNER(news):
    """Run BosonNLP NER on *news*.

    Returns a 3-tuple:
      * list of [entity_text, entity_type] for org_name / person_name entities,
      * {start_token: end_token} map for those entities,
      * the full token list.
    """
    from bosonnlp import BosonNLP
    client = BosonNLP('cKWUytiR.34676.f5F2YbS_EyX2')
    parsed = client.ner(news)[0]
    print(parsed)
    tokens = parsed['word']
    found = []
    entity_start = {}
    for start, end, kind in parsed['entity']:
        if kind in {'org_name', 'person_name'}:
            entity_start[start] = end
            found.append([''.join(tokens[start:end]), kind])
    return found, entity_start, tokens
def _boson_seg(self, text):
    """Segment *text* (a string or list of strings) with BosonNLP.

    Batches requests 100 texts at a time (API payload limit - TODO confirm).
    Returns a list of token lists, one per input string.
    """
    nlp = BosonNLP('g8lQg9Mv.25818.fAbbwt6TYhh8')
    if isinstance(text, str):
        text = [text]
    words = []
    # BUG FIX: the original `range(len(text) // 100 + 1)` issued a final
    # empty-batch API call whenever len(text) was a multiple of 100
    # (including the empty-input case). Stepping by 100 avoids that.
    for start in range(0, len(text), 100):
        for seg in nlp.tag(text[start:start + 100]):
            words.append(seg['word'])
    # NOTE: the original also accumulated each seg['tag'] into a local list
    # that was never returned; that dead accumulation has been dropped.
    return words
def ScentenceSimilar(str1, str2):
    """Cosine similarity between *str1* and *str2*.

    Uses BosonNLP segmentation (requires network access), denoises the token
    lists, builds term-frequency dicts, and compares them by cosine.
    """
    nlp = BosonNLP('wx3Ua05Y.21658.Ch876jBfuqIH')
    seg1 = nlp.tag(str1.lower())
    seg2 = nlp.tag(str2.lower())
    tf1 = getTFdict(Denoising(seg1[0]))
    tf2 = getTFdict(Denoising(seg2[0]))
    return getSimilar_by_cos(tf1, tf2)
def Text_Segmentation_5_1():
    """Segment every line of 方滨兴_互动百科.txt with BosonNLP and append each
    line's tokens, space-joined, to 方滨兴_互动百科_split_unattributed.txt.

    (Input files may be GBK/ANSI encoded; adjust the ``encoding`` argument if
    reading raises a decode error.)
    """
    # FIX: hoisted out of the per-line loop - the original rebuilt the
    # BosonNLP client and re-opened the output file for every line.
    nlp = BosonNLP('QhCMB7FS.33943.0OYvhfw0JCx8')
    with open('static/files/方滨兴_互动百科.txt', 'r', encoding='utf-8') as input_txt:
        lines = input_txt.readlines()
    # Append mode preserved from the original (repeated runs keep appending).
    with open('static/files/方滨兴_互动百科_split_unattributed.txt', mode='a',
              encoding='utf-8') as output_txt:
        for line in lines:
            words = nlp.tag(line)[0]['word']
            # Write plain text, tokens separated by spaces.
            output_txt.write('{}\n'.format(' '.join(words)))
def words_cut(txt_lines, isJieba=True):
    """Tokenise each line of *txt_lines*.

    With ``isJieba`` true, segments locally with jieba's ``cut``; otherwise
    segments remotely with BosonNLP. Returns a list of
    'token token ...\\n' strings, one per input line.
    """
    if isJieba:
        segmented = []
        for line in txt_lines:
            # cut() returns a one-shot generator over the stripped line.
            tokens = cut(line.strip())
            segmented.append(" ".join(tokens) + "\n")
        return segmented
    nlp = BosonNLP('QhCMB7FS.33943.0OYvhfw0JCx8')
    # nlp.tag returns [{'word': [...], ...}]; join the tokens of each line.
    return [" ".join(nlp.tag(line)[0]['word']) + '\n' for line in txt_lines]
def __init__(self, nername, phrase_list, groupid=0):
    """Worker process that runs BosonNLP NER over a batch of phrases.

    :param nername: label for this worker (used in logging / file names).
    :param phrase_list: phrases this worker will process.
    :param groupid: batch id; stringified and used in the output file name.
    """
    multiprocessing.Process.__init__(self)
    self.nername = nername
    self.phrase_list = phrase_list
    self.numofphrase = len(phrase_list)
    # Batch ID, also used for the output file name.
    self.group_id = str(groupid)
    # NER backend client.
    self.boson_ner = BosonNLP("bJ0hvqpK.21947.dpf19nyJfNHp")
    self.jsonData = {}
    # FIX: the original used a Python-2-only print statement; the call form
    # below prints the identical single concatenated string on both
    # Python 2 and Python 3. (Dead commented-out attributes removed.)
    print("creating subprocess : " + self.nername + ":" + self.group_id
          + ", number of phrase: " + str(self.numofphrase))
def judges(config, text):
    """
    Majority-vote sentiment over three NLP services: Tencent Wenzhi, Baidu
    AipNlp and BosonNLP, queried concurrently.

    :param config: dict {'baidu': [APP_ID, API_KEY, SECRET_KEY],
                         'boson': [API_TOKEN],
                         'tencent': [SECRET_ID, SECRET_KEY]}
    :param text: string to analyse
    :return: 'P' (positive) or 'N' (negative)
    """
    # Per-service timeout for collecting each future's result, in seconds.
    default_timeout = 10
    w = Wenzhi(config['tencent'][0], config['tencent'][1], 'gz', 'POST')
    cli = AipNlp(config['baidu'][0], config['baidu'][1], config['baidu'][2])
    bo = BosonNLP(config['boson'][0])
    # Fire all three API calls concurrently.
    pool = ThreadPoolExecutor(max_workers=3)
    candidate1 = pool.submit(w.text_sentiment, {'content': text})
    candidate2 = pool.submit(cli.sentimentClassify, text)
    candidate3 = pool.submit(bo.sentiment, text)
    result = [
        candidate1.result(default_timeout),
        candidate2.result(default_timeout),
        candidate3.result(default_timeout)
    ]
    # One positive and one negative score per service, each in [0, 1].
    post_votes = [
        result[0]['positive'], result[1]['items'][0]['positive_prob'],
        result[2][0][0]
    ]
    nega_votes = [
        result[0]['negative'], result[1]['items'][0]['negative_prob'],
        result[2][0][1]
    ]
    # return [post_votes, nega_votes]
    # A service "votes" for a polarity when its score exceeds 0.5.
    pv = [i for i in post_votes if i > 0.5]
    nv = [x for x in nega_votes if x > 0.5]
    result = None
    # More than one third of services (i.e. at least 2 of 3... NOTE(review):
    # 1/3 of 3 is 1, so a single vote > 1/3 requires 2 votes only if the
    # comparison is strict - here ANY 2 votes pass, but so does... confirm
    # the intended threshold; positive is checked first and wins ties.
    if len(pv) / len(post_votes) > 1 / 3:
        result = 'P'
    else:
        if len(nv) / len(nega_votes) > 1 / 3:
            result = 'N'
    # Default to 'N' when neither polarity reaches the threshold.
    if result is None:
        return 'N'
    else:
        return result
def getConnection():
    """Return a BosonNLP client for the current token.

    Checks the token's remaining NER quota via the rate-limit endpoint; when
    the quota is exhausted, advances ``tokenNumber`` to the next token and
    recurses. When every token is spent, calls ``init()``, sets
    ``connectionTag`` and returns None.
    """
    global tokenLength, tokenNumber, connectionTag
    count = 0
    # getToken() presumably selects the token indexed by tokenNumber - confirm.
    token = getToken()
    headers = {'X-Token': token}
    RATE_LIMIT_URL = 'http://api.bosonnlp.com/application/rate_limit_status.json'
    result = requests.get(RATE_LIMIT_URL, headers=headers).json()
    tokenLength = getTokenArrayLen()
    if result['status'] == 200:
        # Remaining NER calls for this token.
        count = result['limits']['ner']['count-limit-remaining']
        if not count:
            if tokenLength > tokenNumber:
                # Quota exhausted: rotate to the next token and retry.
                tokenNumber += 1
                return getConnection()
            else:
                # All tokens exhausted: reset state and signal via the tag.
                init()
                connectionTag = 1
                return None
    # NOTE(review): a non-200 status also falls through to returning a client
    # for an unverified token - confirm that is intended.
    return BosonNLP(token)
def AnalyzePositive(cfg):
    """Score every row of the ``news`` table with BosonNLP sentiment and write
    a ``positive`` flag (1 if the positive score dominates, else 0) back using
    the row's id (column index 7).

    :param cfg: ConfigParser-like object with a [db] section
                (host, user, passwd, db, port, charset).
    """
    nlp = BosonNLP('sjWBhf9i.24699.rQmsCad9c3Jv')
    # FIX: the original wrapped every cfg value in a redundant "'%s' % ..."
    # (and the scrubbed user field "'******' % ..." would raise TypeError at
    # runtime - no conversion specifier). Pass the config values directly.
    db = pymysql.connect(host=cfg.get('db', 'host'),
                         user=cfg.get('db', 'user'),
                         passwd=cfg.get('db', 'passwd'),
                         db=cfg.get('db', 'db'),
                         port=int(cfg.get('db', 'port')),
                         charset=cfg.get('db', 'charset'))
    cur = db.cursor()
    try:
        cur.execute("select * from news")
        for row in cur.fetchall():
            # row[0] is the text to score; sentiment expects a list of texts.
            positive = nlp.sentiment([row[0]])
            print(positive)
            flag = 1 if positive[0][0] > positive[0][1] else 0
            # FIX: parameterised query instead of "%"-interpolated SQL
            # (injection-prone and mis-quotes values).
            cur.execute(
                "UPDATE `news` SET `positive` = %s WHERE `news`.`id` = %s",
                (flag, row[7]))
            db.commit()
    finally:
        db.commit()
        db.close()  # close the connection even on failure