def sentiment(contents):
    """Run BosonNLP sentiment analysis on *contents* under every configured model.

    Relies on module-level ``boson_token``, ``models`` and ``model_to_name``.
    Returns a dict mapping each model's display name to its raw API result.
    """
    nlp = BosonNLP(boson_token)
    return {model_to_name[m]: nlp.sentiment(contents, model=m) for m in models}
def execute(news, vectors, statements):
    """Attach a coordinate vector (statement weights + sentiment dimension)
    to every news item and return the pairwise distance matrix.

    Side effects: sets ``.Coordinate`` and ``.Sentiment`` on each element of
    *news* and prints per-item progress plus the final matrix.

    Fixes vs. original: removed a dead leading loop that rebuilt ``combine``
    per item only to feed commented-out debug prints, and replaced the
    quadratic ``+=`` string concatenation with ``"".join``.
    """
    nlp = BosonNLP('3KJW0U-I.24870.1PdhvJB30HgY')
    for new in news:
        new.Coordinate = np.zeros(len(statements))
        for i in range(len(statements)):
            if statements[i] in new.Sentences:
                # NOTE(review): j ranges over len(vectors) while Coordinate
                # has len(statements) slots — presumably the two lengths
                # match; confirm with callers before changing.
                for j in range(len(vectors)):
                    new.Coordinate[j] += vectors[i][j]
        # Normalize the coordinate vector in place.
        nomalization(new.Coordinate)
        # Add a sentiment dimension, scaled relative to the other axes.
        combine = "".join(new.Sentences)
        sentiment = nlp.sentiment(combine)
        new.Sentiment = sentiment[0][0]
        new.Coordinate = np.append(new.Coordinate, sentiment[0][0] * len(vectors) / 2)
        print("%s %s" % (new.PressName, new.Coordinate))
    distance_matrix = generate_distance_matrix(news)
    print("\n新闻距离矩阵\n", distance_matrix)
    return distance_matrix
class YoNLP:
    """Minimal facade over the BosonNLP SDK client."""

    def __init__(self, boson_api_token):
        # Keep the SDK client private; callers go through the methods below.
        self._nlp = BosonNLP(boson_api_token)

    def sentiment(self, contents):
        """Delegate sentiment analysis to the underlying SDK client."""
        return self._nlp.sentiment(contents)

    def tag(self, contents):
        """Delegate segmentation / POS tagging to the underlying SDK client."""
        return self._nlp.tag(contents)
def test_custom_requests_session():
    """A caller-supplied requests session must be adopted by the client."""
    import requests
    custom_session = requests.session()
    nlp = BosonNLP(os.environ['BOSON_API_TOKEN'], session=custom_session)
    assert nlp.session == custom_session
    # First text is negative, second is positive — check both orderings.
    negative_doc, positive_doc = nlp.sentiment(['再也不来了', '美好的世界'])
    assert negative_doc[1] > negative_doc[0]
    assert positive_doc[0] > positive_doc[1]
def senti_by_sdk(self):
    """Run sentiment analysis on ``self.texts`` through the BosonNLP SDK.

    Prints the raw results and the remaining API quota, then returns the
    results unchanged.
    """
    client = BosonNLP(self.__TOKEN)
    results = client.sentiment(self.texts, model=self.model)
    print(results, flush=True)
    # Report how many API calls remain on this token.
    remaining = self.senti_limits_remaining()
    print("BosonNLP 剩余调用次数:{}".format(remaining), flush=True)
    return results
class Boson_NLP:
    """Sentiment helper built on the BosonNLP 'news' model."""

    def __init__(self, API_TOKEN='ADdt3aLr.27579.d0xt4IOfHR96'):
        self.nlp = BosonNLP(API_TOKEN)

    def polarity_of_text(self, text):
        """Score a single text.

        Returns:
            tuple: (positive index in [0, 1], confidence). Confidence is a
            hard-coded 1 because the API does not report one.
        """
        return self.nlp.sentiment(text, model='news')[0][0], 1

    def polarity_of_list(self, lst):
        """Return the mean positive index (in [0, 1]) over *lst*.

        Raises:
            ValueError: if *lst* is empty (the original silently raised
            ZeroDivisionError instead).
        """
        if not lst:
            raise ValueError("polarity_of_list() requires a non-empty list")
        results = self.nlp.sentiment(lst, model='news')
        # results[i][0] is the positive score of the i-th input.
        return sum(r[0] for r in results) / len(lst)
def AnalyzePositive(cfg):
    """Classify every row of the `news` table as positive (1) or not (0).

    Reads all rows, runs BosonNLP sentiment on the first column, and writes
    the flag back keyed on the row id (column index 7).

    Fixes vs. original:
      - ``user='******' % cfg.get(...)`` raised TypeError (the literal has no
        conversion specifier); config values are now passed directly.
      - The UPDATE statement was built with ``%`` string interpolation; it is
        now parameterized, eliminating quoting/injection problems.
      - ``raise e`` replaced with bare ``raise`` to preserve the traceback.
    """
    nlp = BosonNLP('sjWBhf9i.24699.rQmsCad9c3Jv')
    # Open the database connection from the [db] config section.
    db = pymysql.connect(host=cfg.get('db', 'host'),
                         user=cfg.get('db', 'user'),
                         passwd=cfg.get('db', 'passwd'),
                         db=cfg.get('db', 'db'),
                         port=int(cfg.get('db', 'port')),
                         charset=cfg.get('db', 'charset'))
    cur = db.cursor()
    sql1 = "select * from news"
    sql2 = "UPDATE `news` SET `positive` = %s WHERE `news`.`id` = %s"
    try:
        cur.execute(sql1)
        results = cur.fetchall()
        for row in results:
            positive = nlp.sentiment([row[0]])
            print(positive)
            # Positive when the positive score beats the negative score.
            flag = 1 if positive[0][0] > positive[0][1] else 0
            cur.execute(sql2, (flag, row[7]))
            db.commit()
            print(sql2)
    except Exception:
        raise
    finally:
        db.commit()
        db.close()  # always release the connection
def remove_tag(content):
    """Strip angle-bracket tags from *content* and score the remaining text.

    Returns:
        tuple: (annotated text with a positivity suffix, positive score),
        or ("", 0) when the part after the last '<' is effectively empty.

    Fixes vs. original: ``all = f[0]`` was immediately clobbered by
    ``all = ""`` (dead assignment) — the leading-text branch is now an
    if/else as apparently intended; the bare ``except:`` is narrowed to
    IndexError; the builtin name ``all`` is no longer shadowed.
    """
    parts = content.split("<")
    if parts[-1].replace(";", "").replace(" ", "") == "":
        return "", 0
    # Keep any plain text that precedes the first tag.
    if parts[0].find(">") == -1:
        text = parts[0]
    else:
        text = ""
    for fragment in parts:
        try:
            # Text after the tag name, i.e. what follows the first '>'.
            text = text + fragment.split(">")[1]
        except IndexError:
            # Fragment contained no '>' (e.g. the pre-tag prefix): skip it.
            pass
    from bosonnlp import BosonNLP
    # NOTE: replace with your real API token before running.
    nlp = BosonNLP('API Token')
    result = nlp.sentiment(text)
    print(result)
    return text + "------积极度:" + str(result[0][0]), result[0][0]
class _BosonNLPWrapper(object):
    """NLP helper backed by the BosonNLP API Python SDK."""

    news_categories = ['physical education', 'education', 'finance', 'society',
                       'entertainment', 'military', 'domestic',
                       'science and technology', 'the internet', 'real estate',
                       'international', 'women', 'car', 'game']

    def __init__(self, api_token=None):
        try:
            assert api_token is not None, "Please provide an API token"
        except AssertionError:
            raise
        self.token = api_token
        self.nlp = BosonNLP(self.token)

    def get_sentiment(self, text):
        """Return {'positive': p, 'negative': n} sentiment weights for *text*."""
        positive, negative = self.nlp.sentiment(text)[0]
        return {'positive': positive, 'negative': negative}

    def classify_news(self, text):
        """Classify *text* into one of the 14 ``news_categories`` labels."""
        lookup = dict(enumerate(_BosonNLPWrapper.news_categories))
        category_id = self.nlp.classify(text)[0]
        return lookup[category_id]

    def extract_keywords(self, text, top_k=3):
        """Return the top_k keywords of *text* as [{word: weight}, ...]."""
        ranked = self.nlp.extract_keywords(text, top_k)  # sorted by weight
        return [{word: weight} for weight, word in ranked]

    def segment_words_and_tag(self, text):
        """Segment *text* into words paired with their part-of-speech tags.

        See: http://docs.bosonnlp.com/tag.html

        Returns a list of single-entry dicts {word: pos_tag}.
        """
        tagged = self.nlp.tag(text)[0]
        return [{word: tag} for word, tag in zip(tagged['word'], tagged['tag'])]

    def get_summary(self, content, title='', pct_limit=0.2):
        """Return a digest of *content*, at most pct_limit of its word count.

        See: http://docs.bosonnlp.com/summary.html

        An optional *title* may improve the summary's accuracy.
        """
        return self.nlp.summary(title, content, pct_limit)
# -*- encoding: utf-8 -*-
"""
function: keyword extraction and sentiment analysis via the BosonNLP API
token: *******************************
author: [email protected]

Fixes vs. original: the file mixed Python-2 syntax (``print result``, a
``str.decode`` call on already-decoded text) with Python-3 ``print()``
calls; it is now consistently Python 3 and the input file is closed via a
context manager.
"""
import json

import requests
from bosonnlp import BosonNLP

nlp = BosonNLP('********************')  # API token
KEYWORDS_URL = 'http://api.bosonnlp.com/keywords/analysis'  # keywords endpoint

# Read the source text; the context manager guarantees the file is closed.
with open("NBA.txt", "r", encoding="utf-8") as f:
    string = f.read()

params = {'top_k': 15}  # number of keywords to request
data = json.dumps(string)  # the API expects a JSON-encoded string body
headers = {'X-Token': '*******************'}  # auth header

resp = requests.post(KEYWORDS_URL, headers=headers, params=params,
                     data=data.encode('utf-8'))

# Each item is a (weight, word) pair.
for weight, word in resp.json():
    print(word)  # keyword only
    print(weight, word)  # keyword with its weight

result = nlp.sentiment(string)  # sentiment analysis result
print(result)
# -*- encoding: utf-8 -*- from __future__ import print_function, unicode_literals from bosonnlp import BosonNLP # 注意:在测试时请更换为您的API token nlp = BosonNLP('VaUKhf7X.7870.xbHiGWB_gx49') s = ['中新网周口9月15日电(刘鹏) 15日,针对媒体报道的河南省太康县女子在当地一家KTV遭3名协警暴力殴打一事,太康县警方向记者回复称,3名打人者中两名为协警身份,其中一名协警未参与打架,但目前两名协警均被辞退。而当晚一同前往KTV娱乐的一名正式女民警被关禁闭。 据之前媒体报道,今年9月4日晚11时左右,太康县一家KTV内,一名姜姓女士在送走一位朋友后正返回KTV时,在门口碰到正从里面出来的三名男子。其中一名男子对姜女士动手动脚,另一男子则说姜女士为“小姐”。 受到羞辱的姜女士要求对方赔礼道歉。没想到竟遭到了三名男子拳脚相加。据姜女士反映,事发当晚黑衣男子对她一番推搡致其头部撞到门上;绿衣男子则直接拽着她的头发将其摁倒在地,随后又遭到了拳头打脸、脚踹并拉着衣服将其头往门上撞。姜女士试图报警,结果三名男子将其手机夺走摔到地上。为了阻止围观群众报警,白衣男子直接拿出“警官证”,称自己是刑警队人员,若是报警,不把录像删了,就把KTV店给砸了。 15日上午,太康县公安局发布对此事件的调查处理通报。通报称,9月4日晚,葛某(太康县人,无业)、师某(协警)等人到盛世年华夜总会唱歌,当晚23时结束后,师某、葛某与姜某发生争执吵骂,并引起厮打,致使姜某轻微伤。目前双方已达成调解协议,姜某对师某、葛某达成谅解。 太康县公安局负责处理此事的王姓警官透露,事发当晚,和打人者葛某、师某一同前往KTV娱乐的还有该局一名刚入职不久的女民警李某某及协警司某等人,但他们并未参与打架。后经太康县公安局党委研究决定,对违规进入娱乐场所的民警李某某先行禁闭,待调查结束后再做处理;对违规进入娱乐场所的协警师某、司某予以辞退。' '纪检部门仍在调查之中。成都商报记者 姚永'] result = nlp.ner(s)[0] words = result['word'] entities = result['entity'] for entity in entities: print(''.join(words[entity[0]:entity[1]]), entity[2]) print(s) result = nlp.sentiment(s) print(result)
def test_invalid_token_raises_HTTPError():
    """Using a bad API token must surface as an HTTPError."""
    bad_client = BosonNLP('invalid token')
    with pytest.raises(HTTPError):
        bad_client.sentiment('美好的世界')
# -*- coding: utf-8 -*-
from __future__ import print_function, unicode_literals

from bosonnlp import BosonNLP

# One-off demo: score a single headline's sentiment and print the raw result.
nlp = BosonNLP("WXH_K9If.5750.ZIsluNnG8bpA")
result = nlp.sentiment("大众深陷断轴门事件")
print(result)
def translate(res):
    """Return the sentiment pair (positive, negative) for the first document in *res*."""
    from bosonnlp import BosonNLP
    client = BosonNLP("nPom9h4a.18434.tEA4SsUlkG8g")
    scores = client.sentiment(res)
    return scores[0]
def emotion_analysis(context):
    """Run BosonNLP sentiment analysis on *context* with the weibo model."""
    client = BosonNLP('GrrNaeVG.6417.dsK_xHt0qE6-')
    result = client.sentiment(context, model='weibo')
    return result
class BosonNlpp:
    """Facade over the BosonNLP SDK exposing its main endpoints."""

    def __init__(self):
        self.bonlp = BosonNLP('IKBIoANy.14545.A7GCYBnT9jIB')

    # Sentiment analysis
    def testSentiment(self, s):
        return self.bonlp.sentiment(s)

    # Named-entity recognition (first document only)
    def lexicalAnalysis(self, s):
        return self.bonlp.ner(s)[0]

    # Dependency parsing
    def textDependency(self, s):
        return self.bonlp.depparser(s)

    # Keyword extraction (top 10)
    def testKeywords(self, s):
        return self.bonlp.extract_keywords(s, top_k=10)

    # News classification, mapped to a Chinese category label
    def textClassify(self, s):
        resultlist = self.bonlp.classify(s)
        classifys = {0: '体育', 1: '教育', 2: '财经', 3: '社会', 4: '娱乐',
                     5: '军事', 6: '国内', 7: '科技', 8: '互联网', 9: '房产',
                     10: '国际', 11: '女人', 12: '汽车', 13: '游戏'}
        return classifys[resultlist[0]]

    # Semantic suggestion (top 10)
    def lexicalSynonym(self, term):
        return self.bonlp.suggest(term, top_k=10)

    # Word segmentation with POS tagging
    def fenci(self, s):
        return self.bonlp.tag(s)

    # News summarization; decodes the input from UTF-8 bytes first.
    # NOTE(review): this assumes *s* is bytes — str input would fail here.
    def newssubstract(self, s):
        decoded = s.decode('utf-8')
        return self.bonlp.summary('', decoded)
class _BosonNLPWrapper(object):
    """
    NLP object using the BosonNLP API Python SDK.
    """
    news_categories = ['physical education', 'education', 'finance', 'society',
                       'entertainment', 'military', 'domestic',
                       'science and technology', 'the internet', 'real estate',
                       'international', 'women', 'car', 'game']

    def __init__(self, api_token=None):
        try:
            assert api_token is not None, "Please provide an API token"
        except AssertionError:
            raise
        self.token = api_token
        self.nlp = BosonNLP(self.token)

    def get_sentiment(self, text):
        """
        Performs sentiment analysis on a text passage (works for Chinese text).
        See: http://docs.bosonnlp.com/sentiment.html

        Returns a dict with 'positive' and 'negative' keys holding the
        respective weights, e.g.
        {'positive': 0.856280735624867, 'negative': 0.14371926437513308}.
        """
        positive, negative = self.nlp.sentiment(text)[0]
        return {'positive': positive, 'negative': negative}

    def classify_news(self, text):
        """
        Classifies news text into 14 different categories.
        See: http://docs.bosonnlp.com/classify.html

        Returns the entry of ``news_categories`` the API assigned to *text*.
        """
        lookup = dict(enumerate(_BosonNLPWrapper.news_categories))
        category_id = self.nlp.classify(text)[0]
        return lookup[category_id]

    def extract_keywords(self, text, top_k=3):
        """
        Extracts the top k keywords and the weight of each word in the text.
        See: http://docs.bosonnlp.com/keywords.html

        Returns a list of single-entry dicts {word: weight}, sorted by
        descending weight.
        """
        ranked = self.nlp.extract_keywords(text, top_k)  # sorted by weight
        return [{word: weight} for weight, word in ranked]

    def segment_words_and_tag(self, text):
        """
        Splits up text into segments of "words" and tags them with their
        respective part of speech.
        See: http://docs.bosonnlp.com/tag.html

        Returns a list of single-entry dicts {word: part_of_speech_tag}.
        """
        tagged = self.nlp.tag(text)[0]
        return [{word: tag} for word, tag in zip(tagged['word'], tagged['tag'])]

    def get_summary(self, content, title='', pct_limit=0.2):
        """
        Extracts a news digest (summary) of the content.
        See: http://docs.bosonnlp.com/summary.html

        *title* is optional and may improve accuracy; *pct_limit* caps the
        summary length as a fraction of the original word count.
        """
        return self.nlp.summary(title, content, pct_limit)
# -*- coding: utf-8 -*- """ Created on Mon Jul 1 14:52:16 2019 @author: 楼 """ # -*- coding: utf-8 -*- from __future__ import print_function, unicode_literals from bosonnlp import BosonNLP nlp = BosonNLP('Q3yvGChV.35506.ZybCCXz7pWJz') print( nlp.sentiment( "平凡的一天每个早晨七点半就自然醒风铃响起又是一天云很轻晒好的衣服味道很安心一切都是柔软又宁静每个路口花都开在阳光里小店门前传来好听的恋曲不用太久就能走到目的地人来人往里满是善意这是最平凡的一天啊你也想念吗不追不赶慢慢走回家就这样虚度着年华 没牵挂只有晚风轻拂着脸颊日落之前斜阳融在小河里逛了黄昏市场收获很满意朋友打来电话说他在等你见面有聊不完的话题餐桌摆在开满花的院子里微微酒意阵阵欢歌笑语从不考虑明天应该去哪里因为今夜的风太和煦这是最平凡的一天啊你也想念吗不追不赶慢慢走回家就这样虚度着年华 没牵挂只有晚风轻拂着脸颊这是最完美的一天啊你也想要吗生活可以不那么复杂就这样虚度着年华 没牵挂只有晚风轻拂着脸颊总有一天 我们会找到她.每个早晨七点半就自然醒风铃响起又是一天云很轻晒好的衣服味道很安心一切都是柔软又宁静每个路口花都开在阳光里小店门前传来好听的恋曲不用太久就能走到目的地人来人往里满是善意这是最平凡的一天啊你也想念吗不追不赶慢慢走回家就这样虚度着年华 没牵挂只有晚风轻拂着脸颊日落之前斜阳融在小河里逛了黄昏市场收获很满意朋友打来电话说他在等你见面有聊不完的话题餐桌摆在开满花的院子里微微酒意阵阵欢歌笑语从不考虑明天应该去哪里因为今夜的风太和煦这是最平凡的一天啊你也想念吗不追不赶慢慢走回家就这样虚度着年华 没牵挂只有晚风轻拂着脸颊这是最完美的一天啊你也想要吗生活可以不那么复杂就这样虚度着年华 没牵挂只有晚风轻拂着脸颊总有一天 我们会找到她" ))
'Sk9pHqGr.34372.E7atgOZBbm3j', 'x35CKIJf.34373.sEfA7p69Sm9T', 'tf3ornL4.34374.H74Yp4Rx9c6n', 'sVRIZcvC.34375.XkDW26mUPXu4', 'ZBAa5U-p.34376.f5quJmgi9MjV', 'h6sjAsGv.34378.txeNWPJgrcoU', 'g2f4CmD9.34380.cMzFBC_W8BEQ', 'TYUre4Y6.34382.LoQo-__GTihX', 'N09l8vC-.34389.qfp3W7qRd0sb', 'kN6Rh57B.34391.xlKfN5EyWMJs', 'dC5oWMVw.34394.v_iZp5RNkvH_', 'mHDht3mw.34353.bKMovEqibv-j', 'gAtY7TBD.34356.Zo3YVnIZEEJ3', 'sRhV4NxQ.34358.e69blIeUxHOZ', '2g9uniW5.34360.L1snvuyCiVfv', 'UM_rv6SN.34361.u1_9tXRlrFty' ] content = title + text print(f'title = {title}') nlp = BosonNLP(token[counter]) semti_resolt = nlp.sentiment(content) tuple_result = (title, semti_resolt[0][0], semti_resolt[0][1]) #print(tuple_result) resolt_list.append(tuple_result) #title / posi / neg counter += 1 if counter > 29: counter = 0 stop_contral += 1 #改成SQL updata print(stop_contral) sql_commands = [] # with open('C:/Users/Student/Desktop/newsdata/sent201701.csv', 'w', encoding='utf8', newline="") as outfile: # csv_out = csv.writer(outfile, delimiter=',') # for i in resolt_list: # csv_out.writerow(i)
# coding:utf-8 from bosonnlp import BosonNLP nlp = BosonNLP('') print(nlp.sentiment('这家味道还不错'))
def Affective_analysis(text):
    """Return the raw BosonNLP sentiment result for *text*."""
    client = BosonNLP("x-gOGutn.27554.G6_6QvdJafES")
    return client.sentiment(text)
def getAttitude(comment):
    """Score *comment* with BosonNLP sentiment analysis.

    Prints the raw result for debugging and returns it unchanged.

    Fix vs. original: ``print result`` was Python-2 statement syntax (a
    SyntaxError under Python 3); it is now a ``print()`` call.
    """
    nlp = BosonNLP('ofW2OZMI.4712.UzT0VvLGGkdi')
    result = nlp.sentiment(comment)
    print(result)
    return result
class Controller:
    """Drives RSJ relation extraction plus BosonNLP sentiment to build
    feature/perspective opinion records for a sentence."""

    def __init__(self):
        args = Arguments('RSJ')
        self.rsj = RSJ(args)
        self.rsj.restore_model()
        # Baidu AipNlp credentials (the Aip sentiment path below is unused;
        # BosonNLP is the active sentiment backend).
        APP_ID = '14465679'
        API_KEY = 'DDNA68lRaVxKCUHP13t79acC'
        SECRET_KEY = 'RisCmApExjn5hcSH0KHul71Uldza8vDe'
        # feature_maps: first token of each line maps to the full token list
        # (a feature-name -> related-features lookup).
        self.feature_maps = {}
        with open('../data/feature_maps.txt', encoding='utf8') as r:
            for line in r:
                features = line.split(' ')
                self.feature_maps[features[0]] = features
        self.client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
        self.boson = BosonNLP('m9YSebz-.27886.-Jh0KNhk__Q2')

    def processing(self, sentence):
        """Extract entities/relations from *sentence* and attach a sentiment
        label to each standalone perspective, standalone feature, and
        feature-perspective pair.

        Returns a dict {'sentence', 'entities', 'rels'}.
        """
        rel = self.rsj.evaluate_line(sentence)
        result = {'sentence': sentence, 'entities': [], 'rels': []}
        # Sub-sentences plus their (start, end) character spans.
        sub_sentences, indexes = DataUtils.cut_to_sentence(sentence, index=True)

        def sub(indexes, index):
            # Map a character offset to the index of its sub-sentence.
            index = int(index)
            for i, v in enumerate(indexes):
                if v[0] <= index and v[1] > index:
                    return i

        def boson_sen(boson, tag):
            # Convert a (positive, negative) pair to a label via *tag*;
            # a 0.3 margin is required to leave 'neutral'.
            r = 1
            if (boson[0] - boson[1]) > 0.3:
                r = 2
            elif (boson[1] - boson[0]) > 0.3:
                r = 0
            return tag[r]

        # Collect entity indices that take part in a pair relation:
        # ps = perspective side, fs = feature side.
        ps = []
        fs = []
        for item in rel['rels']:
            if item['rel_type'] == 'none':
                continue
            if rel['entities'][int(item['start'])]['type'] == 'pair-perspective':
                ps.append(int(item['start']))
                fs.append(int(item['end']))
            else:
                ps.append(int(item['end']))
                fs.append(int(item['start']))
        id2sen = {0: 'negative', 1: 'neutral', 2: 'positive'}
        # Handle standalone perspective words and standalone feature words
        # (those not already consumed by a pair relation).
        for i, item in enumerate(rel['entities']):
            item_ = item
            if 'perspective' in item['type'] and i not in ps:
                # Standalone perspective: attribute it to the whole product.
                index = sub(indexes, item['start'])
                feature_value = '手机'
                perspective_value = item['value']
                sentiment_resu = self.boson.sentiment(sub_sentences[index])[0]
                sentiment = boson_sen(sentiment_resu, id2sen)
                result['rels'].append({'feature': -1, 'perspective': i, 'sentiment': sentiment, 'parent_feature': '手机',
                                       'value': '{}-{}'.format(feature_value, perspective_value), 'feature_value': feature_value,
                                       'perspective_value': perspective_value})
            elif 'feature' in item['type'] and i not in fs:
                # Standalone feature: sentiment of its sub-sentence stands in
                # for the missing perspective word.
                index = sub(indexes, item['start'])
                sentiment_resu = self.boson.sentiment(sub_sentences[index])[0]
                sentiment = boson_sen(sentiment_resu, id2sen)
                feature_value = item['value']
                perspective_value = sentiment
                # Pick the parent feature whose token list is the longest
                # among those containing this feature value.
                nums = 0
                parent_feature = "其他"
                for name, features in self.feature_maps.items():
                    if feature_value in features and len(features) > nums:
                        parent_feature = name
                        nums = len(features)
                result['rels'].append(
                    {'feature': i, 'perspective': -1, 'sentiment': sentiment, 'parent_feature': parent_feature,
                     'value': '{}-{}'.format(feature_value, perspective_value), 'feature_value': feature_value, 'perspective_value': perspective_value})
            result['entities'].append(item_)
        # Handle feature-perspective pairs.
        for item in rel['rels']:
            if item['rel_type'] == 'none':
                continue
            if 'perspective' in result['entities'][int(item['start'])]['type']:
                perspective = int(item['start'])
                feature = int(item['end'])
            else:
                perspective = int(item['end'])
                feature = int(item['start'])
            # Sentiment is scored on the sub-sentence span covering the
            # relation's start entity.
            index1 = sub(indexes, rel['entities'][int(item['start'])]['start'])
            index2 = sub(indexes, rel['entities'][int(item['start'])]['end'])
            feature_value = result['entities'][int(feature)]['value']
            perspective_value = result['entities'][perspective]['value']
            sentiment_resu = self.boson.sentiment(''.join(sub_sentences[index1:index2 + 1]))[0]
            sentiment = boson_sen(sentiment_resu, id2sen)
            nums = 0
            parent_feature = '其他'
            for name, features in self.feature_maps.items():
                if feature_value in features and len(features) > nums:
                    parent_feature = name
                    nums = len(features)
            result['rels'].append({'feature': feature, 'perspective': perspective, 'sentiment': sentiment,
                                   'parent_feature': parent_feature, 'value': '{}-{}'.format(feature_value, perspective_value),
                                   'feature_value': feature_value, 'perspective_value': perspective_value})
        return result
def test_invalid_token_raises_HTTPError():
    """sentiment() with an invalid API token should raise HTTPError."""
    client = BosonNLP("invalid token")
    with pytest.raises(HTTPError):
        client.sentiment("美好的世界")