def comment_Analysis(self, commmentlist):
    """Return the mean sentiment score of a batch of comments.

    Every comment is scored with ``xm.sentiment`` and the scores are
    averaged. Per the original author's note, the highest score is 1.0 and
    a lower score means a more negative comment.

    Args:
        commmentlist: Sized collection of comment strings.

    Returns:
        The arithmetic mean of the per-comment scores, or ``0.0`` when
        ``commmentlist`` is empty (the unguarded division used to raise
        ``ZeroDivisionError`` in that case).
    """
    # Nothing to score: bail out before loading stopwords or dividing by zero.
    if len(commmentlist) == 0:
        return 0.0

    # Phone-keyword stopword list used for this scoring run.
    xm.set_stopword('手机关键词停词表.txt')
    totalscore = sum(xm.sentiment(comment) for comment in commmentlist)
    return totalscore / len(commmentlist)
def get_data():
    """Scrape the friends page, score each message and append rows to data.csv.

    Logs in via ``login()``; if that reports failure (``False``) the cookie
    is refreshed with ``get_cookie()`` and the login is retried once. The
    returned page is parsed for elements with class ``f-name q_namecard``
    (friends) and ``f-info`` (messages). Each message is scored with
    ``xmnlp.sentiment`` and labelled '积极' when the score is above 0.49,
    otherwise '消极'.

    One row per friend is printed and appended to ``data.csv`` as
    ``name, id, message, score, label``.

    Returns:
        None. Returns early, writing nothing, if the login still fails after
        the retry.
    """
    import csv

    html = login()
    # Compare against False explicitly: a response object may define its own
    # truthiness, so ``not html`` is not equivalent.
    if html is False:
        get_cookie()
        html = login()
        if html is False:
            # Still not logged in; there is no page to parse.
            return

    soup = BeautifulSoup(html.content, 'lxml')
    friends = soup.findAll(attrs={'class': 'f-name q_namecard'})
    msgs = soup.findAll(attrs={'class': 'f-info'})

    # newline='' lets the csv module control line endings; the context
    # manager closes the file, so no explicit close() is needed.
    with open('data.csv', 'a+', encoding='utf-8', newline='') as fo:
        writer = csv.writer(fo, lineterminator='\n')
        # NOTE(review): the original loop stops one short of the last friend
        # entry; that is preserved here -- confirm whether it is intentional.
        for friend, msg in zip(friends[:-1], msgs):
            friend_id = str(friend.attrs['href']).split('/')[-1]
            doc = msg.text
            score = xmnlp.sentiment(doc)
            print(doc)
            print('Score: ', score)
            res = '积极' if score > 0.49 else '消极'
            row = (friend.text, friend_id, doc, str(score), res)
            # %s keeps the fractional score; %d truncated it to an integer.
            print("%s,%s,%s,%s,%s" % row)
            # csv.writer quotes fields containing commas, quotes or newlines
            # so a message cannot break the column layout.
            writer.writerow(row)
def create(cls, cfg):
    # type: (RasaNLUModelConfig) -> JiebaTokenizer
    """Load the sentiment algorithm and build the component around it.

    Imports ``xmnlp``, prints the score of one sample sentence as a load
    check, then instantiates ``cls`` with this component's configuration
    (``cfg.for_component(cls.name, cls.defaults)``) and the
    ``xmnlp.sentiment`` callable as the analyzer.
    """
    import xmnlp

    # Optional user-defined stopwords:
    #   xmnlp.set_stopword('/path/to/stopword.txt')
    # TODO: fill in the concrete creation logic later.
    sample_text = """这件衣服的质量也太差了吧!"""
    print('Load sentiment analyzer.Text: ', sample_text, 'score', xmnlp.sentiment(sample_text))

    return cls(cfg.for_component(cls.name, cls.defaults), xmnlp.sentiment)
def addscore(chart):
    """Return a deep copy of ``chart`` with three sentiment columns appended.

    The text in column index 3 of each row is scored twice, with
    ``xmnlp.sentiment`` and with ``SnowNLP(...).sentiments``. The two scores
    and their mean are appended to the row, in that order. Rows whose text
    has length zero receive three ``"blank"`` placeholders instead. The
    input ``chart`` itself is not modified.
    """
    scored = copy.deepcopy(chart)
    for row in scored:
        text = row[3]
        if len(text) == 0:
            # No text to analyse: pad so every row gains the same 3 columns.
            row.extend(["blank"] * 3)
            continue

        snow = SnowNLP(text)
        xm_score = xmnlp.sentiment(text)
        snow_score = snow.sentiments
        row.extend([xm_score, snow_score, (xm_score + snow_score) / 2])
    return scored
def analysis(self, filein_path, fileout_path):
    """Score column 3 of every data row of a CSV and append the results.

    If ``filein_path`` does not exist a message is printed and the method
    returns. If ``fileout_path`` does not exist it is created by writing a
    header row first. The input is read with ``self.readFromCSV``, its first
    row is discarded, and for each remaining row the first four columns plus
    the ``xmnlp.sentiment`` score of column 3 are written with
    ``self.writeToCSVWithoutHeader``.

    Each print call passes a single parenthesised argument, which emits the
    same text under Python 2's print statement and Python 3's print function.
    """
    has_input = os.path.exists(filein_path)
    has_output = os.path.exists(fileout_path)

    if not has_input:
        print('in file: {0} not exits.'.format(filein_path))
        return

    if not has_output:
        print('out file: {0} not exits.'.format(fileout_path))
        header = ['share_number', 'comment_number', 'url', 'title', 'sentiment']
        self.writeToCSVWithoutHeader(fileout_path, header)
        print('create an new out file: {0}.'.format(fileout_path))

    rows = self.readFromCSV(filein_path)
    # Drop the first row (presumably the input file's header).
    rows.pop(0)
    for item in rows:
        title = item[3]
        score = xmnlp.sentiment(title)
        self.writeToCSVWithoutHeader(
            fileout_path, [item[0], item[1], item[2], title, score])
        print("{0}--{1}".format(title, score))
def get_pos_neg(text_list):
    """Split texts into clearly positive and clearly negative ones.

    Each text has ``'<'`` and space characters stripped; texts that end up
    empty are ignored. The cleaned text is scored by ``SnowNLP`` and
    ``xmnlp.sentiment``. If ``jieba.analyse.textrank`` yields any keywords
    for it, the concatenated keywords are scored by ``SnowNLP`` as a third
    opinion. A text is negative only when every available score is below
    0.45, and positive only when every available score is above 0.55.

    Returns:
        A ``(positive_texts, negative_texts)`` tuple of lists holding the
        cleaned texts.
    """
    positive_texts, negative_texts = [], []
    for raw in text_list:
        text = raw.replace('<', '').replace(' ', '')
        if not text:
            continue

        scores = [SnowNLP(text).sentiments, xmnlp.sentiment(text)]
        keywords = "".join(jieba.analyse.textrank(text))
        if keywords:
            # Keywords were extracted: they get a vote as well.
            scores.append(SnowNLP(keywords).sentiments)

        if all(score < 0.45 for score in scores):
            negative_texts.append(text)
        if all(score > 0.55 for score in scores):
            positive_texts.append(text)
    return positive_texts, negative_texts
def comment_extract(self, commentlist):
    """Pick five comments from each end of the sentiment ranking.

    Comments shorter than 12 characters are removed from ``commentlist``
    in place. The rest are scored with ``xm.sentiment`` and sorted by score
    in ascending order. ``num`` is 5% of the ranked count; a five-item
    window is taken near each end of the ranking, offset by ``num``.

    Args:
        commentlist: Mutable list of comment strings. It is filtered in
            place, as the original implementation did.

    Returns:
        A ``(high_scored, low_scored)`` tuple of lists of comment strings:
        first the window from the high-score end of the ranking, then the
        window from the low-score end. Either list may hold fewer than five
        items when few comments remain.
    """
    # Filter with a slice assignment instead of calling remove() while
    # iterating: removing during iteration skips the element that follows
    # each removal, so some short comments used to survive.
    # NOTE(review): the original comment said "shorter than 30" but the code
    # has always used 12; the code's threshold is kept.
    commentlist[:] = [c for c in commentlist if len(c) >= 12]

    ranklist = sorted((xm.sentiment(c), c) for c in commentlist)
    num = int(len(ranklist) * 0.05)

    # Clamp the start at 0: with fewer than 20 comments num is 0 and the old
    # start index of -1 wrapped around to the end of the list.
    low_start = max(num - 1, 0)
    low_scored = [c for _, c in ranklist[low_start:low_start + 5]]

    # NOTE(review): this window skips num + 1 items at the top, while the low
    # window skips num - 1 at the bottom; the asymmetry is preserved as-is.
    high_scored = [c for _, c in ranklist[-num - 6:-num - 1]]

    return high_scored, low_scored
def test_sentiment():
    """A favourable hotel review must score above the 0.5 midpoint."""
    assert xmnlp.sentiment('这酒店真心不错') > 0.5
情感计算 / naive bayes / """ print(descr) doc = """真伤心""" doc2 = """天气太好了,我们去钓鱼吧""" print('\n++++++++++++++++++++++++ usage 1 ++++++++++++++++++++++++\n') """ 1. 使用类来进行操作 """ from xmnlp import XmNLP xm = XmNLP(doc, stopword=True) print('Text: ', doc) print('Score: ', xm.sentiment()) print('Text: ', doc2) print('Score: ', xm.sentiment(doc2)) print('\n++++++++++++++++++++++++ usage 2 ++++++++++++++++++++++++\n') import xmnlp print('Text: ', doc) print('Score: ', xmnlp.sentiment(doc)) print('Text: ', doc2) print('Score: ', xmnlp.sentiment(doc2))
def test_sentiment():
    """Check element 1 of the sentiment result on both sides of 0.5.

    Element 1 is presumably the positive-class probability -- confirm
    against the xmnlp version in use.
    """
    praise = xmnlp.sentiment('这酒店真心不错哦')
    assert praise[1] > 0.5

    complaint = xmnlp.sentiment('这酒店真心太差了')
    assert complaint[1] < 0.5