def run(self):
    """Score the sentiment of every unscored comment under the current topic.

    Looks up the topic whose ``_id`` is ``self.id``, walks the weibo ids in
    its ``text_id_list`` and, for each comment that has non-blank content and
    no truthy ``score`` yet, stores ``analyzer.sent_sentiment_score(content)``
    in the comment's ``score`` field.

    Emits ``self.count`` with a 0-100 percentage after each weibo and
    ``self.stop`` with ``True`` when the work is finished (including when the
    topic cannot be found).
    """
    ltp = Ltp.get_object2()
    ltp.load_dict(ALL_DICT_PATH)
    analyzer = EmotionAnalysis(ltp)

    db = MongoDB.get_client()[MONGO_DB]
    topic_collection = db['topic']
    comment_collection = db['comment']

    topic = topic_collection.find_one({'_id': ObjectId(self.id)})
    if not topic:
        # Nothing to score; still tell the listener that the worker is done
        # instead of dying on a None subscript.
        self.stop.emit(True)
        return

    total = topic['text_num']
    for done, weibo_id in enumerate(topic['text_id_list'], start=1):
        if MONGO_DB == 'weibo':
            comments = comment_collection.find({'id': weibo_id})
        else:
            # In the other databases comments are keyed by 'mid', so the
            # weibo id has to be translated through the 'mid' collection.
            mapping = db['mid'].find_one({'id': weibo_id})
            comments = (
                comment_collection.find({'mid': mapping['mid']})
                if mapping else ()
            )

        for comment in comments:
            if comment.get('score'):
                continue  # already scored
            content = comment['content'].strip()
            if not content:
                continue
            score = analyzer.sent_sentiment_score(content)
            # Write only the new field: re-sending the whole document with
            # upsert could clobber concurrent changes or re-insert a comment
            # that was deleted in the meantime.
            comment_collection.update_one(
                {'_id': comment['_id']}, {'$set': {'score': score}})

        # Count every weibo, including those without a mid mapping, so the
        # progress can actually reach 100; skip the report when text_num is 0.
        if total:
            self.count.emit(min(100, round(done / total * 100)))

    self.stop.emit(True)
def __init__(self, mainwindow):
    """Bind the 'topic' and 'weibo' collections and build the UI.

    :param mainwindow: object handed to ``setupUi``
    """
    super().__init__()
    database = MongoDB.get_client()[MONGO_DB]
    self.collection = database['topic']
    self.weibo_collection = database['weibo']
    self.setupUi(mainwindow)
    # No topic is selected until a caller assigns one.
    self.id = None
def create_emotion_pie(self):
    """Draw the sentiment-orientation pie chart for the current topic.

    Every scored, non-blank comment under the topic's weibos is put in one of
    three buckets by ``score[0] - score[1]``: greater than 2 is positive,
    below 0 is negative, anything else is "other". The raw bucket counts are
    stored in the topic's ``score`` field. For display, 30% of the "other"
    bucket is then shared between positive and negative in proportion to
    their sizes.

    :return: the matplotlib figure containing the pie chart
    """
    figure = plt.figure(figsize=(6, 4), dpi=80, frameon=True)
    data = [0, 0, 0]  # [positive, negative, other]
    topic = self.topic_collection.find_one({'_id': ObjectId(self.id)})

    # The counts are recomputed on every call rather than read back from
    # topic['score'].
    for weibo_id in topic['text_id_list']:
        if MONGO_DB == 'weibo':
            comments = self.comment_collection.find({'id': weibo_id})
        else:
            # Comments are keyed by 'mid' here; translate the weibo id first.
            mid = MongoDB.get_client()[MONGO_DB]['mid'].find_one(
                {'id': weibo_id})
            if not mid:
                continue
            comments = self.comment_collection.find({'mid': mid['mid']})

        for comment in comments:
            if not comment['content'].strip():
                continue
            score = comment.get('score')
            if not score:
                continue
            polarity = score[0] - score[1]
            if polarity > 2:
                data[0] += 1
            elif polarity < 0:
                data[1] += 1
            else:
                data[2] += 1

    # Persist only the raw counts; list() decouples the stored value from the
    # display adjustment below.
    self.topic_collection.update_one(
        {'_id': topic['_id']}, {'$set': {'score': list(data)}})

    # Move 30% of "other" into positive/negative proportionally. Both shares
    # are computed from the same pre-adjustment totals so the overall sum is
    # preserved. With no polar comments there is nothing to weight by, so
    # the counts are left untouched.
    polar_total = data[0] + data[1]
    if polar_total:
        shifted = data[2] * 0.3
        data[0], data[1] = (
            data[0] + shifted * data[0] / polar_total,
            data[1] + shifted * data[1] / polar_total,
        )
        data[2] *= 0.7

    labels = ['积极', '消极', '其它']
    colors = ['r', 'b', 'y']
    axes = plt.subplot(1, 1, 1)
    plt.xlim(0, 4)
    plt.ylim(0, 4)
    axes.pie(x=data, labels=labels, colors=colors, autopct='%.1f%%')
    axes.set_title('话题舆情倾向度')
    plt.xticks(())
    plt.yticks(())
    return figure
def __init__(self, mainwindow):
    """Prepare database handles, the Ltp instance and filter state, then build the UI.

    :param mainwindow: object handed to ``setupUi``
    """
    super().__init__()
    database = MongoDB.get_client()[MONGO_DB]
    self.collection = database['topic']
    self.weibo_collection = database['weibo']
    self.ltp = Ltp.get_object()
    # Start with an empty area string and an empty key list.
    self.area, self.keys = '', []
    self.setupUi(mainwindow)
def __init__(self, parent=None):
    """Set up collection handles, the three boolean flags and the Ltp instance, then build the UI.

    :param parent: optional parent object, also kept as ``self.parent``
    """
    super().__init__(parent)
    self.parent = parent
    database = MongoDB.get_client()[MONGO_DB]
    self.topic_collection = database['topic']
    self.weibo_collection = database['weibo']
    # All three flags start switched off.
    self.attention = self.emotion = self._time = False
    self.ltp = Ltp.get_object()
    self.setupUi()
def __init__(self, id, parent=None):
    """Store the given id, bind the topic/weibo/comment collections and build the UI.

    :param id: identifier kept as ``self.id``
    :param parent: optional parent object, also kept as ``self.parent``
    """
    super().__init__(parent)
    self.parent = parent
    self.ltp = Ltp.get_object()
    database = MongoDB.get_client()[MONGO_DB]
    self.topic_collection = database['topic']
    self.weibo_collection = database['weibo']
    self.comment_collection = database['comment']
    self.id = id
    self.setupUi()
def __init__(self, parent, id):
    """Build a horizontal layout holding the heat-line and sentiment-pie figures.

    :param parent: parent object passed to the base initializer
    :param id: identifier kept as ``self.id``
    """
    super().__init__(parent)
    self.id = id
    database = MongoDB.get_client()[MONGO_DB]
    self.topic_collection = database['topic']
    self.weibo_collection = database['weibo']
    self.comment_collection = database['comment']
    self.horizontalLayout = QtWidgets.QHBoxLayout(self)
    # Build and attach the figures one after another: heat line, then pie.
    for build_figure in (self.create_heatline, self.create_emotion_pie):
        self.create_figure(build_figure())
    # Select the SimHei font for sans-serif text and turn off the Unicode
    # minus glyph on axes.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False
def __init__(self, parent=None):
    """
    Initialize the widget.
    :param parent: parent window that invokes the browser engine
    """
    super().__init__(parent)
    self.superwindow = parent
    self.setupUi()
    self.client = MongoDB.get_client()
    self.db = MongoDB.get_client()[MONGO_DB]
    # Each collection handle is stored under an attribute of the same name.
    for collection_name in ('weibo', 'topic', 'comment', 'user', 'repost'):
        setattr(self, collection_name, self.db[collection_name])
    # Everything below starts unset.
    for attribute in ('flag', 'maparea', 'maphtml', 'barhtml',
                      'wordcloudhtml', 'graphtml', 'geohtml', 'id'):
        setattr(self, attribute, None)
def run(self):
    """Feed every weibo, in ascending ``posted_at`` order, through Single-Pass clustering.

    After each post is handled by ``tdt.single_pass`` the (possibly modified)
    document is written back, and ``self.count`` is emitted with the number
    of posts handled so far. ``self.stop`` is emitted with ``True`` at the end.
    """
    ltp = Ltp.get_object()
    tdt = Tdt.get_object()
    model = Text2Vec.get_object()
    weibo_collection = MongoDB.get_client()[MONGO_DB]['weibo']
    ordered_weibos = weibo_collection.find().sort(
        'posted_at', pymongo.ASCENDING)
    for processed, weibo in enumerate(ordered_weibos, start=1):
        tdt.single_pass(weibo, 'topic', ltp, model)
        weibo_collection.update_one(
            {'_id': weibo['_id']}, {'$set': weibo}, True)
        self.count.emit(processed)
    self.stop.emit(True)
import sys

from emotion import EmotionAnalysis

# Raw string: '\P', '\w' and '\T' are not valid escape sequences, so the
# plain literal only worked by accident and triggers a warning on newer
# interpreters. The resulting path is unchanged.
sys.path.append(r'E:\Python\workspace\TDTSystem')

from ltp.ltp import Ltp
from setting import *
from mongo import MongoDB

if __name__ == '__main__':
    # Batch job: give every comment in the 'weibo' database that has
    # non-blank content and no truthy score a sentiment score.
    ltp = Ltp(4)
    ltp.load_dict(ALL_DICT_PATH)
    analyzer = EmotionAnalysis(ltp)
    comment = MongoDB.get_client()['weibo']['comment']

    for count, data in enumerate(comment.find(), start=1):
        print(count)  # running progress: number of comments visited
        if data.get('score'):
            continue  # already scored
        content = data['content'].strip()
        if not content:
            continue
        score = analyzer.sent_sentiment_score(content)
        # Write only the new field; re-sending the whole document with
        # upsert could overwrite concurrent changes or re-insert a comment
        # deleted in the meantime.
        comment.update_one({'_id': data['_id']}, {'$set': {'score': score}})
def single_pass(self, weibo, topic_table, ltp=None, text2vec=None):
    """
    Single-Pass clustering step for one weibo post.

    The post is compared against every topic in the topic collection. If the
    best similarity is at least 0.5 the post joins that topic and the topic is
    updated; otherwise the post becomes a new topic of its own. While
    scanning, a topic whose similarity is below 0.4, that has fewer than 5
    posts and whose latest post is more than 60 days older than this one is
    deleted. Posts already flagged with a truthy ``if_topic`` are ignored.

    :param weibo: dict, weibo data; ``if_topic`` is set to True on it
    :param topic_table: str, name of the MongoDB topic collection
    :param ltp: Ltp instance (``Ltp.get_object()`` when omitted)
    :param text2vec: Text2Vec instance (``Text2Vec.get_object()`` when omitted)
    :return: None
    """
    if weibo.get('if_topic'):
        return
    if not ltp:
        ltp = Ltp.get_object()
    model = text2vec if text2vec else Text2Vec.get_object()

    content = weibo['content']
    parser = ltp.text_parser(content)
    vector = model.text2dict(list(parser[0:3]))  # post split: [title, body, hashtag]
    entity = parser[3]  # named entities
    topic_collection = MongoDB.get_client()[MONGO_DB][topic_table]

    # Remember the best-matching topic document itself instead of its list
    # position, so no second positional query is needed after the scan.
    best_score = 0.0
    best_topic = None
    for topic in topic_collection.find():
        # Compare against at most len(vector) + 1 of the topic's keywords,
        # taken in their stored order.
        topic_vector = {}
        for key, value in topic['keywords'].items():
            if len(topic_vector) > len(vector):
                break
            topic_vector[key] = value
        similar_score = model.similarity(topic_vector, vector)

        if similar_score < 0.4:
            # Low similarity: the post does not belong here; check whether
            # the topic should be retired.
            time_gap = (self.get_timestamp(weibo['posted_at'])
                        - self.get_timestamp(topic['latest_time'])) / 86400
            if topic['text_num'] < 5 and time_gap > 60:
                topic_collection.delete_one({'_id': topic['_id']})
                continue

        # Strict '>' keeps the first topic on ties.
        if best_topic is None or similar_score > best_score:
            best_score, best_topic = similar_score, topic

    if best_topic is not None and best_score >= 0.5:
        # The post joins the topic; update the topic.
        topic = best_topic
        keywords = topic['keywords']
        text_num = topic['text_num']
        topic['text_id_list'].append(weibo['id'])
        topic['text_list'].append(weibo['content'])
        ltp.netag_dict_merge(topic['entity'], entity)
        self.dict_combine(keywords, vector, text_num)
        topic['keywords'] = dict(
            sorted(keywords.items(), key=lambda item: item[1], reverse=True))
        topic['heat'] += weibo['comment_count'] + sqrt(
            weibo['forward_count'] + weibo['like_count'])
        topic['text_num'] += 1
        if weibo['posted_at'] < topic['start_time']:
            topic['start_time'] = weibo['posted_at']
        elif weibo['posted_at'] > topic['latest_time']:
            topic['latest_time'] = weibo['posted_at']
        # text_num here is the post count before this post was added.
        topic['central_time'] = self.datetime_update(
            topic['central_time'], weibo['posted_at'], text_num)
        topic_collection.update_one(
            {'_id': topic['_id']}, {'$set': topic}, True)
    else:
        # The post starts a new topic of its own.
        posted_at = weibo['posted_at']
        one_topic = {
            'entity': entity,
            'keywords': dict(
                sorted(vector.items(), key=lambda item: item[1],
                       reverse=True)),
            'text_id_list': [weibo['id']],
            'text_list': [weibo['content']],
            'text_num': 1,
            'heat': weibo['comment_count'] + sqrt(
                weibo['forward_count'] + weibo['like_count']),
            'start_time': posted_at,
            'latest_time': posted_at,
            'central_time': posted_at,
        }
        topic_collection.insert_one(one_topic)

    # Flag the post in both branches so a later call returns immediately.
    weibo['if_topic'] = True
def __init__(self, parent=None): super().__init__(parent) self.db = MongoDB.get_client()[MONGO_DB] self.parent = parent self.setupUi()