Exemplo n.º 1
0
        def run(self):
            """
            Score every not-yet-scored comment attached to the topic ``self.id``.

            For each weibo id in the topic's ``text_id_list`` the matching
            comments are loaded, each comment without a truthy ``score`` and
            with non-blank ``content`` is scored with
            ``EmotionAnalysis.sent_sentiment_score`` and written back.  After
            each weibo, ``self.count`` emits the progress as a rounded
            percentage of ``topic['text_num']``; ``self.stop`` emits ``True``
            when the pass is over.

            If the topic document cannot be found, ``self.stop`` is emitted
            immediately and nothing is scored.
            """
            ltp = Ltp.get_object2()
            ltp.load_dict(ALL_DICT_PATH)
            analyzer = EmotionAnalysis(ltp)

            db = MongoDB.get_client()[MONGO_DB]
            topic_collection = db['topic']
            comment_collection = db['comment']
            # Only consulted when MONGO_DB is not 'weibo' (id -> mid mapping).
            mid_collection = db['mid']

            topic = topic_collection.find_one({'_id': ObjectId(self.id)})
            if topic is None:
                # Unknown topic: nothing to score, but still signal completion
                # instead of failing on ``topic[...]`` below.
                self.stop.emit(True)
                return

            total = topic['text_num']
            for done, weibo_id in enumerate(topic['text_id_list'], 1):
                if MONGO_DB == 'weibo':
                    comments = comment_collection.find({'id': weibo_id})
                else:
                    mapping = mid_collection.find_one({'id': weibo_id})
                    # A weibo without a mid mapping has no comments to score,
                    # but it must still count towards the progress below.
                    comments = (comment_collection.find({'mid': mapping['mid']})
                                if mapping else ())

                for comment in comments:
                    if comment.get('score'):
                        continue  # already scored
                    content = comment['content'].strip()
                    if not content:
                        continue
                    comment['score'] = analyzer.sent_sentiment_score(content)
                    comment_collection.update_one({'_id': comment['_id']},
                                                  {'$set': comment}, True)

                self.count.emit(round((done / total) * 100))
            self.stop.emit(True)
Exemplo n.º 2
0
 def __init__(self, mainwindow):
     """
     Bind the ``topic`` and ``weibo`` collections of the ``MONGO_DB``
     database, build the UI on ``mainwindow`` and start with ``self.id``
     set to ``None``.

     :param mainwindow: object handed to ``setupUi``
     """
     super().__init__()
     database = MongoDB.get_client()[MONGO_DB]
     self.collection, self.weibo_collection = (database['topic'],
                                               database['weibo'])
     self.setupUi(mainwindow)
     self.id = None
Exemplo n.º 3
0
    def create_emotion_pie(self):
        """
        Draw the sentiment-polarity pie chart for the topic ``self.id``.

        Every scored, non-blank comment of the topic's weibos is put into one
        of three buckets according to ``score[0] - score[1]``: greater than 2
        -> first bucket ('积极'), below 0 -> second bucket ('消极'), anything
        else -> third bucket ('其它').  The raw bucket counts are stored on
        the topic document as ``score``.  The counts are recomputed on every
        call rather than read back from that stored value.

        For display, 30% of the third bucket is handed to the first two
        buckets in proportion to their raw counts, and the third bucket keeps
        the remaining 70%.  When the first two buckets are both empty the
        raw counts are plotted unchanged.

        :return: the matplotlib figure holding the pie chart
        """
        figure = plt.figure(figsize=(6, 4), dpi=80, frameon=True)
        counts = [0, 0, 0]
        topic = self.topic_collection.find_one({'_id': ObjectId(self.id)})

        # The id -> mid mapping is only needed when the database is not 'weibo'.
        mid_collection = None
        if MONGO_DB != 'weibo':
            mid_collection = MongoDB.get_client()[MONGO_DB]['mid']

        for weibo_id in topic['text_id_list']:
            if mid_collection is None:
                comments = self.comment_collection.find({'id': weibo_id})
            else:
                mid = mid_collection.find_one({'id': weibo_id})
                if not mid:
                    continue
                comments = self.comment_collection.find({'mid': mid['mid']})

            for comment in comments:
                if not comment['content'].strip():
                    continue
                score = comment.get('score')
                if not score:
                    continue  # comment has not been scored yet
                margin = score[0] - score[1]
                if margin > 2:
                    counts[0] += 1
                elif margin < 0:
                    counts[1] += 1
                else:
                    counts[2] += 1

        topic['score'] = counts
        self.topic_collection.update_one({'_id': topic['_id']},
                                         {'$set': topic}, True)

        first, second, third = counts
        decided = first + second
        if decided:
            # Both shares use the same (raw) denominator so that the 30% taken
            # from the third bucket is split proportionally and the total
            # number of comments is preserved.
            moved = third * 0.3
            data = [first + moved * first / decided,
                    second + moved * second / decided,
                    third * 0.7]
        else:
            # No comment landed in the first two buckets: nothing to
            # redistribute towards.
            data = counts

        labels = ['积极', '消极', '其它']
        colors = ['r', 'b', 'y']
        fig1 = plt.subplot(1, 1, 1)
        plt.xlim(0, 4)
        plt.ylim(0, 4)
        fig1.pie(x=data, labels=labels, colors=colors, autopct='%.1f%%')
        fig1.set_title('话题舆情倾向度')
        plt.xticks(())
        plt.yticks(())

        return figure
Exemplo n.º 4
0
 def __init__(self, mainwindow):
     """
     Bind the ``topic`` and ``weibo`` collections, fetch the shared Ltp
     object, reset ``area`` / ``keys`` and build the UI on ``mainwindow``.

     :param mainwindow: object handed to ``setupUi``
     """
     super().__init__()
     database = MongoDB.get_client()[MONGO_DB]
     self.collection, self.weibo_collection = (database['topic'],
                                               database['weibo'])
     self.ltp = Ltp.get_object()
     self.area, self.keys = '', []
     self.setupUi(mainwindow)
Exemplo n.º 5
0
 def __init__(self, parent=None):
     """
     Remember ``parent``, bind the ``topic`` and ``weibo`` collections,
     clear the ``attention`` / ``emotion`` / ``_time`` flags, fetch the
     shared Ltp object and build the UI.

     :param parent: forwarded to the base-class constructor and kept as
         ``self.parent``
     """
     super().__init__(parent)
     self.parent = parent
     database = MongoDB.get_client()[MONGO_DB]
     self.topic_collection, self.weibo_collection = (database['topic'],
                                                     database['weibo'])
     # All three flags start out switched off.
     self.attention = self.emotion = self._time = False
     self.ltp = Ltp.get_object()
     self.setupUi()
Exemplo n.º 6
0
 def __init__(self, id, parent=None):
     """
     Remember ``parent`` and ``id``, fetch the shared Ltp object, bind the
     ``topic`` / ``weibo`` / ``comment`` collections and build the UI.

     :param id: stored as ``self.id``
     :param parent: forwarded to the base-class constructor and kept as
         ``self.parent``
     """
     super().__init__(parent)
     self.parent = parent
     self.ltp = Ltp.get_object()
     database = MongoDB.get_client()[MONGO_DB]
     (self.topic_collection,
      self.weibo_collection,
      self.comment_collection) = (database['topic'],
                                  database['weibo'],
                                  database['comment'])
     self.id = id
     self.setupUi()
Exemplo n.º 7
0
 def __init__(self, parent, id):
     """
     Store ``id``, bind the ``topic`` / ``weibo`` / ``comment`` collections,
     create a horizontal layout on this widget and add the figures returned
     by ``create_heatline`` and ``create_emotion_pie`` through
     ``create_figure``.

     :param parent: forwarded to the base-class constructor
     :param id: stored as ``self.id``
     """
     super().__init__(parent)
     self.id = id
     database = MongoDB.get_client()[MONGO_DB]
     (self.topic_collection,
      self.weibo_collection,
      self.comment_collection) = (database['topic'],
                                  database['weibo'],
                                  database['comment'])
     self.horizontalLayout = QtWidgets.QHBoxLayout(self)
     for build in (self.create_heatline, self.create_emotion_pie):
         self.create_figure(build())
     # NOTE(review): these rcParams are set only after both figures have
     # been created - confirm they still apply to the figures above.
     for option, value in (('font.sans-serif', ['SimHei']),
                           ('axes.unicode_minus', False)):
         plt.rcParams[option] = value
Exemplo n.º 8
0
    def __init__(self, parent=None):
        """
        Initialise the instance.

        Builds the UI, binds the MongoDB client, the ``MONGO_DB`` database
        and its ``weibo`` / ``topic`` / ``comment`` / ``user`` / ``repost``
        collections, and resets every piece of view state to ``None``.

        :param parent: parent window that uses the browser engine; kept as
            ``self.superwindow``
        """
        super().__init__(parent)
        self.superwindow = parent
        self.setupUi()
        self.client = MongoDB.get_client()
        self.db = MongoDB.get_client()[MONGO_DB]
        # One attribute per collection, named after the collection itself.
        for collection_name in ('weibo', 'topic', 'comment', 'user', 'repost'):
            setattr(self, collection_name, self.db[collection_name])

        # View state that is filled in later; everything starts out unset.
        for attribute in ('flag', 'maparea', 'maphtml', 'barhtml',
                          'wordcloudhtml', 'graphtml', 'geohtml', 'id'):
            setattr(self, attribute, None)
Exemplo n.º 9
0
 def run(self):
     """
     Feed every weibo, ordered by ascending ``posted_at``, through
     ``Tdt.single_pass`` against the ``'topic'`` collection and write the
     (possibly modified) weibo document back.

     ``self.count`` emits the running number of processed weibos after each
     one; ``self.stop`` emits ``True`` once the cursor is exhausted.
     """
     ltp, tdt, model = (Ltp.get_object(), Tdt.get_object(),
                        Text2Vec.get_object())
     weibo_collection = MongoDB.get_client()[MONGO_DB]['weibo']
     ordered_weibos = weibo_collection.find().sort('posted_at',
                                                   pymongo.ASCENDING)
     for processed, weibo in enumerate(ordered_weibos, start=1):
         tdt.single_pass(weibo, 'topic', ltp, model)
         weibo_collection.update_one({'_id': weibo['_id']},
                                     {'$set': weibo}, True)
         self.count.emit(processed)
     self.stop.emit(True)
Exemplo n.º 10
0
import sys

from emotion import EmotionAnalysis

# Raw string: the backslashes are literal path separators.  The non-raw form
# relied on '\P', '\w' and '\T' being left alone as invalid escape sequences.
sys.path.append(r'E:\Python\workspace\TDTSystem')
from ltp.ltp import Ltp
from setting import *
from mongo import MongoDB

# Script entry point: score every comment in weibo.comment that has no truthy
# 'score' yet and whose content is not blank, printing a running counter.
if __name__ == '__main__':
    ltp = Ltp(4)
    ltp.load_dict(ALL_DICT_PATH)
    analyzer = EmotionAnalysis(ltp)

    comment = MongoDB.get_client()['weibo']['comment']

    for count, data in enumerate(comment.find(), 1):
        print(count)
        if data.get('score'):
            continue  # already scored
        content = data['content'].strip()
        if not content:
            continue
        data['score'] = analyzer.sent_sentiment_score(content)
        comment.update_one({'_id': data['_id']}, {'$set': data}, True)
Exemplo n.º 11
0
    def single_pass(self, weibo, topic_table, ltp=None, text2vec=None):
        """
        Single-Pass clustering step for one weibo.

        The weibo is compared with every topic in ``topic_table``.  If the
        best similarity is at least 0.5 the weibo is merged into that topic
        and the topic is updated; otherwise the weibo becomes a new topic of
        its own.  While scanning, a topic whose similarity is below 0.4, that
        holds fewer than 5 texts and whose ``latest_time`` is more than 60
        days older than the weibo is deleted.

        A weibo whose ``if_topic`` flag is already truthy is skipped.  On
        completion ``weibo['if_topic']`` is set to ``True`` on the passed-in
        dict (persisting it is left to the caller).

        :param weibo: dict, weibo data
        :param topic_table: str, name of the MongoDB topic collection
        :param ltp: Ltp instance; ``Ltp.get_object()`` is used when falsy
        :param text2vec: Text2Vec instance; ``Text2Vec.get_object()`` is used
            when falsy
        :return: None
        """
        if 'if_topic' in weibo and weibo['if_topic']:
            return
        if not ltp:
            ltp = Ltp.get_object()
        model = text2vec if text2vec else Text2Vec.get_object()

        content = weibo['content']
        parser = ltp.text_parser(content)
        vector = model.text2dict(list(parser[0:3]))  # weibo split: [title, body, hashtag]
        entity = parser[3]  # named entities
        topic_collection = MongoDB.get_client()[MONGO_DB][topic_table]

        # Remember the best-matching topic document itself instead of its
        # position in the scan: re-reading it with ``find_one(skip=index)``
        # assumes a second, unsorted query returns the same ordering.
        best_score = 0.0
        best_topic = None

        for topic in topic_collection.find():
            # Compare against at most len(vector) + 1 of the topic's keywords,
            # taken in the order they are stored.
            candidate = dict(list(topic['keywords'].items())[:len(vector) + 1])
            similar_score = model.similarity(candidate, vector)

            if similar_score < 0.4:
                # Low similarity: the weibo does not belong to this topic;
                # check whether the topic should be retired.
                time_gap = (self.get_timestamp(weibo['posted_at']) -
                            self.get_timestamp(topic['latest_time'])) / 86400
                if topic['text_num'] < 5 and time_gap > 60:
                    # Fewer than 5 texts and no update for two months: retire.
                    topic_collection.delete_one({'_id': topic['_id']})
                    continue

            # Strict '>' keeps the first topic on ties.
            if similar_score > best_score:
                best_score = similar_score
                best_topic = topic

        heat_gain = weibo['comment_count'] + sqrt(
            weibo['forward_count'] + weibo['like_count'])

        if best_topic is not None and best_score >= 0.5:
            # The weibo joins the topic; update the topic.
            topic = best_topic
            keywords = topic['keywords']
            text_num = topic['text_num']  # count before this weibo is added
            topic['text_id_list'].append(weibo['id'])
            topic['text_list'].append(weibo['content'])
            ltp.netag_dict_merge(topic['entity'], entity)
            self.dict_combine(keywords, vector, text_num)
            topic['keywords'] = dict(
                sorted(keywords.items(),
                       key=lambda item: item[1],
                       reverse=True))
            topic['heat'] += heat_gain
            topic['text_num'] += 1
            if weibo['posted_at'] < topic['start_time']:
                topic['start_time'] = weibo['posted_at']
            elif weibo['posted_at'] > topic['latest_time']:
                topic['latest_time'] = weibo['posted_at']
            topic['central_time'] = self.datetime_update(
                topic['central_time'], weibo['posted_at'], text_num)
            topic_collection.update_one({'_id': topic['_id']}, {'$set': topic},
                                        True)
        else:
            # The weibo becomes a new topic of its own.
            posted_at = weibo['posted_at']
            one_topic = {
                'entity': entity,
                'keywords': dict(
                    sorted(vector.items(),
                           key=lambda item: item[1],
                           reverse=True)),
                'text_id_list': [weibo['id']],
                'text_list': [weibo['content']],
                'text_num': 1,
                'heat': heat_gain,
                'start_time': posted_at,
                'latest_time': posted_at,
                'central_time': posted_at,
            }
            topic_collection.insert_one(one_topic)
        weibo['if_topic'] = True
Exemplo n.º 12
0
 def __init__(self, parent=None):
     """
     Bind the ``MONGO_DB`` database as ``self.db``, remember ``parent`` and
     build the UI.

     :param parent: forwarded to the base-class constructor and kept as
         ``self.parent``
     """
     super().__init__(parent)
     client = MongoDB.get_client()
     self.db = client[MONGO_DB]
     self.parent = parent
     self.setupUi()