Пример #1
0
def getEventTimeSE(clusterDbName):
    """Return the earliest and latest inferred event dates of a cluster database.

    Every document of the ``clusterResult`` collection is read and the first
    10 characters of ``time_infer.most_possible_time`` are compared as plain
    strings to find the minimum and maximum.

    Args:
        clusterDbName: Name of the database that holds ``clusterResult``.

    Returns:
        A response dict with the keys ``resCode``, ``resStr``, ``resObject``
        and ``resList``. On success ``resCode`` is 1 and ``resObject`` is
        ``{'stime': ..., 'etime': ...}``. On any error ``resCode`` is 0 and
        ``resObject`` is an empty string. When the collection is empty the
        initial sentinels ('9999-99-99' / '0000-00-00') are returned as-is.
    """
    client = None
    try:
        client = getMongoClient()
        clusters = client[clusterDbName]['clusterResult'].find()
        # Sentinels for the string comparison below; presumably the stored
        # values are 'YYYY-MM-DD...' strings, which sort between them.
        stime = '9999-99-99'
        etime = '0000-00-00'
        for cluster in clusters:
            day = cluster['time_infer']['most_possible_time'][0:10]
            if day < stime:
                stime = day
            if day > etime:
                etime = day
        return {
            'resCode': 1,
            'resStr': '获取事件始末时间成功',
            'resObject': {'stime': stime, 'etime': etime},
            'resList': []
        }
    except Exception:
        return {
            'resCode': 0,
            'resStr': '获取事件始末时间失败',
            'resObject': '',
            'resList': []
        }
    finally:
        # Release the connection on the failure path too.
        if client is not None:
            client.close()
Пример #2
0
def getEventLevelDataAction(clusterDbName):
    """Count how many clusters exist per event level.

    Reads every document of the ``clusterResult`` collection and tallies the
    value found at ``summary.level``.

    Args:
        clusterDbName: Name of the database that holds ``clusterResult``.

    Returns:
        A response dict with the keys ``resCode``, ``resStr``, ``resObject``
        and ``resList``. On success ``resCode`` is 1 and ``resObject`` maps
        each level to its number of clusters. On any error ``resCode`` is 0
        and ``resObject`` is an empty string.
    """
    client = None
    try:
        client = getMongoClient()
        clusters = client[clusterDbName]['clusterResult'].find()
        levelInfo = dict()
        for cluster in clusters:
            level = cluster['summary']['level']
            levelInfo[level] = levelInfo.get(level, 0) + 1
        return {
            'resCode': 1,
            'resStr': '获取事件等级统计数据成功',
            'resObject': levelInfo,
            'resList': []
        }
    except Exception:
        return {
            'resCode': 0,
            'resStr': '事件等级统计数据库查询失败',
            'resObject': '',
            'resList': []
        }
    finally:
        # Release the connection on the failure path too.
        if client is not None:
            client.close()
Пример #3
0
def queryVisibleDataByPname(pname):
    """Look up the pre-stored chart data of a program by its name.

    The ``visibleData`` collection of the ``programs`` database is queried
    for a document whose ``pname`` matches. If none exists, the program's
    ``dbName`` is fetched from the ``programs`` collection instead so that
    the caller can generate the chart data itself.

    Args:
        pname: Name of the program.

    Returns:
        A response dict with the keys ``resCode``, ``resStr``, ``resObject``
        and ``resList``:

        * ``resCode`` 1  - stored data found; ``resObject`` is its
          ``chartData``.
        * ``resCode`` 0  - no stored data; ``resObject`` is the program's
          ``dbName``.
        * ``resCode`` -1 - any error, including an unknown program name.
    """
    client = None
    try:
        client = getMongoClient()
        db = client['programs']
        # Let the server filter by name instead of scanning every document.
        document = db['visibleData'].find_one({'pname': pname})
        if document is not None:
            return {
                'resCode': 1,
                'resStr': '方案' + pname + '预存图表数据存在!',
                'resObject': document['chartData'],
                'resList': []
            }
        # No stored data: the chart data has to be generated from the
        # program's database, so hand back its name. A missing program makes
        # the subscript below fail and is reported as an error (-1).
        program = db['programs'].find_one({'pname': pname})
        dbName = program['dbName']
        return {
            'resCode': 0,
            'resStr': '方案' + pname + '预存图表数据不存在!',
            'resObject': dbName,
            'resList': []
        }
    except Exception:
        return {
            'resCode': -1,
            'resStr': '查询方案预存数据时出错!',
            'resObject': '',
            'resList': []
        }
    finally:
        # Release the connection on the failure path too.
        if client is not None:
            client.close()
Пример #4
0
def getEventAnalysisConfigDataAction():
    """Return which event-analysis charts are configured to load.

    Reads the first document of the ``eventAnalysis`` collection in the
    ``programs`` database and copies the six chart settings out of it.

    Returns:
        A response dict with the keys ``resCode``, ``resStr``, ``resObject``
        and ``resList``. On success ``resCode`` is 1 and ``resObject`` holds
        the keys ``wordCloud``, ``mapHot``, ``discussionTrend``,
        ``levelPie``, ``popularTweeter`` and ``timeTrack``. On any error
        (including a missing config document or key) ``resCode`` is 0.
    """
    chartKeys = ('wordCloud', 'mapHot', 'discussionTrend', 'levelPie',
                 'popularTweeter', 'timeTrack')
    client = None
    try:
        client = getMongoClient()
        # find_one returns None for an empty collection; the subscripting
        # below then raises and is reported as a failure.
        configObject = client['programs']['eventAnalysis'].find_one()
        resultObject = {key: configObject[key] for key in chartKeys}
        return {
            'resCode': 1,
            'resStr': '获取事件分析图表加载项数据成功',
            'resObject': resultObject,
            'resList': []
        }
    except Exception:
        return {
            'resCode': 0,
            'resStr': '事件分析图表加载项数据库查询失败',
            'resObject': '',
            'resList': []
        }
    finally:
        # The client was previously never closed on any path.
        if client is not None:
            client.close()
Пример #5
0
def getTweetWithTimePots(timePots, clusterDbName, clusterTweetDict):
    """Pick one representative tweet for each requested time node.

    For every cluster whose inferred day (first 10 characters of
    ``time_infer.most_possible_time``) is contained in ``timePots``, the
    cluster's tweets created on that same day are collected. For each day
    that ends up with tweets, ``getBestTweet`` chooses the one to show.

    Args:
        timePots: Container of day strings to keep (tested with ``in``).
        clusterDbName: Name of the database that holds ``clusterResult``.
        clusterTweetDict: Mapping of cluster id (``cluid``) to that
            cluster's list of tweet dicts.

    Returns:
        A response dict with the keys ``resCode``, ``resStr``, ``resObject``
        and ``resList``. On success ``resCode`` is 1 and ``resList`` holds
        one entry per day, sorted by day, with the keys ``userName``,
        ``userHead``, ``text``, ``tweetNums`` and ``time``
        (``{'ymd': ..., 'hm': ...}``). On any error ``resCode`` is 0.
    """
    client = None
    try:
        client = getMongoClient()
        clusters = client[clusterDbName]['clusterResult'].find()
        # First gather all tweets belonging to each time node.
        timeTweetsDict = dict()
        for cluster in clusters:
            mostPossibleTime = cluster['time_infer']['most_possible_time'][
                0:10]
            cluid = cluster['cluid']
            # Only clusters whose inferred day is a requested time node count.
            if mostPossibleTime not in timePots:
                continue
            # Keep the tweets created on the cluster's inferred day.
            for tweet in clusterTweetDict[cluid]:
                if tweet['created_at'][0:10] == mostPossibleTime:
                    timeTweetsDict.setdefault(mostPossibleTime,
                                              []).append(tweet)
        # Choose a single tweet for every day, in chronological order.
        timeTweetsData = list()
        for day in sorted(timeTweetsDict):
            bestTweet, tweetActionNums = getBestTweet(timeTweetsDict[day])
            timeTweetsData.append({
                'userName': bestTweet['user']['data_name'],
                'userHead': 'none',
                'text': bestTweet['standard_text'],
                'tweetNums': tweetActionNums,
                'time': {
                    'ymd': day,
                    'hm': bestTweet['created_at'][11:16]
                }
            })
        return {
            'resCode': 1,
            'resStr': '获取指定时间的推文成功',
            'resObject': '',
            'resList': timeTweetsData
        }
    except Exception:
        return {
            'resCode': 0,
            'resStr': '获取指定时间的推文失败',
            'resObject': '',
            'resList': []
        }
    finally:
        # Release the connection on the failure path too.
        if client is not None:
            client.close()
Пример #6
0
def getPopularTweeterDataAction(clusterDbName, clusterTweetDict):
    """Return the best tweet of each of the 20 hottest clusters.

    Clusters of the ``clusterResult`` collection are ordered by
    ``summary.hot`` (highest first; ties keep their stored order) and for
    each of the first 20 ``getBestTweet`` picks a tweet from that cluster's
    tweet list.

    Args:
        clusterDbName: Name of the database that holds ``clusterResult``.
        clusterTweetDict: Mapping of cluster id (``cluid``) to that
            cluster's list of tweet dicts.

    Returns:
        A response dict with the keys ``resCode``, ``resStr``, ``resObject``
        and ``resList``. On success ``resCode`` is 1 and ``resList`` holds
        up to 20 entries with the keys ``userName``, ``userHead``, ``time``,
        ``text`` and ``tweetNums``. On any error ``resCode`` is 0.
    """
    client = None
    try:
        client = getMongoClient()
        clusters = list(client[clusterDbName]['clusterResult'].find())
        # Sort by heat and keep the top 20 event instances.
        hottestEvents = sorted(clusters,
                               key=lambda cluster: cluster['summary']['hot'],
                               reverse=True)[0:20]
        # Take the most representative tweet and its author per event.
        resultList = list()
        for event in hottestEvents:
            tweetsList = clusterTweetDict[event['cluid']]
            bestTweet, tweetActionNums = getBestTweet(tweetsList)
            resultList.append({
                'userName': bestTweet['user']['data_name'],
                'userHead': 'none',
                'time': bestTweet['created_at'],
                'text': bestTweet['standard_text'],
                'tweetNums': tweetActionNums
            })
        return {
            'resCode': 1,
            'resStr': '获取热门tweeter数据成功',
            'resObject': '',
            'resList': resultList
        }
    except Exception:
        return {
            'resCode': 0,
            'resStr': '热门tweeter数据库查询失败',
            'resObject': '',
            'resList': []
        }
    finally:
        # Release the connection on the failure path too.
        if client is not None:
            client.close()
Пример #7
0
def addProgramAction(programForm):
    """Insert a new program unless one with the same name already exists.

    Args:
        programForm: Mapping with the keys ``pname``, ``desc``, ``dbName``,
            ``maxNum``, ``timeRange`` (start at index 0, end at index 1),
            ``keywords``, ``ignorewords`` and ``topics``.

    Returns:
        A response dict with the keys ``resCode``, ``resStr``, ``resObject``
        and ``resList``:

        * ``resCode`` 1  - the program was inserted.
        * ``resCode`` 0  - the insert itself failed.
        * ``resCode`` -1 - a program with this ``pname`` already exists.

    Raises:
        KeyError: If ``programForm`` lacks one of the required keys. This is
            raised before the database is touched.
        Exception: Errors from obtaining the client or from the existence
            lookup are not caught and propagate to the caller.
    """
    # Collect the parameters; the new record always starts in the
    # '无数据' status.
    programData = {
        'pname': programForm['pname'],
        'desc': programForm['desc'],
        'dbName': programForm['dbName'],
        'maxNum': programForm['maxNum'],
        'stime': programForm['timeRange'][0],
        'etime': programForm['timeRange'][1],
        'keywords': programForm['keywords'],
        'ignorewords': programForm['ignorewords'],
        'topics': programForm['topics'],
        'status': '无数据'
    }
    # Database work; the finally clause closes the client even when the
    # existence lookup raises.
    client = getMongoClient()
    try:
        programsCol = client['programs']['programs']
        # NOTE(review): check-then-insert is not atomic; two concurrent calls
        # could both pass this check unless the collection enforces
        # uniqueness on pname - confirm.
        if programsCol.find_one({'pname': programData['pname']}) is not None:
            return {
                'resCode': -1,
                'resStr': '记录已存在',
                'resObject': '',
                'resList': []
            }
        try:
            programsCol.insert_one(programData)
        except Exception:
            return {
                'resCode': 0,
                'resStr': '插入记录时发生错误',
                'resObject': '',
                'resList': []
            }
        return {
            'resCode': 1,
            'resStr': '添加新方案成功',
            'resObject': '',
            'resList': []
        }
    finally:
        client.close()
Пример #8
0
def getClusterTweetListDict(clusterDbName):
    """Build a mapping from cluster id to the tweets of that cluster.

    For every document in ``clusterResult`` the ``clusterTweet`` collection
    is queried by ``cluid`` and the ``tweet`` field of each match is
    collected.

    Args:
        clusterDbName: Name of the database holding both collections.

    Returns:
        A dict mapping each ``cluid`` to a list of tweet objects (an empty
        list when the cluster has no tweets).

    Raises:
        Exception: Database and key errors are not caught here and propagate
            to the caller.
    """
    client = getMongoClient()
    try:
        db = client[clusterDbName]
        clusterTweetCol = db['clusterTweet']
        clusterTweetDict = dict()
        for cluster in db['clusterResult'].find():
            cluid = cluster['cluid']
            clusterTweetDict[cluid] = [
                tweetObj['tweet']
                for tweetObj in clusterTweetCol.find({'cluid': cluid})
            ]
        return clusterTweetDict
    finally:
        # The client was previously never closed.
        client.close()
Пример #9
0
def getAllProgramsAction():
    """Return every stored program, formatted for the program table.

    All documents of the ``programs`` collection in the ``programs``
    database are loaded and passed through ``getProgramTableList``.

    Returns:
        A response dict with the keys ``resCode``, ``resStr``, ``resObject``
        and ``resList``. On success ``resCode`` is 1 and ``resList`` is the
        value returned by ``getProgramTableList``. On any error ``resCode``
        is 0 and ``resList`` is empty.
    """
    client = None
    try:
        client = getMongoClient()
        programs = list(client['programs']['programs'].find())
        return {
            'resCode': 1,
            'resStr': '查询数据库成功',
            'resObject': '',
            'resList': getProgramTableList(programs)
        }
    except Exception:
        return {
            'resCode': 0,
            'resStr': '查询数据库失败',
            'resObject': '',
            'resList': []
        }
    finally:
        # Release the connection on the failure path too.
        if client is not None:
            client.close()
Пример #10
0
def storeVisibleData(pname, chartData):
    """Store pre-computed chart data for a program.

    Inserts ``{'pname': pname, 'chartData': chartData}`` into the
    ``visibleData`` collection of the ``programs`` database.

    Args:
        pname: Name of the program; also concatenated into the response
            message.
        chartData: Chart data to store alongside the name.

    Returns:
        A response dict with the keys ``resCode``, ``resStr``, ``resObject``
        and ``resList``. ``resCode`` is 1 on success and 0 on any error.
    """
    client = None
    try:
        client = getMongoClient()
        client['programs']['visibleData'].insert_one({
            'pname': pname,
            'chartData': chartData
        })
        return {
            'resCode': 1,
            'resStr': '预存方案' + pname + '图表数据成功!',
            'resObject': '',
            'resList': []
        }
    except Exception:
        return {
            'resCode': 0,
            'resStr': '预存方案' + pname + '图表数据失败!',
            'resObject': '',
            'resList': []
        }
    finally:
        # The client was previously never closed on any path.
        if client is not None:
            client.close()
Пример #11
0
def changeEventAnalysisAction(data):
    """Update the event-analysis chart configuration.

    Applies ``{"$set": data}`` with an empty filter to the
    ``eventAnalysis`` collection of the ``programs`` database via
    ``update_one``, so only one document is updated.

    Args:
        data: Mapping of configuration fields to their new values.

    Returns:
        A response dict with the keys ``resCode``, ``resStr``, ``resObject``
        and ``resList``. ``resCode`` is 1 on success and 0 on any error.
    """
    client = None
    try:
        client = getMongoClient()
        client['programs']['eventAnalysis'].update_one({}, {"$set": data})
        return {
            'resCode': 1,
            'resStr': '修改事件分析图表加载项数据成功',
            'resObject': '',
            'resList': []
        }
    except Exception:
        return {
            'resCode': 0,
            'resStr': '修改事件分析图表加载项数据失败',
            'resObject': '',
            'resList': []
        }
    finally:
        # The client was previously never closed on any path.
        if client is not None:
            client.close()
Пример #12
0
def deleteProgramAction(pname):
    """Delete the program named ``pname`` from the ``programs`` collection.

    Args:
        pname: Name of the program to delete; also concatenated into the
            success message.

    Returns:
        A response dict with the keys ``resCode``, ``resStr``, ``resObject``
        and ``resList``. ``resCode`` is 1 when the delete call completed
        (whether or not a document matched) and 0 when anything in the
        delete step raised.
    """
    client = getMongoClient()
    collection = client['programs']['programs']
    try:
        collection.delete_one({'pname': pname})
        client.close()
        response = {
            'resCode': 1,
            'resStr': '删除方案:' + pname + '成功!',
            'resObject': '',
            'resList': []
        }
    except:
        # Close on the failure path as well before reporting the error.
        client.close()
        response = {
            'resCode': 0,
            'resStr': '删除方案出错!',
            'resObject': '',
            'resList': []
        }
    return response
Пример #13
0
def startProgramAction(pname, num):
    """Set the ``status`` field of the program named ``pname`` to ``num``.

    Args:
        pname: Name of the program to update.
        num: New value stored in the program's ``status`` field.

    Returns:
        A response dict with the keys ``resCode``, ``resStr``, ``resObject``
        and ``resList``. ``resCode`` is 1 when the update call completed
        (whether or not a document matched) and 0 on any error.
    """
    client = None
    try:
        client = getMongoClient()
        client['programs']['programs'].update_one(
            {'pname': pname}, {"$set": {"status": num}})
        return {
            'resCode': 1,
            'resStr': '启动方案成功!',
            'resObject': '',
            'resList': []
        }
    except Exception:
        return {
            'resCode': 0,
            'resStr': '启动方案出错!',
            'resObject': '',
            'resList': []
        }
    finally:
        # Release the connection on the failure path too.
        if client is not None:
            client.close()
Пример #14
0
def getMapHotDataAction(clusterDbName):
    """Aggregate cluster heat per address and per country for the heat map.

    Every ``geo_infer`` entry with a non-empty ``address`` contributes the
    cluster's ``summary.hot`` to that address. Each address gets the average
    of its accumulated heat, and the averages are summed per country.

    Args:
        clusterDbName: Name of the database that holds ``clusterResult``.

    Returns:
        A response dict with the keys ``resCode``, ``resStr``, ``resObject``
        and ``resList``. On success ``resCode`` is 1 and ``resObject`` is::

            {'addressInfo': {address: {'hot': average, 'geo': {'lat', 'lng'}}},
             'countryInfo': [(country, heatSum), ...]}   # at most 10, hottest first

        On any error ``resCode`` is 0 and ``resObject`` is an empty string.
    """
    client = None
    try:
        client = getMongoClient()
        clusters = client[clusterDbName]['clusterResult'].find()
        addressInfo = dict()
        for cluster in clusters:
            hot = cluster['summary']['hot']
            for geoItem in cluster['geo_infer']:
                address = geoItem['address']
                lat = geoItem['lat']
                lon = geoItem['lon']
                freq = geoItem['freq']
                country = geoItem['country']
                if not address:
                    continue
                entry = addressInfo.get(address)
                if entry is None:
                    # NOTE(review): the first occurrence adds plain `hot`
                    # while later ones add `hot * freq`; this looks
                    # inconsistent but is kept as-is - confirm the intended
                    # weighting.
                    addressInfo[address] = {
                        'hotSum': hot,
                        'hotCount': 1,
                        'geo': {
                            'lat': lat,
                            'lng': lon
                        },
                        'country': country
                    }
                else:
                    entry['hotSum'] += hot * freq
                    entry['hotCount'] += 1
        resultInfo = {'addressInfo': {}, 'countryInfo': []}
        countryInfoDic = dict()
        # Average heat of every address, summed up per country.
        for address, entry in addressInfo.items():
            hotAve = entry['hotSum'] / entry['hotCount']
            resultInfo['addressInfo'][address] = {
                'hot': hotAve,
                'geo': entry['geo']
            }
            country = entry['country']
            if country:
                countryInfoDic[country] = countryInfoDic.get(country,
                                                             0) + hotAve
        # Keep at most the 10 countries with the highest total heat.
        resultInfo['countryInfo'] = sorted(countryInfoDic.items(),
                                           key=lambda x: x[1],
                                           reverse=True)[0:10]
        return {
            'resCode': 1,
            'resStr': '获取热点地图数据成功',
            'resObject': resultInfo,
            'resList': []
        }
    except Exception:
        return {
            'resCode': 0,
            'resStr': '热点地图数据库查询失败',
            'resObject': '',
            'resList': []
        }
    finally:
        # Release the connection on the failure path too.
        if client is not None:
            client.close()
Пример #15
0
    clusterTweetCol = db['clusterTweet']
    clusters = clusterResultCol.find()
    clusterTweetDict = dict()
    for cluster in clusters:
        cluid = cluster['cluid']
        query = {'cluid': cluid}
        tweetObjList = clusterTweetCol.find(query)
        clusterTweetDict[cluid] = list()
        for tweetObj in tweetObjList:
            clusterTweetDict[cluid].append(tweetObj['tweet'])
    return clusterTweetDict


if __name__ == '__main__':
    # getTweetWithTimePots(['2016-02-26'], 'cluster_natural_disaster')
    client = getMongoClient()
    db = client['cluster_2019HKProtest']
    clusterTweetCol = db['clusterTweet']
    tweetObjList = clusterTweetCol.find()
    tweetTimeDict = dict()
    for tweetObj in tweetObjList:
        tweet = tweetObj['tweet']
        time = tweet['created_at'][0:10]
        if time in tweetTimeDict.keys():
            tweetTimeDict[time] += 1
        else:
            tweetTimeDict[time] = 1
    sortedList = sorted(tweetTimeDict.items(),
                        key=lambda x: x[1],
                        reverse=True)
    for item in sortedList:
Пример #16
0
def getWordCloudDataAction(clusterDbName):
    """Build the word-cloud data from cluster keywords and geo words.

    Keywords (``summary.keywords``) are counted once per occurrence; geo
    words (``summary.geowords``, items indexed as ``[word, freq]``) are
    summed by their frequency. The result keeps at most 30 keywords
    followed by at most 10 geo words, each group sorted by value,
    highest first.

    Args:
        clusterDbName: Name of the database that holds ``clusterResult``.

    Returns:
        A response dict with the keys ``resCode``, ``resStr``, ``resObject``
        and ``resList``. On success ``resCode`` is 1 and ``resList`` is a
        list of ``{'name': word, 'value': count}`` dicts. When reading the
        database fails ``resCode`` is 0 and ``resList`` is empty.
    """
    keyWordsDic = dict()
    geoWordsDic = dict()
    client = None
    try:
        client = getMongoClient()
        clusters = client[clusterDbName]['clusterResult'].find()
        for cluster in clusters:
            summary = cluster['summary']
            keywords = summary['keywords']
            geowords = summary['geowords']
            for word in keywords:
                keyWordsDic[word] = keyWordsDic.get(word, 0) + 1
            for wordItem in geowords:
                word = wordItem[0]
                freq = wordItem[1]
                if word in geoWordsDic:
                    geoWordsDic[word] += freq
                else:
                    geoWordsDic[word] = freq
    except Exception:
        return {
            'resCode': 0,
            'resStr': '词云数据库查询失败',
            'resObject': '',
            'resList': []
        }
    finally:
        # All data has been read at this point, so the connection can be
        # released on both the success and the failure path.
        if client is not None:
            client.close()
    # Sort by frequency, highest first.
    sortedKeyWords = sorted(keyWordsDic.items(),
                            key=lambda x: x[1],
                            reverse=True)
    sortedGeoWords = sorted(geoWordsDic.items(),
                            key=lambda x: x[1],
                            reverse=True)
    # Show at most 40 entries: 30 keywords and 10 geo words.
    topWords = sortedKeyWords[0:30] + sortedGeoWords[0:10]
    result = [{'name': item[0], 'value': item[1]} for item in topWords]
    return {
        'resCode': 1,
        'resStr': '获取词云数据成功',
        'resObject': '',
        'resList': result
    }
Пример #17
0
def _collectUniqueWords(words, limit=5):
    """Return the first ``limit`` distinct items of ``words`` in encounter order.

    Iteration stops as soon as ``limit`` items were collected, so a lazy
    ``words`` iterable is not consumed any further.
    """
    unique = list()
    for word in words:
        if word not in unique:
            unique.append(word)
            if len(unique) >= limit:
                break
    return unique


def getEventSummaryProgramDataAction(pname, clusterTweetDict):
    """Compose the textual program description for the event summary.

    The program named ``pname`` is loaded from the ``programs`` collection
    and a sentence is assembled from up to five keywords, up to five ignore
    words and topics (when present), the maximum query size, the time range
    and the total number of tweets in ``clusterTweetDict``.

    Args:
        pname: Name of the program to describe.
        clusterTweetDict: Cluster-to-tweets mapping; only passed on to
            ``getAllTweetListLength`` to obtain the tweet total.

    Returns:
        A response dict with the keys ``resCode``, ``resStr``, ``resObject``
        and ``resList``:

        * ``resCode`` 1 - success; ``resObject`` is the description text.
        * ``resCode`` 2 - the name matches no program or more than one.
        * ``resCode`` 0 - any error while querying or composing the text.
    """
    client = None
    try:
        client = getMongoClient()
        matches = list(client['programs']['programs'].find({'pname': pname}))
        if len(matches) > 1:
            return {
                'resCode': 2,
                'resStr': '监控方案名' + pname + '数据库冗余',
                'resObject': '',
                'resList': []
            }
        if not matches:
            return {
                'resCode': 2,
                'resStr': '监控方案' + pname + '不存在',
                'resObject': '',
                'resList': []
            }
        result = '根据您设置的\"' + pname + '\"事件的检测条件,'
        program = matches[0]
        # Keywords: walk the groups lazily so that expression parsing stops
        # once five distinct words were found.
        keywordsList = _collectUniqueWords(
            word for group in program['keywords']
            for word in getProgramWordsListFromExp(group['exp']))
        result += '我们围绕着' + getEventSummaryWordsStr(keywordsList) + '等关键词,'
        # Ignore words (optional).
        ignorewords = program['ignorewords']
        if ignorewords:
            ignorewordsList = _collectUniqueWords(
                getProgramWordsListFromExp(ignorewords))
            result += getEventSummaryWordsStr(ignorewordsList) + '等过滤词,'
        # Topics (optional).
        topics = program['topics']
        if topics:
            topicsList = _collectUniqueWords(
                getProgramWordsListFromExp(topics))
            result += getEventSummaryWordsStr(topicsList) + '等话题,'
        # Remaining conditions: query size, time range and tweet total.
        result += '以每次查询最多' + str(program['maxNum']) + '条推文的方式,'
        stime = program['stime']
        etime = program['etime']
        result += '采集了从' + stime + '到' + etime + '的推文数据,'
        allTweetNum = getAllTweetListLength(clusterTweetDict)
        result += '对其中与事件相关的' + str(allTweetNum) + '条推文进行了统计分析。'
        return {
            'resCode': 1,
            'resStr': '获取事件分析结果概述-方案描述数据成功',
            'resObject': result,
            'resList': []
        }
    except Exception:
        return {
            'resCode': 0,
            'resStr': '事件分析结果概述-方案描述数据库查询失败',
            'resObject': '',
            'resList': []
        }
    finally:
        # Release the connection on every path, including failures.
        if client is not None:
            client.close()