Пример #1
0
def utils_search(wxbot_id, period, startdate='', enddate=''):
    """Gather sensitive text messages of a bot across group-message indices.

    The date range is resolved by ``dump_date``; the matching index names
    are visited in reversed order.  Every index is searched for documents
    whose ``xnr_id`` equals the bot's puid, whose ``sensitive_flag`` is 1
    and whose ``msg_type`` is ``'text'``, sorted by ``sensitive_value``
    descending.  Raw hits are accumulated in ``results``.

    NOTE(review): the visible snippet stops after the search loop; no
    return statement is shown here.
    """
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    first_day = ts2datetime(start_ts)
    last_day = ts2datetime(end_ts)
    index_names = get_wx_groupmessage_index_list(first_day, last_day)
    index_names.reverse()
    xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid'])['puid']

    must_clauses = [
        {"term": {"xnr_id": xnr_puid}},
        {"term": {"sensitive_flag": 1}},
        {'term': {'msg_type': 'Text'.lower()}},
    ]
    query_body = {
        "query": {"filtered": {"filter": {"bool": {"must": must_clauses}}}},
        "size": MAX_VALUE,
        "sort": {"sensitive_value": {"order": "desc"}},
    }

    results = []
    for index_name in index_names:
        # Any failure while querying or unpacking one index is ignored and
        # the loop simply moves on to the next index.
        try:
            search_result = es_xnr.search(
                index=index_name,
                doc_type=wx_group_message_index_type,
                body=query_body)
            if not search_result:
                continue
            results.extend(search_result['hits']['hits'])
        except Exception:
            continue
Пример #2
0
def utils_show_report_content(wxbot_id, report_type, period, startdate, enddate):
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid'])['puid']
    result = []
    query_body = {
            'query':{
                'bool':{
                    'must':[
                        {'term':{'report_type': report_type}},
                        {'range':{'report_time':{'gte':start_ts, 'lte':end_ts}}},
                        {'term':{'xnr_user_no': wxbot_id}},
                        {'term':{'xnr_puid': xnr_puid}}
                        ]
                    }
                },
            'size': MAX_SEARCH_SIZE,
            'sort': [{'report_time':{'order':'desc'}}]
            }
    try:
        wx_report_management_mappings()
        es_result = es_xnr.search(index=wx_report_management_index_name, doc_type=wx_report_management_index_type, body=query_body)['hits']['hits']
        if es_result:
            result = [item['_source'] for item in es_result]
    except Exception,e:
        print e
Пример #3
0
def utils_show_sensitive_users(wxbot_id, period, startdate='', enddate=''):
    """Aggregate, per speaker, the sensitive text messages seen by a bot.

    For every group-message index in the date range (visited in reversed
    order) the function runs a filtered search plus a ``terms`` aggregation
    on ``speaker_id`` and merges the buckets into ``sensitive_users``, a
    dict keyed by speaker id whose values hold ``nickname``, ``count``,
    ``last_speak_ts`` and ``groups_list``.

    NOTE(review): the visible snippet ends after the loop; no return
    statement is shown here.
    """
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    index_names = get_wx_groupmessage_index_list(ts2datetime(start_ts), ts2datetime(end_ts))
    index_names.reverse()
    xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid'])['puid']
    # NOTE(review): no "size" is given, so presumably only the server's
    # default number of hits comes back while the aggregation covers all
    # matching docs -- confirm that groups_list is meant to be partial.
    query_body = {
        "query": {
            "filtered":{
                "filter":{
                    "bool":{
                        "must":[
                            {"term":{"xnr_id": xnr_puid}},
                            {"term":{"sensitive_flag": 1}},
                            {'term':{'msg_type':'Text'.lower()}}
                        ]
                    }
                }
            }
        },
        "aggs":{
            "sen_users":{
                "terms":{"field": "speaker_id"}
            }
        },
        "sort":{"timestamp":{"order":"desc"}}
    }
    sensitive_users = {}
    for index_name in index_names:
        try:
            search_result = es_xnr.search(index=index_name, doc_type=wx_group_message_index_type,body=query_body)
            if search_result:
                res = search_result['aggregations']['sen_users']['buckets']
                docs = search_result['hits']['hits']
                for r in res:
                    groups_list = []
                    speaker_id = r['key']
                    count = r['doc_count']
                    # Collect the group names of the returned hits that
                    # belong to this speaker (duplicates are kept).
                    for doc in docs:
                        if doc['_source']['speaker_id'] == speaker_id:
                            groups_list.append(doc['_source']['group_name'])
                    if speaker_id in sensitive_users:
                        # Update groups and count only. Indices are queried in
                        # reverse order, so the last_speak_ts stored when the
                        # entry was first created is already the final value.
                        sensitive_users[speaker_id]['count'] += count
                        sensitive_users[speaker_id]['groups_list'].extend(groups_list)
                    else:
                        # The first matching hit is enough.
                        # NOTE(review): if no returned hit matches this
                        # speaker, nickname / last_speak_ts are either unbound
                        # (NameError, swallowed below) or left over from a
                        # previous speaker -- confirm and handle explicitly.
                        for doc in docs:
                            if doc['_source']['speaker_id'] == speaker_id:
                                nickname = doc['_source']['speaker_name']
                                last_speak_ts = doc['_source']['timestamp']
                                break
                        sensitive_users[speaker_id] = {
                            'nickname': nickname,
                            'count': count,
                            'last_speak_ts': last_speak_ts,
                            'groups_list': groups_list
                        }
        # Any error aborts processing of the current index only; it is
        # silently ignored and the loop continues with the next index.
        except Exception,e:
            pass
Пример #4
0
def utils_show_report_content(wxbot_id, report_type, period, startdate,
                              enddate):
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid'])['puid']
    result = []
    query_body = {
        'query': {
            'bool': {
                'must': [{
                    'term': {
                        'report_type': report_type
                    }
                }, {
                    'range': {
                        'report_time': {
                            'gte': start_ts,
                            'lte': end_ts
                        }
                    }
                }, {
                    'term': {
                        'xnr_user_no': wxbot_id
                    }
                }, {
                    'term': {
                        'xnr_puid': xnr_puid
                    }
                }]
            }
        },
        'size': MAX_SEARCH_SIZE,
        'sort': [{
            'report_time': {
                'order': 'desc'
            }
        }]
    }
    try:
        wx_report_management_mappings()
        es_result = es_xnr.search(index=wx_report_management_index_name,
                                  doc_type=wx_report_management_index_type,
                                  body=query_body)['hits']['hits']
        if es_result:
            for item in es_result:
                try:
                    data = item['_source']
                    data['_id'] = item['_id']
                    report_content = eval(item['_source']['report_content'])
                    data['sensitive_value'] = report_content['sensitive_value']
                    data['text'] = report_content['text']
                    data['sensitive_words_string'] = report_content[
                        'sensitive_words_string'].decode('utf8')
                    data.pop('report_content')
                    result.append(data)
                except:
                    pass
    except Exception, e:
        print 'wx_report_management Exception: ', str(e)
Пример #5
0
def utils_get_safe(wxbot_id, period, startdate, enddate):
    """Collect the bot's own posting counts (today and historical total).

    Only the ``period == 0`` branch is visible.  It counts the documents in
    today's group-message index whose ``speaker_id`` and ``xnr_id`` both
    equal the bot's puid, and reads ``total_post_num`` from the history
    count index for ``last_date``.  Both look-ups fall back to 0 on any
    exception.

    NOTE(review): the visible snippet ends inside the ``if`` branch; the
    rest of the function (including any return) is not shown here.
    """
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    current_timestamp = int(time.time())
    current_date = ts2datetime(current_timestamp)
    if period == 0:     # fetch today's data
        current_time = datetime2ts(current_date)
        # presumably DAY is one day in seconds, making last_date yesterday
        # -- TODO confirm
        last_date = ts2datetime(current_time-DAY)

        speak_dict = {}
        speak_dict['speak_day'] = {}
        speak_dict['speak_total'] = {}
        xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid'])['puid']

        # total number of messages the xnr posted today
        today_count = 0
        query_body = {
            'query':{
                'bool':{
                    'must':[
                        {'term':{'speaker_id': xnr_puid}},
                        {'term':{'xnr_id':xnr_puid}}
                    ]
                }
            }
        } 
        today_index_name = wx_group_message_index_name_pre + current_date
        try:
            today_count_result = es_xnr.count(index=today_index_name,doc_type=wx_group_message_index_type,body=query_body)
            # only trust the count when at least one shard answered
            if today_count_result['_shards']['successful'] != 0:
                today_count = today_count_result['count']
        except Exception,e:
            # errors are ignored; today_count keeps its default of 0
            pass
            #print 'today_count Exception: ', str(e)



        # historical total number of messages posted by the xnr
        total_count = 0
        total_query_body = {
            'query':{
                'bool':{
                    'must':[
                        {'term':{'xnr_user_no': wxbot_id}},
                        {'term':{'puid':xnr_puid}},
                        {'term':{'date_time':last_date}}
                    ]
                }
            }
        }
        total_index_name = wx_xnr_history_count_index_name
        try:
            total_count_result = es_xnr.search(index=total_index_name,doc_type=wx_xnr_history_count_index_type,body=total_query_body)
            if total_count_result['_shards']['successful'] != 0:
                # an empty hit list raises IndexError here, which the
                # except below turns into total_count == 0
                total_count = total_count_result['hits']['hits'][0]['_source']['total_post_num']
        except Exception,e:
            pass
Пример #6
0
def get_penetration_num(xnr_user_no):
    """Compute the average ``sensitive_value`` in the bot's groups for one day.

    The day is derived from ``time.time() - DAY``.  The group-message index
    of that day is searched for documents whose ``group_id`` is in the
    bot's ``groups_list`` and whose ``sensitive_value`` is >= -1, and the
    ``avg`` aggregation result is stored in ``sensitive_value`` (0 when the
    aggregation yields ``None`` or the search fails).

    NOTE(review): the visible snippet ends after the first query; the rest
    of the function (including any return) is not shown here.
    """
    # presumably DAY is one day in seconds, so this is "yesterday" despite
    # the variable name -- TODO confirm
    current_timestamp = int(time.time() - DAY)
    current_date = ts2datetime(current_timestamp)
    current_time = datetime2ts(current_date)

    xnr_data = load_wxxnr_redis_data(wxbot_id=xnr_user_no,
                                     items=['puid', 'groups_list'])
    puid = xnr_data['puid']
    group_list = xnr_data['groups_list']

    # query 1: average sensitive value
    sensitive_value = 0
    wx_group_message_index_name = wx_group_message_index_name_pre + current_date
    query_body_info = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [{
                            'terms': {
                                'group_id': group_list
                            }
                        }, {
                            'range': {
                                'sensitive_value': {
                                    'gte': -1
                                }
                            }
                        }]
                    }
                }
            }
        },
        'aggs': {
            'avg_sensitive': {
                'avg': {
                    'field': 'sensitive_value'
                }
            }
        }
    }
    try:
        es_sensitive_result = es_xnr.search(
            index=wx_group_message_index_name,
            doc_type=wx_group_message_index_type,
            body=query_body_info)['aggregations']
        sensitive_value = es_sensitive_result['avg_sensitive']['value']
        # the aggregation value may be None; normalise it to 0
        if sensitive_value == None:
            sensitive_value = 0
    except Exception, e:
        print 'sensitive_value Exception: ', str(e)
Пример #7
0
def utils_get_influence(wxbot_id, period, startdate, enddate):
    """Count how often the bot was @-mentioned (today and in total).

    Only the ``period == 0`` branch is visible.  It counts documents with
    ``xnr_id`` equal to the bot's puid and ``at_flag`` equal to 1, first in
    today's group-message index and then summed over every index returned
    by ``get_wx_groupmessage_index_list``.

    NOTE(review): the visible snippet ends inside the ``if`` branch; the
    rest of the function (including any return) is not shown here.
    """
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    current_timestamp = int(time.time())
    current_date = ts2datetime(current_timestamp)
    if period == 0:  # fetch today's data
        xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id,
                                         items=['puid'])['puid']
        current_time = datetime2ts(current_date)
        query_at_num = {
            'query': {
                'bool': {
                    'must': [{
                        'term': {
                            'xnr_id': xnr_puid
                        }
                    }, {
                        'term': {
                            'at_flag': 1
                        }
                    }]
                }
            }
        }
        # number of times the virtual persona was @-mentioned today
        at_num_xnr = 0
        wx_group_message_index_name = wx_group_message_index_name_pre + current_date
        try:
            results_xnr = es_xnr.count(index=wx_group_message_index_name,
                                       doc_type=wx_group_message_index_type,
                                       body=query_at_num)
            # only trust the count when at least one shard answered
            if results_xnr['_shards']['successful'] != 0:
                at_num_xnr = results_xnr['count']
        except Exception, e:
            print 'at_num_xnr Exception: ', str(e)

        # total number of @-mentions up to now
        at_num_total = 0
        wx_group_message_index_list = get_wx_groupmessage_index_list(
            WX_GROUP_MESSAGE_START_DATE_ASSESSMENT, ts2datetime(current_time))
        for index_name in wx_group_message_index_list:
            try:
                r = es_xnr.count(index=index_name,
                                 doc_type=wx_group_message_index_type,
                                 body=query_at_num)
                if r['_shards']['successful'] != 0:
                    at_num_total += r['count']
            # a failing index is skipped silently
            except Exception, e:
                pass
Пример #8
0
def utils_report_warning_content(wxbot_id, report_type, report_time, speaker_id, wx_content_info_str):
    xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid'])['puid']
    report_dict = {
        'report_type': report_type,
        'report_time': report_time,
        'xnr_user_no': wxbot_id,
        'xnr_puid': xnr_puid,
        'speaker_id': speaker_id,
        'report_content': wx_content_info_str
    }
    report_id = wxbot_id + '_' + str(report_time)
    mark = 0
    try:
        wx_report_management_mappings()
        es_xnr.index(index=wx_report_management_index_name, doc_type=wx_report_management_index_type, id=report_id,body=report_dict)
        mark = 1
    except Exception,e:
        print e
Пример #9
0
def utils_get_penetration(wxbot_id, period, startdate, enddate):
    """Measure the average and maximum ``sensitive_value`` in the bot's groups.

    Only the ``period == 0`` branch is visible.  It queries today's
    group-message index twice, restricted to the bot's ``groups_list`` and
    to documents with ``sensitive_value`` >= -1: once for the ``avg``
    aggregation and once sorted descending to read the top value.  Both
    results default to 0 when the query fails.

    NOTE(review): the visible snippet ends inside the ``if`` branch; the
    rest of the function (including any return) is not shown here.
    """
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    current_timestamp = int(time.time())
    current_date = ts2datetime(current_timestamp)
    if period == 0:  # fetch today's data
        current_time = datetime2ts(current_date)
        xnr_data = load_wxxnr_redis_data(wxbot_id=wxbot_id,
                                         items=['puid', 'groups_list'])
        puid = xnr_data['puid']
        group_list = xnr_data['groups_list']

        # query 1: average sensitive value
        sensitive_value = 0
        wx_group_message_index_name = wx_group_message_index_name_pre + current_date
        query_body_info = {
            'query': {
                'filtered': {
                    'filter': {
                        'bool': {
                            'must': [{
                                'terms': {
                                    'group_id': group_list
                                }
                            }, {
                                'range': {
                                    'sensitive_value': {
                                        'gte': -1
                                    }
                                }
                            }]
                        }
                    }
                }
            },
            'aggs': {
                'avg_sensitive': {
                    'avg': {
                        'field': 'sensitive_value'
                    }
                }
            }
        }
        try:
            es_sensitive_result = es_xnr.search(
                index=wx_group_message_index_name,
                doc_type=wx_group_message_index_type,
                body=query_body_info)['aggregations']
            sensitive_value = es_sensitive_result['avg_sensitive']['value']
            # the aggregation value may be None; normalise it to 0
            if sensitive_value == None:
                sensitive_value = 0
        except Exception, e:
            print 'sensitive_value Exception: ', str(e)

        # query 2: maximum sensitive value
        max_sensitive = 0
        query_body_max = {
            "query": {
                "filtered": {
                    "filter": {
                        "bool": {
                            "must": [
                                {
                                    'terms': {
                                        'group_id': group_list
                                    }
                                },
                                {
                                    "range": {
                                        "sensitive_value":
                                        {  # author did not know how to write an exists clause, so this range stands in for it
                                            "gte": -1
                                        }
                                    }
                                }
                            ]
                        }
                    }
                }
            },
            'sort': {
                'sensitive_value': {
                    'order': 'desc'
                }
            }
        }
        try:
            max_results = es_xnr.search(index=wx_group_message_index_name,doc_type=wx_group_message_index_type,\
                            body=query_body_max)['hits']['hits']
            # first hit of the descending sort; an empty hit list raises
            # IndexError, which is reported below and leaves max_sensitive 0
            max_sensitive = max_results[0]['_source']['sensitive_value']
        except Exception, e:
            print 'max_sensitive Exception: ', str(e)
Пример #10
0
def utils_get_safe(wxbot_id, period, startdate, enddate):
    """Return today's posting activity of a WeChat bot.

    For ``period == 0`` (as resolved by ``dump_date``) the function

    * counts the bot's own messages in today's group-message index,
    * adds the ``total_post_num`` stored in the history count index for
      the previous day,
    * finds the highest per-speaker message count in today's index, and
    * derives ``mark = log(today + 1) / (log(speaker_max + 1) + 1) * 100``
      rounded to two decimals.

    Every Elasticsearch look-up is best effort: on failure the affected
    value falls back to 0 (or, for the speaker maximum, to the bot's own
    count) instead of aborting the whole computation.

    Returns:
        dict with keys ``'speak_today'`` and ``'speak_total'`` (each a
        ``{current_time: count}`` dict) and ``'mark'``.  For other periods
        the visible code returns nothing.
    """
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    current_timestamp = int(time.time())
    current_date = ts2datetime(current_timestamp)
    if period == 0:     # today's data
        current_time = datetime2ts(current_date)
        last_date = ts2datetime(current_time-DAY)
        xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid'])['puid']

        # Number of messages the bot itself posted today.
        query_body = {
            'query':{
                'bool':{
                    'must':[
                        {'term':{'speaker_id': xnr_puid}},
                        {'term':{'xnr_id':xnr_puid}}
                    ]
                }
            }
        }
        today_index_name = wx_group_message_index_name_pre + current_date
        today_count = 0
        try:
            today_count_result = es_xnr.count(index=today_index_name,doc_type=wx_group_message_index_type,body=query_body)
            if today_count_result['_shards']['successful'] != 0:
                today_count = today_count_result['count']
            else:
                print 'es index rank error'
        except Exception as e:
            # e.g. today's index does not exist yet; keep the default of 0
            print 'today_count Exception: ', str(e)

        # Historical total as recorded for the previous day.
        total_query_body = {
            'query':{
                'bool':{
                    'must':[
                        {'term':{'xnr_user_no': wxbot_id}},
                        {'term':{'puid':xnr_puid}},
                        {'term':{'date_time':last_date}}
                    ]
                }
            }
        }
        total_index_name = wx_xnr_history_count_index_name
        # Bound up front so it is defined even when no shard answers.
        total_count = 0
        try:
            total_count_result = es_xnr.search(index=total_index_name,doc_type=wx_xnr_history_count_index_type,body=total_query_body)
            if total_count_result['_shards']['successful'] != 0:
                total_count = total_count_result['hits']['hits'][0]['_source']['total_post_num']
        except Exception as e:
            print e
            total_count = 0

        # Total including today.
        total_count_today = total_count + today_count

        # Highest message count of any single speaker today.
        query_body_total_day = {
            'query':{
                'filtered':{
                    'filter':{
                        'term':{'xnr_id':xnr_puid}
                    }
                }
            },
            'aggs':{
                'all_speakers':{
                    'terms':{'field':'speaker_id',"order" : { "_count" : "desc" }}
                }
            }
        }
        try:
            # Query today's index explicitly; the name used here previously
            # was never assigned in this function.
            results_total_day = es_xnr.search(index=today_index_name,doc_type=wx_group_message_index_type,\
                        body=query_body_total_day)['aggregations']['all_speakers']['buckets']
            speaker_max = results_total_day[0]['doc_count']
        except Exception:
            # No buckets or query failure: fall back to the bot's own count.
            speaker_max = today_count

        # Assemble the result.
        speak_dict = {
            'speak_today': {current_time: today_count},
            'speak_total': {current_time: total_count_today},
        }
        safe_active = (math.log(today_count+1)/(math.log(speaker_max+1)+1))*100
        speak_dict['mark'] = round(safe_active,2)  # keep two decimals
        return speak_dict
Пример #11
0
def utils_get_influence(wxbot_id, period, startdate, enddate):
    """Return the @-mention ("influence") statistics of a WeChat bot.

    For ``period == 0`` (as resolved by ``dump_date``) the numbers are
    computed live:

    * ``at_day``   - documents in today's group-message index with
      ``at_flag == 1`` for this bot,
    * ``at_total`` - the same count summed over every index returned by
      ``get_wx_groupmessage_index_list`` from
      ``WX_GROUP_MESSAGE_START_DATE_ASSESSMENT`` up to today,
    * ``mark``     - ``log(at_day + 1) / (log(n + 1) + 1) * 100`` rounded
      to two decimals, where ``n`` is today's number of messages whose
      text contains ``@``.

    For any other period the values are read from the history count index
    within ``[start_ts, end_ts]``; days without a record stay at 0 and
    ``mark`` is the ``influence`` of the latest record.

    Returns:
        dict with keys ``'at_day'`` and ``'at_total'`` (each a
        ``{timestamp: count}`` dict) and ``'mark'``.
    """
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    current_timestamp = int(time.time())
    current_date = ts2datetime(current_timestamp)
    if period == 0:    # today's data
        xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid'])['puid']
        current_time = datetime2ts(current_date)
        query_at_num = {
            'query':{
                'bool':{
                    'must':[
                        {'term':{'xnr_id':xnr_puid}},
                        {'term':{'at_flag':1}}
                    ]
                }
            }
        }

        # Number of times the bot was @-mentioned today.
        wx_group_message_index_name = wx_group_message_index_name_pre + current_date
        at_num_xnr = 0
        try:
            results_xnr = es_xnr.count(index=wx_group_message_index_name,doc_type=wx_group_message_index_type,body=query_at_num)
            if results_xnr['_shards']['successful'] != 0:
                at_num_xnr = results_xnr['count']
            else:
                print 'es index rank error'
        except Exception as e:
            print 'at_num_xnr Exception: ', str(e)

        # Total number of @-mentions over all indices up to now.
        wx_group_message_index_list = get_wx_groupmessage_index_list(WX_GROUP_MESSAGE_START_DATE_ASSESSMENT,ts2datetime(current_time))
        at_num_total = 0
        for index_name in wx_group_message_index_list:
            try:
                # Count in the index of this iteration, not in today's index
                # over and over again.
                r = es_xnr.count(index=index_name,doc_type=wx_group_message_index_type,body=query_at_num)
                if r['_shards']['successful'] != 0:
                    at_num_total += r['count']
            except Exception:
                # A day without an index (or a failing one) adds nothing.
                continue

        # Number of messages today that mention anybody at all.
        query_body_total_day = {
            'query':{
                'bool':{
                    'must':[
                        {'term':{'xnr_id':xnr_puid}},
                        {'wildcard':{'text':'*'+'@'+'*'}}
                    ]
                }
            }
        }
        at_num_total_day = 0
        try:
            results_total_day = es_xnr.count(index=wx_group_message_index_name,doc_type=wx_group_message_index_type,body=query_body_total_day)
            if results_total_day['_shards']['successful'] != 0:
                at_num_total_day = results_total_day['count']
            else:
                print 'es index rank error'
        except Exception as e:
            print 'at_num_total_day Exception: ', str(e)

        # Assemble the result.
        at_dict = {
            'at_day': {current_time: at_num_xnr},
            'at_total': {current_time: at_num_total},
        }
        influence = (math.log(at_num_xnr+1)/(math.log(at_num_total_day+1)+1))*100
        at_dict['mark'] = round(influence,2)  # keep two decimals
        return at_dict
    else:
        at_dict = {}
        at_dict['at_day'] = {}
        at_dict['at_total'] = {}
        query_body = {
            'query':{
                'filtered':{
                    'filter':{
                        'bool':{
                            'must':[
                                {'term':{'xnr_user_no':wxbot_id}},
                                {'range':{'timestamp':{'gte':start_ts,'lte':end_ts}}}
                            ]
                        }
                    }
                }
            },
            'size':MAX_SEARCH_SIZE,
            'sort':{'timestamp':{'order':'asc'}}
        }
        search_results = es_xnr.search(index=wx_xnr_history_count_index_name,doc_type=wx_xnr_history_count_index_type,\
                        body=query_body)['hits']['hits']
        # Initialise every day of the range with zero.
        ts_list = load_timestamp_list(start_ts, end_ts)
        for ts in ts_list:
            at_dict['at_day'][ts] = 0
            at_dict['at_total'][ts] = 0
        at_dict['mark'] = 0
        # Fill in the recorded values; results are sorted ascending, so the
        # mark ends up being that of the latest record.
        for hit in search_results:
            record = hit['_source']
            timestamp = record['timestamp']
            at_dict['at_day'][timestamp] = record['daily_be_at_num']
            at_dict['at_total'][timestamp] = record['total_be_at_num']
            at_dict['mark'] = record['influence']
        return at_dict
Пример #12
0
def utils_get_penetration(wxbot_id, period, startdate, enddate):
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    current_timestamp = int(time.time())
    current_date = ts2datetime(current_timestamp)
    if period == 0 :    #获取今天的数据
        current_time = datetime2ts(current_date)

        xnr_data = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid','groups_list'])
        puid = xnr_data['puid']
        group_list = xnr_data['groups_list']
        
        #查询1
        wx_group_message_index_name = wx_group_message_index_name_pre + current_date
        query_body_info = {
            'query':{
                'filtered':{
                    'filter':{
                        'terms':{'group_id':group_list}
                    }
                }
            },
            'aggs':{
                'avg_sensitive':{
                    'avg':{
                        'field':'sensitive_value'
                    }
                }
            }
        }
        try:
            es_sensitive_result = es_xnr.search(index=wx_group_message_index_name,doc_type=wx_group_message_index_type,body=query_body_info)['aggregations']
            sensitive_value = es_sensitive_result['avg_sensitive']['value']
            if sensitive_value == None:
                sensitive_value = 0
        except:
            sensitive_value = 0
        #查询2
        query_body_max = {
            'query':{
                'filtered':{
                    'filter':{
                        'terms':{'group_id':group_list}
                    }
                }
            },
            'sort':{'sensitive_value':{'order':'desc'}}
        }
        try:
            max_results = es_xnr.search(index=group_message_index_name,doc_type=group_message_index_type,\
                            body=query_body_max)['hits']['hits']
            max_sensitive = max_results[0]['_source']['sensitive_value']
        except:
            max_sensitive = 0
        #统计
        follow_group_sensitive = {'sensitive_info': {current_time: sensitive_value}}
        penetration = (math.log(sensitive_value+1)/(math.log(max_sensitive+1)+1))*100
        penetration = round(penetration,2)
        follow_group_sensitive['mark'] = penetration
        return follow_group_sensitive
    else:  
        follow_group_sensitive = {}
        follow_group_sensitive['sensitive_info'] = {}
        query_body = {
            'query':{
                'filtered':{
                    'filter':{
                        'bool':{
                            'must':[
                                {'term':{'xnr_user_no':wxbot_id}},
                                {'range':{'timestamp':{'gte':start_ts,'lte':end_ts}}}
                            ]
                        }
                    }
                }
            },
            'size':MAX_SEARCH_SIZE,
            'sort':{'timestamp':{'order':'asc'}}
        }
        search_results = es_xnr.search(index=wx_xnr_history_count_index_name,doc_type=wx_xnr_history_count_index_type,\
                        body=query_body)['hits']['hits']
        #初始化
        ts_list = load_timestamp_list(start_ts, end_ts)
        for ts in ts_list:
            follow_group_sensitive['sensitive_info'][ts]  = 0
        follow_group_sensitive['mark'] = 0 
        #填充数据
        for result in search_results:
            result = result['_source']
            timestamp = result['timestamp']
            follow_group_sensitive['sensitive_info'][timestamp] = result['daily_sensitive_num']
            follow_group_sensitive['mark'] = result['penetration']
        return follow_group_sensitive