def utils_search(wxbot_id, period, startdate='', enddate=''):
    """Gather sensitive text group messages for a bot over a date range.

    The date range comes from ``dump_date(period, startdate, enddate)``; one
    group-message index per entry returned by
    ``get_wx_groupmessage_index_list`` is searched, in the reverse of the
    order that helper returned them.

    Args:
        wxbot_id: bot identifier used to look up the bot's ``puid``.
        period: period selector, interpreted by ``dump_date``.
        startdate: optional start date, interpreted by ``dump_date``.
        enddate: optional end date, interpreted by ``dump_date``.

    NOTE(review): ``results`` is accumulated but no ``return`` statement
    follows in the visible code -- confirm against the full file.
    """
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    index_names = get_wx_groupmessage_index_list(
        ts2datetime(start_ts), ts2datetime(end_ts))
    index_names.reverse()
    xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid'])['puid']

    # Only messages of this bot that are flagged sensitive and are text.
    must_clauses = [
        {"term": {"xnr_id": xnr_puid}},
        {"term": {"sensitive_flag": 1}},
        {'term': {'msg_type': 'Text'.lower()}},
    ]
    query_body = {
        "query": {"filtered": {"filter": {"bool": {"must": must_clauses}}}},
        "size": MAX_VALUE,
        "sort": {"sensitive_value": {"order": "desc"}},
    }

    results = []
    for index_name in index_names:
        try:
            response = es_xnr.search(
                index=index_name,
                doc_type=wx_group_message_index_type,
                body=query_body)
            if not response:
                continue
            results.extend(response['hits']['hits'])
        except Exception:
            # Best-effort: an index that cannot be searched is skipped.
            continue
def utils_show_sensitive_users(wxbot_id, period, startdate='', enddate=''):
    """Aggregate, per speaker, the sensitive text messages seen by a bot.

    For every group-message index in the date range (walked in the reverse
    of the order returned by ``get_wx_groupmessage_index_list``) the
    sensitive text messages of the bot are bucketed by ``speaker_id``.
    ``sensitive_users`` maps each speaker id to a dict with the keys
    ``nickname``, ``count``, ``last_speak_ts`` and ``groups_list``.

    Args:
        wxbot_id: bot identifier used to look up the bot's ``puid``.
        period: period selector, interpreted by ``dump_date``.
        startdate: optional start date, interpreted by ``dump_date``.
        enddate: optional end date, interpreted by ``dump_date``.

    NOTE(review): ``sensitive_users`` is built but no ``return`` statement
    follows in the visible code -- confirm against the full file.
    """
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    index_names = get_wx_groupmessage_index_list(
        ts2datetime(start_ts), ts2datetime(end_ts))
    index_names.reverse()
    xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid'])['puid']

    must_clauses = [
        {"term": {"xnr_id": xnr_puid}},
        {"term": {"sensitive_flag": 1}},
        {'term': {'msg_type': 'Text'.lower()}},
    ]
    query_body = {
        "query": {"filtered": {"filter": {"bool": {"must": must_clauses}}}},
        "aggs": {"sen_users": {"terms": {"field": "speaker_id"}}},
        "sort": {"timestamp": {"order": "desc"}},
    }

    sensitive_users = {}
    for index_name in index_names:
        try:
            response = es_xnr.search(
                index=index_name,
                doc_type=wx_group_message_index_type,
                body=query_body)
            if not response:
                continue
            buckets = response['aggregations']['sen_users']['buckets']
            hits = response['hits']['hits']
            for bucket in buckets:
                speaker_id = bucket['key']
                count = bucket['doc_count']
                groups_list = [
                    hit['_source']['group_name']
                    for hit in hits
                    if hit['_source']['speaker_id'] == speaker_id
                ]
                entry = sensitive_users.get(speaker_id)
                if entry is not None:
                    # Update groups and count only. Indices are queried in
                    # reverse order, so the last_speak_ts stored when the
                    # entry was created is already final.
                    entry['count'] += count
                    entry['groups_list'].extend(groups_list)
                    continue
                # The first matching document is enough (hits are sorted by
                # timestamp, descending).
                # NOTE(review): the query sets no "size", so `hits` is only
                # the server's default page. A speaker present in the
                # aggregation may have no matching hit; nickname and
                # last_speak_ts then keep the value from an earlier speaker
                # (or are unbound, raising into the handler below).
                for hit in hits:
                    if hit['_source']['speaker_id'] == speaker_id:
                        nickname = hit['_source']['speaker_name']
                        last_speak_ts = hit['_source']['timestamp']
                        break
                sensitive_users[speaker_id] = {
                    'nickname': nickname,
                    'count': count,
                    'last_speak_ts': last_speak_ts,
                    'groups_list': groups_list,
                }
        except Exception:
            # Best-effort: any failure while handling one index abandons the
            # rest of that index and moves on to the next one.
            continue
def utils_get_influence(wxbot_id, period, startdate, enddate):
    """Count today's and cumulative @-mentions of a bot (when period is 0).

    Args:
        wxbot_id: bot identifier used to look up the bot's ``puid``.
        period: period selector, normalized by ``dump_date``; the counting
            below only runs when the normalized value is 0.
        startdate: start date, interpreted by ``dump_date``.
        enddate: end date, interpreted by ``dump_date``.

    NOTE(review): the visible body stops after the two counters are
    computed; nothing is returned and there is no branch for other periods.
    Confirm against the full file.
    """
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    now_ts = int(time.time())
    current_date = ts2datetime(now_ts)

    if period == 0:  # fetch today's data
        xnr_puid = load_wxxnr_redis_data(
            wxbot_id=wxbot_id, items=['puid'])['puid']
        current_time = datetime2ts(current_date)
        query_at_num = {
            'query': {
                'bool': {
                    'must': [
                        {'term': {'xnr_id': xnr_puid}},
                        {'term': {'at_flag': 1}},
                    ]
                }
            }
        }

        # Number of times the virtual persona was @-mentioned today.
        at_num_xnr = 0
        wx_group_message_index_name = (
            wx_group_message_index_name_pre + current_date)
        try:
            today_count = es_xnr.count(
                index=wx_group_message_index_name,
                doc_type=wx_group_message_index_type,
                body=query_at_num)
            if today_count['_shards']['successful'] != 0:
                at_num_xnr = today_count['count']
        except Exception as e:
            # Emits the label, one separating space, then the message.
            print(' '.join(['at_num_xnr Exception: ', str(e)]))

        # Total number of @-mentions up to now, summed over every index.
        at_num_total = 0
        wx_group_message_index_list = get_wx_groupmessage_index_list(
            WX_GROUP_MESSAGE_START_DATE_ASSESSMENT,
            ts2datetime(current_time))
        for index_name in wx_group_message_index_list:
            try:
                index_count = es_xnr.count(
                    index=index_name,
                    doc_type=wx_group_message_index_type,
                    body=query_at_num)
                if index_count['_shards']['successful'] != 0:
                    at_num_total += index_count['count']
            except Exception:
                # Best-effort: an index that cannot be counted adds nothing.
                continue
} } #虚拟人今天被@数量 at_num_xnr = 0 wx_group_message_index_name = wx_group_message_index_name_pre + current_date try: results_xnr = es_xnr.count(index=wx_group_message_index_name,doc_type=wx_group_message_index_type,body=query_at_num) if results_xnr['_shards']['successful'] != 0: at_num_xnr = results_xnr['count'] except Exception,e: #print 'at_num_xnr Exception: ', str(e) pass # 截止目前所有被@总数 at_num_total = 0 wx_group_message_index_list = get_wx_groupmessage_index_list(WX_GROUP_MESSAGE_START_DATE_ASSESSMENT,ts2datetime(current_time)) for index_name in wx_group_message_index_list: try: r = es_xnr.count(index=index_name,doc_type=wx_group_message_index_type,body=query_at_num) if r['_shards']['successful'] != 0: at_num_total += r['count'] except Exception,e: pass #查询所有人被@的次数 at_num_total_day = 0 query_body_total_day = { 'query':{ 'bool':{ 'must':[
def utils_get_influence(wxbot_id, period, startdate, enddate):
    """Return @-mention counts and an influence score for a bot.

    When ``dump_date`` normalizes ``period`` to 0 the figures are computed
    live from the group-message indices: today's index for the daily
    numbers, and every index from ``WX_GROUP_MESSAGE_START_DATE_ASSESSMENT``
    up to today for the running total. For any other period the figures are
    read from the history-count index for the ``[start_ts, end_ts]`` range.

    Args:
        wxbot_id: bot identifier; used to look up the bot's ``puid`` (live
            branch) or matched against ``xnr_user_no`` (history branch).
        period: period selector, normalized by ``dump_date``.
        startdate: start date, interpreted by ``dump_date``.
        enddate: end date, interpreted by ``dump_date``.

    Returns:
        dict with three keys:
            'at_day':   {timestamp: @-mentions of the bot on that day}
            'at_total': {timestamp: cumulative @-mentions of the bot}
            'mark':     influence score. In the live branch this is
                        ``log(at_day + 1) / (log(at_all_day + 1) + 1) * 100``
                        rounded to two decimals; in the history branch it is
                        the ``influence`` of the last (latest) record, or 0
                        when there are no records.
    """
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    current_timestamp = int(time.time())
    current_date = ts2datetime(current_timestamp)

    if period == 0:  # fetch today's data
        xnr_puid = load_wxxnr_redis_data(
            wxbot_id=wxbot_id, items=['puid'])['puid']
        current_time = datetime2ts(current_date)
        query_at_num = {
            'query': {
                'bool': {
                    'must': [
                        {'term': {'xnr_id': xnr_puid}},
                        {'term': {'at_flag': 1}},
                    ]
                }
            }
        }

        # Number of times the virtual persona was @-mentioned today.
        wx_group_message_index_name = (
            wx_group_message_index_name_pre + current_date)
        try:
            results_xnr = es_xnr.count(
                index=wx_group_message_index_name,
                doc_type=wx_group_message_index_type,
                body=query_at_num)
            if results_xnr['_shards']['successful'] != 0:
                at_num_xnr = results_xnr['count']
            else:
                print('es index rank error')
                at_num_xnr = 0
        except Exception:
            # Best-effort: a failed count is reported as zero.
            at_num_xnr = 0

        # Total number of @-mentions up to now, summed over every index.
        wx_group_message_index_list = get_wx_groupmessage_index_list(
            WX_GROUP_MESSAGE_START_DATE_ASSESSMENT,
            ts2datetime(current_time))
        at_num_total = 0
        for index_name in wx_group_message_index_list:
            try:
                # Count each index in the list. Previously today's index was
                # queried on every iteration, so the "total" was just today's
                # count multiplied by the number of indices.
                r = es_xnr.count(
                    index=index_name,
                    doc_type=wx_group_message_index_type,
                    body=query_at_num)
            except Exception:
                # Best-effort: an index that cannot be counted adds nothing.
                continue
            if r['_shards']['successful'] != 0:
                at_num_total += r['count']

        # Number of today's messages (from anyone) that contain an "@".
        query_body_total_day = {
            'query': {
                'bool': {
                    'must': [
                        {'term': {'xnr_id': xnr_puid}},
                        {'wildcard': {'text': '*@*'}},
                    ]
                }
            }
        }
        try:
            results_total_day = es_xnr.count(
                index=wx_group_message_index_name,
                doc_type=wx_group_message_index_type,
                body=query_body_total_day)
            if results_total_day['_shards']['successful'] != 0:
                at_num_total_day = results_total_day['count']
            else:
                print('es index rank error')
                at_num_total_day = 0
        except Exception:
            # Best-effort: a failed count is reported as zero.
            at_num_total_day = 0

        # Assemble the statistics.
        influence = (float(math.log(at_num_xnr + 1))
                     / (math.log(at_num_total_day + 1) + 1)) * 100
        influence = round(influence, 2)  # keep two decimal places
        return {
            'at_day': {current_time: at_num_xnr},
            'at_total': {current_time: at_num_total},
            'mark': influence,
        }

    at_dict = {'at_day': {}, 'at_total': {}}
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [
                            {'term': {'xnr_user_no': wxbot_id}},
                            {'range': {'timestamp': {'gte': start_ts,
                                                     'lte': end_ts}}},
                        ]
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        'sort': {'timestamp': {'order': 'asc'}},
    }
    search_results = es_xnr.search(
        index=wx_xnr_history_count_index_name,
        doc_type=wx_xnr_history_count_index_type,
        body=query_body)['hits']['hits']

    # Initialise every timestamp in the range to zero so that days without a
    # history record still appear in the output.
    for ts in load_timestamp_list(start_ts, end_ts):
        at_dict['at_day'][ts] = 0
        at_dict['at_total'][ts] = 0
    at_dict['mark'] = 0

    # Fill in the stored data. Records arrive in ascending timestamp order,
    # so 'mark' ends up holding the latest record's influence.
    for hit in search_results:
        record = hit['_source']
        timestamp = record['timestamp']
        at_dict['at_day'][timestamp] = record['daily_be_at_num']
        at_dict['at_total'][timestamp] = record['total_be_at_num']
        at_dict['mark'] = record['influence']
    return at_dict