def show_trace_community(xnr_user_no, now_time):
    if S_TYPE == 'test':
        now_time = datetime2ts(WEIBO_COMMUNITY_DATE)
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [
                            {'term': {'xnr_user_no': xnr_user_no}},
                            {'terms': {'community_status': [1, -2]}}
                        ]
                    }
                }
            }
        }
    }
    weibo_community_index_name = get_community_index(now_time)
    try:
        community_result = es_xnr.search(index=weibo_community_index_name, doc_type=weibo_community_index_type,
                                         body=query_body)['hits']['hits']
        community_list = []
        for item in community_result:
            # trace-status hint: flag communities with three straight weeks of no warnings
            if item['_source']['warning_remind'] >= 3:
                item['_source']['trace_message'] = u'该社区已经连续3周未出现预警,请选择放弃跟踪或强制跟踪!'
            else:
                item['_source']['trace_message'] = u''
            community_list.append(item['_source'])
        community_list.sort(key=lambda k: k.get('warning_rank', 0), reverse=True)
    except:
        community_list = []
    return community_list
def show_date_warning(account_name, start_time, end_time):
    if S_TYPE == 'test':
        test_today_date = TWITTER_FLOW_START_DATE
        test_time_gap = end_time - start_time
        today_datetime = datetime2ts(test_today_date)
        end_time = today_datetime
        start_time = end_time - test_time_gap
        end_datetime = datetime2ts(ts2datetime(end_time))
        start_datetime = datetime2ts(ts2datetime(start_time))
    else:
        now_time = int(time.time())
        today_datetime = datetime2ts(ts2datetime(now_time))
        end_datetime = datetime2ts(ts2datetime(end_time))
        start_datetime = datetime2ts(ts2datetime(start_time))
    result = lookup_date_info(account_name, start_time, end_time, today_datetime)
    return result
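# Several functions in this module repeat the same test-mode trick: keep the requested
# window length but slide the window back so it ends on a fixed test date. A minimal
# sketch of that pattern as a standalone helper; the name shift_window_to_test_date is
# hypothetical (not a repo function), while datetime2ts is the repo's own converter.
def shift_window_to_test_date(start_ts, end_ts, test_date):
    """Slide [start_ts, end_ts] so it ends on test_date, preserving its length (sketch)."""
    window = end_ts - start_ts            # keep the requested span
    new_end = datetime2ts(test_date)      # anchor the window on the test day
    return new_end - window, new_end

# usage sketch, mirroring the S_TYPE == 'test' branch above:
# start_time, end_time = shift_window_to_test_date(start_time, end_time, TWITTER_FLOW_START_DATE)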
def utils_get_penetration(wxbot_id, period, startdate, enddate):
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    current_timestamp = int(time.time())
    current_date = ts2datetime(current_timestamp)
    if period == 0:  # today's data
        current_time = datetime2ts(current_date)
        xnr_data = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid', 'groups_list'])
        puid = xnr_data['puid']
        group_list = xnr_data['groups_list']
        # query 1: average sensitivity over the bot's groups today
        sensitive_value = 0
        wx_group_message_index_name = wx_group_message_index_name_pre + current_date
        query_body_info = {
            'query': {
                'filtered': {
                    'filter': {
                        'bool': {
                            'must': [
                                {'terms': {'group_id': group_list}},
                                {'range': {'sensitive_value': {'gte': -1}}}
                            ]
                        }
                    }
                }
            },
            'aggs': {
                'avg_sensitive': {'avg': {'field': 'sensitive_value'}}
            }
        }
        try:
            es_sensitive_result = es_xnr.search(index=wx_group_message_index_name,
                                                doc_type=wx_group_message_index_type,
                                                body=query_body_info)['aggregations']
            sensitive_value = es_sensitive_result['avg_sensitive']['value']
            if sensitive_value is None:
                sensitive_value = 0
        except Exception, e:
            print 'sensitive_value Exception: ', str(e)
        # query 2: today's maximum sensitivity
        max_sensitive = 0
        query_body_max = {
            'query': {
                'filtered': {
                    'filter': {
                        'bool': {
                            'must': [
                                {'terms': {'group_id': group_list}},
                                # stand-in for an exists filter: match any doc with sensitive_value >= -1
                                {'range': {'sensitive_value': {'gte': -1}}}
                            ]
                        }
                    }
                }
            },
            'sort': {'sensitive_value': {'order': 'desc'}}
        }
        try:
            max_results = es_xnr.search(index=wx_group_message_index_name, doc_type=wx_group_message_index_type,
                                        body=query_body_max)['hits']['hits']
            max_sensitive = max_results[0]['_source']['sensitive_value']
        except Exception, e:
            print 'max_sensitive Exception: ', str(e)
        # score: log-scaled ratio of average to maximum sensitivity
        follow_group_sensitive = {'sensitive_info': {current_time: round(sensitive_value, 2)}}
        penetration = (math.log(sensitive_value + 1) / (math.log(max_sensitive + 1) + 1)) * 100
        follow_group_sensitive['mark'] = round(penetration, 2)
        return follow_group_sensitive
def get_generate_example_model(domain_name, role_name):
    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')
    role_en = fb_domain_ch2en_dict[role_name]
    task_id = domain_pinyin + '_' + role_en
    es_result = es.get(index=fb_role_index_name, doc_type=fb_role_index_type, id=task_id)['_source']
    item = es_result
    # political leaning
    political_side = json.loads(item['political_side'])[0][0]
    if political_side == 'mid':
        item['political_side'] = u'中立'
    elif political_side == 'left':
        item['political_side'] = u'左倾'
    else:
        item['political_side'] = u'右倾'
    # psychological features
    psy_feature_list = []
    psy_feature = json.loads(item['psy_feature'])
    for i in range(TOP_PSY_FEATURE):
        psy_feature_list.append(psy_feature[i][0])
    item['psy_feature'] = '&'.join(psy_feature_list)
    role_group_uids = json.loads(item['member_uids'])
    if S_TYPE == 'test':
        current_time = datetime2ts(S_DATE)
    else:
        current_time = int(time.time())
    index_name_list = get_flow_text_index_list(current_time)
    query_body_search = {
        'query': {
            'filtered': {
                'filter': {'terms': {'uid': role_group_uids}}
            }
        },
        'size': MAX_VALUE,
        '_source': ['keywords_string']
    }
    es_keyword_results = es_flow_text.search(index=index_name_list, doc_type=flow_text_index_type,
                                             body=query_body_search)['hits']['hits']
    keywords_string = ''
    for mget_item in es_keyword_results:
        keywords_string += '&'
        keywords_string += mget_item['_source']['keywords_string']
    k_dict = extract_keywords(keywords_string)
    monitor_keywords_list = []
    for item_item in k_dict:
        monitor_keywords_list.append(item_item.word.encode('utf-8'))
    item['monitor_keywords'] = ','.join(monitor_keywords_list)
    mget_results_user = es_user_portrait.mget(index=profile_index_name, doc_type=profile_index_type,
                                              body={'ids': role_group_uids})['docs']
    item['nick_name'] = []
    for mget_item in mget_results_user:
        if mget_item['found']:
            content = mget_item['_source']
            item['nick_name'] = ''
            if content.has_key('name'):
                item['nick_name'] = content['name']
            item['location'] = ''
            if content.has_key('location'):
                item['location'] = get_user_location(json.loads(content['location']))
            item['gender'] = 0
            if content.has_key('gender'):
                if content['gender'] == 'male':
                    item['gender'] = 1
                elif content['gender'] == 'female':
                    item['gender'] = 2
            item['description'] = ''
            if content.has_key('description'):
                item['description'] = content['description']
    item['business_goal'] = u'渗透'
    item['daily_interests'] = u'旅游'
    item['age'] = 30
    item['career'] = u'自由职业'
    active_time_list_np = np.array(json.loads(item['active_time']))
    active_time_list_np_sort = np.argsort(-active_time_list_np)[:TOP_ACTIVE_TIME]
    item['active_time'] = active_time_list_np_sort.tolist()
    day_post_num_list = np.array(json.loads(item['day_post_num']))
    item['day_post_num'] = np.mean(day_post_num_list).tolist()
    item['role_name'] = role_name
    task_id_new = 'fb_' + domain_pinyin + '_' + role_en
    example_model_file_name = EXAMPLE_MODEL_PATH + task_id_new + '.json'
    try:
        with open(example_model_file_name, "w") as dump_f:
            json.dump(item, dump_f)
        item_dict = dict()
        item_dict['domain_name'] = domain_name
        item_dict['role_name'] = role_name
        es.index(index=fb_example_model_index_name, doc_type=fb_example_model_index_type,
                 body=item_dict, id=task_id_new)
        mark = True
    except:
        mark = False
    return mark
def utils_get_influence(wxbot_id, period, startdate, enddate):
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    current_timestamp = int(time.time())
    current_date = ts2datetime(current_timestamp)
    if period == 0:  # today's data
        xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid'])['puid']
        current_time = datetime2ts(current_date)
        query_at_num = {
            'query': {
                'bool': {
                    'must': [
                        {'term': {'xnr_id': xnr_puid}},
                        {'term': {'at_flag': 1}}
                    ]
                }
            }
        }
        # times the bot was @-mentioned today
        wx_group_message_index_name = wx_group_message_index_name_pre + current_date
        try:
            results_xnr = es_xnr.count(index=wx_group_message_index_name, doc_type=wx_group_message_index_type,
                                       body=query_at_num)
            if results_xnr['_shards']['successful'] != 0:
                at_num_xnr = results_xnr['count']
            else:
                print 'es index rank error'
                at_num_xnr = 0
        except:
            at_num_xnr = 0
        # cumulative @-mention count up to now
        wx_group_message_index_list = get_wx_groupmessage_index_list(WX_GROUP_MESSAGE_START_DATE_ASSESSMENT,
                                                                     ts2datetime(current_time))
        at_num_total = 0
        for index_name in wx_group_message_index_list:
            r = es_xnr.count(index=index_name, doc_type=wx_group_message_index_type, body=query_at_num)
            if r['_shards']['successful'] != 0:
                at_num_total += r['count']
        # @-mentions of anyone today
        query_body_total_day = {
            'query': {
                'bool': {
                    'must': [
                        {'term': {'xnr_id': xnr_puid}},
                        {'wildcard': {'text': '*' + '@' + '*'}}
                    ]
                }
            }
        }
        try:
            results_total_day = es_xnr.count(index=wx_group_message_index_name, doc_type=wx_group_message_index_type,
                                             body=query_body_total_day)
            if results_total_day['_shards']['successful'] != 0:
                at_num_total_day = results_total_day['count']
            else:
                print 'es index rank error'
                at_num_total_day = 0
        except:
            at_num_total_day = 0
        # assemble results
        at_dict = {}
        at_dict['at_day'] = {}
        at_dict['at_total'] = {}
        at_dict['at_day'][current_time] = at_num_xnr
        at_dict['at_total'][current_time] = at_num_total
        influence = (float(math.log(at_num_xnr + 1)) / (math.log(at_num_total_day + 1) + 1)) * 100
        influence = round(influence, 2)  # two decimals
        at_dict['mark'] = influence
        return at_dict
    else:
        at_dict = {}
        at_dict['at_day'] = {}
        at_dict['at_total'] = {}
        query_body = {
            'query': {
                'filtered': {
                    'filter': {
                        'bool': {
                            'must': [
                                {'term': {'xnr_user_no': wxbot_id}},
                                {'range': {'timestamp': {'gte': start_ts, 'lte': end_ts}}}
                            ]
                        }
                    }
                }
            },
            'size': MAX_SEARCH_SIZE,
            'sort': {'timestamp': {'order': 'asc'}}
        }
        search_results = es_xnr.search(index=wx_xnr_history_count_index_name,
                                       doc_type=wx_xnr_history_count_index_type,
                                       body=query_body)['hits']['hits']
        # initialise the series
        ts_list = load_timestamp_list(start_ts, end_ts)
        for ts in ts_list:
            at_dict['at_day'][ts] = 0
            at_dict['at_total'][ts] = 0
        at_dict['mark'] = 0
        # fill in history data
        for result in search_results:
            result = result['_source']
            timestamp = result['timestamp']
            at_dict['at_day'][timestamp] = result['daily_be_at_num']
            at_dict['at_total'][timestamp] = result['total_be_at_num']
            at_dict['mark'] = result['influence']
        return at_dict
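# The influence mark above, and the safe/penetration marks in the QQ and WeChat
# functions further down, all share one shape: a log-scaled ratio of the bot's count
# against the day's maximum, mapped to 0-100. A minimal sketch of that formula as a
# shared helper; log_ratio_mark is a hypothetical name, not a repo function.
def log_ratio_mark(own_count, max_count):
    """(log(x+1) / (log(max+1) + 1)) * 100, rounded to two decimals (sketch)."""
    mark = (float(math.log(own_count + 1)) / (math.log(max_count + 1) + 1)) * 100
    return round(mark, 2)

# e.g. log_ratio_mark(at_num_xnr, at_num_total_day) reproduces the influence mark above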
def lookup_active_user(classify_id, xnr_id, start_time, end_time):
    time_gap = end_time - start_time
    now_time = time.time()
    test_time_gap = datetime2ts(ts2datetime(now_time)) - datetime2ts(S_DATE_FB)
    if S_TYPE == 'test':
        today_date_time = datetime2ts(S_DATE_FB)
        start_time = start_time - test_time_gap
        end_time = end_time - test_time_gap
    from_date_ts = datetime2ts(ts2datetime(start_time))
    to_date_ts = datetime2ts(ts2datetime(end_time))
    bci_index_name = fb_bci_index_name_pre + ''.join(ts2datetime(end_time - DAY).split('-'))
    userlist = lookup_xnr_friends(xnr_id)
    if classify_id == 1:
        condition_list = [{'bool': {'must': {'terms': {'uid': userlist}}}}]
    elif classify_id == 2:
        condition_list = [{'bool': {'must_not': [{'terms': {'uid': userlist}}]}}]
    elif classify_id == 0:
        condition_list = [{'match_all': {}}]
    results = []
    for item in condition_list:
        query_body = {
            'query': item,
            'size': HOT_WEIBO_NUM,  # the top users by influence are enough
            'sort': {'influence': {'order': 'desc'}}
        }
        try:
            flow_text_exist = es_xnr.search(index=bci_index_name,
                                            doc_type=fb_bci_index_type, body=query_body)['hits']['hits']
            search_uid_list = [item['_source']['uid'] for item in flow_text_exist]
            user_exist = es_xnr.search(index=facebook_user_index_name,
                                       doc_type=facebook_user_index_type,
                                       body={'query': {'terms': {'uid': search_uid_list}}})['hits']['hits']
            user_dict = dict()
            for item in user_exist:
                uid = item['_source']['uid']
                user_dict[uid] = item['_source']
            for item in flow_text_exist:
                influence = item['_source']['influence']
                active = item['_source']['active']
                uid = item['_source']['uid']
                try:
                    user_info = user_dict[uid]
                    uname = user_info['name']
                    location = user_info['locale']
                    link = user_info['link']
                except:
                    uname = ''
                    location = ''
                    link = ''
                results.append({'uid': uid, 'influence': influence, 'active': active,
                                'uname': uname, 'location': location, 'link': link})
        except Exception, e:
            print e
            results = []
    return results
def get_recommend_follows(task_detail):
    recommend_results = dict()
    # daily_interests_list = task_detail['daily_interests'].split(',')
    monitor_keywords_list = task_detail['monitor_keywords'].split(',')
    create_time = time.time()
    if S_TYPE == 'test':
        create_time = datetime2ts(S_DATE)
    index_name_list = get_flow_text_index_list(create_time)
    '''# the FB flow_text index has no daily_interests field, so this branch is disabled
    ## daily-interests follows
    try:
        query_body = {
            'query':{
                'filtered':{
                    'filter':{
                        'terms':{'daily_interests':daily_interests_list}
                    }
                }
            },
            # 'sort':{'user_fansnum':{'order':'desc'}},
            'size':DAILY_INTEREST_TOP_USER,
            '_source':['uid']
        }
        es_results = es_flow_text.search(index=index_name_list,doc_type='text',body=query_body)['hits']['hits']
        daily_interest_uid_set = set()
        for result in es_results:
            daily_interest_uid_set.add(result['_source']['uid'])
        daily_interest_uid_list = list(daily_interest_uid_set)
        es_daily_interests_results = es_user_profile.mget(index=profile_index_name,doc_type=profile_index_type,
            body={'ids':daily_interest_uid_list})['docs']
        nick_name_dict = {}
        es_daily_interests_results = es_daily_interests_results[:max(NICK_NAME_TOP,len(es_daily_interests_results))]
        for result in es_daily_interests_results:
            if result['found'] == True:
                result = result['_source']
                nick_name_dict[result['uid']] = result['nick_name']
        recommend_results['daily_interests'] = nick_name_dict
    except Exception,e:
        print e
        print 'no users matched the daily interests'
        recommend_results['daily_interests'] = {}
    '''
    ## monitor-keyword follows
    nest_query_list = []
    # posts may contain English or traditional Chinese, so match every variant
    monitor_en_keywords_list = trans(monitor_keywords_list, target_language='en')
    for i in range(len(monitor_keywords_list)):
        monitor_keyword = monitor_keywords_list[i]
        monitor_traditional_keyword = simplified2traditional(monitor_keyword)
        if len(monitor_en_keywords_list) == len(monitor_keywords_list):  # make sure the translation succeeded
            monitor_en_keyword = monitor_en_keywords_list[i]
            nest_query_list.append({'wildcard': {'keywords_string': '*' + monitor_en_keyword + '*'}})
        nest_query_list.append({'wildcard': {'keywords_string': '*' + monitor_keyword + '*'}})
        nest_query_list.append({'wildcard': {'keywords_string': '*' + monitor_traditional_keyword + '*'}})
    try:
        query_body_monitor = {
            'query': {
                'bool': {
                    # 'must': nest_query_list
                    'should': nest_query_list
                }
            },
            # 'sort': {'user_fansnum': {'order': 'desc'}},
            'size': MONITOR_TOP_USER,
            '_source': ['uid']
        }
        es_results = es_flow_text.search(index=index_name_list, doc_type='text', body=query_body_monitor)['hits']['hits']
        monitor_keywords_uid_set = set()
        for result in es_results:
            monitor_keywords_uid_set.add(result['_source']['uid'])
        monitor_keywords_uid_list = list(monitor_keywords_uid_set)
        es_monitor_keywords_results = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type,
                                                           body={'ids': monitor_keywords_uid_list})['docs']
        nick_name_dict = {}
        es_monitor_keywords_results = es_monitor_keywords_results[:max(NICK_NAME_TOP, len(es_monitor_keywords_results))]
        for result in es_monitor_keywords_results:
            if result['found'] == True:
                result = result['_source']
                nick_name_dict[result['uid']] = result['username']
        recommend_results['monitor_keywords'] = nick_name_dict
    except Exception, e:
        print e
        print 'no users matched the monitor keywords'
        recommend_results['monitor_keywords'] = {}
    return recommend_results
def search_by_xnr_number(xnr_qq_number, current_date, group_qq_name):
    group_qq_name_list = group_qq_name.encode('utf-8').split(',')
    # initial full group-history view for the operations page
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"term": {"xnr_qq_number": xnr_qq_number}},
                            {'terms': {'qq_group_nickname': group_qq_name_list}}
                        ]
                    }
                }
            }
        },
        "size": MAX_VALUE,
        "sort": {"timestamp": {"order": "desc"}}
    }
    enddate = current_date
    startdate = ts2datetime(datetime2ts(enddate) - group_message_windowsize * DAY)
    index_names = get_groupmessage_index_list(startdate, enddate)
    index_names.reverse()
    results = {}
    for index_name in index_names:
        # if not es_xnr.indices.exists(index=index_name):
        #     continue
        try:
            result = es_xnr.search(index=index_name, doc_type=group_message_index_type, body=query_body)
            if results != {}:
                results['hits']['hits'].extend(result['hits']['hits'])
            else:
                results = result
        except:
            pass
    return results
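# search_by_xnr_number merges hits from several daily indices by extending the first
# response's hits list in place. The same merge in isolation, as a sketch that returns
# a flat hit list; collect_hits is a hypothetical name and es_client stands for the
# repo's es_xnr client.
def collect_hits(es_client, index_names, doc_type, query_body):
    """Run one query over several daily indices and concatenate the hits (sketch)."""
    merged = []
    for index_name in index_names:
        try:
            response = es_client.search(index=index_name, doc_type=doc_type, body=query_body)
            merged.extend(response['hits']['hits'])
        except:
            continue  # skip missing daily indices, as the inline version does
    return merged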
def lookup_active_weibouser(classify_id, weiboxnr_id, start_time, end_time):
    time_gap = end_time - start_time
    now_time = time.time()
    test_time_gap = datetime2ts(ts2datetime(now_time)) - datetime2ts(S_DATE_BCI)
    today_date_time = end_time - DAY
    if S_TYPE == 'test':
        today_date_time = datetime2ts(S_DATE_BCI)
        start_time = start_time - test_time_gap
        end_time = end_time - test_time_gap
    from_date_ts = datetime2ts(ts2datetime(start_time))
    to_date_ts = datetime2ts(ts2datetime(end_time))
    bci_index_name = weibo_bci_index_name_pre + ''.join(ts2datetime(today_date_time).split('-'))
    # step 1: user condition -- restrict the user range by the classify choice
    userlist = lookup_weiboxnr_concernedusers(weiboxnr_id)
    if classify_id == 1:  # concerned users
        condition_list = [{'bool': {'must': {'terms': {'uid': userlist}}}}]
    elif classify_id == 2:  # unconcerned users
        condition_list = [{'bool': {'must_not': [{'terms': {'uid': userlist}}]}}]
    elif classify_id == 0:
        condition_list = [{'match_all': {}}]
    # step 2: look up the users
    user_max_index = count_maxweibouser_influence(end_time - DAY)
    results = []
    for item in condition_list:
        query_body = {
            'query': item,
            'size': HOT_WEIBO_NUM,  # the top users by influence are enough
            'sort': {'user_index': {'order': 'desc'}}
        }
        try:
            flow_text_exist = es_user_portrait.search(index=bci_index_name,
                                                      doc_type=weibo_bci_index_type, body=query_body)['hits']['hits']
            search_uid_list = [item['_source']['user'] for item in flow_text_exist]
            weibo_user_exist = es_user_profile.search(index=profile_index_name,
                                                      doc_type=profile_index_type,
                                                      body={'query': {'terms': {'uid': search_uid_list}}})['hits']['hits']
            weibo_user_dict = dict()
            for item in weibo_user_exist:
                uid = item['_source']['uid']
                weibo_user_dict[uid] = item['_source']
            for item in flow_text_exist:
                if user_max_index > 0:
                    influence = item['_source']['user_index'] / user_max_index * 100
                else:
                    influence = 0
                fans_num = item['_source']['user_fansnum']
                friends_num = item['_source']['user_friendsnum']
                total_number = item['_source']['total_number']
                uid = item['_source']['user']
                try:
                    weibo_user_info = weibo_user_dict[uid]
                    uname = weibo_user_info['nick_name']
                    location = weibo_user_info['user_location']
                    url = weibo_user_info['photo_url']
                except:
                    uname = ''
                    location = ''
                    url = ''
                results.append({'uid': uid, 'influence': influence, 'fans_num': fans_num,
                                'total_number': total_number, 'friends_num': friends_num,
                                'uname': uname, 'location': location, 'url': url})
        except:
            results = []
    return results
def lookup_hot_posts(from_ts, to_ts, weiboxnr_id, classify_id, order_id):
    # step 1: adjust the time window
    time_gap = to_ts - from_ts
    now_time = time.time()
    test_time_gap = datetime2ts(ts2datetime(now_time)) - datetime2ts(S_DATE)
    if S_TYPE == 'test':
        today_date_time = datetime2ts(S_DATE)
        from_ts = from_ts - test_time_gap
        to_ts = from_ts + MAX_FLOW_TEXT_DAYS * DAY
    from_date_ts = datetime2ts(ts2datetime(from_ts))
    to_date_ts = datetime2ts(ts2datetime(to_ts))
    flow_text_index_name_list = []
    days_num = MAX_FLOW_TEXT_DAYS
    for i in range(0, days_num + 1):
        date_range_start_ts = to_date_ts - i * DAY
        date_range_start_datetime = ts2datetime(date_range_start_ts)
        index_name = flow_text_index_name_pre + date_range_start_datetime
        if es_flow_text.indices.exists(index=index_name):
            flow_text_index_name_list.append(index_name)
    if order_id == 1:  # by time
        sort_condition_list = [{'timestamp': {'order': 'desc'}}]
    elif order_id == 2:  # by popularity
        sort_condition_list = [{'retweeted': {'order': 'desc'}}]
    elif order_id == 3:  # by sensitivity
        sort_condition_list = [{'sensitive': {'order': 'desc'}}]
    else:  # default to time order
        sort_condition_list = [{'timestamp': {'order': 'desc'}}]
    userslist = lookup_weiboxnr_concernedusers(weiboxnr_id)
    # classify_id: 0 all users, 1 concerned users, 2 unconcerned users
    range_time_list = {'range': {'timestamp': {'gte': int(from_ts), 'lt': int(to_ts)}}}
    user_condition_list = []
    if classify_id == 1:
        user_condition_list = [{'bool': {'must': [{'terms': {'uid': userslist}}, range_time_list]}}]
    elif classify_id == 2:
        user_condition_list = [{'bool': {'must': [range_time_list],
                                         'must_not': [{'terms': {'uid': userslist}}]}}]
    elif classify_id == 0:
        user_condition_list = [{'bool': {'must': [range_time_list]}}]
    query_body = {
        'query': {'filtered': {'filter': user_condition_list}},
        'size': HOT_WEIBO_NUM,
        'sort': sort_condition_list
    }
    try:
        es_result = es_flow_text.search(index=flow_text_index_name_list, doc_type=flow_text_index_type,
                                        body=query_body)['hits']['hits']
        hot_result = []
        for item in es_result:
            item['_source']['nick_name'] = get_user_nickname(item['_source']['uid'])
            hot_result.append(item['_source'])
    except:
        hot_result = []
    return hot_result
def show_event_warming(xnr_user_no):
    now_time = int(time.time())
    hashtag_list = get_hashtag()
    if S_TYPE == 'test':
        test_day_date = S_DATE_EVENT_WARMING
        test_day_time = datetime2ts(test_day_date)
        flow_text_index_list = get_flow_text_index_list(test_day_time)
        hashtag_list = [('网络义勇军发布', 13), ('美国', 7), ('德国', 5), ('中国', 4), ('清真食品', 3),
                        ('反邪动态', 2), ('台海观察', 2), ('雷哥微评', 2), ('中国军队', 1)]
    else:
        flow_text_index_list = get_flow_text_index_list(now_time)
    # the virtual user's fans and followers lists
    try:
        es_xnr_result = es_xnr.get(index=weibo_xnr_fans_followers_index_name,
                                   doc_type=weibo_xnr_fans_followers_index_type,
                                   id=xnr_user_no)['_source']
        followers_list = es_xnr_result['followers_list']
        fans_list = es_xnr_result['fans_list']
    except:
        followers_list = []
        fans_list = []
    event_warming_list = []
    for event_item in hashtag_list:
        event_sensitive_count = 0
        # per event: name, main participants, representative weibo, influence, average time
        event_warming_content = dict()
        event_warming_content['event_name'] = event_item[0]
        event_influence_sum = 0
        event_time_sum = 0
        query_body = {
            'query': {
                'bool': {
                    'should': {'wildcard': {'text': '*' + event_item[0] + '*'}}
                }
            }
        }
        try:
            event_results = es_flow_text.search(index=flow_text_index_list,
                                                doc_type=flow_text_index_type,
                                                body=query_body)['hits']['hits']
            weibo_result = []
            fans_num_dict = dict()
            followers_num_dict = dict()
            alluser_num_dict = dict()
            for item in event_results:
                if item['_source']['sensitive'] > 0:
                    event_sensitive_count = event_sensitive_count + 1
                    # per-user counters
                    if alluser_num_dict.has_key(str(item['_source']['uid'])):
                        alluser_num_dict[str(item['_source']['uid'])] += 1
                    else:
                        alluser_num_dict[str(item['_source']['uid'])] = 1
                    for fans_uid in fans_list:
                        if fans_uid == item['_source']['uid']:
                            if fans_num_dict.has_key(str(fans_uid)):
                                fans_num_dict[str(fans_uid)] += 1
                            else:
                                fans_num_dict[str(fans_uid)] = 1
                    for followers_uid in followers_list:
                        if followers_uid == item['_source']['uid']:
                            if followers_num_dict.has_key(str(followers_uid)):
                                followers_num_dict[str(followers_uid)] += 1
                            else:
                                followers_num_dict[str(followers_uid)] = 1
                    # influence of a single weibo
                    origin_influence_value = (item['_source']['comment'] + item['_source']['retweeted']) * \
                                             (1 + item['_source']['sensitive'])
                    fans_value = judge_user_type(item['_source']['uid'], fans_list)
                    followers_value = judge_user_type(item['_source']['uid'], followers_list)
                    item['_source']['weibo_influence_value'] = origin_influence_value * (fans_value + followers_value)
                    weibo_result.append(item['_source'])
                    # accumulate influence and time
                    event_influence_sum = event_influence_sum + item['_source']['weibo_influence_value']
                    event_time_sum = item['_source']['timestamp']
            # representative weibo list
            weibo_result.sort(key=lambda k: k.get('weibo_influence_value', 0), reverse=True)
            event_warming_content['main_weibo_info'] = weibo_result
            # event influence and average event time
            number = len(event_results)
            if number:
                event_warming_content['event_influence'] = event_influence_sum / number
                event_warming_content['event_time'] = event_time_sum / number
            else:
                event_warming_content['event_influence'] = 0
                event_warming_content['event_time'] = 0
        except:
            event_warming_content['main_weibo_info'] = []
            event_warming_content['event_influence'] = []
            event_warming_content['event_time'] = []
        try:
            if event_sensitive_count > 0:
                # rank the participants
                temp_userid_dict = union_dict(fans_num_dict, followers_num_dict)
                main_userid_dict = union_dict(temp_userid_dict, alluser_num_dict)
                main_userid_dict = sorted(main_userid_dict.items(), key=lambda d: d[1], reverse=True)
                main_userid_list = []
                for i in xrange(0, len(main_userid_dict)):
                    main_userid_list.append(main_userid_dict[i][0])
                # main participant profiles
                main_user_info = []
                user_es_result = es_user_profile.mget(index=profile_index_name,
                                                      doc_type=profile_index_type,
                                                      body={'ids': main_userid_list})['docs']
                for item in user_es_result:
                    user_dict = dict()
                    if item['found']:
                        user_dict['photo_url'] = item['_source']['photo_url']
                        user_dict['uid'] = item['_id']
                        user_dict['nick_name'] = item['_source']['nick_name']
                        user_dict['favoritesnum'] = item['_source']['favoritesnum']
                        user_dict['fansnum'] = item['_source']['fansnum']
                    else:
                        user_dict['photo_url'] = ''
                        user_dict['uid'] = item['_id']
                        user_dict['nick_name'] = ''
                        user_dict['favoritesnum'] = 0
                        user_dict['fansnum'] = 0
                    main_user_info.append(user_dict)
                event_warming_content['main_user_info'] = main_user_info
            else:
                event_warming_content['main_user_info'] = []
        except:
            event_warming_content['main_user_info'] = []
        if event_sensitive_count > 0:
            event_warming_list.append(event_warming_content)
    return event_warming_list
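# union_dict above merges the per-user counters before ranking; its definition lives
# elsewhere in the repo. A sketch of the assumed semantics (counts for keys present in
# both dicts are summed); union_dict_sketch is illustrative, not the repo's own code.
def union_dict_sketch(dict_a, dict_b):
    """Merge two {key: count} dicts, summing counts for shared keys (assumed behavior)."""
    merged = dict(dict_a)
    for key, count in dict_b.items():
        merged[key] = merged.get(key, 0) + count
    return merged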
def show_personnal_warming(xnr_user_no, day_time):
    # fetch the followers list
    try:
        es_xnr_result = es_xnr.get(index=weibo_xnr_fans_followers_index_name,
                                   doc_type=weibo_xnr_fans_followers_index_type,
                                   id=xnr_user_no)['_source']
        followers_list = es_xnr_result['followers_list']
    except:
        followers_list = []
    # rank followed users by summed sensitivity
    query_body = {
        'query': {
            'filtered': {
                'filter': {'terms': {'uid': followers_list}}
            }
        },
        'aggs': {
            'followers_sensitive_num': {
                'terms': {'field': 'uid'},
                'aggs': {
                    'sensitive_num': {'sum': {'field': 'sensitive'}}
                }
            }
        },
        'size': MAX_VALUE
    }
    # time setup in test mode
    if S_TYPE == 'test':
        test_day_date = S_DATE_BCI
        test_day_time = datetime2ts(test_day_date)
        flow_text_index_list = get_flow_text_index_list(test_day_time)
    else:
        flow_text_index_list = get_flow_text_index_list(day_time)
    try:
        first_sum_result = es_flow_text.search(index=flow_text_index_list, doc_type=flow_text_index_type,
                                               body=query_body)['aggregations']['followers_sensitive_num']['buckets']
    except:
        first_sum_result = []
    top_userlist = []
    temp_num = min(USER_NUM, len(first_sum_result))
    for i in xrange(0, temp_num):
        user_sensitive = first_sum_result[i]['sensitive_num']['value']
        if user_sensitive > 0:
            user_dict = dict()
            user_dict['uid'] = first_sum_result[i]['key']
            user_dict['sensitive'] = user_sensitive
            top_userlist.append(user_dict)
    # fetch each sensitive user's most sensitive weibo content
    results = []
    for user in top_userlist:
        user_detail = dict()
        user_detail['uid'] = user['uid']
        user_detail['user_sensitive'] = user['sensitive']
        try:
            user_result = es_user_profile.get(index=profile_index_name, doc_type=profile_index_type,
                                              id=user['uid'])['_source']
            user_detail['user_name'] = user_result['nick_name']
        except:
            user_detail['user_name'] = ''
        query_body = {
            'query': {
                'filtered': {
                    'filter': {
                        'bool': {
                            'must': [
                                {'term': {'uid': user['uid']}},
                                {'range': {'sensitive': {'gte': 1, 'lte': 100}}}
                            ]
                        }
                    }
                }
            },
            'size': USER_CONTENT_NUM,
            'sort': {'sensitive': {'order': 'desc'}}
        }
        try:
            second_result = es_flow_text.search(index=flow_text_index_list,
                                                doc_type=flow_text_index_type,
                                                body=query_body)['hits']['hits']
        except:
            second_result = []
        s_result = []
        tem_word_one = '静坐'
        tem_word_two = '集合'
        for item in second_result:
            # skip items whose only sensitive word is one of the two filtered terms
            sensitive_words = item['_source']['sensitive_words_string']
            if sensitive_words == tem_word_one or sensitive_words == tem_word_two:
                continue
            s_result.append(item['_source'])
        s_result.sort(key=lambda k: k.get('sensitive', 0), reverse=True)
        user_detail['content'] = s_result
        results.append(user_detail)
    results.sort(key=lambda k: k.get('user_sensitive', 0), reverse=True)
    return results
def show_speech_warming(xnr_user_no, show_type, day_time):
    # followed users
    try:
        es_xnr_result = es_xnr.get(index=weibo_xnr_fans_followers_index_name,
                                   doc_type=weibo_xnr_fans_followers_index_type,
                                   id=xnr_user_no)['_source']
        followers_list = es_xnr_result['followers_list']
    except:
        followers_list = []
    show_condition_list = []
    if show_type == 0:  # all users
        show_condition_list.append({'must': {'range': {'sensitive': {'gte': 1, 'lte': 100}}}})
    elif show_type == 1:  # followed users
        show_condition_list.append({'must': [{'terms': {'uid': followers_list}},
                                             {'range': {'sensitive': {'gte': 1, 'lte': 100}}}]})
    elif show_type == 2:  # unfollowed users
        show_condition_list.append({'must_not': {'terms': {'uid': followers_list}},
                                    'must': {'range': {'sensitive': {'gte': 1, 'lte': 100}}}})
    query_body = {
        'query': {
            'filtered': {
                'filter': {'bool': show_condition_list[0]}
            }
        },
        'size': SPEECH_WARMING_NUM,
        'sort': {'sensitive': {'order': 'desc'}}
    }
    # time setup in test mode
    if S_TYPE == 'test':
        test_day_date = S_DATE_BCI
        test_day_time = datetime2ts(test_day_date)
        flow_text_index_list = get_flow_text_index_list(test_day_time)
    else:
        flow_text_index_list = get_flow_text_index_list(day_time)
    try:
        results = es_flow_text.search(index=flow_text_index_list, doc_type=flow_text_index_type,
                                      body=query_body)['hits']['hits']
    except:
        results = []
    result = []
    # manually excluded weibo ids
    un_id_list = ['4045093692450438', '4045096116622444', '4045095374193153', '4045095567336676',
                  '4045092304116237', '4045093297982719', '4045178576337277', '4044647661388452']
    for item in results:
        if item['_id'] not in un_id_list:
            result.append(item['_source'])
    return result
def aggr_sen_users(xnr_qq_number, startdate, enddate):
    start_ts = datetime2ts(startdate)
    end_ts = datetime2ts(enddate)
    query_body = {
        "query": {
            "bool": {
                "must": [
                    {'term': {'xnr_qq_number': xnr_qq_number}},
                    {"term": {"sensitive_flag": 1}},
                    {'range': {'timestamp': {'gte': start_ts, 'lt': end_ts}}}
                ]
            }
        },
        "aggs": {
            "all_senusers": {
                # "terms": {"field": "speaker_qq_number"}
                "terms": {"field": "speaker_nickname"}
            }
        }
    }
    index_names = get_groupmessage_index_list(startdate, enddate)
    results = []
    for index_name in index_names:
        try:
            result = es_xnr.search(index=index_name,
                                   doc_type=group_message_index_type,
                                   body=query_body)["aggregations"]["all_senusers"]["buckets"]
        except Exception, e:
            result = []
        if result != []:
            for item in result:
                inner_item = {}
                inner_item['qq_nick'] = item['key']
                inner_item['count'] = item['doc_count']
                info = get_speaker_info(item['key'], index_name)
                if info == {}:
                    inner_item['qq_number'] = ''
                    inner_item['qq_groups'] = ''
                    inner_item['last_speak_ts'] = ''
                    inner_item['text'] = []
                else:
                    inner_item['qq_number'] = info['qq_number']
                    inner_item['qq_groups'] = info['qq_groups']
                    inner_item['last_speak_ts'] = info['last_speak_ts']
                    inner_item['text'] = info['text']
                flag = 1
                for aa in results:  # check whether this speaker is already in the results
                    if aa['qq_nick'] == inner_item['qq_nick']:
                        aa['count'] += inner_item['count']
                        aa['last_speak_ts'] = inner_item['last_speak_ts']
                        aa['qq_groups'].update(inner_item['qq_groups'])  # merge groups when the user spoke in several
                        aa['text'].extend(inner_item['text'])
                        flag = 0
                        break
                if flag:
                    results.append(inner_item)
    return results
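# aggr_sen_users folds per-index terms-aggregation buckets into one result list keyed
# on nickname. The counting part of that fold in isolation, as a sketch; the helper
# name is hypothetical, and bucket_lists stands for the per-index
# ['aggregations']['all_senusers']['buckets'] lists gathered above.
def merge_bucket_counts(bucket_lists):
    """Fold lists of ES terms buckets into one {key: doc_count} dict (sketch)."""
    totals = {}
    for buckets in bucket_lists:
        for bucket in buckets:
            totals[bucket['key']] = totals.get(bucket['key'], 0) + bucket['doc_count']
    return totals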
def utils_get_safe(wxbot_id, period, startdate, enddate):
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    current_timestamp = int(time.time())
    current_date = ts2datetime(current_timestamp)
    if period == 0:  # today's data
        current_time = datetime2ts(current_date)
        last_date = ts2datetime(current_time - DAY)
        speak_dict = {}
        speak_dict['speak_day'] = {}
        speak_dict['speak_total'] = {}
        xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid'])['puid']
        # the bot's message count today
        today_count = 0
        query_body = {
            'query': {
                'bool': {
                    'must': [
                        {'term': {'speaker_id': xnr_puid}},
                        {'term': {'xnr_id': xnr_puid}}
                    ]
                }
            }
        }
        today_index_name = wx_group_message_index_name_pre + current_date
        try:
            today_count_result = es_xnr.count(index=today_index_name,
                                              doc_type=wx_group_message_index_type,
                                              body=query_body)
            if today_count_result['_shards']['successful'] != 0:
                today_count = today_count_result['count']
        except Exception, e:
            print 'today_count Exception: ', str(e)
        # the bot's historical message total
        total_count = 0
        total_query_body = {
            'query': {
                'bool': {
                    'must': [
                        {'term': {'xnr_user_no': wxbot_id}},
                        {'term': {'puid': xnr_puid}},
                        {'term': {'date_time': last_date}}
                    ]
                }
            }
        }
        total_index_name = wx_xnr_history_count_index_name
        try:
            total_count_result = es_xnr.search(index=total_index_name,
                                               doc_type=wx_xnr_history_count_index_type,
                                               body=total_query_body)
            if total_count_result['_shards']['successful'] != 0:
                total_count = total_count_result['hits']['hits'][0]['_source']['total_post_num']
        except Exception, e:
            print 'total_count Exception:', str(e)
def get_penetration_qq_today(xnr_user_no):
    follow_group_sensitive = {}
    follow_group_sensitive['sensitive_info'] = {}
    get_result = es_xnr.get(index=qq_xnr_index_name, doc_type=qq_xnr_index_type, id=xnr_user_no)['_source']
    qq_number = get_result['qq_number']
    nickname = get_result['nickname']
    group_list = []
    group_info = json.loads(get_result['group_info'])
    for key, value_dict in group_info.iteritems():
        group_list.append(value_dict['group_name'])
    if S_TYPE == 'test':
        current_time = datetime2ts(QQ_S_DATE_ASSESSMENT)
    else:
        current_time = int(time.time())
    current_date = ts2datetime(current_time)
    group_message_index_name = group_message_index_name_pre + current_date
    query_body_info = {
        'query': {
            'filtered': {
                'filter': {'terms': {'qq_group_nickname': group_list}}
            }
        },
        'aggs': {
            'avg_sensitive': {'avg': {'field': 'sensitive_value'}}
        }
    }
    sensitive_value = 0.0
    try:
        es_sensitive_result = es_xnr.search(index=group_message_index_name, doc_type=group_message_index_type,
                                            body=query_body_info)['aggregations']
        sensitive_value = es_sensitive_result['avg_sensitive']['value']
        if sensitive_value is None:
            sensitive_value = 0.0
        follow_group_sensitive['sensitive_info'][current_time] = round(sensitive_value, 2)
    except:
        follow_group_sensitive['sensitive_info'][current_time] = 0
    query_body_max = {
        'query': {
            'filtered': {
                'filter': {'terms': {'qq_group_nickname': group_list}}
            }
        },
        'sort': {'sensitive_value': {'order': 'desc'}}
    }
    try:
        max_results = es_xnr.search(index=group_message_index_name, doc_type=group_message_index_type,
                                    body=query_body_max)['hits']['hits']
        max_sensitive = max_results[0]['_source']['sensitive_value']
    except:
        max_sensitive = 0
    penetration = (math.log(sensitive_value + 1) / (math.log(max_sensitive + 1) + 1)) * 100
    penetration = round(penetration, 2)
    follow_group_sensitive['mark'] = penetration
    return follow_group_sensitive
def get_recommend_follows(task_detail):
    recommend_results = dict()
    daily_interests_list = task_detail['daily_interests'].encode('utf-8').split(',')
    monitor_keywords_list = task_detail['monitor_keywords'].encode('utf-8').split(',')
    create_time = time.time()
    if S_TYPE == 'test':
        create_time = datetime2ts(S_DATE)
    index_name_list = get_flow_text_index_list(create_time)
    ## daily-interests follows
    try:
        query_body = {
            'query': {
                'filtered': {
                    'filter': {'terms': {'daily_interests': daily_interests_list}}
                }
            },
            'sort': {'user_fansnum': {'order': 'desc'}},
            'size': DAILY_INTEREST_TOP_USER,
            '_source': ['uid']
        }
        es_results = es_flow_text.search(index=index_name_list, doc_type='text', body=query_body)['hits']['hits']
        daily_interest_uid_set = set()
        for result in es_results:
            daily_interest_uid_set.add(result['_source']['uid'])
        daily_interest_uid_list = list(daily_interest_uid_set)
        es_daily_interests_results = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type,
                                                          body={'ids': daily_interest_uid_list})['docs']
        nick_name_dict = {}
        es_daily_interests_results = es_daily_interests_results[:max(NICK_NAME_TOP, len(es_daily_interests_results))]
        for result in es_daily_interests_results:
            if result['found'] == True:
                result = result['_source']
                nick_name_dict[result['uid']] = result['nick_name']
        recommend_results['daily_interests'] = nick_name_dict
    except:
        print 'no users matched the daily interests'
        recommend_results['daily_interests'] = {}
    ## monitor-keyword follows
    nest_query_list = []
    for monitor_keyword in monitor_keywords_list:
        nest_query_list.append({'wildcard': {'keywords_string': '*' + monitor_keyword + '*'}})
    try:
        query_body_monitor = {
            'query': {
                'bool': {'must': nest_query_list}
            },
            'sort': {'user_fansnum': {'order': 'desc'}},
            'size': MONITOR_TOP_USER,
            '_source': ['uid']
        }
        es_results = es_flow_text.search(index=index_name_list, doc_type='text', body=query_body_monitor)['hits']['hits']
        monitor_keywords_uid_set = set()
        for result in es_results:
            monitor_keywords_uid_set.add(result['_source']['uid'])
        monitor_keywords_uid_list = list(monitor_keywords_uid_set)
        es_monitor_keywords_results = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type,
                                                           body={'ids': monitor_keywords_uid_list})['docs']
        nick_name_dict = {}
        es_monitor_keywords_results = es_monitor_keywords_results[:max(NICK_NAME_TOP, len(es_monitor_keywords_results))]
        for result in es_monitor_keywords_results:
            if result['found'] == True:
                result = result['_source']
                nick_name_dict[result['uid']] = result['nick_name']
        recommend_results['monitor_keywords'] = nick_name_dict
    except:
        print 'no users matched the monitor keywords'
        recommend_results['monitor_keywords'] = {}
    return recommend_results
def get_safe_qq_today(xnr_user_no):
    get_result = es_xnr.get(index=qq_xnr_index_name, doc_type=qq_xnr_index_type, id=xnr_user_no)['_source']
    qq_number = get_result['qq_number']
    if S_TYPE == 'test':
        current_time = datetime2ts(QQ_S_DATE_ASSESSMENT)
    else:
        current_time = int(time.time())
    current_date = ts2datetime(current_time)
    group_message_index_name = group_message_index_name_pre + current_date
    query_body = {
        'query': {
            'bool': {
                'must': [
                    {'term': {'speaker_qq_number': qq_number}},
                    {'term': {'xnr_qq_number': qq_number}}
                ]
            }
        }
    }
    count_result = es_xnr.count(index=group_message_index_name, doc_type=group_message_index_type, body=query_body)
    if count_result['_shards']['successful'] != 0:
        today_count = count_result['count']
    else:
        print 'es index rank error'
        today_count = 0
    last_date = ts2datetime(current_time - DAY)
    qq_xnr_history_count_index_name = qq_xnr_history_count_index_name_pre + last_date
    try:
        get_result = es_xnr.get(index=qq_xnr_history_count_index_name, doc_type=qq_xnr_history_count_index_type,
                                id=xnr_user_no)['_source']
        total_count_history = get_result['total_post_num']
    except:
        total_count_history = 0
    total_count_today = total_count_history + today_count
    item_dict = dict()
    item_dict['speak_today'] = {}
    item_dict['speak_total'] = {}
    item_dict['speak_today'][current_time] = today_count
    item_dict['speak_total'][current_time] = total_count_today
    query_body_total_day = {
        'query': {
            'filtered': {
                'filter': {'term': {'xnr_qq_number': qq_number}}
            }
        },
        'aggs': {
            'all_speakers': {
                'terms': {'field': 'speaker_qq_number', "order": {"_count": "desc"}}
            }
        }
    }
    try:
        results_total_day = es_xnr.search(index=group_message_index_name, doc_type=group_message_index_type,
                                          body=query_body_total_day)['aggregations']['all_speakers']['buckets']
        speaker_max = results_total_day[0]['doc_count']
    except:
        speaker_max = today_count
    safe_active = (float(math.log(today_count + 1)) / (math.log(speaker_max + 1) + 1)) * 100
    safe_active = round(safe_active, 2)  # two decimals
    item_dict['mark'] = safe_active
    return item_dict
def show_event_warming(xnr_user_no, start_time, end_time):
    if S_TYPE == 'test':
        test_today_date = S_DATE_EVENT_WARMING
        test_time_gap = end_time - start_time
        today_datetime = datetime2ts(test_today_date)
        end_time = today_datetime
        start_time = end_time - test_time_gap
        end_datetime = datetime2ts(ts2datetime(end_time))
        start_datetime = datetime2ts(ts2datetime(start_time))
    else:
        now_time = int(time.time())
        today_datetime = datetime2ts(ts2datetime(now_time))
        end_datetime = datetime2ts(ts2datetime(end_time))
        start_datetime = datetime2ts(ts2datetime(start_time))
    event_warming = []
    first_time = int(time.time())
    if today_datetime > end_datetime:
        # window lies entirely in the past
        event_warming = lookup_history_event_warming(xnr_user_no, start_time, end_time)
    else:
        if end_datetime == start_datetime:
            # single-day window ending today
            event_warming = create_event_warning(xnr_user_no, end_time, write_mark=False)
        else:
            # multi-day window ending today: combine today's warnings with history
            today_event_warming = create_event_warning(xnr_user_no, end_time, write_mark=False)
            history_event_warming = lookup_history_event_warming(xnr_user_no, start_time, today_datetime)
            history_event_warming.extend(today_event_warming)
            event_warming = history_event_warming
    warming_list = []
    event_name_list = []
    for item in event_warming:
        event_name = item['event_name']
        item['main_user_info'] = json.loads(item['main_user_info'])
        item['main_weibo_info'] = json.loads(item['main_weibo_info'])
        if event_name not in event_name_list:
            event_name_list.append(event_name)
            warming_list.append(item)
        else:
            # merge duplicate events: pick up users and weibos not seen yet
            old_event = [event for event in warming_list if event['event_name'] == event_name][0]
            new_warming_list = [event for event in warming_list if event['event_name'] != event_name]
            old_main_user_uids = [user['uid'] for user in old_event['main_user_info']]
            now_uids = [u['uid'] for u in item['main_user_info']]
            new_uids = list(set(now_uids) - set(old_main_user_uids))
            new_main_user_info = []
            for uid in new_uids:
                uid_info = [u for u in item['main_user_info'] if u['uid'] == uid]
                if uid_info:
                    new_main_user_info.append(uid_info[0])
            old_event['main_user_info'].extend(new_main_user_info)
            old_main_mids = [content['mid'] for content in old_event['main_weibo_info']]
            now_mids = [c['mid'] for c in item['main_weibo_info']]
            new_mids = list(set(now_mids) - set(old_main_mids))
            new_main_weibo_info = []
            for mid in new_mids:
                mid_info = [t for t in item['main_weibo_info'] if t['mid'] == mid]
                if mid_info:
                    new_main_weibo_info.append(mid_info[0])
            old_event['main_weibo_info'].extend(new_main_weibo_info)
            old_event['event_influence'] = old_event['event_influence'] + item['event_influence']
            new_warming_list.append(old_event)
            warming_list = new_warming_list
    if warming_list:
        warming_list.sort(key=lambda k: k.get('event_influence', 0), reverse=True)
    final_time = int(time.time())
    print 'time_cost:', final_time - first_time
    return warming_list
def get_influence_at_num_today(xnr_user_no):
    at_dict = {}
    at_dict['at_day'] = {}
    at_dict['at_total'] = {}
    get_result = es_xnr.get(index=qq_xnr_index_name, doc_type=qq_xnr_index_type, id=xnr_user_no)['_source']
    qq_number = get_result['qq_number']
    nickname = get_result['nickname']
    if S_TYPE == 'test':
        current_time = datetime2ts(QQ_S_DATE_ASSESSMENT)
    else:
        current_time = int(time.time())
    current_date = ts2datetime(current_time)
    group_message_index_name = group_message_index_name_pre + current_date
    # times the bot was @-mentioned today
    query_body_xnr = {
        'query': {
            'bool': {
                'must': [
                    {'term': {'xnr_qq_number': qq_number}},
                    {'wildcard': {'text': '*' + '@ME' + '*'}}
                ]
            }
        }
    }
    try:
        results_xnr = es_xnr.count(index=group_message_index_name, doc_type=group_message_index_type,
                                   body=query_body_xnr)
        if results_xnr['_shards']['successful'] != 0:
            at_num_xnr = results_xnr['count']
        else:
            print 'es index rank error'
            at_num_xnr = 0
    except:
        at_num_xnr = 0
    # historical cumulative total up to yesterday
    current_time_last = current_time - DAY
    current_date_last = ts2datetime(current_time_last)
    qq_xnr_history_count_index_name = qq_xnr_history_count_index_name_pre + current_date_last
    try:
        result_last = es_xnr.get(index=qq_xnr_history_count_index_name,
                                 doc_type=qq_xnr_history_be_at_index_type, id=xnr_user_no)['_source']
        total_be_at_num_last = result_last['total_be_at_num']
    except:
        total_be_at_num_last = 0
    at_dict['at_day'][current_time] = at_num_xnr
    at_dict['at_total'][current_time] = at_num_xnr + total_be_at_num_last
    # @-mentions of anyone today
    query_body_total_day = {
        'query': {
            'bool': {
                'must': [
                    {'term': {'xnr_qq_number': qq_number}},
                    {'wildcard': {'text': '*' + '@' + '*'}}
                ]
            }
        }
    }
    try:
        results_total_day = es_xnr.count(index=group_message_index_name, doc_type=group_message_index_type,
                                         body=query_body_total_day)
        if results_total_day['_shards']['successful'] != 0:
            at_num_total_day = results_total_day['count']
        else:
            print 'es index rank error'
            at_num_total_day = 0
    except:
        at_num_total_day = 0
    influence = (float(math.log(at_num_xnr + 1)) / (math.log(at_num_total_day + 1) + 1)) * 100
    influence = round(influence, 2)  # two decimals
    at_dict['mark'] = influence
    return at_dict
def lookup_hot_posts(from_ts, to_ts, xnr_id, classify_id, order_id):
    time_gap = to_ts - from_ts
    now_time = time.time()
    test_time_gap = datetime2ts(ts2datetime(now_time)) - datetime2ts(S_DATE_FB)
    if S_TYPE == 'test':
        today_date_time = datetime2ts(S_DATE_FB)
        from_ts = from_ts - test_time_gap
        to_ts = to_ts - test_time_gap
    from_date_ts = datetime2ts(ts2datetime(from_ts))
    to_date_ts = datetime2ts(ts2datetime(to_ts))
    flow_text_index_name_list = get_timets_set_indexset_list(facebook_flow_text_index_name_pre, from_ts, to_ts)
    userslist = lookup_xnr_friends(xnr_id)
    # classify_id: 0 all users, 1 friends, 2 non-friends
    range_time_list = {'range': {'timestamp': {'gte': int(from_ts), 'lt': int(to_ts)}}}
    user_condition_list = []
    if classify_id == 1:
        user_condition_list = [{'bool': {'must': [{'terms': {'uid': userslist}}, range_time_list]}}]
    elif classify_id == 2:
        user_condition_list = [{'bool': {'must': [range_time_list],
                                         'must_not': [{'terms': {'uid': userslist}}]}}]
    elif classify_id == 0:
        user_condition_list = [{'bool': {'must': [range_time_list]}}]
    query_body = {
        'query': {'filtered': {'filter': user_condition_list}},
        'size': MAX_HOT_POST_SIZE,
        'sort': {'timestamp': {'order': 'desc'}}
    }
    try:
        es_result = es_xnr.search(index=flow_text_index_name_list, doc_type=facebook_flow_text_index_type,
                                  body=query_body)['hits']['hits']
        hot_result = []
        for item in es_result:
            # fetch the three engagement metrics
            fid_result = lookup_fid_attend_index(item['_source']['fid'], from_ts, to_ts)
            if fid_result:
                item['_source']['comment'] = fid_result['comment']
                item['_source']['share'] = fid_result['share']
                item['_source']['favorite'] = fid_result['favorite']
            else:
                item['_source']['comment'] = 0
                item['_source']['share'] = 0
                item['_source']['favorite'] = 0
            # fetch the user's nickname
            item['_source']['nick_name'] = get_user_nickname(item['_source']['uid'])
            hot_result.append(item['_source'])
    except:
        hot_result = []
    if order_id == 1:  # by time
        sort_condition = 'timestamp'
    elif order_id == 2:  # by popularity
        sort_condition = 'retweeted'
    elif order_id == 3:  # by sensitivity
        sort_condition = 'sensitive'
    else:  # default to time order
        sort_condition = 'timestamp'
    if hot_result:
        hot_result.sort(key=lambda k: k.get(sort_condition, 0), reverse=True)
        hot_result = hot_result[:50]
    return hot_result
def utils_get_safe(wxbot_id, period, startdate, enddate):
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    current_timestamp = int(time.time())
    current_date = ts2datetime(current_timestamp)
    if period == 0:  # today's data
        current_time = datetime2ts(current_date)
        last_date = ts2datetime(current_time - DAY)
        speak_dict = {}
        speak_dict['speak_day'] = {}
        speak_dict['speak_total'] = {}
        xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid'])['puid']
        # today's message count
        query_body = {
            'query': {
                'bool': {
                    'must': [
                        {'term': {'speaker_id': xnr_puid}},
                        {'term': {'xnr_id': xnr_puid}}
                    ]
                }
            }
        }
        today_index_name = wx_group_message_index_name_pre + current_date
        today_count_result = es_xnr.count(index=today_index_name, doc_type=wx_group_message_index_type,
                                          body=query_body)
        if today_count_result['_shards']['successful'] != 0:
            today_count = today_count_result['count']
        else:
            print 'es index rank error'
            today_count = 0
        # historical message total
        total_count = 0
        total_query_body = {
            'query': {
                'bool': {
                    'must': [
                        {'term': {'xnr_user_no': wxbot_id}},
                        {'term': {'puid': xnr_puid}},
                        {'term': {'date_time': last_date}}
                    ]
                }
            }
        }
        total_index_name = wx_xnr_history_count_index_name
        try:
            total_count_result = es_xnr.search(index=total_index_name, doc_type=wx_xnr_history_count_index_type,
                                               body=total_query_body)
            if total_count_result['_shards']['successful'] != 0:
                total_count = total_count_result['hits']['hits'][0]['_source']['total_post_num']
        except Exception, e:
            print e
            total_count = 0
        # total including today
        total_count_today = total_count + today_count
        # maximum message count among all speakers today
        query_body_total_day = {
            'query': {
                'filtered': {
                    'filter': {'term': {'xnr_id': xnr_puid}}
                }
            },
            'aggs': {
                'all_speakers': {
                    'terms': {'field': 'speaker_id', "order": {"_count": "desc"}}
                }
            }
        }
        try:
            results_total_day = es_xnr.search(index=today_index_name, doc_type=wx_group_message_index_type,
                                              body=query_body_total_day)['aggregations']['all_speakers']['buckets']
            speaker_max = results_total_day[0]['doc_count']
        except:
            speaker_max = today_count
        # assemble results
        speak_dict = dict()
        speak_dict['speak_today'] = {}
        speak_dict['speak_total'] = {}
        speak_dict['speak_today'][current_time] = today_count
        speak_dict['speak_total'][current_time] = total_count_today
        safe_active = (float(math.log(today_count + 1)) / (math.log(speaker_max + 1) + 1)) * 100
        safe_active = round(safe_active, 2)  # two decimals
        speak_dict['mark'] = safe_active
        return speak_dict
def get_generate_example_model(domain_name, role_name):
    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')
    role_en = domain_ch2en_dict[role_name]
    task_id = domain_pinyin + '_' + role_en
    es_result = es.get(index=weibo_role_index_name, doc_type=weibo_role_index_type, id=task_id)['_source']
    item = es_result
    # political leaning
    political_side = json.loads(item['political_side'])[0][0]
    if political_side == 'mid':
        item['political_side'] = u'中立'
    elif political_side == 'left':
        item['political_side'] = u'左倾'
    else:
        item['political_side'] = u'右倾'
    # psychological features
    psy_feature_list = []
    psy_feature = json.loads(item['psy_feature'])
    for i in range(TOP_PSY_FEATURE):
        psy_feature_list.append(psy_feature[i][0])
    item['psy_feature'] = '&'.join(psy_feature_list)
    role_group_uids = json.loads(item['member_uids'])
    mget_results = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,
                                         body={'ids': role_group_uids})['docs']
    if S_TYPE == 'test':
        current_time = datetime2ts(S_DATE)
    else:
        current_time = int(time.time())
    index_name_list = get_flow_text_index_list(current_time)
    query_body_search = {
        'query': {
            'filtered': {
                'filter': {'terms': {'uid': role_group_uids}}
            }
        },
        'size': MAX_VALUE,
        '_source': ['keywords_string']
    }
    es_keyword_results = es_flow_text.search(index=index_name_list, doc_type=flow_text_index_type,
                                             body=query_body_search)['hits']['hits']
    keywords_string = ''
    for mget_item in es_keyword_results:
        keywords_string += '&'
        keywords_string += mget_item['_source']['keywords_string']
    k_dict = extract_keywords(keywords_string)
    monitor_keywords_list = []
    for item_item in k_dict:
        monitor_keywords_list.append(item_item.word.encode('utf-8'))
    item['monitor_keywords'] = ','.join(monitor_keywords_list)
    mget_results_user = es_user_portrait.mget(index=profile_index_name, doc_type=profile_index_type,
                                              body={'ids': role_group_uids})['docs']
    item['nick_name'] = []
    for mget_item in mget_results_user:
        if mget_item['found']:
            item['nick_name'] = mget_item['_source']['nick_name']
            item['location'] = mget_item['_source']['user_location']
            item['gender'] = mget_item['_source']['sex']
            uid = mget_item['_source']['uid']
            try:
                profile_results = es_user_portrait.get(index=profile_index_name, doc_type=profile_index_type,
                                                       id=uid)['_source']
                if profile_results['description']:
                    item['description'] = profile_results['description']
                    break
            except:
                pass
    item['business_goal'] = u'渗透'
    item['daily_interests'] = u'旅游'
    item['age'] = 30
    item['career'] = u'自由职业'
    active_time_list_np = np.array(json.loads(item['active_time']))
    active_time_list_np_sort = np.argsort(-active_time_list_np)[:TOP_ACTIVE_TIME]
    item['active_time'] = active_time_list_np_sort.tolist()
    day_post_num_list = np.array(json.loads(item['day_post_num']))
    item['day_post_num'] = np.mean(day_post_num_list).tolist()
    item['role_name'] = role_name
    task_id_new = domain_pinyin + '_' + role_en
    example_model_file_name = EXAMPLE_MODEL_PATH + task_id_new + '.json'
    try:
        with open(example_model_file_name, "w") as dump_f:
            json.dump(item, dump_f)
        item_dict = dict()
        item_dict['domain_name'] = domain_name
        item_dict['role_name'] = role_name
        es.index(index=weibo_example_model_index_name, doc_type=weibo_example_model_index_type,
                 body=item_dict, id=task_id_new)
        mark = True
    except:
        mark = False
    return mark
def utils_get_penetration(wxbot_id, period, startdate, enddate):
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    current_timestamp = int(time.time())
    current_date = ts2datetime(current_timestamp)
    if period == 0:  # today's data
        current_time = datetime2ts(current_date)
        xnr_data = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid', 'groups_list'])
        puid = xnr_data['puid']
        group_list = xnr_data['groups_list']
        # query 1: average sensitivity over the bot's groups today
        wx_group_message_index_name = wx_group_message_index_name_pre + current_date
        query_body_info = {
            'query': {
                'filtered': {
                    'filter': {'terms': {'group_id': group_list}}
                }
            },
            'aggs': {
                'avg_sensitive': {'avg': {'field': 'sensitive_value'}}
            }
        }
        try:
            es_sensitive_result = es_xnr.search(index=wx_group_message_index_name,
                                                doc_type=wx_group_message_index_type,
                                                body=query_body_info)['aggregations']
            sensitive_value = es_sensitive_result['avg_sensitive']['value']
            if sensitive_value is None:
                sensitive_value = 0
        except:
            sensitive_value = 0
        # query 2: today's maximum sensitivity
        query_body_max = {
            'query': {
                'filtered': {
                    'filter': {'terms': {'group_id': group_list}}
                }
            },
            'sort': {'sensitive_value': {'order': 'desc'}}
        }
        try:
            max_results = es_xnr.search(index=wx_group_message_index_name, doc_type=wx_group_message_index_type,
                                        body=query_body_max)['hits']['hits']
            max_sensitive = max_results[0]['_source']['sensitive_value']
        except:
            max_sensitive = 0
        # assemble and score
        follow_group_sensitive = {'sensitive_info': {current_time: sensitive_value}}
        penetration = (math.log(sensitive_value + 1) / (math.log(max_sensitive + 1) + 1)) * 100
        penetration = round(penetration, 2)
        follow_group_sensitive['mark'] = penetration
        return follow_group_sensitive
    else:
        follow_group_sensitive = {}
        follow_group_sensitive['sensitive_info'] = {}
        query_body = {
            'query': {
                'filtered': {
                    'filter': {
                        'bool': {
                            'must': [
                                {'term': {'xnr_user_no': wxbot_id}},
                                {'range': {'timestamp': {'gte': start_ts, 'lte': end_ts}}}
                            ]
                        }
                    }
                }
            },
            'size': MAX_SEARCH_SIZE,
            'sort': {'timestamp': {'order': 'asc'}}
        }
        search_results = es_xnr.search(index=wx_xnr_history_count_index_name,
                                       doc_type=wx_xnr_history_count_index_type,
                                       body=query_body)['hits']['hits']
        # initialise the series
        ts_list = load_timestamp_list(start_ts, end_ts)
        for ts in ts_list:
            follow_group_sensitive['sensitive_info'][ts] = 0
        follow_group_sensitive['mark'] = 0
        # fill in history data
        for result in search_results:
            result = result['_source']
            timestamp = result['timestamp']
            follow_group_sensitive['sensitive_info'][timestamp] = result['daily_sensitive_num']
            follow_group_sensitive['mark'] = result['penetration']
        return follow_group_sensitive
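# Both branches of utils_get_penetration key their series by day timestamps from
# load_timestamp_list, defined elsewhere in the repo. A plausible sketch of what it
# returns, assuming day-aligned timestamps stepping by the module's DAY constant;
# the body below is an assumption, not the repo's implementation.
def load_timestamp_list_sketch(start_ts, end_ts):
    """Day-aligned timestamps from start_ts through end_ts inclusive (assumed behavior)."""
    ts_list = []
    ts = datetime2ts(ts2datetime(start_ts))        # snap to the start of the first day
    end_day = datetime2ts(ts2datetime(end_ts))
    while ts <= end_day:
        ts_list.append(ts)
        ts += DAY
    return ts_list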