def followers_domain_update():
    """Re-classify the domain of every uid followed by any weibo XNR.

    Collects the union of all XNRs' followers_list entries, runs the
    domain classifier over the recently-active subset, and upserts one
    document per uid into the user_domain index.
    Side effects only; returns None.
    """
    # In test mode the "current" time is pinned to the configured sample date.
    if S_TYPE == 'test':
        current_time = datetime2ts(S_DATE)
    else:
        current_time = int(time.time())
    flow_text_index_name_list = get_flow_text_index_list(current_time)
    query_body = {'query': {'match_all': {}}, 'size': MAX_VALUE}
    search_results = es_xnr.search(index=weibo_xnr_fans_followers_index_name,
                                   doc_type=weibo_xnr_fans_followers_index_type,
                                   body=query_body)['hits']['hits']
    # Union of followers across all XNRs, de-duplicated below.
    followers_list_all = []
    for result in search_results:
        result = result['_source']
        followers_list = result['followers_list']
        followers_list_all.extend(followers_list)
    followers_list_all_set_list = list(set(followers_list_all))
    uid_weibo_keywords_dict, keywords_dict_all_users = uid_list_2_uid_keywords_dict(
        followers_list_all_set_list, flow_text_index_name_list)
    # Guards against invalid uids in the follower lists; only recently
    # active uids (those with keyword data) are meaningful to classify.
    uids_avtive_list = uid_weibo_keywords_dict.keys()
    ## Domain classification
    r_domain = dict()
    print 'uids_avtive_list::', uids_avtive_list
    # r_domain maps uid -> english domain label.
    domain, r_domain = domain_classfiy(uids_avtive_list, uid_weibo_keywords_dict)
    print 'r_domain::', r_domain
    for uid, domain in r_domain.iteritems():
        domain_name = domain_en2ch_dict[domain]
        _id = uid
        try:
            # Existing user_domain doc: refresh domain name and timestamp.
            print '_id:::', _id
            get_result = es_xnr.get(index=user_domain_index_name,
                                    doc_type=user_domain_index_type,
                                    id=_id)['_source']
            get_result['domain_name'] = domain_name
            get_result['update_time'] = int(time.time())
            es_xnr.update(index=user_domain_index_name,
                          doc_type=user_domain_index_type,
                          id=_id, body={'doc': get_result})
        except:
            # No existing doc (or the get failed): create a fresh one.
            item_dict = {}
            item_dict['uid'] = uid
            item_dict['domain_name'] = domain_name
            item_dict['update_time'] = int(time.time())
            es_xnr.index(index=user_domain_index_name,
                         doc_type=user_domain_index_type,
                         id=_id, body=item_dict)
def get_day_status(xnr_user_no, datetime):
    """Look up the community-status record for the given XNR and day.

    Returns the stored ``status`` value, or -1 when the record is
    missing or the lookup fails for any reason.
    """
    doc_id = '%s_%s' % (xnr_user_no, ts2datetime(datetime))
    try:
        source = es_xnr.get(index=weibo_community_status_index_name,
                            doc_type=weibo_community_status_index_type,
                            id=doc_id)['_source']
        return source['status']
    except:
        return -1
def xnr_user_no2uid(xnr_user_no):
    """Translate an xnr_user_no into the underlying weibo uid.

    Returns '' when the XNR record cannot be fetched.
    """
    uid = ''
    try:
        record = es_xnr.get(index=weibo_xnr_index_name,
                            doc_type=weibo_xnr_index_type,
                            id=xnr_user_no)['_source']
        uid = record['uid']
    except:
        pass
    return uid
def get_sensitive_info(timestamp, mid=None, text=None):
    """Return the sensitivity score of a weibo.

    Prefers the pre-computed ``sensitive`` field stored in that day's
    flow-text index; when no record exists for ``mid`` and ``text`` is
    supplied, the score is computed from the raw text.

    :param timestamp: unix timestamp used to pick the daily index
    :param mid: weibo id to look up (optional)
    :param text: raw weibo text as a fallback (optional)
    :return: integer sensitivity score, 0 by default
    """
    sensitive_info = 0
    index_name = flow_text_index_name_pre + ts2datetime(timestamp)
    if mid:
        try:
            # A stored record wins over recomputation.
            item_result = es_xnr.get(index=index_name,
                                     doc_type=flow_text_index_type,
                                     id=mid)['_source']
            return item_result['sensitive']
        except Exception:
            # No stored record: compute from the raw text when available.
            if text:
                sensitive_info = compute_sensitive(text)
    # BUG FIX: the original fell off the end here and returned None
    # whenever mid was falsy or the lookup failed; always return the score.
    return sensitive_info
def get_sensitive_info(timestamp, mid):
    """Fetch the stored sensitivity score of a facebook post.

    Returns 0 when the post has no record in that day's flow-text index
    or the lookup fails.
    """
    index_name = facebook_flow_text_index_name_pre + ts2datetime(timestamp)
    try:
        doc = es_xnr.get(index=index_name,
                         doc_type=facebook_flow_text_index_type,
                         id=mid)['_source']
        return doc['sensitive']
    except:
        return 0
def influence_trust(uid):
    """Trust indicator for a twitter account: the stored ``is_verified``
    flag, or 0 when the user document cannot be fetched."""
    trust = 0
    try:
        user_doc = es.get(index=twitter_user_index_name,
                          doc_type=twitter_user_index_type,
                          id=uid)['_source']
        trust = user_doc['is_verified']
    except:
        pass
    return trust
def get_hot_subopinion(xnr_user_no, task_id):
    """Return the sub-opinion results of a hot-keyword task.

    Yields a status string while the task is absent or still computing,
    the parsed result list when finished, and None implicitly when the
    task record or the result document is empty.
    """
    full_task_id = xnr_user_no + '_' + task_id
    try:
        task_doc = es.get(index=tw_hot_keyword_task_index_name,
                          doc_type=tw_hot_keyword_task_index_type,
                          id=full_task_id)['_source']
    except:
        return '尚未提交计算'
    if not task_doc:
        return
    if task_doc['compute_status'] != 2:
        return '正在计算'
    result_doc = es.get(index=tw_hot_subopinion_results_index_name,
                        doc_type=tw_hot_subopinion_results_index_type,
                        id=full_task_id)['_source']
    if result_doc:
        return json.loads(result_doc['subopinion_tw'])
def change_process_proportion(task_id, proportion):
    """Write the detection progress (``compute_status``) onto a domain task.

    Returns True on success, the string 'task is not exist' when the task
    document is missing, and False when the fetched document is empty.
    """
    try:
        task_doc = es_xnr.get(index=weibo_domain_index_name,
                              doc_type=weibo_domain_index_type,
                              id=task_id)['_source']
    except:
        return 'task is not exist'
    if task_doc == {}:
        return False
    task_doc['compute_status'] = proportion
    es_xnr.update(index=weibo_domain_index_name,
                  doc_type=weibo_domain_index_type,
                  id=task_id, body={'doc': task_doc})
    return True
def judge_sensing_sensor(xnr_user_no, uid):
    """Return True when ``uid`` is one of the XNR's social sensors."""
    if not es_xnr.exists(index=index_sensing, doc_type=type_sensing,
                         id=xnr_user_no):
        return False
    sensing_doc = es_xnr.get(index=index_sensing, doc_type=type_sensing,
                             id=xnr_user_no)['_source']
    return uid in sensing_doc['social_sensors']
def get_recommend_at_user(xnr_user_no):
    """Recommend users for the XNR to @-mention.

    Scans yesterday's twitter flow-text index for recent posters, resolves
    their nick names from the twitter user index, and returns a dict of
    uid -> nick_name capped at DAILY_AT_RECOMMEND_USER_TOP entries.
    Returns None implicitly when the XNR document is empty.
    """
    #_id = user_no2_id(user_no)
    es_result = es.get(index=tw_xnr_index_name, doc_type=tw_xnr_index_type, id=xnr_user_no)['_source']
    #print 'es_result:::',es_result
    if es_result:
        uid = es_result['uid']
        daily_interests = es_result['daily_interests']
        if S_TYPE == 'test':
            now_ts = datetime2ts(S_DATE_TW)
        else:
            now_ts = int(time.time())
        # Yesterday's daily flow-text index.
        datetime = ts2datetime(now_ts - 24 * 3600)
        index_name = twitter_flow_text_index_name_pre + datetime
        # NOTE(review): nest_query_list and daily_interests_list are built
        # but never used -- the search below is a plain match_all; the
        # interests presumably were meant to filter it. TODO confirm.
        nest_query_list = []
        daily_interests_list = daily_interests.split('&')
        # Most recent 200 tweets, newest first.
        es_results_daily = es.search(index=index_name, doc_type=twitter_flow_text_index_type,
                                     body={'query': {'match_all': {}}, 'size': 200,
                                           'sort': {'timestamp': {'order': 'desc'}}})['hits']['hits']
        uid_list = []
        if es_results_daily:
            for result in es_results_daily:
                result = result['_source']
                uid_list.append(result['uid'])
        ## Resolve nick names from the user index by uid.
        # uid is immutable while nick_name can change, so look it up fresh.
        uid_nick_name_dict = dict()
        es_results_user = es.mget(index=twitter_user_index_name, doc_type=twitter_user_index_type,
                                  body={'ids': uid_list})['docs']
        i = 0
        for result in es_results_user:
            if result['found'] == True:
                result = result['_source']
                uid = result['uid']
                nick_name = result['name']
                if nick_name:
                    i += 1
                    uid_nick_name_dict[uid] = nick_name
                    # Stop once enough named users have been collected.
                    if i >= DAILY_AT_RECOMMEND_USER_TOP:
                        break
        return uid_nick_name_dict
def get_tw_xnr_fans_followers():
    """Rebuild every twitter XNR's followers_list from the feedback-follow
    index and upsert it into the fans/followers index.

    Side effects only; returns None.
    """
    created_xnr_query = {
        'query': {'term': {'create_status': 2}},
        'size': MAX_SEARCH_SIZE
    }
    tw_xnrs = es_xnr.search(index=tw_xnr_index_name,
                            doc_type=tw_xnr_index_type,
                            body=created_xnr_query)['hits']['hits']
    for hit in tw_xnrs:
        source = hit['_source']
        root_uid = source['uid']
        xnr_user_no = source['xnr_user_no']
        follow_query = {
            'query': {'term': {'root_uid': root_uid}},
            'size': MAX_SEARCH_SIZE
        }
        follow_hits = es_xnr.search(index=twitter_feedback_follow_index_name,
                                    doc_type=twitter_feedback_follow_index_type,
                                    body=follow_query)['hits']['hits']
        friends_list = [h['_source']['uid'] for h in follow_hits]
        try:
            # Existence probe: raises when no document is stored yet.
            es_xnr.get(index=tw_xnr_fans_followers_index_name,
                       doc_type=tw_xnr_fans_followers_index_type,
                       id=xnr_user_no)['_source']
            es_xnr.update(index=tw_xnr_fans_followers_index_name,
                          doc_type=tw_xnr_fans_followers_index_type,
                          id=xnr_user_no,
                          body={'doc': {'followers_list': friends_list}})
        except:
            es_xnr.index(index=tw_xnr_fans_followers_index_name,
                         doc_type=tw_xnr_fans_followers_index_type,
                         id=xnr_user_no,
                         body={'followers_list': friends_list})
def load_tw_app_api_info(xnr_user_no): results = es_xnr.get(index=tw_xnr_index_name, doc_type=tw_xnr_index_type, id=xnr_user_no) try: res = results['_source'] info = { 'access_secret': res['access_secret'], 'access_token': res['access_token'], 'consumer_key': res['consumer_key'], 'consumer_secret': res['consumer_secret'], } return info except Exception, e: print e return False
def influence_trust(uid):
    """Trust indicator for a facebook account: 1 when the stored profile
    carries a 'category' field, otherwise 0 (also 0 on lookup failure)."""
    try:
        profile = es.get(index=facebook_user_index_name,
                         doc_type=facebook_user_index_type,
                         id=uid)['_source']
    except:
        return 0
    return 1 if 'category' in profile else 0
def fb_save_to_fans_follow_ES(xnr_user_no, uid, follow_type, trace_type):
    """Record a facebook trace-follow / unfollow on the XNR's fans-followers doc.

    :param follow_type: 'follow' or 'unfollow'
    :param trace_type: only 'trace_follow' is acted upon for follows
    :return: True on success (or no-op), False when the unfollow update fails
    """
    results = es_xnr.get(index=fb_xnr_fans_followers_index_name, doc_type=fb_xnr_fans_followers_index_type,
                         id=xnr_user_no)
    results = results["_source"]
    if follow_type == 'follow':
        if trace_type == 'trace_follow':
            # Add a trace-follow entry (set semantics: no duplicates).
            # NOTE(review): reads 'trace_follow_pre_list' here but writes
            # 'trace_follow_list' below, while the unfollow branch reads and
            # writes 'trace_follow_pre_list' -- looks inconsistent; confirm
            # which field is authoritative.
            try:
                trace_follow_uids = results['trace_follow_pre_list']
                trace_follow_uids_set = set(trace_follow_uids)
                trace_follow_uids_set.add(uid)
                trace_follow_uids = list(trace_follow_uids_set)
            except:
                trace_follow_uids = [uid]
            # # Add an ordinary follow as well (disabled)
            # try:
            #     followers_uids = results['followers_list']
            #     followers_uids_set = set(followers_uids)
            #     followers_uids_set.add(uid)
            #     followers_uids = list(followers_uids_set)
            # except:
            #     followers_uids = [uid]
            # results['followers_list'] = followers_uids
            results['trace_follow_list'] = trace_follow_uids
            es_xnr.update(index=fb_xnr_fans_followers_index_name, doc_type=fb_xnr_fans_followers_index_type,
                          id=xnr_user_no, body={'doc': results})
    elif follow_type == 'unfollow':
        # Remove the uid from the pending trace-follow list.
        try:
            followers_uids = results['trace_follow_pre_list']
            followers_uids = list(set(followers_uids).difference(set([uid])))
            results['trace_follow_pre_list'] = followers_uids
            es_xnr.update(index=fb_xnr_fans_followers_index_name, doc_type=fb_xnr_fans_followers_index_type,
                          id=xnr_user_no, body={'doc': results})
        except:
            return False
    return True
def save_user_warning(xnr_user_no, start_time, end_time):
    """Persist per-user warnings for the day, merging with existing records.

    :return: list of booleans, one per warning, True when stored successfully
    """
    # Ensure today's dated warning index exists before writing.
    today_date = ts2datetime(end_time)
    today_datetime = datetime2ts(today_date)
    weibo_user_warning_index_name = weibo_user_warning_index_name_pre + today_date
    if not es_xnr.indices.exists(index=weibo_user_warning_index_name):
        weibo_user_warning_mappings(weibo_user_warning_index_name)
    new_user_warning = create_personal_warning(xnr_user_no, start_time, end_time)
    today_history_user_warning, old_uid_list = lookup_history_user_warming(xnr_user_no, today_datetime, end_time)
    results = []
    if new_user_warning:
        for item in new_user_warning:
            id_mark = set_intersection(item['uid'], old_uid_list)
            if id_mark == 1:
                # The uid already has a record today: merge and re-store.
                task_id = xnr_user_no + '_' + item['uid']
                old_user = es_xnr.get(index=weibo_user_warning_index_name, doc_type=weibo_user_warning_index_type, id=task_id)['_source']
                # NOTE(review): 'content' is stored as a JSON string but is
                # re-indexed below as a raw list after this merge -- confirm
                # the mapping tolerates both representations.
                old_user['content'] = json.loads(old_user['content'])
                old_user['content'].extend(item['content'])
                old_user['user_sensitive'] = old_user['user_sensitive'] + item['user_sensitive']
                #old_user['user_influence'] = old_user['user_influence'] + item['user_influence']
                try:
                    es_xnr.index(index=weibo_user_warning_index_name, doc_type=weibo_user_warning_index_type, body=old_user, id=task_id)
                    mark = True
                except:
                    mark = False
            else:
                # First warning for this uid today: store directly.
                task_id = xnr_user_no + '_' + item['uid']
                try:
                    es_xnr.index(index=weibo_user_warning_index_name, doc_type=weibo_user_warning_index_type, body=item, id=task_id)
                    mark = True
                except:
                    mark = False
            results.append(mark)
    else:
        pass
    print 'person_mark::', results
    return results
def get_xnr_sensitive(xnr_user_no): xnr_sensitive_word = [] try: xnr_result = es_xnr.get(index=weibo_xnr_index_name, doc_type=weibo_xnr_index_type, id=xnr_user_no)['_source'] submitter = xnr_result['submitter'] except: submitter = '' sxnr_type = 'my_xnrs' query_body = { 'query': { 'filtered': { 'filter': { 'bool': { 'must': [{ 'term': { 'create_type': sxnr_type } }, { 'term': { 'submitter': submitter } }] } } } }, 'sort': { 'create_time': { 'order': 'asc' } } } if submitter: try: sensitive_result = es_xnr.search( index=weibo_sensitive_words_index_name, doc_type=weibo_sensitive_words_index_type, body=query_body)['hits']['hits'] for item in sensitive_result: xnr_sensitive_word.append(item['_source']['sensitive_words']) except: print 'except!!!-sensitive_words' return xnr_sensitive_word
def save_group_description_results(group_results, decect_task_information):
    """Store group-description results onto the domain task document.

    Updates the existing task doc when present, otherwise indexes a new
    one from the task information. compute_status 2 marks the group
    description as stored. Returns None.
    """
    mark = False  # NOTE(review): assigned but never used or returned.
    task_id = decect_task_information['domain_pinyin']
    try:
        # Existing task document: overwrite the description fields.
        item_exist = es.get(index=weibo_domain_index_name,
                            doc_type=weibo_domain_index_type,
                            id=task_id)['_source']
        item_exist['role_distribute'] = json.dumps(
            group_results['role_distribute'])
        item_exist['top_keywords'] = json.dumps(group_results['top_keywords'])
        item_exist['political_side'] = json.dumps(
            group_results['political_side'])
        item_exist['topic_preference'] = json.dumps(
            group_results['topic_preference'])
        item_exist['compute_status'] = 2  # group description stored
        es.update(index=weibo_domain_index_name,
                  doc_type=weibo_domain_index_type,
                  id=task_id,
                  body={'doc': item_exist})
    except Exception, e:
        # No existing document: build a fresh one from the task information.
        item_exist = dict()
        item_exist['domain_pinyin'] = json.dumps(
            decect_task_information['domain_pinyin'])
        item_exist['domain_name'] = json.dumps(
            decect_task_information['domain_name'])
        item_exist['create_type'] = json.dumps(
            decect_task_information['create_type'])
        item_exist['create_time'] = json.dumps(
            decect_task_information['create_time'])
        item_exist['submitter'] = json.dumps(
            decect_task_information['submitter'])
        item_exist['remark'] = json.dumps(decect_task_information['remark'])
        item_exist['role_distribute'] = json.dumps(
            group_results['role_distribute'])
        item_exist['top_keywords'] = json.dumps(group_results['top_keywords'])
        item_exist['political_side'] = json.dumps(
            group_results['political_side'])
        item_exist['topic_preference'] = json.dumps(
            group_results['topic_preference'])
        item_exist['compute_status'] = 2  # group description stored
        es.index(index=weibo_domain_index_name,
                 doc_type=weibo_domain_index_type,
                 id=task_id,
                 body=item_exist)
def judge_trace_follow(xnr_user_no, uid):
    """Return True when ``uid`` appears in the XNR's trace_follow_list."""
    if not es_xnr.exists(index=weibo_xnr_fans_followers_index_name,
                         doc_type=weibo_xnr_fans_followers_index_type,
                         id=xnr_user_no):
        return False
    record = es_xnr.get(index=weibo_xnr_fans_followers_index_name,
                        doc_type=weibo_xnr_fans_followers_index_type,
                        id=xnr_user_no)['_source']
    try:
        traced = record['trace_follow_list']
    except:
        traced = []
    return uid in traced
def delete_xnr_followers(xnr_user_no, follower_uid):
    """Remove ``follower_uid`` from an XNR's followers list and persist it.

    :return: True when the list was updated, False when the uid was not
             in the list or the ES update failed.
    """
    xnr_es_result = es_xnr.get(index=weibo_xnr_fans_followers_index_name,
                               doc_type=weibo_xnr_fans_followers_index_type,
                               id=xnr_user_no)['_source']
    user_no = int(xnr_user_no[-4:])  # numeric suffix of the xnr_user_no
    uid = xnr_es_result['uid']
    fans_list = xnr_es_result['fans_list']
    followers_list = xnr_es_result['followers_list']
    # BUG FIX: list.remove() raised an uncaught ValueError when the uid was
    # not present; treat that case as a failed delete instead of crashing.
    if follower_uid not in followers_list:
        return False
    followers_list.remove(follower_uid)
    try:
        es_xnr.update(index=weibo_xnr_fans_followers_index_name,
                      doc_type=weibo_xnr_fans_followers_index_type,
                      id=xnr_user_no,
                      body={"doc": {'user_no': user_no, 'uid': uid,
                                    'fans_list': fans_list,
                                    'followers_list': followers_list}})
        mark = True
    except:
        mark = False
    return mark
def save_role_feature_analysis(role_results,role_label,domain,role_id,task_id): mark = False try: item_exist = es_xnr.get(index=fb_role_index_name,doc_type=fb_role_index_type,id=role_id)['_source'] item_exist['role_pinyin'] = role_id item_exist['role_name'] = role_label item_exist['domains'] = domain item_exist['personality'] = json.dumps(role_results['personality']) item_exist['political_side'] = json.dumps(role_results['political_side']) item_exist['geo'] = json.dumps(role_results['geo']) item_exist['active_time'] = json.dumps(list(role_results['active_time'])) item_exist['day_post_num'] = json.dumps(list(role_results['day_post_num'])) item_exist['psy_feature'] = json.dumps(role_results['psy_feature']) item_exist['member_uids'] = json.dumps(role_results['member_uids']) es_xnr.update(index=fb_role_index_name,doc_type=fb_role_index_type,id=role_id,body={'doc':item_exist}) item_domain = dict() item_domain['compute_status'] = 3 # 存入角色分析结果 es_xnr.update(index=fb_domain_index_name,doc_type=fb_domain_index_type,id=task_id,body={'doc':item_domain}) except Exception, e: item_exist = dict() item_exist['role_pinyin'] = role_id item_exist['role_name'] = role_label item_exist['domains'] = domain item_exist['personality'] = json.dumps(role_results['personality']) item_exist['political_side'] = json.dumps(role_results['political_side']) item_exist['geo'] = json.dumps(role_results['geo']) item_exist['active_time'] = json.dumps(list(role_results['active_time'])) item_exist['day_post_num'] = json.dumps(list(role_results['day_post_num'])) item_exist['psy_feature'] = json.dumps(role_results['psy_feature']) item_exist['member_uids'] = json.dumps(role_results['member_uids']) es_xnr.index(index=fb_role_index_name,doc_type=fb_role_index_type,id=role_id,body=item_exist) item_domain = dict() item_domain['compute_status'] = 3 # 存入角色分析结果 es_xnr.update(index=fb_domain_index_name,doc_type=fb_domain_index_type,id=task_id,body={'doc':item_domain})
def save_detect_results(detect_results, decect_task_information): mark = False task_id = decect_task_information['domain_pinyin'] try: item_exist = es_xnr.get(index=weibo_domain_index_name,doc_type=weibo_domain_index_type,id=task_id)['_source'] item_exist['group_size'] = len(detect_results) item_exist['member_uids'] = detect_results item_exist['compute_status'] = 1 # 存入uid es_xnr.update(index=weibo_domain_index_name,doc_type=weibo_domain_index_type,id=task_id,body={'doc':item_exist}) except Exception, e: item_exist = dict() item_exist['domain_pinyin'] = json.dumps(decect_task_information['domain_pinyin']) item_exist['domain_name'] = json.dumps(decect_task_information['domain_name']) item_exist['create_type'] = json.dumps(decect_task_information['create_type']) item_exist['create_time'] = decect_task_information['create_time'] item_exist['submitter'] = json.dumps(decect_task_information['submitter']) item_exist['remark'] = json.dumps(decect_task_information['remark']) item_exist['group_size'] = len(detect_results) item_exist['member_uids'] = detect_results item_exist['compute_status'] = 1 # 存入uid es_xnr.index(index=weibo_domain_index_name,doc_type=weibo_domain_index_type,id=task_id,body=item_exist)
def get_bussiness_recomment_tweets(xnr_user_no, sort_item):
    """Fetch recommended tweets for an XNR, ranked by the given criterion.

    :param sort_item: one of 'timestamp', 'sensitive_info',
        'sensitive_user', 'influence_info', 'influence_user'
    :return: the matching helper's results; [] for an unknown sort_item
    """
    get_results = es.get(index=weibo_xnr_index_name,
                         doc_type=weibo_xnr_index_type,
                         id=xnr_user_no)['_source']
    monitor_keywords = get_results['monitor_keywords']
    monitor_keywords_list = monitor_keywords.split(',')
    # External sort key -> field name used by the flow-text helper.
    flow_sort_map = {
        'timestamp': 'timestamp',
        'sensitive_info': 'sensitive',
        'influence_info': 'retweeted',
    }
    if sort_item in flow_sort_map:
        return get_tweets_from_flow(monitor_keywords_list, flow_sort_map[sort_item])
    if sort_item == 'sensitive_user':
        return get_tweets_from_user_portrait(monitor_keywords_list, 'sensitive')
    if sort_item == 'influence_user':
        return get_tweets_from_bci(monitor_keywords_list, 'user_index')
    # BUG FIX: an unrecognized sort_item used to raise NameError because
    # es_results was never assigned; return an empty result instead.
    return []
def get_daily_recommend_tweets(theme, sort_item):
    """Return the cached daily-interest posts for a theme.

    Each entry is enriched in place with the author's nick_name and
    photo_url before being returned.
    """
    now_ts = datetime2ts(S_DATE_FB) if S_TYPE == 'test' else int(time.time())
    index_name = daily_interest_index_name_pre + '_' + ts2datetime(now_ts)
    theme_doc = es_xnr.get(index=index_name,
                           doc_type=daily_interest_index_type,
                           id=daily_ch2en[theme])['_source']
    enriched = []
    for entry in json.loads(theme_doc['content']):
        nick_name, photo_url = fb_uid2nick_name_photo(entry['uid'])
        entry['nick_name'] = nick_name
        entry['photo_url'] = photo_url
        enriched.append(entry)
    return enriched
def influence_cover(uid, current_time):
    """Coverage proxy for a twitter account.

    Sums follower/status/friend/favourite counts from the stored user
    profile, treating missing fields as 0, and floors the result at 1 so
    a downstream log() stays finite (log(1) == 0).
    """
    #index_name = twitter_feedback_fans_index_name  # no list available
    try:
        profile = es.get(index=twitter_user_index_name,
                         doc_type=twitter_user_index_type,
                         id=uid)['_source']
        cover_num = (profile.get('followers_count', 0)
                     + profile.get('status_count', 0)
                     + profile.get('friends_count', 0)
                     + profile.get('favourites_count', 0))
        if not cover_num:
            cover_num = 1
    except:
        cover_num = 1
    return cover_num
def getgroup_v2(qq_xnr): group_dict = {} #step0: get qqbot_port if qq_xnr[:4] != 'QXNR': search_result = es.search(index=qq_xnr_index_name,doc_type=qq_xnr_index_type,\ body={'query':{'term':{'qq_number':qq_xnr}}})['hits']['hits'] qq_xnr = search_result[0]['_id'] #try: qq_xnr_es_result = es.get(index=qq_xnr_index_name,\ doc_type=qq_xnr_index_type, id=qq_xnr, _source=True)['_source'] group_info = json.loads(qq_xnr_es_result['group_info']) qqbot_port = qq_xnr_es_result['qqbot_port'] print 'qqbot_port..', qqbot_port p_str = 'qq ' + str(qqbot_port) + ' list group' p = subprocess.Popen(p_str, shell=True, \ stdout=subprocess.PIPE, stderr=subprocess.STDOUT) line_count = 0 for line in p.stdout.readlines(): line_count += 1 #print 'line.==========',line if line_count >= 5 and line_count % 2 == 1: item_line_list = line.split('|') try: #qq_group_number = str(int(item_line_list[2])) qq_uin_number = str(int(item_line_list[7])) #print 'qq_uin_number..',qq_uin_number qq_group_name = item_line_list[4] qq_mark_name = item_line_list[5] # group_dict[qq_group_number] = qq_group_name group_dict[qq_uin_number] = qq_group_name # 如果uin为空,则添加进去uin,如果不为空,则更新群名(因为群名可能修改) for key, value_dict in group_info.iteritems(): mark_name = value_dict['mark_name'] if not qq_mark_name: if qq_mark_name == mark_name: if not qq_group_name in value_dict['group_name']: group_info[key]['group_name'].append( qq_group_name) except: next group_info = json.dumps(group_info) es.update(index=qq_xnr_index_name, doc_type=qq_xnr_index_type, id=qq_xnr, body={'doc': { 'group_info': group_info }}) print 'group_dict::len..', len(group_dict) return group_dict
def executeES(indexName, typeName, listData):
    """Persist a batch of weibo feedback records into ES.

    :param indexName: logical feedback index name; comment/retweet/at/like/
        private records go to a per-day index derived from it
    :param typeName: ES doc_type to write
    :param listData: iterable of JSON strings, one record each
    Side effects only; returns None.
    """
    #current_time = int(time.time())
    #indexName += '_' + ts2datetime(current_time)
    #print 'listData:',listData
    for list_data in listData:
        data = {}
        jsonData = json.loads(list_data)
        for key, val in jsonData.items():
            # print key, '====', val
            data[key] = val
        # data['update_time'] = current_time
        if indexName != 'weibo_feedback_group':
            # Enrich every non-group record with sensor / trace-follow /
            # sensitivity marks derived from the XNR that owns root_uid.
            xnr_user_no = uid2xnr_user_no(data["root_uid"])
            sensor_mark = judge_sensing_sensor(xnr_user_no, data['uid'])
            data['sensor_mark'] = sensor_mark
            trace_follow_mark = judge_trace_follow(xnr_user_no, data['uid'])
            data['trace_follow_mark'] = trace_follow_mark
            data['sensitive_info'] = get_sensitive_info(
                data['timestamp'], data['mid'])
            data['sensitive_user'] = get_sensitive_user(
                data['timestamp'], data['uid'])
        if indexName == 'weibo_feedback_follow':
            # Build the _id and mirror the follow into the fans_followers
            # ES table and redis.
            _id = data["root_uid"] + '_' + data["mid"]
            xnr_user_no = uid2xnr_user_no(data["root_uid"])
            save_type = 'followers'
            follow_type = 'follow'
            if xnr_user_no:
                save_to_fans_follow_ES(xnr_user_no, data["uid"], save_type,
                                       follow_type)
                save_to_redis_fans_follow(xnr_user_no, data["uid"], save_type)
            # sensor_mark = judge_sensing_sensor(xnr_user_no,data['uid'])
            # data['sensor_mark'] = sensor_mark
            # trace_follow_mark = judge_trace_follow(xnr_user_no,data['uid'])
            # data['trace_follow_mark'] = trace_follow_mark
            print 'save to es!!!!', es.index(index=indexName,
                                             doc_type=typeName,
                                             id=_id,
                                             body=data)
        elif indexName == 'weibo_feedback_fans':
            _id = data["root_uid"] + '_' + data["mid"]
            xnr_user_no = uid2xnr_user_no(data["root_uid"])
            save_type = 'fans'
            follow_type = 'follow'
            if xnr_user_no:
                save_to_fans_follow_ES(xnr_user_no, data["uid"], save_type,
                                       follow_type)
                save_to_redis_fans_follow(xnr_user_no, data["uid"], save_type)
            # sensor_mark = judge_sensing_sensor(xnr_user_no,data['uid'])
            # data['sensor_mark'] = sensor_mark
            # trace_follow_mark = judge_trace_follow(xnr_user_no,data['uid'])
            # data['trace_follow_mark'] = trace_follow_mark
            try:
                # Write only when no document with this _id exists yet.
                es.get(index=indexName, doc_type=typeName, id=_id)
            except:
                print 'save to es!!!!', es.index(index=indexName,
                                                 doc_type=typeName,
                                                 id=_id,
                                                 body=data)
        elif indexName == 'weibo_feedback_comment':
            # Per-day index; ensure its mapping exists before writing.
            indexName_date = indexName + '_' + ts2datetime(
                data['timestamp'])
            date_time = ts2datetime(data['timestamp'])
            # print 'date!!!!!!!',date_time
            # print 'indexName_date:::',indexName_date
            mappings_func = weibo_feedback_comment_mappings
            _id = data["mid"]
            # print 'comment_id........',_id
            mappings_func(date_time)
            # print 'data:::',data
            print 'save to es!!!!', es.index(index=indexName_date,
                                             doc_type=typeName,
                                             id=_id,
                                             body=data)
        elif indexName == 'weibo_feedback_retweet':
            # indexName += '_' + ts2datetime(data['timestamp'])
            indexName_date = indexName + '_' + ts2datetime(
                data['timestamp'])
            date_time = ts2datetime(data['timestamp'])
            mappings_func = weibo_feedback_retweet_mappings
            _id = data["mid"]
            mappings_func(date_time)
            print 'save to es!!!!', es.index(index=indexName_date,
                                             doc_type=typeName,
                                             id=_id,
                                             body=data)
        elif indexName == 'weibo_feedback_at':
            # indexName += '_' + ts2datetime(data['timestamp'])
            indexName_date = indexName + '_' + ts2datetime(
                data['timestamp'])
            date_time = ts2datetime(data['timestamp'])
            mappings_func = weibo_feedback_at_mappings
            _id = data["mid"]
            mappings_func(date_time)
            print 'save to es!!!!', es.index(index=indexName_date,
                                             doc_type=typeName,
                                             id=_id,
                                             body=data)
        elif indexName == 'weibo_feedback_like':
            # indexName += '_' + ts2datetime(data['timestamp'])
            indexName_date = indexName + '_' + ts2datetime(
                data['timestamp'])
            date_time = ts2datetime(data['timestamp'])
            mappings_func = weibo_feedback_like_mappings
            _id = data["mid"]
            mappings_func(date_time)
            print 'save to es!!!!', es.index(index=indexName_date,
                                             doc_type=typeName,
                                             id=_id,
                                             body=data)
        elif indexName == 'weibo_feedback_private':
            # indexName += '_' + ts2datetime(data['timestamp'])
            indexName_date = indexName + '_' + ts2datetime(
                data['timestamp'])
            date_time = ts2datetime(data['timestamp'])
            mappings_func = weibo_feedback_private_mappings
            _id = data["mid"]
            mappings_func(date_time)
            print 'save to es!!!!', es.index(index=indexName_date,
                                             doc_type=typeName,
                                             id=_id,
                                             body=data)
        else:
            # Unknown feedback type: write as-is keyed by mid.
            _id = data["mid"]
            print 'save to es!!!!', es.index(index=indexName,
                                             doc_type=typeName,
                                             id=_id,
                                             body=data)
        # print 'data.........',data
        # print 'indexName....',indexName
        # print '_id......',_id
        # #print 'typeName.....',typeName
        # print 'es...',es
        # print 'save to es!!!!',es.index(index=indexName, doc_type=typeName, id=_id, body=data)
    print 'update %s ES done' % indexName
def save_event_warning(xnr_user_no,start_time,end_time): #判断数据库是否存在: today_date=ts2datetime(end_time) today_datetime = datetime2ts(today_date) weibo_event_warning_index_name = weibo_event_warning_index_name_pre+today_date if not es_xnr.indices.exists(index=weibo_event_warning_index_name): weibo_event_warning_mappings(weibo_event_warning_index_name) new_event_warning = create_event_warning(xnr_user_no,start_time,end_time) today_history_event_warning,old_name_list = lookup_history_event_warming(xnr_user_no,today_datetime,end_time) print 'warning!!!',len(new_event_warning) results = [] if new_event_warning: for item in new_event_warning: event_mark = set_intersection(item['event_name'],old_name_list) if event_mark == 1: task_id = xnr_user_no+'_'+ item['event_name'] old_event = es_xnr.get(index=weibo_event_warning_index_name,doc_type=weibo_event_warning_index_type,id=task_id)['_source'] #用户合并 old_event_main_info = json.loads(old_event['main_user_info']) old_event_uid_list = [user['uid'] for user in old_event_main_info] new_event_main_info = json.loads(item['main_user_info']) new_event_uid_list = [user['uid'] for user in new_event_main_info] add_uid_list = list(set(new_event_uid_list) - set(old_event_uid_list)&set(new_event_uid_list)) new_main_user_info = [] item_main_user_info = json.loads(item['main_user_info']) for uid in add_uid_list: uid_info = [u for u in item_main_user_info if u['uid'] == uid] if uid_info: new_main_user_info.append(uid_info[0]) else: pass old_event['main_user_info'] = json.loads(old_event['main_user_info']) old_event['main_user_info'].extend(new_main_user_info) old_event_weibo_info = json.loads(old_event['main_weibo_info']) old_event_mid_list = [content['mid'] for content in old_event_weibo_info] new_event_weibo_info = json.loads(item['main_weibo_info']) new_event_mid_list = [content['mid'] for content in new_event_weibo_info] add_weibo_list = list(set(new_event_mid_list) - set(new_event_mid_list)&set(old_event_mid_list)) new_main_weibo_info = [] 
for mid in add_weibo_list: mid_info = [t for t in item['main_weibo_info'] if t['mid'] == mid] if mid_info: new_main_weibo_info.append(mid_info[0]) else: pass old_event['main_weibo_info'] = json.loads(old_event['main_weibo_info']) old_event['main_weibo_info'].extend(new_main_weibo_info) old_event['event_influence']=old_event['event_influence']+item['event_influence'] try: es_xnr.update(index=weibo_event_warning_index_name,doc_type=weibo_event_warning_index_type,id=task_id) mark=True except: mark=False else: #直接存储 task_id=xnr_user_no+'_'+ item['event_name'] try: es_xnr.index(index=weibo_event_warning_index_name,doc_type=weibo_event_warning_index_type,body=item,id=task_id) mark=True except: mark=False results.append(mark) else: pass print 'event_waring::',results return results
def create_event_warning(xnr_user_no, start_time, end_time):
    """Build event-warning records from today's sensitive hashtag activity.

    For each hashtag, collects sensitive weibos in [start_time, end_time],
    scores each weibo's influence (boosted when the author is followed by
    the XNR), and aggregates per-event influence, mean time, top weibos
    and top participating users.

    :return: list of event-warning dicts ready to be indexed
    """
    # Fetch candidate event names (hashtags) for the day.
    today_datetime = start_time
    hashtag_list = get_hashtag(today_datetime)
    #print 'hashtag_list::',hashtag_list
    flow_text_index_name = get_day_flow_text_index_list(today_datetime)
    # The XNR's fans and followers lists (empty when the doc is missing).
    try:
        es_xnr_result = es_xnr.get(index=weibo_xnr_fans_followers_index_name, doc_type=weibo_xnr_fans_followers_index_type, id=xnr_user_no)['_source']
        followers_list = es_xnr_result['followers_list']
        fans_list = es_xnr_result['fans_list']
    except:
        followers_list = []
        fans_list = []
    event_warming_list = []
    event_num = 0
    for event_item in hashtag_list:
        event_sensitive_count = 0  # NOTE(review): never incremented/used
        # Per-event record: name, main users, typical weibos, influence,
        # mean event time.
        event_warming_content = dict()
        event_warming_content['event_name'] = event_item['event_name']
        print 'event_name:', event_item
        event_num = event_num + 1
        print 'event_num:::', event_num
        print 'first_time:::', int(time.time())
        event_influence_sum = 0
        event_time_sum = 0
        # Sensitive weibos tagged with this event, most sensitive first.
        query_body = {
            'query': {
                # 'bool':{
                #     'must':[{'wildcard':{'text':'*'+event_item[0]+'*'}},
                #             {'range':{'sensitive':{'gte':1}}}]
                # }
                'filtered': {
                    'filter': {
                        'bool': {
                            'must': [
                                {'term': {'hashtag': event_item['event_name']}},
                                {'range': {'sensitive': {'gte': 1}}},
                                {'range': {'timestamp': {'gte': start_time, 'lte': end_time}}}
                            ]
                        }
                    }
                }
            },
            'size': MAX_WARMING_SIZE,
            'sort': {'sensitive': {'order': 'desc'}}
        }
        #try:
        event_results = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type, body=query_body)['hits']['hits']
        print 'event:::', len(event_results), start_time, end_time
        if event_results:
            weibo_result = []
            fans_num_dict = dict()       # NOTE(review): populated nowhere
            followers_num_dict = dict()  # NOTE(review): populated nowhere
            alluser_num_dict = dict()    # uid -> weighted post count
            print 'sencond_time:::', int(time.time())
            for item in event_results:
                #print 'event_content:',item['_source']['text']
                # Count posts per user; posts by followed users count double.
                if alluser_num_dict.has_key(str(item['_source']['uid'])):
                    followers_mark = set_intersection(item['_source']['uid'], followers_list)
                    if followers_mark > 0:
                        alluser_num_dict[str(item['_source']['uid'])] = alluser_num_dict[str(item['_source']['uid'])] + 1 * 2
                    else:
                        alluser_num_dict[str(item['_source']['uid'])] = alluser_num_dict[str(item['_source']['uid'])] + 1
                else:
                    alluser_num_dict[str(item['_source']['uid'])] = 1
                # Influence: engagement scaled by sensitivity, then by the
                # author's relationship to the XNR.
                origin_influence_value = (1 + item['_source']['comment'] + item['_source']['retweeted']) * (1 + item['_source']['sensitive'])
                # fans_value=judge_user_type(item['_source']['uid'],fans_list)
                followers_value = judge_user_type(item['_source']['uid'], followers_list)
                item['_source']['weibo_influence_value'] = origin_influence_value * (followers_value)
                item['_source']['nick_name'] = get_user_nickname(item['_source']['uid'])
                weibo_result.append(item['_source'])
                # Accumulate totals for the per-event averages below.
                event_influence_sum = event_influence_sum + item['_source']['weibo_influence_value']
                event_time_sum = event_time_sum + item['_source']['timestamp']
            print 'third_time:::', int(time.time())
            # Typical weibos: de-duplicated, highest influence first.
            the_weibo_result = remove_repeat_v2(weibo_result)
            the_weibo_result.sort(key=lambda k: (k.get('weibo_influence_value', 0)), reverse=True)
            event_warming_content['main_weibo_info'] = json.dumps(the_weibo_result)
            # Event influence and mean timestamp.
            number = len(event_results)
            event_warming_content['event_influence'] = event_influence_sum / number
            event_warming_content['event_time'] = event_time_sum / number
        # except:
        #     event_warming_content['main_weibo_info']=[]
        #     event_warming_content['event_influence']=0
        #     event_warming_content['event_time']=0
        # try:
            # Rank users by weighted post count (becomes a list of pairs).
            alluser_num_dict = sorted(alluser_num_dict.items(), key=lambda d: d[1], reverse=True)
            main_userid_list = []
            for i in xrange(0, len(alluser_num_dict)):
                main_userid_list.append(alluser_num_dict[i][0])
            # Main participating users' profile info.
            main_user_info = []
            user_es_result = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={'ids': main_userid_list})['docs']
            for item in user_es_result:
                user_dict = dict()
                if item['found']:
                    user_dict['photo_url'] = item['_source']['photo_url']
                    user_dict['uid'] = item['_id']
                    user_dict['nick_name'] = item['_source']['nick_name']
                    user_dict['favoritesnum'] = item['_source']['favoritesnum']
                    user_dict['fansnum'] = item['_source']['fansnum']
                else:
                    # Profile missing: keep the uid with blank fields.
                    user_dict['photo_url'] = ''
                    user_dict['uid'] = item['_id']
                    user_dict['nick_name'] = ''
                    user_dict['favoritesnum'] = 0
                    user_dict['fansnum'] = 0
                main_user_info.append(user_dict)
            event_warming_content['main_user_info'] = json.dumps(main_user_info)
            # except:
            #     event_warming_content['main_user_info']=[]
            print 'fourth_time:::', int(time.time())
            event_warming_content['xnr_user_no'] = xnr_user_no
            event_warming_content['validity'] = 0
            event_warming_content['timestamp'] = today_datetime
            event_warming_list.append(event_warming_content)
        else:
            pass
    print 'fifth_time:::', int(time.time())
    return event_warming_list
def _add_uid_to_field(doc, field, uid):
    # Return doc[field] with uid added, deduplicated; tolerate a missing field.
    uids = set(doc.get(field, []))
    uids.add(uid)
    return list(uids)


def save_to_fans_follow_ES(xnr_user_no, uid, save_type, follow_type, trace_type='ordinary_follow'):
    """
    Persist a fans/followers relationship change for a weibo virtual user (XNR).

    :param xnr_user_no: XNR identifier; also used as the ES document id.
    :param uid: the weibo uid being (un)followed or added as a fan.
    :param save_type: 'followers' to touch the following lists, 'fans' for fans_list.
    :param follow_type: 'follow' or 'unfollow'; only meaningful for 'followers'.
    :param trace_type: 'trace_follow' additionally records uid in trace_follow_list.
    :return: True on success; False when an unfollow cannot be applied
             (no document, or the update raised).

    NOTE(review): the original code indexed followers_list=[uid] even when the
    document was missing and follow_type == 'unfollow', i.e. an unfollow could
    CREATE a follower; and it never created trace_follow_list for a brand-new
    document. Both are fixed here.
    """
    index_name = weibo_xnr_fans_followers_index_name
    index_type = weibo_xnr_fans_followers_index_type

    # Single fetch; doc is None when the relationship document does not exist yet.
    try:
        doc = es_xnr.get(index=index_name, doc_type=index_type, id=xnr_user_no)['_source']
    except Exception:
        doc = None

    if save_type == 'followers':
        if doc is None:
            if follow_type == 'unfollow':
                # Nothing to remove from a non-existent document.
                return False
            body_info = {'xnr_user_no': xnr_user_no, 'followers_list': [uid]}
            if trace_type == 'trace_follow':
                # A traced follow is also an ordinary follow.
                body_info['trace_follow_list'] = [uid]
            es_xnr.index(index=index_name, doc_type=index_type, id=xnr_user_no, body=body_info)
        elif follow_type == 'follow':
            doc['followers_list'] = _add_uid_to_field(doc, 'followers_list', uid)
            if trace_type == 'trace_follow':
                doc['trace_follow_list'] = _add_uid_to_field(doc, 'trace_follow_list', uid)
            es_xnr.update(index=index_name, doc_type=index_type, id=xnr_user_no, body={'doc': doc})
        elif follow_type == 'unfollow':
            try:
                doc['followers_list'] = list(set(doc['followers_list']).difference(set([uid])))
                es_xnr.update(index=index_name, doc_type=index_type, id=xnr_user_no, body={'doc': doc})
            except Exception:
                return False
    elif save_type == 'fans':
        if doc is None:
            body_info = {'xnr_user_no': xnr_user_no, 'fans_list': [uid]}
            es_xnr.index(index=index_name, doc_type=index_type, id=xnr_user_no, body=body_info)
        else:
            doc['fans_list'] = _add_uid_to_field(doc, 'fans_list', uid)
            es_xnr.update(index=index_name, doc_type=index_type, id=xnr_user_no, body={'doc': doc})
    return True
else: fb_type = 'stranger' return fb_type ## 判断是否为敏感人物传感器 def judge_fb_sensing_sensor(xnr_user_no,uid): try: exist_item = es_xnr.exists(index=fb_index_sensing,doc_type=fb_type_sensing,id=xnr_user_no) except Exception,e: print e return False if not exist_item: return False else: get_result = es_xnr.get(index=fb_index_sensing,doc_type=fb_type_sensing,id=xnr_user_no)['_source'] social_sensors = get_result['social_sensors'] if uid in social_sensors: return True else: return False ## twitter判断关注类型 def judge_tw_follow_type(xnr_user_no,uid): exist_item = es_xnr.exists(index=tw_xnr_fans_followers_index_name,doc_type=tw_xnr_fans_followers_index_type,\ id=xnr_user_no) if not exist_item: tw_type = 'stranger' else: es_get = es_xnr.get(index=tw_xnr_fans_followers_index_name,doc_type=tw_xnr_fans_followers_index_type,\ id=xnr_user_no)['_source']