def get_daily_recommend_tweets(theme, sort_item):
    """Return today's recommended posts for a daily-interest theme.

    The theme name is translated through ``daily_ch2en`` and used as the
    document id in the per-day daily-interest index.  The stored document's
    ``content`` field is a JSON string holding a list of post dicts; each
    post is enriched in place with the author's ``nick_name`` and
    ``photo_url`` before being returned.

    :param theme: theme name, must be a key of ``daily_ch2en``.
    :param sort_item: accepted for interface compatibility; not used here.
    :return: list of post dicts in the order they were stored.
    :raises KeyError: if ``theme`` is not a key of ``daily_ch2en``.
    """
    # In test mode the "current" day is pinned to the configured test date.
    now_ts = datetime2ts(S_DATE_FB) if S_TYPE == 'test' else int(time.time())
    day_str = ts2datetime(now_ts)
    index_name = daily_interest_index_name_pre + '_' + day_str
    theme_en = daily_ch2en[theme]

    stored_doc = es.get(index=index_name,
                        doc_type=daily_interest_index_type,
                        id=theme_en)['_source']
    posts = json.loads(stored_doc['content'])

    results_all = []
    for post in posts:
        # Entries are used as-is: they are read directly, not through an
        # Elasticsearch ``_source`` envelope.
        post['nick_name'], post['photo_url'] = fb_uid2nick_name_photo(post['uid'])
        results_all.append(post)
    return results_all
def get_tweets_from_flow(monitor_keywords_list, sort_item_new):
    """Return yesterday's top flow-text posts matching any monitored keyword.

    Each keyword becomes a ``*keyword*`` wildcard clause on
    ``keywords_string``; the clauses are combined under ``bool.should``.
    Hits are ordered by ``sort_item_new`` (descending) and then by
    ``timestamp`` (descending).  When the keyword query yields nothing, the
    function falls back to a ``match_all`` query on the same index sorted by
    ``sort_item_new`` only.

    :param monitor_keywords_list: iterable of keyword strings.
    :param sort_item_new: name of the field to sort on, descending.
    :return: list of ``_source`` dicts, each extended in place with
        ``nick_name`` and ``photo_url``.
    """
    keyword_clauses = [
        {'wildcard': {'keywords_string': '*' + keyword + '*'}}
        for keyword in monitor_keywords_list
    ]
    ranked_query = {
        'query': {'bool': {'should': keyword_clauses}},
        'sort': [
            {sort_item_new: {'order': 'desc'}},
            {'timestamp': {'order': 'desc'}},
        ],
        'size': TOP_WEIBOS_LIMIT,
    }

    # In test mode "now" is pinned to the configured test date.
    now_ts = datetime2ts(S_DATE_FB) if S_TYPE == 'test' else int(time.time())
    # The index queried is the one for the previous day (now minus 24 hours).
    index_name = facebook_flow_text_index_name_pre + ts2datetime(now_ts - 24 * 3600)

    hits = es.search(index=index_name,
                     doc_type=facebook_flow_text_index_type,
                     body=ranked_query)['hits']['hits']
    if not hits:
        # Nothing matched the keywords: fall back to the top posts overall.
        fallback_query = {
            'query': {'match_all': {}},
            'size': TOP_WEIBOS_LIMIT,
            'sort': {sort_item_new: {'order': 'desc'}},
        }
        hits = es.search(index=index_name,
                         doc_type=facebook_flow_text_index_type,
                         body=fallback_query)['hits']['hits']

    results_all = []
    for hit in hits:
        tweet = hit['_source']
        tweet['nick_name'], tweet['photo_url'] = fb_uid2nick_name_photo(tweet['uid'])
        results_all.append(tweet)
    return results_all
def get_hot_recommend_tweets(xnr_user_no, topic_field, sort_item):
    """Return today's top social-sensing posts for a topic field.

    The topic name is translated through ``topic_ch2en_dict`` and used as a
    ``term`` filter on ``topic_field``; hits are sorted by ``sort_item``
    (descending).  When the filtered query yields nothing, the function
    falls back to a ``match_all`` query on the same index with the same
    sort.

    :param xnr_user_no: accepted for interface compatibility; not used here.
    :param topic_field: topic name, must be a key of ``topic_ch2en_dict``.
    :param sort_item: name of the field to sort on, descending.  The value
        ``'compute_status'`` is not queried and yields an empty list.
    :return: list of ``_source`` dicts, each extended in place with
        ``nick_name`` and ``photo_url``.
    :raises KeyError: if ``topic_field`` is not a key of ``topic_ch2en_dict``.
    """
    topic_field_en = topic_ch2en_dict[topic_field]

    if sort_item == 'compute_status':
        # The previous code built its query only for other sort keys and then
        # failed with UnboundLocalError for this one.  Return "no results"
        # explicitly instead.
        # NOTE(review): presumably this key is excluded because it is not a
        # sortable field of the sensing index -- confirm the intended result.
        return []

    query_body = {
        'query': {
            'bool': {
                'must': [{
                    'filtered': {
                        'filter': {
                            'term': {'topic_field': topic_field_en}
                        }
                    }
                }]
            }
        },
        'sort': {sort_item: {'order': 'desc'}},
        'size': TOP_WEIBOS_LIMIT
    }

    # In test mode "now" is pinned to the configured test date.
    current_time = time.time()
    if S_TYPE == 'test':
        current_time = datetime2ts(S_DATE_FB)
    fb_social_sensing_index_name = (
        fb_social_sensing_index_name_pre + ts2datetime(current_time))

    es_results = es.search(index=fb_social_sensing_index_name,
                           doc_type=fb_social_sensing_index_type,
                           body=query_body)['hits']['hits']
    if not es_results:
        # Nothing for this topic: fall back to the top posts overall.
        fallback_body = {
            'query': {'match_all': {}},
            'size': TOP_WEIBOS_LIMIT,
            'sort': {sort_item: {'order': 'desc'}}
        }
        es_results = es.search(index=fb_social_sensing_index_name,
                               doc_type=fb_social_sensing_index_type,
                               body=fallback_body)['hits']['hits']

    results_all = []
    for hit in es_results:
        result = hit['_source']
        result['nick_name'], result['photo_url'] = fb_uid2nick_name_photo(result['uid'])
        results_all.append(result)
    return results_all
def read_tracing_followers_tweet():
    """Queue timed-retweet tasks for today's posts by traced accounts.

    For every fans/followers record (only ``FXNR0003`` in test mode) the
    function reads its ``trace_follow_list``, searches today's flow-text
    index for posts whose ``uid`` is in that list, and stores one task per
    post in the retweet-timing index under the id ``<xnr_user_no>_<fid>``.
    Posts whose task id already exists are skipped.  Each task's
    ``timestamp_set`` is the post timestamp plus a random offset between
    ``RETWEET_START_TS`` and ``RETWEET_END_TS``.

    :return: None; the result is the set of documents written to
        ``fb_xnr_retweet_timing_list_index_name``.
    """
    if S_TYPE == 'test':
        query_body = {
            'query': {
                'term': {'xnr_user_no': 'FXNR0003'}
            },
            'size': MAX_SEARCH_SIZE
        }
    else:
        query_body = {
            'query': {
                'match_all': {}
            },
            'size': MAX_SEARCH_SIZE
        }

    results = es_xnr.search(index=fb_xnr_fans_followers_index_name,
                            doc_type=fb_xnr_fans_followers_index_type,
                            body=query_body)['hits']['hits']
    if not results:
        return

    # The target index depends only on the run date, so resolve it once
    # instead of once per account.  In test mode the date is pinned.
    if S_TYPE == 'test':
        current_time = datetime2ts(S_DATE_FB)
    else:
        current_time = int(time.time())
    flow_text_index_name = facebook_flow_text_index_name_pre + ts2datetime(current_time)

    for hit in results:
        result = hit['_source']
        xnr_user_no = result['xnr_user_no']
        trace_follow_list = result.get('trace_follow_list')
        # Single-argument form prints the same text whether ``print`` is the
        # Python 2 statement or the Python 3 function.
        print('trace_follow_list:::' + ' ' + str(trace_follow_list))

        if not trace_follow_list:
            # No traced accounts recorded: nothing to search for.
            continue

        query_body_flow = {
            'query': {
                'filtered': {
                    'filter': {
                        'terms': {'uid': trace_follow_list}
                    }
                }
            },
            'size': MAX_SEARCH_SIZE
        }
        # Uses the same doc type constant that the other flow-text query in
        # this file pairs with ``facebook_flow_text_index_name_pre``.
        # NOTE(review): that other query goes through ``es`` while this one
        # uses ``es_xnr`` -- confirm both clients reach the flow-text index.
        results_flow = es_xnr.search(index=flow_text_index_name,
                                     doc_type=facebook_flow_text_index_type,
                                     body=query_body_flow)['hits']['hits']

        for flow_hit in results_flow:
            result_flow = flow_hit['_source']
            fid = result_flow['fid']
            task_id = xnr_user_no + '_' + fid

            # First check whether this fid has already been stored.
            try:
                es_xnr.get(index=fb_xnr_retweet_timing_list_index_name,
                           doc_type=fb_xnr_retweet_timing_list_index_type,
                           id=task_id)['_source']
                already_queued = True
            except Exception:
                # Any lookup failure is treated as "not stored yet", as
                # before; unlike a bare ``except`` this lets
                # KeyboardInterrupt and SystemExit propagate.
                already_queued = False

            if already_queued:
                # Already added: skip.
                continue

            # Not added yet: build the task and store it.
            task_detail = {}
            task_detail['xnr_user_no'] = xnr_user_no
            task_detail['fid'] = fid
            task_detail['text'] = result_flow['text']
            task_detail['uid'] = result_flow['uid']
            task_detail['nick_name'], task_detail['photo_url'] = \
                fb_uid2nick_name_photo(result_flow['uid'])
            task_detail['timestamp'] = result_flow['timestamp']
            # Scheduled time: post time plus a random offset in the
            # configured range.
            task_detail['timestamp_set'] = result_flow['timestamp'] + \
                random.randint(RETWEET_START_TS, RETWEET_END_TS)
            task_detail['compute_status'] = 0

            es_xnr.index(index=fb_xnr_retweet_timing_list_index_name,
                         doc_type=fb_xnr_retweet_timing_list_index_type,
                         body=task_detail, id=task_id)