def search_user_type(uid_list):
    """Split *uid_list* into personal-user ids and organization ids.

    Fetches each uid's ``verified_type`` field from the user-profile ES
    index.  Uids whose verified_type is in the module-level ``org_list``
    are classified as organizations; missing documents or missing fields
    default to "person".

    :param uid_list: list of uid strings to classify.
    :return: tuple ``(user_uids, org_uids)`` of uid-string lists.
    """
    docs = es_user_profile.mget(index=profile_index_name,
                                doc_type=profile_index_type,
                                body={'ids': uid_list},
                                _source=False,
                                fields=['id', 'verified_type'])['docs']
    user_uids = []
    org_uids = []
    for doc in docs:
        # No profile document at all: default to personal user.
        if not doc['found']:
            user_uids.append(doc['_id'])
            continue
        # dict.get replaces the deprecated has_key() (removed in Py3)
        # and also tolerates an empty field list.
        verified = doc['fields'].get('verified_type')
        if not verified:
            user_uids.append(doc['_id'])
            continue
        if int(verified[0]) in org_list:
            org_uids.append(doc['_id'])
        else:
            user_uids.append(doc['_id'])
    return user_uids, org_uids
def show_weibo_list(message_type, ts, sort_item):
    """Return up to 100 weibos of *message_type* detected at *ts*.

    Results are sorted descending by *sort_item* and each entry is
    enriched with the author's ``nick_name`` and ``photo_url`` from the
    profile index (falling back to the uid when the nick name is empty).
    Returns [] when the text index has no matching hits.
    """
    search_body = {
        "query": {
            "bool": {
                "must": [
                    {"term": {"type": int(message_type)}},
                    {"term": {"detect_ts": int(ts)}}
                ]
            }
        },
        "size": 100,
        "sort": {sort_item: {"order": "desc"}}
    }
    wanted_keys = ["text", "retweeted", "keywords_string", "mid",
                   "comment", "user_fansnum", "timestamp", "geo", "uid"]
    hits = es_prediction.search(index="social_sensing_text",
                                doc_type="text",
                                body=search_body)["hits"]["hits"]
    if not hits:
        return []
    weibo_list = []
    author_uids = []
    for hit in hits:
        source = hit["_source"]
        # Project only the display fields we need.
        weibo_list.append(dict((key, source[key]) for key in wanted_keys))
        author_uids.append(source["uid"])
    profile_docs = es_user_profile.mget(index=profile_index_name,
                                        doc_type=profile_index_type,
                                        body={"ids": author_uids})["docs"]
    # mget preserves request order, so profile_docs lines up with weibo_list.
    for idx, profile in enumerate(profile_docs):
        if not profile["found"]:
            continue
        source = profile["_source"]
        nick = source["nick_name"]
        if not nick:
            nick = author_uids[idx]
        weibo_list[idx].update({"photo_url": source["photo_url"],
                                "nick_name": nick})
    return weibo_list
def get_final_submit_user_info(uid_list):
    """Collect display info for each uid in *uid_list*.

    Looks up the profile index for name/location, the BCI-history index
    for fan/weibo counts and yesterday's BCI, and normalizes the BCI
    score against the current maximum.

    :return: list of ``[uid, uname, location, fansnum, statusnum,
             normal_bci]``; each field is ``''`` when unavailable.
    """
    final_results = []
    try:
        profile_results = es_user_profile.mget(
            index=profile_index_name,
            doc_type=profile_index_type,
            body={'ids': uid_list})['docs']
    except Exception:  # ES unreachable / bad request: degrade to empty
        profile_results = []
    try:
        bci_history_results = es_bci_history.mget(
            index=bci_history_index_name,
            doc_type=bci_history_index_type,
            body={'ids': uid_list})['docs']
    except Exception:
        bci_history_results = []
    # Yesterday's BCI field name; its max value normalizes the scores.
    now_time_ts = time.time()
    search_date_ts = datetime2ts(ts2datetime(now_time_ts - DAY))
    bci_key = 'bci_' + str(search_date_ts)
    query_body = {
        'query': {
            'match_all': {}
        },
        'sort': [{
            bci_key: {
                'order': 'desc'
            }
        }],
        'size': 1
    }
    bci_max_result = es_bci_history.search(index=bci_history_index_name,
                                           doc_type=bci_history_index_type,
                                           body=query_body,
                                           _source=False,
                                           fields=[bci_key])['hits']['hits']
    if bci_max_result:
        bci_max_value = bci_max_result[0]['fields'][bci_key][0]
    else:
        bci_max_value = MAX_VALUE
    for iter_count, uid in enumerate(uid_list):
        # mget preserves order, but guard against short result lists.
        try:
            profile_item = profile_results[iter_count]
        except IndexError:
            profile_item = {}
        try:
            bci_history_item = bci_history_results[iter_count]
        except IndexError:
            bci_history_item = {}
        if profile_item and profile_item['found'] == True:
            uname = profile_item['_source']['nick_name']
            location = profile_item['_source']['user_location']
        else:
            uname = ''
            location = ''
        if bci_history_item and bci_history_item['found'] == True:
            fansnum = bci_history_item['_source']['user_fansnum']
            statusnum = bci_history_item['_source']['weibo_month_sum']
            try:
                bci = bci_history_item['_source'][bci_key]
                # float() avoids Python 2 integer division, which would
                # truncate bci/bci_max_value to 0 and pin normal_bci at 0.
                normal_bci = math.log(
                    float(bci) / bci_max_value * 9 + 1, 10) * 100
            except Exception:  # field missing for this uid
                normal_bci = ''
        else:
            fansnum = ''
            statusnum = ''
            normal_bci = ''
        final_results.append(
            [uid, uname, location, fansnum, statusnum, normal_bci])
    return final_results
def submit_identify_in_uid(input_data): date = input_data['date'] submit_user = input_data['user'] operation_type = input_data['operation_type'] compute_status = input_data['compute_status'] relation_string = input_data['relation_string'] recommend_style = input_data['recommend_style'] hashname_submit = 'submit_recomment_' + date hashname_influence = 'recomment_' + date + '_influence' hashname_sensitive = 'recomment_' + date + '_sensitive' compute_hash_name = 'compute' # submit_user_recomment = 'recomment_' + submit_user + '_' + str(date) auto_recomment_set = set(r.hkeys(hashname_influence)) | set( r.hkeys(hashname_sensitive)) upload_data = input_data['upload_data'] line_list = upload_data.split('\n') uid_list = [] invalid_uid_list = [] for line in line_list: uid = line.split('\r')[0] #if len(uid)==10: # uid_list.append(uid) if uid != '': uid_list.append(uid) if len(invalid_uid_list) != 0: return False, 'invalid user info', invalid_uid_list #identify the uid is not exist in user_portrait and compute #step1: filter in user_portrait new_uid_list = [] have_in_uid_list = [] try: exist_portrait_result = es_user_profile.mget( index=profile_index_name, doc_type=profile_index_type, body={'ids': uid_list}, _source=False)['docs'] except: exist_portrait_result = [] if exist_portrait_result: for exist_item in exist_portrait_result: if exist_item['found'] == False: new_uid_list.append(exist_item['_id']) else: have_in_uid_list.append(exist_item['_id']) else: new_uid_list = uid_list #step2: filter in compute new_uid_set = set(new_uid_list) compute_set = set(r.hkeys('compute')) in_uid_set = list(new_uid_set - compute_set) print 'new_uid_set:', new_uid_set print 'in_uid_set:', in_uid_set if len(in_uid_set) == 0: return False, 'all user in' #identify the final add user final_submit_user_list = [] for in_item in in_uid_set: if in_item in auto_recomment_set: tmp = json.loads(r.hget(hashname_submit, in_item)) recommentor_list = tmp['operation'].split('&') 
recommentor_list.append(str(submit_user)) new_list = list(set(recommentor_list)) tmp['operation'] = '&'.join(new_list) else: tmp = {'system': '0', 'operation': submit_user} if operation_type == 'submit': r.hset( compute_hash_name, in_item, json.dumps([ in_date, compute_status, relation_string, recommend_style, submit_user, 0 ])) r.hset(hashname_submit, in_item, json.dumps(tmp)) # r.hset(submit_user_recomment, in_item, '0') final_submit_user_list.append(in_item) return True, invalid_uid_list, have_in_uid_list, final_submit_user_list
doc_type=DOCTYPE_SENSITIVE_INDEX, body=query_sensitive_body, _source=False, fields=[sensitive_string])['hits']['hits'] top_sensitive = top_sensitive_result[0]['fields'][sensitive_string][0] except Exception, reason: print Exception, reason top_sensitive = 400 index_type = 'bci' user_bci_result = es_cluster.mget( index=index_name, doc_type=index_type, body={'ids': uid_list}, _source=True)['docs'] #INFLUENCE,fans,status user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids': uid_list}, _source=True)['docs'] #个人姓名,注册地 # bci_history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={"ids":uid_list}, fields=['user_fansnum', 'weibo_month_sum'])['docs'] # sensitive_history_result = es_bci_history.mget(index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX, body={'ids':uid_list}, fields=[sensitive_string], _source=False)['docs'] max_evaluate_influ = get_evaluate_max(index_name) for i in range(0, len(uid_list)): uid = uid_list[i] bci_dict = user_bci_result[i] profile_dict = user_profile_result[i] # bci_history_dict = bci_history_result[i] # sensitive_history_dict = sensitive_history_result[i] #print sensitive_history_dict try: bci_source = bci_dict['_source'] except: bci_source = None
def current_status(mid):
    """Build the hot-retweet / hot-comment panel for weibo *mid*.

    Reads the seed weibo from the "social_sensing_text" index, then over a
    two-day flow-text window starting at the seed timestamp aggregates the
    most active directed uids under root_mid == mid: message_type 3 counts
    go into retweet_dict, message_type 2 counts into comment_dict (the
    type-to-meaning mapping is implied by the dict names -- confirm against
    the indexer).  Each hot uid is enriched with profile info and one
    representative weibo, and the function returns
    {"hot_retweeted": [...], "hot_comment": [...]}, sorted by retweet /
    comment count respectively.
    """
    # Seed weibo: provides the author uid and the starting timestamp.
    es_results = es_prediction.get(index="social_sensing_text",
                                   doc_type="text",
                                   id=mid)["_source"]
    uid = es_results["uid"]
    ts = es_results["timestamp"]
    print "mid result: ", es_results
    # Top-11 directed uids of message_type 3 under this root mid.
    query_body = {
        "query": {
            "bool": {
                "must": [{
                    "term": {
                        "root_mid": mid
                    }
                }, {
                    "term": {
                        "message_type": 3
                    }
                }]
            }
        },
        "aggs": {
            "hot_uid": {
                "terms": {
                    "field": "directed_uid",
                    "size": 11
                }
            }
        }
    }
    # Collect the daily flow-text indices that exist for a two-day window.
    # NOTE: this loop advances `ts` by two days as a side effect; the
    # later query_retweeted calls receive the advanced value.
    index_list = []
    for i in range(2):
        index_name = flow_text_index_name_pre + ts2datetime(ts)
        if es_flow_text.indices.exists(index=index_name):
            index_list.append(index_name)
        ts = ts + 3600 * 24
    results = es_flow_text.search(
        index=index_list,
        doc_type=flow_text_index_type,
        body=query_body)["aggregations"]["hot_uid"]["buckets"]
    # uid -> doc count, excluding the seed author itself.
    retweet_dict = dict()
    for item in results:
        iter_uid = item["key"]
        if str(iter_uid) == str(uid):
            continue
        else:
            retweet_dict[str(iter_uid)] = item["doc_count"]
    print "retweet_dict: ", retweet_dict
    # Same aggregation for message_type 2.
    query_body = {
        "query": {
            "bool": {
                "must": [{
                    "term": {
                        "root_mid": mid
                    }
                }, {
                    "term": {
                        "message_type": 2
                    }
                }]
            }
        },
        "aggs": {
            "hot_uid": {
                "terms": {
                    "field": "directed_uid",
                    "size": 11
                }
            }
        }
    }
    # NOTE(review): index_name is recomputed here (with the already
    # advanced ts) but never used -- the search below runs on index_list.
    index_name = flow_text_index_name_pre + ts2datetime(ts)
    results = es_flow_text.search(
        index=index_list,
        doc_type=flow_text_index_type,
        body=query_body)["aggregations"]["hot_uid"]["buckets"]
    comment_dict = dict()
    for item in results:
        iter_uid = str(item["key"])
        if iter_uid == str(uid):
            continue
        else:
            comment_dict[iter_uid] = item["doc_count"]
    print "comment_dict: ", comment_dict
    # user_profile: nick name / avatar for every hot uid.
    uid_list = list(set(comment_dict.keys()) | set(retweet_dict.keys()))
    profile_results = es_user_profile.mget(index=profile_index_name,
                                           doc_type=profile_index_type,
                                           body={"ids": uid_list})["docs"]
    profile_dict = dict()
    for item in profile_results:
        if item["found"]:
            item = item["_source"]
            iter_uid = str(item["uid"])
            tmp = dict()
            tmp["nick_name"] = item["nick_name"]
            if not tmp["nick_name"]:
                # Empty nick name: fall back to the uid itself.
                tmp["nick_name"] = iter_uid
            tmp["photo_url"] = item["photo_url"]
            profile_dict[iter_uid] = tmp
        else:
            tmp = dict()
            tmp["nick_name"] = item["_id"]
            tmp["photo_url"] = ""
            # NOTE(review): iter_uid is stale here -- it still holds the
            # uid from the previous *found* profile, so this fallback
            # entry is keyed wrongly (likely meant item["_id"]).
            profile_dict[iter_uid] = tmp
    # One representative weibo per hot retweeting uid (first of <=100 hits).
    hot_retweet_list = []
    retweet_uid_list = retweet_dict.keys()
    retweet_list = es_flow_text.search(index=index_list,
                                       doc_type="text",
                                       body={
                                           "query": {
                                               "bool": {
                                                   "must": [{
                                                       "terms": {
                                                           "uid": retweet_uid_list
                                                       }
                                                   }, {
                                                       "term": {
                                                           "root_mid": mid
                                                       }
                                                   }]
                                               }
                                           },
                                           "size": 100
                                       })["hits"]["hits"]
    in_set = set()
    for item in retweet_list:
        item = item["_source"]
        iter_uid = str(item["uid"])
        if iter_uid in in_set:
            # Keep only the first weibo per uid.
            continue
        else:
            in_set.add(iter_uid)
            item["retweeted"] = retweet_dict[iter_uid]
            item["comment"] = query_retweeted(
                iter_uid, mid, ts, 2)  # comment count of the retweeting weibo
            item.update(profile_dict[iter_uid])
            hot_retweet_list.append(item)
    hot_retweet_list = sorted(hot_retweet_list,
                              key=lambda x: x["retweeted"],
                              reverse=True)
    # Same enrichment for the hot commenting uids.
    hot_comment_list = []
    comment_uid_list = comment_dict.keys()
    comment_list = es_flow_text.search(index=index_list,
                                       doc_type="text",
                                       body={
                                           "query": {
                                               "bool": {
                                                   "must": [{
                                                       "terms": {
                                                           "uid": comment_uid_list
                                                       }
                                                   }, {
                                                       "term": {
                                                           "root_mid": mid
                                                       }
                                                   }]
                                               }
                                           },
                                           "size": 100
                                       })["hits"]["hits"]
    in_set = set()
    for item in comment_list:
        item = item["_source"]
        iter_uid = str(item["uid"])
        if iter_uid in in_set:
            continue
        else:
            in_set.add(iter_uid)
            item["comment"] = comment_dict[iter_uid]
            item["retweeted"] = query_retweeted(
                iter_uid, mid, ts, 3)  # retweet count of the commenting weibo
            item.update(profile_dict[iter_uid])
            hot_comment_list.append(item)
    hot_comment_list = sorted(hot_comment_list,
                              key=lambda x: x["comment"],
                              reverse=True)
    results = dict()
    results["hot_retweeted"] = hot_retweet_list
    results["hot_comment"] = hot_comment_list
    return results