def search_influence_detail(uid_list, index_name, doctype):
    """Fetch influence docs for *uid_list* and return the first hit's source.

    uid_list: ids passed straight to an ES multi-get.
    index_name / doctype: target index and document type.
    """
    docs = es.mget(index=index_name, doc_type=doctype,
                   body={"ids": uid_list}, _source=True)["docs"]
    # NOTE(review): although a whole id list is fetched, only the first
    # document's _source is returned -- confirm this is intentional.
    first_doc = docs[0]
    return first_doc['_source']
def get_user_detail(date, input_result, status):
    """Build per-user detail rows from the daily bci index and weibo profiles.

    date: 'YYYY-mm-dd' selecting the bci index, or 'all' for today's index.
    input_result: a uid list when status == 'show_in', otherwise a dict keyed
        by uid whose values carry the per-status payload.
    status: 'show_in' | 'show_compute' | 'show_in_history' -- selects the uid
        source and the shape of each output row.
    Returns a list of rows (lists).
    """
    if status == 'show_in':
        uid_list = input_result
    elif status == 'show_compute':
        uid_list = input_result.keys()
    elif status == 'show_in_history':
        uid_list = input_result.keys()
    # pick the daily bci index; 'all' falls back to today's date
    if date != 'all':
        index_name = 'bci_' + date.replace('-', '')
    else:
        index_name = 'bci_' + ts2datetime(time.time()).replace('-', '')
    index_type = 'bci'
    user_bci_result = es_cluster.mget(index=index_name, doc_type=index_type,
                                      body={'ids': uid_list}, _source=True)['docs']
    user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user',
                                               body={'ids': uid_list}, _source=True)['docs']
    max_evaluate_influ = get_evaluate_max(index_name)
    results = []
    for idx, uid in enumerate(uid_list):
        try:
            bci_source = user_bci_result[idx]['_source']
        except:
            bci_source = None
        if bci_source:
            # rescale raw user_index onto a 0-100 logarithmic scale
            raw_index = bci_source['user_index']
            influence = math.log(raw_index / max_evaluate_influ['user_index'] * 9 + 1, 10) * 100
        else:
            influence = ''
        try:
            profile_source = user_profile_result[idx]['_source']
        except:
            profile_source = None
        if profile_source:
            uname = profile_source['nick_name']
            location = profile_source['user_location']
            fansnum = profile_source['fansnum']
            statusnum = profile_source['statusnum']
        else:
            uname = location = fansnum = statusnum = ''
        base_row = [uid, uname, location, fansnum, statusnum, influence]
        if status == 'show_in':
            results.append(base_row)
        elif status == 'show_compute':
            payload = json.loads(input_result[uid])
            in_date = payload[0]
            compute_status = payload[1]
            if compute_status == '1':
                compute_status = '3'  # remap flag '1' to '3' for display
            results.append(base_row + [in_date, compute_status])
        elif status == 'show_in_history':
            results.append(base_row + [input_result[uid]])
    return results
def get_recommentation(submit_user):
    """Collect the last 7 days of recommended users for *submit_user*.

    Each output row is [date, uid, uname, location, fansnum, statusnum,
    influence, in_portrait], where in_portrait is "1" when the uid is
    already queued in the "compute" hash.
    """
    now_ts = time.time() if RUN_TYPE else datetime2ts(RUN_TEST_TIME)
    in_portrait_set = set(r.hkeys("compute"))
    result = []
    for day_offset in range(7):
        iter_ts = now_ts - day_offset * DAY
        iter_date = ts2datetime(iter_ts)
        # recommendations recorded on iter_date; influence comes from the
        # previous day's bci index
        recomment_uids = r.hkeys("recomment_" + submit_user + "_" + str(iter_date))
        bci_index_name = "bci_" + ts2datetime(iter_ts - DAY).replace('-', '')
        if not es_cluster.indices.exists(index=bci_index_name):
            continue
        if not recomment_uids:
            continue
        user_bci_result = es_cluster.mget(index=bci_index_name, doc_type="bci",
                                          body={'ids': recomment_uids}, _source=True)['docs']
        user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user',
                                                   body={'ids': recomment_uids}, _source=True)['docs']
        max_evaluate_influ = get_evaluate_max(bci_index_name)
        for pos, uid in enumerate(recomment_uids):
            try:
                bci_source = user_bci_result[pos]['_source']
            except:
                bci_source = None
            if bci_source:
                # rescale raw user_index onto a 0-100 logarithmic scale
                influence = math.log(bci_source['user_index'] / max_evaluate_influ['user_index'] * 9 + 1, 10) * 100
            else:
                influence = ''
            try:
                profile_source = user_profile_result[pos]['_source']
            except:
                profile_source = None
            if profile_source:
                uname = profile_source['nick_name']
                location = profile_source['user_location']
                fansnum = profile_source['fansnum']
                statusnum = profile_source['statusnum']
            else:
                uname = location = fansnum = statusnum = ''
            in_portrait = "1" if uid in in_portrait_set else "0"
            result.append([iter_date, uid, uname, location, fansnum,
                           statusnum, influence, in_portrait])
    return result
def get_user_detail(date, input_result, status, user_type="influence", auth=""):
    """Build per-user detail rows (bci influence + weibo profile), optionally
    enriched with the previous day's sensitive words and admin submit info.

    date: 'YYYY-mm-dd' or 'all' (use today's bci index).
    input_result: uid list for 'show_in'; dict keyed by uid otherwise.
    status: 'show_in' | 'show_compute' | 'show_in_history' -- row layout.
    user_type: "sensitive" appends the user's sensitive-word list to the row.
    auth: truthy -> append submit/recommend admin columns to 'show_in' rows.
    Returns a list of rows (lists).
    """
    results = []
    if status == 'show_in':
        uid_list = input_result
    if status == 'show_compute':
        uid_list = input_result.keys()
    if status == 'show_in_history':
        uid_list = input_result.keys()
    if date != 'all':
        # BUGFIX: compute bci_date only for a real date; the original called
        # datetime2ts(date) unconditionally, which raised for date == 'all'
        # before the fallback branch below could ever run.
        bci_date = ts2datetime(datetime2ts(date) - DAY)
        index_name = 'bci_' + ''.join(bci_date.split('-'))
    else:
        now_ts = time.time()
        now_date = ts2datetime(now_ts)
        index_name = 'bci_' + ''.join(now_date.split('-'))
    index_type = 'bci'
    user_bci_result = es_cluster.mget(index=index_name, doc_type=index_type, body={'ids': uid_list}, _source=True)['docs']
    user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids': uid_list}, _source=True)['docs']
    max_evaluate_influ = get_evaluate_max(index_name)
    for i in range(0, len(uid_list)):
        uid = uid_list[i]
        bci_dict = user_bci_result[i]
        profile_dict = user_profile_result[i]
        try:
            bci_source = bci_dict['_source']
        except:
            bci_source = None
        if bci_source:
            # rescale raw user_index onto a 0-100 logarithmic influence score
            influence = bci_source['user_index']
            influence = math.log(influence / max_evaluate_influ['user_index'] * 9 + 1, 10)
            influence = influence * 100
        else:
            influence = ''
        try:
            profile_source = profile_dict['_source']
        except:
            profile_source = None
        if profile_source:
            uname = profile_source['nick_name']
            location = profile_source['user_location']
            fansnum = profile_source['fansnum']
            statusnum = profile_source['statusnum']
        else:
            uname = ''
            location = ''
            fansnum = ''
            statusnum = ''
        if status == 'show_in':
            if user_type == "sensitive":
                # NOTE(review): datetime2ts(date) still raises if date == 'all'
                # and user_type == 'sensitive' -- confirm callers never mix them
                tmp_ts = datetime2ts(date) - DAY
                tmp_data = r_cluster.hget("sensitive_" + str(tmp_ts), uid)
                if tmp_data:
                    sensitive_dict = json.loads(tmp_data)
                    sensitive_words = sensitive_dict.keys()
                else:
                    sensitive_words = []
                results.append([uid, uname, location, fansnum, statusnum, influence, sensitive_words])
            else:
                results.append([uid, uname, location, fansnum, statusnum, influence])
            if auth:
                hashname_submit = "submit_recomment_" + date
                submit_raw = r.hget(hashname_submit, uid)
                # BUGFIX: hget returns None for a missing uid; skip the admin
                # columns instead of crashing in json.loads(None)
                if submit_raw:
                    tmp_data = json.loads(submit_raw)
                    recommend_list = (tmp_data['operation']).split('&')
                    admin_list = []
                    admin_list.append(tmp_data['system'])
                    admin_list.append(list(set(recommend_list)))
                    admin_list.append(len(recommend_list))
                    results[-1].extend(admin_list)
        if status == 'show_compute':
            in_date = json.loads(input_result[uid])[0]
            compute_status = json.loads(input_result[uid])[1]
            if compute_status == '1':
                compute_status = '3'
            results.append([uid, uname, location, fansnum, statusnum, influence, in_date, compute_status])
        if status == 'show_in_history':
            in_status = input_result[uid]
            if user_type == "sensitive":
                tmp_ts = datetime2ts(date) - DAY
                tmp_data = r_cluster.hget("sensitive_" + str(tmp_ts), uid)
                if tmp_data:
                    sensitive_dict = json.loads(tmp_data)
                    sensitive_words = sensitive_dict.keys()
                else:
                    # BUGFIX: original left sensitive_words undefined here,
                    # raising NameError when no sensitive data was recorded
                    sensitive_words = []
                results.append([uid, uname, location, fansnum, statusnum, influence, in_status, sensitive_words])
            else:
                results.append([uid, uname, location, fansnum, statusnum, influence, in_status])
    return results
# NOTE(review): fragment -- this code reads tmp_ts, uid_list, index_name,
# bci_history_index_name and bci_history_index_type that are defined outside
# the visible span; the enclosing function's def line is not in view.
# It looks up the day's maximum sensitive score (falling back to 400) and
# batch-fetches bci, profile, bci-history and sensitive-history documents
# for uid_list, then begins iterating per uid.
sensitive_string = "sensitive_score_" + tmp_ts
# one-hit query sorted descending on the day's sensitive-score field,
# i.e. fetch only the maximum sensitive score of the day
query_sensitive_body = { "query":{ "match_all":{} }, "size":1, "sort":{sensitive_string:{"order":"desc"}} }
try:
    top_sensitive_result = es_bci_history.search(index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX, body=query_sensitive_body, _source=False, fields=[sensitive_string])['hits']['hits']
    top_sensitive = top_sensitive_result[0]['fields'][sensitive_string][0]
except Exception, reason:  # Python 2 except syntax; keep best-effort fallback
    print Exception, reason
    top_sensitive = 400  # presumably a default ceiling score -- TODO confirm
index_type = 'bci'
# batch-fetch all per-uid documents in index order (results align with uid_list)
user_bci_result = es_cluster.mget(index=index_name, doc_type=index_type, body={'ids':uid_list}, _source=True)['docs']
user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids':uid_list}, _source=True)['docs']
bci_history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={"ids":uid_list}, fields=['user_fansnum', 'weibo_month_sum'])['docs']
sensitive_history_result = es_bci_history.mget(index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX, body={'ids':uid_list}, fields=[sensitive_string], _source=False)['docs']
max_evaluate_influ = get_evaluate_max(index_name)
for i in range(0, len(uid_list)):
    uid = uid_list[i]
    bci_dict = user_bci_result[i]
    profile_dict = user_profile_result[i]
    bci_history_dict = bci_history_result[i]
    sensitive_history_dict = sensitive_history_result[i]
    #print sensitive_history_dict
    try:
        bci_source = bci_dict['_source']
    except:
        bci_source = None
# NOTE(review): the loop body continues beyond this visible span.
def get_recommentation(submit_user):
    """Return recommendation rows for *submit_user* over the past week.

    Row layout: [date, uid, uname, location, fansnum, statusnum, influence,
    in_portrait]; in_portrait == "1" iff the uid is queued in "compute".
    """
    if RUN_TYPE:
        base_ts = time.time()
    else:
        base_ts = datetime2ts(RUN_TEST_TIME)
    queued_uids = set(r.hkeys("compute"))
    rows = []
    for offset in range(7):
        day_ts = base_ts - offset * DAY
        day = ts2datetime(day_ts)
        # recommendations recorded on `day`; influence is taken from the
        # previous day's bci index
        uids = r.hkeys("recomment_" + submit_user + "_" + str(day))
        bci_index = "bci_" + ts2datetime(day_ts - DAY).replace('-', '')
        if not es_cluster.indices.exists(index=bci_index):
            continue
        if uids:
            bci_docs = es_cluster.mget(index=bci_index, doc_type="bci",
                                       body={'ids': uids}, _source=True)['docs']
            profile_docs = es_user_profile.mget(index='weibo_user', doc_type='user',
                                                body={'ids': uids}, _source=True)['docs']
            max_evaluate_influ = get_evaluate_max(bci_index)
            for k in range(len(uids)):
                uid = uids[k]
                try:
                    bci_source = bci_docs[k]['_source']
                except:
                    bci_source = None
                if bci_source:
                    # rescale raw user_index onto a 0-100 logarithmic scale
                    ratio = bci_source['user_index'] / max_evaluate_influ['user_index']
                    influence = math.log(ratio * 9 + 1, 10) * 100
                else:
                    influence = ''
                try:
                    profile_source = profile_docs[k]['_source']
                except:
                    profile_source = None
                if profile_source:
                    uname = profile_source['nick_name']
                    location = profile_source['user_location']
                    fansnum = profile_source['fansnum']
                    statusnum = profile_source['statusnum']
                else:
                    uname = ''
                    location = ''
                    fansnum = ''
                    statusnum = ''
                in_portrait = "1" if uid in queued_uids else "0"
                rows.append([day, uid, uname, location, fansnum,
                             statusnum, influence, in_portrait])
    return rows
def get_user_detail(date, input_result, status, user_type="influence", auth=""):
    """Build per-user detail rows (bci influence + weibo profile), optionally
    enriched with the previous day's sensitive words and admin submit info.

    date: 'YYYY-mm-dd' (names the bci index directly) or 'all' (today's index).
    input_result: uid list for 'show_in'; dict keyed by uid otherwise.
    status: 'show_in' | 'show_compute' | 'show_in_history' -- row layout.
    user_type: "sensitive" appends the user's sensitive-word list to the row.
    auth: truthy -> append submit/recommend admin columns to 'show_in' rows.
    Returns a list of rows (lists).
    """
    results = []
    if status == 'show_in':
        uid_list = input_result
    if status == 'show_compute':
        uid_list = input_result.keys()
    if status == 'show_in_history':
        uid_list = input_result.keys()
    if date != 'all':
        index_name = 'bci_' + ''.join(date.split('-'))
    else:
        now_ts = time.time()
        now_date = ts2datetime(now_ts)
        index_name = 'bci_' + ''.join(now_date.split('-'))
    index_type = 'bci'
    user_bci_result = es_cluster.mget(index=index_name, doc_type=index_type, body={'ids': uid_list}, _source=True)['docs']
    user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids': uid_list}, _source=True)['docs']
    max_evaluate_influ = get_evaluate_max(index_name)
    for i in range(0, len(uid_list)):
        uid = uid_list[i]
        bci_dict = user_bci_result[i]
        profile_dict = user_profile_result[i]
        try:
            bci_source = bci_dict['_source']
        except:
            bci_source = None
        if bci_source:
            # rescale raw user_index onto a 0-100 logarithmic influence score
            influence = bci_source['user_index']
            influence = math.log(influence / max_evaluate_influ['user_index'] * 9 + 1, 10)
            influence = influence * 100
        else:
            influence = ''
        try:
            profile_source = profile_dict['_source']
        except:
            profile_source = None
        if profile_source:
            uname = profile_source['nick_name']
            location = profile_source['user_location']
            fansnum = profile_source['fansnum']
            statusnum = profile_source['statusnum']
        else:
            uname = ''
            location = ''
            fansnum = ''
            statusnum = ''
        if status == 'show_in':
            if user_type == "sensitive":
                # NOTE(review): datetime2ts(date) raises if date == 'all' while
                # user_type == 'sensitive' -- confirm callers never mix them
                tmp_ts = datetime2ts(date) - DAY
                tmp_data = r_cluster.hget("sensitive_" + str(tmp_ts), uid)
                if tmp_data:
                    sensitive_dict = json.loads(tmp_data)
                    sensitive_words = sensitive_dict.keys()
                else:
                    # BUGFIX: original had `senstive_words = []` (typo), so
                    # sensitive_words stayed unbound and the append below
                    # raised NameError when no sensitive data existed
                    sensitive_words = []
                results.append([uid, uname, location, fansnum, statusnum, influence, sensitive_words])
            else:
                results.append([uid, uname, location, fansnum, statusnum, influence])
            if auth:
                hashname_submit = "submit_recomment_" + date
                submit_raw = r.hget(hashname_submit, uid)
                # BUGFIX: hget returns None for a missing uid; skip the admin
                # columns instead of crashing in json.loads(None)
                if submit_raw:
                    tmp_data = json.loads(submit_raw)
                    recommend_list = (tmp_data['operation']).split('&')
                    admin_list = []
                    admin_list.append(tmp_data['system'])
                    admin_list.append(list(set(recommend_list)))
                    admin_list.append(len(recommend_list))
                    results[-1].extend(admin_list)
        if status == 'show_compute':
            in_date = json.loads(input_result[uid])[0]
            compute_status = json.loads(input_result[uid])[1]
            if compute_status == '1':
                compute_status = '3'
            results.append([uid, uname, location, fansnum, statusnum, influence, in_date, compute_status])
        if status == 'show_in_history':
            in_status = input_result[uid]
            if user_type == "sensitive":
                tmp_ts = datetime2ts(date) - DAY
                tmp_data = r_cluster.hget("sensitive_" + str(tmp_ts), uid)
                if tmp_data:
                    sensitive_dict = json.loads(tmp_data)
                    sensitive_words = sensitive_dict.keys()
                else:
                    # BUGFIX: original left sensitive_words undefined here,
                    # raising NameError when no sensitive data was recorded
                    sensitive_words = []
                results.append([uid, uname, location, fansnum, statusnum, influence, in_status, sensitive_words])
            else:
                results.append([uid, uname, location, fansnum, statusnum, influence, in_status])
    return results
def influenced_user_detail(uid, date, origin_retweeted_mid, retweeted_retweeted_mid, message_type, default_number=20):
    """Summarize the users influenced by *uid*'s weibo(s) on *date*.

    origin_retweeted_mid / retweeted_retweeted_mid: mid lists used in terms
    queries on root_mid to find the forwarding/commenting users (filtered
    further by message_type and root_uid / directed_uid == uid).
    default_number: cap on the in/out-of-portrait user lists in the result.
    Returns a dict with top-5 domain/topic/geo distributions, average bci
    influence, portrait membership lists and counts.
    """
    # base query: filtered bool-must (filled in below), huge size, sorted by fan count
    query_body = { "query":{ "filtered":{ "filter":{ "bool":{ "must": [ ] } } } }, "size":100000, "sort":{"user_fansnum":{"order":"desc"}} }
    # the influenced users, in detail
    date1 = str(date).replace('-', '')
    index_name = pre_index + date1
    index_flow_text = pre_text_index + date
    origin_retweeted_uid = [] # influenced user uid_list
    retweeted_retweeted_uid = []
    origin_comment_uid = []  # NOTE(review): never populated in this function
    retweeted_comment_uid = []  # NOTE(review): never populated in this function
    query_origin = copy.deepcopy(query_body)
    query_retweeted = copy.deepcopy(query_body)
    if origin_retweeted_mid: # all users who forwarded these original weibos
        query_origin["query"]["filtered"]["filter"]["bool"]["must"].append({"terms": {"root_mid": origin_retweeted_mid}})
        query_origin["query"]["filtered"]["filter"]["bool"]["must"].extend([{"term":{"message_type": message_type}}, {"term":{"root_uid": uid}}])
        origin_retweeted_result = es.search(index=index_flow_text, doc_type=flow_text_index_type, body=query_origin, fields=["uid"])["hits"]["hits"]
        if origin_retweeted_result:
            for item in origin_retweeted_result:
                origin_retweeted_uid.append(item["fields"]["uid"][0])
    if retweeted_retweeted_mid: # all users who commented on these original weibos
        query_retweeted["query"]["filtered"]["filter"]["bool"]["must"].append({"terms": {"root_mid": retweeted_retweeted_mid}})
        query_retweeted["query"]["filtered"]["filter"]["bool"]["must"].extend([{"term":{"message_type": message_type}},{"term": {"directed_uid": uid}}])
        retweeted_retweeted_result = es.search(index=index_flow_text, doc_type=flow_text_index_type, body=query_retweeted, fields=["uid"])["hits"]["hits"]
        if retweeted_retweeted_result:
            for item in retweeted_retweeted_result:
                retweeted_retweeted_uid.append(item["fields"]["uid"][0])
    retweeted_uid_list = [] # all retweeted user list
    retweeted_results = {} # statistics of all retweeted uid information
    retweeted_domain = {}
    retweeted_topic = {}
    retweeted_geo = {}
    bci_results = {}
    in_portrait = []
    out_portrait = []
    average_influence = 0
    total_influence = 0
    count = 0
    all_uid_set = set(origin_retweeted_uid) | set(retweeted_retweeted_uid)  # NOTE(review): unused
    retweeted_uid_list.extend(origin_retweeted_uid)
    retweeted_uid_list.extend(retweeted_retweeted_uid)
    retweeted_uid_list = list(set(retweeted_uid_list) - set([uid])) # filter uids (drop the author himself)
    if retweeted_uid_list:
        user_portrait_result = es_user_portrait.mget(index=user_portrait, doc_type=portrait_index_type, body={"ids": retweeted_uid_list}, fields=["domain", "topic_string", "activity_geo_dict","importance", "influence"])["docs"]
        bci_index = "bci_" + date.replace('-', '')
        bci_results = es_cluster.mget(index=bci_index, doc_type="bci", body={"ids":retweeted_uid_list}, fields=['user_index'])["docs"]
        for item in user_portrait_result:
            if item["found"]:
                # user exists in the portrait: collect [uid, importance] and
                # aggregate its domain/topic/geo tags
                temp = []
                count += 1
                temp.append(item['_id'])
                temp.append(item["fields"]["importance"][0])
                in_portrait.append(temp)
                temp_domain = item["fields"]["domain"][0].split('&')
                temp_topic = item["fields"]["topic_string"][0].split('&')
                # assumes activity_geo_dict is a JSON list of dicts; takes the
                # keys of the last entry -- TODO confirm against the indexer
                temp_geo = json.loads(item["fields"]["activity_geo_dict"][0])[-1].keys()
                #total_influence += item["fields"]["influence"][0]
                retweeted_domain = aggregation(temp_domain, retweeted_domain)
                retweeted_topic = aggregation(temp_topic, retweeted_topic)
                retweeted_geo = aggregation(temp_geo, retweeted_geo)
            else:
                out_portrait.append(item['_id'])
        # convert raw tag counts to proportions
        retweeted_domain = proportion(retweeted_domain)
        retweeted_topic = proportion(retweeted_topic)
        retweeted_geo = proportion(retweeted_geo)
    if bci_results:
        total_influence = 0
        for item in bci_results:
            if item['found']:
                total_influence += item['fields']['user_index'][0]
    try:
        average_influence = total_influence/len(retweeted_uid_list)
    except:
        # empty retweeted_uid_list -> ZeroDivisionError -> default to 0
        average_influence = 0
    sorted_retweeted_domain = sorted(retweeted_domain.items(),key=lambda x:x[1], reverse=True)
    sorted_retweeted_topic = sorted(retweeted_topic.items(),key=lambda x:x[1], reverse=True)
    sorted_retweeted_geo = sorted(retweeted_geo.items(), key=lambda x:x[1], reverse=True)
    # NOTE(review): "domian" is a long-standing key typo -- kept because
    # callers read this exact key; fix in callers and here together
    retweeted_results["domian"] = sorted_retweeted_domain[:5]
    retweeted_results["topic"] = sorted_retweeted_topic[:5]
    retweeted_results["geo"] = sorted_retweeted_geo[:5]
    retweeted_results["influence"] = average_influence
    # rank portrait users by importance, keep uids only
    in_portrait = sorted(in_portrait, key=lambda x:x[1], reverse=True)
    temp_list = []
    for item in in_portrait:
        temp_list.append(item[0])
    retweeted_results['in_portrait_number'] = len(temp_list)
    retweeted_results['out_portrait_number'] = len(out_portrait)
    in_portrait_url = get_user_url(temp_list[:default_number])
    out_portrait_url = get_user_url(out_portrait[:default_number])
    retweeted_results["in_portrait"] = in_portrait_url
    retweeted_results["out_portrait"] = out_portrait_url
    retweeted_results["total_number"] = len(temp_list) + len(out_portrait)
    return retweeted_results