def recommentation_in_auto(seatch_date, submit_user):
    """Return detail rows for the previous day's automatic recommendations.

    The day is taken from the wall clock when ``RUN_TYPE == 1`` and from
    ``RUN_TEST_TIME`` otherwise, in both cases shifted back by ``DAY``.

    :param seatch_date: accepted but never read by this implementation.
    :param submit_user: field of the ``recomment_<day>_auto`` hash whose JSON
        list is merged with the list stored under the ``'auto'`` field.
    :returns: the result of ``get_user_detail`` for every merged uid that is
        not a key of the day's influence hash, the day's sensitive hash or
        the ``compute`` hash.
    """
    # run type
    if RUN_TYPE == 1:
        reference_ts = time.time()
    else:
        reference_ts = datetime2ts(RUN_TEST_TIME)
    day = ts2datetime(reference_ts - DAY)

    auto_hash = 'recomment_' + day + '_auto'
    influence_hash = 'recomment_' + day + '_influence'
    sensitive_hash = 'recomment_' + day + '_sensitive'

    def _stored_uids(field):
        # A missing or empty field counts as an empty list.
        raw = r.hget(auto_hash, field)
        return json.loads(raw) if raw else []

    # step1 + step2: system list and the submitting user's own list
    proposed = set(_stored_uids('auto')) | set(_stored_uids(submit_user))

    # step3: drop everything already known to influence/sensitive/compute
    excluded = (set(r.hkeys(influence_hash))
                | set(r.hkeys(sensitive_hash))
                | set(r.hkeys('compute')))
    remaining = list(proposed - excluded)

    # step4: get user detail
    return get_user_detail(day, remaining, 'show_in', 'auto')
def recommentation_in(input_ts, recomment_type, submit_user):
    """Return detail rows for recommended users not yet handled.

    Reads the keys of ``recomment_<date>_<recomment_type>`` and removes every
    uid that is a key of the ``compute`` hash or of the submitting user's own
    ``recomment_<submit_user>_<date>`` hash.

    :param input_ts: timestamp converted to a date string by ``ts2datetime``.
    :param recomment_type: suffix of the recommendation hash; also forwarded
        to ``get_user_detail`` as the user type.
    :param submit_user: name used to build the per-user recommendation hash.
    :returns: ``[]`` when the recommendation hash is empty or nothing is left
        after filtering, otherwise the result of ``get_user_detail``.
    """
    date = ts2datetime(input_ts)
    hash_name = 'recomment_' + str(date) + "_" + recomment_type
    own_hash_name = "recomment_" + submit_user + "_" + str(date)

    # A single HKEYS both answers "is the hash empty?" and provides the
    # candidates; the values of the hash are never used.
    recommended = set(r.hkeys(hash_name))
    if not recommended:
        return []

    in_compute = set(r.hkeys("compute"))
    own_recommended = set(r.hkeys(own_hash_name))
    remaining = list(recommended - in_compute - own_recommended)
    if not remaining:
        return []
    # search from user_profile to enrich the shown information
    return get_user_detail(date, remaining, 'show_in', recomment_type)
def recommentation_in(input_ts, recomment_type, submit_user):
    """List recommended users that are neither computed nor self-recommended.

    :param input_ts: timestamp; ``ts2datetime`` turns it into the date part of
        the Redis hash names.
    :param recomment_type: suffix of ``recomment_<date>_<type>``; passed on to
        ``get_user_detail`` as the user type.
    :param submit_user: owner of the ``recomment_<submit_user>_<date>`` hash
        whose keys are excluded from the result.
    :returns: ``[]`` if the recommendation hash has no keys or every key is
        filtered out; otherwise whatever ``get_user_detail`` returns.
    """
    date = ts2datetime(input_ts)
    day = str(date)

    # Only the keys matter, so HKEYS alone replaces the former
    # HGETALL-then-HKEYS pair on the same hash.
    candidates = set(r.hkeys('recomment_' + day + "_" + recomment_type))
    if not candidates:
        return []

    candidates -= set(r.hkeys("compute"))
    # the submitting user's own recommendation list
    candidates -= set(r.hkeys("recomment_" + submit_user + "_" + day))
    if not candidates:
        return []
    return get_user_detail(date, list(candidates), 'show_in', recomment_type)
def get_recommentation(submit_user):
    """Collect the uids *submit_user* recommended over the last seven days.

    For each of the seven days ending at "now" (wall clock when ``RUN_TYPE``
    is truthy, ``RUN_TEST_TIME`` otherwise) the keys of
    ``recomment_<submit_user>_<date>`` are looked up in the previous day's
    ``bci_<yyyymmdd>`` index and in the ``weibo_user`` profile index.

    :param submit_user: name used to build the per-user Redis hash names.
    :returns: list of rows ``[date, uid, uname, location, fansnum, statusnum,
        influence, in_portrait]``. Missing profile fields and a missing
        influence value are reported as ``''``; ``in_portrait`` is ``"1"``
        when the uid is a key of the ``compute`` hash, else ``"0"``.
    """
    if RUN_TYPE:
        now_ts = time.time()
    else:
        now_ts = datetime2ts(RUN_TEST_TIME)
    in_portrait_set = set(r.hkeys("compute"))

    result = []
    for day_offset in range(7):
        iter_ts = now_ts - day_offset * DAY
        iter_date = ts2datetime(iter_ts)
        uid_list = r.hkeys("recomment_" + submit_user + "_" + str(iter_date))
        if not uid_list:
            # Nothing recommended that day: skip the index lookups entirely.
            continue
        bci_index_name = "bci_" + ts2datetime(iter_ts - DAY).replace('-', '')
        if not es_cluster.indices.exists(index=bci_index_name):
            continue

        user_bci_result = es_cluster.mget(
            index=bci_index_name, doc_type="bci",
            body={'ids': uid_list}, _source=True)['docs']
        user_profile_result = es_user_profile.mget(
            index='weibo_user', doc_type='user',
            body={'ids': uid_list}, _source=True)['docs']
        max_evaluate_influ = get_evaluate_max(bci_index_name)

        # mget answers with one doc per requested id, in request order.
        for uid, bci_dict, profile_dict in zip(uid_list, user_bci_result,
                                               user_profile_result):
            try:
                bci_source = bci_dict['_source']
            except (KeyError, TypeError):
                bci_source = None
            if bci_source:
                influence = bci_source['user_index']
                influence = math.log(
                    influence / max_evaluate_influ['user_index'] * 9 + 1, 10)
                influence = influence * 100
            else:
                influence = ''

            try:
                profile_source = profile_dict['_source']
            except (KeyError, TypeError):
                profile_source = None
            if profile_source:
                uname = profile_source['nick_name']
                location = profile_source['user_location']
                fansnum = profile_source['fansnum']
                statusnum = profile_source['statusnum']
            else:
                uname = location = fansnum = statusnum = ''

            in_portrait = "1" if uid in in_portrait_set else "0"
            result.append([iter_date, uid, uname, location, fansnum,
                           statusnum, influence, in_portrait])
    return result
def identify_compute(data):
    """Set the compute status of every listed uid to ``1``.

    Each entry of the ``compute`` hash is a JSON list ``[in_date, status]``;
    the stored ``in_date`` is kept and only the status is rewritten.

    :param data: iterable of sequences whose element at index 1 is the uid.
    :returns: ``True``.
    :raises: whatever ``json.loads`` raises when ``r.hget`` does not return a
        JSON string for a uid (the uid is expected to exist in the hash).
    """
    compute_status = 1
    hash_name = 'compute'
    # The whole hash used to be fetched here and then ignored; each uid is
    # read individually below, so that extra round-trip is gone.
    for item in data:
        uid = item[1]
        in_date = json.loads(r.hget(hash_name, uid))[0]
        r.hset(hash_name, uid, json.dumps([in_date, compute_status]))
    return True
def submit_identify_in_uname(input_data):
    """Submit users, given by nickname, as recommendations of one user.

    :param input_data: dict with the keys ``'date'``, ``'user'`` and
        ``'upload_data'`` (newline-separated nicknames).
    :returns: ``'uname list valid'`` when no nickname matches a profile,
        ``'uname list all in'`` when every matched uid is already in the
        portrait index or the ``compute`` hash, otherwise ``True`` after the
        submissions have been written to Redis.
    """
    date = input_data['date']
    submit_user = input_data['user']
    # get uname list from upload data
    uname_list = input_data['upload_data'].split('\n')

    # step1: get uid list from uname
    nickname_query = {'query': {'terms': {'nick_name': uname_list}}}
    profile_hits = es_user_profile.search(
        index=profile_index_name,
        doc_type=profile_index_type,
        body=nickname_query,
        _source=False)['hits']['hits']
    uid_list = [hit['_id'] for hit in profile_hits]
    if not uid_list:
        return 'uname list valid'

    # step2.1: keep only uids without a user_portrait document
    portrait_docs = es_user_portrait.mget(
        index=portrait_index_name,
        doc_type=portrait_index_type,
        body={'ids': uid_list})['docs']
    new_uid_list = [doc['_id'] for doc in portrait_docs
                    if doc['found'] == False]
    if not new_uid_list:
        return 'uname list all in'

    # step2.2: drop uids already queued in compute
    in_uid_list = list(set(new_uid_list) - set(r.hkeys('compute')))
    if not in_uid_list:
        return 'uname list all in'

    # step3: save submit
    hashname_submit = 'submit_recomment_' + date
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = (set(r.hkeys('recomment_' + date + '_influence'))
                          | set(r.hkeys('recomment_' + date + '_sensitive')))
    for in_item in in_uid_list:
        if in_item not in auto_recomment_set:
            tmp = {'system': '0', 'operation': submit_user}
        else:
            # Add this user to the '&'-joined recommender list, de-duplicated.
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommenders = tmp['operation'].split('&') + [str(submit_user)]
            tmp['operation'] = '&'.join(list(set(recommenders)))
        r.hset(hashname_submit, in_item, json.dumps(tmp))
        r.hset(submit_user_recomment, in_item, '0')
    return True
def submit_identify_in_uname(input_data):
    """Validate nicknames and (optionally) submit them as recommendations.

    :param input_data: dict with the keys ``'date'``, ``'user'``,
        ``'operation_type'`` and ``'upload_data'`` (one nickname per line,
        ``\\n`` or ``\\r\\n`` terminated).
    :returns: one of
        ``(False, 'invalid user info', invalid_names)`` when some uploaded
        name has no profile hit;
        ``(False, 'all user in')`` when every uid is already in the portrait
        index or the ``compute`` hash;
        ``(True, invalid_names, uids_already_in_portrait, submitted_uids)``
        otherwise. Redis is only written when ``operation_type`` is
        ``'submit'``.
    """
    date = input_data['date']
    submit_user = input_data['user']
    operation_type = input_data['operation_type']
    upload_data = input_data['upload_data']
    # get uname list from upload data, dropping a trailing '\r' per line
    uname_list = [line.split('\r')[0] for line in upload_data.split('\n')]

    # step1: get uid list from uname
    profile_hits = es_user_profile.search(
        index=profile_index_name, doc_type=profile_index_type,
        body={'query': {'terms': {'nick_name': uname_list}}},
        _source=False, fields=['nick_name'])['hits']['hits']
    uid_list = []
    valid_uname_list = []
    for hit in profile_hits:
        uid_list.append(hit['_id'])
        valid_uname_list.append(hit['fields']['nick_name'][0])
    invalid_user_list = list(set(uname_list) - set(valid_uname_list))
    if len(invalid_user_list) != 0:
        return False, 'invalid user info', invalid_user_list

    # step2.1: split uids by presence in user_portrait
    portrait_docs = es_user_portrait.mget(
        index=portrait_index_name, doc_type=portrait_index_type,
        body={'ids': uid_list})['docs']
    new_uid_list = [doc['_id'] for doc in portrait_docs
                    if doc['found'] == False]
    have_in_user_list = [doc['_id'] for doc in portrait_docs
                         if doc['found'] == True]
    if not new_uid_list:
        return False, 'all user in'

    # step2.2: drop uids already queued in compute
    in_uid_list = list(set(new_uid_list) - set(r.hkeys('compute')))
    if not in_uid_list:
        return False, 'all user in'

    # step3: save submit
    hashname_submit = 'submit_recomment_' + date
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = (set(r.hkeys('recomment_' + date + '_influence'))
                          | set(r.hkeys('recomment_' + date + '_sensitive')))

    # identify final submit user list
    final_submit_user_list = []
    should_write = operation_type == 'submit'
    for in_item in in_uid_list:
        if in_item not in auto_recomment_set:
            tmp = {'system': '0', 'operation': submit_user}
        else:
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommenders = tmp['operation'].split('&') + [str(submit_user)]
            tmp['operation'] = '&'.join(list(set(recommenders)))
        if should_write:
            r.hset(hashname_submit, in_item, json.dumps(tmp))
            r.hset(submit_user_recomment, in_item, '0')
        # NOTE(review): recorded for every candidate, so a non-'submit' call
        # works as a preview -- confirm this nesting against the original.
        final_submit_user_list.append(in_item)
    return True, invalid_user_list, have_in_user_list, final_submit_user_list
def show_in_history(date):
    """Return detail rows for everything stored in ``recomment_<date>``.

    :param date: date used to build the Redis hash name and forwarded to
        ``get_user_detail``.
    :returns: ``[]`` when the hash is empty, otherwise the result of
        ``get_user_detail`` in ``'show_in_history'`` mode.
    """
    stored = r.hgetall('recomment_' + str(date))
    if not stored:
        return []
    return get_user_detail(date, stored, 'show_in_history')
def show_in_history(date, user_type):
    """Return detail rows for everything stored in ``identify_in_<date>``.

    :param date: date used to build the Redis hash name and forwarded to
        ``get_user_detail``.
    :param user_type: forwarded unchanged to ``get_user_detail``.
    :returns: ``[]`` when the hash is empty, otherwise the result of
        ``get_user_detail`` in ``'show_in_history'`` mode.
    """
    identified = r.hgetall("identify_in_" + str(date))
    if not identified:
        return []
    return get_user_detail(date, identified, 'show_in_history', user_type)
def submit_identify_in_url(input_data):
    """Extract uids from profile URLs and (optionally) submit them.

    The uid is the last ten characters of the fifth ``/``-separated segment,
    e.g. ``http://weibo.com/p/1002065727942146/album?...``.

    :param input_data: dict with the keys ``'date'``, ``'user'``,
        ``'operation_type'`` and ``'upload_data'`` (one URL per line,
        ``\\n`` or ``\\r\\n`` terminated).
    :returns: one of
        ``(False, 'invalid user info', invalid_urls)`` when a line has fewer
        than five segments;
        ``(False, 'all user in')`` when every uid is already in the portrait
        index or the ``compute`` hash;
        ``(True, invalid_urls, uids_already_in_portrait, submitted_uids)``
        otherwise. Redis is only written when ``operation_type`` is
        ``'submit'``.
    """
    date = input_data['date']
    submit_user = input_data['user']
    operation_type = input_data['operation_type']
    upload_data = input_data['upload_data']

    # step1: get uid list from input_data url
    url_list = [line.split('\r')[0] for line in upload_data.split('\n')]
    uid_list = []
    invalid_uid_list = []
    for url_item in url_list:
        # url_item = 'http://weibo.com/p/1002065727942146/album?.....'
        segments = url_item.split('/')
        try:
            uid_list.append(segments[4][-10:])
        except IndexError:
            # Too few path segments to contain a uid.
            invalid_uid_list.append(url_item)
    if invalid_uid_list:
        return False, 'invalid user info', invalid_uid_list

    # step2.1: split uids by presence in user_portrait
    exist_portrait_result = es_user_portrait.mget(
        index=portrait_index_name, doc_type=portrait_index_type,
        body={'ids': uid_list}, _source=True)['docs']
    new_uid_list = [doc['_id'] for doc in exist_portrait_result
                    if doc['found'] == False]
    have_in_uid_list = [doc['_id'] for doc in exist_portrait_result
                        if doc['found'] == True]

    # step2.2: drop uids already queued in compute
    in_uid_list = list(set(new_uid_list) - set(r.hkeys('compute')))
    if not in_uid_list:
        return False, 'all user in'

    # step3: save
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = (set(r.hkeys(hashname_influence))
                          | set(r.hkeys(hashname_sensitive)))

    # identify the final submit user
    final_submit_user_list = []
    for in_item in in_uid_list:
        if in_item in auto_recomment_set:
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            tmp['operation'] = '&'.join(list(set(recommentor_list)))
        else:
            tmp = {'system': '0', 'operation': submit_user}
        if operation_type == 'submit':
            r.hset(hashname_submit, in_item, json.dumps(tmp))
            r.hset(submit_user_recomment, in_item, '0')
        # NOTE(review): recorded for every candidate, so a non-'submit' call
        # works as a preview -- confirm this nesting against the original.
        final_submit_user_list.append(in_item)
    return True, invalid_uid_list, have_in_uid_list, final_submit_user_list
def new_identify_in(data, date, submit_user):
    """Record *submit_user*'s recommendations for *date* in Redis.

    For every uid the JSON entry in ``submit_recomment_<date>`` is written
    (with the user added to the ``'&'``-joined ``operation`` list when the uid
    is also a system recommendation) and the uid is marked with ``"0"`` in
    ``recomment_<submit_user>_<date>``.

    :param data: iterable of sequences whose element at index 1 is the uid.
    :param date: date string used in all Redis hash names.
    :param submit_user: name of the recommending user.
    :returns: ``True``.
    """
    hashname_submit = "submit_recomment_" + date
    hashname_influence = "recomment_" + date + "_influence"
    hashname_sensitive = "recomment_" + date + "_sensitive"
    # the submitting user's own recommendation list
    submit_user_recomment = "recomment_" + submit_user + "_" + str(date)
    # uids recommended automatically by the system
    auto_recomment_set = (set(r.hkeys(hashname_influence))
                          | set(r.hkeys(hashname_sensitive)))

    for item in data:
        # item[0] carries a date as well, but every hash name above is built
        # from the ``date`` argument, so only the uid is needed here.
        uid = item[1]
        if uid in auto_recomment_set:
            tmp = json.loads(r.hget(hashname_submit, uid))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            tmp['operation'] = "&".join(list(set(recommentor_list)))
        else:
            tmp = {"system": "0", "operation": submit_user}
        r.hset(hashname_submit, uid, json.dumps(tmp))
        r.hset(submit_user_recomment, uid, "0")
    return True
def get_attr(date):
    """Return the overview hash with its string values JSON-decoded.

    :param date: accepted but not read; the hash is addressed through the
        ``hash_name`` name defined outside this function.
    :returns: dict mapping each field of the hash to ``json.loads(value)``
        when the stored value is a ``str``, or to the value itself otherwise.
    """
    stored = r.hgetall(hash_name)
    return dict(
        (field, json.loads(raw) if isinstance(raw, str) else raw)
        for field, raw in stored.items()
    )
def identify_in(data):
    """Mark uids as identified for their date and queue them in ``compute``.

    For every item the uid is flagged in ``identify_in_<date>`` and a JSON
    list ``[date, compute_status]`` is stored under the uid in ``compute``.

    :param data: iterable of ``(date, uid, status)`` sequences; ``date`` has
        the form ``'2013-09-01'`` and ``status`` is ``'1'`` or ``'2'``.
    :returns: ``True``.
    """
    in_status = 1
    default_compute_status = 0
    compute_hash_name = 'compute'
    for item in data:
        date = item[0]  # identify the date form '2013-09-01' with web
        uid = item[1]
        status = item[2]
        r.hset("identify_in_" + str(date), uid, in_status)
        # Decide the status per item. Previously an unrecognised status
        # reused the previous item's date/status, or raised NameError when
        # it occurred on the first item.
        if status in ('1', '2'):
            compute_status = status
        else:
            compute_status = default_compute_status
        r.hset(compute_hash_name, uid, json.dumps([date, compute_status]))
    return True
def recommentation_in(input_ts, recomment_type):
    """Return detail rows for recommended users not yet in ``compute``.

    :param input_ts: timestamp converted to a date string by ``ts2datetime``.
    :param recomment_type: suffix of ``recomment_<date>_<type>``; also
        forwarded to ``get_user_detail`` as the user type.
    :returns: ``[]`` when the recommendation hash has no keys or all of them
        are keys of ``compute``; otherwise the result of ``get_user_detail``.
    """
    date = ts2datetime(input_ts)
    hash_name = 'recomment_' + str(date) + "_" + recomment_type

    # Only the keys are needed, so one HKEYS replaces HGETALL + HKEYS.
    recommended = set(r.hkeys(hash_name))
    if not recommended:
        return []

    remaining = list(recommended - set(r.hkeys("compute")))
    if not remaining:
        return []
    # search from user_profile to enrich the shown information
    return get_user_detail(date, remaining, 'show_in', recomment_type)
def identify_in(data):
    """Store each uid's status for its date and queue it in ``compute``.

    For every item the status is written under the uid in
    ``recomment_<date>`` and a JSON list ``[date, compute_status]`` is stored
    under the uid in ``compute``.

    :param data: iterable of ``(date, uid, status)`` sequences; ``date`` has
        the form ``'2013-09-01'`` and ``status`` is ``'1'`` or ``'2'``.
    :returns: ``True``.
    """
    default_compute_status = 0
    in_hash_name = 'recomment_'
    compute_hash_name = 'compute'
    for item in data:
        date = item[0]  # identify the date form '2013-09-01' with web
        uid = item[1]
        status = item[2]
        r.hset(in_hash_name + str(date), uid, status)
        # Decide the status per item. Previously an unrecognised status
        # reused the previous item's date/status, or raised NameError when
        # it occurred on the first item.
        if status in ('1', '2'):
            compute_status = status
        else:
            compute_status = default_compute_status
        r.hset(compute_hash_name, uid, json.dumps([date, compute_status]))
    return True
def new_identify_in(data, date, submit_user):
    """Record *submit_user*'s recommendations in ``submit_recomment_<date>``.

    A uid that is also a system recommendation (a key of the day's influence
    or sensitive hash) gets the user added to its ``'&'``-joined
    ``operation`` list; any other uid gets a fresh entry.

    :param data: iterable of sequences whose element at index 1 is the uid.
    :param date: date string used in all Redis hash names.
    :param submit_user: name of the recommending user.
    :returns: ``True``.
    """
    hashname_submit = "submit_recomment_" + date
    hashname_influence = "recomment_" + date + "_influence"
    hashname_sensitive = "recomment_" + date + "_sensitive"
    # uids recommended automatically by the system
    auto_recomment_set = (set(r.hkeys(hashname_influence))
                          | set(r.hkeys(hashname_sensitive)))

    for item in data:
        # item[0] carries a date as well, but the hash names above are built
        # from the ``date`` argument, so only the uid is needed here.
        uid = item[1]
        if uid in auto_recomment_set:
            tmp = json.loads(r.hget(hashname_submit, uid))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            tmp['operation'] = "&".join(list(set(recommentor_list)))
        else:
            tmp = {"system": "0", "operation": submit_user}
        r.hset(hashname_submit, uid, json.dumps(tmp))
    return True
def admin_recommentation_in(input_ts):
    """Return submitted recommendations for the admin view, busiest first.

    :param input_ts: timestamp converted to a date string by ``ts2datetime``.
    :returns: ``[]`` when ``submit_recomment_<date>`` has no keys or all of
        them are keys of ``compute``; otherwise the rows produced by
        ``get_user_detail`` (``'show_in'``, ``"sensitive"``, ``"admin"``)
        sorted by their last element in descending order.
    """
    date = ts2datetime(input_ts)
    hashname_submit = "submit_recomment_" + date

    # Only the keys are needed, so one HKEYS replaces HGETALL + HKEYS.
    submit_set = set(r.hkeys(hashname_submit))
    if not submit_set:
        return []

    # users already queued / stored
    identified_set = set(r.hkeys("compute"))
    pending = list(submit_set - identified_set)
    if not pending:
        return []
    # search from user_profile to enrich the shown information
    rows = get_user_detail(date, pending, 'show_in', "sensitive", "admin")
    return sorted(rows, key=lambda row: row[-1], reverse=True)
def recommentation_in_auto(search_date, submit_user):
    """Return detail rows for the automatic recommendations of *search_date*.

    :param search_date: date string used in every Redis hash name and
        forwarded to ``get_user_detail``.
    :param submit_user: field of the ``recomment_<date>_auto`` hash whose JSON
        list is merged with the list stored under the ``'auto'`` field.
    :returns: the result of ``get_user_detail`` for every merged uid that is
        not a key of the day's influence hash, the day's sensitive hash or
        the ``compute`` hash.
    """
    day_prefix = 'recomment_' + search_date
    auto_hash = day_prefix + '_auto'
    influence_hash = day_prefix + '_influence'
    sensitive_hash = day_prefix + '_sensitive'

    # step1: get auto
    raw_auto = r.hget(auto_hash, 'auto')
    system_uids = json.loads(raw_auto) if raw_auto else []

    # step2: get admin user result
    raw_admin = r.hget(auto_hash, submit_user)
    admin_uids = json.loads(raw_admin) if raw_admin else []

    # step3: get union user and filter compute/influence/sensitive
    proposed = set(system_uids) | set(admin_uids)
    excluded = (set(r.hkeys(influence_hash))
                | set(r.hkeys(sensitive_hash))
                | set(r.hkeys('compute')))
    remaining = list(proposed - excluded)

    # step4: get user detail
    return get_user_detail(search_date, remaining, 'show_in', 'auto')
def get_attr(date):
    """Return the overview hash with its string values JSON-decoded.

    :param date: accepted but not read; the hash is addressed through the
        ``hash_name`` name defined outside this function.
    :returns: dict with one entry per hash field; ``str`` values are passed
        through ``json.loads``, anything else is copied as is.
    """
    overview_result = r.hgetall(hash_name)
    decoded = {}
    for field, raw in overview_result.items():
        if isinstance(raw, str):
            decoded[field] = json.loads(raw)
        else:
            decoded[field] = raw
    return decoded
def submit_identify_in_uname(input_data):
    """Submit users, given by nickname, as recommendations of one user.

    :param input_data: dict with the keys ``'date'``, ``'user'`` and
        ``'upload_data'`` (newline-separated nicknames).
    :returns: ``'uname list valid'`` when no nickname matches a profile,
        ``'uname list all in'`` when every matched uid is already in the
        portrait index or the ``compute`` hash, otherwise ``True`` after the
        submissions have been written to Redis.
    """
    date = input_data['date']
    submit_user = input_data['user']
    upload_data = input_data['upload_data']
    # get uname list from upload data
    uname_list = upload_data.split('\n')

    # step1: get uid list from uname
    profile_exist_result = es_user_profile.search(
        index=profile_index_name, doc_type=profile_index_type,
        body={'query': {'terms': {'nick_name': uname_list}}},
        _source=False)['hits']['hits']
    uid_list = [profile_item['_id'] for profile_item in profile_exist_result]
    if not uid_list:
        return 'uname list valid'

    # step2.1: keep only uids without a user_portrait document
    exist_portrait_result = es_user_portrait.mget(
        index=portrait_index_name, doc_type=portrait_index_type,
        body={'ids': uid_list})['docs']
    new_uid_list = [exist_item['_id'] for exist_item in exist_portrait_result
                    if exist_item['found'] == False]
    if not new_uid_list:
        return 'uname list all in'

    # step2.2: drop uids already queued in compute. hkeys yields a list, so
    # it has to become a set before it can be subtracted from one.
    compute_set = set(r.hkeys('compute'))
    in_uid_list = list(set(new_uid_list) - compute_set)
    if not in_uid_list:
        return 'uname list all in'

    # step3: save submit
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = (set(r.hkeys(hashname_influence))
                          | set(r.hkeys(hashname_sensitive)))
    for in_item in in_uid_list:
        # Every read and write below is keyed by the uid being processed
        # (in_item), not by the last uid left over from step 1.
        if in_item in auto_recomment_set:
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            tmp['operation'] = '&'.join(list(set(recommentor_list)))
        else:
            tmp = {'system': '0', 'operation': submit_user}
        r.hset(hashname_submit, in_item, json.dumps(tmp))
        r.hset(submit_user_recomment, in_item, '0')
    return True
def submit_identify_in_uid(input_data):
    """Submit uploaded uids as recommendations of one user.

    The first ten characters of every uploaded line are taken as the uid;
    lines shorter than ten characters are ignored.

    :param input_data: dict with the keys ``'date'``, ``'user'`` and
        ``'upload_data'`` (newline-separated uids).
    :returns: ``True``.
    """
    date = input_data['date']
    submit_user = input_data['user']
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = (set(r.hkeys(hashname_influence))
                          | set(r.hkeys(hashname_sensitive)))

    upload_data = input_data['upload_data']
    uid_list = [line[:10] for line in upload_data.split('\n')
                if len(line) >= 10]

    # identify the uid is not exist in user_portrait and compute
    # step1: filter in user_portrait
    exist_portrait_result = es_user_portrait.mget(
        index=portrait_index_name, doc_type=portrait_index_type,
        body={'ids': uid_list}, _source=False)['docs']
    new_uid_list = [exist_item['_id'] for exist_item in exist_portrait_result
                    if exist_item['found'] == False]

    # step2: filter in compute
    compute_set = set(r.hkeys('compute'))
    in_uid_list = list(set(new_uid_list) - compute_set)

    for in_item in in_uid_list:
        if in_item in auto_recomment_set:
            # Read from hashname_submit; the former spelling
            # 'hashtname_submit' was an undefined name.
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            tmp['operation'] = '&'.join(list(set(recommentor_list)))
        else:
            tmp = {'system': '0', 'operation': submit_user}
        r.hset(hashname_submit, in_item, json.dumps(tmp))
        r.hset(submit_user_recomment, in_item, '0')
    return True
def recommentation_in(input_ts):
    """Return detail rows for the uids stored with status ``'0'``.

    :param input_ts: timestamp converted to a date string by ``ts2datetime``.
    :returns: the (empty) ``hgetall`` result itself when ``recomment_<date>``
        holds nothing, ``[]`` when no entry has status ``'0'``, otherwise the
        result of ``get_user_detail`` in ``'show_in'`` mode.
    """
    date = ts2datetime(input_ts)
    # read from redis
    stored = r.hgetall('recomment_' + str(date))
    if not stored:
        return stored

    pending = [uid for uid in stored if stored[uid] == '0']
    if not pending:
        return []
    # search from user_profile to enrich the shown information
    return get_user_detail(date, pending, 'show_in')
def submit_identify_in_url(input_data):
    """Extract uids from profile URLs and submit them as recommendations.

    The uid is the last ten characters of the third ``/``-separated segment,
    e.g. ``weibo.com/p/1002065727942146/album?...``.

    :param input_data: dict with the keys ``'date'``, ``'user'`` and
        ``'upload_data'`` (newline-separated URLs).
    :returns: ``True``.
    :raises IndexError: when a line has fewer than three segments.
    """
    date = input_data['date']
    submit_user = input_data['user']
    upload_data = input_data['upload_data']

    # step1: get uid list from input_data url
    uid_list = []
    for url_item in upload_data.split('\n'):
        # url_item = 'weibo.com/p/1002065727942146/album?.....'
        segments = url_item.split('/')
        uid_list.append(segments[2][-10:])

    # step2.1: keep only uids without a user_portrait document
    exist_portrait_result = es_user_portrait.mget(
        index=portrait_index_name, doc_type=portrait_index_type,
        body={'ids': uid_list}, _source=True)['docs']
    new_uid_list = [exist_item['_id'] for exist_item in exist_portrait_result
                    if exist_item['found'] == False]

    # step2.2: drop uids already queued in compute. hkeys yields a list, so
    # it has to become a set before it can be subtracted from one.
    compute_set = set(r.hkeys('compute'))
    in_uid_list = list(set(new_uid_list) - compute_set)

    # step3: save
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = (set(r.hkeys(hashname_influence))
                          | set(r.hkeys(hashname_sensitive)))
    for in_item in in_uid_list:
        # Every read and write below is keyed by the uid being processed
        # (in_item), not by the last uid left over from step 1.
        if in_item in auto_recomment_set:
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            tmp['operation'] = '&'.join(list(set(recommentor_list)))
        else:
            tmp = {'system': '0', 'operation': submit_user}
        r.hset(hashname_submit, in_item, json.dumps(tmp))
        r.hset(submit_user_recomment, in_item, '0')
    return True
def submit_identify_in_uid(input_data):
    """Submit uploaded uids as recommendations of one user.

    A line contributes its first ten characters as uid; lines shorter than
    ten characters are skipped.

    :param input_data: dict with the keys ``'date'``, ``'user'`` and
        ``'upload_data'`` (newline-separated uids).
    :returns: ``True``.
    """
    date = input_data['date']
    submit_user = input_data['user']
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = (set(r.hkeys(hashname_influence))
                          | set(r.hkeys(hashname_sensitive)))

    uid_list = []
    for line in input_data['upload_data'].split('\n'):
        uid = line[:10]
        if len(uid) == 10:
            uid_list.append(uid)

    # identify the uid is not exist in user_portrait and compute
    # step1: filter in user_portrait
    exist_portrait_result = es_user_portrait.mget(
        index=portrait_index_name, doc_type=portrait_index_type,
        body={'ids': uid_list}, _source=False)['docs']
    new_uid_set = set(exist_item['_id']
                      for exist_item in exist_portrait_result
                      if exist_item['found'] == False)

    # step2: filter in compute
    in_uid_list = list(new_uid_set - set(r.hkeys('compute')))

    for in_item in in_uid_list:
        if in_item in auto_recomment_set:
            # 'hashtname_submit' was a misspelling of hashname_submit and
            # raised NameError as soon as this branch ran.
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            tmp['operation'] = '&'.join(list(set(recommentor_list)))
        else:
            tmp = {'system': '0', 'operation': submit_user}
        r.hset(hashname_submit, in_item, json.dumps(tmp))
        r.hset(submit_user_recomment, in_item, '0')
    return True
def show_compute(date):
    """Return detail rows for users queued in the ``compute`` hash.

    :param date: ``'all'`` to show every entry, otherwise only entries whose
        stored JSON list starts with this date are shown.
    :returns: ``[]`` when the hash is empty or nothing matches *date*,
        otherwise the result of ``get_user_detail`` in ``'show_compute'``
        mode.
    """
    queued = r.hgetall('compute')
    if not queued:
        return []
    if date == 'all':
        return get_user_detail(date, queued, 'show_compute')

    # search user profile to enrich information, restricted to one in-date
    selected = dict(
        (user, raw) for user, raw in queued.items()
        if json.loads(raw)[0] == date
    )
    if not selected:
        return []
    return get_user_detail(date, selected, 'show_compute')
def _sensitive_words_for(date, uid):
    """Return the sensitive words stored for *uid* on the day before *date*.

    Reads field *uid* of ``sensitive_<ts>`` in ``r_cluster`` and returns the
    keys of the JSON object found there, or ``[]`` when nothing is stored.
    """
    tmp_ts = datetime2ts(date) - DAY
    tmp_data = r_cluster.hget("sensitive_" + str(tmp_ts), uid)
    if not tmp_data:
        return []
    return list(json.loads(tmp_data).keys())


def get_user_detail(date, input_result, status, user_type="influence", auth=""):
    """Build display rows (profile + influence) for a set of uids.

    :param date: ``'YYYY-MM-DD'`` date selecting the ``bci_<yyyymmdd>`` index,
        or ``'all'`` to use today's index.
    :param input_result: list of uids for ``status == 'show_in'``; dict keyed
        by uid for ``'show_compute'`` (values are JSON ``[in_date, status]``)
        and ``'show_in_history'`` (values are the stored in-status).
    :param status: ``'show_in'``, ``'show_compute'`` or ``'show_in_history'``.
    :param user_type: ``"sensitive"`` appends the uid's sensitive words to
        ``'show_in'`` and ``'show_in_history'`` rows.
    :param auth: when truthy, ``'show_in'`` rows are extended with the
        ``system`` flag, the de-duplicated recommender list and the number of
        recommenders read from ``submit_recomment_<date>``.
    :returns: list of rows starting with ``[uid, uname, location, fansnum,
        statusnum, influence]``; missing profile fields and a missing
        influence value are reported as ``''``.
    """
    results = []
    if status == 'show_in':
        uid_list = list(input_result)
    if status in ('show_compute', 'show_in_history'):
        uid_list = list(input_result.keys())

    if date != 'all':
        index_name = 'bci_' + ''.join(date.split('-'))
    else:
        now_date = ts2datetime(time.time())
        index_name = 'bci_' + ''.join(now_date.split('-'))
    index_type = 'bci'
    user_bci_result = es_cluster.mget(
        index=index_name, doc_type=index_type,
        body={'ids': uid_list}, _source=True)['docs']
    user_profile_result = es_user_profile.mget(
        index='weibo_user', doc_type='user',
        body={'ids': uid_list}, _source=True)['docs']
    max_evaluate_influ = get_evaluate_max(index_name)

    # mget answers with one doc per requested id, in request order.
    for uid, bci_dict, profile_dict in zip(uid_list, user_bci_result,
                                           user_profile_result):
        try:
            bci_source = bci_dict['_source']
        except (KeyError, TypeError):
            bci_source = None
        if bci_source:
            influence = bci_source['user_index']
            influence = math.log(
                influence / max_evaluate_influ['user_index'] * 9 + 1, 10)
            influence = influence * 100
        else:
            influence = ''

        try:
            profile_source = profile_dict['_source']
        except (KeyError, TypeError):
            profile_source = None
        if profile_source:
            uname = profile_source['nick_name']
            location = profile_source['user_location']
            fansnum = profile_source['fansnum']
            statusnum = profile_source['statusnum']
        else:
            uname = location = fansnum = statusnum = ''

        base_row = [uid, uname, location, fansnum, statusnum, influence]

        if status == 'show_in':
            if user_type == "sensitive":
                # The helper always returns a list, so a user without stored
                # words gets [] instead of an unbound or stale variable.
                base_row.append(_sensitive_words_for(date, uid))
            if auth:
                hashname_submit = "submit_recomment_" + date
                tmp_data = json.loads(r.hget(hashname_submit, uid))
                recommend_list = tmp_data['operation'].split('&')
                base_row.extend([
                    tmp_data['system'],
                    list(set(recommend_list)),
                    len(recommend_list),
                ])
            results.append(base_row)

        if status == 'show_compute':
            stored = json.loads(input_result[uid])
            in_date = stored[0]
            compute_status = stored[1]
            if compute_status == '1':
                compute_status = '3'
            results.append(base_row + [in_date, compute_status])

        if status == 'show_in_history':
            base_row.append(input_result[uid])
            if user_type == "sensitive":
                base_row.append(_sensitive_words_for(date, uid))
            results.append(base_row)
    return results
def admin_delete_submit_in(date, uid_list):
    """Remove the given uids from the ``identify_in_<date>`` hash.

    :param date: date used to build the Redis hash name.
    :param uid_list: iterable of uids; one ``hdel`` is issued per uid.
    :returns: ``True``.
    """
    target_hash = 'identify_in_' + str(date)
    for member in uid_list:
        r.hdel(target_hash, member)
    return True
def get_retweeted_top():
    """Return the JSON-decoded ``'top_retweeted_user'`` field of the overview hash.

    The hash is addressed through the ``hash_name`` name defined outside this
    function.

    :raises KeyError: when the field is absent from the hash.
    """
    raw_top = r.hgetall(hash_name)['top_retweeted_user']
    return json.loads(raw_top)
def get_comment_top():
    """Return the JSON-decoded ``'top_comment_user'`` field of the overview hash.

    The hash is addressed through the ``hash_name`` name defined outside this
    function.

    :raises KeyError: when the field is absent from the hash.
    """
    raw_top = r.hgetall(hash_name)['top_comment_user']
    return json.loads(raw_top)
def submit_identify_in_uid(input_data):
    """Validate uploaded uids and (optionally) submit them as recommendations.

    :param input_data: dict with the keys ``'date'``, ``'user'``,
        ``'operation_type'`` and ``'upload_data'`` (one uid per line,
        ``\\n`` or ``\\r\\n`` terminated; blank lines are ignored).
    :returns: ``(False, 'all user in')`` when every uid is already in the
        portrait index or the ``compute`` hash, otherwise
        ``(True, invalid_uids, uids_already_in_portrait, submitted_uids)``.
        ``invalid_uids`` is always ``[]`` because no uid validation is
        performed. Redis is only written when ``operation_type`` is
        ``'submit'``.
    """
    # Function-scope import so the block does not depend on the module header.
    import logging
    logger = logging.getLogger(__name__)

    date = input_data['date']
    submit_user = input_data['user']
    operation_type = input_data['operation_type']
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = (set(r.hkeys(hashname_influence))
                          | set(r.hkeys(hashname_sensitive)))

    upload_data = input_data['upload_data']
    uid_list = []
    for line in upload_data.split('\n'):
        uid = line.split('\r')[0]
        if uid != '':
            uid_list.append(uid)
    # Nothing ever marks a uid as invalid; kept only for the return shape.
    invalid_uid_list = []

    # identify the uid is not exist in user_portrait and compute
    # step1: filter in user_portrait
    new_uid_list = []
    have_in_uid_list = []
    try:
        exist_portrait_result = es_user_portrait.mget(
            index=portrait_index_name, doc_type=portrait_index_type,
            body={'ids': uid_list}, _source=False)['docs']
    except Exception:
        # Best effort: if the lookup fails, treat every uid as new.
        logger.warning('user_portrait mget failed; treating all uids as new',
                       exc_info=True)
        exist_portrait_result = []
    if exist_portrait_result:
        for exist_item in exist_portrait_result:
            if exist_item['found'] == False:
                new_uid_list.append(exist_item['_id'])
            else:
                have_in_uid_list.append(exist_item['_id'])
    else:
        new_uid_list = uid_list

    # step2: filter in compute
    new_uid_set = set(new_uid_list)
    compute_set = set(r.hkeys('compute'))
    in_uid_list = list(new_uid_set - compute_set)
    logger.debug('new_uid_set: %s', new_uid_set)
    logger.debug('in_uid_set: %s', in_uid_list)
    if not in_uid_list:
        return False, 'all user in'

    # identify the final add user
    final_submit_user_list = []
    for in_item in in_uid_list:
        if in_item in auto_recomment_set:
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            tmp['operation'] = '&'.join(list(set(recommentor_list)))
        else:
            tmp = {'system': '0', 'operation': submit_user}
        if operation_type == 'submit':
            r.hset(hashname_submit, in_item, json.dumps(tmp))
            r.hset(submit_user_recomment, in_item, '0')
        # NOTE(review): recorded for every candidate, so a non-'submit' call
        # works as a preview -- confirm this nesting against the original.
        final_submit_user_list.append(in_item)
    return True, invalid_uid_list, have_in_uid_list, final_submit_user_list
def get_importance_top():
    """Return the JSON-decoded ``top_importance`` field of the overview hash.

    The hash is the one named by ``hash_name`` (defined outside this
    function) and is fetched in full with ``r.hgetall``.
    """
    return json.loads(r.hgetall(hash_name)['top_importance'])
sensitive_words = sensitive_dict.keys() else: sensitive_words = [] if sensitive_history_dict.get('fields',0): #print sensitive_history_dict['fields'][sensitive_string][0] #print top_sensitive sensitive_value = math.log(sensitive_history_dict['fields'][sensitive_string][0]/float(top_sensitive)*9+1, 10)*100 #print "sensitive_value", sensitive_value else: sensitive_value = 0 results.append([uid, uname, location, fansnum, statusnum, influence, sensitive_words, sensitive_value]) else: results.append([uid, uname, location, fansnum, statusnum, influence]) if auth: hashname_submit = "submit_recomment_" + date tmp_data = json.loads(r.hget(hashname_submit, uid)) recommend_list = (tmp_data['operation']).split('&') admin_list = [] admin_list.append(tmp_data['system']) admin_list.append(list(set(recommend_list))) admin_list.append(len(recommend_list)) results[-1].extend(admin_list) if status == 'show_compute': in_date = json.loads(input_result[uid])[0] compute_status = json.loads(input_result[uid])[1] if compute_status == '1': compute_status = '3' results.append([uid, uname, location, fansnum, statusnum, influence, in_date, compute_status]) if status == 'show_in_history': in_status = input_result[uid] if user_type == "sensitive":
def get_recommentation(submit_user):
    """List the uids recorded for ``submit_user`` over the last seven days.

    The reference time is ``time.time()`` when ``RUN_TYPE`` is truthy and
    ``datetime2ts(RUN_TEST_TIME)`` otherwise.  For each of the seven days
    ending at that time, the keys of the hash
    ``recomment_<submit_user>_<date>`` are looked up in the ``bci_<yyyymmdd>``
    index of the day before and in the ``weibo_user`` profile index.  Days
    without uids or without a bci index are skipped.

    Returns a list of rows
    ``[date, uid, uname, location, fansnum, statusnum, influence, in_portrait]``.
    Missing profile fields and a missing influence are reported as ``''``;
    ``in_portrait`` is ``"1"`` when the uid is a key of the ``compute`` hash
    and ``"0"`` otherwise.
    """
    if RUN_TYPE:
        now_ts = time.time()
    else:
        now_ts = datetime2ts(RUN_TEST_TIME)
    in_portrait_set = set(r.hkeys("compute"))

    result = []
    for day_offset in range(7):
        iter_ts = now_ts - day_offset * DAY
        iter_date = ts2datetime(iter_ts)
        uid_list = r.hkeys("recomment_" + submit_user + "_" + str(iter_date))
        if not uid_list:
            # Nothing recorded for this day: skip the index lookups entirely.
            continue
        bci_index_name = "bci_" + ts2datetime(iter_ts - DAY).replace('-', '')
        if not es_cluster.indices.exists(index=bci_index_name):
            continue

        user_bci_result = es_cluster.mget(
            index=bci_index_name, doc_type="bci",
            body={'ids': uid_list}, _source=True)['docs']
        user_profile_result = es_user_profile.mget(
            index='weibo_user', doc_type='user',
            body={'ids': uid_list}, _source=True)['docs']
        max_evaluate_influ = get_evaluate_max(bci_index_name)

        for uid, bci_dict, profile_dict in zip(
                uid_list, user_bci_result, user_profile_result):
            bci_source = bci_dict.get('_source')
            if bci_source:
                # float() keeps the ratio from being truncated by Python 2
                # integer division when both values are ints.
                ratio = (float(bci_source['user_index']) /
                         max_evaluate_influ['user_index'])
                influence = math.log(ratio * 9 + 1, 10) * 100
            else:
                influence = ''

            profile_source = profile_dict.get('_source')
            if profile_source:
                uname = profile_source['nick_name']
                location = profile_source['user_location']
                fansnum = profile_source['fansnum']
                statusnum = profile_source['statusnum']
            else:
                uname = location = fansnum = statusnum = ''

            in_portrait = "1" if uid in in_portrait_set else "0"
            result.append([
                iter_date, uid, uname, location, fansnum, statusnum,
                influence, in_portrait
            ])
    return result
def get_online_pattern():
    """Return the JSON-decoded ``online_pattern_top`` field of the overview hash.

    The hash is the one named by ``hash_name`` (defined outside this
    function) and is fetched in full with ``r.hgetall``.
    """
    return json.loads(r.hgetall(hash_name)['online_pattern_top'])
def get_geo_top():
    """Return the JSON-decoded ``activity_geo_top`` field of the overview hash.

    The hash is the one named by ``hash_name`` (defined outside this
    function) and is fetched in full with ``r.hgetall``.
    """
    return json.loads(r.hgetall(hash_name)['activity_geo_top'])
def get_hashtag_top():
    """Return the JSON-decoded ``hashtag_top`` field of the overview hash.

    The hash is the one named by ``hash_name`` (defined outside this
    function) and is fetched in full with ``r.hgetall``.
    """
    return json.loads(r.hgetall(hash_name)['hashtag_top'])
def get_activeness_top():
    """Return the JSON-decoded ``top_activeness`` field of the overview hash.

    The hash is the one named by ``hash_name`` (defined outside this
    function) and is fetched in full with ``r.hgetall``.
    """
    return json.loads(r.hgetall(hash_name)['top_activeness'])
def _get_sensitive_words(date, uid):
    """Return the keys of the JSON dict stored for ``uid`` in the
    ``sensitive_<ts>`` hash of the day before ``date``, or ``[]`` if the
    hash has no entry for the uid.
    """
    # NOTE(review): datetime2ts is called with ``date`` as given; whether it
    # accepts the special value 'all' is not visible here -- confirm.
    tmp_ts = datetime2ts(date) - DAY
    tmp_data = r_cluster.hget("sensitive_" + str(tmp_ts), uid)
    if tmp_data:
        return list(json.loads(tmp_data).keys())
    return []


def get_user_detail(date, input_result, status, user_type="influence", auth=""):
    """Build display rows (profile, influence, status extras) for some uids.

    Args:
        date: day string accepted by ``datetime2ts``; the bci index of the
            previous day is queried.  With ``'all'`` the bci index named
            after today's date is queried instead.
        input_result: for ``status == 'show_in'`` a sequence of uids; for
            ``'show_compute'`` a mapping of uid to a JSON string
            ``[in_date, compute_status]``; for ``'show_in_history'`` a
            mapping of uid to in_status.
        status: ``'show_in'``, ``'show_compute'`` or ``'show_in_history'``.
        user_type: with ``"sensitive"`` the sensitive words of the user are
            added to ``'show_in'`` and ``'show_in_history'`` rows.
        auth: when truthy (``'show_in'`` only) each row is extended with the
            stored ``system`` flag, the distinct recommenders and the number
            of entries in the stored ``operation`` string.

    Returns:
        A list of rows, each starting with
        ``[uid, uname, location, fansnum, statusnum, influence]``.  Missing
        profile fields and a missing influence are reported as ``''``.
        An empty input yields ``[]`` without querying any index.

    Raises:
        ValueError: if ``status`` is not one of the three known values.
    """
    if status == 'show_in':
        uid_list = list(input_result)
    elif status in ('show_compute', 'show_in_history'):
        uid_list = list(input_result.keys())
    else:
        raise ValueError('unknown status: %r' % (status,))
    if not uid_list:
        return []

    # Only convert ``date`` when it is a real day; the conversion used to run
    # before the 'all' check.
    if date != 'all':
        bci_date = ts2datetime(datetime2ts(date) - DAY)
        index_name = 'bci_' + bci_date.replace('-', '')
    else:
        index_name = 'bci_' + ts2datetime(time.time()).replace('-', '')

    user_bci_result = es_cluster.mget(
        index=index_name, doc_type='bci',
        body={'ids': uid_list}, _source=True)['docs']
    user_profile_result = es_user_profile.mget(
        index='weibo_user', doc_type='user',
        body={'ids': uid_list}, _source=True)['docs']
    max_evaluate_influ = get_evaluate_max(index_name)

    results = []
    for uid, bci_dict, profile_dict in zip(
            uid_list, user_bci_result, user_profile_result):
        bci_source = bci_dict.get('_source')
        if bci_source:
            # float() keeps the ratio from being truncated by Python 2
            # integer division when both values are ints.
            ratio = (float(bci_source['user_index']) /
                     max_evaluate_influ['user_index'])
            influence = math.log(ratio * 9 + 1, 10) * 100
        else:
            influence = ''

        profile_source = profile_dict.get('_source')
        if profile_source:
            uname = profile_source['nick_name']
            location = profile_source['user_location']
            fansnum = profile_source['fansnum']
            statusnum = profile_source['statusnum']
        else:
            uname = location = fansnum = statusnum = ''

        row = [uid, uname, location, fansnum, statusnum, influence]

        if status == 'show_in':
            if user_type == "sensitive":
                row.append(_get_sensitive_words(date, uid))
            if auth:
                hashname_submit = "submit_recomment_" + date
                # NOTE(review): presumably hget yields None when no submit
                # record exists for uid, which would make json.loads raise;
                # no safe default row is evident, so this is left as is.
                tmp_data = json.loads(r.hget(hashname_submit, uid))
                recommend_list = tmp_data['operation'].split('&')
                row.extend([
                    tmp_data['system'],
                    list(set(recommend_list)),
                    len(recommend_list),
                ])
        elif status == 'show_compute':
            compute_info = json.loads(input_result[uid])
            in_date = compute_info[0]
            compute_status = compute_info[1]
            if compute_status == '1':
                compute_status = '3'
            row.extend([in_date, compute_status])
        else:  # 'show_in_history'
            row.append(input_result[uid])
            if user_type == "sensitive":
                # Falls back to [] when nothing is stored; previously the
                # variable was unbound or carried over from the previous uid.
                row.append(_get_sensitive_words(date, uid))

        results.append(row)
    return results