def submit_identify_in_uname(input_data):
    """Submit users, identified by nickname, for inclusion.

    input_data is a dict; the keys read are 'date', 'user' (the submitting
    operator) and 'upload_data' (one nickname per line).

    Returns:
        'uname list valid'  -- no nickname could be resolved to a uid (the
                               literal is kept because callers may compare
                               against it).
        'uname list all in' -- every resolved uid is already in the portrait
                               index or in the 'compute' hash.
        True                -- the remaining uids were written to the
                               submit hashes.
    """
    date = input_data['date']
    submit_user = input_data['user']
    upload_data = input_data['upload_data']

    # Drop the '\r' left behind by CRLF uploads and ignore blank lines;
    # otherwise 'name\r' never matches a stored nick_name.
    uname_list = []
    for raw_line in upload_data.split('\n'):
        uname = raw_line.rstrip('\r')
        if uname:
            uname_list.append(uname)
    if not uname_list:
        return 'uname list valid'

    # step1: get uid list from uname
    # 'size' is set explicitly: the search API returns only 10 hits by
    # default, which would silently truncate larger uploads.
    profile_hits = es_user_profile.search(
        index=profile_index_name,
        doc_type=profile_index_type,
        body={
            'query': {'terms': {'nick_name': uname_list}},
            'size': len(uname_list),
        },
        _source=False)['hits']['hits']
    uid_list = [hit['_id'] for hit in profile_hits]
    if not uid_list:
        return 'uname list valid'

    # step2: keep only users that are neither in user_portrait nor in compute
    # step2.1: identify in user_portrait
    portrait_docs = es_user_portrait.mget(
        index=portrait_index_name,
        doc_type=portrait_index_type,
        body={'ids': uid_list})['docs']
    new_uid_list = [doc['_id'] for doc in portrait_docs if not doc['found']]
    if not new_uid_list:
        return 'uname list all in'

    # step2.2: identify in compute
    compute_set = set(r.hkeys('compute'))
    in_uid_list = list(set(new_uid_list) - compute_set)
    if not in_uid_list:
        return 'uname list all in'

    # step3: save submit
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(
        r.hkeys(hashname_sensitive))
    for in_item in in_uid_list:
        if in_item in auto_recomment_set:
            # Already in one of the recommendation hashes: add this operator
            # to the '&'-joined operator list of the stored record.
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            tmp['operation'] = '&'.join(set(recommentor_list))
        else:
            tmp = {'system': '0', 'operation': submit_user}
        r.hset(hashname_submit, in_item, json.dumps(tmp))
        r.hset(submit_user_recomment, in_item, '0')
    return True
def new_identify_in(data, date, submit_user):
    """Record submit_user's submission of every user listed in data.

    data is iterated as a sequence of items whose element 0 is a date
    (form '2013-09-01', supplied by the web side) and element 1 a uid.
    For each uid one record is written to the 'submit_recomment_<date>'
    hash and a '0' marker to the operator's own
    'recomment_<submit_user>_<date>' hash.  Always returns True.
    """
    in_status = 1
    compute_status = 0

    submit_key = "submit_recomment_" + date
    influence_key = "recomment_" + date + "_influence"
    sensitive_key = "recomment_" + date + "_sensitive"
    # the operator's own (self-recommended) list
    operator_key = "recomment_" + submit_user + "_" + str(date)

    # the system's automatic recommendation list
    influence_uids = set(r.hkeys(influence_key))
    sensitive_uids = set(r.hkeys(sensitive_key))
    system_recommended = influence_uids | sensitive_uids

    for entry in data:
        # entry[0] is the per-item date; it is read but not used below.
        date, uid = entry[0], entry[1]
        if uid not in system_recommended:
            record = {"system": "0", "operation": submit_user}
        else:
            # Merge this operator into the stored '&'-joined operator list.
            record = json.loads(r.hget(submit_key, uid))
            operators = record['operation'].split('&')
            operators.append(str(submit_user))
            record['operation'] = "&".join(list(set(operators)))
        r.hset(submit_key, uid, json.dumps(record))
        r.hset(operator_key, uid, "0")
    return True
def submit_identify_in_uname(input_data):
    """Validate and optionally submit users, identified by nickname.

    input_data is a dict; the keys read are 'date', 'user' (the submitting
    operator), 'operation_type' and 'upload_data' (one nickname per line).
    The hashes are only written when operation_type == 'submit'; any other
    value just computes the lists.

    Returns one of:
        (False, 'invalid user info', invalid_names)
            -- some nickname has no profile, or the upload has no names.
        (False, 'all user in')
            -- every uid is already in the portrait index or in 'compute'.
        (True, invalid_names, already_in_uids, final_uids)
            -- invalid_names is always empty on this path.
    """
    date = input_data['date']
    submit_user = input_data['user']
    operation_type = input_data['operation_type']
    upload_data = input_data['upload_data']

    # Strip the '\r' of CRLF uploads and skip blank lines.  Without the
    # blank-line filter a trailing newline yields a '' entry that can never
    # match a profile, so every such upload was rejected as invalid.
    uname_list = []
    for raw_line in upload_data.split('\n'):
        uname = raw_line.split('\r')[0]
        if uname:
            uname_list.append(uname)
    if not uname_list:
        return False, 'invalid user info', []

    # step1: get uid list from uname
    # 'size' is set explicitly: the search API returns only 10 hits by
    # default, so longer uploads had their remaining names flagged invalid.
    profile_hits = es_user_profile.search(
        index=profile_index_name,
        doc_type=profile_index_type,
        body={
            'query': {'terms': {'nick_name': uname_list}},
            'size': len(uname_list),
        },
        _source=False,
        fields=['nick_name'])['hits']['hits']
    uid_list = []
    valid_uname_list = []
    for hit in profile_hits:
        uid_list.append(hit['_id'])
        valid_uname_list.append(hit['fields']['nick_name'][0])
    invalid_user_list = list(set(uname_list) - set(valid_uname_list))
    if invalid_user_list:
        return False, 'invalid user info', invalid_user_list

    # step2: keep only users that are neither in user_portrait nor in compute
    # step2.1: identify in user_portrait
    portrait_docs = es_user_portrait.mget(
        index=portrait_index_name,
        doc_type=portrait_index_type,
        body={'ids': uid_list})['docs']
    new_uid_list = [doc['_id'] for doc in portrait_docs if not doc['found']]
    have_in_user_list = [doc['_id'] for doc in portrait_docs if doc['found']]
    if not new_uid_list:
        return False, 'all user in'

    # step2.2: identify in compute
    compute_set = set(r.hkeys('compute'))
    in_uid_list = list(set(new_uid_list) - compute_set)
    if not in_uid_list:
        return False, 'all user in'

    # step3: save submit
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(
        r.hkeys(hashname_sensitive))

    # identify final submit user list
    final_submit_user_list = []
    for in_item in in_uid_list:
        if in_item in auto_recomment_set:
            # Already in one of the recommendation hashes: add this operator
            # to the '&'-joined operator list of the stored record.
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            tmp['operation'] = '&'.join(set(recommentor_list))
        else:
            tmp = {'system': '0', 'operation': submit_user}
        if operation_type == 'submit':
            r.hset(hashname_submit, in_item, json.dumps(tmp))
            r.hset(submit_user_recomment, in_item, '0')
        final_submit_user_list.append(in_item)
    return True, invalid_user_list, have_in_user_list, final_submit_user_list
def submit_identify_in_url(input_data):
    """Validate and optionally submit users, identified by profile URL.

    input_data is a dict; the keys read are 'date', 'user' (the submitting
    operator), 'operation_type' and 'upload_data' (one URL per line).
    The uid is taken as the last 10 characters of the fifth '/'-separated
    URL segment, e.g. 'http://weibo.com/p/1002065727942146/album?...'.
    The hashes are only written when operation_type == 'submit'.

    Returns one of:
        (False, 'invalid user info', invalid_urls)
            -- some URL has fewer than five segments, or the upload holds
               no URL at all (invalid_urls is then empty).
        (False, 'all user in')
            -- every uid is already in the portrait index or in 'compute'.
        (True, invalid_urls, already_in_uids, final_uids)
            -- invalid_urls is always empty on this path.
    """
    date = input_data['date']
    submit_user = input_data['user']
    operation_type = input_data['operation_type']
    upload_data = input_data['upload_data']

    # step1: get uid list from input_data url
    uid_list = []
    invalid_uid_list = []
    for raw_line in upload_data.split('\n'):
        url_item = raw_line.split('\r')[0]
        if not url_item:
            # Blank / trailing lines are not URLs; reporting them as
            # invalid rejected every upload that ended with a newline.
            continue
        url_parts = url_item.split('/')
        try:
            uid_list.append(url_parts[4][-10:])
        except IndexError:
            invalid_uid_list.append(url_item)
    if invalid_uid_list or not uid_list:
        return False, 'invalid user info', invalid_uid_list

    # step2: identify uid list is not exist in user_portrait and compute
    # step2.1: identify in user_portrait
    # Only the 'found' flag is read, so the document source is not fetched.
    portrait_docs = es_user_portrait.mget(
        index=portrait_index_name,
        doc_type=portrait_index_type,
        body={'ids': uid_list},
        _source=False)['docs']
    new_uid_list = [doc['_id'] for doc in portrait_docs if not doc['found']]
    have_in_uid_list = [doc['_id'] for doc in portrait_docs if doc['found']]

    # step2.2: identify in compute
    compute_set = set(r.hkeys('compute'))
    in_uid_list = list(set(new_uid_list) - compute_set)
    if not in_uid_list:
        return False, 'all user in'

    # step3: save
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(
        r.hkeys(hashname_sensitive))

    # identify the final submit user
    final_submit_user_list = []
    for in_item in in_uid_list:
        if in_item in auto_recomment_set:
            # Already in one of the recommendation hashes: add this operator
            # to the '&'-joined operator list of the stored record.
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            tmp['operation'] = '&'.join(set(recommentor_list))
        else:
            tmp = {'system': '0', 'operation': submit_user}
        if operation_type == 'submit':
            r.hset(hashname_submit, in_item, json.dumps(tmp))
            r.hset(submit_user_recomment, in_item, '0')
        final_submit_user_list.append(in_item)
    return True, invalid_uid_list, have_in_uid_list, final_submit_user_list
def identify_compute(data):
    """Set the compute status of the given users to 1 in the 'compute' hash.

    data is iterated as a sequence of items whose element 1 is the uid.
    Each stored value is a JSON list whose first element (the in-date) is
    kept; the value is rewritten as [in_date, 1].  Always returns True.
    """
    hash_name = 'compute'
    compute_status = 1
    for item in data:
        uid = item[1]
        stored = r.hget(hash_name, uid)
        if stored is None:
            # uid has no entry in the hash, so there is no in-date to
            # preserve; skip it instead of failing on json.loads(None)
            # halfway through the batch.
            continue
        in_date = json.loads(stored)[0]
        r.hset(hash_name, uid, json.dumps([in_date, compute_status]))
    return True
def submit_identify_in_uname(input_data):
    """Submit users, identified by nickname, for inclusion.

    input_data is a dict; the keys read are 'date', 'user' (the submitting
    operator) and 'upload_data' (one nickname per line).

    Returns:
        'uname list valid'  -- no nickname could be resolved to a uid (the
                               literal is kept because callers may compare
                               against it).
        'uname list all in' -- every resolved uid is already in the portrait
                               index or in the 'compute' hash.
        True                -- the remaining uids were written to the
                               submit hashes.
    """
    date = input_data['date']
    submit_user = input_data['user']
    upload_data = input_data['upload_data']

    # get uname list from upload data; drop CRLF residue and blank lines
    uname_list = [line.rstrip('\r') for line in upload_data.split('\n')]
    uname_list = [uname for uname in uname_list if uname]
    if not uname_list:
        return 'uname list valid'

    # step1: get uid list from uname
    # 'size' is set explicitly: the search API returns only 10 hits by
    # default, which would silently truncate larger uploads.
    profile_exist_result = es_user_profile.search(
        index=profile_index_name,
        doc_type=profile_index_type,
        body={
            'query': {'terms': {'nick_name': uname_list}},
            'size': len(uname_list),
        },
        _source=False)['hits']['hits']
    uid_list = [profile_item['_id'] for profile_item in profile_exist_result]
    if not uid_list:
        return 'uname list valid'

    # step2: filter user not in user_portrait and compute
    # step2.1: identify in user_portrait
    exist_portrait_result = es_user_portrait.mget(
        index=portrait_index_name,
        doc_type=portrait_index_type,
        body={'ids': uid_list})['docs']
    new_uid_list = [exist_item['_id']
                    for exist_item in exist_portrait_result
                    if not exist_item['found']]
    if not new_uid_list:
        return 'uname list all in'

    # step2.2: identify in compute
    # hkeys() returns a list; it must be a set for the difference below
    # (set - list raises TypeError).
    compute_set = set(r.hkeys('compute'))
    in_uid_list = list(set(new_uid_list) - compute_set)
    if not in_uid_list:
        return 'uname list all in'

    # step3: save submit
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(
        r.hkeys(hashname_sensitive))
    for in_item in in_uid_list:
        # Every read/write is keyed on in_item.  The previous code used the
        # leftover 'uid' from step1, so only the last profile hit was ever
        # written, once per loop iteration.
        if in_item in auto_recomment_set:
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            tmp['operation'] = '&'.join(set(recommentor_list))
        else:
            tmp = {'system': '0', 'operation': submit_user}
        r.hset(hashname_submit, in_item, json.dumps(tmp))
        r.hset(submit_user_recomment, in_item, '0')
    return True
def identify_in(data):
    """Record identified users and queue them in the 'compute' hash.

    data is iterated as a sequence of items (date, uid, status), where the
    date has the form '2013-09-01' and comes from the web side.  For each
    item, 1 is written under uid in the 'identify_in_<date>' hash and the
    JSON list [date, compute_status] under uid in the 'compute' hash.
    compute_status is the status itself when it is '1' or '2', otherwise 0.
    Always returns True.
    """
    in_status = 1
    compute_hash_name = 'compute'
    for item in data:
        date = item[0]
        uid = item[1]
        status = item[2]
        identify_in_hashname = 'identify_in_' + str(date)
        r.hset(identify_in_hashname, uid, in_status)
        # Decide per item.  Previously in_date/compute_status were only
        # assigned inside the '1'/'2' branches, so any other status raised
        # on the first item or silently reused the previous item's values.
        compute_status = status if status in ('1', '2') else 0
        r.hset(compute_hash_name, uid, json.dumps([date, compute_status]))
    return True
def submit_identify_in_uid(input_data):
    """Submit users, identified by uid, for inclusion.

    input_data is a dict; the keys read are 'date', 'user' (the submitting
    operator) and 'upload_data' (one uid per line).  Only the first 10
    characters of a line are used, and lines shorter than 10 characters
    are ignored.  Uids already in the portrait index or in the 'compute'
    hash are skipped.  Always returns True.
    """
    date = input_data['date']
    submit_user = input_data['user']
    upload_data = input_data['upload_data']

    uid_list = []
    for line in upload_data.split('\n'):
        uid = line[:10]
        if len(uid) == 10:
            uid_list.append(uid)
    if not uid_list:
        # Nothing to look up; avoids sending an empty id list to mget.
        return True

    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(
        r.hkeys(hashname_sensitive))

    # identify the uid is not exist in user_portrait and compute
    # step1: filter in user_portrait
    exist_portrait_result = es_user_portrait.mget(
        index=portrait_index_name,
        doc_type=portrait_index_type,
        body={'ids': uid_list},
        _source=False)['docs']
    new_uid_list = [exist_item['_id']
                    for exist_item in exist_portrait_result
                    if not exist_item['found']]

    # step2: filter in compute
    compute_set = set(r.hkeys('compute'))
    in_uid_list = list(set(new_uid_list) - compute_set)

    for in_item in in_uid_list:
        if in_item in auto_recomment_set:
            # Fixed: this read used the misspelled name 'hashtname_submit'
            # and raised NameError for every uid in the recommendation
            # hashes.
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            tmp['operation'] = '&'.join(set(recommentor_list))
        else:
            tmp = {'system': '0', 'operation': submit_user}
        r.hset(hashname_submit, in_item, json.dumps(tmp))
        r.hset(submit_user_recomment, in_item, '0')
    return True
def identify_in(data):
    """Record the review status of users and queue them in 'compute'.

    data is iterated as a sequence of items (date, uid, status), where the
    date has the form '2013-09-01' and comes from the web side.  For each
    item, status is written under uid in the 'recomment_<date>' hash and
    the JSON list [date, compute_status] under uid in the 'compute' hash.
    compute_status is the status itself when it is '1' or '2', otherwise 0.
    Always returns True.
    """
    in_hash_name = 'recomment_'
    compute_hash_name = 'compute'
    for item in data:
        date = item[0]
        uid = item[1]
        status = item[2]
        in_hash_key = in_hash_name + str(date)
        r.hset(in_hash_key, uid, status)
        # Decide per item.  Previously in_date/compute_status were only
        # assigned inside the '1'/'2' branches, so any other status raised
        # on the first item or silently reused the previous item's values.
        compute_status = status if status in ('1', '2') else 0
        r.hset(compute_hash_name, uid, json.dumps([date, compute_status]))
    return True
def new_identify_in(data, date, submit_user):
    """Record submit_user's submission of every user listed in data.

    data is iterated as a sequence of items whose element 0 is a date
    (form '2013-09-01', supplied by the web side) and element 1 a uid.
    One record per uid is written to the 'submit_recomment_<date>' hash.
    Always returns True.
    """
    in_status = 1
    compute_status = 0

    submit_key = "submit_recomment_" + date
    influence_key = "recomment_" + date + "_influence"
    sensitive_key = "recomment_" + date + "_sensitive"

    # the system's automatic recommendation list
    influence_uids = set(r.hkeys(influence_key))
    sensitive_uids = set(r.hkeys(sensitive_key))
    system_recommended = influence_uids | sensitive_uids

    for entry in data:
        # entry[0] is the per-item date; it is read but not used below.
        date, uid = entry[0], entry[1]
        if uid not in system_recommended:
            record = {"system": "0", "operation": submit_user}
        else:
            # Merge this operator into the stored '&'-joined operator list.
            record = json.loads(r.hget(submit_key, uid))
            operators = record['operation'].split('&')
            operators.append(str(submit_user))
            record['operation'] = "&".join(list(set(operators)))
        r.hset(submit_key, uid, json.dumps(record))
    return True
def submit_identify_in_url(input_data):
    """Submit users, identified by profile URL, for inclusion.

    input_data is a dict; the keys read are 'date', 'user' (the submitting
    operator) and 'upload_data' (one URL per line, without scheme).  The
    uid is taken as the last 10 characters of the third '/'-separated
    segment, e.g. 'weibo.com/p/1002065727942146/album?...'.  Uids already
    in the portrait index or in the 'compute' hash are skipped.

    Returns True.  A non-blank line with fewer than three segments raises
    IndexError.
    """
    date = input_data['date']
    submit_user = input_data['user']
    upload_data = input_data['upload_data']

    # step1: get uid list from input_data url
    uid_list = []
    for raw_line in upload_data.split('\n'):
        # Drop CRLF residue (it would end up inside the 10-character uid
        # slice) and skip blank lines, which have no third segment.
        url_item = raw_line.rstrip('\r')
        if not url_item:
            continue
        url_parts = url_item.split('/')
        uid_list.append(url_parts[2][-10:])
    if not uid_list:
        # Nothing to look up; avoids sending an empty id list to mget.
        return True

    # step2: identify uid list is not exist in user_portrait and compute
    # step2.1: identify in user_portrait
    # Only the 'found' flag is read, so the document source is not fetched.
    exist_portrait_result = es_user_portrait.mget(
        index=portrait_index_name,
        doc_type=portrait_index_type,
        body={'ids': uid_list},
        _source=False)['docs']
    new_uid_list = [exist_item['_id']
                    for exist_item in exist_portrait_result
                    if not exist_item['found']]

    # step2.2: identify in compute
    # hkeys() returns a list; it must be a set for the difference below
    # (set - list raises TypeError).
    compute_set = set(r.hkeys('compute'))
    in_uid_list = list(set(new_uid_list) - compute_set)

    # step3: save
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(
        r.hkeys(hashname_sensitive))
    for in_item in in_uid_list:
        # Every read/write is keyed on in_item.  The previous code used the
        # leftover 'uid' from step1, so only the last uploaded uid was ever
        # written, once per loop iteration.
        if in_item in auto_recomment_set:
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            tmp['operation'] = '&'.join(set(recommentor_list))
        else:
            tmp = {'system': '0', 'operation': submit_user}
        r.hset(hashname_submit, in_item, json.dumps(tmp))
        r.hset(submit_user_recomment, in_item, '0')
    return True
def submit_identify_in_uid(input_data):
    """Submit users, identified by uid, for inclusion.

    input_data is a dict; the keys read are 'date', 'user' (the submitting
    operator) and 'upload_data' (one uid per line).  Only the first 10
    characters of a line are used, and lines shorter than 10 characters
    are ignored.  Uids already in the portrait index or in the 'compute'
    hash are skipped.  Always returns True.
    """
    date = input_data['date']
    submit_user = input_data['user']
    upload_data = input_data['upload_data']

    candidate_uids = [line[:10] for line in upload_data.split('\n')]
    uid_list = [uid for uid in candidate_uids if len(uid) == 10]
    if not uid_list:
        # Nothing to look up; avoids sending an empty id list to mget.
        return True

    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(
        r.hkeys(hashname_sensitive))

    # identify the uid is not exist in user_portrait and compute
    # step1: filter in user_portrait
    portrait_docs = es_user_portrait.mget(
        index=portrait_index_name,
        doc_type=portrait_index_type,
        body={'ids': uid_list},
        _source=False)['docs']
    new_uid_list = [doc['_id'] for doc in portrait_docs if not doc['found']]

    # step2: filter in compute
    compute_set = set(r.hkeys('compute'))
    in_uid_list = list(set(new_uid_list) - compute_set)

    for in_item in in_uid_list:
        if in_item in auto_recomment_set:
            # Fixed: this read used the misspelled name 'hashtname_submit'
            # and raised NameError for every uid in the recommendation
            # hashes.
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            tmp['operation'] = '&'.join(set(recommentor_list))
        else:
            tmp = {'system': '0', 'operation': submit_user}
        r.hset(hashname_submit, in_item, json.dumps(tmp))
        r.hset(submit_user_recomment, in_item, '0')
    return True
def submit_identify_in_uid(input_data):
    """Validate and optionally submit users, identified by uid.

    input_data is a dict; the keys read are 'date', 'user' (the submitting
    operator), 'operation_type' and 'upload_data' (one uid per line; a
    trailing '\\r' is dropped and blank lines are ignored).  The hashes are
    only written when operation_type == 'submit'.

    Returns one of:
        (False, 'all user in')
            -- no uid is left after removing those already in the portrait
               index or in the 'compute' hash (including an empty upload).
        (True, invalid_uids, already_in_uids, final_uids)
            -- invalid_uids is always empty: this function performs no
               uid format validation.
    """
    date = input_data['date']
    submit_user = input_data['user']
    operation_type = input_data['operation_type']
    upload_data = input_data['upload_data']

    uid_list = []
    invalid_uid_list = []  # never filled; kept for the return shape
    for line in upload_data.split('\n'):
        uid = line.split('\r')[0]
        if uid != '':
            uid_list.append(uid)
    if not uid_list:
        # Same answer the full pipeline gives for an empty upload, without
        # the pointless Redis / Elasticsearch round trips.
        return False, 'all user in'

    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(
        r.hkeys(hashname_sensitive))

    # identify the uid is not exist in user_portrait and compute
    # step1: filter in user_portrait
    new_uid_list = []
    have_in_uid_list = []
    try:
        exist_portrait_result = es_user_portrait.mget(
            index=portrait_index_name,
            doc_type=portrait_index_type,
            body={'ids': uid_list},
            _source=False)['docs']
    except Exception:
        # Best effort, as before: if the lookup fails, treat every uid as
        # new.  Narrowed from a bare except so KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        exist_portrait_result = []
    if exist_portrait_result:
        for exist_item in exist_portrait_result:
            if exist_item['found']:
                have_in_uid_list.append(exist_item['_id'])
            else:
                new_uid_list.append(exist_item['_id'])
    else:
        new_uid_list = uid_list

    # step2: filter in compute
    new_uid_set = set(new_uid_list)
    compute_set = set(r.hkeys('compute'))
    in_uid_set = list(new_uid_set - compute_set)
    # Debug output kept; written so it parses on Python 2 and 3 and prints
    # the same text as the former print statements.
    print('new_uid_set: %s' % (new_uid_set,))
    print('in_uid_set: %s' % (in_uid_set,))
    if not in_uid_set:
        return False, 'all user in'

    # identify the final add user
    final_submit_user_list = []
    for in_item in in_uid_set:
        if in_item in auto_recomment_set:
            # Already in one of the recommendation hashes: add this operator
            # to the '&'-joined operator list of the stored record.
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            tmp['operation'] = '&'.join(set(recommentor_list))
        else:
            tmp = {'system': '0', 'operation': submit_user}
        if operation_type == 'submit':
            r.hset(hashname_submit, in_item, json.dumps(tmp))
            r.hset(submit_user_recomment, in_item, '0')
        final_submit_user_list.append(in_item)
    return True, invalid_uid_list, have_in_uid_list, final_submit_user_list