def main():
    """Compute portrait attributes for the users in the uid list and save them in bulk.

    For every user returned by ``read_user_weibo`` the text attributes,
    flow information, evaluation index and profile (register) information
    are merged into one document; each document is paired with an
    ``index`` action and the whole list is handed to ``save_user_results``.

    Returns:
        True, always.
    """
    # read the uid list
    uid_list = read_uid_list()
    # get user weibo 7day {user:[weibos]}
    user_weibo_dict = read_user_weibo(uid_list)
    # from here on only the users present in user_weibo_dict are handled
    uid_list = user_weibo_dict.keys()
    flow_result = get_flow_information(uid_list)
    register_result = get_profile_information(uid_list)

    # compute text attribute
    bulk_action = []
    for user in user_weibo_dict:
        uid = str(user)
        # NOTE(review): compute_attribute() also stores weibo_list[0]['uname']
        # under 'uname'; this function never did -- confirm that is intended.
        results = compute_text_attribute(user, user_weibo_dict[user])
        results['uid'] = uid
        # update() is used instead of dict(a, **b) so that non-string keys
        # cannot raise TypeError, matching compute_attribute().
        results.update(flow_result[uid])

        # deal to the bulk action
        user_info = {
            'uid': uid,
            'domain': results['domain'],
            'topic': results['topic'],
            'activity_geo': results['activity_geo'],
        }
        results.update(get_evaluate_index(user_info, status='insert'))
        results.update(register_result[uid])

        bulk_action.extend([{'index': {'_id': uid}}, results])

    # save by bulk; skip the call when nothing was collected, as the other
    # entry points in this file do before flushing.
    if bulk_action:
        save_user_results(bulk_action)
    return True
def update_atttribute_week():
    """Scan every stored user document and recompute attributes in batches.

    The uids are read from the scan iterator and handed to
    ``read_user_weibo`` / ``compute2in(..., status='update')`` in batches:
    a batch is closed each time the running count reaches a multiple of
    1000, and whatever is left when the scan is exhausted is processed as
    a final, smaller batch.

    This function does not return: it terminates the interpreter with
    ``sys.exit(0)`` both when the scan completes and when reading from the
    scan raises an unexpected exception (in that case the batch being
    collected is not processed).
    """
    status = False
    count = 0
    # NOTE(review): 'user_portriat' looks like a misspelling of
    # 'user_portrait' -- confirm against the real index name before changing.
    index_name = 'user_portriat'
    index_type = 'user'
    # scan the user_portrait and bulk action to update
    s_re = scan(es, query={'query': {'match_all': {}}, 'size': 1000},
                index=index_name, doc_type=index_type)

    exhausted = False
    while not exhausted:
        uid_list = []
        while True:
            try:
                scan_re = next(s_re)['_source']
            except StopIteration:
                # Leave the inner loop instead of exiting here, so that the
                # uids collected so far are still processed below.
                exhausted = True
                break
            except Exception as r:
                print('%s %s' % (Exception, r))
                # NOTE(review): exit code 0 on failure is kept from the
                # original; consider a non-zero code if callers check it.
                sys.exit(0)
            count += 1
            uid_list.append(scan_re['uid'])
            if count % 1000 == 0:
                break
        if uid_list:
            # get user list weibo dict from weibo api
            user_weibo_dict = read_user_weibo(uid_list)
            status = compute2in(uid_list, user_weibo_dict, status='update')

    print('all done')
    print('status: %s' % (status,))
    sys.exit(0)
def compute_attribute(uid_list=None):
    """Compute and index portrait attributes for the given users.

    Args:
        uid_list: user ids to process; ``None`` (the default) is treated as
            an empty list. Only the users for which ``read_user_weibo``
            returns an entry are actually processed.

    Returns:
        The string "1", always.

    Each user's document is built from the text attributes, flow
    information, evaluation index and profile (register) information, and
    is paired with an ``index`` action. Accumulated actions are sent with
    ``es.bulk`` after every 200 users; any remainder is passed to
    ``save_user_results``.
    """
    # A None sentinel replaces the former mutable ``[]`` default, which was
    # one list object shared by every call.
    if uid_list is None:
        uid_list = []

    user_weibo_dict = read_user_weibo(uid_list)
    uid_list = user_weibo_dict.keys()
    flow_result = get_flow_information(uid_list)
    register_result = get_profile_information(uid_list)

    bulk_action = []
    count = 0
    for user in uid_list:
        weibo_list = user_weibo_dict[user]
        uname = weibo_list[0]['uname']
        results = compute_text_attribute(user, weibo_list)
        results['uname'] = uname
        results['uid'] = str(user)
        results.update(flow_result[str(user)])

        user_info = {
            'uid': str(user),
            'domain': results['domain'],
            'topic': results['topic'],
            'activity_geo': results['geo_string'],
        }
        results.update(get_evaluate_index(user_info, status='insert'))
        # NOTE(review): flow_result is keyed by str(user) but register_result
        # by user as-is -- confirm both lookups use the intended key type.
        results.update(register_result[user])

        bulk_action.extend([{'index': {'_id': str(user)}}, results])
        count += 1
        if count % 200 == 0:
            # index_name is not defined in this function; it must be
            # provided at module level.
            es.bulk(bulk_action, index=index_name, doc_type="user", timeout=60)
            bulk_action = []
            print(count)

    if bulk_action:
        save_user_results(bulk_action)
    return "1"
def update_portrait():
    """Write fresh flow information into the 'sensitive_user_portrait' index.

    For every user returned by ``read_user_weibo()`` an ``update`` action
    carrying that user's flow information as the partial ``doc`` is queued.
    The queue is sent with ``es.bulk`` after every 500 users (the running
    total is printed after each such flush) and once more at the end if
    anything is still queued.

    Returns:
        The string '1', always.
    """
    weibo_by_user = read_user_weibo()
    users = weibo_by_user.keys()
    flow_result = get_flow_information(users)

    pending = []
    for processed, user in enumerate(users, 1):
        header = {'update': {'_id': str(user)}}
        body = {'doc': flow_result[user]}
        pending += [header, body]

        # Nothing more to do until a full batch of 500 users is queued.
        if processed % 500 != 0:
            continue
        es.bulk(pending, index='sensitive_user_portrait', doc_type='user', timeout=60)
        pending = []
        print(processed)

    # Send whatever is left over from the last, incomplete batch.
    if pending:
        es.bulk(pending, index='sensitive_user_portrait', doc_type='user', timeout=60)
    return '1'