def main():
    """Compute the portrait document of every user and save them in one bulk call.

    The uid list comes from ``read_uid_list()``; only users for which
    ``read_user_weibo`` returns weibos are processed (the original note
    describes that mapping as ``{user: [weibos]}`` for 7 days).  For each
    user the text attributes, flow information, evaluation index and
    profile information are merged into one document, which is queued as
    an ``index`` action keyed by the uid string.

    Returns:
        Always ``True``; the value returned by ``save_user_results`` is
        not inspected.
    """
    weibos_by_user = read_user_weibo(read_uid_list())
    # Downstream lookups are restricted to users that actually have weibos.
    active_uids = weibos_by_user.keys()
    flow_by_uid = get_flow_information(active_uids)
    profile_by_uid = get_profile_information(active_uids)

    payload = []
    for user, weibo_list in weibos_by_user.items():
        # The value is not used below; the lookup is kept so that an empty
        # weibo list or a missing 'uname' key still raises at this point.
        _uname = weibo_list[0]['uname']
        doc = compute_text_attribute(user, weibo_list)
        uid = str(user)
        doc['uid'] = uid
        doc = dict(doc, **flow_by_uid[uid])
        # The evaluation index is derived from fields that must be present
        # once the flow information has been merged in.
        evaluation = get_evaluate_index(
            {
                'uid': uid,
                'domain': doc['domain'],
                'topic': doc['topic'],
                'activity_geo': doc['activity_geo'],
            },
            status='insert',
        )
        doc = dict(doc, **evaluation)
        doc = dict(doc, **profile_by_uid[uid])
        # Bulk payload layout: action header followed by the document body.
        payload += [{'index': {'_id': uid}}, doc]

    save_user_results(payload)
    return True
示例#2
0
def update_atttribute_week():
    """Scan every stored user and recompute attributes in batches of 1000.

    Uids are read from the ``_source`` of each hit yielded by ``scan`` and
    collected into batches.  For every batch the weibos are fetched with
    ``read_user_weibo`` and handed to ``compute2in(..., status='update')``.

    Unlike the previous version, the final partial batch (fewer than 1000
    uids left when the scan is exhausted) is processed before the process
    exits; it used to be dropped because ``sys.exit`` was called directly
    from the ``StopIteration`` handler.

    This function does not return: it terminates the process with
    ``sys.exit(0)`` once the scan is exhausted, and also (after printing
    the error) when reading a hit fails.
    """
    status = False
    count = 0
    # NOTE(review): 'user_portriat' looks like a misspelling of
    # 'user_portrait' -- confirm against the real index name before changing.
    index_name = 'user_portriat'
    index_type = 'user'
    s_re = scan(es, query={'query':{'match_all':{}}, 'size':1000}, index=index_name, doc_type=index_type)
    exhausted = False
    while not exhausted:
        uid_list = []
        while True:
            try:
                scan_re = next(s_re)['_source']
                count += 1
            except StopIteration:
                # Leave the loop instead of exiting so the uids collected
                # so far are still processed below.
                exhausted = True
                break
            except Exception as r:
                # Single-argument print keeps the output identical under
                # both the print statement and the print function.
                print('%s %s' % (Exception, r))
                sys.exit(0)
            uid_list.append(scan_re['uid'])
            # `count` runs across batches, so this closes every 1000 hits.
            if count % 1000 == 0:
                break
        if uid_list:
            # get user list weibo dict from weibo api
            user_weibo_dict = read_user_weibo(uid_list)
            status = compute2in(uid_list, user_weibo_dict, status='update')
            print('status: %s' % status)
    print('all done')
    sys.exit(0)
示例#3
0
def compute_attribute(uid_list=None):
    """Compute and store the portrait documents for the given users.

    Args:
        uid_list: uids to process.  Omitting it (or passing ``None``) is
            treated as an empty list, which is what the former ``[]``
            default passed to ``read_user_weibo``.

    Only users for which ``read_user_weibo`` returns weibos are processed.
    For each of them the text attributes, user name, flow information,
    evaluation index and profile information are merged into one document
    and queued as an ``index`` action.  The queue is sent with ``es.bulk``
    every 200 users; any remainder is passed to ``save_user_results``.

    Returns:
        The string ``"1"``.
    """
    # A fresh list per call: the argument is handed to read_user_weibo, so a
    # shared mutable default could leak state between calls.
    if uid_list is None:
        uid_list = []
    user_weibo_dict = read_user_weibo(uid_list)
    uid_list = user_weibo_dict.keys()
    flow_result = get_flow_information(uid_list)
    register_result = get_profile_information(uid_list)
    bulk_action = []
    for count, user in enumerate(uid_list, 1):
        weibo_list = user_weibo_dict[user]
        uname = weibo_list[0]['uname']
        results = compute_text_attribute(user, weibo_list)
        results['uname'] = uname
        results['uid'] = str(user)
        results.update(flow_result[str(user)])
        user_info = {
            'uid': str(user),
            'domain': results['domain'],
            'topic': results['topic'],
            'activity_geo': results['geo_string'],
        }
        evaluation_index = get_evaluate_index(user_info, status='insert')
        results.update(evaluation_index)
        results.update(register_result[user])
        # Bulk payload layout: action header followed by the document body.
        bulk_action.extend([{'index': {'_id': str(user)}}, results])
        if count % 200 == 0:
            # NOTE(review): `index_name` is not defined in this function; it
            # must exist at module level or this line raises NameError.
            es.bulk(bulk_action, index=index_name, doc_type="user", timeout=60)
            bulk_action = []
            print(count)
    if bulk_action:
        save_user_results(bulk_action)
    return "1"
示例#4
0
def update_portrait():
    """Push the flow information of every user as partial-document updates.

    Users are the keys of the mapping returned by ``read_user_weibo()``.
    For each one an ``update`` action keyed by the uid string is queued
    together with a ``{'doc': ...}`` body holding that user's flow
    information.  The queue is sent to the ``sensitive_user_portrait``
    index every 500 users, and once more at the end if anything is left.

    Returns:
        The string ``'1'``.
    """
    weibos_by_user = read_user_weibo()
    uid_list = weibos_by_user.keys()
    flow_result = get_flow_information(uid_list)

    pending = []
    for processed, user in enumerate(uid_list, 1):
        header = {'update': {'_id': str(user)}}
        body = {'doc': flow_result[user]}
        pending += [header, body]
        if processed % 500 != 0:
            continue
        # A full batch has accumulated: send it and start a new queue.
        es.bulk(pending, index='sensitive_user_portrait', doc_type='user', timeout=60)
        pending = []
        print(processed)

    # Send whatever did not fill a complete batch.
    if pending:
        es.bulk(pending, index='sensitive_user_portrait', doc_type='user', timeout=60)
    return '1'