示例#1
0
def deal_bulk_action(user_info_list, fansnum_max):
    """Compute portrait attributes for a batch of users and bulk-update them.

    For every uid in ``user_info_list`` a document with the topic, keyword,
    online-pattern and importance attributes is built and queued as an
    ``update`` action followed by a ``{'doc': ...}`` body.  All queued actions
    are then submitted through ``es_user_portrait.bulk``.

    Args:
        user_info_list: mapping of uid -> user info.  Each value is indexed
            with the ``'domain'`` and ``'fansnum'`` keys.
        fansnum_max: forwarded unchanged to ``get_importance``.

    Returns:
        None.

    Raises:
        KeyError: if a uid is missing from the topic classification results
            or its user info lacks ``'domain'`` / ``'fansnum'``.
    """
    uid_list = user_info_list.keys()

    # Acquire bulk user weibo data.  Only the keyword and online-pattern
    # results are used below; the other two values are unpacked and ignored.
    if WEIBO_API_INPUT_TYPE == 0:
        flow_result = read_flow_text_sentiment(uid_list)
    else:
        flow_result = read_flow_text(uid_list)
    user_keywords_dict, _user_weibo_dict, online_pattern_dict, _character_start_ts = flow_result

    # Get user topic results for the whole batch in one call.
    topic_results_dict, topic_results_label = topic_classfiy(
        uid_list, user_keywords_dict)

    bulk_action = []
    for uid in uid_list:
        results = {'uid': uid}

        # Topic attribute.
        results['topic'] = json.dumps(topic_results_dict[uid])
        results['topic_string'] = topic_en2ch(topic_results_label[uid])

        # Keywords attribute: a user without keyword data gets empty values
        # instead of aborting the whole batch with a KeyError.
        try:
            keywords_dict = user_keywords_dict[uid]
        except KeyError:
            keywords_dict = {}
        keywords_top50 = sorted(keywords_dict.items(),
                                key=lambda item: item[1],
                                reverse=True)[:50]
        results['keywords'] = json.dumps(keywords_top50)
        results['keywords_string'] = '&'.join(
            [keyword for keyword, _weight in keywords_top50])

        # Online pattern attribute, with the same tolerance for missing data.
        try:
            user_online_pattern = online_pattern_dict[uid]
        except KeyError:
            user_online_pattern = {}
        results['online_pattern'] = json.dumps(user_online_pattern)
        try:
            results['online_pattern_aggs'] = '&'.join(
                user_online_pattern.keys())
        except (AttributeError, TypeError, ValueError):
            # Best effort: the pattern is not a mapping, or its keys cannot
            # be joined as text.
            results['online_pattern_aggs'] = ''

        # Importance attribute.
        user_domain = user_info_list[uid]['domain'].encode('utf-8')
        user_fansnum = user_info_list[uid]['fansnum']
        results['importance'] = get_importance(user_domain,
                                               results['topic_string'],
                                               user_fansnum, fansnum_max)

        # Each bulk update is an action line followed by its document line.
        bulk_action.extend([{'update': {'_id': uid}}, {'doc': results}])

    # Skip the request when there is nothing to send (Elasticsearch rejects
    # an empty bulk body).
    if bulk_action:
        es_user_portrait.bulk(bulk_action,
                              index=portrait_index_name,
                              doc_type=portrait_index_type)
示例#2
0
def deal_bulk_action(user_info_list, fansnum_max):
    """Build and submit the bulk portrait update for a batch of users.

    The function reads weibo-derived data for all uids, classifies their
    topics, and then assembles one document per uid containing ``uid``,
    ``topic``, ``topic_string``, ``keywords``, ``keywords_string``,
    ``online_pattern``, ``online_pattern_aggs`` and ``importance``.  Each
    document is queued as an ``update`` action plus a ``{"doc": ...}`` body
    and the queue is sent with ``es_user_portrait.bulk``.

    Args:
        user_info_list: mapping of uid -> user info; every value must provide
            the ``"domain"`` and ``"fansnum"`` keys.
        fansnum_max: passed through to ``get_importance``.

    Returns:
        None.

    Raises:
        KeyError: if a uid has no topic classification result, or its user
            info lacks ``"domain"`` / ``"fansnum"``.
    """
    uid_list = user_info_list.keys()

    # Pick the reader by input type.  Both readers are unpacked as a 4-tuple,
    # of which only the keyword and online-pattern mappings are used here.
    reader = read_flow_text_sentiment if WEIBO_API_INPUT_TYPE == 0 else read_flow_text
    user_keywords_dict, _user_weibo_dict, online_pattern_dict, _character_start_ts = reader(uid_list)

    # Topic classification is done once for the whole batch.
    topic_results_dict, topic_results_label = topic_classfiy(uid_list, user_keywords_dict)

    bulk_action = []
    for uid in uid_list:
        user_topic_dict = topic_results_dict[uid]
        user_label_dict = topic_results_label[uid]
        topic_json = json.dumps(user_topic_dict)
        topic_string = topic_en2ch(user_label_dict)

        # Users with no keyword data get empty keyword attributes rather than
        # failing the entire batch.
        try:
            keywords_dict = user_keywords_dict[uid]
        except KeyError:
            keywords_dict = {}
        ranked_keywords = sorted(keywords_dict.items(), key=lambda pair: pair[1], reverse=True)
        keywords_top50 = ranked_keywords[:50]

        # Likewise, a missing online pattern is treated as an empty one.
        try:
            user_online_pattern = online_pattern_dict[uid]
        except KeyError:
            user_online_pattern = {}
        online_pattern_json = json.dumps(user_online_pattern)
        try:
            online_pattern_aggs = "&".join(user_online_pattern.keys())
        except (AttributeError, TypeError, ValueError):
            # Best effort: fall back to an empty string when the pattern is
            # not a mapping or its keys cannot be joined as text.
            online_pattern_aggs = ""

        user_info = user_info_list[uid]
        user_domain = user_info["domain"].encode("utf-8")
        importance = get_importance(user_domain, topic_string, user_info["fansnum"], fansnum_max)

        results = {
            "uid": uid,
            "topic": topic_json,
            "topic_string": topic_string,
            "keywords": json.dumps(keywords_top50),
            "keywords_string": "&".join([keyword_item[0] for keyword_item in keywords_top50]),
            "online_pattern": online_pattern_json,
            "online_pattern_aggs": online_pattern_aggs,
            "importance": importance,
        }
        # A bulk update is an action line followed by its document line.
        bulk_action.append({"update": {"_id": uid}})
        bulk_action.append({"doc": results})

    # Nothing to send for an empty batch (Elasticsearch rejects an empty
    # bulk body).
    if bulk_action:
        es_user_portrait.bulk(bulk_action, index=portrait_index_name, doc_type=portrait_index_type)
def deal_bulk_action(user_info_list, fansnum_max):
    """Compute portrait attributes (including school) and bulk-update them.

    For every uid in ``user_info_list`` a document with the school, topic,
    keyword, online-pattern and importance attributes is built and queued as
    an ``update`` action followed by a ``{'doc': ...}`` body.  The queued
    actions are submitted through ``es_user_portrait.bulk``.

    Args:
        user_info_list: mapping of uid -> user info.  Each value is indexed
            with the ``'domain'`` and ``'fansnum'`` keys.
        fansnum_max: forwarded unchanged to ``get_importance``.

    Returns:
        None.

    Raises:
        KeyError: if a uid is missing from the school or topic results, or
            its user info lacks ``'domain'`` / ``'fansnum'``.
    """
    uid_list = user_info_list.keys()

    # Acquire bulk user weibo data.  Only the keyword and online-pattern
    # results are used below.
    if WEIBO_API_INPUT_TYPE == 0:
        flow_result = read_flow_text_sentiment(uid_list)
    else:
        flow_result = read_flow_text(uid_list)
    user_keywords_dict, _user_weibo_dict, online_pattern_dict, _character_start_ts = flow_result

    # Batch-level lookups: topic classification and school attributes.
    topic_results_dict, topic_results_label = topic_classfiy(uid_list, user_keywords_dict)
    school_results_dict = get_school(uid_list)

    bulk_action = []
    for uid in uid_list:
        results = {'uid': uid}

        # School attributes: is_school / school_string / school_dict.
        school_info = school_results_dict[uid]
        for field in ('is_school', 'school_string', 'school_dict'):
            results[field] = school_info[field]

        # Topic attribute.
        results['topic'] = json.dumps(topic_results_dict[uid])
        results['topic_string'] = topic_en2ch(topic_results_label[uid])

        # Keywords attribute; users without keyword data get empty values.
        try:
            keywords_dict = user_keywords_dict[uid]
        except KeyError:
            keywords_dict = {}
        keywords_top50 = sorted(keywords_dict.items(), key=lambda item: item[1], reverse=True)[:50]
        results['keywords'] = json.dumps(keywords_top50)
        results['keywords_string'] = '&'.join([keyword_item[0] for keyword_item in keywords_top50])

        # Online pattern.  Keep the mapping itself separate from its JSON
        # form: the aggregation string is built from the mapping's keys, and
        # a JSON string has no keys() method.
        try:
            user_online_pattern = online_pattern_dict[uid]
            online_pattern_json = json.dumps(user_online_pattern)
        except (KeyError, TypeError, ValueError):
            # Missing or non-serializable pattern: store an empty one.
            user_online_pattern = {}
            online_pattern_json = json.dumps(user_online_pattern)
        results['online_pattern'] = online_pattern_json
        try:
            results['online_pattern_aggs'] = '&'.join(user_online_pattern.keys())
        except (AttributeError, TypeError, ValueError):
            # Best effort: the pattern is not a mapping, or its keys cannot
            # be joined as text.
            results['online_pattern_aggs'] = ''

        # Importance attribute.
        user_domain = user_info_list[uid]['domain'].encode('utf-8')
        user_fansnum = user_info_list[uid]['fansnum']
        results['importance'] = get_importance(user_domain, results['topic_string'], user_fansnum, fansnum_max)

        # Each bulk update is an action line followed by its document line.
        bulk_action.extend([{'update': {'_id': uid}}, {'doc': results}])

    # Skip the request when there is nothing to send (Elasticsearch rejects
    # an empty bulk body).
    if bulk_action:
        es_user_portrait.bulk(bulk_action, index=portrait_index_name, doc_type=portrait_index_type)
示例#4
0
def deal_bulk_action(user_info_list, fansnum_max):
    """Compute portrait attributes for a batch of users and print the actions.

    For every uid in ``user_info_list`` a document with the topic, domain,
    politics, keyword, online-pattern and importance attributes is built and
    queued as an ``update`` action followed by a ``{'doc': ...}`` body.

    NOTE(review): in this revision the ``es_user_portrait.bulk`` call is
    commented out, so the queued actions are only printed and nothing is
    written.  Confirm whether that is intentional before relying on it.

    Args:
        user_info_list: mapping of uid -> user info.  Each value is indexed
            with the ``'domain'`` and ``'fansnum'`` keys.
        fansnum_max: forwarded unchanged to ``get_importance``.

    Returns:
        None.

    Raises:
        KeyError: if a uid is missing from the topic, domain or politics
            results, or its user info lacks ``'domain'`` / ``'fansnum'``.
    """
    uid_list = user_info_list.keys()

    # Acquire bulk user weibo data.  Only the keyword and online-pattern
    # results are used below.
    if WEIBO_API_INPUT_TYPE == 0:
        flow_result = read_flow_text_sentiment(uid_list)
    else:
        flow_result = read_flow_text(uid_list)
    user_keywords_dict, _user_weibo_dict, online_pattern_dict, _character_start_ts = flow_result

    # Batch-level classification: topic, domain and politics.
    topic_results_dict, topic_results_label = topic_classfiy(uid_list, user_keywords_dict)
    domain_results = domain_classfiy(uid_list, user_keywords_dict)
    politics_results = political_classify(uid_list, user_keywords_dict)

    # School attributes (get_school) are disabled in this revision.

    bulk_action = []
    for uid in uid_list:
        results = {'uid': uid}

        # Topic attribute.
        results['topic'] = json.dumps(topic_results_dict[uid])
        results['topic_string'] = topic_en2ch(topic_results_label[uid])

        # Domain attribute: the first translated domain is the primary one,
        # and the fallback label is used when the list is empty.
        domain_list = domain_en2ch(domain_results[uid])
        if domain_list:
            results['domain_list'] = json.dumps(domain_list)
            results['domain'] = domain_list[0]
        else:
            results['domain'] = "其他"
            results['domain_list'] = json.dumps(["其他"])

        # Politics attribute.  This is set once here; the original repeated
        # the lookup later with an undefined name (`user`), which raised
        # NameError.
        results['politics'] = politics_en2ch(politics_results[uid])

        # Keywords attribute; users without keyword data get empty values.
        try:
            keywords_dict = user_keywords_dict[uid]
        except KeyError:
            keywords_dict = {}
        keywords_top50 = sorted(keywords_dict.items(), key=lambda item: item[1], reverse=True)[:50]
        results['keywords'] = json.dumps(keywords_top50)
        results['keywords_string'] = '&'.join([keyword_item[0] for keyword_item in keywords_top50])

        # Online pattern.  Keep the mapping itself separate from its JSON
        # form: the aggregation string is built from the mapping's keys, and
        # a JSON string has no keys() method.
        try:
            user_online_pattern = online_pattern_dict[uid]
            online_pattern_json = json.dumps(user_online_pattern)
        except (KeyError, TypeError, ValueError):
            # Missing or non-serializable pattern: store an empty one.
            user_online_pattern = {}
            online_pattern_json = json.dumps(user_online_pattern)
        results['online_pattern'] = online_pattern_json
        try:
            results['online_pattern_aggs'] = '&'.join(user_online_pattern.keys())
        except (AttributeError, TypeError, ValueError):
            # Best effort: the pattern is not a mapping, or its keys cannot
            # be joined as text.
            results['online_pattern_aggs'] = ''

        # Importance attribute.  The domain comes from the caller-supplied
        # user info, not from the classification result above.
        user_domain = user_info_list[uid]['domain'].encode('utf-8')
        user_fansnum = user_info_list[uid]['fansnum']
        results['importance'] = get_importance(user_domain, results['topic_string'], user_fansnum, fansnum_max)

        # Each bulk update is an action line followed by its document line.
        bulk_action.extend([{'update': {'_id': uid}}, {'doc': results}])

    # Single-argument form prints the same text under the Python 2 print
    # statement and the Python 3 print function.
    print('bulk_action: %s' % (bulk_action,))
    # NOTE(review): bulk write left disabled, as in the original revision.
    #es_user_portrait.bulk(bulk_action, index=portrait_index_name, doc_type=portrait_index_type)