def filter_recommend(top_user_set):
    # drop uids that are already stored in the 'recommend' hash
    # (assumes `json` is imported and `r` is a Redis client, as in the later snippets)
    recommend_keys = r.hkeys('recommend')
    recommend_list = []
    for key in recommend_keys:
        recommend_list.extend(json.loads(r.hget('recommend', key)))
    return set(top_user_set) - set(recommend_list)
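A minimal usage sketch, assuming `r` is a redis-py client and each field of the 'recommend' hash holds a JSON-encoded list of already-recommended uids (the sample data below is hypothetical):

import json
import redis

r = redis.StrictRedis(host='localhost', port=6379)
# hypothetical sample entry: uids recommended on 2013-09-07
r.hset('recommend', '2013-09-07', json.dumps(['1001', '1002']))

print filter_recommend(['1001', '1003', '1004'])   # set(['1003', '1004'])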
Example #3
def change_status_computed(mapping_dict):
    hash_name = 'compute'
    status = '4'  # '4' = computed
    new_mapping_dict = {}
    for uid in mapping_dict:
        user_list = json.loads(mapping_dict[uid])
        user_list[1] = status
        in_date = user_list[0]
        new_mapping_dict[uid] = json.dumps(user_list)
        # update the per-date identify_in_* hash the uid was filed under
        influence_hashname = 'identify_in_influence_' + str(in_date)
        sensitive_hashname = 'identify_in_sensitive_' + str(in_date)
        manual_hashname = 'identify_in_manual_' + str(in_date)
        if r.hget(influence_hashname, uid):
            r.hset(influence_hashname, uid, status)
        elif r.hget(sensitive_hashname, uid):
            r.hset(sensitive_hashname, uid, status)
        else:
            r.hset(manual_hashname, uid, status)
    r.hmset(hash_name, new_mapping_dict)
def change_status_compute_fail(mapping_dict):
    hash_name = 'compute'
    status = '1'  # '1' = back to waiting, so the user is retried
    new_mapping_dict = {}
    for uid in mapping_dict:
        user_list = json.loads(mapping_dict[uid])
        user_list[1] = status
        new_mapping_dict[uid] = json.dumps(user_list)
        in_date = user_list[0]
        # update the per-date identify_in_* hash the uid was filed under
        influence_hashname = 'identify_in_influence_' + str(in_date)
        sensitive_hashname = 'identify_in_sensitive_' + str(in_date)
        if r.hget(influence_hashname, uid):
            r.hset(influence_hashname, uid, status)
        else:
            r.hset(sensitive_hashname, uid, status)
    r.hmset(hash_name, new_mapping_dict)
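Each entry of the 'compute' hash is a JSON-encoded [in_date, status] pair keyed by uid. A sketch of the success path under that assumption (needs a live Redis with the identify_in_* hashes populated; the uid is hypothetical):

import json

mapping_dict = {'1001': json.dumps(['20130907', '3'])}   # '3' = computing
change_status_computed(mapping_dict)
print json.loads(r.hget('compute', '1001'))   # ['20130907', '4']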
Example #6
def get_operate_information():
    result = dict()
    now_ts = time.time()
    date = ts2datetime(now_ts - 24*3600)
    # test override: pin the date instead of using yesterday
    date = '2013-09-07'
    delete_date = ''.join(date.split('-'))
    result['in_count'] = len(r_recomment.hkeys('recomment_' + str(date)))
    out_count_list = r_recomment.hget('recommend_delete_list', delete_date)
    if out_count_list:
        result['out_count'] = len(json.loads(out_count_list))
    else:
        result['out_count'] = 0
    compute_list = r_recomment.hkeys('compute')
    '''
    if compute_list:
        result['compute'] = len(compute_list)
    '''
    return result
Example #7
def get_operate_information():
    result = dict()
    # run_type: real time in production, a fixed test time otherwise
    if RUN_TYPE == 1:
        now_ts = time.time()
    else:
        now_ts = datetime2ts(RUN_TEST_TIME)

    date = ts2datetime(now_ts - DAY)
    delete_date = ''.join(date.split('-'))
    result['in_count'] = len(r_recomment.hkeys('recomment_' + str(date)))
    out_count_list = r_recomment.hget('recommend_delete_list', delete_date)
    if out_count_list:
        result['out_count'] = len(json.loads(out_count_list))
    else:
        result['out_count'] = 0
    compute_list = r_recomment.hkeys('compute')

    if compute_list:
        result['compute'] = len(compute_list)
    else:
        result['compute'] = 0

    return result
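For reference, the shape of the returned dict, assuming the Redis hashes are populated (the counts are illustrative, not real data):

info = get_operate_information()
print info   # e.g. {'in_count': 12, 'out_count': 3, 'compute': 5}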
def scan_compute_redis():
    hash_name = 'compute'
    results = r.hgetall('compute')
    iter_user_list = []
    mapping_dict = dict()
    for uid in results:
        user_list = json.loads(results[uid])
        in_date = user_list[0]
        status = user_list[1]
        if status == '2':
            iter_user_list.append(uid)
            mapping_dict[uid] = json.dumps([in_date, '3']) # mark status:3 computing
            #revise identify_in_date
            influence_hashname = 'identify_in_influence_'+str(in_date)
            sensitive_hashname = 'identify_in_sensitive_'+str(in_date)
            tmp = r.hget(influence_hashname, uid)
            if tmp:
                r.hset(influence_hashname, uid, '3')
            else:
                r.hset(sensitive_hashname, uid, '3')
        if len(iter_user_list) % 100 == 0 and len(iter_user_list) != 0:
            # persist the status-'3' (computing) marks for this batch
            r.hmset('compute', mapping_dict)
            #acquire bulk user weibo data
            if WEIBO_API_INPUT_TYPE == 0:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_user_list)
            else:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_user_list)
            #compute text attribute
            compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts)
            
            if compute_status:
                change_status_computed(mapping_dict)
            else:
                change_status_compute_fail(mapping_dict)

            # users with no weibo text are put back to status '2' (ready) for a later pass
            if len(user_keywords_dict) != len(iter_user_list):
                change_mapping_dict = dict()
                change_user_list = set(iter_user_list) - set(user_keywords_dict.keys())
                for change_user in change_user_list:
                    original_in_date = json.loads(results[change_user])[0]
                    change_mapping_dict[change_user] = json.dumps([original_in_date, '2'])
                r.hmset('compute', change_mapping_dict)

            iter_user_list = []
            mapping_dict = {}
            
    if iter_user_list != [] and mapping_dict != {}:
        r.hmset('compute', mapping_dict)
        #acquire bulk user weibo data
        if WEIBO_API_INPUT_TYPE == 0:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_user_list)
        else:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_user_list)
        #compute text attribute
        compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts)
        if compute_status:
            change_status_computed(mapping_dict)
        else:
            change_status_compute_fail(mapping_dict)
        # users with no weibo text are put back to status '2' (ready) for a later pass
        if len(user_keywords_dict) != len(iter_user_list):
            change_mapping_dict = dict()
            change_user_list = set(iter_user_list) - set(user_keywords_dict.keys())
            for change_user in change_user_list:
                original_in_date = json.loads(results[change_user])[0]
                change_mapping_dict[change_user] = json.dumps([original_in_date, '2'])
            r.hmset('compute', change_mapping_dict)
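The flush-every-100 pattern above needs two guards: the modulo test alone would also fire on an empty list, and any remainder under 100 must still be flushed after the loop. A standalone sketch of the same batch-and-flush idiom, with a hypothetical handle_batch standing in for the hmset/read/compute sequence:

def process_in_batches(items, handle_batch, batch_size=100):
    batch = []
    for item in items:
        batch.append(item)
        if len(batch) == batch_size:
            handle_batch(batch)   # full batch: flush
            batch = []
    if batch:
        handle_batch(batch)       # flush the remainder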
Example #9
def scan_compute_redis():
    hash_name = 'compute'
    results = r.hgetall('compute')
    iter_user_list = []
    mapping_dict = dict()
    for uid in results:
        user_list = json.loads(results[uid])
        in_date = user_list[0]
        status = user_list[1]
        if status == '2':
            iter_user_list.append(uid)
            mapping_dict[uid] = json.dumps([in_date,
                                            '3'])  # mark status:3 computing
            #revise identify_in_date
            influence_hashname = 'identify_in_influence_' + str(in_date)
            sensitive_hashname = 'identify_in_sensitive_' + str(in_date)
            manual_hashname = "identify_in_manual_" + str(in_date)
            tmp = r.hget(influence_hashname, uid)
            tmp1 = r.hget(sensitive_hashname, uid)
            if tmp:
                r.hset(influence_hashname, uid, '3')
            elif tmp1:
                r.hset(sensitive_hashname, uid, '3')
            else:
                r.hset(manual_hashname, uid, '3')
        if len(iter_user_list) % 100 == 0 and len(iter_user_list) != 0:
            # persist the status-'3' (computing) marks for this batch
            r.hmset('compute', mapping_dict)
            #acquire bulk user weibo data
            if WEIBO_API_INPUT_TYPE == 0:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(
                    iter_user_list)
            else:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(
                    iter_user_list)
            #compute text attribute
            compute_status = test_cron_text_attribute_v2(
                user_keywords_dict, user_weibo_dict, online_pattern_dict,
                character_start_ts)

            if compute_status:
                change_status_computed(mapping_dict)
            else:
                change_status_compute_fail(mapping_dict)

            # users with no weibo text are put back to status '2' (ready) for a later pass
            if len(user_keywords_dict) != len(iter_user_list):
                change_mapping_dict = dict()
                change_user_list = set(iter_user_list) - set(user_keywords_dict.keys())
                for change_user in change_user_list:
                    original_in_date = json.loads(results[change_user])[0]
                    change_mapping_dict[change_user] = json.dumps([original_in_date, '2'])
                r.hmset('compute', change_mapping_dict)

            iter_user_list = []
            mapping_dict = {}

    if iter_user_list != [] and mapping_dict != {}:
        r.hmset('compute', mapping_dict)
        #acquire bulk user weibo data
        if WEIBO_API_INPUT_TYPE == 0:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(
                iter_user_list)
        else:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(
                iter_user_list)
        #compute text attribute
        compute_status = test_cron_text_attribute_v2(user_keywords_dict,
                                                     user_weibo_dict,
                                                     online_pattern_dict,
                                                     character_start_ts)
        if compute_status:
            change_status_computed(mapping_dict)
        else:
            change_status_compute_fail(mapping_dict)
        # users with no weibo text are put back to status '2' (ready) for a later pass
        if len(user_keywords_dict) != len(iter_user_list):
            change_mapping_dict = dict()
            change_user_list = set(iter_user_list) - set(user_keywords_dict.keys())
            for change_user in change_user_list:
                original_in_date = json.loads(results[change_user])[0]
                change_mapping_dict[change_user] = json.dumps([original_in_date, '2'])
            r.hmset('compute', change_mapping_dict)
# re-executed every 30 minutes (cron job)

import sys
import time
import json
import redis
from elasticsearch import Elasticsearch
from text_attribute import compute_attribute
reload(sys)
sys.path.append('./../../')
from global_utils import R_RECOMMENTATION as r
from global_utils import es_sensitive_user_text as es_text
from time_utils import datetime2ts, ts2datetime

date = ts2datetime(time.time()).replace('-', '')
temp = r.hget('compute_now', date)
if temp:
    now_list = json.loads(temp)
    uid_list = []
    count = 0
    for item in now_list:
        uid_list.append(item[0])
    user_weibo_dict = dict()
    # extract user weibo text
    compute_attribute(user_weibo_dict)
    for i in range(len(now_list)):
        uid = now_list[i][0]
        source = now_list[i][1]
        if source == '1':
            r.hset('identify_in_sensitive_'+str(date), uid, '3') # finish compute
        else:
            # assumed completion (the original snippet is cut off here):
            # non-sensitive uids are tracked in the influence hash
            r.hset('identify_in_influence_'+str(date), uid, '3')
Example #11
                    sensitive_words = sensitive_dict.keys()
                else:
                    sensitive_words = []
                if sensitive_history_dict.get('fields', 0):
                    sensitive_value = math.log(sensitive_history_dict['fields'][sensitive_string][0] / float(top_sensitive) * 9 + 1, 10) * 100
                else:
                    sensitive_value = 0
                results.append([uid, uname, location, fansnum, statusnum, influence, sensitive_words, sensitive_value])
            else:
                results.append([uid, uname, location, fansnum, statusnum, influence])
                if auth:
                    hashname_submit = "submit_recomment_" + date
                    tmp_data = json.loads(r.hget(hashname_submit, uid))
                    recommend_list = (tmp_data['operation']).split('&')
                    admin_list = []
                    admin_list.append(tmp_data['system'])
                    admin_list.append(list(set(recommend_list)))
                    admin_list.append(len(recommend_list))
    return results

def get_evaluate_max(index_name):
    max_result = {}
    index_type = 'bci'
    evaluate_index = ['user_index']
    for evaluate in evaluate_index:
        query_body = {
            'query': {
                'match_all': {}
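get_evaluate_max is cut off above. Purely as a hedged guess at the shape of such a lookup (not the original code): a per-field maximum is commonly fetched by sorting descending and taking a single hit, assuming an elasticsearch-py 1.x client es and the field names seen above:

def get_evaluate_max_sketch(es, index_name, index_type='bci', field='user_index'):
    # sort descending on the field and take the first hit's value
    query_body = {
        'query': {'match_all': {}},
        'sort': [{field: {'order': 'desc'}}],
        'size': 1
    }
    hits = es.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits']
    return hits[0]['_source'][field] if hits else 0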
import sys
import time
import json
import redis
from elasticsearch import Elasticsearch
from text_attribute import compute_attribute

reload(sys)
sys.path.append('./../../')
from global_utils import R_RECOMMENTATION as r
from global_utils import es_sensitive_user_text as es_text
from time_utils import datetime2ts, ts2datetime

date = ts2datetime(time.time()-24*3600).replace('-', '')
temp = r.hget('compute_appoint', date)
if temp:
    now_list = json.loads(temp)
    uid_list = []
    count = 0
    for item in now_list:
        uid_list.append(item[0])
    user_weibo_dict = dict()
    # extract user weibo text
    compute_attribute(user_weibo_dict)
    for i in range(len(now_list)):
        uid = now_list[i][0]
        source = now_list[i][1]
        if source == '1':
            r.hset('identify_in_sensitive_'+str(date), uid, '3') # finish compute
        else:
            # assumed completion (the original snippet is cut off here):
            # non-sensitive uids are tracked in the influence hash
            r.hset('identify_in_influence_'+str(date), uid, '3')
def scan_compute_redis():
    hash_name = 'compute'
    results = r.hgetall('compute')
    iter_user_list = []
    mapping_dict = dict()
    identify_in_dict = dict()
    #test
    count = 0
    for uid in results:
        user_list = json.loads(results[uid])
        in_date = user_list[0]
        status = user_list[1]
        if int(status) == 1:  # status '1': queued for immediate compute
            #test
            count += 1
            iter_user_list.append(uid)
            mapping_dict[uid] = json.dumps([in_date, '3']) # mark status:3 computing
            #revise identify_in_date
            influence_hashname = 'identify_in_influence_'+str(in_date)
            sensitive_hashname = 'identify_in_sensitive_'+str(in_date)
            manual_hashname = "identify_in_manual_"+str(in_date)
            tmp = r.hget(influence_hashname, uid)
            tmp1 = r.hget(sensitive_hashname, uid)
            if tmp:
                r.hset(influence_hashname, uid, '3')
            elif tmp1:
                r.hset(sensitive_hashname, uid, '3')
            else:
                r.hset(manual_hashname, uid, '3')
        if len(iter_user_list) % 100 == 0 and len(iter_user_list) != 0:
            print iter_user_list
            r.hmset('compute', mapping_dict)
            #acquire bulk user weibo data
            if WEIBO_API_INPUT_TYPE == 0:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_user_list)
            else:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_user_list)
            #compute text attribute
            compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts)
            
            if compute_status:
                change_status_computed(mapping_dict)
            else:
                change_status_compute_fail(mapping_dict)
            
            # users with no weibo in the latest week go back to status '1' (waiting)
            if len(user_keywords_dict) != len(iter_user_list):
                change_mapping_dict = dict()
                change_user_list = set(iter_user_list) - set(user_keywords_dict.keys())
                for change_user in change_user_list:
                    original_in_date = json.loads(results[change_user])[0]
                    change_mapping_dict[change_user] = json.dumps([original_in_date, '1'])
                r.hmset('compute', change_mapping_dict)

            iter_user_list = []
            mapping_dict = {}
            
    if iter_user_list != [] and mapping_dict != {}:
        r.hmset('compute', mapping_dict)
        #acquire bulk user weibo data
        print 'iter_user_list:', iter_user_list
        if WEIBO_API_INPUT_TYPE == 0:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_user_list)
        else:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_user_list)
        #compute text attribute
        print 'user_weibo_dict:', len(user_weibo_dict)
        compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts)
        if compute_status:
            change_status_computed(mapping_dict)
        else:
            change_status_compute_fail(mapping_dict)
        # users with no weibo in the latest week go back to status '1' (waiting)
        if len(user_keywords_dict) != len(iter_user_list):
            change_mapping_dict = dict()
            change_user_list = set(iter_user_list) - set(user_keywords_dict.keys())
            for change_user in change_user_list:
                original_in_date = json.loads(results[change_user])[0]
                change_mapping_dict[change_user] = json.dumps([original_in_date, '1'])
            r.hmset('compute', change_mapping_dict)
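For orientation, the values the status field (the second element of each 'compute' entry) takes across these snippets, inferred from the code above rather than documented in the source:

# inferred status codes for entries in the 'compute' hash
COMPUTE_STATUS = {
    '1': 'waiting / re-queued after a failed compute',
    '2': 'ready for computation',
    '3': 'computing',
    '4': 'computed',
}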
Example #14
# re-executed every 30 minutes (cron job)

import sys
import time
import json
import redis
from elasticsearch import Elasticsearch
from text_attribute import compute_attribute
reload(sys)
sys.path.append('./../../')
from global_utils import R_RECOMMENTATION as r
from global_utils import es_sensitive_user_text as es_text
from time_utils import datetime2ts, ts2datetime

date = ts2datetime(time.time()).replace('-', '')
temp = r.hget('compute_now', date)
if temp:
    now_list = json.loads(temp)
    uid_list = []
    count = 0
    for item in now_list:
        uid_list.append(item[0])
    user_weibo_dict = dict()
    # extract user weibo text
    compute_attribute(user_weibo_dict)
    for i in range(len(now_list)):
        uid = now_list[i][0]
        source = now_list[i][1]
        if source == '1':
            r.hset('identify_in_sensitive_' + str(date), uid,
                   '3')  # finish compute