Example #1
def get_user_geo(uid):
    result = []
    user_geo_result = {}
    user_ip_dict = {}
    user_ip_result = dict()
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    #run_type
    if RUN_TYPE == 1:
        ts = datetime2ts(now_date)
    else:
        ts = datetime2ts(RUN_TEST_TIME)
    for i in range(1, 8):
        ts = ts - 3600*24
        results = r_cluster.hget('new_ip_'+str(ts), uid)
        if results:
            ip_dict = json.loads(results)
            for ip in ip_dict:
                ip_count = len(ip_dict[ip].split('&'))
                try:
                    user_ip_result[ip] += ip_count
                except:
                    user_ip_result[ip] = ip_count
    user_geo_dict = ip2geo(user_ip_result)
    user_geo_result = sorted(user_geo_dict.items(), key=lambda x:x[1], reverse=True)

    return user_geo_result
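
Most of these examples lean on a pair of date helpers that the snippets themselves never define. A minimal sketch consistent with how they are called, assuming local-time 'YYYY-MM-DD' strings (not the project's actual implementation):

import time

DAY = 24 * 3600  # one day in seconds, also assumed by the examples

def datetime2ts(date_string):
    # 'YYYY-MM-DD' -> Unix timestamp at 00:00:00 local time of that day
    return int(time.mktime(time.strptime(date_string, '%Y-%m-%d')))

def ts2datetime(timestamp):
    # Unix timestamp -> 'YYYY-MM-DD' of the local day it falls in
    return time.strftime('%Y-%m-%d', time.localtime(timestamp))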
Example #2
def get_user_weibo(uid):
    result = []
    #use to test
    datestr = '2013-09-02'
    end_ts = datetime2ts(datestr)
    #real way to get datestr and ts_segment
    '''
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    now_date_ts = datetime2ts(now_date)
    ts_segment = (int((now_ts - now_date_ts) / 3600)) % 24
    end_ts = now_date_ts + ts_segment * 3600
    '''
    file_list = set(os.listdir(DEFAULT_LEVELDBPATH))
    for i in range(24 * 7, 0, -1):
        ts = end_ts - i * 3600
        datestr = ts2datetime(ts)
        ts_segment = (int((ts - datetime2ts(datestr)) / 3600)) % 24 + 1
        leveldb_folder = datestr + str(ts_segment)

        if leveldb_folder in file_list:
            leveldb_bucket = dynamic_leveldb(leveldb_folder)
            try:
                user_weibo = leveldb_bucket.Get(uid)
                weibo_list = json.loads(user_weibo)
                result.extend(weibo_list)
            except:
                pass

    return result
Example #3
def get_user_ip(uid):
    flow_text_index_list = []
    now_timestamp = datetime2ts(ts2datetime(time.time()))
    if RUN_TYPE == 0:
        now_timestamp = datetime2ts(RUN_TEST_TIME)
    for i in range(7, 0, -1):
        iter_date = ts2datetime(now_timestamp - DAY * i)
        flow_text_index_list.append(flow_text_index_name_pre + iter_date)

    weibo_all = es_flow_text.search(index=flow_text_index_list,
                                    doc_type=flow_text_index_type,
                                    body={
                                        'query': {
                                            'filtered': {
                                                'filter': {
                                                    'term': {
                                                        'uid': uid
                                                    }
                                                }
                                            }
                                        },
                                        'size': 10,
                                    })['hits']['hits']
    if not weibo_all:
        return None
    ip = weibo_all[0]["_source"]["ip"]
    return ip
Example #4
def search_sentiment_all_portrait(start_date, end_date, time_segment):
    sentiment_ts_count_dict = {}
    start_ts = datetime2ts(start_date)
    end_ts = datetime2ts(end_date)
    search_date_list = []
    domain_list = domain_en2ch_dict.keys()
    for i in range(start_ts, end_ts + DAY, DAY):
        iter_date = ts2datetime(i)
        search_date_list.append(iter_date)
    for sentiment in sentiment_type_list:
        sentiment_ts_count_dict[sentiment] = []
        for date_item in search_date_list:
            ts_count_result_list = []
            for domain in domain_list:
                iter_r_name = r_domain_sentiment_pre + date_item + '_' + sentiment + '_' + domain
                #get ts_count_dict in one day
                ts_count_result = R_DOMAIN_SENTIMENT.hgetall(iter_r_name)
                ts_count_result_list.append(ts_count_result)
            #union all domain to get all portrait
            all_ts_count_result = union_dict(ts_count_result_list)
            #get x and y list by timesegment
            new_ts_count_dict = get_new_ts_count_dict(all_ts_count_result, time_segment, date_item)
            sort_new_ts_count = sorted(new_ts_count_dict.items(), key=lambda x:x[0])
            sentiment_ts_count_dict[sentiment].extend(sort_new_ts_count)
    return sentiment_ts_count_dict
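
union_dict is not shown in these snippets. A hypothetical sketch matching its use above, where per-domain {timestamp: count} hashes from Redis (whose values arrive as strings) are summed key by key:

def union_dict(dict_list):
    # merge a list of count dicts by summing values for identical keys
    union_result = {}
    for item_dict in dict_list:
        for key in item_dict:
            union_result[key] = union_result.get(key, 0) + int(item_dict[key])
    return union_result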
Example #5
def new_get_influence_trend(uid, time_segment):
    results = {}
    try:
        influence_history = ES_COPY_USER_PORTRAIT.get(index=COPY_USER_PORTRAIT_INFLUENCE, doc_type=COPY_USER_PORTRAIT_INFLUENCE_TYPE,\
                id=uid)['_source']
        print ES_COPY_USER_PORTRAIT, COPY_USER_PORTRAIT_INFLUENCE, COPY_USER_PORTRAIT_INFLUENCE_TYPE, uid
        print influence_history
    except:
        influence_history = {}
    if influence_history:
        results = get_evaluate_trend(influence_history, 'bci')
    else:
        results = {}
    print results
    #deal results for situation---server power off
    new_time_list = []
    new_count_list = []
    new_results = {}
    now_time_ts = time.time()
    now_date_ts = datetime2ts(ts2datetime(now_time_ts))
    if RUN_TYPE == 0:
        now_date_ts = datetime2ts(RUN_TEST_TIME)
    for i in range(time_segment, 0, -1):
        iter_date_ts = now_date_ts - i * DAY
        try:
            date_count = results[iter_date_ts]
        except:
            date_count = 0
        new_time_list.append(iter_date_ts)
        new_count_list.append(date_count)
    new_results = {'timeline': new_time_list, 'evaluate_index': new_count_list}
    return new_results
Example #6
def get_user_detail(date, input_result, status, user_type="influence", auth=""):
    bci_date = ts2datetime(datetime2ts(date) - DAY)
    results = []
    if status=='show_in':
        uid_list = input_result
    if status=='show_compute':
        uid_list = input_result.keys()
    if status=='show_in_history':
        uid_list = input_result.keys()
    if date!='all':
        index_name = 'bci_' + ''.join(bci_date.split('-'))
    else:
        now_ts = time.time()
        now_date = ts2datetime(now_ts)
        index_name = 'bci_' + ''.join(now_date.split('-'))
    tmp_ts = str(datetime2ts(date) - DAY)
    sensitive_string = "sensitive_score_" + tmp_ts
    query_sensitive_body = {
        "query":{
            "match_all":{}
        },
        "size":1,
        "sort":{sensitive_string:{"order":"desc"}}
    }
    try:
        top_sensitive_result = es_bci_history.search(index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX, body=query_sensitive_body, _source=False, fields=[sensitive_string])['hits']['hits']
        top_sensitive = top_sensitive_result[0]['fields'][sensitive_string][0]
    except Exception, reason:
        print Exception, reason
        top_sensitive = 400
Example #7
def get_psycho_status(uid_list):
    results = {}
    uid_sentiment_dict = {}
    #time for es_flow_text
    now_ts = time.time()
    now_date_ts = datetime2ts(ts2datetime(now_ts))
    #run_type
    if RUN_TYPE == 0:
        now_date_ts = datetime2ts(RUN_TEST_TIME)
    start_date_ts = now_date_ts - DAY * WEEK
    for i in range(0, WEEK):
        iter_date_ts = start_date_ts + DAY * i
        flow_text_index_date = ts2datetime(iter_date_ts)
        flow_text_index_name = flow_text_index_name_pre + flow_text_index_date
        try:
            flow_text_exist = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type,\
                    body={'query':{'filtered':{'filter':{'terms':{'uid': uid_list}}}}, 'size': MAX_VALUE}, _source=False,  fields=['uid', 'sentiment'])['hits']['hits']
        except:
            flow_text_exist = []
        for flow_text_item in flow_text_exist:
            uid = flow_text_item['fields']['uid'][0]
            sentiment = flow_text_item['fields']['sentiment'][0]
            if uid in uid_sentiment_dict:
                try:
                    uid_sentiment_dict[uid][str(sentiment)] += 1
                except:
                    uid_sentiment_dict[uid][str(sentiment)] = 1
            else:
                uid_sentiment_dict[uid] = {str(sentiment): 1}
    #compute first and second psycho_status
    for uid in uid_list:
        results[uid] = {'first': {}, 'second': {}}
        try:
            user_sentiment_result = uid_sentiment_dict[uid]
        except:
            user_sentiment_result = {}
        all_count = sum(user_sentiment_result.values())
        #compute second level sentiment---negative type sentiment
        second_sentiment_count_list = [
            user_sentiment_result[item] for item in user_sentiment_result
            if item in SENTIMENT_SECOND
        ]
        second_sentiment_all_count = sum(second_sentiment_count_list)
        for sentiment_item in SENTIMENT_SECOND:
            try:
                results[uid]['second'][sentiment_item] = float(
                    user_sentiment_result[sentiment_item]) / all_count
            except:
                results[uid]['second'][sentiment_item] = 0
        #compute first level sentiment---middle, postive, negative
        user_sentiment_result['7'] = second_sentiment_all_count
        for sentiment_item in SENTIMENT_FIRST:
            try:
                sentiment_ratio = float(
                    user_sentiment_result[sentiment_item]) / all_count
            except:
                sentiment_ratio = 0
            results[uid]['first'][sentiment_item] = sentiment_ratio

    return results
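
SENTIMENT_FIRST and SENTIMENT_SECOND are configuration constants not included here. Judging from the code, which folds the negative subtypes into key '7', their shape is roughly as follows; the concrete values are an assumption:

# assumed shape only; the project's real config may differ
SENTIMENT_FIRST = ['0', '1', '7']             # neutral, positive, aggregated negative
SENTIMENT_SECOND = ['2', '3', '4', '5', '6']  # negative subtypes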
Example #8
def get_user_detail(date, input_result, status, user_type="influence", auth=""):
    bci_date = ts2datetime(datetime2ts(date) - DAY)
    results = []
    if status=='show_in':
        uid_list = input_result
    if status=='show_compute':
        uid_list = input_result.keys()
    if status=='show_in_history':
        uid_list = input_result.keys()
    if date!='all':
        index_name = 'bci_' + ''.join(bci_date.split('-'))
    else:
        now_ts = time.time()
        now_date = ts2datetime(now_ts)
        index_name = 'bci_' + ''.join(now_date.split('-'))
    tmp_ts = str(datetime2ts(date) - DAY)
    sensitive_string = "sensitive_score_" + tmp_ts
    query_sensitive_body = {
        "query":{
            "match_all":{}
        },
        "size":1,
        "sort":{sensitive_string:{"order":"desc"}}
    }
    try:
        top_sensitive_result = es_bci_history.search(index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX, body=query_sensitive_body, _source=False, fields=[sensitive_string])['hits']['hits']
        top_sensitive = top_sensitive_result[0]['fields'][sensitive_string][0]
    except Exception, reason:
        print Exception, reason
        top_sensitive = 400
Example #9
def get_user_geo(uid):
    result = []
    user_geo_result = {}
    user_ip_dict = {}
    user_ip_result = dict()
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    #run_type
    if RUN_TYPE == 1:
        ts = datetime2ts(now_date)
    else:
        ts = datetime2ts(RUN_TEST_TIME)
    for i in range(1, 8):
        ts = ts - 3600*24
        results = r_cluster2.hget('new_ip_'+str(ts), uid)
        if results:
            ip_dict = json.loads(results)
            for ip in ip_dict:
                ip_count = len(ip_dict[ip].split('&'))
                try:
                    user_ip_result[ip] += ip_count
                except:
                    user_ip_result[ip] = ip_count
    user_geo_dict = ip2geo(user_ip_result)
    user_geo_result = sorted(user_geo_dict.items(), key=lambda x:x[1], reverse=True)

    return user_geo_result
Example #10
def get_user_geo(uid):
    result = []
    user_geo_result = {}
    user_ip_dict = {}
    user_ip_result = dict()
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    ts = datetime2ts(now_date)
    #test
    ts = datetime2ts('2013-09-08')
    for i in range(1, 8):
        ts = ts - 3600*24
        results = r_cluster.hget('ip_'+str(ts), uid)
        if results:
            ip_dict = json.loads(results)
            for ip in ip_dict:
                try:
                    user_ip_result[ip] += ip_dict[ip]
                except:
                    user_ip_result[ip] = ip_dict[ip]
    #print 'user_ip_result:', user_ip_result
    user_geo_dict = ip2geo(user_ip_result)
    user_geo_result = sorted(user_geo_dict.items(), key=lambda x:x[1], reverse=True)

    return user_geo_result
Example #11
def search_sentiment_all_portrait(start_date, end_date, time_segment):
    sentiment_ts_count_dict = {}
    start_ts = datetime2ts(start_date)
    end_ts = datetime2ts(end_date)
    search_date_list = []
    domain_list = domain_en2ch_dict.keys()
    for i in range(start_ts, end_ts + DAY, DAY):
        iter_date = ts2datetime(i)
        search_date_list.append(iter_date)
    for sentiment in sentiment_type_list:
        sentiment_ts_count_dict[sentiment] = []
        for date_item in search_date_list:
            ts_count_result_list = []
            for domain in domain_list:
                iter_r_name = r_domain_sentiment_pre + date_item + '_' + sentiment + '_' + domain
                #get ts_count_dict in one day
                ts_count_result = R_DOMAIN_SENTIMENT.hgetall(iter_r_name)
                ts_count_result_list.append(ts_count_result)
            #union all domain to get all portrait
            all_ts_count_result = union_dict(ts_count_result_list)
            #get x and y list by timesegment
            new_ts_count_dict = get_new_ts_count_dict(all_ts_count_result, time_segment, date_item)
            sort_new_ts_count = sorted(new_ts_count_dict.items(), key=lambda x:x[0])
            sentiment_ts_count_dict[sentiment].extend(sort_new_ts_count)
    return sentiment_ts_count_dict
Example #12
def get_user_weibo(uid):
    result = []
    #use to test
    datestr = '2013-09-02'
    end_ts = datetime2ts(datestr)
    #real way to get datestr and ts_segment
    '''
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    now_date_ts = datetime2ts(now_date)
    ts_segment = (int((now_ts - now_date_ts) / 3600)) % 24
    end_ts = now_date_ts + ts_segment * 3600
    '''
    file_list = set(os.listdir(DEFAULT_LEVELDBPATH))
    for i in range(24*7, 0, -1):
        ts = end_ts - i * 3600
        datestr = ts2datetime(ts)
        ts_segment = (int((ts - datetime2ts(datestr)) / 3600)) % 24 + 1
        leveldb_folder = datestr + str(ts_segment)
        
        if leveldb_folder in file_list:
            leveldb_bucket = dynamic_leveldb(leveldb_folder)
            try:
                user_weibo = leveldb_bucket.Get(uid)
                weibo_list = json.loads(user_weibo)
                result.extend(weibo_list)
            except:
                pass
            

    return result
Example #13
def show_user_operation_index(admin_user, start_ts, end_ts):
    results = {}
    if admin_user == '' and start_ts == '' and end_ts == '':
        query_body = {
        'query': {
            'match_all':{}
            },
        'size': MAX_VALUE
        }
    elif admin_user and start_ts and end_ts:
        start = datetime2ts(start_ts) - 1
        end = datetime2ts(end_ts) + DAY
        query_body = {
        'query':{
            'bool':{
                'must':[
                    {'term': {'admin_user': admin_user}},
                    {'range':{'timestamp': {'gte': start, 'lt': end}}}
                    ]
                }
            },
         'size': MAX_VALUE
        }
    elif admin_user and (start_ts == '' or end_ts == ''):
        query_body = {
        'query':{
            'term':{'admin_user': admin_user}
            },
        'size': MAX_VALUE
        }
    elif admin_user == '' and start_ts != '' and end_ts != '':
        start = datetime2ts(start_ts) - 1
        end = datetime2ts(end_ts) + DAY
        query_body = {
        'query':{
            'range':{'timestamp':{'gte': start, 'lt': end}}
            },
        'size': MAX_VALUE
        }
    try:
        results = es_operation.search(index=operation_index_name, doc_type=operation_index_type, body=query_body)['hits']['hits']
    except:
        results = []
    return_results = []
    all_operation_dict = {}
    stat_operation_list = ['rank_count', 'compute_count', 'sentiment_count',\
    'recomment_count', 'detect_count', 'analysis_count', 'tag_count', 'network_count', 'sensing_count']
    for item in results:
        source = item['_source']
        return_results.append(source)
        for stat_item in stat_operation_list:
            try:
                all_operation_dict[stat_item] += source[stat_item]
            except:
                all_operation_dict[stat_item] = source[stat_item]
    sort_return_results = sorted(return_results, key=lambda x:x['timestamp'], reverse=False)
    if sort_return_results:
        sort_return_results.append(all_operation_dict)
    return sort_return_results
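
The try/except pattern used above to accumulate counts recurs throughout these examples. collections.defaultdict expresses the same summation more directly; an equivalent sketch, with the caveat that it treats a missing field as 0 where the original assumes it is present:

from collections import defaultdict

all_operation_dict = defaultdict(int)
for item in results:
    source = item['_source']
    for stat_item in stat_operation_list:
        all_operation_dict[stat_item] += source.get(stat_item, 0)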
Example #14
def get_influence_content(uid, timestamp_from, timestamp_to):
    weibo_list = []
    #split timestamp range to new_range_dict_list
    from_date_ts = datetime2ts(ts2datetime(timestamp_from))
    to_date_ts = datetime2ts(ts2datetime(timestamp_to))
    new_range_dict_list = []
    if from_date_ts != to_date_ts:
        iter_date_ts = from_date_ts
        while iter_date_ts < to_date_ts:
            iter_next_date_ts = iter_date_ts + DAY
            new_range_dict_list.append({
                'range': {
                    'timestamp': {
                        'gte': iter_date_ts,
                        'lt': iter_next_date_ts
                    }
                }
            })
            iter_date_ts = iter_next_date_ts
        if new_range_dict_list[0]['range']['timestamp']['gte'] < timestamp_from:
            new_range_dict_list[0]['range']['timestamp'][
                'gte'] = timestamp_from
        if new_range_dict_list[-1]['range']['timestamp']['lt'] > timestamp_to:
            new_range_dict_list[-1]['range']['timestamp']['lt'] = timestamp_to
    else:
        new_range_dict_list = [{
            'range': {
                'timestamp': {
                    'gte': timestamp_from,
                    'lt': timestamp_to
                }
            }
        }]
    #iter date to search flow_text
    iter_result = []
    for range_item in new_range_dict_list:
        range_from_ts = range_item['range']['timestamp']['gte']
        range_from_date = ts2datetime(range_from_ts)
        flow_text_index_name = flow_text_index_name_pre + range_from_date
        query = []
        query.append({'term': {'uid': uid}})
        query.append(range_item)
        try:
            flow_text_exist = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type,\
                    body={'query':{'bool':{'must': query}}, 'sort':[{'timestamp':'asc'}]})['hits']['hits']
        except:
            flow_text_exist = []
        iter_result.extend(flow_text_exist)
    # get weibo list from the accumulated hits
    for item in iter_result:
        source = item['_source']
        weibo = {}
        weibo['timestamp'] = ts2date(source['timestamp'])
        weibo['ip'] = source['ip']
        weibo['text'] = source['text']
        weibo['geo'] = '\t'.join(source['geo'].split('&'))
        weibo_list.append(weibo)

    return weibo_list
Example #15
def adsRec(uid, queryInterval=HOUR * 24):
    '''
    Read the weibos posted within the preceding queryInterval from the ads
    table and extract the advertisement portion, then recommend ads based
    on the user's key_word information.
    :param uid: user ID
    :param queryInterval: how far back in time to query for ads
    :return: list of ad weibos, sorted by relevance (level of interest)
    '''

    # run type:
    # 0 ->  treat "now" as 2013-09-08 00:00:00
    # 1 ->  use the real current time
    now_date = ts2datetime(time.time()) if RUN_TYPE == 1 else ts2datetime(
        datetime2ts(RUN_TEST_TIME) - DAY)

    # fetch the user's preferences
    try:
        print uid
        user_portrait_result = es_user_portrait. \
            get_source(index=portrait_index_name, doc_type=profile_index_type, id=uid)
    except:
        return None

    user_key_words = set(user_portrait_result["keywords_string"].split("&"))

    # read directly from the ads table and compute
    ads_weibo_all = es_ads_weibo.search(
        index=ads_weibo_index_name,
        doc_type=ads_weibo_index_type,
        body={
            'query': {
                "filtered": {
                    "filter": {
                        "range": {
                            "timestamp": {
                                "gte": datetime2ts(now_date) - queryInterval
                            }
                        }
                    }
                }
            },
            'size': 2000,
        })['hits']['hits']

    random.shuffle(ads_weibo_all)
    ads_weibo_all = ads_weibo_all[:800]

    # derive TF-IDF word weights for each topic category
    topic_word_weight_dic = construct_topic_word_weight_dic(
        ADS_TOPIC_TFIDF_DIR)

    # derive the user's distribution over ad topics from the keywords of
    # their weibos, since the existing topics do not fit ad categories well
    user_topic_dic = construct_topic_feature_dic(user_key_words,
                                                 topic_word_weight_dic)

    ads_weibo_prefer = adsPreferred(user_topic_dic, ads_weibo_all,
                                    topic_word_weight_dic, 30)
    return ads_weibo_prefer
Example #16
def compare_user_activity(uid_list):
    result = {} # output data: {user:[weibo_status]}, {user:[(date,weibo)]}, ts_list
    timesegment_result = {}
    now_ts = time.time()
    date = ts2datetime(now_ts)
    #run_type
    if RUN_TYPE == 1:
        ts = datetime2ts(date)
    else:
        ts = datetime2ts(RUN_TEST_TIME)
    for i in range(1,8):
        ts = ts - DAY
        hash_name = 'activity_' + str(ts)
        r_result = r_cluster.hmget(hash_name, uid_list)
        if r_result:
            count = 0
            for r_item in r_result:
                if r_item:
                    r_item = json.loads(r_item)
                if uid_list[count] not in result:
                    result[uid_list[count]] = {}
                if uid_list[count] not in timesegment_result:
                    timesegment_result[uid_list[count]] = {}
                count += 1
                if r_item:
                    time_result = dict()
                    for segment in r_item:
                        try:
                            result[uid_list[count-1]][int(segment)/16*15*60*16+ts] += r_item[segment]
                        except:
                            result[uid_list[count-1]][int(segment)/16*15*60*16+ts] = r_item[segment]
                        try:
                            timesegment_result[uid_list[count-1]][int(segment)/16*15*60*16] += r_item[segment]
                        except:
                            timesegment_result[uid_list[count-1]][int(segment)/16*15*60*16] = r_item[segment]

    user_list = {}
    user_timesegment_list = {}
    ts_list = []
    for user in result:
        timesegment_dict = timesegment_result[user]
        sort_segment = sorted(timesegment_dict.items(), key=lambda x:x[1], reverse=True)
        segment_top = sort_segment[:3]
        user_timesegment_list[user] = segment_top
        user_dict = result[user]
        for i in range(0, 42):
            timestamp = ts + 15*60*16*i
            if len(ts_list)<42:
                ts_list.append(timestamp)
            try:
                count = user_dict[timestamp]
            except:
                count = 0
            try:
                user_list[user].append(count)
            except:
                user_list[user] = [count]
    return user_list, user_timesegment_list, ts_list
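
The expression int(segment)/16*15*60*16 above decodes the activity hash layout: keys are 15-minute slots of the day, and integer division by 16 groups them into 4-hour buckets (16 slots of 15 minutes each). An illustrative check:

slot = 37                          # the 09:15-09:30 slot of the day
bucket_offset = slot // 16 * (15 * 60 * 16)
print(bucket_offset)               # 28800 seconds, i.e. the 08:00 bucket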
Example #17
def compare_user_activity(uid_list):
    result = {} # output data: {user:[weibo_status]}, {user:[(date,weibo)]}, ts_list
    timesegment_result = {}
    now_ts = time.time()
    date = ts2datetime(now_ts)
    #run_type
    if RUN_TYPE == 1:
        ts = datetime2ts(date)
    else:
        ts = datetime2ts(RUN_TEST_TIME)
    for i in range(1,8):
        ts = ts - DAY
        hash_name = 'activity_' + str(ts)
        r_result = r_cluster.hmget(hash_name, uid_list)
        if r_result:
            count = 0
            for r_item in r_result:
                if r_item:
                    r_item = json.loads(r_item)
                if uid_list[count] not in result:
                    result[uid_list[count]] = {}
                if uid_list[count] not in timesegment_result:
                    timesegment_result[uid_list[count]] = {}
                count += 1
                if r_item:
                    time_result = dict()
                    for segment in r_item:
                        try:
                            result[uid_list[count-1]][int(segment)/16*15*60*16+ts] += r_item[segment]
                        except:
                            result[uid_list[count-1]][int(segment)/16*15*60*16+ts] = r_item[segment]
                        try:
                            timesegment_result[uid_list[count-1]][int(segment)/16*15*60*16] += r_item[segment]
                        except:
                            timesegment_result[uid_list[count-1]][int(segment)/16*15*60*16] = r_item[segment]

    user_list = {}
    user_timesegment_list = {}
    ts_list = []
    for user in result:
        timesegment_dict = timesegment_result[user]
        sort_segment = sorted(timesegment_dict.items(), key=lambda x:x[1], reverse=True)
        segment_top = sort_segment[:3]
        user_timesegment_list[user] = segment_top
        user_dict = result[user]
        for i in range(0, 42):
            timestamp = ts + 15*60*16*i
            if len(ts_list)<42:
                ts_list.append(timestamp)
            try:
                count = user_dict[timestamp]
            except:
                count = 0
            try:
                user_list[user].append(count)
            except:
                user_list[user] = [count]
    return user_list, user_timesegment_list, ts_list
Example #18
def get_db_num(timestamp):
    date = ts2datetime(timestamp)
    date_ts = datetime2ts(date)
    r_begin_ts = datetime2ts(R_BEGIN_TIME)
    db_number = ((date_ts - r_begin_ts) / (DAY * 7)) % 2 + 1
    # run_type
    if RUN_TYPE == 0:
        db_number = 1
    return db_number
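
get_db_num alternates between databases 1 and 2 on a weekly cadence measured from R_BEGIN_TIME. An illustrative run, reusing the datetime2ts sketch above with a hypothetical start date:

r_begin_ts = datetime2ts('2013-09-01')  # stand-in for R_BEGIN_TIME
for offset_days in (0, 7, 14):
    date_ts = r_begin_ts + offset_days * DAY
    print((date_ts - r_begin_ts) // (DAY * 7) % 2 + 1)  # prints 1, 2, 1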
Example #19
def get_psycho_status(uid_list):
    results = {}
    uid_sentiment_dict = {}
    #time for es_flow_text
    now_ts = time.time()
    now_date_ts = datetime2ts(ts2datetime(now_ts))
    #run_type
    if RUN_TYPE == 0:
        now_date_ts = datetime2ts(RUN_TEST_TIME)
    start_date_ts = now_date_ts - DAY * WEEK
    for i in range(0, WEEK):
        iter_date_ts = start_date_ts + DAY * i
        flow_text_index_date = ts2datetime(iter_date_ts)
        flow_text_index_name = flow_text_index_name_pre + flow_text_index_date
        try:
            flow_text_exist = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type,\
                    body={'query':{'filtered':{'filter':{'terms':{'uid': uid_list}}}}, 'size': MAX_VALUE}, _source=False,  fields=['uid', 'sentiment'])['hits']['hits']
        except:
            flow_text_exist = []
        for flow_text_item in flow_text_exist:
            uid = flow_text_item['fields']['uid'][0]
            sentiment = flow_text_item['fields']['sentiment'][0]
            if uid in uid_sentiment_dict:
                try:
                    uid_sentiment_dict[uid][str(sentiment)] += 1
                except:
                    uid_sentiment_dict[uid][str(sentiment)] = 1
            else:
                uid_sentiment_dict[uid] = {str(sentiment): 1}
    #compute first and second psycho_status
    for uid in uid_list:
        results[uid] = {'first':{}, 'second':{}}
        try:
            user_sentiment_result = uid_sentiment_dict[uid]
        except:
            user_sentiment_result = {}
        all_count = sum(user_sentiment_result.values())
        #compute second level sentiment---negative type sentiment
        second_sentiment_count_list = [user_sentiment_result[item] for item in user_sentiment_result if item in SENTIMENT_SECOND]
        second_sentiment_all_count = sum(second_sentiment_count_list)
        for sentiment_item in SENTIMENT_SECOND:
            try:
                results[uid]['second'][sentiment_item] = float(user_sentiment_result[sentiment_item]) / all_count
            except:
                results[uid]['second'][sentiment_item] = 0
        #compute first level sentiment---middle, postive, negative
        user_sentiment_result['7'] = second_sentiment_all_count
        for sentiment_item in SENTIMENT_FIRST:
            try:
                sentiment_ratio = float(user_sentiment_result[sentiment_item]) / all_count
            except:
                sentiment_ratio = 0
            results[uid]['first'][sentiment_item] = sentiment_ratio

    return results
Example #20
def get_social_inter_content(uid1, uid2, type_mark):
    weibo_list = []
    #get two type relation about uid1 and uid2
    #search weibo list
    now_ts = int(time.time())
    #run_type
    if RUN_TYPE == 1:
        now_date_ts = datetime2ts(ts2datetime(now_ts))
    else:
        now_date_ts = datetime2ts(RUN_TEST_TIME)
    #uid2uname
    uid2uname = {}
    try:
        portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type ,\
                                body={'ids': [uid1, uid2]}, _source=False, fields=['uid', 'uname'])['docs']
    except:
        portrait_result = []
    
    for item in portrait_result:
        uid = item['_id']
        if item['found'] == True:
            uname = item['fields']['uname'][0]
            uid2uname[uid] = uname
        else:
            uid2uname[uid] = 'unknown'
    #iter date to search weibo list
    for i in range(7, 0, -1):
        iter_date_ts = now_date_ts - i*DAY
        iter_date = ts2datetime(iter_date_ts)
        flow_text_index_name = flow_text_index_name_pre + str(iter_date)
        query = []
        query.append({'bool':{'must':[{'term':{'uid':uid1}}, {'term':{'directed_uid': int(uid2)}}]}})
        if type_mark=='out':
            query.append({'bool':{'must':[{'term':{'uid':uid2}}, {'term':{'directed_uid': int(uid1)}}]}})
        try:
            flow_text_result = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type,\
                    body={'query': {'bool':{'should': query}}, 'sort':[{'timestamp':{'order': 'asc'}}], 'size':MAX_VALUE})['hits']['hits']
        except:
            flow_text_result = []
        for flow_text in flow_text_result:
            source = flow_text['_source']
            weibo = {}
            weibo['timestamp'] = source['timestamp']
            weibo['ip'] = source['ip']
            weibo['geo'] = source['geo']
            weibo['text'] = '\t'.join(source['text'].split('&'))
            weibo['uid'] =  source['uid']
            weibo['uname'] = uid2uname[weibo['uid']]
            weibo['directed_uid'] = str(source['directed_uid'])
            weibo['directed_uname'] = uid2uname[str(source['directed_uid'])]
            weibo_list.append(weibo)

    return weibo_list
Example #21
def get_social_inter_content(uid1, uid2, type_mark):
    weibo_list = []
    #get two type relation about uid1 and uid2
    #search weibo list
    now_ts = int(time.time())
    #run_type
    if RUN_TYPE == 1:
        now_date_ts = datetime2ts(ts2datetime(now_ts))
    else:
        now_date_ts = datetime2ts(RUN_TEST_TIME)
    #uid2uname
    uid2uname = {}
    try:
        portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type ,\
                                body={'ids': [uid1, uid2]}, _source=False, fields=['uid', 'uname'])['docs']
    except:
        portrait_result = []
    
    for item in portrait_result:
        uid = item['_id']
        if item['found'] == True:
            uname = item['fields']['uname'][0]
            uid2uname[uid] = uname
        else:
            uid2uname[uid] = 'unknown'
    #iter date to search weibo list
    for i in range(7, 0, -1):
        iter_date_ts = now_date_ts - i*DAY
        iter_date = ts2datetime(iter_date_ts)
        flow_text_index_name = flow_text_index_name_pre + str(iter_date)
        query = []
        query.append({'bool':{'must':[{'term':{'uid':uid1}}, {'term':{'directed_uid': int(uid2)}}]}})
        if type_mark=='out':
            query.append({'bool':{'must':[{'term':{'uid':uid2}}, {'term':{'directed_uid': int(uid1)}}]}})
        try:
            flow_text_result = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type,\
                    body={'query': {'bool':{'should': query}}, 'sort':[{'timestamp':{'order': 'asc'}}], 'size':MAX_VALUE})['hits']['hits']
        except:
            flow_text_result = []
        for flow_text in flow_text_result:
            source = flow_text['_source']
            weibo = {}
            weibo['timestamp'] = source['timestamp']
            weibo['ip'] = source['ip']
            weibo['geo'] = source['geo']
            weibo['text'] = '\t'.join(source['text'].split('&'))
            weibo['uid'] =  source['uid']
            weibo['uname'] = uid2uname[weibo['uid']]
            weibo['directed_uid'] = str(source['directed_uid'])
            weibo['directed_uname'] = uid2uname[str(source['directed_uid'])]
            weibo_list.append(weibo)

    return weibo_list
Example #22
def new_get_user_location(uid):
    results = {}
    now_date = ts2datetime(time.time())
    now_date_ts = datetime2ts(now_date)
    #run type
    if RUN_TYPE == 0:
        now_date_ts = datetime2ts(RUN_TEST_TIME) - DAY
        now_date = ts2datetime(now_date_ts)
    #now ip
    try:
        ip_time_string = r_cluster.hget('new_ip_'+str(now_date_ts), uid)
    except Exception, e:
        raise e
Example #23
def new_get_user_location(uid):
    results = {}
    now_date = ts2datetime(time.time())
    now_date_ts = datetime2ts(now_date)
    #run type
    if RUN_TYPE == 0:
        now_date_ts = datetime2ts(RUN_TEST_TIME) - DAY
        now_date = ts2datetime(now_date_ts)
    #now ip
    try:
        ip_time_string = r_cluster.hget('new_ip_'+str(now_date_ts), uid)
    except Exception, e:
        raise e
Example #24
def read_flow_text_sentiment(uid_list):
    """
        Read users' weibos (results carry per-weibo sentiment labels):
        input:  uid_list (list of strings)
        output: word_dict (per-user word segmentation/frequency dict), weibo_list (list of user weibos)
        word_dict example:  {uid1:{'w1':f1,'w2':f2...}...}
        weibo_list example: [[uid1,text1,s1,ts1],[uid2,text2,s2,ts2],...] (each record holds four values: uid, text, sentiment, timestamp)
    """
    word_dict = dict()  # word-frequency dict
    weibo_list = []  # list of weibos
    now_ts = time.time()
    now_date_ts = datetime2ts(ts2datetime(now_ts))
    now_date_ts = datetime2ts("2013-09-08")
    start_date_ts = now_date_ts - DAY * WEEK
    for i in range(0, WEEK):
        iter_date_ts = start_date_ts + DAY * i
        flow_text_index_date = ts2datetime(iter_date_ts)
        flow_text_index_name = flow_text_index_name_pre + flow_text_index_date
        print flow_text_index_name
        try:
            flow_text_exist = es_flow_text.search(
                index=flow_text_index_name,
                doc_type=flow_text_index_type,
                body={"query": {"filtered": {"filter": {"terms": {"uid": uid_list}}}}, "size": MAX_VALUE},
                _source=False,
                fields=["text", "uid", "sentiment", "keywords_dict", "timestamp"],
            )["hits"]["hits"]
        except:
            flow_text_exist = []

        for flow_text_item in flow_text_exist:
            uid = flow_text_item["fields"]["uid"][0].encode("utf-8")
            text = flow_text_item["fields"]["text"][0].encode("utf-8")
            sentiment = int(flow_text_item["fields"]["sentiment"][0])
            ts = flow_text_item["fields"]["timestamp"][0]
            keywords_dict = json.loads(flow_text_item["fields"]["keywords_dict"][0])
            keywords_dict = json.dumps(keywords_dict, encoding="UTF-8", ensure_ascii=False)
            keywords_dict = eval(keywords_dict)

            if uid in word_dict:
                item_dict = Counter(word_dict[uid])
                keywords_dict = Counter(keywords_dict)
                item_dict = dict(item_dict + keywords_dict)
                word_dict[uid] = item_dict
            else:
                word_dict[uid] = keywords_dict

            weibo_list.append([uid, text, sentiment, ts])

    return word_dict, weibo_list
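
The Counter arithmetic in the loop above merges per-weibo keyword frequencies by summing counts for shared words; a small illustration:

from collections import Counter

a = {'w1': 2, 'w2': 1}
b = {'w2': 3, 'w3': 1}
merged = dict(Counter(a) + Counter(b))
print(merged)  # {'w1': 2, 'w2': 4, 'w3': 1}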
Example #25
def get_group_user_track(uid):
    results = []
    #step1:get user_portrait activity_geo_dict
    try:
        portrait_result = es_user_portrait.get(index=portrait_index_name, doc_type=portrait_index_type,\
                id=uid, _source=False, fields=['activity_geo_dict'])
    except:
        portrait_result = {}
    if portrait_result == {}:
        return 'uid is not in user_portrait'
    activity_geo_dict = json.loads(
        portrait_result['fields']['activity_geo_dict'][0])
    now_date_ts = datetime2ts(ts2datetime(int(time.time())))
    start_ts = now_date_ts - DAY * len(activity_geo_dict)
    #step2: iter date to get month track
    for geo_item in activity_geo_dict:
        iter_date = ts2datetime(start_ts)
        sort_day_dict = sorted(geo_item.items(),
                               key=lambda x: x[1],
                               reverse=True)
        if sort_day_dict:
            results.append([iter_date, sort_day_dict[0][0]])
        else:
            results.append([iter_date, ''])
        start_ts = start_ts + DAY

    return results
Example #26
def search_mention(now_ts, uid):
    date = ts2datetime(now_ts)
    ts = datetime2ts(date)
    #print 'at date-ts:', ts
    stat_results = dict()
    results = dict()
    for i in range(1,8):
        ts = ts - 24 * 3600
        try:
            result_string = r_cluster.hget('at_' + str(ts), str(uid))
        except:
            result_string = ''
        if not result_string:
            continue
        result_dict = json.loads(result_string)
        for at_uid in result_dict:
            try:
                stat_results[at_uid] += result_dict[at_uid]
            except:
                stat_results[at_uid] = result_dict[at_uid]
    
    for at_uid in stat_results:
        # search uid
        '''
        uname = search_uid2uname(at_uid)
        if not uname:
        '''    
        uid = ''
        count = stat_results[at_uid]
        results[at_uid] = [uid, count]
    if results:
        sort_results = sorted(results.items(), key=lambda x:x[1][1], reverse=True)
        return [sort_results[:20], len(results)]
    else:
        return [None, 0]
Example #27
def ajax_revise_task():
    task_name = request.args.get('task_name', '')  # must
    finish = request.args.get("finish", "10")
    stop_time = request.args.get('stop_time', '')  # timestamp

    now_ts = datetime2ts("2013-09-06")
    #now_ts = time.time()
    if stop_time and int(stop_time) < now_ts:
        return json.dumps([])

    if task_name:
        task_detail = es.get(index=index_manage_sensing_task,
                             doc_type=task_doc_type,
                             id=task_name)['_source']
        if stop_time:
            task_detail['stop_time'] = stop_time
        if int(finish) == 0:
            task_detail['finish'] = finish
            task_detail['processing_status'] = "1"  # mark as processing again when the task restarts
        if stop_time or int(finish) == 0:
            es.index(index=index_manage_sensing_task,
                     doc_type=task_doc_type,
                     id=task_name,
                     body=task_detail)
            return json.dumps(['1'])
    return json.dumps([])
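
ajax_revise_task reads its parameters from the query string via Flask's request.args. A restart request for a finished task might look like this; the route path and values are hypothetical:

GET /ajax_revise_task?task_name=demo_task&finish=0&stop_time=1378828800

finish=0 re-opens the task, and stop_time must be a Unix timestamp no earlier than the server's notion of "now".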
Example #28
def get_db_num(timestamp):
    date = ts2datetime(timestamp)
    date_ts = datetime2ts(date)
    db_number = ((date_ts - r_beigin_ts) / (DAY * 7)) % 2 + 1
    if RUN_TYPE == 0:
        db_number = 1
    return db_number
Example #29
def recommentation_in_auto(search_date, submit_user):
    results = []
    #run type
    if RUN_TYPE == 1:
        now_date = ts2datetime(time.time() - DAY)
    else:
        now_date = ts2datetime(datetime2ts(RUN_TEST_TIME) - DAY)
    recomment_hash_name = 'recomment_' + now_date + '_auto'
    recomment_influence_hash_name = 'recomment_' + now_date + '_influence'
    recomment_sensitive_hash_name = 'recomment_' + now_date + '_sensitive'
    recomment_compute_hash_name = 'compute'
    #step1: get auto
    auto_result = r.hget(recomment_hash_name, 'auto')
    if auto_result:
        auto_user_list = json.loads(auto_result)
    else:
        auto_user_list = []
    #step2: get admin user result
    admin_result = r.hget(recomment_hash_name, submit_user)
    if admin_result:
        admin_user_list = json.loads(admin_result)
    else:
        admin_user_list = []
    #step3: get union user and filter compute/influence/sensitive
    union_user_auto_set = set(auto_user_list) | set(admin_user_list)
    influence_user = set(r.hkeys(recomment_influence_hash_name))
    sensitive_user = set(r.hkeys(recomment_sensitive_hash_name))
    compute_user = set(r.hkeys(recomment_compute_hash_name))
    filter_union_user = union_user_auto_set - (influence_user | sensitive_user | compute_user)
    auto_user_list = list(filter_union_user)
    #step4: get user detail
    results = get_user_detail(now_date, auto_user_list, 'show_in', 'auto')
    return results
Example #30
def new_get_activeness_trend(uid, time_segment):
    results = {}
    try:
        activeness_history = ES_COPY_USER_PORTRAIT.get(index=COPY_USER_PORTRAIT_ACTIVENESS, doc_type=COPY_USER_PORTRAIT_ACTIVENESS_TYPE,\
                id=uid)['_source']
    except:
        activeness_history = {}
    if activeness_history:
        results = get_evaluate_trend(activeness_history, 'activeness')
    else:
        results = {}
    #deal results for situation---server power off
    new_time_list = []
    new_count_list = []
    new_results = {}
    now_time_ts = time.time()
    now_date_ts = datetime2ts(ts2datetime(now_time_ts))
    for i in range(time_segment, 0, -1):
        iter_date_ts = now_date_ts - i * DAY
        try:
            date_count = results[iter_date_ts]
        except:
            date_count = 0
        new_time_list.append(iter_date_ts)
        new_count_list.append(date_count)
    new_results = {'timeline': new_time_list, 'evaluate_index': new_count_list}
    return new_results
Example #31
def get_db_num(timestamp):
    date = ts2datetime(timestamp)
    date_ts = datetime2ts(date)
    db_number = ((date_ts - r_beigin_ts) / (DAY * 7)) % 2 + 1
    if RUN_TYPE == 0:
        db_number = 1
    return db_number
Example #32
def get_geo_track(uid):
    date_results = [] # {'2013-09-01':[(geo1, count1),(geo2, count2)], '2013-09-02'...}
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    #test
    now_date = '2013-09-08'
    ts = datetime2ts(now_date)
    city_list = []
    city_set = set()
    for i in range(7, 0, -1):
        timestamp = ts - i*24*3600
        #print 'timestamp:', ts2datetime(timestamp)
        ip_dict = dict()
        results = r_cluster.hget('ip_'+str(timestamp), uid)
        ip_dict = dict()
        date = ts2datetime(timestamp)
        date_key = '-'.join(date.split('-')[1:])
        if results:
            ip_dict = json.loads(results)
            geo_dict = ip_dict2geo(ip_dict)
            city_list.extend(geo_dict.keys())
            sort_geo_dict = sorted(geo_dict.items(), key=lambda x:x[1], reverse=True)
            date_results.append([date_key, sort_geo_dict[:2]])
        else:
            date_results.append([date_key, []])

    print 'results:', date_results
    city_set = set(city_list)
    geo_conclusion = get_geo_conclusion(uid, city_set)
    return [date_results, geo_conclusion]
Example #33
def search_location(now_ts, uid):
    date = ts2datetime(now_ts)
    #print 'date:', date
    ts = datetime2ts(date)
    #print 'date-ts:', ts
    stat_results = dict()
    results = dict()
    for i in range(1, 8):
        ts = ts - 24 * 3600
        #print 'for-ts:', ts
        result_string = r_cluster.hget('ip_' + str(ts), str(uid))
        if not result_string:
            continue
        result_dict = json.loads(result_string)
        for ip in result_dict:
            try:
                stat_results[ip] += result_dict[ip]
            except:
                stat_results[ip] = result_dict[ip]
    for ip in stat_results:
        city = ip2city(ip)
        if city:
            try:
                results[city][ip] = stat_results[ip]
            except:
                results[city] = {ip: stat_results[ip]}
                

    description = active_geo_description(results)
    results['description'] = description
    #print 'location results:', results
    return results
Example #34
def new_get_activeness_trend(uid, time_segment):
    results = {}
    try:
        activeness_history = ES_COPY_USER_PORTRAIT.get(index=COPY_USER_PORTRAIT_ACTIVENESS, doc_type=COPY_USER_PORTRAIT_ACTIVENESS_TYPE,\
                id=uid)['_source']
    except:
        activeness_history = {}
    if activeness_history:
        results = get_evaluate_trend(activeness_history, 'activeness')
    else:
        results = {}
    #deal results for situation---server power off
    new_time_list = []
    new_count_list = []
    new_results = {}
    now_time_ts = time.time()
    now_date_ts  = datetime2ts(ts2datetime(now_time_ts))
    for i in range(time_segment, 0, -1):
        iter_date_ts = now_date_ts - i * DAY
        try:
            date_count = results[iter_date_ts]
        except:
            date_count = 0
        new_time_list.append(iter_date_ts)
        new_count_list.append(date_count)
    new_results = {'timeline': new_time_list, 'evaluate_index': new_count_list}
    return new_results
Example #35
def search_sentiment_all_keywords_task(submit_date, keywords_string, submit_user, start_date, end_date, status):
    results = []
    query_list = []
    if submit_date:
        submit_ts_start = datetime2ts(submit_date)
        submit_ts_end = submit_ts_start + DAY
        query_list.append({'range': {'submit_ts': {'gte': submit_ts_start, 'lt':submit_ts_end}}})
    if keywords_string:
        keywords_list = keywords_string.split(',')
        query_list.append({'terms':{'query_keywords': keywords_list}})
    if submit_user:
        query_list.append({'term': {'submit_user': submit_user}})
    if start_date:
        start_s_ts = datetime2ts(start_date)
        if end_date:
            start_e_ts = datetime2ts(end_date)
        else:
            start_e_ts = start_s_ts + DAY * 30
        start_date_nest_body_list = [ts2datetime(ts) for ts in range(start_s_ts, start_e_ts + DAY, DAY)]
        query_list.append({'terms':{'start_date': start_date_nest_body_list}})
    if end_date:
        end_e_ts = datetime2ts(end_date)
        if start_date:
            end_s_ts = datetime2ts(start_date)
        else:
            end_s_ts = end_e_ts - DAY * 30
        end_date_nest_body_list = [ts2datetime(ts) for ts in range(end_s_ts, end_e_ts + DAY, DAY)]
        query_list.append({'terms': {'end_date': end_date_nest_body_list}})
    if status:
        query_list.append({'term': {'status': status}})
    try:
        task_results = es_sentiment_task.search(index=sentiment_keywords_index_name, \
                doc_type=sentiment_keywords_index_type, body={'query':{'bool':{'must':query_list}}})['hits']['hits']
    except:
        task_results = []
    for task_item in task_results:
        task_source = task_item['_source']
        task_id = task_source['task_id']
        start_date = task_source['start_date']
        end_date = task_source['end_date']
        keywords = task_source['query_keywords']
        submit_ts = ts2date(task_source['submit_ts'])
        status = task_source['status']
        segment = task_source['segment']
        results.append([task_id, start_date, end_date, keywords, submit_ts, status, segment])

    return results
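
For reference, a call with only submit_date and status set accumulates two clauses, so the body sent to Elasticsearch would be the following (illustrative values):

query_list = [
    {'range': {'submit_ts': {'gte': datetime2ts('2013-09-01'),
                             'lt': datetime2ts('2013-09-01') + DAY}}},
    {'term': {'status': 'done'}},
]
body = {'query': {'bool': {'must': query_list}}}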
Example #36
def get_influence_content(uid, timestamp_from, timestamp_to):
    weibo_list = []
    #split timestamp range to new_range_dict_list
    from_date_ts = datetime2ts(ts2datetime(timestamp_from))
    to_date_ts = datetime2ts(ts2datetime(timestamp_to))
    new_range_dict_list = []
    if from_date_ts != to_date_ts:
        iter_date_ts = from_date_ts
        while iter_date_ts < to_date_ts:
            iter_next_date_ts = iter_date_ts + DAY
            new_range_dict_list.append({'range':{'timestamp':{'gte':iter_date_ts, 'lt':iter_next_date_ts}}})
            iter_date_ts = iter_next_date_ts
        if new_range_dict_list[0]['range']['timestamp']['gte'] < timestamp_from:
            new_range_dict_list[0]['range']['timestamp']['gte'] = timestamp_from
        if new_range_dict_list[-1]['range']['timestamp']['lt'] > timestamp_to:
            new_range_dict_list[-1]['range']['timestamp']['lt'] = timestamp_to
    else:
        new_range_dict_list = [{'range':{'timestamp':{'gte':timestamp_from, 'lt':timestamp_to}}}]
    #iter date to search flow_text
    iter_result = []
    for range_item in new_range_dict_list:
        range_from_ts = range_item['range']['timestamp']['gte']
        range_from_date = ts2datetime(range_from_ts)
        flow_text_index_name = flow_text_index_name_pre + range_from_date
        query = []
        query.append({'term':{'uid':uid}})
        query.append(range_item)
        try:
            flow_text_exist = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type,\
                    body={'query':{'bool':{'must': query}}, 'sort':[{'timestamp':'asc'}]})['hits']['hits']
        except:
            flow_text_exist = []
        iter_result.extend(flow_text_exist)
    # get weibo list from the accumulated hits
    for item in iter_result:
        source = item['_source']
        weibo = {}
        weibo['timestamp'] = ts2date(source['timestamp'])
        weibo['ip'] = source['ip']
        weibo['text'] = source['text']
        if source['geo']:
            weibo['geo'] = '\t'.join(source['geo'].split('&'))
        else:
            weibo['geo'] = ''
        weibo_list.append(weibo)
        
    return weibo_list
Example #37
def search_sentiment_all_keywords_task(submit_date, keywords_string, submit_user, start_date, end_date, status):
    results = []
    query_list = []
    if submit_date:
        submit_ts_start = datetime2ts(submit_date)
        submit_ts_end = submit_ts_start + DAY
        query_list.append({'range': {'submit_ts': {'gte': submit_ts_start, 'lt':submit_ts_end}}})
    if keywords_string:
        keywords_list = keywords_string.split(',')
        query_list.append({'terms':{'query_keywords': keywords_list}})
    if submit_user:
        query_list.append({'term': {'submit_user': submit_user}})
    if start_date:
        start_s_ts = datetime2ts(start_date)
        if end_date:
            start_e_ts = datetime2ts(end_date)
        else:
            start_e_ts = start_s_ts + DAY * 30
        start_date_nest_body_list = [ts2datetime(ts) for ts in range(start_s_ts, start_e_ts + DAY, DAY)]
        query_list.append({'terms':{'start_date': start_date_nest_body_list}})
    if end_date:
        end_e_ts = datetime2ts(end_date)
        if start_date:
            end_s_ts = datetime2ts(start_date)
        else:
            end_s_ts = end_e_ts - DAY * 30
        end_date_nest_body_list = [ts2datetime(ts) for ts in range(end_s_ts, end_e_ts + DAY, DAY)]
        query_list.append({'terms': {'end_date': end_date_nest_body_list}})
    if status:
        query_list.append({'term': {'status': status}})
    try:
        task_results = es_sentiment_task.search(index=sentiment_keywords_index_name, \
                doc_type=sentiment_keywords_index_type, body={'query':{'bool':{'must':query_list}}})['hits']['hits']
    except:
        task_results = []
    for task_item in task_results:
        task_source = task_item['_source']
        task_id = task_source['task_id']
        start_date = task_source['start_date']
        end_date = task_source['end_date']
        keywords = task_source['query_keywords']
        submit_ts = ts2date(task_source['submit_ts'])
        status = task_source['status']
        segment = task_source['segment']
        results.append([task_id, start_date, end_date, keywords, submit_ts, status, segment])

    return results
Example #38
def get_user_geo(uid, dropped_geos=u"中国&美国"):
    """
    :param uid: 用户的id
    :param dropped_geos: &分割的地点,因为geo中都包含中国
    :return: geo 位置的set
    """
    dropped_geos = set(dropped_geos.split("&"))
    # fetch the user's preferences
    try:
        user_portrait_result = es_user_portrait. \
            get_source(index=portrait_index_name, doc_type=profile_index_type, id=uid)
    except NotFoundError:
        user_portrait_result = None

    # the portrait index has geo information
    if user_portrait_result and len(user_portrait_result["activity_geo"]) > 0:
        # assumes activity_geo holds an iterable of place names
        geos = set(user_portrait_result["activity_geo"]) - dropped_geos

    # no geo information in the portrait; extract it from weibos the user posted
    else:
        flow_text_index_list = []
        now_timestamp = datetime2ts(ts2datetime(time.time()))
        if RUN_TYPE == 0:
            now_timestamp = datetime2ts(RUN_TEST_TIME)
        for i in range(7, 0, -1):
            iter_date = ts2datetime(now_timestamp - DAY * i)
            flow_text_index_list.append(flow_text_index_name_pre + iter_date)

        weibo_all = es_flow_text.search(index=flow_text_index_list,
                                        doc_type=flow_text_index_type,
                                        body={
                                            'query': {
                                                'filtered': {
                                                    'filter': {
                                                        'term': {
                                                            'uid': uid
                                                        }
                                                    }
                                                }
                                            },
                                            'size': 2000,
                                        })['hits']['hits']
        geos = set()
        for temp in weibo_all:
            geos |= set(temp["_source"]["geo"].split("&"))

    return geos
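
The '&'-joined geo convention and the set subtraction are easy to check in isolation (illustrative strings):

dropped = set(u"中国&美国".split("&"))
geos = set(u"中国&广东&广州".split("&")) - dropped
print(geos)  # set([u'广东', u'广州'])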
Example #39
def get_db_num(timestamp):
    date = ts2datetime(timestamp)
    date_ts = datetime2ts(date)
    db_number = 2 - (((date_ts - begin_ts) / (DAY * 7))) % 2
    #run_type
    if RUN_TYPE == 0:
        db_number = 1
    return db_number
Example #40
def get_db_num(timestamp):
    date = ts2datetime(timestamp)
    date_ts = datetime2ts(date)
    db_number = 2 - (((date_ts - begin_ts) / (DAY * 7))) % 2
    #run_type
    if RUN_TYPE == 0:
        db_number = 1
    return db_number
Example #41
def new_get_user_weibo(uid, sort_type):
    results = []
    weibo_list = []
    now_date = ts2datetime(time.time())
    #run_type
    if RUN_TYPE == 0:
        now_date = RUN_TEST_TIME
        sort_type = 'timestamp'
    #step1:get user name
    try:
        user_profile_result = es_user_profile.get(index=profile_index_name, doc_type=profile_index_type,\
                id=uid, _source=False, fields=['nick_name'])
    except:
        user_profile_result = {}
    if user_profile_result:
        uname = user_profile_result['fields']['nick_name'][0]
    else:
        uname = ''
    #step2:get user weibo
    for i in range(7, 0, -1):
        iter_date = ts2datetime(datetime2ts(now_date) - i * DAY)
        index_name = flow_text_index_name_pre + iter_date
        try:
            weibo_result = es_flow_text.search(index=index_name, doc_type=flow_text_index_type,\
                    body={'query':{'filtered':{'filter':{'term': {'uid': uid}}}}, 'sort':sort_type, 'size':100})['hits']['hits']
        except:
            weibo_result = []
        if weibo_result:
            weibo_list.extend(weibo_result)
    print 'weibo_list:', weibo_list[0]
    sort_weibo_list = sorted(weibo_list,
                             key=lambda x: x['_source'][sort_type],
                             reverse=True)[:100]
    for weibo_item in sort_weibo_list:
        source = weibo_item['_source']
        mid = source['mid']
        uid = source['uid']
        text = source['text']
        ip = source['geo']
        timestamp = source['timestamp']
        date = ts2date(timestamp)
        sentiment = source['sentiment']
        #run_type
        if RUN_TYPE == 1:
            retweet_count = source['retweet_count']
            comment_count = source['comment_count']
            sensitive_score = source['sensitive']
        else:
            retweet_count = 0
            comment_count = 0
            sensitive_score = 0
        city = ip2city(ip)
        results.append([
            mid, uid, text, ip, city, timestamp, date, retweet_count,
            comment_count, sensitive_score
        ])

    return results
Example #42
def get_recommentation(submit_user):
    if RUN_TYPE:
        now_ts = time.time()
    else:
        now_ts = datetime2ts(RUN_TEST_TIME)

    in_portrait_set = set(r.hkeys("compute"))
    result = []
    for i in range(7):
        iter_ts = now_ts - i*DAY
        iter_date = ts2datetime(iter_ts)
        submit_user_recomment = "recomment_" + submit_user + "_" + str(iter_date)
        bci_date = ts2datetime(iter_ts - DAY)
        submit_user_recomment = r.hkeys(submit_user_recomment)
        bci_index_name = "bci_" + bci_date.replace('-', '')
        exist_bool = es_cluster.indices.exists(index=bci_index_name)
        if not exist_bool:
            continue
        if submit_user_recomment:
            user_bci_result = es_cluster.mget(index=bci_index_name, doc_type="bci", body={'ids':submit_user_recomment}, _source=True)['docs']
            user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids':submit_user_recomment}, _source=True)['docs']
            max_evaluate_influ = get_evaluate_max(bci_index_name)
            for j in range(len(submit_user_recomment)):
                uid = submit_user_recomment[j]
                bci_dict = user_bci_result[j]
                profile_dict = user_profile_result[j]
                try:
                    bci_source = bci_dict['_source']
                except:
                    bci_source = None
                if bci_source:
                    influence = bci_source['user_index']
                    influence = math.log(influence / max_evaluate_influ['user_index'] * 9 + 1, 10)
                    influence = influence * 100
                else:
                    influence = ''
                try:
                    profile_source = profile_dict['_source']
                except:
                    profile_source = None
                if profile_source:
                    uname = profile_source['nick_name']
                    location = profile_source['user_location']
                    fansnum = profile_source['fansnum']
                    statusnum = profile_source['statusnum']
                else:
                    uname = ''
                    location = ''
                    fansnum = ''
                    statusnum = ''
                if uid in in_portrait_set:
                    in_portrait = "1"
                else:
                    in_portrait = "0"
                result.append([iter_date, uid, uname, location, fansnum, statusnum, influence, in_portrait])

    return result
Exemplo n.º 43
0
def read_flow_text_sentiment(uid_list):
    '''
        Read user weibo (results carry per-weibo sentiment labels):
        input: uid_list (a list of uid strings)
        output: word_dict (per-user word-frequency dict), weibo_list (list of weibo records)
        word_dict example: {uid1:{'w1':f1,'w2':f2...}...}
        weibo_list example: [[uid1,text1,s1,ts1],[uid2,text2,s2,ts2],...] (each record holds four values: uid, text, sentiment, timestamp)
    '''
    word_dict = dict()  # per-user word-frequency dict
    weibo_list = []  # list of weibo records
    now_ts = time.time()
    now_date_ts = datetime2ts(ts2datetime(now_ts))
    #use to test
    now_date_ts = datetime2ts('2013-09-08')
    start_date_ts = now_date_ts - DAY * WEEK
    for i in range(0,WEEK):
        iter_date_ts = start_date_ts + DAY * i
        flow_text_index_date = ts2datetime(iter_date_ts)
        flow_text_index_name = flow_text_index_name_pre + flow_text_index_date
        print flow_text_index_name
        try:
            flow_text_exist = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type,\
                    body={'query':{'filtered':{'filter':{'terms':{'uid': uid_list}}}}, 'size': MAX_VALUE}, _source=False,  fields=['text','uid','sentiment','keywords_dict','timestamp'])['hits']['hits']
        except:
            flow_text_exist = []

        for flow_text_item in flow_text_exist:
            uid = flow_text_item['fields']['uid'][0].encode('utf-8')
            text = flow_text_item['fields']['text'][0].encode('utf-8')
            sentiment = int(flow_text_item['fields']['sentiment'][0])
            ts = flow_text_item['fields']['timestamp'][0]
            # keywords_dict is stored as a JSON string of {word: frequency}
            keywords_dict = json.loads(flow_text_item['fields']['keywords_dict'][0])

            if uid in word_dict:
                item_dict = Counter(word_dict[uid])
                keywords_dict = Counter(keywords_dict)
                item_dict = dict(item_dict + keywords_dict)
                word_dict[uid] = item_dict
            else:
                word_dict[uid] = keywords_dict

            weibo_list.append([uid,text,sentiment,ts])
            
    return word_dict, weibo_list
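The per-user keyword accumulation above is element-wise addition of two frequency dicts, done via collections.Counter; in isolation:

# Element-wise merge of word-frequency dicts, as done in the loop above.
from collections import Counter

existing = {'w1': 3, 'w2': 1}
incoming = {'w2': 2, 'w3': 5}
merged = dict(Counter(existing) + Counter(incoming))
assert merged == {'w1': 3, 'w2': 3, 'w3': 5}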
Exemplo n.º 45
0
def cctv_video_rec(uid, k=10):
    flow_text_index_list = []
    now_timestamp = datetime2ts(ts2datetime(time.time()))
    if RUN_TYPE == 0:
        now_timestamp = datetime2ts(RUN_TEST_TIME)
    for i in range(7, 0, -1):
        iter_date = ts2datetime(now_timestamp - DAY * i)
        flow_text_index_list.append(flow_text_index_name_pre + iter_date)

    weibo_all = es_flow_text.search(index=flow_text_index_list,
                                    doc_type=flow_text_index_type,
                                    body={
                                        'query': {
                                            'filtered': {
                                                'filter': {
                                                    'term': {
                                                        'uid': uid
                                                    }
                                                }
                                            }
                                        },
                                        'size': 100,
                                    })['hits']['hits']
    user_words = set()
    for weibo in weibo_all:
        weibo_text = weibo["_source"]["text"]
        user_words |= set(jieba.cut(weibo_text))

    rio_dict = load_topic_video_dict(RIO_VIDEO_INFO_FILE)
    tiger_videos = load_videos(TIGER_VIDEO_INFO_FILE)

    ret_dict = dict()
    ret_dict["tiger"] = random.sample(tiger_videos, k)

    user_pref_topic = set(rio_dict.keys()) & user_words
    # if no matching topic is found, assign topics at random
    if len(user_pref_topic) == 0:
        user_pref_topic = set(random.sample(rio_dict.keys(), k))
    ret_dict["rio"] = list()
    for topic in user_pref_topic:
        ret_dict["rio"].extend(rio_dict[topic])
        if len(ret_dict["rio"]) >= k:
            ret_dict["rio"] = ret_dict["rio"][:k]
            break
    return ret_dict
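The topic-matching step reduces to a set intersection between the user's segmented vocabulary and the keys of the topic-to-videos dict, with a random fallback. A minimal sketch; the topic names and videos are invented, and a whitespace split stands in for jieba.cut so it runs without the jieba dependency:

# Hypothetical topic -> videos mapping; split() stands in for jieba.cut.
import random

rio_dict = {'swimming': ['v1', 'v2'], 'running': ['v3']}
user_words = set('I love swimming very much'.split())

user_pref_topic = set(rio_dict.keys()) & user_words
if len(user_pref_topic) == 0:  # no overlap: fall back to random topics
    user_pref_topic = set(random.sample(list(rio_dict.keys()), 1))
videos = []
for topic in user_pref_topic:
    videos.extend(rio_dict[topic])
assert videos == ['v1', 'v2']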
Exemplo n.º 46
0
def get_text_index(date):
    now_ts = datetime2ts(date)
    index_list = []
    for i in range(7):
        ts = now_ts - i*DAY
        tmp_index = pre_text_index + ts2datetime(ts)
        index_list.append(tmp_index)

    return index_list
Exemplo n.º 47
0
def search_weibo(root_uid, uid, mtype):
    query_body = {
        'query':{
            'filtered':{
                'filter':{
                    'bool':{
                        'must':[{'term':{'uid':uid}},
                                {'term':{'message_type':mtype}}],
                        'should':[{'term':{'root_uid':root_uid}},
                                  {'term':{'directed_uid':root_uid}}],
                    }
                }
            }
        }
    }
    index_list = []
    now_date = ts2datetime(time.time())
    for i in range(7, 0, -1):
        if RUN_TYPE == 1:
            iter_date = ts2datetime(datetime2ts(now_date) - i * DAY)
        else:
            iter_date = ts2datetime(datetime2ts(RUN_TEST_TIME) - i * DAY)
        index_list.append(flow_text_index_name_pre + iter_date)
    results = es_flow_text.search(index=index_list,doc_type=flow_text_index_type,body=query_body)['hits']['hits']
    weibo = {}
    f_result = []

    if len(results) > 0:
        for result in results:
            #print type(result),result
            weibo['last_text'] = [result['_source']['text'], result['_source']['timestamp']]
            mid = result['_source']['root_mid']
            # print mid
            len_pre = len(flow_text_index_name_pre)
            index = result['_index'][len_pre:]
            root_index = []
            for j in range(0, 7):   # one week; use range(0, 30) for a month
                iter_date = ts2datetime(datetime2ts(index) - j * DAY) 
                root_index.append(flow_text_index_name_pre + iter_date)
            results0 = es_flow_text.search(index=root_index,doc_type=flow_text_index_type,body={'query':{'term':{'mid':mid}}})['hits']['hits']
            if len(results0)>0:
                for result0 in results0:
                    weibo['ori_text'] = [result0['_source']['text'],result0['_source']['timestamp']]
                    f_result.append(weibo)
                    weibo={}
    return f_result
Exemplo n.º 48
0
def ajax_show_in_history():
    results = {}
    date = request.args.get('date', '')
    input_ts = datetime2ts(date)
    now_ts = time.time()
    #use to test
    now_ts = test_time
    if now_ts - 24*3600*7 > input_ts:
        return None
    else:
        results = show_in_history(date)
    return json.dumps(results)
Exemplo n.º 49
0
def get_user_hashtag(uid):
    user_hashtag_result = {}
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    ts = datetime2ts(now_date)
    #test
    ts = datetime2ts('2013-09-08')
    for i in range(1, 8):
        ts = ts - 3600*24
        results = r_cluster.hget('hashtag_'+str(ts), uid)
        if results:
            hashtag_dict = json.loads(results)
            for hashtag in hashtag_dict:
                try:
                    user_hashtag_result[hashtag] += hashtag_dict[hashtag]
                except:
                    user_hashtag_result[hashtag] = hashtag_dict[hashtag]
    sort_hashtag_dict = sorted(user_hashtag_result.items(), key=lambda x:x[1], reverse=True)

    return sort_hashtag_dict
Exemplo n.º 50
0
def get_user_trend(uid):
    now_ts = time.time()
    date = ts2datetime(now_ts)
    #run_type
    if RUN_TYPE == 1:
        ts = datetime2ts(date)
    else:
        ts = datetime2ts(RUN_TEST_TIME)
    timestamp = ts
    results = dict()
    for i in range(1, 8):
        ts = timestamp - 24 * 3600 * i
        try:
            result_string = r_cluster.hget('activity_' + str(ts), str(uid))
        except:
            result_string = ''
        if not result_string:
            continue
        result_dict = json.loads(result_string)
        for time_segment in result_dict:
            # snap the 15-minute segment index to its 4-hour bucket
            # (16 segments * 15 min * 60 s = 14400 s), keyed from day start ts
            bucket_ts = int(time_segment) / 16 * 15 * 60 * 16 + ts
            try:
                results[bucket_ts] += result_dict[time_segment]
            except:
                results[bucket_ts] = result_dict[time_segment]

    trend_list = []
    for i in range(1, 8):
        ts = timestamp - i * 24 * 3600
        for j in range(0, 6):  # the six 4-hour buckets of one day
            time_seg = ts + j * 15 * 60 * 16
            if time_seg in results:
                trend_list.append((time_seg, results[time_seg]))
            else:
                trend_list.append((time_seg, 0))
    sort_trend_list = sorted(trend_list, key=lambda x: x[0], reverse=True)
    x_axis = [item[0] for item in sort_trend_list]
    y_axis = [item[1] for item in sort_trend_list]
    return [x_axis, y_axis]
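The bucketing arithmetic is worth spelling out: one activity segment is 15 minutes, 16 segments make one 4-hour bucket (14400 s), so flooring the segment index to a multiple of 16 and converting to seconds gives the bucket's offset within the day, and range(0, 6) then enumerates the six buckets. A worked check:

SEG = 15 * 60        # one activity segment = 15 minutes
BUCKET = SEG * 16    # 16 segments = one 4-hour bucket (14400 s)

day_start_ts = 0
time_segment = 37    # 37th segment of the day, i.e. 09:15-09:30
bucket_start = time_segment // 16 * BUCKET + day_start_ts
assert bucket_start == 2 * BUCKET  # falls in the 08:00-12:00 bucket
assert [day_start_ts + j * BUCKET for j in range(6)][-1] == 5 * BUCKET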
Exemplo n.º 51
0
def search_sentiment_all(start_date, end_date, time_segment):
    start_ts = datetime2ts(start_date)
    end_ts = datetime2ts(end_date)
    search_date_list = []
    for i in range(start_ts, end_ts + DAY, DAY):
        iter_date = ts2datetime(i)
        search_date_list.append(iter_date)
    sentiment_ts_count_dict = {}
    for sentiment in sentiment_type_list:
        sentiment_ts_count_dict[sentiment] = []
        for date_item in search_date_list:
            iter_r_name = date_item + '_' + sentiment + '_all'
            #get ts_count_dict in one day
            ts_count_result = R_SENTIMENT_ALL.hgetall(iter_r_name)
            #get x and y list by timesegment
            new_ts_count_dict = get_new_ts_count_dict(ts_count_result, time_segment, date_item)
            sort_new_ts_count = sorted(new_ts_count_dict.items(), key=lambda x:x[0])
            sentiment_ts_count_dict[sentiment].extend(sort_new_ts_count)

    return sentiment_ts_count_dict
Exemplo n.º 52
0
def search_detect_task(task_name, submit_date, state, process, detect_type, submit_user):
    results = []
    query = [{'match':{'task_type': 'detect'}}]
    condition_num = 0
    if task_name:
        task_name_list = task_name.split(' ')
        for item in task_name_list:
            query.append({'wildcard':{'task_name': '*'+item+'*'}})
            condition_num += 1
    if submit_date:
        submit_date_ts = datetime2ts(submit_date)
        submit_date_from = submit_date_ts
        submit_date_to = submit_date_ts + DAY
        query.append({'range':{'submit_date':{'gte':submit_date_from, 'lt':submit_date_to}}})
        condition_num += 1
    if state:
        state_list = state.split(' ')
        for item in state_list:
            query.append({'wildcard':{'state': '*'+item+'*'}})
            condition_num += 1
    if process:
        query.append({'range':{'detect_process':{'from': int(process), 'to': MAX_PROCESS}}})
        condition_num += 1
    if detect_type:
        detect_type_list = detect_type.split(',')
        nest_body_list = []
        for type_item in detect_type_list:
            nest_body_list.append({'wildcard':{'detect_type': '*'+type_item+'*'}})
        query.append({'bool':{'should': nest_body_list}})
        condition_num += 1
    if submit_user:
        query.append({'term':{'submit_user': submit_user}})
        condition_num += 1
    try:
        search_result = es_group_result.search(index=group_index_name, doc_type=group_index_type, \
                    body={'query':{'bool': {'must': query}}, 'sort':[{'submit_date': {'order': 'desc'}}], 'size':MAX_VALUE})['hits']['hits']
    except:
        search_result = []
    #get group information table
    for group_item in search_result:
        source = group_item['_source']
        task_name = source['task_name']
        submit_date = ts2datetime(int(source['submit_date']))
        submit_user = source['submit_user']
        detect_type = source['detect_type']
        state = source['state']
        process = source['detect_process']

        results.append([task_name, submit_user, submit_date, detect_type, state, process])
        
    return results
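Each non-empty argument contributes one clause to the must list. For a hypothetical call like search_detect_task('spam', '2013-09-05', '', '', 'single,group', 'admin'), the assembled search body would look like the sketch below (1378339200 is an assumed epoch for the submit date, with DAY = 86400):

# Assembled ES body for the hypothetical arguments above.
body = {
    'query': {'bool': {'must': [
        {'match': {'task_type': 'detect'}},
        {'wildcard': {'task_name': '*spam*'}},
        {'range': {'submit_date': {'gte': 1378339200, 'lt': 1378339200 + 86400}}},
        {'bool': {'should': [
            {'wildcard': {'detect_type': '*single*'}},
            {'wildcard': {'detect_type': '*group*'}},
        ]}},
        {'term': {'submit_user': 'admin'}},
    ]}},
    'sort': [{'submit_date': {'order': 'desc'}}],
}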
Exemplo n.º 53
0
def search_mention(uid):
    now_date_ts = datetime2ts(ts2datetime(time.time()))
    #run type
    if RUN_TYPE == 0:
        now_date_ts = datetime2ts(RUN_TEST_TIME)
    day_result_dict_list = []
    for i in range(7, 0, -1):
        iter_ts = now_date_ts - i * DAY
        try:
            result_string = r_cluster.hget('at_' + str(iter_ts), str(uid))
        except:
            result_string = ''
        if not result_string:
            continue
        day_result_dict = json.loads(result_string)
        day_result_dict_list.append(day_result_dict)
    if day_result_dict_list:
        week_result_dict = union_dict(day_result_dict_list)
    else:
        week_result_dict = {}
    return week_result_dict
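union_dict is an external helper not shown here; a plausible sketch consistent with its use above (an element-wise sum of the seven daily {mentioned_uid: count} dicts) would be:

# Hypothetical union_dict: element-wise sum of daily mention-count dicts.
def union_dict(dict_list):
    week_dict = {}
    for day_dict in dict_list:
        for key in day_dict:
            week_dict[key] = week_dict.get(key, 0) + day_dict[key]
    return week_dict

assert union_dict([{'u1': 2}, {'u1': 1, 'u2': 4}]) == {'u1': 3, 'u2': 4}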
Exemplo n.º 57
0
def search_detect_task(task_name, submit_date, state, process, detect_type, submit_user):
    results = []
    query = [{'match':{'task_type': 'detect'}}]
    condition_num = 0
    if task_name:
        task_name_list = task_name.split(' ')
        for item in task_name_list:
            query.append({'wildcard':{'task_name': '*'+item+'*'}})
            condition_num += 1
    if submit_date:
        submit_date_ts = datetime2ts(submit_date)
        submit_date_from = submit_date_ts
        submit_date_to = submit_date_ts + DAY
        query.append({'range':{'submit_date':{'gte':submit_date_from, 'lt':submit_date_to}}})
        condition_num += 1
    if state:
        state_list = state.split(' ')
        for item in state_list:
            query.append({'wildcard':{'state': '*'+item+'*'}})
            condition_num += 1
    if process:
        query.append({'range':{'detect_process':{'from': int(process), 'to': MAX_PROCESS}}})
        condition_num += 1
    if detect_type:
        detect_type_list = detect_type.split(',')
        nest_body_list = []
        for type_item in detect_type_list:
            nest_body_list.append({'wildcard':{'detect_type': '*'+type_item+'*'}})
        query.append({'bool':{'should': nest_body_list}})
        condition_num += 1
    if submit_user:
        query.append({'wildcard':{'submit_user': '*'+submit_user+'*'}})
        condition_num += 1
    try:
        search_result = es_group_result.search(index=group_index_name, doc_type=group_index_type, \
                    body={'query':{'bool': {'must': query}}, 'sort':[{'submit_date': {'order': 'desc'}}], 'size':MAX_VALUE})['hits']['hits']
    except:
        search_result = []
    #get group information table
    for group_item in search_result:
        source = group_item['_source']
        task_name = source['task_name']
        submit_date = ts2datetime(int(source['submit_date']))
        submit_user = source['submit_user']
        detect_type = source['detect_type']
        state = source['state']
        process = source['detect_process']

        results.append([task_name, submit_user, submit_date, detect_type, state, process])
        
    return results
Exemplo n.º 58
0
def get_user_hashtag(uid):
    user_hashtag_result = {}
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    #run_type
    if RUN_TYPE == 1:
        ts = datetime2ts(now_date)
    else:
        ts = datetime2ts(RUN_TEST_TIME)
    for i in range(1, 8):
        ts = ts - 3600*24
        results = r_cluster.hget('hashtag_'+str(ts), uid)
        if results:
            hashtag_dict = json.loads(results)
            for hashtag in hashtag_dict:
                try:
                    user_hashtag_result[hashtag] += hashtag_dict[hashtag]
                except:
                    user_hashtag_result[hashtag] = hashtag_dict[hashtag]
    sort_hashtag_dict = sorted(user_hashtag_result.items(), key=lambda x:x[1], reverse=True)

    return sort_hashtag_dict