Пример #1
0
def sensitive_attribute(uid, date):
    """Build the sensitive-content profile of a user.

    Args:
        uid: user id; used as the document id in the
            ``sensitive_user_portrait`` index and in the per-day ``bci``
            index.
        date: accepted for interface compatibility but currently ignored;
            statistics are always read for the fixed test day ``20130907``.

    Returns:
        ``{'utype': 0}`` when ``user_type(uid)`` is falsy. Otherwise a dict
        with ``utype`` set to 1 plus basic profile fields, sensitive and
        combined weibo counters, sentiment ratios and trend, per-day geo /
        hashtag / sensitive-word breakdowns and the sensitive retweet /
        follower / mention results.

    Raises:
        Whatever ``es.get`` raises when the portrait document cannot be
        read, and ``KeyError`` when the portrait lacks an expected field.
    """
    results = {}
    if not user_type(uid):
        results['utype'] = 0
        return results
    results['utype'] = 1
    results['uid'] = uid

    # The portrait document is read once and reused for every section below.
    portrait = es.get(index='sensitive_user_portrait',
                      doc_type='user',
                      id=uid)['_source']
    # A stored value of 0 is reported as 'unknown'.
    results['uname'] = (
        'unknown' if portrait['uname'] == 0 else portrait['uname'])
    results['photo_url'] = (
        'unknown' if portrait['photo_url'] == 0 else portrait['photo_url'])

    # sensitive weibo number statistics
    # NOTE: the ``date`` argument is overridden by this fixed test day.
    date = '20130907'  # test
    try:
        bci_item = es.get(index=date, doc_type='bci', id=uid)['_source']
    except Exception:
        # Best effort: a missing or unreadable bci document counts as zeros.
        bci_item = {}
    _fill_weibo_counters(results, bci_item)

    results['sensitive_text'] = sort_sensitive_text(uid)
    results['sensitive_geo_distribute'] = []
    results['sensitive_time_distribute'] = get_user_trend(uid)[1]
    results['sensitive_hashtag_description'] = ''

    week = _sensitive_week_dates()

    sentiment_trend = user_sentiment_trend(uid)
    emotion_number = sentiment_trend[0]
    sentiment_dict = sentiment_trend[1]
    emotion_total = emotion_number[2] + emotion_number[1] + emotion_number[0]
    if emotion_total:
        results['negetive_index'] = float(emotion_number[2]) / emotion_total
        results['negetive_influence'] = (
            float(emotion_number[1]) / emotion_total)
    else:
        # No sentiment samples at all: report 0 instead of dividing by zero.
        results['negetive_index'] = 0.0
        results['negetive_influence'] = 0.0

    return_sentiment = {'positive': [], 'neutral': [], 'negetive': []}
    for day in week:
        day_counts = sentiment_dict.get(day, {})
        for mood in return_sentiment:
            return_sentiment[mood].append([day_counts.get(mood, 0), day])
    results['sentiment_trend'] = return_sentiment

    results['politics_trend'] = portrait['politics_trend']
    results['domain'] = portrait['domain']
    results['sensitive'] = portrait['sensitive']

    geo_json = portrait['sensitive_geo_activity']
    if geo_json:
        geo_by_day = _pad_missing_days(json.loads(geo_json), week)
        # One [day, top-two (place, count) pairs] entry per day, oldest first.
        results['sensitive_geo_distribute'] = [
            [day, sorted(places.items(),
                         key=lambda kv: kv[1],
                         reverse=True)[0:2]]
            for day, places in sorted(geo_by_day.items(),
                                      key=lambda kv: kv[0])
        ]

    hashtag_json = portrait['sensitive_hashtag_dict']
    if hashtag_json:
        hashtag_by_day = json.loads(hashtag_json)
        if len(hashtag_by_day) < 7:
            for day in week:
                if day in hashtag_by_day:
                    hashtag_by_day[day] = sorted(
                        hashtag_by_day[day].items(),
                        key=lambda kv: kv[1],
                        reverse=True)
                else:
                    hashtag_by_day[day] = {}
        results['sensitive_hashtag_description'] = hashtag_description(
            hashtag_by_day)
    else:
        hashtag_by_day = {}

    words_json = portrait['sensitive_words_dict']
    if words_json:
        words_by_day = _pad_missing_days(json.loads(words_json), week)
    else:
        words_by_day = {}
    results['today_sensitive_words'] = words_by_day.get(date, {})

    hashtag_totals = {}
    for day_hashtags in hashtag_by_day.values():
        # A day holds either a {tag: count} dict or an already sorted list
        # of (tag, count) pairs; both must be read as pairs, otherwise the
        # characters of the tag string would be counted instead.
        if isinstance(day_hashtags, dict):
            pairs = day_hashtags.items()
        else:
            pairs = day_hashtags
        for tag, count in pairs:
            hashtag_totals[tag] = hashtag_totals.get(tag, 0) + count

    word_totals = {}
    for day_words in words_by_day.values():
        for word, count in day_words.items():
            word_totals[word] = word_totals.get(word, 0) + count

    sorted_words = sorted(word_totals.items(),
                          key=lambda kv: kv[1],
                          reverse=True)
    results['sensitive_hashtag'] = sorted(hashtag_totals.items(),
                                          key=lambda kv: kv[1],
                                          reverse=True)
    results['sensitive_words'] = sort_sensitive_words(sorted_words)
    results['sensitive_hashtag_dict'] = sorted(hashtag_by_day.items(),
                                               key=lambda kv: kv[0])
    results['sensitive_words_dict'] = sorted(words_by_day.items(),
                                             key=lambda kv: kv[0])

    results['sensitive_retweet'] = search_retweet(uid, 1)
    results['sensitive_follow'] = search_follower(uid, 1)
    results['sensitive_at'] = search_mention(uid, 1)

    return results


def _sensitive_week_dates():
    """Return the seven day keys of the fixed reporting week.

    The keys are produced from seven consecutive daily timestamps derived
    from the hard-coded date 2013-09-08 (presumably the seven days before
    it; confirm against ``datetime2ts`` / ``ts2datetime``), with the
    dashes removed from each formatted date.
    """
    start_ts = datetime2ts('2013-09-08') - 8 * 24 * 3600
    return [ts2datetime(start_ts + offset * 24 * 3600).replace('-', '')
            for offset in range(1, 8)]


def _pad_missing_days(per_day, week_dates):
    """Add an empty entry for each missing week day when fewer than 7 exist."""
    if len(per_day) < 7:
        for day in week_dates:
            per_day.setdefault(day, {})
    return per_day


def _fill_weibo_counters(results, bci_item):
    """Store the sensitive and combined weibo counters in ``results``.

    For every counter the sensitive value is read from the ``s_``-prefixed
    bci field and stored under the ``sensitive_``-prefixed key; the combined
    value (plain + sensitive) is stored under the paired total key.
    """
    counters = (
        # (bci field, result key of the combined total)
        ('origin_weibo_number', 'origin_weibo_total_number'),
        ('retweeted_weibo_number', 'retweeted_weibo_total_number'),
        ('comment_weibo_number', 'comment_weibo_total_number'),
        ('origin_weibo_retweeted_total_number',
         'origin_weibo_retweeted_total_number'),
        ('origin_weibo_comment_total_number',
         'origin_weibo_comment_total_number'),
        ('retweeted_weibo_retweeted_total_number',
         'retweeted_weibo_retweeted_total_number'),
        ('retweeted_weibo_comment_total_number',
         'retweeted_weibo_comment_total_number'),
    )
    try:
        sensitive = dict((field, bci_item.get('s_' + field, 0))
                         for field, _ in counters)
        sensitive['comment_weibo_number'] = int(
            sensitive['comment_weibo_number'])
    except (TypeError, ValueError):
        # An unparsable comment counter resets every sensitive counter to 0.
        sensitive = dict((field, 0) for field, _ in counters)

    for field, total_key in counters:
        results['sensitive_' + field] = sensitive[field]
        plain = bci_item.get(field, 0)
        if field == 'comment_weibo_number':
            # The comment counters are the only ones coerced to int.
            plain = int(plain)
        results[total_key] = plain + sensitive[field]
Пример #2
0
def search_attribute_portrait(uid):
    """Assemble the general portrait of a user for display.

    Args:
        uid: user id; used as the document id in the
            ``sensitive_user_portrait`` index, the ``weibo_user`` profile
            index and the per-day ``bci`` indices.

    Returns:
        ``None`` when the portrait document cannot be fetched. Otherwise a
        dict with basic profile fields, sorted keyword / topic / online
        pattern lists, geo activity summaries, rank counts for importance,
        activeness, influence and sensitivity, the total user count, the
        posting time trend and a seven-day influence breakdown.

    Raises:
        ``KeyError`` when the portrait lacks an expected field; errors from
        the ``es.count`` calls are not caught.
    """
    try:
        search_result = es.get(index='sensitive_user_portrait',
                               doc_type='user',
                               id=uid)
    except Exception:
        return None
    results = search_result['_source']

    return_results = {'user_type': 1 if user_type(uid) else 0}

    # A stored value of 0 is reported as 'unknown'.
    for field in ('photo_url', 'location', 'uname'):
        value = results[field]
        return_results[field] = 'unknown' if value == 0 else value
    for field in ('uid', 'fansnum', 'friendsnum', 'gender'):
        return_results[field] = results[field]
    return_results['psycho_status'] = json.loads(results['psycho_status'])

    return_results['keywords'] = _sorted_json_counts(results['keywords'])

    return_results['retweet'] = search_retweet(uid, 0)
    return_results['follow'] = search_follower(uid, 0)
    return_results['at'] = search_mention(uid, 0)

    if results['ip'] and results['geo_activity']:
        return_results['geo_description'] = active_geo_description(
            json.loads(results['ip']), json.loads(results['geo_activity']))
    else:
        return_results['geo_description'] = ''

    # Fixed reporting week derived from the hard-coded date 2013-09-08.
    week_start = datetime2ts('2013-09-08') - 8 * 24 * 3600
    week_dates = [
        ts2datetime(week_start + offset * 24 * 3600).replace('-', '')
        for offset in range(1, 8)
    ]

    geo_top = []
    if results['geo_activity']:
        # Decoded again so the padding below never touches the dict that
        # was handed to active_geo_description.
        geo_dict = json.loads(results['geo_activity'])
        if len(geo_dict) < 7:
            for day in week_dates:
                geo_dict.setdefault(day, {})
        geo_totals = {}
        for day, places in sorted(geo_dict.items(), key=lambda kv: kv[0]):
            ranked = sorted(places.items(),
                            key=lambda kv: kv[1],
                            reverse=True)
            geo_top.append([day, [place for place, _ in ranked[0:2]]])
            for place, count in places.items():
                geo_totals[place] = geo_totals.get(place, 0) + count
        return_results['top_activity_geo'] = sorted(geo_totals.items(),
                                                    key=lambda kv: kv[1],
                                                    reverse=True)
    else:
        return_results['top_activity_geo'] = []
    return_results['activity_geo_distribute'] = geo_top

    return_results['hashtag'] = get_user_hashtag(uid)[0]

    # topic
    return_results['topic'] = _sorted_json_counts(results['topic'], 5)

    # domain
    if results['domain']:
        return_results['domain'] = results['domain'].split('_')
    else:
        return_results['domain'] = []

    # on_line pattern
    return_results['online_pattern'] = _sorted_json_counts(
        results['online_pattern'], 5)

    # self_state
    try:
        profile_result = es_user_profile.get(index='weibo_user',
                                             doc_type='user',
                                             id=uid)
        return_results['description'] = profile_result['_source'].get(
            'description', '')
    except Exception:
        # Best effort: a missing profile yields an empty description.
        return_results['description'] = ''

    # Rank = number of portrait documents whose score lies in
    # [own score, upper bound]; activeness uses a smaller upper bound.
    for field, upper_bound in (('importance', 100000),
                               ('activeness', 10000),
                               ('influence', 100000),
                               ('sensitive', 100000)):
        return_results[field + '_rank'] = _portrait_rank(
            field, results[field], upper_bound)
        return_results[field] = results[field]

    all_count = es.count(index='sensitive_user_portrait',
                         doc_type='user',
                         body={'query': {"match_all": {}}})
    if all_count['_shards']['successful'] != 0:
        return_results['all_count'] = all_count['count']
    else:
        print('es_sensitive_user_portrait error')
        return_results['all_count'] = 0

    # link
    return_results['link'] = results['link']

    weibo_trend = get_user_trend(uid)[0]
    return_results['time_description'] = active_time_description(weibo_trend)
    return_results['time_trend'] = weibo_trend

    # user influence trend
    influence_value = []
    influence_detail = []
    attention_value = []
    for date in week_dates:
        detail = [0] * 10
        try:
            item = es.get(index=date, doc_type='bci', id=uid)['_source']
            detail[0] = item.get('origin_weibo_number', 0)
            detail[1] = item.get('retweeted_weibo_number', 0)
            detail[2] = item.get(
                'origin_weibo_retweeted_total_number', 0) + item.get(
                    'retweeted_weibo_retweeted_total_number', 0)
            detail[3] = item.get(
                'origin_weibo_comment_total_number', 0) + item.get(
                    'retweeted_weibo_comment_total_number', 0)
            detail[4] = item.get('origin_weibo_top_retweeted_id', '0')
            detail[5] = _sensitive_weibo_text(detail[4])
            detail[6] = item.get('origin_weibo_retweeted_top_number', 0)
            detail[7] = item.get('origin_weibo_top_comment_id', '0')
            detail[8] = _sensitive_weibo_text(detail[7])
            detail[9] = item.get('origin_weibo_comment_top_number', 0)
            attention_number = detail[2] + detail[3]
            # Squash the interaction count into the range (-1, 1).
            attention = 2 / (1 + math.exp(-0.005 * attention_number)) - 1
            user_index = item['user_index']
        except Exception:
            # Any failure for this day reports zero influence and attention;
            # whatever part of ``detail`` was already filled is kept.
            user_index = 0
            attention = 0
        influence_value.append([date, user_index])
        influence_detail.append([date, detail])
        attention_value.append(attention)
    return_results['influence_trend'] = influence_value
    return_results['common_influence_detail'] = influence_detail
    return_results['attention_degree'] = attention_value

    return return_results


def _sorted_json_counts(raw, limit=None):
    """Decode a JSON object and return its items sorted by value, descending.

    Returns an empty list when ``raw`` is falsy; keeps only the first
    ``limit`` pairs when ``limit`` is given.
    """
    if not raw:
        return []
    ranked = sorted(json.loads(raw).items(),
                    key=lambda kv: kv[1],
                    reverse=True)
    if limit is None:
        return ranked
    return ranked[:limit]


def _portrait_rank(field, value, upper_bound):
    """Count portrait documents whose ``field`` is in [value, upper_bound].

    Returns 0 when ``value`` is falsy or when no shard answered the count
    request.
    """
    if not value:
        return 0
    query_body = {
        'query': {
            'range': {
                field: {
                    'from': value,
                    'to': upper_bound
                }
            }
        }
    }
    response = es.count(index='sensitive_user_portrait',
                        doc_type='user',
                        body=query_body)
    if response['_shards']['successful'] != 0:
        return response['count']
    return 0


def _sensitive_weibo_text(weibo_id):
    """Return the stored text of a weibo, or '' when absent or unreadable."""
    if not weibo_id:
        return ''
    try:
        return es.get(index='sensitive_user_text',
                      doc_type='user',
                      id=weibo_id)['_source']['text']
    except Exception:
        return ''
def sensitive_attribute(uid, date):
    """Build the sensitive-content profile of a user.

    Args:
        uid: user id; used as the document id in the
            ``sensitive_user_portrait`` index and in the per-day ``bci``
            index.
        date: accepted for interface compatibility but currently ignored;
            statistics are always read for the fixed test day ``20130907``.

    Returns:
        ``{'utype': 0}`` when ``user_type(uid)`` is falsy. Otherwise a dict
        with ``utype`` set to 1 plus basic profile fields, sensitive and
        combined weibo counters, sentiment ratios and trend, per-day geo /
        hashtag / sensitive-word breakdowns and the sensitive retweet /
        follower / mention results.

    Raises:
        Whatever ``es.get`` raises when the portrait document cannot be
        read, and ``KeyError`` when the portrait lacks an expected field.
    """
    results = {}
    if not user_type(uid):
        results['utype'] = 0
        return results
    results['utype'] = 1
    results['uid'] = uid

    # The portrait document is read once and reused for every section below.
    portrait = es.get(index='sensitive_user_portrait',
                      doc_type='user',
                      id=uid)['_source']
    # A stored value of 0 is reported as 'unknown'.
    results['uname'] = (
        'unknown' if portrait['uname'] == 0 else portrait['uname'])
    results['photo_url'] = (
        'unknown' if portrait['photo_url'] == 0 else portrait['photo_url'])

    # sensitive weibo number statistics
    # NOTE: the ``date`` argument is overridden by this fixed test day.
    date = '20130907'  # test
    try:
        bci_item = es.get(index=date, doc_type='bci', id=uid)['_source']
    except Exception:
        # Best effort: a missing or unreadable bci document counts as zeros.
        bci_item = {}
    _fill_weibo_counters(results, bci_item)

    results['sensitive_text'] = sort_sensitive_text(uid)
    results['sensitive_geo_distribute'] = []
    results['sensitive_time_distribute'] = get_user_trend(uid)[1]
    results['sensitive_hashtag_description'] = ''

    week = _sensitive_week_dates()

    sentiment_trend = user_sentiment_trend(uid)
    emotion_number = sentiment_trend[0]
    sentiment_dict = sentiment_trend[1]
    emotion_total = emotion_number[2] + emotion_number[1] + emotion_number[0]
    if emotion_total:
        results['negetive_index'] = float(emotion_number[2]) / emotion_total
        results['negetive_influence'] = (
            float(emotion_number[1]) / emotion_total)
    else:
        # No sentiment samples at all: report 0 instead of dividing by zero.
        results['negetive_index'] = 0.0
        results['negetive_influence'] = 0.0

    return_sentiment = {'positive': [], 'neutral': [], 'negetive': []}
    for day in week:
        day_counts = sentiment_dict.get(day, {})
        for mood in return_sentiment:
            return_sentiment[mood].append([day_counts.get(mood, 0), day])
    results['sentiment_trend'] = return_sentiment

    results['politics_trend'] = portrait['politics_trend']
    results['domain'] = portrait['domain']
    results['sensitive'] = portrait['sensitive']

    geo_json = portrait['sensitive_geo_activity']
    if geo_json:
        geo_by_day = _pad_missing_days(json.loads(geo_json), week)
        # One [day, top-two (place, count) pairs] entry per day, oldest first.
        results['sensitive_geo_distribute'] = [
            [day, sorted(places.items(),
                         key=lambda kv: kv[1],
                         reverse=True)[0:2]]
            for day, places in sorted(geo_by_day.items(),
                                      key=lambda kv: kv[0])
        ]

    hashtag_json = portrait['sensitive_hashtag_dict']
    if hashtag_json:
        hashtag_by_day = json.loads(hashtag_json)
        if len(hashtag_by_day) < 7:
            for day in week:
                if day in hashtag_by_day:
                    hashtag_by_day[day] = sorted(
                        hashtag_by_day[day].items(),
                        key=lambda kv: kv[1],
                        reverse=True)
                else:
                    hashtag_by_day[day] = {}
        results['sensitive_hashtag_description'] = hashtag_description(
            hashtag_by_day)
    else:
        hashtag_by_day = {}

    words_json = portrait['sensitive_words_dict']
    if words_json:
        words_by_day = _pad_missing_days(json.loads(words_json), week)
    else:
        words_by_day = {}
    results['today_sensitive_words'] = words_by_day.get(date, {})

    hashtag_totals = {}
    for day_hashtags in hashtag_by_day.values():
        # A day holds either a {tag: count} dict or an already sorted list
        # of (tag, count) pairs; both must be read as pairs, otherwise the
        # characters of the tag string would be counted instead.
        if isinstance(day_hashtags, dict):
            pairs = day_hashtags.items()
        else:
            pairs = day_hashtags
        for tag, count in pairs:
            hashtag_totals[tag] = hashtag_totals.get(tag, 0) + count

    word_totals = {}
    for day_words in words_by_day.values():
        for word, count in day_words.items():
            word_totals[word] = word_totals.get(word, 0) + count

    sorted_words = sorted(word_totals.items(),
                          key=lambda kv: kv[1],
                          reverse=True)
    results['sensitive_hashtag'] = sorted(hashtag_totals.items(),
                                          key=lambda kv: kv[1],
                                          reverse=True)
    results['sensitive_words'] = sort_sensitive_words(sorted_words)
    results['sensitive_hashtag_dict'] = sorted(hashtag_by_day.items(),
                                               key=lambda kv: kv[0])
    results['sensitive_words_dict'] = sorted(words_by_day.items(),
                                             key=lambda kv: kv[0])

    results['sensitive_retweet'] = search_retweet(uid, 1)
    results['sensitive_follow'] = search_follower(uid, 1)
    results['sensitive_at'] = search_mention(uid, 1)

    return results


def _sensitive_week_dates():
    """Return the seven day keys of the fixed reporting week.

    The keys are produced from seven consecutive daily timestamps derived
    from the hard-coded date 2013-09-08 (presumably the seven days before
    it; confirm against ``datetime2ts`` / ``ts2datetime``), with the
    dashes removed from each formatted date.
    """
    start_ts = datetime2ts('2013-09-08') - 8 * 24 * 3600
    return [ts2datetime(start_ts + offset * 24 * 3600).replace('-', '')
            for offset in range(1, 8)]


def _pad_missing_days(per_day, week_dates):
    """Add an empty entry for each missing week day when fewer than 7 exist."""
    if len(per_day) < 7:
        for day in week_dates:
            per_day.setdefault(day, {})
    return per_day


def _fill_weibo_counters(results, bci_item):
    """Store the sensitive and combined weibo counters in ``results``.

    For every counter the sensitive value is read from the ``s_``-prefixed
    bci field and stored under the ``sensitive_``-prefixed key; the combined
    value (plain + sensitive) is stored under the paired total key.
    """
    counters = (
        # (bci field, result key of the combined total)
        ('origin_weibo_number', 'origin_weibo_total_number'),
        ('retweeted_weibo_number', 'retweeted_weibo_total_number'),
        ('comment_weibo_number', 'comment_weibo_total_number'),
        ('origin_weibo_retweeted_total_number',
         'origin_weibo_retweeted_total_number'),
        ('origin_weibo_comment_total_number',
         'origin_weibo_comment_total_number'),
        ('retweeted_weibo_retweeted_total_number',
         'retweeted_weibo_retweeted_total_number'),
        ('retweeted_weibo_comment_total_number',
         'retweeted_weibo_comment_total_number'),
    )
    try:
        sensitive = dict((field, bci_item.get('s_' + field, 0))
                         for field, _ in counters)
        sensitive['comment_weibo_number'] = int(
            sensitive['comment_weibo_number'])
    except (TypeError, ValueError):
        # An unparsable comment counter resets every sensitive counter to 0.
        sensitive = dict((field, 0) for field, _ in counters)

    for field, total_key in counters:
        results['sensitive_' + field] = sensitive[field]
        plain = bci_item.get(field, 0)
        if field == 'comment_weibo_number':
            # The comment counters are the only ones coerced to int.
            plain = int(plain)
        results[total_key] = plain + sensitive[field]
def _sorted_by_value(mapping):
    """Return mapping.items() as a list sorted by value, largest first."""
    return sorted(mapping.items(), key=lambda x: x[1], reverse=True)


def _portrait_week_dates():
    """Return the seven day keys (ts2datetime output without '-') before the reference day."""
    # NOTE(review): the reference day '2013-09-08' is hard-coded; the original
    # computed time.time() and immediately overwrote it, which looks like a
    # test fixture -- confirm before switching to the real clock.
    start_ts = datetime2ts('2013-09-08') - 8*24*3600
    return [ts2datetime(start_ts + offset*24*3600).replace('-', '')
            for offset in range(1, 8)]


def _portrait_rank(field, value, upper_bound):
    """Count portrait documents whose `field` is in the range value..upper_bound.

    Returns 0 when `value` is falsy or when no shard answered successfully.
    """
    if not value:
        return 0
    query_body = {
        'query': {
            'range': {
                field: {
                    'from': value,
                    'to': upper_bound
                }
            }
        }
    }
    counted = es.count(index='sensitive_user_portrait', doc_type='user', body=query_body)
    if counted['_shards']['successful'] != 0:
        return counted['count']
    return 0


def _weibo_text(mid):
    """Return the 'text' field stored for `mid`, or '' if mid is falsy or the lookup fails."""
    if not mid:
        return ''
    try:
        return es.get(index='sensitive_user_text', doc_type='user', id=mid)['_source']['text']
    except Exception:
        return ''


def search_attribute_portrait(uid):
    """Build the portrait summary dict for user `uid`.

    Reads the user's document from the 'sensitive_user_portrait' index and
    enriches it with relation searches, geo/topic/online-pattern rankings,
    rank counts for importance/activeness/influence/sensitive, and a
    seven-day influence trend read from the per-day 'bci' documents.

    Args:
        uid: id of the user document to look up.

    Returns:
        None when the portrait document cannot be fetched, otherwise a dict
        of portrait attributes.  Note that the 'sensitive' entry holds the
        value stored in the portrait document, not the 0/1 user-type flag
        (that flag is exposed as 'user_type').
    """
    try:
        search_result = es.get(index='sensitive_user_portrait', doc_type='user', id=uid)
    except Exception:
        return None
    results = search_result['_source']

    return_results = {}
    return_results['user_type'] = 1 if user_type(uid) else 0

    # A stored 0 marks a missing profile field.
    for field in ('photo_url', 'location', 'uname'):
        stored = results[field]
        return_results[field] = 'unknown' if stored == 0 else stored
    for field in ('uid', 'fansnum', 'friendsnum', 'gender'):
        return_results[field] = results[field]
    return_results['psycho_status'] = json.loads(results['psycho_status'])

    if results['keywords']:
        return_results['keywords'] = _sorted_by_value(json.loads(results['keywords']))
    else:
        return_results['keywords'] = []

    return_results['retweet'] = search_retweet(uid, 0)
    return_results['follow'] = search_follower(uid, 0)
    return_results['at'] = search_mention(uid, 0)

    if results['ip'] and results['geo_activity']:
        ip_dict = json.loads(results['ip'])
        geo_dict = json.loads(results['geo_activity'])
        return_results['geo_description'] = active_geo_description(ip_dict, geo_dict)
    else:
        return_results['geo_description'] = ''

    # Per-day top-2 locations plus overall location totals.
    geo_top = []
    if results['geo_activity']:
        # Parsed again on purpose so this section works on a fresh copy.
        geo_dict = json.loads(results['geo_activity'])
        if len(geo_dict) < 7:
            for day in _portrait_week_dates():
                if day not in geo_dict:
                    geo_dict[day] = {}
        geo_totals = {}
        for day, day_geo in sorted(geo_dict.items(), key=lambda x: x[0]):
            ranked = _sorted_by_value(day_geo)
            geo_top.append([day, [pair[0] for pair in ranked[0:2]]])
            for place in day_geo:
                if place in geo_totals:
                    geo_totals[place] += day_geo[place]
                else:
                    geo_totals[place] = day_geo[place]
        return_results['top_activity_geo'] = _sorted_by_value(geo_totals)
    else:
        return_results['top_activity_geo'] = []
    return_results['activity_geo_distribute'] = geo_top

    return_results['hashtag'] = get_user_hashtag(uid)[0]

    # topic
    if results['topic']:
        return_results['topic'] = _sorted_by_value(json.loads(results['topic']))[:5]
    else:
        return_results['topic'] = []

    # domain
    if results['domain']:
        return_results['domain'] = results['domain'].split('_')
    else:
        return_results['domain'] = []

    # on_line pattern
    if results['online_pattern']:
        return_results['online_pattern'] = _sorted_by_value(json.loads(results['online_pattern']))[:5]
    else:
        return_results['online_pattern'] = []

    # self_state
    try:
        profile_result = es_user_profile.get(index='weibo_user', doc_type='user', id=uid)
        return_results['description'] = profile_result['_source'].get('description', '')
    except Exception:
        return_results['description'] = ''

    # NOTE(review): activeness uses an upper bound of 10000 while the other
    # three use 100000 -- kept as in the original; confirm it is intended.
    rank_fields = (
        ('importance', 100000),
        ('activeness', 10000),
        ('influence', 100000),
        ('sensitive', 100000),
    )
    for field, upper_bound in rank_fields:
        return_results[field + '_rank'] = _portrait_rank(field, results[field], upper_bound)
        return_results[field] = results[field]

    query_body = {
        'query': {
            'match_all': {}
        }
    }
    all_count = es.count(index='sensitive_user_portrait', doc_type='user', body=query_body)
    if all_count['_shards']['successful'] != 0:
        return_results['all_count'] = all_count['count']
    else:
        print('es_sensitive_user_portrait error')
        return_results['all_count'] = 0

    # link
    return_results['link'] = results['link']

    weibo_trend = get_user_trend(uid)[0]
    return_results['time_description'] = active_time_description(weibo_trend)
    return_results['time_trend'] = weibo_trend

    # user influence trend
    influence_detail = []
    influence_value = []
    attention_value = []
    for date in _portrait_week_dates():
        detail = [0]*10
        try:
            item = es.get(index=date, doc_type='bci', id=uid)['_source']
            detail[0] = item.get('origin_weibo_number', 0)
            detail[1] = item.get('retweeted_weibo_number', 0)
            detail[2] = item.get('origin_weibo_retweeted_total_number', 0) + item.get('retweeted_weibo_retweeted_total_number', 0)
            detail[3] = item.get('origin_weibo_comment_total_number', 0) + item.get('retweeted_weibo_comment_total_number', 0)
            detail[4] = item.get('origin_weibo_top_retweeted_id', '0')
            detail[5] = _weibo_text(detail[4])
            detail[6] = item.get('origin_weibo_retweeted_top_number', 0)
            detail[7] = item.get('origin_weibo_top_comment_id', '0')
            detail[8] = _weibo_text(detail[7])
            detail[9] = item.get('origin_weibo_comment_top_number', 0)
            attention_number = detail[2] + detail[3]
            attention = 2/(1+math.exp(-0.005*attention_number)) - 1
            user_index = item['user_index']
        except Exception:
            # Any failure for this day reports zero influence/attention;
            # `detail` keeps whatever was filled in before the failure.
            user_index = 0
            attention = 0
        influence_value.append([date, user_index])
        influence_detail.append([date, detail])
        attention_value.append(attention)
    return_results['influence_trend'] = influence_value
    return_results['common_influence_detail'] = influence_detail
    return_results['attention_degree'] = attention_value

    return return_results
Пример #5
0
def _sensitive_week_dates():
    """Return the seven day keys (ts2datetime output without '-') before the reference day."""
    # NOTE(review): the reference day "2013-09-08" is hard-coded; the original
    # computed time.time() and immediately overwrote it, which looks like a
    # test fixture -- confirm before switching to the real clock.
    start_ts = datetime2ts("2013-09-08") - 8 * 24 * 3600
    return [ts2datetime(start_ts + offset * 24 * 3600).replace("-", "") for offset in range(1, 8)]


def _load_week_padded(raw_json):
    """Parse a JSON day->mapping dict; if it has fewer than 7 days, add {} for missing week days."""
    per_day = json.loads(raw_json)
    if len(per_day) < 7:
        for day in _sensitive_week_dates():
            if day not in per_day:
                per_day[day] = {}
    return per_day


def _sum_hashtag_counts(hashtag_dict):
    """Sum hashtag counts over all days.

    A day's value may be a dict (tag -> count) or a list of (tag, count)
    pairs; both shapes are accepted.  The original indexed dict keys as if
    they were pairs, which summed characters of the tag instead of counts.
    """
    totals = {}
    for day_value in hashtag_dict.values():
        pairs = day_value.items() if isinstance(day_value, dict) else day_value
        for tag, count in pairs:
            if tag in totals:
                totals[tag] += count
            else:
                totals[tag] = count
    return totals


def sensitive_attribute(uid, date):
    """Collect the sensitive-content attributes of user `uid`.

    Args:
        uid: id of the user to inspect.
        date: ignored -- kept for interface compatibility.  The function
            always reads the fixed day "20130907" (marked as a test value in
            the original code).

    Returns:
        {"utype": 0} when user_type(uid) is falsy.  Otherwise a dict with
        "utype" set to 1 plus weibo counters, sentiment ratios and trend,
        per-day geo/hashtag/sensitive-word statistics and sensitive relation
        searches.
    """
    results = {}
    if not user_type(uid):
        results["utype"] = 0
        return results
    results["utype"] = 1
    results["uid"] = uid

    # One fetch serves both the profile fields and the statistics below.
    portrait = es.get(index="sensitive_user_portrait", doc_type="user", id=uid)["_source"]
    for field in ("uname", "photo_url"):
        stored = portrait[field]
        # A stored 0 marks a missing profile field.
        results[field] = "unknown" if stored == 0 else stored

    # sensitive weibo number statistics
    date = "20130907"  # test
    try:
        item = es.get(index=date, doc_type="bci", id=uid)["_source"]
    except Exception:
        item = {}

    # (counter name in the bci document, key of the combined total in results)
    counters = (
        ("origin_weibo_number", "origin_weibo_total_number"),
        ("retweeted_weibo_number", "retweeted_weibo_total_number"),
        ("comment_weibo_number", "comment_weibo_total_number"),
        ("origin_weibo_retweeted_total_number", "origin_weibo_retweeted_total_number"),
        ("origin_weibo_comment_total_number", "origin_weibo_comment_total_number"),
        ("retweeted_weibo_retweeted_total_number", "retweeted_weibo_retweeted_total_number"),
        ("retweeted_weibo_comment_total_number", "retweeted_weibo_comment_total_number"),
    )
    sensitive_counts = {}
    try:
        for name, _ in counters:
            sensitive_counts[name] = item.get("s_" + name, 0)
        sensitive_counts["comment_weibo_number"] = int(sensitive_counts["comment_weibo_number"])
    except Exception:
        # Any problem reading the sensitive counters zeroes all of them.
        for name, _ in counters:
            sensitive_counts[name] = 0
    for name, total_key in counters:
        results["sensitive_" + name] = sensitive_counts[name]
        if name == "comment_weibo_number":
            results[total_key] = int(item.get(name, 0)) + int(sensitive_counts[name])
        else:
            results[total_key] = item.get(name, 0) + sensitive_counts[name]

    results["sensitive_text"] = sort_sensitive_text(uid)

    results["sensitive_geo_distribute"] = []
    results["sensitive_time_distribute"] = get_user_trend(uid)[1]
    results["sensitive_hashtag"] = []
    results["sensitive_words"] = []
    results["sensitive_hashtag_dict"] = []
    results["sensitive_words_dict"] = []
    results["sensitive_hashtag_description"] = ""

    sentiment_trend = user_sentiment_trend(uid)
    emotion_number = sentiment_trend[0]
    emotion_total = emotion_number[2] + emotion_number[1] + emotion_number[0]
    if emotion_total:
        results["negetive_index"] = float(emotion_number[2]) / emotion_total
        results["negetive_influence"] = float(emotion_number[1]) / emotion_total
    else:
        # No sentiment records at all: report 0 instead of dividing by zero.
        results["negetive_index"] = 0.0
        results["negetive_influence"] = 0.0

    sentiment_dict = sentiment_trend[1]
    return_sentiment = {"positive": [], "neutral": [], "negetive": []}
    for day in _sensitive_week_dates():
        day_sentiment = sentiment_dict.get(day, {})
        return_sentiment["positive"].append([day_sentiment.get("positive", 0), day])
        return_sentiment["negetive"].append([day_sentiment.get("negetive", 0), day])
        return_sentiment["neutral"].append([day_sentiment.get("neutral", 0), day])
    results["sentiment_trend"] = return_sentiment

    results["politics_trend"] = portrait["politics_trend"]
    results["domain"] = portrait["domain"]
    results["sensitive"] = portrait["sensitive"]
    raw_hashtag = portrait["sensitive_hashtag_dict"]
    raw_words = portrait["sensitive_words_dict"]
    raw_geo = portrait["sensitive_geo_activity"]

    if raw_geo:
        sensitive_geo_list = []
        for day, places in sorted(_load_week_padded(raw_geo).items(), key=lambda x: x[0]):
            top_places = sorted(places.items(), key=lambda x: x[1], reverse=True)[0:2]
            sensitive_geo_list.append([day, top_places])
        results["sensitive_geo_distribute"] = sensitive_geo_list

    if raw_hashtag:
        hashtag_dict = json.loads(raw_hashtag)
        if len(hashtag_dict) < 7:
            for day in _sensitive_week_dates():
                if day in hashtag_dict:
                    # Days inside the window are replaced by (tag, count)
                    # pairs sorted by count; other days stay dicts.
                    hashtag_dict[day] = sorted(hashtag_dict[day].items(), key=lambda x: x[1], reverse=True)
                else:
                    hashtag_dict[day] = {}
        results["sensitive_hashtag_description"] = hashtag_description(hashtag_dict)
    else:
        hashtag_dict = {}

    if raw_words:
        sensitive_words_dict = _load_week_padded(raw_words)
    else:
        sensitive_words_dict = {}

    results["today_sensitive_words"] = sensitive_words_dict.get(date, {})

    all_hashtag_dict = _sum_hashtag_counts(hashtag_dict)

    all_sensitive_words_dict = {}
    for day_words in sensitive_words_dict.values():
        for word in day_words:
            if word in all_sensitive_words_dict:
                all_sensitive_words_dict[word] += day_words[word]
            else:
                all_sensitive_words_dict[word] = day_words[word]

    sorted_words = sorted(all_sensitive_words_dict.items(), key=lambda x: x[1], reverse=True)
    results["sensitive_hashtag"] = sorted(all_hashtag_dict.items(), key=lambda x: x[1], reverse=True)
    results["sensitive_words"] = sort_sensitive_words(sorted_words)
    results["sensitive_hashtag_dict"] = sorted(hashtag_dict.items(), key=lambda x: x[0], reverse=False)
    results["sensitive_words_dict"] = sorted(sensitive_words_dict.items(), key=lambda x: x[0], reverse=False)

    results["sensitive_retweet"] = search_retweet(uid, 1)
    results["sensitive_follow"] = search_follower(uid, 1)
    results["sensitive_at"] = search_mention(uid, 1)

    return results
Пример #6
0
def _sorted_by_value(mapping):
    """Return mapping.items() as a list sorted by value, largest first."""
    return sorted(mapping.items(), key=lambda x: x[1], reverse=True)


def _portrait_week_dates():
    """Return the seven day keys (ts2datetime output without '-') before the reference day."""
    # NOTE(review): the reference day "2013-09-08" is hard-coded; the original
    # computed time.time() and immediately overwrote it, which looks like a
    # test fixture -- confirm before switching to the real clock.
    start_ts = datetime2ts("2013-09-08") - 8 * 24 * 3600
    return [ts2datetime(start_ts + offset * 24 * 3600).replace("-", "") for offset in range(1, 8)]


def _portrait_rank(field, value, upper_bound):
    """Count portrait documents whose `field` is in the range value..upper_bound.

    Returns 0 when `value` is falsy or when no shard answered successfully.
    """
    if not value:
        return 0
    query_body = {"query": {"range": {field: {"from": value, "to": upper_bound}}}}
    counted = es.count(index="sensitive_user_portrait", doc_type="user", body=query_body)
    if counted["_shards"]["successful"] != 0:
        return counted["count"]
    return 0


def _weibo_text(mid):
    """Return the "text" field stored for `mid`, or "" if mid is falsy or the lookup fails."""
    if not mid:
        return ""
    try:
        return es.get(index="sensitive_user_text", doc_type="user", id=mid)["_source"]["text"]
    except Exception:
        return ""


def search_attribute_portrait(uid):
    """Build the portrait summary dict for user `uid`.

    Reads the user's document from the "sensitive_user_portrait" index and
    enriches it with relation searches, geo/topic/online-pattern rankings,
    rank counts for importance/activeness/influence/sensitive, and a
    seven-day influence trend read from the per-day "bci" documents.

    Args:
        uid: id of the user document to look up.

    Returns:
        None when the portrait document cannot be fetched, otherwise a dict
        of portrait attributes.  Note that the "sensitive" entry holds the
        value stored in the portrait document, not the 0/1 user-type flag
        (that flag is exposed as "user_type").
    """
    try:
        search_result = es.get(index="sensitive_user_portrait", doc_type="user", id=uid)
    except Exception:
        return None
    results = search_result["_source"]

    return_results = {}
    return_results["user_type"] = 1 if user_type(uid) else 0

    # A stored 0 marks a missing profile field.
    for field in ("photo_url", "location", "uname"):
        stored = results[field]
        return_results[field] = "unknown" if stored == 0 else stored
    for field in ("uid", "fansnum", "friendsnum", "gender"):
        return_results[field] = results[field]
    return_results["psycho_status"] = json.loads(results["psycho_status"])

    if results["keywords"]:
        return_results["keywords"] = _sorted_by_value(json.loads(results["keywords"]))
    else:
        return_results["keywords"] = []

    return_results["retweet"] = search_retweet(uid, 0)
    return_results["follow"] = search_follower(uid, 0)
    return_results["at"] = search_mention(uid, 0)

    if results["ip"] and results["geo_activity"]:
        ip_dict = json.loads(results["ip"])
        geo_dict = json.loads(results["geo_activity"])
        return_results["geo_description"] = active_geo_description(ip_dict, geo_dict)
    else:
        return_results["geo_description"] = ""

    # Per-day top-2 locations plus overall location totals.
    geo_top = []
    if results["geo_activity"]:
        # Parsed again on purpose so this section works on a fresh copy.
        geo_dict = json.loads(results["geo_activity"])
        if len(geo_dict) < 7:
            for day in _portrait_week_dates():
                if day not in geo_dict:
                    geo_dict[day] = {}
        geo_totals = {}
        for day, day_geo in sorted(geo_dict.items(), key=lambda x: x[0]):
            ranked = _sorted_by_value(day_geo)
            geo_top.append([day, [pair[0] for pair in ranked[0:2]]])
            for place in day_geo:
                if place in geo_totals:
                    geo_totals[place] += day_geo[place]
                else:
                    geo_totals[place] = day_geo[place]
        return_results["top_activity_geo"] = _sorted_by_value(geo_totals)
    else:
        return_results["top_activity_geo"] = []
    return_results["activity_geo_distribute"] = geo_top

    return_results["hashtag"] = get_user_hashtag(uid)[0]

    # topic
    if results["topic"]:
        return_results["topic"] = _sorted_by_value(json.loads(results["topic"]))[:5]
    else:
        return_results["topic"] = []

    # domain
    if results["domain"]:
        return_results["domain"] = results["domain"].split("_")
    else:
        return_results["domain"] = []

    # on_line pattern
    if results["online_pattern"]:
        return_results["online_pattern"] = _sorted_by_value(json.loads(results["online_pattern"]))[:5]
    else:
        return_results["online_pattern"] = []

    # self_state
    try:
        profile_result = es_user_profile.get(index="weibo_user", doc_type="user", id=uid)
        return_results["description"] = profile_result["_source"].get("description", "")
    except Exception:
        return_results["description"] = ""

    # NOTE(review): activeness uses an upper bound of 10000 while the other
    # three use 100000 -- kept as in the original; confirm it is intended.
    rank_fields = (
        ("importance", 100000),
        ("activeness", 10000),
        ("influence", 100000),
        ("sensitive", 100000),
    )
    for field, upper_bound in rank_fields:
        return_results[field + "_rank"] = _portrait_rank(field, results[field], upper_bound)
        return_results[field] = results[field]

    query_body = {"query": {"match_all": {}}}
    all_count = es.count(index="sensitive_user_portrait", doc_type="user", body=query_body)
    if all_count["_shards"]["successful"] != 0:
        return_results["all_count"] = all_count["count"]
    else:
        print("es_sensitive_user_portrait error")
        return_results["all_count"] = 0

    # link
    return_results["link"] = results["link"]

    weibo_trend = get_user_trend(uid)[0]
    return_results["time_description"] = active_time_description(weibo_trend)
    return_results["time_trend"] = weibo_trend

    # user influence trend
    influence_detail = []
    influence_value = []
    attention_value = []
    for date in _portrait_week_dates():
        detail = [0] * 10
        try:
            item = es.get(index=date, doc_type="bci", id=uid)["_source"]
            detail[0] = item.get("origin_weibo_number", 0)
            detail[1] = item.get("retweeted_weibo_number", 0)
            detail[2] = item.get("origin_weibo_retweeted_total_number", 0) + item.get(
                "retweeted_weibo_retweeted_total_number", 0
            )
            detail[3] = item.get("origin_weibo_comment_total_number", 0) + item.get(
                "retweeted_weibo_comment_total_number", 0
            )
            detail[4] = item.get("origin_weibo_top_retweeted_id", "0")
            detail[5] = _weibo_text(detail[4])
            detail[6] = item.get("origin_weibo_retweeted_top_number", 0)
            detail[7] = item.get("origin_weibo_top_comment_id", "0")
            detail[8] = _weibo_text(detail[7])
            detail[9] = item.get("origin_weibo_comment_top_number", 0)
            attention_number = detail[2] + detail[3]
            attention = 2 / (1 + math.exp(-0.005 * attention_number)) - 1
            user_index = item["user_index"]
        except Exception:
            # Any failure for this day reports zero influence/attention;
            # `detail` keeps whatever was filled in before the failure.
            user_index = 0
            attention = 0
        influence_value.append([date, user_index])
        influence_detail.append([date, detail])
        attention_value.append(attention)
    return_results["influence_trend"] = influence_value
    return_results["common_influence_detail"] = influence_detail
    return_results["attention_degree"] = attention_value

    return return_results