Пример #1
0
def get_tweets_from_user_portrait(monitor_keywords_list, sort_item_new):
    """Fetch flow-text tweets for the top-ranked users of the portrait index.

    The portrait index is searched with ``match_all``, sorted in descending
    order on the field named by ``sort_item_new`` and limited to
    ``USER_POETRAIT_NUMBER`` hits.  The ``_id`` of every hit is taken as a
    uid; the de-duplicated uids are passed, together with
    ``monitor_keywords_list``, to ``uid_lists2tw_from_flow_text`` and that
    helper's result is returned unchanged.
    """
    portrait_query = {
        'query': {'match_all': {}},
        'sort': {sort_item_new: {'order': 'desc'}},
        'size': USER_POETRAIT_NUMBER,
    }
    response = es_tw_user_portrait.search(
        index=tw_portrait_index_name,
        doc_type=tw_portrait_index_type,
        body=portrait_query)
    portrait_hits = response['hits']['hits']

    # The document _id doubles as the uid; the set drops duplicates.
    unique_uids = set(hit['_id'] for hit in (portrait_hits or ()))

    return uid_lists2tw_from_flow_text(monitor_keywords_list,
                                       list(unique_uids))
Пример #2
0
def update_influence(uid_list=None):
    """Collect the influence values of the given users from the BCI indices.

    Args:
        uid_list: uids to look up.  When empty or omitted the list is taken
            from ``load_uid_list()``.

    Every index named by ``get_tw_bci_index_list(load_timestamp())`` is
    searched for documents whose ``uid`` is in ``uid_list`` (newest first,
    at most ``MAX_SEARCH_SIZE`` hits per index).  The ``influence`` value of
    each hit is converted to ``float`` and appended to that user's
    ``influence_list``.

    NOTE(review): the visible code neither returns nor stores the collected
    ``user_influence`` mapping - confirm whether the remainder of the
    function is missing from this excerpt.
    """
    if not uid_list:
        uid_list = load_uid_list()
    tw_bci_index_list = get_tw_bci_index_list(load_timestamp())
    tw_influence_query_body = {
        'query': {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"terms": {"uid": uid_list}},
                        ]
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        "sort": {"timestamp": {"order": "desc"}},
        "fields": ["influence", "uid"]
    }
    user_influence = {}
    for index_name in tw_bci_index_list:
        try:
            search_results = es.search(
                index=index_name,
                doc_type=tw_bci_index_type,
                body=tw_influence_query_body)['hits']['hits']
            for item in search_results:
                # With "fields" in the request every value arrives as a list.
                content = item['fields']
                uid = content['uid'][0]
                if uid not in user_influence:
                    user_influence[uid] = {
                        'influence_list': []
                    }
                if 'influence' in content:
                    user_influence[uid]['influence_list'].append(
                        float(content['influence'][0]))
        except Exception as e:
            # Best effort: a failure in one index is reported and the loop
            # moves on to the next index.
            print(e)
Пример #3
0
def update_keywords(uid_list=None):
    """Merge the keyword dictionaries of the given users' flow-text documents.

    Args:
        uid_list: uids to look up.  When empty or omitted the list is taken
            from ``load_uid_list()``.

    Every index named by
    ``get_twitter_flow_text_index_list(load_timestamp())`` is searched for
    documents whose ``uid`` is in ``uid_list`` (newest first, at most
    ``MAX_SEARCH_SIZE`` hits per index).  The ``keywords_dict`` field of each
    hit is parsed as JSON and folded into the user's running ``keywords``
    dictionary with ``merge_dict``.

    NOTE(review): the visible code neither returns nor stores the collected
    ``user_keywords`` mapping - confirm whether the remainder of the
    function is missing from this excerpt.
    """
    if not uid_list:
        uid_list = load_uid_list()
    tw_flow_text_index_list = get_twitter_flow_text_index_list(load_timestamp())
    keywords_query_body = {
        'query': {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"terms": {"uid": uid_list}},
                        ]
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        "sort": {"timestamp": {"order": "desc"}},
        "fields": ["keywords_dict", "uid"]
    }
    user_keywords = {}
    for index_name in tw_flow_text_index_list:
        try:
            search_results = es.search(
                index=index_name,
                doc_type=twitter_flow_text_index_type,
                body=keywords_query_body)['hits']['hits']
            for item in search_results:
                # With "fields" in the request every value arrives as a list.
                content = item['fields']
                uid = content['uid'][0]
                if uid not in user_keywords:
                    user_keywords[uid] = {
                        'keywords': {}
                    }
                if 'keywords_dict' in content:
                    # keywords_dict is stored as a JSON string.
                    user_keywords[uid]['keywords'] = merge_dict(
                        user_keywords[uid]['keywords'],
                        json.loads(content['keywords_dict'][0]))
        except Exception as e:
            # Best effort: a failure in one index is reported and the loop
            # moves on to the next index.
            print(e)
Пример #4
0
def load_uid_list():
    """Return the ``uid`` values stored in the twitter user index.

    At most ``MAX_SEARCH_SIZE`` documents are read.  If the search or the
    extraction fails, the error is printed and the uids gathered up to that
    point (possibly none) are returned.

    Returns:
        list: the ``_source.uid`` value of every hit.
    """
    uid_list = []
    uid_list_query_body = {'size': MAX_SEARCH_SIZE}
    try:
        search_results = es.search(
            index=twitter_user_index_name,
            doc_type=twitter_user_index_type,
            body=uid_list_query_body)['hits']['hits']
        for item in search_results:
            uid_list.append(item['_source']['uid'])
    except Exception as e:
        print(e)
    # Callers assign the result (uid_list = load_uid_list()), so the list
    # has to be handed back explicitly.
    return uid_list
Пример #5
0
def update_domain(uid_list=None):
    """Gather the per-user inputs used for domain classification.

    Args:
        uid_list: uids to look up.  When empty or omitted the list is taken
            from ``load_uid_list()``.

    For each user found in the twitter user index the function records
    ``location``, ``username``, ``description`` (truncated to 1000
    characters) and ``number_of_text`` (taken from ``count_text_num``).
    Missing text fields are recorded as empty strings.

    NOTE(review): the visible code neither returns nor stores the collected
    ``user_domain_data`` mapping - confirm whether the remainder of the
    function is missing from this excerpt.
    """
    if not uid_list:
        uid_list = load_uid_list()
    tw_flow_text_index_list = get_twitter_flow_text_index_list(load_timestamp())
    user_domain_data = {}
    # load num of text
    count_result = count_text_num(uid_list, tw_flow_text_index_list)
    # load baseinfo
    tw_user_query_body = {
        'query': {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"terms": {"uid": uid_list}},
                        ]
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        "fields": ["location", "username", "description", "uid"]
    }
    try:
        search_results = es.search(
            index=twitter_user_index_name,
            doc_type=twitter_user_index_type,
            body=tw_user_query_body)['hits']['hits']
        for item in search_results:
            # With "fields" in the request every value arrives as a list.
            content = item['fields']
            uid = content['uid'][0]
            if uid not in user_domain_data:
                # NOTE(review): assumes count_result has an entry for every
                # uid returned here; a missing key raises KeyError, which the
                # except below reports and which ends the loop - confirm
                # against count_text_num.
                user_domain_data[uid] = {
                    'location': '',
                    'username': '',
                    'description': '',
                    'number_of_text': count_result[uid]
                }
            record = user_domain_data[uid]
            record['location'] = content.get('location', [''])[0]
            record['username'] = content.get('username', [''])[0]
            record['description'] = content.get('description', [''])[0][:1000]
    except Exception as e:
        print(e)
Пример #6
0
def update_sentiment(uid_list=None):
    """Collect the sentiment codes of the given users' flow-text documents.

    Args:
        uid_list: uids to look up.  When empty or omitted the list is taken
            from ``load_uid_list()``.

    Every index named by
    ``get_twitter_flow_text_index_list(load_timestamp())`` is searched for
    documents whose ``uid`` is in ``uid_list`` (newest first, at most
    ``MAX_SEARCH_SIZE`` hits per index).  The ``sentiment`` value of each hit
    is converted to ``int`` and appended to that user's ``sentiment_list``.

    Sentiment codes, as listed by the original author under the name
    SENTIMENT_DICT_NEW: 0 neutral, 1 positive, 2 angry, 3 anxious, 4 sad,
    5 disgusted, 6 other negative, 7 negative.

    NOTE(review): the visible code neither returns nor stores the collected
    ``user_sentiment`` mapping - confirm whether the remainder of the
    function is missing from this excerpt.
    """
    if not uid_list:
        uid_list = load_uid_list()
    tw_flow_text_index_list = get_twitter_flow_text_index_list(load_timestamp())
    sentiment_query_body = {
        'query': {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"terms": {"uid": uid_list}},
                        ]
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        "sort": {"timestamp": {"order": "desc"}},
        "fields": ["sentiment", "uid"]
    }
    user_sentiment = {}
    for index_name in tw_flow_text_index_list:
        try:
            search_results = es.search(
                index=index_name,
                doc_type=twitter_flow_text_index_type,
                body=sentiment_query_body)['hits']['hits']
            for item in search_results:
                # With "fields" in the request every value arrives as a list.
                content = item['fields']
                uid = content['uid'][0]
                if uid not in user_sentiment:
                    user_sentiment[uid] = {
                        'sentiment_list': []
                    }
                if 'sentiment' in content:
                    user_sentiment[uid]['sentiment_list'].append(
                        int(content['sentiment'][0]))
        except Exception as e:
            # Best effort: a failure in one index is reported and the loop
            # moves on to the next index.
            print(e)
Пример #7
0
def update_hashtag(uid_list=None):
    """Collect the hashtags of the given users' flow-text documents.

    Args:
        uid_list: uids to look up.  When empty or omitted the list is taken
            from ``load_uid_list()``.

    Every index named by
    ``get_twitter_flow_text_index_list(load_timestamp())`` is searched for
    documents whose ``uid`` is in ``uid_list`` (newest first, at most
    ``MAX_SEARCH_SIZE`` hits per index).  A non-empty ``hashtag`` value is
    split on ``'&'`` and the pieces are appended to that user's
    ``hashtag_list``.

    NOTE(review): the visible code neither returns nor stores the collected
    ``user_hashtag`` mapping - confirm whether the remainder of the
    function is missing from this excerpt.
    """
    if not uid_list:
        uid_list = load_uid_list()
    tw_flow_text_index_list = get_twitter_flow_text_index_list(load_timestamp())
    hashtag_query_body = {
        'query': {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"terms": {"uid": uid_list}},
                        ]
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        "sort": {"timestamp": {"order": "desc"}},
        "fields": ["hashtag", "uid"]
    }
    user_hashtag = {}
    for index_name in tw_flow_text_index_list:
        try:
            search_results = es.search(
                index=index_name,
                doc_type=twitter_flow_text_index_type,
                body=hashtag_query_body)['hits']['hits']
            for item in search_results:
                # With "fields" in the request every value arrives as a list.
                content = item['fields']
                uid = content['uid'][0]
                if uid not in user_hashtag:
                    user_hashtag[uid] = {
                        'hashtag_list': []
                    }
                if 'hashtag' in content:
                    hashtag = content['hashtag'][0]
                    if hashtag:
                        # Several hashtags are stored in one '&'-joined string.
                        user_hashtag[uid]['hashtag_list'].extend(
                            hashtag.split('&'))
        except Exception as e:
            # Best effort: a failure in one index is reported and the loop
            # moves on to the next index.
            print(e)
Пример #8
0
def update_baseinfo(uid_list=None):
    """Build a base-profile record for every uid and pass them to save_data2es.

    Args:
        uid_list: uids to look up in the twitter user index.  Omitted or
            empty means no uid is requested.

    Returns:
        Whatever ``save_data2es`` returns for the assembled mapping
        ``{uid: record}``.

    Each record holds ``uid``, ``uname``, ``location``, ``verified``,
    ``statusnum``, ``friendsnum``, ``fansnum``, ``photo_url`` and
    ``screenname``.  Text fields missing from a hit are stored as ``''`` and
    missing counters as ``0``, the same values used for uids that are not
    found in the index at all.
    """
    if uid_list is None:
        uid_list = []

    def blank_profile(uid):
        # Record used as the starting point for a hit and as the complete
        # record for a uid that the index does not know.
        return {
            'uid': str(uid),
            'uname': '',
            'location': '',
            'verified': '',
            'statusnum': 0,
            'friendsnum': 0,
            'fansnum': 0,
            'photo_url': '',
            'screenname': ''
        }

    user_baseinfo = {}
    tw_user_query_body = {
        'query': {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"terms": {"uid": uid_list}},
                        ]
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        "fields": ["location", "userscreenname", "original_profile_image_url",
                   "followers_count", "status_count", "friends_count",
                   "is_verified", "username", "uid"]
    }
    search_results = es.search(
        index=twitter_user_index_name,
        doc_type=twitter_user_index_type,
        body=tw_user_query_body)['hits']['hits']
    for item in search_results:
        # With "fields" in the request every value arrives as a list.
        content = item['fields']
        uid = content['uid'][0]
        if uid not in user_baseinfo:
            user_baseinfo[uid] = blank_profile(uid)
        profile = user_baseinfo[uid]

        profile['location'] = content.get('location', [''])[0]
        profile['uname'] = content.get('username', [''])[0]
        profile['photo_url'] = content.get('original_profile_image_url', [''])[0]
        if 'is_verified' in content:
            profile['verified'] = str(content['is_verified'][0])
        else:
            profile['verified'] = ''
        # Counters fall back to 0 (not '') so that every record carries the
        # same value type as the blank profile.
        profile['statusnum'] = content.get('status_count', [0])[0]
        profile['friendsnum'] = content.get('friends_count', [0])[0]
        profile['fansnum'] = content.get('followers_count', [0])[0]
        profile['screenname'] = content.get('userscreenname', [''])[0]

    # Requested uids without a hit still get a placeholder record.
    for uid in uid_list:
        if uid not in user_baseinfo:
            user_baseinfo[uid] = blank_profile(uid)
    return save_data2es(user_baseinfo)