Exemplo n.º 1
0
def extract_tweet(t):
    """
    Build a filtered copy of a tweet holding only the TWEET_FIELDS entries.

    t is a dict representing a Twitter tweet as returned by the API

    returns: a dict for the tweet with filtered attributes, keyed by the
    names in TWEET_FIELDS. Most fields are copied verbatim from t; the
    following are derived:

    - user_id: taken from t['user']['id']
    - source: the text inside the <a ...>...</a> markup of t['source'], or
      the raw value when it does not contain such markup
    - created_at: t['created_at'] passed through convert_timestamp
    - text: t['text'] with every whitespace run collapsed to one space and
      leading/trailing whitespace removed
    - retweeted_status_id / retweeted_status_user_id /
      retweeted_status_user_screen_name: describe the retweeted tweet, or
      are None when t has no (truthy) 'retweeted_status'

    raises: KeyError when a requested field, or a key needed to derive it,
    is missing from t.
    """
    d = {}
    rt = t.get('retweeted_status')
    for f in TWEET_FIELDS:
        if f == 'user_id':
            d['user_id'] = t['user']['id']
        elif f == 'source':
            # keep only the link text; fall back to the raw value when the
            # source is not wrapped in an anchor tag
            x = re.search(r'(?<=>).+?(?=<\/a>)', t['source'])
            d['source'] = x.group() if x else t['source']
        elif f == 'created_at':
            d['created_at'] = convert_timestamp(t['created_at'])
        elif f == 'text':
            # raw string: "\s" in a plain literal is an invalid escape
            d['text'] = re.sub(r'\s+', ' ', t['text']).strip()
        elif f == 'retweeted_status_id':
            d['retweeted_status_id'] = rt['id'] if rt else None
        elif f == 'retweeted_status_user_id':
            d['retweeted_status_user_id'] = rt['user']['id'] if rt else None
        elif f == 'retweeted_status_user_screen_name':
            # note that the screen_name of retweeted user is extracted from
            # the tweet's text
            screen_name = None
            if rt:
                m = re.search(r'(?<=^RT @)\w+(?=:)', t['text'])
                if m:
                    screen_name = m.group()
                else:
                    # the text does not start with "RT @name:"; use the
                    # retweeted status' own user record when it carries a
                    # screen_name instead of failing on a None match
                    screen_name = (rt.get('user') or {}).get('screen_name')
            d['retweeted_status_user_screen_name'] = screen_name
        else:
            d[f] = t[f]
    return d
Exemplo n.º 2
0
def extract_twitter_profile(profile):
    """
    Reduce a Twitter user profile to the attributes in TWITTER_PROFILE_FIELDS.

    profile is a dict representing a Twitter user profile as returned by the
    API

    returns: a dict with one entry per name in TWITTER_PROFILE_FIELDS; the
    'created_at' value is passed through convert_timestamp, every other
    value is copied from profile unchanged
    """
    return {
        name: (
            convert_timestamp(profile['created_at'])
            if name == 'created_at'
            else profile[name]
        )
        for name in TWITTER_PROFILE_FIELDS
    }