Example No. 1
import json

def search_tweets(t, r, keywords, level="country"):
    if level == "country":
        index_name = keywords
    else:
        index_name = "%s %s" % (level, keywords)
    redisTweetId = tu.getRedisIdByScreenName(index_name, 'search')
    params = {"q": keywords,
              "count": 100}
    search_results = tu.makeTwitterRequest(t.search.tweets, **params)
    tweets = search_results['statuses']

    for i in range(MAX_PAGES - 1):
        print("page %d" % (i + 1))
        next_results = search_results['search_metadata'].get('next_results')
        if next_results is None:
            break
        # next_results looks like "?max_id=...&q=..."; parse it into kwargs.
        kwargs = dict(kv.split('=') for kv in next_results[1:].split('&'))
        kwargs['max_id'] = str(int(kwargs['max_id']) - 1)
        search_results = tu.makeTwitterRequest(t.search.tweets, **kwargs)
        tweets += search_results['statuses']

        if len(search_results['statuses']) == 0:
            break
        print("Fetched %d tweets so far" % len(tweets))

    # Cache the tweets in Redis, serializing each one so it can live in a set.
    for tweet in tweets:
        r.sadd(redisTweetId, json.dumps(tweet))
    return tweets
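As a usage note, here is one hypothetical way to wire this helper up. The snippet assumes a `tu` utility module and a module-level `MAX_PAGES` constant; the credentials, Redis host, and search term below are placeholders, not part of the original.

# Hypothetical wiring for search_tweets; all credentials are placeholders.
import redis
import twitter  # the sixohsix/twitter library these snippets appear to use

MAX_PAGES = 5  # page cap assumed by search_tweets

t = twitter.Twitter(auth=twitter.OAuth('token', 'token_secret',
                                       'consumer_key', 'consumer_secret'))
r = redis.Redis(host='localhost', port=6379, db=0)

tweets = search_tweets(t, r, 'python', level='country')
print('Stored %d tweets under the search key' % len(tweets))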
Example No. 2
    def handle(self, *args, **options):
 
        # Get the configuration parameters

        c = config.Config()
        d = c.cfg

        # Load the classifier

        with open('bayesclass.pickle', 'rb') as f:
            classifier = pickle.load(f)

        t = login()


        KW = {'user' : d.get('api', 'user'), 
              'count' : d.get('api', 'count'), 
              'skip_users' : d.get('api', 'skip_users'), 
              'include_entities' : 'true',
              'since_id' : 1, 
              'id' : 2} 

        # Resume from the newest tweet already stored, if any.
        p = Tweet.objects.aggregate(Max('tweet_id'))
        latestId = p['tweet_id__max']
        if latestId is None:
            latestId = 1

        KW['since_id'] = int(latestId)

        api_call = getattr(t.statuses, 'user_timeline')
        tweets = makeTwitterRequest(t, api_call, **KW)

        print('Fetched %i tweets' % len(tweets))

        # Classify each tweet and persist it.
        for tweet in tweets:
            txt = tweet['text']
            ref = tweet['id']
            src = tweet['source']
            outc = int(classifier.classify(word_feats(txt)))
            created = mysql_date(tweet['created_at'])

            q = Tweet(datetime=created,
                      user=KW['user'],
                      content=txt,
                      source=src,
                      tweet_id=ref,
                      prop=outc)
            q.save()
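The command above (and Example No. 6 below) leans on two helpers, `word_feats` and `extractwords`, that are not shown in these snippets. A minimal sketch of what they might look like, assuming an NLTK-style bag-of-words feature dict:

# Hypothetical helpers; the originals are not part of these snippets.
def extractwords(text):
    # Naive tokenizer: lowercase the text and keep alphabetic tokens.
    return [w for w in text.lower().split() if w.isalpha()]

def word_feats(words):
    # NLTK's NaiveBayesClassifier expects a {feature: value} dict per sample.
    if isinstance(words, str):
        words = extractwords(words)
    return dict((w, True) for w in words)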
Example No. 3
import sys

def getFriendIds(screen_name=None, user_id=None, friends_limit=10000):
    # Note: relies on a module-level Twitter handle `t`, unlike the variant
    # in Example No. 4, which takes `t` as a parameter.
    ids = []
    cursor = -1
    while cursor != 0:
        params = dict(cursor=cursor)
        if screen_name is not None:
            params["screen_name"] = screen_name
        else:
            params["user_id"] = user_id

        response = makeTwitterRequest(t, t.friends.ids, **params)

        ids.extend(response["ids"])
        cursor = response["next_cursor"]
        print("Fetched %i ids for %s" % (len(ids), screen_name or user_id),
              file=sys.stderr)
        if len(ids) >= friends_limit:
            break
    return ids
Example No. 4
import sys

def getFriendIds(t, screen_name=None, user_id=None, friends_limit=10000):
    assert screen_name or user_id

    ids = []
    cursor = -1
    while cursor != 0:
        params = dict(cursor=cursor)
        if screen_name is not None:
            params['screen_name'] = screen_name
        else:
            params['user_id'] = user_id

        response = makeTwitterRequest(t, t.friends.ids, **params)

        ids.extend(response['ids'])
        cursor = response['next_cursor']
        print('Fetched %d ids for %s' % (len(ids), screen_name or user_id),
              file=sys.stderr)
        if len(ids) >= friends_limit:
            break
    return ids
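Both variants page through friends/ids with Twitter's cursor protocol: start at cursor -1, follow next_cursor on each response, and stop at cursor 0 or at the friends_limit cap. A hypothetical call (the screen name and credentials are placeholders):

import twitter

t = twitter.Twitter(auth=twitter.OAuth('token', 'token_secret',
                                       'consumer_key', 'consumer_secret'))
friend_ids = getFriendIds(t, screen_name='example_user', friends_limit=200)
print('Collected %d friend ids' % len(friend_ids))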
Example No. 6
    def handle(self, *args, **options):

        n = 10   # Number of training tweets
        SEARCH_TERM = ':)'
        
        MAX_PAGES = 1
        RESULTS_PER_PAGE = 1
        LANGUAGE = "en"
        INCLUDE_ENTITIES = "true"

        KW = {
            'count': 1000,
            'rpp': 100,
            'q': SEARCH_TERM,
            'lang': LANGUAGE,
            'include_entities': INCLUDE_ENTITIES
            }

        t = twitter.Twitter(domain='search.twitter.com')

        # Fetch one page of happy-face tweets and use the first n results as
        # positive training examples.
        search_results = makeTwitterRequest(t, t.search, **KW)

        posfeats = []
        for result in search_results['results'][:n]:
            itemb = extractwords(result['text'])
            posfeats.append((word_feats(itemb), '1'))

        classifier = NaiveBayesClassifier.train(posfeats)

        with open('bayesclass.pickle', 'wb') as f:
            pickle.dump(classifier, f)
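A quick sanity check for the pickled model, assuming the word_feats sketch shown after Example No. 2:

import pickle

with open('bayesclass.pickle', 'rb') as f:
    clf = pickle.load(f)
# '1' is the positive label used during training above.
print(clf.classify(word_feats('what a great day :)')))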
Example No. 7
# Look up the max tweet id already stored so only newer tweets are fetched.
# Note that since_id applies to the home and user timelines; it has no effect
# for the public timeline.

def idMapper(doc):
    yield (None, doc['id'])


def maxFindingReducer(keys, values, rereduce):
    return max(values)


view = ViewDefinition('index', 'max_tweet_id', idMapper, maxFindingReducer,
                      language='python')
view.sync(db)

try:
    KW['since_id'] = int([_id for _id in db.view('index/max_tweet_id')][0].value)
except IndexError:  # the view is empty on the first run
    KW['since_id'] = 1

# Harvest tweets for the given timeline.
# For friend and home timelines, the unofficial limitation is about 800 statuses although
# other documentation may state otherwise. The public timeline only returns 20 statuses 
# and gets updated every 60 seconds.
# See http://groups.google.com/group/twitter-development-talk/browse_thread/thread/4678df70c301be43
# Note that the count and since_id params have no effect for the public timeline

page_num = 1
while page_num <= MAX_PAGES:
    KW['page'] = page_num
    api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline')
    tweets = makeTwitterRequest(t, api_call, **KW)
    db.update(tweets, all_or_nothing=True)
    print('Fetched %i tweets' % len(tweets))
    page_num += 1
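This fragment assumes several module-level names (`db`, `t`, `KW`, `TIMELINE_NAME`, `MAX_PAGES`, `ViewDefinition`). One hypothetical way to set them up with couchdb-python; the database name, credentials, and parameter values below are placeholder assumptions:

import couchdb
import twitter
from couchdb.design import ViewDefinition

TIMELINE_NAME = 'user'  # 'user', 'home', or 'public'
MAX_PAGES = 5

server = couchdb.Server('http://localhost:5984')
db = server.create('tweets-%s-timeline' % TIMELINE_NAME)  # or server['...'] if it already exists

t = twitter.Twitter(auth=twitter.OAuth('token', 'token_secret',
                                       'consumer_key', 'consumer_secret'))
KW = {'count': 200, 'include_entities': 'true', 'since_id': 1}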