def get_info_by_id(t, ids):
    """Look up user records for a collection of user ids.

    The ids are sent through ``make_twitter_request`` to ``t.users.lookup``
    in batches of at most 100, joined into a comma-separated ``user_id``
    argument.

    Args:
        t: Client object exposing ``users.lookup``; it is also handed to
            ``make_twitter_request`` as its first argument.
        ids: Iterable of user ids (anything ``str()`` can render). Any
            iterable is accepted, not only lists.

    Returns:
        A dict mapping each returned record's ``'id'`` value to the record.
        If a request yields ``None`` the lookup stops early and only the
        records gathered so far are returned (best effort).
    """
    batch_size = 100

    # Materialize once so sets/generators work and so batches can be taken
    # by index instead of re-slicing the remainder on every pass.
    ids = list(ids)

    id_to_info = {}
    for start in range(0, len(ids), batch_size):
        ids_str = ','.join(str(_id) for _id in ids[start:start + batch_size])

        response = make_twitter_request(t, t.users.lookup, user_id=ids_str)
        if response is None:
            break

        # Handle Twitter API quirk: a lone record may arrive as a bare dict
        # instead of a list. isinstance (rather than an exact type check)
        # also covers dict subclasses, e.g. if the client wraps responses.
        if isinstance(response, dict):
            response = [response]

        for user_info in response:
            id_to_info[user_info['id']] = user_info

    return id_to_info
def get_info_by_screen_name(t, screen_names):
    """Look up user records for a collection of screen names.

    The names are sent through ``make_twitter_request`` to ``t.users.lookup``
    in batches of at most 100, joined into a comma-separated ``screen_name``
    argument.

    Args:
        t: Client object exposing ``users.lookup``; it is also handed to
            ``make_twitter_request`` as its first argument.
        screen_names: Iterable of screen names (anything ``str()`` can
            render). Any iterable is accepted, not only lists.

    Returns:
        A dict mapping each returned record's ``'screen_name'`` value to the
        record. If a request yields ``None`` the lookup stops early and only
        the records gathered so far are returned (best effort).
    """
    batch_size = 100

    # Materialize once so sets/generators work and so batches can be taken
    # by index instead of re-slicing the remainder on every pass.
    screen_names = list(screen_names)

    sn_to_info = {}
    for start in range(0, len(screen_names), batch_size):
        batch = screen_names[start:start + batch_size]
        screen_names_str = ','.join(str(sn) for sn in batch)

        response = make_twitter_request(t, t.users.lookup,
                                        screen_name=screen_names_str)
        if response is None:
            break

        # Handle Twitter API quirk: a lone record may arrive as a bare dict
        # instead of a list. isinstance (rather than an exact type check)
        # also covers dict subclasses, e.g. if the client wraps responses.
        if isinstance(response, dict):
            response = [response]

        for user_info in response:
            sn_to_info[user_info['screen_name']] = user_info

    return sn_to_info
def get_all_followers_ids(user_id, limit):
    """Collect follower ids for ``user_id`` by walking a cursored listing.

    Pages are requested through ``make_twitter_request`` using
    ``t.followers.ids`` (``t`` comes from the enclosing module scope),
    starting at cursor -1 and following each response's ``'next_cursor'``
    until it becomes 0.

    Args:
        user_id: Passed through as the ``user_id`` request argument.
        limit: Stop requesting further pages once at least this many ids
            have been gathered.

    Returns:
        The list of gathered ids. Whole pages are appended and the limit is
        only checked after each page, so the list can be longer than
        ``limit``. If a request yields ``None``, collection stops and the
        ids gathered so far are returned.
    """
    collected = []
    next_cursor = -1

    while next_cursor != 0:
        page = make_twitter_request(t, t.followers.ids,
                                    user_id=user_id, cursor=next_cursor)
        got_page = page is not None

        if got_page:
            collected.extend(page['ids'])
            next_cursor = page['next_cursor']

        # Progress is reported after every request, successful or not.
        print >> sys.stderr, 'Fetched %i total ids for %s' % (len(collected), user_id)

        # Consider storing the ids to disk during each iteration to provide
        # an additional layer of protection from exceptional circumstances.

        if not got_page or len(collected) >= limit:
            break

    return collected
view.sync(db)

# Resume from the id reported by the 'index/max_tweet_id' view: the first row's
# value becomes since_id. When the view yields no rows, indexing [0] raises
# IndexError and since_id falls back to 1.
try:
    KW['since_id'] = int(list(db.view('index/max_tweet_id'))[0].value)
except IndexError:
    KW['since_id'] = 1

# Harvest tweets for the given timeline.
# For friend and home timelines, the unofficial limitation is about 800 statuses
# although other documentation may state otherwise. The public timeline only
# returns 20 statuses and gets updated every 60 seconds, so consider using the
# streaming API for public statuses. See http://bit.ly/fgJrAx
# Note that the count and since_id params have no effect for the public timeline.

page_num = 1
while page_num <= MAX_PAGES:
    KW['page'] = page_num
    api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline')
    tweets = make_twitter_request(t, api_call, **KW)

    # Other callers of make_twitter_request in this file treat None as a
    # possible result; without this guard db.update(None) / len(None) would
    # raise. Stop harvesting instead of crashing mid-run.
    if tweets is None:
        print >> sys.stderr, 'No response for page %i, stopping' % (page_num,)
        break

    # Actually storing tweets in CouchDB is as simple as passing them
    # into a call to db.update
    db.update(tweets, all_or_nothing=True)

    print >> sys.stderr, 'Fetched %i tweets' % (len(tweets),)

    page_num += 1

print >> sys.stderr, 'Done fetching tweets'
language='python') view.sync(db) try: KW['since_id'] = int([_id for _id in db.view('index/max_tweet_id')][0].value) except IndexError, e: KW['since_id'] = 1 # Harvest tweets for the given timeline. # For friend and home timelines, the unofficial limitation is about 800 statuses # although other documentation may state otherwise. The public timeline only returns # 20 statuses and gets updated every 60 seconds, so consider using the streaming API # for public statuses. See http://bit.ly/fgJrAx # Note that the count and since_id params have no effect for the public timeline page_num = 1 while page_num <= MAX_PAGES: KW['page'] = page_num api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline') tweets = make_twitter_request(t, api_call, **KW) # Actually storing tweets in CouchDB is as simple as passing them # into a call to db.update db.update(tweets, all_or_nothing=True) print >> sys.stderr, 'Fetched %i tweets' % (len(tweets),) page_num += 1 print >> sys.stderr, 'Done fetching tweets'
def get_tweet_timeline(self, user=None, name_=None):
    """Fetch a timeline page by page and separate tweets from their authors.

    Args:
        user: Selects what to fetch.
            * falsy: user timeline of the screen name returned by
              ``self.Config.get('secuser', 'owner')``;
            * ``'user'``: user timeline of ``name_`` (up to 16 pages);
            * ``'home'``: home timeline (up to 4 pages);
            * ``'public'``: public timeline, 1 page. The original author
              noted this one does not work ("AttributeError: twmac instance
              has no attribute 'account'").
            Any other value leaves the defaults in place (user timeline,
            16 pages, no ``screen_name`` argument).
        name_: Screen name; required when ``user == 'user'``.

    Returns:
        ``-1`` (after printing an error) when ``user == 'user'`` and
        ``name_`` is falsy. Otherwise a dict with:
            * ``'num_tweets'``: number of tweets collected;
            * ``'tweets'``: the tweet dicts, each with its ``'user'`` entry
              replaced by that user's id;
            * ``'users'``: the extracted user dicts, with their ``'id'`` key
              renamed to ``'_id'`` (one entry per tweet, duplicates kept).
        If a request yields ``None``, fetching stops and whatever was
        collected so far is returned.

    Notes carried over from the original usage comments:
        * ~800 statuses are available from the home timeline.
        * ~3200 statuses are available from the user timeline.
        * The public timeline updates every 60 secs and returns 20 statuses.
        * See the streaming/search API for additional options to harvest
          tweets.
    """
    params = {  # For the Twitter API call
        'count': 200,
        'skip_users': 'true',
        'include_entities': 'true',
        'since_id': 1,
    }
    timeline_name = 'user'
    max_pages = 16

    if not user:
        params['screen_name'] = self.Config.get('secuser', 'owner')
    elif user == 'user':
        if not name_:
            # Parenthesized single-argument form prints the same text under
            # both the print statement and the print function.
            print("ERROR: user needs a screen_name")
            return -1
        params['screen_name'] = name_
    elif user == 'home':
        timeline_name = 'home'
        max_pages = 4
    elif user == 'public':
        # This one does not work:
        # AttributeError: twmac instance has no attribute 'account'
        timeline_name = 'public'
        max_pages = 1

    tweets = []
    users = []

    for page_num in range(1, max_pages + 1):
        params['page'] = page_num
        api_call = getattr(self.twitter_api.statuses,
                           timeline_name + '_timeline')
        page = make_twitter_request(self, api_call, **params)

        # Other callers of make_twitter_request in this file treat None as a
        # possible result; iterating it would raise TypeError, so stop here
        # and return what has been gathered.
        if page is None:
            break

        for tweet in page:
            # Split the embedded user out of the tweet: the user dict gets
            # its 'id' renamed to '_id', the tweet keeps only the user id.
            author = tweet['user']
            author_id = author.pop('id')
            author['_id'] = author_id
            users.append(author)
            tweet['user'] = author_id
            tweets.append(tweet)

    return {
        'num_tweets': len(tweets),
        'tweets': tweets,
        'users': users,
    }