def post(self):
    """Task handler: crawls new timeline statuses for the request's session.

    On success, pings the PubSubHubbub hub if there were updates and reports
    a summary in the response body. If the task carried an
    'expected_status_id' (i.e. it was triggered by a ping), re-enqueues
    itself with a growing countdown until that status shows up or
    PING_UPDATE_RETRY_MAX retries are exhausted. Writes a 500 and returns
    early if the Twitter API call fails.
    """
    session = data.Session.from_request(self.request)
    result, had_error = twitterappengine.exec_twitter_api(
        lambda: update_timeline(session),
        error_detail='updating timeline for %s' % session.twitter_id)
    if had_error:
        self._write_error(500)
        return
    had_updates, status_ids = result

    if had_updates:
        ping_hub([session.get_timeline_feed_url()])
        if not session.crawled_on_demand:
            session.enqueue_crawl_on_demand_task()

    self.response.out.write(
        'Updated %s, %s updates' % (
            session.twitter_id,
            # Conditional expression instead of the fragile
            # "cond and a or b" idiom.
            'had' if had_updates else 'didn\'t have'))

    # If this update was triggered in response to a ping, see if we actually
    # got the status that we were looking for, otherwise we have to try
    # again.
    try:
        expected_status_id = int(self.request.get('expected_status_id'))
        update_retry_count = int(self.request.get('update_retry_count'))

        logging.info('Looking for expected status %d...' % expected_status_id)
        if expected_status_id in status_ids:
            logging.info('...found')
            return

        # >= (instead of ==) so that an unexpectedly large retry count can
        # never slip past the limit and requeue forever.
        if update_retry_count >= PING_UPDATE_RETRY_MAX:
            logging.info('...not found, and no retries left')
            return

        update_retry_count += 1
        logging.info(
            '...not found, queuing the %d-th retry' % update_retry_count)
        session.enqueue_update_task(
            countdown=update_retry_count * PING_UPDATE_DELAY_SEC,
            expected_status_id=expected_status_id,
            update_retry_count=update_retry_count)
    except ValueError:
        # Ignore missing/invalid values
        return
def _update():
    """Refreshes FollowingData's cached map of follower relationships.

    First loads any previously stored singleton entity into the class-level
    cache (FollowingData._following_map / _last_update_time). If that cached
    copy is still fresh per FollowingData._is_stale(), returns immediately.
    Otherwise rebuilds the map from every Session's Twitter friend list and
    persists the result. On any Twitter API error the rebuild is abandoned
    and the stale data is kept (see error_detail message below).
    """
    stored_data = FollowingData.get_by_key_name(FollowingData._SINGLETON_ID)
    if stored_data:
        # The serialized following data ends up having its keys converted to
        # strings; convert them to numbers when deserializing.
        FollowingData._following_map = {}
        for twitter_id, following_twitter_ids in stored_data.following_map.items():
            twitter_id = long(twitter_id)
            FollowingData._following_map[twitter_id] = following_twitter_ids
        FollowingData._last_update_time = stored_data.last_update_time

    if not FollowingData._is_stale():
        return

    # Maps followed twitter_id -> list of follower twitter_ids (i.e. it is
    # inverted relative to GetFriendIDs, which returns who a user follows).
    following_map = {}
    for session in Session.all():
        twitter_id = long(session.twitter_id)
        # The lambda is invoked immediately by exec_twitter_api within this
        # iteration, so capturing the loop variables is safe.
        following_twitter_ids, had_error = twitterappengine.exec_twitter_api(
            lambda: session.create_api().GetFriendIDs(user_id=twitter_id),
            error_detail="can't get friend IDs for %s, using stale data" % session.twitter_id,
        )
        if had_error:
            # Bail out entirely; the previously-loaded (stale) class-level
            # cache remains in effect.
            return

        # TODO(mihaip): remove this and replace with rate-limitting, the
        # real problem is not how many people are followed, but how often
        # they tweet.
        if len(following_twitter_ids) > 200:
            logging.warning(
                "Not including followers for %d, following too "
                "many people (%d)" % (twitter_id, len(following_twitter_ids))
            )
            continue

        for following_twitter_id in following_twitter_ids:
            following_map.setdefault(following_twitter_id, []).append(twitter_id)

        # Users are also considered to be following themselves (since their
        # updates update their timeline).
        following_map.setdefault(twitter_id, []).append(twitter_id)

    stored_data = FollowingData(key_name=FollowingData._SINGLETON_ID, following_map=following_map)
    stored_data.put()

    FollowingData._following_map = following_map
    FollowingData._last_update_time = stored_data.last_update_time
def _update():
    """Refreshes FollowingData's cached follower map if it has gone stale.

    Loads the stored singleton (if any) into the class-level cache, and
    returns early when that cache is still fresh. Otherwise rebuilds the
    followed-id -> follower-ids map from every Session and persists it; on
    any Twitter API error the rebuild is abandoned and stale data is kept.
    """
    cached = FollowingData.get_by_key_name(FollowingData._SINGLETON_ID)
    if cached:
        # The serialized following data ends up having its keys converted to
        # strings; convert them to numbers when deserializing.
        FollowingData._following_map = {}
        for key, friend_ids in cached.following_map.items():
            FollowingData._following_map[long(key)] = friend_ids
        FollowingData._last_update_time = cached.last_update_time

    if not FollowingData._is_stale():
        return

    rebuilt_map = {}
    for session in Session.all():
        twitter_id = long(session.twitter_id)
        friend_ids, had_error = twitterappengine.exec_twitter_api(
            lambda: session.create_api().GetFriendIDs(user_id=twitter_id),
            error_detail='can\'t get friend IDs for %s, using stale data'
                % session.twitter_id)
        if had_error:
            return

        for friend_id in friend_ids:
            rebuilt_map.setdefault(friend_id, []).append(twitter_id)

        # Users are also considered to be following themselves (since their
        # updates update their timeline).
        rebuilt_map.setdefault(twitter_id, []).append(twitter_id)

    fresh_data = FollowingData(
        key_name=FollowingData._SINGLETON_ID, following_map=rebuilt_map)
    fresh_data.put()

    FollowingData._following_map = rebuilt_map
    FollowingData._last_update_time = fresh_data.last_update_time
def get_digest_for_list(list_owner, list_id, dev_mode):
    """Builds digest data for the given Twitter list.

    Looks up the list owner's timezone (falling back to None when the user
    lookup fails), fetches the list's statuses since the digest window start,
    and hands everything to _process_digest_statuses.
    """
    digest_start_time, digest_end_time, max_cache_age = _get_digest_timestamps()

    api = _get_digest_twitter_api(
        max_cache_age, key='%s/%s' % (list_owner, list_id))

    user, had_error = twitterappengine.exec_twitter_api(
        lambda: api.GetUser(list_owner),
        error_detail='user %s' % list_owner)
    timezone = twitterdisplay.get_timezone_for_user(user) \
        if not had_error else None

    list_fetcher = ListTwitterFetcher(api, list_owner, list_id,
                                      digest_start_time)
    statuses, had_error = list_fetcher.fetch()

    return _process_digest_statuses(
        statuses,
        digest_start_time,
        digest_end_time,
        had_error,
        dev_mode,
        timezone=timezone)
def fetch(self):
    """Runs this fetcher's underlying API call.

    Returns a (statuses, had_error) tuple; a falsy result (None or empty)
    is normalized to an empty list so callers can always iterate it.
    """
    result, had_error = twitterappengine.exec_twitter_api(
        self._fetch, error_detail=self._id())
    if not result:
        result = []
    return result, had_error
def _get_signed_in(self):
    """Serves the signed-in user's timeline as an Atom feed.

    Looks up the user (500 on API error), selects which cached statuses to
    include based on a time threshold (with special feed-windowing handling
    for PubSubHubbub hub fetches), renders them one-status-per-item through
    the feed.atom template, and sets a Last-Modified header.
    """
    twitter_id = self._session.twitter_id
    logging.info('Serving feed for %s' % twitter_id)

    user, had_error = twitterappengine.exec_twitter_api(
        lambda: self._api.GetUser(twitter_id),
        error_detail='user %s' % twitter_id)
    if had_error:
        self._write_error(500)
        return

    stream = data.StreamData.get_timeline_for_user(twitter_id)

    threshold_time = time.time() - FEED_STATUS_INTERVAL_SEC

    # It's wasteful to serve the hub the full set of items in the feed, so
    # we use a variant of the feed windowing technique described at
    # http://code.google.com/p/pubsubhubbub/wiki/PublisherEfficiency#Feed_windowing
    # to only give it new items. We treat the If-Modified-Since header as
    # an indication of the items that the hub already has, but we allow one
    # hour of overlap, in case of items getting dropped, replication delay,
    # cosmic rays, etc.
    if self._user_agent_contains('appid: pubsubhubbub'):
        if_modified_since = self._get_if_modified_since()
        if if_modified_since:
            logging.info('If-Modified-Since: %d' % if_modified_since)
            threshold_time = if_modified_since - IF_MODIFIED_SINCE_INTERVAL_SEC
            # Since we're serving a partial response, we don't want proxies
            # caching it.
            self.response.headers['Cache-Control'] = 'private'

    # We want the feed to have all tweets from the past day, but also
    # at least 10 items.
    feed_status_ids = []
    if stream:
        for status_id, status_timestamp_sec in stream.status_pairs():
            if status_timestamp_sec < threshold_time and \
                    len(feed_status_ids) >= MIN_FEED_ITEMS:
                break
            feed_status_ids.append(status_id)
    logging.info(' Feed has %d items' % len(feed_status_ids))

    status_data = data.StatusData.get_by_status_ids(feed_status_ids)
    statuses = [s.to_status() for s in status_data]
    timezone = twitterdisplay.get_timezone_for_user(user)

    # We don't actually want statuses grouped, instead we want one status
    # per item.
    status_groups = [
        twitterdisplay.DisplayStatusGroup(
            user=status.user,
            statuses=[status],
            thumbnail_size=thumbnails.LARGE_THUMBNAIL,
            timezone=timezone)
        for status in statuses
    ]

    updated_date = datetime.datetime.utcnow()

    self._write_template('birdfeeder/feed.atom', {
        'feed_title': '@%s Twitter Timeline' % user.screen_name,
        'updated_date_iso': updated_date.isoformat(),
        'feed_url': self.request.url,
        'status_groups': status_groups,
    }, content_type='application/atom+xml')

    self._add_last_modified_header(updated_date)
def _get_signed_in(self):
    """Serves the signed-in user's timeline as an Atom feed.

    Looks up the user via the caching API (500 on API error), selects which
    cached statuses to include based on a time threshold — using feed
    windowing when the client supports it, or a shorter fixed interval for
    PubSubHubbub hub crawls — then renders them one-status-per-item through
    the feed.atom template and sets a Last-Modified header.
    """
    twitter_id = self._session.twitter_id
    logging.info('Serving feed for %s' % twitter_id)

    user, had_error = twitterappengine.exec_twitter_api(
        lambda: self._caching_api.GetUser(
            user_id=twitter_id, include_entities=False),
        error_detail='user %s' % twitter_id)
    if had_error:
        self._write_error(500)
        return

    stream = data.StreamData.get_timeline_for_user(twitter_id)

    threshold_time = time.time() - FEED_STATUS_INTERVAL_SEC

    if self._should_use_feed_windowing():
        if_modified_since = self._get_if_modified_since()
        if if_modified_since:
            logging.info('If-Modified-Since: %d' % if_modified_since)
            threshold_time = if_modified_since - IF_MODIFIED_SINCE_INTERVAL_SEC
            # Since we're serving a partial response, we don't want proxies
            # caching it.
            self.response.headers['Cache-Control'] = 'private'
    # .get() instead of [] so that requests without a User-Agent header
    # don't raise a KeyError.
    elif 'pubsubhubbub' in self.request.headers.get('User-Agent', ''):
        # Google's PubSubHubbub hub no longer seems to send an
        # If-Modified-Since header, but it crawls often enough that we
        # can use a shorter interval for it.
        threshold_time = time.time() - PUBSUBHUBBUB_HUB_INTERVAL_SEC
        self.response.headers['Cache-Control'] = 'private'

    # We want the feed to have all tweets from the past day, but also
    # at least 10 items.
    feed_status_ids = []
    if stream:
        for status_id, status_timestamp_sec in stream.status_pairs():
            if status_timestamp_sec < threshold_time and \
                    len(feed_status_ids) >= MIN_FEED_ITEMS:
                break
            feed_status_ids.append(status_id)
    logging.info(' Feed has %d items' % len(feed_status_ids))

    status_data = data.StatusData.get_by_status_ids(feed_status_ids)
    statuses = [s.to_status() for s in status_data]
    timezone = twitterdisplay.get_timezone_for_auth_user(self._caching_api)

    # We don't actually want statuses grouped, instead we want one status
    # per item.
    status_groups = [
        twitterdisplay.DisplayStatusGroup(
            user=status.user,
            statuses=[status],
            thumbnail_size=thumbnails.LARGE_THUMBNAIL,
            timezone=timezone)
        for status in statuses
    ]

    updated_date = datetime.datetime.utcnow()

    self._write_template('birdfeeder/feed.atom', {
        'feed_title': '@%s Twitter Timeline' % user.screen_name,
        'updated_date_iso': updated_date.isoformat(),
        'feed_url': self.request.url,
        'status_groups': status_groups,
    }, content_type='application/atom+xml')

    self._add_last_modified_header(updated_date)