def post(self):
    """Delete every stored account and tweet, then re-index each account.

    Request parameters:
      num_tweets: optional number of tweets to index per account. Values
        that cannot be parsed as an integer fall back to 10; the parsed
        value is clamped to the range [1, 1000].

    Redirects to '/accounts' when finished.
    """
    # Remember each screen name before its entity is deleted so the
    # account can be re-crawled afterwards.
    accounts_to_recrawl = []
    for account in tweets.User.query():
        accounts_to_recrawl.append(account.screen_name)
        account.key.delete()

    for tweet in tweets.Tweet.query():
        tweet.key.delete()

    token_manager = oauth_token_manager.OauthTokenManager()
    fetcher = twitter_fetcher.TwitterFetcher(token_manager)

    raw_num_tweets = self.request.get('num_tweets')
    try:
        num_tweets = int(raw_num_tweets)
    except (TypeError, ValueError):
        logging.warning('Could not parse num_tweets from %s', raw_num_tweets)
        num_tweets = 10
    num_tweets = max(min(num_tweets, 1000), 1)
    logging.info('Recrawling with %s num_tweets', num_tweets)

    for screen_name in accounts_to_recrawl:
        # Pass the parsed and clamped count, not the raw request string:
        # otherwise the default and the [1, 1000] bound are never applied.
        IndexAccountData(screen_name, num_tweets, fetcher)
    self.redirect('/accounts')
def post(self):
    """Store the Twitter id for a team, resolved from a screen name.

    Request parameters:
      id: URL-quoted score reporter id used to find the Team entity.
      screen_name: screen name passed to TwitterFetcher.LookupUsers.

    Writes a status or error message to the response in every case.
    """
    raw_id = self.request.get('id', '')
    screen_name = self.request.get('screen_name', '')
    sr_id = urllib2.unquote(raw_id)

    matches = game_model.Team.query(
        game_model.Team.score_reporter_id == sr_id).fetch(1)
    if not matches:
        self.response.write('Could not look up team info for %s' % sr_id)
        return
    team = matches[0]

    # Resolve the screen name through the Twitter API.
    fetcher = twitter_fetcher.TwitterFetcher(
        oauth_token_manager.OauthTokenManager())
    try:
        users_json = fetcher.LookupUsers(screen_name, use_screen_name=True)
    except twitter_fetcher.FetchError as e:
        msg = 'Could not lookup users %s, %s' % (screen_name, e)
        logging.warning(msg)
        self.response.write(msg)
        return

    if not users_json:
        self.response.write('Could not look up team info for %s' % sr_id)
        return

    # Persist the id of the first returned user on the team.
    team.twitter_id = users_json[0].get('id', 0)
    team.put()
    msg = 'Updated %s with twitter id %d' % (sr_id, team.twitter_id)
    logging.info(msg)
    self.response.write(msg)
def setUp(self):
    """Activate a testbed and replace the urlfetch stub's fetch routine.

    The replacement answers each fetch with the next queued entry from
    self.return_statuscode and self.return_content.
    """
    self.testbed = tb = testbed.Testbed()
    tb.activate()
    tb.init_urlfetch_stub()
    self.url_fetch_stub = stub = tb.get_stub(testbed.URLFETCH_SERVICE_NAME)

    # Queues of canned responses, consumed front-first by the fake fetch.
    self.return_statuscode = [500]
    self.return_content = ['']

    def _CannedFetch(url, payload, method, headers, request, response,
                     follow_redirects=True,
                     deadline=urlfetch_stub._API_CALL_DEADLINE,
                     validate_certificate=(
                         urlfetch_stub._API_CALL_VALIDATE_CERTIFICATE_DEFAULT)):
        """Fill in the response from the queued status code and content."""
        status = self.return_statuscode.pop(0)
        response.set_statuscode(status)
        content = self.return_content.pop(0)
        response.set_content(content)

    # Keep a handle on the real implementation before it is replaced.
    self.saved_retrieve_url = stub._RetrieveURL
    self.token_manager = oauth_token_manager.OauthTokenManager(is_mock=True)

    # Hitting the real Twitter API takes three changes:
    #   1. Comment out the _RetrieveURL assignment just below.
    stub._RetrieveURL = _CannedFetch
    #   2. Supply a real Oauth bearer token here -- OR --
    self.token_manager.AddToken('mock token')
    #   3. Supply a real Oauth secret here.
    self.token_manager.AddSecret('mock secret')

    self.fetcher = twitter_fetcher.TwitterFetcher(self.token_manager)
def get(self):
    """Fetch ADMIN_USER's lists from Twitter and store their ids.

    The stored ManagedLists entity is rewritten only when the set of
    fetched list ids differs from the set already stored. A status
    message is written to the response in every case.
    """
    fetcher = twitter_fetcher.TwitterFetcher(
        oauth_token_manager.OauthTokenManager())
    try:
        json_obj = fetcher.LookupLists(
            ADMIN_USER, fake_data=self.request.get('fake_data'))
    except twitter_fetcher.FetchError as e:
        msg = 'Could not retrieve lists for %s' % ADMIN_USER
        logging.warning('%s: %s', msg, e)
        self.response.write(msg)
        return

    fetched_ids = []
    for list_obj in json_obj.get('lists', []):
        fetched_ids.append(list_obj.get('id_str', ''))
    fetched_id_set = set(fetched_ids)

    # Use the stored entity when there is one; otherwise start a new one.
    stored = ManagedLists.query(ancestor=lists_key()).fetch(1)
    if stored:
        record = stored[0]
    else:
        record = ManagedLists(parent=lists_key())

    previous_id_set = set(record.list_ids)
    if fetched_id_set == previous_id_set:
        msg = 'No lists to update: %s' % ','.join(previous_id_set)
        logging.info(msg)
        self.response.write(msg)
        return

    # The ids changed, so overwrite what is stored.
    record.list_ids = fetched_ids
    record.put()
    msg = 'Updated lists for user %s: %s' % (ADMIN_USER, fetched_ids)
    logging.info(msg)
    self.response.write(msg)
def post(self):
    """Run the TwitterFetcher calls for the submitted form and render them.

    Each response, or the FetchError raised in its place, is stored in
    the template values under its own key before the page is rendered.
    """
    values = self._PopulateTemplateValues()

    fetcher = twitter_fetcher.TwitterFetcher(
        oauth_token_manager.OauthTokenManager())

    def _ResultOrError(api_call, *args, **kwargs):
        """Return the call's result, or the FetchError it raised."""
        try:
            return api_call(*args, **kwargs)
        except twitter_fetcher.FetchError as e:
            return e

    values['user_timeline_response'] = _ResultOrError(
        fetcher.UserTimeline, values['account'], count=values['num'])
    values['lookup_lists_response'] = _ResultOrError(
        fetcher.LookupLists, values['account'], count=values['num'])
    values['lookup_user_response'] = _ResultOrError(
        fetcher.LookupUsers, values['user_id'])

    # Fetch statuses only when the lists response yields a first list id.
    first_list_id = self._GetFirstListId(values['lookup_lists_response'])
    if first_list_id:
        values['list_statuses_response'] = _ResultOrError(
            fetcher.ListStatuses, first_list_id, count=values['num'])

    page = JINJA_ENVIRONMENT.get_template('html/oauth_playground.html')
    self.response.write(page.render(values))
def post(self):
    """Look up the requested users on Twitter and update each of them.

    Request parameters:
      user_id: passed unchanged to TwitterFetcher.LookupUsers. It is
        expected to be a comma-separated list of user ids, as described at
        https://dev.twitter.com/rest/reference/get/users/lookup
    """
    user_ids = self.request.get('user_id')
    if not user_ids:
        msg = 'No user id specified'
        logging.warning(msg)
        self.response.write(msg)
        return

    fetcher = twitter_fetcher.TwitterFetcher(
        oauth_token_manager.OauthTokenManager())
    try:
        users_json = fetcher.LookupUsers(user_ids)
    except twitter_fetcher.FetchError as e:
        msg = 'Could not lookup users %s' % user_ids
        logging.warning('%s: %s', msg, e)
        self.response.write(msg)
        return

    for user_json in users_json:
        UpdateUser(user_json, {})
def get(self):
    """Crawl one Twitter list, or backfill from stored tweets, then update games.

    Request parameters read here:
      list_id: required; a message is written and the handler returns
        early when it is missing.
      backfill_date: parsed with ParseDate. When it yields a date, no call
        to Twitter is made and tweets come from the datastore instead.
      update_games_only: when set together with backfill_date, no tweets
        are loaded and UpdateGames is called with an empty tweet list.
      fake_data: forwarded to TwitterFetcher.ListStatuses.

    The datastore queries are started with fetch_async and their results
    are collected only just before UpdateGames is called.
    """
    list_id = self.request.get('list_id')
    if not list_id:
        msg = 'No list name specified'
        logging.warning(msg)
        self.response.write(msg)
        return
    last_tweet_id = self._LookupLatestTweet(list_id)
    crawl_state = CrawlState.FromRequest(self.request, last_tweet_id)

    # In parallel: look-up the latest set of games for this
    # division and cache it
    division, age_bracket, league = list_id_bimap.ListIdBiMap.GetStructuredPropertiesForList(
        crawl_state.list_id)

    backfill_date = ParseDate(self.request.get('backfill_date'))
    update_games_only = self.request.get('update_games_only')
    # games_start is the upper bound of the one-week windows queried below:
    # now for a live crawl, or one week after the backfill date.
    games_start = datetime.utcnow()
    if backfill_date:
        games_start = backfill_date + timedelta(weeks=1)
        # Query tweets for that week for this list
        if not update_games_only:
            tweet_query = tweets.Tweet.query(
                tweets.Tweet.from_list == list_id,
                tweets.Tweet.created_at > games_start - timedelta(weeks=1),
                tweets.Tweet.created_at < games_start).order().order(-tweets.Tweet.created_at)
            twts_future = tweet_query.fetch_async()

    # For Twitter, only pull up games last modified in the week ending at
    # games_start.
    twit_games_query = Game.query(
        Game.division == division,
        Game.age_bracket == age_bracket,
        Game.league == league,
        Game.last_modified_at > games_start - timedelta(weeks=1),
        Game.last_modified_at < games_start).order(-Game.last_modified_at)
    twit_games_future = twit_games_query.fetch_async()

    # For USAU, collect ids of tournaments (at most 100 fetched) that have a
    # sub-tournament matching this division and age bracket.
    tourney_ids = []
    if league == League.USAU:
        tourneys_query = Tournament.query(
            Tournament.end_date < games_start + timedelta(days=3))
        tourneys = tourneys_query.fetch(100)
        for tourney in tourneys:
            if not tourney.sub_tournaments:
                continue
            for st in tourney.sub_tournaments:
                if st.division == division and st.age_bracket == age_bracket:
                    tourney_ids.append(tourney.id_str)

    if tourney_ids:
        # For SR, pull up the games that belong to the tournaments found above.
        sr_games_query = Game.query(Game.division == division,
                                    Game.age_bracket == age_bracket,
                                    Game.league == league,
                                    Game.tournament_id.IN(tourney_ids))
        sr_games_future = sr_games_query.fetch_async()

    if not backfill_date:
        # Live crawl: fetch new statuses for the list from Twitter.
        token_manager = oauth_token_manager.OauthTokenManager()
        fetcher = twitter_fetcher.TwitterFetcher(token_manager)
        try:
            json_obj = fetcher.ListStatuses(
                crawl_state.list_id, count=crawl_state.num_to_crawl,
                since_id=crawl_state.last_tweet_id, max_id=crawl_state.max_id,
                fake_data=self.request.get('fake_data'))
        except twitter_fetcher.FetchError as e:
            msg = 'Could not fetch statuses for list %s' % crawl_state.list_id
            logging.warning('%s: %s', msg, e)
            self.response.write(msg)
            # TODO: retry the request a fixed # of times
            return

        # Update the various datastores.
        twts, users = self.UpdateTweetDbWithNewTweets(
            json_obj, crawl_state)

    if backfill_date:
        # Backfill: use the stored tweets (or none), with no user data.
        if update_games_only:
            twts = []
        else:
            twts = twts_future.get_result()
        users = {}

    # Collect the async query results and merge the two game lists.
    existing_games = twit_games_future.get_result()
    if tourney_ids:
        sr_existing_games = sr_games_future.get_result()
        existing_games.extend(sr_existing_games)
    self.UpdateGames(twts, existing_games, users, division, age_bracket, league)
def post(self):
    """Index the account named in the request, then redirect to '/accounts'.

    Request parameters:
      account: passed to IndexAccountData along with a count of 100.
    """
    fetcher = twitter_fetcher.TwitterFetcher(
        oauth_token_manager.OauthTokenManager())
    account = self.request.get('account')
    IndexAccountData(account, 100, fetcher)
    self.redirect('/accounts')