def try_request_multiple_times(url, **kwargs):
    """
    Request the Facebook Graph API, retrying when a request fails.

    The API is requested up to four times: three attempts with a 60 second
    timeout whose failures are only logged, followed by one last attempt
    with a 100 second timeout whose failure is raised.

    If no ``headers`` keyword argument is given (or it is empty), an
    ``Authorization`` header is built from the ``FB_ACCESS_TOKEN``
    environment variable. All remaining keyword arguments are forwarded
    to ``requests.get``.

    Returns the response of the first successful attempt, or the response
    of the last attempt if that one is OK or has status code 400.

    Raises ``EnvironmentError`` if no headers are given and
    ``FB_ACCESS_TOKEN`` is not set, and a ``requests`` exception if the
    last attempt fails.
    """
    request_headers = kwargs.pop('headers', None)
    if not request_headers:
        token = os.getenv('FB_ACCESS_TOKEN')
        if not token:
            raise EnvironmentError("FB Access token is not set")
        request_headers = {'Authorization': 'Bearer ' + token}

    # Tolerant phase: any request error is logged and the request repeated
    attempts_left = 3
    while attempts_left > 0:
        attempts_left -= 1
        try:
            reply = requests.get(
                url, timeout=60, headers=request_headers, **kwargs)
            reply.raise_for_status()
        except requests.RequestException as e:
            logger.error("An Error occurred requesting the Facebook API.\n"
                         "Trying to request the API again.\n"
                         f"Error message: {e}")
        else:
            return reply

    # Last attempt: errors raised by the request itself propagate
    final_reply = requests.get(
        url, timeout=100, headers=request_headers, **kwargs)

    # A 400 is handed back to the caller (it occurs when trying to access
    # a foreign object); every other invalid response causes a clear error
    # instead of being processed further.
    if final_reply.ok or final_reply.status_code == 400:
        return final_reply
    final_reply.raise_for_status()
    return final_reply
def get_categories():
    """
    Download event categories from the gomus API.

    Fall back to a manual list if the API is not available. This covers
    HTTP error statuses as well as connection failures, timeouts and
    response bodies that are not valid JSON.

    Returns the category names as a sorted list.
    """
    url = 'https://barberini.gomus.de/api/v4/events/categories'
    try:
        # Without a timeout an unresponsive server would block forever
        response = requests.get(url, timeout=60)
        response.raise_for_status()
        response_json = response.json()
        categories = [
            category.get('name')
            for category in response_json.get('categories')
        ]
    except (requests.RequestException, ValueError) as e:
        # RequestException is the base class of HTTPError, ConnectionError
        # and Timeout; ValueError covers invalid JSON on requests versions
        # whose decode error is not a RequestException.
        # Log the error and use the fallback instead of raising.
        logger.error("Unable to fetch event categories! "
                     f"Using manual list as fallback. Error: {e}")
        categories = [
            "Event",
            "Gespräch",
            "Kinder-Workshop",
            "Konzert",
            "Lesung",
            "Öffentliche Führung",
            "Vortrag"
        ]
    categories.sort()
    return categories
def fetch_for_country(self, country_code):
    """
    Fetch all customer review pages for one country.

    The app id is read from the JSON facts file provided by
    ``self.input()``. Starting at page 1 of the iTunes customer reviews
    feed, ``self.fetch_page`` is called repeatedly; it returns the rows of
    the page and the URL to request next, and a falsy URL ends the loop.

    An HTTP 503, or an HTTP 403/404 for any country other than DE, US and
    GB, is logged and ends the download with the rows collected so far.
    Any other ``requests.exceptions.HTTPError`` is re-raised.

    Returns a DataFrame of the collected rows with ``app_id`` as first
    column and an additional ``country_code`` column.
    """
    with self.input().open('r') as facts_file:
        app_id = json.load(facts_file)['ids']['apple']['appId']

    next_url = (f'https://itunes.apple.com/{country_code}/rss/customerreviews/'
                f'page=1/id={app_id}/sortby=mostrecent/xml')
    collected = []
    while next_url:
        try:
            page_rows, next_url = self.fetch_page(next_url)
            collected.extend(page_rows)
        except requests.exceptions.HTTPError as error:
            if error.response is None:
                raise
            status = error.response.status_code
            tolerated = status == 503 or (
                status in {403, 404}
                and country_code not in {'DE', 'US', 'GB'})
            if not tolerated:
                raise
            logger.error(f"Encountered {error.response.status_code} "
                         f"server error '{error}' for country code "
                         f"'{country_code}'")
            logger.error("Continuing anyway...")
            break

    if not collected:
        # no reviews for the given country code
        logger.debug(f"Empty data for country {country_code}")

    reviews = pd.DataFrame(collected)
    reviews['country_code'] = country_code
    reviews.insert(0, 'app_id', app_id)
    return reviews
def run(self) -> None:
    """
    Fetch the reviews, extract them and write them as CSV.

    Credentials and service are obtained via ``self.load_credentials`` and
    ``self.load_service``. The raw reviews from ``self.fetch_raw_reviews``
    are passed to ``self.extract_reviews`` and the result is written,
    without index, to the target returned by ``self.output()``.

    A ``googleapiclient.errors.HttpError`` without a response status is
    logged and treated as "no reviews"; one with a status is re-raised.
    """
    logger.info("loading credentials...")
    creds = self.load_credentials()

    try:
        logger.info("creating service...")
        review_service = self.load_service(creds)
        logger.info("fetching reviews...")
        fetched = [*self.fetch_raw_reviews(review_service)]
    except googleapiclient.errors.HttpError as http_error:
        status_known = http_error.resp.status is not None
        if status_known:
            raise
        # Error without a status: continue with an empty result
        logger.error("Generic HTTPError raised by Google Maps. Aborting. "
                     "If you see this error message frequently, consider "
                     "to do something against it.")
        fetched = []

    logger.info("extracting reviews...")
    extracted = self.extract_reviews(fetched)

    logger.info("success! writing...")
    with self.output().open('w') as csv_target:
        extracted.to_csv(csv_target, index=False)