Exemplo n.º 1
0
def try_request_multiple_times(url, **kwargs):
    """
    Send a GET request to the Facebook Graph API, retrying on failure.

    Requests to the API fail every now and then (mostly by timing out), so
    the request is attempted up to four times: three attempts with a 60
    second timeout whose failures are only logged, followed by a final
    attempt with a 100 second timeout whose failure is reported to the
    caller.

    If no (or empty) ``headers`` are passed, an authorization header is
    built from the ``FB_ACCESS_TOKEN`` environment variable. All remaining
    keyword arguments are forwarded to ``requests.get``.

    Raises EnvironmentError if headers are needed but the token is not set.
    """
    request_headers = kwargs.pop('headers', None)
    if not request_headers:
        token = os.getenv('FB_ACCESS_TOKEN')
        if not token:
            raise EnvironmentError("FB Access token is not set")
        request_headers = {'Authorization': 'Bearer ' + token}

    # First three attempts: any request failure is logged and retried
    remaining_attempts = 3
    while remaining_attempts > 0:
        remaining_attempts -= 1
        try:
            reply = requests.get(
                url,
                timeout=60,
                headers=request_headers,
                **kwargs
            )
            reply.raise_for_status()
        except requests.RequestException as e:
            logger.error("An Error occurred requesting the Facebook API.\n"
                         "Trying to request the API again.\n"
                         f"Error message: {e}")
        else:
            return reply

    # Last attempt: more generous timeout, exceptions are not swallowed
    last_reply = requests.get(
        url,
        timeout=100,
        headers=request_headers,
        **kwargs
    )

    # A 400 response is handed back to the caller (it occurs when trying to
    # access a foreign object); every other error status raises right here
    # rather than letting the caller process an invalid response.
    if last_reply.ok or last_reply.status_code == 400:
        return last_reply
    last_reply.raise_for_status()
    return last_reply
Exemplo n.º 2
0
def get_categories():
    """
    Download event categories from the gomus API.

    Fall back to a manual list if the API is not available. This covers
    every kind of request failure (connection errors, timeouts and HTTP
    error statuses) as well as a response body that is not valid JSON.

    Returns the category names as a sorted list.
    """
    try:
        url = 'https://barberini.gomus.de/api/v4/events/categories'
        # Without a timeout, an unresponsive server would block forever
        response = requests.get(url, timeout=60)
        response.raise_for_status()
        response_json = response.json()
        categories = [
            category.get('name')
            for category in response_json.get('categories')
        ]
    except (requests.RequestException, ValueError) as e:
        # RequestException is the base class of HTTPError, ConnectionError
        # and Timeout; ValueError is what a JSON decoding failure raises.
        # Log the error and continue with the fallback instead of raising.
        logger.error(f"Unable to fetch event categories! "
                     f"Using manual list as fallback. Error: {e}")
        categories = [
            "Event", "Gespräch", "Kinder-Workshop", "Konzert", "Lesung",
            "Öffentliche Führung", "Vortrag"
        ]
    categories.sort()
    return categories
Exemplo n.º 3
0
    def fetch_for_country(self, country_code):
        """
        Collect the customer reviews of the app for a single country.

        The app ID is read from the JSON input of this task. Starting at
        page 1 of the iTunes customer review feed (most recent first),
        pages are fetched via ``fetch_page`` until it returns no next URL.

        An HTTP 503, or an HTTP 403/404 for any country other than DE, US
        and GB, is logged and ends the pagination; the reviews collected
        so far are kept. Every other HTTP error is re-raised.

        Returns a DataFrame with one row per review, 'app_id' as the first
        column and an additional 'country_code' column.
        """
        with self.input().open('r') as facts_file:
            facts = json.load(facts_file)
        app_id = facts['ids']['apple']['appId']

        next_url = (
            f'https://itunes.apple.com/{country_code}/rss/customerreviews/'
            f'page=1/id={app_id}/sortby=mostrecent/xml'
        )
        reviews = []

        while next_url:
            try:
                page_reviews, next_url = self.fetch_page(next_url)
                reviews.extend(page_reviews)
            except requests.exceptions.HTTPError as error:
                if error.response is None:
                    raise
                status = error.response.status_code
                tolerable = status == 503 or (
                    status in {403, 404}
                    and country_code not in {'DE', 'US', 'GB'}
                )
                if not tolerable:
                    raise
                logger.error(f"Encountered {error.response.status_code} "
                             f"server error '{error}' for country code "
                             f"'{country_code}'")
                logger.error("Continuing anyway...")
                break

        if not reviews:
            # no reviews for the given country code
            logger.debug(f"Empty data for country {country_code}")

        frame = pd.DataFrame(reviews)
        frame['country_code'] = country_code
        frame.insert(0, 'app_id', app_id)
        return frame
Exemplo n.º 4
0
    def run(self) -> None:
        """
        Fetch the reviews and write them to the task output as CSV.

        Credentials are loaded first, then the service is created and the
        raw reviews are fetched. An HttpError from the Google API client
        that carries no status is logged and treated as "no reviews";
        an HttpError with a status is re-raised. The extracted reviews are
        written without the DataFrame index.
        """
        logger.info("loading credentials...")
        creds = self.load_credentials()

        try:
            logger.info("creating service...")
            api_service = self.load_service(creds)
            logger.info("fetching reviews...")
            fetched = [*self.fetch_raw_reviews(api_service)]
        except googleapiclient.errors.HttpError as error:
            if error.resp.status is None:
                # Error without a status: log it and go on with no reviews
                logger.error(
                    "Generic HTTPError raised by Google Maps. Aborting. "
                    "If you see this error message frequently, consider "
                    "to do something against it."
                )
                fetched = []
            else:
                raise

        logger.info("extracting reviews...")
        extracted = self.extract_reviews(fetched)
        logger.info("success! writing...")

        with self.output().open('w') as csv_file:
            extracted.to_csv(csv_file, index=False)