Exemplo n.º 1
0
def get_html(url: str) -> str:
    """
    Make the GET request, return the response text. Report any errors to Google Cloud Error reporting.

    The request is sent with a 1 second timeout and without following
    redirects. Any ``requests.exceptions.RequestException`` (including the
    one raised by ``raise_for_status`` for 4xx/5xx responses) is reported via
    ``err_client`` and swallowed.

    :type url: str
    :param url: The URL to make the GET request to

    :rtype: str
    :returns: requests.Response.text, or an empty string if the request failed
    """
    html_str = ''
    res = None

    headers = get_headers(url)

    try:
        res = requests.get(url=url,
                           headers=headers,
                           timeout=1,
                           allow_redirects=False)
        # Raise an exception for error codes (4xx or 5xx)
        res.raise_for_status()
        html_str = res.text
    except requests.exceptions.RequestException as err:
        # Compare against None explicitly: a Response object is falsy when
        # its status is 4xx/5xx, so a plain truthiness test would discard
        # the status code for exactly the HTTP errors reported here.
        status_code = res.status_code if res is not None else None
        # .get() so that a missing header cannot raise KeyError while the
        # original failure is being reported.
        request_context = error_reporting.HTTPContext(
            method='GET',
            url=url,
            user_agent=headers.get('User-Agent'),
            referrer=headers.get('Referer'),
            response_status_code=status_code)
        err_client.report(message=str(err), http_context=request_context)

    return html_str
def get_html(url_to_get: str, args: argparse.Namespace, **kwargs) -> str:
    """
    Make the GET request, return the response text. Report any errors to Google Cloud Error reporting.

    Any ``requests.exceptions.RequestException`` (including the one raised by
    ``raise_for_status`` for 4xx/5xx responses) is reported via ``err_client``
    and swallowed.

    :type url_to_get: str
    :param url_to_get: The URL to request

    :type args: argparse.Namespace
    :param args: command line arguments, passed to ``get_headers``

    :param kwargs: optional ``params`` (query string parameters) and
        ``timeout`` (seconds, or a (connect, read) tuple) forwarded to
        ``requests.get``; both default to ``None``

    :rtype: str
    :return: the response body text, or an empty string if the request failed
    """
    response = None
    response_str = ''

    headers = get_headers(args)

    try:
        # timeout defaults to None (wait indefinitely), which is what
        # requests does when no timeout is given; callers can now bound it.
        response = requests.get(url=url_to_get,
                                headers=headers,
                                params=kwargs.get('params', None),
                                timeout=kwargs.get('timeout', None))
        response.raise_for_status()
        response_str = response.text
    except requests.exceptions.RequestException as err:
        # Compare against None explicitly: a Response object is falsy when
        # its status is 4xx/5xx, so a plain truthiness test would discard
        # the status code for exactly the HTTP errors reported here.
        status_code = response.status_code if response is not None else None
        # .get() so that a missing header cannot raise KeyError while the
        # original failure is being reported.
        request_context = error_reporting.HTTPContext(
            method='GET',
            url=url_to_get,
            user_agent=headers.get('User-Agent'),
            referrer=headers.get('Referer'),
            response_status_code=status_code)
        err_client.report(message=str(err), http_context=request_context)

    return response_str
Exemplo n.º 3
0
    def report_error(self, resp, status=None):
        """Send a failure report to StackDriver Error reporting.

        Nothing is reported when ``resp`` contains any of the known, expected
        failure messages listed below.
        """
        # Substrings identifying failures that should not be reported.
        known_failures = (
            'Deadline exceeded while waiting for HTTP response',
            'urlfetch.Fetch() took too long',
            # WordPress Jetpack bugs
            # https://github.com/snarfed/bridgy/issues/161
            '"resp": "invalid_input"',
            # https://github.com/snarfed/bridgy/issues/750
            '"error": "jetpack_verification_failed"',
            # https://console.cloud.google.com/errors/CMjIg52NkMLQYA
            'The Jetpack site encountered an error and could not process the API request',
            # Blogger known bug
            # https://github.com/snarfed/bridgy/issues/175
            'bX-2i87au',
            # Tumblr: transient Disqus error looking up thread
            # https://github.com/snarfed/bridgy/issues/177
            "Invalid argument, 'thread': Unable to find thread",
            # expected for partially set up tumblr accounts
            "we haven't found your Disqus account",
            # Twitter 5MB image file size limit
            '"message":"Image file size must be',
            # Twitter media file number limits
            'Tweet with media must have exactly 1 gif or video',
            # Facebook image type/size requirements
            'Missing or invalid image file',
            "Your photos couldn't be uploaded. Photos should be less than 4 MB",
            # Twitter duplicate publish attempts
            'Status is a duplicate.',
            'You have already favorited this status.',
            'You have already retweeted this',
            # Facebook duplicate publish attempts
            'This status update is identical to the last one you posted.',
            # WordPress duplicate comment (HTTP 409 Conflict whose body
            # carries "error": "comment_duplicate")
            'comment_duplicate',
        )
        if any(marker in resp for marker in known_failures):
            return

        # Subject is "<ClassName> <entity type> <entity status>", or
        # "<ClassName> failed" when there is no entity.
        if self.entity:
            outcome = '%s %s' % (self.entity.type, self.entity.status)
        else:
            outcome = 'failed'
        subject = '%s %s' % (self.__class__.__name__, outcome)

        if self.source:
            user = self.source.bridgy_url()
        else:
            user = None

        context = error_reporting.HTTPContext(
            method=request.method,
            url=request.url,
            response_status_code=status,
            remote_ip=request.remote_addr)
        util.report_error(subject, user=user, http_context=context)
Exemplo n.º 4
0
def insert_daily_fitness_data_thread(bucket_name, retry, username):
    """Fetch and insert yesterday's fitness data for one user, with retries.

    "Yesterday" is computed in the user's own timezone. For each category
    (heart rate, activities, steps, calories) the data is fetched and
    inserted through ``backend.UserDataFlow``. A failed attempt is reported
    to Error Reporting and retried once, unless the failure is an invalid
    refresh token or an HTTP 400/401/403/404, which abandons the category
    immediately. On success the get and insert results are uploaded to the
    Cloud Storage bucket as JSON.

    :param bucket_name: name of the Cloud Storage bucket that receives the
        per-category result files
    :param retry: dict mutated in place; ``retry[username]`` is replaced by a
        dict keyed by category. Each category dict may hold ``'error'`` (text
        of the last failure) and, on success, ``'gs://'`` (list of
        ``"<bucket>/<path>"`` strings for the uploaded files)
    :param username: user whose data is processed
    """
    error_reporting_client = error_reporting.Client()
    http_context = error_reporting.HTTPContext(
        method='GET',
        url='/v1/insert_daily_fitness',
        user_agent='cron job for user {}'.format(username))
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    http_auth, timezone = get_google_http_auth_n_user_timezone(username)
    # get today's local date - 1 day
    yesterday_local = datetime.now(pytz.timezone(timezone)) - timedelta(days=1)
    yesterday_local_str = yesterday_local.strftime(backend.DATE_FORMAT)
    df = backend.UserDataFlow(username, http_auth, yesterday_local.year,
                              yesterday_local.month, yesterday_local.day,
                              backend.current_milli_time(), timezone)
    retry[username] = {}
    categories = {'heartrate', 'activities', 'steps', 'calories'}
    for category in categories:
        # state is the same dict object stored at retry[username][category]
        state = retry[username][category] = {}
        # countdown is the number of retries
        state['countdown'] = 1
        gs_path_get = '{}/{}/{}.json'.format(username, yesterday_local_str,
                                             category)
        gs_path_insert = '{}/{}/{}_inserted_count.json'.format(
            username, yesterday_local_str, category)
        get_result = None
        insert_result = None

        # start of the retry logic
        # countdown is None after a success. Test for None explicitly rather
        # than relying on `None >= 0`, which is False on Python 2 but raises
        # TypeError on Python 3.
        while state['countdown'] is not None and state['countdown'] >= 0:
            try:
                if category == 'heartrate':
                    # get and insert heart rate data
                    insert_result = df.get_and_post_heart_rate()
                    get_result = insert_result['heart_datasets']
                elif category == 'activities':
                    # get and insert activities data
                    get_result = df.get_activities()
                    insert_result = df.post_activities()
                elif category == 'steps':
                    # get and insert step counts
                    get_result = df.get_steps()
                    insert_result = df.post_steps()
                elif category == 'calories':
                    # get and insert calories
                    get_result = df.get_calories()
                    insert_result = df.post_calories()
                # set to None upon success of getting API data and inserting to BigQuery
                state['countdown'] = None
            except client.HttpAccessTokenRefreshError as err:
                http_context.responseStatusCode = httplib.UNAUTHORIZED
                user_token_err = '{} has invalid refresh token'.format(
                    username)
                error_reporting_client.report_exception(
                    http_context=http_context, user=user_token_err)
                state['error'] = "{}: {}".format(user_token_err, err)
                # can't recover; abandon retry
                state['countdown'] = -2
            except googleapiclient.errors.HttpError as err:
                http_context.responseStatusCode = err.resp.status
                # NOTE(review): the literal has no placeholder, so the
                # format() argument is ignored -- confirm the intended text.
                error_reporting_client.report_exception(
                    http_context=http_context,
                    user='******'.format(username))
                state['error'] = str(err)
                if err.resp.status in (httplib.BAD_REQUEST,
                                       httplib.UNAUTHORIZED, httplib.NOT_FOUND,
                                       httplib.FORBIDDEN):
                    # can't recover; abandon retry
                    state['countdown'] = -2
            except Exception as err:
                # https://googleapis.github.io/google-cloud-python/latest/error-reporting/usage.html
                # NOTE(review): the literal has no placeholder, so the
                # format() arguments are ignored -- confirm the intended text.
                error_reporting_client.report_exception(
                    http_context=http_context,
                    user='******'.format(
                        category, username))
                state['error'] = str(err)

            # if countdown isn't None a failure happened: decrement the retry
            # count (an abandoned -2 becomes -3, which also ends the loop)
            if state['countdown'] is not None:
                state['countdown'] -= 1

        # per category, putting the get, insert results on Cloud Storage upon success
        if state['countdown'] is None:
            state['gs://'] = []
            blob_get_result = bucket.blob(gs_path_get)
            blob_get_result.upload_from_string(json.dumps(get_result))
            state['gs://'].append("{}/{}".format(bucket_name, gs_path_get))
            blob_insert_result = bucket.blob(gs_path_insert)
            blob_insert_result.upload_from_string(json.dumps(insert_result))
            state['gs://'].append("{}/{}".format(bucket_name, gs_path_insert))

        # countdown is internal bookkeeping; don't leave it in the result
        state.pop('countdown')
Exemplo n.º 5
0
def report_error(error):
    """Report an exception to Error Reporting, tagged with the sensor ID.

    The module-level ``sensorID`` is sent as the HTTP context's user agent so
    that the correct sensor can be identified from the error payload.
    ``error`` itself is not used here; the exception details are gathered by
    ``report_exception()``.
    """
    reporter = error_reporting.Client()
    sensor_context = error_reporting.HTTPContext(user_agent=sensorID)
    reporter.report_exception(http_context=sensor_context)
 def error_handler(request, exc):
     """Pass the request's method and URL path to ``handler`` as an HTTP context.

     ``exc`` is accepted but not used here.
     """
     context = error_reporting.HTTPContext(method=request.method,
                                           url=request.url.path)
     handler(http_context=context)