Example #1
    def test_is_fennec(self):
        raw_crash = {
            'ProductName': 'Fennec'
        }

        throttler = Throttler(ConfigManager.from_dict({}))
        assert throttler.throttle(raw_crash) == (ACCEPT, 'is_fennec', 100)
Example #2
    def test_is_thunderbird_seamonkey(self, product):
        raw_crash = {
            'ProductName': product
        }

        throttler = Throttler(ConfigManager.from_dict({}))
        assert throttler.throttle(raw_crash) == (ACCEPT, 'is_thunderbird_seamonkey', 100)
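Several snippets in this listing (test_is_thunderbird_seamonkey, test_is_version_alpha_beta_special, test_is_nightly, test_email, test_productname_reject) take extra arguments such as product, version, channel, or expected; in the source test suite those values come from parametrize decorators that the snippet extraction dropped. A minimal sketch of how such a test is typically parametrized, with made-up product values, could look like this:

    # Sketch only: assumes "import pytest" at module level; the product values are hypothetical.
    @pytest.mark.parametrize('product', ['Thunderbird', 'SeaMonkey'])
    def test_is_thunderbird_seamonkey(self, product):
        raw_crash = {'ProductName': product}
        throttler = Throttler(ConfigManager.from_dict({}))
        assert throttler.throttle(raw_crash) == (ACCEPT, 'is_thunderbird_seamonkey', 100)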
Example #3
    def test_is_thunderbird_seamonkey(self, product):
        raw_crash = {
            'ProductName': product
        }

        throttler = Throttler(ConfigManager.from_dict({}))
        assert throttler.throttle(raw_crash) == (ACCEPT, 'is_thunderbird_seamonkey', 100)
Example #4
    def test_bad_value(self):
        raw_crash = {
            'ProductName': ''
        }

        throttler = Throttler(ConfigManager.from_dict({}))
        assert throttler.throttle(raw_crash) == (DEFER, 'NO_MATCH', 0)
Example #5
    def test_is_fennec(self):
        raw_crash = {
            'ProductName': 'Fennec'
        }

        throttler = Throttler(ConfigManager.from_dict({}))
        assert throttler.throttle(raw_crash) == (ACCEPT, 'is_fennec', 100)
Example #6
    def test_comments(self):
        raw_crash = {
            'ProductName': 'Test',
            'Comments': 'foo bar baz'
        }

        throttler = Throttler(ConfigManager.from_dict({}))
        assert throttler.throttle(raw_crash) == (ACCEPT, 'has_comments', 100)
Example #7
    def test_is_version_alpha_beta_special(self, version):
        raw_crash = {
            'ProductName': 'Test',
            'Version': version
        }

        throttler = Throttler(ConfigManager.from_dict({}))
        assert throttler.throttle(raw_crash) == (ACCEPT, 'is_version_alpha_beta_special', 100)
Example #8
    def test_comments(self):
        raw_crash = {
            'ProductName': 'Test',
            'Comments': 'foo bar baz'
        }

        throttler = Throttler(ConfigManager.from_dict({}))
        assert throttler.throttle(raw_crash) == (ACCEPT, 'has_comments', 100)
Example #9
    def test_is_nightly(self, channel):
        raw_crash = {
            'ProductName': 'Test',
            'ReleaseChannel': channel
        }

        throttler = Throttler(ConfigManager.from_dict({}))
        assert throttler.throttle(raw_crash) == (ACCEPT, 'is_nightly', 100)
Example #10
    def test_is_version_alpha_beta_special(self, version):
        raw_crash = {
            'ProductName': 'Test',
            'Version': version
        }

        throttler = Throttler(ConfigManager.from_dict({}))
        assert throttler.throttle(raw_crash) == (ACCEPT, 'is_version_alpha_beta_special', 100)
Example #11
    def test_is_nightly(self, channel):
        raw_crash = {
            'ProductName': 'Test',
            'ReleaseChannel': channel
        }

        throttler = Throttler(ConfigManager.from_dict({}))
        assert throttler.throttle(raw_crash) == (ACCEPT, 'is_nightly', 100)
Example #12
    def test_infobar(self):
        raw_crash = {
            'ProductName': 'Firefox',
            'SubmittedFromInfobar': 'true',
            'Version': '52.0.2',
            'BuildID': '20171223222554',
        }
        throttler = Throttler(ConfigManager.from_dict({}))
        assert throttler.throttle(raw_crash) == (REJECT, 'infobar_is_true', None)
Example #13
    def test_email(self, email, expected):
        raw_crash = {
            'ProductName': 'BarTest',
        }
        if email is not None:
            raw_crash['Email'] = email

        throttler = Throttler(ConfigManager.from_dict({}))
        assert throttler.throttle(raw_crash) == expected
Example #14
    def test_hangid(self):
        raw_crash = {
            'ProductName': 'FireSquid',
            'Version': '99',
            'ProcessType': 'browser',
            'HangID': 'xyz'
        }

        throttler = Throttler(ConfigManager.from_dict({}))
        assert throttler.throttle(raw_crash) == (REJECT, 'has_hangid_and_browser', None)
Example #15
    def test_hangid(self):
        raw_crash = {
            'ProductName': 'FireSquid',
            'Version': '99',
            'ProcessType': 'browser',
            'HangID': 'xyz'
        }

        throttler = Throttler(ConfigManager.from_dict({}))
        assert throttler.throttle(raw_crash) == (REJECT, 'has_hangid_and_browser', None)
Example #16
    def test_productname_no_unsupported_products(self):
        """Verify productname rule doesn't do anything if using ALL_PRODUCTS"""
        throttler = Throttler(ConfigManager.from_dict({
            'PRODUCTS': 'antenna.throttler.ALL_PRODUCTS'
        }))
        raw_crash = {
            'ProductName': 'testproduct'
        }
        # This is an unsupported product, but no earlier rule matches it, so it
        # gets caught by the last accept_everything rule
        assert throttler.throttle(raw_crash) == (ACCEPT, 'accept_everything', 100)
Example #17
    def test_productname_reject(self, caplogpp, productname, expected):
        """Verify productname rule blocks unsupported products"""
        with caplogpp.at_level(logging.INFO, logger='antenna'):
            # Need a throttler with the default configuration which includes
            # supported products
            throttler = Throttler(ConfigManager.from_dict({}))
            raw_crash = {}
            if productname is not None:
                raw_crash['ProductName'] = productname
            assert throttler.throttle(raw_crash) == expected
            assert caplogpp.record_tuples == [
                ('antenna.throttler', logging.INFO, 'ProductName rejected: %r' % productname)
            ]
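The caplogpp fixture used above isn't shown in this listing. A minimal sketch, assuming it simply wraps pytest's built-in caplog with log propagation re-enabled for the 'antenna' logger so its records get captured, could look like this:

import logging

import pytest


@pytest.fixture
def caplogpp(caplog):
    # Hypothetical sketch: temporarily let 'antenna' records propagate to the
    # root logger so caplog can capture them, then restore the old setting.
    antenna_logger = logging.getLogger('antenna')
    old_propagate = antenna_logger.propagate
    antenna_logger.propagate = True
    try:
        yield caplog
    finally:
        antenna_logger.propagate = old_propagate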
Example #18
    def test_productname_fakeaccept(self, caplogpp):
        # This product isn't in the list and it's B2G which is the special case
        with caplogpp.at_level(logging.INFO, logger='antenna'):
            # Need a throttler with the default configuration which includes
            # supported products
            throttler = Throttler(ConfigManager.from_dict({}))
            raw_crash = {
                'ProductName': 'b2g'
            }
            assert throttler.throttle(raw_crash) == (FAKEACCEPT, 'b2g', 100)
            assert caplogpp.record_tuples == [
                ('antenna.throttler', logging.INFO, 'ProductName B2G: fake accept')
            ]
Example #19
    def test_percentage(self, randommock):
        throttler = Throttler(ConfigManager.from_dict({}))

        # Overwrite the rule set for something we need
        throttler.rule_set = [
            Rule('test', 'ProductName', 'test', 50)
        ]

        with randommock(0.45):
            # Below the percentage line, so ACCEPT!
            assert throttler.throttle({'ProductName': 'test'}) == (ACCEPT, 'test', 50)

        with randommock(0.55):
            # Above the percentage line, so DEFER!
            assert throttler.throttle({'ProductName': 'test'}) == (DEFER, 'test', 50)
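The randommock fixture used by test_percentage is also not part of this listing. A minimal sketch, assuming the throttler draws its percentage from random.random and that patching that function globally is enough, could be:

import contextlib
from unittest import mock

import pytest


@pytest.fixture
def randommock():
    # Hypothetical sketch: returns a factory whose context manager patches
    # random.random to a fixed value so the percentage check is deterministic.
    # The patch target ('random.random') is an assumption; the real fixture may
    # need to target the throttler module's own import instead.
    @contextlib.contextmanager
    def _randommock(value):
        with mock.patch('random.random', return_value=value):
            yield
    return _randommock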
Example #20
    def test_percentage(self, randommock):
        throttler = Throttler(ConfigManager.from_dict({}))

        # Overwrite the rule set for something we need
        throttler.rule_set = [
            Rule('test', 'ProductName', 'test', 50)
        ]

        with randommock(0.45):
            # Below the percentage line, so ACCEPT!
            assert throttler.throttle({'ProductName': 'test'}) == (ACCEPT, 'test', 50)

        with randommock(0.55):
            # Above the percentage line, so DEFER!
            assert throttler.throttle({'ProductName': 'test'}) == (DEFER, 'test', 50)
Example #21
    def test_is_firefox(self, randommock):
        with randommock(0.09):
            raw_crash = {
                'ProductName': 'Firefox',
            }

            throttler = Throttler(ConfigManager.from_dict({}))
            assert throttler.throttle(raw_crash) == (ACCEPT, 'is_firefox_desktop', 10)

        with randommock(0.9):
            raw_crash = {
                'ProductName': 'Firefox',
            }

            throttler = Throttler(ConfigManager.from_dict({}))
            assert throttler.throttle(raw_crash) == (DEFER, 'is_firefox_desktop', 10)
Example #22
    def __init__(self, config):
        self.config = config.with_options(self)
        self.crashstorage = self.config('crashstorage_class')(
            config.with_namespace('crashstorage'))
        self.throttler = Throttler(config)

        # Gevent pool for crashmover workers
        self.crashmover_pool = Pool(size=self.config('concurrent_crashmovers'))

        # Queue for crashmover of crashes to save
        self.crashmover_save_queue = deque()

        # Register hb functions with heartbeat manager
        register_for_heartbeat(self.hb_report_health_stats)
        register_for_heartbeat(self.hb_run_crashmover)

        # Register life function with heartbeat manager
        register_for_life(self.has_work_to_do)
Example #23
    def __init__(self, config):
        self.config = config.with_options(self)
        self.crashstorage = self.config('crashstorage_class')(config.with_namespace('crashstorage'))
        self.crashpublish = self.config('crashpublish_class')(config.with_namespace('crashpublish'))
        self.throttler = Throttler(config)

        # Gevent pool for crashmover workers
        self.crashmover_pool = Pool(size=self.config('concurrent_crashmovers'))

        # Queue for crashmover work
        self.crashmover_queue = deque()

        # Register hb functions with heartbeat manager
        register_for_heartbeat(self.hb_report_health_stats)
        register_for_heartbeat(self.hb_run_crashmover)

        # Register life function with heartbeat manager
        register_for_life(self.has_work_to_do)
Example #24
    def test_is_firefox(self, randommock):
        with randommock(0.09):
            raw_crash = {
                'ProductName': 'Firefox',
            }

            throttler = Throttler(ConfigManager.from_dict({}))
            assert throttler.throttle(raw_crash) == (ACCEPT, 'is_firefox_desktop', 10)

        with randommock(0.9):
            raw_crash = {
                'ProductName': 'Firefox',
            }

            throttler = Throttler(ConfigManager.from_dict({}))
            assert throttler.throttle(raw_crash) == (DEFER, 'is_firefox_desktop', 10)
Example #25
class BreakpadSubmitterResource(RequiredConfigMixin):
    """Handles incoming breakpad-style crash reports.

    This handles incoming HTTP POST requests containing breakpad-style crash reports in
    multipart/form-data format.

    It can handle compressed or uncompressed POST payloads.

    It parses the payload from the HTTP POST request, runs it through the throttler with
    the specified rules, generates a crash_id, returns the crash_id to the HTTP client,
    saves the crash using the configured crashstorage class, and publishes it using
    the configured crashpublish class.

    .. Note::

       From when a crash comes in to when it's saved by the crashstorage class, the
       crash is entirely in memory. Keep that in mind when figuring out how to scale
       your Antenna nodes.


    The most important configuration bit here is choosing the crashstorage class.

    For example::

        CRASHSTORAGE_CLASS=antenna.ext.s3.crashstorage.S3CrashStorage

    """

    required_config = ConfigOptions()
    required_config.add_option(
        "dump_field",
        default="upload_file_minidump",
        doc="The name of the field in the POST data for dumps.",
    )
    required_config.add_option(
        "dump_id_prefix", default="bp-", doc="The crash type prefix."
    )
    required_config.add_option(
        "concurrent_crashmovers",
        default="2",
        parser=positive_int,
        doc=(
            "The number of crashes concurrently being saved and published. "
            "Each process gets this many concurrent crashmovers, so if you're "
            "running 5 processes on the node, then it's "
            "(5 * concurrent_crashmovers) sharing upload bandwidth."
        ),
    )

    # crashstorage things
    required_config.add_option(
        "crashstorage_class",
        default="antenna.ext.crashstorage_base.NoOpCrashStorage",
        parser=parse_class,
        doc="The class in charge of storing crashes.",
    )

    # crashpublish things
    required_config.add_option(
        "crashpublish_class",
        default="antenna.ext.crashpublish_base.NoOpCrashPublish",
        parser=parse_class,
        doc="The class in charge of publishing crashes.",
    )

    def __init__(self, config):
        self.config = config.with_options(self)
        self.crashstorage = self.config("crashstorage_class")(
            config.with_namespace("crashstorage")
        )
        self.crashpublish = self.config("crashpublish_class")(
            config.with_namespace("crashpublish")
        )
        self.throttler = Throttler(config)

        # Gevent pool for crashmover workers
        self.crashmover_pool = Pool(size=self.config("concurrent_crashmovers"))

        # Queue for crashmover work
        self.crashmover_queue = deque()

        # Register hb functions with heartbeat manager
        register_for_heartbeat(self.hb_report_health_stats)
        register_for_heartbeat(self.hb_run_crashmover)

        # Register life function with heartbeat manager
        register_for_life(self.has_work_to_do)

    def get_runtime_config(self, namespace=None):
        """Return generator of runtime configuration."""
        for item in super().get_runtime_config():
            yield item

        for item in self.throttler.get_runtime_config():
            yield item

        for item in self.crashstorage.get_runtime_config(["crashstorage"]):
            yield item

        for item in self.crashpublish.get_runtime_config(["crashpublish"]):
            yield item

    def check_health(self, state):
        """Return health state."""
        if hasattr(self.crashstorage, "check_health"):
            self.crashstorage.check_health(state)
        if hasattr(self.crashpublish, "check_health"):
            self.crashpublish.check_health(state)

    def hb_report_health_stats(self):
        """Heartbeat function to report health stats."""
        # The number of crash reports sitting in the work queue; this is a
        # direct measure of the health of this process--a number that's going
        # up means impending doom
        mymetrics.gauge("work_queue_size", value=len(self.crashmover_queue))

    def has_work_to_do(self):
        """Return whether this still has work to do."""
        work_to_do = len(self.crashmover_pool) + len(self.crashmover_queue)
        logger.info("work left to do: %s" % work_to_do)
        # Indicates whether or not we're sitting on crashes to save--this helps
        # keep Antenna alive until we're done saving crashes
        return bool(work_to_do)

    def extract_payload(self, req):
        """Parse HTTP POST payload.

        Decompresses the payload if necessary and then walks through the
        FieldStorage converting from multipart/form-data to Python datatypes.

        NOTE(willkg): The FieldStorage is poorly documented (in my opinion). It
        has a list attribute that is a list of FieldStorage items--one for each
        key/val in the form. For attached files, the FieldStorage will have a
        name, value and filename and the type should be
        ``application/octet-stream``. Thus we parse it looking for things of type
        ``text/plain``, ``application/json``, and ``application/octet-stream``.

        :arg falcon.request.Request req: a Falcon Request instance

        :returns: (raw_crash dict, dumps dict)

        :raises MalformedCrashReport:

        """
        # If we don't have a content type, raise MalformedCrashReport
        if not req.content_type:
            raise MalformedCrashReport("no_content_type")

        # If it's the wrong content type or there's no boundary section, raise
        # MalformedCrashReport
        content_type = [part.strip() for part in req.content_type.split(";", 1)]
        if (
            len(content_type) != 2
            or content_type[0] != "multipart/form-data"
            or not content_type[1].startswith("boundary=")
        ):
            if content_type[0] != "multipart/form-data":
                raise MalformedCrashReport("wrong_content_type")
            else:
                raise MalformedCrashReport("no_boundary")

        content_length = req.content_length or 0

        # If there's no content, raise MalformedCrashReport
        if content_length == 0:
            raise MalformedCrashReport("no_content_length")

        # Decompress payload if it's compressed
        if req.env.get("HTTP_CONTENT_ENCODING") == "gzip":
            mymetrics.incr("gzipped_crash")

            # If the content is gzipped, we pull it out and decompress it. We
            # have to do that here because nginx doesn't have a good way to do
            # that in nginx-land.
            gzip_header = 16 + zlib.MAX_WBITS
            try:
                data = zlib.decompress(req.stream.read(content_length), gzip_header)
            except zlib.error:
                # This indicates this isn't a valid compressed stream. Given
                # that the HTTP request insists it is, we're just going to
                # assume it's junk and not try to process any further.
                raise MalformedCrashReport("bad_gzip")

            # Stomp on the content length to correct it because we've changed
            # the payload size by decompressing it. We save the original value
            # in case we need to debug something later on.
            req.env["ORIG_CONTENT_LENGTH"] = content_length
            content_length = len(data)
            req.env["CONTENT_LENGTH"] = str(content_length)

            data = io.BytesIO(data)
            mymetrics.histogram(
                "crash_size", value=content_length, tags=["payload:compressed"]
            )
        else:
            # NOTE(willkg): At this point, req.stream is either a
            # falcon.request_helper.BoundedStream (in tests) or a
            # gunicorn.http.body.Body (in production).
            #
            # FieldStorage doesn't work with BoundedStream so we pluck out the
            # internal stream from that which works fine.
            #
            # FIXME(willkg): why don't tests work with BoundedStream?
            if isinstance(req.stream, BoundedStream):
                data = req.stream.stream
            else:
                data = req.stream

            mymetrics.histogram(
                "crash_size", value=content_length, tags=["payload:uncompressed"]
            )

        # Stomp on querystring so we don't pull it in
        request_env = dict(req.env)
        request_env["QUERY_STRING"] = ""

        fs = cgi.FieldStorage(fp=data, environ=request_env, keep_blank_values=1)

        raw_crash = {}
        dumps = {}

        has_json = False
        has_kvpairs = False

        for fs_item in fs.list:
            # If the field has no name, then it's probably junk, so let's drop it.
            if not fs_item.name:
                continue

            if fs_item.name == "dump_checksums":
                # We don't want to pick up the dump_checksums from a raw
                # crash that was re-submitted.
                continue

            elif fs_item.type and fs_item.type.startswith("application/json"):
                # This is a JSON blob, so load it and override raw_crash with
                # it.
                has_json = True
                try:
                    raw_crash = json.loads(fs_item.value)
                except json.decoder.JSONDecodeError:
                    raise MalformedCrashReport("bad_json")

            elif fs_item.type and (
                fs_item.type.startswith("application/octet-stream")
                or isinstance(fs_item.value, bytes)
            ):
                # This is a dump, so add it to dumps using a sanitized dump
                # name.
                dump_name = sanitize_dump_name(fs_item.name)
                dumps[dump_name] = fs_item.value

            else:
                # This isn't a dump, so it's a key/val pair, so we add that.
                has_kvpairs = True
                raw_crash[fs_item.name] = fs_item.value

        if not raw_crash:
            raise MalformedCrashReport("no_annotations")

        if has_json and has_kvpairs:
            # If the crash payload has both kvpairs and a JSON blob, then it's
            # malformed and we should dump it.
            raise MalformedCrashReport("has_json_and_kv")

        # Add a note about how the annotations were encoded in the crash report.
        # For now, there are two options: json and multipart.
        if has_json:
            raw_crash["payload"] = "json"
        else:
            raw_crash["payload"] = "multipart"

        return raw_crash, dumps

    def get_throttle_result(self, raw_crash):
        """Run raw_crash through throttler for a throttling result.

        :arg dict raw_crash: the raw crash to throttle

        :returns tuple: ``(result, rule_name, percentage)``

        """
        # At this stage, nothing has given us a throttle answer, so we
        # throttle the crash.
        result, rule_name, throttle_rate = self.throttler.throttle(raw_crash)

        # Save the results in the raw_crash itself
        raw_crash["legacy_processing"] = result
        raw_crash["throttle_rate"] = throttle_rate

        return result, rule_name, throttle_rate

    def cleanup_crash_report(self, raw_crash):
        """Remove anything from the crash report that shouldn't be there.

        This operates on the raw_crash in-place. This adds notes to ``collector_notes``.

        """
        collector_notes = []

        # Remove bad fields
        for bad_field in BAD_FIELDS:
            if bad_field in raw_crash:
                del raw_crash[bad_field]
                collector_notes.append("Removed %s from raw crash." % bad_field)

        raw_crash["collector_notes"] = collector_notes

    @mymetrics.timer_decorator("on_post.time")
    def on_post(self, req, resp):
        """Handle incoming HTTP POSTs.

        Note: This is executed by the WSGI app, so it and anything it does is
        covered by the Sentry middleware.

        """
        resp.status = falcon.HTTP_200

        start_time = time.time()
        # NOTE(willkg): This has to return text/plain since that's what the
        # breakpad clients expect.
        resp.content_type = "text/plain"

        try:
            raw_crash, dumps = self.extract_payload(req)

        except MalformedCrashReport as exc:
            # If this is malformed, then reject it with malformed error code.
            msg = str(exc)
            mymetrics.incr("malformed", tags=["reason:%s" % msg])
            resp.status = falcon.HTTP_400
            resp.body = "Discarded=malformed_%s" % msg
            return

        mymetrics.incr("incoming_crash")

        # Add timestamps
        current_timestamp = utc_now()
        raw_crash["submitted_timestamp"] = current_timestamp.isoformat()
        raw_crash["timestamp"] = start_time

        # Add checksums and MinidumpSha256Hash
        raw_crash["dump_checksums"] = {
            dump_name: hashlib.sha256(dump).hexdigest()
            for dump_name, dump in dumps.items()
        }
        raw_crash["MinidumpSha256Hash"] = raw_crash["dump_checksums"].get(
            "upload_file_minidump", ""
        )

        # First throttle the crash which gives us the information we need
        # to generate a crash id.
        throttle_result, rule_name, percentage = self.get_throttle_result(raw_crash)

        # Use a uuid if they gave us one and it's valid--otherwise create a new
        # one.
        if "uuid" in raw_crash and validate_crash_id(raw_crash["uuid"]):
            crash_id = raw_crash["uuid"]
            logger.info("%s has existing crash_id", crash_id)

        else:
            crash_id = create_crash_id(
                timestamp=current_timestamp, throttle_result=throttle_result
            )
            raw_crash["uuid"] = crash_id

        raw_crash["type_tag"] = self.config("dump_id_prefix").strip("-")

        # Log the throttle result
        logger.info(
            "%s: matched by %s; returned %s",
            crash_id,
            rule_name,
            RESULT_TO_TEXT[throttle_result],
        )
        mymetrics.incr("throttle_rule", tags=["rule:%s" % rule_name])
        mymetrics.incr(
            "throttle", tags=["result:%s" % RESULT_TO_TEXT[throttle_result].lower()]
        )

        # If the result is REJECT, then discard it
        if throttle_result is REJECT:
            resp.body = "Discarded=rule_%s" % rule_name
            return

        # If the result is a FAKEACCEPT, then we return a crash id, but throw the crash
        # away
        if throttle_result is FAKEACCEPT:
            resp.body = "CrashID=%s%s\n" % (self.config("dump_id_prefix"), crash_id)
            return

        # If we're accepting the crash report, then clean it up, save it and return the
        # CrashID to the client
        self.cleanup_crash_report(raw_crash)
        crash_report = CrashReport(raw_crash, dumps, crash_id)
        crash_report.set_state(STATE_SAVE)
        self.crashmover_queue.append(crash_report)
        self.hb_run_crashmover()
        resp.body = "CrashID=%s%s\n" % (self.config("dump_id_prefix"), crash_id)

    def hb_run_crashmover(self):
        """Spawn a crashmover if there's work to do."""
        # Spawn a new crashmover if there's stuff in the queue and we haven't
        # hit the limit of how many we can run
        if self.crashmover_queue and self.crashmover_pool.free_count() > 0:
            self.crashmover_pool.spawn(self.crashmover_process_queue)

    def crashmover_process_queue(self):
        """Process crashmover work.

        NOTE(willkg): This has to be super careful not to lose crash reports.
        If there's any kind of problem, this must return the crash report to
        the relevant queue.

        """
        while self.crashmover_queue:
            crash_report = self.crashmover_queue.popleft()

            try:
                if crash_report.state == STATE_SAVE:
                    # Save crash and then toss crash_id in the publish queue
                    self.crashmover_save(crash_report)
                    crash_report.set_state(STATE_PUBLISH)
                    self.crashmover_queue.append(crash_report)

                elif crash_report.state == STATE_PUBLISH:
                    # Publish crash and we're done
                    self.crashmover_publish(crash_report)
                    self.crashmover_finish(crash_report)

            except Exception:
                mymetrics.incr("%s_crash_exception.count" % crash_report.state)
                crash_report.errors += 1
                logger.exception(
                    "Exception when processing queue (%s), state: %s; error %d/%d",
                    crash_report.crash_id,
                    crash_report.state,
                    crash_report.errors,
                    MAX_ATTEMPTS,
                )

                # After MAX_ATTEMPTS, we give up on this crash and move on
                if crash_report.errors < MAX_ATTEMPTS:
                    self.crashmover_queue.append(crash_report)
                else:
                    logger.error(
                        "%s: too many errors trying to %s; dropped",
                        crash_report.crash_id,
                        crash_report.state,
                    )
                    mymetrics.incr("%s_crash_dropped.count" % crash_report.state)

    def crashmover_finish(self, crash_report):
        """Finish bookkeeping on crash report."""
        # Capture the total time it took for this crash to be handled from
        # being received from breakpad client to saving to s3.
        #
        # NOTE(willkg): time.time returns seconds, but .timing() wants
        # milliseconds, so we multiply!
        delta = (time.time() - crash_report.raw_crash["timestamp"]) * 1000

        mymetrics.timing("crash_handling.time", value=delta)
        mymetrics.incr("save_crash.count")

    @mymetrics.timer("crash_save.time")
    def crashmover_save(self, crash_report):
        """Save crash report to storage."""
        self.crashstorage.save_crash(crash_report)
        logger.info("%s saved", crash_report.crash_id)

    @mymetrics.timer("crash_publish.time")
    def crashmover_publish(self, crash_report):
        """Publish crash_id in publish queue."""
        self.crashpublish.publish_crash(crash_report)
        logger.info("%s published", crash_report.crash_id)

    def join_pool(self):
        """Join the pool.

        NOTE(willkg): Only use this in tests!

        This is helpful for forcing all the coroutines in the pool to complete
        so that we can verify outcomes in the test suite for work that might
        cross coroutines.

        """
        self.crashmover_pool.join()
Example #26
class BreakpadSubmitterResource(RequiredConfigMixin):
    """Handles incoming breakpad crash reports and saves to crashstorage

    This handles incoming HTTP POST requests containing breakpad-style crash
    reports in multipart/form-data format.

    It can handle compressed or uncompressed POST payloads.

    It parses the payload from the HTTP POST request, runs it through the
    throttler with the specified rules, generates a crash_id, returns the
    crash_id to the HTTP client and then saves the crash using the configured
    crashstorage class.

    .. Note::

       From when a crash comes in to when it's saved by the crashstorage class,
       the crash is entirely in memory. Keep that in mind when figuring out
       how to scale your Antenna nodes.


    The most important configuration bit here is choosing the crashstorage
    class.

    For example::

        CRASHSTORAGE_CLASS=antenna.ext.s3.crashstorage.S3CrashStorage

    """
    required_config = ConfigOptions()
    required_config.add_option(
        'dump_field',
        default='upload_file_minidump',
        doc='the name of the field in the POST data for dumps')
    required_config.add_option('dump_id_prefix',
                               default='bp-',
                               doc='the crash type prefix')
    required_config.add_option(
        'crashstorage_class',
        default='antenna.ext.crashstorage_base.NoOpCrashStorage',
        parser=parse_class,
        doc='the class in charge of storing crashes')

    # Maximum number of concurrent crashmover workers; each process gets this
    # many concurrent crashmovers, so if you're running 5 processes on the node
    # then it's (5 * concurrent_crashmovers) fighting for upload bandwidth
    required_config.add_option(
        'concurrent_crashmovers',
        default='2',
        parser=int,
        doc='the number of crashes concurrently being saved to s3')

    def __init__(self, config):
        self.config = config.with_options(self)
        self.crashstorage = self.config('crashstorage_class')(
            config.with_namespace('crashstorage'))
        self.throttler = Throttler(config)

        # Gevent pool for crashmover workers
        self.crashmover_pool = Pool(size=self.config('concurrent_crashmovers'))

        # Queue for crashmover of crashes to save
        self.crashmover_save_queue = deque()

        # Register hb functions with heartbeat manager
        register_for_heartbeat(self.hb_report_health_stats)
        register_for_heartbeat(self.hb_run_crashmover)

        # Register life function with heartbeat manager
        register_for_life(self.has_work_to_do)

    def get_runtime_config(self, namespace=None):
        for item in super().get_runtime_config():
            yield item

        for item in self.throttler.get_runtime_config():
            yield item

        for item in self.crashstorage.get_runtime_config(['crashstorage']):
            yield item

    def check_health(self, state):
        if hasattr(self.crashstorage, 'check_health'):
            self.crashstorage.check_health(state)

    def hb_report_health_stats(self):
        # The number of crash reports sitting in the queue; this is a direct
        # measure of the health of this process--a number that's going up means
        # impending doom
        mymetrics.gauge('save_queue_size',
                        value=len(self.crashmover_save_queue))

    def has_work_to_do(self):
        work_to_do = len(self.crashmover_save_queue) + len(
            self.crashmover_pool)
        logger.info('work left to do: %s' % work_to_do)
        # Indicates whether or not we're sitting on crashes to save--this helps
        # keep Antenna alive until we're done saving crashes
        return bool(work_to_do)

    def extract_payload(self, req):
        """Parses the HTTP POST payload

        Decompresses the payload if necessary and then walks through the
        FieldStorage converting from multipart/form-data to Python datatypes.

        NOTE(willkg): The FieldStorage is poorly documented (in my opinion). It
        has a list attribute that is a list of FieldStorage items--one for each
        key/val in the form. For attached files, the FieldStorage will have a
        name, value and filename and the type should be
        application/octet-stream. Thus we parse it looking for things of type
        text/plain and application/octet-stream.

        :arg falcon.request.Request req: a Falcon Request instance

        :returns: (raw_crash dict, dumps dict)

        """
        # If we don't have a content type, return an empty crash
        if not req.content_type:
            return {}, {}

        # If it's the wrong content type or there's no boundary section, return
        # an empty crash
        content_type = [
            part.strip() for part in req.content_type.split(';', 1)
        ]
        if ((len(content_type) != 2 or content_type[0] != 'multipart/form-data'
             or not content_type[1].startswith('boundary='))):
            return {}, {}

        content_length = req.content_length or 0

        # If there's no content, return an empty crash
        if content_length == 0:
            return {}, {}

        # Decompress payload if it's compressed
        if req.env.get('HTTP_CONTENT_ENCODING') == 'gzip':
            mymetrics.incr('gzipped_crash')

            # If the content is gzipped, we pull it out and decompress it. We
            # have to do that here because nginx doesn't have a good way to do
            # that in nginx-land.
            gzip_header = 16 + zlib.MAX_WBITS
            try:
                data = zlib.decompress(req.stream.read(content_length),
                                       gzip_header)
            except zlib.error:
                # This indicates this isn't a valid compressed stream. Given
                # that the HTTP request insists it is, we're just going to
                # assume it's junk and not try to process any further.
                mymetrics.incr('bad_gzipped_crash')
                return {}, {}

            # Stomp on the content length to correct it because we've changed
            # the payload size by decompressing it. We save the original value
            # in case we need to debug something later on.
            req.env['ORIG_CONTENT_LENGTH'] = content_length
            content_length = len(data)
            req.env['CONTENT_LENGTH'] = str(content_length)

            data = io.BytesIO(data)
            mymetrics.histogram('crash_size',
                                value=content_length,
                                tags=['payload:compressed'])
        else:
            # NOTE(willkg): At this point, req.stream is either a
            # falcon.request_helper.BoundedStream (in tests) or a
            # gunicorn.http.body.Body (in production).
            #
            # FieldStorage doesn't work with BoundedStream so we pluck out the
            # internal stream from that which works fine.
            #
            # FIXME(willkg): why don't tests work with BoundedStream?
            if isinstance(req.stream, BoundedStream):
                data = req.stream.stream
            else:
                data = req.stream

            mymetrics.histogram('crash_size',
                                value=content_length,
                                tags=['payload:uncompressed'])

        fs = cgi.FieldStorage(fp=data, environ=req.env, keep_blank_values=1)

        # NOTE(willkg): In the original collector, this returned request
        # querystring data as well as request body data, but we're not doing
        # that because the query string just duplicates data in the payload.

        raw_crash = {}
        dumps = {}

        for fs_item in fs.list:
            # NOTE(willkg): We saw some crashes come in where the raw crash ends up with
            # a None as a key. Make sure we can't end up with non-strings as keys.
            item_name = de_null(fs_item.name or '')

            if item_name == 'dump_checksums':
                # We don't want to pick up the dump_checksums from a raw
                # crash that was re-submitted.
                continue

            elif fs_item.type and (
                    fs_item.type.startswith('application/octet-stream')
                    or isinstance(fs_item.value, bytes)):
                # This is a dump, so add it to dumps using a sanitized dump
                # name.
                dump_name = sanitize_dump_name(item_name)
                dumps[dump_name] = fs_item.value

            else:
                # This isn't a dump, so it's a key/val pair, so we add that.
                raw_crash[item_name] = de_null(fs_item.value)

        return raw_crash, dumps

    def get_throttle_result(self, raw_crash):
        """Given a raw_crash, figures out the throttling

        If the raw_crash contains throttling information already, it returns
        that. If it doesn't, then this will apply throttling and return the
        results of that.

        A rule name of ``ALREADY_THROTTLED`` indicates that the raw_crash was
        previously throttled and we're re-using that data.

        A rule name of ``THROTTLEABLE_0`` indicates that the raw_crash was
        marked to not be throttled.

        :arg dict raw_crash: the raw crash to throttle

        :returns tuple: ``(result, rule_name, percentage)``

        """
        # If the raw_crash has a uuid, then that implies throttling, so return
        # that.
        if 'uuid' in raw_crash:
            crash_id = raw_crash['uuid']
            if crash_id[-7] in (str(ACCEPT), str(DEFER)):
                result = int(crash_id[-7])
                throttle_rate = 100

                # Save the results in the raw_crash itself
                raw_crash['legacy_processing'] = result
                raw_crash['throttle_rate'] = throttle_rate

                return result, 'FROM_CRASHID', throttle_rate

        # If we have throttle results for this crash, return those.
        if 'legacy_processing' in raw_crash and 'throttle_rate' in raw_crash:
            try:
                result = int(raw_crash['legacy_processing'])
                if result not in (ACCEPT, DEFER):
                    raise ValueError('Result is not a valid value: %r' % result)

                throttle_rate = int(raw_crash['throttle_rate'])
                if not (0 <= throttle_rate <= 100):
                    raise ValueError('Throttle rate is not a valid value: %r' % throttle_rate)
                return result, 'ALREADY_THROTTLED', throttle_rate

            except ValueError:
                # If we've gotten a ValueError, it means one or both of the
                # values is bad and we should ignore it and move forward.
                mymetrics.incr('throttle.bad_throttle_values')

        # If we have a Throttleable=0, then return that.
        if raw_crash.get('Throttleable', None) == '0':
            # If the raw crash has ``Throttleable=0``, then we accept the
            # crash.
            mymetrics.incr('throttleable_0')
            result = ACCEPT
            rule_name = 'THROTTLEABLE_0'
            throttle_rate = 100

        else:
            # At this stage, nothing has given us a throttle answer, so we
            # throttle the crash.
            result, rule_name, throttle_rate = self.throttler.throttle(
                raw_crash)

        # Save the results in the raw_crash itself
        raw_crash['legacy_processing'] = result
        raw_crash['throttle_rate'] = throttle_rate

        return result, rule_name, throttle_rate

    @mymetrics.timer_decorator('on_post.time')
    def on_post(self, req, resp):
        """Handles incoming HTTP POSTs

        Note: This is executed by the WSGI app, so it and anything it does is
        covered by the Sentry middleware.

        """
        resp.status = falcon.HTTP_200

        start_time = time.time()
        # NOTE(willkg): This has to return text/plain since that's what the
        # breakpad clients expect.
        resp.content_type = 'text/plain'

        raw_crash, dumps = self.extract_payload(req)

        # If we didn't get any crash data, then just drop it and move on--don't
        # count this as an incoming crash and don't do any more work on it
        if not raw_crash:
            resp.body = 'Discarded=1'
            return

        mymetrics.incr('incoming_crash')

        # Add timestamps
        current_timestamp = utc_now()
        raw_crash['submitted_timestamp'] = current_timestamp.isoformat()
        raw_crash['timestamp'] = start_time

        # Add checksums and MinidumpSha256Hash
        raw_crash['dump_checksums'] = {
            dump_name: hashlib.sha256(dump).hexdigest()
            for dump_name, dump in dumps.items()
        }
        raw_crash['MinidumpSha256Hash'] = raw_crash['dump_checksums'].get(
            'upload_file_minidump', '')

        # First throttle the crash which gives us the information we need
        # to generate a crash id.
        throttle_result, rule_name, percentage = self.get_throttle_result(
            raw_crash)

        # Use a uuid if they gave us one and it's valid--otherwise create a new
        # one.
        if 'uuid' in raw_crash and validate_crash_id(raw_crash['uuid']):
            crash_id = raw_crash['uuid']
            logger.info('%s has existing crash_id', crash_id)

        else:
            crash_id = create_crash_id(timestamp=current_timestamp,
                                       throttle_result=throttle_result)
            raw_crash['uuid'] = crash_id

        raw_crash['type_tag'] = self.config('dump_id_prefix').strip('-')

        # Log the throttle result
        logger.info('%s: matched by %s; returned %s', crash_id, rule_name,
                    RESULT_TO_TEXT[throttle_result])
        mymetrics.incr('throttle_rule', tags=['rule:%s' % rule_name])
        mymetrics.incr(
            'throttle',
            tags=['result:%s' % RESULT_TO_TEXT[throttle_result].lower()])

        if throttle_result is REJECT:
            # If the result is REJECT, then discard it
            resp.body = 'Discarded=1'

        else:
            # If the result is not REJECT, then save it and return the CrashID to
            # the client
            self.crashmover_save_queue.append(
                CrashReport(raw_crash, dumps, crash_id))
            self.hb_run_crashmover()
            resp.body = 'CrashID=%s%s\n' % (self.config('dump_id_prefix'),
                                            crash_id)

    def hb_run_crashmover(self):
        """Checks to see if it should spawn a crashmover and does if appropriate"""
        # Spawn a new crashmover if there's stuff in the queue and there isn't
        # one currently running
        if self.crashmover_save_queue and self.crashmover_pool.free_count() > 0:
            self.crashmover_pool.spawn(self.crashmover_process_queue)

    def crashmover_process_queue(self):
        """Processes the queue of crashes to save until it's empty

        Note: This has to be super careful not to lose crash reports. If
        there's any kind of problem, this must return the crash to the queue.

        """
        # Process crashes until the queue is empty
        while self.crashmover_save_queue:
            crash_report = self.crashmover_save_queue.popleft()

            try:
                self.crashmover_save(crash_report)

            except Exception:
                mymetrics.incr('save_crash_exception.count')
                crash_report.errors += 1
                logger.exception(
                    'Exception when processing save queue (%s); error %d/%d',
                    crash_report.crash_id, crash_report.errors, MAX_ATTEMPTS)

                # After MAX_ATTEMPTS, we give up on this crash and move on
                if crash_report.errors < MAX_ATTEMPTS:
                    self.crashmover_save_queue.append(crash_report)
                else:
                    logger.error('%s: too many errors trying to save; dropped',
                                 crash_report.crash_id)
                    mymetrics.incr('save_crash_dropped.count')

    def crashmover_save(self, crash_report):
        """Saves a crash to storage

        If this raises an error, then that bubbles up and the caller can figure
        out what to do with it and retry again later.

        """
        crash_id = crash_report.crash_id
        dumps = crash_report.dumps
        raw_crash = crash_report.raw_crash

        # Capture total time it takes to save the crash
        with mymetrics.timer('crash_save.time'):
            # Save dumps to crashstorage
            self.crashstorage.save_dumps(crash_id, dumps)

            # Save the raw crash metadata to crashstorage
            self.crashstorage.save_raw_crash(crash_id, raw_crash)

        # Capture the total time it took for this crash to be handled from
        # being received from breakpad client to saving to s3.
        #
        # NOTE(willkg): time.time returns seconds, but .timing() wants
        # milliseconds, so we multiply!
        delta = (time.time() - raw_crash['timestamp']) * 1000
        mymetrics.timing('crash_handling.time', value=delta)

        mymetrics.incr('save_crash.count')
        logger.info('%s saved', crash_id)

    def join_pool(self):
        """Joins the pool--use only in tests!

        This is helpful for forcing all the coroutines in the pool to complete
        so that we can verify outcomes in the test suite for work that might
        cross coroutines.

        """
        self.crashmover_pool.join()
Example #27
def throttler():
    return Throttler(ConfigManager.from_dict({
        'PRODUCTS': 'antenna.throttler.ALL_PRODUCTS'
    }))
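This fixture hands tests a Throttler configured to accept any product name. A hedged usage sketch (the test below is made up for illustration and mirrors Example #16):

def test_accepts_unknown_product(throttler):
    # pytest injects the throttler fixture defined above; with ALL_PRODUCTS an
    # otherwise unmatched product falls through to the accept_everything rule.
    assert throttler.throttle({'ProductName': 'testproduct'}) == (ACCEPT, 'accept_everything', 100)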
Example #28
    def test_ruleset(self):
        throttler = Throttler(ConfigManager.from_dict({
            'THROTTLE_RULES': 'antenna.throttler.accept_all'
        }))

        assert throttler.throttle({'ProductName': 'Test'}) == (ACCEPT, 'accept_everything', 100)
Example #29
    def test_ruleset(self):
        throttler = Throttler(ConfigManager.from_dict({
            'THROTTLE_RULES': 'antenna.throttler.ACCEPT_ALL'
        }))

        assert throttler.throttle({'ProductName': 'Test'}) == (ACCEPT, 'accept_everything', 100)
Example #30
class BreakpadSubmitterResource(RequiredConfigMixin):
    """Handles incoming breakpad crash reports and saves to crashstorage.

    This handles incoming HTTP POST requests containing breakpad-style crash
    reports in multipart/form-data format.

    It can handle compressed or uncompressed POST payloads.

    It parses the payload from the HTTP POST request, runs it through the
    throttler with the specified rules, generates a crash_id, returns the
    crash_id to the HTTP client and then saves the crash using the configured
    crashstorage class.

    .. Note::

       From when a crash comes in to when it's saved by the crashstorage class,
       the crash is entirely in memory. Keep that in mind when figuring out
       how to scale your Antenna nodes.


    The most important configuration bit here is choosing the crashstorage
    class.

    For example::

        CRASHSTORAGE_CLASS=antenna.ext.s3.crashstorage.S3CrashStorage

    """

    required_config = ConfigOptions()
    required_config.add_option(
        'dump_field', default='upload_file_minidump',
        doc='The name of the field in the POST data for dumps.'
    )
    required_config.add_option(
        'dump_id_prefix', default='bp-',
        doc='The crash type prefix.'
    )
    required_config.add_option(
        'concurrent_crashmovers',
        default='2',
        parser=positive_int,
        doc=(
            'The number of crashes concurrently being saved and published. '
            'Each process gets this many concurrent crashmovers, so if you\'re '
            'running 5 processes on the node, then it\'s '
            '(5 * concurrent_crashmovers) sharing upload bandwidth.'
        )
    )

    # crashstorage things
    required_config.add_option(
        'crashstorage_class',
        default='antenna.ext.crashstorage_base.NoOpCrashStorage',
        parser=parse_class,
        doc='The class in charge of storing crashes.'
    )

    # crashpublish things
    required_config.add_option(
        'crashpublish_class',
        default='antenna.ext.crashpublish_base.NoOpCrashPublish',
        parser=parse_class,
        doc='The class in charge of publishing crashes.'
    )

    def __init__(self, config):
        self.config = config.with_options(self)
        self.crashstorage = self.config('crashstorage_class')(config.with_namespace('crashstorage'))
        self.crashpublish = self.config('crashpublish_class')(config.with_namespace('crashpublish'))
        self.throttler = Throttler(config)

        # Gevent pool for crashmover workers
        self.crashmover_pool = Pool(size=self.config('concurrent_crashmovers'))

        # Queue for crashmover work
        self.crashmover_queue = deque()

        # Register hb functions with heartbeat manager
        register_for_heartbeat(self.hb_report_health_stats)
        register_for_heartbeat(self.hb_run_crashmover)

        # Register life function with heartbeat manager
        register_for_life(self.has_work_to_do)

    def get_runtime_config(self, namespace=None):
        """Return generator of runtime configuration."""
        for item in super().get_runtime_config():
            yield item

        for item in self.throttler.get_runtime_config():
            yield item

        for item in self.crashstorage.get_runtime_config(['crashstorage']):
            yield item

        for item in self.crashpublish.get_runtime_config(['crashpublish']):
            yield item

    def check_health(self, state):
        """Return health state."""
        if hasattr(self.crashstorage, 'check_health'):
            self.crashstorage.check_health(state)
        if hasattr(self.crashpublish, 'check_health'):
            self.crashpublish.check_health(state)

    def hb_report_health_stats(self):
        """Heartbeat function to report health stats."""
        # The number of crash reports sitting in the work queue; this is a
        # direct measure of the health of this process--a number that's going
        # up means impending doom
        mymetrics.gauge('work_queue_size', value=len(self.crashmover_queue))

    def has_work_to_do(self):
        """Return whether this still has work to do."""
        work_to_do = (
            len(self.crashmover_pool) +
            len(self.crashmover_queue)
        )
        logger.info('work left to do: %s' % work_to_do)
        # Indicates whether or not we're sitting on crashes to save--this helps
        # keep Antenna alive until we're done saving crashes
        return bool(work_to_do)

    def extract_payload(self, req):
        """Parse HTTP POST payload.

        Decompresses the payload if necessary and then walks through the
        FieldStorage converting from multipart/form-data to Python datatypes.

        NOTE(willkg): The FieldStorage is poorly documented (in my opinion). It
        has a list attribute that is a list of FieldStorage items--one for each
        key/val in the form. For attached files, the FieldStorage will have a
        name, value and filename and the type should be
        application/octet-stream. Thus we parse it looking for things of type
        text/plain and application/octet-stream.

        :arg falcon.request.Request req: a Falcon Request instance

        :returns: (raw_crash dict, dumps dict)

        """
        # If we don't have a content type, return an empty crash
        if not req.content_type:
            mymetrics.incr('malformed', tags=['reason:no_content_type'])
            return {}, {}

        # If it's the wrong content type or there's no boundary section, return
        # an empty crash
        content_type = [part.strip() for part in req.content_type.split(';', 1)]
        if ((len(content_type) != 2 or
             content_type[0] != 'multipart/form-data' or
             not content_type[1].startswith('boundary='))):
            if content_type[0] != 'multipart/form-data':
                mymetrics.incr('malformed', tags=['reason:wrong_content_type'])
            else:
                mymetrics.incr('malformed', tags=['reason:no_boundary'])
            return {}, {}

        content_length = req.content_length or 0

        # If there's no content, return an empty crash
        if content_length == 0:
            mymetrics.incr('malformed', tags=['reason:no_content_length'])
            return {}, {}

        # Decompress payload if it's compressed
        if req.env.get('HTTP_CONTENT_ENCODING') == 'gzip':
            mymetrics.incr('gzipped_crash')

            # If the content is gzipped, we pull it out and decompress it. We
            # have to do that here because nginx doesn't have a good way to do
            # that in nginx-land.
            gzip_header = 16 + zlib.MAX_WBITS
            try:
                data = zlib.decompress(req.stream.read(content_length), gzip_header)
            except zlib.error:
                # This indicates this isn't a valid compressed stream. Given
                # that the HTTP request insists it is, we're just going to
                # assume it's junk and not try to process any further.
                mymetrics.incr('malformed', tags=['reason:bad_gzip'])
                return {}, {}

            # Stomp on the content length to correct it because we've changed
            # the payload size by decompressing it. We save the original value
            # in case we need to debug something later on.
            req.env['ORIG_CONTENT_LENGTH'] = content_length
            content_length = len(data)
            req.env['CONTENT_LENGTH'] = str(content_length)

            data = io.BytesIO(data)
            mymetrics.histogram('crash_size', value=content_length, tags=['payload:compressed'])
        else:
            # NOTE(willkg): At this point, req.stream is either a
            # falcon.request_helper.BoundedStream (in tests) or a
            # gunicorn.http.body.Body (in production).
            #
            # FieldStorage doesn't work with BoundedStream so we pluck out the
            # internal stream from that which works fine.
            #
            # FIXME(willkg): why don't tests work with BoundedStream?
            if isinstance(req.stream, BoundedStream):
                data = req.stream.stream
            else:
                data = req.stream

            mymetrics.histogram('crash_size', value=content_length, tags=['payload:uncompressed'])

        fs = cgi.FieldStorage(fp=data, environ=req.env, keep_blank_values=1)

        # NOTE(willkg): In the original collector, this returned request
        # querystring data as well as request body data, but we're not doing
        # that because the query string just duplicates data in the payload.

        raw_crash = {}
        dumps = {}

        has_json = False
        has_kvpairs = False

        for fs_item in fs.list:
            # NOTE(willkg): We saw some crashes come in where the raw crash ends up with
            # a None as a key. Make sure we can't end up with non-strings as keys.
            item_name = fs_item.name or ''

            if item_name == 'dump_checksums':
                # We don't want to pick up the dump_checksums from a raw
                # crash that was re-submitted.
                continue

            elif fs_item.type and fs_item.type.startswith('application/json'):
                # This is a JSON blob, so load it and override raw_crash with
                # it.
                has_json = True
                raw_crash = json.loads(fs_item.value)

            elif fs_item.type and (fs_item.type.startswith('application/octet-stream') or
                                   isinstance(fs_item.value, bytes)):
                # This is a dump, so add it to dumps using a sanitized dump
                # name.
                dump_name = sanitize_dump_name(item_name)
                dumps[dump_name] = fs_item.value

            else:
                # This isn't a dump, so it's a key/val pair, so we add that.
                has_kvpairs = True
                raw_crash[item_name] = fs_item.value

        if has_json and has_kvpairs:
            # If the crash payload has both kvpairs and a JSON blob, then it's
            # malformed and we should dump it.
            mymetrics.incr('malformed', tags=['reason:has_json_and_kv'])
            return {}, {}

        return raw_crash, dumps
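
    # For illustration (hypothetical values): for a typical breakpad submission
    # the return value looks roughly like
    #
    #     raw_crash = {'ProductName': 'Firefox', 'Version': '60.0'}
    #     dumps = {'upload_file_minidump': b'<minidump bytes>'}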

    def get_throttle_result(self, raw_crash):
        """Run raw_crash through throttler for a throttling result.

        :arg dict raw_crash: the raw crash to throttle

        :returns tuple: ``(result, rule_name, percentage)``

        """
        # At this stage, nothing has given us a throttle answer, so we
        # throttle the crash.
        result, rule_name, throttle_rate = self.throttler.throttle(raw_crash)

        # Save the results in the raw_crash itself
        raw_crash['legacy_processing'] = result
        raw_crash['throttle_rate'] = throttle_rate

        return result, rule_name, throttle_rate

    @mymetrics.timer_decorator('on_post.time')
    def on_post(self, req, resp):
        """Handle incoming HTTP POSTs.

        Note: This is executed by the WSGI app, so it and anything it does is
        covered by the Sentry middleware.

        """
        resp.status = falcon.HTTP_200

        start_time = time.time()
        # NOTE(willkg): This has to return text/plain since that's what the
        # breakpad clients expect.
        resp.content_type = 'text/plain'

        raw_crash, dumps = self.extract_payload(req)

        # If we didn't get any crash data, then just drop it and move on--don't
        # count this as an incoming crash and don't do any more work on it
        if not raw_crash:
            resp.body = 'Discarded=1'
            return

        mymetrics.incr('incoming_crash')

        # Add timestamps
        current_timestamp = utc_now()
        raw_crash['submitted_timestamp'] = current_timestamp.isoformat()
        raw_crash['timestamp'] = start_time

        # Add checksums and MinidumpSha256Hash
        raw_crash['dump_checksums'] = {
            dump_name: hashlib.sha256(dump).hexdigest()
            for dump_name, dump in dumps.items()
        }
        raw_crash['MinidumpSha256Hash'] = raw_crash['dump_checksums'].get('upload_file_minidump', '')

        # First throttle the crash which gives us the information we need
        # to generate a crash id.
        throttle_result, rule_name, percentage = self.get_throttle_result(raw_crash)

        # Use a uuid if they gave us one and it's valid--otherwise create a new
        # one.
        if 'uuid' in raw_crash and validate_crash_id(raw_crash['uuid']):
            crash_id = raw_crash['uuid']
            logger.info('%s has existing crash_id', crash_id)

        else:
            crash_id = create_crash_id(
                timestamp=current_timestamp,
                throttle_result=throttle_result
            )
            raw_crash['uuid'] = crash_id

        raw_crash['type_tag'] = self.config('dump_id_prefix').strip('-')

        # Log the throttle result
        logger.info('%s: matched by %s; returned %s', crash_id, rule_name,
                    RESULT_TO_TEXT[throttle_result])
        mymetrics.incr('throttle_rule', tags=['rule:%s' % rule_name])
        mymetrics.incr('throttle', tags=['result:%s' % RESULT_TO_TEXT[throttle_result].lower()])

        if throttle_result is REJECT:
            # If the result is REJECT, then discard it
            resp.body = 'Discarded=1'

        elif throttle_result is FAKEACCEPT:
            # If the result is a FAKEACCEPT, then we return a crash id, but throw
            # the crash away
            resp.body = 'CrashID=%s%s\n' % (self.config('dump_id_prefix'), crash_id)

        else:
            # If the result is not REJECT, then save it and return the CrashID to
            # the client
            crash_report = CrashReport(raw_crash, dumps, crash_id)
            crash_report.set_state(STATE_SAVE)
            self.crashmover_queue.append(crash_report)
            self.hb_run_crashmover()
            resp.body = 'CrashID=%s%s\n' % (self.config('dump_id_prefix'), crash_id)
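
        # For illustration: on success the breakpad client receives a plain-text
        # body like 'CrashID=bp-859b7e4e-c9d8-4c27-9e31-1a2b30180101\n' (both the
        # 'bp-' prefix and the id here are hypothetical), while rejected crashes
        # receive 'Discarded=1'.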

    def hb_run_crashmover(self):
        """Spawn a crashmover if there's work to do."""
        # Spawn a new crashmover if there's stuff in the queue and we haven't
        # hit the limit of how many we can run
        if self.crashmover_queue and self.crashmover_pool.free_count() > 0:
            self.crashmover_pool.spawn(self.crashmover_process_queue)

    def crashmover_process_queue(self):
        """Process crashmover work.

        NOTE(willkg): This has to be super careful not to lose crash reports.
        If there's any kind of problem, this must return the crash report to
        the relevant queue.

        """
        while self.crashmover_queue:
            crash_report = self.crashmover_queue.popleft()

            try:
                if crash_report.state == STATE_SAVE:
                    # Save crash and then toss crash_id in the publish queue
                    self.crashmover_save(crash_report)
                    crash_report.set_state(STATE_PUBLISH)
                    self.crashmover_queue.append(crash_report)

                elif crash_report.state == STATE_PUBLISH:
                    # Publish crash and we're done
                    self.crashmover_publish(crash_report)
                    self.crashmover_finish(crash_report)

            except Exception:
                mymetrics.incr('%s_crash_exception.count' % crash_report.state)
                crash_report.errors += 1
                logger.exception(
                    'Exception when processing queue (%s), state: %s; error %d/%d',
                    crash_report.crash_id,
                    crash_report.state,
                    crash_report.errors,
                    MAX_ATTEMPTS
                )

                # After MAX_ATTEMPTS, we give up on this crash and move on
                if crash_report.errors < MAX_ATTEMPTS:
                    self.crashmover_queue.append(crash_report)
                else:
                    logger.error(
                        '%s: too many errors trying to %s; dropped',
                        crash_report.crash_id,
                        crash_report.state
                    )
                    mymetrics.incr('%s_crash_dropped.count' % crash_report.state)
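
    # To summarize the loop above: each CrashReport moves STATE_SAVE -> STATE_PUBLISH
    # -> finished, and on an exception it is re-queued and retried until
    # crash_report.errors reaches MAX_ATTEMPTS, at which point it is dropped.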

    def crashmover_finish(self, crash_report):
        """Finish bookkeeping on crash report."""
        # Capture the total time it took for this crash to be handled, from
        # being received from the breakpad client to being saved to S3.
        #
        # NOTE(willkg): time.time returns seconds, but .timing() wants
        # milliseconds, so we multiply!
        delta = (time.time() - crash_report.raw_crash['timestamp']) * 1000

        mymetrics.timing('crash_handling.time', value=delta)
        mymetrics.incr('save_crash.count')

    @mymetrics.timer('crash_save.time')
    def crashmover_save(self, crash_report):
        """Save crash report to storage."""
        self.crashstorage.save_crash(crash_report)
        logger.info('%s saved', crash_report.crash_id)

    @mymetrics.timer('crash_publish.time')
    def crashmover_publish(self, crash_report):
        """Publish crash_id in publish queue."""
        self.crashpublish.publish_crash(crash_report)
        logger.info('%s published', crash_report.crash_id)

    def join_pool(self):
        """Join the pool.

        NOTE(willkg): Only use this in tests!

        This is helpful for forcing all the coroutines in the pool to complete
        so that we can verify outcomes in the test suite for work that might
        cross coroutines.

        """
        self.crashmover_pool.join()
Exemplo n.º 31
0
    def test_is_nothing(self):
        # None of the rules will match an empty crash
        raw_crash = {}

        throttler = Throttler(ConfigManager.from_dict({}))
        assert throttler.throttle(raw_crash) == (DEFER, 'NO_MATCH', 0)
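
A minimal, hypothetical client-side sketch of how a gzip-compressed crash payload could be built so that the gzip branch of extract_payload() above can decompress it. The URL, annotation values, and dump bytes are assumptions for illustration, and it assumes the requests and urllib3 packages are available:

    import gzip

    import requests
    from urllib3.filepost import encode_multipart_formdata

    # Build the multipart/form-data body: annotations as key/value fields and
    # the minidump as a binary file part.
    body, content_type = encode_multipart_formdata({
        'ProductName': 'Firefox',
        'Version': '60.0',
        'upload_file_minidump': ('upload_file_minidump', b'<minidump bytes>'),
    })

    # Compress the whole body and tell the collector it is gzipped; the
    # collector decompresses it with zlib before parsing the form data.
    resp = requests.post(
        'http://localhost:8000/submit',  # hypothetical collector URL
        data=gzip.compress(body),
        headers={'Content-Type': content_type, 'Content-Encoding': 'gzip'},
    )
    print(resp.text)  # e.g. 'CrashID=...' or 'Discarded=1'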