Example #1
class TelemetryBotoS3CrashStorage(BotoS3CrashStorage):
    """Sends a subset of the processed crash to an S3 bucket

    The subset of the processed crash is based on the JSON Schema which is
    derived from "socorro/external/es/super_search_fields.py".

    """
    def __init__(self, config, *args, **kwargs):
        super().__init__(config, *args, **kwargs)
        self._all_fields = SuperSearchFieldsData().get()

    def save_raw_and_processed(self, raw_crash, dumps, processed_crash,
                               crash_id):
        crash_report = {}

        # TODO Optimization opportunity: inspect CRASH_REPORT_JSON_SCHEMA,
        # collect all of its (recursive) keys, and use that list to keep the
        # two loops below from filling `crash_report` with keys that will
        # never be needed.

        # Rename fields in raw_crash
        raw_fields_map = dict((x["in_database_name"], x["name"])
                              for x in self._all_fields.values()
                              if x["namespace"] == "raw_crash")
        for key, val in raw_crash.items():
            crash_report[raw_fields_map.get(key, key)] = val

        # Rename fields in processed_crash
        processed_fields_map = dict((x["in_database_name"], x["name"])
                                    for x in self._all_fields.values()
                                    if x["namespace"] == "processed_crash")
        for key, val in processed_crash.items():
            crash_report[processed_fields_map.get(key, key)] = val

        # Validate crash_report
        crash_report = json_schema_reducer.make_reduced_dict(
            CRASH_REPORT_JSON_SCHEMA, crash_report)
        self.save_processed(crash_report)

    @staticmethod
    def do_save_processed(conn, processed_crash):
        """Overriding this to change "name of thing" to crash_report"""
        crash_id = processed_crash["uuid"]
        data = conn._convert_mapping_to_string(processed_crash).encode("utf-8")
        conn.submit(crash_id, "crash_report", data)

    @staticmethod
    def do_get_unredacted_processed(conn, crash_id, json_object_hook):
        """Overriding this to change "name of thing" to crash_report"""
        try:
            processed_crash_as_string = conn.fetch(crash_id, "crash_report")
            return json.loads(processed_crash_as_string,
                              object_hook=json_object_hook)
        except conn.ResponseError as x:
            raise CrashIDNotFound("%s not found: %s" % (crash_id, x))
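
The renaming step above reduces to a dictionary lookup that falls back to the original key. A minimal runnable sketch; the field definitions here are hypothetical stand-ins shaped like what SuperSearchFieldsData().get() returns, not Socorro's real fields:

# Hypothetical field definitions, shaped like SuperSearchFieldsData().get()
all_fields = {
    "build": {"in_database_name": "build", "name": "build_id",
              "namespace": "raw_crash"},
    "os_name": {"in_database_name": "os_name", "name": "platform",
                "namespace": "processed_crash"},
}

# Map database names to public names, but only for the raw_crash namespace
raw_fields_map = {
    field["in_database_name"]: field["name"]
    for field in all_fields.values()
    if field["namespace"] == "raw_crash"
}

raw_crash = {"build": "20200101000000", "UnknownKey": "kept as-is"}
crash_report = {raw_fields_map.get(key, key): val
                for key, val in raw_crash.items()}
print(crash_report)  # {'build_id': '20200101000000', 'UnknownKey': 'kept as-is'}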
Example #2
    def run(self, end_datetime):
        # Truncate to the hour
        end_datetime = end_datetime.replace(minute=0, second=0, microsecond=0)

        # Do a super search and get the signature, buildid, and date processed for
        # every crash in the range
        all_fields = SuperSearchFieldsData().get()
        api = SuperSearch(self.config)
        start_datetime = end_datetime - datetime.timedelta(
            minutes=self.config.period)
        self.logger.info('Looking at %s to %s', start_datetime, end_datetime)

        params = {
            'date': [
                '>={}'.format(start_datetime.isoformat()),
                '<{}'.format(end_datetime.isoformat()),
            ],
            '_columns': ['signature', 'build_id', 'date'],
            '_facets_size': 0,
            '_fields': all_fields,

            # Set up first page
            '_results_offset': 0,
            '_results_number': MAX_PAGE,
        }

        results = {}
        crashids_count = 0

        while True:
            resp = api.get(**params)
            hits = resp['hits']
            for hit in hits:
                crashids_count += 1

                if not hit['build_id']:
                    # Not all crashes have a build id, so skip the ones that don't.
                    continue

                if hit['signature'] in results:
                    data = results[hit['signature']]
                    data['build_id'] = min(data['build_id'], hit['build_id'])
                    data['date'] = min(data['date'], hit['date'])
                else:
                    data = {
                        'signature': hit['signature'],
                        'build_id': hit['build_id'],
                        'date': hit['date']
                    }
                results[hit['signature']] = data

            # If there are no more crash ids to get, we return
            total = resp['total']
            if not hits or crashids_count >= total:
                break

            # Get the next page, but only as many results as we need
            params['_results_offset'] += MAX_PAGE
            params['_results_number'] = min(
                # MAX_PAGE is the maximum we can request
                MAX_PAGE,

                # The number of results Super Search hasn't returned to us so far
                total - crashids_count)

        signature_data = results.values()

        # Save signature data to the db
        for item in signature_data:
            if self.config.dry_run:
                self.logger.info('Inserting/updating signature (%s, %s, %s)',
                                 item['signature'], item['date'],
                                 item['build_id'])
            else:
                self.update_crashstats_signature(
                    signature=item['signature'],
                    report_date=item['date'],
                    report_build=item['build_id'],
                )

        self.logger.info('Inserted/updated %d signatures.',
                         len(signature_data))
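
The paging loop is the subtle part: it requests MAX_PAGE hits at a time and shrinks the final request so it never asks for more than `total`. A runnable sketch of just that loop against a stubbed search function; the stub and its numbers are illustrative:

MAX_PAGE = 1000

def stub_search(offset, number, total=2500):
    # Mimics Super Search paging: returns up to `number` hits starting
    # at `offset`, never going past `total`.
    count = max(0, min(number, total - offset))
    return {"hits": list(range(offset, offset + count)), "total": total}

offset, number, seen = 0, MAX_PAGE, 0
while True:
    resp = stub_search(offset, number)
    hits = resp["hits"]
    seen += len(hits)
    if not hits or seen >= resp["total"]:
        break
    # Next page, but only as many results as are still missing
    offset += MAX_PAGE
    number = min(MAX_PAGE, resp["total"] - seen)

print(seen)  # 2500: three requests of 1000, 1000, then 500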
Example #3
    def __init__(self, config, *args, **kwargs):
        super().__init__(config, *args, **kwargs)
        self._all_fields = SuperSearchFieldsData().get()
Example #4
class TelemetryBotoS3CrashStorage(BotoS3CrashStorage):
    """Sends a subset of the processed crash to an S3 bucket

    The subset of the processed crash is based on the JSON Schema which is
    derived from "socorro/external/es/super_search_fields.py".

    """

    required_config = Namespace()
    required_config.add_option(
        'resource_class',
        default='socorro.external.boto.connection_context.RegionalS3ConnectionContext',
        doc='fully qualified dotted Python classname to handle Boto connections',
        from_string_converter=class_converter,
        reference_value_from='resource.boto'
    )

    def __init__(self, config, *args, **kwargs):
        super().__init__(config, *args, **kwargs)
        self._all_fields = SuperSearchFieldsData().get()

    def save_raw_and_processed(self, raw_crash, dumps, processed_crash, crash_id):
        crash_report = {}

        # TODO Optimization opportunity: inspect CRASH_REPORT_JSON_SCHEMA,
        # collect all of its (recursive) keys, and use that list to keep the
        # two loops below from filling `crash_report` with keys that will
        # never be needed.

        # Rename fields in raw_crash
        raw_fields_map = dict(
            (x['in_database_name'], x['name'])
            for x in self._all_fields.values()
            if x['namespace'] == 'raw_crash'
        )
        for key, val in raw_crash.items():
            crash_report[raw_fields_map.get(key, key)] = val

        # Rename fields in processed_crash
        processed_fields_map = dict(
            (x['in_database_name'], x['name'])
            for x in self._all_fields.values()
            if x['namespace'] == 'processed_crash'
        )
        for key, val in processed_crash.items():
            crash_report[processed_fields_map.get(key, key)] = val

        # Validate crash_report
        crash_report = json_schema_reducer.make_reduced_dict(CRASH_REPORT_JSON_SCHEMA, crash_report)
        self.save_processed(crash_report)

    @staticmethod
    def _do_save_processed(boto_connection, processed_crash):
        """Overriding this to change "name of thing" to crash_report"""
        crash_id = processed_crash['uuid']
        data = boto_connection._convert_mapping_to_string(processed_crash).encode('utf-8')
        boto_connection.submit(crash_id, "crash_report", data)

    @staticmethod
    def _do_get_unredacted_processed(boto_connection, crash_id, json_object_hook):
        """Overriding this to change "name of thing" to crash_report"""
        try:
            processed_crash_as_string = boto_connection.fetch(crash_id, 'crash_report')
            return json.loads(processed_crash_as_string, object_hook=json_object_hook)
        except boto_connection.ResponseError as x:
            raise CrashIDNotFound('%s not found: %s' % (crash_id, x))
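
json_schema_reducer.make_reduced_dict drops every key the schema does not declare. The real reducer also walks nested objects and arrays; a flat-only stand-in (hypothetical, for illustration) is enough to show the contract:

# Toy schema; the real CRASH_REPORT_JSON_SCHEMA is much larger and nested.
SCHEMA = {
    "type": "object",
    "properties": {
        "uuid": {"type": "string"},
        "signature": {"type": "string"},
    },
}

def make_reduced_dict(schema, document):
    # Flat-only stand-in for json_schema_reducer.make_reduced_dict:
    # keep only the keys the schema declares.
    allowed = schema.get("properties", {})
    return {key: val for key, val in document.items() if key in allowed}

print(make_reduced_dict(SCHEMA, {"uuid": "abc", "signature": "OOM | small",
                                 "TelemetryEnvironment": "dropped"}))
# {'uuid': 'abc', 'signature': 'OOM | small'}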
Example #5
    def handle(self, **options):
        start_datetime = options.get("last_success")
        end_datetime = options.get("run_time")

        if end_datetime:
            end_datetime = parse_datetime(end_datetime)
        else:
            end_datetime = timezone.now()

        if start_datetime:
            start_datetime = parse_datetime(start_datetime)
            # When run via cronrun, start_datetime is based on the last success
            # and we want to increase the window by 10 minutes to get some
            # overlap with the previous run
            start_datetime = start_datetime - datetime.timedelta(minutes=10)
        else:
            # Default to end_datetime - 90 minutes
            start_datetime = end_datetime - datetime.timedelta(minutes=90)

        # Truncate seconds and microseconds
        start_datetime = start_datetime.replace(second=0, microsecond=0)
        end_datetime = end_datetime.replace(second=0, microsecond=0)

        if not end_datetime > start_datetime:
            raise CommandError("start time must be before end time.")

        # Do a super search and get the signature, buildid, and date processed for
        # every crash in the range
        all_fields = SuperSearchFieldsData().get()
        api = SuperSearch()
        self.stdout.write("Looking at %s to %s" % (start_datetime, end_datetime))

        params = {
            "date": [
                f">={start_datetime.isoformat()}",
                f"<{end_datetime.isoformat()}",
            ],
            "_columns": ["signature", "build_id", "date"],
            "_facets_size": 0,
            "_fields": all_fields,
            # Set up first page
            "_results_offset": 0,
            "_results_number": MAX_PAGE,
        }

        results = {}
        crashids_count = 0

        while True:
            resp = api.get(**params)
            hits = resp["hits"]
            for hit in hits:
                crashids_count += 1

                if not hit["build_id"]:
                    # Not all crashes have a build id, so skip the ones that don't.
                    continue

                if hit["signature"] in results:
                    data = results[hit["signature"]]
                    data["build_id"] = min(data["build_id"], hit["build_id"])
                    data["date"] = min(data["date"], hit["date"])
                else:
                    data = {
                        "signature": hit["signature"],
                        "build_id": hit["build_id"],
                        "date": hit["date"],
                    }
                results[hit["signature"]] = data

            # If there are no more crash ids to get, we return
            total = resp["total"]
            if not hits or crashids_count >= total:
                break

            # Get the next page, but only as many results as we need
            params["_results_offset"] += MAX_PAGE
            params["_results_number"] = min(
                # MAX_PAGE is the maximum we can request
                MAX_PAGE,
                # The number of results Super Search hasn't returned to us so far
                total - crashids_count,
            )

        signature_data = results.values()

        # Save signature data to the db
        for item in signature_data:
            if options["dry_run"]:
                self.stdout.write(
                    "Inserting/updating signature (%s, %s, %s)"
                    % (item["signature"], item["date"], item["build_id"])
                )
            else:
                self.update_crashstats_signature(
                    signature=item["signature"],
                    report_date=item["date"],
                    report_build=item["build_id"],
                )

        self.stdout.write("Inserted/updated %d signatures." % len(signature_data))
Example #6
    def __init__(self, config, *args, **kwargs):
        super().__init__(config, *args, **kwargs)
        self._all_fields = SuperSearchFieldsData().get()
Example #7
class TelemetryBotoS3CrashStorage(BotoS3CrashStorage):
    """Sends a subset of the processed crash to an S3 bucket

    The subset of the processed crash is based on the JSON Schema which is
    derived from "socorro/external/es/super_search_fields.py".

    """
    def __init__(self, config, *args, **kwargs):
        super().__init__(config, *args, **kwargs)
        self._all_fields = SuperSearchFieldsData().get()

    def save_raw_and_processed(self, raw_crash, dumps, processed_crash,
                               crash_id):
        """Save the raw and processed crash data.

        For Telemetry, we combine the raw and processed crash data into a "crash report"
        which we save to an S3 bucket for the Telemetry system to pick up later.

        """
        crash_report = {}

        # TODO Optimization opportunity: inspect CRASH_REPORT_JSON_SCHEMA,
        # collect all of its (recursive) keys, and use that list to keep the
        # two loops below from filling `crash_report` with keys that will
        # never be needed.

        # Rename fields in raw_crash
        raw_fields_map = dict((x["in_database_name"], x["name"])
                              for x in self._all_fields.values()
                              if x["namespace"] == "raw_crash")
        for key, val in raw_crash.items():
            crash_report[raw_fields_map.get(key, key)] = val

        # Rename fields in processed_crash
        processed_fields_map = dict((x["in_database_name"], x["name"])
                                    for x in self._all_fields.values()
                                    if x["namespace"] == "processed_crash")
        for key, val in processed_crash.items():
            crash_report[processed_fields_map.get(key, key)] = val

        # Validate crash_report
        crash_report = json_schema_reducer.make_reduced_dict(
            CRASH_REPORT_JSON_SCHEMA, crash_report)

        self.save_processed(crash_report)

    def save_processed(self, processed_crash):
        """Save a crash report to the S3 bucket."""
        crash_id = processed_crash["uuid"]
        data = dict_to_str(processed_crash).encode("utf-8")
        path = build_keys("crash_report", crash_id)[0]
        self.conn.save_file(path, data)

    def get_unredacted_processed(self, crash_id):
        """Get a crash report from the S3 bucket.

        :returns: DotDict

        :raises CrashIDNotFound: if file does not exist

        """
        path = build_keys("crash_report", crash_id)[0]
        try:
            crash_report_as_str = self.conn.load_file(path)
            return json.loads(crash_report_as_str,
                              object_hook=self.config.json_object_hook)
        except self.conn.KeyNotFound as x:
            raise CrashIDNotFound("%s not found: %s" % (crash_id, x))
Example #8
    def handle(self, **options):
        start_datetime = options.get('last_success')
        end_datetime = options.get('run_time')

        if end_datetime:
            end_datetime = parse_datetime(end_datetime)
        else:
            end_datetime = timezone.now()

        if start_datetime:
            start_datetime = parse_datetime(start_datetime)
            # When run via cronrun, start_datetime is based on the last success
            # and we want to increase the window by 10 minutes to get some
            # overlap with the previous run
            start_datetime = start_datetime - datetime.timedelta(minutes=10)
        else:
            # Default to end_datetime - 90 minutes
            start_datetime = end_datetime - datetime.timedelta(minutes=90)

        # Truncate seconds and microseconds
        start_datetime = start_datetime.replace(second=0, microsecond=0)
        end_datetime = end_datetime.replace(second=0, microsecond=0)

        if not end_datetime > start_datetime:
            raise CommandError('start time must be before end time.')

        # Do a super search and get the signature, buildid, and date processed for
        # every crash in the range
        all_fields = SuperSearchFieldsData().get()
        api = SuperSearch()
        self.stdout.write('Looking at %s to %s' % (start_datetime, end_datetime))

        params = {
            'date': [
                '>={}'.format(start_datetime.isoformat()),
                '<{}'.format(end_datetime.isoformat()),
            ],
            '_columns': ['signature', 'build_id', 'date'],
            '_facets_size': 0,
            '_fields': all_fields,

            # Set up first page
            '_results_offset': 0,
            '_results_number': MAX_PAGE,
        }

        results = {}
        crashids_count = 0

        while True:
            resp = api.get(**params)
            hits = resp['hits']
            for hit in hits:
                crashids_count += 1

                if not hit['build_id']:
                    # Not all crashes have a build id, so skip the ones that don't.
                    continue

                if hit['signature'] in results:
                    data = results[hit['signature']]
                    data['build_id'] = min(data['build_id'], hit['build_id'])
                    data['date'] = min(data['date'], hit['date'])
                else:
                    data = {
                        'signature': hit['signature'],
                        'build_id': hit['build_id'],
                        'date': hit['date']
                    }
                results[hit['signature']] = data

            # If there are no more crash ids to get, we return
            total = resp['total']
            if not hits or crashids_count >= total:
                break

            # Get the next page, but only as many results as we need
            params['_results_offset'] += MAX_PAGE
            params['_results_number'] = min(
                # MAX_PAGE is the maximum we can request
                MAX_PAGE,

                # The number of results Super Search hasn't returned to us so far
                total - crashids_count
            )

        signature_data = results.values()

        # Save signature data to the db
        for item in signature_data:
            if options['dry_run']:
                self.stdout.write(
                    'Inserting/updating signature (%s, %s, %s)' %
                    (item['signature'], item['date'], item['build_id'])
                )
            else:
                self.update_crashstats_signature(
                    signature=item['signature'],
                    report_date=item['date'],
                    report_build=item['build_id'],
                )

        self.stdout.write('Inserted/updated %d signatures.' % len(signature_data))
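
Inside the while loop, each signature keeps the earliest build id and date seen for it. The reduction isolated and runnable, with illustrative hits:

hits = [
    {"signature": "OOM | small", "build_id": 20200102000000, "date": "2020-01-02"},
    {"signature": "OOM | small", "build_id": 20200101000000, "date": "2020-01-01"},
    {"signature": "shutdownhang", "build_id": 0, "date": "2020-01-03"},  # skipped
]

results = {}
for hit in hits:
    if not hit["build_id"]:
        # Not all crashes have a build id; skip those.
        continue
    data = results.get(hit["signature"])
    if data:
        data["build_id"] = min(data["build_id"], hit["build_id"])
        data["date"] = min(data["date"], hit["date"])
    else:
        results[hit["signature"]] = dict(hit)

print(results)
# {'OOM | small': {'signature': 'OOM | small',
#                  'build_id': 20200101000000, 'date': '2020-01-01'}}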
Example #9
class TelemetryBotoS3CrashStorage(BotoS3CrashStorage):
    """Sends a subset of the processed crash to an S3 bucket

    The subset of the processed crash is based on the JSON Schema which is
    derived from "socorro/external/es/super_search_fields.py".

    """

    required_config = Namespace()
    required_config.add_option(
        'resource_class',
        default='socorro.external.boto.connection_context.RegionalS3ConnectionContext',
        doc='fully qualified dotted Python classname to handle Boto connections',
        from_string_converter=class_converter,
        reference_value_from='resource.boto')

    def __init__(self, config, *args, **kwargs):
        super().__init__(config, *args, **kwargs)
        self._all_fields = SuperSearchFieldsData().get()

    def save_raw_and_processed(self, raw_crash, dumps, processed_crash,
                               crash_id):
        crash_report = {}

        # TODO Optimization opportunity: inspect CRASH_REPORT_JSON_SCHEMA,
        # collect all of its (recursive) keys, and use that list to keep the
        # two loops below from filling `crash_report` with keys that will
        # never be needed.

        # Rename fields in raw_crash
        raw_fields_map = dict((x['in_database_name'], x['name'])
                              for x in self._all_fields.values()
                              if x['namespace'] == 'raw_crash')
        for key, val in raw_crash.items():
            crash_report[raw_fields_map.get(key, key)] = val

        # Rename fields in processed_crash
        processed_fields_map = dict((x['in_database_name'], x['name'])
                                    for x in self._all_fields.values()
                                    if x['namespace'] == 'processed_crash')
        for key, val in processed_crash.items():
            crash_report[processed_fields_map.get(key, key)] = val

        # Validate crash_report
        crash_report = json_schema_reducer.make_reduced_dict(
            CRASH_REPORT_JSON_SCHEMA, crash_report)
        self.save_processed(crash_report)

    @staticmethod
    def _do_save_processed(boto_connection, processed_crash):
        """Overriding this to change "name of thing" to crash_report"""
        crash_id = processed_crash['uuid']
        data = boto_connection._convert_mapping_to_string(
            processed_crash).encode('utf-8')
        boto_connection.submit(crash_id, "crash_report", data)

    @staticmethod
    def _do_get_unredacted_processed(boto_connection, crash_id,
                                     json_object_hook):
        """Overriding this to change "name of thing" to crash_report"""
        try:
            processed_crash_as_string = boto_connection.fetch(
                crash_id, 'crash_report')
            return json.loads(processed_crash_as_string,
                              object_hook=json_object_hook)
        except boto_connection.ResponseError as x:
            raise CrashIDNotFound('%s not found: %s' % (crash_id, x))