Example No. 1
    def get(self, **kwargs):
        """Return JSON data of a crash report, given its uuid. """
        filters = [
            ('uuid', None, str),
            ('datatype', None, str),
            ('name', None, str)  # only applicable if datatype == 'raw'
        ]
        params = external_common.parse_arguments(filters, kwargs, modern=True)

        if not params.uuid:
            raise MissingArgumentError('uuid')

        if not params.datatype:
            raise MissingArgumentError('datatype')

        datatype_method_mapping = {
            'raw': 'get_raw_dump',
            'meta': 'get_raw_crash',
            'processed': 'get_processed',
            'unredacted': 'get_unredacted_processed',
        }
        if params.datatype not in datatype_method_mapping:
            raise BadArgumentError(params.datatype)
        get = getattr(self, datatype_method_mapping[params.datatype])
        try:
            if params.datatype == 'raw':
                return get(params.uuid, name=params.name)
            else:
                return get(params.uuid)
        except CrashIDNotFound:
            # The CrashIDNotFound exception raised inside the crashstorage
            # is too revealing: its message contains information about
            # buckets and prefix keys. Re-wrap it here so the message is
            # just the crash ID.
            raise CrashIDNotFound(params.uuid)
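Each filter triple follows a (name, default, cast) convention consumed by external_common.parse_arguments. A minimal sketch of that behavior, purely to illustrate the convention (the real Socorro parser does more, and the modern=True flag changes its error handling):

    # Hypothetical stand-in for external_common.parse_arguments, shown only
    # to illustrate the (name, default, cast) filter convention used above.
    class Params(dict):
        """Dict with attribute access, mimicking params.uuid-style access."""
        __getattr__ = dict.get

    def parse_arguments_sketch(filters, kwargs):
        params = Params()
        for name, default, cast in filters:
            value = kwargs.get(name, default)
            # Cast only when a value was actually supplied.
            params[name] = cast(value) if value is not None else default
        return params

    params = parse_arguments_sketch(
        [('uuid', None, str), ('datatype', None, str)],
        {'uuid': 'de1bb258-cbbf-4589-a673-34f800160918', 'datatype': 'meta'},
    )
    assert params.uuid == 'de1bb258-cbbf-4589-a673-34f800160918'
    assert params.datatype == 'meta'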
Example No. 2
    def get(self, **kwargs):
        """Return JSON data of a crash report, given its uuid. """

        filters = [
            ('uuid', None, 'str'),
            ('datatype', None, 'str'),
            ('name', None, 'str')  # only applicable if datatype == 'raw'
        ]
        params = external_common.parse_arguments(filters, kwargs)

        if not params.uuid:
            raise MissingArgumentError('uuid')

        if not params.datatype:
            raise MissingArgumentError('datatype')

        # get a generic crashstorage instance from whatever external resource
        # is implementing this service.
        store = self.get_storage()

        datatype_method_mapping = {
            'raw': 'get_raw_dump',
            'meta': 'get_raw_crash',
            'processed': 'get_processed',
            'unredacted': 'get_unredacted_processed',
        }

        get = getattr(store, datatype_method_mapping[params.datatype])
        try:
            if params.datatype == 'raw':
                return (get(params.uuid,
                            name=params.name), 'application/octet-stream')
            else:
                return get(params.uuid)
        except CrashIDNotFound:
            if params.datatype in ('processed', 'unredacted'):
                # try to fetch a raw crash just to ensure that the raw crash
                # exists.  If this line fails, there's no reason to actually
                # submit the priority job.
                try:
                    store.get_raw_crash(params.uuid)
                except CrashIDNotFound:
                    raise ResourceNotFound(params.uuid)
                # search through the other registered services to find the
                # Priorityjob service.
                try:
                    priorityjob_service_impl = self.all_services[
                        'Priorityjobs']
                except KeyError:
                    raise ServiceUnavailable('Priorityjobs')
                # get the underlying implementation of the Priorityjob
                # service and instantiate it.
                priority_job_service = priorityjob_service_impl.cls(
                    config=self.config)
                # create the priority job for this crash ID
                priority_job_service.create(uuid=params.uuid)
                raise ResourceUnavailable(params.uuid)
            raise ResourceNotFound(params.uuid)
Example No. 3
    def get(self, **kwargs):
        filters = [
            ('product', None, 'str'),
            ('version', None, 'str'),
            ('build_id', None, 'int'),
        ]
        params = external_common.parse_arguments(filters, kwargs)
        required = ('product', 'build_id', 'version')
        for key in required:
            if not params.get(key):
                raise MissingArgumentError(key)

        sql = """
            SELECT
                pv.version_string
            FROM product_versions pv
                LEFT JOIN product_version_builds pvb ON
                    (pv.product_version_id = pvb.product_version_id)
            WHERE pv.product_name = %(product)s
            AND pv.release_version = %(version)s
            AND pvb.build_id = %(build_id)s
        """
        results = self.query(sql, params)

        # The query can return multiple results, but they're the same value. So
        # we just return the first one.
        version_string = [
            row['version_string'] for row in results.zipped()
        ]
        if version_string:
            version_string = [version_string[0]]

        return {
            'hits': version_string
        }
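The %(product)s placeholders are DB-API "pyformat" parameters, so the driver (psycopg2 here) binds them from a mapping and no values are ever interpolated by hand. A hedged sketch of the same binding with plain psycopg2 (the DSN and example values are placeholders):

    # Sketch only: binding the same named placeholders with plain psycopg2.
    # The DSN and the example values are hypothetical.
    import psycopg2

    conn = psycopg2.connect('dbname=breakpad')
    with conn.cursor() as cursor:
        cursor.execute(
            "SELECT pv.version_string"
            " FROM product_versions pv"
            " WHERE pv.product_name = %(product)s"
            " AND pv.release_version = %(version)s",
            {'product': 'Firefox', 'version': '52.0'},
        )
        rows = cursor.fetchall()  # e.g. [('52.0.1',)]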
Example No. 4
    def create(self, **kwargs):
        """Add a new job to the priority queue
        """
        filters = [
            ("uuid", None, "str"),
        ]
        params = external_common.parse_arguments(filters, kwargs)

        if not params.uuid:
            raise MissingArgumentError('uuid')

        with self.context() as connection:
            try:
                self.config.logger.debug(
                    'Inserting priority job into RabbitMQ %s', params.uuid)
                connection.channel.basic_publish(
                    exchange='',
                    routing_key=self.config.priority_queue_name,
                    body=params.uuid,
                    properties=pika.BasicProperties(delivery_mode=2))
            except ChannelClosed:
                self.config.logger.error(
                    "Failed inserting priorityjobs data into RabbitMQ",
                    exc_info=True)
                return False

        return True
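For context, delivery_mode=2 asks RabbitMQ to persist the message to disk. The publish above can be reproduced with plain pika roughly like this (broker host and queue name are assumptions):

    # Minimal standalone sketch of the publish above. The broker host and
    # queue name are hypothetical stand-ins for the configured values.
    import pika

    connection = pika.BlockingConnection(pika.ConnectionParameters('localhost'))
    channel = connection.channel()
    channel.queue_declare(queue='socorro.priority', durable=True)
    channel.basic_publish(
        exchange='',
        routing_key='socorro.priority',
        body='de1bb258-cbbf-4589-a673-34f800160918',
        properties=pika.BasicProperties(delivery_mode=2),  # persistent message
    )
    connection.close()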
Example No. 5
    def get(self, **kwargs):
        """Return a dict that holds the throttling value per build type
        for a specific product."""
        filters = [
            ('product', None, 'str'),
        ]
        params = external_common.parse_arguments(filters, kwargs)
        required = ('product', )
        for key in required:
            if not params.get(key):
                raise MissingArgumentError(key)

        sql = """
            SELECT
                build_type,
                throttle::REAL
            FROM product_build_types
            WHERE product_name = %(product)s
        """
        results = self.query(sql, params)

        build_types = {}
        for row in results.zipped():
            build_types[row['build_type']] = row['throttle']

        return {
            'hits': build_types,
        }
Example No. 6
    def get(self, **kwargs):
        filters = [
            ("vendor_hex", None, ["list", "str"]),
            ("adapter_hex", None, ["list", "str"]),
        ]
        params = external_common.parse_arguments(filters, kwargs)
        for key in ('vendor_hex', 'adapter_hex'):
            param = params[key]
            if not param:
                raise MissingArgumentError(key)

            params[key] = tuple(params[key])

        sql_query = """
            SELECT
                vendor_hex, adapter_hex, vendor_name, adapter_name
            FROM graphics_device
            WHERE vendor_hex IN %(vendor_hex)s
            AND adapter_hex IN %(adapter_hex)s
        """

        results = self.query(sql_query, params)
        hits = results.zipped()

        return {'hits': hits, 'total': len(hits)}
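The tuple() cast is what makes IN %(vendor_hex)s work: psycopg2 adapts a Python tuple to a parenthesized SQL list, while a plain list would not be adapted that way. A small sketch (the DSN is a placeholder):

    # Sketch: how psycopg2 expands a tuple parameter for an IN clause.
    # The DSN is hypothetical; mogrify only renders the query for inspection.
    import psycopg2

    conn = psycopg2.connect('dbname=breakpad')
    with conn.cursor() as cursor:
        rendered = cursor.mogrify(
            "SELECT adapter_name FROM graphics_device"
            " WHERE vendor_hex IN %(vendor_hex)s",
            {'vendor_hex': ('0x8086', '0x10de')},
        )
        # b"... WHERE vendor_hex IN ('0x8086', '0x10de')"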
Example No. 7
    def get(self, **kwargs):
        """Return JSON data of a crash report, given its uuid. """
        filters = [
            ("uuid", None, str),
            ("datatype", None, str),
            ("name", None, str),  # only applicable if datatype == 'raw'
        ]
        params = external_common.parse_arguments(filters, kwargs, modern=True)

        if not params.uuid:
            raise MissingArgumentError("uuid")

        if not ooid.is_crash_id_valid(params.uuid):
            raise BadArgumentError("uuid")

        if not params.datatype:
            raise MissingArgumentError("datatype")

        datatype_method_mapping = {
            "raw": "get_raw_dump",
            "meta": "get_raw_crash",
            "processed": "get_processed",
            "unredacted": "get_unredacted_processed",
        }
        if params.datatype not in datatype_method_mapping:
            raise BadArgumentError(params.datatype)
        get = getattr(self, datatype_method_mapping[params.datatype])
        try:
            if params.datatype == "raw":
                return get(params.uuid, name=params.name)
            else:
                return get(params.uuid)
        except CrashIDNotFound as cidnf:
            self.logger.warning(
                "%(datatype)s not found: %(exception)s",
                {
                    "datatype": params.datatype,
                    "exception": cidnf
                },
            )
            # The CrashIDNotFound exception raised inside the crashstorage
            # is too revealing: its message contains information about
            # buckets and prefix keys. Re-wrap it here so the message is
            # just the crash ID.
            raise CrashIDNotFound(params.uuid)
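Compared with Example No. 1, this version also rejects malformed IDs up front via ooid.is_crash_id_valid. A shape-only sketch of such a check (the real validator applies Socorro's own rules, so treat this as an approximation):

    # Hedged sketch of a shape-only crash ID check; the real
    # ooid.is_crash_id_valid enforces Socorro-specific rules beyond this.
    import re

    CRASH_ID_RE = re.compile(
        r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-"
        r"[0-9a-f]{4}-[0-9a-f]{12}$"
    )

    def is_crash_id_valid_sketch(crash_id):
        return bool(CRASH_ID_RE.match(crash_id))

    assert is_crash_id_valid_sketch("de1bb258-cbbf-4589-a673-34f800160918")
    assert not is_crash_id_valid_sketch("not-a-crash-id")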
Example No. 8
    def get(self, **kwargs):
        """Return a list of ADUs and crash counts by signature and ADU date
        """
        now = datetimeutil.utc_now().date()
        lastweek = now - datetime.timedelta(weeks=1)

        filters = [
            ("start_date", lastweek, "date"),
            ("end_date", now, "date"),
            ("signature", None, "str"),
            ("channel", None, "str"),
            ("product_name", None, "str"),
        ]

        params = external_common.parse_arguments(filters, kwargs)

        for param in ("start_date", "end_date", "signature", "channel"):
            if not params[param]:
                raise MissingArgumentError(param)

        if params.end_date - params.start_date > datetime.timedelta(days=365):
            raise BadArgumentError('Duration too long. Max 365 days.')

        sql_query = """
            SELECT
                product_name,
                signature,
                adu_date::TEXT,
                build_date::TEXT,
                buildid::TEXT,
                crash_count,
                adu_count,
                os_name,
                channel
            FROM crash_adu_by_build_signature
            WHERE adu_date BETWEEN %(start_date)s AND %(end_date)s
            AND product_name = %(product_name)s
            AND channel = %(channel)s
            AND signature = %(signature)s
            ORDER BY buildid
        """

        error_message = (
            "Failed to retrieve crash ADU by build signature from PostgreSQL"
        )
        results = self.query(sql_query, params, error_message=error_message)

        crashes = results.zipped()

        return {
            "hits": crashes,
            "total": len(crashes)
        }
Example No. 9
    def prepare_search_params(self, **kwargs):
        """Return a dictionary of parameters for a search-like SQL query.

        Uses socorro.middleware.search_common.get_parameters() for arguments
        filtering.
        """
        params = search_common.get_parameters(kwargs)

        if not params["signature"]:
            raise MissingArgumentError('signature')

        params["terms"] = params["signature"]
        params["search_mode"] = "is_exactly"

        # Default mode falls back to starts_with for postgres
        if params["plugin_search_mode"] == "default":
            params["plugin_search_mode"] = "starts_with"

        # Searching for terms in plugins
        if params["report_process"] == "plugin" and params["plugin_terms"]:
            params["plugin_terms"] = " ".join(params["plugin_terms"])
            params["plugin_terms"] = Crashes.prepare_terms(
                params["plugin_terms"],
                params["plugin_search_mode"]
            )

        # Get information about the versions
        util_service = Util(config=self.context)
        params["versions_info"] = util_service.versions_info(**params)

        # Parsing the versions
        params["versions_string"] = params["versions"]
        (params["versions"], params["products"]) = Crashes.parse_versions(
            params["versions"],
            params["products"]
        )

        # Changing the OS ids to OS names
        if hasattr(self.context, 'webapi'):
            context = self.context.webapi
        else:
            # old middleware
            context = self.context
        for i, elem in enumerate(params["os"]):
            for platform in context.platforms:
                if platform["id"] == elem:
                    params["os"][i] = platform["name"]

        return params
Example No. 10
    def get_count_by_day(self, **kwargs):
        """Returns the number of crashes on a daily basis"""
        filters = [
            ("signature", None, "str"),
            ("start_date", None, "date"),
            ("end_date", None, "date")
        ]

        DATE_FORMAT = "%Y-%m-%d"

        params = external_common.parse_arguments(filters, kwargs)

        for param in ("signature", "start_date"):
            if not params[param]:
                raise MissingArgumentError(param)

        if not params.end_date:
            params.end_date = params.start_date + datetime.timedelta(1)

        sql = """
            SELECT
                COUNT(*),
                date_processed::date
            FROM
                reports_clean rc
            JOIN signatures ON
                rc.signature_id=signatures.signature_id
            WHERE
                rc.date_processed >= %(start_date)s AND
                rc.date_processed::date < %(end_date)s AND
                signatures.signature=%(signature)s
            GROUP BY
                rc.date_processed::date
        """

        hits = {}

        for count, date in self.query(sql, params):
            hits[date.strftime(DATE_FORMAT)] = count

        current = params.start_date
        while current < params.end_date:
            hits.setdefault(current.strftime(DATE_FORMAT), 0)
            current += datetime.timedelta(1)

        return {"hits": hits, "total": len(hits)}
Example No. 11
    def get(self, **kwargs):
        """Return JSON data of a crash report, given its uuid."""
        filters = [("uuid", None, str)]
        params = external_common.parse_arguments(filters, kwargs, modern=True)

        if not params.uuid:
            raise MissingArgumentError("uuid")

        try:
            return self.get_unredacted_processed(params.uuid)
        except CrashIDNotFound as cidnf:
            self.logger.warning("telemetry crash not found: %(exception)s",
                                {"exception": cidnf})
            # The CrashIDNotFound exception that happens inside the
            # crashstorage is too revealing as exception message contains
            # information about buckets and prefix keys. Re-wrap it here so the
            # message is just the crash ID.
            raise CrashIDNotFound(params.uuid)
Example No. 12
    def get(self, **kwargs):
        '''Return the result of a custom query. '''
        params = external_common.parse_arguments(self.filters, kwargs)

        if not params.query:
            raise MissingArgumentError('query')

        # Set indices.
        indices = []
        if not params.indices:
            # By default, use the last two indices.
            today = datetimeutil.utc_now()
            last_week = today - datetime.timedelta(days=7)

            indices = self.generate_list_of_indexes(last_week, today)
        elif len(params.indices) == 1 and params.indices[0] == 'ALL':
            # If we want all indices, just do nothing.
            pass
        else:
            indices = params.indices

        search_args = {}
        if indices:
            search_args['index'] = indices
            search_args['doc_type'] = (
                self.config.elasticsearch.elasticsearch_doctype
            )

        connection = self.get_connection()

        try:
            results = connection.search(
                body=json.dumps(params.query),
                **search_args
            )
        except elasticsearch.exceptions.NotFoundError as e:
            missing_index = re.findall(BAD_INDEX_REGEX, e.error)[0]
            raise ResourceNotFound(
                "elasticsearch index '%s' does not exist" % missing_index
            )
        except elasticsearch.exceptions.TransportError as e:
            raise DatabaseError(e)

        return results
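Under the hood this is a plain elasticsearch-py search() call with an optional index list. A minimal sketch against a local cluster, assuming a pre-7.x client that still accepts doc_type (the index names and doctype here are made up):

    # Sketch of the underlying call. Assumes an elasticsearch-py version
    # that accepts doc_type; index names and doctype are hypothetical.
    import json
    import elasticsearch

    es = elasticsearch.Elasticsearch(['localhost:9200'])
    results = es.search(
        index=['socorro201801', 'socorro201802'],
        doc_type='crash_reports',
        body=json.dumps({'query': {'match_all': {}}}),
    )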
Example No. 13
    def get(self, **kwargs):
        """Return a list of signatures-to-bug_ids or bug_ids-to-signatures
           associations. """
        params = external_common.parse_arguments(self.filters,
                                                 kwargs,
                                                 modern=True)

        if not params['signatures'] and not params['bug_ids']:
            raise MissingArgumentError('specify one of signatures or bug_ids')
        elif params['signatures'] and params['bug_ids']:
            raise BadArgumentError('specify only one of signatures or bug_ids')

        sql_params = []
        if params['signatures']:
            sql_params.append(tuple(params.signatures))

            sql = """/* socorro.external.postgresql.bugs.Bugs.get */
                SELECT ba.signature, bugs.id
                FROM bugs
                    JOIN bug_associations AS ba ON bugs.id = ba.bug_id
                WHERE EXISTS(
                    SELECT 1 FROM bug_associations
                    WHERE bug_associations.bug_id = bugs.id
                    AND signature IN %s
                )
            """
        elif params['bug_ids']:
            sql_params.append(tuple(params.bug_ids))

            sql = """/* socorro.external.postgresql.bugs.Bugs.get */
                SELECT ba.signature, bugs.id
                FROM bugs
                    JOIN bug_associations AS ba ON bugs.id = ba.bug_id
                WHERE bugs.id IN %s
            """

        error_message = "Failed to retrieve bug associations from PostgreSQL"
        results = self.query(sql, sql_params, error_message=error_message)

        bugs = results.zipped()

        return {"hits": bugs, "total": len(bugs)}
Example No. 14
    def get(self, **kwargs):
        """ return GC crashes per build ID """

        for arg in ['product', 'version']:
            if not kwargs.get(arg):
                raise MissingArgumentError(arg)

        now = datetimeutil.utc_now().date()
        lastweek = now - datetime.timedelta(weeks=1)

        filters = [
            ("product", None, "str"),
            ("version", None, "str"),
            ("from_date", lastweek, "date"),
            ("to_date", now, "date"),
        ]

        params = external_common.parse_arguments(filters, kwargs)

        result = self.query(
            """
            /* socorro.external.postgresql.gccrashes.GCCrashes.get */
            SELECT
                build::text,
                sum(gc_count_madu)
            FROM gccrashes
            JOIN product_versions
            USING (product_version_id)
            WHERE product_name = %(product)s
            AND version_string = %(version)s
            AND report_date BETWEEN %(from_date)s AND %(to_date)s
            AND build IS NOT NULL
            GROUP BY build
            ORDER BY build
        """, params)

        # The query result isn't a list of dicts here, so we turn it into
        # a plain list, which yields a list of tuples.
        rows = list(result)
        return {'hits': rows, 'total': len(rows)}
Example No. 15
    def get(self, **kwargs):

        filters = [
            ("backfill_type", None, "str"),
            ("reports_clean", True, "bool"),
            ("check_period", '01:00:00', "str"),
            ("table_name", None, "str"),
            ("update_day", None, "datetime"),
            ("start_date", None, "datetime"),
            ("end_date", None, "datetime"),
        ]

        params = external_common.parse_arguments(filters, kwargs)

        if not params.backfill_type:
            raise MissingArgumentError('backfill_type')

        date_param = ['update_day', 'start_date', 'end_date']
        for i in date_param:
            if i in kwargs:
                params[i] = str(params[i].date())

        try:
            query = 'SELECT backfill_%(backfill_type)s (%(params)s); '
            required_params = BACKFILL_PARAMETERS[params.backfill_type]
            query_params = [(i, params[i]) for i in required_params]
            query_params_str = ', '.join('%(' + str(i[0]) + ')s'
                                         for i in query_params)
            query = query % {
                'backfill_type': params.backfill_type,
                'params': query_params_str
            }
        except KeyError:
            raise BadArgumentError(kwargs['backfill_type'])

        error_message = "Failed to retrieve backfill %s from PostgreSQL"
        error_message = error_message % kwargs['backfill_type']
        results = self.query(query, params, error_message=error_message)
        return results
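The query assembly is easier to follow with a concrete case. Assuming a hypothetical BACKFILL_PARAMETERS entry mapping 'adu' to ('update_day',), the string building above produces:

    # Worked sketch of the query assembly above; the BACKFILL_PARAMETERS
    # entry is an assumed example, not the real table.
    BACKFILL_PARAMETERS = {'adu': ('update_day',)}

    backfill_type = 'adu'
    required_params = BACKFILL_PARAMETERS[backfill_type]
    query_params_str = ', '.join('%(' + name + ')s' for name in required_params)
    query = 'SELECT backfill_%(backfill_type)s (%(params)s); ' % {
        'backfill_type': backfill_type,
        'params': query_params_str,
    }
    assert query == 'SELECT backfill_adu (%(update_day)s); '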
Example No. 16
    def delete_field(self, **kwargs):
        """Remove a field from the database.

        Removing a field means that it won't be indexed in elasticsearch
        anymore, nor will it be exposed or accessible via supersearch. It
        doesn't delete the data from crash reports though, so it would be
        possible to re-create the field and reindex some indices to get that
        data back.
        """
        filters = [
            ('name', None, 'str'),
        ]
        params = external_common.parse_arguments(filters, kwargs)

        if not params['name']:
            raise MissingArgumentError('name')

        es_connection = self.get_connection()
        es_connection.delete(
            index=self.config.elasticsearch.elasticsearch_default_index,
            doc_type='supersearch_fields',
            id=params['name'],
            refresh=True,
        )
Example No. 17
    def get(self, **kwargs):
        params = external_common.parse_arguments(self.filters, kwargs)

        if not params['signatures']:
            raise MissingArgumentError('signatures')

        sql_params = [tuple(params['signatures'])]
        sql = """
            SELECT
                signature,
                first_report AS first_date,
                first_build::VARCHAR
            FROM signatures
            WHERE signature IN %s
        """

        error_message = 'Failed to retrieve signatures from PostgreSQL'
        results = self.query(sql, sql_params, error_message=error_message)

        signatures = results.zipped()
        return {
            'hits': signatures,
            'total': len(signatures)
        }
Example No. 18
    def get(self, **kwargs):
        """Return a list of results and aggregations based on parameters.

        The list of accepted parameters (with types and default values) is in
        the database and can be accessed with the super_search_fields service.
        """
        # Require that the list of fields be passed.
        if not kwargs.get('_fields'):
            raise MissingArgumentError('_fields')
        self.all_fields = kwargs['_fields']

        # Filter parameters and raise potential errors.
        params = self.get_parameters(**kwargs)

        # Find the indices to use to optimize the elasticsearch query.
        indices = self.get_indices(params['date'])

        # Create and configure the search object.
        search = Search(
            using=self.get_connection(),
            index=indices,
            doc_type=self.config.elasticsearch.elasticsearch_doctype,
        )

        # Create filters.
        filters = []
        histogram_intervals = {}

        for field, sub_params in params.items():
            sub_filters = None
            for param in sub_params:
                if param.name.startswith('_'):
                    # By default, all param values are turned into lists,
                    # even when they can only ever hold a single value.
                    # For those parameters, we just extract the value from
                    # the made-up list.
                    if param.name == '_results_offset':
                        results_from = param.value[0]
                    elif param.name == '_results_number':
                        results_number = param.value[0]
                        if results_number > 1000:
                            raise BadArgumentError(
                                '_results_number',
                                msg=(
                                    '_results_number cannot be greater '
                                    'than 1,000'
                                )
                            )
                        if results_number < 0:
                            raise BadArgumentError(
                                '_results_number',
                                msg='_results_number cannot be negative'
                            )
                    elif param.name == '_facets_size':
                        facets_size = param.value[0]
                        # Why cap it?
                        # Because a query that covers a lot of different
                        # things can get very large and hog resources
                        # excessively. Downloading, for example, 100k
                        # facets (and 0 hits) when there is plenty of data
                        # yields an 11 MB JSON file.
                        if facets_size > 10000:
                            raise BadArgumentError(
                                '_facets_size greater than 10,000'
                            )

                    for f in self.histogram_fields:
                        if param.name == '_histogram_interval.%s' % f:
                            histogram_intervals[f] = param.value[0]

                    # Don't use meta parameters in the query.
                    continue

                field_data = self.all_fields[param.name]
                name = self.get_full_field_name(field_data)

                if param.data_type in ('date', 'datetime'):
                    param.value = datetimeutil.date_to_string(param.value)
                elif param.data_type == 'enum':
                    param.value = [x.lower() for x in param.value]
                elif param.data_type == 'str' and not param.operator:
                    param.value = [x.lower() for x in param.value]

                # Operators needing wildcards, and the associated value
                # transformation with said wildcards.
                operator_wildcards = {
                    '~': '*%s*',  # contains
                    '^': '%s*',  # starts with
                    '$': '*%s'  # ends with
                }
                # Operators needing ranges, and the associated Elasticsearch
                # comparison operator.
                operator_range = {
                    '>': 'gt',
                    '<': 'lt',
                    '>=': 'gte',
                    '<=': 'lte',
                }

                args = {}
                filter_type = 'term'
                filter_value = None

                if not param.operator:
                    # contains one of the terms
                    if len(param.value) == 1:
                        val = param.value[0]

                        if not isinstance(val, basestring) or ' ' not in val:
                            # There's only one term and no white space, this
                            # is a simple term filter.
                            filter_value = val
                        else:
                            # If the term contains white spaces, we want to
                            # perform a phrase query.
                            filter_type = 'query'
                            args = Q(
                                'simple_query_string',
                                query=param.value[0],
                                fields=[name],
                                default_operator='and',
                            ).to_dict()
                    else:
                        # There are several terms, this is a terms filter.
                        filter_type = 'terms'
                        filter_value = param.value
                elif param.operator == '=':
                    # is exactly
                    if field_data['has_full_version']:
                        name = '%s.full' % name
                    filter_value = param.value
                elif param.operator in operator_range:
                    filter_type = 'range'
                    filter_value = {
                        operator_range[param.operator]: param.value
                    }
                elif param.operator == '__null__':
                    filter_type = 'missing'
                    args['field'] = name
                elif param.operator == '__true__':
                    filter_type = 'term'
                    filter_value = True
                elif param.operator == '@':
                    filter_type = 'regexp'
                    if field_data['has_full_version']:
                        name = '%s.full' % name
                    filter_value = param.value
                elif param.operator in operator_wildcards:
                    filter_type = 'query'

                    # Wildcard operations are better applied to a non-analyzed
                    # field (called "full") if there is one.
                    if field_data['has_full_version']:
                        name = '%s.full' % name

                    q_args = {}
                    q_args[name] = (
                        operator_wildcards[param.operator] % param.value
                    )
                    query = Q('wildcard', **q_args)
                    args = query.to_dict()

                if filter_value is not None:
                    args[name] = filter_value

                if args:
                    new_filter = F(filter_type, **args)
                    if param.operator_not:
                        new_filter = ~new_filter

                    if sub_filters is None:
                        sub_filters = new_filter
                    elif filter_type == 'range':
                        sub_filters &= new_filter
                    else:
                        sub_filters |= new_filter

                    continue

            if sub_filters is not None:
                filters.append(sub_filters)

        search = search.filter(F('bool', must=filters))

        # Restricting returned fields.
        fields = []

        # We keep track of the requested columns in order to make sure we
        # return those column names and not aliases for example.
        self.request_columns = []
        for param in params['_columns']:
            for value in param.value:
                if not value:
                    continue

                self.request_columns.append(value)
                field_name = self.get_field_name(value, full=False)
                fields.append(field_name)

        search = search.fields(fields)

        # Sorting.
        sort_fields = []
        for param in params['_sort']:
            for value in param.value:
                if not value:
                    continue

                # Values starting with a '-' are sorted in descending order.
                # In order to retrieve the database name of the field, we
                # must first remove the '-' part and add it back later.
                # Example: given ['product', '-version'], the results will be
                # sorted by ascending product then descending version.
                desc = False
                if value.startswith('-'):
                    desc = True
                    value = value[1:]

                field_name = self.get_field_name(value)

                if desc:
                    # The underlying library understands that '-' means
                    # sorting in descending order.
                    field_name = '-' + field_name

                sort_fields.append(field_name)

        search = search.sort(*sort_fields)

        # Pagination.
        results_to = results_from + results_number
        search = search[results_from:results_to]

        # Create facets.
        if facets_size:
            self._create_aggregations(
                params,
                search,
                facets_size,
                histogram_intervals
            )

        # Query and compute results.
        hits = []

        if params['_return_query'][0].value[0]:
            # Return only the JSON query that would be sent to elasticsearch.
            return {
                'query': search.to_dict(),
                'indices': indices,
            }

        errors = []

        # We call elasticsearch with a computed list of indices, based on
        # the date range. However, if that list contains indices that do not
        # exist in elasticsearch, an error will be raised. We thus want to
        # remove all failing indices until we either have a valid list, or
        # an empty list in which case we return no result.
        while True:
            try:
                results = search.execute()
                for hit in results:
                    hits.append(self.format_fields(hit.to_dict()))

                total = search.count()

                aggregations = getattr(results, 'aggregations', {})
                if aggregations:
                    aggregations = self.format_aggregations(aggregations)

                shards = getattr(results, '_shards', {})

                break  # Yay! Results!
            except NotFoundError as e:
                missing_index = re.findall(BAD_INDEX_REGEX, e.error)[0]
                if missing_index in indices:
                    del indices[indices.index(missing_index)]
                else:
                    # Wait what? An error caused by an index that was not
                    # in the request? That should never happen, but in case
                    # it does, better know it.
                    raise

                errors.append({
                    'type': 'missing_index',
                    'index': missing_index,
                })

                if indices:
                    # Update the list of indices and try again.
                    # Note: we need to first empty the list of indices before
                    # updating it, otherwise the removed indices never get
                    # actually removed.
                    search = search.index().index(*indices)
                else:
                    # There is no index left in the list, return an empty
                    # result.
                    hits = []
                    total = 0
                    aggregations = {}
                    shards = None
                    break
Example No. 19
    def update_field(self, **kwargs):
        """Update an existing field in the database.

        If the field does not exist yet, a ResourceNotFound error is raised.

        If you want to update only some keys, just do not pass the ones you
        don't want to change.
        """
        filters = [
            ('name', None, 'str'),
            ('data_validation_type', None, 'str'),
            ('default_value', None, 'str'),
            ('description', None, 'str'),
            ('form_field_choices', None, ['list', 'str']),
            ('has_full_version', None, 'bool'),
            ('in_database_name', None, 'str'),
            ('is_exposed', None, 'bool'),
            ('is_returned', None, 'bool'),
            ('is_mandatory', None, 'bool'),
            ('query_type', None, 'str'),
            ('namespace', None, 'str'),
            ('permissions_needed', None, ['list', 'str']),
            ('storage_mapping', None, 'json'),
        ]
        params = external_common.parse_arguments(filters, kwargs)

        if not params['name']:
            raise MissingArgumentError('name')

        # Remove all the parameters that were not explicitly passed.
        for key in params.keys():
            if key not in kwargs:
                del params[key]

        es_connection = self.get_connection()
        es_index = self.config.elasticsearch.elasticsearch_default_index
        es_doc_type = 'supersearch_fields'

        # First verify that the field does exist.
        try:
            old_value = es_connection.get(
                index=es_index,
                doc_type=es_doc_type,
                id=params['name'],
            )['_source']  # Only the actual document is of interest.
        except elasticsearch.exceptions.NotFoundError:
            # This field does not exist yet, it thus cannot be updated!
            raise ResourceNotFound(
                'The field "%s" does not exist in the database, it needs to '
                'be created before it can be updated. ' % params['name'])

        # Then, if necessary, verify the new mapping.
        if (('storage_mapping' in params
             and params['storage_mapping'] != old_value['storage_mapping']) or
            ('in_database_name' in params
             and params['in_database_name'] != old_value['in_database_name'])):
            # This is a change that will have an impact on the Elasticsearch
            # mapping, we first need to make sure it doesn't break.
            new_mapping = self.get_mapping(overwrite_mapping=params)

            # Try the mapping. If there is an error, an exception will be
            # raised. If an exception is raised, the new mapping will be
            # rejected.
            self.test_mapping(new_mapping)

        if ('storage_mapping' in params
                and params['storage_mapping'] != old_value['storage_mapping']):
            # The storage mapping is an object, and thus is treated
            # differently than other fields by Elasticsearch. If a user
            # changes the object by removing a field from it, that field won't
            # be removed as part of the update (which performs a merge of all
            # objects in the back-end). We therefore want to perform the merge
            # ourselves, and remove the field from the database before
            # re-indexing it.
            new_doc = old_value.copy()
            new_doc.update(params)

            es_connection.delete(
                index=es_index,
                doc_type=es_doc_type,
                id=new_doc['name'],
            )
            es_connection.index(
                index=es_index,
                doc_type=es_doc_type,
                body=new_doc,
                id=new_doc['name'],
                op_type='create',
                refresh=True,
            )

            # If we made a change to the storage_mapping, log that change.
            self.config.logger.info(
                'Elasticsearch mapping changed for field "%s", '
                'was "%s", now "%s"',
                params['name'],
                old_value['storage_mapping'],
                new_doc['storage_mapping'],
            )
        else:
            # Then update the new field in the database. Note that
            # Elasticsearch takes care of merging the new document into the
            # old one, so missing values won't be changed.
            es_connection.update(
                index=es_index,
                doc_type=es_doc_type,
                body={'doc': params},
                id=params['name'],
                refresh=True,
            )

        return True
Example No. 20
def mocked_supersearch_get(**params):
    if params.get('product'):
        raise MissingArgumentError(params['product'])
    else:
        raise BadArgumentError('That was a bad thing to do')
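This stub is meant to replace the real SuperSearch get in tests so error paths can be exercised. A hedged sketch of wiring it up (the patch target is hypothetical; point it at wherever the code under test looks up SuperSearch.get):

    # Sketch: patching the stub into a test. The target path is hypothetical.
    from unittest import mock

    with mock.patch(
        'crashstats.supersearch.models.SuperSearch.get',
        side_effect=mocked_supersearch_get,
    ):
        pass  # call the code under test and assert on its error handling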
Example No. 21
    def get(self, **kwargs):
        """Return a list of results and aggregations based on parameters.

        The list of accepted parameters (with types and default values) is in
        the database and can be accessed with the super_search_fields service.
        """
        # Require that the list of fields be passed.
        if not kwargs.get("_fields"):
            raise MissingArgumentError("_fields")
        self.all_fields = kwargs["_fields"]

        # Filter parameters and raise potential errors.
        params = self.get_parameters(**kwargs)

        # Find the indices to use to optimize the elasticsearch query.
        indices = self.get_indices(params["date"])

        if "%" in self.context.get_index_template():
            # If the index template is date-centric, drop indices older than
            # the retention policy: they're not valid to search through and
            # probably don't exist.
            policy = datetime.timedelta(
                weeks=self.context.get_retention_policy())
            template = self.context.get_index_template()
            indices = prune_invalid_indices(indices, policy, template)

        # Create and configure the search object.
        search = Search(
            using=self.get_connection(),
            index=indices,
            doc_type=self.context.get_doctype(),
        )

        # Create filters.
        filters = []
        histogram_intervals = {}

        for field, sub_params in params.items():
            sub_filters = None
            for param in sub_params:
                if param.name.startswith("_"):
                    # By default, all param values are turned into lists,
                    # even when they can only ever hold a single value.
                    # For those parameters, we just extract the value from
                    # the made-up list.
                    if param.name == "_results_offset":
                        results_from = param.value[0]
                    elif param.name == "_results_number":
                        results_number = param.value[0]
                        if results_number > 1000:
                            raise BadArgumentError(
                                "_results_number",
                                msg=("_results_number cannot be greater "
                                     "than 1,000"),
                            )
                        if results_number < 0:
                            raise BadArgumentError(
                                "_results_number",
                                msg="_results_number cannot be negative",
                            )
                    elif param.name == "_facets_size":
                        facets_size = param.value[0]
                        # Why cap it?
                        # Because a query that covers a lot of different
                        # things can get very large and hog resources
                        # excessively. Downloading, for example, 100k
                        # facets (and 0 hits) when there is plenty of data
                        # yields an 11 MB JSON file.
                        if facets_size > 10000:
                            raise BadArgumentError(
                                "_facets_size greater than 10,000")

                    for f in self.histogram_fields:
                        if param.name == "_histogram_interval.%s" % f:
                            histogram_intervals[f] = param.value[0]

                    # Don't use meta parameters in the query.
                    continue

                field_data = self.all_fields[param.name]
                name = self.get_full_field_name(field_data)

                if param.data_type in ("date", "datetime"):
                    param.value = datetimeutil.date_to_string(param.value)
                elif param.data_type == "enum":
                    param.value = [x.lower() for x in param.value]
                elif param.data_type == "str" and not param.operator:
                    param.value = [x.lower() for x in param.value]

                # Operators needing wildcards, and the associated value
                # transformation with said wildcards.
                operator_wildcards = {
                    "~": "*%s*",  # contains
                    "^": "%s*",  # starts with
                    "$": "*%s",  # ends with
                }
                # Operators needing ranges, and the associated Elasticsearch
                # comparison operator.
                operator_range = {
                    ">": "gt",
                    "<": "lt",
                    ">=": "gte",
                    "<=": "lte"
                }

                args = {}
                filter_type = "term"
                filter_value = None

                if not param.operator:
                    # contains one of the terms
                    if len(param.value) == 1:
                        val = param.value[0]

                        if not isinstance(val, str) or " " not in val:
                            # There's only one term and no white space, this
                            # is a simple term filter.
                            filter_value = val
                        else:
                            # If the term contains white spaces, we want to
                            # perform a phrase query.
                            filter_type = "query"
                            args = Q(
                                "simple_query_string",
                                query=param.value[0],
                                fields=[name],
                                default_operator="and",
                            ).to_dict()
                    else:
                        # There are several terms, this is a terms filter.
                        filter_type = "terms"
                        filter_value = param.value
                elif param.operator == "=":
                    # is exactly
                    if field_data["has_full_version"]:
                        name = "%s.full" % name
                    filter_value = param.value
                elif param.operator in operator_range:
                    filter_type = "range"
                    filter_value = {
                        operator_range[param.operator]: param.value
                    }
                elif param.operator == "__null__":
                    filter_type = "missing"
                    args["field"] = name
                elif param.operator == "__true__":
                    filter_type = "term"
                    filter_value = True
                elif param.operator == "@":
                    filter_type = "regexp"
                    if field_data["has_full_version"]:
                        name = "%s.full" % name
                    filter_value = param.value
                elif param.operator in operator_wildcards:
                    filter_type = "query"

                    # Wildcard operations are better applied to a non-analyzed
                    # field (called "full") if there is one.
                    if field_data["has_full_version"]:
                        name = "%s.full" % name

                    q_args = {}
                    q_args[name] = operator_wildcards[
                        param.operator] % param.value
                    query = Q("wildcard", **q_args)
                    args = query.to_dict()

                if filter_value is not None:
                    args[name] = filter_value

                if args:
                    new_filter = F(filter_type, **args)
                    if param.operator_not:
                        new_filter = ~new_filter

                    if sub_filters is None:
                        sub_filters = new_filter
                    elif filter_type == "range":
                        sub_filters &= new_filter
                    else:
                        sub_filters |= new_filter

                    continue

            if sub_filters is not None:
                filters.append(sub_filters)

        search = search.filter(F("bool", must=filters))

        # Restricting returned fields.
        fields = []

        # We keep track of the requested columns in order to make sure we
        # return those column names and not aliases for example.
        self.request_columns = []
        for param in params["_columns"]:
            for value in param.value:
                if not value:
                    continue

                self.request_columns.append(value)
                field_name = self.get_field_name(value, full=False)
                fields.append(field_name)

        search = search.fields(fields)

        # Sorting.
        sort_fields = []
        for param in params["_sort"]:
            for value in param.value:
                if not value:
                    continue

                # Values starting with a '-' are sorted in descending order.
                # In order to retrieve the database name of the field, we
                # must first remove the '-' part and add it back later.
                # Example: given ['product', '-version'], the results will be
                # sorted by ascending product then descending version.
                desc = False
                if value.startswith("-"):
                    desc = True
                    value = value[1:]

                field_name = self.get_field_name(value)

                if desc:
                    # The underlying library understands that '-' means
                    # sorting in descending order.
                    field_name = "-" + field_name

                sort_fields.append(field_name)

        search = search.sort(*sort_fields)

        # Pagination.
        results_to = results_from + results_number
        search = search[results_from:results_to]

        # Create facets.
        if facets_size:
            self._create_aggregations(params, search, facets_size,
                                      histogram_intervals)

        # Query and compute results.
        hits = []

        if params["_return_query"][0].value[0]:
            # Return only the JSON query that would be sent to elasticsearch.
            return {"query": search.to_dict(), "indices": indices}

        errors = []

        # We call elasticsearch with a computed list of indices, based on
        # the date range. However, if that list contains indices that do not
        # exist in elasticsearch, an error will be raised. We thus want to
        # remove all failing indices until we either have a valid list, or
        # an empty list in which case we return no result.
        while True:
            try:
                results = search.execute()
                for hit in results:
                    hits.append(self.format_fields(hit.to_dict()))

                total = search.count()

                aggregations = getattr(results, "aggregations", {})
                if aggregations:
                    aggregations = self.format_aggregations(aggregations)

                shards = getattr(results, "_shards", {})

                break  # Yay! Results!
            except NotFoundError as e:
                missing_index = re.findall(BAD_INDEX_REGEX, e.error)[0]
                if missing_index in indices:
                    del indices[indices.index(missing_index)]
                else:
                    # Wait what? An error caused by an index that was not
                    # in the request? That should never happen, but in case
                    # it does, better know it.
                    raise

                errors.append({
                    "type": "missing_index",
                    "index": missing_index
                })

                if indices:
                    # Update the list of indices and try again.
                    # Note: we need to first empty the list of indices before
                    # updating it, otherwise the removed indices never get
                    # actually removed.
                    search = search.index().index(*indices)
                else:
                    # There is no index left in the list, return an empty
                    # result.
                    hits = []
                    total = 0
                    aggregations = {}
                    shards = None
                    break
            except RequestError as exception:
                # Try to handle it gracefully if we can find out what
                # input was bad and caused the exception.
                try:
                    bad_input = ELASTICSEARCH_PARSE_EXCEPTION_REGEX.findall(
                        exception.error)[-1]
                    # Loop over the original parameters to try to figure
                    # out which *key* had the bad input.
                    for key, value in kwargs.items():
                        if value == bad_input:
                            raise BadArgumentError(key)
                except IndexError:
                    # Not an ElasticsearchParseException exception
                    pass

                # Re-raise the original exception
                raise

        if shards and shards.failed:
            # Some shards failed. We want to explain what happened in the
            # results, so the client can decide what to do.
            failed_indices = defaultdict(int)
            for failure in shards.failures:
                failed_indices[failure.index] += 1

            for index, shards_count in failed_indices.items():
                errors.append({
                    "type": "shards",
                    "index": index,
                    "shards_count": shards_count
                })

        return {
            "hits": hits,
            "total": total,
            "facets": aggregations,
            "errors": errors
        }
Example No. 22
    def get_exploitability(self, **kwargs):
        """Return a list of exploitable crash reports.

        See socorro.lib.external_common.parse_arguments() for all filters.
        """
        now = datetimeutil.utc_now().date()
        lastweek = now - datetime.timedelta(weeks=1)

        filters = [
            ("start_date", lastweek, "date"),
            ("end_date", now, "date"),
            ("product", None, "str"),
            ("version", None, "str"),
            ("page", None, "int"),
            ("batch", None, "int"),
        ]

        params = external_common.parse_arguments(filters, kwargs)

        sql_where = """
            report_date BETWEEN %(start_date)s AND %(end_date)s
            AND
            null_count + none_count + low_count + medium_count + high_count > 4
        """

        if params.product:
            sql_where += " AND pv.product_name = %(product)s"
        if params.version:
            sql_where += " AND pv.version_string = %(version)s"

        inner_with_sql = """
            SELECT
                signature,
                SUM(high_count) AS high_count,
                SUM(medium_count) AS medium_count,
                SUM(low_count) AS low_count,
                SUM(null_count) AS null_count,
                SUM(none_count) AS none_count,
                SUM(high_count) + SUM(medium_count) AS med_or_high
            FROM exploitability_reports
            JOIN product_versions AS pv USING (product_version_id)
            WHERE
                high_count + medium_count + null_count + none_count > 4
                AND
                %s
            GROUP BY signature
        """ % (sql_where,)

        count_sql_query = """
            /* external.postgresql.crashes.Crashes.get_exploitability */
            WITH sums AS (
                %s
            )
            SELECT
                count(signature)
            FROM sums
        """ % (inner_with_sql,)

        results = self.query(
            count_sql_query,
            params,
            error_message="Failed to retrieve exploitable crashes count"
        )
        total_crashes_count, = results[0]

        sql_query = """
            /* external.postgresql.crashes.Crashes.get_exploitability */
            WITH sums AS (
                %s
            )
            SELECT
                signature,
                high_count,
                medium_count,
                low_count,
                null_count,
                none_count
            FROM sums
            ORDER BY
                med_or_high DESC, signature ASC
        """ % (inner_with_sql,)

        if params['page'] is not None:
            if params['page'] <= 0:
                raise BadArgumentError('page', params['page'], 'starts at 1')
            if params['batch'] is None:
                raise MissingArgumentError('batch')
            sql_query += """
            LIMIT %(limit)s
            OFFSET %(offset)s
            """
            params['limit'] = params['batch']
            params['offset'] = params['batch'] * (params['page'] - 1)

        error_message = (
            "Failed to retrieve exploitable crashes from PostgreSQL"
        )
        results = self.query(sql_query, params, error_message=error_message)

        # Transforming the results into what we want
        crashes = results.zipped()

        return {
            "hits": crashes,
            "total": total_crashes_count
        }
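
As a sanity check, the pagination arithmetic above (page numbering starts
at 1) reduces to this self-contained sketch:

def page_to_limit_offset(page, batch):
    # Mirrors the LIMIT/OFFSET computation in get_exploitability:
    # page 1 reads rows 0..batch-1, page 2 reads batch..2*batch-1, etc.
    if page <= 0:
        raise ValueError('page starts at 1')
    return batch, batch * (page - 1)

# page=3 with batch=50 -> LIMIT 50 OFFSET 100
assert page_to_limit_offset(3, 50) == (50, 100)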
Example No. 23
    def get_signature_history(self, **kwargs):
        """Return the history of a signature.

        See https://socorro.readthedocs.io/en/latest/middleware.html
        """
        now = datetimeutil.utc_now()
        lastweek = now - datetime.timedelta(days=7)

        filters = [
            ('product', None, 'str'),
            ('version', None, 'str'),
            ('signature', None, 'str'),
            ('end_date', now, 'datetime'),
            ('start_date', lastweek, 'datetime'),
        ]
        params = external_common.parse_arguments(filters, kwargs)

        for param in ('product', 'version', 'signature'):
            if not params[param]:
                raise MissingArgumentError(param)

        if params.signature == '##null##':
            signature_where = 'AND signature IS NULL'
        else:
            signature_where = 'AND signature = %(signature)s'

        if params.signature == '##empty##':
            params.signature = ''

        sql = """
            /* external.postgresql.crashes.Crashes.get_signature_history */
            WITH hist AS (
                SELECT
                    report_date,
                    report_count
                FROM
                    tcbs JOIN signatures using (signature_id)
                         JOIN product_versions using (product_version_id)
                WHERE
                    report_date BETWEEN %%(start_date)s AND %%(end_date)s
                    AND product_name = %%(product)s
                    AND version_string = %%(version)s
                    %s
                GROUP BY
                    report_date, report_count
                ORDER BY 1
            ),
            scaling_window AS (
                SELECT
                    hist.*,
                    SUM(report_count) over () AS total_crashes
                FROM hist
            )
            SELECT
                report_date AS date,
                report_count AS count,
                report_count / total_crashes::float * 100 AS percent_of_total
            FROM scaling_window
            ORDER BY report_date DESC
        """ % signature_where

        error_message = 'Failed to retrieve signature history from PostgreSQL'
        results = self.query(sql, params, error_message=error_message)

        # Transforming the results into what we want
        history = []
        for dot in results.zipped():
            dot['date'] = datetimeutil.date_to_string(dot['date'])
            history.append(dot)

        return {
            'hits': history,
            'total': len(history)
        }
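
The two signature sentinels above are easy to miss. A small illustrative
sketch of the mapping they produce (the helper itself is not part of the
service):

def signature_clause(signature):
    # '##null##' selects rows whose signature column IS NULL;
    # '##empty##' matches the empty string; anything else matches as-is.
    if signature == '##null##':
        return 'AND signature IS NULL', signature
    if signature == '##empty##':
        return 'AND signature = %(signature)s', ''
    return 'AND signature = %(signature)s', signature

assert signature_clause('##null##')[0] == 'AND signature IS NULL'
assert signature_clause('##empty##')[1] == ''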
Example No. 24
    def get_parameters(self, **kwargs):
        parameters = {}

        fields = kwargs['_fields']
        assert fields
        if fields:
            self.build_filters(fields)

        for param in self.filters:
            values = kwargs.get(param.name, param.default)

            if values in ('', []):
                # Those values are equivalent to None here.
                # Note that we cannot use bool(), because 0 is not equivalent
                # to None in our case.
                values = None

            if values is None and param.mandatory:
                raise MissingArgumentError(param.name)
            if values is None and param.default is not None:
                values = param.default

            # all values can be a list, so we make them all lists to simplify
            if values is not None and not isinstance(values, (list, tuple)):
                values = [values]

            if values is not None:
                # There should only be one parameter with no operator, and
                # we want to stack all values into it. That's why we want
                # to keep track of it.
                # Actually, we want _two_ parameters with no operator: one
                # for each possible value of "operator_not".
                no_operator_param = {True: None, False: None}

                for value in values:
                    operator = None
                    operator_not = False

                    operators = OPERATORS_MAP.get(param.data_type,
                                                  OPERATORS_MAP['default'])

                    if isinstance(value, basestring):
                        if value.startswith(OPERATOR_NOT):
                            operator_not = True
                            value = value[1:]

                        for ope in operators:
                            if value.startswith(ope):
                                operator = ope
                                value = value[len(ope):]
                                break

                    # ensure the right data type
                    try:
                        value = convert_to_type(value, param.data_type)
                    except ValueError:
                        raise BadArgumentError(
                            param.name,
                            msg='Bad value for parameter %s:'
                            ' "%s" is not a valid %s' %
                            (param.name, value, param.data_type))

                    if param.name not in parameters:
                        parameters[param.name] = []

                    if not operator:
                        if not no_operator_param[operator_not]:
                            no_operator_param[operator_not] = SearchParam(
                                param.name, [value], operator, param.data_type,
                                operator_not)
                        else:
                            no_operator_param[operator_not].value.append(value)
                    else:
                        parameters[param.name].append(
                            SearchParam(param.name, value, operator,
                                        param.data_type, operator_not))

                for value in no_operator_param.values():
                    if value:
                        parameters[value.name].append(value)

        self.fix_date_parameter(parameters)
        self.fix_process_type_parameter(parameters)
        self.fix_hang_type_parameter(parameters)
        self.fix_version_parameter(parameters)

        return parameters
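
The operator-prefix parsing above is the densest part of get_parameters.
Here is a simplified, self-contained sketch of it, assuming OPERATOR_NOT
is '!' and that operators are tried longest-first (both assumptions, since
OPERATORS_MAP and OPERATOR_NOT are defined elsewhere):

def split_operators(value, operators=('<=', '>=', '<', '>', '=', '~')):
    # Strip an optional leading '!' (negation), then the longest
    # matching comparison operator, and return what remains.
    operator_not = value.startswith('!')
    if operator_not:
        value = value[1:]
    for op in operators:
        if value.startswith(op):
            return op, value[len(op):], operator_not
    return None, value, operator_not

# '!>=10' -> negated "greater than or equal to 10"
assert split_operators('!>=10') == ('>=', '10', True)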
Example No. 25
    def create_field(self, **kwargs):
        """Create a new field in the database, to be used by supersearch and
        all Elasticsearch related services.
        """
        filters = [
            ('name', None, 'str'),
            ('data_validation_type', 'enum', 'str'),
            ('default_value', None, 'str'),
            ('description', None, 'str'),
            ('form_field_choices', None, ['list', 'str']),
            ('has_full_version', False, 'bool'),
            ('in_database_name', None, 'str'),
            ('is_exposed', False, 'bool'),
            ('is_returned', False, 'bool'),
            ('is_mandatory', False, 'bool'),
            ('query_type', 'enum', 'str'),
            ('namespace', None, 'str'),
            ('permissions_needed', None, ['list', 'str']),
            ('storage_mapping', None, 'json'),
        ]
        params = external_common.parse_arguments(filters, kwargs)

        mandatory_params = ('name', 'in_database_name')
        for param in mandatory_params:
            if not params[param]:
                raise MissingArgumentError(param)

        # Before making the change, make sure it does not break indexing.
        new_mapping = self.get_mapping(overwrite_mapping=params)

        # Try the mapping. If there is an error, an exception will be raised.
        # If an exception is raised, the new mapping will be rejected.
        self.test_mapping(new_mapping)

        es_connection = self.get_connection()

        try:
            es_connection.index(
                index=self.config.elasticsearch.elasticsearch_default_index,
                doc_type='supersearch_fields',
                body=params,
                id=params['name'],
                op_type='create',
                refresh=True,
            )
        except elasticsearch.exceptions.ConflictError:
            # This field exists in the database, it thus cannot be created!
            raise BadArgumentError(
                'name',
                msg='The field "%s" already exists in the database, '
                'so it cannot be created again.' % params['name'],
            )

        if params.get('storage_mapping'):
            # If we made a change to the storage_mapping, log that change.
            self.config.logger.info(
                'elasticsearch mapping changed for field "%s", '
                'added new mapping "%s"',
                params['name'],
                params['storage_mapping'],
            )

        return True
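
A hedged usage sketch for the method above. The SuperSearchFields class
name and the config wiring are assumptions; the keyword arguments match
the filters list:

fields_service = SuperSearchFields(config=config)  # assumed wiring
fields_service.create_field(
    name='my_new_field',              # mandatory
    in_database_name='my_new_field',  # mandatory
    namespace='processed_crash',
    query_type='enum',
)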
Example No. 26
    def get_daily(self, **kwargs):
        """Return crashes by active daily users. """
        now = datetimeutil.utc_now().date()
        lastweek = now - datetime.timedelta(weeks=1)

        filters = [
            ("product", None, "str"),
            ("versions", None, ["list", "str"]),
            ("from_date", lastweek, "date"),
            ("to_date", now, "date"),
            ("os", None, ["list", "str"]),
            ("report_type", None, ["list", "str"]),
            ("date_range_type", "date", "str"),
        ]

        # aliases
        if "from" in kwargs and "from_date" not in kwargs:
            kwargs["from_date"] = kwargs.get("from")
        if "to" in kwargs and "to_date" not in kwargs:
            kwargs["to_date"] = kwargs.get("to")

        params = external_common.parse_arguments(filters, kwargs)

        if not params.product:
            raise MissingArgumentError('product')

        if not params.versions or not params.versions[0]:
            raise MissingArgumentError('versions')

        params.versions = tuple(params.versions)

        # simple version, for home page graphs mainly
        if ((not params.os or not params.os[0]) and
                (not params.report_type or not params.report_type[0])):
            if params.date_range_type == "build":
                table_to_use = "home_page_graph_build_view"
                date_range_field = "build_date"
            else:
                table_to_use = "home_page_graph_view"
                date_range_field = "report_date"

            db_fields = ("product_name", "version_string", date_range_field,
                         "report_count", "adu", "crash_hadu")

            out_fields = ("product", "version", "date", "report_count", "adu",
                          "crash_hadu")

            sql = """
                /* socorro.external.postgresql.crashes.Crashes.get_daily */
                SELECT %(db_fields)s
                FROM %(table_to_use)s
                WHERE product_name=%%(product)s
                AND version_string IN %%(versions)s
                AND %(date_range_field)s BETWEEN %%(from_date)s
                    AND %%(to_date)s
            """ % {"db_fields": ", ".join(db_fields),
                   "date_range_field": date_range_field,
                   "table_to_use": table_to_use}

        # complex version, for daily crashes page mainly
        else:
            if params.date_range_type == "build":
                table_to_use = "crashes_by_user_build_view"
                date_range_field = "build_date"
            else:
                table_to_use = "crashes_by_user_view"
                date_range_field = "report_date"

            db_fields = [
                "product_name",
                "version_string",
                date_range_field,
                "sum(adjusted_report_count)::bigint as report_count",
                "sum(adu)::bigint as adu",
                """crash_hadu(sum(report_count)::bigint, sum(adu)::bigint,
                              avg(throttle)) as crash_hadu""",
                "avg(throttle) as throttle"
            ]

            out_fields = ["product", "version", "date", "report_count", "adu",
                          "crash_hadu", "throttle"]

            db_group = ["product_name", "version_string", date_range_field]

            sql_where = []
            if params.os and params.os[0]:
                sql_where.append("os_short_name IN %(os)s")
                params.os = tuple(x[0:3].lower() for x in params.os)

            if params.report_type and params.report_type[0]:
                sql_where.append("crash_type_short IN %(report_type)s")
                params.report_type = tuple(params.report_type)

            if sql_where:
                sql_where = "AND %s" % " AND ".join(sql_where)
            else:
                sql_where = ''

            sql = """
                /* socorro.external.postgresql.crashes.Crashes.get_daily */
                SELECT %(db_fields)s
                FROM (
                    SELECT
                        product_name,
                        version_string,
                        %(date_range_field)s,
                        os_name,
                        os_short_name,
                        SUM(report_count)::int as report_count,
                        SUM(adjusted_report_count)::int
                            as adjusted_report_count,
                        MAX(adu) as adu,
                        AVG(throttle) as throttle
                    FROM %(table_to_use)s
                    WHERE product_name=%%(product)s
                    AND version_string IN %%(versions)s
                    AND %(date_range_field)s BETWEEN %%(from_date)s
                        AND %%(to_date)s
                    %(sql_where)s
                    GROUP BY product_name, version_string,
                             %(date_range_field)s, os_name, os_short_name
                ) as aggregated_crashes_by_user
            """ % {"db_fields": ", ".join(db_fields),
                   "date_range_field": date_range_field,
                   "table_to_use": table_to_use,
                   "sql_where": sql_where}

            if db_group:
                sql = "%s GROUP BY %s" % (sql, ", ".join(db_group))

        error_message = "Failed to retrieve daily crashes data from PostgreSQL"
        results = self.query(sql, params, error_message=error_message)

        hits = {}
        for row in results:
            daily_data = dict(zip(out_fields, row))
            if "throttle" in daily_data:
                daily_data["throttle"] = float(daily_data["throttle"])
            daily_data["crash_hadu"] = float(daily_data["crash_hadu"])
            daily_data["date"] = datetimeutil.date_to_string(
                daily_data["date"]
            )

            key = "%s:%s" % (daily_data["product"],
                             daily_data["version"])

            if "os_short" in daily_data:
                del daily_data["os_short"]

            if key not in hits:
                hits[key] = {}

            hits[key][daily_data["date"]] = daily_data

        return {"hits": hits}
Example No. 27
    def post(self, **kwargs):
        filters = [
            ('product', None, 'str'),
            ('version', None, 'str'),
            ('update_channel', None, 'str'),
            ('build_id', None, 'str'),
            ('platform', None, 'str'),
            ('beta_number', None, 'int'),
            ('release_channel', None, 'str'),
            ('throttle', None, 'int'),
        ]
        params = external_common.parse_arguments(filters, kwargs)
        # all fields are mandatory
        for key in [x[0] for x in filters if x[1] is None]:
            if key == 'beta_number':
                # exception because this can either be a non-zero integer
                # or a None
                if params.get(key) is not None:
                    if not params.get(key):
                        raise MissingArgumentError(key)

            elif not params.get(key) and params.get(key) != 0:
                raise MissingArgumentError(key)

        with self.get_connection() as connection:
            try:
                single_row_sql(
                    connection,
                    # product, version, update_channel, build_id, platform,
                    # beta_number
                    "SELECT add_new_release(%s, %s, %s, %s, %s, %s)",
                    (params['product'], params['version'],
                     params['update_channel'], params['build_id'],
                     params['platform'], params['beta_number']),
                )
                execute_no_results(
                    connection,
                    """
                        INSERT INTO product_release_channels
                        (product_name, release_channel, throttle)
                        SELECT %s, %s, %s
                        WHERE NOT EXISTS (
                            SELECT product_name, release_channel
                            FROM product_release_channels
                            WHERE
                            product_name = %s
                            AND
                            release_channel = %s
                        )
                    """,
                    (
                        params['product'],
                        params['release_channel'],
                        params['throttle'],
                        params['product'],
                        params['release_channel'],
                    ),
                )
                single_row_sql(connection, "SELECT update_product_versions()")
            except psycopg2.Error:
                connection.rollback()
                raise
            else:
                connection.commit()

        return True
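
A hedged example call; the Releases class name and configuration are
assumptions, but the parameters match the filters above (beta_number may
legitimately be None, every other field is mandatory):

releases_service = Releases(config=config)  # assumed wiring
releases_service.post(
    product='Firefox',
    version='42.0b1',
    update_channel='beta',
    build_id='20210101000000',
    platform='linux',
    beta_number=1,
    release_channel='beta',
    throttle=100,
)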
Example No. 28
    def get(self, **kwargs):
        """ Return urls for signature """
        filters = [
            ("signature", None, "str"),
            ("start_date", None, "datetime"),
            ("end_date", None, "datetime"),
            ("products", None, ["list", "str"]),
            ("versions", None, ["list", "str"]),
        ]

        params = external_common.parse_arguments(filters, kwargs)

        # Because no parameters are optional, we need to loop through
        # all parameters to ensure each has been set and is not None.
        missing_params = []
        for param in params:
            if not params[param]:
                if param == 'versions':
                    # force the versions parameter to 'ALL' if empty
                    params[param] = 'ALL'
                    continue
                missing_params.append(param)

        if missing_params:
            raise MissingArgumentError(", ".join(missing_params))

        all_products_versions_sql = """
        /* socorro.external.postgresql.signature_urls.SignatureURLs.get */
            SELECT url, count(*) as crash_count FROM reports_clean
            JOIN reports_user_info USING ( UUID )
            JOIN signatures USING ( signature_id )
            WHERE reports_clean.date_processed
                BETWEEN %(start_date)s AND %(end_date)s
            AND reports_user_info.date_processed
                BETWEEN %(start_date)s AND %(end_date)s
            AND signature = %(signature)s
            AND url <> ''
        """

        sql = """
        /* socorro.external.postgresql.signature_urls.SignatureURLs.get */
            SELECT url, count(*) as crash_count FROM reports_clean
            JOIN reports_user_info USING ( UUID )
            JOIN signatures USING ( signature_id )
            JOIN product_versions USING ( product_version_id )
            WHERE reports_clean.date_processed
                BETWEEN %(start_date)s AND %(end_date)s
            AND reports_user_info.date_processed
                BETWEEN %(start_date)s AND %(end_date)s
            AND signature = %(signature)s
            AND url <> ''
            AND (
        """

        sql_group_order = """ GROUP BY url
            ORDER BY crash_count DESC LIMIT 100"""
        sql_params = {
            "start_date": params.start_date,
            "end_date": params.end_date,
            "signature": params.signature
        }

        # if this query is for all products the 'ALL' keyword will be
        # the only item in the products list and this will then also
        # be for all versions.
        if 'ALL' in params['products']:
            sql_query = " ".join((all_products_versions_sql, sql_group_order))
        # if this query is for all versions the 'ALL' keyword will be
        # the only item in the versions list.
        elif 'ALL' in params['versions']:
            sql_products = " product_name IN %(products)s )"
            sql_params['products'] = tuple(params.products)

            sql_date_range_limit = """AND %(end_date)s BETWEEN
                product_versions.build_date
                    AND product_versions.sunset_date"""
            sql_query = " ".join(
                (sql, sql_products, sql_date_range_limit, sql_group_order))
        else:
            products = []
            (params["products_versions"],
             products) = self.parse_versions(params["versions"], [])

            if len(params["products_versions"]) == 0:
                raise BadArgumentError(", ".join(params["versions"]))

            versions_list = []
            products_list = []
            for x in range(0, len(params["products_versions"]), 2):
                products_list.append(params["products_versions"][x])
                versions_list.append(params["products_versions"][x + 1])

            product_version_list = []
            for prod in params["products"]:
                versions = [
                    versions_list[i]
                    for i, x in enumerate(products_list)
                    if x == prod
                ]
                product_version_list.append(tuple(versions))

            sql_product_version_ids = [
                """( product_name = %%(product%s)s
                    AND version_string IN %%(version%s)s ) """ % (x, x)
                for x in range(len(product_version_list))
            ]

            sql_params = add_param_to_dict(sql_params, "version",
                                           product_version_list)

            sql_params = add_param_to_dict(sql_params, "product",
                                           params.products)

            sql_query = " ".join((sql, " OR ".join(sql_product_version_ids),
                                  " ) " + sql_group_order))

        error_message = "Failed to retrieve urls for signature from PostgreSQL"
        results = self.query(sql_query,
                             sql_params,
                             error_message=error_message)
        urls = results.zipped()
        return {"hits": urls, "total": len(urls)}
Example No. 29
    def get(self, **kwargs):
        self.context.logger.info('Running %s' % self.__class__.__name__)
        raise MissingArgumentError('missing arg')
Example No. 30
    def get_list(self, **kwargs):
        """
        List all crashes with a given signature and return them.

        Both `from_date` and `to_date` (and their aliases `from` and `to`)
        are required and cannot be more than 30 days apart.

        Optional arguments: see SearchCommon.get_parameters()

        """
        # aliases
        if "from" in kwargs and "from_date" not in kwargs:
            kwargs["from_date"] = kwargs.get("from")
        if "to" in kwargs and "to_date" not in kwargs:
            kwargs["to_date"] = kwargs.get("to")

        if not kwargs.get('from_date'):
            raise MissingArgumentError('from_date')
        if not kwargs.get('to_date'):
            raise MissingArgumentError('to_date')

        from_date = datetimeutil.datetimeFromISOdateString(kwargs['from_date'])
        to_date = datetimeutil.datetimeFromISOdateString(kwargs['to_date'])
        span_days = (to_date - from_date).days
        if span_days > 30:
            raise BadArgumentError(
                'Span between from_date and to_date cannot be more '
                'than 30 days'
            )

        # start with the default
        sort_order = {
            'key': 'date_processed',
            'direction': 'DESC'
        }
        if 'sort' in kwargs:
            sort_order['key'] = kwargs.pop('sort')
            _recognized_sort_orders = (
                'date_processed',
                'uptime',
                'user_comments',
                'uuid',
                'uuid_text',
                'product',
                'version',
                'build',
                'signature',
                'url',
                'os_name',
                'os_version',
                'cpu_name',
                'cpu_info',
                'address',
                'reason',
                'last_crash',
                'install_age',
                'hangid',
                'process_type',
                'release_channel',
                'install_time',
                'duplicate_of',
            )
            if sort_order['key'] not in _recognized_sort_orders:
                raise BadArgumentError(
                    '%s is not a recognized sort order key' % sort_order['key']
                )
            sort_order['direction'] = 'ASC'
            if str(kwargs.get('reverse', '')).lower() == 'true':
                kwargs.pop('reverse')
                sort_order['direction'] = 'DESC'

        include_raw_crash = kwargs.get('include_raw_crash') or False
        params = search_common.get_parameters(kwargs)

        if not params["signature"]:
            raise MissingArgumentError('signature')

        params["terms"] = params["signature"]
        params["search_mode"] = "is_exactly"

        # Default mode falls back to starts_with for postgres
        if params["plugin_search_mode"] == "default":
            params["plugin_search_mode"] = "starts_with"

        # Limiting to a signature
        if params["terms"]:
            params["terms"] = self.prepare_terms(params["terms"],
                                                 params["search_mode"])

        # Searching for terms in plugins
        if params["report_process"] == "plugin" and params["plugin_terms"]:
            params["plugin_terms"] = " ".join(params["plugin_terms"])
            params["plugin_terms"] = self.prepare_terms(
                params["plugin_terms"],
                params["plugin_search_mode"]
            )

        # Get information about the versions
        util_service = Util(config=self.context)
        params["versions_info"] = util_service.versions_info(**params)

        # Parsing the versions
        params["versions_string"] = params["versions"]
        (params["versions"], params["products"]) = self.parse_versions(
            params["versions"],
            params["products"]
        )

        if hasattr(self.context, 'webapi'):
            context = self.context.webapi
        else:
            # old middleware
            context = self.context
        # Changing the OS ids to OS names
        for i, elem in enumerate(params["os"]):
            for platform in context.platforms:
                if platform["id"][:3] == elem[:3]:
                    params["os"][i] = platform["name"]

        # Creating the parameters for the sql query
        sql_params = {
        }

        # Preparing the different parts of the sql query
        sql_select = """
            SELECT
                r.date_processed,
                r.uptime,
                r.user_comments,
                r.uuid::uuid,
                r.uuid as uuid_text,
                r.product,
                r.version,
                r.build,
                r.signature,
                r.url,
                r.os_name,
                r.os_version,
                r.cpu_name,
                r.cpu_info,
                r.address,
                r.reason,
                r.last_crash,
                r.install_age,
                r.hangid,
                r.process_type,
                r.release_channel,
                (r.client_crash_date - (r.install_age * INTERVAL '1 second'))
                  AS install_time
        """
        if not include_raw_crash:
            sql_select += """
                , rd.duplicate_of
            """

        wrapped_select = """
            WITH report_slice AS (
              %s
            ), dupes AS (
                SELECT
                    report_slice.uuid,
                    rd.duplicate_of
                FROM reports_duplicates rd
                JOIN report_slice ON report_slice.uuid_text = rd.uuid
                WHERE
                    rd.date_processed BETWEEN %%(from_date)s AND %%(to_date)s
            )

            SELECT
                rs.*,
                dupes.duplicate_of,
                rc.raw_crash
            FROM report_slice rs
            LEFT OUTER JOIN dupes USING (uuid)
            LEFT OUTER JOIN raw_crashes rc ON
                rs.uuid = rc.uuid
                AND
                rc.date_processed BETWEEN %%(from_date)s AND %%(to_date)s
        """

        sql_from = self.build_reports_sql_from(params)

        if not include_raw_crash:
            sql_from = """%s
                LEFT OUTER JOIN reports_duplicates rd ON r.uuid = rd.uuid
            """ % sql_from

        sql_where, sql_params = self.build_reports_sql_where(
            params,
            sql_params,
            self.context
        )

        sql_order = """
            ORDER BY %(key)s %(direction)s
        """ % sort_order

        sql_limit, sql_params = self.build_reports_sql_limit(
            params,
            sql_params
        )

        # Assembling the query (identical either way; the raw-crash
        # variant is wrapped in the CTE just before querying)
        sql_query = "\n".join((
            "/* socorro.external.postgresql.report.Report.list */",
            sql_select, sql_from, sql_where, sql_order, sql_limit)
        )

        # Query for counting the results
        sql_count_query = "\n".join((
            "/* socorro.external.postgresql.report.Report.list */",
            "SELECT count(*)", sql_from, sql_where)
        )

        # Querying the DB
        with self.get_connection() as connection:

            total = self.count(
                sql_count_query,
                sql_params,
                error_message="Failed to count crashes from reports.",
                connection=connection
            )

            # No need to call Postgres if we know there will be no results
            if total:

                if include_raw_crash:
                    sql_query = wrapped_select % sql_query

                results = self.query(
                    sql_query,
                    sql_params,
                    error_message="Failed to retrieve crashes from reports",
                    connection=connection
                ).zipped()
            else:
                results = []

        crashes = []
        for crash in results:
            assert crash['uuid'] == crash['uuid_text']
            crash.pop('uuid_text')
            if not include_raw_crash and 'raw_crash' in crash:
                crash.pop('raw_crash')
            for i in crash:
                try:
                    crash[i] = datetimeutil.date_to_string(crash[i])
                except TypeError:
                    pass
            crashes.append(crash)

        return {
            "hits": crashes,
            "total": total
        }
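
Finally, a hedged example of the public arguments handled above: 'from'
and 'to' are accepted as aliases, and 'reverse' flips the sort direction
(report_service is an assumed Report instance):

result = report_service.get_list(
    signature='OOM | small',
    sort='date_processed',
    reverse='true',
    **{'from': '2021-01-01', 'to': '2021-01-15'}
)
print('%s crashes' % result['total'])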