def get(self, **kwargs): """Return JSON data of a crash report, given its uuid. """ filters = [ ('uuid', None, str), ('datatype', None, str), ('name', None, str) # only applicable if datatype == 'raw' ] params = external_common.parse_arguments(filters, kwargs, modern=True) if not params.uuid: raise MissingArgumentError('uuid') if not params.datatype: raise MissingArgumentError('datatype') datatype_method_mapping = { 'raw': 'get_raw_dump', 'meta': 'get_raw_crash', 'processed': 'get_processed', 'unredacted': 'get_unredacted_processed', } get = self.__getattribute__(datatype_method_mapping[params.datatype]) try: if params.datatype == 'raw': return get(params.uuid, name=params.name) else: return get(params.uuid) except CrashIDNotFound: # The CrashIDNotFound exception that happens inside the # crashstorage is too revealing as exception message # contains information about buckets and prefix keys. # Re-wrap it here so the message is just the crash ID. raise CrashIDNotFound(params.uuid)
def create(self, **kwargs):
    """Add a new job to the priority queue. """
    filters = [
        ("uuid", None, "str"),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    if not params.uuid:
        raise MissingArgumentError('uuid')

    with self.context() as connection:
        try:
            self.config.logger.debug(
                'Inserting priority job into RabbitMQ %s', params.uuid)
            # delivery_mode=2 marks the message persistent so it can
            # survive a broker restart.
            connection.channel.basic_publish(
                exchange='',
                routing_key=self.config.priority_queue_name,
                body=params.uuid,
                properties=pika.BasicProperties(delivery_mode=2))
        except ChannelClosed:
            self.config.logger.error(
                "Failed inserting priorityjobs data into RabbitMQ",
                exc_info=True)
            return False

    return True

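# A minimal, self-contained sketch of the same publish pattern outside
# the service (an assumption-laden example: it presumes a reachable
# RabbitMQ broker on localhost, and the queue name is illustrative):
#
#     import pika
#
#     connection = pika.BlockingConnection(
#         pika.ConnectionParameters('localhost'))
#     channel = connection.channel()
#     channel.queue_declare(queue='socorro.priority', durable=True)
#     channel.basic_publish(
#         exchange='',
#         routing_key='socorro.priority',
#         body='some-crash-uuid',
#         # persistent message + durable queue = survives broker restart
#         properties=pika.BasicProperties(delivery_mode=2))
#     connection.close()
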
def get(self, **kwargs):
    '''Return data about a field from its name. '''
    filters = [
        ('name', None, 'str'),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    if not params.name:
        raise MissingArgumentError("name")

    sql = '''/* socorro.external.postgresql.field.Field.get */
        SELECT
            raw_field AS name,
            transforms,
            product
        FROM data_dictionary
        WHERE raw_field=%(name)s
    '''
    error_message = 'Failed to retrieve field data from PostgreSQL'
    results = self.query(sql, params, error_message=error_message)

    field_data = {
        'name': None,
        'transforms': None,
        'product': None
    }

    if not results:
        return field_data

    field_data = results.zipped()[0]

    return field_data

def get(self, **kwargs):
    filters = [
        ("vendor_hex", None, ["list", "str"]),
        ("adapter_hex", None, ["list", "str"]),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    for key in ('vendor_hex', 'adapter_hex'):
        param = params[key]
        if not param:
            raise MissingArgumentError(key)
        params[key] = tuple(params[key])

    sql_query = """
        SELECT
            vendor_hex, adapter_hex, vendor_name, adapter_name
        FROM graphics_device
        WHERE vendor_hex IN %(vendor_hex)s
        AND adapter_hex IN %(adapter_hex)s
    """

    results = self.query(sql_query, params)
    hits = results.zipped()

    return {'hits': hits, 'total': len(hits)}

def get(self, **kwargs): """Return a dict that holds the throttling value per build type for a specific product.""" filters = [ ('product', None, 'str'), ] params = external_common.parse_arguments(filters, kwargs) required = ('product', ) for key in required: if not params.get(key): raise MissingArgumentError(key) sql = """ SELECT build_type, throttle::REAL FROM product_build_types WHERE product_name = %(product)s """ results = self.query(sql, params) build_types = {} for row in results.zipped(): build_types[row['build_type']] = row['throttle'] return { 'hits': build_types, }
def get_adu_by_signature(self, **kwargs):
    """Return a list of ADUs and crash counts by signature and ADU date.
    """
    now = datetimeutil.utc_now().date()
    lastweek = now - datetime.timedelta(weeks=1)

    filters = [
        ("start_date", lastweek, "date"),
        ("end_date", now, "date"),
        ("signature", None, "str"),
        ("channel", None, "str"),
        ("product_name", None, "str"),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    for param in ("start_date", "end_date", "signature", "channel"):
        if not params[param]:
            raise MissingArgumentError(param)

    if (params.end_date - params.start_date) > datetime.timedelta(days=365):
        raise BadArgumentError('Duration too long. Max 365 days.')

    sql_query = """
        SELECT
            product_name,
            signature,
            adu_date::TEXT,
            build_date::TEXT,
            buildid::TEXT,
            crash_count,
            adu_count,
            os_name,
            channel
        FROM crash_adu_by_build_signature
        WHERE adu_date BETWEEN %(start_date)s AND %(end_date)s
        AND product_name = %(product_name)s
        AND channel = %(channel)s
        AND signature = %(signature)s
        ORDER BY buildid
    """

    error_message = (
        "Failed to retrieve crash ADU by build signature from PostgreSQL"
    )
    results = self.query(sql_query, params, error_message=error_message)

    crashes = results.zipped()

    return {"hits": crashes, "total": len(crashes)}

def prepare_search_params(self, **kwargs):
    """Return a dictionary of parameters for a search-like SQL query.

    Uses socorro.middleware.search_common.get_parameters() for arguments
    filtering.
    """
    params = search_common.get_parameters(kwargs)

    if not params["signature"]:
        raise MissingArgumentError('signature')

    params["terms"] = params["signature"]
    params["search_mode"] = "is_exactly"

    # Default mode falls back to starts_with for postgres
    if params["plugin_search_mode"] == "default":
        params["plugin_search_mode"] = "starts_with"

    # Searching for terms in plugins
    if params["report_process"] == "plugin" and params["plugin_terms"]:
        params["plugin_terms"] = " ".join(params["plugin_terms"])
        params["plugin_terms"] = Crashes.prepare_terms(
            params["plugin_terms"],
            params["plugin_search_mode"])

    # Get information about the versions
    util_service = Util(config=self.context)
    params["versions_info"] = util_service.versions_info(**params)

    # Parsing the versions
    params["versions_string"] = params["versions"]
    (params["versions"], params["products"]) = Crashes.parse_versions(
        params["versions"],
        params["products"])

    # Changing the OS ids to OS names
    if hasattr(self.context, 'webapi'):
        context = self.context.webapi
    else:
        # old middleware
        context = self.context

    for i, elem in enumerate(params["os"]):
        for platform in context.platforms:
            if platform["id"] == elem:
                params["os"][i] = platform["name"]

    return params

def get_count_by_day(self, **kwargs):
    """Returns the number of crashes on a daily basis"""
    filters = [
        ("signature", None, "str"),
        ("start_date", None, "date"),
        ("end_date", None, "date")
    ]

    DATE_FORMAT = "%Y-%m-%d"

    params = external_common.parse_arguments(filters, kwargs)

    for param in ("signature", "start_date"):
        if not params[param]:
            raise MissingArgumentError(param)

    if not params.end_date:
        params.end_date = params.start_date + datetime.timedelta(1)

    sql = """
        SELECT COUNT(*), date_processed::date
        FROM reports_clean rc
        JOIN signatures ON rc.signature_id=signatures.signature_id
        WHERE rc.date_processed >= %(start_date)s
        AND rc.date_processed::date < %(end_date)s
        AND signatures.signature=%(signature)s
        GROUP BY rc.date_processed::date
    """

    hits = {}

    for count, date in self.query(sql, params):
        hits[date.strftime(DATE_FORMAT)] = count

    current = params.start_date
    while current < params.end_date:
        hits.setdefault(current.strftime(DATE_FORMAT), 0)
        current += datetime.timedelta(1)

    return {"hits": hits, "total": len(hits)}

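# The trailing while loop zero-fills days that have no crashes, so every
# date in the requested range appears in the result. A standalone sketch
# of that technique (dates are illustrative):
#
#     import datetime
#
#     hits = {'2024-01-02': 5}
#     current = datetime.date(2024, 1, 1)
#     end = datetime.date(2024, 1, 4)
#     while current < end:
#         hits.setdefault(current.strftime('%Y-%m-%d'), 0)
#         current += datetime.timedelta(1)
#     # hits == {'2024-01-01': 0, '2024-01-02': 5, '2024-01-03': 0}
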
def get(self, **kwargs): """Return a list of signatures-to-bug_ids or bug_ids-to-signatures associations. """ params = external_common.parse_arguments(self.filters, kwargs, modern=True) if not params['signatures'] and not params['bug_ids']: raise MissingArgumentError('specify one of signatures or bug_ids') elif params['signatures'] and params['bug_ids']: raise BadArgumentError('specify only one of signatures or bug_ids') sql_params = [] if params['signatures']: sql_params.append(tuple(params.signatures)) sql = """/* socorro.external.postgresql.bugs.Bugs.get */ SELECT ba.signature, bugs.id FROM bugs JOIN bug_associations AS ba ON bugs.id = ba.bug_id WHERE EXISTS( SELECT 1 FROM bug_associations WHERE bug_associations.bug_id = bugs.id AND signature IN %s ) """ elif params['bug_ids']: sql_params.append(tuple(params.bug_ids)) sql = """/* socorro.external.postgresql.bugs.Bugs.get */ SELECT ba.signature, bugs.id FROM bugs JOIN bug_associations AS ba ON bugs.id = ba.bug_id WHERE bugs.id IN %s """ error_message = "Failed to retrieve bug associations from PostgreSQL" results = self.query(sql, sql_params, error_message=error_message) bugs = results.zipped() return {"hits": bugs, "total": len(bugs)}
def post(self, **kwargs):
    '''Return the result of a custom query. '''
    params = external_common.parse_arguments(self.filters, kwargs)

    if not params.query:
        raise MissingArgumentError('query')

    try:
        query = json.loads(params.query)
    except ValueError:
        raise BadArgumentError(
            'query',
            msg="Invalid JSON value for parameter 'query'"
        )

    # Set indices.
    indices = []
    if not params.indices:
        # By default, use the last two indices.
        today = datetimeutil.utc_now()
        last_week = today - datetime.timedelta(days=7)

        indices = self.generate_list_of_indexes(last_week, today)
    elif len(params.indices) == 1 and params.indices[0] == 'ALL':
        # If we want all indices, just do nothing.
        pass
    else:
        indices = params.indices

    search_args = {}
    if indices:
        search_args['index'] = indices
        search_args['doc_type'] = (
            self.config.elasticsearch.elasticsearch_doctype
        )

    connection = self.get_connection()

    try:
        results = connection.search(body=query, **search_args)
    except elasticsearch.exceptions.NotFoundError as e:
        missing_index = re.findall(BAD_INDEX_REGEX, e.error)[0]
        raise ResourceNotFound(
            "elasticsearch index '%s' does not exist" % missing_index
        )

    return results

def get(self, **kwargs):
    params = external_common.parse_arguments(self.filters, kwargs)

    if not params['signatures']:
        raise MissingArgumentError('signatures')

    sql_params = [tuple(params['signatures'])]
    sql = """
        SELECT
            signature,
            first_report AS first_date,
            first_build::VARCHAR
        FROM signatures
        WHERE signature IN %s
    """
    error_message = 'Failed to retrieve signatures from PostgreSQL'
    results = self.query(sql, sql_params, error_message=error_message)

    signatures = results.zipped()

    return {'hits': signatures, 'total': len(signatures)}

def get(self, **kwargs): """ return GC crashes per build ID """ for arg in ['product', 'version']: if not kwargs.get(arg): raise MissingArgumentError(arg) now = datetimeutil.utc_now().date() lastweek = now - datetime.timedelta(weeks=1) filters = [ ("product", None, "str"), ("version", None, "str"), ("from_date", lastweek, "date"), ("to_date", now, "date"), ] params = external_common.parse_arguments(filters, kwargs) result = self.query( """ /* socorro.external.postgresql.gccrashes.GCCrashes.get */ SELECT build::text, sum(gc_count_madu) FROM gccrashes JOIN product_versions USING (product_version_id) WHERE product_name = %(product)s AND version_string = %(version)s AND report_date BETWEEN %(from_date)s AND %(to_date)s AND build IS NOT NULL GROUP BY build ORDER BY build """, params) # Because we don't return a list of dicts, we turn it into a # pure list first so it becomes a list of tuples. rows = list(result) return {'hits': rows, 'total': len(rows)}
def get(self, **kwargs):
    filters = [
        ("backfill_type", None, "str"),
        ("reports_clean", True, "bool"),
        ("check_period", '01:00:00', "str"),
        ("table_name", None, "str"),
        ("update_day", None, "datetime"),
        ("start_date", None, "datetime"),
        ("end_date", None, "datetime"),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    if not params.backfill_type:
        raise MissingArgumentError('backfill_type')

    # Cast the date parameters that were passed in to plain date strings.
    date_param = ['update_day', 'start_date', 'end_date']
    for i in date_param:
        if i in kwargs:
            params[i] = str(params[i].date())

    try:
        query = 'SELECT backfill_%(backfill_type)s (%(params)s); '
        required_params = BACKFILL_PARAMETERS[params.backfill_type]
        query_params = [(i, params[i]) for i in required_params]
        query_params_str = ', '.join(
            '%(' + str(i[0]) + ')s' for i in query_params
        )
        query = query % {'backfill_type': params.backfill_type,
                         'params': query_params_str}
    except KeyError:
        # either an unknown backfill type, or a required parameter
        # for that type is missing
        raise BadArgumentError(kwargs['backfill_type'])

    error_message = "Failed to retrieve backfill %s from PostgreSQL"
    error_message = error_message % kwargs['backfill_type']
    results = self.query(query, params, error_message=error_message)
    return results

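# Sketch of the query assembly above. Assuming, purely for illustration,
# that BACKFILL_PARAMETERS maps 'adu' to ('update_day',), the code builds
# one named placeholder per required parameter, producing
#
#     SELECT backfill_adu (%(update_day)s);
#
# which self.query() then fills in from `params`.
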
def post(self, **kwargs):
    try:
        data = kwargs['data']
        if data is None:
            raise BadArgumentError('POST data sent was null')
    except KeyError:
        # no 'data' key was sent at all
        raise MissingArgumentError('No POST data sent')
    except ValueError:
        raise BadArgumentError('Posted data not valid JSON')
    except TypeError:
        # happens if kwargs['data'] is None
        raise BadArgumentError('POST data sent was empty')

    # make an upsert for each thing and rollback if any failed
    upsert = """
    WITH
    update_graphics_device AS (
        UPDATE graphics_device
        SET
            adapter_name = %(adapter_name)s,
            vendor_name = %(vendor_name)s
        WHERE
            vendor_hex = %(vendor_hex)s
            AND
            adapter_hex = %(adapter_hex)s
        RETURNING 1
    ),
    insert_graphics_device AS (
        INSERT INTO
            graphics_device
            (vendor_hex, adapter_hex, vendor_name, adapter_name)
        SELECT
            %(vendor_hex)s AS vendor_hex,
            %(adapter_hex)s AS adapter_hex,
            %(vendor_name)s AS vendor_name,
            %(adapter_name)s AS adapter_name
        WHERE NOT EXISTS (
            SELECT * FROM graphics_device
            WHERE
                vendor_hex = %(vendor_hex)s
                AND
                adapter_hex = %(adapter_hex)s
            LIMIT 1
        )
        RETURNING 2
    )
    SELECT * FROM update_graphics_device
    UNION ALL
    SELECT * FROM insert_graphics_device
    """

    with self.get_connection() as connection:
        try:
            for row in data:
                self.query(upsert, row, connection=connection)
            connection.commit()
            return True
        except (psycopg2.Error, KeyError):
            # KeyErrors happen if any of the rows don't have
            # all the required keys
            connection.rollback()
            return False

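# The CTE above is the classic pre-9.5 PostgreSQL upsert: the UPDATE arm
# returns a row when a matching device already exists, otherwise the
# INSERT arm fires because its NOT EXISTS guard finds no match. Each
# element of `data` is expected to carry the four keys named in the
# placeholders, e.g. (values illustrative):
#
#     {'vendor_hex': '0x8086', 'adapter_hex': '0x0046',
#      'vendor_name': 'Intel', 'adapter_name': 'HD Graphics'}
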
def get(self, **kwargs): """Return JSON data of a crash report, given its uuid. """ filters = [ ('uuid', None, 'str'), ('datatype', None, 'str'), ('name', None, 'str') # only applicable if datatype == 'raw' ] params = external_common.parse_arguments(filters, kwargs) if not params.uuid: raise MissingArgumentError('uuid') if not params.datatype: raise MissingArgumentError('datatype') # get a generic crashstorage instance from whatever external resource # is implementing this service. store = self.get_storage() datatype_method_mapping = { 'raw': 'get_raw_dump', 'meta': 'get_raw_crash', 'processed': 'get_processed', 'unredacted': 'get_unredacted_processed', } get = store.__getattribute__(datatype_method_mapping[params.datatype]) try: if params.datatype == 'raw': return ( get(params.uuid, name=params.name), 'application/octet-stream' ) else: return get(params.uuid) except CrashIDNotFound: if params.datatype in ('processed', 'unredacted'): # try to fetch a raw crash just to ensure that the raw crash # exists. If this line fails, there's no reason to actually # submit the priority job. try: store.get_raw_crash(params.uuid) except CrashIDNotFound: raise ResourceNotFound(params.uuid) # search through the existing other services to find the # Priorityjob service. try: priorityjob_service_impl = self.all_services[ 'Priorityjobs' ] except KeyError: raise ServiceUnavailable('Priorityjobs') # get the underlying implementation of the Priorityjob # service and instantiate it. priority_job_service = priorityjob_service_impl.cls( config=self.config ) # create the priority job for this crash_ids priority_job_service.create(uuid=params.uuid) raise ResourceUnavailable(params.uuid) raise ResourceNotFound(params.uuid)
def get(self, **kwargs):
    yesterday = datetime.datetime.utcnow() - datetime.timedelta(1)
    tomorrow = yesterday + datetime.timedelta(2)
    yesterday = yesterday.date()
    tomorrow = tomorrow.date()
    filters = [
        ('start_date', yesterday, 'date'),
        ('end_date', tomorrow, 'date'),
        ('product', '', 'str'),
        ('versions', [], 'list'),
        ('platforms', [], 'list'),
    ]
    params = external_common.parse_arguments(filters, kwargs)
    required = (
        'start_date',
        'end_date',
        'product',
        'versions',
        'platforms',
    )
    missing = []
    for each in required:
        if not params.get(each):
            missing.append(each)
    if missing:
        raise MissingArgumentError(', '.join(missing))

    sql_versions = []
    for i, version in enumerate(params['versions'], start=1):
        key = 'version{}'.format(i)
        # We make a very special exception for versions that end with
        # the letter 'b'. It means it's a beta version, and when someone
        # queries on that version they actually mean all
        # the "sub-versions". For example version="19.0b" actually
        # means "all versions starting with '19.0b'".
        # This is consistent with what we do in SuperSearch.
        if version.endswith('b'):
            # exception!
            sql_versions.append(
                'pv.version_string LIKE %({})s'.format(key)
            )
            version += '%'
        else:
            # the norm
            sql_versions.append('pv.version_string = %({})s'.format(key))
        params[key] = version

    sql = """
        SELECT
            SUM(adu_count)::BIGINT AS adi_count,
            adu_date AS date,
            pv.build_type,
            pv.version_string AS version
        FROM
            product_adu
        LEFT OUTER JOIN product_versions pv USING (product_version_id)
        WHERE
            pv.product_name = %(product)s
            AND ({})
            AND os_name IN %(platforms)s
            AND adu_date BETWEEN %(start_date)s AND %(end_date)s
        GROUP BY
            adu_date,
            build_type,
            version_string
    """.format(' OR '.join(sql_versions))

    params['platforms'] = tuple(params['platforms'])
    assert isinstance(params, dict)
    results = self.query(sql, params)

    rows = results.zipped()
    return {'hits': rows, 'total': len(rows)}

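# A standalone sketch of the beta-version expansion used above (the
# function name is illustrative, not part of the service):
#
#     def expand_version(version):
#         """Return (sql_comparison, bound_value) for one version."""
#         if version.endswith('b'):
#             # "19.0b" means every sub-version starting with "19.0b"
#             return 'LIKE', version + '%'
#         return '=', version
#
#     # expand_version('19.0b')  -> ('LIKE', '19.0b%')
#     # expand_version('19.0.1') -> ('=', '19.0.1')
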
def get(self, **kwargs):
    self.context.logger.info('Running %s' % self.__class__.__name__)
    raise MissingArgumentError('missing arg')

def get_list(self, **kwargs):
    """
    List all crashes with a given signature and return them.

    Both `from_date` and `to_date` (and their aliases `from` and `to`)
    are required and can not be greater than 30 days apart.

    Optional arguments: see SearchCommon.get_parameters()
    """
    # aliases
    if "from" in kwargs and "from_date" not in kwargs:
        kwargs["from_date"] = kwargs.get("from")
    if "to" in kwargs and "to_date" not in kwargs:
        kwargs["to_date"] = kwargs.get("to")

    if not kwargs.get('from_date'):
        raise MissingArgumentError('from_date')
    if not kwargs.get('to_date'):
        raise MissingArgumentError('to_date')

    from_date = datetimeutil.datetimeFromISOdateString(kwargs['from_date'])
    to_date = datetimeutil.datetimeFromISOdateString(kwargs['to_date'])
    span_days = (to_date - from_date).days
    if span_days > 30:
        raise BadArgumentError(
            'Span between from_date and to_date can not be more than 30'
        )

    # start with the default
    sort_order = {
        'key': 'date_processed',
        'direction': 'DESC'
    }
    if 'sort' in kwargs:
        sort_order['key'] = kwargs.pop('sort')
        _recognized_sort_orders = (
            'date_processed',
            'uptime',
            'user_comments',
            'uuid',
            'uuid_text',
            'product',
            'version',
            'build',
            'signature',
            'url',
            'os_name',
            'os_version',
            'cpu_name',
            'cpu_info',
            'address',
            'reason',
            'last_crash',
            'install_age',
            'hangid',
            'process_type',
            'release_channel',
            'install_time',
            'duplicate_of',
        )
        if sort_order['key'] not in _recognized_sort_orders:
            raise BadArgumentError(
                '%s is not a recognized sort order key' % sort_order['key']
            )
        sort_order['direction'] = 'ASC'

    if str(kwargs.get('reverse', '')).lower() == 'true':
        if kwargs.pop('reverse'):
            sort_order['direction'] = 'DESC'

    include_raw_crash = kwargs.get('include_raw_crash') or False
    params = search_common.get_parameters(kwargs)

    if not params["signature"]:
        raise MissingArgumentError('signature')

    params["terms"] = params["signature"]
    params["search_mode"] = "is_exactly"

    # Default mode falls back to starts_with for postgres
    if params["plugin_search_mode"] == "default":
        params["plugin_search_mode"] = "starts_with"

    # Limiting to a signature
    if params["terms"]:
        params["terms"] = self.prepare_terms(params["terms"],
                                             params["search_mode"])

    # Searching for terms in plugins
    if params["report_process"] == "plugin" and params["plugin_terms"]:
        params["plugin_terms"] = " ".join(params["plugin_terms"])
        params["plugin_terms"] = self.prepare_terms(
            params["plugin_terms"],
            params["plugin_search_mode"]
        )

    # Get information about the versions
    util_service = Util(config=self.context)
    params["versions_info"] = util_service.versions_info(**params)

    # Parsing the versions
    params["versions_string"] = params["versions"]
    (params["versions"], params["products"]) = self.parse_versions(
        params["versions"],
        params["products"]
    )

    if hasattr(self.context, 'webapi'):
        context = self.context.webapi
    else:
        # old middleware
        context = self.context

    # Changing the OS ids to OS names
    for i, elem in enumerate(params["os"]):
        for platform in context.platforms:
            if platform["id"][:3] == elem[:3]:
                params["os"][i] = platform["name"]

    # Creating the parameters for the sql query
    sql_params = {}

    # Preparing the different parts of the sql query
    sql_select = """
        SELECT
            r.date_processed,
            r.uptime,
            r.user_comments,
            r.uuid::uuid,
            r.uuid as uuid_text,
            r.product,
            r.version,
            r.build,
            r.signature,
            r.url,
            r.os_name,
            r.os_version,
            r.cpu_name,
            r.cpu_info,
            r.address,
            r.reason,
            r.last_crash,
            r.install_age,
            r.hangid,
            r.process_type,
            r.release_channel,
            (r.client_crash_date -
             (r.install_age * INTERVAL '1 second')) AS install_time
    """
    # duplicate_of comes from the reports_duplicates join, which is only
    # added when the query is not wrapped for raw crashes.
    if not include_raw_crash:
        sql_select += """
            , rd.duplicate_of
        """

    wrapped_select = """
        WITH report_slice AS (
            %s
        ), dupes AS (
            SELECT
                report_slice.uuid,
                rd.duplicate_of
            FROM reports_duplicates rd
            JOIN report_slice ON report_slice.uuid_text = rd.uuid
            WHERE
                rd.date_processed BETWEEN %%(from_date)s AND %%(to_date)s
        )

        SELECT
            rs.*,
            dupes.duplicate_of,
            rc.raw_crash
        FROM report_slice rs
        LEFT OUTER JOIN dupes USING (uuid)
        LEFT OUTER JOIN raw_crashes rc ON
            rs.uuid = rc.uuid
            AND
            rc.date_processed BETWEEN %%(from_date)s AND %%(to_date)s
    """

    sql_from = self.build_reports_sql_from(params)

    if not include_raw_crash:
        sql_from = """%s
            LEFT OUTER JOIN reports_duplicates rd ON r.uuid = rd.uuid
        """ % sql_from

    sql_where, sql_params = self.build_reports_sql_where(
        params,
        sql_params,
        self.context
    )

    sql_order = """
        ORDER BY %(key)s %(direction)s
    """ % sort_order

    sql_limit, sql_params = self.build_reports_sql_limit(
        params,
        sql_params
    )

    # Assembling the query. Both variants start from the same slice; the
    # raw-crash variant is additionally wrapped with `wrapped_select`
    # just before querying, once we know there are results.
    sql_query = "\n".join((
        "/* socorro.external.postgresql.report.Report.list */",
        sql_select, sql_from, sql_where, sql_order, sql_limit)
    )

    # Query for counting the results
    sql_count_query = "\n".join((
        "/* socorro.external.postgresql.report.Report.list */",
        "SELECT count(*)", sql_from, sql_where)
    )

    # Querying the DB
    with self.get_connection() as connection:

        total = self.count(
            sql_count_query,
            sql_params,
            error_message="Failed to count crashes from reports.",
            connection=connection
        )

        # No need to call Postgres if we know there will be no results
        if total:
            if include_raw_crash:
                sql_query = wrapped_select % sql_query
            results = self.query(
                sql_query, sql_params,
                error_message="Failed to retrieve crashes from reports",
                connection=connection
            ).zipped()
        else:
            results = []

    crashes = []
    for crash in results:
        assert crash['uuid'] == crash['uuid_text']
        crash.pop('uuid_text')
        if not include_raw_crash and 'raw_crash' in crash:
            crash.pop('raw_crash')
        for i in crash:
            try:
                crash[i] = datetimeutil.date_to_string(crash[i])
            except TypeError:
                pass
        crashes.append(crash)

    return {
        "hits": crashes,
        "total": total
    }

def get(self, **kwargs): """Return a list of results and aggregations based on parameters. The list of accepted parameters (with types and default values) is in the database and can be accessed with the super_search_fields service. """ # Require that the list of fields be passed. if not kwargs.get('_fields'): raise MissingArgumentError('_fields') self.all_fields = kwargs['_fields'] # Filter parameters and raise potential errors. params = self.get_parameters(**kwargs) # Find the indices to use to optimize the elasticsearch query. indices = self.get_indices(params['date']) # Create and configure the search object. search = Search( using=self.get_connection(), index=indices, doc_type=self.config.elasticsearch.elasticsearch_doctype, ) # Create filters. filters = [] histogram_intervals = {} for field, sub_params in params.items(): sub_filters = None for param in sub_params: if param.name.startswith('_'): # By default, all param values are turned into lists, # even when they have and can have only one value. # For those we know there can only be one value, # so we just extract it from the made-up list. if param.name == '_results_offset': results_from = param.value[0] elif param.name == '_results_number': results_number = param.value[0] if results_number > 1000: raise BadArgumentError( '_results_number', msg=('_results_number cannot be greater ' 'than 1,000')) if results_number < 0: raise BadArgumentError( '_results_number', msg='_results_number cannot be negative') elif param.name == '_facets_size': facets_size = param.value[0] for f in self.histogram_fields: if param.name == '_histogram_interval.%s' % f: histogram_intervals[f] = param.value[0] # Don't use meta parameters in the query. continue field_data = self.all_fields[param.name] name = '%s.%s' % (field_data['namespace'], field_data['in_database_name']) if param.data_type in ('date', 'datetime'): param.value = datetimeutil.date_to_string(param.value) elif param.data_type == 'enum': param.value = [x.lower() for x in param.value] elif param.data_type == 'str' and not param.operator: param.value = [x.lower() for x in param.value] # Operators needing wildcards, and the associated value # transformation with said wildcards. operator_wildcards = { '~': '*%s*', # contains '^': '%s*', # starts with '$': '*%s' # ends with } # Operators needing ranges, and the associated Elasticsearch # comparison operator. operator_range = { '>': 'gt', '<': 'lt', '>=': 'gte', '<=': 'lte', } args = {} filter_type = 'term' filter_value = None if not param.operator: # contains one of the terms if len(param.value) == 1: val = param.value[0] if not isinstance(val, basestring) or ' ' not in val: # There's only one term and no white space, this # is a simple term filter. filter_value = val else: # If the term contains white spaces, we want to # perform a phrase query. filter_type = 'query' args = Q( 'simple_query_string', query=param.value[0], fields=[name], default_operator='and', ).to_dict() else: # There are several terms, this is a terms filter. 
filter_type = 'terms' filter_value = param.value elif param.operator == '=': # is exactly if field_data['has_full_version']: name = '%s.full' % name filter_value = param.value elif param.operator in operator_range: filter_type = 'range' filter_value = { operator_range[param.operator]: param.value } elif param.operator == '__null__': filter_type = 'missing' args['field'] = name elif param.operator == '__true__': filter_type = 'term' filter_value = True elif param.operator == '@': filter_type = 'regexp' if field_data['has_full_version']: name = '%s.full' % name filter_value = param.value elif param.operator in operator_wildcards: filter_type = 'query' # Wildcard operations are better applied to a non-analyzed # field (called "full") if there is one. if field_data['has_full_version']: name = '%s.full' % name q_args = {} q_args[name] = (operator_wildcards[param.operator] % param.value) query = Q('wildcard', **q_args) args = query.to_dict() if filter_value is not None: args[name] = filter_value if args: new_filter = F(filter_type, **args) if param.operator_not: new_filter = ~new_filter if sub_filters is None: sub_filters = new_filter elif filter_type == 'range': sub_filters &= new_filter else: sub_filters |= new_filter continue if sub_filters is not None: filters.append(sub_filters) search = search.filter(F('bool', must=filters)) # Restricting returned fields. fields = [] # We keep track of the requested columns in order to make sure we # return those column names and not aliases for example. self.request_columns = [] for param in params['_columns']: for value in param.value: if not value: continue self.request_columns.append(value) field_name = self.get_field_name(value, full=False) fields.append(field_name) search = search.fields(fields) # Sorting. sort_fields = [] for param in params['_sort']: for value in param.value: if not value: continue # Values starting with a '-' are sorted in descending order. # In order to retrieve the database name of the field, we # must first remove the '-' part and add it back later. # Example: given ['product', '-version'], the results will be # sorted by ascending product then descending version. desc = False if value.startswith('-'): desc = True value = value[1:] field_name = self.get_field_name(value) if desc: # The underlying library understands that '-' means # sorting in descending order. field_name = '-' + field_name sort_fields.append(field_name) search = search.sort(*sort_fields) # Pagination. results_to = results_from + results_number search = search[results_from:results_to] # Create facets. if facets_size: self._create_aggregations(params, search, facets_size, histogram_intervals) # Query and compute results. hits = [] if params['_return_query'][0].value[0]: # Return only the JSON query that would be sent to elasticsearch. return { 'query': search.to_dict(), 'indices': indices, } errors = [] # We call elasticsearch with a computed list of indices, based on # the date range. However, if that list contains indices that do not # exist in elasticsearch, an error will be raised. We thus want to # remove all failing indices until we either have a valid list, or # an empty list in which case we return no result. while True: try: results = search.execute() for hit in results: hits.append(self.format_fields(hit.to_dict())) total = search.count() aggregations = getattr(results, 'aggregations', {}) if aggregations: aggregations = self.format_aggregations(aggregations) shards = getattr(results, '_shards', {}) break # Yay! Results! 
except NotFoundError, e: missing_index = re.findall(BAD_INDEX_REGEX, e.error)[0] if missing_index in indices: del indices[indices.index(missing_index)] else: # Wait what? An error caused by an index that was not # in the request? That should never happen, but in case # it does, better know it. raise errors.append({ 'type': 'missing_index', 'index': missing_index, }) if indices: # Update the list of indices and try again. # Note: we need to first empty the list of indices before # updating it, otherwise the removed indices never get # actually removed. search = search.index().index(*indices) else: # There is no index left in the list, return an empty # result. hits = [] total = 0 aggregations = {} shards = None break
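# The retry loop above narrows the index list until elasticsearch stops
# complaining about missing indices. A minimal sketch of the same pattern
# (MissingIndexError, run_query and empty_result are illustrative names,
# not real APIs):
#
#     while True:
#         try:
#             results = run_query(indices)
#             break
#         except MissingIndexError as e:
#             indices.remove(e.index)   # drop only the offending index
#             if not indices:
#                 results = empty_result()
#                 break
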
def get_exploitability(self, **kwargs):
    """Return a list of exploitable crash reports.

    See socorrolib.lib.external_common.parse_arguments() for all filters.
    """
    now = datetimeutil.utc_now().date()
    lastweek = now - datetime.timedelta(weeks=1)

    filters = [
        ("start_date", lastweek, "date"),
        ("end_date", now, "date"),
        ("product", None, "str"),
        ("version", None, "str"),
        ("page", None, "int"),
        ("batch", None, "int"),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    sql_where = """
        report_date BETWEEN %(start_date)s AND %(end_date)s
        AND
        null_count + none_count + low_count + medium_count + high_count > 4
    """

    if params.product:
        sql_where += " AND pv.product_name = %(product)s"

    if params.version:
        sql_where += " AND pv.version_string = %(version)s"

    inner_with_sql = """
        SELECT
            signature,
            SUM(high_count) AS high_count,
            SUM(medium_count) AS medium_count,
            SUM(low_count) AS low_count,
            SUM(null_count) AS null_count,
            SUM(none_count) AS none_count,
            SUM(high_count) + SUM(medium_count) AS med_or_high
        FROM exploitability_reports
        JOIN product_versions AS pv USING (product_version_id)
        WHERE
            high_count + medium_count + null_count + none_count > 4
            AND
            %s
        GROUP BY signature
    """ % (sql_where,)

    count_sql_query = """
        /* external.postgresql.crashes.Crashes.get_exploitability */
        WITH sums AS (
            %s
        )
        SELECT count(signature)
        FROM sums
    """ % (inner_with_sql,)

    results = self.query(
        count_sql_query,
        params,
        error_message="Failed to retrieve exploitable crashes count"
    )
    total_crashes_count, = results[0]

    sql_query = """
        /* external.postgresql.crashes.Crashes.get_exploitability */
        WITH sums AS (
            %s
        )
        SELECT
            signature,
            high_count,
            medium_count,
            low_count,
            null_count,
            none_count
        FROM sums
        ORDER BY med_or_high DESC, signature ASC
    """ % (inner_with_sql,)

    if params['page'] is not None:
        if params['page'] <= 0:
            raise BadArgumentError('page', params['page'], 'starts on 1')
        if params['batch'] is None:
            raise MissingArgumentError('batch')
        sql_query += """
            LIMIT %(limit)s
            OFFSET %(offset)s
        """
        params['limit'] = params['batch']
        params['offset'] = params['batch'] * (params['page'] - 1)

    error_message = (
        "Failed to retrieve exploitable crashes from PostgreSQL"
    )
    results = self.query(sql_query, params, error_message=error_message)

    # Transforming the results into what we want
    crashes = results.zipped()

    return {"hits": crashes, "total": total_crashes_count}

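# Pagination arithmetic used above: `page` is 1-based, so with batch=100
# and page=3 the query runs with LIMIT 100 OFFSET 200, i.e. it returns
# rows 201-300 of the ordered result.
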
def get_signature_history(self, **kwargs):
    """Return the history of a signature.

    See https://socorro.readthedocs.io/en/latest/middleware.html
    """
    now = datetimeutil.utc_now()
    lastweek = now - datetime.timedelta(days=7)

    filters = [
        ('product', None, 'str'),
        ('version', None, 'str'),
        ('signature', None, 'str'),
        ('end_date', now, 'datetime'),
        ('start_date', lastweek, 'datetime'),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    for param in ('product', 'version', 'signature'):
        if not params[param]:
            raise MissingArgumentError(param)

    if params.signature == '##null##':
        signature_where = 'AND signature IS NULL'
    else:
        signature_where = 'AND signature = %(signature)s'

    if params.signature == '##empty##':
        params.signature = ''

    sql = """
        /* external.postgresql.crashes.Crashes.get_signature_history */
        WITH hist AS (
            SELECT
                report_date,
                report_count
            FROM
                tcbs JOIN signatures using (signature_id)
                     JOIN product_versions using (product_version_id)
            WHERE
                report_date BETWEEN %%(start_date)s AND %%(end_date)s
                AND product_name = %%(product)s
                AND version_string = %%(version)s
                %s
            GROUP BY
                report_date, report_count
            ORDER BY 1
        ),
        scaling_window AS (
            SELECT
                hist.*,
                SUM(report_count) over () AS total_crashes
            FROM hist
        )
        SELECT
            report_date AS date,
            report_count AS count,
            report_count / total_crashes::float * 100 AS percent_of_total
        FROM scaling_window
        ORDER BY report_date DESC
    """ % signature_where

    error_message = 'Failed to retrieve signature history from PostgreSQL'
    results = self.query(sql, params, error_message=error_message)

    # Transforming the results into what we want
    history = []
    for dot in results.zipped():
        dot['date'] = datetimeutil.date_to_string(dot['date'])
        history.append(dot)

    return {'hits': history, 'total': len(history)}

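# The scaling_window CTE computes each day's share of the total with a
# window aggregate: SUM(report_count) OVER () repeats the grand total on
# every row, so report_count / total_crashes * 100 is that day's percent
# of all crashes in the range. Illustrative rows:
#
#     report_date | report_count | total_crashes | percent_of_total
#     2024-01-02  |           25 |           100 |             25.0
#     2024-01-01  |           75 |           100 |             75.0
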
def get_daily(self, **kwargs):
    """Return crashes by active daily users. """
    now = datetimeutil.utc_now().date()
    lastweek = now - datetime.timedelta(weeks=1)

    filters = [
        ("product", None, "str"),
        ("versions", None, ["list", "str"]),
        ("from_date", lastweek, "date"),
        ("to_date", now, "date"),
        ("os", None, ["list", "str"]),
        ("report_type", None, ["list", "str"]),
        ("date_range_type", "date", "str"),
    ]

    # aliases
    if "from" in kwargs and "from_date" not in kwargs:
        kwargs["from_date"] = kwargs.get("from")
    if "to" in kwargs and "to_date" not in kwargs:
        kwargs["to_date"] = kwargs.get("to")

    params = external_common.parse_arguments(filters, kwargs)

    if not params.product:
        raise MissingArgumentError('product')

    if not params.versions or not params.versions[0]:
        raise MissingArgumentError('versions')

    params.versions = tuple(params.versions)

    # simple version, for home page graphs mainly
    if ((not params.os or not params.os[0]) and
            (not params.report_type or not params.report_type[0])):
        if params.date_range_type == "build":
            table_to_use = "home_page_graph_build_view"
            date_range_field = "build_date"
        else:
            table_to_use = "home_page_graph_view"
            date_range_field = "report_date"

        db_fields = ("product_name", "version_string", date_range_field,
                     "report_count", "adu", "crash_hadu")

        out_fields = ("product", "version", "date", "report_count",
                      "adu", "crash_hadu")

        sql = """
            /* socorro.external.postgresql.crashes.Crashes.get_daily */
            SELECT %(db_fields)s
            FROM %(table_to_use)s
            WHERE product_name=%%(product)s
            AND version_string IN %%(versions)s
            AND %(date_range_field)s BETWEEN %%(from_date)s
                AND %%(to_date)s
        """ % {
            "db_fields": ", ".join(db_fields),
            "date_range_field": date_range_field,
            "table_to_use": table_to_use
        }

    # complex version, for daily crashes page mainly
    else:
        if params.date_range_type == "build":
            table_to_use = "crashes_by_user_build_view"
            date_range_field = "build_date"
        else:
            table_to_use = "crashes_by_user_view"
            date_range_field = "report_date"

        db_fields = [
            "product_name",
            "version_string",
            date_range_field,
            "sum(adjusted_report_count)::bigint as report_count",
            "sum(adu)::bigint as adu",
            """crash_hadu(sum(report_count)::bigint,
                          sum(adu)::bigint,
                          avg(throttle)) as crash_hadu""",
            "avg(throttle) as throttle"
        ]

        out_fields = [
            "product", "version", "date", "report_count", "adu",
            "crash_hadu", "throttle"
        ]

        db_group = ["product_name", "version_string", date_range_field]

        sql_where = []
        if params.os and params.os[0]:
            sql_where.append("os_short_name IN %(os)s")
            params.os = tuple(x[0:3].lower() for x in params.os)

        if params.report_type and params.report_type[0]:
            sql_where.append("crash_type_short IN %(report_type)s")
            params.report_type = tuple(params.report_type)

        if sql_where:
            sql_where = "AND %s" % " AND ".join(sql_where)
        else:
            sql_where = ''

        sql = """
            /* socorro.external.postgresql.crashes.Crashes.get_daily */
            SELECT %(db_fields)s
            FROM (
                SELECT
                    product_name,
                    version_string,
                    %(date_range_field)s,
                    os_name,
                    os_short_name,
                    SUM(report_count)::int as report_count,
                    SUM(adjusted_report_count)::int
                        as adjusted_report_count,
                    MAX(adu) as adu,
                    AVG(throttle) as throttle
                FROM %(table_to_use)s
                WHERE product_name=%%(product)s
                AND version_string IN %%(versions)s
                AND %(date_range_field)s BETWEEN %%(from_date)s
                    AND %%(to_date)s
                %(sql_where)s
                GROUP BY product_name, version_string,
                         %(date_range_field)s, os_name, os_short_name
            ) as aggregated_crashes_by_user
        """ % {
            "db_fields": ", ".join(db_fields),
            "date_range_field": date_range_field,
            "table_to_use": table_to_use,
            "sql_where": sql_where
        }

        if db_group:
            sql = "%s GROUP BY %s" % (sql, ", ".join(db_group))

    error_message = (
        "Failed to retrieve daily crashes data from PostgreSQL"
    )
    results = self.query(sql, params, error_message=error_message)

    hits = {}
    for row in results:
        daily_data = dict(zip(out_fields, row))
        if "throttle" in daily_data:
            daily_data["throttle"] = float(daily_data["throttle"])
        daily_data["crash_hadu"] = float(daily_data["crash_hadu"])
        daily_data["date"] = datetimeutil.date_to_string(
            daily_data["date"]
        )

        key = "%s:%s" % (daily_data["product"], daily_data["version"])

        if "os_short" in daily_data:
            del daily_data["os_short"]

        if key not in hits:
            hits[key] = {}

        hits[key][daily_data["date"]] = daily_data

    return {"hits": hits}

def get(self, **kwargs): """ Return urls for signature """ filters = [ ("signature", None, "str"), ("start_date", None, "datetime"), ("end_date", None, "datetime"), ("products", None, ["list", "str"]), ("versions", None, ["list", "str"]), ] params = external_common.parse_arguments(filters, kwargs) #Because no parameters are optional, we need to loop through #all parameters to ensure each has been set and is not None missingParams = [] for param in params: if not params[param]: if param == 'versions': # force versions parameter to being 'ALL' if empty params[param] = 'ALL' continue missingParams.append(param) if len(missingParams) > 0: raise MissingArgumentError(", ".join(missingParams)) all_products_versions_sql = """ /* socorro.external.postgresql.signature_urls.SignatureURLs.get */ SELECT url, count(*) as crash_count FROM reports_clean JOIN reports_user_info USING ( UUID ) JOIN signatures USING ( signature_id ) WHERE reports_clean.date_processed BETWEEN %(start_date)s AND %(end_date)s AND reports_user_info.date_processed BETWEEN %(start_date)s AND %(end_date)s AND signature = %(signature)s AND url <> '' """ sql = """ /* socorro.external.postgresql.signature_urls.SignatureURLs.get */ SELECT url, count(*) as crash_count FROM reports_clean JOIN reports_user_info USING ( UUID ) JOIN signatures USING ( signature_id ) JOIN product_versions USING ( product_version_id ) WHERE reports_clean.date_processed BETWEEN %(start_date)s AND %(end_date)s AND reports_user_info.date_processed BETWEEN %(start_date)s AND %(end_date)s AND signature = %(signature)s AND url <> '' AND ( """ sql_group_order = """ GROUP BY url ORDER BY crash_count DESC LIMIT 100""" sql_params = { "start_date": params.start_date, "end_date": params.end_date, "signature": params.signature } # if this query is for all products the 'ALL' keyword will be # the only item in the products list and this will then also # be for all versions. if 'ALL' in params['products']: sql_query = " ".join((all_products_versions_sql, sql_group_order)) # if this query is for all versions the 'ALL' keyword will be # the only item in the versions list. 
elif 'ALL' in params['versions']: sql_products = " product_name IN %(products)s )" sql_params['products'] = tuple(params.products) sql_date_range_limit = """AND %(end_date)s BETWEEN product_versions.build_date AND product_versions.sunset_date""" sql_query = " ".join((sql, sql_products, sql_date_range_limit, sql_group_order)) else: products = [] (params["products_versions"], products) = self.parse_versions(params["versions"], []) if len(params["products_versions"]) == 0: raise BadArgumentError(", ".join(params["versions"])) versions_list = [] products_list = [] for x in range(0, len(params["products_versions"]), 2): products_list.append(params["products_versions"][x]) versions_list.append(params["products_versions"][x + 1]) product_version_list = [] for prod in params["products"]: versions = [] [versions.append(versions_list[i]) for i, x in enumerate(products_list) if x == prod] product_version_list.append(tuple(versions)) sql_product_version_ids = [ """( product_name = %%(product%s)s AND version_string IN %%(version%s)s ) """ % (x, x) for x in range(len(product_version_list))] sql_params = add_param_to_dict(sql_params, "version", product_version_list) sql_params = add_param_to_dict(sql_params, "product", params.products) sql_query = " ".join((sql, " OR ".join(sql_product_version_ids), " ) " + sql_group_order)) error_message = "Failed to retrieve urls for signature from PostgreSQL" results = self.query(sql_query, sql_params, error_message=error_message) urls = results.zipped() return { "hits": urls, "total": len(urls) }
def get_parameters(self, **kwargs):
    parameters = {}

    fields = kwargs['_fields']
    assert fields
    if fields:
        self.build_filters(fields)

    for param in self.filters:
        values = kwargs.get(param.name, param.default)

        if values in ('', []):
            # Those values are equivalent to None here.
            # Note that we cannot use bool(), because 0 is not
            # equivalent to None in our case.
            values = None

        if values is None and param.mandatory:
            raise MissingArgumentError(param.name)
        if values is None and param.default is not None:
            values = param.default

        # all values can be a list, so we make them all lists to simplify
        if values is not None and not isinstance(values, (list, tuple)):
            values = [values]

        if values is not None:
            # There should only be one parameter with no operator, and
            # we want to stack all values into it. That's why we want
            # to keep track of it.
            # Actually, we want _two_ parameters with no operator: one
            # for each possible value of "operator_not".
            no_operator_param = {
                True: None,
                False: None
            }

            for value in values:
                operator = None
                operator_not = False

                operators = OPERATORS_MAP.get(
                    param.data_type,
                    OPERATORS_MAP['default']
                )

                if isinstance(value, basestring):
                    if value.startswith(OPERATOR_NOT):
                        operator_not = True
                        value = value[1:]

                    for ope in operators:
                        if value.startswith(ope):
                            operator = ope
                            value = value[len(ope):]
                            break

                # ensure the right data type
                try:
                    value = convert_to_type(value, param.data_type)
                except ValueError:
                    raise BadArgumentError(
                        param.name,
                        msg='Bad value for parameter %s:'
                        ' "%s" is not a valid %s' % (
                            param.name, value, param.data_type
                        )
                    )

                if param.name not in parameters:
                    parameters[param.name] = []

                if not operator:
                    if not no_operator_param[operator_not]:
                        no_operator_param[operator_not] = SearchParam(
                            param.name, [value], operator,
                            param.data_type, operator_not
                        )
                    else:
                        no_operator_param[operator_not].value.append(
                            value
                        )
                else:
                    parameters[param.name].append(SearchParam(
                        param.name, value, operator, param.data_type,
                        operator_not
                    ))

            for value in no_operator_param.values():
                if value:
                    parameters[value.name].append(value)

    self.fix_date_parameter(parameters)
    self.fix_process_type_parameter(parameters)
    self.fix_hang_type_parameter(parameters)
    self.fix_version_parameter(parameters)

    return parameters

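# A standalone sketch of the operator-prefix parsing performed above
# (the operator table is abbreviated for illustration; the real ones come
# from OPERATORS_MAP, keyed by data type):
#
#     OPERATOR_NOT = '!'
#     OPERATORS = ('<=', '>=', '<', '>', '=')  # longest prefixes first
#
#     def parse(value):
#         operator_not = value.startswith(OPERATOR_NOT)
#         if operator_not:
#             value = value[1:]
#         for ope in OPERATORS:
#             if value.startswith(ope):
#                 return ope, operator_not, value[len(ope):]
#         return None, operator_not, value
#
#     # parse('>=2024-01-01') -> ('>=', False, '2024-01-01')
#     # parse('!=Firefox')    -> ('=', True, 'Firefox')
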