def get(self, **kwargs):
    """Return graphics_device rows matching the given vendor/adapter hex IDs.

    Both 'vendor_hex' and 'adapter_hex' are required and may be single
    values or lists; raises MissingArgumentError when either is missing.
    """
    filters = [
        ("vendor_hex", None, ["list", "str"]),
        ("adapter_hex", None, ["list", "str"]),
    ]
    params = external_common.parse_arguments(filters, kwargs)
    for key in ('vendor_hex', 'adapter_hex'):
        param = params[key]
        if not param:
            raise MissingArgumentError(key)
        # tuples are adapted by the DB driver into SQL `IN (...)` lists
        params[key] = tuple(params[key])
    sql_query = """
        SELECT vendor_hex, adapter_hex, vendor_name, adapter_name
        FROM graphics_device
        WHERE vendor_hex IN %(vendor_hex)s
        AND adapter_hex IN %(adapter_hex)s
    """
    results = self.query(sql_query, params)
    hits = results.zipped()
    return {
        'hits': hits,
        'total': len(hits)
    }
def post(self, **kwargs):
    """Return first-report metadata for the given signatures.

    'signatures' is required. Each hit's 'first_date' is converted from a
    datetime to its string form before being returned.
    """
    params = external_common.parse_arguments(self.filters, kwargs)
    if not params['signatures']:
        raise MissingArgumentError('signatures')
    sql_params = [tuple(params['signatures'])]
    sql = """
        SELECT signature, first_report AS first_date, first_build
        FROM signatures
        WHERE signature IN %s
    """
    error_message = 'Failed to retrieve signatures from PostgreSQL'
    results = self.query(sql, sql_params, error_message=error_message)
    signatures = []
    for sig in results.zipped():
        # serialize the datetime so the payload is JSON-friendly
        sig['first_date'] = datetimeutil.date_to_string(sig['first_date'])
        signatures.append(sig)
    return {
        'hits': signatures,
        'total': len(signatures)
    }
def create(self, **kwargs):
    """Add a new job to the priority queue.

    Publishes the crash UUID to the RabbitMQ priority queue. Returns True
    on success, False if the channel was closed during publish.
    """
    filters = [
        ("uuid", None, "str"),
    ]
    params = external_common.parse_arguments(filters, kwargs)
    if not params.uuid:
        raise MissingArgumentError('uuid')
    with self.context() as connection:
        try:
            self.config.logger.debug(
                'Inserting priority job into RabbitMQ %s', params.uuid)
            connection.channel.basic_publish(
                exchange='',
                routing_key=self.config.priority_queue_name,
                body=params.uuid,
                # delivery_mode=2 marks the message as persistent
                properties=pika.BasicProperties(delivery_mode=2))
        except ChannelClosed:
            self.config.logger.error(
                "Failed inserting priorityjobs data into RabbitMQ",
                exc_info=True)
            return False
    return True
def get_signatures(self, **kwargs):
    """Return top crashers by signatures.

    See http://socorro.readthedocs.org/en/latest/middleware.html#tcbs
    """
    filters = [
        ("product", None, "str"),
        ("version", None, "str"),
        ("crash_type", "all", "str"),
        ("to_date", datetimeutil.utc_now(), "datetime"),
        ("duration", datetime.timedelta(7), "timedelta"),
        ("os", None, "str"),
        ("limit", 100, "int"),
        ("date_range_type", None, "str")
    ]
    params = external_common.parse_arguments(filters, kwargs)
    params.logger = logger
    # what the twoPeriodTopCrasherComparison() function does is that it
    # makes a start date from taking the to_date - duration
    if params.duration > datetime.timedelta(30):
        raise BadArgumentError('Duration too long. Max 30 days.')
    with self.get_connection() as connection:
        return tcbs.twoPeriodTopCrasherComparison(connection, params)
def get_default_version(self, **kwargs):
    """Return the default version of one or several products.

    With no 'products' argument (or an empty first element), versions for
    all products are returned. Result maps product name -> version string.
    """
    filters = [
        ("products", None, ["list", "str"])
    ]
    params = external_common.parse_arguments(filters, kwargs)
    sql = """
        /* socorro.external.postgresql.products.get_default_version */
        SELECT product_name AS product, version_string AS version
        FROM default_versions
    """
    if params.products and params.products[0] != "":
        # tuple adaptation produces the SQL `IN (...)` list;
        # %% escapes the placeholder through the string formatting below
        params.products = tuple(params.products)
        sql = "%s WHERE product_name IN %%(products)s" % sql
    error_message = "Failed to retrieve default versions from PostgreSQL"
    results = self.query(sql, params, error_message=error_message)
    products = {}
    for product in results.zipped():
        products[product['product']] = product['version']
    return {
        "hits": products
    }
def get(self, **kwargs):
    """Return a dict that holds the throttling value per build type
    for a specific product.

    'product' is required; raises MissingArgumentError when absent.
    """
    filters = [
        ('product', None, 'str'),
    ]
    params = external_common.parse_arguments(filters, kwargs)
    required = ('product',)
    for key in required:
        if not params.get(key):
            raise MissingArgumentError(key)
    sql = """
        SELECT build_type, throttle
        FROM product_build_types
        WHERE product_name = %(product)s
    """
    results = self.query(sql, params)
    build_types = {}
    for row in results.zipped():
        build_types[row['build_type']] = row['throttle']
    return {
        'hits': build_types,
    }
def get_default_version(self, **kwargs):
    """Return the default version of one or several products.

    With no 'products' argument (or an empty first element), versions for
    all products are returned. Result maps product name -> version string.
    """
    filters = [("products", None, ["list", "str"])]
    params = external_common.parse_arguments(filters, kwargs)
    sql = """
        /* socorro.external.postgresql.products.get_default_version */
        SELECT product_name AS product, version_string AS version
        FROM default_versions
    """
    if params.products and params.products[0] != "":
        # tuple adaptation produces the SQL `IN (...)` list;
        # %% escapes the placeholder through the string formatting below
        params.products = tuple(params.products)
        sql = "%s WHERE product_name IN %%(products)s" % sql
    error_message = "Failed to retrieve default versions from PostgreSQL"
    results = self.query(sql, params, error_message=error_message)
    products = {}
    for product in results.zipped():
        products[product['product']] = product['version']
    return {"hits": products}
def _get_sql_params(self, **kwargs):
    """Build the (sql, params) pair for querying missing symbols.

    'date' defaults to yesterday (UTC); an optional 'limit' caps the
    number of rows.
    """
    filters = [
        (
            'date',
            (
                datetime.datetime.utcnow() - datetime.timedelta(days=1)
            ).date(),
            'date'
        ),
        (
            'limit',
            None,
            int
        ),
    ]
    params = external_common.parse_arguments(filters, kwargs)
    sql = """
        SELECT debug_file, debug_id, code_file, code_id
        FROM missing_symbols
        WHERE date_processed = %(date)s
        AND debug_file != ''
        AND debug_id != ''
        GROUP BY debug_file, debug_id, code_file, code_id
    """
    if params['limit'] is not None:
        sql += '\nLIMIT %(limit)s'
    return sql, params
def get_channels(self, **kwargs):
    """Return a list of release channels for one, several or all products.

    Result maps product name -> list of channel names.
    """
    filters = [
        ("products", None, ["list", "str"]),
    ]
    params = external_common.parse_arguments(filters, kwargs)
    sql = """
        SELECT build_type AS channel, product_name AS product
        FROM product_info
    """
    sql_params = {}
    if params.products and params.products[0]:
        sql += " WHERE product_name IN %(products)s"
        sql_params['products'] = tuple(params.products)
    error_message = "Failed to retrieve release channels from PostgreSQL"
    sql_results = self.query(sql, sql_params, error_message=error_message)
    channels = {}
    for res in sql_results.zipped():
        if res['product'] not in channels:
            channels[res['product']] = [res['channel']]
        else:
            channels[res['product']].append(res['channel'])
    return channels
def get(self, **kwargs):
    """Dispatch to a correlations report implementation by 'report_type'.

    Recognized report types: interesting-addons, interesting-modules,
    interesting-addons-with-version, interesting-modules-with-version,
    core-counts. Raises BadArgumentError for anything else.
    """
    filters = [
        ("report_date", None, "datetime"),
        ("report_type", None, "str"),
        ("product", None, "str"),
        ("version", None, "str"),
        ("signature", None, "str"),
        ("platform", None, "str"),
        ("min_crashes", 10, "int"),
        ("min_baseline_diff", 0.05, "float"),
    ]
    params = external_common.parse_arguments(filters, kwargs)
    hits = []
    if params['report_type'] == 'interesting-addons':
        hits = self.interesting_addons(params)
    elif params['report_type'] == 'interesting-modules':
        hits = self.interesting_modules(params)
    elif params['report_type'] == 'interesting-addons-with-version':
        hits = self.interesting_addons_with_version(params)
    elif params['report_type'] == 'interesting-modules-with-version':
        hits = self.interesting_modules_with_version(params)
    elif params['report_type'] == 'core-counts':
        hits = self.core_counts(params)
    else:
        # BUG FIX: this previously referenced the undefined name
        # `report_type`, so an unknown report type raised NameError
        # instead of the intended BadArgumentError.
        raise BadArgumentError(
            'report_type',
            received=params['report_type']
        )
    return {
        'hits': hits,
        'total': len(hits)
    }
def get(self, **kwargs):
    """Return a dict that holds the throttling value per build type
    for a specific product.

    'product' is required. throttle is cast to REAL so the driver returns
    a float rather than a Decimal.
    """
    filters = [
        ('product', None, 'str'),
    ]
    params = external_common.parse_arguments(filters, kwargs)
    required = ('product', )
    for key in required:
        if not params.get(key):
            raise MissingArgumentError(key)
    sql = """
        SELECT build_type, throttle::REAL
        FROM product_build_types
        WHERE product_name = %(product)s
    """
    results = self.query(sql, params)
    build_types = {}
    for row in results.zipped():
        build_types[row['build_type']] = row['throttle']
    return {
        'hits': build_types,
    }
def get(self, **kwargs):
    """Return JSON data of a crash report, given its uuid.

    'datatype' selects which crashstorage accessor is used ('raw',
    'meta', 'processed' or 'unredacted'); 'name' applies only to 'raw'.
    """
    filters = [
        ('uuid', None, str),
        ('datatype', None, str),
        ('name', None, str)  # only applicable if datatype == 'raw'
    ]
    params = external_common.parse_arguments(filters, kwargs, modern=True)
    for required in ('uuid', 'datatype'):
        if not params[required]:
            raise MissingArgumentError(required)
    accessor_by_datatype = {
        'raw': 'get_raw_dump',
        'meta': 'get_raw_crash',
        'processed': 'get_processed',
        'unredacted': 'get_unredacted_processed',
    }
    fetch = getattr(self, accessor_by_datatype[params.datatype])
    try:
        if params.datatype == 'raw':
            return fetch(params.uuid, name=params.name)
        return fetch(params.uuid)
    except CrashIDNotFound:
        # The CrashIDNotFound raised inside the crashstorage is too
        # revealing: its message mentions buckets and prefix keys.
        # Re-wrap it so the message is just the crash ID.
        raise CrashIDNotFound(params.uuid)
def test_parse_arguments_with_class_validators(self):
    """A filter's type may be an instance with a clean() method;
    its ValueError must surface as BadArgumentError."""
    class NumberConverter(object):
        def clean(self, value):
            conv = {'one': 1, 'two': 2, 'three': 3}
            try:
                return conv[value]
            except KeyError:
                raise ValueError('No idea?!')

    # Define a set of filters with some types being non-trivial types
    # but instead a custom validator.
    filters = [
        ("param1", 0, NumberConverter()),
    ]
    arguments = {
        "param1": "one",
    }
    params_exp = util.DotDict()
    params_exp.param1 = 1
    params = external_common.parse_arguments(filters, arguments, modern=True)
    eq_(params, params_exp)
    # note that a ValueError becomes a BadArgumentError
    arguments = {
        "param1": "will cause a ValueError in NumberConverter.clean",
    }
    assert_raises(BadArgumentError,
                  external_common.parse_arguments,
                  filters,
                  arguments,
                  modern=True)
def post(self, **kwargs):
    """Insert a (category, rule) pair into the skip list.

    Both fields are required. Returns True on success; raises
    DatabaseError (after rollback) on any PostgreSQL error.
    """
    params = external_common.parse_arguments(self.filters, kwargs)
    if not params.category:
        raise MissingArgumentError('category')
    if not params.rule:
        raise MissingArgumentError('rule')
    sql = """
        /* socorro.external.postgresql.skiplist.SkipList.post */
        INSERT INTO skiplist (category, rule)
        VALUES (%s, %s);
    """
    sql_params = [params.category, params.rule]
    connection = self.database.connection()
    try:
        with connection.cursor() as cur:
            cur.execute(sql, sql_params)
        connection.commit()
    except psycopg2.Error:
        connection.rollback()
        error_message = "Failed updating skip list in PostgreSQL"
        logger.error(error_message)
        raise DatabaseError(error_message)
    finally:
        # always return the connection, success or failure
        connection.close()
    return True
def get(self, **kwargs):
    """Return skip list entries, optionally filtered by category/rule.

    NOTE(review): the appended 'AND ...' fragments rely on the base SQL
    literal ending with whitespace/newline before the closing quotes so
    the concatenation stays valid — confirm when editing the literal.
    """
    params = external_common.parse_arguments(self.filters, kwargs)
    sql_params = []
    sql = """
        /* socorro.external.postgresql.skiplist.SkipList.get */
        SELECT category,
               rule
        FROM skiplist
        WHERE 1=1
    """
    if params.category:
        sql += 'AND category=%s'
        sql_params.append(params.category)
    if params.rule:
        sql += 'AND rule=%s'
        sql_params.append(params.rule)
    # Use `UPPER()` to make the sort case insensitive
    # which makes it more user-friendly on the UI later
    sql += """
        ORDER BY UPPER(category), UPPER(rule)
    """
    error_message = "Failed to retrieve skip list data from PostgreSQL"
    sql_results = self.query(sql, sql_params, error_message=error_message)
    results = sql_results.zipped()
    return {'hits': results, 'total': len(results)}
def get(self, **kwargs):
    '''Return data about a field from its name.

    'name' is required. Returns a dict with keys name/transforms/product,
    all None when the field is unknown.
    '''
    filters = [
        ('name', None, 'str'),
    ]
    params = external_common.parse_arguments(filters, kwargs)
    if not params.name:
        raise MissingArgumentError("name")
    sql = '''/* socorro.external.postgresql.field.Field.get */
        SELECT raw_field AS name, transforms, product
        FROM data_dictionary
        WHERE raw_field=%(name)s
    '''
    error_message = 'Failed to retrieve field data from PostgreSQL'
    results = self.query(sql, params, error_message=error_message)
    field_data = {
        'name': None,
        'transforms': None,
        'product': None
    }
    if not results:
        return field_data
    # NOTE(review): assumes a truthy result set has at least one zipped
    # row; raw_field appears to be unique here — confirm against schema.
    field_data = results.zipped()[0]
    return field_data
def get(self, **kwargs):
    """Return graphics_device rows matching the given vendor/adapter hex IDs.

    Both 'vendor_hex' and 'adapter_hex' are required and may be single
    values or lists; raises MissingArgumentError when either is missing.
    """
    filters = [
        ("vendor_hex", None, ["list", "str"]),
        ("adapter_hex", None, ["list", "str"]),
    ]
    params = external_common.parse_arguments(filters, kwargs)
    for key in ('vendor_hex', 'adapter_hex'):
        param = params[key]
        if not param:
            raise MissingArgumentError(key)
        # tuples are adapted by the DB driver into SQL `IN (...)` lists
        params[key] = tuple(params[key])
    sql_query = """
        SELECT vendor_hex, adapter_hex, vendor_name, adapter_name
        FROM graphics_device
        WHERE vendor_hex IN %(vendor_hex)s
        AND adapter_hex IN %(adapter_hex)s
    """
    results = self.query(sql_query, params)
    hits = results.zipped()
    return {'hits': hits, 'total': len(hits)}
def create(self, **kwargs):
    """Add a new job to the priority queue.

    Publishes the crash UUID to the RabbitMQ priority queue. Returns True
    on success, False if the channel was closed during publish.
    """
    filters = [
        ("uuid", None, "str"),
    ]
    params = external_common.parse_arguments(filters, kwargs)
    if not params.uuid:
        raise MissingArgumentError('uuid')
    with self.context() as connection:
        try:
            self.config.logger.debug(
                'Inserting priority job into RabbitMQ %s', params.uuid
            )
            connection.channel.basic_publish(
                exchange='',
                routing_key=self.config.priority_queue_name,
                body=params.uuid,
                # delivery_mode=2 marks the message as persistent
                properties=pika.BasicProperties(delivery_mode=2)
            )
        except ChannelClosed:
            self.config.logger.error(
                "Failed inserting priorityjobs data into RabbitMQ",
                exc_info=True
            )
            return False
    return True
def get_adu_by_signature(self, **kwargs):
    """Return a list of ADUs and crash counts by signature and ADU date.

    start_date/end_date default to the last week; signature, channel and
    (implicitly, via the query) product_name narrow the rows. The date
    span is capped at 365 days.
    """
    now = datetimeutil.utc_now().date()
    lastweek = now - datetime.timedelta(weeks=1)
    filters = [
        ("start_date", lastweek, "date"),
        ("end_date", now, "date"),
        ("signature", None, "str"),
        ("channel", None, "str"),
        ("product_name", None, "str"),
    ]
    params = external_common.parse_arguments(filters, kwargs)
    for param in ("start_date", "end_date", "signature", "channel"):
        if not params[param]:
            raise MissingArgumentError(param)
    if (params.end_date - params.start_date) > datetime.timedelta(days=365):
        raise BadArgumentError('Duration too long. Max 365 days.')
    sql_query = """
        SELECT product_name, signature, adu_date::TEXT, build_date::TEXT,
            buildid::TEXT, crash_count, adu_count, os_name, channel
        FROM crash_adu_by_build_signature
        WHERE adu_date BETWEEN %(start_date)s AND %(end_date)s
        AND product_name = %(product_name)s
        AND channel = %(channel)s
        AND signature = %(signature)s
        ORDER BY buildid
    """
    error_message = (
        "Failed to retrieve crash ADU by build signature from PostgreSQL"
    )
    results = self.query(sql_query, params, error_message=error_message)
    crashes = results.zipped()
    return {
        "hits": crashes,
        "total": len(crashes)
    }
def get(self, **kwargs):
    """Run the SQL statement for one product over a single day.

    'date' defaults to the current UTC date and 'product' to 'Firefox';
    a derived 'tomorrow' bound makes the window cover the whole day.
    """
    arg_filters = [
        ('date', datetime.datetime.utcnow().date(), 'date'),
        ('product', 'Firefox', 'str'),
    ]
    params = external_common.parse_arguments(arg_filters, kwargs)
    params['tomorrow'] = params['date'] + datetime.timedelta(days=1)
    rows = self.query(SQL, params).zipped()
    return {
        'hits': rows,
        'total': len(rows),
    }
def get(self, **kwargs):
    """Fetch report content described by the given parameters.

    NOTE(review): `content` is never used after the try block and the
    function falls off the end — this block looks truncated; confirm the
    remainder of the method against the full file.
    """
    filters = [
        ('report_type', None, 'str'),
        ('product', None, 'str'),
        ('version', None, 'str'),
        ('platforms', None, 'list'),
    ]
    params = external_common.parse_arguments(filters, kwargs)
    try:
        content = self._get_content(params)
    except NotFoundError, msg:
        # best-effort: log the miss and bail out with None
        self.config.logger.info('Failed to download %s' % msg)
        return
def get_adu_by_signature(self, **kwargs):
    """Return a list of ADUs and crash counts by signature and ADU date.

    start_date/end_date default to the last week; the date span is
    capped at 365 days.
    """
    now = datetimeutil.utc_now().date()
    lastweek = now - datetime.timedelta(weeks=1)
    filters = [
        ("start_date", lastweek, "date"),
        ("end_date", now, "date"),
        ("signature", None, "str"),
        ("channel", None, "str"),
        ("product_name", None, "str"),
    ]
    params = external_common.parse_arguments(filters, kwargs)
    for param in ("start_date", "end_date", "signature", "channel"):
        if not params[param]:
            raise MissingArgumentError(param)
    if (params.end_date - params.start_date) > datetime.timedelta(days=365):
        raise BadArgumentError('Duration too long. Max 365 days.')
    sql_query = """
        SELECT product_name, signature, adu_date::TEXT, build_date::TEXT,
            buildid::TEXT, crash_count, adu_count, os_name, channel
        FROM crash_adu_by_build_signature
        WHERE adu_date BETWEEN %(start_date)s AND %(end_date)s
        AND product_name = %(product_name)s
        AND channel = %(channel)s
        AND signature = %(signature)s
        ORDER BY buildid
    """
    error_message = (
        "Failed to retrieve crash ADU by build signature from PostgreSQL")
    results = self.query(sql_query, params, error_message=error_message)
    crashes = results.zipped()
    return {"hits": crashes, "total": len(crashes)}
def post(self, **kwargs):
    '''Return the result of a custom query.

    NOTE(review): `results` is assigned but never returned here — this
    block looks truncated; confirm the remainder against the full file.
    '''
    params = external_common.parse_arguments(self.filters, kwargs)
    if not params.query:
        raise MissingArgumentError('query')
    try:
        query = json.loads(params.query)
    except ValueError:
        raise BadArgumentError(
            'query',
            msg="Invalid JSON value for parameter 'query'"
        )
    # Set indices.
    indices = []
    if not params.indices:
        # By default, use the last two indices.
        today = datetimeutil.utc_now()
        last_week = today - datetime.timedelta(days=7)
        indices = self.generate_list_of_indexes(last_week, today)
    elif len(params.indices) == 1 and params.indices[0] == 'ALL':
        # If we want all indices, just do nothing.
        pass
    else:
        indices = params.indices
    search_args = {}
    if indices:
        search_args['index'] = indices
    search_args['doc_type'] = (
        self.config.elasticsearch.elasticsearch_doctype
    )
    connection = self.get_connection()
    try:
        results = connection.search(
            body=query,
            **search_args
        )
    except elasticsearch.exceptions.NotFoundError, e:
        # surface a friendlier error naming the missing index
        missing_index = re.findall(BAD_INDEX_REGEX, e.error)[0]
        raise ResourceNotFound(
            "elasticsearch index '%s' does not exist" % missing_index
        )
def test_parse_arguments(self):
    """Test external_common.parse_arguments() in modern mode:
    defaults, unknown-argument dropping, bool/date/datetime coercion."""
    filters = [
        ("param1", "default", [str]),
        ("param2", None, int),
        ("param3", ["some", "default", "list"], [str]),
        ("param4", ["list", "of", 4, "values"], [str]),
        ("param5", None, bool),
        ("param6", None, datetime.date),
        ("param7", None, datetime.date),
        ("param8", None, datetime.datetime),
        ("param9", None, [str]),
    ]
    arguments = {
        "param1": "value1",
        "unknown": 12345,
        "param5": "true",
        "param7": datetime.date(2016, 2, 9).isoformat(),
        "param8": datetime.datetime(2016, 2, 9).isoformat(),
        # note the 'param9' is deliberately not specified.
    }
    params_exp = util.DotDict()
    params_exp.param1 = ["value1"]
    params_exp.param2 = None
    params_exp.param3 = ['some', 'default', 'list']
    params_exp.param4 = ["list", "of", "4", "values"]
    params_exp.param5 = True
    params_exp.param6 = None
    params_exp.param7 = datetime.date(2016, 2, 9)
    # parsed datetimes come back timezone-aware (UTC)
    params_exp.param8 = datetime.datetime(2016, 2, 9).replace(
        tzinfo=isodate.UTC
    )
    params_exp.param9 = None
    params = external_common.parse_arguments(
        filters,
        arguments,
        modern=True
    )
    for key in params:
        eq_(params[key], params_exp[key], '{}: {!r} != {!r}'.format(
            key,
            params[key],
            params_exp[key]
        ))
    eq_(params, params_exp)
def get_count_by_day(self, **kwargs):
    """Returns the number of crashes on a daily basis.

    'signature' and 'start_date' are required; 'end_date' defaults to the
    day after start_date. Days with no crashes are filled in with 0.
    """
    filters = [
        ("signature", None, "str"),
        ("start_date", None, "date"),
        ("end_date", None, "date")
    ]
    DATE_FORMAT = "%Y-%m-%d"
    params = external_common.parse_arguments(filters, kwargs)
    for param in ("signature", "start_date"):
        if not params[param]:
            raise MissingArgumentError(param)
    if not params.end_date:
        params.end_date = params.start_date + datetime.timedelta(1)
    sql = """
        SELECT COUNT(*), date_processed::date
        FROM reports_clean rc
        JOIN signatures ON rc.signature_id=signatures.signature_id
        WHERE rc.date_processed >= %(start_date)s
        AND rc.date_processed::date < %(end_date)s
        AND signatures.signature=%(signature)s
        GROUP BY rc.date_processed::date
    """
    hits = {}
    for count, date in self.query(sql, params):
        hits[date.strftime(DATE_FORMAT)] = count
    # zero-fill the days the query returned no rows for
    current = params.start_date
    while current < params.end_date:
        hits.setdefault(current.strftime(DATE_FORMAT), 0)
        current += datetime.timedelta(1)
    return {"hits": hits, "total": len(hits)}
def test_parse_arguments_old_way(self):
    """Test external_common.parse_arguments() in legacy (modern=False)
    mode: string type names, defaults, unknown-argument dropping."""
    filters = [("param1", "default", ["list", "str"]),
               ("param2", None, "int"),
               ("param3", ["list", "of", 4, "values"], ["list", "str"])]
    arguments = {"param1": "value1",
                 "unknown": 12345}
    params_exp = util.DotDict()
    params_exp.param1 = ["value1"]
    params_exp.param2 = None
    # default list values are coerced element-wise to str
    params_exp.param3 = ["list", "of", "4", "values"]
    params = external_common.parse_arguments(
        filters,
        arguments,
        modern=False,
    )
    eq_(params, params_exp)
def test_parse_arguments(self):
    """Test external_common.parse_arguments() with string type names:
    defaults, unknown-argument dropping, list coercion."""
    filters = [
        ("param1", "default", ["list", "str"]),
        ("param2", None, "int"),
        ("param3", ["list", "of", 4, "values"], ["list", "str"])
    ]
    arguments = {
        "param1": "value1",
        "unknown": 12345
    }
    params_exp = util.DotDict()
    params_exp.param1 = ["value1"]
    params_exp.param2 = None
    # default list values are coerced element-wise to str
    params_exp.param3 = ["list", "of", "4", "values"]
    params = external_common.parse_arguments(filters, arguments)
    eq_(params, params_exp)
def get_count_by_day(self, **kwargs):
    """Returns the number of crashes on a daily basis.

    'signature' and 'start_date' are required; 'end_date' defaults to the
    day after start_date. Days with no crashes are filled in with 0.
    """
    filters = [("signature", None, "str"),
               ("start_date", None, "date"),
               ("end_date", None, "date")]
    DATE_FORMAT = "%Y-%m-%d"
    params = external_common.parse_arguments(filters, kwargs)
    for param in ("signature", "start_date"):
        if not params[param]:
            raise MissingArgumentError(param)
    if not params.end_date:
        params.end_date = params.start_date + datetime.timedelta(1)
    sql = """
        SELECT COUNT(*), date_processed::date
        FROM reports_clean rc
        JOIN signatures ON rc.signature_id=signatures.signature_id
        WHERE rc.date_processed >= %(start_date)s
        AND rc.date_processed::date < %(end_date)s
        AND signatures.signature=%(signature)s
        GROUP BY rc.date_processed::date
    """
    hits = {}
    for count, date in self.query(sql, params):
        hits[date.strftime(DATE_FORMAT)] = count
    # zero-fill the days the query returned no rows for
    current = params.start_date
    while current < params.end_date:
        hits.setdefault(current.strftime(DATE_FORMAT), 0)
        current += datetime.timedelta(1)
    return {"hits": hits, "total": len(hits)}
def get(self, **kwargs):
    """Return a list of signatures-to-bug_ids or bug_ids-to-signatures
    associations.

    Exactly one of 'signatures' or 'bug_ids' must be supplied; the guard
    clauses below guarantee one of the two SQL branches is taken.
    """
    params = external_common.parse_arguments(self.filters, kwargs)
    if not params['signatures'] and not params['bug_ids']:
        raise MissingArgumentError('specify one of signatures or bug_ids')
    elif params['signatures'] and params['bug_ids']:
        raise BadArgumentError('specify only one of signatures or bug_ids')
    sql_params = []
    if params['signatures']:
        sql_params.append(tuple(params.signatures))
        # all bugs that share at least one of the given signatures
        sql = """/* socorro.external.postgresql.bugs.Bugs.get */
            SELECT ba.signature, bugs.id
            FROM bugs
                JOIN bug_associations AS ba ON bugs.id = ba.bug_id
            WHERE EXISTS(
                SELECT 1 FROM bug_associations
                WHERE bug_associations.bug_id = bugs.id
                AND signature IN %s
            )
        """
    elif params['bug_ids']:
        sql_params.append(tuple(params.bug_ids))
        sql = """/* socorro.external.postgresql.bugs.Bugs.get */
            SELECT ba.signature, bugs.id
            FROM bugs
                JOIN bug_associations AS ba ON bugs.id = ba.bug_id
            WHERE bugs.id IN %s
        """
    error_message = "Failed to retrieve bug associations from PostgreSQL"
    results = self.query(sql, sql_params, error_message=error_message)
    bugs = results.zipped()
    return {
        "hits": bugs,
        "total": len(bugs)
    }
def get(self, **kwargs):
    """Return a list of signatures-to-bug_ids or bug_ids-to-signatures
    associations.

    Exactly one of 'signatures' or 'bug_ids' must be supplied; the guard
    clauses below guarantee one of the two SQL branches is taken.
    """
    params = external_common.parse_arguments(self.filters, kwargs, modern=True)
    if not params['signatures'] and not params['bug_ids']:
        raise MissingArgumentError('specify one of signatures or bug_ids')
    elif params['signatures'] and params['bug_ids']:
        raise BadArgumentError('specify only one of signatures or bug_ids')
    sql_params = []
    if params['signatures']:
        sql_params.append(tuple(params.signatures))
        # all bugs that share at least one of the given signatures
        sql = """/* socorro.external.postgresql.bugs.Bugs.get */
            SELECT ba.signature, bugs.id
            FROM bugs
                JOIN bug_associations AS ba ON bugs.id = ba.bug_id
            WHERE EXISTS(
                SELECT 1 FROM bug_associations
                WHERE bug_associations.bug_id = bugs.id
                AND signature IN %s
            )
        """
    elif params['bug_ids']:
        sql_params.append(tuple(params.bug_ids))
        sql = """/* socorro.external.postgresql.bugs.Bugs.get */
            SELECT ba.signature, bugs.id
            FROM bugs
                JOIN bug_associations AS ba ON bugs.id = ba.bug_id
            WHERE bugs.id IN %s
        """
    error_message = "Failed to retrieve bug associations from PostgreSQL"
    results = self.query(sql, sql_params, error_message=error_message)
    bugs = results.zipped()
    return {"hits": bugs, "total": len(bugs)}
def post(self, **kwargs):
    """adding a new product

    Calls the add_new_product(product, version) stored procedure and
    commits; returns its result, or False (after rollback) on a
    PostgreSQL error.
    """
    filters = [
        ("product", None, "str"),
        ("version", None, "str"),
    ]
    params = external_common.parse_arguments(filters, kwargs)
    with self.get_connection() as connection:
        try:
            # trailing comma unpacks the single-column row
            result, = single_row_sql(
                connection,
                "SELECT add_new_product(%s, %s)",
                (params['product'], params['version']),
            )
        except psycopg2.Error:
            connection.rollback()
            return False
        else:
            connection.commit()
            return result
def _get_sql_params(self, **kwargs):
    """Build the (sql, params) pair for querying missing symbols.

    'date' defaults to yesterday (UTC); an optional 'limit' caps the
    number of rows.
    """
    filters = [
        ('date',
         (datetime.datetime.utcnow() - datetime.timedelta(days=1)).date(),
         'date'),
        ('limit', None, int),
    ]
    params = external_common.parse_arguments(filters, kwargs)
    sql = """
        SELECT debug_file, debug_id, code_file, code_id
        FROM missing_symbols
        WHERE date_processed = %(date)s
        AND debug_file != ''
        AND debug_id != ''
        GROUP BY debug_file, debug_id, code_file, code_id
    """
    if params['limit'] is not None:
        sql += '\nLIMIT %(limit)s'
    return sql, params
def test_parse_arguments_with_class_validators(self):
    """A filter's type may be an instance with a clean() method;
    its ValueError must surface as BadArgumentError."""
    class NumberConverter(object):
        def clean(self, value):
            conv = {'one': 1, 'two': 2, 'three': 3}
            try:
                return conv[value]
            except KeyError:
                raise ValueError('No idea?!')

    # Define a set of filters with some types being non-trivial types
    # but instead a custom validator.
    filters = [
        ("param1", 0, NumberConverter()),
    ]
    arguments = {
        "param1": "one",
    }
    params_exp = util.DotDict()
    params_exp.param1 = 1
    params = external_common.parse_arguments(
        filters,
        arguments,
        modern=True
    )
    eq_(params, params_exp)
    # note that a ValueError becomes a BadArgumentError
    arguments = {
        "param1": "will cause a ValueError in NumberConverter.clean",
    }
    assert_raises(
        BadArgumentError,
        external_common.parse_arguments,
        filters,
        arguments,
        modern=True
    )
def post(self, **kwargs):
    '''Return the result of a custom query.

    NOTE(review): `results` is assigned but never returned here — this
    block looks truncated; confirm the remainder against the full file.
    '''
    params = external_common.parse_arguments(self.filters, kwargs)
    if not params.query:
        raise MissingArgumentError('query')
    try:
        query = json.loads(params.query)
    except ValueError:
        raise BadArgumentError(
            'query',
            msg="Invalid JSON value for parameter 'query'")
    # Set indices.
    indices = []
    if not params.indices:
        # By default, use the last two indices.
        today = datetimeutil.utc_now()
        last_week = today - datetime.timedelta(days=7)
        indices = self.generate_list_of_indexes(last_week, today)
    elif len(params.indices) == 1 and params.indices[0] == 'ALL':
        # If we want all indices, just do nothing.
        pass
    else:
        indices = params.indices
    search_args = {}
    if indices:
        search_args['index'] = indices
    search_args['doc_type'] = (
        self.config.elasticsearch.elasticsearch_doctype)
    connection = self.get_connection()
    try:
        results = connection.search(body=query, **search_args)
    except elasticsearch.exceptions.NotFoundError, e:
        # surface a friendlier error naming the missing index
        missing_index = re.findall(BAD_INDEX_REGEX, e.error)[0]
        raise ResourceNotFound("elasticsearch index '%s' does not exist" %
                               missing_index)
def get(self, **kwargs):
    """Return first-report metadata for the given signatures.

    'signatures' is required; first_build is cast to VARCHAR so it comes
    back as a string.
    """
    params = external_common.parse_arguments(self.filters, kwargs)
    if not params['signatures']:
        raise MissingArgumentError('signatures')
    sql_params = [tuple(params['signatures'])]
    sql = """
        SELECT signature, first_report AS first_date, first_build::VARCHAR
        FROM signatures
        WHERE signature IN %s
    """
    error_message = 'Failed to retrieve signatures from PostgreSQL'
    results = self.query(sql, sql_params, error_message=error_message)
    signatures = results.zipped()
    return {'hits': signatures, 'total': len(signatures)}
def test_parse_arguments(self):
    """Test external_common.parse_arguments() in modern mode:
    defaults, unknown-argument dropping, bool/date/datetime coercion."""
    filters = [
        ("param1", "default", [str]),
        ("param2", None, int),
        ("param3", ["some", "default", "list"], [str]),
        ("param4", ["list", "of", 4, "values"], [str]),
        ("param5", None, bool),
        ("param6", None, datetime.date),
        ("param7", None, datetime.date),
        ("param8", None, datetime.datetime),
        ("param9", None, [str]),
    ]
    arguments = {
        "param1": "value1",
        "unknown": 12345,
        "param5": "true",
        "param7": datetime.date(2016, 2, 9).isoformat(),
        "param8": datetime.datetime(2016, 2, 9).isoformat(),
        # note the 'param9' is deliberately not specified.
    }
    params_exp = util.DotDict()
    params_exp.param1 = ["value1"]
    params_exp.param2 = None
    params_exp.param3 = ['some', 'default', 'list']
    params_exp.param4 = ["list", "of", "4", "values"]
    params_exp.param5 = True
    params_exp.param6 = None
    params_exp.param7 = datetime.date(2016, 2, 9)
    # parsed datetimes come back timezone-aware (UTC)
    params_exp.param8 = datetime.datetime(2016, 2, 9).replace(tzinfo=isodate.UTC)
    params_exp.param9 = None
    params = external_common.parse_arguments(filters, arguments, modern=True)
    for key in params:
        eq_(params[key], params_exp[key],
            '{}: {!r} != {!r}'.format(key, params[key], params_exp[key]))
    eq_(params, params_exp)
def get(self, **kwargs):
    """ return GC crashes per build ID

    'product' and 'version' are required (validated on raw kwargs before
    parsing). Returns rows as (build, sum) tuples, not dicts.
    """
    for arg in ['product', 'version']:
        if not kwargs.get(arg):
            raise MissingArgumentError(arg)
    now = datetimeutil.utc_now().date()
    lastweek = now - datetime.timedelta(weeks=1)
    filters = [
        ("product", None, "str"),
        ("version", None, "str"),
        ("from_date", lastweek, "date"),
        ("to_date", now, "date"),
    ]
    params = external_common.parse_arguments(filters, kwargs)
    result = self.query(
        """
        /* socorro.external.postgresql.gccrashes.GCCrashes.get */
        SELECT build::text,
               sum(gc_count_madu)
        FROM gccrashes
        JOIN product_versions
        USING (product_version_id)
        WHERE product_name = %(product)s
        AND version_string = %(version)s
        AND report_date BETWEEN %(from_date)s AND %(to_date)s
        AND build IS NOT NULL
        GROUP BY build
        ORDER BY build
        """,
        params)
    # Because we don't return a list of dicts, we turn it into a
    # pure list first so it becomes a list of tuples.
    rows = list(result)
    return {'hits': rows, 'total': len(rows)}
def get(self, **kwargs):
    """ return GC crashes per build ID

    'product' and 'version' are required (validated on raw kwargs before
    parsing). Returns rows as (build, sum) tuples, not dicts.
    """
    for arg in ['product', 'version']:
        if not kwargs.get(arg):
            raise MissingArgumentError(arg)
    now = datetimeutil.utc_now().date()
    lastweek = now - datetime.timedelta(weeks=1)
    filters = [
        ("product", None, "str"),
        ("version", None, "str"),
        ("from_date", lastweek, "date"),
        ("to_date", now, "date"),
    ]
    params = external_common.parse_arguments(filters, kwargs)
    result = self.query("""
        /* socorro.external.postgresql.gccrashes.GCCrashes.get */
        SELECT build::text,
               sum(gc_count_madu)
        FROM gccrashes
        JOIN product_versions
        USING (product_version_id)
        WHERE product_name = %(product)s
        AND version_string = %(version)s
        AND report_date BETWEEN %(from_date)s AND %(to_date)s
        AND build IS NOT NULL
        GROUP BY build
        ORDER BY build
    """, params)
    # Because we don't return a list of dicts, we turn it into a
    # pure list first so it becomes a list of tuples.
    rows = list(result)
    return {'hits': rows, 'total': len(rows)}
def get_signatures(self, **kwargs):
    """Return top crashers by signatures.

    See https://socorro.readthedocs.io/en/latest/middleware.html#tcbs
    """
    filters = [("product", None, "str"),
               ("version", None, "str"),
               ("crash_type", "all", "str"),
               ("to_date", datetimeutil.utc_now(), "datetime"),
               ("duration", datetime.timedelta(7), "timedelta"),
               ("os", None, "str"),
               ("limit", 100, "int"),
               ("date_range_type", None, "str")]
    params = external_common.parse_arguments(filters, kwargs)
    params.logger = logger
    # what the twoPeriodTopCrasherComparison() function does is that it
    # makes a start date from taking the to_date - duration
    if params.duration > datetime.timedelta(30):
        raise BadArgumentError('Duration too long. Max 30 days.')
    with self.get_connection() as connection:
        return tcbs.twoPeriodTopCrasherComparison(connection, params)
def get(self, **kwargs):
    """Run one of the backfill_* PostgreSQL stored procedures.

    'backfill_type' selects the procedure; the parameters each procedure
    requires are looked up in BACKFILL_PARAMETERS. An unknown
    backfill_type (or a missing required parameter) raises
    BadArgumentError.
    """
    filters = [
        ("backfill_type", None, "str"),
        ("reports_clean", True, "bool"),
        ("check_period", '01:00:00', "str"),
        ("table_name", None, "str"),
        ("update_day", None, "datetime"),
        ("start_date", None, "datetime"),
        ("end_date", None, "datetime"),
    ]
    params = external_common.parse_arguments(filters, kwargs)
    if not params.backfill_type:
        raise MissingArgumentError('backfill_type')
    # dates the caller supplied are passed on as 'YYYY-MM-DD' strings
    date_param = ['update_day', 'start_date', 'end_date']
    for i in date_param:
        if i in kwargs:
            params[i] = str(params[i].date())
    try:
        query = 'SELECT backfill_%(backfill_type)s (%(params)s); '
        required_params = BACKFILL_PARAMETERS[params.backfill_type]
        query_params = [(i, params[i]) for i in required_params]
        # build a '%(name)s, %(name)s, ...' placeholder list for the call
        query_params_str = ', '.join(
            '%(' + str(i[0]) + ')s' for i in query_params
        )
        query = query % {'backfill_type': params.backfill_type,
                         'params': query_params_str}
    except KeyError:
        # BUG FIX: this was a bare `except:` which also swallowed
        # KeyboardInterrupt/SystemExit and genuine programming errors.
        # Only a failed lookup (unknown backfill_type, or a required
        # parameter missing from params) should be a bad argument.
        raise BadArgumentError(kwargs['backfill_type'])
    error_message = "Failed to retrieve backfill %s from PostgreSQL"
    error_message = error_message % kwargs['backfill_type']
    results = self.query(query, params, error_message=error_message)
    return results
def get_featured(self, **kwargs):
    """Return the featured versions of one, several or all products."""
    filters = [
        ("products", None, ["list", "str"]),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    sql = """
        SELECT product_name AS product,
               version_string AS version
        FROM product_info
        WHERE is_featured = true
    """
    sql_params = {}
    # An empty first element means "all products": skip the filter.
    if params.products and params.products[0]:
        sql += " AND product_name IN %(product)s"
        sql_params['product'] = tuple(params.products)

    error_message = "Failed to retrieve featured versions from PostgreSQL"
    rows = self.query(sql, sql_params, error_message=error_message)

    # Group the version strings under their product name.
    hits = {}
    total = 0
    for row in rows.zipped():
        total += 1
        hits.setdefault(row['product'], []).append(row['version'])

    return {
        "total": total,
        "hits": hits
    }
def delete(self, **kwargs):
    """Delete one (category, rule) entry from the skip list.

    Returns True when a matching entry existed and was deleted,
    False when nothing matched.
    """
    params = external_common.parse_arguments(self.filters, kwargs)
    for required in ('category', 'rule'):
        if not params[required]:
            raise MissingArgumentError(required)

    sql_params = [params.category, params.rule]
    count_sql = """
        /* socorro.external.postgresql.skiplist.SkipList.delete */
        SELECT COUNT(*)
        FROM skiplist
        WHERE category=%s AND rule=%s
    """
    sql = """
        /* socorro.external.postgresql.skiplist.SkipList.delete */
        DELETE FROM skiplist
        WHERE category=%s AND rule=%s
    """
    connection = self.database.connection()
    try:
        cursor = connection.cursor()
        # No matching row: nothing to delete, report failure.
        if not self.count(count_sql, sql_params, connection=connection):
            return False
        cursor.execute(sql, sql_params)
        connection.commit()
    except psycopg2.Error:
        connection.rollback()
        error_message = "Failed delete skip list in PostgreSQL"
        logger.error(error_message)
        raise DatabaseError(error_message)
    finally:
        connection.close()

    return True
def get(self, **kwargs):
    """Return first report date and first build for the given signatures."""
    params = external_common.parse_arguments(self.filters, kwargs)
    if not params['signatures']:
        raise MissingArgumentError('signatures')

    sql = """
        SELECT
            signature,
            first_report AS first_date,
            first_build::VARCHAR
        FROM signatures
        WHERE signature IN %s
    """
    # A single-element list holding the tuple psycopg2 expands for IN.
    sql_params = [tuple(params['signatures'])]

    error_message = 'Failed to retrieve signatures from PostgreSQL'
    rows = self.query(sql, sql_params, error_message=error_message)

    hits = rows.zipped()
    return {
        'hits': hits,
        'total': len(hits)
    }
def delete_field(self, **kwargs):
    """Remove a field from the database.

    A removed field is no longer indexed in elasticsearch nor exposed
    through supersearch. The underlying crash report data is untouched,
    so re-creating the field and reindexing brings it back.
    """
    filters = [
        ('name', None, 'str'),
    ]
    params = external_common.parse_arguments(filters, kwargs)
    if not params['name']:
        raise MissingArgumentError('name')

    self.get_connection().delete(
        index=self.config.elasticsearch.elasticsearch_default_index,
        doc_type='supersearch_fields',
        id=params['name'],
        refresh=True,
    )
def get_signature_history(self, **kwargs):
    """Return the history of a signature.

    See https://socorro.readthedocs.io/en/latest/middleware.html

    Raises MissingArgumentError when product, version or signature
    is absent.
    """
    now = datetimeutil.utc_now()
    lastweek = now - datetime.timedelta(days=7)
    filters = [
        ('product', None, 'str'),
        ('version', None, 'str'),
        ('signature', None, 'str'),
        ('end_date', now, 'datetime'),
        ('start_date', lastweek, 'datetime'),
    ]
    params = external_common.parse_arguments(filters, kwargs)
    # product, version and signature are all mandatory.
    for param in ('product', 'version', 'signature'):
        if not params[param]:
            raise MissingArgumentError(param)
    # '##null##' is the sentinel for "signature IS NULL"; everything
    # else is matched with equality.
    if params.signature == '##null##':
        signature_where = 'AND signature IS NULL'
    else:
        signature_where = 'AND signature = %(signature)s'
    # '##empty##' is the sentinel for the empty-string signature; it
    # keeps the equality branch chosen above with signature = ''.
    if params.signature == '##empty##':
        params.signature = ''
    # The %% escapes survive this first interpolation (which only fills
    # in signature_where) and become the %(name)s placeholders that
    # self.query fills from params.
    sql = """
        /* external.postgresql.crashes.Crashes.get_signature_history */
        WITH hist AS (
            SELECT
                report_date,
                report_count
            FROM
                tcbs
                JOIN signatures using (signature_id)
                JOIN product_versions using (product_version_id)
            WHERE
                report_date BETWEEN %%(start_date)s AND %%(end_date)s
                AND product_name = %%(product)s
                AND version_string = %%(version)s
                %s
            GROUP BY
                report_date, report_count
            ORDER BY 1
        ),
        scaling_window AS (
            SELECT
                hist.*,
                SUM(report_count) over () AS total_crashes
            FROM hist
        )
        SELECT
            report_date AS date,
            report_count AS count,
            report_count / total_crashes::float * 100 AS percent_of_total
        FROM scaling_window
        ORDER BY report_date DESC
    """ % signature_where
    error_message = 'Failed to retrieve signature history from PostgreSQL'
    results = self.query(sql, params, error_message=error_message)
    # Transforming the results into what we want: dates become strings.
    history = []
    for dot in results.zipped():
        dot['date'] = datetimeutil.date_to_string(dot['date'])
        history.append(dot)
    return {'hits': history, 'total': len(history)}
def get(self, **kwargs):
    """Return product version records matching the given filters.

    Each filter maps onto a column of the product_versions join;
    'active' is special-cased into a sunset_date comparison against
    today's date.
    """
    filters = [
        ("version", None, [str]),
        ("product", None, [str]),
        ("is_featured", None, bool),
        ("start_date", None, SmartDate()),
        ("end_date", None, SmartDate()),
        ("active", None, bool),
        ("is_rapid_beta", None, bool),
        ("build_type", None, [str]),
    ]
    params = external_common.parse_arguments(filters, kwargs, modern=True)
    where = []
    sql_params = {}
    for param, value in params.items():
        if value is None:
            continue
        # Map the public filter name onto the actual column reference.
        param = {
            'product': 'pv.product_name',
            'version': 'version_string',
            'is_featured': 'featured_version',
            'end_date': 'sunset_date',
            'start_date': 'build_date',
        }.get(param, param)
        if param == 'active':
            # This is a convenient exception. It makes it possible
            # to query for only productversions that are NOT sunset
            # without having to do any particular date arithmetic.
            param = 'sunset_date'
            operator_ = value and '>=' or '<'
            value = datetime.datetime.utcnow().date().isoformat()
        elif isinstance(value, list):
            operator_ = 'IN'
            value = tuple(value)
        elif isinstance(value, tuple):
            # Presumably an (operator, value) pair produced by the
            # SmartDate filter type — confirm against SmartDate.
            assert len(value) == 2
            operator_, value = value
        else:
            operator_ = '='
        where.append('{} {} %({})s'.format(param, operator_, param))
        sql_params[param] = value
    # rewrite it to a string
    if where:
        sql_where = 'WHERE ' + ' AND '.join(where)
    else:
        sql_where = ''

    sql = """
        /* socorro.external.postgresql.products.ProductVersions.get */
        SELECT
            pv.product_name AS product,
            pv.version_string AS version,
            pv.build_date AS start_date,
            pv.sunset_date AS end_date,
            ((prc.throttle * (100)::numeric))::REAL AS throttle,
            pv.featured_version AS is_featured,
            pv.build_type,
            pv.has_builds,
            pv.is_rapid_beta
        FROM (
            (
                product_versions pv
                JOIN product_release_channels prc ON (
                    pv.product_name = prc.product_name
                    AND
                    pv.build_type = prc.release_channel
                )
                JOIN products ON pv.product_name = products.product_name
            )
            JOIN release_channels ON
            pv.build_type = release_channels.release_channel
        )
        {}
        ORDER BY products.sort, version_sort DESC, release_channels.sort
    """.format(sql_where)
    results = self.query(sql, sql_params).zipped()
    return {
        'hits': results,
        'total': len(results),
    }
def get(self, **kwargs):
    """Return product information, or version information for one
    or more product:version combinations.
    """
    warnings.warn('This class is deprecated. Use ProductVersions instead.',
                  DeprecationWarning)
    filters = [
        ("versions", None, ["list", "str"]),  # for legacy, to be removed
    ]
    params = external_common.parse_arguments(filters, kwargs)

    # Legacy path: explicit product:version pairs were requested.
    if params.versions and params.versions[0]:
        return self._get_versions(params)

    sql = """
        /* socorro.external.postgresql.products.Products.get */
        SELECT
            product_name AS product,
            version_string AS version,
            start_date,
            end_date,
            throttle,
            is_featured AS featured,
            build_type AS release,
            has_builds
        FROM product_info
        ORDER BY product_sort, version_sort DESC, channel_sort
    """
    error_message = "Failed to retrieve products/versions from PostgreSQL"
    results = self.query(sql, error_message=error_message)

    def stringify_date(value):
        # A missing date raises TypeError; leave those values as-is.
        try:
            return datetimeutil.date_to_string(value)
        except TypeError:
            return value

    products = []
    versions_per_product = {}
    for version in results.zipped():
        version['end_date'] = stringify_date(version['end_date'])
        version['start_date'] = stringify_date(version['start_date'])
        version['throttle'] = float(version['throttle'])

        product = version['product']
        if product not in products:
            products.append(product)
        versions_per_product.setdefault(product, []).append(version)

    return {
        'products': products,
        'hits': versions_per_product,
        'total': len(results)
    }
def get(self, **kwargs):
    """Return one or several signature summary reports.

    Either 'report_types' (a list) or the legacy 'report_type' (a
    single value) selects which summaries to compute. The legacy form
    returns the single report directly; the modern form wraps all
    reports in {'reports': ...}.
    """
    filters = [
        ("report_types", None, ["list", "str"]),
        ("report_type", None, "str"),
        ("signature", None, "str"),
        ("start_date", None, "date"),
        ("end_date", None, "date"),
        ("versions", None, ["list", "str"]),
    ]
    params = external_common.parse_arguments(filters, kwargs)
    if not params.get('report_types') and params.get('report_type'):
        # to support the legacy case
        individual_report = True
        report_types = [params['report_type']]
    else:
        individual_report = False
        report_types = params['report_types']

    # check that all the report types are recognized: either one of the
    # known hard-coded types, or one that report_type_sql knows about.
    for report_type in report_types:
        query_params = report_type_sql.get(report_type, {})
        known_report_types = ('products', 'distinct_install',
                              'exploitability', 'devices', 'graphics')
        if (report_type not in known_report_types and
                'first_col' not in query_params):
            raise BadArgumentError(report_type)

    products = []
    versions = []

    # Get information about the versions
    util_service = Util(config=self.context)
    versions_info = util_service.versions_info(**params)
    if versions_info:
        for i, elem in enumerate(versions_info):
            products.append(versions_info[elem]["product_name"])
            versions.append(str(versions_info[elem]["version_string"]))

    # This MUST be a tuple otherwise it gets cast to an array
    params['product'] = tuple(products)
    params['version'] = tuple(versions)

    all_results = {}

    assert isinstance(params['start_date'], datetime.date)
    assert isinstance(params['end_date'], datetime.date)

    with self.get_connection() as connection:
        for report_type in report_types:
            result_cols, query_string, query_parameters = self._get_query(
                report_type, params)
            sql_results = self.query(
                query_string,
                params=query_parameters,
                connection=connection
            )
            results = [dict(zip(result_cols, row)) for row in sql_results]
            all_results[report_type] = results

    if individual_report:
        # NOTE(review): dict.values()[0] only works on Python 2; a
        # Python 3 port would need list(all_results.values())[0].
        return all_results.values()[0]
    else:
        return {'reports': all_results}
def get(self, **kwargs):
    """Return JSON data of a crash report, given its uuid.

    'datatype' selects which form of the crash to fetch (raw dump,
    raw crash metadata, processed, or unredacted processed). When a
    processed crash is missing but its raw crash exists, a priority
    processing job is submitted and ResourceUnavailable is raised so
    the caller can retry later.
    """
    filters = [
        ('uuid', None, 'str'),
        ('datatype', None, 'str'),
        ('name', None, 'str')  # only applicable if datatype == 'raw'
    ]
    params = external_common.parse_arguments(filters, kwargs)

    if not params.uuid:
        raise MissingArgumentError('uuid')

    if not params.datatype:
        raise MissingArgumentError('datatype')

    # get a generic crashstorage instance from whatever external resource
    # is implementing this service.
    store = self.get_storage()

    # Maps the public datatype names onto crashstorage method names.
    datatype_method_mapping = {
        'raw': 'get_raw_dump',
        'meta': 'get_raw_crash',
        'processed': 'get_processed',
        'unredacted': 'get_unredacted_processed',
    }

    get = store.__getattribute__(datatype_method_mapping[params.datatype])
    try:
        if params.datatype == 'raw':
            # Raw dumps are binary; return the payload with its MIME type.
            return (
                get(params.uuid, name=params.name),
                'application/octet-stream'
            )
        else:
            return get(params.uuid)
    except CrashIDNotFound:
        if params.datatype in ('processed', 'unredacted'):
            # try to fetch a raw crash just to ensure that the raw crash
            # exists. If this line fails, there's no reason to actually
            # submit the priority job.
            try:
                store.get_raw_crash(params.uuid)
            except CrashIDNotFound:
                raise ResourceNotFound(params.uuid)
            # search through the existing other services to find the
            # Priorityjob service.
            try:
                priorityjob_service_impl = self.all_services[
                    'Priorityjobs'
                ]
            except KeyError:
                raise ServiceUnavailable('Priorityjobs')
            # get the underlying implementation of the Priorityjob
            # service and instantiate it.
            priority_job_service = priorityjob_service_impl.cls(
                config=self.config
            )
            # create the priority job for this crash_ids
            priority_job_service.create(uuid=params.uuid)
            # The crash exists but isn't processed yet: tell the caller
            # to come back once the priority job has run.
            raise ResourceUnavailable(params.uuid)
        raise ResourceNotFound(params.uuid)
def versions_info(self, **kwargs):
    """ Return information about versions of a product.

    See https://socorro.readthedocs.io/en/latest/middleware.html

    Keyword arguments:
    versions - List of products and versions.

    Return:
    None if versions is null or empty ;
    Otherwise a dictionary of data about a version, i.e.:
    {
        "product_name:version_string": {
            "product_version_id": integer,
            "version_string": "string",
            "product_name": "string",
            "major_version": "string" or None,
            "release_channel": "string" or None,
            "build_id": [list, of, decimals] or None
        }
    }
    """
    # Parse arguments
    filters = [("versions", None, ["list", "str"])]
    params = external_common.parse_arguments(filters, kwargs)

    if "versions" not in params or not params["versions"]:
        return None

    products_list = []
    (versions_list, products_list) = self.parse_versions(
        params["versions"],
        products_list
    )

    if not versions_list:
        return None

    # versions_list is a flat [product, version, product, version, ...]
    # sequence; split it into two parallel lists.
    versions = []
    products = []
    for x in xrange(0, len(versions_list), 2):
        products.append(versions_list[x])
        versions.append(versions_list[x + 1])

    params = {}
    params = self.dispatch_params(params, "product", products)
    params = self.dispatch_params(params, "version", versions)
    # Anchored regex prefixes for the `version_string ~` matches below.
    prefixed_versions = ["^%s" % x for x in versions]
    params = self.dispatch_params(
        params, "start_with_version", prefixed_versions
    )

    # Build one OR-clause per requested (product, version) pair. The
    # %% escapes survive the %-interpolation of the index and become
    # the %(name)s placeholders filled in by self.query.
    where_product = []
    where_rapid_beta = []
    for i in range(len(products)):
        where_product.append("""
            (pv.product_name = %%(product%(i)s)s
             AND pv.version_string ~ %%(start_with_version%(i)s)s)
        """ % {'i': i})
        # A row matches either the exact version, or any member of the
        # rapid-beta group whose parent version was requested.
        where_rapid_beta.append("""
            (
                i1.version_string = %%(version%(i)s)s
                AND i1.version_string = i2.version_string
            )
            OR (
                i1.rapid_beta_id = i2.product_version_id
                AND i2.version_string = %%(version%(i)s)s
                AND i2.is_rapid_beta IS TRUE
            )
        """ % {'i': i})

    sql = """
        /* socorro.external.postgresql.util.Util.versions_info */
        WITH infos AS (
            SELECT
                pv.product_version_id,
                pv.version_string,
                pv.product_name,
                pv.release_version,
                pv.build_type,
                pvb.build_id,
                pv.is_rapid_beta,
                pv.rapid_beta_id,
                pv.version_sort
            FROM product_versions pv
            LEFT JOIN product_version_builds pvb
            ON (pv.product_version_id = pvb.product_version_id)
            WHERE %(product_filters)s
        )
        SELECT DISTINCT
            i1.product_version_id,
            i1.product_name,
            i1.version_string,
            i1.release_version,
            i1.build_type,
            i1.build_id,
            i1.is_rapid_beta,
            i2.is_rapid_beta AS is_from_rapid_beta,
            (i2.product_name || ':' || i2.version_string)
                AS from_beta_version,
            i1.version_sort
        FROM infos i1
        LEFT JOIN infos i2 ON (
            i1.product_name = i2.product_name
            AND i1.release_version = i2.release_version
            AND i1.build_type = i2.build_type
        )
        WHERE %(rapid_beta_filters)s
        ORDER BY i1.version_sort
    """ % {
        "product_filters": " OR ".join(where_product),
        "rapid_beta_filters": " OR ".join(where_rapid_beta),
    }

    error_message = "Failed to retrieve versions data from PostgreSQL"
    results = self.query(sql, params, error_message=error_message)

    res = {}
    for row in results:
        version = dict(
            zip((
                "product_version_id",
                "product_name",
                "version_string",
                "major_version",
                "release_channel",
                "build_id",
                "is_rapid_beta",
                "is_from_rapid_beta",
                "from_beta_version",
                "version_sort",
            ), row))
        key = ":".join(
            (version["product_name"], version["version_string"]))
        del version["version_sort"]  # no need to send this back
        if key in res:
            # That key already exists, just add it the new buildid
            res[key]["build_id"].append(int(version["build_id"]))
        else:
            if version["build_id"]:
                version["build_id"] = [int(version["build_id"])]
            res[key] = version

    return res
def get_parameters(kwargs):
    """
    Return a dictionary of parameters with default values.

    Optional arguments:
    data_type -- Type of data to return.
        Default is None, to be determined by each service if needed.
    terms -- Terms to search for.
        Can be a string or a list of strings.
        Default is none.
    fields -- Fields to search into.
        Can be a string or a list of strings.
        Default to signature, not implemented for PostgreSQL.
    search_mode -- How to search for terms.
        Must be one of the following:
            "default", "contains", "is_exactly" or "starts_with".
        Default to "default" for ElasticSearch,
            "starts_with" for PostgreSQL.
    from_date -- Only elements after this date.
        Format must be "YYYY-mm-dd HH:ii:ss.S".
        Default is a week ago.
    to_date -- Only elements before this date.
        Format must be "YYYY-mm-dd HH:ii:ss.S".
        Default is now.
    products -- Products concerned by this search.
        Can be a string or a list of strings.
        Default is Firefox.
    os -- Restrict search to those operating systems.
        Can be a string or a list of strings.
        Default is all.
    versions -- Version of the software.
        Can be a string or a list of strings.
        Default is all.
    build_ids -- Restrict search to a particular build of the software.
        Can be a string or a list of strings.
        Default is all.
    reasons -- Restrict search to crashes caused by this reason.
        Default is all.
    release_channels -- Restrict search to crashes in this release channels.
        Default is all.
    report_type -- Retrict to a type of report.
        Can be any, crash or hang.
        Default is any.
    report_process -- How was the report processed.
        Can be any, crash or hang.
        Default is any.
    plugin_terms -- Search for terms concerning plugins.
        Can be a string or a list of strings.
        Default is none.
    plugin_in -- What field to look into.
        Can be "name" or "filename".
        Default is 'name'.
    plugin_search_mode -- How to search into plugins.
        Must be one of the following:
            "contains", "is_exactly" or "starts_with".
        Default to "contains".
    result_number -- Number of results to get.
        Default is 100.
    result_offset -- Get results from this offset.
        Default is 0.
    """
    # Default dates
    now = datetimeutil.utc_now()
    lastweek = now - datetime.timedelta(7)

    filters = [
        ("data_type", "signatures", "str"),
        ("terms", None, ["list", "str"]),
        ("signature", None, "str"),
        ("fields", "signature", ["list", "str"]),
        ("search_mode", "default", "str"),
        ("from_date", lastweek, "datetime"),
        ("to_date", now, "datetime"),
        ("products", None, ["list", "str"]),
        ("versions", None, ["list", "str"]),
        ("os", None, ["list", "str"]),
        ("reasons", None, ["list", "str"]),
        ("release_channels", None, ["list", "str"]),
        ("build_ids", None, ["list", "str"]),
        ("build_from", lastweek, "datetime"),
        ("build_to", now, "datetime"),
        ("report_process", "any", "str"),
        ("report_type", "any", "str"),
        ("plugin_terms", None, ["list", "str"]),
        ("plugin_in", "name", ["list", "str"]),
        ("plugin_search_mode", "default", "str"),
        ("result_number", 100, "int"),
        ("result_offset", 0, "int")
    ]
    params = extern.parse_arguments(filters, kwargs)

    # To be moved into a config file?
    authorized_modes = [
        "default",
        "starts_with",
        "contains",
        "is_exactly",
    ]
    # Unknown search modes silently fall back to "default".
    for mode_key in ("search_mode", "plugin_search_mode"):
        if params[mode_key] not in authorized_modes:
            params[mode_key] = "default"

    # Do not search in the future and make sure we have dates where expected
    for lower_key, upper_key in (("from_date", "to_date"),
                                 ("build_from", "build_to")):
        if params[upper_key] is None or params[upper_key] > now:
            params[upper_key] = now
        if params[lower_key] is None:
            params[lower_key] = lastweek

    # Securing fields
    params['fields'] = restrict_fields(
        params['fields'],
        ['signature', 'dump']
    )
    params['plugin_in'] = restrict_fields(
        params['plugin_in'],
        ['filename', 'name']
    )

    return params
def get(self, **kwargs):
    """Return urls for signature.

    All parameters are mandatory except 'versions', which defaults to
    'ALL'. Three query shapes are built depending on whether products
    and/or versions are the 'ALL' keyword or explicit lists.

    Raises MissingArgumentError when a mandatory parameter is empty and
    BadArgumentError when the versions cannot be parsed.
    """
    filters = [
        ("signature", None, "str"),
        ("start_date", None, "datetime"),
        ("end_date", None, "datetime"),
        ("products", None, ["list", "str"]),
        ("versions", None, ["list", "str"]),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    # Because no parameters are optional, we need to loop through
    # all parameters to ensure each has been set and is not None
    missingParams = []
    for param in params:
        if not params[param]:
            if param == 'versions':
                # force versions parameter to being 'ALL' if empty
                params[param] = 'ALL'
                continue
            missingParams.append(param)

    if len(missingParams) > 0:
        raise MissingArgumentError(", ".join(missingParams))

    all_products_versions_sql = """
    /* socorro.external.postgresql.signature_urls.SignatureURLs.get */
        SELECT url, count(*) as crash_count FROM reports_clean
        JOIN reports_user_info USING ( UUID )
        JOIN signatures USING ( signature_id )
        WHERE reports_clean.date_processed
            BETWEEN %(start_date)s AND %(end_date)s
        AND reports_user_info.date_processed
            BETWEEN %(start_date)s AND %(end_date)s
        AND signature = %(signature)s
        AND url <> ''
    """

    sql = """
    /* socorro.external.postgresql.signature_urls.SignatureURLs.get */
        SELECT url, count(*) as crash_count FROM reports_clean
        JOIN reports_user_info USING ( UUID )
        JOIN signatures USING ( signature_id )
        JOIN product_versions USING ( product_version_id )
        WHERE reports_clean.date_processed
            BETWEEN %(start_date)s AND %(end_date)s
        AND reports_user_info.date_processed
            BETWEEN %(start_date)s AND %(end_date)s
        AND signature = %(signature)s
        AND url <> ''
        AND (
    """

    sql_group_order = """ GROUP BY url
        ORDER BY crash_count DESC LIMIT 100"""
    sql_params = {
        "start_date": params.start_date,
        "end_date": params.end_date,
        "signature": params.signature
    }

    # if this query is for all products the 'ALL' keyword will be
    # the only item in the products list and this will then also
    # be for all versions.
    if 'ALL' in params['products']:
        sql_query = " ".join((all_products_versions_sql, sql_group_order))
    # if this query is for all versions the 'ALL' keyword will be
    # the only item in the versions list.
    elif 'ALL' in params['versions']:
        sql_products = " product_name IN %(products)s )"
        sql_params['products'] = tuple(params.products)

        sql_date_range_limit = """AND %(end_date)s
            BETWEEN product_versions.build_date
                AND product_versions.sunset_date"""
        sql_query = " ".join((sql, sql_products,
                              sql_date_range_limit, sql_group_order))
    else:
        products = []
        (params["products_versions"],
         products) = self.parse_versions(params["versions"], [])

        if len(params["products_versions"]) == 0:
            raise BadArgumentError(", ".join(params["versions"]))

        # products_versions is a flat [product, version, ...] sequence;
        # split it into two parallel lists.
        versions_list = []
        products_list = []
        for x in range(0, len(params["products_versions"]), 2):
            products_list.append(params["products_versions"][x])
            versions_list.append(params["products_versions"][x + 1])

        product_version_list = []
        for prod in params["products"]:
            # Collect the versions requested for this product. (This
            # was previously a list comprehension used purely for its
            # append side effect.)
            versions = [
                versions_list[i]
                for i, product_name in enumerate(products_list)
                if product_name == prod
            ]
            product_version_list.append(tuple(versions))

        # One "(product = ... AND version IN ...)" clause per product.
        sql_product_version_ids = [
            """( product_name = %%(product%s)s
                 AND version_string IN %%(version%s)s ) """ % (x, x)
            for x in range(len(product_version_list))]

        sql_params = add_param_to_dict(sql_params, "version",
                                       product_version_list)

        sql_params = add_param_to_dict(sql_params, "product",
                                       params.products)

        sql_query = " ".join((sql, " OR ".join(sql_product_version_ids),
                              " ) " + sql_group_order))

    error_message = "Failed to retrieve urls for signature from PostgreSQL"
    results = self.query(sql_query, sql_params,
                         error_message=error_message)

    urls = results.zipped()
    return {
        "hits": urls,
        "total": len(urls)
    }
def get_exploitability(self, **kwargs):
    """Return a list of exploitable crash reports.

    See socorrolib.lib.external_common.parse_arguments() for all
    filters. Supports optional pagination via 'page' and 'batch';
    when 'page' is given, 'batch' is required.
    """
    now = datetimeutil.utc_now().date()
    lastweek = now - datetime.timedelta(weeks=1)

    filters = [
        ("start_date", lastweek, "date"),
        ("end_date", now, "date"),
        ("product", None, "str"),
        ("version", None, "str"),
        ("page", None, "int"),
        ("batch", None, "int"),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    # NOTE(review): the "> 4" appears to drop low-volume signatures —
    # confirm intent before changing the threshold.
    sql_where = """
        report_date BETWEEN %(start_date)s AND %(end_date)s
        AND
        null_count + none_count + low_count + medium_count + high_count > 4
    """
    if params.product:
        sql_where += " AND pv.product_name = %(product)s"
    if params.version:
        sql_where += " AND pv.version_string = %(version)s"

    # Shared CTE body: per-signature sums over the filtered reports.
    inner_with_sql = """
        SELECT
            signature,
            SUM(high_count) AS high_count,
            SUM(medium_count) AS medium_count,
            SUM(low_count) AS low_count,
            SUM(null_count) AS null_count,
            SUM(none_count) AS none_count,
            SUM(high_count) + SUM(medium_count) AS med_or_high
        FROM exploitability_reports
        JOIN product_versions AS pv USING (product_version_id)
        WHERE
            high_count + medium_count + null_count + none_count > 4
            AND
            %s
        GROUP BY signature
    """ % (sql_where,)

    # Total count first, so pagination can report the full total.
    count_sql_query = """
        /* external.postgresql.crashes.Crashes.get_exploitability */
        WITH sums AS (
            %s
        )
        SELECT
            count(signature)
        FROM sums
    """ % (inner_with_sql,)
    results = self.query(
        count_sql_query,
        params,
        error_message="Failed to retrieve exploitable crashes count")
    total_crashes_count, = results[0]

    sql_query = """
        /* external.postgresql.crashes.Crashes.get_exploitability */
        WITH sums AS (
            %s
        )
        SELECT
            signature,
            high_count,
            medium_count,
            low_count,
            null_count,
            none_count
        FROM sums
        ORDER BY
            med_or_high DESC, signature ASC
    """ % (inner_with_sql,)

    if params['page'] is not None:
        # Pages are 1-based; 'batch' is the page size.
        if params['page'] <= 0:
            raise BadArgumentError('page', params['page'], 'starts on 1')
        if params['batch'] is None:
            raise MissingArgumentError('batch')
        sql_query += """
        LIMIT %(limit)s
        OFFSET %(offset)s
        """
        params['limit'] = params['batch']
        params['offset'] = params['batch'] * (params['page'] - 1)

    error_message = (
        "Failed to retrieve exploitable crashes from PostgreSQL")
    results = self.query(sql_query, params, error_message=error_message)

    # Transforming the results into what we want
    crashes = results.zipped()

    return {"hits": crashes, "total": total_crashes_count}