def test_get_exploitability(self):
    crashes = Crashes(config=self.config)
    today = datetimeutil.date_to_string(self.now.date())
    yesterday_date = (self.now - datetime.timedelta(days=1)).date()
    yesterday = datetimeutil.date_to_string(yesterday_date)

    res_expected = {
        "hits": [
            {
                "signature": "canIhaveYourSignature()",
                "report_date": today,
                "null_count": 0,
                "none_count": 1,
                "low_count": 2,
                "medium_count": 3,
                "high_count": 4,
            },
            {
                "signature": "ofCourseYouCan()",
                "report_date": yesterday,
                "null_count": 4,
                "none_count": 3,
                "low_count": 2,
                "medium_count": 1,
                "high_count": 0,
            }
        ],
        "total": 2,
    }

    res = crashes.get_exploitability()
    self.assertEqual(res, res_expected)
def _action(self, raw_crash, raw_dumps, processed_crash, processor_meta):
    crash_id = raw_crash.uuid
    old_processed_crash = self.crashstore.get_unredacted_processed(crash_id)

    for key, value in old_processed_crash.iteritems():
        if 'date_processed' in key:
            processed_crash[key] = date_to_string(
                string_to_datetime(value) - self.config.time_delta
            )
            print processed_crash.uuid, value, processed_crash[key]
        else:
            if key != 'uptime' and key != 'crash_time' and (
                'time' in key or "date" in key or 'Date' in key
            ):
                value = date_to_string(string_to_datetime(value))
            processed_crash[key] = value

    processor_meta.processor_notes.append(
        'DateProcessedTimeMachine has pushed date_processed into the past'
        ' by "%s" (D HH:MM:SS)' % to_str(self.config.time_delta)
    )
    processor_meta.processor_notes.append(
        'Original processor_notes: %s'
        % old_processed_crash['processor_notes']
    )
    return True
def test_get_parameters_date_defaults(self): with _get_config_manager().context() as config: search = SearchBase( config=config, fields=SUPERSEARCH_FIELDS_MOCKED_RESULTS, ) now = datetimeutil.utc_now() # Test default values when nothing is passed params = search.get_parameters() ok_('date' in params) eq_(len(params['date']), 2) # Pass only the high value args = { 'date': '<%s' % datetimeutil.date_to_string(now) } params = search.get_parameters(**args) ok_('date' in params) eq_(len(params['date']), 2) eq_(params['date'][0].operator, '<') eq_(params['date'][1].operator, '>=') eq_(params['date'][0].value.date(), now.date()) eq_( params['date'][1].value.date(), now.date() - datetime.timedelta(days=7) ) # Pass only the low value pasttime = now - datetime.timedelta(days=10) args = { 'date': '>=%s' % datetimeutil.date_to_string(pasttime) } params = search.get_parameters(**args) ok_('date' in params) eq_(len(params['date']), 2) eq_(params['date'][0].operator, '<=') eq_(params['date'][1].operator, '>=') eq_(params['date'][0].value.date(), now.date()) eq_(params['date'][1].value.date(), pasttime.date()) # Pass the two values pasttime = now - datetime.timedelta(days=10) args = { 'date': [ '<%s' % datetimeutil.date_to_string(now), '>%s' % datetimeutil.date_to_string(pasttime), ] } params = search.get_parameters(**args) ok_('date' in params) eq_(len(params['date']), 2) eq_(params['date'][0].operator, '<') eq_(params['date'][1].operator, '>') eq_(params['date'][0].value.date(), now.date()) eq_(params['date'][1].value.date(), pasttime.date())
def get(self, **kwargs): filters = [ ("start_date", None, "datetime"), ("end_date", None, "datetime"), ("product", None, "str"), ("version", None, "str"), ] params = external_common.parse_arguments(filters, kwargs) results = [] # So we have something to return. query_string = """SELECT product_name, version_string, product_version_id, report_date, nightly_builds.build_date, days_out, sum(report_count) as report_count FROM nightly_builds JOIN product_versions USING ( product_version_id ) WHERE report_date <= %(end_date)s AND report_date >= %(start_date)s AND product_name = %(product)s AND version_string = %(version)s GROUP BY product_name, version_string, product_version_id, report_date, nightly_builds.build_date, days_out""" try: connection = self.database.connection() cursor = connection.cursor() sql_results = db.execute(cursor, query_string, params) except psycopg2.Error: logger.error("Failed retrieving crashtrends data from PostgreSQL", exc_info=True) else: for trend in sql_results: row = dict(zip(( "product_name", "version_string", "product_version_id", "report_date", "build_date", "days_out", "report_count"), trend)) row['report_date'] = datetimeutil.date_to_string(row['report_date']) row['build_date'] = datetimeutil.date_to_string(row['build_date']) results.append(row) finally: connection.close() results = {'crashtrends' : results} return results
def get(self, **kwargs): filters = [ ("start_date", None, "datetime"), ("end_date", None, "datetime"), ("product", None, "str"), ("version", None, "str"), ] params = external_common.parse_arguments(filters, kwargs) sql = """ /* socorro.external.postgresql.crash_trends.CrashTrends.get */ SELECT product_name, version_string, product_version_id, report_date, nightly_builds.build_date, days_out, sum(report_count) as report_count FROM nightly_builds JOIN product_versions USING ( product_version_id ) WHERE report_date <= %(end_date)s AND report_date >= %(start_date)s AND product_name = %(product)s AND version_string = %(version)s GROUP BY product_name, version_string, product_version_id, report_date, nightly_builds.build_date, days_out """ error_message = "Failed to retrieve crash trends data from PostgreSQL" sql_results = self.query(sql, params, error_message=error_message) results = [] for row in sql_results: trend = dict(zip(( "product_name", "version_string", "product_version_id", "report_date", "build_date", "days_out", "report_count" ), row)) trend['report_date'] = datetimeutil.date_to_string( trend['report_date']) trend['build_date'] = datetimeutil.date_to_string( trend['build_date']) results.append(trend) return {'crashtrends': results}
def test_get_parameters_date_defaults(self): with _get_config_manager().context() as config: search = SearchBaseWithFields( config=config, ) now = datetimeutil.utc_now() # Test default values when nothing is passed params = search.get_parameters() assert 'date' in params assert len(params['date']) == 2 # Pass only the high value args = { 'date': '<%s' % datetimeutil.date_to_string(now) } params = search.get_parameters(**args) assert 'date' in params assert len(params['date']) == 2 assert params['date'][0].operator == '<' assert params['date'][1].operator == '>=' assert params['date'][0].value.date() == now.date() assert params['date'][1].value.date() == now.date() - datetime.timedelta(days=7) # Pass only the low value pasttime = now - datetime.timedelta(days=10) args = { 'date': '>=%s' % datetimeutil.date_to_string(pasttime) } params = search.get_parameters(**args) assert 'date' in params assert len(params['date']) == 2 assert params['date'][0].operator == '<=' assert params['date'][1].operator == '>=' assert params['date'][0].value.date() == now.date() assert params['date'][1].value.date() == pasttime.date() # Pass the two values pasttime = now - datetime.timedelta(days=10) args = { 'date': [ '<%s' % datetimeutil.date_to_string(now), '>%s' % datetimeutil.date_to_string(pasttime), ] } params = search.get_parameters(**args) assert 'date' in params assert len(params['date']) == 2 assert params['date'][0].operator == '<' assert params['date'][1].operator == '>' assert params['date'][0].value.date() == now.date() assert params['date'][1].value.date() == pasttime.date()
def twoPeriodTopCrasherComparison( databaseConnection, context, closestEntryFunction=latestEntryBeforeOrEqualTo, listOfTopCrashersFunction=getListOfTopCrashersBySignature): try: context['logger'].debug('entered twoPeriodTopCrasherComparison') except KeyError: context['logger'] = util.SilentFakeLogger() assertions = ['to_date', 'duration', 'product', 'version'] for param in assertions: assert param in context, ( "%s is missing from the configuration" % param) context['numberOfComparisonPoints'] = 2 if not context['limit']: context['limit'] = 100 #context['logger'].debug('about to latestEntryBeforeOrEqualTo') context['to_date'] = closestEntryFunction(databaseConnection, context['to_date'], context['product'], context['version']) context['logger'].debug('New to_date: %s' % context['to_date']) context['startDate'] = context.to_date - (context.duration * context.numberOfComparisonPoints) #context['logger'].debug('after %s' % context) listOfTopCrashers = listOfListsWithChangeInRank( rangeOfQueriesGenerator( databaseConnection, context, listOfTopCrashersFunction))[0] #context['logger'].debug('listOfTopCrashers %s' % listOfTopCrashers) totalNumberOfCrashes = totalPercentOfTotal = 0 for x in listOfTopCrashers: if 'total_crashes' in x: totalNumberOfCrashes = x['total_crashes'] del x['total_crashes'] totalPercentOfTotal += x.get('percentOfTotal', 0) result = { 'crashes': listOfTopCrashers, 'start_date': datetimeutil.date_to_string( context.to_date - context.duration ), 'end_date': datetimeutil.date_to_string(context.to_date), 'totalNumberOfCrashes': totalNumberOfCrashes, 'totalPercentage': totalPercentOfTotal, } #logger.debug("about to return %s", result) return result
def test_search_combined_filters(self, mock_psql_util): with self.get_config_manager().context() as config: api = Search(config=config) # get the first, default crash report params = { 'terms': 'js::break_your_browser', 'search_mode': 'is_exactly', 'products': 'WaterWolf', 'versions': 'WaterWolf:1.0', 'release_channels': 'release', 'os': 'Linux', 'build_ids': '1234567890', 'reasons': 'MOZALLOC_WENT_WRONG', 'report_type': 'crash', 'report_process': 'browser', } res = api.get(**params) self.assertEqual(res['total'], 1) self.assertEqual( res['hits'][0]['signature'], 'js::break_your_browser' ) self.assertEqual(res['hits'][0]['is_linux'], 1) self.assertEqual(res['hits'][0]['is_windows'], 0) self.assertEqual(res['hits'][0]['is_mac'], 0) # get the crash report from last month now = datetimeutil.utc_now() three_weeks_ago = now - datetime.timedelta(weeks=3) three_weeks_ago = datetimeutil.date_to_string(three_weeks_ago) five_weeks_ago = now - datetime.timedelta(weeks=5) five_weeks_ago = datetimeutil.date_to_string(five_weeks_ago) params = { 'from_date': five_weeks_ago, 'to_date': three_weeks_ago, } res = api.get(**params) self.assertEqual(res['total'], 1) self.assertEqual( res['hits'][0]['signature'], 'my_little_signature' ) self.assertEqual(res['hits'][0]['is_linux'], 1) self.assertEqual(res['hits'][0]['is_windows'], 0) self.assertEqual(res['hits'][0]['is_mac'], 0)
def post(self, **kwargs):
    params = external_common.parse_arguments(self.filters, kwargs)
    if not params['signatures']:
        raise MissingArgumentError('signatures')

    sql_params = [tuple(params['signatures'])]
    sql = """
        SELECT
            signature,
            first_report AS first_date,
            first_build
        FROM signatures
        WHERE signature IN %s
    """

    error_message = 'Failed to retrieve signatures from PostgreSQL'
    results = self.query(sql, sql_params, error_message=error_message)

    signatures = []
    for sig in results.zipped():
        sig.first_date = datetimeutil.date_to_string(sig.first_date)
        signatures.append(sig)

    return {
        'hits': signatures,
        'total': len(signatures)
    }
def get_comments(self, **kwargs):
    """Return a list of comments on crash reports, filtered by
    signatures and other fields.

    See socorro.lib.search_common.get_parameters() for all filters.
    """
    params = self.prepare_search_params(**kwargs)

    # Creating the parameters for the sql query
    sql_params = {}

    # Preparing the different parts of the sql query
    # WARNING: sensitive data is returned here (email). When there is
    # an authentication mechanism, a verification should be done here.
    sql_select = """
        SELECT
            r.date_processed,
            r.user_comments,
            r.uuid,
            CASE
                WHEN r.email = '' THEN null
                WHEN r.email IS NULL THEN null
                ELSE r.email
            END
    """

    sql_from = self.build_reports_sql_from(params)

    (sql_where, sql_params) = self.build_reports_sql_where(
        params, sql_params, self.context)
    sql_where = "%s AND r.user_comments IS NOT NULL" % sql_where

    sql_order = "ORDER BY email ASC, r.date_processed ASC"

    # Assembling the query
    sql_query = " ".join((
        "/* external.postgresql.crashes.Crashes.get_comments */",
        sql_select, sql_from, sql_where, sql_order))

    error_message = "Failed to retrieve comments from PostgreSQL"
    results = self.query(sql_query, sql_params,
                         error_message=error_message)

    # Transforming the results into what we want
    comments = []
    for row in results:
        comment = dict(zip((
            "date_processed",
            "user_comments",
            "uuid",
            "email"), row))
        comment["date_processed"] = datetimeutil.date_to_string(
            comment["date_processed"])
        comments.append(comment)

    return {
        "hits": comments,
        "total": len(comments)
    }
def twoPeriodTopCrasherComparison( databaseConnection, context, closestEntryFunction=latestEntryBeforeOrEqualTo, listOfTopCrashersFunction=getListOfTopCrashersBySignature, ): try: context["logger"].debug("entered twoPeriodTopCrasherComparison") except KeyError: context["logger"] = util.SilentFakeLogger() assertions = ["to_date", "duration", "product", "version"] for param in assertions: assert param in context, "%s is missing from the configuration" % param context["numberOfComparisonPoints"] = 2 if not context["limit"]: context["limit"] = 100 # context['logger'].debug('about to latestEntryBeforeOrEqualTo') context["to_date"] = closestEntryFunction( databaseConnection, context["to_date"], context["product"], context["version"] ) context["logger"].debug("New to_date: %s" % context["to_date"]) context["startDate"] = context.to_date - (context.duration * context.numberOfComparisonPoints) # context['logger'].debug('after %s' % context) listOfTopCrashers = listOfListsWithChangeInRank( rangeOfQueriesGenerator(databaseConnection, context, listOfTopCrashersFunction) )[0] # context['logger'].debug('listOfTopCrashers %s' % listOfTopCrashers) totalNumberOfCrashes = totalPercentOfTotal = 0 for x in listOfTopCrashers: if "total_crashes" in x: totalNumberOfCrashes = x["total_crashes"] del x["total_crashes"] totalPercentOfTotal += x.get("percentOfTotal", 0) result = { "crashes": listOfTopCrashers, "start_date": datetimeutil.date_to_string(context.to_date - context.duration), "end_date": datetimeutil.date_to_string(context.to_date), "totalNumberOfCrashes": totalNumberOfCrashes, "totalPercentage": totalPercentOfTotal, } # logger.debug("about to return %s", result) return result
def test_listOfListsWithChangeInRank(self): lastweek = self.now - datetime.timedelta(days=7) lastweek_str = datetimeutil.date_to_string(lastweek.date()) params = self.params params.startDate = self.now.date() - datetime.timedelta(days=14) query_list = tcbs.getListOfTopCrashersBySignature query_range = tcbs.rangeOfQueriesGenerator( self.connection, self.params, query_list ) res = tcbs.listOfListsWithChangeInRank(query_range) res_expected = [[{ 'count': 5L, 'mac_count': 0L, 'content_count': 0, 'first_report': lastweek_str, 'previousRank': 0, 'currentRank': 0, 'startup_percent': None, 'versions': 'plugin1, plugin2', 'first_report_exact': lastweek_str + ' 00:00:00', 'percentOfTotal': 0.625, 'changeInRank': 0, 'is_gc_count': 10L, 'win_count': 0L, 'changeInPercentOfTotal': 0.041666666666666963, 'linux_count': 5L, 'hang_count': 5L, 'signature': 'Fake Signature #1', 'versions_count': 2, 'previousPercentOfTotal': 0.58333333333333304, 'plugin_count': 0 }, { 'count': 3L, 'mac_count': 1L, 'content_count': 0, 'first_report': lastweek_str, 'previousRank': 1, 'currentRank': 1, 'startup_percent': None, 'versions': 'plugin1, plugin2, plugin3, plugin4, plugin5, plugin6', 'first_report_exact': lastweek_str + ' 00:00:00', 'percentOfTotal': 0.375, 'changeInRank': 0, 'is_gc_count': 1L, 'win_count': 1L, 'changeInPercentOfTotal': -0.041666666666667018, 'linux_count': 1L, 'hang_count': 0L, 'signature': 'Fake Signature #2', 'versions_count': 6, 'previousPercentOfTotal': 0.41666666666666702, 'plugin_count': 0 }]]
def get(self, **kwargs): """Return a job in the job queue. """ filters = [ ("uuid", None, "str"), ] params = external_common.parse_arguments(filters, kwargs) if not params.uuid: raise MissingOrBadArgumentError( "Mandatory parameter 'uuid' is missing or empty") fields = [ "id", "pathname", "uuid", "owner", "priority", "queueddatetime", "starteddatetime", "completeddatetime", "success", "message" ] sql = """ /* socorro.external.postgresql.job.Job.get */ SELECT %s FROM jobs WHERE uuid=%%(uuid)s """ % ", ".join(fields) json_result = { "total": 0, "hits": [] } connection = None try: # Creating the connection to the DB connection = self.database.connection() cur = connection.cursor() results = db.execute(cur, sql, params) except psycopg2.Error: logger.error("Failed retrieving jobs data from PostgreSQL", exc_info=True) else: for job in results: row = dict(zip(fields, job)) # Make sure all dates are turned into strings for i in row: if isinstance(row[i], datetime.datetime): row[i] = datetimeutil.date_to_string(row[i]) json_result["hits"].append(row) json_result["total"] = len(json_result["hits"]) finally: if connection: connection.close() return json_result
def test_get(self): signature_urls = SignatureURLs(config=self.config) now = datetimeutil.utc_now() now = datetime.datetime(now.year, now.month, now.day) now_str = datetimeutil.date_to_string(now) #...................................................................... # Test 1: find one exact match for products and versions passed params = { "signature": "EMPTY: no crashing thread identified; corrupt dump", "start_date": now_str, "end_date": now_str, "products": ['Firefox'], "versions": ["Firefox:10.0", "Firefox:11.0"] } res = signature_urls.get(**params) res_expected = { "hits": [ { "url": "http://deusex.wikia.com/wiki/Praxis_kit", "crash_count": 1 } ], "total": 1 } self.assertEqual(res, res_expected) #...................................................................... # Test 2: Raise error if parameter is not passed params = { "signature": "", "start_date": "", "end_date": now_str, "products": ['Firefox'], "versions": ["Firefox:10.0", "Firefox:11.0"] } self.assertRaises(MissingOrBadArgumentException, signature_urls.get, **params) #...................................................................... # Test 3: Query returning no results params = { "signature": "EMPTY: no crashing thread identified; corrupt dump", "start_date": now_str, "end_date": now_str, "products": ['Fennec'], "versions": ["Fennec:10.0", "Fennec:11.0"] } res = signature_urls.get(**params) res_expected = { "hits": [], "total": 0 } self.assertEqual(res, res_expected)
def test_date_to_string():
    # Datetime with timezone
    date = datetime.datetime(2012, 1, 3, 12, 23, 34, tzinfo=UTC)
    res_exp = '2012-01-03T12:23:34+00:00'
    res = datetimeutil.date_to_string(date)
    assert res == res_exp

    # Datetime without timezone
    date = datetime.datetime(2012, 1, 3, 12, 23, 34)
    res_exp = '2012-01-03T12:23:34'
    res = datetimeutil.date_to_string(date)
    assert res == res_exp

    # Date (no time, no timezone)
    date = datetime.date(2012, 1, 3)
    res_exp = '2012-01-03'
    res = datetimeutil.date_to_string(date)
    assert res == res_exp
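# Illustrative sketch (not part of the original test module): most of the
# services in this section normalize whatever date value the storage layer
# hands back by round-tripping it through string_to_datetime() and
# date_to_string(). The import path and the sample input below are
# assumptions made for the sake of a self-contained example.
def _date_round_trip_example():
    from socorro.lib import datetimeutil

    raw_value = '2012-01-03 12:23:34'  # hypothetical value from storage
    normalized = datetimeutil.date_to_string(
        datetimeutil.string_to_datetime(raw_value)
    )
    # `normalized` is an ISO 8601 string in one consistent format, which is
    # what the JSON-returning services expose to their callers.
    return normalized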
def test_get(self): extensions = Extensions(config=self.config) now = datetimeutil.utc_now() now = datetime.datetime(now.year, now.month, now.day, tzinfo=now.tzinfo) uuid = "%%s-%s" % now.strftime("%y%m%d") now_str = datetimeutil.date_to_string(now) #...................................................................... # Test 1: a valid crash with duplicates params = { "uuid": uuid % "a1", "date": now_str } res = extensions.get(**params) res_expected = { "hits": [ { "report_id": 1, "date_processed": now_str, "extension_key": 10, "extension_id": 'id1', "extension_version": 'version1' }, { "report_id": 1, "date_processed": now_str, "extension_key": 11, "extension_id": 'id2', "extension_version": 'version2' }, { "report_id": 1, "date_processed": now_str, "extension_key": 12, "extension_id": 'id3', "extension_version": 'version3' } ], "total": 3 } self.assertEqual(res, res_expected) #...................................................................... # Test 2: a crash without extensions params = { "uuid": uuid % "a2", "date": now_str } res = extensions.get(**params) res_expected = { "hits": [], "total": 0 } self.assertEqual(res, res_expected)
def setUp(self): super(IntegrationTestQuery, self).setUp() config = self.get_config_context() self.storage = crashstorage.ElasticSearchCrashStorage(config) self.api = Query(config=config) # clear the indices cache so the index is created on every test self.storage.indices_cache = set() # Create the supersearch fields. self.storage.es.bulk_index( index=config.webapi.elasticsearch_default_index, doc_type='supersearch_fields', docs=SUPERSEARCH_FIELDS.values(), id_field='name', refresh=True, ) self.now = datetimeutil.utc_now() yesterday = self.now - datetime.timedelta(days=1) yesterday = datetimeutil.date_to_string(yesterday) # insert data into elasticsearch default_crash_report = { 'uuid': 100, 'signature': 'js::break_your_browser', 'date_processed': yesterday, 'product': 'WaterWolf', 'version': '1.0', 'release_channel': 'release', 'os_name': 'Linux', 'build': '1234567890', 'reason': 'MOZALLOC_WENT_WRONG', 'hangid': None, 'process_type': None, } self.storage.save_processed(default_crash_report) self.storage.save_processed( dict(default_crash_report, uuid=1, product='EarthRaccoon') ) self.storage.save_processed( dict(default_crash_report, uuid=2, version='2.0') ) self.storage.save_processed( dict(default_crash_report, uuid=3, release_channel='aurora') ) # As indexing is asynchronous, we need to force elasticsearch to # make the newly created content searchable before we run the tests self.storage.es.refresh()
def test_twoPeriodTopCrasherComparisonLimited(self): lastweek = self.now - datetime.timedelta(days=7) lastweek_str = datetimeutil.date_to_string(lastweek.date()) two_weeks = datetimeutil.date_to_string(self.now.date() - datetime.timedelta(days=14)) self.params.limit = 1 res = tcbs.twoPeriodTopCrasherComparison( self.connection, self.params ) res_expected = { 'totalPercentage': 0.58333333333333304, 'end_date': lastweek_str, 'start_date': two_weeks, 'crashes': [{ 'count': 14L, 'mac_count': 1L, 'content_count': 0, 'first_report': lastweek_str, 'previousRank': 'null', 'currentRank': 0, 'startup_percent': None, 'versions': 'plugin1, plugin2', 'first_report_exact': lastweek_str + ' 00:00:00', 'percentOfTotal': 0.58333333333333304, 'changeInRank': 'new', 'is_gc_count': 1L, 'win_count': 12L, 'changeInPercentOfTotal': 'new', 'linux_count': 1L, 'hang_count': 0L, 'signature': 'Fake Signature #1', 'versions_count': 2, 'previousPercentOfTotal': 'null', 'plugin_count': 0 }], 'totalNumberOfCrashes': 24L } eq_(res, res_expected)
def test_get_parameters_date_defaults(self): with _get_config_manager().context() as config: search = SearchBase(config=config) now = datetimeutil.utc_now() # Test default values when nothing is passed params = search.get_parameters() ok_("date" in params) eq_(len(params["date"]), 2) # Pass only the high value args = {"date": "<%s" % datetimeutil.date_to_string(now)} params = search.get_parameters(**args) ok_("date" in params) eq_(len(params["date"]), 2) eq_(params["date"][0].operator, "<") eq_(params["date"][1].operator, ">=") eq_(params["date"][0].value.date(), now.date()) eq_(params["date"][1].value.date(), now.date() - datetime.timedelta(days=7)) # Pass only the low value pasttime = now - datetime.timedelta(days=10) args = {"date": ">=%s" % datetimeutil.date_to_string(pasttime)} params = search.get_parameters(**args) ok_("date" in params) eq_(len(params["date"]), 2) eq_(params["date"][0].operator, "<=") eq_(params["date"][1].operator, ">=") eq_(params["date"][0].value.date(), now.date()) eq_(params["date"][1].value.date(), pasttime.date()) # Pass the two values pasttime = now - datetime.timedelta(days=10) args = {"date": ["<%s" % datetimeutil.date_to_string(now), ">%s" % datetimeutil.date_to_string(pasttime)]} params = search.get_parameters(**args) ok_("date" in params) eq_(len(params["date"]), 2) eq_(params["date"][0].operator, "<") eq_(params["date"][1].operator, ">") eq_(params["date"][0].value.date(), now.date()) eq_(params["date"][1].value.date(), pasttime.date())
def test_get_signature_history(self): api = Crashes(config=self.config) now = self.now lastweek = now - datetime.timedelta(days=7) params = { "product": "Firefox", "version": "8.0", "signature": "signature1", "start_date": lastweek, "end_date": now, } res = api.get_signature_history(**params) self.assertEqual(len(res["hits"]), 2) self.assertEqual(len(res["hits"]), res["total"]) date = datetimeutil.date_to_string(now.date()) self.assertEqual(res["hits"][0]["date"], date) self.assertEqual(res["hits"][1]["date"], date) self.assertEqual(res["hits"][0]["count"], 5) self.assertEqual(res["hits"][1]["count"], 14) self.assertEqual(round(res["hits"][0]["percent_of_total"], 2), round(5.0 / 19.0 * 100, 2)) self.assertEqual(round(res["hits"][1]["percent_of_total"], 2), round(14.0 / 19.0 * 100, 2)) # Test no results params = { "product": "Firefox", "version": "9.0", "signature": "signature1", "start_date": lastweek, "end_date": now, } res = api.get_signature_history(**params) res_expected = {"hits": [], "total": 0} self.assertEqual(res, res_expected) # Test default date parameters params = {"product": "Fennec", "version": "11.0.1", "signature": "signature3"} res = api.get_signature_history(**params) res_expected = {"hits": [{"date": now.date().isoformat(), "count": 14, "percent_of_total": 100}], "total": 1} self.assertEqual(res, res_expected) # Test missing parameters self.assertRaises(MissingOrBadArgumentError, api.get_signature_history) self.assertRaises(MissingOrBadArgumentError, api.get_signature_history, **{"product": "Firefox"}) self.assertRaises( MissingOrBadArgumentError, api.get_signature_history, **{"product": "Firefox", "version": "8.0"} ) self.assertRaises( MissingOrBadArgumentError, api.get_signature_history, **{"signature": "signature1", "version": "8.0"} )
def test_twoPeriodTopCrasherComparisonLimited(self): lastweek = self.now - datetime.timedelta(days=7) lastweek_str = datetimeutil.date_to_string(lastweek.date()) two_weeks = datetimeutil.date_to_string(self.now.date() - datetime.timedelta(days=14)) self.params.limit = 1 res = tcbs.twoPeriodTopCrasherComparison(self.connection, self.params) res_expected = { "totalPercentage": 0.58333333333333304, "end_date": lastweek_str, "start_date": two_weeks, "crashes": [ { "count": 14L, "mac_count": 1L, "content_count": 0, "first_report": lastweek_str, "previousRank": "null", "currentRank": 0, "startup_percent": None, "versions": "plugin1, plugin2", "first_report_exact": lastweek_str + " 00:00:00", "percentOfTotal": 0.58333333333333304, "changeInRank": "new", "is_gc_count": 1L, "win_count": 12L, "changeInPercentOfTotal": "new", "linux_count": 1L, "hang_count": 0L, "signature": "Fake Signature #1", "versions_count": 2, "previousPercentOfTotal": "null", "plugin_count": 0, } ], "totalNumberOfCrashes": 24L, } eq_(res, res_expected)
def get(self, **kwargs): """Return the current state of all Crontabber jobs""" sql = """ /* socorro.external.postgresql.crontabber_state.CrontabberState.get */ SELECT app_name, next_run, first_run, last_run, last_success, error_count, depends_on, last_error, ongoing FROM crontabber ORDER BY app_name """ error_message = ( "Failed to retrieve crontabber state data from PostgreSQL" ) results = self.query(sql, error_message=error_message) state = {} for row in results: app_name = row[0] state[app_name] = dict(zip(( 'next_run', 'first_run', 'last_run', 'last_success', 'error_count', 'depends_on', 'last_error', 'ongoing' ), row[1:])) possible_datetimes = ( 'next_run', 'first_run', 'last_run', 'last_success', 'ongoing' ) for key in possible_datetimes: value = state[app_name][key] if value is None: continue state[app_name][key] = datetimeutil.date_to_string(value) state[app_name]['last_error'] = json.loads( state[app_name]['last_error'] ) return {"state": state}
def test_get_adu_by_signature(self): crashes = Crashes(config=self.config) signature = "canIhaveYourSignature()" channel = "release" yesterday_date = (self.now - datetime.timedelta(days=1)).date() yesterday = datetimeutil.date_to_string(yesterday_date) res_expected = { "hits": [ { "product_name": "WaterWolf", "signature": signature, "adu_date": yesterday, "build_date": "2014-03-01", "buildid": '201403010101', "crash_count": 3, "adu_count": 1023, "os_name": "Mac OS X", "channel": channel, }, { "product_name": "WaterWolf", "signature": signature, "adu_date": yesterday, "build_date": "2014-04-01", "buildid": '201404010101', "crash_count": 4, "adu_count": 1024, "os_name": "Windows NT", "channel": channel, }, ], "total": 2, } res = crashes.get_adu_by_signature( product_name="WaterWolf", start_date=yesterday, end_date=yesterday, signature=signature, channel=channel, ) eq_(res, res_expected) assert_raises( BadArgumentError, crashes.get_adu_by_signature, start_date=(yesterday_date - datetime.timedelta(days=366)), end_date=yesterday, signature=signature, channel=channel )
def get_exploitability(self, **kwargs):
    """Return a list of exploitable crash reports.

    See socorro.lib.external_common.parse_arguments() for all filters.
    """
    now = datetimeutil.utc_now().date()
    lastweek = now - datetime.timedelta(weeks=1)

    filters = [
        ("start_date", lastweek, "date"),
        ("end_date", now, "date"),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    sql_query = """
        /* external.postgresql.crashes.Crashes.get_exploitability */
        SELECT
            signature,
            report_date,
            null_count,
            none_count,
            low_count,
            medium_count,
            high_count
        FROM exploitability_reports
        WHERE report_date BETWEEN %(start_date)s AND %(end_date)s
        ORDER BY report_date DESC;
    """

    error_message = "Failed to retrieve exploitable crashes from PostgreSQL"
    results = self.query(sql_query, params, error_message=error_message)

    # Transforming the results into what we want
    crashes = []
    for row in results:
        crash = dict(zip((
            "signature",
            "report_date",
            "null_count",
            "none_count",
            "low_count",
            "medium_count",
            "high_count"), row))
        crash["report_date"] = datetimeutil.date_to_string(
            crash["report_date"])
        crashes.append(crash)

    return {
        "hits": crashes,
        "total": len(crashes)
    }
def get(self, **kwargs): """Return a list of extensions associated with a crash's UUID.""" filters = [ ("uuid", None, "str"), ("date", None, "datetime"), ] params = external_common.parse_arguments(filters, kwargs) sql = """/* socorro.external.postgresql.extensions.Extensions.get */ SELECT extensions.* FROM extensions INNER JOIN reports ON extensions.report_id = reports.id WHERE reports.uuid = %(uuid)s AND reports.date_processed = %(crash_date)s AND extensions.date_processed = %(crash_date)s """ sql_params = { "uuid": params.uuid, "crash_date": params.date } result = { "total": 0, "hits": [] } try: connection = self.database.connection() cur = connection.cursor() results = db.execute(cur, sql, sql_params) except psycopg2.Error: logger.error("Failed retrieving extensions data from PostgreSQL", exc_info=True) else: for crash in results: row = dict(zip(( "report_id", "date_processed", "extension_key", "extension_id", "extension_version"), crash)) result["hits"].append(row) row["date_processed"] = datetimeutil.date_to_string(row["date_processed"]) result["total"] = len(result["hits"]) finally: connection.close() return result
def format_dates_in_crash(self, processed_crash):
    # HBase returns dates in a format that elasticsearch does not
    # understand. To keep our elasticsearch mapping simple, we
    # transform all dates to a recognized format.
    for attr in processed_crash:
        try:
            processed_crash[attr] = datetimeutil.date_to_string(
                datetimeutil.string_to_datetime(
                    processed_crash[attr]
                )
            )
        except (ValueError, TypeError, ISO8601Error):
            # the attribute is not a date
            pass

    return processed_crash
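# Hedged usage sketch for format_dates_in_crash() above: the storage object,
# field names and values here are made up for illustration and are not the
# real processed-crash schema. Non-date fields fail to parse and are left
# untouched by the except clause; date-like strings are rewritten into a
# form elasticsearch can ingest.
def _format_dates_in_crash_example(storage):
    processed_crash = {
        'uuid': 'hypothetical-uuid',
        'signature': 'js::break_your_browser',
        'date_processed': '2013-01-09 22:21:59.209615',
    }
    return storage.format_dates_in_crash(processed_crash)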
def get(self, **kwargs): """Return a job in the job queue. """ filters = [ ("uuid", None, "str"), ] params = external_common.parse_arguments(filters, kwargs) if not params.uuid: raise MissingOrBadArgumentError( "Mandatory parameter 'uuid' is missing or empty") fields = [ "id", "pathname", "uuid", "owner", "priority", "queueddatetime", "starteddatetime", "completeddatetime", "success", "message" ] sql = """ /* socorro.external.postgresql.job.Job.get */ SELECT %s FROM jobs WHERE uuid=%%(uuid)s """ % ", ".join(fields) error_message = "Failed to retrieve jobs data from PostgreSQL" results = self.query(sql, params, error_message=error_message) jobs = [] for row in results: job = dict(zip(fields, row)) # Make sure all dates are turned into strings for i in job: if isinstance(job[i], datetime.datetime): job[i] = datetimeutil.date_to_string(job[i]) jobs.append(job) return { "hits": jobs, "total": len(jobs) }
def get(self, **kwargs): """Return the current state of all Crontabber jobs""" sql = ( '/* socorro.external.postgresql.crontabber_state.CrontabberState' '.get */\n' 'SELECT state, last_updated FROM crontabber_state;' ) error_message = ( "Failed to retrieve crontabber state data from PostgreSQL" ) results = self.query(sql, error_message=error_message) result, = results state, last_updated = result return { "state": json.loads(state), "last_updated": datetimeutil.date_to_string(last_updated) }
def main(self): es_storage = self.config.elasticsearch_storage_class(self.config) hb_storage = self.config.hbase_storage_class(self.config) hb_client = HBaseConnectionForCrashReports( self.config.hbase_host, self.config.hbase_port, self.config.hbase_timeout, ) current_date = self.config.end_date date = current_date.strftime('%y%m%d') one_day = datetime.timedelta(days=1) for i in range(self.config.duration): day = current_date.strftime('%y%m%d') self.config.logger.info('backfilling crashes for %s', day) reports = hb_client.get_list_of_processed_json_for_date( day, number_of_retries=5 ) for report in reports: processed_crash = json.loads(report) # HBase returns dates in a format that elasticsearch does not # understand. To keep our elasticsearch mapping simple, we # transform all dates to a recognized format. for attr in processed_crash: try: processed_crash[attr] = datetimeutil.date_to_string( datetimeutil.string_to_datetime( processed_crash[attr] ) ) except (ValueError, TypeError, ISO8601Error): # the attribute is not a date pass # print processed_crash['uuid'] es_storage.save_processed(processed_crash) current_date -= one_day return 0
def test_get_comments(self): crashes = Crashes(config=self.config) today = datetimeutil.date_to_string(self.now) # Test 1: results params = { "signature": "js", } res_expected = { "hits": [ { "email": None, "date_processed": today, "uuid": "def", "user_comments": "hello" }, { "email": None, "date_processed": today, "uuid": "hij", "user_comments": "hah" } ], "total": 2 } res = crashes.get_comments(**params) self.assertEqual(res, res_expected) # Test 2: no results params = { "signature": "blah", } res_expected = { "hits": [], "total": 0 } res = crashes.get_comments(**params) self.assertEqual(res, res_expected) # Test 3: missing parameter self.assertRaises(MissingArgumentError, crashes.get_comments)
def test_twoPeriodTopCrasherComparison(self): lastweek = self.now - datetime.timedelta(days=7) lastweek_str = datetimeutil.date_to_string(lastweek.date()) two_weeks = datetimeutil.date_to_string(self.now.date() - datetime.timedelta(days=14)) res = tcbs.twoPeriodTopCrasherComparison(self.connection, self.params) res_expected = { 'totalPercentage': 1.0, 'end_date': lastweek_str, 'start_date': two_weeks, 'crashes': [{ 'count': 14L, 'mac_count': 1L, 'content_count': 0, 'first_report': lastweek_str, 'previousRank': 'null', 'currentRank': 0, 'startup_percent': None, 'versions': 'plugin1, plugin2', 'first_report_exact': lastweek_str + ' 00:00:00', 'percentOfTotal': 0.58333333333333304, 'changeInRank': 'new', 'is_gc_count': 1L, 'win_count': 12L, 'changeInPercentOfTotal': 'new', 'linux_count': 1L, 'hang_count': 0L, 'signature': 'Fake Signature #1', 'versions_count': 2, 'previousPercentOfTotal': 'null', 'plugin_count': 0 }, { 'count': 10L, 'mac_count': 2L, 'content_count': 0, 'first_report': lastweek_str, 'previousRank': 'null', 'currentRank': 1, 'startup_percent': None, 'versions': 'plugin1, plugin2, plugin3, ' 'plugin4, plugin5, plugin6', 'first_report_exact': lastweek_str + ' 00:00:00', 'percentOfTotal': 0.41666666666666702, 'changeInRank': 'new', 'is_gc_count': 3L, 'win_count': 7L, 'changeInPercentOfTotal': 'new', 'linux_count': 1L, 'hang_count': 0L, 'signature': 'Fake Signature #2', 'versions_count': 6, 'previousPercentOfTotal': 'null', 'plugin_count': 0 }], 'totalNumberOfCrashes': 24L } eq_(res, res_expected)
def test_get(self): products = Products(config=self.config) now = self.now.date() now_str = datetimeutil.date_to_string(now) #...................................................................... # Test 1: find one exact match for one product and one version params = {"versions": "Firefox:8.0"} res = products.get(**params) res_expected = { "hits": [{ "is_featured": False, "version": "8.0", "throttle": 10.0, "start_date": now_str, "end_date": now_str, "has_builds": False, "product": "Firefox", "build_type": "Release" }], "total": 1 } eq_(sorted(res['hits'][0]), sorted(res_expected['hits'][0])) #...................................................................... # Test 2: Find two different products with their correct verions params = {"versions": ["Firefox:8.0", "Thunderbird:10.0.2b"]} res = products.get(**params) res_expected = { "hits": [{ "product": "Firefox", "version": "8.0", "start_date": now_str, "end_date": now_str, "is_featured": False, "build_type": "Release", "throttle": 10.0, "has_builds": False }, { "product": "Thunderbird", "version": "10.0.2b", "start_date": now_str, "end_date": now_str, "is_featured": False, "build_type": "Release", "throttle": 10.0, "has_builds": False }], "total": 2 } eq_(sorted(res['hits'][0]), sorted(res_expected['hits'][0])) #...................................................................... # Test 3: empty result, no products:version found params = {"versions": "Firefox:14.0"} res = products.get(**params) res_expected = {"hits": [], "total": 0} eq_(res, res_expected) #...................................................................... # Test 4: Test products list is returned with no parameters # Note that the expired version is not returned params = {} res = products.get(**params) res_expected = { "products": ["Firefox", "Thunderbird", "Fennec"], "hits": { "Firefox": [{ "product": "Firefox", "version": "8.0", "start_date": now_str, "end_date": now_str, "throttle": 10.00, "featured": False, "release": "Release", "has_builds": False }], "Thunderbird": [{ "product": "Thunderbird", "version": "10.0.2b", "start_date": now_str, "end_date": now_str, "throttle": 10.00, "featured": False, "release": "Release", "has_builds": False, }], "Fennec": [{ "product": "Fennec", "version": "12.0b1", "start_date": now_str, "end_date": now_str, "throttle": 100.00, "featured": False, "release": "Beta", "has_builds": False }, { "product": "Fennec", "version": "11.0.1", "start_date": now_str, "end_date": now_str, "throttle": 10.00, "featured": False, "release": "Release", "has_builds": False }] }, "total": 4 } eq_(res['total'], res_expected['total']) eq_(sorted(res['products']), sorted(res_expected['products'])) eq_(sorted(res['hits']), sorted(res_expected['hits'])) for product in sorted(res['hits'].keys()): eq_(sorted(res['hits'][product][0]), sorted(res_expected['hits'][product][0])) # test returned order of versions assert len(res['hits']['Fennec']) == 2 eq_(res['hits']['Fennec'][0]['version'], '12.0b1') eq_(res['hits']['Fennec'][1]['version'], '11.0.1') #...................................................................... # Test 5: An invalid versions list is passed, all versions are returned params = {'versions': [1]} res = products.get(**params) eq_(res['total'], 4)
def test_get(self): signature_urls = SignatureURLs(config=self.config) now = self.now now = datetime.datetime(now.year, now.month, now.day) now_str = datetimeutil.date_to_string(now) #...................................................................... # Test 1: find one exact match for products and versions passed params = { "signature": "EMPTY: no crashing thread identified; corrupt dump", "start_date": now_str, "end_date": now_str, "products": ['Firefox'], "versions": ["Firefox:10.0", "Firefox:11.0"] } res = signature_urls.get(**params) res_expected = { "hits": [{ "url": "http://deusex.wikia.com/wiki/Praxis_kit", "crash_count": 1 }], "total": 1 } eq_(res, res_expected) #...................................................................... # Test 2: Raise error if parameter is not passed params = { "signature": "", "start_date": "", "end_date": now_str, "products": ['Firefox'], "versions": ["Firefox:10.0", "Firefox:11.0"] } assert_raises(MissingArgumentError, signature_urls.get, **params) #...................................................................... # Test 3: Query returning no results params = { "signature": "EMPTY: no crashing thread identified; corrupt dump", "start_date": now_str, "end_date": now_str, "products": ['Fennec'], "versions": ["Fennec:10.0", "Fennec:11.0"] } res = signature_urls.get(**params) res_expected = {"hits": [], "total": 0} eq_(res, res_expected) # Test 4: Return results for all version of Firefox params = { "signature": "EMPTY: no crashing thread identified; corrupt dump", "start_date": now_str, "end_date": now_str, "products": ['Firefox'], "versions": ["ALL"] } res = signature_urls.get(**params) res_expected = { "hits": [{ "url": "http://deusex.wikia.com/wiki/Praxis_kit", "crash_count": 1 }, { "url": "http://wikipedia.org/Code_Rush", "crash_count": 1 }], "total": 2 } eq_(res, res_expected) # Test 5: Return results for all products and versions params = { "signature": "EMPTY: no crashing thread identified; corrupt dump", "start_date": now_str, "end_date": now_str, "products": ['ALL'], "versions": ["ALL"] } res = signature_urls.get(**params) res_expected = { "hits": [{ "url": "http://deusex.wikia.com/wiki/Praxis_kit", "crash_count": 1 }, { "url": "http://wikipedia.org/Code_Rush", "crash_count": 1 }, { "url": "http://arewemobileyet.org/", "crash_count": 1 }], "total": 3 } eq_(res, res_expected) # Test when we send incorrectly formatted 'versions' parameter params = { "signature": 'Does not exist', "start_date": now_str, "end_date": now_str, "products": ['Firefox'], "versions": ['27.0a1'] } assert_raises(BadArgumentError, signature_urls.get, **params)
def build_query_from_params(params, config): """ Build and return an ES query given a list of parameters. See socorro.lib.search_common.SearchCommon.get_parameters() for parameters and default values. """ # Dates need to be strings for ES params["from_date"] = dtutil.date_to_string(params["from_date"]) params["to_date"] = dtutil.date_to_string(params["to_date"]) # Preparing the different elements of the json query query = {"match_all": {}} queries = [] filters = {"and": []} # Creating the terms depending on the way we should search if params["terms"] and params["search_mode"] == "default": filters["and"].append( ElasticSearchBase.build_terms_query( params["fields"], [x.lower() for x in params["terms"]])) elif (params["terms"] and params["search_mode"] == "is_exactly" and params["fields"] == ["signature"]): filters["and"].append( ElasticSearchBase.build_terms_query("signature.full", params["terms"])) elif params["terms"]: params["terms"] = ElasticSearchBase.prepare_terms( params["terms"], params["search_mode"]) queries.append( ElasticSearchBase.build_wildcard_query(params["fields"], params["terms"])) # Generating the filters if params["products"]: filters["and"].append( ElasticSearchBase.build_terms_query("product.full", params["products"])) if params["os"]: filters["and"].append( ElasticSearchBase.build_terms_query( "os_name", [x.lower() for x in params["os"]])) if params["build_ids"]: filters["and"].append( ElasticSearchBase.build_terms_query("build", params["build_ids"])) if params["reasons"]: filters["and"].append( ElasticSearchBase.build_terms_query( "reason", [x.lower() for x in params["reasons"]])) if params["release_channels"]: filters["and"].append( ElasticSearchBase.build_terms_query( "release_channel", [x.lower() for x in params["release_channels"]])) # plugins filter if params['plugin_terms']: # change plugin field names to match what is in elasticsearch params['plugin_in'] = [ 'Plugin%s' % x.capitalize() for x in params['plugin_in'] ] if params['plugin_search_mode'] == 'default': filters['and'].append( ElasticSearchBase.build_terms_query( params['plugin_in'], [x.lower() for x in params['plugin_terms']])) elif (params['plugin_search_mode'] == 'is_exactly' and len(params['plugin_in']) == 1): filters['and'].append( ElasticSearchBase.build_terms_query( '%s.full' % params['plugin_in'][0], params['plugin_terms'])) else: params['plugin_terms'] = ElasticSearchBase.prepare_terms( params['plugin_terms'], params['plugin_search_mode']) queries.append( ElasticSearchBase.build_wildcard_query( ['%s.full' % x for x in params['plugin_in']], params['plugin_terms'])) filters["and"].append({ "range": { "date_processed": { "from": params["from_date"], "to": params["to_date"] } } }) if params["report_process"] == "browser": filters["and"].append({"missing": {"field": "process_type"}}) elif params["report_process"] in ("plugin", "content"): filters["and"].append( ElasticSearchBase.build_terms_query("process_type", params["report_process"])) if params["report_type"] == "crash": filters["and"].append({"missing": {"field": "hangid"}}) elif params["report_type"] == "hang": filters["and"].append({"exists": {"field": "hangid"}}) # Generating the filters for versions if params["versions"]: versions = ElasticSearchBase.format_versions(params["versions"]) versions_info = params["versions_info"] # There are several pairs product:version or_filter = [] for v in versions: version = v["version"] product = v["product"] if not version: # There is no valid version here. 
continue key = "%s:%s" % (product, version) version_data = {} if key in versions_info: version_data = versions_info[key] if version_data and version_data["is_rapid_beta"]: # If the version is a rapid beta, that means it's an # alias for a list of other versions. We thus don't filter # on that version, but on all versions listed in the # version_data that we have. # Get all versions that are linked to this rapid beta. rapid_beta_versions = [ x for x in versions_info if versions_info[x]["from_beta_version"] == key and not versions_info[x]["is_rapid_beta"] ] for rapid_beta in rapid_beta_versions: and_filter = ElasticSearchBase.build_version_filters( product, versions_info[rapid_beta]["version_string"], versions_info[rapid_beta], config) or_filter.append({"and": and_filter}) else: # This is a "normal" version, let's filter on it and_filter = ElasticSearchBase.build_version_filters( product, version, version_data, config) or_filter.append({"and": and_filter}) if or_filter: filters["and"].append({"or": or_filter}) if len(queries) > 1: query = {"bool": {"must": queries}} elif len(queries) == 1: query = queries[0] # Generating the full query from the parts return { "size": params["result_number"], "from": params["result_offset"], "query": { "filtered": { "query": query, "filter": filters } } }
def test_date_to_string_fail():
    # A plain string is not a date or datetime, so date_to_string is
    # expected to reject it (assumed here to raise TypeError).
    with pytest.raises(TypeError):
        datetimeutil.date_to_string('2012-01-03')
def get_list(self, **kwargs): """ List all crashes with a given signature and return them. Optional arguments: see SearchCommon.get_parameters() """ # Creating the connection to the DB self.connection = self.database.connection() cur = self.connection.cursor() # aliases if "from" in kwargs and "from_date" not in kwargs: kwargs["from_date"] = kwargs.get("from") if "to" in kwargs and "to_date" not in kwargs: kwargs["to_date"] = kwargs.get("to") params = search_common.get_parameters(kwargs) if params["signature"] is None: return None params["terms"] = params["signature"] params["search_mode"] = "is_exactly" # Default mode falls back to starts_with for postgres if params["plugin_search_mode"] == "default": params["plugin_search_mode"] = "starts_with" # Limiting to a signature if params["terms"]: params["terms"] = self.prepare_terms(params["terms"], params["search_mode"]) # Searching for terms in plugins if params["report_process"] == "plugin" and params["plugin_terms"]: params["plugin_terms"] = " ".join(params["plugin_terms"]) params["plugin_terms"] = self.prepare_terms( params["plugin_terms"], params["plugin_search_mode"]) # Get information about the versions util_service = Util(config=self.context) params["versions_info"] = util_service.versions_info(**params) # Parsing the versions params["versions_string"] = params["versions"] (params["versions"], params["products"]) = self.parse_versions(params["versions"], params["products"]) if hasattr(self.context, 'webapi'): context = self.context.webapi else: # old middleware context = self.context # Changing the OS ids to OS names for i, elem in enumerate(params["os"]): for platform in context.platforms: if platform["id"] == elem: params["os"][i] = platform["name"] # Creating the parameters for the sql query sql_params = {} # Preparing the different parts of the sql query sql_select = """ SELECT r.date_processed, r.uptime, r.user_comments, r.uuid, r.product, r.version, r.build, r.signature, r.url, r.os_name, r.os_version, r.cpu_name, r.cpu_info, r.address, r.reason, r.last_crash, r.install_age, r.hangid, r.process_type, (r.client_crash_date - (r.install_age * INTERVAL '1 second')) AS install_time, rd.duplicate_of """ sql_from = self.build_reports_sql_from(params) sql_from = """%s LEFT OUTER JOIN reports_duplicates rd ON r.uuid = rd.uuid """ % sql_from (sql_where, sql_params) = self.build_reports_sql_where(params, sql_params, self.context) sql_order = """ ORDER BY r.date_processed DESC """ (sql_limit, sql_params) = self.build_reports_sql_limit(params, sql_params) # Assembling the query sql_query = " ".join( ("/* socorro.external.postgresql.report.Report.list */", sql_select, sql_from, sql_where, sql_order, sql_limit)) # Query for counting the results sql_count_query = " ".join( ("/* socorro.external.postgresql.report.Report.list */", "SELECT count(*)", sql_from, sql_where)) # Debug logger.debug(sql_count_query) logger.debug(cur.mogrify(sql_count_query, sql_params)) # Querying the DB try: total = db.singleValueSql(cur, sql_count_query, sql_params) except db.SQLDidNotReturnSingleValue: total = 0 util.reportExceptionAndContinue(logger) results = [] # No need to call Postgres if we know there will be no results if total != 0: try: results = db.execute(cur, sql_query, sql_params) except psycopg2.Error: util.reportExceptionAndContinue(logger) json_result = {"total": total, "hits": []} # Transforming the results into what we want for crash in results: row = dict( zip(("date_processed", "uptime", "user_comments", "uuid", "product", "version", "build", 
"signature", "url", "os_name", "os_version", "cpu_name", "cpu_info", "address", "reason", "last_crash", "install_age", "hangid", "process_type", "install_time", "duplicate_of"), crash)) for i in row: if isinstance(row[i], datetime.datetime): row[i] = datetimeutil.date_to_string(row[i]) json_result["hits"].append(row) self.connection.close() return json_result
def get_daily(self, **kwargs): """Return crashes by active daily users. """ now = datetimeutil.utc_now().date() lastweek = now - datetime.timedelta(weeks=1) filters = [ ("product", None, "str"), ("versions", None, ["list", "str"]), ("from_date", lastweek, "date"), ("to_date", now, "date"), ("os", None, ["list", "str"]), ("report_type", None, ["list", "str"]), ("separated_by", None, "str"), ("date_range_type", "date", "str"), ] # aliases if "from" in kwargs and "from_date" not in kwargs: kwargs["from_date"] = kwargs.get("from") if "to" in kwargs and "to_date" not in kwargs: kwargs["to_date"] = kwargs.get("to") params = external_common.parse_arguments(filters, kwargs) if not params.product: raise MissingOrBadArgumentError( "Mandatory parameter 'product' is missing or empty") if not params.versions or not params.versions[0]: raise MissingOrBadArgumentError( "Mandatory parameter 'versions' is missing or empty") params.versions = tuple(params.versions) # simple version, for home page graphs mainly if ((not params.os or not params.os[0]) and (not params.report_type or not params.report_type[0]) and (not params.separated_by or not params.separated_by[0])): if params.date_range_type == "build": table_to_use = "home_page_graph_build_view" date_range_field = "build_date" else: table_to_use = "home_page_graph_view" date_range_field = "report_date" db_fields = ("product_name", "version_string", date_range_field, "report_count", "adu", "crash_hadu") out_fields = ("product", "version", "date", "report_count", "adu", "crash_hadu") sql = """ /* socorro.external.postgresql.crashes.Crashes.get_daily */ SELECT %(db_fields)s FROM %(table_to_use)s WHERE product_name=%%(product)s AND version_string IN %%(versions)s AND %(date_range_field)s BETWEEN %%(from_date)s AND %%(to_date)s """ % { "db_fields": ", ".join(db_fields), "date_range_field": date_range_field, "table_to_use": table_to_use } # complex version, for daily crashes page mainly else: if params.date_range_type == "build": table_to_use = "crashes_by_user_build_view" date_range_field = "build_date" else: table_to_use = "crashes_by_user_view" date_range_field = "report_date" db_fields = [ "product_name", "version_string", date_range_field, "sum(adjusted_report_count)::bigint as report_count", "sum(adu)::bigint as adu", """crash_hadu(sum(report_count)::bigint, sum(adu)::bigint, avg(throttle)) as crash_hadu""", "avg(throttle) as throttle" ] out_fields = [ "product", "version", "date", "report_count", "adu", "crash_hadu", "throttle" ] db_group = ["product_name", "version_string", date_range_field] if params.separated_by == "os": db_fields += ["os_name", "os_short_name"] db_group += ["os_name", "os_short_name"] out_fields += ["os", "os_short"] sql_where = [] if params.os and params.os[0]: sql_where.append("os_short_name IN %(os)s") params.os = tuple(x[0:3].lower() for x in params.os) if params.report_type and params.report_type[0]: sql_where.append("crash_type_short IN %(report_type)s") params.report_type = tuple(params.report_type) if sql_where: sql_where = "AND %s" % " AND ".join(sql_where) else: sql_where = '' sql = """ /* socorro.external.postgresql.crashes.Crashes.get_daily */ SELECT %(db_fields)s FROM ( SELECT product_name, version_string, %(date_range_field)s, os_name, os_short_name, SUM(report_count)::int as report_count, SUM(adjusted_report_count)::int as adjusted_report_count, MAX(adu) as adu, AVG(throttle) as throttle FROM %(table_to_use)s WHERE product_name=%%(product)s AND version_string IN %%(versions)s AND %(date_range_field)s BETWEEN 
%%(from_date)s AND %%(to_date)s %(sql_where)s GROUP BY product_name, version_string, %(date_range_field)s, os_name, os_short_name ) as aggregated_crashes_by_user """ % { "db_fields": ", ".join(db_fields), "date_range_field": date_range_field, "table_to_use": table_to_use, "sql_where": sql_where } if db_group: sql = "%s GROUP BY %s" % (sql, ", ".join(db_group)) sql = str(" ".join(sql.split())) # better formatting of the sql string error_message = "Failed to retrieve daily crashes data from PostgreSQL" results = self.query(sql, params, error_message=error_message) hits = {} for row in results: daily_data = dict(zip(out_fields, row)) if "throttle" in daily_data: daily_data["throttle"] = float(daily_data["throttle"]) daily_data["crash_hadu"] = float(daily_data["crash_hadu"]) daily_data["date"] = datetimeutil.date_to_string( daily_data["date"]) key = "%s:%s" % (daily_data["product"], daily_data["version"]) if params.separated_by == "os": key = "%s:%s" % (key, daily_data["os_short"]) if "os_short" in daily_data: del daily_data["os_short"] if key not in hits: hits[key] = {} hits[key][daily_data["date"]] = daily_data return {"hits": hits}
def test_get_list(self): now = self.now yesterday = now - datetime.timedelta(days=1) yesterday = datetimeutil.date_to_string(yesterday) report = Report(config=self.config) base_params = { 'signature': 'sig1', 'from_date': yesterday, 'to_date': now, } # Basic test res = report.get_list(**base_params) eq_(res['total'], 5) eq_(len(res['hits']), 5) duplicates_map = dict((x['uuid'], x['duplicate_of']) for x in res['hits'] if x['duplicate_of']) eq_(duplicates_map['60597bdc-5dbe-4409-6b38-4309c0130828'], '60597bdc-5dbe-4409-6b38-4309c0130833') # Test with products and versions params = dict( base_params, products='WaterWolf', versions='WaterWolf:2.0', ) res = report.get_list(**params) eq_(res['total'], 1) # Test with os, build_ids and reasons params = dict( base_params, products='WaterWolf', versions=['WaterWolf:1.0', 'WaterWolf:3.0'], os='win', build_ids='20001212010203', reasons='STACK_OVERFLOW', ) res = report.get_list(**params) eq_(res['total'], 2) res_expected = { 'hits': [{ 'hangid': None, 'product': 'WaterWolf', 'os_name': 'Windows NT', 'uuid': '60597bdc-5dbe-4409-6b38-4309c0130831', 'cpu_info': None, 'url': None, 'last_crash': None, 'date_processed': yesterday, 'cpu_name': None, 'uptime': None, 'release_channel': 'Release', 'process_type': 'browser', 'os_version': None, 'reason': 'STACK_OVERFLOW', 'version': '1.0', 'build': '20001212010203', 'install_age': None, 'signature': 'sig1', 'install_time': None, 'duplicate_of': None, 'address': None, 'user_comments': None }, { 'hangid': None, 'product': 'WaterWolf', 'os_name': 'Windows NT', 'uuid': '60597bdc-5dbe-4409-6b38-4309c0130834', 'cpu_info': None, 'url': None, 'last_crash': None, 'date_processed': yesterday, 'cpu_name': None, 'uptime': None, 'release_channel': 'Release', 'process_type': 'plugin', 'os_version': None, 'reason': 'STACK_OVERFLOW', 'version': '3.0', 'build': '20001212010203', 'install_age': None, 'signature': 'sig1', 'install_time': None, 'duplicate_of': None, 'address': None, 'user_comments': None }], 'total': 2 } eq_(res, res_expected) # Test with a signature with strange characters params = dict( base_params, signature='this/is+a=C|signature', ) res = report.get_list(**params) eq_(res['total'], 1) res_expected = { 'hits': [{ 'hangid': None, 'product': 'WindBear', 'os_name': 'Linux', 'uuid': '60597bdc-5dbe-4409-6b38-4309c0130837', 'cpu_info': None, 'url': None, 'last_crash': None, 'date_processed': yesterday, 'cpu_name': None, 'uptime': None, 'release_channel': 'Release', 'process_type': 'browser', 'os_version': None, 'reason': 'STACK_OVERFLOW', 'version': '1.0', 'build': '20001212010203', 'install_age': None, 'signature': 'this/is+a=C|signature', 'install_time': None, 'duplicate_of': None, 'address': None, 'user_comments': None }], 'total': 1 } eq_(res, res_expected) # Test plugins params = dict( base_params, report_process='plugin', plugin_in='filename', plugin_terms='NPSWF', plugin_search_mode='contains', ) res = report.get_list(**params) eq_(res['total'], 1) # Test plugins with 'starts_with' search mode params = dict( base_params, report_process='plugin', plugin_in='name', plugin_terms='Flash', plugin_search_mode='starts_with', ) res = report.get_list(**params) eq_(res['total'], 1)
def get(self, **kwargs): """ Return product information, or version information for one or more product:version combinations """ filters = [ ("versions", None, ["list", "str"]), # for legacy, to be removed ("type", "desktop", "str"), ] params = external_common.parse_arguments(filters, kwargs) accepted_types = ("desktop", "webapp") if params.type not in accepted_types: raise BadArgumentError('type', params.type, accepted_types) if params.versions and params.versions[0]: return self._get_versions(params) if params.type == "desktop": sql = """ /* socorro.external.postgresql.products.Products.get */ SELECT product_name, version_string, start_date, end_date, throttle, is_featured, build_type, has_builds FROM product_info ORDER BY product_sort, version_sort DESC, channel_sort """ elif params.type == "webapp": sql = """ /* socorro.external.postgresql.products.Products.get */ SELECT product_name, version, NULL as start_date, NULL as end_date, 1.0 as throttle, FALSE as is_featured, build_type, FALSE as has_builds FROM bixie.raw_product_releases ORDER BY product_name, version DESC """ error_message = "Failed to retrieve products/versions from PostgreSQL" results = self.query(sql, error_message=error_message) products = [] versions_per_product = {} for row in results: version = dict( zip(( 'product', 'version', 'start_date', 'end_date', 'throttle', 'featured', 'release', 'has_builds', ), row)) try: version['end_date'] = datetimeutil.date_to_string( version['end_date']) except TypeError: pass try: version['start_date'] = datetimeutil.date_to_string( version['start_date']) except TypeError: pass version['throttle'] = float(version['throttle']) product = version['product'] if product not in products: products.append(product) if product not in versions_per_product: versions_per_product[product] = [version] else: versions_per_product[product].append(version) return { 'products': products, 'hits': versions_per_product, 'total': len(results) }
def get(self, **kwargs): """ Return product information, or version information for one or more product:version combinations """ filters = [ ("versions", None, ["list", "str"]), # for legacy, to be removed ] params = external_common.parse_arguments(filters, kwargs) if params.versions and params.versions[0]: return self._get_versions(params) sql = """ /* socorro.external.postgresql.products.Products.get */ SELECT product_name AS product, version_string AS version, start_date, end_date, throttle, is_featured AS featured, build_type AS release, has_builds FROM product_info ORDER BY product_sort, version_sort DESC, channel_sort """ error_message = "Failed to retrieve products/versions from PostgreSQL" results = self.query(sql, error_message=error_message) products = [] versions_per_product = {} for version in results.zipped(): try: version.end_date = datetimeutil.date_to_string( version.end_date ) except TypeError: pass try: version.start_date = datetimeutil.date_to_string( version.start_date ) except TypeError: pass version.throttle = float(version.throttle) product = version.product if product not in products: products.append(product) if product not in versions_per_product: versions_per_product[product] = [version] else: versions_per_product[product].append(version) return { 'products': products, 'hits': versions_per_product, 'total': len(results) }
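# A minimal sketch of consuming the structure returned by Products.get() above:
# 'products' keeps the ordering coming from product_info, while 'hits' groups
# the version rows per product. Assumes the rows behave like plain dicts with
# the aliased column names (product, version, featured, ...); featured_versions
# is a hypothetical helper, not part of the service.
def featured_versions(products_response):
    """Return {product_name: [version_string, ...]} for featured versions only."""
    result = {}
    for product in products_response['products']:
        rows = products_response['hits'].get(product, [])
        result[product] = [row['version'] for row in rows if row['featured']]
    return result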
def _get_versions(self, params): """ Return product information for one or more product:version combinations """ products = [] (params["products_versions"], products) = self.parse_versions(params["versions"], []) sql_select = """ SELECT product_name as product, version_string as version, start_date, end_date, is_featured, build_type, throttle::float, has_builds FROM product_info """ sql_where = [] versions_list = [] products_list = [] for x in range(0, len(params["products_versions"]), 2): products_list.append(params["products_versions"][x]) versions_list.append(params["products_versions"][x + 1]) sql_where = ["(product_name = %(product" + str(x) + ")s AND version_string = %(version" + str(x) + ")s)" for x in range(len(products_list))] sql_params = {} sql_params = add_param_to_dict(sql_params, "product", products_list) sql_params = add_param_to_dict(sql_params, "version", versions_list) if len(sql_where) > 0: sql_query = " WHERE ".join((sql_select, " OR ".join(sql_where))) else: sql_query = sql_select sql_query = """ /* socorro.external.postgresql.Products.get_versions */ %s """ % sql_query error_message = "Failed to retrieve products versions from PostgreSQL" results = self.query(sql_query, sql_params, error_message=error_message) products = [] for product in results.zipped(): product.start_date = datetimeutil.date_to_string( product.start_date ) product.end_date = datetimeutil.date_to_string( product.end_date ) products.append(product) return { "hits": products, "total": len(products) }
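# Sketch of the parameter-naming convention _get_versions() relies on.
# add_param_to_dict is assumed to expand a list into numbered keys
# (product0, product1, ...) so they line up with the "%(productN)s"
# placeholders built in sql_where; this stand-in only illustrates that
# convention, it is not the shared helper itself.
def add_param_to_dict_sketch(sql_params, prefix, values):
    for i, value in enumerate(values):
        sql_params['%s%s' % (prefix, i)] = value
    return sql_params

# Example: products ['Firefox', 'Thunderbird'] and versions ['8.0', '10.0.2b']
# produce {'product0': 'Firefox', 'version0': '8.0',
#          'product1': 'Thunderbird', 'version1': '10.0.2b'},
# matching "(product_name = %(product0)s AND version_string = %(version0)s)".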
def get(self, **kwargs): """Return a list of results and aggregations based on parameters. The list of accepted parameters (with types and default values) is in the database and can be accessed with the super_search_fields service. """ # Filter parameters and raise potential errors. params = self.get_parameters(**kwargs) # Find the indices to use to optimize the elasticsearch query. indices = self.get_indices(params['date']) # Create and configure the search object. search = Search( using=self.get_connection(), index=indices, doc_type=self.config.elasticsearch.elasticsearch_doctype, ) # Create filters. filters = None for field, sub_params in params.items(): sub_filters = None for param in sub_params: if param.name.startswith('_'): if param.name == '_results_offset': results_from = param.value[0] elif param.name == '_results_number': results_number = param.value[0] # Don't use meta parameters in the query. continue field_data = self.all_fields[param.name] name = '%s.%s' % (field_data['namespace'], field_data['in_database_name']) if param.data_type in ('date', 'datetime'): param.value = datetimeutil.date_to_string(param.value) elif param.data_type == 'enum': param.value = [x.lower() for x in param.value] elif param.data_type == 'str' and not param.operator: param.value = [x.lower() for x in param.value] args = {} filter_type = 'term' filter_value = None if not param.operator: # contains one of the terms if len(param.value) == 1: val = param.value[0] if not isinstance(val, basestring) or (isinstance( val, basestring) and ' ' not in val): filter_value = val # If the term contains white spaces, we want to perform # a phrase query. Thus we do nothing here and let this # value be handled later. else: filter_type = 'terms' filter_value = param.value elif param.operator == '=': # is exactly if field_data['has_full_version']: name = '%s.full' % name filter_value = param.value elif param.operator == '>': # greater than filter_type = 'range' filter_value = {'gt': param.value} elif param.operator == '<': # lower than filter_type = 'range' filter_value = {'lt': param.value} elif param.operator == '>=': # greater than or equal to filter_type = 'range' filter_value = {'gte': param.value} elif param.operator == '<=': # lower than or equal to filter_type = 'range' filter_value = {'lte': param.value} elif param.operator == '__null__': # is null filter_type = 'missing' args['field'] = name if filter_value is not None: args[name] = filter_value if args: if param.operator_not: new_filter = ~F(filter_type, **args) else: new_filter = F(filter_type, **args) if sub_filters is None: sub_filters = new_filter elif param.data_type == 'enum': sub_filters |= new_filter else: sub_filters &= new_filter continue # These use a wildcard and thus need to be in a query # instead of a filter. operator_wildcards = { '~': '*%s*', # contains '$': '%s*', # starts with '^': '*%s' # ends with } if param.operator in operator_wildcards: if field_data['has_full_version']: name = '%s.full' % name query_type = 'wildcard' args[name] = (operator_wildcards[param.operator] % param.value) elif not param.operator: # This is a phrase that was passed down. query_type = 'simple_query_string' args['query'] = param.value[0] args['fields'] = [name] args['default_operator'] = 'and' if args: query = Q(query_type, **args) if param.operator_not: query = ~query search = search.query(query) else: # If we reach this point, that means the operator is # not supported, and we should raise an error about that. 
raise NotImplementedError('Operator %s is not supported' % param.operator) if filters is None: filters = sub_filters elif sub_filters is not None: filters &= sub_filters search = search.filter(filters) # Pagination. results_to = results_from + results_number search = search[results_from:results_to] # Create facets. for param in params['_facets']: for value in param.value: try: field_ = self.all_fields[value] except KeyError: # That is not a known field, we can't facet on it. raise BadArgumentError( value, msg='Unknown field "%s", cannot facet on it' % value) field_name = '%s.%s' % (field_['namespace'], field_['in_database_name']) if field_['has_full_version']: # If the param has a full version, that means what matters # is the full string, and not its individual terms. field_name += '.full' search.aggs.bucket(value, 'terms', field=field_name, size=self.config.facets_max_number) # Query and compute results. hits = [] fields = [ '%s.%s' % (x['namespace'], x['in_database_name']) for x in self.all_fields.values() if x['is_returned'] ] search = search.fields(*fields) if params['_return_query'][0].value[0]: # Return only the JSON query that would be sent to elasticsearch. return { 'query': search.to_dict(), 'indices': indices, } # We call elasticsearch with a computed list of indices, based on # the date range. However, if that list contains indices that do not # exist in elasticsearch, an error will be raised. We thus want to # remove all failing indices until we either have a valid list, or # an empty list in which case we return no result. while True: try: results = search.execute() for hit in results: hits.append(self.format_fields(hit.to_dict())) total = search.count() aggregations = self.format_aggregations(results.aggregations) break # Yay! Results! except NotFoundError, e: missing_index = re.findall(BAD_INDEX_REGEX, e.error)[0] if missing_index in indices: del indices[indices.index(missing_index)] else: # Wait what? An error caused by an index that was not # in the request? That should never happen, but in case # it does, better know it. raise if indices: # Update the list of indices and try again. # Note: we need to first empty the list of indices before # updating it, otherwise the removed indices never get # actually removed. search = search.index().index(*indices) else: # There is no index left in the list, return an empty # result. hits = [] total = 0 aggregations = {} break
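# Standalone sketch of the retry-on-missing-index pattern used in the while
# loop above, assuming a caller-provided run_search(indices) callable and an
# extract_missing_index(error) helper that parses the index name out of the
# exception (the method above uses BAD_INDEX_REGEX for that). Illustration
# only; the real code mutates the Search object in place.
from elasticsearch.exceptions import NotFoundError

def search_with_index_pruning(run_search, indices, extract_missing_index):
    while indices:
        try:
            return run_search(indices)
        except NotFoundError as error:
            missing = extract_missing_index(error)
            if missing not in indices:
                # An error about an index we never asked for: re-raise.
                raise
            indices.remove(missing)
    # No valid index left: behave as if the search returned nothing.
    return None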
def get_comments(self, **kwargs): """Return a list of comments on crash reports, filtered by signatures and other fields. See socorro.lib.search_common.get_parameters() for all filters. """ params = self.prepare_search_params(**kwargs) # Creating the parameters for the sql query sql_params = {} # Preparing the different parts of the sql query # WARNING: sensitive data is returned here (email). When there is # an authentication mechanism, a verification should be done here. sql_select = """ SELECT r.date_processed, r.user_comments, r.uuid, CASE WHEN r.email = '' THEN null WHEN r.email IS NULL THEN null ELSE r.email END """ sql_count = """ SELECT COUNT(r.uuid) """ sql_from = self.build_reports_sql_from(params) (sql_where, sql_params) = self.build_reports_sql_where(params, sql_params, self.context) sql_where = "%s AND r.user_comments IS NOT NULL" % sql_where sql_order = "ORDER BY email ASC, r.date_processed ASC" sql_limit, sql_params = self.build_reports_sql_limit( params, sql_params ) sql_count = " ".join(( "/* external.postgresql.crashes.Crashes.get_comments */", sql_count, sql_from, sql_where) ) count = self.count(sql_count, sql_params) comments = [] if count: # Assembling the query sql_query = " ".join(( "/* external.postgresql.crashes.Crashes.get_comments */", sql_select, sql_from, sql_where, sql_order, sql_limit) ) error_message = "Failed to retrieve comments from PostgreSQL" results = self.query(sql_query, sql_params, error_message=error_message) # Transforming the results into what we want for row in results: comment = dict(zip(( "date_processed", "user_comments", "uuid", "email", ), row)) comment["date_processed"] = datetimeutil.date_to_string( comment["date_processed"] ) comments.append(comment) return { "hits": comments, "total": count }
def get(self, **kwargs): """Return a list of results and facets based on parameters. The list of accepted parameters (with types and default values) is in socorro.lib.search_common.SearchBase """ # Filter parameters and raise potential errors. params = self.get_parameters(**kwargs) # Find the indexes to use to optimize the elasticsearch query. indexes = self.get_indexes(params['date']) # Create and configure the search object. search = SuperS().es( urls=self.config.elasticsearch_urls, timeout=self.config.elasticsearch_timeout, ) search = search.indexes(indexes) search = search.doctypes(self.config.elasticsearch_doctype) # Create filters. filters = F() for field, sub_params in params.items(): for param in sub_params: name = PARAM_TO_FIELD_MAPPING.get(param.name, param.name) name = self.prefix_field_name(name) if name.startswith('_'): if name == '_results_offset': results_from = param.value[0] elif name == '_results_number': results_number = param.value[0] # Don't use meta parameters in the query. continue if param.data_type in ('date', 'datetime'): param.value = datetimeutil.date_to_string(param.value) elif param.data_type == 'enum': param.value = [x.lower() for x in param.value] elif param.data_type == 'str' and not param.operator: param.value = [x.lower() for x in param.value] args = {} if not param.operator: # contains one of the terms if len(param.value) == 1: args[name] = param.value[0] else: args['%s__in' % name] = param.value elif param.operator == '=': # is exactly if name in FIELDS_WITH_FULL_VERSION: name = '%s.full' % name args[name] = param.value elif param.operator == '>': # greater than args['%s__gt' % name] = param.value elif param.operator == '<': # lower than args['%s__lt' % name] = param.value elif param.operator == '>=': # greater than or equal to args['%s__gte' % name] = param.value elif param.operator == '<=': # lower than or equal to args['%s__lte' % name] = param.value elif param.operator == '__null__': # is null args['%s__missing' % name] = param.value if args: if param.operator_not: filters &= ~F(**args) else: filters &= F(**args) continue # These use a wildcard and thus need to be in a query # instead of a filter. operator_wildcards = { '~': '*%s*', # contains '$': '%s*', # starts with '^': '*%s' # ends with } if param.operator in operator_wildcards: if name in FIELDS_WITH_FULL_VERSION: name = '%s.full' % name args['%s__wildcard' % name] = \ operator_wildcards[param.operator] % param.value args['must_not'] = param.operator_not if args: search = search.query(**args) else: # If we reach this point, that means the operator is # not supported, and we should raise an error about that. raise NotImplementedError('Operator %s is not supported' % param.operator) search = search.filter(filters) # Pagination. results_to = results_from + results_number search = search[results_from:results_to] # Create facets. processed_filters = search._process_filters(filters.filters) for param in params['_facets']: for value in param.value: filter_ = self.get_filter(value) if not filter_: # That is not a known field, we can't facet on it. raise BadArgumentError( 'Unknown field "%s", cannot facet on it' % value) field_name = PARAM_TO_FIELD_MAPPING.get(value, value) field_name = self.prefix_field_name(field_name) if field_name in FIELDS_WITH_FULL_VERSION: # If the param has a full version, that means what matters # is the full string, and not its individual terms. 
field_name += '.full' args = { value: { 'terms': { 'field': field_name, 'size': self.config.facets_max_number, }, 'facet_filter': processed_filters, } } search = search.facet_raw(**args) # Query and compute results. hits = [] fields = ['processed_crash.%s' % x for x in PROCESSED_CRASH_FIELDS] for hit in search.values_dict(*fields): hits.append(self.format_field_names(hit)) return { 'hits': hits, 'total': search.count(), 'facets': search.facet_counts(), }
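# Sketch of the operator-to-keyword convention the filters above rely on
# (elasticutils-style "field__suffix" kwargs passed to F()). Only an
# illustration of how a (name, operator, value) triple becomes keyword
# arguments; filter_kwargs is a hypothetical helper.
OPERATOR_TO_SUFFIX = {
    '>': '__gt',
    '<': '__lt',
    '>=': '__gte',
    '<=': '__lte',
    '__null__': '__missing',
}

def filter_kwargs(name, operator, value):
    if not operator:
        # No operator: a single term is an exact filter, several become "in".
        if isinstance(value, (list, tuple)) and len(value) > 1:
            return {'%s__in' % name: value}
        return {name: value[0] if isinstance(value, (list, tuple)) else value}
    if operator == '=':
        return {name: value}
    if operator in OPERATOR_TO_SUFFIX:
        return {'%s%s' % (name, OPERATOR_TO_SUFFIX[operator]): value}
    raise NotImplementedError('Operator %s is not supported' % operator)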
def test_get_comments(self): crashes = Crashes(config=self.config) today = datetimeutil.date_to_string(self.now) # Test 1: results params = { "signature": "js", } res_expected = { "hits": [{ "email": None, "date_processed": today, "uuid": "def", "user_comments": "hello" }, { "email": None, "date_processed": today, "uuid": "hij", "user_comments": "hah" }], "total": 2 } res = crashes.get_comments(**params) eq_(res, res_expected) # Test 2: no results params = { "signature": "blah", } res_expected = {"hits": [], "total": 0} res = crashes.get_comments(**params) eq_(res, res_expected) # Test 3: missing parameter assert_raises(MissingArgumentError, crashes.get_comments) # Test a valid rapid beta version params = { "signature": "cool_sig", "products": "Firefox", "versions": "Firefox:14.0b", } res_expected = { 'hits': [{ 'email': None, 'date_processed': today, 'uuid': 'nop', 'user_comments': 'hi!' }], 'total': 1 } res = crashes.get_comments(**params) eq_(res, res_expected) # Test an invalid rapid beta version params = { "signature": "cool_sig", "versions": "WaterWolf:2.0b", } res_expected = { 'hits': [{ 'email': None, 'date_processed': today, 'uuid': 'qrs', 'user_comments': 'meow' }], 'total': 1 } res = crashes.get_comments(**params) eq_(res, res_expected) # use pagination params = { "signature": "cool_sig", "result_number": 1, "result_offset": 0, } res = crashes.get_comments(**params) eq_(len(res['hits']), 1) eq_(res['total'], 2)
def setup_data(self): self.now = datetimeutil.utc_now() now = self.now.date() yesterday = now - datetime.timedelta(days=1) lastweek = now - datetime.timedelta(days=7) now_str = datetimeutil.date_to_string(now) yesterday_str = datetimeutil.date_to_string(yesterday) lastweek_str = datetimeutil.date_to_string(lastweek) self.test_source_data = { # Test backfill_adu 'adu': { 'params': { "update_day": yesterday_str, }, 'res_expected': [(True,)], }, # Test backfill_all_dups 'all_dups': { 'params': { "start_date": yesterday_str, "end_date": now_str, }, 'res_expected': [(True,)], }, # Test backfill_build_adu 'build_adu': { 'params': { "update_day": yesterday_str, }, 'res_expected': [(True,)], }, # Test backfill_correlations 'correlations': { 'params': { "update_day": yesterday_str, }, 'res_expected': [(True,)], }, # Test backfill_crashes_by_user_build 'crashes_by_user_build': { 'params': { "update_day": yesterday_str, }, 'res_expected': [(True,)], }, # Test backfill_crashes_by_user 'crashes_by_user': { 'params': { "update_day": yesterday_str, }, 'res_expected': [(True,)], }, # TODO: Test backfill_daily_crashes tries to insert into a table # that do not exists. It can be fixed by creating a temporary one. #'daily_crashes': { # 'params': { # "update_day": now_str, # }, # 'res_expected': [(True,)], # }, # Test backfill_exploitability 'exploitability': { 'params': { "update_day": yesterday_str, }, 'res_expected': [(True,)], }, # Test backfill_home_page_graph_build 'home_page_graph_build': { 'params': { "update_day": yesterday_str, }, 'res_expected': [(True,)], }, # Test backfill_home_page_graph 'home_page_graph': { 'params': { "update_day": yesterday_str, }, 'res_expected': [(True,)], }, # Test backfill_matviews 'matviews': { 'params': { "start_date": yesterday_str, "reports_clean": 'false', }, 'res_expected': [(True,)], }, # Test backfill_rank_compare 'rank_compare': { 'params': { "update_day": yesterday_str, }, 'res_expected': [(True,)], }, # Test backfill_reports_clean 'reports_clean': { 'params': { "start_date": yesterday_str, "end_date": now_str, }, 'res_expected': [(True,)], }, # TODO: Test backfill_reports_duplicates tries to insert into a # table that do not exists. It can be fixed by using the update # function inside of the backfill. #'reports_duplicates': { # 'params': { # "start_date": yesterday_str, # "end_date": now_str, # }, # 'res_expected': [(True,)], # }, # TODO: Test backfill_signature_counts tries to insert into # tables and to update functions that does not exist. 
#'signature_counts': { # 'params': { # "start_date": yesterday_str, # "end_date": now_str, # }, # 'res_expected': [(True,)], # }, # Test backfill_tcbs_build 'tcbs_build': { 'params': { "update_day": yesterday_str, }, 'res_expected': [(True,)], }, # Test backfill_tcbs 'tcbs': { 'params': { "update_day": yesterday_str, }, 'res_expected': [(True,)], }, # Test backfill_weekly_report_partitions 'weekly_report_partitions': { 'params': { "start_date": lastweek_str, "end_date": now_str, "table_name": 'raw_crashes', }, 'res_expected': [(True,)], }, # TODO: Update Backfill to support signature_summary backfill # through the API #'signature_summary_products': { # 'params': { # "update_day": yesterday_str, # }, # 'res_expected': [(True,)], #}, #'signature_summary_installations': { # 'params': { # "update_day": yesterday_str, # }, # 'res_expected': [(True,)], #}, #'signature_summary_uptime': { # 'params': { # "update_day": yesterday_str, # }, # 'res_expected': [(True,)], #}, #'signature_summary_os': { # 'params': { # "update_day": yesterday_str, # }, # 'res_expected': [(True,)], #}, #'signature_summary_process_type': { # 'params': { # "update_day": yesterday_str, # }, # 'res_expected': [(True,)], #}, #'signature_summary_architecture': { # 'params': { # "update_day": yesterday_str, # }, # 'res_expected': [(True,)], #}, #'signature_summary_flash_version': { # 'params': { # "update_day": yesterday_str, # }, # 'res_expected': [(True,)], #}, #'signature_summary_device': { # 'params': { # "update_day": yesterday_str, # }, # 'res_expected': [(True,)], #}, #'signature_summary_graphics': { # 'params': { # "update_day": yesterday_str, # }, # 'res_expected': [(True,)], #}, }
def get(self, **kwargs): """Return the current state of the server and the revisions of Socorro and Breakpad. """ filters = [ ("duration", 12, "int"), ] params = external_common.parse_arguments(filters, kwargs) # Find the recent server status sql = """ /* socorro.external.postgresql.server_status.ServerStatus.get */ SELECT id, date_recently_completed, date_oldest_job_queued, avg_process_sec, avg_wait_sec, waiting_job_count, processors_count, date_created FROM server_status ORDER BY date_created DESC LIMIT %(duration)s """ error_message = "Failed to retrieve server status data from PostgreSQL" results = self.query(sql, params, error_message=error_message) stats = [] for stat in results.zipped(): # Turn dates into strings for later JSON encoding for i in ("date_recently_completed", "date_oldest_job_queued", "date_created"): try: stat[i] = datetimeutil.date_to_string(stat[i]) except TypeError: pass stats.append(stat) # Find the current database version sql = """ /* socorro.external.postgresql.server_status.ServerStatus.get */ SELECT version_num FROM alembic_version """ error_message = "Failed to retrieve database version from PostgreSQL" results = self.query(sql, error_message=error_message) if results: schema_revision, = results[0] else: logger.warning("No version_num was found in table alembic_version") schema_revision = "Unknown" # Find the current breakpad and socorro revisions socorro_revision = resource_string('socorro', 'socorro_revision.txt') breakpad_revision = resource_string('socorro', 'breakpad_revision.txt') return { "hits": stats, "total": len(stats), "socorro_revision": socorro_revision, "breakpad_revision": breakpad_revision, "schema_revision": schema_revision, }
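# The repeated try/except TypeError blocks above exist because some of the
# date columns (date_recently_completed, date_oldest_job_queued) can be NULL.
# A small helper expressing the same intent, assuming date_to_string raises
# TypeError on None exactly as relied on above; illustration only.
def date_to_string_or_none(value):
    if value is None:
        return None
    return datetimeutil.date_to_string(value)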
def test_get(self): status = ServerStatus(config=self.config) now = datetimeutil.utc_now() date1 = datetime.datetime(now.year, now.month, now.day, 12, 00, 00, tzinfo=now.tzinfo) date2 = date1 - datetime.timedelta(minutes=15) date3 = date2 - datetime.timedelta(minutes=15) date4 = date3 - datetime.timedelta(minutes=15) date1 = datetimeutil.date_to_string(date1) date2 = datetimeutil.date_to_string(date2) date3 = datetimeutil.date_to_string(date3) date4 = datetimeutil.date_to_string(date4) #...................................................................... # Test 1: default behavior res = status.get() res_expected = { "hits": [{ "id": 1, "date_recently_completed": date1, "date_oldest_job_queued": date1, "avg_process_sec": 2, "avg_wait_sec": 5, "waiting_job_count": 3, "processors_count": 2, "date_created": date1 }, { "id": 2, "date_recently_completed": date2, "date_oldest_job_queued": date2, "avg_process_sec": 3, "avg_wait_sec": 3.12, "waiting_job_count": 2, "processors_count": 2, "date_created": date2 }, { "id": 3, "date_recently_completed": date3, "date_oldest_job_queued": date3, "avg_process_sec": 1, "avg_wait_sec": 2, "waiting_job_count": 4, "processors_count": 1, "date_created": date3 }, { "id": 4, "date_recently_completed": None, "date_oldest_job_queued": None, "avg_process_sec": 1, "avg_wait_sec": 2, "waiting_job_count": 4, "processors_count": 1, "date_created": date4 }], "socorro_revision": 42, "breakpad_revision": 43, "total": 4 } self.assertEqual(res, res_expected) #...................................................................... # Test 2: with duration params = {"duration": 1} res = status.get(**params) res_expected = { "hits": [{ "id": 1, "date_recently_completed": date1, "date_oldest_job_queued": date1, "avg_process_sec": 2, "avg_wait_sec": 5, "waiting_job_count": 3, "processors_count": 2, "date_created": date1 }], "socorro_revision": 42, "breakpad_revision": 43, "total": 1 } self.assertEqual(res, res_expected)
def setup_data(self): now = self.now.date() yesterday = now - datetime.timedelta(days=1) lastweek = now - datetime.timedelta(days=7) now_str = datetimeutil.date_to_string(now) yesterday_str = datetimeutil.date_to_string(yesterday) lastweek_str = datetimeutil.date_to_string(lastweek) self.test_source_data = { # Test 1: find exact match for one product version and signature 'products': { 'params': { "versions": "Firefox:8.0", "report_type": "products", "signature": "Fake Signature #1", "start_date": lastweek_str, "end_date": now_str, }, 'res_expected': [{ "product_name": 'Firefox', "version_string": "8.0", "report_count": 1, "percentage": '50.000', }, { "product_name": 'Firefox', "version_string": "9.0", "report_count": 1, "percentage": '50.000', }], }, # Test 2: find ALL matches for all product versions and signature 'products_no_version': { 'params': { "report_type": "products", "signature": "Fake Signature #1", "start_date": lastweek_str, "end_date": now_str, }, 'res_expected': [{ "product_name": 'Firefox', "version_string": "8.0", "report_count": 1, "percentage": '50.000', }, { "product_name": 'Firefox', "version_string": "9.0", "report_count": 1, "percentage": '50.000', }], }, # Test 3: find architectures reported for a given version and a # signature 'architecture': { 'params': { "versions": "Firefox:8.0", "report_type": "architecture", "signature": "Fake Signature #1", "start_date": lastweek_str, "end_date": now_str, }, 'res_expected': [{ "category": 'amd64', "report_count": 1.0, "percentage": "100.000", }], }, # Test 4: find architectures reported for a signature with no # specific version. 'architecture_no_version': { 'params': { "report_type": "architecture", "signature": "Fake Signature #1", "start_date": lastweek_str, "end_date": now_str, }, 'res_expected': [{ "category": 'amd64', "report_count": 2, "percentage": '100.000', }], }, # Test 5: find flash_versions reported for specific version and # a signature 'flash_versions': { 'params': { "versions": "Firefox:8.0", "report_type": "flash_version", "signature": "Fake Signature #1", "start_date": lastweek_str, "end_date": now_str, }, 'res_expected': [{ "category": '1.0', "report_count": 1.0, "percentage": "100.000", }], }, # Test 6: find flash_versions reported with a signature and without # a specific version 'flash_versions_no_version': { 'params': { "report_type": "flash_version", "signature": "Fake Signature #1", "start_date": lastweek_str, "end_date": now_str, }, 'res_expected': [{ "category": '1.0', "report_count": 2.0, "percentage": "100.000", }], }, # Test 7: find installations reported for a signature 'distinct_install': { 'params': { "versions": "Firefox:8.0", "report_type": "distinct_install", "signature": "Fake Signature #1", "start_date": lastweek_str, "end_date": now_str, }, 'res_expected': [{ "product_name": 'Firefox', "version_string": '8.0', "crashes": 10, "installations": 8, }], }, # Test 8: find os_version_strings reported for a signature 'os': { 'params': { "versions": "Firefox:8.0", "report_type": "os", "signature": "Fake Signature #1", "start_date": lastweek_str, "end_date": now_str, }, 'res_expected': [{ "category": 'Windows NT 6.4', "report_count": 1, "percentage": "100.000", }], }, # Test 9: find process_type reported for a signature 'process_type': { 'params': { "versions": "Firefox:8.0", "report_type": "process_type", "signature": "Fake Signature #1", "start_date": lastweek_str, "end_date": now_str, }, 'res_expected': [{ "category": 'plugin', "report_count": 1, "percentage": "100.000", }], }, # Test 10: 
find uptime reported for signature 'uptime': { 'params': { "versions": "Firefox:8.0", "report_type": "uptime", "signature": "Fake Signature #1", "start_date": lastweek_str, "end_date": now_str, }, 'res_expected': [{ "category": '15-30 minutes', "report_count": 1, "percentage": '100.000', }], }, # Test 11: find exploitability reported for signature 'exploitability': { 'params': { "versions": "Firefox:8.0", "report_type": "exploitability", "signature": "Fake Signature #1", "start_date": lastweek_str, "end_date": now_str, }, 'res_expected': [{ 'low_count': 3, 'high_count': 5, 'null_count': 1, 'none_count': 2, 'report_date': yesterday_str, 'medium_count': 4, }], }, # Test 12: find mobile devices reported for signature with a # specific version 'devices': { 'params': { "versions": "Firefox:8.0", 'report_type': 'devices', 'signature': 'Fake Signature #1', 'start_date': lastweek_str, 'end_date': now_str, }, 'res_expected': [{ 'cpu_abi': 'armeabi-v7a', 'manufacturer': 'samsung', 'model': 'GT-P5100', 'version': '16 (REL)', 'report_count': 123, 'percentage': '100.000', }], }, # Test 13: find mobile devices reported for signature 'devices_no_version': { 'params': { 'report_type': 'devices', 'signature': 'Fake Signature #1', 'start_date': lastweek_str, 'end_date': now_str, }, 'res_expected': [{ 'cpu_abi': 'armeabi-v7a', 'manufacturer': 'samsung', 'model': 'GT-P5100', 'version': '16 (REL)', 'report_count': 246, 'percentage': '100.000', }], }, # Test 14: find graphics adapters reported for signature 'graphics': { 'params': { "versions": "Firefox:8.0", 'report_type': 'graphics', 'signature': 'Fake Signature #1', 'start_date': lastweek_str, 'end_date': now_str, }, 'res_expected': [{ 'vendor_hex': '0x1234', 'adapter_hex': '0x5678', 'vendor_name': 'Test Vendor', 'adapter_name': 'Test Adapter', 'report_count': 123, 'percentage': '100.000', }], }, }
def get(self, **kwargs): """Return the current state of the server and the revisions of Socorro and Breakpad. """ filters = [ ("duration", 12, "int"), ] params = external_common.parse_arguments(filters, kwargs) sql = """ /* socorro.external.postgresql.server_status.ServerStatus.get */ SELECT id, date_recently_completed, date_oldest_job_queued, avg_process_sec, avg_wait_sec, waiting_job_count, processors_count, date_created FROM server_status ORDER BY date_created DESC LIMIT %(duration)s """ connection = None try: connection = self.database.connection() cursor = connection.cursor() cursor.execute(sql, params) results = cursor.fetchall() except psycopg2.Error: logger.error("Failed retrieving server status from PostgreSQL", exc_info=True) results = [] finally: if connection: connection.close() stats = [] for row in results: stat = dict( zip(("id", "date_recently_completed", "date_oldest_job_queued", "avg_process_sec", "avg_wait_sec", "waiting_job_count", "processors_count", "date_created"), row)) # Turn dates into strings for later JSON encoding for i in ("date_recently_completed", "date_oldest_job_queued", "date_created"): try: stat[i] = datetimeutil.date_to_string(stat[i]) except TypeError: pass stats.append(stat) return { "hits": stats, "total": len(stats), "socorro_revision": self.context.socorro_revision, "breakpad_revision": self.context.breakpad_revision }
def get_exploitability(self, **kwargs): """Return a list of exploitable crash reports. See socorro.lib.external_common.parse_arguments() for all filters. """ now = datetimeutil.utc_now().date() lastweek = now - datetime.timedelta(weeks=1) filters = [ ("start_date", lastweek, "date"), ("end_date", now, "date"), ("page", None, "int"), ("batch", None, "int"), ] params = external_common.parse_arguments(filters, kwargs) count_sql_query = """ /* external.postgresql.crashes.Crashes.get_exploitability */ SELECT COUNT(*) FROM exploitability_reports WHERE report_date BETWEEN %(start_date)s AND %(end_date)s """ results = self.query( count_sql_query, params, error_message="Failed to retrieve exploitable crashes count") total_crashes_count, = results[0] sql_query = """ /* external.postgresql.crashes.Crashes.get_exploitability */ SELECT signature, report_date, null_count, none_count, low_count, medium_count, high_count FROM exploitability_reports WHERE report_date BETWEEN %(start_date)s AND %(end_date)s ORDER BY report_date DESC """ if params['page'] is not None: if params['page'] <= 0: raise BadArgumentError('page', params['page'], 'starts on 1') if params['batch'] is None: raise MissingArgumentError('batch') sql_query += """ LIMIT %(limit)s OFFSET %(offset)s """ params['limit'] = params['batch'] params['offset'] = params['batch'] * (params['page'] - 1) error_message = "Failed to retrieve exploitable crashes from PostgreSQL" results = self.query(sql_query, params, error_message=error_message) # Transforming the results into what we want crashes = [] for row in results: crash = dict( zip(("signature", "report_date", "null_count", "none_count", "low_count", "medium_count", "high_count"), row)) crash["report_date"] = datetimeutil.date_to_string( crash["report_date"]) crashes.append(crash) return {"hits": crashes, "total": total_crashes_count}
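# The LIMIT/OFFSET arithmetic used by get_exploitability(), pulled out for
# clarity: pages are 1-based, so page 1 with batch 300 reads rows 0..299.
# page_to_limit_offset is an illustrative helper, not part of the service.
def page_to_limit_offset(page, batch):
    if page <= 0:
        raise ValueError('page starts at 1')
    return batch, batch * (page - 1)

# Example: page_to_limit_offset(3, 100) == (100, 200), i.e. the generated SQL
# ends with "LIMIT 100 OFFSET 200" and returns the third hundred rows.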
def test_get_comments(self): crashes = Crashes(config=self.config) today = datetimeutil.date_to_string(self.now) # Test 1: results params = { "signature": "js", } res_expected = { "hits": [{ "email": None, "date_processed": today, "uuid": "def", "user_comments": "hello" }, { "email": None, "date_processed": today, "uuid": "hij", "user_comments": "hah" }], "total": 2 } res = crashes.get_comments(**params) self.assertEqual(res, res_expected) # Test 2: no results params = { "signature": "blah", } res_expected = {"hits": [], "total": 0} res = crashes.get_comments(**params) self.assertEqual(res, res_expected) # Test 3: missing parameter self.assertRaises(MissingArgumentError, crashes.get_comments) # Test a valid rapid beta versions params = { "signature": "cool_sig", "products": "Firefox", "versions": "Firefox:14.0b", } res_expected = { 'hits': [{ 'email': None, 'date_processed': today, 'uuid': 'nop', 'user_comments': 'hi!' }], 'total': 1 } res = crashes.get_comments(**params) self.assertEqual(res, res_expected) # Test an invalid rapid beta versions params = { "signature": "cool_sig", "versions": "WaterWolf:2.0b", } res = crashes.get_comments(**params) self.assertTrue(res)
def get_list(self, **kwargs): """ List all crashes with a given signature and return them. Both `from_date` and `to_date` (and their aliases `from` and `to`) are required and can not be greater than 30 days apart. Optional arguments: see SearchCommon.get_parameters() """ # aliases if "from" in kwargs and "from_date" not in kwargs: kwargs["from_date"] = kwargs.get("from") if "to" in kwargs and "to_date" not in kwargs: kwargs["to_date"] = kwargs.get("to") if not kwargs.get('from_date'): raise MissingArgumentError('from_date') if not kwargs.get('to_date'): raise MissingArgumentError('to_date') from_date = datetimeutil.datetimeFromISOdateString(kwargs['from_date']) to_date = datetimeutil.datetimeFromISOdateString(kwargs['to_date']) span_days = (to_date - from_date).days if span_days > 30: raise BadArgumentError( 'Span between from_date and to_date can not be more than 30') # start with the default sort_order = {'key': 'date_processed', 'direction': 'DESC'} if 'sort' in kwargs: sort_order['key'] = kwargs.pop('sort') _recognized_sort_orders = ( 'date_processed', 'uptime', 'user_comments', 'uuid', 'uuid_text', 'product', 'version', 'build', 'signature', 'url', 'os_name', 'os_version', 'cpu_name', 'cpu_info', 'address', 'reason', 'last_crash', 'install_age', 'hangid', 'process_type', 'release_channel', 'install_time', 'duplicate_of', ) if sort_order['key'] not in _recognized_sort_orders: raise BadArgumentError( '%s is not a recognized sort order key' % sort_order['key']) sort_order['direction'] = 'ASC' if 'reverse' in kwargs: if kwargs.pop('reverse'): sort_order['direction'] = 'DESC' include_raw_crash = kwargs.get('include_raw_crash') or False params = search_common.get_parameters(kwargs) if not params["signature"]: raise MissingArgumentError('signature') params["terms"] = params["signature"] params["search_mode"] = "is_exactly" # Default mode falls back to starts_with for postgres if params["plugin_search_mode"] == "default": params["plugin_search_mode"] = "starts_with" # Limiting to a signature if params["terms"]: params["terms"] = self.prepare_terms(params["terms"], params["search_mode"]) # Searching for terms in plugins if params["report_process"] == "plugin" and params["plugin_terms"]: params["plugin_terms"] = " ".join(params["plugin_terms"]) params["plugin_terms"] = self.prepare_terms( params["plugin_terms"], params["plugin_search_mode"]) # Get information about the versions util_service = Util(config=self.context) params["versions_info"] = util_service.versions_info(**params) # Parsing the versions params["versions_string"] = params["versions"] (params["versions"], params["products"]) = self.parse_versions(params["versions"], params["products"]) if hasattr(self.context, 'webapi'): context = self.context.webapi else: # old middleware context = self.context # Changing the OS ids to OS names for i, elem in enumerate(params["os"]): for platform in context.platforms: if platform["id"][:3] == elem[:3]: params["os"][i] = platform["name"] # Creating the parameters for the sql query sql_params = {} # Preparing the different parts of the sql query sql_select = """ SELECT r.date_processed, r.uptime, r.user_comments, r.uuid::uuid, r.uuid as uuid_text, r.product, r.version, r.build, r.signature, r.url, r.os_name, r.os_version, r.cpu_name, r.cpu_info, r.address, r.reason, r.last_crash, r.install_age, r.hangid, r.process_type, r.release_channel, (r.client_crash_date - (r.install_age * INTERVAL '1 second')) AS install_time """ if include_raw_crash: pass else: sql_select += """ , rd.duplicate_of """ 
wrapped_select = """ WITH report_slice AS ( %s ), dupes AS ( SELECT report_slice.uuid, rd.duplicate_of FROM reports_duplicates rd JOIN report_slice ON report_slice.uuid_text = rd.uuid WHERE rd.date_processed BETWEEN %%(from_date)s AND %%(to_date)s ) SELECT rs.*, dupes.duplicate_of, rc.raw_crash FROM report_slice rs LEFT OUTER JOIN dupes USING (uuid) LEFT OUTER JOIN raw_crashes rc ON rs.uuid = rc.uuid AND rc.date_processed BETWEEN %%(from_date)s AND %%(to_date)s """ sql_from = self.build_reports_sql_from(params) if not include_raw_crash: sql_from = """%s LEFT OUTER JOIN reports_duplicates rd ON r.uuid = rd.uuid """ % sql_from sql_where, sql_params = self.build_reports_sql_where( params, sql_params, self.context) sql_order = """ ORDER BY %(key)s %(direction)s """ % sort_order sql_limit, sql_params = self.build_reports_sql_limit( params, sql_params) # Assembling the query if include_raw_crash: sql_query = "\n".join( ("/* socorro.external.postgresql.report.Report.list */", sql_select, sql_from, sql_where, sql_order, sql_limit)) else: sql_query = "\n".join( ("/* socorro.external.postgresql.report.Report.list */", sql_select, sql_from, sql_where, sql_order, sql_limit)) # Query for counting the results sql_count_query = "\n".join( ("/* socorro.external.postgresql.report.Report.list */", "SELECT count(*)", sql_from, sql_where)) # Querying the DB with self.get_connection() as connection: total = self.count( sql_count_query, sql_params, error_message="Failed to count crashes from reports.", connection=connection) # No need to call Postgres if we know there will be no results if total: if include_raw_crash: sql_query = wrapped_select % sql_query results = self.query( sql_query, sql_params, error_message="Failed to retrieve crashes from reports", connection=connection) else: results = [] # Transforming the results into what we want fields = ( "date_processed", "uptime", "user_comments", "uuid", "uuid", # the uuid::text one "product", "version", "build", "signature", "url", "os_name", "os_version", "cpu_name", "cpu_info", "address", "reason", "last_crash", "install_age", "hangid", "process_type", "release_channel", "install_time", "duplicate_of", ) if include_raw_crash: fields += ("raw_crash", ) crashes = [] for row in results: crash = dict(zip(fields, row)) if include_raw_crash and crash['raw_crash']: crash['raw_crash'] = json.loads(crash['raw_crash']) for i in crash: try: crash[i] = datetimeutil.date_to_string(crash[i]) except TypeError: pass crashes.append(crash) return {"hits": crashes, "total": total}
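# Standalone sketch of the argument normalisation done at the top of
# get_list(): "from"/"to" are accepted as aliases, both bounds are required,
# and the span is capped at 30 days. Uses the same MissingArgumentError,
# BadArgumentError and datetimeutil names that are in scope above;
# validate_date_span is only an illustration.
def validate_date_span(kwargs):
    if 'from' in kwargs and 'from_date' not in kwargs:
        kwargs['from_date'] = kwargs.get('from')
    if 'to' in kwargs and 'to_date' not in kwargs:
        kwargs['to_date'] = kwargs.get('to')
    for key in ('from_date', 'to_date'):
        if not kwargs.get(key):
            raise MissingArgumentError(key)
    from_date = datetimeutil.datetimeFromISOdateString(kwargs['from_date'])
    to_date = datetimeutil.datetimeFromISOdateString(kwargs['to_date'])
    if (to_date - from_date).days > 30:
        raise BadArgumentError(
            'Span between from_date and to_date can not be more than 30 days'
        )
    return from_date, to_date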
def test_get_list(self): now = datetimeutil.utc_now() yesterday = now - datetime.timedelta(days=1) yesterday = datetimeutil.date_to_string(yesterday) report = Report(config=self.config) # Test 1 params = {'signature': 'sig1'} res = report.get_list(**params) self.assertEqual(res['total'], 5) # Test 2 params = { 'signature': 'sig1', 'products': 'WaterWolf', 'versions': 'WaterWolf:2.0' } res = report.get_list(**params) self.assertEqual(res['total'], 1) # Test 3 params = { 'signature': 'sig1', 'products': 'WaterWolf', 'versions': ['WaterWolf:1.0', 'WaterWolf:3.0'], 'os': 'win', 'build_ids': '20001212010203', 'reasons': 'STACK_OVERFLOW' } res = report.get_list(**params) self.assertEqual(res['total'], 2) res_expected = { 'hits': [{ 'hangid': None, 'product': 'WaterWolf', 'os_name': 'Windows NT', 'uuid': '4', 'cpu_info': None, 'url': None, 'last_crash': None, 'date_processed': yesterday, 'cpu_name': None, 'uptime': None, 'process_type': 'browser', 'os_version': None, 'reason': 'STACK_OVERFLOW', 'version': '1.0', 'build': '20001212010203', 'install_age': None, 'signature': 'sig1', 'install_time': None, 'duplicate_of': None, 'address': None, 'user_comments': None }, { 'hangid': None, 'product': 'WaterWolf', 'os_name': 'Windows NT', 'uuid': '7', 'cpu_info': None, 'url': None, 'last_crash': None, 'date_processed': yesterday, 'cpu_name': None, 'uptime': None, 'process_type': 'plugin', 'os_version': None, 'reason': 'STACK_OVERFLOW', 'version': '3.0', 'build': '20001212010203', 'install_age': None, 'signature': 'sig1', 'install_time': None, 'duplicate_of': None, 'address': None, 'user_comments': None }], 'total': 2 } self.assertEqual(res, res_expected) # Test 5 params = {'signature': 'this/is+a=C|signature'} res = report.get_list(**params) self.assertEqual(res['total'], 1) res_expected = { 'hits': [{ 'hangid': None, 'product': 'WindBear', 'os_name': 'Linux', 'uuid': '10', 'cpu_info': None, 'url': None, 'last_crash': None, 'date_processed': yesterday, 'cpu_name': None, 'uptime': None, 'process_type': 'browser', 'os_version': None, 'reason': 'STACK_OVERFLOW', 'version': '1.0', 'build': '20001212010203', 'install_age': None, 'signature': 'this/is+a=C|signature', 'install_time': None, 'duplicate_of': None, 'address': None, 'user_comments': None }], 'total': 1 } self.assertEqual(res, res_expected) # Test 6: plugins params = { 'signature': 'sig1', 'report_process': 'plugin', 'plugin_in': 'filename', 'plugin_terms': 'NPSWF', 'plugin_search_mode': 'contains', } res = report.get_list(**params) self.assertEqual(res['total'], 1) # Test 7: plugins params = { 'signature': 'sig1', 'report_process': 'plugin', 'plugin_in': 'name', 'plugin_terms': 'Flash', 'plugin_search_mode': 'starts_with', } res = report.get_list(**params) self.assertEqual(res['total'], 1)
def setUp(self): super(IntegrationElasticsearchSearch, self).setUp() with self.get_config_manager().context() as config: self.storage = crashstorage.ElasticSearchCrashStorage(config) # clear the indices cache so the index is created on every test self.storage.indices_cache = set() now = datetimeutil.utc_now() yesterday = now - datetime.timedelta(days=1) yesterday = datetimeutil.date_to_string(yesterday) last_month = now - datetime.timedelta(weeks=4) last_month = datetimeutil.date_to_string(last_month) # insert data into elasticsearch default_crash_report = { 'uuid': 100, 'signature': 'js::break_your_browser', 'date_processed': yesterday, 'product': 'WaterWolf', 'version': '1.0', 'release_channel': 'release', 'os_name': 'Linux', 'build': '1234567890', 'reason': 'MOZALLOC_WENT_WRONG', 'hangid': None, 'process_type': None, } self.storage.save_processed(default_crash_report) self.storage.save_processed( dict(default_crash_report, uuid=1, product='EarthRaccoon') ) self.storage.save_processed( dict(default_crash_report, uuid=2, version='2.0') ) self.storage.save_processed( dict(default_crash_report, uuid=3, release_channel='aurora') ) self.storage.save_processed( dict(default_crash_report, uuid=4, os_name='Windows NT') ) self.storage.save_processed( dict(default_crash_report, uuid=5, build='0987654321') ) self.storage.save_processed( dict(default_crash_report, uuid=6, reason='VERY_BAD_EXCEPTION') ) self.storage.save_processed( dict(default_crash_report, uuid=7, hangid='12') ) self.storage.save_processed( dict(default_crash_report, uuid=8, process_type='plugin') ) self.storage.save_processed( dict(default_crash_report, uuid=9, signature='my_bad') ) self.storage.save_processed( dict( default_crash_report, uuid=10, date_processed=last_month, signature='my_little_signature', ) ) # for plugin terms test self.storage.save_processed( dict( default_crash_report, uuid=11, product='PluginSoft', process_type='plugin', PluginFilename='carly.dll', PluginName='Hey I just met you', PluginVersion='1.2', ) ) self.storage.save_processed( dict( default_crash_report, uuid=12, product='PluginSoft', process_type='plugin', PluginFilename='hey.dll', PluginName='Hey Plugin', PluginVersion='10.7.0.2a', ) ) self.storage.save_processed( dict( default_crash_report, uuid=13, product='EarlyOwl', version='11.0b1', release_channel='beta', ) ) self.storage.save_processed( dict( default_crash_report, uuid=14, product='EarlyOwl', version='11.0b2', release_channel='beta', ) ) # As indexing is asynchronous, we need to force elasticsearch to # make the newly created content searchable before we run the tests self.storage.es.refresh()
def get(self, **kwargs): """Return a list of results and aggregations based on parameters. The list of accepted parameters (with types and default values) is in the database and can be accessed with the super_search_fields service. """ # Require that the list of fields be passed. if not kwargs.get('_fields'): raise MissingArgumentError('_fields') self.all_fields = kwargs['_fields'] # Filter parameters and raise potential errors. params = self.get_parameters(**kwargs) # Find the indices to use to optimize the elasticsearch query. indices = self.get_indices(params['date']) # Create and configure the search object. search = Search( using=self.get_connection(), index=indices, doc_type=self.config.elasticsearch.elasticsearch_doctype, ) # Create filters. filters = [] histogram_intervals = {} for field, sub_params in params.items(): sub_filters = None for param in sub_params: if param.name.startswith('_'): # By default, all param values are turned into lists, # even when they have and can have only one value. # For those we know there can only be one value, # so we just extract it from the made-up list. if param.name == '_results_offset': results_from = param.value[0] elif param.name == '_results_number': results_number = param.value[0] if results_number > 1000: raise BadArgumentError( '_results_number', msg=('_results_number cannot be greater ' 'than 1,000')) if results_number < 0: raise BadArgumentError( '_results_number', msg='_results_number cannot be negative') elif param.name == '_facets_size': facets_size = param.value[0] # Why cap it? # Because if the query is covering a lot of different # things you can get a really really large query # which can hog resources excessively. # Downloading, as an example, 100k facets (and 0 hits) # when there is plenty of data yields a 11MB JSON # file. if facets_size > 10000: raise BadArgumentError( '_facets_size greater than 10,000') for f in self.histogram_fields: if param.name == '_histogram_interval.%s' % f: histogram_intervals[f] = param.value[0] # Don't use meta parameters in the query. continue field_data = self.all_fields[param.name] name = self.get_full_field_name(field_data) if param.data_type in ('date', 'datetime'): param.value = datetimeutil.date_to_string(param.value) elif param.data_type == 'enum': param.value = [x.lower() for x in param.value] elif param.data_type == 'str' and not param.operator: param.value = [x.lower() for x in param.value] # Operators needing wildcards, and the associated value # transformation with said wildcards. operator_wildcards = { '~': '*%s*', # contains '^': '%s*', # starts with '$': '*%s' # ends with } # Operators needing ranges, and the associated Elasticsearch # comparison operator. operator_range = { '>': 'gt', '<': 'lt', '>=': 'gte', '<=': 'lte', } args = {} filter_type = 'term' filter_value = None if not param.operator: # contains one of the terms if len(param.value) == 1: val = param.value[0] if not isinstance(val, basestring) or ' ' not in val: # There's only one term and no white space, this # is a simple term filter. filter_value = val else: # If the term contains white spaces, we want to # perform a phrase query. filter_type = 'query' args = Q( 'simple_query_string', query=param.value[0], fields=[name], default_operator='and', ).to_dict() else: # There are several terms, this is a terms filter. 
filter_type = 'terms' filter_value = param.value elif param.operator == '=': # is exactly if field_data['has_full_version']: name = '%s.full' % name filter_value = param.value elif param.operator in operator_range: filter_type = 'range' filter_value = { operator_range[param.operator]: param.value } elif param.operator == '__null__': filter_type = 'missing' args['field'] = name elif param.operator == '__true__': filter_type = 'term' filter_value = True elif param.operator == '@': filter_type = 'regexp' if field_data['has_full_version']: name = '%s.full' % name filter_value = param.value elif param.operator in operator_wildcards: filter_type = 'query' # Wildcard operations are better applied to a non-analyzed # field (called "full") if there is one. if field_data['has_full_version']: name = '%s.full' % name q_args = {} q_args[name] = (operator_wildcards[param.operator] % param.value) query = Q('wildcard', **q_args) args = query.to_dict() if filter_value is not None: args[name] = filter_value if args: new_filter = F(filter_type, **args) if param.operator_not: new_filter = ~new_filter if sub_filters is None: sub_filters = new_filter elif filter_type == 'range': sub_filters &= new_filter else: sub_filters |= new_filter continue if sub_filters is not None: filters.append(sub_filters) search = search.filter(F('bool', must=filters)) # Restricting returned fields. fields = [] # We keep track of the requested columns in order to make sure we # return those column names and not aliases for example. self.request_columns = [] for param in params['_columns']: for value in param.value: if not value: continue self.request_columns.append(value) field_name = self.get_field_name(value, full=False) fields.append(field_name) search = search.fields(fields) # Sorting. sort_fields = [] for param in params['_sort']: for value in param.value: if not value: continue # Values starting with a '-' are sorted in descending order. # In order to retrieve the database name of the field, we # must first remove the '-' part and add it back later. # Example: given ['product', '-version'], the results will be # sorted by ascending product then descending version. desc = False if value.startswith('-'): desc = True value = value[1:] field_name = self.get_field_name(value) if desc: # The underlying library understands that '-' means # sorting in descending order. field_name = '-' + field_name sort_fields.append(field_name) search = search.sort(*sort_fields) # Pagination. results_to = results_from + results_number search = search[results_from:results_to] # Create facets. if facets_size: self._create_aggregations(params, search, facets_size, histogram_intervals) # Query and compute results. hits = [] if params['_return_query'][0].value[0]: # Return only the JSON query that would be sent to elasticsearch. return { 'query': search.to_dict(), 'indices': indices, } errors = [] # We call elasticsearch with a computed list of indices, based on # the date range. However, if that list contains indices that do not # exist in elasticsearch, an error will be raised. We thus want to # remove all failing indices until we either have a valid list, or # an empty list in which case we return no result. while True: try: results = search.execute() for hit in results: hits.append(self.format_fields(hit.to_dict())) total = search.count() aggregations = getattr(results, 'aggregations', {}) if aggregations: aggregations = self.format_aggregations(aggregations) shards = getattr(results, '_shards', {}) break # Yay! Results! 
except NotFoundError, e: missing_index = re.findall(BAD_INDEX_REGEX, e.error)[0] if missing_index in indices: del indices[indices.index(missing_index)] else: # Wait what? An error caused by an index that was not # in the request? That should never happen, but in case # it does, better know it. raise errors.append({ 'type': 'missing_index', 'index': missing_index, }) if indices: # Update the list of indices and try again. # Note: we need to first empty the list of indices before # updating it, otherwise the removed indices never get # actually removed. search = search.index().index(*indices) else: # There is no index left in the list, return an empty # result. hits = [] total = 0 aggregations = {} shards = None break except RequestError as exception: # Try to handle it gracefully if we can find out what # input was bad and caused the exception. try: bad_input = ELASTICSEARCH_PARSE_EXCEPTION_REGEX.findall( exception.error)[-1] # Loop over the original parameters to try to figure # out which *key* had the bad input. for key, value in kwargs.items(): if value == bad_input: raise BadArgumentError(key) except IndexError: # Not an ElasticsearchParseException exception pass raise
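# Hypothetical call illustrating the meta-parameter limits enforced in the
# get() method above: _results_number is capped at 1,000 (and must not be
# negative) and _facets_size is capped at 10,000, both raising
# BadArgumentError otherwise. The service instance and this particular field
# combination are assumptions; only the parameter names come from the checks
# above.
#
# results = super_search.get(
#     _fields=all_fields,
#     signature='~js::',           # "contains" operator
#     date='>=2015-01-01',
#     _results_number=500,         # anything above 1000 raises BadArgumentError
#     _facets_size=100,            # anything above 10000 raises BadArgumentError
#     _columns=['signature', 'product', 'version'],
#     _sort=['-date'],
# )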
def test_get_signature_history(self): api = Crashes(config=self.config) now = self.now lastweek = now - datetime.timedelta(days=7) params = { 'product': 'Firefox', 'version': '8.0', 'signature': 'signature1', 'start_date': lastweek, 'end_date': now, } res = api.get_signature_history(**params) eq_(len(res['hits']), 2) eq_(len(res['hits']), res['total']) date = datetimeutil.date_to_string(now.date()) eq_(res['hits'][0]['date'], date) eq_(res['hits'][1]['date'], date) eq_(res['hits'][0]['count'], 5) eq_(res['hits'][1]['count'], 14) eq_(round(res['hits'][0]['percent_of_total'], 2), round(5.0 / 19.0 * 100, 2)) eq_(round(res['hits'][1]['percent_of_total'], 2), round(14.0 / 19.0 * 100, 2)) # Test no results params = { 'product': 'Firefox', 'version': '9.0', 'signature': 'signature1', 'start_date': lastweek, 'end_date': now, } res = api.get_signature_history(**params) res_expected = {'hits': [], 'total': 0} eq_(res, res_expected) # Test default date parameters params = { 'product': 'Fennec', 'version': '11.0.1', 'signature': 'signature3', } res = api.get_signature_history(**params) res_expected = { 'hits': [{ 'date': now.date().isoformat(), 'count': 14, 'percent_of_total': 100 }], 'total': 1 } eq_(res, res_expected) # Test missing parameters assert_raises(MissingArgumentError, api.get_signature_history) assert_raises(MissingArgumentError, api.get_signature_history, **{'product': 'Firefox'}) assert_raises(MissingArgumentError, api.get_signature_history, **{ 'product': 'Firefox', 'version': '8.0' }) assert_raises(MissingArgumentError, api.get_signature_history, **{ 'signature': 'signature1', 'version': '8.0' })
def test_get(self): products = Products(config=self.config) now = datetimeutil.utc_now().date() lastweek = now - datetime.timedelta(days=7) now_str = datetimeutil.date_to_string(now) lastweek_str = datetimeutil.date_to_string(lastweek) #...................................................................... # Test 1: find one exact match for one product and one version params = {"versions": "Firefox:8.0"} res = products.get(**params) res_expected = { "hits": [{ "product": "Firefox", "version": "8.0", "start_date": now_str, "end_date": now_str, "is_featured": False, "build_type": "Release", "throttle": 10.0, "has_builds": False }], "total": 1 } self.assertEqual(res, res_expected) #...................................................................... # Test 2: Find two different products with their correct versions params = {"versions": ["Firefox:8.0", "Thunderbird:10.0.2b"]} res = products.get(**params) res_expected = { "hits": [{ "product": "Firefox", "version": "8.0", "start_date": now_str, "end_date": now_str, "is_featured": False, "build_type": "Release", "throttle": 10.0, "has_builds": False }, { "product": "Thunderbird", "version": "10.0.2b", "start_date": now_str, "end_date": now_str, "is_featured": False, "build_type": "Release", "throttle": 10.0, "has_builds": False }], "total": 2 } self.assertEqual(res, res_expected) #...................................................................... # Test 3: empty result, no products:version found params = {"versions": "Firefox:14.0"} res = products.get(**params) res_expected = {"hits": [], "total": 0} self.assertEqual(res, res_expected) #...................................................................... # Test 4: Test products list is returned with no parameters # Note that the expired version is not returned params = {} res = products.get(**params) res_expected = { "products": ["Firefox", "Thunderbird", "Fennec"], "hits": { "Firefox": [{ "product": "Firefox", "version": "8.0", "start_date": now_str, "end_date": now_str, "throttle": 10.00, "featured": False, "release": "Release", "has_builds": False }], "Thunderbird": [{ "product": "Thunderbird", "version": "10.0.2b", "start_date": now_str, "end_date": now_str, "throttle": 10.00, "featured": False, "release": "Release", "has_builds": False, }], "Fennec": [{ "product": "Fennec", "version": "11.0.1", "start_date": now_str, "end_date": now_str, "throttle": 10.00, "featured": False, "release": "Release", "has_builds": False }] }, "total": 3 } self.assertEqual(res, res_expected) #...................................................................... # Test 5: An invalid versions list is passed, all versions are returned params = {'versions': [1]} res = products.get(**params) res_expected = { "hits": [{ "product": "Fennec", "version": "11.0.1", "start_date": now_str, "end_date": now_str, "is_featured": False, "build_type": "Release", "throttle": 10.0, "has_builds": False }, { "product": "Firefox", "version": "8.0", "start_date": now_str, "end_date": now_str, "is_featured": False, "build_type": "Release", "throttle": 10.0, "has_builds": False }, { "product": "Thunderbird", "version": "10.0.2b", "start_date": now_str, "end_date": now_str, "is_featured": False, "build_type": "Release", "throttle": 10.0, "has_builds": False }], "total": 3 } self.assertEqual(res, res_expected)
def test_get_exploitibility_with_pagination(self):
    crashes = Crashes(config=self.config)
    yesterday_date = (self.now - datetime.timedelta(days=1)).date()
    day_before_yesterday = (self.now - datetime.timedelta(days=2)).date()

    j = 100  # some number so it's not used by other tests or fixtures
    rand = lambda: random.randint(0, 10)
    exploit_values = []
    signature_values = []
    for day in day_before_yesterday, yesterday_date, self.now:
        for i in range(10):
            exploit_values.append(
                "(%s, 3, 'Signature%s%s', '%s', %s, %s, %s, %s, %s)" % (
                    j + 1, j, i, day, rand(), rand(), rand(), rand(), rand()
                )
            )
            signature_values.append(
                "(%s, 'Signature%s%s', %s, '%s')" % (
                    j + 1, j, i, day.strftime('%Y%m%d%H'), day
                )
            )
            # increment inside the inner loop so every row gets a unique
            # signature_id
            j += 1

    cursor = self.connection.cursor()
    insert = """
    INSERT INTO signatures
        (signature_id, signature, first_build, first_report)
    VALUES
    """
    insert += ',\n'.join(signature_values)
    cursor.execute(insert)

    insert = """
    INSERT INTO exploitability_reports
        (signature_id, product_version_id, signature, report_date,
         null_count, none_count, low_count, medium_count, high_count)
    VALUES
    """
    insert += ',\n'.join(exploit_values)
    cursor.execute(insert)
    self.connection.commit()

    res = crashes.get_exploitability()
    self.assertEqual(len(res['hits']), res['total'])
    self.assertTrue(res['total'] >= 3 * 10)

    res = crashes.get_exploitability(
        start_date=yesterday_date,
        end_date=self.now
    )
    self.assertEqual(len(res['hits']), res['total'])
    self.assertTrue(res['total'] >= 2 * 10)
    self.assertTrue(res['total'] < 3 * 10)

    # passing a `page` without `batch` will yield an error
    self.assertRaises(
        MissingArgumentError,
        crashes.get_exploitability,
        page=2
    )
    # `page` starts at one, so anything smaller is bad
    self.assertRaises(
        BadArgumentError,
        crashes.get_exploitability,
        page=0, batch=15
    )

    # Note: `page` is 1-based, so page=1 is the first page
    res = crashes.get_exploitability(page=1, batch=15)
    self.assertNotEqual(len(res['hits']), res['total'])
    self.assertEqual(len(res['hits']), 15)
    self.assertTrue(res['total'] >= 3 * 10)
    # since it's ordered by `report_date`...
    report_dates = [x['report_date'] for x in res['hits']]
    self.assertEqual(
        report_dates[0],
        datetimeutil.date_to_string(self.now.date())
    )
    self.assertEqual(
        report_dates[-1],
        datetimeutil.date_to_string(yesterday_date)
    )

    res = crashes.get_exploitability(
        page=2,
        batch=5,
        start_date=day_before_yesterday,
        end_date=yesterday_date
    )
    self.assertEqual(len(res['hits']), 5)
    self.assertTrue(res['total'] >= 2 * 10)
    self.assertTrue(res['total'] < 3 * 10)
    report_dates = [x['report_date'] for x in res['hits']]
    self.assertEqual(
        report_dates[0],
        datetimeutil.date_to_string(yesterday_date)
    )
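# A minimal sketch, assuming the page/batch pagination tested above is backed
# by SQL LIMIT/OFFSET. The helper name `to_limit_offset` is hypothetical and
# not part of the Crashes class; it only illustrates the arithmetic.
def to_limit_offset(page, batch):
    """Return (limit, offset) for a 1-based `page` of size `batch`."""
    if page < 1:
        raise ValueError('page starts at 1')
    return batch, (page - 1) * batch

# page=2, batch=5 skips the first five rows and returns rows 6 through 10,
# which is what the final get_exploitability(page=2, batch=5, ...) call above
# relies on.
assert to_limit_offset(1, 15) == (15, 0)
assert to_limit_offset(2, 5) == (5, 5)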
def get_signature_history(self, **kwargs):
    """Return the history of a signature.

    See http://socorro.readthedocs.org/en/latest/middleware.html
    """
    now = datetimeutil.utc_now()
    lastweek = now - datetime.timedelta(days=7)

    filters = [
        ('product', None, 'str'),
        ('version', None, 'str'),
        ('signature', None, 'str'),
        ('end_date', now, 'datetime'),
        ('start_date', lastweek, 'datetime'),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    for param in ('product', 'version', 'signature'):
        if not params[param]:
            raise MissingArgumentError(param)

    if params.signature == '##null##':
        signature_where = 'AND signature IS NULL'
    else:
        signature_where = 'AND signature = %(signature)s'

    if params.signature == '##empty##':
        params.signature = ''

    sql = """
        /* external.postgresql.crashes.Crashes.get_signature_history */
        WITH hist AS (
            SELECT
                report_date,
                report_count
            FROM
                tcbs JOIN signatures USING (signature_id)
                     JOIN product_versions USING (product_version_id)
            WHERE
                report_date BETWEEN %%(start_date)s AND %%(end_date)s
                AND product_name = %%(product)s
                AND version_string = %%(version)s
                %s
            GROUP BY
                report_date, report_count
            ORDER BY 1
        ),
        scaling_window AS (
            SELECT
                hist.*,
                SUM(report_count) OVER () AS total_crashes
            FROM hist
        )
        SELECT
            report_date,
            report_count,
            report_count / total_crashes::float * 100 AS percent_of_total
        FROM scaling_window
        ORDER BY report_date DESC
    """ % signature_where

    error_message = 'Failed to retrieve signature history from PostgreSQL'
    results = self.query(sql, params, error_message=error_message)

    # Transform the results into what we want
    history = []
    for row in results:
        dot = dict(zip(('date', 'count', 'percent_of_total'), row))
        dot['date'] = datetimeutil.date_to_string(dot['date'])
        history.append(dot)

    return {
        'hits': history,
        'total': len(history)
    }
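# A standalone illustration of the two-step substitution used in
# get_signature_history() above: the query template is first combined with the
# signature WHERE fragment via Python's % operator, which is why the psycopg2
# placeholders are escaped as %%(name)s; only afterwards does the driver
# substitute the named parameters. The strings below are illustrative only.
template = "WHERE report_date >= %%(start_date)s %s"
sql = template % 'AND signature = %(signature)s'
# After the first pass, %% has collapsed to a single %, leaving valid
# psycopg2-style placeholders for the second pass:
assert sql == "WHERE report_date >= %(start_date)s AND signature = %(signature)s"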