def test_get_parameters_date_defaults(self):
    # Verify get_parameters() always yields a two-element 'date' range,
    # filling in whichever bound the caller did not supply.
    with _get_config_manager().context() as config:
        search = SearchBaseWithFields(
            config=config,
        )
        now = datetimeutil.utc_now()

        # Test default values when nothing is passed
        params = search.get_parameters()
        ok_('date' in params)
        eq_(len(params['date']), 2)

        # Pass only the high value
        args = {
            'date': '<%s' % datetimeutil.date_to_string(now)
        }
        params = search.get_parameters(**args)
        ok_('date' in params)
        eq_(len(params['date']), 2)
        eq_(params['date'][0].operator, '<')
        eq_(params['date'][1].operator, '>=')
        eq_(params['date'][0].value.date(), now.date())
        # the missing low bound defaults to one week before the high bound
        eq_(
            params['date'][1].value.date(),
            now.date() - datetime.timedelta(days=7)
        )

        # Pass only the low value
        pasttime = now - datetime.timedelta(days=10)
        args = {
            'date': '>=%s' % datetimeutil.date_to_string(pasttime)
        }
        params = search.get_parameters(**args)
        ok_('date' in params)
        eq_(len(params['date']), 2)
        eq_(params['date'][0].operator, '<=')
        eq_(params['date'][1].operator, '>=')
        # the missing high bound defaults to "now"
        eq_(params['date'][0].value.date(), now.date())
        eq_(params['date'][1].value.date(), pasttime.date())

        # Pass the two values
        pasttime = now - datetime.timedelta(days=10)
        args = {
            'date': [
                '<%s' % datetimeutil.date_to_string(now),
                '>%s' % datetimeutil.date_to_string(pasttime),
            ]
        }
        params = search.get_parameters(**args)
        ok_('date' in params)
        eq_(len(params['date']), 2)
        # both explicit operators are preserved unchanged
        eq_(params['date'][0].operator, '<')
        eq_(params['date'][1].operator, '>')
        eq_(params['date'][0].value.date(), now.date())
        eq_(params['date'][1].value.date(), pasttime.date())
def twoPeriodTopCrasherComparison(
        databaseConnection, context,
        closestEntryFunction=latestEntryBeforeOrEqualTo,
        listOfTopCrashersFunction=getListOfTopCrashersBySignature):
    """Compare top crashers over two consecutive periods of equal length.

    Mutates `context` in place (sets 'logger', 'numberOfComparisonPoints',
    'limit', 'to_date', 'startDate') and returns a dict with the annotated
    crasher list plus period totals.
    """
    try:
        context['logger'].debug('entered twoPeriodTopCrasherComparison')
    except KeyError:
        # no logger supplied: fall back to a no-op logger
        context['logger'] = util.SilentFakeLogger()
    # keys that must be present before we can do anything
    assertions = ['to_date', 'duration', 'product', 'version']
    for param in assertions:
        assert param in context, (
            "%s is missing from the configuration" % param)
    context['numberOfComparisonPoints'] = 2
    # NOTE(review): this raises KeyError when 'limit' is absent from
    # context (it is not in `assertions`) — presumably callers always
    # supply it; confirm before relying on the 100 default.
    if not context['limit']:
        context['limit'] = 100
    #context['logger'].debug('about to latestEntryBeforeOrEqualTo')
    # snap to_date back to the closest existing data point at or before it
    context['to_date'] = closestEntryFunction(databaseConnection,
                                              context['to_date'],
                                              context['product'],
                                              context['version'])
    context['logger'].debug('New to_date: %s' % context['to_date'])
    # window covers numberOfComparisonPoints back-to-back periods
    # (context supports attribute access here — presumably a DotDict)
    context['startDate'] = context.to_date - (context.duration *
                                              context.numberOfComparisonPoints)
    #context['logger'].debug('after %s' % context)
    # [0] keeps only the most recent period's list, already annotated
    # with rank changes against the prior period
    listOfTopCrashers = listOfListsWithChangeInRank(
        rangeOfQueriesGenerator(
            databaseConnection,
            context,
            listOfTopCrashersFunction))[0]
    #context['logger'].debug('listOfTopCrashers %s' % listOfTopCrashers)
    totalNumberOfCrashes = totalPercentOfTotal = 0
    for x in listOfTopCrashers:
        if 'total_crashes' in x:
            # pop the aggregate off the row; it describes the whole result
            totalNumberOfCrashes = x['total_crashes']
            del x['total_crashes']
        totalPercentOfTotal += x.get('percentOfTotal', 0)
    result = {
        'crashes': listOfTopCrashers,
        'start_date': datetimeutil.date_to_string(
            context.to_date - context.duration
        ),
        'end_date': datetimeutil.date_to_string(context.to_date),
        'totalNumberOfCrashes': totalNumberOfCrashes,
        'totalPercentage': totalPercentOfTotal,
    }
    #logger.debug("about to return %s", result)
    return result
def post(self, **kwargs):
    """Return first-report metadata for a set of crash signatures.

    Requires a non-empty 'signatures' argument; raises
    MissingArgumentError otherwise.
    """
    params = external_common.parse_arguments(self.filters, kwargs)
    if not params['signatures']:
        raise MissingArgumentError('signatures')

    sql = """
        SELECT
            signature,
            first_report AS first_date,
            first_build
        FROM signatures
        WHERE signature IN %s
    """
    # one positional parameter: the tuple of signatures for the IN clause
    sql_params = [tuple(params['signatures'])]

    rows = self.query(
        sql,
        sql_params,
        error_message='Failed to retrieve signatures from PostgreSQL'
    )

    hits = []
    for row in rows.zipped():
        # serialize the date for JSON-friendly output
        row['first_date'] = datetimeutil.date_to_string(row['first_date'])
        hits.append(row)

    return {
        'hits': hits,
        'total': len(hits)
    }
def test_get_exploitibility_by_report_date(self):
    """Exploitability aggregates restricted to a single report date."""
    api = Crashes(config=self.config)
    target_date = (self.now - datetime.timedelta(days=1)).date()
    target_date_str = datetimeutil.date_to_string(target_date)

    expected = {
        "hits": [
            {
                "signature": "canIhaveYourSignature()",
                "null_count": 2,
                "none_count": 2,
                "low_count": 2,
                "medium_count": 2,
                "high_count": 2
            },
            {
                "signature": "ofCourseYouCan()",
                "null_count": 4,
                "none_count": 3,
                "low_count": 2,
                "medium_count": 1,
                "high_count": 0
            }
        ],
        "total": 2,
    }

    # same date for both bounds: a one-day window
    actual = api.get_exploitability(
        start_date=target_date_str,
        end_date=target_date_str
    )
    eq_(actual, expected)
def test_date_to_string():
    """date_to_string must render aware/naive datetimes and plain dates."""
    cases = [
        # Datetime with timezone
        (datetime.datetime(2012, 1, 3, 12, 23, 34, tzinfo=UTC),
         '2012-01-03T12:23:34+00:00'),
        # Datetime without timezone
        (datetime.datetime(2012, 1, 3, 12, 23, 34),
         '2012-01-03T12:23:34'),
        # Date (no time, no timezone)
        (datetime.date(2012, 1, 3),
         '2012-01-03'),
    ]
    for value, expected in cases:
        eq_(datetimeutil.date_to_string(value), expected)
def test_twoPeriodTopCrasherComparisonLimited(self):
    # With limit=1 only the top crasher is returned, but the period
    # totals still cover all crashes, not just the returned row.
    lastweek = self.now - datetime.timedelta(days=7)
    lastweek_str = datetimeutil.date_to_string(lastweek.date())
    two_weeks = datetimeutil.date_to_string(self.now.date() -
                                            datetime.timedelta(days=14))
    self.params.limit = 1
    res = tcbs.twoPeriodTopCrasherComparison(
        self.connection,
        self.params
    )
    res_expected = {
        'totalPercentage': 0.58333333333333304,
        'end_date': lastweek_str,
        'start_date': two_weeks,
        'crashes': [{
            'count': 14L,
            'mac_count': 1L,
            'content_count': 0,
            'first_report': lastweek_str,
            'previousRank': 'null',
            'currentRank': 0,
            'startup_percent': None,
            'versions': 'plugin1, plugin2',
            'first_report_exact': lastweek_str + ' 00:00:00',
            'percentOfTotal': 0.58333333333333304,
            'changeInRank': 'new',
            'is_gc_count': 1L,
            'win_count': 12L,
            'changeInPercentOfTotal': 'new',
            'linux_count': 1L,
            'hang_count': 0L,
            'signature': 'Fake Signature #1',
            'versions_count': 2,
            'previousPercentOfTotal': 'null',
            'plugin_count': 0
        }],
        'totalNumberOfCrashes': 24L
    }
    eq_(res, res_expected)
def test_get_adu_by_signature(self):
    # ADU (active daily users) rows for one signature/channel over a
    # one-day window; also checks that a start_date more than a year
    # back is rejected.
    crashes = Crashes(config=self.config)

    signature = "canIhaveYourSignature()"
    channel = "release"
    yesterday_date = (self.now - datetime.timedelta(days=1)).date()
    yesterday = datetimeutil.date_to_string(yesterday_date)

    res_expected = {
        "hits": [
            {
                "product_name": "WaterWolf",
                "signature": signature,
                "adu_date": yesterday,
                "build_date": "2014-03-01",
                "buildid": '201403010101',
                "crash_count": 3,
                "adu_count": 1023,
                "os_name": "Mac OS X",
                "channel": channel,
            },
            {
                "product_name": "WaterWolf",
                "signature": signature,
                "adu_date": yesterday,
                "build_date": "2014-04-01",
                "buildid": '201404010101',
                "crash_count": 4,
                "adu_count": 1024,
                "os_name": "Windows NT",
                "channel": channel,
            },
        ],
        "total": 2,
    }

    res = crashes.get_adu_by_signature(
        product_name="WaterWolf",
        start_date=yesterday,
        end_date=yesterday,
        signature=signature,
        channel=channel,
    )
    eq_(res, res_expected)

    # a window starting more than 365 days ago is a bad argument
    assert_raises(
        BadArgumentError,
        crashes.get_adu_by_signature,
        start_date=(yesterday_date - datetime.timedelta(days=366)),
        end_date=yesterday,
        signature=signature,
        channel=channel
    )
def test_get_list_with_sort(self):
    """get_list must honor the 'sort' and 'reverse' arguments."""
    start = datetimeutil.date_to_string(
        self.now - datetime.timedelta(days=1)
    )
    api = Report(config=self.config)
    base_params = {
        'signature': 'sig1',
        'from_date': start,
        'to_date': self.now,
    }

    # by default it's sorted by date_processed
    result = api.get_list(**base_params)
    eq_(
        result['hits'],
        sorted(result['hits'], key=lambda hit: hit['date_processed'])
    )

    # now sort by 'reason'
    result = api.get_list(**dict(base_params, sort='reason'))
    eq_(
        result['hits'],
        sorted(result['hits'], key=lambda hit: hit['reason'])
    )

    # 'reverse' flips the sort direction
    result = api.get_list(**dict(
        base_params,
        sort='reason',
        reverse=True
    ))
    eq_(
        result['hits'],
        sorted(result['hits'], key=lambda hit: hit['reason'], reverse=True)
    )

    # an unrecognized sort key is rejected
    assert_raises(
        BadArgumentError,
        api.get_list,
        **dict(base_params, sort='neverheardof')
    )
def get(self, **kwargs):
    """Return the current state of all Crontabber jobs.

    Produces {"state": {app_name: row_dict, ...}} where every datetime
    column in a row is serialized to a string; NULL columns stay None.
    """
    sql = """
        /* socorro.external.postgresql.crontabber_state.CrontabberState.get */
        SELECT
            app_name,
            next_run,
            first_run,
            last_run,
            last_success,
            error_count,
            depends_on,
            last_error,
            ongoing
        FROM crontabber
        ORDER BY app_name
    """
    error_message = (
        "Failed to retrieve crontabber state data from PostgreSQL"
    )
    results = self.query(sql, error_message=error_message)

    # Columns holding datetimes that must be serialized for JSON output.
    # Hoisted out of the row loop: the tuple is loop-invariant and was
    # previously rebuilt for every row.
    possible_datetimes = (
        'next_run', 'first_run', 'last_run', 'last_success', 'ongoing'
    )

    state = {}
    for row in results.zipped():
        app_name = row.pop('app_name')
        state[app_name] = row
        for key in possible_datetimes:
            value = state[app_name][key]
            if value is None:
                # NULLs stay None rather than being stringified
                continue
            state[app_name][key] = datetimeutil.date_to_string(value)
    return {"state": state}
def setup_data(self):
    """Build the shared fixture: one (params, expected-result) pair per
    report_type scenario, keyed by scenario name."""
    now = self.now.date()
    yesterday = now - datetime.timedelta(days=1)
    lastweek = now - datetime.timedelta(days=7)
    now_str = datetimeutil.date_to_string(now)
    yesterday_str = datetimeutil.date_to_string(yesterday)
    lastweek_str = datetimeutil.date_to_string(lastweek)
    self.test_source_data = {
        # Test 1: find exact match for one product version and signature
        'products': {
            'params': {
                "versions": "Firefox:8.0",
                "report_type": "products",
                "signature": "Fake Signature #1",
                "start_date": lastweek_str,
                "end_date": now_str,
            },
            'res_expected': [
                {
                    "product_name": 'Firefox',
                    "version_string": "8.0",
                    "report_count": 1,
                    "percentage": '50.000',
                },
                {
                    "product_name": 'Firefox',
                    "version_string": "9.0",
                    "report_count": 1,
                    "percentage": '50.000',
                }
            ],
        },
        # Test 2: find ALL matches for all product versions and signature
        'products_no_version': {
            'params': {
                "report_type": "products",
                "signature": "Fake Signature #1",
                "start_date": lastweek_str,
                "end_date": now_str,
            },
            'res_expected': [
                {
                    "product_name": 'Firefox',
                    "version_string": "8.0",
                    "report_count": 1,
                    "percentage": '50.000',
                },
                {
                    "product_name": 'Firefox',
                    "version_string": "9.0",
                    "report_count": 1,
                    "percentage": '50.000',
                }
            ],
        },
        # Test 3: find architectures reported for a given version and a
        # signature
        'architecture': {
            'params': {
                "versions": "Firefox:8.0",
                "report_type": "architecture",
                "signature": "Fake Signature #1",
                "start_date": lastweek_str,
                "end_date": now_str,
            },
            'res_expected': [{
                "category": 'amd64',
                "report_count": 1.0,
                "percentage": "100.000",
            }],
        },
        # Test 4: find architectures reported for a signature with no
        # specific version.
        'architecture_no_version': {
            'params': {
                "report_type": "architecture",
                "signature": "Fake Signature #1",
                "start_date": lastweek_str,
                "end_date": now_str,
            },
            'res_expected': [{
                "category": 'amd64',
                "report_count": 2,
                "percentage": '100.000',
            }],
        },
        # Test 5: find flash_versions reported for specific version and
        # a signature
        'flash_versions': {
            'params': {
                "versions": "Firefox:8.0",
                "report_type": "flash_version",
                "signature": "Fake Signature #1",
                "start_date": lastweek_str,
                "end_date": now_str,
            },
            'res_expected': [{
                "category": '1.0',
                "report_count": 1.0,
                "percentage": "100.000",
            }],
        },
        # Test 6: find flash_versions reported with a signature and without
        # a specific version
        'flash_versions_no_version': {
            'params': {
                "report_type": "flash_version",
                "signature": "Fake Signature #1",
                "start_date": lastweek_str,
                "end_date": now_str,
            },
            'res_expected': [{
                "category": '1.0',
                "report_count": 2.0,
                "percentage": "100.000",
            }],
        },
        # Test 7: find installations reported for a signature
        'distinct_install': {
            'params': {
                "versions": "Firefox:8.0",
                "report_type": "distinct_install",
                "signature": "Fake Signature #1",
                "start_date": lastweek_str,
                "end_date": now_str,
            },
            'res_expected': [{
                "product_name": 'Firefox',
                "version_string": '8.0',
                "crashes": 10,
                "installations": 8,
            }],
        },
        # Test 8: find os_version_strings reported for a signature
        'os': {
            'params': {
                "versions": "Firefox:8.0",
                "report_type": "os",
                "signature": "Fake Signature #1",
                "start_date": lastweek_str,
                "end_date": now_str,
            },
            'res_expected': [{
                "category": 'Windows NT 6.4',
                "report_count": 1,
                "percentage": "100.000",
            }],
        },
        # Test 9: find process_type reported for a signature
        'process_type': {
            'params': {
                "versions": "Firefox:8.0",
                "report_type": "process_type",
                "signature": "Fake Signature #1",
                "start_date": lastweek_str,
                "end_date": now_str,
            },
            'res_expected': [{
                "category": 'plugin',
                "report_count": 1,
                "percentage": "100.000",
            }],
        },
        # Test 10: find uptime reported for signature
        'uptime': {
            'params': {
                "versions": "Firefox:8.0",
                "report_type": "uptime",
                "signature": "Fake Signature #1",
                "start_date": lastweek_str,
                "end_date": now_str,
            },
            'res_expected': [{
                "category": '15-30 minutes',
                "report_count": 1,
                "percentage": '100.000',
            }],
        },
        # Test 11: find exploitability reported for signature
        'exploitability': {
            'params': {
                "versions": "Firefox:8.0",
                "report_type": "exploitability",
                "signature": "Fake Signature #1",
                "start_date": lastweek_str,
                "end_date": now_str,
            },
            'res_expected': [{
                'low_count': 3,
                'high_count': 5,
                'null_count': 1,
                'none_count': 2,
                'report_date': yesterday_str,
                'medium_count': 4,
            }],
        },
        # Test 12: find mobile devices reported for signature with a
        # specific version
        'devices': {
            'params': {
                "versions": "Firefox:8.0",
                'report_type': 'devices',
                'signature': 'Fake Signature #1',
                'start_date': lastweek_str,
                'end_date': now_str,
            },
            'res_expected': [{
                'cpu_abi': 'armeabi-v7a',
                'manufacturer': 'samsung',
                'model': 'GT-P5100',
                'version': '16 (REL)',
                'report_count': 123,
                'percentage': '100.000',
            }],
        },
        # Test 13: find mobile devices reported for signature
        'devices_no_version': {
            'params': {
                'report_type': 'devices',
                'signature': 'Fake Signature #1',
                'start_date': lastweek_str,
                'end_date': now_str,
            },
            'res_expected': [{
                'cpu_abi': 'armeabi-v7a',
                'manufacturer': 'samsung',
                'model': 'GT-P5100',
                'version': '16 (REL)',
                'report_count': 246,
                'percentage': '100.000',
            }],
        },
        # Test 14: find graphics adapters reported for signature
        # (NOTE(review): original comment said "mobile devices" — this
        # scenario is clearly about graphics vendor/adapter data)
        'graphics': {
            'params': {
                "versions": "Firefox:8.0",
                'report_type': 'graphics',
                'signature': 'Fake Signature #1',
                'start_date': lastweek_str,
                'end_date': now_str,
            },
            'res_expected': [{
                'vendor_hex': '0x1234',
                'adapter_hex': '0x5678',
                'vendor_name': 'Test Vendor',
                'adapter_name': 'Test Adapter',
                'report_count': 123,
                'percentage': '100.000',
            }],
        },
    }
def get_signature_history(self, **kwargs):
    """Return the history of a signature.

    Daily report counts for one signature of one product version,
    each with its percentage of the period total.

    See http://socorro.readthedocs.org/en/latest/middleware.html
    """
    now = datetimeutil.utc_now()
    lastweek = now - datetime.timedelta(days=7)

    # (name, default, type) triples; date window defaults to last week
    filters = [
        ('product', None, 'str'),
        ('version', None, 'str'),
        ('signature', None, 'str'),
        ('end_date', now, 'datetime'),
        ('start_date', lastweek, 'datetime'),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    for param in ('product', 'version', 'signature'):
        if not params[param]:
            raise MissingArgumentError(param)

    # '##null##' / '##empty##' are sentinels for NULL and '' signatures
    if params.signature == '##null##':
        signature_where = 'AND signature IS NULL'
    else:
        signature_where = 'AND signature = %(signature)s'

    if params.signature == '##empty##':
        params.signature = ''

    # %% survives the outer %-interpolation of signature_where, leaving
    # %(name)s placeholders for the DB driver
    sql = """
        /* external.postgresql.crashes.Crashes.get_signature_history */
        WITH hist AS (
            SELECT
                report_date,
                report_count
            FROM
                tcbs JOIN signatures using (signature_id)
                     JOIN product_versions using (product_version_id)
            WHERE
                report_date BETWEEN %%(start_date)s AND %%(end_date)s
                AND product_name = %%(product)s
                AND version_string = %%(version)s
                %s
            GROUP BY
                report_date, report_count
            ORDER BY 1
        ),
        scaling_window AS (
            SELECT
                hist.*,
                SUM(report_count) over () AS total_crashes
            FROM hist
        )
        SELECT
            report_date AS date,
            report_count AS count,
            report_count / total_crashes::float * 100 AS percent_of_total
        FROM scaling_window
        ORDER BY report_date DESC
    """ % signature_where

    error_message = 'Failed to retrieve signature history from PostgreSQL'
    results = self.query(sql, params, error_message=error_message)

    # Transforming the results into what we want
    history = []
    for dot in results.zipped():
        dot['date'] = datetimeutil.date_to_string(dot['date'])
        history.append(dot)

    return {
        'hits': history,
        'total': len(history)
    }
def test_twoPeriodTopCrasherComparison(self):
    # Full comparison without a limit: both fake signatures come back,
    # ranked, with period totals spanning all 24 crashes.
    lastweek = self.now - datetime.timedelta(days=7)
    lastweek_str = datetimeutil.date_to_string(lastweek.date())
    two_weeks = datetimeutil.date_to_string(self.now.date() -
                                            datetime.timedelta(days=14))
    res = tcbs.twoPeriodTopCrasherComparison(
        self.connection,
        self.params
    )
    res_expected = {
        'totalPercentage': 1.0,
        'end_date': lastweek_str,
        'start_date': two_weeks,
        'crashes': [{
            'count': 14L,
            'mac_count': 1L,
            'content_count': 0,
            'first_report': lastweek_str,
            'previousRank': 'null',
            'currentRank': 0,
            'startup_percent': None,
            'versions': 'plugin1, plugin2',
            'first_report_exact': lastweek_str + ' 00:00:00',
            'percentOfTotal': 0.58333333333333304,
            'changeInRank': 'new',
            'is_gc_count': 1L,
            'win_count': 12L,
            'changeInPercentOfTotal': 'new',
            'linux_count': 1L,
            'hang_count': 0L,
            'signature': 'Fake Signature #1',
            'versions_count': 2,
            'previousPercentOfTotal': 'null',
            'plugin_count': 0
        }, {
            'count': 10L,
            'mac_count': 2L,
            'content_count': 0,
            'first_report': lastweek_str,
            'previousRank': 'null',
            'currentRank': 1,
            'startup_percent': None,
            'versions': 'plugin1, plugin2, plugin3, '
                        'plugin4, plugin5, plugin6',
            'first_report_exact': lastweek_str + ' 00:00:00',
            'percentOfTotal': 0.41666666666666702,
            'changeInRank': 'new',
            'is_gc_count': 3L,
            'win_count': 7L,
            'changeInPercentOfTotal': 'new',
            'linux_count': 1L,
            'hang_count': 0L,
            'signature': 'Fake Signature #2',
            'versions_count': 6,
            'previousPercentOfTotal': 'null',
            'plugin_count': 0
        }],
        'totalNumberOfCrashes': 24L
    }
    eq_(res, res_expected)
def test_listOfListsWithChangeInRank(self): lastweek = self.now - datetime.timedelta(days=7) lastweek_str = datetimeutil.date_to_string(lastweek.date()) params = self.params params.startDate = self.now.date() - datetime.timedelta(days=14) query_list = tcbs.getListOfTopCrashersBySignature query_range = tcbs.rangeOfQueriesGenerator( self.connection, self.params, query_list ) res = tcbs.listOfListsWithChangeInRank(query_range) res_expected = [[{ 'count': 5L, 'mac_count': 0L, 'content_count': 0, 'first_report': lastweek_str, 'previousRank': 0, 'currentRank': 0, 'startup_percent': None, 'versions': 'plugin1, plugin2', 'first_report_exact': lastweek_str + ' 00:00:00', 'percentOfTotal': 0.625, 'changeInRank': 0, 'is_gc_count': 10L, 'win_count': 0L, 'changeInPercentOfTotal': 0.041666666666666963, 'linux_count': 5L, 'hang_count': 5L, 'signature': 'Fake Signature #1', 'versions_count': 2, 'previousPercentOfTotal': 0.58333333333333304, 'plugin_count': 0, 'total_crashes': 8 }, { 'count': 3L, 'mac_count': 1L, 'content_count': 0, 'first_report': lastweek_str, 'previousRank': 1, 'currentRank': 1, 'startup_percent': None, 'versions': 'plugin1, plugin2, plugin3, plugin4, plugin5, plugin6', 'first_report_exact': lastweek_str + ' 00:00:00', 'percentOfTotal': 0.375, 'changeInRank': 0, 'is_gc_count': 1L, 'win_count': 1L, 'changeInPercentOfTotal': -0.041666666666667018, 'linux_count': 1L, 'hang_count': 0L, 'signature': 'Fake Signature #2', 'versions_count': 6, 'previousPercentOfTotal': 0.41666666666666702, 'plugin_count': 0, 'total_crashes': 8 }]]
def test_get(self):
    # Exercise SignatureURLs.get() across product/version filter
    # combinations, including the 'ALL' wildcards and error cases.
    signature_urls = SignatureURLs(config=self.config)
    now = self.now
    # truncate to midnight so the one-day window lines up with fixtures
    now = datetime.datetime(now.year, now.month, now.day)
    now_str = datetimeutil.date_to_string(now)
    #......................................................................
    # Test 1: find one exact match for products and versions passed
    params = {
        "signature": "EMPTY: no crashing thread identified; corrupt dump",
        "start_date": now_str,
        "end_date": now_str,
        "products": ['Firefox'],
        "versions": ["Firefox:10.0", "Firefox:11.0"]
    }
    res = signature_urls.get(**params)
    res_expected = {
        "hits": [{
            "url": "http://deusex.wikia.com/wiki/Praxis_kit",
            "crash_count": 1
        }],
        "total": 1
    }
    eq_(res, res_expected)
    #......................................................................
    # Test 2: Raise error if parameter is not passed
    params = {
        "signature": "",
        "start_date": "",
        "end_date": now_str,
        "products": ['Firefox'],
        "versions": ["Firefox:10.0", "Firefox:11.0"]
    }
    assert_raises(MissingArgumentError, signature_urls.get, **params)
    #......................................................................
    # Test 3: Query returning no results
    params = {
        "signature": "EMPTY: no crashing thread identified; corrupt dump",
        "start_date": now_str,
        "end_date": now_str,
        "products": ['Fennec'],
        "versions": ["Fennec:10.0", "Fennec:11.0"]
    }
    res = signature_urls.get(**params)
    res_expected = {"hits": [], "total": 0}
    eq_(res, res_expected)

    # Test 4: Return results for all version of Firefox
    params = {
        "signature": "EMPTY: no crashing thread identified; corrupt dump",
        "start_date": now_str,
        "end_date": now_str,
        "products": ['Firefox'],
        "versions": ["ALL"]
    }
    res = signature_urls.get(**params)
    res_expected = {
        "hits": [{
            "url": "http://deusex.wikia.com/wiki/Praxis_kit",
            "crash_count": 1
        }, {
            "url": "http://wikipedia.org/Code_Rush",
            "crash_count": 1
        }],
        "total": 2
    }
    eq_(res, res_expected)

    # Test 5: Return results for all products and versions
    params = {
        "signature": "EMPTY: no crashing thread identified; corrupt dump",
        "start_date": now_str,
        "end_date": now_str,
        "products": ['ALL'],
        "versions": ["ALL"]
    }
    res = signature_urls.get(**params)
    res_expected = {
        "hits": [{
            "url": "http://deusex.wikia.com/wiki/Praxis_kit",
            "crash_count": 1
        }, {
            "url": "http://wikipedia.org/Code_Rush",
            "crash_count": 1
        }, {
            "url": "http://arewemobileyet.org/",
            "crash_count": 1
        }],
        "total": 3
    }
    eq_(res, res_expected)

    # Test when we send incorrectly formatted 'versions' parameter
    # (missing the 'Product:' prefix)
    params = {
        "signature": 'Does not exist',
        "start_date": now_str,
        "end_date": now_str,
        "products": ['Firefox'],
        "versions": ['27.0a1']
    }
    assert_raises(BadArgumentError, signature_urls.get, **params)
def test_get_signature_history(self):
    # get_signature_history: happy path, empty result, default date
    # window, and missing-argument validation.
    api = Crashes(config=self.config)
    now = self.now
    lastweek = now - datetime.timedelta(days=7)

    params = {
        'product': 'Firefox',
        'version': '8.0',
        'signature': 'signature1',
        'start_date': lastweek,
        'end_date': now,
    }
    res = api.get_signature_history(**params)

    eq_(len(res['hits']), 2)
    eq_(len(res['hits']), res['total'])

    date = datetimeutil.date_to_string(now.date())
    eq_(res['hits'][0]['date'], date)
    eq_(res['hits'][1]['date'], date)

    eq_(res['hits'][0]['count'], 5)
    eq_(res['hits'][1]['count'], 14)

    # percentages are each row's share of the 19 total crashes
    eq_(
        round(res['hits'][0]['percent_of_total'], 2),
        round(5.0 / 19.0 * 100, 2)
    )
    eq_(
        round(res['hits'][1]['percent_of_total'], 2),
        round(14.0 / 19.0 * 100, 2)
    )

    # Test no results
    params = {
        'product': 'Firefox',
        'version': '9.0',
        'signature': 'signature1',
        'start_date': lastweek,
        'end_date': now,
    }
    res = api.get_signature_history(**params)
    res_expected = {
        'hits': [],
        'total': 0
    }
    eq_(res, res_expected)

    # Test default date parameters
    params = {
        'product': 'Fennec',
        'version': '11.0.1',
        'signature': 'signature3',
    }
    res = api.get_signature_history(**params)
    res_expected = {
        'hits': [
            {
                'date': now.date().isoformat(),
                'count': 14,
                'percent_of_total': 100
            }
        ],
        'total': 1
    }
    eq_(res, res_expected)

    # Test missing parameters
    assert_raises(
        MissingArgumentError,
        api.get_signature_history
    )
    assert_raises(
        MissingArgumentError,
        api.get_signature_history,
        **{'product': 'Firefox'}
    )
    assert_raises(
        MissingArgumentError,
        api.get_signature_history,
        **{'product': 'Firefox', 'version': '8.0'}
    )
    assert_raises(
        MissingArgumentError,
        api.get_signature_history,
        **{'signature': 'signature1', 'version': '8.0'}
    )
def get_list(self, **kwargs):
    """
    List all crashes with a given signature and return them.

    Both `from_date` and `to_date` (and their aliases `from` and `to`)
    are required and can not be greater than 30 days apart.

    Optional arguments: see SearchCommon.get_parameters()
    """
    # aliases
    if "from" in kwargs and "from_date" not in kwargs:
        kwargs["from_date"] = kwargs.get("from")
    if "to" in kwargs and "to_date" not in kwargs:
        kwargs["to_date"] = kwargs.get("to")

    if not kwargs.get('from_date'):
        raise MissingArgumentError('from_date')
    if not kwargs.get('to_date'):
        raise MissingArgumentError('to_date')

    # reject windows wider than 30 days up front
    from_date = datetimeutil.datetimeFromISOdateString(kwargs['from_date'])
    to_date = datetimeutil.datetimeFromISOdateString(kwargs['to_date'])
    span_days = (to_date - from_date).days
    if span_days > 30:
        raise BadArgumentError(
            'Span between from_date and to_date can not be more than 30'
        )

    # start with the default
    sort_order = {
        'key': 'date_processed',
        'direction': 'DESC'
    }
    if 'sort' in kwargs:
        sort_order['key'] = kwargs.pop('sort')
        _recognized_sort_orders = (
            'date_processed',
            'uptime',
            'user_comments',
            'uuid',
            'uuid_text',
            'product',
            'version',
            'build',
            'signature',
            'url',
            'os_name',
            'os_version',
            'cpu_name',
            'cpu_info',
            'address',
            'reason',
            'last_crash',
            'install_age',
            'hangid',
            'process_type',
            'release_channel',
            'install_time',
            'duplicate_of',
        )
        if sort_order['key'] not in _recognized_sort_orders:
            raise BadArgumentError(
                '%s is not a recognized sort order key' % sort_order['key']
            )
        # explicit sorts default to ascending unless reverse=true
        sort_order['direction'] = 'ASC'
        if str(kwargs.get('reverse', '')).lower() == 'true':
            if kwargs.pop('reverse'):
                sort_order['direction'] = 'DESC'

    include_raw_crash = kwargs.get('include_raw_crash') or False
    params = search_common.get_parameters(kwargs)

    if not params["signature"]:
        raise MissingArgumentError('signature')

    params["terms"] = params["signature"]
    params["search_mode"] = "is_exactly"

    # Default mode falls back to starts_with for postgres
    if params["plugin_search_mode"] == "default":
        params["plugin_search_mode"] = "starts_with"

    # Limiting to a signature
    if params["terms"]:
        params["terms"] = self.prepare_terms(params["terms"],
                                             params["search_mode"])

    # Searching for terms in plugins
    if params["report_process"] == "plugin" and params["plugin_terms"]:
        params["plugin_terms"] = " ".join(params["plugin_terms"])
        params["plugin_terms"] = self.prepare_terms(
            params["plugin_terms"],
            params["plugin_search_mode"]
        )

    # Get information about the versions
    util_service = Util(config=self.context)
    params["versions_info"] = util_service.versions_info(**params)

    # Parsing the versions
    params["versions_string"] = params["versions"]
    (params["versions"], params["products"]) = self.parse_versions(
        params["versions"],
        params["products"]
    )

    if hasattr(self.context, 'webapi'):
        context = self.context.webapi
    else:
        # old middleware
        context = self.context

    # Changing the OS ids to OS names
    # (matches on the first three characters, e.g. 'win' -> 'Windows NT')
    for i, elem in enumerate(params["os"]):
        for platform in context.platforms:
            if platform["id"][:3] == elem[:3]:
                params["os"][i] = platform["name"]

    # Creating the parameters for the sql query
    sql_params = {
    }

    # Preparing the different parts of the sql query
    sql_select = """
        SELECT
            r.date_processed,
            r.uptime,
            r.user_comments,
            r.uuid::uuid,
            r.uuid as uuid_text,
            r.product,
            r.version,
            r.build,
            r.signature,
            r.url,
            r.os_name,
            r.os_version,
            r.cpu_name,
            r.cpu_info,
            r.address,
            r.reason,
            r.last_crash,
            r.install_age,
            r.hangid,
            r.process_type,
            r.release_channel,
            (r.client_crash_date - (r.install_age * INTERVAL '1 second'))
                AS install_time
    """
    if include_raw_crash:
        pass
    else:
        # without the raw crash, duplicates are joined directly below
        sql_select += """
            , rd.duplicate_of
        """

    # Outer query used only in the include_raw_crash case: wraps the
    # plain query as a CTE and joins duplicates + raw_crashes onto it.
    # %% survives the later `wrapped_select % sql_query` interpolation,
    # leaving %(from_date)s/%(to_date)s for the DB driver.
    wrapped_select = """
        WITH report_slice AS (
            %s
        ), dupes AS (
            SELECT
                report_slice.uuid,
                rd.duplicate_of
            FROM reports_duplicates rd
            JOIN report_slice ON report_slice.uuid_text = rd.uuid
            WHERE
                rd.date_processed BETWEEN %%(from_date)s AND %%(to_date)s
        )

        SELECT
            rs.*,
            dupes.duplicate_of,
            rc.raw_crash
        FROM report_slice rs
        LEFT OUTER JOIN dupes USING (uuid)
        LEFT OUTER JOIN raw_crashes rc ON
            rs.uuid = rc.uuid
            AND
            rc.date_processed BETWEEN %%(from_date)s AND %%(to_date)s
    """

    sql_from = self.build_reports_sql_from(params)

    if not include_raw_crash:
        sql_from = """%s
            LEFT OUTER JOIN reports_duplicates rd ON r.uuid = rd.uuid
        """ % sql_from

    sql_where, sql_params = self.build_reports_sql_where(
        params,
        sql_params,
        self.context
    )

    sql_order = """
        ORDER BY %(key)s %(direction)s
    """ % sort_order

    sql_limit, sql_params = self.build_reports_sql_limit(
        params,
        sql_params
    )

    # Assembling the query
    # NOTE(review): both branches build the exact same string — the
    # include_raw_crash distinction here is dead; the real difference
    # is applied later via wrapped_select. Candidate for cleanup.
    if include_raw_crash:
        sql_query = "\n".join((
            "/* socorro.external.postgresql.report.Report.list */",
            sql_select, sql_from, sql_where, sql_order, sql_limit)
        )
    else:
        sql_query = "\n".join((
            "/* socorro.external.postgresql.report.Report.list */",
            sql_select, sql_from, sql_where, sql_order, sql_limit)
        )

    # Query for counting the results
    sql_count_query = "\n".join((
        "/* socorro.external.postgresql.report.Report.list */",
        "SELECT count(*)", sql_from, sql_where)
    )

    # Querying the DB
    with self.get_connection() as connection:

        total = self.count(
            sql_count_query,
            sql_params,
            error_message="Failed to count crashes from reports.",
            connection=connection
        )

        # No need to call Postgres if we know there will be no results
        if total:
            if include_raw_crash:
                sql_query = wrapped_select % sql_query
            results = self.query(
                sql_query, sql_params,
                error_message="Failed to retrieve crashes from reports",
                connection=connection
            ).zipped()
        else:
            results = []

    crashes = []
    for crash in results:
        assert crash['uuid'] == crash['uuid_text']
        crash.pop('uuid_text')
        if not include_raw_crash and 'raw_crash' in crash:
            crash.pop('raw_crash')
        # stringify every datetime field; non-dates raise TypeError
        # inside date_to_string and are left untouched
        for i in crash:
            try:
                crash[i] = datetimeutil.date_to_string(crash[i])
            except TypeError:
                pass
        crashes.append(crash)

    return {
        "hits": crashes,
        "total": total
    }
def test_get_list(self):
    # Report.get_list: basic listing, duplicate mapping, product/version
    # filters, signature with special characters, and plugin searches.
    now = self.now
    yesterday = now - datetime.timedelta(days=1)
    yesterday = datetimeutil.date_to_string(yesterday)
    report = Report(config=self.config)

    base_params = {
        'signature': 'sig1',
        'from_date': yesterday,
        'to_date': now,
    }

    # Basic test
    res = report.get_list(**base_params)
    eq_(res['total'], 5)
    eq_(len(res['hits']), 5)

    # duplicates are surfaced via the 'duplicate_of' column
    duplicates_map = dict(
        (x['uuid'], x['duplicate_of'])
        for x in res['hits']
        if x['duplicate_of']
    )
    eq_(
        duplicates_map['60597bdc-5dbe-4409-6b38-4309c0130828'],
        '60597bdc-5dbe-4409-6b38-4309c0130833'
    )

    # Test with products and versions
    params = dict(
        base_params,
        products='WaterWolf',
        versions='WaterWolf:2.0',
    )
    res = report.get_list(**params)
    eq_(res['total'], 1)

    # Test with os, build_ids and reasons
    params = dict(
        base_params,
        products='WaterWolf',
        versions=['WaterWolf:1.0', 'WaterWolf:3.0'],
        os='win',
        build_ids='20001212010203',
        reasons='STACK_OVERFLOW',
    )
    res = report.get_list(**params)
    eq_(res['total'], 2)

    res_expected = {
        'hits': [
            {
                'hangid': None,
                'product': 'WaterWolf',
                'os_name': 'Windows NT',
                'uuid': '60597bdc-5dbe-4409-6b38-4309c0130831',
                'cpu_info': None,
                'url': None,
                'last_crash': None,
                'date_processed': yesterday,
                'cpu_name': None,
                'uptime': None,
                'release_channel': 'Release',
                'process_type': 'browser',
                'os_version': None,
                'reason': 'STACK_OVERFLOW',
                'version': '1.0',
                'build': '20001212010203',
                'install_age': None,
                'signature': 'sig1',
                'install_time': None,
                'duplicate_of': None,
                'address': None,
                'user_comments': None
            },
            {
                'hangid': None,
                'product': 'WaterWolf',
                'os_name': 'Windows NT',
                'uuid': '60597bdc-5dbe-4409-6b38-4309c0130834',
                'cpu_info': None,
                'url': None,
                'last_crash': None,
                'date_processed': yesterday,
                'cpu_name': None,
                'uptime': None,
                'release_channel': 'Release',
                'process_type': 'plugin',
                'os_version': None,
                'reason': 'STACK_OVERFLOW',
                'version': '3.0',
                'build': '20001212010203',
                'install_age': None,
                'signature': 'sig1',
                'install_time': None,
                'duplicate_of': None,
                'address': None,
                'user_comments': None
            }
        ],
        'total': 2
    }
    eq_(res, res_expected)

    # Test with a signature with strange characters
    params = dict(
        base_params,
        signature='this/is+a=C|signature',
    )
    res = report.get_list(**params)
    eq_(res['total'], 1)

    res_expected = {
        'hits': [{
            'hangid': None,
            'product': 'WindBear',
            'os_name': 'Linux',
            'uuid': '60597bdc-5dbe-4409-6b38-4309c0130837',
            'cpu_info': None,
            'url': None,
            'last_crash': None,
            'date_processed': yesterday,
            'cpu_name': None,
            'uptime': None,
            'release_channel': 'Release',
            'process_type': 'browser',
            'os_version': None,
            'reason': 'STACK_OVERFLOW',
            'version': '1.0',
            'build': '20001212010203',
            'install_age': None,
            'signature': 'this/is+a=C|signature',
            'install_time': None,
            'duplicate_of': None,
            'address': None,
            'user_comments': None
        }],
        'total': 1
    }
    eq_(res, res_expected)

    # Test plugins
    params = dict(
        base_params,
        report_process='plugin',
        plugin_in='filename',
        plugin_terms='NPSWF',
        plugin_search_mode='contains',
    )
    res = report.get_list(**params)
    eq_(res['total'], 1)

    # Test plugins with 'starts_with' search mode
    params = dict(
        base_params,
        report_process='plugin',
        plugin_in='name',
        plugin_terms='Flash',
        plugin_search_mode='starts_with',
    )
    res = report.get_list(**params)
    eq_(res['total'], 1)
def get_comments(self, **kwargs):
    """Return a list of comments on crash reports, filtered by
    signatures and other fields.

    See socorro.middleware.search_common.get_parameters() for all
    filters.

    Returns {"hits": [comment dicts], "total": int}.  Each hit carries
    date_processed (serialized to a string), user_comments, uuid and
    email (NULL-ed out when empty).
    """
    params = self.prepare_search_params(**kwargs)

    # Creating the parameters for the sql query
    sql_params = {}

    # Preparing the different parts of the sql query
    # WARNING: sensitive data is returned here (email). When there is
    # an authentication mecanism, a verification should be done here.
    sql_select = """
        SELECT
            r.date_processed,
            r.user_comments,
            r.uuid,
            CASE
                WHEN r.email = '' THEN null
                WHEN r.email IS NULL THEN null
                ELSE r.email
            END
    """
    sql_count = """
        SELECT
            COUNT(r.uuid)
    """
    sql_from = self.build_reports_sql_from(params)
    (sql_where, sql_params) = self.build_reports_sql_where(params,
                                                          sql_params,
                                                          self.context)
    # Only rows that actually have a comment are interesting here.
    sql_where = "%s AND r.user_comments IS NOT NULL" % sql_where

    sql_order = "ORDER BY email ASC, r.date_processed ASC"

    sql_limit, sql_params = self.build_reports_sql_limit(
        params, sql_params)

    # Count matching rows first so the (costlier) full query can be
    # skipped entirely when there is nothing to return.
    sql_count = " ".join(
        ("/* external.postgresql.crashes.Crashes.get_comments */",
         sql_count, sql_from, sql_where))
    count = self.count(sql_count, sql_params)

    comments = []
    if count:
        # Assembling the query
        sql_query = " ".join(
            ("/* external.postgresql.crashes.Crashes.get_comments */",
             sql_select, sql_from, sql_where, sql_order, sql_limit))

        error_message = "Failed to retrieve comments from PostgreSQL"
        results = self.query(sql_query, sql_params,
                             error_message=error_message)

        # Transforming the results into what we want
        for comment in results.zipped():
            comment['date_processed'] = datetimeutil.date_to_string(
                comment['date_processed'])
            comments.append(comment)

    return {"hits": comments, "total": count}
def get_signature_history(self, **kwargs):
    """Return the history of a signature.

    See https://socorro.readthedocs.io/en/latest/middleware.html

    Required arguments: product, version, signature.  Optional:
    start_date / end_date (default: the last 7 days up to now).
    Returns {'hits': [{'date', 'count', 'percent_of_total'}, ...],
    'total': len(hits)} ordered by date, most recent first.
    """
    now = datetimeutil.utc_now()
    lastweek = now - datetime.timedelta(days=7)

    filters = [
        ('product', None, 'str'),
        ('version', None, 'str'),
        ('signature', None, 'str'),
        ('end_date', now, 'datetime'),
        ('start_date', lastweek, 'datetime'),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    # product, version and signature are mandatory.
    for param in ('product', 'version', 'signature'):
        if not params[param]:
            raise MissingArgumentError(param)

    # '##null##' is the magic token meaning "signature IS NULL".
    if params.signature == '##null##':
        signature_where = 'AND signature IS NULL'
    else:
        signature_where = 'AND signature = %(signature)s'

        # '##empty##' is the magic token for the empty-string signature.
        if params.signature == '##empty##':
            params.signature = ''

    # NOTE: %% is used because the string is first %-formatted with
    # signature_where; the doubled markers survive as the psycopg2
    # placeholders %(start_date)s etc.
    sql = """
        /* external.postgresql.crashes.Crashes.get_signature_history */
        WITH hist AS (
            SELECT
                report_date,
                report_count
            FROM
                tcbs JOIN signatures using (signature_id)
                     JOIN product_versions using (product_version_id)
            WHERE
                report_date BETWEEN %%(start_date)s AND %%(end_date)s
                AND product_name = %%(product)s
                AND version_string = %%(version)s
                %s
            GROUP BY
                report_date, report_count
            ORDER BY 1
        ),
        scaling_window AS (
            SELECT
                hist.*,
                SUM(report_count) over () AS total_crashes
            FROM hist
        )
        SELECT
            report_date AS date,
            report_count AS count,
            report_count / total_crashes::float * 100 AS percent_of_total
        FROM scaling_window
        ORDER BY report_date DESC
    """ % signature_where

    error_message = 'Failed to retrieve signature history from PostgreSQL'
    results = self.query(sql, params, error_message=error_message)

    # Transforming the results into what we want
    history = []
    for dot in results.zipped():
        dot['date'] = datetimeutil.date_to_string(dot['date'])
        history.append(dot)

    return {'hits': history, 'total': len(history)}
def get_daily(self, **kwargs):
    """Return crashes by active daily users.

    Two query shapes are used depending on the filters:
    - a "simple" query against the home_page_graph views when neither
      `os` nor `report_type` is given (home page graphs), and
    - a "complex" aggregating query against the crashes_by_user views
      otherwise (daily crashes page).

    Returns {"hits": {"<product>:<version>": {<date>: row, ...}, ...}}.
    """
    now = datetimeutil.utc_now().date()
    lastweek = now - datetime.timedelta(weeks=1)

    filters = [
        ("product", None, "str"),
        ("versions", None, ["list", "str"]),
        ("from_date", lastweek, "date"),
        ("to_date", now, "date"),
        ("os", None, ["list", "str"]),
        ("report_type", None, ["list", "str"]),
        ("date_range_type", "date", "str"),
    ]

    # aliases: accept the short "from"/"to" names without overriding an
    # explicitly provided from_date/to_date.
    if "from" in kwargs and "from_date" not in kwargs:
        kwargs["from_date"] = kwargs.get("from")
    if "to" in kwargs and "to_date" not in kwargs:
        kwargs["to_date"] = kwargs.get("to")

    params = external_common.parse_arguments(filters, kwargs)

    if not params.product:
        raise MissingArgumentError('product')

    if not params.versions or not params.versions[0]:
        raise MissingArgumentError('versions')

    # psycopg2 needs a tuple for the IN %(versions)s clause.
    params.versions = tuple(params.versions)

    # simple version, for home page graphs mainly
    if ((not params.os or not params.os[0]) and
            (not params.report_type or not params.report_type[0])):
        if params.date_range_type == "build":
            table_to_use = "home_page_graph_build_view"
            date_range_field = "build_date"
        else:
            table_to_use = "home_page_graph_view"
            date_range_field = "report_date"

        db_fields = ("product_name", "version_string", date_range_field,
                     "report_count", "adu", "crash_hadu")

        out_fields = ("product", "version", "date", "report_count",
                      "adu", "crash_hadu")

        # %% survives the dict-formatting below as the psycopg2
        # placeholders %(product)s etc.
        sql = """
            /* socorro.external.postgresql.crashes.Crashes.get_daily */
            SELECT %(db_fields)s
            FROM %(table_to_use)s
            WHERE product_name=%%(product)s
            AND version_string IN %%(versions)s
            AND %(date_range_field)s BETWEEN %%(from_date)s AND %%(to_date)s
        """ % {
            "db_fields": ", ".join(db_fields),
            "date_range_field": date_range_field,
            "table_to_use": table_to_use
        }

    # complex version, for daily crashes page mainly
    else:
        if params.date_range_type == "build":
            table_to_use = "crashes_by_user_build_view"
            date_range_field = "build_date"
        else:
            table_to_use = "crashes_by_user_view"
            date_range_field = "report_date"

        db_fields = [
            "product_name",
            "version_string",
            date_range_field,
            "sum(adjusted_report_count)::bigint as report_count",
            "sum(adu)::bigint as adu",
            """crash_hadu(sum(report_count)::bigint, sum(adu)::bigint,
                          avg(throttle)) as crash_hadu""",
            "avg(throttle) as throttle"
        ]

        out_fields = [
            "product", "version", "date", "report_count", "adu",
            "crash_hadu", "throttle"
        ]

        db_group = ["product_name", "version_string", date_range_field]

        sql_where = []
        if params.os and params.os[0]:
            sql_where.append("os_short_name IN %(os)s")
            # os values are matched on their 3-letter lowercase prefix
            # (e.g. 'Windows' -> 'win').
            params.os = tuple(x[0:3].lower() for x in params.os)

        if params.report_type and params.report_type[0]:
            sql_where.append("crash_type_short IN %(report_type)s")
            params.report_type = tuple(params.report_type)

        if sql_where:
            sql_where = "AND %s" % " AND ".join(sql_where)
        else:
            sql_where = ''

        sql = """
            /* socorro.external.postgresql.crashes.Crashes.get_daily */
            SELECT %(db_fields)s
            FROM (
                SELECT
                    product_name,
                    version_string,
                    %(date_range_field)s,
                    os_name,
                    os_short_name,
                    SUM(report_count)::int as report_count,
                    SUM(adjusted_report_count)::int
                        as adjusted_report_count,
                    MAX(adu) as adu,
                    AVG(throttle) as throttle
                FROM %(table_to_use)s
                WHERE product_name=%%(product)s
                AND version_string IN %%(versions)s
                AND %(date_range_field)s BETWEEN
                    %%(from_date)s AND %%(to_date)s
                %(sql_where)s
                GROUP BY product_name, version_string,
                         %(date_range_field)s, os_name, os_short_name
            ) as aggregated_crashes_by_user
        """ % {
            "db_fields": ", ".join(db_fields),
            "date_range_field": date_range_field,
            "table_to_use": table_to_use,
            "sql_where": sql_where
        }

        if db_group:
            sql = "%s GROUP BY %s" % (sql, ", ".join(db_group))

    error_message = "Failed to retrieve daily crashes data from PostgreSQL"
    results = self.query(sql, params, error_message=error_message)

    hits = {}
    for row in results:
        daily_data = dict(zip(out_fields, row))
        # throttle is only present in the complex-query output.
        if "throttle" in daily_data:
            daily_data["throttle"] = float(daily_data["throttle"])
        daily_data["crash_hadu"] = float(daily_data["crash_hadu"])
        daily_data["date"] = datetimeutil.date_to_string(
            daily_data["date"])

        key = "%s:%s" % (daily_data["product"],
                         daily_data["version"])

        # NOTE(review): out_fields never contains "os_short", so this
        # branch looks dead -- confirm before removing.
        if "os_short" in daily_data:
            del daily_data["os_short"]

        if key not in hits:
            hits[key] = {}

        hits[key][daily_data["date"]] = daily_data

    return {"hits": hits}
def get_comments(self, **kwargs):
    """Fetch user comments attached to crash reports.

    Accepts the same filters as the other report-search services; see
    socorro.middleware.search_common.get_parameters() for the full
    list.  Returns {"hits": [comment dicts], "total": int}.
    """
    search_params = self.prepare_search_params(**kwargs)

    # Values bound into the SQL query by the driver.
    db_params = {}

    # WARNING: sensitive data is returned here (email). When there is
    # an authentication mecanism, a verification should be done here.
    select_clause = """
        SELECT
            r.date_processed,
            r.user_comments,
            r.uuid,
            CASE
                WHEN r.email = '' THEN null
                WHEN r.email IS NULL THEN null
                ELSE r.email
            END
    """
    count_clause = """
        SELECT
            COUNT(r.uuid)
    """

    from_clause = self.build_reports_sql_from(search_params)
    where_clause, db_params = self.build_reports_sql_where(
        search_params, db_params, self.context)
    # Only rows that actually carry a comment are interesting.
    where_clause = "%s AND r.user_comments IS NOT NULL" % where_clause

    order_clause = "ORDER BY email ASC, r.date_processed ASC"

    limit_clause, db_params = self.build_reports_sql_limit(
        search_params, db_params)

    marker = "/* external.postgresql.crashes.Crashes.get_comments */"

    # Count first, so the full query is skipped when nothing matches.
    total = self.count(
        " ".join((marker, count_clause, from_clause, where_clause)),
        db_params)

    hits = []
    if total:
        full_query = " ".join((
            marker,
            select_clause,
            from_clause,
            where_clause,
            order_clause,
            limit_clause))
        rows = self.query(
            full_query,
            db_params,
            error_message="Failed to retrieve comments from PostgreSQL")

        # Serialize dates before handing the rows back.
        for entry in rows.zipped():
            entry['date_processed'] = datetimeutil.date_to_string(
                entry['date_processed'])
            hits.append(entry)

    return {
        "hits": hits,
        "total": total
    }
def test_get_signature_history(self):
    """Check get_signature_history(): nominal results, empty results,
    default date range, and mandatory-argument errors."""
    service = Crashes(config=self.config)
    now = self.now
    lastweek = now - datetime.timedelta(days=7)

    # Nominal case: two history points for Firefox 8.0 / signature1.
    response = service.get_signature_history(
        product='Firefox',
        version='8.0',
        signature='signature1',
        start_date=lastweek,
        end_date=now,
    )

    eq_(len(response['hits']), 2)
    eq_(len(response['hits']), response['total'])

    date = datetimeutil.date_to_string(now.date())
    for index in (0, 1):
        eq_(response['hits'][index]['date'], date)

    eq_(response['hits'][0]['count'], 5)
    eq_(response['hits'][1]['count'], 14)

    eq_(
        round(response['hits'][0]['percent_of_total'], 2),
        round(5.0 / 19.0 * 100, 2)
    )
    eq_(
        round(response['hits'][1]['percent_of_total'], 2),
        round(14.0 / 19.0 * 100, 2)
    )

    # Test no results
    response = service.get_signature_history(
        product='Firefox',
        version='9.0',
        signature='signature1',
        start_date=lastweek,
        end_date=now,
    )
    eq_(response, {'hits': [], 'total': 0})

    # Test default date parameters
    response = service.get_signature_history(
        product='Fennec',
        version='11.0.1',
        signature='signature3',
    )
    eq_(response, {
        'hits': [{
            'date': now.date().isoformat(),
            'count': 14,
            'percent_of_total': 100
        }],
        'total': 1
    })

    # Test missing parameters
    assert_raises(MissingArgumentError, service.get_signature_history)
    assert_raises(
        MissingArgumentError,
        service.get_signature_history,
        product='Firefox'
    )
    assert_raises(
        MissingArgumentError,
        service.get_signature_history,
        product='Firefox',
        version='8.0'
    )
    assert_raises(
        MissingArgumentError,
        service.get_signature_history,
        signature='signature1',
        version='8.0'
    )
def setup_data(self):
    """Build the table of backfill test cases.

    Each entry of self.test_source_data maps a backfill kind to the
    parameters it should be called with and the expected result
    ([(True,)] = the stored procedure reported success).  Entries that
    cannot currently run (missing tables/functions in the test
    database) are kept commented out with a TODO explaining why.
    """
    self.now = datetimeutil.utc_now()
    now = self.now.date()
    yesterday = now - datetime.timedelta(days=1)
    lastweek = now - datetime.timedelta(days=7)
    now_str = datetimeutil.date_to_string(now)
    yesterday_str = datetimeutil.date_to_string(yesterday)
    lastweek_str = datetimeutil.date_to_string(lastweek)

    self.test_source_data = {
        # Test backfill_adu
        'adu': {
            'params': {
                "update_day": yesterday_str,
            },
            'res_expected': [(True,)],
        },
        # Test backfill_all_dups
        'all_dups': {
            'params': {
                "start_date": yesterday_str,
                "end_date": now_str,
            },
            'res_expected': [(True,)],
        },
        # Test backfill_build_adu
        'build_adu': {
            'params': {
                "update_day": yesterday_str,
            },
            'res_expected': [(True,)],
        },
        # Test backfill_correlations
        'correlations': {
            'params': {
                "update_day": yesterday_str,
            },
            'res_expected': [(True,)],
        },
        # Test backfill_crashes_by_user_build
        'crashes_by_user_build': {
            'params': {
                "update_day": yesterday_str,
            },
            'res_expected': [(True,)],
        },
        # Test backfill_crashes_by_user
        'crashes_by_user': {
            'params': {
                "update_day": yesterday_str,
            },
            'res_expected': [(True,)],
        },
        # TODO: Test backfill_daily_crashes tries to insert into a table
        # that do not exists. It can be fixed by creating a temporary one.
        #'daily_crashes': {
        #    'params': {
        #        "update_day": now_str,
        #    },
        #    'res_expected': [(True,)],
        #},
        # Test backfill_exploitability
        'exploitability': {
            'params': {
                "update_day": yesterday_str,
            },
            'res_expected': [(True,)],
        },
        # Test backfill_home_page_graph_build
        'home_page_graph_build': {
            'params': {
                "update_day": yesterday_str,
            },
            'res_expected': [(True,)],
        },
        # Test backfill_home_page_graph
        'home_page_graph': {
            'params': {
                "update_day": yesterday_str,
            },
            'res_expected': [(True,)],
        },
        # Test backfill_matviews
        'matviews': {
            'params': {
                "start_date": yesterday_str,
                "reports_clean": 'false',
            },
            'res_expected': [(True,)],
        },
        # Test backfill_rank_compare
        'rank_compare': {
            'params': {
                "update_day": yesterday_str,
            },
            'res_expected': [(True,)],
        },
        # Test backfill_reports_clean
        'reports_clean': {
            'params': {
                "start_date": yesterday_str,
                "end_date": now_str,
            },
            'res_expected': [(True,)],
        },
        # TODO: Test backfill_reports_duplicates tries to insert into a
        # table that do not exists. It can be fixed by using the update
        # function inside of the backfill.
        #'reports_duplicates': {
        #    'params': {
        #        "start_date": yesterday_str,
        #        "end_date": now_str,
        #    },
        #    'res_expected': [(True,)],
        #},
        # TODO: Test backfill_signature_counts tries to insert into
        # tables and to update functions that does not exist.
        #'signature_counts': {
        #    'params': {
        #        "start_date": yesterday_str,
        #        "end_date": now_str,
        #    },
        #    'res_expected': [(True,)],
        #},
        # Test backfill_tcbs_build
        'tcbs_build': {
            'params': {
                "update_day": yesterday_str,
            },
            'res_expected': [(True,)],
        },
        # Test backfill_tcbs
        'tcbs': {
            'params': {
                "update_day": yesterday_str,
            },
            'res_expected': [(True,)],
        },
        # Test backfill_weekly_report_partitions
        'weekly_report_partitions': {
            'params': {
                "start_date": lastweek_str,
                "end_date": now_str,
                "table_name": 'raw_crashes',
            },
            'res_expected': [(True,)],
        },
        # TODO: Update Backfill to support signature_summary backfill
        # through the API
        #'signature_summary_products': {
        #    'params': {
        #        "update_day": yesterday_str,
        #    },
        #    'res_expected': [(True,)],
        #},
        #'signature_summary_installations': {
        #    'params': {
        #        "update_day": yesterday_str,
        #    },
        #    'res_expected': [(True,)],
        #},
        #'signature_summary_uptime': {
        #    'params': {
        #        "update_day": yesterday_str,
        #    },
        #    'res_expected': [(True,)],
        #},
        #'signature_summary_os': {
        #    'params': {
        #        "update_day": yesterday_str,
        #    },
        #    'res_expected': [(True,)],
        #},
        #'signature_summary_process_type': {
        #    'params': {
        #        "update_day": yesterday_str,
        #    },
        #    'res_expected': [(True,)],
        #},
        #'signature_summary_architecture': {
        #    'params': {
        #        "update_day": yesterday_str,
        #    },
        #    'res_expected': [(True,)],
        #},
        #'signature_summary_flash_version': {
        #    'params': {
        #        "update_day": yesterday_str,
        #    },
        #    'res_expected': [(True,)],
        #},
        #'signature_summary_device': {
        #    'params': {
        #        "update_day": yesterday_str,
        #    },
        #    'res_expected': [(True,)],
        #},
        #'signature_summary_graphics': {
        #    'params': {
        #        "update_day": yesterday_str,
        #    },
        #    'res_expected': [(True,)],
        #},
    }
def test_get_list(self):
    """Test Report.get_list() against the fixture data.

    NOTE(review): this appears to duplicate another test_get_list in
    this file -- consider consolidating.  Covers basic signature match,
    product/version filters, os/build_id/reason filters, special
    characters in signatures, and plugin search modes.
    """
    now = self.now
    yesterday = now - datetime.timedelta(days=1)
    # Fixture rows carry yesterday's date; serialize it the same way
    # the service serializes date_processed.
    yesterday = datetimeutil.date_to_string(yesterday)
    report = Report(config=self.config)
    # Filters shared by every sub-case below.
    base_params = {
        'signature': 'sig1',
        'from_date': yesterday,
        'to_date': now,
    }

    # Basic test
    res = report.get_list(**base_params)
    eq_(res['total'], 5)
    eq_(len(res['hits']), 5)
    # Map each duplicate crash uuid to the uuid it duplicates.
    duplicates_map = dict((x['uuid'], x['duplicate_of'])
                          for x in res['hits'] if x['duplicate_of'])
    eq_(duplicates_map['60597bdc-5dbe-4409-6b38-4309c0130828'],
        '60597bdc-5dbe-4409-6b38-4309c0130833')

    # Test with products and versions
    params = dict(
        base_params,
        products='WaterWolf',
        versions='WaterWolf:2.0',
    )
    res = report.get_list(**params)
    eq_(res['total'], 1)

    # Test with os, build_ids and reasons
    params = dict(
        base_params,
        products='WaterWolf',
        versions=['WaterWolf:1.0', 'WaterWolf:3.0'],
        os='win',
        build_ids='20001212010203',
        reasons='STACK_OVERFLOW',
    )
    res = report.get_list(**params)
    eq_(res['total'], 2)
    res_expected = {
        'hits': [{
            'hangid': None,
            'product': 'WaterWolf',
            'os_name': 'Windows NT',
            'uuid': '60597bdc-5dbe-4409-6b38-4309c0130831',
            'cpu_info': None,
            'url': None,
            'last_crash': None,
            'date_processed': yesterday,
            'cpu_name': None,
            'uptime': None,
            'release_channel': 'Release',
            'process_type': 'browser',
            'os_version': None,
            'reason': 'STACK_OVERFLOW',
            'version': '1.0',
            'build': '20001212010203',
            'install_age': None,
            'signature': 'sig1',
            'install_time': None,
            'duplicate_of': None,
            'address': None,
            'user_comments': None
        }, {
            'hangid': None,
            'product': 'WaterWolf',
            'os_name': 'Windows NT',
            'uuid': '60597bdc-5dbe-4409-6b38-4309c0130834',
            'cpu_info': None,
            'url': None,
            'last_crash': None,
            'date_processed': yesterday,
            'cpu_name': None,
            'uptime': None,
            'release_channel': 'Release',
            'process_type': 'plugin',
            'os_version': None,
            'reason': 'STACK_OVERFLOW',
            'version': '3.0',
            'build': '20001212010203',
            'install_age': None,
            'signature': 'sig1',
            'install_time': None,
            'duplicate_of': None,
            'address': None,
            'user_comments': None
        }],
        'total': 2
    }
    eq_(res, res_expected)

    # Test with a signature with strange characters
    params = dict(
        base_params,
        signature='this/is+a=C|signature',
    )
    res = report.get_list(**params)
    eq_(res['total'], 1)
    res_expected = {
        'hits': [{
            'hangid': None,
            'product': 'WindBear',
            'os_name': 'Linux',
            'uuid': '60597bdc-5dbe-4409-6b38-4309c0130837',
            'cpu_info': None,
            'url': None,
            'last_crash': None,
            'date_processed': yesterday,
            'cpu_name': None,
            'uptime': None,
            'release_channel': 'Release',
            'process_type': 'browser',
            'os_version': None,
            'reason': 'STACK_OVERFLOW',
            'version': '1.0',
            'build': '20001212010203',
            'install_age': None,
            'signature': 'this/is+a=C|signature',
            'install_time': None,
            'duplicate_of': None,
            'address': None,
            'user_comments': None
        }],
        'total': 1
    }
    eq_(res, res_expected)

    # Test plugins
    params = dict(
        base_params,
        report_process='plugin',
        plugin_in='filename',
        plugin_terms='NPSWF',
        plugin_search_mode='contains',
    )
    res = report.get_list(**params)
    eq_(res['total'], 1)

    # Test plugins with 'starts_with' search mode
    params = dict(
        base_params,
        report_process='plugin',
        plugin_in='name',
        plugin_terms='Flash',
        plugin_search_mode='starts_with',
    )
    res = report.get_list(**params)
    eq_(res['total'], 1)
def fix(configContext, logger, query, fixer):
    """Run an external fixer program over crash dumps selected by `query`.

    For each ooid returned by fetchOoids(), the raw dump is pulled out
    of HBase, written to a scratch file under /dev/shm, rewritten in
    place by the `fixer` executable, then stored back into HBase and
    queued for reprocessing.  A failure on one dump is logged and the
    loop moves on to the next one.

    :param configContext: config with hbaseHost/hbasePort/hbaseTimeout
    :param logger: logger used for progress and error reporting
    :param query: passed through to fetchOoids() to select the rows
    :param fixer: path to the executable that repairs a dump file
    :returns: the date_processed of the last row handled, so the caller
        can checkpoint progress
    """
    rows, last_date_processed = fetchOoids(configContext, logger, query)
    hbc = hbaseClient.HBaseConnectionForCrashReports(
        configContext.hbaseHost,
        configContext.hbasePort,
        configContext.hbaseTimeout,
        logger=logger
    )
    for row in rows:
        try:
            ooid, last_date_processed = row
            logger.info('fixing ooid: %s' % ooid)
            dump = hbc.get_dump(ooid)
            # /dev/shm is RAM-backed tmpfs: fast scratch space for the
            # fixer to work on.
            fname = '/dev/shm/%s.dump' % ooid
            with open(fname, 'wb') as orig_dump_file:
                orig_dump_file.write(dump)
            logger.debug('wrote dump file: %s' % fname)
            # The fixer rewrites the file in place; check_call raises
            # CalledProcessError on a non-zero exit status.
            # (Fix: this used to be logged as 'fixed' *before* the
            # fixer had actually run.)
            subprocess.check_call([fixer, fname])
            logger.debug('fixed dump file: %s' % fname)
            logger.debug('fixer: %s' % fixer)
            with open(fname, 'rb') as fixed_dump_file:
                fixed_dump = fixed_dump_file.read()
            hbc.put_fixed_dump(
                ooid,
                fixed_dump,
                add_to_unprocessed_queue=True,
                submitted_timestamp=date_to_string(utc_now())
            )
            logger.debug('put fixed dump file into hbase: %s' % fname)
            os.unlink(fname)
            logger.debug('removed dump file: %s' % fname)
        except Exception:
            # Fix: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit and could block shutdown.
            # Best-effort semantics are kept: log and continue.
            socorrolib.lib.util.reportExceptionAndContinue(logger)
    return last_date_processed
def test_get(self):
    """Verify ServerStatus.get() returns every status row by default
    and honors the 'duration' parameter."""
    status = server_status.ServerStatus(config=self.config)

    # Four fixture timestamps on the same day, 15 minutes apart,
    # newest first (date1 at noon).
    date1 = datetime.datetime(
        self.now.year,
        self.now.month,
        self.now.day,
        12, 00, 00,
        tzinfo=self.now.tzinfo
    )
    date2 = date1 - datetime.timedelta(minutes=15)
    date3 = date2 - datetime.timedelta(minutes=15)
    date4 = date3 - datetime.timedelta(minutes=15)

    # Serialize the way the service serializes its dates.
    date1 = datetimeutil.date_to_string(date1)
    date2 = datetimeutil.date_to_string(date2)
    date3 = datetimeutil.date_to_string(date3)
    date4 = datetimeutil.date_to_string(date4)

    #......................................................................
    # Test 1: default behavior
    res = status.get()
    res_expected = {
        "hits": [
            {
                "id": 1,
                "date_recently_completed": date1,
                "date_oldest_job_queued": date1,
                "avg_process_sec": 2,
                "avg_wait_sec": 5,
                "waiting_job_count": 3,
                "processors_count": 2,
                "date_created": date1
            },
            {
                "id": 2,
                "date_recently_completed": date2,
                "date_oldest_job_queued": date2,
                "avg_process_sec": 3,
                "avg_wait_sec": 3.12,
                "waiting_job_count": 2,
                "processors_count": 2,
                "date_created": date2
            },
            {
                "id": 3,
                "date_recently_completed": date3,
                "date_oldest_job_queued": date3,
                "avg_process_sec": 1,
                "avg_wait_sec": 2,
                "waiting_job_count": 4,
                "processors_count": 1,
                "date_created": date3
            },
            {
                "id": 4,
                "date_recently_completed": None,
                "date_oldest_job_queued": None,
                "avg_process_sec": 1,
                "avg_wait_sec": 2,
                "waiting_job_count": 4,
                "processors_count": 1,
                "date_created": date4
            }
        ],
        "socorro_revision": "42",
        "breakpad_revision": "43",
        "schema_revision": "aaaaaaaaaaaa",
        "total": 4
    }
    eq_(res, res_expected)

    #......................................................................
    # Test 2: with duration
    params = {
        "duration": 1
    }
    res = status.get(**params)
    res_expected = {
        "hits": [
            {
                "id": 1,
                "date_recently_completed": date1,
                "date_oldest_job_queued": date1,
                "avg_process_sec": 2,
                "avg_wait_sec": 5,
                "waiting_job_count": 3,
                "processors_count": 2,
                "date_created": date1
            }
        ],
        "socorro_revision": "42",
        "breakpad_revision": "43",
        "schema_revision": "aaaaaaaaaaaa",
        "total": 1
    }
    eq_(res, res_expected)
def test_get(self):
    """Verify SignatureURLs.get(): exact product/version match, missing
    argument errors, empty results, the 'ALL' wildcard for versions and
    products, and malformed version strings."""
    signature_urls = SignatureURLs(config=self.config)
    now = self.now
    # Truncate to midnight (naive) so the whole fixture day is covered.
    now = datetime.datetime(now.year, now.month, now.day)
    now_str = datetimeutil.date_to_string(now)

    #......................................................................
    # Test 1: find one exact match for products and versions passed
    params = {
        "signature": "EMPTY: no crashing thread identified; corrupt dump",
        "start_date": now_str,
        "end_date": now_str,
        "products": ['Firefox'],
        "versions": ["Firefox:10.0", "Firefox:11.0"]
    }
    res = signature_urls.get(**params)
    res_expected = {
        "hits": [
            {
                "url": "http://deusex.wikia.com/wiki/Praxis_kit",
                "crash_count": 1
            }
        ],
        "total": 1
    }
    eq_(res, res_expected)

    #......................................................................
    # Test 2: Raise error if parameter is not passed
    params = {
        "signature": "",
        "start_date": "",
        "end_date": now_str,
        "products": ['Firefox'],
        "versions": ["Firefox:10.0", "Firefox:11.0"]
    }
    assert_raises(MissingArgumentError, signature_urls.get, **params)

    #......................................................................
    # Test 3: Query returning no results
    params = {
        "signature": "EMPTY: no crashing thread identified; corrupt dump",
        "start_date": now_str,
        "end_date": now_str,
        "products": ['Fennec'],
        "versions": ["Fennec:10.0", "Fennec:11.0"]
    }
    res = signature_urls.get(**params)
    res_expected = {
        "hits": [],
        "total": 0
    }
    eq_(res, res_expected)

    # Test 4: Return results for all version of Firefox
    params = {
        "signature": "EMPTY: no crashing thread identified; corrupt dump",
        "start_date": now_str,
        "end_date": now_str,
        "products": ['Firefox'],
        "versions": ["ALL"]
    }
    res = signature_urls.get(**params)
    res_expected = {
        "hits": [
            {
                "url": "http://deusex.wikia.com/wiki/Praxis_kit",
                "crash_count": 1
            },
            {
                "url": "http://wikipedia.org/Code_Rush",
                "crash_count": 1
            }
        ],
        "total": 2
    }
    eq_(res, res_expected)

    # Test 5: Return results for all products and versions
    params = {
        "signature": "EMPTY: no crashing thread identified; corrupt dump",
        "start_date": now_str,
        "end_date": now_str,
        "products": ['ALL'],
        "versions": ["ALL"]
    }
    res = signature_urls.get(**params)
    res_expected = {
        "hits": [
            {
                "url": "http://deusex.wikia.com/wiki/Praxis_kit",
                "crash_count": 1
            },
            {
                "url": "http://wikipedia.org/Code_Rush",
                "crash_count": 1
            },
            {
                "url": "http://arewemobileyet.org/",
                "crash_count": 1
            }
        ],
        "total": 3
    }
    eq_(res, res_expected)

    # Test when we send incorrectly formatted 'versions' parameter
    # (missing the 'Product:' prefix).
    params = {
        "signature": 'Does not exist',
        "start_date": now_str,
        "end_date": now_str,
        "products": ['Firefox'],
        "versions": ['27.0a1']
    }
    assert_raises(BadArgumentError, signature_urls.get, **params)
def get(self, **kwargs): """Return a list of results and aggregations based on parameters. The list of accepted parameters (with types and default values) is in the database and can be accessed with the super_search_fields service. """ # Require that the list of fields be passed. if not kwargs.get('_fields'): raise MissingArgumentError('_fields') self.all_fields = kwargs['_fields'] # Filter parameters and raise potential errors. params = self.get_parameters(**kwargs) # Find the indices to use to optimize the elasticsearch query. indices = self.get_indices(params['date']) # Create and configure the search object. search = Search( using=self.get_connection(), index=indices, doc_type=self.config.elasticsearch.elasticsearch_doctype, ) # Create filters. filters = [] histogram_intervals = {} for field, sub_params in params.items(): sub_filters = None for param in sub_params: if param.name.startswith('_'): # By default, all param values are turned into lists, # even when they have and can have only one value. # For those we know there can only be one value, # so we just extract it from the made-up list. if param.name == '_results_offset': results_from = param.value[0] elif param.name == '_results_number': results_number = param.value[0] if results_number > 1000: raise BadArgumentError( '_results_number', msg=('_results_number cannot be greater ' 'than 1,000')) if results_number < 0: raise BadArgumentError( '_results_number', msg='_results_number cannot be negative') elif param.name == '_facets_size': facets_size = param.value[0] for f in self.histogram_fields: if param.name == '_histogram_interval.%s' % f: histogram_intervals[f] = param.value[0] # Don't use meta parameters in the query. 
continue field_data = self.all_fields[param.name] name = '%s.%s' % (field_data['namespace'], field_data['in_database_name']) if param.data_type in ('date', 'datetime'): param.value = datetimeutil.date_to_string(param.value) elif param.data_type == 'enum': param.value = [x.lower() for x in param.value] elif param.data_type == 'str' and not param.operator: param.value = [x.lower() for x in param.value] # Operators needing wildcards, and the associated value # transformation with said wildcards. operator_wildcards = { '~': '*%s*', # contains '^': '%s*', # starts with '$': '*%s' # ends with } # Operators needing ranges, and the associated Elasticsearch # comparison operator. operator_range = { '>': 'gt', '<': 'lt', '>=': 'gte', '<=': 'lte', } args = {} filter_type = 'term' filter_value = None if not param.operator: # contains one of the terms if len(param.value) == 1: val = param.value[0] if not isinstance(val, basestring) or ' ' not in val: # There's only one term and no white space, this # is a simple term filter. filter_value = val else: # If the term contains white spaces, we want to # perform a phrase query. filter_type = 'query' args = Q( 'simple_query_string', query=param.value[0], fields=[name], default_operator='and', ).to_dict() else: # There are several terms, this is a terms filter. 
filter_type = 'terms' filter_value = param.value elif param.operator == '=': # is exactly if field_data['has_full_version']: name = '%s.full' % name filter_value = param.value elif param.operator in operator_range: filter_type = 'range' filter_value = { operator_range[param.operator]: param.value } elif param.operator == '__null__': filter_type = 'missing' args['field'] = name elif param.operator == '__true__': filter_type = 'term' filter_value = True elif param.operator == '@': filter_type = 'regexp' if field_data['has_full_version']: name = '%s.full' % name filter_value = param.value elif param.operator in operator_wildcards: filter_type = 'query' # Wildcard operations are better applied to a non-analyzed # field (called "full") if there is one. if field_data['has_full_version']: name = '%s.full' % name q_args = {} q_args[name] = (operator_wildcards[param.operator] % param.value) query = Q('wildcard', **q_args) args = query.to_dict() if filter_value is not None: args[name] = filter_value if args: new_filter = F(filter_type, **args) if param.operator_not: new_filter = ~new_filter if sub_filters is None: sub_filters = new_filter elif filter_type == 'range': sub_filters &= new_filter else: sub_filters |= new_filter continue if sub_filters is not None: filters.append(sub_filters) search = search.filter(F('bool', must=filters)) # Restricting returned fields. fields = [] # We keep track of the requested columns in order to make sure we # return those column names and not aliases for example. self.request_columns = [] for param in params['_columns']: for value in param.value: if not value: continue self.request_columns.append(value) field_name = self.get_field_name(value, full=False) fields.append(field_name) search = search.fields(fields) # Sorting. sort_fields = [] for param in params['_sort']: for value in param.value: if not value: continue # Values starting with a '-' are sorted in descending order. 
# In order to retrieve the database name of the field, we # must first remove the '-' part and add it back later. # Example: given ['product', '-version'], the results will be # sorted by ascending product then descending version. desc = False if value.startswith('-'): desc = True value = value[1:] field_name = self.get_field_name(value) if desc: # The underlying library understands that '-' means # sorting in descending order. field_name = '-' + field_name sort_fields.append(field_name) search = search.sort(*sort_fields) # Pagination. results_to = results_from + results_number search = search[results_from:results_to] # Create facets. if facets_size: self._create_aggregations(params, search, facets_size, histogram_intervals) # Query and compute results. hits = [] if params['_return_query'][0].value[0]: # Return only the JSON query that would be sent to elasticsearch. return { 'query': search.to_dict(), 'indices': indices, } errors = [] # We call elasticsearch with a computed list of indices, based on # the date range. However, if that list contains indices that do not # exist in elasticsearch, an error will be raised. We thus want to # remove all failing indices until we either have a valid list, or # an empty list in which case we return no result. while True: try: results = search.execute() for hit in results: hits.append(self.format_fields(hit.to_dict())) total = search.count() aggregations = getattr(results, 'aggregations', {}) if aggregations: aggregations = self.format_aggregations(aggregations) shards = getattr(results, '_shards', {}) break # Yay! Results! except NotFoundError, e: missing_index = re.findall(BAD_INDEX_REGEX, e.error)[0] if missing_index in indices: del indices[indices.index(missing_index)] else: # Wait what? An error caused by an index that was not # in the request? That should never happen, but in case # it does, better know it. 
raise errors.append({ 'type': 'missing_index', 'index': missing_index, }) if indices: # Update the list of indices and try again. # Note: we need to first empty the list of indices before # updating it, otherwise the removed indices never get # actually removed. search = search.index().index(*indices) else: # There is no index left in the list, return an empty # result. hits = [] total = 0 aggregations = {} shards = None break
def setup_data(self):
    """Populate ``self.test_source_data`` with signature-summary test cases.

    Each key names one scenario; the value holds the service call
    parameters (``params``) and the rows the service is expected to
    return (``res_expected``).  All dates are derived from ``self.now``
    so the fixtures always fall inside the queried date ranges.
    """
    now = self.now.date()
    yesterday = now - datetime.timedelta(days=1)
    lastweek = now - datetime.timedelta(days=7)
    # String forms are what the service accepts / returns.
    now_str = datetimeutil.date_to_string(now)
    yesterday_str = datetimeutil.date_to_string(yesterday)
    lastweek_str = datetimeutil.date_to_string(lastweek)
    self.test_source_data = {
        # Test 1: find exact match for one product version and signature
        "products": {
            "params": {
                "versions": "Firefox:8.0",
                "report_type": "products",
                "signature": "Fake Signature #1",
                "start_date": lastweek_str,
                "end_date": now_str,
            },
            "res_expected": [
                {"product_name": "Firefox",
                 "version_string": "8.0",
                 "report_count": 1,
                 "percentage": "50.000"},
                {"product_name": "Firefox",
                 "version_string": "9.0",
                 "report_count": 1,
                 "percentage": "50.000"},
            ],
        },
        # Test 2: find ALL matches for all product versions and signature
        "products_no_version": {
            "params": {
                "report_type": "products",
                "signature": "Fake Signature #1",
                "start_date": lastweek_str,
                "end_date": now_str,
            },
            "res_expected": [
                {"product_name": "Firefox",
                 "version_string": "8.0",
                 "report_count": 1,
                 "percentage": "50.000"},
                {"product_name": "Firefox",
                 "version_string": "9.0",
                 "report_count": 1,
                 "percentage": "50.000"},
            ],
        },
        # Test 3: find architectures reported for a given version and a
        # signature
        "architecture": {
            "params": {
                "versions": "Firefox:8.0",
                "report_type": "architecture",
                "signature": "Fake Signature #1",
                "start_date": lastweek_str,
                "end_date": now_str,
            },
            "res_expected": [{"category": "amd64",
                              "report_count": 1.0,
                              "percentage": "100.000"}],
        },
        # Test 4: find architectures reported for a signature with no
        # specific version.
        "architecture_no_version": {
            "params": {
                "report_type": "architecture",
                "signature": "Fake Signature #1",
                "start_date": lastweek_str,
                "end_date": now_str,
            },
            "res_expected": [{"category": "amd64",
                              "report_count": 2,
                              "percentage": "100.000"}],
        },
        # Test 5: find flash_versions reported for specific version and
        # a signature
        "flash_versions": {
            "params": {
                "versions": "Firefox:8.0",
                "report_type": "flash_version",
                "signature": "Fake Signature #1",
                "start_date": lastweek_str,
                "end_date": now_str,
            },
            "res_expected": [{"category": "1.0",
                              "report_count": 1.0,
                              "percentage": "100.000"}],
        },
        # Test 6: find flash_versions reported with a signature and without
        # a specific version
        "flash_versions_no_version": {
            "params": {
                "report_type": "flash_version",
                "signature": "Fake Signature #1",
                "start_date": lastweek_str,
                "end_date": now_str,
            },
            "res_expected": [{"category": "1.0",
                              "report_count": 2.0,
                              "percentage": "100.000"}],
        },
        # Test 7: find installations reported for a signature
        "distinct_install": {
            "params": {
                "versions": "Firefox:8.0",
                "report_type": "distinct_install",
                "signature": "Fake Signature #1",
                "start_date": lastweek_str,
                "end_date": now_str,
            },
            "res_expected": [
                {"product_name": "Firefox",
                 "version_string": "8.0",
                 "crashes": 10,
                 "installations": 8}
            ],
        },
        # Test 8: find os_version_strings reported for a signature
        "os": {
            "params": {
                "versions": "Firefox:8.0",
                "report_type": "os",
                "signature": "Fake Signature #1",
                "start_date": lastweek_str,
                "end_date": now_str,
            },
            "res_expected": [{"category": "Windows NT 6.4",
                              "report_count": 1,
                              "percentage": "100.000"}],
        },
        # Test 9: find process_type reported for a signature
        "process_type": {
            "params": {
                "versions": "Firefox:8.0",
                "report_type": "process_type",
                "signature": "Fake Signature #1",
                "start_date": lastweek_str,
                "end_date": now_str,
            },
            "res_expected": [{"category": "plugin",
                              "report_count": 1,
                              "percentage": "100.000"}],
        },
        # Test 10: find uptime reported for signature
        "uptime": {
            "params": {
                "versions": "Firefox:8.0",
                "report_type": "uptime",
                "signature": "Fake Signature #1",
                "start_date": lastweek_str,
                "end_date": now_str,
            },
            "res_expected": [{"category": "15-30 minutes",
                              "report_count": 1,
                              "percentage": "100.000"}],
        },
        # Test 11: find exploitability reported for signature
        "exploitability": {
            "params": {
                "versions": "Firefox:8.0",
                "report_type": "exploitability",
                "signature": "Fake Signature #1",
                "start_date": lastweek_str,
                "end_date": now_str,
            },
            "res_expected": [
                {
                    "low_count": 3,
                    "high_count": 5,
                    "null_count": 1,
                    "none_count": 2,
                    "report_date": yesterday_str,
                    "medium_count": 4,
                }
            ],
        },
        # Test 12: find mobile devices reported for signature with a
        # specific version
        "devices": {
            "params": {
                "versions": "Firefox:8.0",
                "report_type": "devices",
                "signature": "Fake Signature #1",
                "start_date": lastweek_str,
                "end_date": now_str,
            },
            "res_expected": [
                {
                    "cpu_abi": "armeabi-v7a",
                    "manufacturer": "samsung",
                    "model": "GT-P5100",
                    "version": "16 (REL)",
                    "report_count": 123,
                    "percentage": "100.000",
                }
            ],
        },
        # Test 13: find mobile devices reported for signature
        "devices_no_version": {
            "params": {
                "report_type": "devices",
                "signature": "Fake Signature #1",
                "start_date": lastweek_str,
                "end_date": now_str,
            },
            "res_expected": [
                {
                    "cpu_abi": "armeabi-v7a",
                    "manufacturer": "samsung",
                    "model": "GT-P5100",
                    "version": "16 (REL)",
                    "report_count": 246,
                    "percentage": "100.000",
                }
            ],
        },
        # Test 14: find graphics adapters reported for signature with a
        # specific version
        "graphics": {
            "params": {
                "versions": "Firefox:8.0",
                "report_type": "graphics",
                "signature": "Fake Signature #1",
                "start_date": lastweek_str,
                "end_date": now_str,
            },
            "res_expected": [
                {
                    "vendor_hex": "0x1234",
                    "adapter_hex": "0x5678",
                    "vendor_name": "Test Vendor",
                    "adapter_name": "Test Adapter",
                    "report_count": 123,
                    "percentage": "100.000",
                }
            ],
        },
    }
def get(self, **kwargs):
    """Return the current state of the server and the revisions of
    Socorro and Breakpad.

    Accepts one optional argument, ``duration`` (int, default 12):
    how many of the most recent server_status rows to return.
    """
    params = external_common.parse_arguments(
        [
            ("duration", 12, "int"),
        ],
        kwargs
    )

    # Most recent server_status rows, newest first.
    status_sql = """
    /* socorro.external.postgresql.server_status.ServerStatus.get */
    SELECT
        id,
        date_recently_completed,
        date_oldest_job_queued,
        avg_process_sec,
        avg_wait_sec,
        waiting_job_count,
        processors_count,
        date_created
    FROM server_status
    ORDER BY date_created DESC
    LIMIT %(duration)s
    """
    rows = self.query(
        status_sql,
        params,
        error_message="Failed to retrieve server status data from PostgreSQL"
    )

    date_columns = (
        "date_recently_completed",
        "date_oldest_job_queued",
        "date_created",
    )
    stats = []
    for row in rows.zipped():
        # Turn dates into strings for later JSON encoding.  A TypeError
        # from date_to_string is ignored and leaves the value as-is
        # (e.g. a NULL column).
        for column in date_columns:
            try:
                row[column] = datetimeutil.date_to_string(row[column])
            except TypeError:
                pass
        stats.append(row)

    # Current database schema revision, tracked by alembic.
    version_sql = """
    /* socorro.external.postgresql.server_status.ServerStatus.get */
    SELECT version_num
    FROM alembic_version
    """
    version_rows = self.query(
        version_sql,
        error_message="Failed to retrieve database version from PostgreSQL"
    )
    if version_rows:
        schema_revision, = version_rows[0]
    else:
        logger.warning("No version_num was found in table alembic_version")
        schema_revision = "Unknown"

    # Current breakpad and socorro revisions, shipped as package data.
    socorro_revision = resource_string('socorro', 'socorro_revision.txt')
    breakpad_revision = resource_string('socorro', 'breakpad_revision.txt')

    return {
        "hits": stats,
        "total": len(stats),
        "socorro_revision": socorro_revision,
        "breakpad_revision": breakpad_revision,
        "schema_revision": schema_revision,
    }
def test_get(self):
    """Exercise Products.get(): exact product:version lookups, the full
    product listing when no filter is given, and empty / invalid
    version filters.
    """
    products = Products(config=self.config)
    now = self.now.date()
    lastweek = now - datetime.timedelta(days=7)
    nextweek = now + datetime.timedelta(days=7)
    now_str = datetimeutil.date_to_string(now)
    lastweek_str = datetimeutil.date_to_string(lastweek)
    nextweek_str = datetimeutil.date_to_string(nextweek)

    #......................................................................
    # Test 1: find one exact match for one product and one version
    params = {
        "versions": "Firefox:8.0"
    }
    res = products.get(**params)
    res_expected = {
        "hits": [
            {
                "is_featured": False,
                "version": "8.0",
                "throttle": 10.0,
                "start_date": now_str,
                "end_date": now_str,
                "has_builds": False,
                "product": "Firefox",
                "build_type": "Release"
            }
        ],
        "total": 1
    }
    # make sure the 'throttle' is a floating point number
    ok_(isinstance(res['hits'][0]['throttle'], float))
    # only the key sets are compared here, not the values
    eq_(
        sorted(res['hits'][0]),
        sorted(res_expected['hits'][0])
    )

    #......................................................................
    # Test 2: Find two different products with their correct versions
    params = {
        "versions": ["Firefox:8.0", "Thunderbird:10.0.2b"]
    }
    res = products.get(**params)
    res_expected = {
        "hits": [
            {
                "product": "Firefox",
                "version": "8.0",
                "start_date": now_str,
                "end_date": now_str,
                "is_featured": False,
                "build_type": "Release",
                "throttle": 10.0,
                "has_builds": True
            },
            {
                "product": "Thunderbird",
                "version": "10.0.2b",
                "start_date": now_str,
                "end_date": now_str,
                "is_featured": False,
                "build_type": "Release",
                "throttle": 10.0,
                "has_builds": False
            }
        ],
        "total": 2
    }
    # again, only key sets are compared
    eq_(
        sorted(res['hits'][0]),
        sorted(res_expected['hits'][0])
    )

    #......................................................................
    # Test 3: empty result, no products:version found
    params = {
        "versions": "Firefox:14.0"
    }
    res = products.get(**params)
    res_expected = {
        "hits": [],
        "total": 0
    }
    eq_(res, res_expected)

    #......................................................................
    # Test 4: Test products list is returned with no parameters
    params = {}
    res = products.get(**params)
    res_expected = {
        "products": ["Firefox", "Thunderbird", "Fennec"],
        "hits": {
            "Firefox": [
                {
                    "product": "Firefox",
                    "version": "9.0",
                    "start_date": now_str,
                    "end_date": nextweek_str,
                    "throttle": 100.00,
                    "featured": True,
                    "release": "Nightly",
                    "has_builds": True
                },
                {
                    "product": "Firefox",
                    "version": "8.0",
                    "start_date": lastweek_str,
                    "end_date": lastweek_str,
                    "throttle": 10.00,
                    "featured": False,
                    "release": "Release",
                    "has_builds": False
                }
            ],
            "Thunderbird": [
                {
                    "product": "Thunderbird",
                    "version": "10.0.2b",
                    "start_date": now_str,
                    "end_date": nextweek_str,
                    "throttle": 10.00,
                    "featured": False,
                    "release": "Release",
                    "has_builds": False,
                }
            ],
            "Fennec": [
                {
                    "product": "Fennec",
                    "version": "12.0b1",
                    "start_date": now_str,
                    "end_date": nextweek_str,
                    "throttle": 100.00,
                    "featured": False,
                    "release": "Beta",
                    "has_builds": True
                },
                {
                    "product": "Fennec",
                    "version": "11.0.1",
                    "start_date": now_str,
                    "end_date": now_str,
                    "throttle": 10.00,
                    "featured": False,
                    "release": "Release",
                    "has_builds": False
                }
            ]
        },
        "total": 5
    }
    eq_(res['total'], res_expected['total'])
    eq_(
        sorted(res['products']),
        sorted(res_expected['products'])
    )
    eq_(sorted(res['hits']), sorted(res_expected['hits']))
    # compare keys first, then full values, per product
    for product in sorted(res['hits'].keys()):
        eq_(
            sorted(res['hits'][product][0]),
            sorted(res_expected['hits'][product][0])
        )
        eq_(res['hits'][product], res_expected['hits'][product])

    # test returned order of versions
    assert len(res['hits']['Fennec']) == 2
    eq_(res['hits']['Fennec'][0]['version'], '12.0b1')
    eq_(res['hits']['Fennec'][1]['version'], '11.0.1')

    #......................................................................
    # Test 5: An invalid versions list is passed, all versions are returned
    params = {
        'versions': [1]
    }
    res = products.get(**params)
    eq_(res['total'], 5)
def get(self, **kwargs):
    """
    Return product information, or version information for one
    or more product:version combinations.

    If a "versions" argument is passed (e.g. ["Firefox:8.0"]) the
    call is delegated to _get_versions(); otherwise every product
    with all of its versions is returned.
    """
    warnings.warn(
        'This class is deprecated. Use ProductVersions instead.',
        DeprecationWarning
    )
    filters = [
        ("versions", None, ["list", "str"]),  # for legacy, to be removed
    ]
    params = external_common.parse_arguments(filters, kwargs)

    if params.versions and params.versions[0]:
        return self._get_versions(params)

    sql = """
    /* socorro.external.postgresql.products.Products.get */
    SELECT
        product_name AS product,
        version_string AS version,
        start_date,
        end_date,
        throttle,
        is_featured AS featured,
        build_type AS release,
        has_builds
    FROM product_info
    ORDER BY product_sort, version_sort DESC, channel_sort
    """
    error_message = "Failed to retrieve products/versions from PostgreSQL"
    results = self.query(sql, error_message=error_message)

    products = []
    versions_per_product = {}

    for version in results.zipped():
        # Turn dates into strings for JSON encoding.  A TypeError
        # (e.g. NULL column) leaves the value untouched.  The two
        # previously-duplicated try/except blocks are folded into one
        # loop.
        for key in ('start_date', 'end_date'):
            try:
                version[key] = datetimeutil.date_to_string(version[key])
            except TypeError:
                pass

        version['throttle'] = float(version['throttle'])

        product = version['product']
        # keep first-seen ordering of products
        if product not in products:
            products.append(product)
        versions_per_product.setdefault(product, []).append(version)

    return {
        'products': products,
        'hits': versions_per_product,
        'total': len(results)
    }
def test_get_comments(self):
    """Exercise Crashes.get_comments(): plain results, empty results,
    the required-argument error, rapid-beta version handling, and
    pagination.
    """
    crashes = Crashes(config=self.config)
    today = datetimeutil.date_to_string(self.now)

    # Test 1: results
    params = {
        "signature": "js",
    }
    res_expected = {
        "hits": [
            {
                "email": None,
                "date_processed": today,
                "uuid": "def",
                "user_comments": "hello"
            },
            {
                "email": None,
                "date_processed": today,
                "uuid": "hij",
                "user_comments": "hah"
            }
        ],
        "total": 2
    }
    res = crashes.get_comments(**params)
    eq_(res, res_expected)

    # Test 2: no results
    params = {
        "signature": "blah",
    }
    res_expected = {
        "hits": [],
        "total": 0
    }
    res = crashes.get_comments(**params)
    eq_(res, res_expected)

    # Test 3: missing parameter
    assert_raises(MissingArgumentError, crashes.get_comments)

    # Test a valid rapid beta versions
    params = {
        "signature": "cool_sig",
        "products": "Firefox",
        "versions": "Firefox:14.0b",
    }
    res_expected = {
        'hits': [
            {
                'email': None,
                'date_processed': today,
                'uuid': 'nop',
                'user_comments': 'hi!'
            }
        ],
        'total': 1
    }
    res = crashes.get_comments(**params)
    eq_(res, res_expected)

    # Test an invalid rapid beta versions
    params = {
        "signature": "cool_sig",
        "versions": "WaterWolf:2.0b",
    }
    res_expected = {
        'hits': [
            {
                'email': None,
                'date_processed': today,
                'uuid': 'qrs',
                'user_comments': 'meow'
            }
        ],
        'total': 1
    }
    res = crashes.get_comments(**params)
    eq_(res, res_expected)

    # use pagination
    # (the redundant re-assignments of result_number/result_offset that
    # duplicated the dict literal above have been removed)
    params = {
        "signature": "cool_sig",
        "result_number": 1,
        "result_offset": 0,
    }
    res = crashes.get_comments(**params)
    eq_(len(res['hits']), 1)
    eq_(res['total'], 2)
def _get_versions(self, params):
    """
    Return product information for one or more product:version
    combinations.

    params["versions"] is a list of "product:version" strings;
    parse_versions() expands it into a flat
    [product0, version0, product1, version1, ...] list which is
    split back into two parallel lists here.
    """
    # The second return value of parse_versions was previously
    # captured into a dead `products` variable; it is discarded here.
    params["products_versions"], _ = self.parse_versions(
        params["versions"], []
    )

    sql_select = """
        SELECT product_name as product,
               version_string as version,
               start_date,
               end_date,
               is_featured,
               build_type,
               throttle::float,
               has_builds
        FROM product_info
    """

    # Split the flat [product, version, product, version, ...] list
    # into two parallel lists using slicing.
    pairs = params["products_versions"]
    products_list = pairs[0::2]
    versions_list = pairs[1::2]

    # One parameterized (product, version) equality pair per requested
    # combination; placeholders are numbered product0/version0, ...
    sql_where = [
        "(product_name = %(product" + str(x) + ")s "
        "AND version_string = %(version" + str(x) + ")s)"
        for x in range(len(products_list))
    ]

    sql_params = {}
    sql_params = add_param_to_dict(sql_params, "product", products_list)
    sql_params = add_param_to_dict(sql_params, "version", versions_list)

    if sql_where:
        sql_query = " WHERE ".join((sql_select, " OR ".join(sql_where)))
    else:
        sql_query = sql_select

    sql_query = """
    /* socorro.external.postgresql.Products.get_versions */
    %s
    """ % sql_query

    error_message = "Failed to retrieve products versions from PostgreSQL"
    results = self.query(sql_query, sql_params,
                         error_message=error_message)

    products = []
    for product in results.zipped():
        # Serialize dates for JSON encoding.
        product['start_date'] = datetimeutil.date_to_string(
            product['start_date']
        )
        product['end_date'] = datetimeutil.date_to_string(
            product['end_date']
        )
        products.append(product)

    return {
        "hits": products,
        "total": len(products)
    }
def get_daily(self, **kwargs):
    """Return crashes by active daily users.

    Required arguments: product (str) and versions (list of version
    strings).  Optional: from_date / to_date (default: last week to
    today; "from"/"to" are accepted aliases), os, report_type, and
    date_range_type ("date" or "build").

    Returns {"hits": {"<product>:<version>": {<date>: row, ...}, ...}}.
    Raises MissingArgumentError when product or versions is absent.
    """
    now = datetimeutil.utc_now().date()
    lastweek = now - datetime.timedelta(weeks=1)

    filters = [
        ("product", None, "str"),
        ("versions", None, ["list", "str"]),
        ("from_date", lastweek, "date"),
        ("to_date", now, "date"),
        ("os", None, ["list", "str"]),
        ("report_type", None, ["list", "str"]),
        ("date_range_type", "date", "str"),
    ]

    # aliases
    if "from" in kwargs and "from_date" not in kwargs:
        kwargs["from_date"] = kwargs.get("from")
    if "to" in kwargs and "to_date" not in kwargs:
        kwargs["to_date"] = kwargs.get("to")

    params = external_common.parse_arguments(filters, kwargs)

    if not params.product:
        raise MissingArgumentError('product')

    if not params.versions or not params.versions[0]:
        raise MissingArgumentError('versions')

    # a tuple is required for the SQL "IN %(versions)s" clause
    params.versions = tuple(params.versions)

    # simple version, for home page graphs mainly
    if ((not params.os or not params.os[0]) and
            (not params.report_type or not params.report_type[0])):
        if params.date_range_type == "build":
            table_to_use = "home_page_graph_build_view"
            date_range_field = "build_date"
        else:
            table_to_use = "home_page_graph_view"
            date_range_field = "report_date"

        # db_fields are column names in the view; out_fields are the
        # corresponding keys in the returned dicts (same order).
        db_fields = ("product_name", "version_string", date_range_field,
                     "report_count", "adu", "crash_hadu")

        out_fields = ("product", "version", "date", "report_count",
                      "adu", "crash_hadu")

        # %% escapes produce real %(...)s placeholders for the later
        # parameterized query; single % are filled in right here.
        sql = """
        /* socorro.external.postgresql.crashes.Crashes.get_daily */
        SELECT %(db_fields)s
        FROM %(table_to_use)s
        WHERE product_name=%%(product)s
        AND version_string IN %%(versions)s
        AND %(date_range_field)s BETWEEN %%(from_date)s AND %%(to_date)s
        """ % {"db_fields": ", ".join(db_fields),
               "date_range_field": date_range_field,
               "table_to_use": table_to_use}

    # complex version, for daily crashes page mainly
    else:
        if params.date_range_type == "build":
            table_to_use = "crashes_by_user_build_view"
            date_range_field = "build_date"
        else:
            table_to_use = "crashes_by_user_view"
            date_range_field = "report_date"

        db_fields = [
            "product_name",
            "version_string",
            date_range_field,
            "sum(adjusted_report_count)::bigint as report_count",
            "sum(adu)::bigint as adu",
            """crash_hadu(sum(report_count)::bigint, sum(adu)::bigint,
                          avg(throttle)) as crash_hadu""",
            "avg(throttle) as throttle"
        ]

        out_fields = ["product", "version", "date", "report_count",
                      "adu", "crash_hadu", "throttle"]

        db_group = ["product_name", "version_string", date_range_field]

        # optional filters on OS and crash/report type
        sql_where = []
        if params.os and params.os[0]:
            sql_where.append("os_short_name IN %(os)s")
            # the view stores the 3-letter lowercased short name
            params.os = tuple(x[0:3].lower() for x in params.os)

        if params.report_type and params.report_type[0]:
            sql_where.append("crash_type_short IN %(report_type)s")
            params.report_type = tuple(params.report_type)

        if sql_where:
            sql_where = "AND %s" % " AND ".join(sql_where)
        else:
            sql_where = ''

        # inner query pre-aggregates per OS, outer SELECT re-aggregates
        # across OSes via the GROUP BY appended below
        sql = """
        /* socorro.external.postgresql.crashes.Crashes.get_daily */
        SELECT %(db_fields)s
        FROM (
            SELECT product_name,
                   version_string,
                   %(date_range_field)s,
                   os_name,
                   os_short_name,
                   SUM(report_count)::int as report_count,
                   SUM(adjusted_report_count)::int
                       as adjusted_report_count,
                   MAX(adu) as adu,
                   AVG(throttle) as throttle
            FROM %(table_to_use)s
            WHERE product_name=%%(product)s
            AND version_string IN %%(versions)s
            AND %(date_range_field)s BETWEEN
                %%(from_date)s AND %%(to_date)s
            %(sql_where)s
            GROUP BY product_name, version_string,
                     %(date_range_field)s, os_name, os_short_name
        ) as aggregated_crashes_by_user
        """ % {"db_fields": ", ".join(db_fields),
               "date_range_field": date_range_field,
               "table_to_use": table_to_use,
               "sql_where": sql_where}

        if db_group:
            sql = "%s GROUP BY %s" % (sql, ", ".join(db_group))

    error_message = "Failed to retrieve daily crashes data from PostgreSQL"
    results = self.query(sql, params, error_message=error_message)

    hits = {}
    for row in results:
        daily_data = dict(zip(out_fields, row))
        # "throttle" only exists in the complex-query result rows
        if "throttle" in daily_data:
            daily_data["throttle"] = float(daily_data["throttle"])
        daily_data["crash_hadu"] = float(daily_data["crash_hadu"])
        daily_data["date"] = datetimeutil.date_to_string(
            daily_data["date"]
        )
        key = "%s:%s" % (daily_data["product"],
                         daily_data["version"])

        if "os_short" in daily_data:
            del daily_data["os_short"]

        if key not in hits:
            hits[key] = {}

        hits[key][daily_data["date"]] = daily_data

    return {"hits": hits}
def test_date_to_string_fail():
    # date_to_string() is given a plain string instead of a
    # date/datetime object, which is expected to fail.
    # NOTE(review): no assertion is visible here — presumably a nose
    # @raises(TypeError) decorator sits above this function; confirm,
    # otherwise this test passes vacuously.
    datetimeutil.date_to_string('2012-01-03')
def get(self, **kwargs):
    """Return product information, or version information for one or
    more product:version combinations.

    Delegates to _get_versions() when a non-empty "versions" argument
    is given; otherwise lists every product with all of its versions.
    """
    warnings.warn('This class is deprecated. Use ProductVersions instead.',
                  DeprecationWarning)

    params = external_common.parse_arguments(
        [
            ("versions", None, ["list", "str"]),  # for legacy, to be removed
        ],
        kwargs
    )

    if params.versions and params.versions[0]:
        return self._get_versions(params)

    sql = """
    /* socorro.external.postgresql.products.Products.get */
    SELECT product_name AS product,
           version_string AS version,
           start_date,
           end_date,
           throttle,
           is_featured AS featured,
           build_type AS release,
           has_builds
    FROM product_info
    ORDER BY product_sort, version_sort DESC, channel_sort
    """
    results = self.query(
        sql,
        error_message="Failed to retrieve products/versions from PostgreSQL"
    )

    product_names = []
    hits_by_product = {}
    for row in results.zipped():
        # Stringify the dates for JSON encoding; a NULL value raises
        # TypeError from date_to_string and is kept unchanged.
        try:
            row['end_date'] = datetimeutil.date_to_string(row['end_date'])
        except TypeError:
            pass
        try:
            row['start_date'] = datetimeutil.date_to_string(row['start_date'])
        except TypeError:
            pass
        row['throttle'] = float(row['throttle'])

        name = row['product']
        # first-seen order of products is preserved
        if name not in product_names:
            product_names.append(name)
        hits_by_product.setdefault(name, []).append(row)

    return {
        'products': product_names,
        'hits': hits_by_product,
        'total': len(results)
    }
def get(self, **kwargs): """Return a list of results and aggregations based on parameters. The list of accepted parameters (with types and default values) is in the database and can be accessed with the super_search_fields service. """ # Require that the list of fields be passed. if not kwargs.get('_fields'): raise MissingArgumentError('_fields') self.all_fields = kwargs['_fields'] # Filter parameters and raise potential errors. params = self.get_parameters(**kwargs) # Find the indices to use to optimize the elasticsearch query. indices = self.get_indices(params['date']) # Create and configure the search object. search = Search( using=self.get_connection(), index=indices, doc_type=self.config.elasticsearch.elasticsearch_doctype, ) # Create filters. filters = [] histogram_intervals = {} for field, sub_params in params.items(): sub_filters = None for param in sub_params: if param.name.startswith('_'): # By default, all param values are turned into lists, # even when they have and can have only one value. # For those we know there can only be one value, # so we just extract it from the made-up list. if param.name == '_results_offset': results_from = param.value[0] elif param.name == '_results_number': results_number = param.value[0] if results_number > 1000: raise BadArgumentError( '_results_number', msg=( '_results_number cannot be greater ' 'than 1,000' ) ) if results_number < 0: raise BadArgumentError( '_results_number', msg='_results_number cannot be negative' ) elif param.name == '_facets_size': facets_size = param.value[0] for f in self.histogram_fields: if param.name == '_histogram_interval.%s' % f: histogram_intervals[f] = param.value[0] # Don't use meta parameters in the query. 
continue field_data = self.all_fields[param.name] name = '%s.%s' % ( field_data['namespace'], field_data['in_database_name'] ) if param.data_type in ('date', 'datetime'): param.value = datetimeutil.date_to_string(param.value) elif param.data_type == 'enum': param.value = [x.lower() for x in param.value] elif param.data_type == 'str' and not param.operator: param.value = [x.lower() for x in param.value] # Operators needing wildcards, and the associated value # transformation with said wildcards. operator_wildcards = { '~': '*%s*', # contains '^': '%s*', # starts with '$': '*%s' # ends with } # Operators needing ranges, and the associated Elasticsearch # comparison operator. operator_range = { '>': 'gt', '<': 'lt', '>=': 'gte', '<=': 'lte', } args = {} filter_type = 'term' filter_value = None if not param.operator: # contains one of the terms if len(param.value) == 1: val = param.value[0] if not isinstance(val, basestring) or ' ' not in val: # There's only one term and no white space, this # is a simple term filter. filter_value = val else: # If the term contains white spaces, we want to # perform a phrase query. filter_type = 'query' args = Q( 'simple_query_string', query=param.value[0], fields=[name], default_operator='and', ).to_dict() else: # There are several terms, this is a terms filter. 
filter_type = 'terms' filter_value = param.value elif param.operator == '=': # is exactly if field_data['has_full_version']: name = '%s.full' % name filter_value = param.value elif param.operator in operator_range: filter_type = 'range' filter_value = { operator_range[param.operator]: param.value } elif param.operator == '__null__': filter_type = 'missing' args['field'] = name elif param.operator == '__true__': filter_type = 'term' filter_value = True elif param.operator == '@': filter_type = 'regexp' if field_data['has_full_version']: name = '%s.full' % name filter_value = param.value elif param.operator in operator_wildcards: filter_type = 'query' # Wildcard operations are better applied to a non-analyzed # field (called "full") if there is one. if field_data['has_full_version']: name = '%s.full' % name q_args = {} q_args[name] = ( operator_wildcards[param.operator] % param.value ) query = Q('wildcard', **q_args) args = query.to_dict() if filter_value is not None: args[name] = filter_value if args: new_filter = F(filter_type, **args) if param.operator_not: new_filter = ~new_filter if sub_filters is None: sub_filters = new_filter elif filter_type == 'range': sub_filters &= new_filter else: sub_filters |= new_filter continue if sub_filters is not None: filters.append(sub_filters) search = search.filter(F('bool', must=filters)) # Restricting returned fields. fields = [] # We keep track of the requested columns in order to make sure we # return those column names and not aliases for example. self.request_columns = [] for param in params['_columns']: for value in param.value: if not value: continue self.request_columns.append(value) field_name = self.get_field_name(value, full=False) fields.append(field_name) search = search.fields(fields) # Sorting. sort_fields = [] for param in params['_sort']: for value in param.value: if not value: continue # Values starting with a '-' are sorted in descending order. 
# In order to retrieve the database name of the field, we # must first remove the '-' part and add it back later. # Example: given ['product', '-version'], the results will be # sorted by ascending product then descending version. desc = False if value.startswith('-'): desc = True value = value[1:] field_name = self.get_field_name(value) if desc: # The underlying library understands that '-' means # sorting in descending order. field_name = '-' + field_name sort_fields.append(field_name) search = search.sort(*sort_fields) # Pagination. results_to = results_from + results_number search = search[results_from:results_to] # Create facets. for param in params['_facets']: self._add_second_level_aggs( param, search.aggs, facets_size, histogram_intervals, ) # Create sub-aggregations. for key in params: if not key.startswith('_aggs.'): continue fields = key.split('.')[1:] if fields[0] not in self.all_fields: continue base_bucket = self._get_fields_agg(fields[0], facets_size) sub_bucket = base_bucket for field in fields[1:]: # For each field, make a bucket, then include that bucket in # the latest one, and then make that new bucket the latest. if field in self.all_fields: tmp_bucket = self._get_fields_agg(field, facets_size) sub_bucket.bucket(field, tmp_bucket) sub_bucket = tmp_bucket for value in params[key]: self._add_second_level_aggs( value, sub_bucket, facets_size, histogram_intervals, ) search.aggs.bucket(fields[0], base_bucket) # Create histograms. for f in self.histogram_fields: key = '_histogram.%s' % f if params.get(key): histogram_bucket = self._get_histogram_agg( f, histogram_intervals ) for param in params[key]: self._add_second_level_aggs( param, histogram_bucket, facets_size, histogram_intervals, ) search.aggs.bucket('histogram_%s' % f, histogram_bucket) # Query and compute results. hits = [] if params['_return_query'][0].value[0]: # Return only the JSON query that would be sent to elasticsearch. 
return { 'query': search.to_dict(), 'indices': indices, } # We call elasticsearch with a computed list of indices, based on # the date range. However, if that list contains indices that do not # exist in elasticsearch, an error will be raised. We thus want to # remove all failing indices until we either have a valid list, or # an empty list in which case we return no result. while True: try: results = search.execute() for hit in results: hits.append(self.format_fields(hit.to_dict())) total = search.count() aggregations = self.format_aggregations(results.aggregations) break # Yay! Results! except NotFoundError, e: missing_index = re.findall(BAD_INDEX_REGEX, e.error)[0] if missing_index in indices: del indices[indices.index(missing_index)] else: # Wait what? An error caused by an index that was not # in the request? That should never happen, but in case # it does, better know it. raise if indices: # Update the list of indices and try again. # Note: we need to first empty the list of indices before # updating it, otherwise the removed indices never get # actually removed. search = search.index().index(*indices) else: # There is no index left in the list, return an empty # result. hits = [] total = 0 aggregations = {} break