def assert_supersearch_no_errors():
    """Make sure an uncached SuperSearch query doesn't have any errors"""
    supersearch = SuperSearch()
    # We don't want any caching this time
    supersearch.cache_seconds = 0
    results = supersearch.get(
        product=settings.DEFAULT_PRODUCT,
        _results_number=1,
        _columns=['uuid'],
        _facets_size=1,
    )
    assert not results['errors'], results['errors']
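# A minimal usage sketch (hypothetical): wiring assert_supersearch_no_errors()
# into a Django health-check view. The view name and the idea that it is
# exposed at a monitoring endpoint are assumptions for illustration only;
# only assert_supersearch_no_errors() itself comes from the code above.
from django import http


def supersearch_healthcheck(request):  # hypothetical view name
    try:
        assert_supersearch_no_errors()
    except AssertionError as exc:
        # Surface the SuperSearch errors so a monitoring system can alert.
        return http.JsonResponse({'ok': False, 'errors': str(exc)}, status=500)
    return http.JsonResponse({'ok': True})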
def get(self, **kwargs):
    form = forms.NewSignaturesForm(kwargs)
    if not form.is_valid():
        return http.JsonResponse({"errors": form.errors}, status=400)

    start_date = form.cleaned_data["start_date"]
    end_date = form.cleaned_data["end_date"]
    not_after = form.cleaned_data["not_after"]
    product = form.cleaned_data["product"] or productlib.get_default_product().name

    # Fill in default values for all date parameters.
    if not end_date:
        end_date = datetime.datetime.utcnow().date() + datetime.timedelta(days=1)
    if not start_date:
        start_date = end_date - datetime.timedelta(days=8)
    if not not_after:
        not_after = start_date - datetime.timedelta(days=14)

    api = SuperSearch()
    signatures_number = 100

    # First, get a list of the top signatures that appeared during
    # the period we are interested in.
    params = {
        "product": product,
        "version": form.cleaned_data["version"],
        "date": [">=" + start_date.isoformat(), "<" + end_date.isoformat()],
        "_facets": "signature",
        "_facets_size": signatures_number,
        "_results_number": 0,
    }
    data = api.get(**params)

    signatures = []
    for signature in data["facets"]["signature"]:
        signatures.append(signature["term"])

    # Now verify whether those signatures also appeared during some
    # previous period of time.
    params["date"] = [
        ">=" + not_after.isoformat(),
        "<" + start_date.isoformat(),
    ]

    # Filter on exactly the signatures that we have.
    params["signature"] = ["=" + x for x in signatures]

    data = api.get(**params)

    # If any of those signatures shows up in these results, it already
    # existed before the period we are interested in, so it is not new.
    # Remove it from the list of new signatures.
    for signature in data["facets"]["signature"]:
        if signature["term"] in signatures:
            signatures.remove(signature["term"])

    # All remaining signatures are "new" ones.
    return {"hits": signatures, "total": len(signatures)}
def graphics_report(request):
    """Return a CSV output of all crashes for a particular day and a
    particular product."""
    if (not request.user.is_active or
            not request.user.has_perm('crashstats.run_long_queries')):
        return http.HttpResponseForbidden(
            "You must have the 'Run long queries' permission")

    form = forms.GraphicsReportForm(request.GET)
    if not form.is_valid():
        return http.HttpResponseBadRequest(str(form.errors))

    batch_size = 1000
    product = form.cleaned_data['product'] or settings.DEFAULT_PRODUCT
    date = form.cleaned_data['date']
    params = {
        'product': product,
        'date': [
            '>={}'.format(date.strftime('%Y-%m-%d')),
            '<{}'.format(
                (date + datetime.timedelta(days=1)).strftime('%Y-%m-%d'))
        ],
        '_columns': (
            'signature',
            'uuid',
            'date',
            'product',
            'version',
            'build_id',
            'platform',
            'platform_version',
            'cpu_name',
            'cpu_info',
            'address',
            'uptime',
            'topmost_filenames',
            'reason',
            'app_notes',
            'release_channel',
        ),
        '_results_number': batch_size,
        '_results_offset': 0,
    }

    api = SuperSearch()
    # Do the first query. That'll give us the total and the first page's
    # worth of crashes.
    data = api.get(**params)
    assert 'hits' in data

    accept_gzip = 'gzip' in request.META.get('HTTP_ACCEPT_ENCODING', '')
    response = http.HttpResponse(content_type='text/csv')
    out = BytesIO()
    writer = utils.UnicodeWriter(out, delimiter='\t')
    writer.writerow(GRAPHICS_REPORT_HEADER)
    pages = data['total'] // batch_size
    # if there is a remainder, add one more page
    if data['total'] % batch_size:
        pages += 1
    alias = {
        'crash_id': 'uuid',
        'os_name': 'platform',
        'os_version': 'platform_version',
        'date_processed': 'date',
        'build': 'build_id',
        'uptime_seconds': 'uptime',
    }
    # Make sure we don't have an alias for a header we don't need
    alias_excess = set(alias.keys()) - set(GRAPHICS_REPORT_HEADER)
    if alias_excess:
        raise ValueError('Not all keys in the map of aliases are in '
                         'the header ({!r})'.format(alias_excess))

    def get_value(row, key):
        """Return the appropriate output from the row of data, one key
        at a time. The output is what's used in writing the CSV file.

        The reason for doing these "hacks" is to match what used to be
        done with the SELECT statement in SQL in the ancient, but now
        replaced, report.
        """
        value = row.get(alias.get(key, key))
        if key == 'cpu_info':
            value = '{cpu_name} | {cpu_info}'.format(
                cpu_name=row.get('cpu_name', ''),
                cpu_info=row.get('cpu_info', ''),
            )
        if value is None:
            return ''
        if key == 'date_processed':
            value = timezone.make_aware(
                datetime.datetime.strptime(
                    value.split('.')[0], '%Y-%m-%dT%H:%M:%S'))
            value = value.strftime('%Y%m%d%H%M')
        if key == 'uptime_seconds' and value == 0:
            value = ''
        return value

    for page in range(pages):
        if page > 0:
            params['_results_offset'] = batch_size * page
            data = api.get(**params)

        for row in data['hits']:
            # Each row is a dict; turn it into a list in the exact order
            # of the GRAPHICS_REPORT_HEADER tuple above.
            # However, because the csv writer module doesn't "understand"
            # Python's None, we replace those with '' so the CSV doesn't
            # contain the word 'None' where the data is None.
            writer.writerow(
                [get_value(row, x) for x in GRAPHICS_REPORT_HEADER])

    payload = out.getvalue()
    if accept_gzip:
        zbuffer = BytesIO()
        zfile = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=zbuffer)
        zfile.write(payload)
        zfile.close()
        compressed_payload = zbuffer.getvalue()
        response.write(compressed_payload)
        response['Content-Length'] = len(compressed_payload)
        response['Content-Encoding'] = 'gzip'
    else:
        response.write(payload)
        response['Content-Length'] = len(payload)
    return response
def get(self, **kwargs):
    form = forms.NewSignaturesForm(kwargs)
    if not form.is_valid():
        return http.JsonResponse({'errors': form.errors}, status=400)

    start_date = form.cleaned_data['start_date']
    end_date = form.cleaned_data['end_date']
    not_after = form.cleaned_data['not_after']
    product = form.cleaned_data['product'] or settings.DEFAULT_PRODUCT

    # Fill in default values for all date parameters.
    if not end_date:
        end_date = (datetime.datetime.utcnow().date() +
                    datetime.timedelta(days=1))
    if not start_date:
        start_date = end_date - datetime.timedelta(days=8)
    if not not_after:
        not_after = start_date - datetime.timedelta(days=14)

    api = SuperSearch()
    signatures_number = 100

    # First, get a list of the top signatures that appeared during
    # the period we are interested in.
    params = {
        'product': product,
        'version': form.cleaned_data['version'],
        'date': [
            '>=' + start_date.isoformat(),
            '<' + end_date.isoformat(),
        ],
        '_facets': 'signature',
        '_facets_size': signatures_number,
        '_results_number': 0,
    }
    data = api.get(**params)

    signatures = []
    for signature in data['facets']['signature']:
        signatures.append(signature['term'])

    # Now verify whether those signatures also appeared during some
    # previous period of time.
    params['date'] = [
        '>=' + not_after.isoformat(),
        '<' + start_date.isoformat(),
    ]

    # Filter on exactly the signatures that we have.
    params['signature'] = ['=' + x for x in signatures]

    data = api.get(**params)

    # If any of those signatures shows up in these results, it already
    # existed before the period we are interested in, so it is not new.
    # Remove it from the list of new signatures.
    for signature in data['facets']['signature']:
        if signature['term'] in signatures:
            signatures.remove(signature['term'])

    # All remaining signatures are "new" ones.
    return {'hits': signatures, 'total': len(signatures)}
def get(self, **kwargs):
    form = forms.NewSignaturesForm(kwargs)
    if not form.is_valid():
        return http.JsonResponse({
            'errors': form.errors
        }, status=400)

    start_date = form.cleaned_data['start_date']
    end_date = form.cleaned_data['end_date']
    not_after = form.cleaned_data['not_after']
    product = form.cleaned_data['product'] or settings.DEFAULT_PRODUCT

    # Fill in default values for all date parameters.
    if not end_date:
        end_date = (
            datetime.datetime.utcnow().date() + datetime.timedelta(days=1)
        )
    if not start_date:
        start_date = end_date - datetime.timedelta(days=8)
    if not not_after:
        not_after = start_date - datetime.timedelta(days=14)

    api = SuperSearch()
    signatures_number = 100

    # First, get a list of the top signatures that appeared during
    # the period we are interested in.
    params = {
        'product': product,
        'version': form.cleaned_data['version'],
        'date': [
            '>=' + start_date.isoformat(),
            '<' + end_date.isoformat(),
        ],
        '_facets': 'signature',
        '_facets_size': signatures_number,
        '_results_number': 0,
    }
    data = api.get(**params)

    signatures = []
    for signature in data['facets']['signature']:
        signatures.append(signature['term'])

    # Now verify whether those signatures also appeared during some
    # previous period of time.
    params['date'] = [
        '>=' + not_after.isoformat(),
        '<' + start_date.isoformat(),
    ]

    # Filter on exactly the signatures that we have.
    params['signature'] = ['=' + x for x in signatures]

    data = api.get(**params)

    # If any of those signatures shows up in these results, it already
    # existed before the period we are interested in, so it is not new.
    # Remove it from the list of new signatures.
    for signature in data['facets']['signature']:
        if signature['term'] in signatures:
            signatures.remove(signature['term'])

    # All remaining signatures are "new" ones.
    return {
        'hits': signatures,
        'total': len(signatures)
    }
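# Sketch of the core "new signatures" idea used by get() above, independent
# of SuperSearch: take the top signature terms from the recent window and
# drop any that also appear in the earlier comparison window. The helper
# name and the signature terms below are made up for illustration; the real
# code does the equivalent with two faceted SuperSearch queries.
def new_signatures(recent_terms, earlier_terms):
    earlier = set(earlier_terms)
    # Keep the recent ordering, which mirrors list.remove() in get() above.
    return [term for term in recent_terms if term not in earlier]


assert new_signatures(
    ['sig_a', 'sig_b', 'sig_c'],   # facet terms from [start_date, end_date)
    ['sig_b'],                     # facet terms from [not_after, start_date)
) == ['sig_a', 'sig_c']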
def handle(self, **options):
    start_datetime = options.get("last_success")
    end_datetime = options.get("run_time")

    if end_datetime:
        end_datetime = parse_datetime(end_datetime)
    else:
        end_datetime = timezone.now()

    if start_datetime:
        start_datetime = parse_datetime(start_datetime)
        # When run via cronrun, start_datetime is based on the last success
        # and we want to increase the window by 10 minutes to get some
        # overlap with the previous run
        start_datetime = start_datetime - datetime.timedelta(minutes=10)
    else:
        # Default to end_datetime - 90 minutes
        start_datetime = end_datetime - datetime.timedelta(minutes=90)

    # Truncate seconds and microseconds
    start_datetime = start_datetime.replace(second=0, microsecond=0)
    end_datetime = end_datetime.replace(second=0, microsecond=0)

    if not end_datetime > start_datetime:
        raise CommandError("start time must be before end time.")

    # Do a super search and get the signature, buildid, and date processed
    # for every crash in the range
    all_fields = SuperSearchFieldsData().get()
    api = SuperSearch()

    self.stdout.write("Looking at %s to %s" % (start_datetime, end_datetime))

    params = {
        "date": [
            f">={start_datetime.isoformat()}",
            f"<{end_datetime.isoformat()}",
        ],
        "_columns": ["signature", "build_id", "date"],
        "_facets_size": 0,
        "_fields": all_fields,
        # Set up first page
        "_results_offset": 0,
        "_results_number": MAX_PAGE,
    }

    results = {}
    crashids_count = 0

    while True:
        resp = api.get(**params)
        hits = resp["hits"]
        for hit in hits:
            crashids_count += 1

            if not hit["build_id"]:
                # Not all crashes have a build id, so skip the ones that don't.
                continue

            if hit["signature"] in results:
                data = results[hit["signature"]]
                data["build_id"] = min(data["build_id"], hit["build_id"])
                data["date"] = min(data["date"], hit["date"])
            else:
                data = {
                    "signature": hit["signature"],
                    "build_id": hit["build_id"],
                    "date": hit["date"],
                }
            results[hit["signature"]] = data

        # If there are no more crash ids to get, stop paging
        total = resp["total"]
        if not hits or crashids_count >= total:
            break

        # Get the next page, but only as many results as we need
        params["_results_offset"] += MAX_PAGE
        params["_results_number"] = min(
            # MAX_PAGE is the maximum we can request
            MAX_PAGE,
            # The number of results Super Search hasn't returned to us yet
            total - crashids_count,
        )

    signature_data = results.values()

    # Save signature data to the db
    for item in signature_data:
        if options["dry_run"]:
            self.stdout.write(
                "Inserting/updating signature (%s, %s, %s)"
                % (item["signature"], item["date"], item["build_id"])
            )
        else:
            self.update_crashstats_signature(
                signature=item["signature"],
                report_date=item["date"],
                report_build=item["build_id"],
            )

    self.stdout.write("Inserted/updated %d signatures." % len(signature_data))
def handle(self, **options):
    start_datetime = options.get('last_success')
    end_datetime = options.get('run_time')

    if end_datetime:
        end_datetime = parse_datetime(end_datetime)
    else:
        end_datetime = timezone.now()

    if start_datetime:
        start_datetime = parse_datetime(start_datetime)
        # When run via cronrun, start_datetime is based on the last success
        # and we want to increase the window by 10 minutes to get some
        # overlap with the previous run
        start_datetime = start_datetime - datetime.timedelta(minutes=10)
    else:
        # Default to end_datetime - 90 minutes
        start_datetime = end_datetime - datetime.timedelta(minutes=90)

    # Truncate seconds and microseconds
    start_datetime = start_datetime.replace(second=0, microsecond=0)
    end_datetime = end_datetime.replace(second=0, microsecond=0)

    if not end_datetime > start_datetime:
        raise CommandError('start time must be before end time.')

    # Do a super search and get the signature, buildid, and date processed
    # for every crash in the range
    all_fields = SuperSearchFieldsData().get()
    api = SuperSearch()

    self.stdout.write('Looking at %s to %s' % (start_datetime, end_datetime))

    params = {
        'date': [
            '>={}'.format(start_datetime.isoformat()),
            '<{}'.format(end_datetime.isoformat()),
        ],
        '_columns': ['signature', 'build_id', 'date'],
        '_facets_size': 0,
        '_fields': all_fields,
        # Set up first page
        '_results_offset': 0,
        '_results_number': MAX_PAGE,
    }

    results = {}
    crashids_count = 0

    while True:
        resp = api.get(**params)
        hits = resp['hits']
        for hit in hits:
            crashids_count += 1

            if not hit['build_id']:
                # Not all crashes have a build id, so skip the ones that don't.
                continue

            if hit['signature'] in results:
                data = results[hit['signature']]
                data['build_id'] = min(data['build_id'], hit['build_id'])
                data['date'] = min(data['date'], hit['date'])
            else:
                data = {
                    'signature': hit['signature'],
                    'build_id': hit['build_id'],
                    'date': hit['date']
                }
            results[hit['signature']] = data

        # If there are no more crash ids to get, stop paging
        total = resp['total']
        if not hits or crashids_count >= total:
            break

        # Get the next page, but only as many results as we need
        params['_results_offset'] += MAX_PAGE
        params['_results_number'] = min(
            # MAX_PAGE is the maximum we can request
            MAX_PAGE,
            # The number of results Super Search hasn't returned to us yet
            total - crashids_count
        )

    signature_data = results.values()

    # Save signature data to the db
    for item in signature_data:
        if options['dry_run']:
            self.stdout.write(
                'Inserting/updating signature (%s, %s, %s)' %
                (item['signature'], item['date'], item['build_id'])
            )
        else:
            self.update_crashstats_signature(
                signature=item['signature'],
                report_date=item['date'],
                report_build=item['build_id'],
            )

    self.stdout.write('Inserted/updated %d signatures.' % len(signature_data))
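# Sketch of the paging pattern used in handle() above, written against a
# stand-in for the SuperSearch API so it can run on its own. MAX_PAGE's
# value, the FakeSuperSearch class, and fetch_all() are assumptions for
# illustration; only the offset/number paging logic mirrors the command.
MAX_PAGE = 1000


class FakeSuperSearch:
    """Stand-in that serves slices of a fixed result set like a paged API."""

    def __init__(self, rows):
        self.rows = rows

    def get(self, **params):
        offset = params['_results_offset']
        number = params['_results_number']
        return {
            'hits': self.rows[offset:offset + number],
            'total': len(self.rows),
        }


def fetch_all(api):
    params = {'_results_offset': 0, '_results_number': MAX_PAGE}
    collected = []
    while True:
        resp = api.get(**params)
        hits = resp['hits']
        collected.extend(hits)
        total = resp['total']
        # Stop when the API runs dry or we've seen everything it reported.
        if not hits or len(collected) >= total:
            break
        # Ask only for as many results as are still missing.
        params['_results_offset'] += MAX_PAGE
        params['_results_number'] = min(MAX_PAGE, total - len(collected))
    return collected


assert len(fetch_all(FakeSuperSearch(list(range(2500))))) == 2500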
def graphics_report(request):
    """Return a CSV output of all crashes for a particular day and a
    particular product."""
    if (
        not request.user.is_active or
        not request.user.has_perm('crashstats.run_long_queries')
    ):
        return http.HttpResponseForbidden(
            "You must have the 'Run long queries' permission"
        )

    form = forms.GraphicsReportForm(
        request.GET,
    )
    if not form.is_valid():
        return http.HttpResponseBadRequest(str(form.errors))

    batch_size = 1000
    product = form.cleaned_data['product'] or settings.DEFAULT_PRODUCT
    date = form.cleaned_data['date']
    params = {
        'product': product,
        'date': [
            '>={}'.format(date.strftime('%Y-%m-%d')),
            '<{}'.format(
                (date + datetime.timedelta(days=1)).strftime('%Y-%m-%d')
            )
        ],
        '_columns': (
            'signature',
            'uuid',
            'date',
            'product',
            'version',
            'build_id',
            'platform',
            'platform_version',
            'cpu_name',
            'cpu_info',
            'address',
            'uptime',
            'topmost_filenames',
            'reason',
            'app_notes',
            'release_channel',
        ),
        '_results_number': batch_size,
        '_results_offset': 0,
    }

    api = SuperSearch()
    # Do the first query. That'll give us the total and the first page's
    # worth of crashes.
    data = api.get(**params)
    assert 'hits' in data

    accept_gzip = 'gzip' in request.META.get('HTTP_ACCEPT_ENCODING', '')
    response = http.HttpResponse(content_type='text/csv')
    out = BytesIO()
    writer = utils.UnicodeWriter(out, delimiter='\t')
    writer.writerow(GRAPHICS_REPORT_HEADER)
    pages = data['total'] // batch_size
    # if there is a remainder, add one more page
    if data['total'] % batch_size:
        pages += 1
    alias = {
        'crash_id': 'uuid',
        'os_name': 'platform',
        'os_version': 'platform_version',
        'date_processed': 'date',
        'build': 'build_id',
        'uptime_seconds': 'uptime',
    }
    # Make sure we don't have an alias for a header we don't need
    alias_excess = set(alias.keys()) - set(GRAPHICS_REPORT_HEADER)
    if alias_excess:
        raise ValueError(
            'Not all keys in the map of aliases are in '
            'the header ({!r})'.format(alias_excess)
        )

    def get_value(row, key):
        """Return the appropriate output from the row of data, one key
        at a time. The output is what's used in writing the CSV file.

        The reason for doing these "hacks" is to match what used to be
        done with the SELECT statement in SQL in the ancient, but now
        replaced, report.
        """
        value = row.get(alias.get(key, key))
        if key == 'cpu_info':
            value = '{cpu_name} | {cpu_info}'.format(
                cpu_name=row.get('cpu_name', ''),
                cpu_info=row.get('cpu_info', ''),
            )
        if value is None:
            return ''
        if key == 'date_processed':
            value = timezone.make_aware(datetime.datetime.strptime(
                value.split('.')[0], '%Y-%m-%dT%H:%M:%S'
            ))
            value = value.strftime('%Y%m%d%H%M')
        if key == 'uptime_seconds' and value == 0:
            value = ''
        return value

    for page in range(pages):
        if page > 0:
            params['_results_offset'] = batch_size * page
            data = api.get(**params)

        for row in data['hits']:
            # Each row is a dict; turn it into a list in the exact order
            # of the GRAPHICS_REPORT_HEADER tuple above.
            # However, because the csv writer module doesn't "understand"
            # Python's None, we replace those with '' so the CSV doesn't
            # contain the word 'None' where the data is None.
            writer.writerow([
                get_value(row, x) for x in GRAPHICS_REPORT_HEADER
            ])

    payload = out.getvalue()
    if accept_gzip:
        zbuffer = BytesIO()
        zfile = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=zbuffer)
        zfile.write(payload)
        zfile.close()
        compressed_payload = zbuffer.getvalue()
        response.write(compressed_payload)
        response['Content-Length'] = len(compressed_payload)
        response['Content-Encoding'] = 'gzip'
    else:
        response.write(payload)
        response['Content-Length'] = len(payload)
    return response
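# Sketch of the gzip branch at the end of graphics_report() above, pulled out
# so it runs on its own: compress a payload in memory with gzip.GzipFile the
# same way the view does before writing it to the response. The payload bytes
# here are made-up example data, not real crash rows.
import gzip
from io import BytesIO

payload = b'signature\tuuid\ndummy_signature\t00000000-0000-0000-0000-000000000000\n'

zbuffer = BytesIO()
zfile = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=zbuffer)
zfile.write(payload)
zfile.close()
compressed_payload = zbuffer.getvalue()

# Round-trip check: decompressing gives back the original CSV bytes.
assert gzip.decompress(compressed_payload) == payload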