def _iterator(self, start_date):
    """Yield (bug_id, set of signatures) for bugs changed since start_date.

    Queries Bugzilla for all bugs created or whose crash_signature field
    changed since ``start_date``.

    :arg start_date: date to start the Bugzilla query from

    :returns: generator of (int bug_id, set of signature strings)

    """
    self.stdout.write("Working on %s to now" % start_date)

    # Fetch all the bugs that have been created or had the crash_signature
    # field changed since start_date; copy so we never mutate the shared
    # module-level query dict
    payload = BUG_QUERY_PARAMS.copy()
    payload["chfieldfrom"] = start_date

    # Use a 30-second timeout because Bugzilla is slow sometimes
    session = session_with_retries(default_timeout=30.0)

    headers = {}
    if settings.BZAPI_TOKEN:
        headers["X-BUGZILLA-API-KEY"] = settings.BZAPI_TOKEN
        # FIX: this previously logged BZAPI_TOKEN[:-8] which printed all but
        # the last 8 characters of the secret; log only a short prefix
        self.stdout.write(
            "using BZAPI_TOKEN (%s)" % (settings.BZAPI_TOKEN[:8] + "xxxxxxxx")
        )
    else:
        self.stdout.write("Warning: No BZAPI_TOKEN specified!")

    r = session.get(
        settings.BZAPI_BASE_URL + "/bug", headers=headers, params=payload
    )
    if r.status_code < 200 or r.status_code >= 300:
        r.raise_for_status()
    results = r.json()

    # Yield each one as a (bug_id, set of signatures)
    for report in results["bugs"]:
        yield (
            int(report["id"]),
            find_signatures(report.get("cf_crash_signature", "")),
        )
def __init__(self, config):
    """Set up the cache, metrics, version API endpoint, and HTTP session."""
    super().__init__(config)
    self.cache = ExpiringCache(
        max_size=self.CACHE_MAX_SIZE,
        default_ttl=self.SHORT_CACHE_TTL,
    )
    self.metrics = markus.get_metrics('processor.betaversionrule')
    # Endpoint used for looking up version strings
    self.version_string_api = config.version_string_api
    self.session = session_with_retries()
def __init__(self, config):
    """Initialize rule state: expiring cache, metrics, version API, session."""
    super().__init__(config)
    expiring_cache = ExpiringCache(max_size=self.CACHE_MAX_SIZE, default_ttl=self.SHORT_CACHE_TTL)
    self.cache = expiring_cache
    self.metrics = markus.get_metrics('processor.betaversionrule')
    # Where we go to look up version strings
    self.version_string_api = config.version_string_api
    self.session = session_with_retries()
def __init__(self, *args, **kwargs):
    """Initialize with a database handle, retryable HTTP session, and counter."""
    super().__init__(*args, **kwargs)
    # Database wrapper built from the configured database class
    self.database = self.config.database_class(self.config)
    # NOTE(willkg): If archive.mozilla.org is timing out after 5 seconds,
    # then it has issues and we should try again some other time
    self.session = session_with_retries(default_timeout=5.0)
    # Number of inserts that succeeded during this run
    self.successful_inserts = 0
def main(argv=None):
    """Submit reprocessing requests for the given crash ids in groups."""
    parser = argparse.ArgumentParser(
        formatter_class=WrappedTextHelpFormatter,
        description=DESCRIPTION.strip(),
    )
    parser.add_argument(
        "--sleep",
        help="how long in seconds to sleep before submitting the next group",
        type=int,
        default=SLEEP_DEFAULT,
    )
    parser.add_argument("--host", help="host for system to reprocess in", default=DEFAULT_HOST)
    parser.add_argument(
        "crashid",
        help="one or more crash ids to fetch data for",
        nargs="*",
        action=FallbackToPipeAction,
    )
    args = parser.parse_args() if argv is None else parser.parse_args(argv)

    api_token = os.environ.get("SOCORRO_REPROCESS_API_TOKEN")
    if not api_token:
        print("You need to set SOCORRO_REPROCESS_API_TOKEN in the environment")
        return 1

    url = args.host.rstrip("/") + "/api/Reprocessing/"
    print("Sending reprocessing requests to: %s" % url)
    http = session_with_retries()

    crash_ids = args.crashid
    print(
        "Reprocessing %s crashes sleeping %s seconds between groups..."
        % (len(crash_ids), args.sleep)
    )

    groups = list(chunked(crash_ids, CHUNK_SIZE))
    total_groups = len(groups)
    for index, batch in enumerate(groups, start=1):
        print("Processing group ending with %s ... (%s/%s)" % (batch[-1], index, total_groups))
        resp = http.post(url, data={"crash_ids": batch}, headers={"Auth-Token": api_token})
        if resp.status_code != 200:
            print("Got back non-200 status code: %s %s" % (resp.status_code, resp.content))
            continue

        # NOTE(willkg): We sleep here because the webapp has a bunch of rate limiting and we don't
        # want to trigger that. It'd be nice if we didn't have to do this.
        time.sleep(args.sleep)

    print("Done!")
def main(argv=None):
    """Send reprocessing requests for one or more crash ids."""
    argparser = argparse.ArgumentParser(
        formatter_class=WrappedTextHelpFormatter,
        description=DESCRIPTION.strip(),
    )
    argparser.add_argument(
        '--sleep',
        help='how long in seconds to sleep before submitting the next group',
        type=int,
        default=SLEEP_DEFAULT,
    )
    argparser.add_argument(
        '--host',
        help='host for system to reprocess in',
        default=DEFAULT_HOST,
    )
    argparser.add_argument(
        'crashid',
        help='one or more crash ids to fetch data for',
        nargs='*',
        action=FallbackToPipeAction,
    )

    if argv is None:
        args = argparser.parse_args()
    else:
        args = argparser.parse_args(argv)

    api_token = os.environ.get('SOCORRO_REPROCESS_API_TOKEN')
    if not api_token:
        print('You need to set SOCORRO_REPROCESS_API_TOKEN in the environment')
        return 1

    url = args.host.rstrip('/') + '/api/Reprocessing/'
    print('Sending reprocessing requests to: %s' % url)
    session = session_with_retries()

    crash_ids = args.crashid
    print('Reprocessing %s crashes sleeping %s seconds between groups...' % (
        len(crash_ids), args.sleep
    ))

    groups = list(chunked(crash_ids, CHUNK_SIZE))
    group_count = len(groups)
    for i, group in enumerate(groups):
        print('Processing group ending with %s ... (%s/%s)' % (group[-1], i + 1, group_count))
        resp = session.post(
            url,
            data={'crash_ids': group},
            headers={'Auth-Token': api_token},
        )
        if resp.status_code != 200:
            print('Got back non-200 status code: %s %s' % (resp.status_code, resp.content))
            continue

        # NOTE(willkg): We sleep here because the webapp has a bunch of rate limiting and we don't
        # want to trigger that. It'd be nice if we didn't have to do this.
        time.sleep(args.sleep)

    print('Done!')
def _iterator(self, from_date):
    """Yield (bug_id, signature set) for bugs changed since from_date."""
    query = dict(BUGZILLA_PARAMS)
    query['chfieldfrom'] = from_date

    response = session_with_retries().get(BUGZILLA_BASE_URL, params=query)
    if not 200 <= response.status_code < 300:
        response.raise_for_status()

    for bug in response.json()['bugs']:
        signatures = find_signatures(bug.get('cf_crash_signature', ''))
        yield (int(bug['id']), signatures)
def fetch_crashids(host, params, num_results):
    """Generator that returns crash ids

    :arg str host: the host to query
    :arg dict params: dict of super search parameters to base the query on
    :arg varies num_results: number of results to get or INFINITY

    :returns: generator of crash ids

    """
    url = host + '/api/SuperSearch/'
    session = session_with_retries()

    # FIX: copy the params so updating the paging keys below doesn't mutate
    # the caller's dict
    params = dict(params)

    # Set up first page
    params['_results_offset'] = 0
    params['_results_number'] = min(MAX_PAGE, num_results)

    # Fetch pages of crash ids until we've gotten as many as we want or there aren't any more to get
    crashids_count = 0
    while True:
        resp = session.get(url, params=params)
        if resp.status_code != 200:
            raise Exception('Bad response: %s %s' % (resp.status_code, resp.content))

        hits = resp.json()['hits']

        for hit in hits:
            crashids_count += 1
            yield hit['uuid']

            # If we've gotten as many crashids as we need, we return
            if crashids_count >= num_results:
                return

        # If there are no more crash ids to get, we return
        total = resp.json()['total']
        if not hits or crashids_count >= total:
            return

        # Get the next page, but only as many results as we need
        params['_results_offset'] += MAX_PAGE
        params['_results_number'] = min(
            # MAX_PAGE is the maximum we can request
            MAX_PAGE,
            # The number of results Super Search can return to us that it hasn't returned so far
            total - crashids_count,
            # The number of results we want that we haven't gotten, yet
            num_results - crashids_count
        )
def _iterator(self, from_date):
    """Yield (bug_id, set of signatures) tuples for recently changed bugs."""
    # Fetch all the bugs that have been created or had the crash_signature
    # field changed since from_date
    query = BUGZILLA_PARAMS.copy()
    query['chfieldfrom'] = from_date

    session = session_with_retries()
    resp = session.get(BUGZILLA_BASE_URL, params=query)
    if resp.status_code < 200 or resp.status_code >= 300:
        resp.raise_for_status()

    # Yield each one as a (bug_id, set of signatures)
    for bug in resp.json()['bugs']:
        yield (
            int(bug['id']),
            find_signatures(bug.get('cf_crash_signature', '')),
        )
def get(self, bugs):
    """Return bug data for the given bug id(s), using the cache when possible.

    :arg bugs: a bug id or list of bug ids

    :returns: dict with a "bugs" key holding the list of bug data dicts

    :raises BugzillaRestHTTPUnexpectedError: if BZAPI returns a non-200

    """
    # FIX: check against str directly instead of six.string_types--this
    # codebase is Python 3 and the other implementations of this method
    # check str
    if isinstance(bugs, str):
        bugs = [bugs]

    fields = ('summary', 'status', 'id', 'resolution')

    results = []
    missing = []

    # Pull whatever we can out of the cache first
    for bug in bugs:
        cache_key = self.make_cache_key(bug)
        cached = cache.get(cache_key)
        if cached is None:
            missing.append(bug)
        else:
            results.append(cached)

    if missing:
        params = {
            'bugs': ','.join(missing),
            'fields': ','.join(fields),
        }
        headers = {
            'Accept': 'application/json',
            'Content-Type': 'application/json'
        }
        url = settings.BZAPI_BASE_URL + (
            '/bug?id=%(bugs)s&include_fields=%(fields)s' % params
        )
        session = session_with_retries(
            # BZAPI isn't super reliable, so be extra patient
            total_retries=5,
            # 502 = Bad Gateway
            # 504 = Gateway Time-out
            status_forcelist=(500, 502, 504)
        )
        response = session.get(
            url,
            headers=headers,
            timeout=self.BUGZILLA_REST_TIMEOUT,
        )
        if response.status_code != 200:
            raise BugzillaRestHTTPUnexpectedError(response.status_code)

        for each in response.json()['bugs']:
            cache_key = self.make_cache_key(each['id'])
            cache.set(cache_key, each, self.BUG_CACHE_SECONDS)
            results.append(each)

    return {'bugs': results}
def get(self, bugs):
    """Look up bug data for one or more bug ids, consulting the cache first."""
    if isinstance(bugs, str):
        bugs = [bugs]

    fields = ('summary', 'status', 'id', 'resolution')
    results = []
    missing = []

    # Satisfy what we can from the cache; collect the rest for one request
    for bug_id in bugs:
        hit = cache.get(self.make_cache_key(bug_id))
        if hit is None:
            missing.append(bug_id)
        else:
            results.append(hit)

    if missing:
        query = {
            'bugs': ','.join(missing),
            'fields': ','.join(fields),
        }
        url = settings.BZAPI_BASE_URL + (
            '/bug?id=%(bugs)s&include_fields=%(fields)s' % query
        )
        session = session_with_retries(
            # BZAPI isn't super reliable, so be extra patient
            total_retries=5,
            # 502 = Bad Gateway
            # 504 = Gateway Time-out
            status_forcelist=(500, 502, 504)
        )
        response = session.get(
            url,
            headers={
                'Accept': 'application/json',
                'Content-Type': 'application/json',
            },
            timeout=self.BUGZILLA_REST_TIMEOUT,
        )
        if response.status_code != 200:
            raise BugzillaRestHTTPUnexpectedError(response.status_code)

        # Cache everything we fetched and fold it into the results
        for bug in response.json()['bugs']:
            cache.set(self.make_cache_key(bug['id']), bug, self.BUG_CACHE_SECONDS)
            results.append(bug)

    return {'bugs': results}
def _iterator(self, start_date):
    """Yield (bug_id, set of signatures) for bugs touched since start_date."""
    self.stdout.write('Working on %s to now' % start_date)

    # Fetch all the bugs that have been created or had the crash_signature
    # field changed since start_date
    query = BUGZILLA_PARAMS.copy()
    query['chfieldfrom'] = start_date

    # Use a 30-second timeout because Bugzilla is slow sometimes
    session = session_with_retries(default_timeout=30.0)
    resp = session.get(BUGZILLA_BASE_URL, params=query)
    if not (200 <= resp.status_code < 300):
        resp.raise_for_status()

    # Yield each one as a (bug_id, set of signatures)
    for bug in resp.json()['bugs']:
        signatures = find_signatures(bug.get('cf_crash_signature', ''))
        yield (int(bug['id']), signatures)
def _iterator(self, from_date):
    """Generate (bug_id, signature set) pairs for bugs changed since from_date."""
    # Fetch all the bugs that have been created or had the crash_signature
    # field changed since from_date
    payload = dict(BUGZILLA_PARAMS, chfieldfrom=from_date)

    # Use a 30-second timeout because Bugzilla is slow sometimes
    response = session_with_retries(default_timeout=30.0).get(
        BUGZILLA_BASE_URL, params=payload
    )
    if response.status_code < 200 or response.status_code >= 300:
        response.raise_for_status()

    # Yield each one as a (bug_id, set of signatures)
    for report in response.json()['bugs']:
        yield (
            int(report['id']),
            find_signatures(report.get('cf_crash_signature', ''))
        )
def handle(self, **options):
    """Download the pci.ids file and sync GraphicsDevice records from it."""
    debug_mode = options.get("debug")

    # Request file
    session = session_with_retries()
    resp = session.get(PCI_IDS_URL)
    # Let's raise an error if there's an error and let it alert us in Sentry for now
    resp.raise_for_status()

    # If we got the file successfully, then process it
    self.stdout.write(f"Fetch successful, {len(resp.text)} bytes...")
    devices = utils.pci_ids__parse_graphics_devices_iterable(
        resp.text.splitlines(), debug=debug_mode
    )

    total_created = 0
    total_updated = 0
    total_skipped = 0

    for item in devices:
        obj, created = GraphicsDevice.objects.get_or_create(
            vendor_hex=item["vendor_hex"],
            adapter_hex=item["adapter_hex"],
        )
        unchanged = (
            obj.vendor_name == item["vendor_name"]
            and obj.adapter_name == item["adapter_name"]
        )
        if unchanged:
            # Record already matches what's in the file
            total_skipped += 1
            continue

        obj.vendor_name = item["vendor_name"]
        obj.adapter_name = item["adapter_name"]
        obj.save()

        if created:
            total_created += 1
        else:
            total_updated += 1

    self.stdout.write(
        f"Done. "
        f"Created: {total_created}; "
        f"Updated: {total_updated}; "
        f"Skipped: {total_skipped}"
    )
def get(self, bugs):
    """Fetch bug data for the given bug id(s), using the cache where possible."""
    if isinstance(bugs, str):
        bugs = [bugs]

    fields = ("summary", "status", "id", "resolution")
    results = []
    missing = []

    # Anything not in the cache gets fetched in a single request below
    for bug_id in bugs:
        cached = cache.get(self.make_cache_key(bug_id))
        if cached is None:
            missing.append(bug_id)
        else:
            results.append(cached)

    if missing:
        params = {"bugs": ",".join(missing), "fields": ",".join(fields)}
        headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
        }
        if settings.BZAPI_TOKEN:
            headers["X-BUGZILLA-API-KEY"] = settings.BZAPI_TOKEN
        url = settings.BZAPI_BASE_URL + (
            "/bug?id=%(bugs)s&include_fields=%(fields)s" % params
        )
        session = session_with_retries(
            # BZAPI isn't super reliable, so be extra patient
            total_retries=5,
            # 502 = Bad Gateway
            # 504 = Gateway Time-out
            status_forcelist=(500, 502, 504),
        )
        response = session.get(url, headers=headers, timeout=self.BUGZILLA_REST_TIMEOUT)
        if response.status_code != 200:
            raise BugzillaRestHTTPUnexpectedError(response.status_code)

        for bug in response.json()["bugs"]:
            cache.set(self.make_cache_key(bug["id"]), bug, self.BUG_CACHE_SECONDS)
            results.append(bug)

    return {"bugs": results}
def run(self, connection):
    """Fetch featured versions for each configured product and store them."""
    # The @with_single_postgres_transaction decorator makes
    # sure this cursor is committed or rolled back and cleaned up.
    cursor = connection.cursor()
    session = session_with_retries()

    for product in self.config.products:
        url = self.config.api_endpoint_url.format(product=product)
        response = session.get(url)
        if response.status_code != 200:
            raise DownloadError('{} ({})'.format(url, response.status_code))
        versions = response.json()
        self._set_featured_versions(cursor, product, versions)
def fetch_crash(fetchdumps, outputdir, api_token, crash_id):
    """Fetch crash data and save to correct place on the file system

    http://antenna.readthedocs.io/en/latest/architecture.html#aws-s3-file-hierarchy

    :arg fetchdumps: whether to also fetch and save the minidumps
    :arg outputdir: base directory to write the crash data under
    :arg api_token: API token for authenticated access, or falsy for anonymous
    :arg crash_id: the crash id to fetch

    :raises CrashDoesNotExist: if there's no raw crash with that id
    :raises BadAPIToken: if the API token was rejected (HTTP 403)

    """
    if api_token:
        headers = {
            'Auth-Token': api_token
        }
    else:
        headers = {}

    # Fetch raw crash metadata
    session = session_with_retries()
    resp = session.get(
        HOST + '/api/RawCrash/',
        params={
            'crash_id': crash_id,
            'format': 'meta',
        },
        headers=headers,
    )

    # Handle 404 and 403 so we can provide the user more context
    if resp.status_code == 404:
        raise CrashDoesNotExist(crash_id)
    if api_token and resp.status_code == 403:
        raise BadAPIToken(resp.json().get('error', 'No error provided'))

    # Raise an error for any other non-200 response
    resp.raise_for_status()

    # Save raw crash to file system; mirrors the v2 S3 layout:
    # v2/raw_crash/<first 3 chars>/20<last 6 chars>/<crash id>
    raw_crash = resp.json()
    fn = os.path.join(outputdir, 'v2', 'raw_crash', crash_id[0:3], '20' + crash_id[-6:], crash_id)
    create_dir_if_needed(os.path.dirname(fn))
    with open(fn, 'w') as fp:
        json.dump(raw_crash, fp, cls=JsonDTEncoder, indent=2, sort_keys=True)

    if fetchdumps:
        # Fetch dumps
        dumps = {}
        dump_names = raw_crash.get('dump_checksums', {}).keys()
        for dump_name in dump_names:
            print('Fetching %s -> %s' % (crash_id, dump_name))

            # We store "upload_file_minidump" as "dump", so we need to use that
            # name when requesting from the RawCrash api
            file_name = dump_name
            if file_name == 'upload_file_minidump':
                file_name = 'dump'

            resp = session.get(
                HOST + '/api/RawCrash/',
                params={
                    'crash_id': crash_id,
                    'format': 'raw',
                    'name': file_name
                },
                headers=headers,
            )

            if resp.status_code != 200:
                raise Exception('Something unexpected happened. status_code %s, content %s' % (
                    resp.status_code, resp.content)
                )

            dumps[dump_name] = resp.content

        # Save dump_names to file system
        fn = os.path.join(outputdir, 'v1', 'dump_names', crash_id)
        create_dir_if_needed(os.path.dirname(fn))
        with open(fn, 'w') as fp:
            json.dump(list(dumps.keys()), fp)

        # Save dumps to file system
        for dump_name, data in dumps.items():
            if dump_name == 'upload_file_minidump':
                dump_name = 'dump'

            fn = os.path.join(outputdir, 'v1', dump_name, crash_id)
            create_dir_if_needed(os.path.dirname(fn))
            with open(fn, 'wb') as fp:
                fp.write(data)
def get_data(product):
    """Compare recent beta builds in product_versions with Buildhub.

    Pulls recent beta (product, build_id, version) rows from the database,
    then looks each build up in Buildhub's Elasticsearch API and records
    the version string Buildhub has for it.

    :arg product: the product name, e.g. "Firefox"

    :returns: list of dicts of product version data, each with an added
        "buildhub_resp" key holding Buildhub's version string or a note
        about why there wasn't one

    """
    # Get 10 Firefox versions from product_versions to compare
    with connection.cursor() as cursor:
        cursor.execute(
            """
            SELECT distinct pv.product_name, pv.build_type, pvb.build_id,
                pv.release_version, pv.version_string
            FROM product_versions AS pv, product_version_builds AS pvb
            WHERE pv.product_version_id = pvb.product_version_id
                AND pv.build_type = 'beta'
                AND pv.product_name = %s
            ORDER BY pvb.build_id DESC, pv.product_name, pv.build_type,
                pv.release_version, pv.version_string
            LIMIT 15
            """,
            (product, )
        )
        pv_data = cursor.fetchall()

    # Convert row tuples into dicts keyed by column meaning
    pv_data = [{
        'product': pv[0],
        'channel': pv[1],
        'build_id': pv[2],
        'release_version': pv[3],
        'version_string': pv[4],
    } for pv in pv_data]

    session = session_with_retries(buildhub_api)
    for pv in pv_data:
        # Lookup (product, buildid, channel) and exclude versions with "rc" in
        # them because we're looking at the beta channel
        es_query = {
            'query': {
                'bool': {
                    'must': {
                        'match_all': {}
                    },
                    'filter': [
                        {'term': {'source.product': product.lower()}},
                        {'term': {'build.id': str(pv['build_id'])}},
                        {'term': {'target.channel': pv['channel']}},
                    ],
                    'must_not': {
                        'wildcard': {
                            'target.version': '*rc*'
                        }
                    }
                }
            },
            'size': 1
        }

        resp = session.post(buildhub_api, data=json.dumps(es_query))
        if resp.status_code != 200:
            # Record the HTTP failure rather than raising so we keep going
            pv['buildhub_resp'] = 'HTTP %s' % resp.status_code
            continue

        data = resp.json()
        hits = data.get('hits', {}).get('hits', [])
        if not hits:
            # No match--presumably this build was a release, not a beta
            pv['buildhub_resp'] = 'no hits--might be release'
            continue

        pv['buildhub_resp'] = hits[0]['_source']['target']['version']

    return pv_data
def _get_version_data(self, product, build_id, channel):
    """Return the real version number of a specified product, build, channel

    For example, beta builds of Firefox declare their version number as
    the major version (i.e. version 54.0b3 would say its version is 54.0).
    This database call returns the actual version number of said build
    (i.e. 54.0b3 for the previous example).

    :arg product: the product
    :arg build_id: the build_id as a string
    :arg channel: the release channel

    :returns: ``None`` or the version string that should be used

    :raises requests.RequestException: raised if it has connection issues with
        the host specified in ``version_string_api``

    """
    # NOTE(willkg): AURORA LIVES!
    #
    # But seriously, if this is for Firefox/aurora and the build id is after
    # 20170601, then we ask Buildhub about devedition/aurora instead because
    # devedition is the aurora channel
    if (product, channel) == ('firefox', 'aurora') and build_id > '20170601':
        product = 'devedition'

    key = '%s:%s:%s' % (product, build_id, channel)
    if key in self.cache:
        return self.cache[key]

    session = session_with_retries(self.buildhub_api)
    # NOTE(review): quoting build.id presumably forces an exact match in
    # Buildhub's query parsing -- confirm against the Buildhub API docs
    query = {
        'source.product': product,
        'build.id': '"%s"' % build_id,
        'target.channel': channel,
        '_limit': 1
    }
    resp = session.get(self.buildhub_api, params=query)

    if resp.status_code == 200:
        hits = resp.json()['data']

        # Shimmy to add to ttl so as to distribute cache misses over time and reduce
        # HTTP requests from bunching up.
        shimmy = random.randint(1, 120)

        if hits:
            # If we got an answer we should keep it around for a while because it's
            # a real answer and it's not going to change so use the long ttl plus
            # a fudge factor.
            real_version = hits[0]['target']['version']
            self.cache.set(key, value=real_version, ttl=self.LONG_CACHE_TTL + shimmy)
            return real_version

        # We didn't get an answer which could mean that this is a weird
        # build and there is no answer or it could mean that Buildhub
        # doesn't know, yet. Maybe in the future we get a better answer
        # so we use the short ttl plus a fudge factor.
        self.cache.set(key, value=None, ttl=self.SHORT_CACHE_TTL + shimmy)

    return None
def fetch_crash(host, fetchraw, fetchdumps, fetchprocessed, outputdir, api_token, crash_id):
    """Fetch crash data and save to correct place on the file system

    http://antenna.readthedocs.io/en/latest/architecture.html#aws-s3-file-hierarchy

    :arg host: the host to fetch from
    :arg fetchraw: whether to fetch the raw crash metadata
    :arg fetchdumps: whether to fetch the minidumps
    :arg fetchprocessed: whether to fetch the processed crash
    :arg outputdir: base directory to write the crash data under
    :arg api_token: API token for authenticated access, or falsy for anonymous
    :arg crash_id: the crash id to fetch

    :raises CrashDoesNotExist: if there's no crash with that id
    :raises BadAPIToken: if the API token was rejected (HTTP 403)

    """
    if api_token:
        headers = {"Auth-Token": api_token}
    else:
        headers = {}

    session = session_with_retries()

    if fetchraw:
        # Fetch raw crash metadata
        print("Fetching raw %s" % crash_id)
        resp = session.get(
            host + "/api/RawCrash/",
            params={
                "crash_id": crash_id,
                "format": "meta"
            },
            headers=headers,
        )

        # Handle 404 and 403 so we can provide the user more context
        if resp.status_code == 404:
            raise CrashDoesNotExist(crash_id)
        if api_token and resp.status_code == 403:
            raise BadAPIToken(resp.json().get("error", "No error provided"))

        # Raise an error for any other non-200 response
        resp.raise_for_status()

        # Save raw crash to file system; mirrors the v2 S3 layout:
        # v2/raw_crash/<first 3 chars>/20<last 6 chars>/<crash id>
        raw_crash = resp.json()
        fn = os.path.join(outputdir, "v2", "raw_crash", crash_id[0:3], "20" + crash_id[-6:], crash_id)
        create_dir_if_needed(os.path.dirname(fn))
        with open(fn, "w") as fp:
            json.dump(raw_crash, fp, cls=JsonDTEncoder, indent=2, sort_keys=True)

    if fetchdumps:
        # Fetch dumps
        # NOTE(review): this reads raw_crash which is only defined when
        # fetchraw is also truthy; fetchdumps without fetchraw would
        # NameError -- confirm callers always pass both
        dumps = {}
        dump_names = raw_crash.get("dump_checksums", {}).keys()
        for dump_name in dump_names:
            print("Fetching dump %s/%s" % (crash_id, dump_name))

            # We store "upload_file_minidump" as "dump", so we need to use that
            # name when requesting from the RawCrash api
            file_name = dump_name
            if file_name == "upload_file_minidump":
                file_name = "dump"

            resp = session.get(
                host + "/api/RawCrash/",
                params={
                    "crash_id": crash_id,
                    "format": "raw",
                    "name": file_name
                },
                headers=headers,
            )

            if resp.status_code != 200:
                raise Exception(
                    "Something unexpected happened. status_code %s, content %s"
                    % (resp.status_code, resp.content))

            dumps[dump_name] = resp.content

        # Save dump_names to file system
        fn = os.path.join(outputdir, "v1", "dump_names", crash_id)
        create_dir_if_needed(os.path.dirname(fn))
        with open(fn, "w") as fp:
            json.dump(list(dumps.keys()), fp)

        # Save dumps to file system
        for dump_name, data in dumps.items():
            if dump_name == "upload_file_minidump":
                dump_name = "dump"

            fn = os.path.join(outputdir, "v1", dump_name, crash_id)
            create_dir_if_needed(os.path.dirname(fn))
            with open(fn, "wb") as fp:
                fp.write(data)

    if fetchprocessed:
        # Fetch processed crash data
        print("Fetching processed %s" % crash_id)
        resp = session.get(
            host + "/api/ProcessedCrash/",
            params={
                "crash_id": crash_id,
                "format": "meta"
            },
            headers=headers,
        )

        # Handle 404 and 403 so we can provide the user more context
        if resp.status_code == 404:
            raise CrashDoesNotExist(crash_id)
        if api_token and resp.status_code == 403:
            raise BadAPIToken(resp.json().get("error", "No error provided"))

        # Raise an error for any other non-200 response
        resp.raise_for_status()

        # Save processed crash to file system
        processed_crash = resp.json()
        fn = os.path.join(outputdir, "v1", "processed_crash", crash_id)
        create_dir_if_needed(os.path.dirname(fn))
        with open(fn, "w") as fp:
            json.dump(processed_crash, fp, cls=JsonDTEncoder, indent=2, sort_keys=True)
def __init__(self, *args, **kwargs):
    """Initialize the cron app with a retryable HTTP session and counter."""
    # FIX: use Python 3 zero-argument super() instead of the legacy
    # super(ArchiveScraperCronApp, self) form used elsewhere in the file
    super().__init__(*args, **kwargs)
    # NOTE(willkg): If archive.mozilla.org is timing out after 5 seconds,
    # then it has issues and we should try again some other time
    self.session = session_with_retries(default_timeout=5.0)
    # Number of inserts that succeeded during this run
    self.successful_inserts = 0
def main(argv=None):
    """Submit reprocessing requests for crash ids from args or stdin.

    :arg argv: list of command line arguments or None to use sys.argv[1:]

    :returns: 0 on success, 1 if SOCORRO_REPROCESS_API_TOKEN isn't set

    """
    parser = argparse.ArgumentParser(
        formatter_class=WrappedTextHelpFormatter,
        description=DESCRIPTION.strip(),
    )
    parser.add_argument(
        '--sleep',
        help='how long in seconds to sleep before submitting the next group',
        type=int,
        default=SLEEP_DEFAULT)
    parser.add_argument('--host', help='host for system to reprocess in', default=DEFAULT_HOST)
    parser.add_argument('crashid', nargs='*', help='one or more crash ids to fetch data for')

    if argv is None:
        args = parser.parse_args()
    else:
        args = parser.parse_args(argv)

    api_token = os.environ.get('SOCORRO_REPROCESS_API_TOKEN')
    if not api_token:
        print('You need to set SOCORRO_REPROCESS_API_TOKEN in the environment')
        return 1

    url = args.host.rstrip('/') + '/api/Reprocessing/'

    if args.crashid:
        crash_ids = args.crashid
    elif not sys.stdin.isatty():
        # If a script is piping to this script, then isatty() returns False. If there is no script
        # piping to this script, then isatty() returns True and if we do list(sys.stdin), it'll
        # block waiting for input.
        crash_ids = list(sys.stdin)
    else:
        crash_ids = []

    # FIX: strip whitespace and drop empty entries; piped input typically ends
    # with a newline which would otherwise produce a bogus "" crash id that
    # gets submitted for reprocessing
    crash_ids = [item.strip() for item in crash_ids]
    crash_ids = [item for item in crash_ids if item]

    # If there are no crashids, then print help and exit
    if not crash_ids:
        parser.print_help()
        return 0

    print('Sending reprocessing requests to: %s' % url)
    session = session_with_retries()

    print('Reprocessing %s crashes sleeping %s seconds between groups...' % (len(crash_ids), args.sleep))

    groups = list(chunked(crash_ids, CHUNK_SIZE))
    for i, group in enumerate(groups):
        print('Processing group ending with %s ... (%s/%s)' % (group[-1], i + 1, len(groups)))
        resp = session.post(url, data={'crash_ids': group}, headers={'Auth-Token': api_token})
        if resp.status_code != 200:
            print('Got back non-200 status code: %s %s' % (resp.status_code, resp.content))
            continue

        # NOTE(willkg): We sleep here because the webapp has a bunch of rate limiting and we don't
        # want to trigger that. It'd be nice if we didn't have to do this.
        time.sleep(args.sleep)

    print('Done!')
def _get_version_data(self, product, version, build_id):
    """Return the real version number of a specific product, version and build

    For example, beta builds of Firefox declare their version
    number as the major version (i.e. version 54.0b3 would say its
    version is 54.0). This database call returns the actual version
    number of said build (i.e. 54.0b3 for the previous example).

    :arg product: the product
    :arg version: the version as a string. e.g. "56.0"
    :arg build_id: the build_id as a string.

    :returns: ``None`` or the version string that should be used

    :raises requests.RequestException: raised if it has connection issues with
        the host specified in ``version_string_api``

    """
    # Without all three pieces there's nothing meaningful to look up
    if not (product and version and build_id):
        return None

    key = '%s:%s:%s' % (product, version, build_id)
    if key in self.cache:
        return self.cache[key]

    session = session_with_retries(self.version_string_api)
    resp = session.get(
        self.version_string_api,
        params={
            'product': product,
            'version': version,
            'build_id': build_id
        }
    )
    if resp.status_code == 200:
        hits = resp.json()['hits']

        # Shimmy to add to ttl so as to distribute cache misses over time and reduce
        # HTTP requests from bunching up.
        shimmy = random.randint(1, 120)

        if hits:
            # If we got an answer we should keep it around for a while because it's
            # a real answer and it's not going to change so use the long ttl plus
            # a fudge factor.
            real_version = hits[0]
            self.cache.set(key, value=real_version, ttl=self.LONG_CACHE_TTL + shimmy)
            return real_version
        else:
            # We didn't get an answer which could mean that this is a weird build and there
            # is no answer or it could mean that ftpscraper hasn't picked up the relevant
            # build information or it could mean we're getting cached answers from the webapp.
            # Regardless, maybe in the future we get a better answer so we use the short
            # ttl plus a fudge factor.
            self.cache.set(key, value=None, ttl=self.SHORT_CACHE_TTL + shimmy)
    return None
def get_session():
    """Return a retryable requests session."""
    # NOTE(willkg): If archive.mozilla.org is timing out after 5 seconds, then
    # it has issues and we should try again some other time
    session = session_with_retries(default_timeout=5.0)
    return session
def __init__(self, *args, **kwargs):
    """Initialize the scraper with a retryable HTTP session."""
    # FIX: use Python 3 zero-argument super() instead of the legacy
    # super(FTPScraperCronApp, self) form used elsewhere in the file
    super().__init__(*args, **kwargs)
    self.session = session_with_retries()
def fetch_crash(fetchdumps, outputdir, api_token, crash_id):
    """Fetch crash data and save to correct place on the file system

    http://antenna.readthedocs.io/en/latest/architecture.html#aws-s3-file-hierarchy

    :arg fetchdumps: whether to also fetch and save the minidumps
    :arg outputdir: base directory to write the crash data under
    :arg api_token: API token for authenticated access, or falsy for anonymous
    :arg crash_id: the crash id to fetch

    :raises CrashDoesNotExist: if there's no raw crash with that id
    :raises BadAPIToken: if the API token was rejected (HTTP 403)

    """
    if api_token:
        headers = {'Auth-Token': api_token}
    else:
        headers = {}

    # Fetch raw crash metadata
    session = session_with_retries()
    resp = session.get(
        HOST + '/api/RawCrash/',
        params={
            'crash_id': crash_id,
            'format': 'meta',
        },
        headers=headers,
    )

    # Handle 404 and 403 so we can provide the user more context
    if resp.status_code == 404:
        raise CrashDoesNotExist(crash_id)
    if api_token and resp.status_code == 403:
        raise BadAPIToken(resp.json().get('error', 'No error provided'))

    # Raise an error for any other non-200 response
    resp.raise_for_status()

    # Save raw crash to file system; mirrors the v2 S3 layout:
    # v2/raw_crash/<first 3 chars>/20<last 6 chars>/<crash id>
    raw_crash = resp.json()
    fn = os.path.join(outputdir, 'v2', 'raw_crash', crash_id[0:3], '20' + crash_id[-6:], crash_id)
    create_dir_if_needed(os.path.dirname(fn))
    with open(fn, 'w') as fp:
        json.dump(raw_crash, fp, cls=JsonDTEncoder, indent=2, sort_keys=True)

    if fetchdumps:
        # Fetch dumps
        dumps = {}
        dump_names = raw_crash.get('dump_checksums', {}).keys()
        for dump_name in dump_names:
            print('Fetching %s -> %s' % (crash_id, dump_name))

            # We store "upload_file_minidump" as "dump", so we need to use that
            # name when requesting from the RawCrash api
            file_name = dump_name
            if file_name == 'upload_file_minidump':
                file_name = 'dump'

            resp = session.get(
                HOST + '/api/RawCrash/',
                params={
                    'crash_id': crash_id,
                    'format': 'raw',
                    'name': file_name
                },
                headers=headers,
            )

            if resp.status_code != 200:
                raise Exception(
                    'Something unexpected happened. status_code %s, content %s'
                    % (resp.status_code, resp.content))

            dumps[dump_name] = resp.content

        # Save dump_names to file system
        fn = os.path.join(outputdir, 'v1', 'dump_names', crash_id)
        create_dir_if_needed(os.path.dirname(fn))
        with open(fn, 'w') as fp:
            # FIX: dict_keys isn't JSON serializable in Python 3 and would
            # raise TypeError; convert to a list first
            json.dump(list(dumps.keys()), fp)

        # Save dumps to file system
        for dump_name, data in dumps.items():
            if dump_name == 'upload_file_minidump':
                dump_name = 'dump'

            fn = os.path.join(outputdir, 'v1', dump_name, crash_id)
            create_dir_if_needed(os.path.dirname(fn))
            with open(fn, 'wb') as fp:
                fp.write(data)