def get_bugs(signature, wait=True):
    """Collect the bugs associated with a crash signature.

    Bugs come from two sources: a Bugzilla search built by
    ``get_bz_search`` and the list of bug ids Socorro has for the
    signature. Bugs known to Socorro but missing from the search results
    are requested from Bugzilla a second time by id.

    Args:
        signature (str): the crash signature; a falsy value short-circuits
            to an empty dict without issuing any request.
        wait (bool): when True (default), block until the second Bugzilla
            request has completed and return only the data.

    Returns:
        dict: ``bugid => buginfo`` when ``wait`` is True (or when
            ``signature`` is falsy). A ``buginfo`` of None means the bug is
            a security bug.
        tuple: ``(bz, data)`` when ``wait`` is False, so the caller can
            call ``bz.wait()`` itself before reading ``data``.
    """
    if not signature:
        return {}

    logger.info("Get bugs for signature %s: started.", signature)

    def bug_handler(bug, data):
        # Keep only the bugs whose crash-signature field really contains
        # the requested signature, and strip that field from the bug info.
        # NOTE(review): the field is assumed to be deleted whether or not
        # the signature matched -- confirm against the original layout.
        if "cf_crash_signature" in bug:
            if signature in utils.get_signatures([bug["cf_crash_signature"]]):
                data[bug["id"]] = bug
            del bug["cf_crash_signature"]

    # Timezone-aware "now" in UTC, moved two hours back.
    start_date = datetime.now(pytz.utc) - relativedelta(hours=2)

    data = {}
    bz = Bugzilla(
        get_bz_search(signature, start_date),
        bughandler=bug_handler,
        bugdata=data,
    ).get_data()
    # Ask Socorro for its bugs before waiting on the Bugzilla search.
    bugs = socorro.Bugs.get_bugs([signature])[signature]
    bz.wait()

    # Snapshot the ids found by the search: ``data`` is mutated below.
    bz_bugs = set(data)
    old_bugs = []
    for bug in bugs:
        if bug not in bz_bugs:
            old_bugs.append(bug)
            # the bug is in Socorro and not in search query
            data[bug] = None

    bz = Bugzilla(
        bugids=old_bugs,
        include_fields=BZ_FIELDS,
        bughandler=bug_handler,
        bugdata=data,
    )
    if wait:
        bz.wait()

    logger.info("Get bugs: finished.")
    return data if wait else (bz, data)
def get(channel, date, product='Firefox', duration=11, tc_limit=50, crash_type='all', startup=False):
    """Get crashes info

    Args:
        channel (str): the channel
        date (str): the final date
        product (Optional[str]): the product
        duration (Optional[int]): the duration to retrieve the data
        tc_limit (Optional[int]): the number of topcrashes to load
        crash_type (Optional[str]): 'all' (default) or 'browser' or 'content' or 'plugin'.
            Kept for backward compatibility: this function does not use it in any query.
        startup (Optional[bool]): if True, restrict the top-signatures query to
            startup crashes (``startup_crash`` for versions >= 51, ``uptime <= 60`` otherwise)

    Returns:
        dict: contains all the info relative to the crashes, or None when the
            versions do not all share one single throttle value
    """
    def announce(message):
        # No trailing newline: the check mark written by done() ends the line.
        sys.stdout.write(message)
        sys.stdout.flush()

    def done():
        sys.stdout.write(' ✔\n')
        sys.stdout.flush()

    channel = channel.lower()
    version = v[channel]

    announce('Getting version information from Socorro...')
    versions_info = socorro.ProductVersions.get_version_info(version, channel=channel, product=product)
    versions = versions_info.keys()
    platforms = socorro.Platforms.get_cached_all()
    done()

    # The second item of each version info is its throttle value; the stats
    # are only meaningful when every version shares the same one.
    throttle_values = {info[1] for info in versions_info.values()}
    if len(throttle_values) != 1:
        return None
    throttle = throttle_values.pop()

    _date = utils.get_date_ymd(date)
    start_date = utils.get_date_str(_date - timedelta(duration - 1))
    end_date = utils.get_date_str(_date)

    # First, we get the ADI
    announce('Getting ADI from Socorro...')
    adi = socorro.ADI.get(version=versions, product=product, end_date=end_date,
                          duration=duration, platforms=platforms)
    adi = [adi[key] for key in sorted(adi.keys(), reverse=True)]
    done()

    # get the khours
    announce('Getting khours from Re:dash...')
    khours = Redash.get_khours(utils.get_date_ymd(start_date), utils.get_date_ymd(end_date),
                               channel, versions, product)
    khours = [khours[key] for key in sorted(khours.keys(), reverse=True)]
    done()

    overall_crashes_by_day = []
    signatures = {}

    # Position of each platform's crash count inside a signature's stats list.
    platform_slots = {'Windows NT': 1, 'Mac OS X': 2, 'Linux': 3}

    def signature_handler(json):
        for signature in json['facets']['signature']:
            # [count, win_count, mac_count, linux_count, startup_count, estimated_user_count]
            stats = [signature['count'], 0, 0, 0, 0, 0]
            signatures[signature['term']] = stats
            facets = signature['facets']

            for platform in facets['platform']:
                slot = platform_slots.get(platform['term'])
                if slot is not None:
                    stats[slot] = platform['count']

            # XXX: Remove this when all versions will have the StartupCrash annotation.
            if version >= 51:
                for startup_crash in facets['startup_crash']:
                    if startup_crash['term'] in ('1', 'T'):
                        stats[4] += startup_crash['count']
            else:
                for uptime in facets['histogram_uptime']:
                    if uptime['term'] == 0:
                        stats[4] = uptime['count']
                        break

            stats[5] = facets['cardinality_install_time']['value']

        # Prepend each histogram bucket, so the list ends up in reverse bucket order.
        for day in json['facets']['histogram_date']:
            overall_crashes_by_day.insert(0, day['count'])

    params = {
        'product': product,
        'version': versions,
        'date': socorro.SuperSearch.get_search_date(start_date, end_date),
        'release_channel': channel,
        '_aggs.signature': [
            'platform', '_histogram.uptime', '_cardinality.install_time', 'startup_crash'
        ],
        '_results_number': 0,
        '_facets_size': tc_limit,
        '_histogram.date': ['product'],
        '_histogram_interval': 1,
        '_histogram_interval.uptime': 60,
    }

    if startup:
        # XXX: Remove this when all versions will have the StartupCrash annotation.
        if version >= 51:
            params['startup_crash'] = True
        else:
            params['uptime'] = '<=60'

    announce('Getting top signatures from Socorro...')
    socorro.SuperSearch(params=params, handler=signature_handler).wait()
    done()

    bug_flags = ['resolution', 'id', 'last_change_time', 'cf_tracking_firefox' + str(version)]
    bug_flags.extend('cf_status_firefox' + str(i)
                     for i in range(int(version), int(v['nightly']) + 1))

    # TODO: too many requests... should be improved with chunks
    bugs = {}
    # TODO: Use regexp, when the Bugzilla bug that prevents them from working will be fixed.
    bug_search_base = {
        'j_top': 'OR',
        'o1': 'substring', 'f1': 'cf_crash_signature', 'v1': None,
        'o2': 'substring', 'f2': 'cf_crash_signature', 'v2': None,
        'o3': 'substring', 'f3': 'cf_crash_signature', 'v3': None,
        'o4': 'substring', 'f4': 'cf_crash_signature', 'v4': None,
        'include_fields': bug_flags,
    }

    queries = []
    for sgn in signatures:
        cparams = bug_search_base.copy()
        # Cover the four spacing variants of the "[@ signature ]" notation.
        cparams['v1'] = '[@' + sgn + ']'
        cparams['v2'] = '[@ ' + sgn + ' ]'
        cparams['v3'] = '[@ ' + sgn + ']'
        cparams['v4'] = '[@' + sgn + ' ]'
        bugs[sgn] = []
        queries.append(Query(Bugzilla.API_URL, cparams, __bug_handler, bugs[sgn]))
    res_bugs = Bugzilla(queries=queries)

    # we have stats by signature in self.signatures
    # for each signature get the number of crashes on the last X days
    # so get the signature trend
    trends = {}
    default_trend = {_date - timedelta(i): 0 for i in range(duration)}

    trend_search_base = {
        'product': product,
        'version': versions,
        'signature': None,
        'date': socorro.SuperSearch.get_search_date(start_date, end_date),
        'release_channel': channel,
        '_results_number': 0,
        '_histogram.date': ['signature'],
        '_histogram_interval': 1,
    }

    def trend_query(sgn_group):
        cparams = trend_search_base.copy()
        cparams['signature'] = sgn_group
        return Query(socorro.SuperSearch.URL, cparams,
                     functools.partial(__trend_handler, default_trend), trends)

    queries = []
    exact_signatures = ['=' + sgn for sgn in signatures]
    for sgns in Connection.chunks(exact_signatures, 10):
        sgn_group = []
        for sgn in sgns:
            # Start a new query once the accumulated signature text reaches
            # 1000 characters.
            if sum(len(s) for s in sgn_group) >= 1000:
                queries.append(trend_query(sgn_group))
                sgn_group = []
            sgn_group.append(sgn)
        if sgn_group:
            queries.append(trend_query(sgn_group))

    announce('Getting trends for top signatures from Socorro...')
    socorro.SuperSearch(queries=queries).wait()
    done()

    # Pair every signature's stats with its trend (most recent day first).
    # A signature for which no trend was collected gets the all-zero default
    # so that the (stats, trend) shape expected below always holds.
    for sgn in list(signatures):
        trend = trends.get(sgn, default_trend)
        signatures[sgn] = (
            signatures[sgn],
            [trend[key] for key in sorted(trend.keys(), reverse=True)],
        )

    # order self.signatures by crash count
    ranked = sorted(signatures.items(), key=lambda item: item[1][0][0], reverse=True)
    _signatures = {}
    for rank, (sgn, _) in enumerate(ranked, 1):
        _signatures[sgn] = rank  # top crash rank

    announce('Getting bugs linked to the top signatures from Bugzilla...')
    res_bugs.wait()
    done()

    # TODO: In the first query to get the bugs, also get dupe_of and avoid the first query
    # in follow_dup (so modify follow_dup to accept both a bug ID or a bug object).
    queries = []
    for sgn in signatures:
        duplicate_ids = [bug['id'] for bug in bugs[sgn] if bug['resolution'] == 'DUPLICATE']
        if not duplicate_ids:
            # Nothing to resolve for this signature: skip the follow_dup lookup.
            continue

        # Remove bugs resolved as DUPLICATE from the list of bugs associated to the signature.
        duplicate_id_set = set(duplicate_ids)
        bugs[sgn] = [bug for bug in bugs[sgn] if bug['id'] not in duplicate_id_set]
        known_ids = {bug['id'] for bug in bugs[sgn]}

        # Find duplicates for bugs resolved as DUPLICATE, keeping only targets
        # that are not already associated to the signature.
        duplicate_targets = [
            str(target)
            for target in Bugzilla.follow_dup(duplicate_ids).values()
            if target is not None and int(target) not in known_ids
        ]
        if not duplicate_targets:
            continue

        # Get info about bugs that the DUPLICATE bugs have been duped to.
        params = {
            'id': ','.join(duplicate_targets),
            'include_fields': bug_flags,
        }
        queries.append(Query(Bugzilla.API_URL, params, __bug_handler, bugs[sgn]))

    announce('Resolving duplicate bugs to the bugs they\'ve been duplicated to...')
    Bugzilla(queries=queries).wait()
    done()

    for sgn, (counts, trend) in signatures.items():
        # counts is [count, win_count, mac_count, linux_count, startup_count,
        # estimated_user_count]; trend is the per-day crash count list.
        crash_count = counts[0]
        # Guard against a zero crash count instead of raising ZeroDivisionError.
        startup_percent = float(counts[4]) / float(crash_count) if crash_count else 0.0
        _signatures[sgn] = {
            'tc_rank': _signatures[sgn],
            'crash_count': crash_count,
            'estimated_user_count': counts[5],
            'startup_percent': startup_percent,
            'crash_by_day': trend,
            'bugs': bugs[sgn],
        }

    return {
        'start_date': start_date,
        'end_date': end_date,
        'versions': list(versions),
        'adi': adi,
        'khours': khours,
        'crash_by_day': overall_crashes_by_day,
        'signatures': _signatures,
        'throttle': float(throttle),
    }