Example #1
def get_builds(product, channel, date):
    """Get the buildids for a product/channel prior to date"""
    if channel == "nightly":
        # for nightly, the strategy is pretty simple:
        #  - just get builds from a few days before (and update the old ones too)
        ndays = config.get_ndays()
        few_days_ago = date - relativedelta(days=ndays + 5)
        few_days_ago = datetime(few_days_ago.year, few_days_ago.month,
                                few_days_ago.day)
        search_buildid = [
            ">=" + utils.get_buildid(few_days_ago),
            "<=" + utils.get_buildid(date),
        ]
        search_date = ">=" + lmdutils.get_date_str(few_days_ago)
        bids = get_buildids_from_socorro(search_buildid, search_date, product)
    else:
        bids = []
        search_date = ""
        min_date = None
        data = models.Build.get_last_versions(date, channel, product, n=3)
        if data:
            # data are ordered by buildid (desc)
            bids = [x["buildid"] for x in data]
            first_date = utils.get_build_date(bids[-1])
            if min_date is None or min_date > first_date:
                min_date = first_date
            if min_date:
                search_date = ">=" + lmdutils.get_date_str(min_date)

    return bids, search_date
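A minimal sketch of the nightly window computed above, assuming ndays is 7 and that utils.get_buildid renders a datetime as a YYYYMMDDHHMMSS string (both are assumptions, not taken from this example):

from datetime import datetime
from dateutil.relativedelta import relativedelta

date = datetime(2018, 1, 15, 13, 37)
ndays = 7  # assumed value of config.get_ndays()
few_days_ago = date - relativedelta(days=ndays + 5)
few_days_ago = datetime(few_days_ago.year, few_days_ago.month, few_days_ago.day)
search_buildid = ['>=' + few_days_ago.strftime('%Y%m%d%H%M%S'),  # assumed buildid format
                  '<=' + date.strftime('%Y%m%d%H%M%S')]
search_date = '>=' + few_days_ago.strftime('%Y-%m-%d')
# search_buildid -> ['>=20180103000000', '<=20180115133700']
# search_date    -> '>=2018-01-03'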
Example #2
def prepare(significants, bugs_by_signature, totals, date):
    if significants:
        today = utils.get_date_ymd(date)
        yesterday = today - relativedelta(days=1)
        yesterday = utils.get_date_str(yesterday)
        tomorrow = today + relativedelta(days=1)
        tomorrow = utils.get_date_str(tomorrow)
        today = utils.get_date_str(today)

        search_date = ['>=' + today, '<' + tomorrow]
        affected_chans = set()
        urls = defaultdict(lambda: dict())
        spikes_number = 0
        results = OrderedDict()

        for product in products:
            if product in significants:
                data1 = significants[product]
                results1 = OrderedDict()
                results[product] = results1
                for chan in channels:
                    if chan in data1:
                        affected_chans.add(chan)
                        params = {
                            'product': product,
                            'date': search_date,
                            'release_channel': chan
                        }
                        params.update(query)
                        url = socorro.SuperSearch.get_link(params)
                        urls[product][chan] = url
                        spikes_number += 1
                        results2 = OrderedDict()
                        results1[chan] = results2
                        sgns = data1[chan]
                        # we order on the 2nd number (today) and the signature
                        for sgn, num in sorted(sgns.items(),
                                               key=lambda p: (p[1][1], p[0]),
                                               reverse=True):
                            bugs = bugs_by_signature.get(sgn, {})
                            results3 = OrderedDict()
                            results2[sgn] = results3
                            results3['numbers'] = num
                            results3['resolved'] = bugs.get('resolved', None)
                            results3['unresolved'] = bugs.get(
                                'unresolved', None)
        affected_chans = list(sorted(affected_chans))

        return results, spikes_number, urls, affected_chans, yesterday, today
    return None
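The one-day search window built here is a recurring pattern on this page; a self-contained sketch of it, assuming get_date_str yields YYYY-MM-DD strings:

from datetime import datetime
from dateutil.relativedelta import relativedelta

today = datetime(2018, 1, 15)
tomorrow = today + relativedelta(days=1)
search_date = ['>=' + today.strftime('%Y-%m-%d'),
               '<' + tomorrow.strftime('%Y-%m-%d')]
# -> ['>=2018-01-15', '<2018-01-16'], i.e. everything reported on that day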
Example #3
    def get_extra_for_template(self):
        return {
            'nightly': self.nightly,
            'beta': self.beta,
            'release': self.release,
            'date': lmdutils.get_date_str(self.date),
        }
Example #4
def update_status_flags(info, update=False, verbose=False):
    status_flags_by_channel = info['status_flags']
    base_versions = info['base_versions']
    start_date_by_channel = info['start_dates']
    end_date = info['end_date']

    for c, d in start_date_by_channel.items():
        start_date_by_channel[c] = utils.get_date_str(d)

    bugs_to_update = {}

    for sgn, i in info['signatures'].items():
        data = generate_bug_report(sgn, i, status_flags_by_channel,
                                   base_versions, start_date_by_channel,
                                   end_date)
        if data:
            bugid = i['bugid']
            bugs_to_update[bugid] = data

    for bugid, data in bugs_to_update.items():
        __warn('Bug %d: %s' % (bugid, str(data)), verbose)
        __warn(data['comment']['body'], verbose)
        if update:
            Bugzilla([str(bugid)]).put(data)

    if update:
        links = '\n'.join(Bugzilla.get_links(list(bugs_to_update.keys())))
        __warn('Bug links: %s' % links, verbose)
Example #5
def fill_tables():
    engine = db.get_engine(app)
    if not engine.dialect.has_table(engine, 'crashes_bytype'):
        import requests

        logger.info('Generate tables')
        types = {
            'crashes-bytype': Bytype,
            'crashes-categories': Categories,
            'annotations': Annotations
        }

        db.create_all()

        base_url = 'https://crash-analysis.mozilla.com/rkaiser/{}-{}-{}.json'
        for product in magutils.get_products():
            for channel in magutils.get_channels():
                for typ, obj in types.items():
                    url = base_url.format(product, channel, typ)
                    logger.info('Get data from {}'.format(url))
                    response = requests.get(url)
                    logger.info('Status is {}'.format(response.status_code))
                    if response.status_code == 200:
                        data = response.json()
                        obj.populate(product, channel, data)
                        logger.info('DB populated for {}::{}'.format(
                            product, channel))
                        if product == 'Firefox' and \
                           channel == 'release' and \
                           typ == 'crashes-bytype':
                            dates = [magutils.get_date(d) for d in data.keys()]
                            last = max(dates)
                            last = utils.get_date_str(last)
                            Lastdate.put(last)
Example #6
    def get_extra_for_template(self):
        return {
            'nightly': self.nightly,
            'beta': self.beta,
            'release': self.release,
            'date': lmdutils.get_date_str(self.date),
        }
Example #7
    def get_extra_for_template(self):
        return {
            "nightly": self.nightly,
            "beta": self.beta,
            "release": self.release,
            "date": lmdutils.get_date_str(self.date),
        }
Example #8
def update_status_flags(info, update=False, verbose=False):
    status_flags_by_channel = info['status_flags']
    base_versions = info['base_versions']
    start_date_by_channel = info['start_dates']
    end_date = info['end_date']

    for c, d in start_date_by_channel.items():
        start_date_by_channel[c] = utils.get_date_str(d)

    bugs_to_update = {}

    for sgn, i in info['signatures'].items():
        data = generate_bug_report(sgn, i, status_flags_by_channel, base_versions, start_date_by_channel, end_date)
        if data:
            bugid = i['bugid']
            bugs_to_update[bugid] = data

    for bugid, data in bugs_to_update.items():
        __warn('Bug %d: %s' % (bugid, str(data)), verbose)
        __warn(data['comment']['body'], verbose)
        if update:
            Bugzilla([str(bugid)]).put(data)

    if update:
        links = '\n'.join(Bugzilla.get_links(list(bugs_to_update.keys())))
        __warn('Bug links: %s' % links, verbose)
Example #9
    def test_cache(self):
        cache = Cache('test_cache', 7)
        cache.set_dry_run(False)

        bugids = [123, 456, 789]
        cache.add(bugids)

        for bugid in bugids:
            assert bugid in cache
            assert str(bugid) in cache

        assert 101112 not in cache
        assert '101112' not in cache

        with open(cache.get_path(), 'r') as In:
            data = json.load(In)

        for bugid in ['123', '456']:
            date = data[bugid]
            date = lmdutils.get_date_ymd(date) - relativedelta(days=8)
            data[bugid] = lmdutils.get_date_str(date)

        with open(cache.get_path(), 'w') as Out:
            json.dump(data, Out)

        cache = Cache('test_cache', 7)
        cache.set_dry_run(False)

        assert 123 not in cache
        assert 456 not in cache
        assert 789 in cache
Example #10
    def test_cache(self):
        cache = Cache("test_cache", 7)
        cache.set_dry_run(False)

        bugids = [123, 456, 789]
        cache.add(bugids)

        for bugid in bugids:
            assert bugid in cache
            assert str(bugid) in cache

        assert 101112 not in cache
        assert "101112" not in cache

        with open(cache.get_path(), "r") as In:
            data = json.load(In)

        for bugid in ["123", "456"]:
            date = data[bugid]
            date = lmdutils.get_date_ymd(date) - relativedelta(days=8)
            data[bugid] = lmdutils.get_date_str(date)

        with open(cache.get_path(), "w") as Out:
            json.dump(data, Out)

        cache = Cache("test_cache", 7)
        cache.set_dry_run(False)

        assert 123 not in cache
        assert 456 not in cache
        assert 789 in cache
Example #11
def get_versions(date, product, channel):
    earliest_mindate = utils.get_date_str(date - datetime.timedelta(days=365))
    all_versions = get_all_versions(product, earliest_mindate)
    delta = datetime.timedelta(weeks=getMaxBuildAge()[channel])
    min_version_date = utils.get_date_ymd(date) - delta
    versions = []
    throttle = 0
    last_versions = []
    last_throttle = 0
    last_date = utils.get_guttenberg_death()
    for v in all_versions:
        if v['product'] == product and v['build_type'] == channel:
            sd = utils.get_date_ymd(v['start_date'])
            if sd <= date:
                if sd > last_date:
                    last_date = sd
                    last_versions = [v['version']]
                    last_throttle = 100. / float(v['throttle'])

                if sd > min_version_date:
                    versions.append(v['version'])
                    if throttle == 0:
                        throttle = 100. / float(v['throttle'])

    if not versions:
        versions = last_versions
        throttle = last_throttle

    return versions, throttle
Example #12
    def get(product, channel, date):
        date = magutils.get_date(date)
        if date:
            bytype = db.session.query(Bytype).filter_by(product=product,
                                                        channel=channel,
                                                        date=date)
        else:
            bytype = db.session.query(Bytype).filter_by(product=product,
                                                        channel=channel)
        r = {}
        for bt in bytype:
            date = utils.get_date_str(bt.date)
            r[date] = {
                'adi': bt.adi,
                'crashes': {
                    'Content': bt.content,
                    'OOP Plugin': bt.oop_plugin,
                    'Hang Plugin': bt.hang_plugin,
                    'Browser': bt.browser,
                    'Gpu': bt.gpu
                },
                'versions': bt.versions.split('|')
            }

        return r
Example #13
def get_stats_for_past_weeks(product, channel, start_date_by_channel, versions_by_channel, analysis, search_start_date, end_date, check_for_fx=True):
    queries = []
    trends = {}
    signatures_by_chan = {}
    default_trend_by_chan = {}
    ref_monday, _ = utils.get_monday_sunday(utils.get_date_ymd(end_date))

    def get_past_week(date):
        monday, _ = utils.get_monday_sunday(date)
        return (ref_monday - monday).days // 7

    for chan in channel:
        past_w = get_past_week(start_date_by_channel[chan])
        default_trend_by_chan[chan] = {i: 0 for i in range(past_w + 1)}

    for signature, info in analysis.items():
        if not check_for_fx or info['firefox']:
            data = {}
            trends[signature] = data
            # for chan, volume in info['affected']:
            for chan in channel:
                if chan in signatures_by_chan:
                    signatures_by_chan[chan].append(signature)
                else:
                    signatures_by_chan[chan] = [signature]
                data[chan] = default_trend_by_chan[chan].copy()

    def handler_ss(chan, json, data):
        sgns = []
        for facets in json['facets']['histogram_date']:
            d = utils.get_date_ymd(facets['term'])
            w = get_past_week(d)
            s = facets['facets']['signature']
            for signature in s:
                count = signature['count']
                sgn = signature['term']
                sgns.append(sgn)
                data[sgn][chan][w] += count

    for chan, signatures in signatures_by_chan.items():
        if search_start_date:
            search_date = socorro.SuperSearch.get_search_date(search_start_date, end_date)
        else:
            search_date = socorro.SuperSearch.get_search_date(utils.get_date_str(start_date_by_channel[chan]), end_date)

        vers = versions_by_channel[chan]
        for sgns in Connection.chunks(signatures, 10):
            queries.append(Query(socorro.SuperSearch.URL,
                                 {'signature': ['=' + s for s in sgns],
                                  'product': product,
                                  'version': vers,
                                  'release_channel': chan,
                                  'date': search_date,
                                  '_histogram.date': 'signature',
                                  '_results_number': 0},
                           handler=functools.partial(handler_ss, chan), handlerdata=trends))
    socorro.SuperSearch(queries=queries).wait()

    return trends
Example #14
def get_search_date(search_start_date,
                    start_date,
                    end_date=utils.get_date('today')):
    if search_start_date:
        return socorro.SuperSearch.get_search_date(search_start_date, end_date)
    else:
        return socorro.SuperSearch.get_search_date(
            utils.get_date_str(start_date), end_date)
Example #15
def handler_rev(json, data):
    push = json["pushdate"][0]
    push = datetime.datetime.utcfromtimestamp(push)
    push = lmdutils.as_utc(push)
    data["date"] = lmdutils.get_date_str(push)
    data["backedout"] = utils.is_backout(json)
    m = BUG_PAT.search(json["desc"])
    if not m or m.group(1) != data["bugid"]:
        data["bugid"] = ""
Example #16
def handler_rev(json, data):
    push = json['pushdate'][0]
    push = datetime.datetime.utcfromtimestamp(push)
    push = lmdutils.as_utc(push)
    data['date'] = lmdutils.get_date_str(push)
    data['backedout'] = utils.is_backout(json)
    m = BUG_PAT.search(json['desc'])
    if not m or m.group(1) != data['bugid']:
        data['bugid'] = ''
Example #17
def handler_rev(json, data):
    push = json['pushdate'][0]
    push = datetime.datetime.utcfromtimestamp(push)
    push = lmdutils.as_utc(push)
    data['date'] = lmdutils.get_date_str(push)
    data['backedout'] = utils.is_backout(json)
    m = BUG_PAT.search(json['desc'])
    if not m or m.group(1) != data['bugid']:
        data['bugid'] = ''
Example #18
def get_info_from_hg(json):
    res = {}
    push = json["pushdate"][0]
    push = datetime.datetime.utcfromtimestamp(push)
    push = lmdutils.as_utc(push)
    res["date"] = lmdutils.get_date_str(push)
    res["backedout"] = json.get("backedoutby", "") != ""
    m = BUG_PAT.search(json["desc"])
    res["bugid"] = m.group(1) if m else ""

    return res
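A small, self-contained sketch of the pushdate handling above, assuming lmdutils.as_utc simply attaches the UTC timezone to the naive datetime:

import datetime
import pytz

push = datetime.datetime.utcfromtimestamp(1500000000)  # hypothetical pushdate
push = pytz.utc.localize(push)                         # stand-in for lmdutils.as_utc
date_str = push.strftime('%Y-%m-%d')                   # '2017-07-14'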
Example #19
def tocsv(filename, channel, versions=None, product='Firefox', start_date=None, end_date='today', duration=30, platforms=None):
    with open(filename, 'w') as Out:
        writer = csv.writer(Out, delimiter=',')
        data = get(channel, versions, product, start_date, end_date, duration, platforms)
        data = [(utils.get_date_str(d), data[d]) for d in sorted(data)]
        head = ['date', 'adi', 'browser', 'content', 'b+c', 'plugin', 'browser_rate', 'content_rate', 'b+c_rate', 'plugin_rate']
        writer.writerow(head)

        for d in data:
            row = [d[0], d[1]['adi'], d[1]['browser'], d[1]['content'], d[1]['b+c'], d[1]['plugin'], d[1]['browser_rate'], d[1]['content_rate'], d[1]['b+c_rate'], d[1]['plugin_rate']]
            writer.writerow(row)
Example #20
def get_info_from_hg(json):
    res = {}
    push = json['pushdate'][0]
    push = datetime.datetime.utcfromtimestamp(push)
    push = lmdutils.as_utc(push)
    res['date'] = lmdutils.get_date_str(push)
    res['backedout'] = json.get('backedoutby', '') != ''
    m = BUG_PAT.search(json['desc'])
    res['bugid'] = m.group(1) if m else ''

    return res
Example #21
def get_info_from_hg(json):
    res = {}
    push = json['pushdate'][0]
    push = datetime.datetime.utcfromtimestamp(push)
    push = lmdutils.as_utc(push)
    res['date'] = lmdutils.get_date_str(push)
    res['backedout'] = json.get('backedoutby', '') != ''
    m = BUG_PAT.search(json['desc'])
    res['bugid'] = m.group(1) if m else ''

    return res
Example #22
def get_filename(date, output_dir):
    try:
        if date:
            date = utils.get_date_str(utils.get_date_ymd(date))
        else:
            dates = getdates(output_dir)
            if dates['dates']:
                date = dates['dates'][-1]
            else:
                return None
        return os.path.join(output_dir, date + '.json')
    except:
        return None
Example #23
def get_filename(date, output_dir):
    try:
        if date:
            date = utils.get_date_str(utils.get_date_ymd(date))
        else:
            dates = getdates(output_dir)
            if dates['dates']:
                date = dates['dates'][-1]
            else:
                return None
        return os.path.join(output_dir, date + '.json')
    except:
        return None
Example #24
def get_params_for_link(date, query={}):
    today = utils.get_date_ymd(date)
    tomorrow = today + relativedelta(days=1)
    tomorrow = utils.get_date_str(tomorrow)
    today = utils.get_date_str(today)
    search_date = ['>=' + today, '<' + tomorrow]
    params = {'product': '',
              'date': search_date,
              'release_channel': '',
              'signature': '',
              '_facets': ['url',
                          'user_comments',
                          'install_time',
                          'version',
                          'address',
                          'moz_crash_reason',
                          'reason',
                          'build_id',
                          'platform_pretty_version',
                          'signature',
                          'useragent_locale']}
    params.update(query)
    return params
Example #25
    def get_browser_startup(product, channel, date):
        date = magutils.get_date(date)
        if date:
            cats = db.session.query(Categories).filter_by(
                product=product,
                channel=channel,
                kind='startup',
                date=date,
            )
        else:
            cats = db.session.query(Categories).filter_by(product=product,
                                                          channel=channel,
                                                          kind='startup')
        r = {}
        for cat in cats:
            date = utils.get_date_str(cat.date)
            r[date] = cat.browser

        return dict(r)
Example #26
def get_changeset(buildid, channel, product):
    """Trick to get changeset for a particular buildid/channel/product"""
    search_date = ">=" + lmdutils.get_date_str(buildid)
    buildid = utils.get_buildid(buildid)
    logger.info("Get changeset for {}-{}-{}.".format(buildid, product,
                                                     channel))

    def handler(json, data):
        pat = re.compile(r"^.*:([0-9a-f]+)$")
        if not json["facets"]["build_id"]:
            return
        for facets in json["facets"]["build_id"]:
            for tf in facets["facets"]["topmost_filenames"]:
                m = pat.match(tf["term"])
                if m:
                    chgset = m.group(1)
                    count = tf["count"]
                    data[chgset] += count

    params = {
        "product": product,
        "release_channel": channel,
        "build_id": buildid,
        "date": search_date,
        "topmost_filenames": '@"hg:hg.mozilla.org/".*:[0-9a-f]+',
        "_aggs.build_id": "topmost_filenames",
        "_results_number": 0,
        "_facets": "product",
        "_facets_size": 100,
    }

    data = defaultdict(lambda: 0)
    socorro.SuperSearch(params=params, handler=handler,
                        handlerdata=data).wait()
    chgset = None
    if data:
        chgset, _ = max(data.items(), key=lambda p: p[1])
        chgset = utils.short_rev(chgset)

    logger.info("Get changeset: finished.")

    return chgset
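The regex in the handler pulls the changeset hash out of a topmost_filenames facet term; a sketch with a hypothetical term value (the real format comes back from Socorro):

import re

pat = re.compile(r"^.*:([0-9a-f]+)$")
term = "hg:hg.mozilla.org/mozilla-central:widget/nsBaseWidget.cpp:4a2b6589168b"  # hypothetical
m = pat.match(term)
chgset = m.group(1) if m else None  # '4a2b6589168b'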
Example #27
def get_changeset(buildid, channel, product):
    """Trick to get changeset for a particular buildid/channel/product"""
    search_date = '>=' + lmdutils.get_date_str(buildid)
    buildid = utils.get_buildid(buildid)
    logger.info('Get changeset for {}-{}-{}.'.format(buildid, product,
                                                     channel))

    def handler(json, data):
        pat = re.compile(r'^.*:([0-9a-f]+)$')
        if not json['facets']['build_id']:
            return
        for facets in json['facets']['build_id']:
            for tf in facets['facets']['topmost_filenames']:
                m = pat.match(tf['term'])
                if m:
                    chgset = m.group(1)
                    count = tf['count']
                    data[chgset] += count

    params = {
        'product': product,
        'release_channel': channel,
        'build_id': buildid,
        'date': search_date,
        'topmost_filenames': '@\"hg:hg.mozilla.org/\".*:[0-9a-f]+',
        '_aggs.build_id': 'topmost_filenames',
        '_results_number': 0,
        '_facets': 'product',
        '_facets_size': 100
    }

    data = defaultdict(lambda: 0)
    socorro.SuperSearch(params=params, handler=handler,
                        handlerdata=data).wait()
    chgset = None
    if data:
        chgset, _ = max(data.items(), key=lambda p: p[1])
        chgset = utils.short_rev(chgset)

    logger.info('Get changeset: finished.')

    return chgset
Example #28
    def get(product, channel, date):
        date = magutils.get_date(date)
        if date:
            cats = db.session.query(Categories).filter_by(product=product,
                                                          channel=channel,
                                                          date=date)
        else:
            cats = db.session.query(Categories).filter_by(product=product,
                                                          channel=channel)
        r = defaultdict(lambda: dict())
        for cat in cats:
            kind = cat.kind
            date = utils.get_date_str(cat.date)
            if kind == 'shutdownhang':
                r[date]['shutdownhang'] = cat.browser
            else:
                r[date][kind] = {
                    'content': cat.content,
                    'browser': cat.browser,
                    'plugin': cat.plugin
                }

        return dict(r)
Example #29
def get_params_for_link(query={}):
    today = utils.get_date_ymd('today')
    last = today - relativedelta(days=config.get_limit())
    last = utils.get_date_str(last)
    search_date = ['>=' + last]
    params = {
        'product':
        '',
        'date':
        search_date,
        'release_channel':
        '',
        'version':
        '',
        'signature':
        '',
        '_facets': [
            'url', 'user_comments', 'install_time', 'version', 'address',
            'moz_crash_reason', 'reason', 'build_id',
            'platform_pretty_version', 'signature', 'useragent_locale'
        ]
    }
    params.update(query)
    return params
Example #30
def generate_bug_report(sgn, info, status_flags_by_channel, base_versions, start_date_by_channel, end_date, check_for_fx=True):
    data = {}
    if not check_for_fx or info['firefox']:
        volumes = default_volumes.copy()
        data = {}
        for channel, volume in info['affected']:
            data[status_flags_by_channel[channel]] = 'affected'
            volumes[channel] = volume
        for channel, volume in info['leftovers']:
            volumes[channel] = volume

        # We begin with the crash volume
        comment = 'Crash volume for signature \'%s\':\n' % sgn
        table = []
        for chan, volume in sorted(volumes.items(), key=lambda k: channel_order[k[0]]):
            version = base_versions[chan]
            start_date = start_date_by_channel[chan]
            plural = 'es' if volume != 1 else ''
            table.append(['- %s' % chan,
                          '(version %d):' % version,
                          '%d crash%s from %s.' % (volume, plural, utils.get_date_str(start_date))])
        comment += __mk_volume_table(table, 'global')

        # Make the table for the trend
        table = []
        empty = False
        N = -1
        for chan, trend in sorted(info['trend'].items(), key=lambda k: channel_order[k[0]]):
            if len(trend) >= 1:
                # we remove data for this week
                del(trend[0])
            if len(trend) >= 8:  # keep only the last seven weeks
                trend = trend[:7]

            if not trend:
                empty = True
                break

            N = max(N, len(trend))
            row = [str(n) for n in trend]
            row.insert(0, '- %s' % chan)
            table.append(row)

        if not empty:  # we have trends
            monday, sunday = utils.get_monday_sunday(utils.get_date_ymd(end_date))
            comment += '\n\nCrash volume on the last weeks (Week N is from %s to %s):\n' % (monday.strftime('%m-%d'), sunday.strftime('%m-%d'))
            headers = ['']
            for w in range(1, N + 1):
                headers.append('W. N-%d' % w)
            comment += __mk_volume_table(table, 'byweek', headers=headers)

        # Add affected platforms
        platforms = info['platforms']
        if platforms:
            comment += '\n\nAffected platform'
            if len(platforms) >= 2:
                comment += 's'
                platforms = sorted(platforms, key=lambda k: platform_order[k])
            comment += ': ' + ', '.join(platforms)

        ranks = info['rank']
        if ranks:
            # check if we have ranks
            empty = True
            for types in ranks.values():
                for v in types.values():
                    if v != -1:
                        empty = False
                        break

            if not empty:
                comment += '\n\nCrash rank on the last 7 days:\n'
                headers = ['', 'Browser', 'Content', 'Plugin']
                table = []

                def fmt_rank(s):
                    return None if s == -1 else '#' + str(s)

                for chan, types in sorted(ranks.items(), key=lambda k: channel_order[k[0]]):
                    table.append(['- %s' % chan,
                                 fmt_rank(types['browser']),
                                 fmt_rank(types['content']),
                                 fmt_rank(types['plugin'])])
                comment += __mk_volume_table(table, 'rank', headers=headers)

        data['comment'] = {'body': comment}

    return data
Example #31
def get_search_date(search_start_date, start_date, end_date=utils.get_date('today')):
    if search_start_date:
        return socorro.SuperSearch.get_search_date(search_start_date, end_date)
    else:
        return socorro.SuperSearch.get_search_date(utils.get_date_str(start_date), end_date)
Example #32
def get(channel, date, versions=None, product='Firefox', duration=1):
    """Get stability info

    Args:
        channel (str): the channel
        date (str): the final date
        versions (Optional[List[str]]): the versions to treat
        product (Optional[str]): the product
        duration (Optional[int]): the duration to retrieve the data

    Returns:
        dict: contains all the info relative to stability
    """
    channel = channel.lower()
    cycle = duration <= 0
    versions_info = socorro.ProductVersions.get_version_info(versions, channel=channel, product=product)

    versions = versions_info.keys()
    throttle = set(map(lambda p: p[1], versions_info.values()))
    diff_throttle = len(throttle) != 1
    # normally the throttle is 10% for release and 100% for other channels
    if not diff_throttle:
        throttle = throttle.pop()

    platforms = socorro.Platforms.get_cached_all()

    end_date_dt = utils.get_date_ymd(date)
    if cycle:
        # we get the start date for each version and take the min
        start_date_dt = min(map(lambda p: utils.get_date_ymd(p[0]), versions_info.values()))
        duration = (end_date_dt - start_date_dt).days + 1
    else:
        start_date_dt = end_date_dt - timedelta(duration - 1)

    start_date_str = utils.get_date_str(start_date_dt)
    end_date_str = utils.get_date_str(end_date_dt)

    # First, we get the ADI
    adi = socorro.ADI.get(version=versions, product=product, end_date=end_date_str, duration=duration, platforms=platforms)
    adi = [adi[key] for key in sorted(adi.keys(), reverse=False)]

    # Get the khours
    khours = Redash.get_khours(start_date_dt, end_date_dt, channel, versions, product)
    khours = [khours[key] for key in sorted(khours.keys(), reverse=False)]

    # Get the # of crashes (crash pings)
    crash_pings = Redash.get_number_of_crash(start_date_dt, end_date_dt, channel, versions, product)

    crashes = {}
    stats = {'m+c': 0.,
             'main': 0.,
             'content': 0.,
             'plugin': 0.,
             'all': 0.}
    for i in range(duration):
        d = end_date_dt - timedelta(i)
        crashes[d] = {}
        crashes[d]['socorro'] = {'global': stats.copy(), 'startup': stats.copy()}
        crashes[d]['telemetry'] = crash_pings[d]

    base = {'product': product,
            'version': None,
            'date': socorro.SuperSearch.get_search_date(start_date_str, end_date_str),
            'release_channel': channel,
            '_results_number': 1,
            '_histogram.date': ['product', 'process_type'],
            '_facets_size': 3}

    if diff_throttle:
        # in this case each version could have a different throttle so we need to compute stats for each version
        queries = []
        for v, t in versions_info.items():
            cparams = base.copy()
            cparams['version'] = v
            queries.append(Query(socorro.SuperSearch.URL, cparams, functools.partial(__crash_handler, t[1]), crashes))
            cparams = copy.deepcopy(cparams)
            cparams['uptime'] = '<60'
            cparams['_histogram.date'].append('uptime')
            queries.append(Query(socorro.SuperSearch.URL, cparams, functools.partial(__crash_handler, t[1]), crashes))
    else:
        base['version'] = versions
        queries = []
        queries.append(Query(socorro.SuperSearch.URL, base, functools.partial(__crash_handler, throttle), crashes))
        cparams = copy.deepcopy(base)
        cparams['uptime'] = '<60'
        cparams['_histogram.date'].append('uptime')
        queries.append(Query(socorro.SuperSearch.URL, cparams, functools.partial(__crash_handler, throttle), crashes))

    socorro.SuperSearch(queries=queries).wait()
    crashes = [crashes[key] for key in sorted(crashes.keys(), reverse=False)]

    # Now we compute the rates and the averages
    stats = {'m+c': [0., 0., 0., 0.],
             'main': [0., 0., 0., 0.],
             'content': [0., 0., 0., 0.],
             'plugin': [0., 0., 0., 0.],
             'all': [0., 0., 0., 0.]}
    averages = {}
    averages['socorro'] = {'global': stats, 'startup': copy.deepcopy(stats)}
    averages['telemetry'] = copy.deepcopy(stats)
    N = len(adi)

    # sum
    for i in range(N):
        crash_soc = crashes[i]['socorro']
        for k1, v1 in averages['socorro'].items():
            for k2, av in v1.items():
                c = crash_soc[k1][k2]
                # the rate is computed for 100 adi
                x = utils.rate(100. * c, adi[i])
                av[0] += x
                av[1] += x ** 2
                y = utils.rate(c, khours[i])
                av[2] += y
                av[3] += y ** 2
                crash_soc[k1][k2] = (c, x, y)
        crash_tel = crashes[i]['telemetry']
        for k1, av in averages['telemetry'].items():
            c = crash_tel[k1]
            # the rate is computed for 100 adi
            x = utils.rate(100. * c, adi[i])
            av[0] += x
            av[1] += x ** 2
            y = utils.rate(c, khours[i])
            av[2] += y
            av[3] += y ** 2
            crash_tel[k1] = (c, x, y)

    N = float(N)
    averages_old = {'socorro': {}, 'telemetry': {}}
    averages_new = copy.deepcopy(averages_old)

    # mean & standard deviation
    av_new_soc = averages_new['socorro']
    av_old_soc = averages_old['socorro']
    for k1, v1 in averages['socorro'].items():
        d1 = {}
        av_old_soc[k1] = d1
        d2 = {}
        av_new_soc[k1] = d2
        for k2, av in v1.items():
            m = av[0] / N
            d1[k2] = (m, math.sqrt(av[1] / N - m ** 2))
            m = av[2] / N
            d2[k2] = (m, math.sqrt(av[3] / N - m ** 2))

    av_new_tel = averages_new['telemetry']
    av_old_tel = averages_old['telemetry']
    for k1, av in averages['telemetry'].items():
        m = av[0] / N
        av_old_tel[k1] = (m, math.sqrt(av[1] / N - m ** 2))
        m = av[2] / N
        av_new_tel[k1] = (m, math.sqrt(av[3] / N - m ** 2))

    return {'start_date': start_date_str,
            'end_date': end_date_str,
            'versions': versions,
            'adi': adi,
            'khours': khours,
            'crashes': crashes,
            'averages_old': averages_old,
            'averages_new': averages_new}
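The averages at the end accumulate sum(x) and sum(x**2) per series and derive the mean and standard deviation afterwards; a minimal sketch of that step with made-up rates:

import math

rates = [2.1, 1.8, 2.4, 2.0]  # hypothetical per-day crash rates
s1 = sum(rates)
s2 = sum(x ** 2 for x in rates)
n = float(len(rates))
mean = s1 / n
std = math.sqrt(s2 / n - mean ** 2)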
Example #33
def monitor(emails=[],
            date='yesterday',
            path='',
            data=None,
            verbose=False,
            writejson=False):
    if not data:
        try:
            with open(path, 'r') as In:
                data = json.load(In)
        except IOError:
            data = {p: {c: {} for c in channels} for p in products}

    searches = []
    start_date = utils.get_date_ymd(date)
    end_date = start_date + datetime.timedelta(days=1)
    search_date = socorro.SuperSearch.get_search_date(start_date, end_date)
    all_versions = {}
    versions_pc = defaultdict(lambda: defaultdict(lambda: []))

    def handler_ss(date, product, json, data):
        if not json['errors']:
            for info in json['facets']['release_channel']:
                chan = info['term']
                total = info['count']
                if product == 'FennecAndroid':
                    d = {'total': total}
                    if chan in data:
                        data[chan][date] = d
                    else:
                        data[chan] = {date: d}
                else:
                    throttle = 10 if chan == 'release' else 1
                    total *= throttle
                    d = {
                        'browser': 0,
                        'plugin': 0,
                        'content': 0,
                        'total': total
                    }
                    for pt in info['facets']['process_type']:
                        term = pt['term']
                        count = pt['count']
                        if term in d:  # term can be 'gpu' (very rare)
                            d[term] += count * throttle
                    d['browser'] = total - (d['plugin'] + d['content'])
                    if chan in data:
                        data[chan][date] = d
                    else:
                        data[chan] = {date: d}

    if date == 'today':
        dates = ['yesterday', 'today']
    else:
        dates = [date]

    delay_by_channel = {
        'release':
        config.get('MonitorStartupCrashes', 'delay_release', 12, type=int),
        'beta':
        config.get('MonitorStartupCrashes', 'delay_beta', 4, type=int),
        'aurora':
        config.get('MonitorStartupCrashes', 'delay_aurora', 9, type=int),
        'nightly':
        config.get('MonitorStartupCrashes', 'delay_nightly', 9, type=int)
    }

    for data_date in dates:
        data_date = utils.get_date_ymd(data_date)
        next_data_date = data_date + datetime.timedelta(days=1)
        search_data_date = socorro.SuperSearch.get_search_date(
            data_date, next_data_date)
        for product in products:
            versions = socorro.ProductVersions.get_all_versions(product)
            all_versions[product] = []
            for chan in channels:
                info = versions[chan]
                last_ver_major = max(info.keys())
                _start_date = data_date - datetime.timedelta(
                    weeks=delay_by_channel[chan])
                for major in range(last_ver_major, last_ver_major - 4, -1):
                    for v, d in info[major]['versions'].items():
                        if not v.endswith(
                                'b') and _start_date <= d <= data_date:
                            all_versions[product].append(v)
                            versions_pc[product][chan].append(v)
            searches.append(
                socorro.SuperSearch(params={
                    'product': product,
                    'date': search_data_date,
                    'release_channel': channels,
                    'version': all_versions[product],
                    'uptime': '<60',
                    '_results_number': 0,
                    '_facets_size': 100,
                    '_aggs.release_channel': 'process_type'
                },
                                    handler=functools.partial(
                                        handler_ss,
                                        utils.get_date_str(data_date),
                                        product),
                                    handlerdata=data[product]))

    for s in searches:
        s.wait()

    if writejson and path:
        with open(path, 'w') as Out:
            json.dump(data, Out, sort_keys=True)

    new_start_date = start_date - datetime.timedelta(days=1)
    new_search_date = socorro.SuperSearch.get_search_date(
        new_start_date, end_date)

    def handler_ss_spikers(json, data):
        if not json['errors']:
            for facets in json['facets']['histogram_date']:
                date = utils.get_date_ymd(facets['term'])
                s = facets['facets']['signature']
                d = {}
                data[date] = d
                for signature in s:
                    count = signature['count']
                    sgn = signature['term']
                    d[sgn] = count

    spikers_info = defaultdict(lambda: defaultdict(lambda: dict()))

    searches = []
    for product, i1 in data.items():
        for chan, i2 in i1.items():
            _data = [
                float(i[1]['total'])
                for i in sorted(i2.items(),
                                key=lambda p: utils.get_date_ymd(p[0]))
                if utils.get_date_ymd(i[0]) <= start_date
            ]
            # print(product, chan)
            issp = spikeanalysis.is_spiking_ma(_data,
                                               alpha=2.5,
                                               win=7,
                                               method='mean',
                                               plot=False) == 'up'
            # spikeanalysis.get_spikes_ma(_data, alpha=2.5, win=7, method='mean', plot=True)
            if issp:
                # spikeanalysis.is_spiking_ma(_data, alpha=2.5, win=7, method='mean', plot=True)
                searches.append(
                    socorro.SuperSearch(
                        params={
                            'product': product,
                            'date': new_search_date,
                            'release_channel': chan,
                            'version': all_versions[product],
                            'uptime': '<60',
                            '_results_number': 0,
                            '_histogram.date': 'signature',
                            '_facets_size': 100
                        },
                        handler=handler_ss_spikers,
                        handlerdata=spikers_info[product][chan]))

    for s in searches:
        s.wait()

    if spikers_info:
        # So we have some spikes... need to send an email with all the info
        searches = []
        interesting_sgns = get_most_signifiant_increases(spikers_info)
        bugs_by_signature = get_bugs(interesting_sgns)
        affected_chans = set()
        crash_data = {p: {} for p in spikers_info.keys()}
        spikes_number = 0

        def handler_global(product, json, data):
            if not json['errors']:
                for info in json['facets']['release_channel']:
                    chan = info['term']
                    throttle = 10 if product == 'FennecAndroid' and chan == 'release' else 1
                    total = info['count'] * throttle
                    data[chan] = total

        for p, i1 in spikers_info.items():
            searches.append(
                socorro.SuperSearch(params={
                    'product': p,
                    'date': search_date,
                    'release_channel': list(i1.keys()),
                    'version': all_versions[p],
                    '_results_number': 0,
                    '_facets_size': 5,
                    '_aggs.release_channel': 'signature'
                },
                                    handler=functools.partial(
                                        handler_global, p),
                                    handlerdata=crash_data[p]))

            for c in i1.keys():
                spikes_number += 1
                affected_chans.add(c)
                url = socorro.SuperSearch.get_link({
                    'product': p,
                    'date': search_date,
                    'release_channel': c,
                    'version': versions_pc[p][c],
                    'uptime': '<60'
                })
                sgns_chan = interesting_sgns[p]
                sgns_stats = [(s, bugs_by_signature[s], t[2], t[1], t[3])
                              for s, t in sorted(sgns_chan[c].items(),
                                                 key=lambda p: p[1][0],
                                                 reverse=True)]
                sgns_chan[c] = (url, sgns_stats)

        for s in searches:
            s.wait()

        env = Environment(loader=FileSystemLoader('templates'))
        env.filters['inflect'] = inflect
        template = env.get_template('startup_crashes_email')
        _is = OrderedDict()
        for product in sorted(interesting_sgns.keys()):
            _is[product] = OrderedDict()
            for chan in sorted(interesting_sgns[product].keys()):
                _is[product][chan] = interesting_sgns[product][chan]
        interesting_sgns = _is

        body = template.render(
            spikes_number=spikes_number,
            spikes_number_word=inflect.engine().number_to_words(spikes_number),
            crash_data=crash_data,
            start_date=utils.get_date_str(new_start_date),
            end_date=utils.get_date_str(start_date),
            interesting_sgns=interesting_sgns)
        title = 'Spikes in startup crashes in %s' % ', '.join(affected_chans)

        if emails:
            gmail.send(emails, title, body, html=True)
        if verbose:
            print('Title: %s' % title)
            print('Body:')
            print(body)

        return {'title': title, 'body': body}

    return None
Example #34
    def test_get_date_str(self):
        date = '1991-04-16'
        self.assertEqual(
            utils.get_date_str(datetime.datetime.strptime(date, '%Y-%m-%d')),
            date)
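This test pins down the contract relied on throughout this page; a minimal stand-in that satisfies it (the real implementation may differ):

import datetime

def get_date_str_sketch(d):
    return d.strftime('%Y-%m-%d')

assert get_date_str_sketch(datetime.datetime(1991, 4, 16)) == '1991-04-16'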
Example #35
def reformat_data(data):
    _data = {}
    for k, v in data.items():
        _data[utils.get_date_str(k)] = v
    return _data
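What the function does, sketched with a hypothetical input and the YYYY-MM-DD assumption for get_date_str:

from datetime import datetime

data = {datetime(2018, 1, 15): 42}  # hypothetical dict keyed by datetime
_data = {k.strftime('%Y-%m-%d'): v for k, v in data.items()}
# -> {'2018-01-15': 42}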
Example #36
def get_uuids_for_spiking_signatures(channel, cache=None, product='Firefox', date='today', limit=10000, max_days=3, threshold=5):
    psttz = pytz.timezone('US/Pacific')
    end_date = utils.get_date_ymd(date)  # 2016-10-18 UTC
    end_date_moz = psttz.localize(datetime(end_date.year, end_date.month, end_date.day))  # 2016-10-18 PST
    end_buildid = utils.get_buildid_from_date(end_date_moz)  # < 20161018000000
    start_date_moz = end_date_moz - timedelta(days=max_days + 1)  # 2016-10-14 PST (max_days == 3)
    start_buildid = utils.get_buildid_from_date(start_date_moz)  # >= 20161014000000
    search_buildid = ['>=' + start_buildid, '<' + end_buildid]
    start_date = utils.as_utc(start_date_moz)  # 2016-10-14 07:00:00 UTC
    search_date = '>=' + utils.get_date_str(start_date)
    data = defaultdict(lambda: defaultdict(lambda: 0))
    buildids = {}

    def handler(json, data):
        if not json['errors']:
            for facets in json['facets']['build_id']:
                date = utils.get_date_from_buildid(facets['term']).astimezone(psttz)
                buildids[date] = facets['count']
                for s in facets['facets']['signature']:
                    sgn = s['term']
                    count = s['count']
                    data[sgn][date] += count

    socorro.SuperSearch(params={'product': product,
                                'date': search_date,
                                'build_id': search_buildid,
                                'release_channel': channel,
                                '_aggs.build_id': 'signature',
                                '_facets_size': limit,
                                '_results_number': 0},
                        handler=handler, handlerdata=data).wait()

    _data = {}
    base = {start_date_moz + timedelta(days=i): {'buildids': {}, 'total': 0} for i in range(max_days + 1)}  # from 2016-10-14 to 2016-10-17 PST

    for sgn, info in data.items():
        d = copy.deepcopy(base)
        _data[sgn] = d
        for bid, count in info.items():
            date = psttz.localize(datetime(bid.year, bid.month, bid.day))
            d[date]['buildids'][bid] = count
            d[date]['total'] += count
    data = _data

    spiking_signatures = []
    for sgn, info in data.items():
        stats2 = [i['total'] for _, i in sorted(info.items(), key=lambda p: p[0])]
        if all(i == 0 for i in stats2[:-1]) and stats2[-1] >= threshold:
            spiking_signatures.append(sgn)

    data = None
    if spiking_signatures:
        # sort the signatures to be sure to always have the same order for the test
        spiking_signatures = sorted(spiking_signatures)

        start_buildid = utils.get_buildid_from_date(end_date_moz - timedelta(days=1))
        search_buildid = ['>=' + start_buildid, '<' + end_buildid]
        queries = []
        data = defaultdict(lambda: list())

        def handler(json, data):
            if not json['errors']:
                for facets in json['facets']['proto_signature']:
                    proto = facets['term']
                    count = facets['count']
                    facets = facets['facets']
                    sgn = facets['signature'][0]['term']
                    first_uuid = facets['uuid'][0]['term']
                    uuids = {i['term'] for i in facets['uuid']}
                    if cache:
                        i = uuids.intersection(cache['uuids'])
                        uuid = i.pop() if i else first_uuid
                    else:
                        uuid = first_uuid
                    data[sgn].append({'proto': proto, 'uuid': uuid, 'count': count})

        for sgns in Connection.chunks(spiking_signatures, 5):
            queries.append(Query(socorro.SuperSearch.URL,
                                 {'product': product,
                                  'date': search_date,
                                  'build_id': search_buildid,
                                  'signature': ['=' + s for s in sgns],
                                  'release_channel': channel,
                                  '_aggs.proto_signature': ['uuid', 'signature'],
                                  '_facets_size': 10000,
                                  '_results_number': 0},
                                 handler=handler, handlerdata=data))

        socorro.SuperSearch(queries=queries).wait()

    return data
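A compact check of the build-id window spelled out in the inline comments above (max_days == 3), assuming get_buildid_from_date formats a datetime as YYYYMMDDHHMMSS:

from datetime import datetime, timedelta
import pytz

psttz = pytz.timezone('US/Pacific')
end_date_moz = psttz.localize(datetime(2016, 10, 18))
start_date_moz = end_date_moz - timedelta(days=3 + 1)
search_buildid = ['>=' + start_date_moz.strftime('%Y%m%d%H%M%S'),
                  '<' + end_date_moz.strftime('%Y%m%d%H%M%S')]
# -> ['>=20161014000000', '<20161018000000']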
Example #37
def get(channel,
        date,
        product='Firefox',
        duration=11,
        tc_limit=50,
        crash_type='all',
        startup=False):
    """Get crashes info

    Args:
        channel (str): the channel
        date (str): the final date
        product (Optional[str]): the product
        duration (Optional[int]): the duration to retrieve the data
        tc_limit (Optional[int]): the number of topcrashes to load
        crash_type (Optional[str]): 'all' (default) or 'browser' or 'content' or 'plugin'

    Returns:
        dict: contains all the info relative to the crashes
    """
    channel = channel.lower()
    version = v[channel]
    sys.stdout.write('Getting version information from Socorro...')
    sys.stdout.flush()
    versions = versions_util.get_channel_versions(channel, product)
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    if crash_type and isinstance(crash_type, six.string_types):
        crash_type = [crash_type]

    _date = utils.get_date_ymd(date)
    start_date = utils.get_date_str(_date - timedelta(duration - 1))
    end_date = utils.get_date_str(_date)

    signatures = {}

    def signature_handler(json):
        for signature in json['facets']['signature']:
            signatures[signature['term']] = [signature['count'], 0, 0, 0, 0, 0]

            for platform in signature['facets']['platform']:
                if platform['term'] == 'Linux':
                    signatures[signature['term']][3] = platform['count']
                elif platform['term'] == 'Windows NT':
                    signatures[signature['term']][1] = platform['count']
                elif platform['term'] == 'Mac OS X':
                    signatures[signature['term']][2] = platform['count']

            for startup_crash in signature['facets']['startup_crash']:
                if startup_crash['term'] in ['1', 'T']:
                    signatures[signature['term']][4] += startup_crash['count']

            signatures[signature['term']][5] = signature['facets'][
                'cardinality_install_time']['value']

    params = {
        'product':
        product,
        'version':
        versions,
        'date':
        socorro.SuperSearch.get_search_date(start_date, end_date),
        'release_channel':
        channel,
        '_aggs.signature':
        ['platform', '_cardinality.install_time', 'startup_crash'],
        '_results_number':
        0,
        '_facets_size':
        tc_limit,
    }

    if startup:
        params['startup_crash'] = True

    sys.stdout.write('Getting top signatures from Socorro...')
    sys.stdout.flush()
    socorro.SuperSearch(params=params, handler=signature_handler).wait()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    bug_flags = [
        'resolution', 'id', 'last_change_time',
        'cf_tracking_firefox' + str(version)
    ]
    for i in range(int(version), int(v['nightly']) + 1):
        bug_flags.append('cf_status_firefox' + str(i))

    # TODO: too many requests... should be improved with chunks
    bugs = {}
    # TODO: Use regexps once the Bugzilla bug that prevents them from working is fixed.
    base = {
        'j_top': 'OR',
        'o1': 'substring',
        'f1': 'cf_crash_signature',
        'v1': None,
        'o2': 'substring',
        'f2': 'cf_crash_signature',
        'v2': None,
        'o3': 'substring',
        'f3': 'cf_crash_signature',
        'v3': None,
        'o4': 'substring',
        'f4': 'cf_crash_signature',
        'v4': None,
        'include_fields': bug_flags
    }

    queries = []
    for sgn in signatures.keys():
        cparams = base.copy()
        cparams['v1'] = '[@' + sgn + ']'
        cparams['v2'] = '[@ ' + sgn + ' ]'
        cparams['v3'] = '[@ ' + sgn + ']'
        cparams['v4'] = '[@' + sgn + ' ]'
        bugs[sgn] = []
        queries.append(
            Query(Bugzilla.API_URL, cparams, __bug_handler, bugs[sgn]))
    res_bugs = Bugzilla(queries=queries)

    # we have stats by signature in self.signatures
    # for each signature get the number of crashes on the last X days
    # so get the signature trend
    trends = {}
    default_trend = {}
    for i in range(duration):
        default_trend[_date - timedelta(i)] = 0

    base = {
        'product': product,
        'version': versions,
        'signature': None,
        'date': socorro.SuperSearch.get_search_date(start_date, end_date),
        'release_channel': channel,
        '_results_number': 0,
        '_histogram.date': ['signature'],
        '_histogram_interval': 1
    }

    queries = []
    for sgns in Connection.chunks(
            list(map(lambda sgn: '=' + sgn, signatures.keys())), 10):
        sgn_group = []
        for sgn in sgns:
            if sum(len(s) for s in sgn_group) >= 1000:
                cparams = base.copy()
                cparams['signature'] = sgn_group
                queries.append(
                    Query(socorro.SuperSearch.URL, cparams,
                          functools.partial(__trend_handler, default_trend),
                          trends))
                sgn_group = []

            sgn_group.append(sgn)

        if len(sgn_group) > 0:
            cparams = base.copy()
            cparams['signature'] = sgn_group
            queries.append(
                Query(socorro.SuperSearch.URL, cparams,
                      functools.partial(__trend_handler, default_trend),
                      trends))

    sys.stdout.write('Getting trends for top signatures from Socorro...')
    sys.stdout.flush()
    socorro.SuperSearch(queries=queries).wait()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    for sgn, trend in trends.items():
        signatures[sgn] = (signatures[sgn], [
            trend[key] for key in sorted(trend.keys(), reverse=True)
        ])

    _signatures = {}
    # order self.signatures by crash count
    sorted_signatures = sorted(signatures.items(),
                               key=lambda x: x[1][0][0],
                               reverse=True)
    i = 1
    for s in sorted_signatures:
        _signatures[s[0]] = i  # top crash rank
        i += 1

    sys.stdout.write(
        'Getting bugs linked to the top signatures from Bugzilla...')
    sys.stdout.flush()
    while True:
        try:
            for r in res_bugs.results:
                r.result(timeout=2)
            break
        except TimeoutError:
            sys.stdout.write('.')
            sys.stdout.flush()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    # TODO: In the first query to get the bugs, also get dupe_of and avoid the first query
    #       in follow_dup (so modify follow_dup to accept both a bug ID or a bug object).
    queries = []
    for sgn in signatures.keys():
        duplicate_ids = [
            bug['id'] for bug in bugs[sgn] if bug['resolution'] == 'DUPLICATE'
        ]

        # Remove bugs resolved as DUPLICATE from the list of bugs associated to the signature.
        bugs[sgn] = [
            bug for bug in bugs[sgn] if bug['id'] not in duplicate_ids
        ]

        # Find duplicates for bugs resolved as DUPLICATE.
        duplicates = {
            k: v
            for k, v in Bugzilla.follow_dup(duplicate_ids).items()
            if v is not None
        }
        duplicate_targets = [
            bug_id for bug_id in duplicates.values()
            if int(bug_id) not in [bug['id'] for bug in bugs[sgn]]
        ]
        if len(duplicate_targets) == 0:
            continue

        # Get info about bugs that the DUPLICATE bugs have been duped to.
        params = {
            'id': ','.join(duplicate_targets),
            'include_fields': bug_flags,
        }
        queries.append(
            Query(Bugzilla.API_URL, params, __bug_handler, bugs[sgn]))
    sys.stdout.write(
        'Resolving duplicate bugs to the bugs they\'ve been duplicated to...')
    sys.stdout.flush()
    Bugzilla(queries=queries).wait()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    for sgn, stats in signatures.items():
        # stats is a 2-tuple:
        # ([count, win_count, mac_count, linux_count, startup_count, estimated_user_count], trend)
        startup_percent = float(stats[0][4]) / float(stats[0][0])
        _signatures[sgn] = {
            'tc_rank': _signatures[sgn],
            'crash_count': stats[0][0],
            'estimated_user_count': stats[0][5],
            'startup_percent': startup_percent,
            'bugs': bugs[sgn]
        }

    return {
        'start_date': start_date,
        'end_date': end_date,
        'versions': list(versions),
        'signatures': _signatures,
    }
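
The dict returned above maps each signature to its top-crash rank, crash count, startup percentage and associated bugs. A minimal, hypothetical sketch of consuming that structure (the `report` variable and the `print_top_crashers` helper are assumptions, not part of the original code):

def print_top_crashers(report, limit=10):
    # `report` is assumed to be the dict returned by the function above
    sgns = report['signatures']
    top = sorted(sgns.items(), key=lambda p: p[1]['tc_rank'])[:limit]
    for sgn, info in top:
        print('#%d %s: %d crashes, %.1f%% at startup, %d bugs'
              % (info['tc_rank'], sgn, info['crash_count'],
                 100 * info['startup_percent'], len(info['bugs'])))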
Exemplo n.º 38
0
def monitor(emails=[], date='yesterday', path='', data=None, verbose=False, writejson=False):
    if not data:
        try:
            with open(path, 'r') as In:
                data = json.load(In)
        except IOError:
            data = {p: {c: {} for c in channels} for p in products}

    searches = []
    start_date = utils.get_date_ymd(date)
    end_date = start_date + datetime.timedelta(days=1)
    search_date = socorro.SuperSearch.get_search_date(start_date, end_date)
    all_versions = {}
    versions_pc = defaultdict(lambda: defaultdict(lambda: []))

    def handler_ss(date, product, json, data):
        if not json['errors']:
            for info in json['facets']['release_channel']:
                chan = info['term']
                total = info['count']
                if product == 'FennecAndroid':
                    d = {'total': total}
                    if chan in data:
                        data[chan][date] = d
                    else:
                        data[chan] = {date: d}
                else:
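                    # release-channel crash reports are sampled at ~10% (throttle),
                    # so scale the counts back up to estimate the real volume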
                    throttle = 10 if chan == 'release' else 1
                    total *= throttle
                    d = {'browser': 0, 'plugin': 0, 'content': 0, 'total': total}
                    for pt in info['facets']['process_type']:
                        term = pt['term']
                        count = pt['count']
                        if term in d:  # term can be 'gpu' (very rare)
                            d[term] += count * throttle
                    d['browser'] = total - (d['plugin'] + d['content'])
                    if chan in data:
                        data[chan][date] = d
                    else:
                        data[chan] = {date: d}

    if date == 'today':
        dates = ['yesterday', 'today']
    else:
        dates = [date]

    delay_by_channel = {'release': config.get('MonitorStartupCrashes', 'delay_release', 12, type=int),
                        'beta': config.get('MonitorStartupCrashes', 'delay_beta', 4, type=int),
                        'aurora': config.get('MonitorStartupCrashes', 'delay_aurora', 9, type=int),
                        'nightly': config.get('MonitorStartupCrashes', 'delay_nightly', 9, type=int)}

    for data_date in dates:
        data_date = utils.get_date_ymd(data_date)
        next_data_date = data_date + datetime.timedelta(days=1)
        search_data_date = socorro.SuperSearch.get_search_date(data_date, next_data_date)
        for product in products:
            versions = socorro.ProductVersions.get_all_versions(product)
            all_versions[product] = []
            for chan in channels:
                info = versions[chan]
                last_ver_major = max(info.keys())
                _start_date = data_date - datetime.timedelta(weeks=delay_by_channel[chan])
                for major in range(last_ver_major, last_ver_major - 4, -1):
                    for v, d in info[major]['versions'].items():
                        if not v.endswith('b') and _start_date <= d <= data_date:
                            all_versions[product].append(v)
                            versions_pc[product][chan].append(v)
            searches.append(socorro.SuperSearch(params={'product': product,
                                                        'date': search_data_date,
                                                        'release_channel': channels,
                                                        'version': all_versions[product],
                                                        'uptime': '<60',
                                                        '_results_number': 0,
                                                        '_facets_size': 100,
                                                        '_aggs.release_channel': 'process_type'},
                                                handler=functools.partial(handler_ss, utils.get_date_str(data_date), product), handlerdata=data[product]))

    for s in searches:
        s.wait()

    if writejson and path:
        with open(path, 'w') as Out:
            json.dump(data, Out, sort_keys=True)

    new_start_date = start_date - datetime.timedelta(days=1)
    new_search_date = socorro.SuperSearch.get_search_date(new_start_date, end_date)

    def handler_ss_spikers(json, data):
        if not json['errors']:
            for facets in json['facets']['histogram_date']:
                date = utils.get_date_ymd(facets['term'])
                s = facets['facets']['signature']
                d = {}
                data[date] = d
                for signature in s:
                    count = signature['count']
                    sgn = signature['term']
                    d[sgn] = count

    spikers_info = defaultdict(lambda: defaultdict(lambda: dict()))

    searches = []
    for product, i1 in data.items():
        for chan, i2 in i1.items():
            _data = [
                float(i[1]['total'])
                for i in sorted(i2.items(),
                                key=lambda p: utils.get_date_ymd(p[0]))
                if utils.get_date_ymd(i[0]) <= start_date
            ]
            # print(product, chan)
            issp = spikeanalysis.is_spiking_ma(_data, alpha=2.5, win=7, method='mean', plot=False) == 'up'
            # spikeanalysis.get_spikes_ma(_data, alpha=2.5, win=7, method='mean', plot=True)
            if issp:
                # spikeanalysis.is_spiking_ma(_data, alpha=2.5, win=7, method='mean', plot=True)
                searches.append(socorro.SuperSearch(params={'product': product,
                                                            'date': new_search_date,
                                                            'release_channel': chan,
                                                            'version': all_versions[product],
                                                            'uptime': '<60',
                                                            '_results_number': 0,
                                                            '_histogram.date': 'signature',
                                                            '_facets_size': 100},
                                                    handler=handler_ss_spikers, handlerdata=spikers_info[product][chan]))

    for s in searches:
        s.wait()

    if spikers_info:
        # So we have some spikes... we need to send an email with all the info
        searches = []
        interesting_sgns = get_most_signifiant_increases(spikers_info)
        bugs_by_signature = get_bugs(interesting_sgns)
        affected_chans = set()
        crash_data = {p: {} for p in spikers_info.keys()}
        spikes_number = 0

        def handler_global(product, json, data):
            if not json['errors']:
                for info in json['facets']['release_channel']:
                    chan = info['term']
                    throttle = 10 if product == 'FennecAndroid' and chan == 'release' else 1
                    total = info['count'] * throttle
                    data[chan] = total

        for p, i1 in spikers_info.items():
            searches.append(socorro.SuperSearch(params={'product': p,
                                                        'date': search_date,
                                                        'release_channel': list(i1.keys()),
                                                        'version': all_versions[p],
                                                        '_results_number': 0,
                                                        '_facets_size': 5,
                                                        '_aggs.release_channel': 'signature'},
                                                handler=functools.partial(handler_global, p), handlerdata=crash_data[p]))

            for c in i1.keys():
                spikes_number += 1
                affected_chans.add(c)
                url = socorro.SuperSearch.get_link({'product': p,
                                                    'date': search_date,
                                                    'release_channel': c,
                                                    'version': versions_pc[p][c],
                                                    'uptime': '<60'})
                sgns_chan = interesting_sgns[p]
                sgns_stats = [
                    (s, bugs_by_signature[s], t[2], t[1], t[3])
                    for s, t in sorted(sgns_chan[c].items(),
                                       key=lambda p: p[1][0],
                                       reverse=True)
                ]
                sgns_chan[c] = (url, sgns_stats)

        for s in searches:
            s.wait()

        env = Environment(loader=FileSystemLoader('templates'))
        env.filters['inflect'] = inflect
        template = env.get_template('startup_crashes_email')
        _is = OrderedDict()
        for product in sorted(interesting_sgns.keys()):
            _is[product] = OrderedDict()
            for chan in sorted(interesting_sgns[product].keys()):
                _is[product][chan] = interesting_sgns[product][chan]
        interesting_sgns = _is

        body = template.render(spikes_number=spikes_number,
                               spikes_number_word=inflect.engine().number_to_words(spikes_number),
                               crash_data=crash_data,
                               start_date=utils.get_date_str(new_start_date),
                               end_date=utils.get_date_str(start_date),
                               interesting_sgns=interesting_sgns)
        title = 'Spikes in startup crashes in %s' % ', '.join(affected_chans)

        if emails:
            gmail.send(emails, title, body, html=True)
        if verbose:
            print('Title: %s' % title)
            print('Body:')
            print(body)

        return {'title': title, 'body': body}

    return None
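
A hedged sketch of how monitor() might be driven from a small script; the recipient address and JSON path are placeholders, and the surrounding config/socorro/gmail modules are assumed to be importable as in the example above:

if __name__ == '__main__':
    # hypothetical invocation; emails and path are placeholders
    result = monitor(emails=['crash-alerts@example.com'],
                     date='yesterday',
                     path='startup_crashes.json',
                     verbose=True,
                     writejson=True)
    if result is None:
        print('No startup-crash spike detected')
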
def get(channel, date, versions=None, product='Firefox', duration=1):
    """Get stability info

    Args:
        channel (str): the channel
        date (str): the final date
        versions (Optional[List[str]]): the versions to process
        product (Optional[str]): the product
        duration (Optional[int]): the number of days of data to retrieve

    Returns:
        dict: contains all the info relative to stability
    """
    channel = channel.lower()
    cycle = duration <= 0
    versions_info = socorro.ProductVersions.get_version_info(versions,
                                                             channel=channel,
                                                             product=product)

    versions = versions_info.keys()
    throttle = set(map(lambda p: p[1], versions_info.values()))
    diff_throttle = len(throttle) != 1
    # normally the throttle is 10% for release and 100% for the other channels
    if not diff_throttle:
        throttle = throttle.pop()

    platforms = socorro.Platforms.get_cached_all()

    end_date_dt = utils.get_date_ymd(date)
    if cycle:
        # we get the start date for each version and take the min
        start_date_dt = min(
            map(lambda p: utils.get_date_ymd(p[0]), versions_info.values()))
        duration = (end_date_dt - start_date_dt).days + 1
    else:
        start_date_dt = end_date_dt - timedelta(duration - 1)

    start_date_str = utils.get_date_str(start_date_dt)
    end_date_str = utils.get_date_str(end_date_dt)

    # First, we get the ADI
    adi = socorro.ADI.get(version=versions,
                          product=product,
                          end_date=end_date_str,
                          duration=duration,
                          platforms=platforms)
    adi = [adi[key] for key in sorted(adi.keys(), reverse=False)]

    # Get the khours
    khours = Redash.get_khours(start_date_dt, end_date_dt, channel, versions,
                               product)
    khours = [khours[key] for key in sorted(khours.keys(), reverse=False)]

    # Get the # of crashes (crash pings)
    crash_pings = Redash.get_number_of_crash(start_date_dt, end_date_dt,
                                             channel, versions, product)

    crashes = {}
    stats = {'m+c': 0., 'main': 0., 'content': 0., 'plugin': 0., 'all': 0.}
    for i in range(duration):
        d = end_date_dt - timedelta(i)
        crashes[d] = {}
        crashes[d]['socorro'] = {
            'global': stats.copy(),
            'startup': stats.copy()
        }
        crashes[d]['telemetry'] = crash_pings[d]

    base = {
        'product': product,
        'version': None,
        'date': socorro.SuperSearch.get_search_date(start_date_str,
                                                    end_date_str),
        'release_channel': channel,
        '_results_number': 1,
        '_histogram.date': ['product', 'process_type'],
        '_facets_size': 3
    }

    if diff_throttle:
        # in this case each version could have a different throttle,
        # so we need to compute stats per version
        queries = []
        for v, t in versions_info.items():
            cparams = base.copy()
            cparams['version'] = v
            queries.append(
                Query(socorro.SuperSearch.URL, cparams,
                      functools.partial(__crash_handler, t[1]), crashes))
            cparams = copy.deepcopy(cparams)
            cparams['uptime'] = '<60'
            cparams['_histogram.date'].append('uptime')
            queries.append(
                Query(socorro.SuperSearch.URL, cparams,
                      functools.partial(__crash_handler, t[1]), crashes))
    else:
        base['version'] = versions
        queries = []
        queries.append(
            Query(socorro.SuperSearch.URL, base,
                  functools.partial(__crash_handler, throttle), crashes))
        cparams = copy.deepcopy(base)
        cparams['uptime'] = '<60'
        cparams['_histogram.date'].append('uptime')
        queries.append(
            Query(socorro.SuperSearch.URL, cparams,
                  functools.partial(__crash_handler, throttle), crashes))

    socorro.SuperSearch(queries=queries).wait()
    crashes = [crashes[key] for key in sorted(crashes.keys(), reverse=False)]

    # Now we compute the rates and the averages
    stats = {
        'm+c': [0., 0., 0., 0.],
        'main': [0., 0., 0., 0.],
        'content': [0., 0., 0., 0.],
        'plugin': [0., 0., 0., 0.],
        'all': [0., 0., 0., 0.]
    }
    averages = {}
    averages['socorro'] = {'global': stats, 'startup': copy.deepcopy(stats)}
    averages['telemetry'] = copy.deepcopy(stats)
    N = len(adi)

    # sum
    for i in range(N):
        crash_soc = crashes[i]['socorro']
        for k1, v1 in averages['socorro'].items():
            for k2, av in v1.items():
                c = crash_soc[k1][k2]
                # the rate is computed for 100 adi
                x = utils.rate(100. * c, adi[i])
                av[0] += x
                av[1] += x**2
                y = utils.rate(c, khours[i])
                av[2] += y
                av[3] += y**2
                crash_soc[k1][k2] = (c, x, y)
        crash_tel = crashes[i]['telemetry']
        for k1, av in averages['telemetry'].items():
            c = crash_tel[k1]
            # the rate is computed for 100 adi
            x = utils.rate(100. * c, adi[i])
            av[0] += x
            av[1] += x**2
            y = utils.rate(c, khours[i])
            av[2] += y
            av[3] += y**2
            crash_tel[k1] = (c, x, y)

    N = float(N)
    averages_old = {'socorro': {}, 'telemetry': {}}
    averages_new = copy.deepcopy(averages_old)

    # mean & standard deviation
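    # each accumulator av holds [sum(x), sum(x^2), sum(y), sum(y^2)], so the
    # standard deviation below is computed as sqrt(E[v^2] - E[v]^2)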
    av_new_soc = averages_new['socorro']
    av_old_soc = averages_old['socorro']
    for k1, v1 in averages['socorro'].items():
        d1 = {}
        av_old_soc[k1] = d1
        d2 = {}
        av_new_soc[k1] = d2
        for k2, av in v1.items():
            m = av[0] / N
            d1[k2] = (m, math.sqrt(av[1] / N - m**2))
            m = av[2] / N
            d2[k2] = (m, math.sqrt(av[3] / N - m**2))

    av_new_tel = averages_new['telemetry']
    av_old_tel = averages_old['telemetry']
    for k1, av in averages['telemetry'].items():
        m = av[0] / N
        av_old_tel[k1] = (m, math.sqrt(av[1] / N - m**2))
        m = av[2] / N
        av_new_tel[k1] = (m, math.sqrt(av[3] / N - m**2))

    return {
        'start_date': start_date_str,
        'end_date': end_date_str,
        'versions': versions,
        'adi': adi,
        'khours': khours,
        'crashes': crashes,
        'averages_old': averages_old,
        'averages_new': averages_new
    }
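
A minimal usage sketch for get(), assuming the module's Socorro/Redash dependencies are configured; the channel, date and duration values are placeholders:

# hypothetical call; arguments are placeholders
info = get('nightly', '2016-10-18', product='Firefox', duration=7)
print(info['start_date'], '->', info['end_date'])
# averages_old holds (mean, stddev) of the per-100-ADI crash rates,
# averages_new the same statistics for the per-khour rates
mean_rate, std_rate = info['averages_old']['socorro']['global']['m+c']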
Exemplo n.º 40
0
def get_last():
    lastdate = db.session.query(Lastdate.last)
    lastdate = utils.get_date_str(lastdate.first().last)
    return lastdate
def get_uuids_for_spiking_signatures(channel,
                                     cache=None,
                                     product='Firefox',
                                     date='today',
                                     limit=10000,
                                     max_days=3,
                                     threshold=5):
    psttz = pytz.timezone('US/Pacific')
    end_date = utils.get_date_ymd(date)  # 2016-10-18 UTC
    end_date_moz = psttz.localize(
        datetime(end_date.year, end_date.month,
                 end_date.day))  # 2016-10-18 PST
    end_buildid = utils.get_buildid_from_date(end_date_moz)  # < 20161018000000
    start_date_moz = end_date_moz - timedelta(
        days=max_days + 1)  # 2016-10-14 PST (max_days == 3)
    start_buildid = utils.get_buildid_from_date(
        start_date_moz)  # >= 20161014000000
    search_buildid = ['>=' + start_buildid, '<' + end_buildid]
    start_date = utils.as_utc(start_date_moz)  # 2016-10-14 07:00:00 UTC
    search_date = '>=' + utils.get_date_str(start_date)
    data = defaultdict(lambda: defaultdict(lambda: 0))
    buildids = {}

    def handler(json, data):
        if not json['errors']:
            for facets in json['facets']['build_id']:
                date = utils.get_date_from_buildid(
                    facets['term']).astimezone(psttz)
                buildids[date] = facets['count']
                for s in facets['facets']['signature']:
                    sgn = s['term']
                    count = s['count']
                    data[sgn][date] += count

    socorro.SuperSearch(params={
        'product': product,
        'date': search_date,
        'build_id': search_buildid,
        'release_channel': channel,
        '_aggs.build_id': 'signature',
        '_facets_size': limit,
        '_results_number': 0
    },
                        handler=handler,
                        handlerdata=data).wait()

    _data = {}
    base = {
        start_date_moz + timedelta(days=i): {
            'buildids': {},
            'total': 0
        }
        for i in range(max_days + 1)
    }  # from 2016-10-14 to 2016-10-17 PST

    for sgn, info in data.items():
        d = copy.deepcopy(base)
        _data[sgn] = d
        for bid, count in info.items():
            date = psttz.localize(datetime(bid.year, bid.month, bid.day))
            d[date]['buildids'][bid] = count
            d[date]['total'] += count
    data = _data

    spiking_signatures = []
    for sgn, info in data.items():
        stats2 = [
            i['total'] for _, i in sorted(info.items(), key=lambda p: p[0])
        ]
        if all(i == 0 for i in stats2[:-1]) and stats2[-1] >= threshold:
            spiking_signatures.append(sgn)

    data = None
    if spiking_signatures:
        # sort the signatures so the order is deterministic (useful for tests)
        spiking_signatures = sorted(spiking_signatures)

        start_buildid = utils.get_buildid_from_date(end_date_moz -
                                                    timedelta(days=1))
        search_buildid = ['>=' + start_buildid, '<' + end_buildid]
        queries = []
        data = defaultdict(lambda: list())

        def handler(json, data):
            if not json['errors']:
                for facets in json['facets']['proto_signature']:
                    proto = facets['term']
                    count = facets['count']
                    facets = facets['facets']
                    sgn = facets['signature'][0]['term']
                    first_uuid = facets['uuid'][0]['term']
                    uuids = {i['term'] for i in facets['uuid']}
                    if cache:
                        i = uuids.intersection(cache['uuids'])
                        uuid = i.pop() if i else first_uuid
                    else:
                        uuid = first_uuid
                    data[sgn].append({
                        'proto': proto,
                        'uuid': uuid,
                        'count': count
                    })

        for sgns in Connection.chunks(spiking_signatures, 5):
            queries.append(
                Query(socorro.SuperSearch.URL, {
                    'product': product,
                    'date': search_date,
                    'build_id': search_buildid,
                    'signature': ['=' + s for s in sgns],
                    'release_channel': channel,
                    '_aggs.proto_signature': ['uuid', 'signature'],
                    '_facets_size': 10000,
                    '_results_number': 0
                },
                      handler=handler,
                      handlerdata=data))

        socorro.SuperSearch(queries=queries).wait()

    return data
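
A hedged sketch of consuming the result of get_uuids_for_spiking_signatures(); the channel is a placeholder, and the structure follows the handler above (one dict per proto-signature with 'proto', 'uuid' and 'count'):

# hypothetical call; 'nightly' is a placeholder channel
spikes = get_uuids_for_spiking_signatures('nightly', date='today')
if spikes:
    for sgn, protos in spikes.items():
        for p in sorted(protos, key=lambda x: x['count'], reverse=True):
            print(sgn, p['count'], p['proto'], p['uuid'])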
Exemplo n.º 42
0
def get_correct_date(date):
    date = get_date(date)
    if date:
        return utils.get_date_str(date)
    return utils.get_date('today')