Example #1
def classifying_rate(fails, data, engine, classifier):
    """Builds and prints the classification rate.

    It's important to know how good a job we are doing, so this
    tool runs through all the failures we've got and builds the
    classification rate. For every failure in the gate queue, did
    we find a match for it?
    """
    found_fails = {k: False for k in fails}

    for bugnum in data:
        bug = data[bugnum]
        for job in bug['failed_jobs']:
            found_fails[job] = True

    bad_jobs = collections.defaultdict(int)
    total_job_failures = collections.defaultdict(int)
    bad_job_urls = collections.defaultdict(list)
    count = 0
    total = 0
    for f in fails:
        total += 1
        build, job = f.split('.', 1)
        total_job_failures[job] += 1
        if found_fails[f] is True:
            count += 1
        else:
            bad_jobs[job] += 1
            bad_job_urls[job].append(fails[f])

    for job in bad_job_urls:
        # sort by timestamp.
        bad_job_urls[job] = sorted(bad_job_urls[job],
                                   key=lambda v: v['timestamp'], reverse=True)
        # Convert timestamp into string
        for url in bad_job_urls[job]:
            url['timestamp'] = url['timestamp'].strftime(
                "%Y-%m-%dT%H:%M")
            # set up the crm114 query for this build_uuid
            query = ('build_uuid: "%s" '
                     'AND error_pr:["-1000.0" TO "-10.0"] '
                     % url['build_uuid'])
            logstash_query = qb.encode_logstash_query(query)
            logstash_url = 'http://logstash.openstack.org/#%s' % logstash_query
            results = classifier.hits_by_query(query, size=1)
            if results:
                url['crm114'] = logstash_url

    classifying_rate = collections.defaultdict(int)
    classifying_rate['overall'] = "%.1f" % (
        (float(count) / float(total)) * 100.0)
    for job in bad_jobs:
        if bad_jobs[job] == 0 and total_job_failures[job] == 0:
            classifying_rate[job] = 0
        else:
            classifying_rate[job] = "%.1f" % (
                100.0 -
                (float(bad_jobs[job]) / float(total_job_failures[job]))
                * 100.0)
    sort = sorted(
        bad_jobs.items(),
        key=operator.itemgetter(1),
        reverse=True)

    tvars = {
        "rate": classifying_rate,
        "count": count,
        "total": total,
        "uncounted": total - count,
        "jobs": sort,
        "total_job_failures": total_job_failures,
        "urls": bad_job_urls,
        "generated_at": datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M")
    }
    return engine.render(tvars)
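
# A small worked example of the rate computed above (toy inputs; only the
# "<build>.<job>" key format and the dict shapes are taken from the function,
# the values are made up):
#
#     fails = {'100.gate-tempest-dsvm-full': {...},
#              '101.gate-tempest-dsvm-neutron': {...}}
#     data = {'1293189': {'failed_jobs': ['100.gate-tempest-dsvm-full']}}
#
# One of the two failures is matched by a bug query, so count == 1,
# total == 2, and rate['overall'] renders as "50.0"; the unmatched failure
# ends up in bad_jobs / bad_job_urls for the per-job breakdown.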
Example #2
def main():
    parser = argparse.ArgumentParser(description='Generate data for graphs.')
    parser.add_argument(dest='queries', help='path to query file')
    parser.add_argument('-o', dest='output', help='output filename')
    parser.add_argument('-q',
                        dest='queue',
                        help='limit results to a specific query')
    parser.add_argument('-v',
                        dest='verbose',
                        action='store_true',
                        default=False,
                        help='print out details as we go')
    args = parser.parse_args()

    classifier = er.Classifier(args.queries)

    buglist = []

    # if you don't hate timezones, you don't program enough
    epoch = datetime.utcfromtimestamp(0).replace(tzinfo=pytz.utc)
    ts = datetime.utcnow().replace(tzinfo=pytz.utc)
    # rawnow is useful for sending to javascript
    rawnow = int(((ts - epoch).total_seconds()) * 1000)

    ts = datetime(ts.year, ts.month, ts.day, ts.hour).replace(tzinfo=pytz.utc)
    # ms since epoch
    now = int(((ts - epoch).total_seconds()) * 1000)
    # number of days to match; this should be the same window as we are
    # indexing in logstash
    days = 10
    # How far back to start in the graphs
    start = now - (days * 24 * STEP)
    # ER timeframe for search
    timeframe = days * 24 * STEP / 1000
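    # (assumption, not shown here) STEP is the graph bucket size in
    # milliseconds -- one hour upstream -- so 'start' is `days` days before
    # 'now' and 'timeframe' is that same window expressed in seconds.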

    last_indexed = int(
        ((classifier.most_recent() - epoch).total_seconds()) * 1000)
    behind = now - last_indexed

    # the data we're going to return, including interesting headers
    jsondata = {
        'now': rawnow,
        'last_indexed': last_indexed,
        'behind': behind,
        'buglist': []
    }

    for query in classifier.queries:
        if args.queue:
            query['query'] = query['query'] + (' AND build_queue:"%s"' %
                                               args.queue)
        if query.get('suppress-graph'):
            continue
        if args.verbose:
            LOG.debug("Starting query for bug %s" % query['bug'])
        logstash_query = qb.encode_logstash_query(query['query'],
                                                  timeframe=timeframe)
        bug_data = get_launchpad_bug(query['bug'])
        bug = dict(number=query['bug'],
                   query=query['query'],
                   logstash_query=logstash_query,
                   bug_data=bug_data,
                   fails=0,
                   fails24=0,
                   data=[])
        buglist.append(bug)
        results = classifier.hits_by_query(query['query'],
                                           args.queue,
                                           size=3000)

        facets_for_fail = er_results.FacetSet()
        facets_for_fail.detect_facets(results, ["build_status", "build_uuid"])
        if "FAILURE" in facets_for_fail:
            bug['fails'] = len(facets_for_fail['FAILURE'])

        facets = er_results.FacetSet()
        facets.detect_facets(results,
                             ["build_status", "timestamp", "build_uuid"])

        for status in facets.keys():
            data = []
            for ts in range(start, now, STEP):
                if ts in facets[status]:
                    fails = len(facets[status][ts])
                    data.append([ts, fails])
                    # get the last 24 hr count as well; can't wait to have
                    # the pandas code in place so we can do it that way
                    if status == "FAILURE" and ts > (now - (24 * STEP)):
                        bug['fails24'] += fails
                else:
                    data.append([ts, 0])
            bug["data"].append(dict(label=status, data=data))

    # the sort order is a little odd, but basically: sort by failures in
    # the last 24 hours, then by total failures for bugs we haven't
    # seen in the last 24 hours.
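    # e.g. (made-up numbers) a bug with fails24=1 and fails=3 gets key
    # -(1 * 100000 + 3) = -100003, which sorts ahead of a bug with
    # fails24=0 and fails=900, whose key is -900.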
    buglist = sorted(buglist,
                     key=lambda bug: -(bug['fails24'] * 100000 + bug['fails']))

    jsondata['buglist'] = buglist
    out = open(args.output, 'w')
    out.write(json.dumps(jsondata))
    out.close()
Example #3
def main():
    parser = argparse.ArgumentParser(description='Generate data for graphs.')
    parser.add_argument(dest='queries',
                        help='path to query file')
    parser.add_argument('-o', dest='output',
                        help='output filename')
    parser.add_argument('-q', dest='queue',
                        help='limit results to a specific query')
    parser.add_argument('-v', dest='verbose',
                        action='store_true', default=False,
                        help='print out details as we go')
    args = parser.parse_args()

    classifier = er.Classifier(args.queries)

    buglist = []

    # if you don't hate timezones, you don't program enough
    epoch = datetime.utcfromtimestamp(0).replace(tzinfo=pytz.utc)
    ts = datetime.utcnow().replace(tzinfo=pytz.utc)
    # rawnow is useful for sending to javascript
    rawnow = int(((ts - epoch).total_seconds()) * 1000)

    ts = datetime(ts.year, ts.month, ts.day, ts.hour).replace(tzinfo=pytz.utc)
    # ms since epoch
    now = int(((ts - epoch).total_seconds()) * 1000)
    # number of days to match; this should be the same window as we are
    # indexing in logstash
    days = 10
    # How far back to start in the graphs
    start = now - (days * 24 * STEP)
    # ER timeframe for search
    timeframe = days * 24 * STEP / 1000

    last_indexed = int(
        ((classifier.most_recent() - epoch).total_seconds()) * 1000)
    behind = now - last_indexed

    # the data we're going to return, including interesting headers
    jsondata = {
        'now': rawnow,
        'last_indexed': last_indexed,
        'behind': behind,
        'buglist': []
    }

    for query in classifier.queries:
        if args.queue:
            query['query'] = query['query'] + (' AND build_queue:"%s"' %
                                               args.queue)
        if query.get('suppress-graph'):
            continue
        if args.verbose:
            LOG.debug("Starting query for bug %s" % query['bug'])
        logstash_query = qb.encode_logstash_query(query['query'],
                                                  timeframe=timeframe)
        bug_data = get_launchpad_bug(query['bug'])
        bug = dict(number=query['bug'],
                   query=query['query'],
                   logstash_query=logstash_query,
                   bug_data=bug_data,
                   fails=0,
                   fails24=0,
                   data=[])
        buglist.append(bug)
        results = classifier.hits_by_query(query['query'],
                                           args.queue,
                                           size=3000)

        facets_for_fail = er_results.FacetSet()
        facets_for_fail.detect_facets(results,
                                      ["build_status", "build_uuid"])
        if "FAILURE" in facets_for_fail:
            bug['fails'] = len(facets_for_fail['FAILURE'])

        facets = er_results.FacetSet()
        facets.detect_facets(results,
                             ["build_status", "timestamp", "build_uuid"])

        for status in facets.keys():
            data = []
            for ts in range(start, now, STEP):
                if ts in facets[status]:
                    fails = len(facets[status][ts])
                    data.append([ts, fails])
                    # get the last 24 hr count as well; can't wait to have
                    # the pandas code in place so we can do it that way
                    if status == "FAILURE" and ts > (now - (24 * STEP)):
                        bug['fails24'] += fails
                else:
                    data.append([ts, 0])
            bug["data"].append(dict(label=status, data=data))

    # the sort order is a little odd, but basically: sort by failures in
    # the last 24 hours, then by total failures for bugs we haven't
    # seen in the last 24 hours.
    buglist = sorted(buglist,
                     key=lambda bug: -(bug['fails24'] * 100000 + bug['fails']))

    jsondata['buglist'] = buglist
    out = open(args.output, 'w')
    out.write(json.dumps(jsondata))
    out.close()
Example #4
def main():
    parser = argparse.ArgumentParser(description='Generate data for graphs.')
    parser.add_argument(dest='queries',
                        help='path to query file')
    parser.add_argument('-o', dest='output',
                        help='output filename. Omit for stdout')
    parser.add_argument('-q', dest='queue',
                        help='limit results to a build queue regex')
    parser.add_argument('--es-query-suffix',
                        help='further limit results with an '
                             'elastic search query suffix. This will be ANDed '
                             'to all queries. '
                             'For example, to limit all queries to a '
                             'specific branch use: '
                             ' --es-query-suffix "build_branch:\\"stable/'
                             'liberty\\""')
    parser.add_argument('-c', '--conf', help="Elastic Recheck Configuration "
                        "file to use for data_source options such as "
                        "elastic search url, logstash url, and database "
                        "uri.")
    parser.add_argument('-v', dest='verbose',
                        action='store_true', default=False,
                        help='print out details as we go')
    args = parser.parse_args()

    config = er_conf.Config(config_file=args.conf)

    classifier = er.Classifier(args.queries, config=config)

    buglist = []

    # if you don't hate timezones, you don't program enough
    epoch = datetime.utcfromtimestamp(0).replace(tzinfo=pytz.utc)
    ts = datetime.utcnow().replace(tzinfo=pytz.utc)
    # rawnow is useful for sending to javascript
    rawnow = int(((ts - epoch).total_seconds()) * 1000)

    ts = datetime(ts.year, ts.month, ts.day, ts.hour).replace(tzinfo=pytz.utc)
    # ms since epoch
    now = int(((ts - epoch).total_seconds()) * 1000)
    # number of days to match; this should be the same window as we are
    # indexing in logstash
    days = 10
    # How far back to start in the graphs
    start = now - (days * 24 * STEP)
    # ER timeframe for search
    timeframe = days * 24 * STEP / 1000

    last_indexed = int(
        ((classifier.most_recent() - epoch).total_seconds()) * 1000)
    behind = now - last_indexed

    # the data we're going to return, including interesting headers
    jsondata = {
        'now': rawnow,
        'last_indexed': last_indexed,
        'behind': behind,
        'buglist': []
    }

    # Get the cluster health for the header
    es = pyelasticsearch.ElasticSearch(config.es_url)
    jsondata['status'] = es.health()['status']

    for query in classifier.queries:
        if args.queue:
            query['query'] += ' AND build_queue:%s' % args.queue
        if args.es_query_suffix:
            query['query'] += ' AND (%s)' % args.es_query_suffix

        if query.get('suppress-graph'):
            continue
        if args.verbose:
            LOG.debug("Starting query for bug %s" % query['bug'])
        logstash_query = qb.encode_logstash_query(query['query'],
                                                  timeframe=timeframe)
        logstash_url = ("%s/#/dashboard/file/logstash.json?%s"
                        % (config.ls_url, logstash_query))
        bug_data = get_launchpad_bug(query['bug'])
        bug = dict(number=query['bug'],
                   query=query['query'],
                   logstash_url=logstash_url,
                   bug_data=bug_data,
                   fails=0,
                   fails24=0,
                   data=[],
                   voting=(False if query.get('allow-nonvoting') else True))
        buglist.append(bug)
        try:
            results = classifier.hits_by_query(query['query'],
                                               args.queue,
                                               size=3000,
                                               days=days)
        except pyelasticsearch.exceptions.InvalidJsonResponseError:
            LOG.exception("Invalid Json while collecting metrics for query %s"
                          % query['query'])
            continue
        except requests.exceptions.ReadTimeout:
            LOG.exception("Timeout while collecting metrics for query %s" %
                          query['query'])
            continue

        facets_for_fail = er_results.FacetSet()
        facets_for_fail.detect_facets(results,
                                      ["build_status", "build_uuid"])
        if "FAILURE" in facets_for_fail:
            bug['fails'] = len(facets_for_fail['FAILURE'])

        facets = er_results.FacetSet()
        facets.detect_facets(results,
                             ["build_status", "timestamp", "build_uuid"])

        for status in facets.keys():
            data = []
            for ts in range(start, now, STEP):
                if ts in facets[status]:
                    fails = len(facets[status][ts])
                    data.append([ts, fails])
                    # get the last 24 hr count as well; can't wait to have
                    # the pandas code in place so we can do it that way
                    if status == "FAILURE" and ts > (now - (24 * STEP)):
                        bug['fails24'] += fails
                else:
                    data.append([ts, 0])
            bug["data"].append(dict(label=status, data=data))

    # the sort order is a little odd, but basically: sort by failures in
    # the last 24 hours, then by total failures for bugs we haven't
    # seen in the last 24 hours.
    buglist = sorted(buglist,
                     key=lambda bug: -(bug['fails24'] * 100000 + bug['fails']))

    jsondata['buglist'] = buglist
    if args.output:
        out = open(args.output, 'w')
    else:
        out = sys.stdout

    try:
        # indent the json output if we're writing to a file
        indent = 4 if args.output else None
        out.write(json.dumps(jsondata, indent=indent))
    finally:
        out.close()
Example #5
def main():
    parser = argparse.ArgumentParser(description='Generate data for graphs.')
    parser.add_argument(dest='queries', help='path to query file')
    parser.add_argument('-o',
                        dest='output',
                        help='output filename. Omit for stdout')
    parser.add_argument('-q',
                        dest='queue',
                        help='limit results to a build queue regex')
    parser.add_argument('--es-query-suffix',
                        help='further limit results with an '
                        'elastic search query suffix. This will be ANDed '
                        'to all queries. '
                        'For example, to limit all queries to a '
                        'specific branch use: '
                        ' --es-query-suffix "build_branch:\\"stable/'
                        'liberty\\""')
    parser.add_argument('-c',
                        '--conf',
                        help="Elastic Recheck Configuration "
                        "file to use for data_source options such as "
                        "elastic search url, logstash url, and database "
                        "uri.")
    parser.add_argument('-v',
                        dest='verbose',
                        action='store_true',
                        default=False,
                        help='print out details as we go')
    args = parser.parse_args()

    config = er_conf.Config(config_file=args.conf)

    classifier = er.Classifier(args.queries, config=config)

    buglist = []

    # if you don't hate timezones, you don't program enough
    epoch = datetime.utcfromtimestamp(0).replace(tzinfo=pytz.utc)
    ts = datetime.utcnow().replace(tzinfo=pytz.utc)
    # rawnow is useful for sending to javascript
    rawnow = int(((ts - epoch).total_seconds()) * 1000)

    ts = datetime(ts.year, ts.month, ts.day, ts.hour).replace(tzinfo=pytz.utc)
    # ms since epoch
    now = int(((ts - epoch).total_seconds()) * 1000)
    # number of days to match; this should be the same window as we are
    # indexing in logstash
    days = 10
    # How far back to start in the graphs
    start = now - (days * 24 * STEP)
    # ER timeframe for search
    timeframe = days * 24 * STEP / 1000

    last_indexed = int(
        ((classifier.most_recent() - epoch).total_seconds()) * 1000)
    behind = now - last_indexed

    # the data we're going to return, including interesting headers
    jsondata = {
        'now': rawnow,
        'last_indexed': last_indexed,
        'behind': behind,
        'buglist': []
    }

    # Get the cluster health for the header
    es = pyelasticsearch.ElasticSearch(config.es_url)
    jsondata['status'] = es.health()['status']

    for query in classifier.queries:
        if args.queue:
            query['query'] += ' AND build_queue:%s' % args.queue
        if args.es_query_suffix:
            query['query'] += ' AND (%s)' % args.es_query_suffix

        if query.get('suppress-graph'):
            continue
        if args.verbose:
            LOG.debug("Starting query for bug %s" % query['bug'])
        logstash_query = qb.encode_logstash_query(query['query'],
                                                  timeframe=timeframe)
        logstash_url = ("%s/#/dashboard/file/logstash.json?%s" %
                        (config.ls_url, logstash_query))
        bug_data = get_launchpad_bug(query['bug'])
        bug = dict(number=query['bug'],
                   query=query['query'],
                   logstash_url=logstash_url,
                   bug_data=bug_data,
                   fails=0,
                   fails24=0,
                   data=[],
                   voting=(False if query.get('allow-nonvoting') else True))
        buglist.append(bug)
        try:
            results = classifier.hits_by_query(query['query'],
                                               args.queue,
                                               size=3000,
                                               days=days)
        except pyelasticsearch.exceptions.InvalidJsonResponseError:
            LOG.exception(
                "Invalid Json while collecting metrics for query %s" %
                query['query'])
            continue
        except requests.exceptions.ReadTimeout:
            LOG.exception("Timeout while collecting metrics for query %s" %
                          query['query'])
            continue
        except pyelasticsearch.exceptions.ElasticHttpError as ex:
            LOG.error('Error from elasticsearch query for bug %s: %s',
                      query['bug'], ex)
            continue

        facets_for_fail = er_results.FacetSet()
        facets_for_fail.detect_facets(results, ["build_status", "build_uuid"])
        if "FAILURE" in facets_for_fail:
            bug['fails'] = len(facets_for_fail['FAILURE'])

        facets = er_results.FacetSet()
        facets.detect_facets(results,
                             ["build_status", "timestamp", "build_uuid"])

        for status in facets.keys():
            data = []
            for ts in range(start, now, STEP):
                if ts in facets[status]:
                    fails = len(facets[status][ts])
                    data.append([ts, fails])
                    # get the last 24 hr count as well; can't wait to have
                    # the pandas code in place so we can do it that way
                    if status == "FAILURE" and ts > (now - (24 * STEP)):
                        bug['fails24'] += fails
                else:
                    data.append([ts, 0])
            bug["data"].append(dict(label=status, data=data))

    # the sort order is a little odd, but basically: sort by failures in
    # the last 24 hours, then by total failures for bugs we haven't
    # seen in the last 24 hours.
    buglist = sorted(buglist,
                     key=lambda bug: -(bug['fails24'] * 100000 + bug['fails']))

    jsondata['buglist'] = buglist
    if args.output:
        out = open(args.output, 'w')
    else:
        out = sys.stdout

    try:
        out.write(json.dumps(jsondata))
    finally:
        out.close()
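
# A minimal invocation sketch (file names assumed, not from the source):
#
#     python graph.py path/to/queries -c elastic-recheck.conf -o graph-data.json
#
# This writes the jsondata structure above -- 'now', 'last_indexed',
# 'behind', 'status', and the sorted 'buglist' -- as JSON for the graphs
# to consume; omit -o to print it to stdout.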