def all_fails(classifier):
    """Find all the fails in the integrated gate.

    This attempts to find all the build jobs in the integrated gate
    so we can figure out how good we are doing on total
    classification.

    :param classifier: an elastic-recheck Classifier used to query
        elasticsearch for failed gate runs.
    :returns: dict mapping "<build_uuid>.<build_name>" to a dict with
        'log' (console log directory url), 'timestamp' (parsed
        datetime) and 'build_uuid' keys.
    """
    all_fails = {}
    query = ('filename:"console.html" '
             'AND message:"Finished: FAILURE" '
             'AND build_queue:"gate"')
    results = classifier.hits_by_query(query, size=30000)
    facets = er_results.FacetSet()
    facets.detect_facets(results, ["build_uuid"])
    for build in facets:
        for result in facets[build]:
            # If the job is on the exclude list, skip
            if re.search(EXCLUDED_JOBS_REGEX, result.build_name):
                continue
            # not perfect, but basically an attempt to show the integrated
            # gate. Would be nice if there was a zuul attr for this in es.
            if re.search("(^openstack/|devstack|grenade)", result.project):
                name = result.build_name
                timestamp = dp.parse(result.timestamp)
                # strip the console file name so we link to the log dir
                log = result.log_url.split("console.html")[0]
                all_fails["%s.%s" % (build, name)] = {
                    'log': log,
                    'timestamp': timestamp,
                    'build_uuid': result.build_uuid
                }
    return all_fails
def _count_fails_per_build_name(hits):
    """Return a mapping of build_name -> number of distinct failed builds.

    The previous implementation incremented once per build_name key of
    the facet, so every returned count was exactly 1 regardless of how
    many builds had failed; count the build_uuid leaf buckets instead.
    For input covering a single build the result is unchanged.
    """
    facets = er_results.FacetSet()
    counts = collections.defaultdict(int)
    facets.detect_facets(hits,
                         ["build_status", "build_name", "build_uuid"])
    if "FAILURE" in facets:
        for build_name in facets["FAILURE"]:
            # one failure per distinct build uuid under this job name
            counts[build_name] += len(facets["FAILURE"][build_name])
    return counts
def _status_count(results):
    """Return a mapping of build_status -> number of distinct builds."""
    facets = er_results.FacetSet()
    facets.detect_facets(results, ["build_status", "build_uuid"])
    # each facet value is a dict keyed by build_uuid, so its length is
    # the number of distinct builds with that status
    return {status: len(facets[status]) for status in facets}
def test_facet_multi_level(self):
    """Two-level faceting buckets by status, then by build uuid."""
    result_set = results.ResultSet(load_sample(1226337))
    facets = results.FacetSet()
    facets.detect_facets(result_set, ["build_status", "build_uuid"])
    # two statuses at the top level, with per-build sub-buckets
    self.assertEqual(2, len(facets.keys()))
    self.assertEqual(12, len(facets['FAILURE'].keys()))
    self.assertEqual(3, len(facets['SUCCESS'].keys()))
def _failed_jobs(results):
    """Return "<build_uuid>.<build_name>" strings for every failed hit."""
    facets = er_results.FacetSet()
    facets.detect_facets(results, ["build_status", "build_uuid"])
    if "FAILURE" not in facets:
        return []
    return ["%s.%s" % (build, hit.build_name)
            for build, hits in facets["FAILURE"].items()
            for hit in hits]
def test_facet_histogram(self):
    """Facet by timestamp bucket, then status, then build uuid."""
    data = load_sample(1226337)
    result_set = results.ResultSet(data)
    facets = results.FacetSet()
    facets.detect_facets(result_set,
                         ["timestamp", "build_status", "build_uuid"])
    self.assertEqual(len(facets.keys()), 14)
    # NOTE: removed a leftover "print facets[...]" debug statement that
    # was a syntax error under python 3; keys() is wrapped in list() so
    # the comparison also works with python 3 dict views.
    self.assertEqual(list(facets[1382104800000].keys()), ["FAILURE"])
    self.assertEqual(len(facets[1382104800000]["FAILURE"]), 2)
    self.assertEqual(list(facets[1382101200000].keys()), ["FAILURE"])
def test_facet_one_level(self):
    """Single-level faceting by build_uuid and by build_status."""
    data = load_sample(1218391)
    result_set = results.ResultSet(data)
    facets = results.FacetSet()
    facets.detect_facets(result_set, ["build_uuid"])
    self.assertEqual(len(facets.keys()), 20)

    facets = results.FacetSet()
    facets.detect_facets(result_set, ["build_status"])
    # list() so the comparison works with python 3 dict views as well
    # as python 2 lists
    self.assertEqual(list(facets.keys()), ['FAILURE'])

    data = load_sample(1226337)
    result_set = results.ResultSet(data)
    facets = results.FacetSet()
    facets.detect_facets(result_set, ["build_status"])
    self.assertEqual(len(facets.keys()), 2)
    self.assertIn('FAILURE', facets.keys())
    self.assertIn('SUCCESS', facets.keys())
    self.assertEqual(len(facets['FAILURE']), 202)
    self.assertEqual(len(facets['SUCCESS']), 27)
def all_fails(classifier):
    """Find all the fails in the integrated gate.

    This attempts to find all the build jobs in the integrated gate
    so we can figure out how good we are doing on total
    classification.
    """
    fails = {}
    query_results = classifier.hits_by_query(er_config.ALL_FAILS_QUERY,
                                             size=30000)
    facets = er_results.FacetSet()
    facets.detect_facets(query_results, ["build_uuid"])
    for build_uuid in facets:
        for hit in facets[build_uuid]:
            # crude integrated-gate filter: there is no zuul attribute
            # for this in elasticsearch, so match on project name.
            if not re.search(er_config.INCLUDED_PROJECTS_REGEX, hit.project):
                continue
            fails["%s.%s" % (build_uuid, hit.build_name)] = False
    return fails
def all_fails(classifier):
    """Find all the fails in the integrated gate.

    This attempts to find all the build jobs in the integrated gate
    so we can figure out how good we are doing on total
    classification.

    :param classifier: an elastic-recheck Classifier used to query
        elasticsearch for failed gate runs.
    :returns: dict mapping "<build_uuid>.<build_name>" to False
        (presumably meaning "not yet classified" -- confirm against
        the caller).
    """
    all_fails = {}
    query = ('filename:"console.html" '
             'AND message:"Finished: FAILURE" '
             'AND build_queue:"gate"')
    results = classifier.hits_by_query(query, size=30000)
    facets = er_results.FacetSet()
    facets.detect_facets(results, ["build_uuid"])
    for build in facets:
        for result in facets[build]:
            # not perfect, but basically an attempt to show the integrated
            # gate. Would be nice if there was a zuul attr for this in es.
            if re.search("(^openstack/|devstack|grenade)", result.project):
                all_fails["%s.%s" % (build, result.build_name)] = False
    return all_fails
def main():
    """Generate the JSON data consumed by the elastic-recheck graphs."""
    parser = argparse.ArgumentParser(description='Generate data for graphs.')
    parser.add_argument(dest='queries',
                        help='path to query file')
    parser.add_argument('-o', dest='output',
                        help='output filename')
    parser.add_argument('-q', dest='queue',
                        help='limit results to a specific query')
    parser.add_argument('-v', dest='verbose',
                        action='store_true', default=False,
                        help='print out details as we go')
    args = parser.parse_args()

    classifier = er.Classifier(args.queries)

    buglist = []

    # if you don't hate timezones, you don't program enough
    epoch = datetime.utcfromtimestamp(0).replace(tzinfo=pytz.utc)
    ts = datetime.utcnow().replace(tzinfo=pytz.utc)
    # rawnow is useful for sending to javascript
    rawnow = int(((ts - epoch).total_seconds()) * 1000)

    ts = datetime(ts.year, ts.month, ts.day, ts.hour).replace(tzinfo=pytz.utc)
    # ms since epoch
    now = int(((ts - epoch).total_seconds()) * 1000)

    # number of days to match to, this should be the same as we are
    # indexing in logstash
    days = 10
    # How far back to start in the graphs
    start = now - (days * 24 * STEP)
    # ER timeframe for search
    timeframe = days * 24 * STEP / 1000

    last_indexed = int(
        ((classifier.most_recent() - epoch).total_seconds()) * 1000)
    behind = now - last_indexed

    # the data we're going to return, including interesting headers
    jsondata = {
        'now': rawnow,
        'last_indexed': last_indexed,
        'behind': behind,
        'buglist': []
    }

    for query in classifier.queries:
        if args.queue:
            query['query'] = query['query'] + (' AND build_queue:"%s"' %
                                               args.queue)
        if query.get('suppress-graph'):
            continue
        if args.verbose:
            LOG.debug("Starting query for bug %s" % query['bug'])
        logstash_query = qb.encode_logstash_query(query['query'],
                                                  timeframe=timeframe)
        bug_data = get_launchpad_bug(query['bug'])
        bug = dict(number=query['bug'],
                   query=query['query'],
                   logstash_query=logstash_query,
                   bug_data=bug_data,
                   fails=0,
                   fails24=0,
                   data=[])
        buglist.append(bug)
        results = classifier.hits_by_query(query['query'],
                                           args.queue,
                                           size=3000)

        facets_for_fail = er_results.FacetSet()
        facets_for_fail.detect_facets(results,
                                      ["build_status", "build_uuid"])
        if "FAILURE" in facets_for_fail:
            bug['fails'] = len(facets_for_fail['FAILURE'])

        facets = er_results.FacetSet()
        facets.detect_facets(results,
                             ["build_status", "timestamp", "build_uuid"])

        for status in facets.keys():
            data = []
            # 'slot' rather than 'ts': the original reused 'ts', which
            # shadowed the current-hour datetime computed above.
            for slot in range(start, now, STEP):
                if slot in facets[status]:
                    fails = len(facets[status][slot])
                    data.append([slot, fails])
                    # get the last 24 hr count as well, can't wait to have
                    # the pandas code and able to do it that way
                    if status == "FAILURE" and slot > (now - (24 * STEP)):
                        bug['fails24'] += fails
                else:
                    data.append([slot, 0])
            bug["data"].append(dict(label=status, data=data))

    # the sort order is a little odd, but basically sort by failures in
    # the last 24 hours, then with all failures for ones that we haven't
    # seen in the last 24 hours.
    buglist = sorted(buglist,
                     key=lambda bug: -(bug['fails24'] * 100000 +
                                       bug['fails']))

    jsondata['buglist'] = buglist
    # with-statement so the file is closed even if the dump raises
    # (previously a failed write leaked the handle)
    with open(args.output, 'w') as out:
        out.write(json.dumps(jsondata))
def all_fails(classifier, config=None):
    """Find all the fails in the integrated gate.

    This attempts to find all the build jobs in the integrated gate
    so we can figure out how good we are doing on total
    classification.

    :param classifier: elastic-recheck Classifier used to search
        elasticsearch.
    :param config: optional er_config.Config; a default Config is
        created when omitted.
    :returns: dict with two keys, 'integrated_gate' and 'others', each
        mapping "<build_uuid>.<build_name>" to a dict of 'log',
        'timestamp' and 'build_uuid'.
    """
    config = config or er_config.Config()
    integrated_fails = {}
    other_fails = {}

    # hoisted out of the result loop: this membership set is constant,
    # previously the list was rebuilt for every hit
    integrated_gate_projects = frozenset([
        'openstack/cinder',
        'openstack/glance',
        'openstack/keystone',
        'openstack/neutron',
        'openstack/nova',
        'openstack/requirements',
        'openstack/tempest',
        'openstack-dev/devstack',
        'openstack-dev/grenade',
        'openstack-infra/devstack-gate',
    ])

    results = classifier.hits_by_query(config.all_fails_query,
                                       size=config.uncat_search_size)
    facets = er_results.FacetSet()
    facets.detect_facets(results, ["build_uuid"])
    found = 0
    for build in facets:
        for result in facets[build]:
            # If the job is on the exclude list, skip
            if re.search(config.excluded_jobs_regex, result.build_name):
                continue

            in_integrated_gate = result.project in integrated_gate_projects
            # not perfect, but basically an attempt to show the integrated
            # gate. Would be nice if there was a zuul attr for this in es.
            if not in_integrated_gate and not re.search(
                    config.included_projects_regex, result.project):
                continue

            name = result.build_name
            timestamp = dp.parse(result.timestamp)
            # Link to the log directory rather than the console file.
            # Fall back to the raw url for unknown layouts: previously
            # 'log' was left stale from an earlier iteration, or raised
            # NameError on the first one, when the url contained neither
            # marker.
            log = result.log_url
            if 'console.html' in log:
                log = log.split('console.html')[0]
            elif 'job-output.txt' in log:
                log = log.split('job-output.txt')[0]

            fail = {
                'log': log,
                'timestamp': timestamp,
                'build_uuid': result.build_uuid
            }
            if in_integrated_gate:
                integrated_fails["%s.%s" % (build, name)] = fail
            else:
                other_fails["%s.%s" % (build, name)] = fail
            found += 1
            # previously this logged len(all_fails), which was always 0
            # because all_fails was only assigned after the loop
            LOG.debug("Found failure: %s build_uuid: %s project %s",
                      found, result.build_uuid, result.project)

    return {'integrated_gate': integrated_fails, 'others': other_fails}
def main():
    """Generate the JSON data consumed by the elastic-recheck graphs."""
    parser = argparse.ArgumentParser(description='Generate data for graphs.')
    parser.add_argument(dest='queries',
                        help='path to query file')
    parser.add_argument('-o', dest='output',
                        help='output filename. Omit for stdout')
    parser.add_argument('-q', dest='queue',
                        help='limit results to a build queue regex')
    parser.add_argument('--es-query-suffix',
                        help='further limit results with an '
                             'elastic search query suffix. This will be ANDed '
                             'to all queries. '
                             'For example, to limit all queries to a '
                             'specific branch use: '
                             ' --es-query-suffix "build_branch:\\"stable/'
                             'liberty\\""')
    parser.add_argument('-c', '--conf', help="Elastic Recheck Configuration "
                        "file to use for data_source options such as "
                        "elastic search url, logstash url, and database "
                        "uri.")
    parser.add_argument('-v', dest='verbose',
                        action='store_true', default=False,
                        help='print out details as we go')
    args = parser.parse_args()

    config = er_conf.Config(config_file=args.conf)

    classifier = er.Classifier(args.queries, config=config)

    buglist = []

    # if you don't hate timezones, you don't program enough
    epoch = datetime.utcfromtimestamp(0).replace(tzinfo=pytz.utc)
    ts = datetime.utcnow().replace(tzinfo=pytz.utc)
    # rawnow is useful for sending to javascript
    rawnow = int(((ts - epoch).total_seconds()) * 1000)

    ts = datetime(ts.year, ts.month, ts.day, ts.hour).replace(tzinfo=pytz.utc)
    # ms since epoch
    now = int(((ts - epoch).total_seconds()) * 1000)

    # number of days to match to, this should be the same as we are
    # indexing in logstash
    days = 10
    # How far back to start in the graphs
    start = now - (days * 24 * STEP)
    # ER timeframe for search
    timeframe = days * 24 * STEP / 1000

    last_indexed = int(
        ((classifier.most_recent() - epoch).total_seconds()) * 1000)
    behind = now - last_indexed

    # the data we're going to return, including interesting headers
    jsondata = {
        'now': rawnow,
        'last_indexed': last_indexed,
        'behind': behind,
        'buglist': []
    }

    # Get the cluster health for the header
    es = pyelasticsearch.ElasticSearch(config.es_url)
    jsondata['status'] = es.health()['status']

    for query in classifier.queries:
        if args.queue:
            query['query'] += ' AND build_queue:%s' % args.queue
        if args.es_query_suffix:
            query['query'] += ' AND (%s)' % args.es_query_suffix

        if query.get('suppress-graph'):
            continue
        if args.verbose:
            LOG.debug("Starting query for bug %s" % query['bug'])
        logstash_query = qb.encode_logstash_query(query['query'],
                                                  timeframe=timeframe)
        logstash_url = ("%s/#/dashboard/file/logstash.json?%s" %
                        (config.ls_url, logstash_query))
        bug_data = get_launchpad_bug(query['bug'])
        bug = dict(number=query['bug'],
                   query=query['query'],
                   logstash_url=logstash_url,
                   bug_data=bug_data,
                   fails=0,
                   fails24=0,
                   data=[],
                   voting=(False if query.get('allow-nonvoting') else True))
        buglist.append(bug)
        try:
            results = classifier.hits_by_query(query['query'],
                                               args.queue,
                                               size=3000,
                                               days=days)
        except pyelasticsearch.exceptions.InvalidJsonResponseError:
            LOG.exception(
                "Invalid Json while collecting metrics for query %s" %
                query['query'])
            continue
        except requests.exceptions.ReadTimeout:
            LOG.exception("Timeout while collecting metrics for query %s" %
                          query['query'])
            continue
        except pyelasticsearch.exceptions.ElasticHttpError as ex:
            LOG.error('Error from elasticsearch query for bug %s: %s',
                      query['bug'], ex)
            continue

        facets_for_fail = er_results.FacetSet()
        facets_for_fail.detect_facets(results,
                                      ["build_status", "build_uuid"])
        if "FAILURE" in facets_for_fail:
            bug['fails'] = len(facets_for_fail['FAILURE'])

        facets = er_results.FacetSet()
        facets.detect_facets(results,
                             ["build_status", "timestamp", "build_uuid"])

        for status in facets.keys():
            data = []
            for ts in range(start, now, STEP):
                if ts in facets[status]:
                    fails = len(facets[status][ts])
                    data.append([ts, fails])
                    # get the last 24 hr count as well, can't wait to have
                    # the pandas code and able to do it that way
                    if status == "FAILURE" and ts > (now - (24 * STEP)):
                        bug['fails24'] += fails
                else:
                    data.append([ts, 0])
            bug["data"].append(dict(label=status, data=data))

    # the sort order is a little odd, but basically sort by failures in
    # the last 24 hours, then with all failures for ones that we haven't
    # seen in the last 24 hours.
    buglist = sorted(buglist,
                     key=lambda bug: -(bug['fails24'] * 100000 +
                                       bug['fails']))

    jsondata['buglist'] = buglist

    # Only close handles we opened ourselves: the previous try/finally
    # closed sys.stdout when no -o was given, breaking any later writes
    # to stdout in the same process.
    payload = json.dumps(jsondata)
    if args.output:
        with open(args.output, 'w') as out:
            out.write(payload)
    else:
        sys.stdout.write(payload)