Example #1
 def test_read_channel_config_invalid_path(self):
     self.fake_config.set('ircbot', 'channel_config', 'fake_path.yaml')
     with self.assertRaises(bot.ElasticRecheckException) as exc:
         bot._main([], er_conf.Config(config_obj=self.fake_config))
     raised_exc = exc.exception
     error_msg = "Unable to read layout config file at fake_path.yaml"
     self.assertEqual(str(raised_exc), error_msg)
Example #2
 def test_read_channel_config_not_specified(self):
     self.fake_config.set('ircbot', 'channel_config', None)
     with self.assertRaises(bot.ElasticRecheckException) as exc:
         bot._main([], er_conf.Config(config_obj=self.fake_config))
     raised_exc = exc.exception
     self.assertEqual(str(raised_exc), "Channel Config must be specified "
                      "in config file.")
Example #3
def main():
    args = get_options()

    config = er_conf.Config(config_file=args.conffile)

    if args.foreground:
        _main(args, config)
    else:
        pid = pid_file_module.TimeoutPIDLockFile(config.pid_fn, 10)
        with daemon.DaemonContext(pidfile=pid):
            _main(args, config)
Example #4
    def setUp(self):
        super(TestQueries, self).setUp()
        config = er_conf.Config(config_file='elasticRecheck.conf')
        self.classifier = elasticRecheck.Classifier(config.gerrit_query_file,
                                                    config=config)

        self.lp = launchpad.Launchpad.login_anonymously(
            'grabbing bugs', 'production', LPCACHEDIR)
        self.openstack_projects = (
            self.get_group_projects('openstack') +
            self.get_group_projects('oslo') +
            # Fix for story 2006737 since os-brick is
            # not in the openstack group in launchpad.
            ['os-brick'])
Example #5
 def setUp(self):
     super(TestBot, self).setUp()
     self.fake_config = configparser.ConfigParser({'server_password': None},
                                                  allow_no_value=True)
     _set_fake_config(self.fake_config)
     config = er_conf.Config(config_obj=self.fake_config)
     self.channel_config = bot.ChannelConfig(
         yaml.safe_load(open('recheckwatchbot.yaml')))
     with mock.patch('launchpadlib.launchpad.Launchpad'):
         self.recheck_watch = bot.RecheckWatch(None,
                                               self.channel_config,
                                               None,
                                               config=config,
                                               commenting=False)
Example #6
def main():
    opts = get_options()
    config = er_config.Config(
        config_file=opts.conf,
        uncat_search_size=opts.search_size,
        all_fails_query=opts.all_fails_query,
        excluded_jobs_regex=opts.excluded_jobs_regex,
        included_projects_regex=opts.included_projects_regex)

    classifier = er.Classifier(opts.dir, config=config)
    all_gate_fails = all_fails(classifier, config=config)
    if opts.verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO

    logging.basicConfig(format='%(asctime)s [%(name)s]  %(levelname)s: '
                        '%(message)s',
                        level=level)
    if level == logging.INFO:
        # NOTE(mtreinish): This logger is overly chatty at INFO, logging
        # every time an HTTP connection is established. This isn't really
        # useful at INFO for this command.
        logging.getLogger('requests.packages.urllib3.connectionpool').setLevel(
            logging.WARN)

    LOG.info("Starting search for unclassified failures")
    for group in all_gate_fails:
        LOG.info("Processing failures for group: %s", group)
        fails = all_gate_fails[group]
        if not fails:
            # It would be pretty spectacular if we had no failures, so if
            # we're using the default all-failures query there could be a
            # problem with the query; log a hint.
            if opts.all_fails_query == er_config.ALL_FAILS_QUERY:
                LOG.warning(
                    'No failures found in group "%s". The default '
                    'ALL_FAILS_QUERY might be broken.', group)
            continue
        data = collect_metrics(classifier, fails, config=config)
        engine = setup_template_engine(opts.templatedir, group=group)
        html = classifying_rate(fails, data, engine, classifier, config.ls_url)
        if opts.output:
            out_dir = opts.output
        else:
            out_dir = os.getcwd()
        with open(os.path.join(out_dir, group + '.html'), "w") as f:
            f.write(html)
Example #7
 def __init__(self, ircbot, channel_config, msgs, config=None,
              commenting=True):
     super(RecheckWatch, self).__init__()
     self.config = config or er_conf.Config()
     self.ircbot = ircbot
     self.channel_config = channel_config
     self.msgs = msgs
     self.log = logging.getLogger('recheckwatchbot')
     self.username = self.config.gerrit_user
     self.queries = self.config.gerrit_query_file
     self.host = self.config.gerrit_host
     self.connected = False
     self.commenting = commenting
     self.key = self.config.gerrit_host_key
     self.lp = launchpad.Launchpad.login_anonymously('grabbing bugs',
                                                     'production',
                                                     LPCACHEDIR,
                                                     timeout=60)
Example #8
def collect_metrics(classifier, fails, config=None):
    config = config or er_config.Config()
    data = {}
    for q in classifier.queries:
        try:
            results = classifier.hits_by_query(q['query'],
                                               size=config.uncat_search_size)
            hits = _status_count(results)
            LOG.debug("Collected metrics for query %s, hits %s", q['query'],
                      hits)
            data[q['bug']] = {
                'fails': _failure_count(hits),
                'hits': hits,
                'percentages': _failure_percentage(results, fails),
                'query': q['query'],
                'failed_jobs': _failed_jobs(results)
            }
        except requests.exceptions.ReadTimeout:
            LOG.exception("Failed to collection metrics for query %s" %
                          q['query'])
    return data
Example #9
def all_fails(classifier, config=None):
    """Find all the the fails in the integrated gate.

    This attempts to find all the build jobs in the integrated gate
    so we can figure out how well we are doing on total classification.
    """

    config = config or er_config.Config()
    integrated_fails = {}
    other_fails = {}
    all_fails = {}
    results = classifier.hits_by_query(config.all_fails_query,
                                       size=config.uncat_search_size)
    facets = er_results.FacetSet()
    facets.detect_facets(results, ["build_uuid"])
    for build in facets:
        for result in facets[build]:
            # If the job is on the exclude list, skip
            if re.search(config.excluded_jobs_regex, result.build_name):
                continue

            integrated_gate_projects = [
                'openstack/cinder',
                'openstack/glance',
                'openstack/keystone',
                'openstack/neutron',
                'openstack/nova',
                'openstack/requirements',
                'openstack/tempest',
                'openstack-dev/devstack',
                'openstack-dev/grenade',
                'openstack-infra/devstack-gate',
            ]
            if result.project in integrated_gate_projects:
                name = result.build_name
                timestamp = dp.parse(result.timestamp)
                if 'console.html' in result.log_url:
                    log = result.log_url.split('console.html')[0]
                elif 'job-output.txt' in result.log_url:
                    log = result.log_url.split('job-output.txt')[0]
                integrated_fails["%s.%s" % (build, name)] = {
                    'log': log,
                    'timestamp': timestamp,
                    'build_uuid': result.build_uuid
                }
            else:
                # not perfect, but basically an attempt to show the integrated
                # gate. Would be nice if there was a zuul attr for this in es.
                if re.search(config.included_projects_regex, result.project):
                    name = result.build_name
                    timestamp = dp.parse(result.timestamp)
                    if 'console.html' in result.log_url:
                        log = result.log_url.split('console.html')[0]
                    elif 'job-output.txt' in result.log_url:
                        log = result.log_url.split('job-output.txt')[0]
                    other_fails["%s.%s" % (build, name)] = {
                        'log': log,
                        'timestamp': timestamp,
                        'build_uuid': result.build_uuid
                    }

            LOG.debug("Found failure: %s build_uuid: %s project %s",
                      len(all_fails), result.build_uuid, result.project)

    all_fails = {'integrated_gate': integrated_fails, 'others': other_fails}
    return all_fails
Example #10
def setup():
    global config
    if not config:
        args = parse_command_line_args()
        config = ConfigParser.ConfigParser()
        config.read(args.config_file)
    # Database Configuration
    global engine
    db_uri = _config_get(config.get, 'default', 'db_uri')
    pool_size = _config_get(config.getint, 'default', 'pool_size', 20)
    pool_recycle = _config_get(config.getint, 'default', 'pool_recycle', 3600)
    engine = create_engine(db_uri,
                           pool_size=pool_size,
                           pool_recycle=pool_recycle)
    global Session
    Session = sessionmaker(bind=engine)
    # RSS Configuration
    rss_opts['frontend_url'] = _config_get(
        config.get, 'default', 'frontend_url',
        'http://status.openstack.org/openstack-health')
    # Elastic-recheck Configuration
    global query_dir
    query_dir = _config_get(config.get, 'default', 'query_dir', None)
    global es_url
    es_url = _config_get(config.get, 'default', 'es_url', None)
    if query_dir and er:
        elastic_config = er_config.Config(es_url=es_url)
        global classifier
        classifier = er.Classifier(query_dir, config=elastic_config)
    # Cache Configuration
    backend = _config_get(config.get, 'default', 'cache_backend',
                          'dogpile.cache.dbm')
    expire = _config_get(config.getint, 'default', 'cache_expiration',
                         datetime.timedelta(minutes=30))
    cache_file = _config_get(
        config.get, 'default', 'cache_file',
        os.path.join(tempfile.gettempdir(), 'openstack-health.dbm'))
    cache_url = _config_get(config.get, 'default', 'cache_url', None)

    global region
    if backend == 'dogpile.cache.dbm':
        args = {'filename': cache_file}
        if cache_url:

            def _key_generator(namespace, fn, **kw):
                namespace = fn.__name__ + (namespace or '')

                def generate_key(*arg):
                    return namespace + "_".join(
                        str(s).replace(' ', '_') for s in arg)

                return generate_key

            memcache_proxy = distributed_dbm.MemcachedLockedDBMProxy(cache_url)
            region = dogpile.cache.make_region(
                async_creation_runner=_periodic_refresh_cache,
                function_key_generator=_key_generator).configure(
                    backend,
                    expiration_time=expire,
                    arguments=args,
                    wrap=[memcache_proxy])
        else:
            region = dogpile.cache.make_region().configure(
                backend, expiration_time=expire, arguments=args)
    else:
        args = {'distributed_lock': True}
        if cache_url:
            args['url'] = cache_url
        region = dogpile.cache.make_region(
            async_creation_runner=_periodic_refresh_cache).configure(
                backend, expiration_time=expire, arguments=args)
Example #11
def main():
    parser = argparse.ArgumentParser(description='Generate data for graphs.')
    parser.add_argument(dest='queries', help='path to query file')
    parser.add_argument('-o',
                        dest='output',
                        help='output filename. Omit for stdout')
    parser.add_argument('-q',
                        dest='queue',
                        help='limit results to a build queue regex')
    parser.add_argument('--es-query-suffix',
                        help='further limit results with an '
                        'elastic search query suffix. This will be ANDed '
                        'to all queries. '
                        'For example, to limit all queries to a '
                        'specific branch use: '
                        ' --es-query-suffix "build_branch:\\"stable/'
                        'liberty\\""')
    parser.add_argument('-c',
                        '--conf',
                        help="Elastic Recheck Configuration "
                        "file to use for data_source options such as "
                        "elastic search url, logstash url, and database "
                        "uri.")
    parser.add_argument('-v',
                        dest='verbose',
                        action='store_true',
                        default=False,
                        help='print out details as we go')
    args = parser.parse_args()

    config = er_conf.Config(config_file=args.conf)

    classifier = er.Classifier(args.queries, config=config)

    buglist = []

    # if you don't hate timezones, you don't program enough
    epoch = datetime.utcfromtimestamp(0).replace(tzinfo=pytz.utc)
    ts = datetime.utcnow().replace(tzinfo=pytz.utc)
    # rawnow is useful for sending to javascript
    rawnow = int(((ts - epoch).total_seconds()) * 1000)

    ts = datetime(ts.year, ts.month, ts.day, ts.hour).replace(tzinfo=pytz.utc)
    # ms since epoch
    now = int(((ts - epoch).total_seconds()) * 1000)
    # number of days to match to, this should be the same as we are
    # indexing in logstash
    days = 10
    # How far back to start in the graphs
    start = now - (days * 24 * STEP)
    # ER timeframe for search
    timeframe = days * 24 * STEP / 1000

    last_indexed = int(
        ((classifier.most_recent() - epoch).total_seconds()) * 1000)
    behind = now - last_indexed

    # the data we're going to return, including interesting headers
    jsondata = {
        'now': rawnow,
        'last_indexed': last_indexed,
        'behind': behind,
        'buglist': []
    }

    # Get the cluster health for the header
    es = pyelasticsearch.ElasticSearch(config.es_url)
    jsondata['status'] = es.health()['status']

    for query in classifier.queries:
        if args.queue:
            query['query'] += ' AND build_queue:%s' % args.queue
        if args.es_query_suffix:
            query['query'] += ' AND (%s)' % args.es_query_suffix

        if query.get('suppress-graph'):
            continue
        if args.verbose:
            LOG.debug("Starting query for bug %s" % query['bug'])
        logstash_query = qb.encode_logstash_query(query['query'],
                                                  timeframe=timeframe)
        logstash_url = ("%s/#/dashboard/file/logstash.json?%s" %
                        (config.ls_url, logstash_query))
        bug_data = get_launchpad_bug(query['bug'])
        bug = dict(number=query['bug'],
                   query=query['query'],
                   logstash_url=logstash_url,
                   bug_data=bug_data,
                   fails=0,
                   fails24=0,
                   data=[],
                   voting=(False if query.get('allow-nonvoting') else True))
        buglist.append(bug)
        try:
            results = classifier.hits_by_query(query['query'],
                                               args.queue,
                                               size=3000,
                                               days=days)
        except pyelasticsearch.exceptions.InvalidJsonResponseError:
            LOG.exception(
                "Invalid Json while collecting metrics for query %s" %
                query['query'])
            continue
        except requests.exceptions.ReadTimeout:
            LOG.exception("Timeout while collecting metrics for query %s" %
                          query['query'])
            continue
        except pyelasticsearch.exceptions.ElasticHttpError as ex:
            LOG.error('Error from elasticsearch query for bug %s: %s',
                      query['bug'], ex)
            continue

        facets_for_fail = er_results.FacetSet()
        facets_for_fail.detect_facets(results, ["build_status", "build_uuid"])
        if "FAILURE" in facets_for_fail:
            bug['fails'] = len(facets_for_fail['FAILURE'])

        facets = er_results.FacetSet()
        facets.detect_facets(results,
                             ["build_status", "timestamp", "build_uuid"])

        for status in facets.keys():
            data = []
            for ts in range(start, now, STEP):
                if ts in facets[status]:
                    fails = len(facets[status][ts])
                    data.append([ts, fails])
                    # get the last 24 hr count as well, can't wait to have
                    # the pandas code and able to do it that way
                    if status == "FAILURE" and ts > (now - (24 * STEP)):
                        bug['fails24'] += fails
                else:
                    data.append([ts, 0])
            bug["data"].append(dict(label=status, data=data))

    # The sort order is a little odd, but basically sort by failures in
    # the last 24 hours, then by total failures for bugs we haven't
    # seen in the last 24 hours.
    buglist = sorted(buglist,
                     key=lambda bug: -(bug['fails24'] * 100000 + bug['fails']))

    jsondata['buglist'] = buglist
    if args.output:
        out = open(args.output, 'w')
    else:
        out = sys.stdout

    try:
        out.write(json.dumps(jsondata))
    finally:
        # Only close file handles we opened ourselves; leave sys.stdout open.
        if out is not sys.stdout:
            out.close()