def run(self):
    # Import here because it needs to happen after daemonization
    import elastic_recheck.elasticRecheck as er
    classifier = er.Classifier(self.queries, config=self.config)
    stream = er.Stream(self.username, self.host, self.key,
                       config=self.config)
    while True:
        try:
            event = stream.get_failed_tempest()

            for job in event.failed_jobs:
                job.bugs = set(classifier.classify(
                    event.change,
                    event.rev,
                    job.build_short_uuid,
                    recent=True))

            # Always mark the event as processed; only leave a review
            # comment when at least one bug matched.
            if not event.get_all_bugs():
                self._read(event)
            else:
                self._read(event)
                stream.leave_comment(
                    event,
                    self.msgs,
                    debug=not self.commenting)
        except er.ResultTimedOut as e:
            self.log.warning(e.message)
            self._read(msg=e.message)
        except Exception:
            self.log.exception("Uncaught exception processing event.")
def test_hits_by_query(self):
    c = er.Classifier("queries.yaml")
    q = ('''message:"Cannot ''createImage''"'''
         ''' AND filename:"console.html" AND voting:1''')
    results = c.hits_by_query(q)
    self.assertEqual(len(results), 20)
    self.assertEqual(results.took, 46)
    self.assertEqual(results.timed_out, False)
def main():
    opts = get_options()
    classifier = er.Classifier(opts.dir)
    fails = all_fails(classifier)
    data = collect_metrics(classifier, fails)
    print_metrics(data, with_lp=opts.lp)
    if opts.rate:
        classifying_rate(fails, data)
def test_classify_with_test_id_filter_no_match(self, mock_id_check):
    c = er.Classifier('./elastic_recheck/tests/unit/queries_with_filters')
    es_mock = mock.patch.object(c.es, 'search', return_value=[1, 2, 3])
    es_mock.start()
    self.addCleanup(es_mock.stop)
    res = c.classify(1234, 1, 'fake')
    self.assertEqual(
        res, [],
        "classify() returned bug matches %s when none should "
        "have been found" % res)
def setUp(self):
    super(TestQueries, self).setUp()
    config = ConfigParser.ConfigParser({'server_password': None})
    config.read('elasticRecheck.conf')
    self.queries = 'queries'
    self.classifier = elasticRecheck.Classifier(self.queries)
    self.lp = launchpad.Launchpad.login_anonymously('grabbing bugs',
                                                    'production',
                                                    LPCACHEDIR)
    self.openstack_projects = (self.get_group_projects('openstack') +
                               self.get_group_projects('oslo'))
def test_classify_with_test_id_filter_match(self, mock_id_check):
    c = er.Classifier('./elastic_recheck/tests/unit/queries_with_filters')
    es_mock = mock.patch.object(c.es, 'search', return_value=[1, 2, 3])
    es_mock.start()
    self.addCleanup(es_mock.stop)
    res = c.classify(1234, 1, 'fake')
    self.assertEqual(
        res, ['1234567'],
        "classify() returned %s when it should have returned "
        "a list with one bug id: '1234567'" % res)
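# The two test_classify_with_test_id_filter_* tests above load queries from
# the queries_with_filters fixture directory and rely on a per-query test id
# filter to decide whether the mocked Elasticsearch hits count as a match.
# A minimal sketch of what such a query file might contain is shown below;
# this is an assumption for illustration only -- the bug number, message,
# and test id are not copied from the real fixture files.
EXAMPLE_FILTERED_QUERY_YAML = """
query: >
  message:"Example failure signature"
  AND tags:"console"
test_ids:
  - example.tests.test_something.TestSomething.test_case
"""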
def main():
    opts = get_options()
    classifier = er.Classifier(opts.dir)
    fails = all_fails(classifier)
    data = collect_metrics(classifier, fails)
    engine = setup_template_engine(opts.templatedir)
    html = classifying_rate(fails, data, engine, classifier)
    if opts.output:
        with open(opts.output, "w") as f:
            f.write(html)
    else:
        print(html)
def setUp(self):
    super(TestQueries, self).setUp()
    config = er_conf.Config(config_file='elasticRecheck.conf')
    self.classifier = elasticRecheck.Classifier(config.gerrit_query_file,
                                                config=config)
    self.lp = launchpad.Launchpad.login_anonymously('grabbing bugs',
                                                    'production',
                                                    LPCACHEDIR)
    self.openstack_projects = (
        self.get_group_projects('openstack') +
        self.get_group_projects('oslo') +
        # Fix for story 2006737 since os-brick is
        # not in the openstack group in launchpad.
        ['os-brick'])
def main():
    opts = get_options()
    config = er_config.Config(
        config_file=opts.conf,
        uncat_search_size=opts.search_size,
        all_fails_query=opts.all_fails_query,
        excluded_jobs_regex=opts.excluded_jobs_regex,
        included_projects_regex=opts.included_projects_regex)
    classifier = er.Classifier(opts.dir, config=config)
    all_gate_fails = all_fails(classifier, config=config)
    if opts.verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO
    logging.basicConfig(format='%(asctime)s [%(name)s] %(levelname)s: '
                               '%(message)s',
                        level=level)
    if level == logging.INFO:
        # NOTE(mtreinish): This logger is overly chatty at INFO, logging
        # every time an HTTP connection is established. This isn't really
        # useful at INFO for this command.
        logging.getLogger(
            'requests.packages.urllib3.connectionpool').setLevel(
            logging.WARN)

    LOG.info("Starting search for unclassified failures")
    for group in all_gate_fails:
        LOG.info("Processing failures for group: %s", group)
        fails = all_gate_fails[group]
        if not fails:
            # It would be pretty spectacular if we had no failures so if
            # we're using the default all failures query, there could be a
            # problem with the query, so log a hint.
            if opts.all_fails_query == er_config.ALL_FAILS_QUERY:
                LOG.warning(
                    'No failures found in group "%s". The default '
                    'ALL_FAILS_QUERY might be broken.', group)
            continue
        data = collect_metrics(classifier, fails, config=config)
        engine = setup_template_engine(opts.templatedir, group=group)
        html = classifying_rate(fails, data, engine, classifier,
                                config.ls_url)
        if opts.output:
            out_dir = opts.output
        else:
            out_dir = os.getcwd()
        with open(os.path.join(out_dir, group + '.html'), "w") as f:
            f.write(html)
def setUp(self):
    super(TestSuppressNotifcation, self).setUp()
    self.classifier = elasticRecheck.Classifier(
        "./elastic_recheck/tests/unit/suppressed_queries")
def main():
    parser = argparse.ArgumentParser(description='Generate data for graphs.')
    parser.add_argument(dest='queries',
                        help='path to query file')
    parser.add_argument('-o', dest='output',
                        help='output filename')
    parser.add_argument('-q', dest='queue',
                        help='limit results to a specific query')
    parser.add_argument('-v', dest='verbose',
                        action='store_true', default=False,
                        help='print out details as we go')
    args = parser.parse_args()

    classifier = er.Classifier(args.queries)

    buglist = []

    # if you don't hate timezones, you don't program enough
    epoch = datetime.utcfromtimestamp(0).replace(tzinfo=pytz.utc)
    ts = datetime.utcnow().replace(tzinfo=pytz.utc)
    # rawnow is useful for sending to javascript
    rawnow = int(((ts - epoch).total_seconds()) * 1000)

    ts = datetime(ts.year, ts.month, ts.day, ts.hour).replace(tzinfo=pytz.utc)
    # ms since epoch
    now = int(((ts - epoch).total_seconds()) * 1000)
    # number of days to match to, this should be the same as we are
    # indexing in logstash
    days = 10
    # How far back to start in the graphs
    start = now - (days * 24 * STEP)
    # ER timeframe for search
    timeframe = days * 24 * STEP / 1000

    last_indexed = int(
        ((classifier.most_recent() - epoch).total_seconds()) * 1000)
    behind = now - last_indexed

    # the data we're going to return, including interesting headers
    jsondata = {
        'now': rawnow,
        'last_indexed': last_indexed,
        'behind': behind,
        'buglist': []
    }

    for query in classifier.queries:
        if args.queue:
            query['query'] = query['query'] + (' AND build_queue:"%s"'
                                               % args.queue)
        if query.get('suppress-graph'):
            continue
        if args.verbose:
            LOG.debug("Starting query for bug %s" % query['bug'])
        logstash_query = qb.encode_logstash_query(query['query'],
                                                  timeframe=timeframe)
        bug_data = get_launchpad_bug(query['bug'])
        bug = dict(number=query['bug'],
                   query=query['query'],
                   logstash_query=logstash_query,
                   bug_data=bug_data,
                   fails=0,
                   fails24=0,
                   data=[])
        buglist.append(bug)
        results = classifier.hits_by_query(query['query'],
                                           args.queue,
                                           size=3000)

        facets_for_fail = er_results.FacetSet()
        facets_for_fail.detect_facets(results,
                                      ["build_status", "build_uuid"])
        if "FAILURE" in facets_for_fail:
            bug['fails'] = len(facets_for_fail['FAILURE'])

        facets = er_results.FacetSet()
        facets.detect_facets(results,
                             ["build_status", "timestamp", "build_uuid"])

        for status in facets.keys():
            data = []
            for ts in range(start, now, STEP):
                if ts in facets[status]:
                    fails = len(facets[status][ts])
                    data.append([ts, fails])
                    # get the last 24 hr count as well, can't wait to have
                    # the pandas code and able to do it that way
                    if status == "FAILURE" and ts > (now - (24 * STEP)):
                        bug['fails24'] += fails
                else:
                    data.append([ts, 0])
            bug["data"].append(dict(label=status, data=data))

    # the sort order is a little odd, but basically sort by failures in
    # the last 24 hours, then with all failures for ones that we haven't
    # seen in the last 24 hours.
    buglist = sorted(buglist,
                     key=lambda bug: -(bug['fails24'] * 100000 +
                                       bug['fails']))

    jsondata['buglist'] = buglist
    out = open(args.output, 'w')
    out.write(json.dumps(jsondata))
    out.close()
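# The bucket arithmetic in main() above works in milliseconds since the
# epoch and assumes STEP is the graph resolution in milliseconds (one hour,
# i.e. 3600 * 1000 -- an assumed value, not visible in this excerpt). Under
# that assumption the numbers fall out as follows:
STEP = 3600 * 1000                # one hour in ms (assumed value)
days = 10
window_ms = days * 24 * STEP      # 10-day window expressed in ms
timeframe_s = window_ms / 1000    # the same window in seconds, for the search
buckets = window_ms // STEP       # hourly buckets plotted per bug
assert timeframe_s == days * 24 * 3600   # 864000 seconds
assert buckets == days * 24              # 240 hourly data points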
def setup():
    global config
    if not config:
        args = parse_command_line_args()
        config = ConfigParser.ConfigParser()
        config.read(args.config_file)

    # Database Configuration
    global engine
    db_uri = _config_get(config.get, 'default', 'db_uri')
    pool_size = _config_get(config.getint, 'default', 'pool_size', 20)
    pool_recycle = _config_get(config.getint, 'default', 'pool_recycle',
                               3600)
    engine = create_engine(db_uri,
                           pool_size=pool_size,
                           pool_recycle=pool_recycle)
    global Session
    Session = sessionmaker(bind=engine)

    # RSS Configuration
    rss_opts['frontend_url'] = _config_get(
        config.get, 'default', 'frontend_url',
        'http://status.openstack.org/openstack-health')

    # Elastic-recheck Configuration
    global query_dir
    query_dir = _config_get(config.get, 'default', 'query_dir', None)
    global es_url
    es_url = _config_get(config.get, 'default', 'es_url', None)
    if query_dir and er:
        elastic_config = er_config.Config(es_url=es_url)
        global classifier
        classifier = er.Classifier(query_dir, config=elastic_config)

    # Cache Configuration
    backend = _config_get(config.get, 'default', 'cache_backend',
                          'dogpile.cache.dbm')
    expire = _config_get(config.getint, 'default', 'cache_expiration',
                         datetime.timedelta(minutes=30))
    cache_file = _config_get(
        config.get, 'default', 'cache_file',
        os.path.join(tempfile.gettempdir(), 'openstack-health.dbm'))
    cache_url = _config_get(config.get, 'default', 'cache_url', None)
    global region
    if backend == 'dogpile.cache.dbm':
        args = {'filename': cache_file}
        if cache_url:

            def _key_generator(namespace, fn, **kw):
                namespace = fn.__name__ + (namespace or '')

                def generate_key(*arg):
                    return namespace + "_".join(
                        str(s).replace(' ', '_') for s in arg)
                return generate_key

            memcache_proxy = distributed_dbm.MemcachedLockedDBMProxy(
                cache_url)
            region = dogpile.cache.make_region(
                async_creation_runner=_periodic_refresh_cache,
                function_key_generator=_key_generator).configure(
                    backend,
                    expiration_time=expire,
                    arguments=args,
                    wrap=[memcache_proxy])
        else:
            region = dogpile.cache.make_region().configure(
                backend,
                expiration_time=expire,
                arguments=args)
    else:
        args = {'distributed_lock': True}
        if cache_url:
            args['url'] = cache_url
        region = dogpile.cache.make_region(
            async_creation_runner=_periodic_refresh_cache).configure(
                backend,
                expiration_time=expire,
                arguments=args)
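# setup() above reads all of its options from a single [default] section via
# _config_get(). A minimal sketch of a matching configuration file; the
# values below (URIs, paths, expiration) are illustrative assumptions, not
# defaults shipped with the service:
EXAMPLE_API_CONFIG = """
[default]
db_uri = mysql+pymysql://subunit:subunit@127.0.0.1/subunit2sql
pool_size = 20
pool_recycle = 3600
frontend_url = http://status.openstack.org/openstack-health
query_dir = /opt/elastic-recheck/queries
es_url = http://localhost:9200
cache_backend = dogpile.cache.dbm
cache_expiration = 1800
cache_file = /var/cache/openstack-health.dbm
"""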
def main():
    parser = argparse.ArgumentParser(description='Generate data for graphs.')
    parser.add_argument(dest='queries',
                        help='path to query file')
    parser.add_argument('-o', dest='output',
                        help='output filename. Omit for stdout')
    parser.add_argument('-q', dest='queue',
                        help='limit results to a build queue regex')
    parser.add_argument('--es-query-suffix',
                        help='further limit results with an '
                             'elastic search query suffix. This will be ANDed '
                             'to all queries. '
                             'For example, to limit all queries to a '
                             'specific branch use: '
                             ' --es-query-suffix "build_branch:\\"stable/'
                             'liberty\\""')
    parser.add_argument('-c', '--conf', help="Elastic Recheck Configuration "
                                             "file to use for data_source "
                                             "options such as "
                                             "elastic search url, logstash "
                                             "url, and database uri.")
    parser.add_argument('-v', dest='verbose',
                        action='store_true', default=False,
                        help='print out details as we go')
    args = parser.parse_args()

    config = er_conf.Config(config_file=args.conf)

    classifier = er.Classifier(args.queries, config=config)

    buglist = []

    # if you don't hate timezones, you don't program enough
    epoch = datetime.utcfromtimestamp(0).replace(tzinfo=pytz.utc)
    ts = datetime.utcnow().replace(tzinfo=pytz.utc)
    # rawnow is useful for sending to javascript
    rawnow = int(((ts - epoch).total_seconds()) * 1000)

    ts = datetime(ts.year, ts.month, ts.day, ts.hour).replace(tzinfo=pytz.utc)
    # ms since epoch
    now = int(((ts - epoch).total_seconds()) * 1000)
    # number of days to match to, this should be the same as we are
    # indexing in logstash
    days = 10
    # How far back to start in the graphs
    start = now - (days * 24 * STEP)
    # ER timeframe for search
    timeframe = days * 24 * STEP / 1000

    last_indexed = int(
        ((classifier.most_recent() - epoch).total_seconds()) * 1000)
    behind = now - last_indexed

    # the data we're going to return, including interesting headers
    jsondata = {
        'now': rawnow,
        'last_indexed': last_indexed,
        'behind': behind,
        'buglist': []
    }

    # Get the cluster health for the header
    es = pyelasticsearch.ElasticSearch(config.es_url)
    jsondata['status'] = es.health()['status']

    for query in classifier.queries:
        if args.queue:
            query['query'] += ' AND build_queue:%s' % args.queue
        if args.es_query_suffix:
            query['query'] += ' AND (%s)' % args.es_query_suffix

        if query.get('suppress-graph'):
            continue
        if args.verbose:
            LOG.debug("Starting query for bug %s" % query['bug'])
        logstash_query = qb.encode_logstash_query(query['query'],
                                                  timeframe=timeframe)
        logstash_url = ("%s/#/dashboard/file/logstash.json?%s"
                        % (config.ls_url, logstash_query))
        bug_data = get_launchpad_bug(query['bug'])
        bug = dict(number=query['bug'],
                   query=query['query'],
                   logstash_url=logstash_url,
                   bug_data=bug_data,
                   fails=0,
                   fails24=0,
                   data=[],
                   voting=(False if query.get('allow-nonvoting') else True))
        buglist.append(bug)
        try:
            results = classifier.hits_by_query(query['query'],
                                               args.queue,
                                               size=3000,
                                               days=days)
        except pyelasticsearch.exceptions.InvalidJsonResponseError:
            LOG.exception(
                "Invalid Json while collecting metrics for query %s"
                % query['query'])
            continue
        except requests.exceptions.ReadTimeout:
            LOG.exception("Timeout while collecting metrics for query %s"
                          % query['query'])
            continue
        except pyelasticsearch.exceptions.ElasticHttpError as ex:
            LOG.error('Error from elasticsearch query for bug %s: %s',
                      query['bug'], ex)
            continue

        facets_for_fail = er_results.FacetSet()
        facets_for_fail.detect_facets(results,
                                      ["build_status", "build_uuid"])
        if "FAILURE" in facets_for_fail:
            bug['fails'] = len(facets_for_fail['FAILURE'])

        facets = er_results.FacetSet()
        facets.detect_facets(results,
                             ["build_status", "timestamp", "build_uuid"])

        for status in facets.keys():
            data = []
            for ts in range(start, now, STEP):
                if ts in facets[status]:
                    fails = len(facets[status][ts])
                    data.append([ts, fails])
                    # get the last 24 hr count as well, can't wait to have
                    # the pandas code and able to do it that way
                    if status == "FAILURE" and ts > (now - (24 * STEP)):
                        bug['fails24'] += fails
                else:
                    data.append([ts, 0])
            bug["data"].append(dict(label=status, data=data))

    # the sort order is a little odd, but basically sort by failures in
    # the last 24 hours, then with all failures for ones that we haven't
    # seen in the last 24 hours.
    buglist = sorted(buglist,
                     key=lambda bug: -(bug['fails24'] * 100000 +
                                       bug['fails']))

    jsondata['buglist'] = buglist
    if args.output:
        out = open(args.output, 'w')
    else:
        out = sys.stdout
    try:
        out.write(json.dumps(jsondata))
    finally:
        out.close()
def test_hits_by_query_no_results(self):
    c = er.Classifier("queries.yaml")
    results = c.hits_by_query("this should find no bugs")
    self.assertEqual(len(results), 0)
    self.assertEqual(results.took, 53)
    self.assertEqual(results.timed_out, False)
def main():
    parser = argparse.ArgumentParser(
        description='Remove old queries where the affected projects list the '
                    'bug status as one of: %s' % ', '.join(FIXED_STATUSES))
    parser.add_argument('--bug', metavar='<bug>',
                        help='Specific bug number/id to clean. Returns an '
                             'exit code of 1 if no query is found for the '
                             'bug.')
    parser.add_argument('--dry-run', action='store_true', default=False,
                        help='Print out old queries that would be removed but '
                             'do not actually remove them.')
    parser.add_argument('-v', dest='verbose', action='store_true',
                        default=False,
                        help='Print verbose information during execution.')
    args = parser.parse_args()
    verbose = args.verbose
    dry_run = args.dry_run

    def info(message):
        if verbose:
            print(message)

    info('Loading queries')
    classifier = er.Classifier('queries')
    processed = []  # keep track of the bugs we've looked at
    cleaned = []  # keep track of the queries we've removed
    for query in classifier.queries:
        bug = query['bug']
        processed.append(bug)
        # If we're looking for a specific bug check to see if we found it.
        if args.bug and bug != args.bug:
            continue
        # Skip anything with suppress-graph: true since those are meant to be
        # kept around even if they don't have hits.
        if query.get('suppress-graph', False):
            info('Skipping query for bug %s since it has '
                 '"suppress-graph: true"' % bug)
            continue
        info('Getting data for bug: %s' % bug)
        bug_data = graph.get_launchpad_bug(bug)
        affects = bug_data.get('affects')
        # affects is a comma-separated list of (<project> - <status>), e.g.
        # "(neutron - Confirmed), (nova - Fix Released)".
        if affects:
            affects = affects.split(',')
            fixed_in_all_affected_projects = True
            for affected in affects:
                affected = affected.strip()
                try:
                    project, status = get_project_status(affected)
                    if status not in FIXED_STATUSES:
                        # TODO(mriedem): It could be useful to report queries
                        # that do not have hits but the bug is not marked as
                        # fixed.
                        info('Bug %s is not fixed for project %s' %
                             (bug, project))
                        fixed_in_all_affected_projects = False
                        break
                except InvalidProjectstatus:
                    print('Unable to parse project status "%s" for bug %s'
                          % (affected, bug))
                    fixed_in_all_affected_projects = False
                    break

            if fixed_in_all_affected_projects:
                # TODO(mriedem): It might be good to sanity check that a query
                # does not have hits if we are going to remove it even if the
                # bug is marked as fixed, e.g. bug 1745168. The bug may have
                # re-appeared, or still be a problem on stable branches, or the
                # query may be too broad.
                if dry_run:
                    info('[DRY-RUN] Remove query for bug: %s' % bug)
                else:
                    info('Removing query for bug: %s' % bug)
                    os.remove('queries/%s.yaml' % bug)
                cleaned.append(bug)
        else:
            print('Unable to determine affected projects for bug %s' % bug)

    # If a specific bug was provided did we find it?
    if args.bug and args.bug not in processed:
        print('Unable to find query for bug: %s' % args.bug)
        return 1

    # Print a summary of what we cleaned.
    prefix = '[DRY-RUN] ' if dry_run else ''
    # If we didn't remove anything, just print None.
    if not cleaned:
        cleaned.append('None')
    info('%sRemoved queries:\n%s' % (prefix, '\n'.join(sorted(cleaned))))
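# main() above relies on a get_project_status() helper (defined elsewhere in
# the module) to split one "(<project> - <status>)" entry out of Launchpad's
# "affects" string. The sketch below shows how such a helper could behave
# given that format; the parsing details are an assumption for illustration,
# not the module's actual implementation.
def get_project_status_sketch(affected):
    # "(nova - Fix Released)" -> ("nova", "Fix Released")
    inner = affected.strip().lstrip('(').rstrip(')')
    project, _, status = inner.partition(' - ')
    if not status:
        raise ValueError('Unable to parse project status: %s' % affected)
    return project.strip(), status.strip()

# Example: get_project_status_sketch("(nova - Fix Released)") returns
# ("nova", "Fix Released"), so the status can be checked against
# FIXED_STATUSES.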