def test_scan(self):
    """Check that the internal _scan query yields every event of unit/repo1."""
    params = set_params({})
    hits = queries._scan(self.eldb.es, self.eldb.index, "unit/repo1", params)
    found = []
    for hit in hits:
        found.append(hit["id"])
    self.assertCountEqual(found, ["c1_e1", "c1_e2", "c1_e3", "c1_e4", "c1_e5"])
def test_scan(self):
    """Exercise the low-level _scan query helper."""
    expected_ids = ['c1_e1', 'c1_e2', 'c1_e3', 'c1_e4', 'c1_e5']
    results = queries._scan(
        self.eldb.es, self.eldb.index, 'unit/repo1', set_params({}))
    self.assertCountEqual([doc['id'] for doc in results], expected_ids)
def test_unknown_query(self):
    """Running an unregistered query name must raise UnknownQueryException."""
    params = set_params({})
    with self.assertRaises(UnknownQueryException):
        self.eldb.run_named_query("unknown", "unit/repo1", params)
def test_all_queries(self):
    """Run every public query and check each returns a supported type.

    A query result must be a dict, list, tuple or int; any query whose
    result is of another type is collected and reported as a failure.
    """
    failing = []
    for query in queries.public_queries:
        params = set_params({})
        ret = self.eldb.run_named_query(query, "unit/repo1", params)
        # Idiom fix: a single isinstance call with a tuple of types replaces
        # the original chain of four separate negated isinstance checks.
        if not isinstance(ret, (dict, list, tuple, int)):
            failing.append((query, ret))
    self.assertEqual(failing, [])
def do_query(index, repository_fullname, args, name):
    """Run the named query against the index and return a JSON response.

    Returns a (message, 404) tuple when the requested index is invalid.
    """
    query_params = utils.set_params(args)
    connection = ELmonocleDB(
        elastic_conn=os.getenv('ELASTIC_CONN', 'localhost:9200'),
        index=index,
        prefix=CHANGE_PREFIX,
        create=False,
    )
    try:
        result = connection.run_named_query(name, repository_fullname, query_params)
    except InvalidIndexError:
        return 'Invalid index: %s' % request.args.get('index'), 404
    return jsonify(result)
def test_approvals_param(self):
    """Filtering changes_and_events by the approvals parameter."""
    def run(approvals):
        # All cases share the same date lower bound.
        params = set_params({"approvals": approvals, "gte": "2020-01-01"})
        return self.eldb.run_named_query(
            "changes_and_events", "unit/repo[12]", params)

    ret = run("Code-Review+2")
    self.assertEqual(ret["total"], 2, ret)
    self.assertCountEqual([item["id"] for item in ret["items"]], ["c1", "c1_e4"])

    ret = run("CHANGES_REQUESTED,APPROVED")
    self.assertEqual(ret["total"], 4, ret)
    self.assertCountEqual(
        [item["id"] for item in ret["items"]], ["c2", "c2_e4", "c3", "c3_e2"])
def test_change_and_events(self):
    """changes_and_events must return the change together with its metadata."""
    ret = self.eldb.run_named_query(
        'changes_and_events', 'unit/repo1', set_params({}))
    self.assertEqual(ret['total'], 6)
    changes = [item for item in ret['items'] if item['type'] == 'Change']
    change = changes[0]
    self.assertTrue(change['tests_included'])
    self.assertTrue(change['has_issue_tracker_links'])
    self.assertListEqual(
        change['issue_tracker_links'][0],
        ['#42', 'https://github.com/unit/repo1/issues/42'],
    )
def test_approvals_param(self):
    """Approvals filtering on the changes_and_events query."""
    cases = (
        ('Code-Review+2', 2, ['c1', 'c1_e4']),
        ('CHANGES_REQUESTED,APPROVED', 4, ['c2', 'c2_e4', 'c3', 'c3_e2']),
    )
    for approvals, total, expected_ids in cases:
        params = set_params({'approvals': approvals, 'gte': '2020-01-01'})
        ret = self.eldb.run_named_query(
            'changes_and_events', 'unit/repo[12]', params)
        self.assertEqual(ret['total'], total, ret)
        self.assertCountEqual([item['id'] for item in ret['items']], expected_ids)
def test_change_and_events(self):
    """Verify change metadata returned by the changes_and_events query."""
    result = self.eldb.run_named_query(
        "changes_and_events", "unit/repo1", set_params({}))
    self.assertEqual(result["total"], 6)
    change = [c for c in result["items"] if c["type"] == "Change"][0]
    self.assertTrue(change["tests_included"])
    self.assertTrue(change["has_issue_tracker_links"])
    expected_link = ["#42", "https://github.com/unit/repo1/issues/42"]
    self.assertListEqual(change["issue_tracker_links"][0], expected_link)
def test_project_param(self):
    """A project definition restricts last_changes by its file regexp."""
    params = set_params({"project": "mytestproject"})
    project = ProjectDefinition(
        name="mytestproject",
        repository_regex=None,
        branch_regex=None,
        file_regex=r".*backend.py",
    )
    params["_project_defs"] = [project]
    result = self.eldb.run_named_query("last_changes", ".*", params)
    self.assertEqual(result["total"], 1, result)
def test_events_histo(self):
    """Histogram of events between two dates."""
    params = set_params({"gte": "2020-01-01", "lte": "2020-01-02"})
    ret = self.eldb.run_named_query("events_histo", "unit/repo1", params)
    buckets = [
        {"doc_count": 4, "key": 1577836800000, "key_as_string": "2020-01-01"},
        {"doc_count": 1, "key": 1577923200000, "key_as_string": "2020-01-02"},
    ]
    # Result is (buckets, average count per bucket).
    delta = DeepDiff(ret, (buckets, 2.5))
    if delta:
        raise DiffException(delta)
def test_events_top_authors(self):
    """Top authors aggregation over unit/repo1 events."""
    ret = self.eldb.run_named_query(
        "events_top_authors", "unit/repo1", set_params({}))
    expected = {
        "count_avg": 2.5,
        "count_median": 2.5,
        "items": [
            {"doc_count": 3, "key": "jane"},
            {"doc_count": 2, "key": "john"},
        ],
        "total": 2,
        "total_hits": 5,
    }
    difference = DeepDiff(ret, expected)
    if difference:
        raise DiffException(difference)
def test_events_top_authors(self):
    """Check the events_top_authors aggregation result."""
    params = set_params({})
    result = self.eldb.run_named_query('events_top_authors', 'unit/repo1', params)
    top_items = [{'doc_count': 3, 'key': 'jane'}, {'doc_count': 2, 'key': 'john'}]
    expected = {
        'count_avg': 2.5,
        'count_median': 2.5,
        'items': top_items,
        'total': 2,
        'total_hits': 5,
    }
    delta = DeepDiff(result, expected)
    if delta:
        raise DiffException(delta)
def test_events_histo(self):
    """Events histogram including the empty leading day bucket."""
    params = set_params({'gte': '2020-01-01', 'lte': '2020-01-02'})
    result = self.eldb.run_named_query('events_histo', 'unit/repo1', params)
    buckets = [
        {'doc_count': 0, 'key': 1577750400000, 'key_as_string': '2019-12-31'},
        {'doc_count': 4, 'key': 1577836800000, 'key_as_string': '2020-01-01'},
        {'doc_count': 1, 'key': 1577923200000, 'key_as_string': '2020-01-02'},
    ]
    # Result is (buckets, average count per bucket).
    delta = DeepDiff(result, (buckets, 1.6666666666666667))
    if delta:
        raise DiffException(delta)
def do_query(index, repository_fullname, args, name):
    """Run a named query against Elasticsearch and return a JSON response.

    Connection settings are taken from the environment; returns a
    (message, 404) tuple when the requested index does not exist.
    """
    params = utils.set_params(args)
    db = ELmonocleDB(
        elastic_conn=os.getenv("ELASTIC_CONN", "localhost:9200"),
        index=index,
        prefix=CHANGE_PREFIX,
        create=False,
        user=os.getenv("ELASTIC_USER", None),
        password=os.getenv("ELASTIC_PASSWORD", None),
        use_ssl=os.getenv("ELASTIC_USE_SSL", None),
        # NOTE(review): ELASTIC_INSECURE is passed straight to verify_certs
        # with no inversion — confirm the intended env var semantics.
        verify_certs=os.getenv("ELASTIC_INSECURE", None),
        ssl_show_warn=os.getenv("ELASTIC_SSL_SHOW_WARN", None),
    )
    try:
        result = db.run_named_query(name, repository_fullname, params)
    except InvalidIndexError:
        return "Invalid index: %s" % request.args.get("index"), 404
    return jsonify(result)
def test_repos_top_merged(self):
    """Top repositories ranked by merged changes."""
    params = set_params({"state": "MERGED"})
    result = self.eldb.run_named_query("repos_top", "unit/repo[12]", params)
    expected = {
        "items": [
            {"key": "unit/repo2", "doc_count": 2},
            {"key": "unit/repo1", "doc_count": 1},
        ],
        "count_avg": 1.5,
        "count_median": 1.5,
        "total": 2,
        "total_hits": 3,
    }
    delta = DeepDiff(result, expected)
    if delta:
        raise DiffException(delta)
def test_repos_top_merged(self):
    """Repositories ranked by number of merged changes."""
    ret = self.eldb.run_named_query(
        'repos_top', 'unit/repo[12]', set_params({'state': 'MERGED'}))
    top_items = [
        {'key': 'unit/repo2', 'doc_count': 2},
        {'key': 'unit/repo1', 'doc_count': 1},
    ]
    expected = {
        'items': top_items,
        'count_avg': 1.5,
        'count_median': 1.5,
        'total': 2,
        'total_hits': 3,
    }
    delta = DeepDiff(ret, expected)
    if delta:
        raise DiffException(delta)
def test_events_top(self):
    """Aggregate event counts by type via the internal _events_top helper."""
    ret = queries._events_top(
        self.eldb.es, self.eldb.index, "unit/repo1", "type", set_params({}))
    expected = {
        "count_avg": 1.25,
        "count_median": 1.0,
        "items": [
            {"doc_count": 2, "key": "ChangeReviewedEvent"},
            {"doc_count": 1, "key": "ChangeCommentedEvent"},
            {"doc_count": 1, "key": "ChangeCreatedEvent"},
            {"doc_count": 1, "key": "ChangeMergedEvent"},
        ],
        "total": 4,
        "total_hits": 5,
    }
    delta = DeepDiff(ret, expected)
    if delta:
        raise DiffException(delta)
def test_events_top(self):
    """Internal _events_top helper aggregated on the event type field."""
    params = set_params({})
    result = queries._events_top(
        self.eldb.es, self.eldb.index, 'unit/repo1', 'type', params)
    expected_items = [
        {'doc_count': 2, 'key': 'ChangeReviewedEvent'},
        {'doc_count': 1, 'key': 'ChangeCommentedEvent'},
        {'doc_count': 1, 'key': 'ChangeCreatedEvent'},
        {'doc_count': 1, 'key': 'ChangeMergedEvent'},
    ]
    expected = {
        'count_avg': 1.25,
        'count_median': 1.0,
        'items': expected_items,
        'total': 4,
        'total_hits': 5,
    }
    delta = DeepDiff(result, expected)
    if delta:
        raise DiffException(delta)
def main():
    """CLI entry point: dispatch crawler, dbmanage and dbquery subcommands."""
    parser = argparse.ArgumentParser(prog='monocle')
    parser.add_argument(
        '--loglevel', help='logging level', default='INFO')
    subparsers = parser.add_subparsers(title='Subcommands',
                                       description='valid subcommands',
                                       dest="command")
    # One sub-command per crawler driver (pullrequest, review).
    for crawler_driver in (pullrequest, review):
        parser_crawler = subparsers.add_parser(
            crawler_driver.name, help=crawler_driver.help)
        parser_crawler.add_argument(
            '--loop-delay', help='Request last updated events every N secs',
            default=900)
        parser_crawler.add_argument(
            '--host', help='Base url of the code review server',
            required=True)
        # Let each driver register its own extra arguments.
        crawler_driver.init_crawler_args_parser(parser_crawler)
    parser_dbmanage = subparsers.add_parser(
        'dbmanage', help='Database manager')
    parser_dbmanage.add_argument(
        '--delete-repository',
        help='Delete events related to a repository (regexp)',
        required=True)
    parser_dbquery = subparsers.add_parser(
        'dbquery', help='Run an existsing query on stored events')
    parser_dbquery.add_argument(
        '--interval', help='Histogram interval',
        default="3h")
    parser_dbquery.add_argument(
        '--name', help='The query name',
        required=True)
    parser_dbquery.add_argument(
        '--repository', help='Scope to events of a repository (regexp)',
        required=True)
    parser_dbquery.add_argument(
        '--gte', help='Scope to events created after date')
    parser_dbquery.add_argument(
        '--lte', help='Scope to events created before date')
    parser_dbquery.add_argument(
        '--on_cc_gte',
        help='Scope to events related to changes created after date')
    parser_dbquery.add_argument(
        '--on_cc_lte',
        help='Scope to events related to changes created before date')
    parser_dbquery.add_argument(
        '--ec-same-date',
        help='Scope to events related to changes created during the '
        'same date bondaries defined by gte/lte arguments',
        action='store_true')
    parser_dbquery.add_argument(
        '--type', help='Scope to events types list (comma separated)')
    parser_dbquery.add_argument(
        '--authors', help='Scope to authors (comma separated)')
    parser_dbquery.add_argument(
        '--approval', help='Scope to events with approval')
    parser_dbquery.add_argument(
        '--size', help='Return maximum of size results',
        default=10)
    parser_dbquery.add_argument(
        '--exclude-authors', help='Authors exclude list (comma separated)')

    args = parser.parse_args()
    logging.basicConfig(
        level=getattr(logging, args.loglevel.upper()))

    if not args.command:
        parser.print_usage()
        return 1

    # Crawler sub-commands are named "<driver>_crawler" by the drivers.
    if args.command.endswith("_crawler"):
        crawler = MonocleCrawler(args)
        crawler.run()

    if args.command == "dbmanage":
        if args.delete_repository:
            db = ELmonocleDB()
            db.delete_repository(args.delete_repository)

    if args.command == "dbquery":
        db = ELmonocleDB()
        params = utils.set_params(args)
        # Leading '^' is stripped: the repository regexp is anchored elsewhere.
        ret = db.run_named_query(
            args.name,
            args.repository.lstrip('^'), params)
        pprint(ret)
def test_most_active_authors_stats(self):
    """Test query: most_active_authors_stats."""
    params = set_params({})
    ret = self.eldb.run_named_query('most_active_authors_stats', '.*', params)
    # Expected per-event-type top-author aggregations for the whole dataset.
    expected = {
        'ChangeCommentedEvent': {
            'count_avg': 1,
            'count_median': 1.0,
            'items': [
                {'doc_count': 1, 'key': 'jane'},
                {'doc_count': 1, 'key': 'steve'},
            ],
            'total': 2,
            'total_hits': 2,
        },
        'ChangeCreatedEvent': {
            'count_avg': 1.3333333333333333,
            'count_median': 1,
            'items': [
                {'doc_count': 2, 'key': 'jane'},
                {'doc_count': 1, 'key': 'john'},
                {'doc_count': 1, 'key': 'steve'},
            ],
            'total': 3,
            'total_hits': 4,
        },
        'ChangeMergedEvent': {
            'count_avg': 1,
            'count_median': 1,
            'items': [
                {'doc_count': 1, 'key': 'jane'},
                {'doc_count': 1, 'key': 'john'},
                {'doc_count': 1, 'key': 'steve'},
            ],
            'total': 3,
            'total_hits': 3,
        },
        'ChangeReviewedEvent': {
            'count_avg': 1.3333333333333333,
            'count_median': 1,
            'items': [
                {'doc_count': 2, 'key': 'john'},
                {'doc_count': 1, 'key': 'jane'},
                {'doc_count': 1, 'key': 'steve'},
            ],
            'total': 3,
            'total_hits': 4,
        },
    }
    ddiff = DeepDiff(ret, expected)
    if ddiff:
        raise DiffException(ddiff)
    # Same query restricted to a single author.
    params = set_params({'authors': 'jane'})
    ret = self.eldb.run_named_query('most_active_authors_stats', '.*', params)
    expected = {
        'ChangeCommentedEvent': {
            'count_avg': 1,
            'count_median': 1,
            'items': [{'doc_count': 1, 'key': 'jane'}],
            'total': 1,
            'total_hits': 1,
        },
        'ChangeCreatedEvent': {
            'count_avg': 2,
            'count_median': 2,
            'items': [{'doc_count': 2, 'key': 'jane'}],
            'total': 1,
            'total_hits': 2,
        },
        'ChangeMergedEvent': {
            'count_avg': 1,
            'count_median': 1,
            'items': [{'doc_count': 1, 'key': 'jane'}],
            'total': 1,
            'total_hits': 1,
        },
        'ChangeReviewedEvent': {
            'count_avg': 1,
            'count_median': 1,
            'items': [{'doc_count': 1, 'key': 'jane'}],
            'total': 1,
            'total_hits': 1,
        },
    }
    ddiff = DeepDiff(ret, expected)
    if ddiff:
        raise DiffException(ddiff)
def main() -> None:
    """CLI entry point: crawler pool, database management and ad-hoc queries."""
    parser = argparse.ArgumentParser(prog="monocle")
    parser.add_argument("--loglevel", help="logging level", default="INFO")
    parser.add_argument(
        "--elastic-timeout",
        help="Elasticsearch connection retry timeout",
        default=10,
        type=int,
    )
    parser.add_argument("--elastic-conn", help="Elasticsearch connection info",
                        default="localhost:9200")
    parser.add_argument(
        "--use-ssl",
        help="Use https protocol for communication with Elasticsearch",
        action="store_true",
    )
    # NOTE: store_false — passing --insecure sets args.insecure to False;
    # args.insecure is later forwarded as verify_certs.
    parser.add_argument(
        "--insecure",
        help="Skip SSL CA cert validation",
        action="store_false",
    )
    parser.add_argument(
        "--ssl_show_warn",
        help="Skip showing a SSL warning message if it is not signed "
        "by CA authority",
        action="store_false",
    )
    parser.add_argument(
        "--elastic-user",
        help="Username for Elasticsearch authorization",
    )
    parser.add_argument(
        "--elastic-password",
        help="Password for Elasticsearch authorization",
    )
    subparsers = parser.add_subparsers(title="Subcommands",
                                       description="valid subcommands",
                                       dest="command")
    # 'crawler' runs a pool of threaded crawlers from a YAML config file.
    parser_crawler = subparsers.add_parser("crawler", help="Threaded crawlers pool")
    parser_crawler.add_argument("--config",
                                help="Configuration file of the crawlers pool",
                                required=True)
    parser_dbmanage = subparsers.add_parser("dbmanage", help="Database manager")
    parser_dbmanage.add_argument("--config", help="Configuration file", required=False)
    parser_dbmanage.add_argument(
        "--delete-repository",
        help="Delete events related to a repository (regexp)",
    )
    parser_dbmanage.add_argument(
        "--delete-index",
        help="Delete the index",
        action="store_true",
    )
    parser_dbmanage.add_argument("--index", help="The Elastisearch index name",
                                 required=True)
    parser_dbmanage.add_argument(
        "--run-migrate",
        help="Run the migration process",
    )
    parser_dbmanage.add_argument(
        "--update-idents",
        help="Update identities",
        action="store_true",
    )
    parser_dbquery = subparsers.add_parser(
        "dbquery", help="Run an existsing query on stored events")
    parser_dbquery.add_argument("--index", help="The Elastisearch index name",
                                required=True)
    parser_dbquery.add_argument("--name", help="The query name", required=True)
    parser_dbquery.add_argument(
        "--repository", help="Scope to events of repositories (regexp)",
        required=True)
    parser_dbquery.add_argument(
        "--target-branch", help="Scope to events of a target branches (regexp)")
    parser_dbquery.add_argument("--gte", help="Scope to events created after date")
    parser_dbquery.add_argument("--lte", help="Scope to events created before date")
    parser_dbquery.add_argument(
        "--on_cc_gte", help="Scope to events related to changes created after date")
    parser_dbquery.add_argument(
        "--on_cc_lte", help="Scope to events related to changes created before date")
    parser_dbquery.add_argument(
        "--ec-same-date",
        help="Scope to events related to changes created during the "
        "same date bondaries defined by gte/lte arguments",
        action="store_true",
    )
    parser_dbquery.add_argument(
        "--type", help="Scope to events types list (comma separated)")
    parser_dbquery.add_argument(
        "--files", help="Scope to changes containing this file regexp")
    parser_dbquery.add_argument(
        "--state",
        help="Scope to changes with state (comma separated)",
    )
    parser_dbquery.add_argument("--change-ids",
                                help="Scope to change ids (comma separated)")
    parser_dbquery.add_argument("--authors",
                                help="Scope to authors (comma separated)")
    parser_dbquery.add_argument(
        "--approvals", help="Scope to objects with approvals (comma separated)")
    parser_dbquery.add_argument(
        "--exclude-approvals", help="Approvals exclude list (comma separated)")
    parser_dbquery.add_argument("--size", help="Return maximum of size results",
                                default=10)
    parser_dbquery.add_argument(
        "--from", help="Starting index of the elements to retrieve", default=0)
    parser_dbquery.add_argument("--exclude-authors",
                                help="Authors exclude list (comma separated)")
    parser_dbquery.add_argument(
        "--tests-included",
        help="Scope to changes containing tests",
        action="store_true",
    )
    parser_dbquery.add_argument(
        "--self-merged",
        help="Scope to changes merged by their authors",
        action="store_true",
    )
    parser_dbquery.add_argument(
        "--has-issue-tracker-links",
        help="Scope to changes containing an issue tracker link",
        choices=["generic", "github.com", "altassian.net"],
    )
    parser_dbquery.add_argument(
        "--task-priority",
        help="Scope to changes related to task priorities (comma separated)",
    )
    parser_dbquery.add_argument(
        "--task-severity",
        help="Scope to changes related to task severities (comma separated)",
    )
    parser_dbquery.add_argument(
        "--task-issue-type",
        help="Scope to changes related to task type (comma separated)",
    )
    parser_dbquery.add_argument(
        "--task-score",
        help="Scope to changes related to task score '<op>: <val>'",
    )

    args = parser.parse_args()
    logging.basicConfig(
        level=getattr(logging, args.loglevel.upper()),
        format="%(asctime)s - %(name)s - %(thread)d - %(threadName)s - "
        + "%(levelname)s - %(message)s",
    )
    log = logging.getLogger(__name__)

    if not args.command:
        parser.print_usage()
        sys.exit(1)

    if args.command == "crawler":
        realpath = os.path.expanduser(args.config)
        if not os.path.isfile(realpath):
            log.error("Unable to access config: %s" % realpath)
            sys.exit(1)
        configdata = yaml.safe_load(open(realpath).read())
        config.validate(configdata, config.schema)
        tpool: List[Union[Crawler, GroupCrawler]] = []
        group = {}
        app = None
        # GitHub application credentials are taken from the environment.
        if os.getenv("APP_ID") and os.getenv("APP_KEY_PATH"):
            app = application.get_app(os.getenv("APP_ID"), os.getenv("APP_KEY_PATH"))
        for tenant in configdata["tenants"]:
            idents_config = config.get_idents_config(configdata, tenant["index"])
            for crawler_item in tenant.get("crawler", {}).get("github_orgs", []):
                tg = pullrequest.TokenGetter(crawler_item["name"],
                                             crawler_item.get("token"), app)
                github_c_args = pullrequest.GithubCrawlerArgs(
                    command="github_crawler",
                    org=crawler_item["name"],
                    updated_since=crawler_item["updated_since"],
                    loop_delay=tenant["crawler"]["loop_delay"],
                    repository=crawler_item.get("repository"),
                    base_url=utils.strip_url(crawler_item["base_url"]),
                    token_getter=tg,
                    db=ELmonocleDB(
                        elastic_conn=args.elastic_conn,
                        index=tenant["index"],
                        timeout=args.elastic_timeout,
                        user=args.elastic_user,
                        password=args.elastic_password,
                        use_ssl=args.use_ssl,
                        verify_certs=args.insecure,
                        ssl_show_warn=args.ssl_show_warn,
                    ),
                    idents_config=idents_config,
                )
                gid = crawler_item.get("token")
                if not gid:
                    if app:
                        # No token, if we have a app then get the token from the app
                        gid = app.get_token(org=crawler_item["name"])
                    else:
                        log.info("Skip crawler because no token: %s" % github_c_args)
                        continue
                # Crawlers sharing one token are grouped into one
                # GroupCrawler thread.
                if gid not in group:
                    group[gid] = GroupCrawler()
                    tpool.append(group[gid])
                if github_c_args.repository:
                    repositories = [github_c_args.repository]
                else:
                    log.info("Discovering repositories in %s ..." % github_c_args.org)
                    # No repository specified for that organization so
                    # try to discover all of them
                    rf = organization.RepositoriesFetcher(
                        graphql.GithubGraphQLQuery(token_getter=tg))
                    repos = rf.get(github_c_args.org)
                    repositories = [
                        repo["name"] for repo in repos if not repo["isArchived"]
                    ]
                    log.info("Found %s repositories in %s ..." % (
                        len(repositories), github_c_args.org))
                for repository in repositories:
                    github_c_args.repository = repository
                    group[gid].add_crawler(Runner(github_c_args))
            for crawler_item in tenant.get("crawler", {}).get("gerrit_repositories", []):
                gerrit_c_args = review.GerritCrawlerArgs(
                    command="gerrit_crawler",
                    repository=crawler_item["name"],
                    updated_since=crawler_item["updated_since"],
                    loop_delay=tenant["crawler"]["loop_delay"],
                    base_url=utils.strip_url(crawler_item["base_url"]),
                    insecure=crawler_item.get("insecure", False),
                    login=crawler_item.get("login"),
                    password=crawler_item.get("password"),
                    db=ELmonocleDB(
                        elastic_conn=args.elastic_conn,
                        index=tenant["index"],
                        timeout=args.elastic_timeout,
                        user=args.elastic_user,
                        password=args.elastic_password,
                        use_ssl=args.use_ssl,
                        verify_certs=args.insecure,
                        ssl_show_warn=args.ssl_show_warn,
                    ),
                    prefix=crawler_item.get("prefix"),
                    idents_config=idents_config,
                )
                tpool.append(Crawler(gerrit_c_args))
        log.info("%d configured threads" % len(tpool))
        for cthread in tpool:
            cthread.start()

    if args.command == "dbmanage":
        # --update-idents needs the identities section of the config file.
        if args.update_idents and not args.config:
            log.error("Please provide the --config option")
            sys.exit(1)
        if args.update_idents:
            idents_config = config.get_idents_config(
                yaml.safe_load(open(args.config)), args.index)
        else:
            idents_config = []
        db = ELmonocleDB(
            elastic_conn=args.elastic_conn,
            index=args.index,
            idents_config=idents_config,
            user=args.elastic_user,
            password=args.elastic_password,
            use_ssl=args.use_ssl,
            verify_certs=args.insecure,
            ssl_show_warn=args.ssl_show_warn,
        )
        if args.delete_repository:
            db.delete_repository(args.delete_repository)
        if args.delete_index:
            db.delete_index()
        if args.update_idents:
            db.update_idents()
        if args.run_migrate:
            try:
                migrate.run_migrate(args.run_migrate, args.elastic_conn, args.index)
            except migrate.NotAvailableException:
                log.error("Error: %s is not a valid migration process" %
                          args.run_migrate)

    if args.command == "dbquery":
        db = ELmonocleDB(
            elastic_conn=args.elastic_conn,
            index=args.index,
            user=args.elastic_user,
            password=args.elastic_password,
            use_ssl=args.use_ssl,
            verify_certs=args.insecure,
            ssl_show_warn=args.ssl_show_warn,
        )
        params = utils.set_params(args)
        try:
            # Leading '^' is stripped from the repository regexp.
            ret = db.run_named_query(args.name, args.repository.lstrip("^"), params)
        except UnknownQueryException as err:
            log.error("Unable to run query: %s" % err)
            sys.exit(1)
        pprint(ret)
def test_task_params(self):
    """Task priority/type/score filters on last_changes and changes_and_events."""
    # (query args, expected total) for the last_changes query.
    priority_type_cases = (
        ({"task_priority": "HIGH"}, 1),
        ({"task_priority": "HIGH,MEDIUM,LOW"}, 2),
        ({"task_type": "BUG"}, 2),
        ({"task_type": "BUG,CLIENT_IMPACT"}, 2),
        ({"task_priority": "LOW", "task_type": "BUG,CLIENT_IMPACT"}, 1),
    )
    for query_args, total in priority_type_cases:
        ret = self.eldb.run_named_query("last_changes", ".*", set_params(query_args))
        self.assertEqual(ret["total"], total, ret)

    # The priority filter also applies to changes_and_events.
    params = set_params({"task_priority": "HIGH"})
    ret = self.eldb.run_named_query("changes_and_events", ".*", params)
    self.assertEqual(ret["total"], 2, ret)
    self.assertListEqual([o["id"] for o in ret["items"]], ["c1", "c1_e2"])

    # (score expression, expected total) for the last_changes query.
    score_cases = (
        ("> 10", 1),
        (">= 10", 2),
        ("< 10", 0),
        ("== 50", 1),
        ("== 51", 0),
    )
    for score, total in score_cases:
        params = set_params({"task_score": score})
        ret = self.eldb.run_named_query("last_changes", ".*", params)
        self.assertEqual(ret["total"], total, ret)
def test_self_merged_param(self):
    """self_merged restricts merged changes to those merged by their author."""
    params = set_params({"state": "MERGED", "self_merged": True})
    result = self.eldb.run_named_query("last_changes", "unit/repo[12]", params)
    self.assertEqual(result["total"], 1)
    change = result["items"][0]
    self.assertEqual(change["author"], change["merged_by"])
def test_changes_lifecycle_stats(self):
    """Test changes_lifecycle_stats query."""
    params = set_params({"gte": "2020-01-01", "lte": "2020-01-03"})
    ret = self.eldb.run_named_query("changes_lifecycle_stats", ".*", params)
    # Expected stats over all authors in the date range; each histo entry is
    # a (daily buckets, average) tuple.
    expected = {
        "ChangeCommitForcePushedEvent": {"authors_count": 0, "events_count": 0},
        "ChangeCommitPushedEvent": {"authors_count": 1, "events_count": 1},
        "ChangeCreatedEvent": {"authors_count": 2, "events_count": 2},
        "abandoned": 0,
        "self_merged": 0,
        "commits": 1.0,
        "duration": 86400.0,
        "duration_variability": 0.0,
        "histos": {
            "ChangeAbandonedEvent": (
                [
                    {"doc_count": 0, "key": 1577836800000, "key_as_string": "2020-01-01"},
                    {"doc_count": 0, "key": 1577923200000, "key_as_string": "2020-01-02"},
                    {"doc_count": 0, "key": 1578009600000, "key_as_string": "2020-01-03"},
                ],
                0,
            ),
            "ChangeCommitForcePushedEvent": (
                [
                    {"doc_count": 0, "key": 1577836800000, "key_as_string": "2020-01-01"},
                    {"doc_count": 0, "key": 1577923200000, "key_as_string": "2020-01-02"},
                    {"doc_count": 0, "key": 1578009600000, "key_as_string": "2020-01-03"},
                ],
                0,
            ),
            "ChangeCommitPushedEvent": (
                [
                    {"doc_count": 0, "key": 1577836800000, "key_as_string": "2020-01-01"},
                    {"doc_count": 0, "key": 1577923200000, "key_as_string": "2020-01-02"},
                    {"doc_count": 1, "key": 1578009600000, "key_as_string": "2020-01-03"},
                ],
                0.3333333333333333,
            ),
            "ChangeCreatedEvent": (
                [
                    {"doc_count": 1, "key": 1577836800000, "key_as_string": "2020-01-01"},
                    {"doc_count": 0, "key": 1577923200000, "key_as_string": "2020-01-02"},
                    {"doc_count": 1, "key": 1578009600000, "key_as_string": "2020-01-03"},
                ],
                0.6666666666666666,
            ),
            "ChangeMergedEvent": (
                [
                    {"doc_count": 0, "key": 1577836800000, "key_as_string": "2020-01-01"},
                    {"doc_count": 1, "key": 1577923200000, "key_as_string": "2020-01-02"},
                    {"doc_count": 0, "key": 1578009600000, "key_as_string": "2020-01-03"},
                ],
                0.3333333333333333,
            ),
        },
        "merged": 1,
        "opened": 1,
        "ratios": {
            "abandoned/created": 0.0,
            "iterations/created": 1.5,
            "merged/created": 50.0,
            "self_merged/created": 0.0,
        },
        "tests": 50.0,
    }
    ddiff = DeepDiff(ret, expected)
    if ddiff:
        raise DiffException(ddiff)
    # john and jane are the only authors in range, so scoping to both of
    # them must not change the result.
    params = set_params(
        {"gte": "2020-01-01", "lte": "2020-01-03", "authors": "john,jane"}
    )
    ret = self.eldb.run_named_query("changes_lifecycle_stats", ".*", params)
    ddiff = DeepDiff(ret, expected)
    if ddiff:
        raise DiffException(ddiff)
    # Scoping to a single author changes counts and ratios.
    params = set_params(
        {"gte": "2020-01-01", "lte": "2020-01-03", "authors": "john"}
    )
    ret = self.eldb.run_named_query("changes_lifecycle_stats", ".*", params)
    expected = {
        "ChangeCommitForcePushedEvent": {"authors_count": 0, "events_count": 0},
        "ChangeCommitPushedEvent": {"authors_count": 0, "events_count": 0},
        "ChangeCreatedEvent": {"authors_count": 1, "events_count": 1},
        "abandoned": 0,
        "self_merged": 0,
        "commits": 1.0,
        "duration": 86400.0,
        "duration_variability": 0.0,
        "histos": {
            "ChangeAbandonedEvent": (
                [
                    {"doc_count": 0, "key": 1577836800000, "key_as_string": "2020-01-01"},
                    {"doc_count": 0, "key": 1577923200000, "key_as_string": "2020-01-02"},
                    {"doc_count": 0, "key": 1578009600000, "key_as_string": "2020-01-03"},
                ],
                0,
            ),
            "ChangeCommitForcePushedEvent": (
                [
                    {"doc_count": 0, "key": 1577836800000, "key_as_string": "2020-01-01"},
                    {"doc_count": 0, "key": 1577923200000, "key_as_string": "2020-01-02"},
                    {"doc_count": 0, "key": 1578009600000, "key_as_string": "2020-01-03"},
                ],
                0,
            ),
            "ChangeCommitPushedEvent": (
                [
                    {"doc_count": 0, "key": 1577836800000, "key_as_string": "2020-01-01"},
                    {"doc_count": 0, "key": 1577923200000, "key_as_string": "2020-01-02"},
                    {"doc_count": 0, "key": 1578009600000, "key_as_string": "2020-01-03"},
                ],
                0,
            ),
            "ChangeCreatedEvent": (
                [
                    {"doc_count": 1, "key": 1577836800000, "key_as_string": "2020-01-01"},
                    {"doc_count": 0, "key": 1577923200000, "key_as_string": "2020-01-02"},
                    {"doc_count": 0, "key": 1578009600000, "key_as_string": "2020-01-03"},
                ],
                0.3333333333333333,
            ),
            "ChangeMergedEvent": (
                [
                    {"doc_count": 0, "key": 1577836800000, "key_as_string": "2020-01-01"},
                    {"doc_count": 1, "key": 1577923200000, "key_as_string": "2020-01-02"},
                    {"doc_count": 0, "key": 1578009600000, "key_as_string": "2020-01-03"},
                ],
                0.3333333333333333,
            ),
        },
        "merged": 1,
        "opened": 0,
        "ratios": {
            "abandoned/created": 0.0,
            "iterations/created": 1.0,
            "merged/created": 100.0,
            "self_merged/created": 0.0,
        },
        "tests": 100.0,
    }
    ddiff = DeepDiff(ret, expected)
    if ddiff:
        raise DiffException(ddiff)
def main():
    """Export merged-change activity as pipe-separated event lines.

    NOTE(review): the "%d|%s|A|/path|" output format looks like a gource
    custom-log feed — confirm the intended consumer.
    """
    parser_dbquery = argparse.ArgumentParser(prog=sys.argv[0])
    parser_dbquery.add_argument(
        "--repository", help="Scope to events of a repository (regexp)",
        default=r".*")
    parser_dbquery.add_argument("--gte", help="Scope to events created after date")
    parser_dbquery.add_argument("--lte", help="Scope to events created before date")
    parser_dbquery.add_argument("--size", help="Return maximum of size results",
                                default=1000)
    parser_dbquery.add_argument("--exclude-authors",
                                help="Authors exclude list (comma separated)")
    args = parser_dbquery.parse_args()

    db = ELmonocleDB()
    params = utils.set_params(args)
    data = db.run_named_query(
        "last_merged_changes", args.repository.lstrip("^"), params)
    # Include changes merged up to the end of the --lte day.
    lte_time = (datetime.datetime.strptime(args.lte, "%Y-%m-%d") +
                datetime.timedelta(days=1) if args.lte else None)
    title = {}
    for entry in data:
        # example: 2020-02-24T19:05:13Z
        created_time = datetime.datetime.strptime(
            entry["created_at"], "%Y-%m-%dT%H:%M:%SZ")
        merge_time = datetime.datetime.strptime(
            entry["merged_at"], "%Y-%m-%dT%H:%M:%SZ")
        if lte_time and merge_time > lte_time:
            continue
        # One 'A' line at creation and one 'M' line at merge per change.
        print("%.0f|%s|A|/%s/%s|" % (
            created_time.timestamp(),
            entry["author"],
            entry["repository_fullname"],
            entry["title"],
        ))
        print("%.0f|%s|M|/%s/%s|" % (
            merge_time.timestamp(),
            entry["author"],
            entry["repository_fullname"],
            entry["title"],
        ))
        # Remember the change title for matching comment events below.
        title[entry["repository_fullname_and_number"]] = entry["title"]
    # Comment events are emitted as 'M' lines on the commented change's title.
    params["etype"] = ("ChangeCommentedEvent", )
    data = db.run_named_query("_scan", args.repository.lstrip("^"), params)
    for entry in data:
        # example: 2020-02-24T19:05:13Z
        created_time = datetime.datetime.strptime(
            entry["created_at"], "%Y-%m-%dT%H:%M:%SZ")
        try:
            print("%.0f|%s|M|/%s/%s|" % (
                created_time.timestamp(),
                entry["author"],
                entry["repository_fullname"],
                title[entry["repository_fullname_and_number"]],
            ))
        except KeyError:
            # Comment on a change that was never merged in the window.
            print(
                "%s not merged" % entry["repository_fullname_and_number"],
                file=sys.stderr,
            )
def test_most_active_authors_stats(self):
    """Test query: most_active_authors_stats."""
    params = set_params({})
    ret = self.eldb.run_named_query("most_active_authors_stats", ".*", params)
    # Expected per-event-type top-author aggregations for the whole dataset.
    expected = {
        "ChangeCommentedEvent": {
            "count_avg": 1,
            "count_median": 1.0,
            "items": [
                {"doc_count": 1, "key": "jane"},
                {"doc_count": 1, "key": "steve"},
            ],
            "total": 2,
            "total_hits": 2,
        },
        "ChangeCreatedEvent": {
            "count_avg": 1.3333333333333333,
            "count_median": 1,
            "items": [
                {"doc_count": 2, "key": "jane"},
                {"doc_count": 1, "key": "john"},
                {"doc_count": 1, "key": "steve"},
            ],
            "total": 3,
            "total_hits": 4,
        },
        "ChangeMergedEvent": {
            "count_avg": 1,
            "count_median": 1,
            "items": [
                {"doc_count": 1, "key": "jane"},
                {"doc_count": 1, "key": "john"},
                {"doc_count": 1, "key": "steve"},
            ],
            "total": 3,
            "total_hits": 3,
        },
        "ChangeReviewedEvent": {
            "count_avg": 1.3333333333333333,
            "count_median": 1,
            "items": [
                {"doc_count": 2, "key": "john"},
                {"doc_count": 1, "key": "jane"},
                {"doc_count": 1, "key": "steve"},
            ],
            "total": 3,
            "total_hits": 4,
        },
    }
    ddiff = DeepDiff(ret, expected)
    if ddiff:
        raise DiffException(ddiff)
    # Same query restricted to a single author.
    params = set_params({"authors": "jane"})
    ret = self.eldb.run_named_query("most_active_authors_stats", ".*", params)
    expected = {
        "ChangeCommentedEvent": {
            "count_avg": 1,
            "count_median": 1,
            "items": [{"doc_count": 1, "key": "jane"}],
            "total": 1,
            "total_hits": 1,
        },
        "ChangeCreatedEvent": {
            "count_avg": 2,
            "count_median": 2,
            "items": [{"doc_count": 2, "key": "jane"}],
            "total": 1,
            "total_hits": 2,
        },
        "ChangeMergedEvent": {
            "count_avg": 1,
            "count_median": 1,
            "items": [{"doc_count": 1, "key": "jane"}],
            "total": 1,
            "total_hits": 1,
        },
        "ChangeReviewedEvent": {
            "count_avg": 1,
            "count_median": 1,
            "items": [{"doc_count": 1, "key": "jane"}],
            "total": 1,
            "total_hits": 1,
        },
    }
    ddiff = DeepDiff(ret, expected)
    if ddiff:
        raise DiffException(ddiff)
def main():
    # CLI entry point: parses global options plus one of three subcommands
    # (crawler / dbmanage / dbquery) and dispatches on args.command.
    parser = argparse.ArgumentParser(prog='monocle')
    parser.add_argument('--loglevel', help='logging level', default='INFO')
    parser.add_argument(
        '--elastic-timeout',
        help='Elasticsearch connection retry timeout',
        default=10,
        type=int,
    )
    parser.add_argument('--elastic-conn',
                        help='Elasticsearch connection info',
                        default='localhost:9200')
    subparsers = parser.add_subparsers(title='Subcommands',
                                       description='valid subcommands',
                                       dest="command")
    # crawler: run the threaded crawlers pool described by a YAML config file.
    parser_crawler = subparsers.add_parser('crawler', help='Threaded crawlers pool')
    parser_crawler.add_argument('--config',
                                help='Configuration file of the crawlers pool',
                                required=True)
    # dbmanage: delete a repository's events from an index.
    # NOTE(review): 'Elastisearch' in the help strings below is a typo for
    # 'Elasticsearch' (user-facing text only).
    parser_dbmanage = subparsers.add_parser('dbmanage', help='Database manager')
    parser_dbmanage.add_argument(
        '--delete-repository',
        help='Delete events related to a repository (regexp)',
        required=True,
    )
    parser_dbmanage.add_argument('--index',
                                 help='The Elastisearch index name',
                                 required=True)
    # dbquery: run a named query with the scoping filters below.
    # NOTE(review): 'existsing' is a typo for 'existing' (help text only).
    parser_dbquery = subparsers.add_parser(
        'dbquery', help='Run an existsing query on stored events')
    parser_dbquery.add_argument('--index',
                                help='The Elastisearch index name',
                                required=True)
    parser_dbquery.add_argument('--name', help='The query name', required=True)
    parser_dbquery.add_argument(
        '--repository', help='Scope to events of repositories (regexp)',
        required=True)
    parser_dbquery.add_argument(
        '--target-branch', help='Scope to events of a target branches (regexp)')
    parser_dbquery.add_argument('--gte', help='Scope to events created after date')
    parser_dbquery.add_argument('--lte', help='Scope to events created before date')
    parser_dbquery.add_argument(
        '--on_cc_gte',
        help='Scope to events related to changes created after date')
    parser_dbquery.add_argument(
        '--on_cc_lte',
        help='Scope to events related to changes created before date')
    # NOTE(review): 'bondaries' is a typo for 'boundaries' (help text only).
    parser_dbquery.add_argument(
        '--ec-same-date',
        help='Scope to events related to changes created during the '
        'same date bondaries defined by gte/lte arguments',
        action='store_true',
    )
    parser_dbquery.add_argument(
        '--type', help='Scope to events types list (comma separated)')
    parser_dbquery.add_argument(
        '--files', help='Scope to changes containing this file regexp')
    parser_dbquery.add_argument(
        '--state',
        help='Scope to changes having this state',
        choices=['OPEN', 'CLOSED', 'MERGED'],
    )
    parser_dbquery.add_argument('--change-ids',
                                help='Scope to change ids (comma separated)')
    parser_dbquery.add_argument('--authors',
                                help='Scope to authors (comma separated)')
    parser_dbquery.add_argument('--approval', help='Scope to events with approval')
    parser_dbquery.add_argument('--size',
                                help='Return maximum of size results',
                                default=10)
    parser_dbquery.add_argument(
        '--from', help='Starting index of the elements to retrieve', default=0)
    parser_dbquery.add_argument('--exclude-authors',
                                help='Authors exclude list (comma separated)')
    parser_dbquery.add_argument(
        '--tests-included',
        help='Scope to changes containing tests',
        action='store_true',
    )
    # NOTE(review): 'altassian.net' looks like a typo for 'atlassian.net', but
    # it is an accepted choices value — confirm backend expectation before
    # renaming it.
    parser_dbquery.add_argument(
        '--has-issue-tracker-links',
        help='Scope to changes containing an issue tracker link',
        choices=['generic', 'github.com', 'altassian.net'],
    )
    args = parser.parse_args()
    logging.basicConfig(
        level=getattr(logging, args.loglevel.upper()),
        format="%(asctime)s - %(name)s - %(threadName)s - " +
        "%(levelname)s - %(message)s",
    )
    log = logging.getLogger(__name__)
    # No subcommand given: show usage and signal failure to the caller.
    if not args.command:
        parser.print_usage()
        return 1
    if args.command == "crawler":
        realpath = os.path.expanduser(args.config)
        if not os.path.isfile(realpath):
            log.error('Unable to access config: %s' % realpath)
            sys.exit(1)
        # Load and schema-validate the crawlers pool configuration.
        configdata = yaml.safe_load(open(realpath).read())
        validate(instance=configdata, schema=config.schema)
        tpool = []
        group = {}
        for tenant in configdata['tenants']:
            for crawler_item in tenant['crawler'].get('github_orgs', []):
                c_args = pullrequest.GithubCrawlerArgs(
                    command='github_crawler',
                    index=tenant['index'],
                    org=crawler_item['name'],
                    updated_since=crawler_item['updated_since'],
                    loop_delay=tenant['crawler']['loop_delay'],
                    token=crawler_item['token'],
                    repository=crawler_item.get('repository'),
                    base_url=crawler_item['base_url'],
                )
                log.info('args=%s' % c_args)
                # Crawlers sharing a token are grouped into a single
                # GroupCrawler thread — presumably to share the token's API
                # rate limit; TODO confirm.
                if crawler_item['token'] not in group:
                    group[crawler_item['token']] = GroupCrawler()
                    tpool.append(group[crawler_item['token']])
                group[crawler_item['token']].add_crawler(
                    Runner(
                        c_args,
                        elastic_conn=args.elastic_conn,
                        elastic_timeout=args.elastic_timeout,
                    ))
            for crawler_item in tenant['crawler'].get('gerrit_repositories', []):
                c_args = review.GerritCrawlerArgs(
                    command='gerrit_crawler',
                    index=tenant['index'],
                    repository=crawler_item['name'],
                    updated_since=crawler_item['updated_since'],
                    loop_delay=tenant['crawler']['loop_delay'],
                    base_url=crawler_item['base_url'],
                )
                # Gerrit crawlers each get their own thread (no grouping).
                tpool.append(
                    Crawler(
                        c_args,
                        elastic_conn=args.elastic_conn,
                        elastic_timeout=args.elastic_timeout,
                    ))
        log.info('%d configured threads' % len(tpool))
        for cthread in tpool:
            cthread.start()
    if args.command == "dbmanage":
        if args.delete_repository:
            db = ELmonocleDB(elastic_conn=args.elastic_conn, index=args.index)
            db.delete_repository(args.delete_repository)
    if args.command == "dbquery":
        db = ELmonocleDB(elastic_conn=args.elastic_conn, index=args.index)
        params = utils.set_params(args)
        try:
            # '^' is stripped so a regexp anchored for other tools still
            # matches the stored repository_fullname values.
            ret = db.run_named_query(args.name, args.repository.lstrip('^'),
                                     params)
        except UnknownQueryException as err:
            log.error('Unable to run query: %s' % err)
            sys.exit(1)
        pprint(ret)
def main():
    """Monocle CLI entry point.

    Parses global options plus one subcommand and dispatches:
      * crawler  -- start the threaded crawlers pool from a YAML config
      * dbmanage -- delete a repository's events and/or a whole index
      * dbquery  -- run a named query on stored events and pretty-print it

    Fixes over the previous revision: user-facing help-text typos
    ('existsing' -> 'existing', 'bondaries' -> 'boundaries',
    'Elastisearch' -> 'Elasticsearch').
    """
    parser = argparse.ArgumentParser(prog='monocle')
    parser.add_argument('--loglevel', help='logging level', default='INFO')
    parser.add_argument(
        '--elastic-timeout',
        help='Elasticsearch connection retry timeout',
        default=10,
        type=int,
    )
    parser.add_argument(
        '--elastic-conn', help='Elasticsearch connection info', default='localhost:9200'
    )
    subparsers = parser.add_subparsers(
        title='Subcommands', description='valid subcommands', dest="command"
    )
    parser_crawler = subparsers.add_parser('crawler', help='Threaded crawlers pool')
    parser_crawler.add_argument(
        '--config', help='Configuration file of the crawlers pool', required=True
    )
    parser_dbmanage = subparsers.add_parser('dbmanage', help='Database manager')
    parser_dbmanage.add_argument(
        '--delete-repository',
        help='Delete events related to a repository (regexp)',
    )
    parser_dbmanage.add_argument(
        '--delete-index',
        help='Delete the index',
        action='store_true',
    )
    parser_dbmanage.add_argument(
        '--index', help='The Elasticsearch index name', required=True
    )
    parser_dbquery = subparsers.add_parser(
        'dbquery', help='Run an existing query on stored events'
    )
    parser_dbquery.add_argument(
        '--index', help='The Elasticsearch index name', required=True
    )
    parser_dbquery.add_argument('--name', help='The query name', required=True)
    parser_dbquery.add_argument(
        '--repository', help='Scope to events of repositories (regexp)', required=True
    )
    parser_dbquery.add_argument(
        '--target-branch', help='Scope to events of a target branches (regexp)'
    )
    parser_dbquery.add_argument('--gte', help='Scope to events created after date')
    parser_dbquery.add_argument('--lte', help='Scope to events created before date')
    parser_dbquery.add_argument(
        '--on_cc_gte', help='Scope to events related to changes created after date'
    )
    parser_dbquery.add_argument(
        '--on_cc_lte', help='Scope to events related to changes created before date'
    )
    parser_dbquery.add_argument(
        '--ec-same-date',
        help='Scope to events related to changes created during the '
        'same date boundaries defined by gte/lte arguments',
        action='store_true',
    )
    parser_dbquery.add_argument(
        '--type', help='Scope to events types list (comma separated)'
    )
    parser_dbquery.add_argument(
        '--files', help='Scope to changes containing this file regexp'
    )
    parser_dbquery.add_argument(
        '--state',
        help='Scope to changes having this state',
        choices=['OPEN', 'CLOSED', 'MERGED'],
    )
    parser_dbquery.add_argument(
        '--change-ids', help='Scope to change ids (comma separated)'
    )
    parser_dbquery.add_argument('--authors', help='Scope to authors (comma separated)')
    parser_dbquery.add_argument(
        '--approvals', help='Scope to objects with approvals (comma separated)'
    )
    parser_dbquery.add_argument(
        '--exclude-approvals', help='Approvals exclude list (comma separated)'
    )
    parser_dbquery.add_argument(
        '--size', help='Return maximum of size results', default=10
    )
    parser_dbquery.add_argument(
        '--from', help='Starting index of the elements to retrieve', default=0
    )
    parser_dbquery.add_argument(
        '--exclude-authors', help='Authors exclude list (comma separated)'
    )
    parser_dbquery.add_argument(
        '--tests-included',
        help='Scope to changes containing tests',
        action='store_true',
    )
    # NOTE(review): 'altassian.net' looks like a typo for 'atlassian.net',
    # but it is an accepted choices value users may already pass — confirm
    # backend expectation before renaming it.
    parser_dbquery.add_argument(
        '--has-issue-tracker-links',
        help='Scope to changes containing an issue tracker link',
        choices=['generic', 'github.com', 'altassian.net'],
    )
    args = parser.parse_args()
    logging.basicConfig(
        level=getattr(logging, args.loglevel.upper()),
        format="%(asctime)s - %(name)s - %(thread)d - %(threadName)s - "
        + "%(levelname)s - %(message)s",
    )
    log = logging.getLogger(__name__)

    # No subcommand given: show usage and signal failure to the caller.
    if not args.command:
        parser.print_usage()
        return 1

    if args.command == "crawler":
        realpath = os.path.expanduser(args.config)
        if not os.path.isfile(realpath):
            log.error('Unable to access config: %s' % realpath)
            sys.exit(1)
        # Load and schema-validate the crawlers pool configuration.
        configdata = yaml.safe_load(open(realpath).read())
        validate(instance=configdata, schema=config.schema)
        tpool = []
        group = {}
        app = None
        # An optional GitHub App can mint tokens for orgs that did not
        # configure one explicitly.
        if os.getenv('APP_ID') and os.getenv('APP_KEY_PATH'):
            app = application.get_app(os.getenv('APP_ID'), os.getenv('APP_KEY_PATH'))
        for tenant in configdata['tenants']:
            for crawler_item in tenant['crawler'].get('github_orgs', []):
                tg = pullrequest.TokenGetter(
                    crawler_item['name'], crawler_item.get('token'), app
                )
                c_args = pullrequest.GithubCrawlerArgs(
                    command='github_crawler',
                    org=crawler_item['name'],
                    updated_since=crawler_item['updated_since'],
                    loop_delay=tenant['crawler']['loop_delay'],
                    repository=crawler_item.get('repository'),
                    base_url=crawler_item['base_url'],
                    token_getter=tg,
                    db=ELmonocleDB(
                        elastic_conn=args.elastic_conn,
                        index=tenant['index'],
                        timeout=args.elastic_timeout,
                    ),
                )
                gid = crawler_item.get('token')
                if not gid:
                    if app:
                        # No token, if we have a app then get the token from the app
                        gid = app.get_token(org=crawler_item['name'])
                    else:
                        log.info('Skip crawler because no token: %s' % c_args)
                        continue
                # Crawlers sharing a token run in one GroupCrawler thread.
                if gid not in group:
                    group[gid] = GroupCrawler()
                    tpool.append(group[gid])
                if c_args.repository:
                    repositories = [c_args.repository]
                else:
                    log.info('Discovering repositories in %s ...' % c_args.org)
                    # No repository specified for that organization so
                    # try to discover all of them
                    rf = organization.RepositoriesFetcher(
                        graphql.GithubGraphQLQuery(token_getter=tg)
                    )
                    repos = rf.get(c_args.org)
                    repositories = [
                        repo['name'] for repo in repos if not repo['isArchived']
                    ]
                    log.info(
                        'Found %s repositories in %s ...'
                        % (len(repositories), c_args.org)
                    )
                # NOTE(review): all Runners below share this single c_args
                # object while the loop rebinds c_args.repository; this is
                # only correct if Runner copies the repository value at
                # construction time — TODO confirm.
                for repository in repositories:
                    c_args.repository = repository
                    group[gid].add_crawler(Runner(c_args))
            for crawler_item in tenant['crawler'].get('gerrit_repositories', []):
                c_args = review.GerritCrawlerArgs(
                    command='gerrit_crawler',
                    repository=crawler_item['name'],
                    updated_since=crawler_item['updated_since'],
                    loop_delay=tenant['crawler']['loop_delay'],
                    base_url=crawler_item['base_url'],
                    insecure=crawler_item.get('insecure', False),
                    login=crawler_item.get('login'),
                    password=crawler_item.get('password'),
                    db=ELmonocleDB(
                        elastic_conn=args.elastic_conn,
                        index=tenant['index'],
                        timeout=args.elastic_timeout,
                    ),
                )
                # Gerrit crawlers each get their own thread (no grouping).
                tpool.append(Crawler(c_args))
        log.info('%d configured threads' % len(tpool))
        for cthread in tpool:
            cthread.start()

    if args.command == "dbmanage":
        db = ELmonocleDB(elastic_conn=args.elastic_conn, index=args.index)
        if args.delete_repository:
            db.delete_repository(args.delete_repository)
        if args.delete_index:
            db.delete_index()

    if args.command == "dbquery":
        db = ELmonocleDB(elastic_conn=args.elastic_conn, index=args.index)
        params = utils.set_params(args)
        try:
            # '^' is stripped so an anchored regexp still matches the stored
            # repository_fullname values.
            ret = db.run_named_query(args.name, args.repository.lstrip('^'), params)
        except UnknownQueryException as err:
            log.error('Unable to run query: %s' % err)
            sys.exit(1)
        pprint(ret)
def query(name):
    """Web endpoint: run the named query and return its result as JSON.

    The repository scope is taken from the 'repository' request argument;
    all other query parameters are extracted from the request args.
    """
    repository_fullname = request.args.get('repository')
    params = utils.set_params(request.args)
    result = ELmonocleDB().run_named_query(name, repository_fullname, params)
    return jsonify(result)