def generate_file_fixes(self, data_service: DataService, changes: Sequence[Change],
                        ) -> Iterator[FileFix]:
    """
    Generate all data required for any type of further processing.

    Next processing can be comment generation or performance report generation.

    The head files of the changes are grouped by language. Languages which are missing from \
    `self.model` are skipped with a warning. For the remaining ones, the rules are filtered by \
    the confidence and support thresholds taken from `self.analyze_config`, the files are \
    passed through `filter_files()`, and one `FileFix` is yielded per file which was parsed \
    successfully. A file which failed to parse yields a `FileFix` with a non-empty `error` \
    only if the "report_parse_failures" option is set for that language.

    This is a generator: the per-language "files" and "fixes" events at the end are submitted \
    only after the caller has exhausted it.

    :param data_service: Connection to the Lookout data retrieval service. Only \
                         `get_bblfsh()` is called on it here.
    :param changes: The list of changes in the pointed state. Each change must provide \
                    `base` and `head` files.
    :return: Iterator with unrendered data per comment.
    """
    log = self._log
    # Both mappings are indexed below as [language][file path].
    base_files_by_lang = files_by_language(c.base for c in changes)
    head_files_by_lang = files_by_language(c.head for c in changes)
    # Per-language statistics which are reported through submit_event() at the very end.
    processed_files_counter = defaultdict(int)
    processed_fixes_counter = defaultdict(int)
    for lang, head_files in head_files_by_lang.items():
        if lang not in self.model:
            # NOTE(review): the message reads "skipped %d written in ..." - a noun such as
            # "files" seems to be missing; left untouched because it is a runtime string.
            log.warning("skipped %d written in %s. Rules for %s do not exist in model",
                        len(head_files), lang, lang)
            continue
        rules = self.model[lang]
        config = self.analyze_config[lang]
        # Keep only the rules which satisfy both configured thresholds.
        rules = rules.filter_by_confidence(config["confidence_threshold"]) \
            .filter_by_support(config["support_threshold"])
        # The size limits come from the configuration stored with the rules
        # (`origin_config`), not from the analysis config.
        for file in filter_files(head_files, rules.origin_config["line_length_limit"],
                                 rules.origin_config["overall_size_limit"], log=log):
            processed_files_counter[lang] += 1
            try:
                prev_file = base_files_by_lang[lang][file.path]
            except KeyError:
                # There is no base version of this file under the same language and path.
                # NOTE(review): lines=None presumably makes the feature extractor consider
                # the whole file - confirm against FeatureExtractor.extract_features().
                prev_file = None
                lines = None
            else:
                # Merge the new and the deleted line numbers into one sorted list.
                lines = sorted(chain.from_iterable((
                    find_new_lines(prev_file, file),
                    find_deleted_lines(prev_file, file),
                )))
            # NOTE(review): the source indentation was lost; this debug call is assumed to
            # run for every file, i.e. to follow the try/except/else rather than be in it.
            log.debug("%s %s", file.path, lines)
            # A new extractor is built for every file and is handed over to the FileFix.
            fe = FeatureExtractor(language=lang, **rules.origin_config["feature_extractor"])
            feature_extractor_output = fe.extract_features([file], [lines])
            if feature_extractor_output is None:
                # A None output is treated as a parse failure; it is always counted.
                submit_event("%s.analyze.%s.parse_failures" % (self.name, lang), 1)
                # NOTE(review): the warning and the error FileFix are assumed to be both
                # guarded by "report_parse_failures" - confirm with the original layout.
                if config["report_parse_failures"]:
                    log.warning("Failed to parse %s", file.path)
                    yield FileFix(error="Failed to parse", head_file=file, language=lang,
                                  feature_extractor=fe, base_file=prev_file, file_vnodes=[],
                                  line_fixes=[], y_pred_pure=None, y=None)
            else:
                fixes, file_vnodes, y_pred_pure, y = self._generate_token_fixes(
                    file, fe, feature_extractor_output, data_service.get_bblfsh(), rules)
                log.debug("%s %d fixes", file.path, len(fixes))
                processed_fixes_counter[lang] += len(fixes)
                yield FileFix(error="", head_file=file, language=lang,
                              feature_extractor=fe, base_file=prev_file,
                              file_vnodes=file_vnodes, line_fixes=fixes,
                              y_pred_pure=y_pred_pure, y=y)
    # Reached only when the generator is exhausted: report the accumulated statistics.
    for key, val in processed_files_counter.items():
        submit_event("%s.analyze.%s.files" % (self.name, key), val)
    for key, val in processed_fixes_counter.items():
        submit_event("%s.analyze.%s.fixes" % (self.name, key), val)
class DataRequestsTests(unittest.TestCase, EventHandlers):
    """
    Tests for the data request decorators, `parse_uast()` and `DataService` helpers.

    Each test runs while a push event between `COMMIT_FROM` and `COMMIT_TO` is being handled: \
    `setUp()` starts an `EventListener` and triggers `LookoutSDK.push()` in a background \
    thread, the event handlers block until `tearDown()` releases them.
    """

    COMMIT_FROM = "3ac2a59275902f7252404d26680e30cc41efb837"
    COMMIT_TO = "dce7fcba3d2151a0d5dc4b3a89cfc0911c96cf2b"
    # Language tags accepted for the files at COMMIT_TO. The empty string is accepted as well
    # (presumably files whose language was not detected).
    EXPECTED_LANGUAGES = ("Python", "YAML", "Dockerfile", "Markdown", "Jupyter Notebook",
                          "Shell", "Text", "")

    def setUp(self):
        """
        Start the listener and the push thread, then wait for an event or for a push failure.

        The test fails immediately if `run_data_service()` reported an error.
        """
        self.setUpEvent = threading.Event()
        self.tearDownEvent = threading.Event()
        # The flag must be initialized before the worker thread starts: run_data_service()
        # resets it to False on error, and assigning True afterwards could overwrite that.
        self.setUpWasSuccessful = True
        self.port = find_port()
        self.lookout_sdk = LookoutSDK()
        self.listener = EventListener("localhost:%d" % self.port, self).start()
        self.server_thread = threading.Thread(target=self.run_data_service)
        self.server_thread.start()
        self.data_service = DataService("localhost:10301")
        self.url = "file://" + str(
            Path(lookout.core.__file__).parent.parent.absolute())
        self.ref = "refs/heads/master"
        # Set either by one of the event handlers (success) or by run_data_service (failure).
        self.setUpEvent.wait()
        if not self.setUpWasSuccessful:
            # unittest does not call tearDown() when setUp() raises, so release the listener,
            # the data service and the thread here before failing.
            self.tearDown()
            self.fail("failed to setUp()")

    def tearDown(self):
        """Shut the data service down, unblock the event handler and stop the listener."""
        self.data_service.shutdown()
        self.tearDownEvent.set()
        self.listener.stop()
        self.server_thread.join()

    def process_review_event(self, request: ReviewEvent) -> EventResponse:
        """
        Signal `setUp()` that an event has arrived and block until `tearDown()`.

        :param request: The received review event, it is not inspected.
        :return: Empty response.
        """
        self.setUpEvent.set()
        self.tearDownEvent.wait()
        return EventResponse()

    def process_push_event(self, request: PushEvent) -> EventResponse:
        """
        Signal `setUp()` that an event has arrived and block until `tearDown()`.

        :param request: The received push event, it is not inspected.
        :return: Empty response.
        """
        self.setUpEvent.set()
        self.tearDownEvent.wait()
        return EventResponse()

    def run_data_service(self):
        """
        Trigger the push event between the two commits; this is the body of `server_thread`.

        The git directory is read from the LOOKOUT_SDK_ML_TESTS_GIT_DIR environment variable \
        and defaults to the current directory. Any error is printed, marks the set up as \
        failed and wakes `setUp()` up.
        """
        try:
            self.lookout_sdk.push(self.COMMIT_FROM, self.COMMIT_TO, self.port,
                                  git_dir=os.getenv("LOOKOUT_SDK_ML_TESTS_GIT_DIR", "."))
        except Exception as e:
            print(type(e).__name__, e)
            self.setUpWasSuccessful = False
            self.setUpEvent.set()

    def test_with_changed_uasts(self):
        """Check that `with_changed_uasts` supplies UASTs and empty bytes contents."""
        def func(imposter, ptr_from: ReferencePointer, ptr_to: ReferencePointer,
                 data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            changes = list(data["changes"])
            self.assertEqual(len(changes), 1)
            change = changes[0]
            self.assertEqual(change.base.content, b"")
            self.assertEqual(change.head.content, b"")
            self.assertEqual(
                type(change.base.uast).__module__, bblfsh.Node.__module__)
            self.assertEqual(
                type(change.head.uast).__module__, bblfsh.Node.__module__)
            self.assertEqual(change.base.path, change.head.path)
            self.assertEqual(change.base.path, "lookout/core/manager.py")
            self.assertEqual(change.base.language, "Python")
            self.assertEqual(change.head.language, "Python")

        func = with_changed_uasts(unicode=False)(func)
        # The test case itself is passed in place of the analyzer ("imposter").
        func(self,
             ReferencePointer(self.url, self.ref, self.COMMIT_FROM),
             ReferencePointer(self.url, self.ref, self.COMMIT_TO),
             self.data_service)

    def test_with_changed_uasts_rpc_error(self):
        """Check that an RPC error propagates, skips the function and resets the channel."""
        called = False

        def func(imposter, ptr_from: ReferencePointer, ptr_to: ReferencePointer,
                 data_service: DataService, **data):
            nonlocal called
            called = True

        def fail(f):
            # Wrap the channel getter: let it run, check the channel exists, then raise.
            def wrapped():
                f()
                self.assertIsNotNone(
                    self.data_service._data_request_local.channel)
                raise grpc.RpcError()
            return wrapped

        self.data_service._get_channel = fail(self.data_service._get_channel)
        func = with_changed_uasts(unicode=False)(func)
        self.assertRaises(
            grpc.RpcError, func, self,
            ReferencePointer(self.url, self.ref, self.COMMIT_FROM),
            ReferencePointer(self.url, self.ref, self.COMMIT_TO),
            self.data_service)
        self.assertFalse(called)
        self.assertIsNone(self.data_service._data_request_local.channel)

    def test_with_changed_contents(self):
        """Check that `with_changed_contents` supplies contents and UASTs without children."""
        def func(imposter, ptr_from: ReferencePointer, ptr_to: ReferencePointer,
                 data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            changes = list(data["changes"])
            self.assertEqual(len(changes), 1)
            change = changes[0]
            self.assertEqual(len(change.base.content), 5548)
            self.assertEqual(len(change.head.content), 5542)
            self.assertFalse(change.base.uast.children)
            self.assertFalse(change.head.uast.children)
            self.assertEqual(change.base.path, change.head.path)
            self.assertEqual(change.base.path, "lookout/core/manager.py")
            self.assertEqual(change.base.language, "Python")
            self.assertEqual(change.head.language, "Python")

        func = with_changed_contents(unicode=False)(func)
        func(self,
             ReferencePointer(self.url, self.ref, self.COMMIT_FROM),
             ReferencePointer(self.url, self.ref, self.COMMIT_TO),
             self.data_service)

    def test_with_changed_uasts_and_contents(self):
        """Check that `with_changed_uasts_and_contents` supplies both UASTs and contents."""
        def func(imposter, ptr_from: ReferencePointer, ptr_to: ReferencePointer,
                 data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            changes = list(data["changes"])
            self.assertEqual(len(changes), 1)
            change = changes[0]
            self.assertEqual(len(change.base.content), 5548)
            self.assertEqual(len(change.head.content), 5542)
            self.assertEqual(
                type(change.base.uast).__module__, bblfsh.Node.__module__)
            self.assertEqual(
                type(change.head.uast).__module__, bblfsh.Node.__module__)
            self.assertEqual(change.base.path, change.head.path)
            self.assertEqual(change.base.path, "lookout/core/manager.py")
            self.assertEqual(change.base.language, "Python")
            self.assertEqual(change.head.language, "Python")

        func = with_changed_uasts_and_contents(unicode=False)(func)
        func(self,
             ReferencePointer(self.url, self.ref, self.COMMIT_FROM),
             ReferencePointer(self.url, self.ref, self.COMMIT_TO),
             self.data_service)

    def test_with_uasts(self):
        """Check that `with_uasts` supplies 18 files with UASTs and empty bytes contents."""
        def func(imposter, ptr: ReferencePointer, config: dict,
                 data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            files = list(data["files"])
            self.assertEqual(len(files), 18)
            for file in files:
                self.assertEqual(file.content, b"")
                self.assertEqual(
                    type(file.uast).__module__, bblfsh.Node.__module__)
                self.assertTrue(file.path)
                self.assertIn(file.language, self.EXPECTED_LANGUAGES)

        func = with_uasts(unicode=False)(func)
        func(self,
             ReferencePointer(self.url, self.ref, self.COMMIT_TO),
             None,
             self.data_service)

    def test_with_uasts_rpc_error(self):
        """Check that an RPC error propagates, skips the function and resets the channel."""
        called = False

        def func(imposter, ptr: ReferencePointer, config: dict,
                 data_service: DataService, **data):
            nonlocal called
            called = True

        def fail(f):
            # Wrap the channel getter: let it run, check the channel exists, then raise.
            def wrapped():
                f()
                self.assertIsNotNone(
                    self.data_service._data_request_local.channel)
                raise grpc.RpcError()
            return wrapped

        self.data_service._get_channel = fail(self.data_service._get_channel)
        func = with_uasts(unicode=False)(func)
        self.assertRaises(grpc.RpcError, func, self,
                          ReferencePointer(self.url, self.ref, self.COMMIT_TO),
                          None,
                          self.data_service)
        self.assertFalse(called)
        self.assertIsNone(self.data_service._data_request_local.channel)

    def test_with_contents(self):
        """Check that `with_contents` supplies 18 files with contents and childless UASTs."""
        def func(imposter, ptr: ReferencePointer, config: dict,
                 data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            files = list(data["files"])
            self.assertEqual(len(files), 18)
            non_empty_langs = 0
            for file in files:
                # __init__.py files are the only ones which are allowed to be empty.
                if not file.path.endswith("__init__.py"):
                    self.assertGreater(len(file.content), 0, file.path)
                self.assertFalse(file.uast.children)
                self.assertTrue(file.path)
                if file.language:
                    non_empty_langs += 1
                self.assertIn(file.language, self.EXPECTED_LANGUAGES)
            self.assertGreater(non_empty_langs, 0)

        func = with_contents(unicode=False)(func)
        func(self,
             ReferencePointer(self.url, self.ref, self.COMMIT_TO),
             None,
             self.data_service)

    def test_with_uasts_and_contents(self):
        """Check that `with_uasts_and_contents` supplies 18 files with UASTs and contents."""
        def func(imposter, ptr: ReferencePointer, config: dict,
                 data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            files = list(data["files"])
            self.assertEqual(len(files), 18)
            for file in files:
                # __init__.py files are the only ones which are allowed to be empty.
                if not file.path.endswith("__init__.py"):
                    self.assertGreater(len(file.content), 0, file.path)
                self.assertEqual(
                    type(file.uast).__module__, bblfsh.Node.__module__)
                self.assertTrue(file.path)
                self.assertIn(file.language, self.EXPECTED_LANGUAGES)

        func = with_uasts_and_contents(unicode=False)(func)
        func(self,
             ReferencePointer(self.url, self.ref, self.COMMIT_TO),
             None,
             self.data_service)

    def test_babelfish(self):
        """Check that `parse_uast()` returns a `bblfsh.Node` and no errors for a JS snippet."""
        uast, errors = parse_uast(self.data_service.get_bblfsh(), "console.log('hi');",
                                  "hi.js", unicode=False)
        self.assertIsInstance(uast, bblfsh.Node)
        self.assertEqual(len(errors), 0, str(errors))

    def test_check_bblfsh_driver_versions(self):
        """Check that unsatisfied driver requirements raise and a satisfied one passes."""
        self.assertRaises(UnsatisfiedDriverVersionError,
                          self.data_service.check_bblfsh_driver_versions,
                          ["brainfuck>=1.0"])
        self.assertRaises(UnsatisfiedDriverVersionError,
                          self.data_service.check_bblfsh_driver_versions,
                          ["javascript<1.0"])
        self.data_service.check_bblfsh_driver_versions(
            ["javascript>=1.3.0,<10.0"])

    def test_with_changed_uasts_unicode(self):
        """Check that `with_changed_uasts(unicode=True)` supplies empty `str` contents."""
        def func(imposter, ptr_from: ReferencePointer, ptr_to: ReferencePointer,
                 data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            changes = list(data["changes"])
            self.assertEqual(len(changes), 1)
            change = changes[0]
            self.assertEqual(change.base.content, "")
            self.assertEqual(change.head.content, "")
            self.assertEqual(
                type(change.base.uast).__module__, bblfsh.Node.__module__)
            self.assertEqual(
                type(change.head.uast).__module__, bblfsh.Node.__module__)
            self.assertEqual(change.base.path, change.head.path)
            self.assertEqual(change.base.path, "lookout/core/manager.py")
            self.assertEqual(change.base.language, "Python")
            self.assertEqual(change.head.language, "Python")

        func = with_changed_uasts(unicode=True)(func)
        func(self,
             ReferencePointer(self.url, self.ref, self.COMMIT_FROM),
             ReferencePointer(self.url, self.ref, self.COMMIT_TO),
             self.data_service)

    def test_with_uasts_unicode(self):
        """Check that `with_uasts(unicode=True)` supplies `UnicodeFile`-s with empty `str`."""
        def func(imposter, ptr: ReferencePointer, config: dict,
                 data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            files = list(data["files"])
            self.assertEqual(len(files), 18)
            for file in files:
                self.assertIsInstance(file, UnicodeFile)
                self.assertEqual(file.content, "")
                self.assertEqual(
                    type(file.uast).__module__, bblfsh.Node.__module__)
                self.assertTrue(file.path)
                self.assertIn(file.language, self.EXPECTED_LANGUAGES)

        func = with_uasts(unicode=True)(func)
        func(self,
             ReferencePointer(self.url, self.ref, self.COMMIT_TO),
             None,
             self.data_service)

    def test_babelfish_unicode(self):
        """Compare `parse_uast()` results with and without `unicode` on non-ASCII code."""
        # b"\xc3\x80" is a two-byte UTF-8 sequence, so it decodes to a single character.
        content = b"console.log('\xc3\x80');"
        uast_uni, errors_uni = parse_uast(self.data_service.get_bblfsh(), content.decode(),
                                          "test.js", unicode=True)
        uast, errors = parse_uast(self.data_service.get_bblfsh(), content.decode(),
                                  "test.js", unicode=False)
        self.assertIsInstance(uast, bblfsh.Node)
        self.assertIsInstance(uast_uni, bblfsh.Node)
        self.assertEqual(errors_uni, errors)
        check_uast_transformation(self, content, uast, uast_uni)