def test_baseline_benchmark_j1_single_working_checker(self, benchmark):
    """Baseline for a single working checker run with one job (-j1).

    A ``SleepingChecker`` is registered to mimic a checker that does some
    work, so that this result can be compared with the -j10 variant of the
    same benchmark. The run is expected to take very close to
    ``numfiles * SleepingChecker.sleep_duration``.
    """
    if benchmark.disabled:
        # Only run this test when profiling.
        benchmark(print, "skipping, do not want to sleep in main tests")
        return

    linter = PyLinter(reporter=Reporter())
    linter.register_checker(SleepingChecker(linter))

    # Check the same number of files as
    # `test_baseline_benchmark_j10_single_working_checker`
    num_files = 10
    files_to_check = [self.empty_filepath for _ in range(num_files)]

    assert linter.config.jobs == 1
    assert len(linter._checkers) == 2, "Should have 'master' and 'sleeper'"

    benchmark(linter.check, files_to_check)

    assert (
        linter.msg_status == 0
    ), f"Expected no errors to be thrown: {pprint.pformat(linter.reporter.messages)}"
def test_baseline_benchmark_j10_single_working_checker(self, benchmark):
    """Baseline for a single working checker run with ten jobs (-j10).

    This is expected to take less time than the -j1 variant, roughly
    ``error_margin * (1/J) * (numfiles * SleepingChecker.sleep_duration)``.
    Because of the cost of the framework and system, the difference will
    *not* be a clean 1/10 of the -j1 version.
    """
    if benchmark.disabled:
        # Only run this test when profiling.
        benchmark(print, "skipping, do not want to sleep in main tests")
        return

    linter = PyLinter(reporter=Reporter())
    linter.config.jobs = 10
    linter.register_checker(SleepingChecker(linter))

    # Check the same number of files as
    # `test_baseline_benchmark_j1_single_working_checker`
    num_files = 10
    files_to_check = [self.empty_filepath for _ in range(num_files)]

    assert linter.config.jobs == 10
    assert len(linter._checkers) == 2, "Should have 'master' and 'sleeper'"

    benchmark(linter.check, files_to_check)

    assert (
        linter.msg_status == 0
    ), f"Expected no errors to be thrown: {pprint.pformat(linter.reporter.messages)}"
def test_pylint_run_jobs_equal_zero_dont_crash_with_cpu_fraction(
    tmpdir: LocalPath,
) -> None:
    """Check that the pylint runner does not crash if `pylint.lint.run._query_cpu`
    determines only a fraction of a CPU core to be available.
    """
    quota_file = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us"
    shares_file = "/sys/fs/cgroup/cpu/cpu.shares"

    # Keep references to the real implementations so that every other
    # path falls through to them untouched.
    real_open = open
    real_path = pathlib.Path

    def _mock_open(*args, **kwargs):
        target = args[0]
        if target == quota_file:
            return mock_open(read_data=b"-1")(*args, **kwargs)
        if target == shares_file:
            return mock_open(read_data=b"2")(*args, **kwargs)
        return real_open(*args, **kwargs)

    def _mock_path(*args, **kwargs):
        if args[0] == shares_file:
            return MagicMock(is_file=lambda: True)
        return real_path(*args, **kwargs)

    testargs = [os.path.abspath(__file__), "--jobs=0"]

    with tmpdir.as_cwd():
        with pytest.raises(SystemExit) as err:
            with patch("builtins.open", _mock_open), patch(
                "pylint.lint.run.Path", _mock_path
            ):
                Run(testargs, reporter=Reporter())
        assert err.value.code == 0
def test_worker_check_single_file_no_checkers(self):
    """Check one generated file in a worker with no extra checkers registered.

    The worker result is expected to carry the generated file's name, no
    messages, a zero message status, and stats that count 18 statements.
    """
    linter = PyLinter(reporter=Reporter())
    worker_initialize(linter=linter)

    result = worker_check_single_file(_gen_file_data())
    (name, _, _, msgs, stats, msg_status) = result

    assert name == "--test-file_data-name-0--"
    assert [] == msgs
    no_errors_status = 0
    assert no_errors_status == msg_status

    # The per-module counters and the overall totals are identical here.
    counts = {
        "convention": 0,
        "error": 0,
        "fatal": 0,
        "info": 0,
        "refactor": 0,
        "statement": 18,
        "warning": 0,
    }
    expected_stats = {
        "by_module": {"--test-file_data-name-0--": dict(counts)},
        "by_msg": {},
        **counts,
    }
    assert expected_stats == stats
def test_baseline_benchmark_j1_all_checks_single_file(self, benchmark):
    """Runs a single file, with -j1, against all checkers/Extensions."""
    run_args = [self.empty_filepath, "--enable=all", "--enable-all-extensions"]
    runner = benchmark(Run, run_args, reporter=Reporter(), exit=False)

    linter = runner.linter
    assert linter.config.jobs == 1

    checker_count = len(linter._checkers)
    print("len(runner.linter._checkers)", checker_count)
    assert checker_count > 1, "Should have more than 'main'"

    assert (
        runner.linter.msg_status == 0
    ), f"Expected no errors to be thrown: {pprint.pformat(runner.linter.reporter.messages)}"
def test_baseline_benchmark_check_parallel_j10(self, benchmark):
    """Should demonstrate times very close to `test_baseline_benchmark_j10`.

    Calls ``check_parallel`` directly with ten jobs and one empty file per
    job, using a linter that only has its default checker registered.
    """
    # Single source of truth for both the worker count and the file count.
    # ``linter.config.jobs`` is never set in this test, so sizing the file
    # list from it would not give one file per worker.
    num_jobs = 10

    linter = PyLinter(reporter=Reporter())

    # Create file per worker, using all workers
    fileinfos = [self.empty_file_info for _ in range(num_jobs)]

    assert len(linter._checkers) == 1, "Should have 'master'"

    benchmark(check_parallel, linter, jobs=num_jobs, files=fileinfos)

    assert (
        linter.msg_status == 0
    ), f"Expected no errors to be thrown: {pprint.pformat(linter.reporter.messages)}"
def test_run(tmp_path, name, git_repo):
    """Runs pylint against external sources.

    Shallow-clones ``git_repo`` into ``tmp_path / name``, collects the Python
    files found there via ``_get_py_files`` and runs pylint over them,
    printing the number of files and the collected messages.

    Raises:
        subprocess.CalledProcessError: if the ``git clone`` command fails.
    """
    # Function-scope import so this block does not rely on the module's
    # import section.
    import subprocess

    checkoutdir = tmp_path / name
    checkoutdir.mkdir()

    # An argument list (no shell) avoids quoting problems in the repository
    # URL or checkout path, and ``check=True`` surfaces a failed clone
    # instead of silently linting an empty directory.
    subprocess.run(
        ["git", "clone", "--depth=1", str(git_repo), str(checkoutdir)],
        check=True,
    )

    filepaths = _get_py_files(scanpath=str(checkoutdir))
    print(f"Have {len(filepaths)} files")

    runner = Run(filepaths, reporter=Reporter(), do_exit=False)

    messages = runner.linter.reporter.messages
    print(f"Had {len(filepaths)} files with {len(messages)} messages")
    pprint.pprint(messages)
def test_worker_initialize_pickling(self) -> None:
    """Test that we can pickle objects that standard pickling in multiprocessing can't.

    See:
    https://stackoverflow.com/questions/8804830/python-multiprocessing-picklingerror-cant-pickle-type-function
    https://github.com/PyCQA/pylint/pull/5584
    """
    linter = PyLinter(reporter=Reporter())
    linter.attribute = argparse.ArgumentParser()  # type: ignore[attr-defined]

    # The linter is serialised with dill and handed to each worker's
    # initializer as its only argument.
    pickled_linter = dill.dumps(linter)
    worker_pool = multiprocessing.Pool(
        2, initializer=worker_initialize, initargs=[pickled_linter]
    )
    with worker_pool:
        worker_pool.imap_unordered(print, [1, 2])
def test_baseline_benchmark_j1(self, benchmark):
    """Establish a baseline of pylint performance with no work.

    Extra checkers are added in other benchmarks. Because this one is so
    simple, a regression here means something very serious has happened.
    """
    linter = PyLinter(reporter=Reporter())

    # Single file to end-to-end the system
    files_to_check = [self.empty_filepath]

    assert linter.config.jobs == 1
    assert len(linter._checkers) == 1, "Should just have 'master'"

    benchmark(linter.check, files_to_check)

    assert (
        linter.msg_status == 0
    ), f"Expected no errors to be thrown: {pprint.pformat(linter.reporter.messages)}"
def test_baseline_benchmark_j1_all_checks_single_file(self, benchmark):
    """Runs a single file, with -j1, against all plug-ins

    ... that's the intent at least.
    """
    # Just 1 file, but all Checkers/Extensions
    files_to_check = [self.empty_filepath]

    runner = benchmark(Run, files_to_check, reporter=Reporter(), exit=False)

    linter = runner.linter
    assert linter.config.jobs == 1

    checker_count = len(linter._checkers)
    print("len(runner.linter._checkers)", checker_count)
    assert checker_count > 1, "Should have more than 'master'"

    assert (
        runner.linter.msg_status == 0
    ), f"Expected no errors to be thrown: {pprint.pformat(runner.linter.reporter.messages)}"
def test_baseline_lots_of_files_j1(self, benchmark):
    """Establish a baseline with only the 'master' checker being run in -j1.

    No checkers are registered beyond the default 'master', so the cost is
    just that of the system with a lot of files registered.
    """
    if benchmark.disabled:
        # Only run this test when profiling.
        benchmark(print, "skipping, only benchmark large file counts")
        return

    linter = PyLinter(reporter=Reporter())
    linter.config.jobs = 1

    files_to_check = [self.empty_filepath for _ in range(self.lot_of_files)]

    assert linter.config.jobs == 1
    assert len(linter._checkers) == 1, "Should have 'master'"

    benchmark(linter.check, files_to_check)

    assert (
        linter.msg_status == 0
    ), f"Expected no errors to be thrown: {pprint.pformat(linter.reporter.messages)}"
def test_worker_check_sequential_checker(self):
    """Same as test_worker_check_single_file_no_checkers with SequentialTestChecker"""
    linter = PyLinter(reporter=Reporter())
    worker_initialize(linter=linter)

    # Add the only checker we care about in this test
    linter.register_checker(SequentialTestChecker(linter))

    worker_result = worker_check_single_file(_gen_file_data())
    # Fields ignored below, in order: proc-id, file_path, base_name and
    # (last) mapreduce_data.
    _, name, _, _, msgs, stats, msg_status, _ = worker_result

    # Ensure we return the same data as the single_file_no_checkers test
    assert name == "--test-file_data-name-0--"
    assert [] == msgs
    no_errors_status = 0
    assert no_errors_status == msg_status

    # The per-module counters and the overall totals are identical here.
    counts = {
        "convention": 0,
        "error": 0,
        "fatal": 0,
        "info": 0,
        "refactor": 0,
        "statement": 18,
        "warning": 0,
    }
    expected_stats = {
        "by_module": {"--test-file_data-name-0--": dict(counts)},
        "by_msg": {},
        **counts,
    }
    assert expected_stats == stats
def test_baseline_lots_of_files_j10(self, benchmark):
    """Establish a baseline with only the 'master' checker being run in -j10.

    As with the -j1 variant `test_baseline_lots_of_files_j1`, no checkers are
    registered except the default 'master', so the cost is just that of the
    check_parallel system across 10 workers, plus the overhead of PyLinter.
    """
    if benchmark.disabled:
        # Only run this test when profiling.
        benchmark(print, "skipping, only benchmark large file counts")
        return

    linter = PyLinter(reporter=Reporter())
    linter.config.jobs = 10

    fileinfos = [self.empty_filepath for _ in range(self.lot_of_files)]

    assert linter.config.jobs == 10
    assert len(linter._checkers) == 1, "Should have 'master'"

    benchmark(linter.check, fileinfos)

    # f-string rather than %-formatting, matching the failure message used
    # by the other benchmarks; the rendered text is unchanged.
    assert (
        linter.msg_status == 0
    ), f"Expected no errors to be thrown: {pprint.pformat(linter.reporter.messages)}"
def test_baseline_lots_of_files_j10_empty_checker(self, benchmark):
    """Baselines pylint for a single extra checker being run in -j10, for N-files.

    The extra checker does no work, so the cost is just that of the system
    at scale, across workers.
    """
    if benchmark.disabled:
        # Only run this test when profiling.
        benchmark(print, "skipping, only benchmark large file counts")
        return

    linter = PyLinter(reporter=Reporter())
    linter.config.jobs = 10
    linter.register_checker(NoWorkChecker(linter))

    files_to_check = [self.empty_filepath for _ in range(self.lot_of_files)]

    assert linter.config.jobs == 10
    assert len(linter._checkers) == 2, "Should have 'master' and 'sleeper'"

    benchmark(linter.check, files_to_check)

    assert (
        linter.msg_status == 0
    ), f"Expected no errors to be thrown: {pprint.pformat(linter.reporter.messages)}"
def test_map_reduce(self, num_files, num_jobs, num_checkers):
    """Compare per-message stats from check_parallel() and a single-process run.

    The intent here is to validate the reduce step: no stats should be lost.

    Checks regression of https://github.com/PyCQA/pylint/issues/4118
    """

    def _build_linter():
        # Assign between 1 and 3 checkers to the linter, they should not
        # change the results of the lint
        new_linter = PyLinter(reporter=Reporter())
        new_linter.register_checker(ParallelTestChecker(new_linter))
        if num_checkers > 1:
            new_linter.register_checker(ExtraParallelTestChecker(new_linter))
        if num_checkers > 2:
            new_linter.register_checker(ThirdParallelTestChecker(new_linter))
        return new_linter

    file_infos = _gen_file_datas(num_files)

    # Both runs use an identically configured, freshly built linter.
    # The multi-process run goes first ...
    parallel_linter = _build_linter()
    check_parallel(
        parallel_linter,
        jobs=num_jobs,
        files=file_infos,
        arguments=None,
    )
    stats_check_parallel = parallel_linter.stats

    # ... followed by the single-process baseline.
    baseline_linter = _build_linter()
    assert (
        baseline_linter.namespace.jobs == 1
    ), "jobs>1 are ignored when calling _check_files"
    baseline_linter._check_files(baseline_linter.get_ast, file_infos)
    stats_single_proc = baseline_linter.stats

    assert str(stats_single_proc.by_msg) == str(
        stats_check_parallel.by_msg
    ), "Single-proc and check_parallel() should return the same thing"
def test_invoke_single_job(self):
    """Tests basic checkers functionality using just a single worker.

    This is *not* the same as -j1 and does not happen under normal operation.
    """
    linter = PyLinter(reporter=Reporter())
    linter.register_checker(SequentialTestChecker(linter))

    # Create a dummy file, the actual contents of which will be ignored by the
    # register test checkers, but it will trigger at least a single-job to be run.
    single_file_container = _gen_file_datas(count=1)

    # Invoke the lint process in a multiprocess way, although we only specify one
    # job.
    check_parallel(linter, jobs=1, files=single_file_container, arguments=None)

    # The per-module counters and the overall totals are identical here.
    counts = {
        "convention": 0,
        "error": 0,
        "fatal": 0,
        "info": 0,
        "refactor": 0,
        "statement": 18,
        "warning": 0,
    }
    expected_stats = {
        "by_module": {"--test-file_data-name-0--": dict(counts)},
        "by_msg": collections.Counter(),
        **counts,
    }
    assert expected_stats == linter.stats
    assert linter.msg_status == 0, "We expect a single-file check to exit cleanly"
def test_baseline_benchmark_j10(self, benchmark):
    """Establish a baseline of pylint performance with no work across threads.

    Same as `test_baseline_benchmark_j1` but uses -j10 with 10 fake files to
    ensure the end-to-end system is invoked.

    Because this is also so simple, a regression here means something very
    serious has happened.
    """
    linter = PyLinter(reporter=Reporter())
    linter.config.jobs = 10

    # Create file per worker, using all workers
    worker_count = linter.config.jobs
    files_to_check = [self.empty_filepath for _ in range(worker_count)]

    assert linter.config.jobs == 10
    assert len(linter._checkers) == 1, "Should have 'master'"

    benchmark(linter.check, files_to_check)

    assert (
        linter.msg_status == 0
    ), f"Expected no errors to be thrown: {pprint.pformat(linter.reporter.messages)}"
def test_worker_check_single_file_no_checkers(self) -> None:
    """Check one generated file in a worker with no extra checkers registered.

    The worker is initialised from a dill-pickled linter. The result is
    expected to carry the generated file's name, no messages, a zero
    message status and stats that count 18 statements.
    """
    linter = PyLinter(reporter=Reporter())
    worker_initialize(linter=dill.dumps(linter))

    worker_result = worker_check_single_file(_gen_file_data())
    # Fields ignored below, in order: proc-id, file_path, base_name and
    # (last) mapreduce_data.
    _, name, _, _, msgs, stats, msg_status, _ = worker_result

    assert name == "--test-file_data-name-0--"
    assert not msgs
    no_errors_status = 0
    assert no_errors_status == msg_status

    # The per-module counters and the overall totals are identical here.
    expected_counts = {
        "convention": 0,
        "error": 0,
        "fatal": 0,
        "info": 0,
        "refactor": 0,
        "statement": 18,
        "warning": 0,
    }
    assert {"--test-file_data-name-0--": expected_counts} == stats.by_module
    assert not stats.by_msg
    for counter_name, expected_value in expected_counts.items():
        assert getattr(stats, counter_name) == expected_value
def test_compare_workers_to_single_proc(self, num_files, num_jobs, num_checkers):
    """Compare check_parallel() with a single-process run for the given parameters.

    The intent here is to ensure that check_parallel() operates on each file,
    without ordering issues, irrespective of the number of workers used and
    the number of checkers applied.

    This test becomes more important if we want to change how we parametrise
    the checkers, for example if we aim to batch the files across jobs.
    """
    # Define the stats we expect to get back from the runs, these should only
    # vary with the number of files.
    expected_by_module = {
        # pylint: disable-next=consider-using-f-string
        "--test-file_data-name-%d--" % idx: ModuleStats(
            convention=0,
            error=0,
            fatal=0,
            info=0,
            refactor=0,
            statement=18,
            warning=0,
        )
        for idx in range(num_files)
    }
    expected_stats = LinterStats(by_module=expected_by_module)
    expected_stats.by_msg = {}
    expected_stats.convention = 0
    expected_stats.error = 0
    expected_stats.fatal = 0
    expected_stats.info = 0
    expected_stats.refactor = 0
    expected_stats.statement = 18 * num_files
    expected_stats.warning = 0

    def _build_linter():
        # Assign between 1 and 3 checkers to the linter, they should not
        # change the results of the lint
        new_linter = PyLinter(reporter=Reporter())
        new_linter.register_checker(SequentialTestChecker(new_linter))
        if num_checkers > 1:
            new_linter.register_checker(ExtraSequentialTestChecker(new_linter))
        if num_checkers > 2:
            new_linter.register_checker(ThirdSequentialTestChecker(new_linter))
        return new_linter

    file_infos = _gen_file_datas(num_files)

    # Both runs use an identically configured, freshly built linter.
    # The multi-process run goes first ...
    parallel_linter = _build_linter()
    check_parallel(
        parallel_linter,
        jobs=num_jobs,
        files=file_infos,
        arguments=None,
    )
    stats_check_parallel = parallel_linter.stats
    assert parallel_linter.msg_status == 0, "We should not fail the lint"

    # ... followed by the single-process baseline.
    baseline_linter = _build_linter()
    assert (
        baseline_linter.namespace.jobs == 1
    ), "jobs>1 are ignored when calling _check_files"
    baseline_linter._check_files(baseline_linter.get_ast, file_infos)
    assert baseline_linter.msg_status == 0, "We should not fail the lint"
    stats_single_proc = baseline_linter.stats

    assert str(stats_single_proc) == str(
        stats_check_parallel
    ), "Single-proc and check_parallel() should return the same thing"
    assert str(stats_check_parallel) == str(
        expected_stats
    ), "The lint is returning unexpected results, has something changed?"
def test_sequential_checkers_work(self) -> None:
    """Tests original basic types of checker works as expected in -jN.

    This means that a sequential checker should return the same data for a given
    file-stream irrespective of whether it's run in -j1 or -jN
    """
    linter = PyLinter(reporter=Reporter())

    def _assert_clean_stats(module_name: str) -> None:
        # The linter's current stats must list exactly one module, with 18
        # statements and no messages of any category.
        stats = linter.stats
        assert {
            module_name: {
                "convention": 0,
                "error": 0,
                "fatal": 0,
                "info": 0,
                "refactor": 0,
                "statement": 18,
                "warning": 0,
            }
        } == stats.by_module
        assert not stats.by_msg
        assert stats.convention == 0
        assert stats.error == 0
        assert stats.fatal == 0
        assert stats.info == 0
        assert stats.refactor == 0
        assert stats.statement == 18
        assert stats.warning == 0

    # Add a sequential checker to ensure it records data against some streams
    linter.register_checker(SequentialTestChecker(linter))

    # Create a dummy file, the actual contents of which will be ignored by the
    # register test checkers, but it will trigger at least a single-job to be run.
    single_file_container = _gen_file_datas(count=1)

    # Invoke the lint process in a multiprocess way, although we only specify one
    # job.
    check_parallel(
        linter,
        jobs=1,
        files=iter(single_file_container),
        arguments=["--enable", "R9999"],
    )
    assert len(linter.get_checkers()) == 2, (
        "We should only have the 'master' and 'sequential-checker' "
        "checkers registered"
    )
    _assert_clean_stats("--test-file_data-name-0--")

    # now run the regular mode of checking files and check that, in this proc, we
    # collect the right data
    filepath = [single_file_container[0][1]]  # get the filepath element
    linter.check(filepath)

    # module is the only change from previous
    _assert_clean_stats("input.similar1")
def test_worker_initialize(self) -> None:
    """Initialise the worker from a dill-pickled linter and check the result.

    After ``worker_initialize`` runs, the module-level ``_worker_linter`` in
    ``pylint.lint.parallel`` must be an instance of the original linter's type.
    """
    linter = PyLinter(reporter=Reporter())
    pickled_linter = dill.dumps(linter)

    worker_initialize(linter=pickled_linter)

    worker_linter = pylint.lint.parallel._worker_linter
    assert isinstance(worker_linter, type(linter))
def test_get_map_data():
    """Tests that a SimilarChecker respects the MapReduceMixin interface.

    Each input file is fed to its own ``SimilarChecker`` and the objects
    returned by ``get_map_data()`` are collected. Every collected line set
    must be named after its source file and yield the expected stripped
    lines.
    """
    linter = PyLinter(reporter=Reporter())

    # Add a parallel checker to ensure it can map and reduce
    linter.register_checker(similar.SimilarChecker(linter))

    source_streams = (
        str(INPUT / "similar_lines_a.py"),
        str(INPUT / "similar_lines_b.py"),
    )
    expected_linelists = (
        (
            "",
            "",
            "",
            "",
            "",
            "",
            "def adipiscing(elit):",
            'etiam = "id"',
            'dictum = "purus,"',
            'vitae = "pretium"',
            'neque = "Vivamus"',
            'nec = "ornare"',
            'tortor = "sit"',
            "return etiam, dictum, vitae, neque, nec, tortor",
            "",
            "",
            "class Amet:",
            "def similar_function_3_lines(self, tellus):",
            "agittis = 10",
            "tellus *= 300",
            "return agittis, tellus",
            "",
            "def lorem(self, ipsum):",
            'dolor = "sit"',
            'amet = "consectetur"',
            "return (lorem, dolor, amet)",
            "",
            "def similar_function_5_lines(self, similar):",
            "some_var = 10",
            "someother_var *= 300",
            'fusce = "sit"',
            'amet = "tortor"',
            "return some_var, someother_var, fusce, amet",
            "",
            'def __init__(self, moleskie, lectus="Mauris", ac="pellentesque"):',
            'metus = "ut"',
            'lobortis = "urna."',
            'Integer = "nisl"',
            '(mauris,) = "interdum"',
            'non = "odio"',
            'semper = "aliquam"',
            'malesuada = "nunc."',
            'iaculis = "dolor"',
            'facilisis = "ultrices"',
            'vitae = "ut."',
            "",
            "return (",
            "metus,",
            "lobortis,",
            "Integer,",
            "mauris,",
            "non,",
            "semper,",
            "malesuada,",
            "iaculis,",
            "facilisis,",
            "vitae,",
            ")",
            "",
            "def similar_function_3_lines(self, tellus):",
            "agittis = 10",
            "tellus *= 300",
            "return agittis, tellus",
        ),
        (
            "",
            "",
            "",
            "",
            "",
            "",
            "",
            "class Nulla:",
            'tortor = "ultrices quis porta in"',
            'sagittis = "ut tellus"',
            "",
            "def pulvinar(self, blandit, metus):",
            "egestas = [mauris for mauris in zip(blandit, metus)]",
            "neque = (egestas, blandit)",
            "",
            "def similar_function_5_lines(self, similar):",
            "some_var = 10",
            "someother_var *= 300",
            'fusce = "sit"',
            'amet = "tortor"',
            'iaculis = "dolor"',
            "return some_var, someother_var, fusce, amet, iaculis, iaculis",
            "",
            "",
            "def tortor(self):",
            "ultrices = 2",
            'quis = ultricies * "porta"',
            "return ultricies, quis",
            "",
            "",
            "class Commodo:",
            "def similar_function_3_lines(self, tellus):",
            "agittis = 10",
            "tellus *= 300",
            'laoreet = "commodo "',
            "return agittis, tellus, laoreet",
        ),
    )

    data = []

    # Manually perform a 'map' type function
    for source_fname in source_streams:
        sim = similar.SimilarChecker(linter)
        # An explicit encoding keeps decoding independent of the locale of
        # the machine running the test.
        with open(source_fname, encoding="utf-8") as stream:
            sim.append_stream(source_fname, stream)
        # The map bit, can you tell? ;)
        data.extend(sim.get_map_data())

    assert len(expected_linelists) == len(data)
    for source_fname, expected_lines, lineset_obj in zip(
        source_streams, expected_linelists, data
    ):
        assert source_fname == lineset_obj.name
        # There doesn't seem to be a faster way of doing this, yet.
        lines = (line for _, line in lineset_obj.enumerate_stripped())
        assert tuple(expected_lines) == tuple(lines)
def test_worker_initialize(self):
    """Initialise the worker with a linter and check that it was stored.

    After ``worker_initialize`` runs, the module-level ``_worker_linter`` in
    ``pylint.lint.parallel`` must compare equal to the linter passed in.
    """
    linter = PyLinter(reporter=Reporter())

    worker_initialize(linter=linter)

    stored_linter = pylint.lint.parallel._worker_linter
    assert stored_linter == linter