def test_threadpool_limits_by_prefix(openblas_present, mkl_present, prefix):
    """Check that limits keyed by a library prefix apply to that library.

    If no loaded library has ``prefix``, the test either fails loudly (the
    fixtures say the library should be present) or is skipped.
    """
    original_infos = threadpool_info()
    loaded_prefixes = [info["prefix"] for info in original_infos]

    if prefix not in loaded_prefixes:
        mkl_found = any(
            loaded in ('mkl_rt', 'libmkl_rt') for loaded in loaded_prefixes
        )
        if "mkl_rt" in prefix and mkl_present and not mkl_found:
            raise RuntimeError("Could not load the MKL prefix")
        if prefix == "libopenblas" and openblas_present:
            raise RuntimeError("Could not load the OpenBLAS prefix")
        pytest.skip("{} runtime missing".format(prefix))

    def modules_under_test():
        # Old OpenBLAS builds are excluded from the num_threads checks.
        return [
            module for module in threadpool_info()
            if not is_old_openblas(module) and module["prefix"] == prefix
        ]

    with threadpool_limits(limits={prefix: 1}):
        for module in modules_under_test():
            assert module["num_threads"] == 1

    with threadpool_limits(limits={prefix: 3}):
        for module in modules_under_test():
            # A library may expose fewer than 3 threads, hence "<=".
            assert module["num_threads"] <= 3

    # Leaving the context managers must restore the initial state.
    assert threadpool_info() == original_infos
def test_set_threadpool_limits_no_limit():
    """Check that ``limits=None`` leaves the thread pools untouched."""
    original_infos = threadpool_info()

    with threadpool_limits(limits=None):
        infos_inside = threadpool_info()
        assert infos_inside == original_infos

    infos_after = threadpool_info()
    assert infos_after == original_infos
def test_profiling_disables_threadpools(tmpdir):
    """
    Memory profiling disables thread pools, then restores them when done.

    The test runs from inside ``tmpdir``; the previous working directory is
    restored afterwards so the change does not leak into other tests.
    """
    cwd = os.getcwd()
    os.chdir(tmpdir)
    try:
        import numexpr
        import blosc

        numexpr.set_num_threads(3)
        blosc.set_nthreads(3)
        with threadpoolctl.threadpool_limits(3, "blas"):
            with run_with_profile():
                # Both setters return the previous thread count, which must
                # be 1 while profiling is active.
                assert numexpr.set_num_threads(2) == 1
                assert blosc.set_nthreads(2) == 1

                for d in threadpoolctl.threadpool_info():
                    assert d["num_threads"] == 1, d

            # Resets when done:
            assert numexpr.set_num_threads(2) == 3
            assert blosc.set_nthreads(2) == 3

            for d in threadpoolctl.threadpool_info():
                if d["user_api"] == "blas":
                    assert d["num_threads"] == 3, d
    finally:
        # Undo the chdir even when an assertion above fails.
        os.chdir(cwd)
def test_set_threadpool_limits_by_api(user_api):
    """Check that thread counts can be changed dynamically per user API.

    ``user_api=None`` is treated as covering both "blas" and "openmp".
    """
    user_apis = ("blas", "openmp") if user_api is None else (user_api,)
    original_infos = threadpool_info()

    def modules_under_test():
        # Old OpenBLAS builds are excluded from the num_threads checks.
        return [
            module for module in threadpool_info()
            if not is_old_openblas(module) and module["user_api"] in user_apis
        ]

    with threadpool_limits(limits=1, user_api=user_api):
        for module in modules_under_test():
            assert module["num_threads"] == 1

    with threadpool_limits(limits=3, user_api=user_api):
        for module in modules_under_test():
            assert module["num_threads"] <= 3

    # Leaving the context managers must restore the initial state.
    assert threadpool_info() == original_infos
def test_openmp_nesting(nthreads_outer):
    """Check that OpenMP uses the thread count requested by the context
    manager when OpenMP loops are nested."""
    from ._openmp_test_helper import (
        check_nested_openmp_loops,
        get_inner_compiler,
        get_outer_compiler,
    )

    inner_cc = get_inner_compiler()
    outer_cc = get_outer_compiler()
    compilers = (inner_cc, outer_cc)
    same_compiler = inner_cc == outer_cc

    outer_num_threads, inner_num_threads = check_nested_openmp_loops(10)

    original_infos = threadpool_info()
    openmp_infos = [
        info for info in original_infos if info["user_api"] == "openmp"
    ]
    openmp_prefixes = [info["prefix"] for info in openmp_infos]

    if "gcc" in compilers:
        assert "libgomp" in openmp_prefixes
    if "clang" in compilers:
        assert "libomp" in openmp_prefixes

    if same_compiler:
        # A single shared OpenMP runtime is expected to run the inner loop
        # serially on its own.
        assert inner_num_threads == 1
    else:
        # Two different compilers: at least two OpenMP runtimes are loaded.
        assert len(openmp_infos) >= 2

    with threadpool_limits(limits=1) as threadpoolctx:
        max_threads = threadpoolctx.get_original_num_threads()['openmp']
        nthreads = effective_num_threads(nthreads_outer, max_threads)
        outer_num_threads, inner_num_threads = check_nested_openmp_loops(
            10, nthreads
        )

    # All thread pools must be back in their original state.
    assert threadpool_info() == original_infos

    # The outer loop must still get the requested number of threads.
    assert outer_num_threads == nthreads

    # The inner loop should have been limited to one thread to avoid
    # oversubscription.
    if not same_compiler and inner_num_threads != 1:
        # XXX: this does not always work when nesting independent openmp
        # implementations. See: https://github.com/jeremiedbb/Nested_OpenMP
        pytest.xfail("Inner OpenMP num threads was %d instead of 1"
                     % inner_num_threads)
    assert inner_num_threads == 1
def test_command_line_import_flag():
    """Run ``python -m threadpoolctl -i ...`` and check its JSON output and
    the warnings printed for modules that cannot be imported."""
    command = [
        sys.executable,
        "-m",
        "threadpoolctl",
        "-i",
        "numpy",
        "scipy.linalg",
        "invalid_package",
        "numpy.invalid_sumodule",
    ]
    result = subprocess.run(
        command, capture_output=True, check=True, encoding="utf-8"
    )

    # Every library reported by the subprocess must also be visible here.
    this_process_info = threadpool_info()
    for lib_info in json.loads(result.stdout):
        assert lib_info in this_process_info

    warnings = [line.strip() for line in result.stderr.splitlines()]
    assert "WARNING: could not import invalid_package" in warnings
    assert "WARNING: could not import numpy.invalid_sumodule" in warnings

    # The scipy warning is expected only when scipy is not installed.
    if scipy is not None:
        assert "WARNING: could not import scipy.linalg" not in warnings
    else:
        assert "WARNING: could not import scipy.linalg" in warnings
def set_numpy_threads(num_threads):
    """Set the thread count of the first MKL or OpenBLAS library found.

    Libraries reported by ``threadpool_info()`` are scanned in order and
    matched on the base name of their file path.

    Returns a ``(blas_implementation, set_threads)`` tuple.
    ``blas_implementation`` is ``'mkl'``, ``'openblas'``, ``'unknown'`` (only
    non-matching libraries were seen) or ``None`` (no libraries at all).
    ``set_threads`` is whatever the matching private setter returned, or 0
    when no library matched.
    """
    set_threads = 0
    blas_implementation = None

    for lib in threadpool_info():
        filepath = lib['filepath']
        base_filepath = os.path.basename(filepath)

        if 'mkl' in base_filepath:
            blas_implementation = 'mkl'
            set_threads = _set_mkl_numpy_threads(filepath, num_threads)
            break

        if 'openblas' in base_filepath:
            blas_implementation = 'openblas'
            set_threads = _set_openblas_numpy_threads(filepath, num_threads)
            break

        # Not a recognised BLAS library; keep scanning the remaining entries.
        blas_implementation = 'unknown'

    return blas_implementation, set_threads
def test_mkl_set_num_threads():
    """Check that ``ssutils.mkl_set_num_threads`` changes MKL's thread count.

    The MKL shared library is located through ``threadpool_info()``. When no
    library whose file name contains "mkl" is found, a warning is printed and
    the test passes without checking anything.
    """
    num_threads = 2

    # Path of the first reported library whose base name mentions MKL.
    mkl_path = next(
        (
            lib['filepath']
            for lib in threadpool_info()
            if 'mkl' in os.path.basename(lib['filepath'])
        ),
        None,
    )

    if mkl_path is None:
        # While it may seem to be a good idea to raise an error at this
        # point, doing so puts the user in a catch-22, therefore no error
        # should be generated. A warning should be printed.
        print('Warning: No Intel BLAS implementation found.')
        return

    # The BSD/Linux and Darwin branches used to make this same call, and any
    # other platform left ``mkl_rt`` unbound (NameError below). Loading by
    # the reported path is the same on every platform, so do it once.
    mkl_rt = ctypes.CDLL(mkl_path)

    ssutils.mkl_set_num_threads(num_threads)
    num_threads_set = mkl_rt.mkl_get_max_threads()
    assert num_threads_set == num_threads
def test_threadpool_limits_public_api():
    """Check consistency between threadpool_info and _ThreadpoolInfo."""
    public_info = threadpool_info()
    private_modules = list(_threadpool_info())

    # zip() stops at the shorter input, so without this check a missing
    # entry on either side (or two empty results) would pass unnoticed.
    assert len(public_info) == len(private_modules)

    for module1, module2 in zip(public_info, private_modules):
        assert module1 == module2.todict()
def test_threadpool_info():
    """Check consistency between threadpool_info and ThreadpoolController."""
    function_info = threadpool_info()
    object_info = list(ThreadpoolController().lib_controllers)

    # zip() stops at the shorter input, so without this check a missing
    # entry on either side (or two empty results) would pass unnoticed.
    assert len(function_info) == len(object_info)

    for lib_info, lib_controller in zip(function_info, object_info):
        assert lib_info == lib_controller.info()
def test_threadpool_limits_function_with_side_effect():
    """Check that threadpool_limits works as a plain function call with
    side effects, not only as a context manager."""
    original_infos = threadpool_info()

    threadpool_limits(limits=1)
    try:
        checked_modules = [
            module for module in threadpool_info()
            if not is_old_openblas(module)
        ]
        for module in checked_modules:
            assert module["num_threads"] == 1
    finally:
        # Put the original limits back so this test has no side effect on
        # the rest of the suite.
        threadpool_limits(limits=original_infos)

    assert threadpool_info() == original_infos
def test_threadpool_limits_manual_unregister():
    """Check that the object returned by threadpool_limits can restore the
    original thread pool state through its ``unregister`` method."""
    original_infos = threadpool_info()

    limits = threadpool_limits(limits=1)
    try:
        checked_modules = [
            module for module in threadpool_info()
            if not is_old_openblas(module)
        ]
        for module in checked_modules:
            assert module["num_threads"] == 1
    finally:
        # Put the original limits back so this test has no side effect on
        # the rest of the suite.
        limits.unregister()

    assert threadpool_info() == original_infos
def check_update(self):
    """Refresh the cached thread pool info, tracking how long it has been
    unchanged.

    When the info is unchanged, ``self._stable`` is incremented. Otherwise
    the counter is reset to 0 and both the controller and the cached info
    are replaced.
    """
    import threadpoolctl

    current_info = threadpoolctl.threadpool_info()

    if current_info == self._info:
        self._stable += 1
        return

    # The set of libraries (or their state) changed: start over.
    self._stable = 0
    self._controller = threadpoolctl.ThreadpoolController()
    self._info = current_info
def test_command_line_command_flag():
    """Run ``python -m threadpoolctl -c "import numpy"`` and check that every
    library it reports is also visible in this process."""
    pytest.importorskip("numpy")

    command = [sys.executable, "-m", "threadpoolctl", "-c", "import numpy"]
    raw_output = subprocess.check_output(command)
    cli_info = json.loads(raw_output.decode("utf-8"))

    this_process_info = threadpool_info()
    for lib_info in cli_info:
        assert lib_info in this_process_info
def check_openblas_threads():
    """Make sure openblas is running single-threaded.

    Asserts that ``threadpool_info()`` reports at least one library and that
    every reported library uses exactly one thread.
    """
    d = threadpool_info()
    assert len(d) > 0, f"numpy didn't use blas api?: {d}"

    for entry in d:
        assert entry['num_threads'] == 1, (
            "expected single thread libraries (export OPENBLAS_NUM_THREADS=1 and "
            + "export OMP_NUM_THREADS=1 or use Settings.CHECK_SINGLE_THREAD_BLAS"
            + f" = False to skip): {entry}\nAll entries:{d}"
        )
def test_nested_prange_blas(nthreads_outer):
    """Check that BLAS calls nested in a prange loop run single-threaded
    under ``threadpool_limits(limits=1)`` while the outer loop keeps its
    requested thread count."""
    import numpy as np

    blas_info = [
        module for module in threadpool_info()
        if module["user_api"] == "blas"
    ]

    blis_linked = any(
        module['internal_api'] == 'blis' for module in threadpool_info()
    )
    if not blis_linked:
        # numpy can be linked to BLIS for CBLAS and OpenBLAS for LAPACK. In
        # that case this test will run BLIS gemm so no need to skip.
        for module in threadpool_info():
            if is_old_openblas(module):
                # OpenBLAS 0.3.3 and older are known to cause an
                # unrecoverable deadlock at process shutdown time (after
                # pytest has exited).
                pytest.skip("Old OpenBLAS: skipping test to avoid deadlock")

    from ._openmp_test_helper import check_nested_prange_blas

    A = np.ones((1000, 10))
    B = np.ones((100, 10))

    with threadpool_limits(limits=1) as threadpoolctx:
        max_threads = threadpoolctx.get_original_num_threads()['openmp']
        nthreads = effective_num_threads(nthreads_outer, max_threads)
        C, prange_num_threads, threadpool_infos = check_nested_prange_blas(
            A, B, nthreads
        )

    assert np.allclose(C, np.dot(A, B.T))
    assert prange_num_threads == nthreads

    # Info captured by the helper: same BLAS libraries, each limited to a
    # single thread.
    nested_blas_info = [
        module for module in threadpool_infos
        if module["user_api"] == "blas"
    ]
    assert len(nested_blas_info) == len(blas_info)
    for module in nested_blas_info:
        assert module['num_threads'] == 1
def testDisable(self):
    """Check OMP_NUM_THREADS handling and that implicit threading can be
    disabled."""
    # override=True replaces the value; override=False leaves it alone.
    set_thread_envvars(2, override=True)
    self.assertEqual(os.environ["OMP_NUM_THREADS"], "2")
    set_thread_envvars(3, override=False)
    self.assertEqual(os.environ["OMP_NUM_THREADS"], "2")

    disable_implicit_threading()
    self.assertEqual(os.environ["OMP_NUM_THREADS"], "1")

    # The optional libraries, when available, must report a single thread.
    if numexpr:
        self.assertEqual(numexpr.utils.get_num_threads(), 1)
    if threadpoolctl:
        for api in threadpoolctl.threadpool_info():
            self.assertEqual(api["num_threads"], 1, f"API: {api}")
def test_ThreadpoolInfo_todicts():
    """Check that the dicts returned by the ``.todict(s)`` methods contain
    all keys expected for the public API."""
    info = _threadpool_info()

    assert threadpool_info() == [module.todict() for module in info.modules]
    assert info.todicts() == [module.todict() for module in info]
    assert info.todicts() == [module.todict() for module in info.modules]

    required_keys = (
        "user_api",
        "internal_api",
        "prefix",
        "filepath",
        "version",
        "num_threads",
    )
    for module in info:
        module_dict = module.todict()
        for key in required_keys:
            assert key in module_dict

        # These three backends additionally report a threading layer.
        if module.internal_api in ("mkl", "blis", "openblas"):
            assert "threading_layer" in module_dict
def test_get_original_num_threads(limit):
    """Check ``get_original_num_threads`` of a nested BLAS-only
    threadpool_limits context."""
    with threadpool_limits(limits=2, user_api='blas') as ctl:
        # Set different blas num threads to start with (when multiple
        # openblas).
        if ctl._original_limits:
            ctl._original_limits[0]['set_num_threads'](1)

        original_infos = threadpool_info()

        with threadpool_limits(limits=limit, user_api='blas') as threadpoolctx:
            original_num_threads = threadpoolctx.get_original_num_threads()

            # Only the "blas" API was requested.
            assert 'openmp' not in original_num_threads

            has_blas = any(
                module['user_api'] == 'blas' for module in original_infos
            )
            if has_blas:
                assert original_num_threads['blas'] >= 1
            else:
                assert original_num_threads['blas'] is None

            if len(libopenblas_paths) >= 2:
                # NOTE(review): nothing inside this block looks like it can
                # emit the warning, and passing None to pytest.warns appears
                # to be deprecated in recent pytest. Confirm the intended
                # warning class and move the triggering call in here.
                with pytest.warns(None, match='Multiple value possible'):
                    expected = min(
                        module['num_threads'] for module in original_infos
                    )
                    assert original_num_threads['blas'] == expected
def test_threadpool_controller_info():
    """Check that the dicts returned by the ``info`` methods contain all
    expected keys."""
    controller = ThreadpoolController()

    assert threadpool_info() == [
        lib_controller.info() for lib_controller in controller.lib_controllers
    ]
    assert controller.info() == [
        lib_controller.info() for lib_controller in controller.lib_controllers
    ]

    required_keys = (
        "user_api",
        "internal_api",
        "prefix",
        "filepath",
        "version",
        "num_threads",
    )
    for lib_controller_dict in controller.info():
        for key in required_keys:
            assert key in lib_controller_dict

        # These three backends additionally report a threading layer.
        if lib_controller_dict["internal_api"] in ("mkl", "blis", "openblas"):
            assert "threading_layer" in lib_controller_dict
def test_architecture():
    """Check the "architecture" entry reported for OpenBLAS and BLIS, and
    that other libraries do not report one."""
    # XXX: add more as needed by CI or developer laptops
    expected_architectures = {
        "openblas": (
            "armv8",
            "Haswell",
            "SkylakeX",
            "Sandybridge",
            "VORTEX",
            "Zen",
        ),
        "blis": (
            "skx",
            "haswell",
        ),
    }

    for lib_info in threadpool_info():
        expected = expected_architectures.get(lib_info["internal_api"])
        if expected is not None:
            assert lib_info["architecture"] in expected
        else:
            # Not supported for other libraries
            assert "architecture" not in lib_info
import json

import joblib
import yaml
from sklearn.utils._show_versions import _get_deps_info, _get_sys_info
from threadpoolctl import threadpool_info

from benchmarks.core import load

if __name__ == "__main__":
    # Read the benchmark configuration: one entry per benchmark module,
    # mapped to the keyword arguments of its ``Benchmark`` class.
    with open("benchmarks/config.yml", "r") as config_file:
        config = yaml.full_load(config_file)

    # Run every configured benchmark in turn.
    for benchmark, params in config.items():
        bench_class = load(f"benchmarks.{benchmark}.Benchmark")
        bench_class(**params).run()

    # Record the environment the benchmarks ran in.
    env_info = {
        "system_info": _get_sys_info(),
        "dependencies_info": _get_deps_info(),
        "threadpool_info": threadpool_info(),
        "cpu_count": joblib.cpu_count(only_physical_cores=True),
    }
    with open("benchmarks/env_info.txt", "w") as env_file:
        json.dump(env_info, env_file)
def threadpool_info():
    """Return thread pool information.

    Uses the controller from ``_get_threadpool_controller()`` when there is
    one, and falls back to ``threadpoolctl.threadpool_info()`` otherwise.
    """
    controller = _get_threadpool_controller()
    if controller is None:
        return threadpoolctl.threadpool_info()
    return controller.info()
def __init__(self, repeats=2):
    """Snapshot the current thread pool state.

    Stores the current ``threadpool_info()`` result and a fresh
    ``ThreadpoolController``, and starts the ``_stable`` counter at 0.
    ``repeats`` is stored as given.
    """
    from threadpoolctl import ThreadpoolController, threadpool_info

    self.repeats = repeats
    self._stable = 0
    self._info = threadpool_info()
    self._controller = ThreadpoolController()
def check():
    """Assert that numexpr, blosc and every thread pool reported by
    threadpoolctl are limited to one thread."""
    # Each setter is asserted to return 1; note that both calls also set the
    # thread count to 2 as a side effect.
    assert numexpr.set_num_threads(2) == 1
    assert blosc.set_nthreads(2) == 1

    for pool in threadpoolctl.threadpool_info():
        assert pool["num_threads"] == 1, pool
from threadpoolctl import threadpool_info
from pprint import pprint

# Import the optional numerical libraries so that the native thread pool
# libraries they link against are loaded before the final report. Each one
# is skipped silently when it is not installed.
try:
    import numpy as np
except ImportError:
    pass
else:
    print("numpy", np.__version__)

try:
    import scipy
    import scipy.linalg
except ImportError:
    pass
else:
    print("scipy", scipy.__version__)

# Same for the project's OpenMP test helpers, when they are available.
try:
    from tests._openmp_test_helper import *  # noqa
except ImportError:
    pass

pprint(threadpool_info())
"""Validate that number of threads in thread pools is set to 1.""" import numexpr import blosc import threadpoolctl # APIs that return previous number of threads: assert numexpr.set_num_threads(2) == 1 assert blosc.set_nthreads(2) == 1 for d in threadpoolctl.threadpool_info(): assert d["num_threads"] == 1, d