def _get_cutlass_compile_options(sm, threads, use_fast_math=False):
    """Assemble the compiler keyword arguments used to build CUTLASS code.

    Parameters
    ----------
    sm
        Value substituted (via ``%d``) into the ``-gencode`` flag for both the
        ``compute_*`` and ``sm_*`` targets. Presumably the GPU compute
        capability as an integer such as 80 -- confirm against callers.
    threads
        Thread count forwarded to the compiler through ``-t``. A negative
        value selects ``multiprocessing.cpu_count()`` instead. Only consulted
        when the detected CUDA version is at least 11.2.
    use_fast_math
        When true, ``-DCUTLASS_USE_TANH_FOR_SIGMOID`` is added to the options.

    Returns
    -------
    dict
        A mapping with ``"cc"`` set to ``"nvcc"`` and ``"options"`` set to the
        list of command-line flags.
    """
    root_dir = _get_cutlass_path()
    include_dirs = (
        os.path.join(root_dir, "include"),
        os.path.join(root_dir, "tools/util/include"),
    )

    flags = [
        "-c",
        "-DCUTLASS_ENABLE_TENSOR_CORE_MMA=1",
        "-gencode=arch=compute_%d,code=[sm_%d,compute_%d]" % (sm, sm, sm),
        "-Xcompiler=-fPIC",
        "-Xcompiler=-Wconversion",
        "-Xcompiler=-fno-strict-aliasing",
        "-O3",
        "-std=c++14",
    ]
    # Header search paths come last, in the same order as the directories above.
    flags.extend("-I" + directory for directory in include_dirs)

    if use_fast_math:
        flags.append("-DCUTLASS_USE_TANH_FOR_SIGMOID")

    # The -t flag is only emitted for CUDA 11.2 and newer.
    if get_cuda_version() >= (11, 2):
        if threads < 0:
            worker_count = multiprocessing.cpu_count()
        else:
            worker_count = threads
        flags.append("-t %d" % worker_count)

    return {"cc": "nvcc", "options": flags}
def requires_nvcc_version(major_version, minor_version=0, release_version=0):
    """Build a decorator that skips a test when NVCC is older than required.

    The decorated test is skipped when the version reported by
    ``nvcc.get_cuda_version()`` compares lower than
    ``(major_version, minor_version, release_version)``. If that lookup raises
    ``RuntimeError`` the installed version is treated as ``(0, 0, 0)``. The
    markers returned by ``requires_cuda()`` are applied as well, so the test is
    also marked as requiring CUDA support.

    The version lookup and marker construction happen once, when this function
    is called, not each time the returned decorator is applied.

    Parameters
    ----------
    major_version: int
        The major component of the minimum (major, minor, release) version.

    minor_version: int
        The minor component of the minimum (major, minor, release) version.

    release_version: int
        The release component of the minimum (major, minor, release) version.

    Returns
    -------
    callable
        A decorator that passes the test function and the collected markers
        to ``_compose``.
    """
    required = (major_version, minor_version, release_version)

    try:
        installed = nvcc.get_cuda_version()
    except RuntimeError:
        # No usable NVCC: fall back to a version lower than any real release.
        installed = (0, 0, 0)

    required_text = ".".join(map(str, required))
    version_marker = pytest.mark.skipif(
        installed < required,
        reason=f"Requires NVCC >= {required_text}",
    )

    markers = [version_marker]
    markers.extend(requires_cuda())

    def inner(func):
        return _compose([func], markers)

    return inner