예제 #1
0
def _get_cutlass_compile_options(sm, threads, use_fast_math=False):
    cutlass_root = _get_cutlass_path()
    cutlass_include = os.path.join(cutlass_root, "include")
    cutlass_util_include = os.path.join(cutlass_root, "tools/util/include")

    kwargs = {}
    kwargs["cc"] = "nvcc"
    kwargs["options"] = [
        "-c",
        "-DCUTLASS_ENABLE_TENSOR_CORE_MMA=1",
        "-gencode=arch=compute_%d,code=[sm_%d,compute_%d]" % (sm, sm, sm),
        "-Xcompiler=-fPIC",
        "-Xcompiler=-Wconversion",
        "-Xcompiler=-fno-strict-aliasing",
        "-O3",
        "-std=c++14",
        "-I" + cutlass_include,
        "-I" + cutlass_util_include,
    ]
    if use_fast_math:
        kwargs["options"].append("-DCUTLASS_USE_TANH_FOR_SIGMOID")
    cuda_ver = get_cuda_version()
    if cuda_ver >= (11, 2):
        ncpu = multiprocessing.cpu_count() if threads < 0 else threads
        kwargs["options"].append("-t %d" % ncpu)
    return kwargs
예제 #2
0
def requires_nvcc_version(major_version, minor_version=0, release_version=0):
    """Mark a test as requiring at least a specific version of nvcc.

    Unit test marked with this decorator will run only if the
    installed version of NVCC is at least `(major_version,
    minor_version, release_version)`.

    This also marks the test as requiring a cuda support.

    Parameters
    ----------
    major_version: int

        The major version of the (major,minor,release) version tuple.

    minor_version: int

        The minor version of the (major,minor,release) version tuple.

    release_version: int

        The release version of the (major,minor,release) version tuple.

    """

    try:
        nvcc_version = nvcc.get_cuda_version()
    except RuntimeError:
        nvcc_version = (0, 0, 0)

    min_version = (major_version, minor_version, release_version)
    version_str = ".".join(str(v) for v in min_version)
    requires = [
        pytest.mark.skipif(nvcc_version < min_version,
                           reason=f"Requires NVCC >= {version_str}"),
        *requires_cuda(),
    ]

    def inner(func):
        return _compose([func], requires)

    return inner