示例#1
0
    def compile_str(
        module_name,
        src_code,
        location=None,
        include_dirs=[],
        lib_dirs=[],
        libs=[],
        preargs=[],
        rpaths=rpath_defaults,
        py_module=True,
        hide_symbols=True,
    ):
        """

        Parameters
        ----------
        module_name: str
             This has been embedded in the src_code.
        src_code
            A complete c or c++ source listing for the module.
        location
            A pre-existing filesystem directory where the
            cpp file and .so will be written.
        include_dirs
            A list of include directory names (each gets prefixed with -I).
        lib_dirs
            A list of library search path directory names (each gets 
            prefixed with -L).
        libs
            A list of libraries to link with (each gets prefixed with -l).
        preargs
            A list of extra compiler arguments.
        rpaths
            List of rpaths to use with Xlinker. Defaults to `rpath_defaults`.
        py_module
            If False, compile to a shared library, but
            do not import as a Python module.
        hide_symbols
            If True (the default), hide all symbols from the library symbol
            table unless explicitely exported.

        Returns
        -------
        module
            Dynamically-imported python module of the compiled code.
            (unless py_module is False, in that case returns None.)

        Notes
        -----
        On Windows 7 with nvcc 3.1 we need to compile in the real directory
        Otherwise nvcc never finish.

        """
        rpaths = list(rpaths)

        if sys.platform == "win32":
            # Remove some compilation args that cl.exe does not understand.
            # cl.exe is the compiler used by nvcc on Windows.
            for a in ["-Wno-write-strings", "-Wno-unused-label", "-Wno-unused-variable", "-fno-math-errno"]:
                if a in preargs:
                    preargs.remove(a)
        if preargs is None:
            preargs = []
        else:
            preargs = list(preargs)
        if sys.platform != "win32":
            preargs.append("-fPIC")
        cuda_root = config.cuda.root

        # The include dirs gived by the user should have precedence over
        # the standards ones.
        include_dirs = include_dirs + std_include_dirs()
        if os.path.abspath(os.path.split(__file__)[0]) not in include_dirs:
            include_dirs.append(os.path.abspath(os.path.split(__file__)[0]))

        libs = std_libs() + libs
        if "cudart" not in libs:
            libs.append("cudart")

        lib_dirs = std_lib_dirs() + lib_dirs
        if any(ld == os.path.join(cuda_root, "lib") or ld == os.path.join(cuda_root, "lib64") for ld in lib_dirs):
            warnings.warn(
                "You have the cuda library directory in your "
                "lib_dirs. This has been known to cause problems "
                "and should not be done."
            )

        if sys.platform != "darwin":
            # sometimes, the linker cannot find -lpython so we need to tell it
            # explicitly where it is located
            # this returns somepath/lib/python2.x
            python_lib = distutils.sysconfig.get_python_lib(plat_specific=1, standard_lib=1)
            python_lib = os.path.dirname(python_lib)
            if python_lib not in lib_dirs:
                lib_dirs.append(python_lib)

        cppfilename = os.path.join(location, "mod.cu")
        cppfile = open(cppfilename, "w")

        _logger.debug("Writing module C++ code to %s", cppfilename)

        cppfile.write(src_code)
        cppfile.close()
        lib_filename = os.path.join(location, "%s.%s" % (module_name, get_lib_extension()))

        _logger.debug("Generating shared lib %s", lib_filename)
        # TODO: Why do these args cause failure on gtx285 that has 1.3
        # compute capability? '--gpu-architecture=compute_13',
        # '--gpu-code=compute_13',
        # nvcc argument
        preargs1 = []
        for pa in preargs:
            for pattern in [
                "-O",
                "-arch=",
                "-ccbin=",
                "-G",
                "-g",
                "-I",
                "-L",
                "--fmad",
                "--ftz",
                "--maxrregcount",
                "--prec-div",
                "--prec-sqrt",
                "--use_fast_math",
                "-fmad",
                "-ftz",
                "-maxrregcount",
                "-prec-div",
                "-prec-sqrt",
                "-use_fast_math",
                "--use-local-env",
                "--cl-version=",
            ]:

                if pa.startswith(pattern):
                    preargs1.append(pa)
        preargs2 = [pa for pa in preargs if pa not in preargs1]  # other arguments

        # Don't put -G by default, as it slow things down.
        # We aren't sure if -g slow things down, so we don't put it by default.
        cmd = [nvcc_path, "-shared"] + preargs1
        if config.nvcc.compiler_bindir:
            cmd.extend(["--compiler-bindir", config.nvcc.compiler_bindir])

        if sys.platform == "win32":
            # add flags for Microsoft compiler to create .pdb files
            preargs2.extend(["/Zi", "/MD"])
            cmd.extend(["-Xlinker", "/DEBUG"])
            # remove the complaints for the duplication of `double round(double)`
            # in both math_functions.h and pymath.h,
            # by not including the one in pymath.h
            cmd.extend(["-D HAVE_ROUND"])
        else:
            if hide_symbols:
                preargs2.append("-fvisibility=hidden")

        if local_bitwidth() == 64:
            cmd.append("-m64")
        else:
            cmd.append("-m32")

        if len(preargs2) > 0:
            cmd.extend(["-Xcompiler", ",".join(preargs2)])

        # We should not use rpath if possible. If the user provided
        # provided an cuda.root flag, we need to add one, but
        # otherwise, we don't add it. See gh-1540 and
        # https://wiki.debian.org/RpathIssue for details.
        if user_provided_cuda_root and os.path.exists(os.path.join(config.cuda.root, "lib")):

            rpaths.append(os.path.join(config.cuda.root, "lib"))
            if sys.platform != "darwin":
                # the CUDA libs are universal (contain both 32-bit and 64-bit)
                rpaths.append(os.path.join(config.cuda.root, "lib64"))
        if sys.platform != "win32":
            # the -rpath option is not understood by the Microsoft linker
            for rpath in rpaths:
                cmd.extend(["-Xlinker", ",".join(["-rpath", rpath])])
        cmd.extend("-I%s" % idir for idir in include_dirs)
        cmd.extend(["-o", lib_filename])
        cmd.append(os.path.split(cppfilename)[-1])
        cmd.extend(["-L%s" % ldir for ldir in lib_dirs])
        cmd.extend(["-l%s" % l for l in libs])
        if sys.platform == "darwin":
            # This tells the compiler to use the already-loaded python
            # symbols (which should always be the right ones).
            cmd.extend(["-Xcompiler", "-undefined,dynamic_lookup"])

        # Remove "-u Symbol" arguments, since they are usually not
        # relevant for the new compilation, even if they were used for
        # compiling python.  If they are necessary, the nvcc syntax is
        # "-U Symbol" with a capital U.
        done = False
        while not done:
            try:
                indexof = cmd.index("-u")
                cmd.pop(indexof)  # Remove -u
                cmd.pop(indexof)  # Remove argument to -u
            except ValueError as e:
                done = True

        # CUDA Toolkit v4.1 Known Issues:
        # Host linker on Mac OS 10.7 (and 10.6 for me) passes -no_pie option
        # to nvcc this option is not recognized and generates an error
        # http://stackoverflow.com/questions/9327265/nvcc-unknown-option-no-pie
        # Passing -Xlinker -pie stops -no_pie from getting passed
        if sys.platform == "darwin" and nvcc_version >= "4.1":
            cmd.extend(["-Xlinker", "-pie"])

        # cmd.append("--ptxas-options=-v") #uncomment this to see
        # register and shared-mem requirements
        _logger.debug("Running cmd %s", " ".join(cmd))
        orig_dir = os.getcwd()
        try:
            os.chdir(location)
            p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            nvcc_stdout, nvcc_stderr = decode_iter(p.communicate()[:2])
        finally:
            os.chdir(orig_dir)

        for eline in nvcc_stderr.split("\n"):
            if not eline:
                continue
            if "skipping incompatible" in eline:
                # ld is skipping an incompatible library
                continue
            if "declared but never referenced" in eline:
                continue
            if "statement is unreachable" in eline:
                continue
            _logger.info("NVCC: %s", eline)

        if p.returncode:
            for i, l in enumerate(src_code.split("\n")):
                print(i + 1, l, file=sys.stderr)
            print("===============================", file=sys.stderr)
            # filter the output from the compiler
            for l in nvcc_stderr.split("\n"):
                if not l:
                    continue
                # filter out the annoying declaration warnings

                try:
                    if l[l.index(":") :].startswith(": warning: variable"):
                        continue
                    if l[l.index(":") :].startswith(": warning: label"):
                        continue
                except Exception:
                    pass
                print(l, file=sys.stderr)
            print(nvcc_stdout)
            print(cmd)
            raise Exception("nvcc return status", p.returncode, "for cmd", " ".join(cmd))
        elif config.cmodule.compilation_warning and nvcc_stdout:
            print(nvcc_stdout)

        if nvcc_stdout:
            # this doesn't happen to my knowledge
            print("DEBUG: nvcc STDOUT", nvcc_stdout, file=sys.stderr)

        if py_module:
            # touch the __init__ file
            open(os.path.join(location, "__init__.py"), "w").close()
            return dlimport(lib_filename)
示例#2
0
    def compile_str(module_name,
                    src_code,
                    location=None,
                    include_dirs=[],
                    lib_dirs=[],
                    libs=[],
                    preargs=[],
                    rpaths=rpath_defaults,
                    py_module=True):
        """:param module_name: string (this has been embedded in the src_code
        :param src_code: a complete c or c++ source listing for the module
        :param location: a pre-existing filesystem directory where the
                         cpp file and .so will be written
        :param include_dirs: a list of include directory names
                             (each gets prefixed with -I)
        :param lib_dirs: a list of library search path directory names
                         (each gets prefixed with -L)
        :param libs: a list of libraries to link with
                     (each gets prefixed with -l)
        :param preargs: a list of extra compiler arguments
        :param rpaths: list of rpaths to use with Xlinker.
                       Defaults to `rpath_defaults`.
        :param py_module: if False, compile to a shared library, but
            do not import as a Python module.

        :returns: dynamically-imported python module of the compiled code.
            (unless py_module is False, in that case returns None.)

        :note 1: On Windows 7 with nvcc 3.1 we need to compile in the
                 real directory Otherwise nvcc never finish.

        """

        rpaths = list(rpaths)

        if sys.platform == "win32":
            # Remove some compilation args that cl.exe does not understand.
            # cl.exe is the compiler used by nvcc on Windows.
            for a in [
                    "-Wno-write-strings", "-Wno-unused-label",
                    "-Wno-unused-variable", "-fno-math-errno"
            ]:
                if a in preargs:
                    preargs.remove(a)
        if preargs is None:
            preargs = []
        else:
            preargs = list(preargs)
        if sys.platform != 'win32':
            preargs.append('-fPIC')
        no_opt = False
        cuda_root = config.cuda.root

        #The include dirs gived by the user should have precedence over
        #the standards ones.
        include_dirs = include_dirs + std_include_dirs()
        if os.path.abspath(os.path.split(__file__)[0]) not in include_dirs:
            include_dirs.append(os.path.abspath(os.path.split(__file__)[0]))

        libs = std_libs() + libs
        if 'cudart' not in libs:
            libs.append('cudart')

        lib_dirs = std_lib_dirs() + lib_dirs
        if cuda_root:
            lib_dirs.append(os.path.join(cuda_root, 'lib'))

            # from Benjamin Schrauwen April 14 2010
            if sys.platform != 'darwin':
                # OS X uses universal libraries
                lib_dirs.append(os.path.join(cuda_root, 'lib64'))

        if sys.platform != 'darwin':
            # sometimes, the linker cannot find -lpython so we need to tell it
            # explicitly where it is located
            # this returns somepath/lib/python2.x
            python_lib = distutils.sysconfig.get_python_lib(plat_specific=1, \
                            standard_lib=1)
            python_lib = os.path.dirname(python_lib)
            if python_lib not in lib_dirs:
                lib_dirs.append(python_lib)

        cppfilename = os.path.join(location, 'mod.cu')
        cppfile = open(cppfilename, 'w')

        _logger.debug('Writing module C++ code to %s', cppfilename)

        cppfile.write(src_code)
        cppfile.close()
        lib_filename = os.path.join(
            location, '%s.%s' % (module_name, get_lib_extension()))

        _logger.debug('Generating shared lib %s', lib_filename)
        # TODO: Why do these args cause failure on gtx285 that has 1.3
        # compute capability? '--gpu-architecture=compute_13',
        # '--gpu-code=compute_13',
        #nvcc argument
        preargs1 = []
        for pa in preargs:
            for pattern in [
                    '-O', '-arch=', '-ccbin='
                    '--fmad', '--ftz', '--maxrregcount', '--prec-div',
                    '--prec-sqrt', '--use_fast_math', '-fmad', '-ftz',
                    '-maxrregcount', '-prec-div', '-prec-sqrt',
                    '-use_fast_math'
            ]:
                if pa.startswith(pattern):
                    preargs1.append(pa)
        preargs2 = [pa for pa in preargs
                    if pa not in preargs1]  # other arguments

        cmd = [nvcc_path, '-shared', '-g'] + preargs1
        if config.nvcc.compiler_bindir:
            cmd.extend(['--compiler-bindir', config.nvcc.compiler_bindir])

        if sys.platform == 'win32':
            # add flags for Microsoft compiler to create .pdb files
            preargs2.extend(['/Zi', '/MD'])
            cmd.extend(['-Xlinker', '/DEBUG'])

        if local_bitwidth() == 64:
            cmd.append('-m64')
        else:
            cmd.append('-m32')

        if len(preargs2) > 0:
            cmd.extend(['-Xcompiler', ','.join(preargs2)])

        # We should not use rpath if possible. If the user provided
        # provided an cuda.root flag, we need to add one, but
        # otherwise, we don't add it. See gh-1540 and
        # https://wiki.debian.org/RpathIssue for details.
        if (user_provided_cuda_root
                and os.path.exists(os.path.join(config.cuda.root, 'lib'))):

            rpaths.append(os.path.join(config.cuda.root, 'lib'))
            if sys.platform != 'darwin':
                # the CUDA libs are universal (contain both 32-bit and 64-bit)
                rpaths.append(os.path.join(config.cuda.root, 'lib64'))
        if sys.platform != 'win32':
            # the -rpath option is not understood by the Microsoft linker
            for rpath in rpaths:
                cmd.extend(['-Xlinker', ','.join(['-rpath', rpath])])
        cmd.extend('-I%s' % idir for idir in include_dirs)
        cmd.extend(['-o', lib_filename])
        cmd.append(os.path.split(cppfilename)[-1])
        cmd.extend(['-L%s' % ldir for ldir in lib_dirs])
        cmd.extend(['-l%s' % l for l in libs])
        if sys.platform == 'darwin':
            # This tells the compiler to use the already-loaded python
            # symbols (which should always be the right ones).
            cmd.extend(['-Xcompiler', '-undefined,dynamic_lookup'])

        # Remove "-u Symbol" arguments, since they are usually not
        # relevant for the new compilation, even if they were used for
        # compiling python.  If they are necessary, the nvcc syntax is
        # "-U Symbol" with a capital U.
        done = False
        while not done:
            try:
                indexof = cmd.index('-u')
                cmd.pop(indexof)  # Remove -u
                cmd.pop(indexof)  # Remove argument to -u
            except ValueError, e:
                done = True
示例#3
0
    def compile_str(
            module_name, src_code,
            location=None, include_dirs=[], lib_dirs=[], libs=[], preargs=[],
            rpaths=rpath_defaults, py_module=True, hide_symbols=True):
        """:param module_name: string (this has been embedded in the src_code
        :param src_code: a complete c or c++ source listing for the module
        :param location: a pre-existing filesystem directory where the
                         cpp file and .so will be written
        :param include_dirs: a list of include directory names
                             (each gets prefixed with -I)
        :param lib_dirs: a list of library search path directory names
                         (each gets prefixed with -L)
        :param libs: a list of libraries to link with
                     (each gets prefixed with -l)
        :param preargs: a list of extra compiler arguments
        :param rpaths: list of rpaths to use with Xlinker.
                       Defaults to `rpath_defaults`.
        :param py_module: if False, compile to a shared library, but
            do not import as a Python module.

        :param hide_symbols: if True (the default), hide all symbols
        from the library symbol table unless explicitely exported.

        :returns: dynamically-imported python module of the compiled code.
            (unless py_module is False, in that case returns None.)

        :note 1: On Windows 7 with nvcc 3.1 we need to compile in the
                 real directory Otherwise nvcc never finish.

        """

        rpaths = list(rpaths)

        if sys.platform == "win32":
            # Remove some compilation args that cl.exe does not understand.
            # cl.exe is the compiler used by nvcc on Windows.
            for a in ["-Wno-write-strings", "-Wno-unused-label",
                      "-Wno-unused-variable", "-fno-math-errno"]:
                if a in preargs:
                    preargs.remove(a)
        if preargs is None:
            preargs = []
        else:
            preargs = list(preargs)
        if sys.platform != 'win32':
            preargs.append('-fPIC')
        cuda_root = config.cuda.root

        # The include dirs gived by the user should have precedence over
        # the standards ones.
        include_dirs = include_dirs + std_include_dirs()
        if os.path.abspath(os.path.split(__file__)[0]) not in include_dirs:
            include_dirs.append(os.path.abspath(os.path.split(__file__)[0]))

        libs = std_libs() + libs
        if 'cudart' not in libs:
            libs.append('cudart')

        lib_dirs = std_lib_dirs() + lib_dirs
        if any(ld == os.path.join(cuda_root, 'lib') or
               ld == os.path.join(cuda_root, 'lib64') for ld in lib_dirs):
            warnings.warn("You have the cuda library directory in your "
                          "lib_dirs. This has been known to cause problems "
                          "and should not be done.")

        if sys.platform != 'darwin':
            # sometimes, the linker cannot find -lpython so we need to tell it
            # explicitly where it is located
            # this returns somepath/lib/python2.x
            python_lib = distutils.sysconfig.get_python_lib(plat_specific=1, \
                            standard_lib=1)
            python_lib = os.path.dirname(python_lib)
            if python_lib not in lib_dirs:
                lib_dirs.append(python_lib)

        cppfilename = os.path.join(location, 'mod.cu')
        cppfile = open(cppfilename, 'w')

        _logger.debug('Writing module C++ code to %s', cppfilename)

        cppfile.write(src_code)
        cppfile.close()
        lib_filename = os.path.join(location, '%s.%s' %
                (module_name, get_lib_extension()))

        _logger.debug('Generating shared lib %s', lib_filename)
        # TODO: Why do these args cause failure on gtx285 that has 1.3
        # compute capability? '--gpu-architecture=compute_13',
        # '--gpu-code=compute_13',
        # nvcc argument
        preargs1 = []
        for pa in preargs:
            for pattern in ['-O', '-arch=', '-ccbin=', '-G', '-g', '-I',
                            '-L', '--fmad', '--ftz', '--maxrregcount',
                            '--prec-div', '--prec-sqrt',  '--use_fast_math',
                            '-fmad', '-ftz', '-maxrregcount',
                            '-prec-div', '-prec-sqrt', '-use_fast_math',
                            '--use-local-env', '--cl-version=']:

                if pa.startswith(pattern):
                    preargs1.append(pa)
        preargs2 = [pa for pa in preargs
                    if pa not in preargs1]  # other arguments

        # Don't put -G by default, as it slow things down.
        # We aren't sure if -g slow things down, so we don't put it by default.
        cmd = [nvcc_path, '-shared'] + preargs1
        if config.nvcc.compiler_bindir:
            cmd.extend(['--compiler-bindir', config.nvcc.compiler_bindir])

        if sys.platform == 'win32':
            # add flags for Microsoft compiler to create .pdb files
            preargs2.extend(['/Zi', '/MD'])
            cmd.extend(['-Xlinker', '/DEBUG'])
            # remove the complaints for the duplication of `double round(double)`
            # in both math_functions.h and pymath.h,
            # by not including the one in pymath.h
            cmd.extend(['-D HAVE_ROUND'])
        else:
            if hide_symbols:
                preargs2.append('-fvisibility=hidden')

        if local_bitwidth() == 64:
            cmd.append('-m64')
        else:
            cmd.append('-m32')

        if len(preargs2) > 0:
            cmd.extend(['-Xcompiler', ','.join(preargs2)])

        # We should not use rpath if possible. If the user provided
        # provided an cuda.root flag, we need to add one, but
        # otherwise, we don't add it. See gh-1540 and
        # https://wiki.debian.org/RpathIssue for details.
        if (user_provided_cuda_root and
            os.path.exists(os.path.join(config.cuda.root, 'lib'))):

            rpaths.append(os.path.join(config.cuda.root, 'lib'))
            if sys.platform != 'darwin':
                # the CUDA libs are universal (contain both 32-bit and 64-bit)
                rpaths.append(os.path.join(config.cuda.root, 'lib64'))
        if sys.platform != 'win32':
            # the -rpath option is not understood by the Microsoft linker
            for rpath in rpaths:
                cmd.extend(['-Xlinker', ','.join(['-rpath', rpath])])
        cmd.extend('-I%s' % idir for idir in include_dirs)
        cmd.extend(['-o', lib_filename])
        cmd.append(os.path.split(cppfilename)[-1])
        cmd.extend(['-L%s' % ldir for ldir in lib_dirs])
        cmd.extend(['-l%s' % l for l in libs])
        if sys.platform == 'darwin':
            # This tells the compiler to use the already-loaded python
            # symbols (which should always be the right ones).
            cmd.extend(['-Xcompiler', '-undefined,dynamic_lookup'])

        # Remove "-u Symbol" arguments, since they are usually not
        # relevant for the new compilation, even if they were used for
        # compiling python.  If they are necessary, the nvcc syntax is
        # "-U Symbol" with a capital U.
        done = False
        while not done:
            try:
                indexof = cmd.index('-u')
                cmd.pop(indexof)  # Remove -u
                cmd.pop(indexof)  # Remove argument to -u
            except ValueError as e:
                done = True

        # CUDA Toolkit v4.1 Known Issues:
        # Host linker on Mac OS 10.7 (and 10.6 for me) passes -no_pie option
        # to nvcc this option is not recognized and generates an error
        # http://stackoverflow.com/questions/9327265/nvcc-unknown-option-no-pie
        # Passing -Xlinker -pie stops -no_pie from getting passed
        if sys.platform == 'darwin' and nvcc_version >= '4.1':
            cmd.extend(['-Xlinker', '-pie'])

        # cmd.append("--ptxas-options=-v") #uncomment this to see
        # register and shared-mem requirements
        _logger.debug('Running cmd %s', ' '.join(cmd))
        orig_dir = os.getcwd()
        try:
            os.chdir(location)
            p = subprocess.Popen(
                    cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            nvcc_stdout, nvcc_stderr = decode_iter(p.communicate()[:2])
        finally:
            os.chdir(orig_dir)

        for eline in nvcc_stderr.split('\n'):
            if not eline:
                continue
            if 'skipping incompatible' in eline:
                # ld is skipping an incompatible library
                continue
            if 'declared but never referenced' in eline:
                continue
            if 'statement is unreachable' in eline:
                continue
            _logger.info("NVCC: %s", eline)

        if p.returncode:
            for i, l in enumerate(src_code.split('\n')):
                print(i + 1, l, file=sys.stderr)
            print('===============================', file=sys.stderr)
            # filter the output from the compiler
            for l in nvcc_stderr.split('\n'):
                if not l:
                    continue
                # filter out the annoying declaration warnings

                try:
                    if l[l.index(':'):].startswith(': warning: variable'):
                        continue
                    if l[l.index(':'):].startswith(': warning: label'):
                        continue
                except Exception:
                    pass
                print(l, file=sys.stderr)
            print(nvcc_stdout)
            print(cmd)
            raise Exception('nvcc return status', p.returncode,
                            'for cmd', ' '.join(cmd))
        elif config.cmodule.compilation_warning and nvcc_stdout:
            print(nvcc_stdout)

        if nvcc_stdout:
            # this doesn't happen to my knowledge
            print("DEBUG: nvcc STDOUT", nvcc_stdout, file=sys.stderr)

        if py_module:
            # touch the __init__ file
            open(os.path.join(location, "__init__.py"), 'w').close()
            return dlimport(lib_filename)
示例#4
0
    def compile_str(
            module_name, src_code,
            location=None, include_dirs=[], lib_dirs=[], libs=[], preargs=[],
            rpaths=rpath_defaults, py_module=True):
        """:param module_name: string (this has been embedded in the src_code
        :param src_code: a complete c or c++ source listing for the module
        :param location: a pre-existing filesystem directory where the
                         cpp file and .so will be written
        :param include_dirs: a list of include directory names
                             (each gets prefixed with -I)
        :param lib_dirs: a list of library search path directory names
                         (each gets prefixed with -L)
        :param libs: a list of libraries to link with
                     (each gets prefixed with -l)
        :param preargs: a list of extra compiler arguments
        :param rpaths: list of rpaths to use with Xlinker.
                       Defaults to `rpath_defaults`.
        :param py_module: if False, compile to a shared library, but
            do not import as a Python module.

        :returns: dynamically-imported python module of the compiled code.
            (unless py_module is False, in that case returns None.)

        :note 1: On Windows 7 with nvcc 3.1 we need to compile in the
                 real directory Otherwise nvcc never finish.

        """

        rpaths = list(rpaths)

        if sys.platform == "win32":
            # Remove some compilation args that cl.exe does not understand.
            # cl.exe is the compiler used by nvcc on Windows.
            for a in ["-Wno-write-strings", "-Wno-unused-label",
                      "-Wno-unused-variable", "-fno-math-errno"]:
                if a in preargs:
                    preargs.remove(a)
        if preargs is None:
            preargs = []
        else:
            preargs = list(preargs)
        if sys.platform != 'win32':
            preargs.append('-fPIC')
        no_opt = False
        cuda_root = config.cuda.root

        #The include dirs gived by the user should have precedence over
        #the standards ones.
        include_dirs = include_dirs + std_include_dirs()
        if os.path.abspath(os.path.split(__file__)[0]) not in include_dirs:
            include_dirs.append(os.path.abspath(os.path.split(__file__)[0]))

        libs = std_libs() + libs
        if 'cudart' not in libs:
            libs.append('cudart')

        lib_dirs = std_lib_dirs() + lib_dirs
        if cuda_root:
            lib_dirs.append(os.path.join(cuda_root, 'lib'))

            # from Benjamin Schrauwen April 14 2010
            if sys.platform != 'darwin':
                # No 64 bit CUDA libraries available on the mac, yet..
                lib_dirs.append(os.path.join(cuda_root, 'lib64'))

        if sys.platform == 'darwin':
            # On the mac, nvcc is not able to link using -framework
            # Python, so we have manually add the correct library and
            # paths
            darwin_python_lib = commands.getoutput('python-config --ldflags')
        else:
            # sometimes, the linker cannot find -lpython so we need to tell it
            # explicitly where it is located
            # this returns somepath/lib/python2.x
            python_lib = distutils.sysconfig.get_python_lib(plat_specific=1, \
                            standard_lib=1)
            python_lib = os.path.dirname(python_lib)
            if python_lib not in lib_dirs:
                lib_dirs.append(python_lib)

        cppfilename = os.path.join(location, 'mod.cu')
        cppfile = file(cppfilename, 'w')

        _logger.debug('Writing module C++ code to %s', cppfilename)
        ofiles = []
        rval = None

        cppfile.write(src_code)
        cppfile.close()
        lib_filename = os.path.join(location, '%s.%s' %
                (module_name, get_lib_extension()))

        _logger.debug('Generating shared lib %s', lib_filename)
        # TODO: Why do these args cause failure on gtx285 that has 1.3
        # compute capability? '--gpu-architecture=compute_13',
        # '--gpu-code=compute_13',
        #nvcc argument
        preargs1 = []
        for pa in preargs:
            for pattern in ['-O', '-arch=',
                            '--fmad', '--ftz', '--maxrregcount',
                            '--prec-div', '--prec-sqrt',  '--use_fast_math',
                            '-fmad', '-ftz', '-maxrregcount',
                            '-prec-div', '-prec-sqrt', '-use_fast_math']:
                if pa.startswith(pattern):
                    preargs1.append(pa)
        preargs2 = [pa for pa in preargs
                    if pa not in preargs1]  # other arguments

        cmd = [nvcc_path, '-shared', '-g'] + preargs1
        if config.nvcc.compiler_bindir:
            cmd.extend(['--compiler-bindir', config.nvcc.compiler_bindir])

        if sys.platform == 'win32':
            # add flags for Microsoft compiler to create .pdb files
            preargs2.append('/Zi')
            cmd.extend(['-Xlinker', '/DEBUG'])

        if local_bitwidth() == 64:
            cmd.append('-m64')
        else:
            cmd.append('-m32')

        if len(preargs2) > 0:
            cmd.extend(['-Xcompiler', ','.join(preargs2)])

        if config.cuda.root and os.path.exists(os.path.join(config.cuda.root,
                                                            'lib')):
            rpaths.append(os.path.join(config.cuda.root, 'lib'))
            if sys.platform != 'darwin':
                # the 64bit CUDA libs are in the same files as are
                # named by the function above
                rpaths.append(os.path.join(config.cuda.root, 'lib64'))
        if sys.platform != 'win32':
            # the -rpath option is not understood by the Microsoft linker
            for rpath in rpaths:
                cmd.extend(['-Xlinker', ','.join(['-rpath', rpath])])
        cmd.extend('-I%s' % idir for idir in include_dirs)
        cmd.extend(['-o', lib_filename])
        cmd.append(os.path.split(cppfilename)[-1])
        cmd.extend(['-L%s' % ldir for ldir in lib_dirs])
        cmd.extend(['-l%s' % l for l in libs])
        if module_name != 'cuda_ndarray':
            cmd.append("-lcuda_ndarray")
        if sys.platform == 'darwin':
            cmd.extend(darwin_python_lib.split())

        if sys.platform == 'darwin':
            done = False
            while not done:
                try:
                    indexof = cmd.index('-framework')
                    newarg = '-Xcompiler', ','.join(cmd[indexof:(indexof + 2)])
                    cmd.pop(indexof)  # Remove -framework
                    cmd.pop(indexof)  # Remove argument to -framework
                    cmd.extend(newarg)
                except ValueError, e:
                    done = True
示例#5
0
    def compile_str(
            module_name, src_code,
            location=None, include_dirs=[], lib_dirs=[], libs=[], preargs=[],
            rpaths=rpath_defaults, py_module=True):
        """:param module_name: string (this has been embedded in the src_code
        :param src_code: a complete c or c++ source listing for the module
        :param location: a pre-existing filesystem directory where the
                         cpp file and .so will be written
        :param include_dirs: a list of include directory names
                             (each gets prefixed with -I)
        :param lib_dirs: a list of library search path directory names
                         (each gets prefixed with -L)
        :param libs: a list of libraries to link with
                     (each gets prefixed with -l)
        :param preargs: a list of extra compiler arguments
        :param rpaths: list of rpaths to use with Xlinker.
                       Defaults to `rpath_defaults`.
        :param py_module: if False, compile to a shared library, but
            do not import as a Python module.

        :returns: dynamically-imported python module of the compiled code.
            (unless py_module is False, in that case returns None.)

        :note 1: On Windows 7 with nvcc 3.1 we need to compile in the
                 real directory Otherwise nvcc never finish.

        """

        rpaths = list(rpaths)

        if sys.platform == "win32":
            # Remove some compilation args that cl.exe does not understand.
            # cl.exe is the compiler used by nvcc on Windows.
            for a in ["-Wno-write-strings", "-Wno-unused-label",
                      "-Wno-unused-variable", "-fno-math-errno"]:
                if a in preargs:
                    preargs.remove(a)
        if preargs is None:
            preargs = []
        else:
            preargs = list(preargs)
        if sys.platform != 'win32':
            preargs.append('-fPIC')
        no_opt = False
        cuda_root = config.cuda.root

        #The include dirs gived by the user should have precedence over
        #the standards ones.
        include_dirs = include_dirs + std_include_dirs()
        if os.path.abspath(os.path.split(__file__)[0]) not in include_dirs:
            include_dirs.append(os.path.abspath(os.path.split(__file__)[0]))

        libs = std_libs() + libs
        if 'cudart' not in libs:
            libs.append('cudart')

        lib_dirs = std_lib_dirs() + lib_dirs
        if cuda_root:
            lib_dirs.append(os.path.join(cuda_root, 'lib'))

            # from Benjamin Schrauwen April 14 2010
            if sys.platform != 'darwin':
                # OS X uses universal libraries
                lib_dirs.append(os.path.join(cuda_root, 'lib64'))

        if sys.platform != 'darwin':
            # sometimes, the linker cannot find -lpython so we need to tell it
            # explicitly where it is located
            # this returns somepath/lib/python2.x
            python_lib = distutils.sysconfig.get_python_lib(plat_specific=1, \
                            standard_lib=1)
            python_lib = os.path.dirname(python_lib)
            if python_lib not in lib_dirs:
                lib_dirs.append(python_lib)

        cppfilename = os.path.join(location, 'mod.cu')
        cppfile = open(cppfilename, 'w')

        _logger.debug('Writing module C++ code to %s', cppfilename)

        cppfile.write(src_code)
        cppfile.close()
        lib_filename = os.path.join(location, '%s.%s' %
                (module_name, get_lib_extension()))

        _logger.debug('Generating shared lib %s', lib_filename)
        # TODO: Why do these args cause failure on gtx285 that has 1.3
        # compute capability? '--gpu-architecture=compute_13',
        # '--gpu-code=compute_13',
        #nvcc argument
        preargs1 = []
        for pa in preargs:
            for pattern in ['-O', '-arch=', '-ccbin=',
                            '--fmad', '--ftz', '--maxrregcount',
                            '--prec-div', '--prec-sqrt',  '--use_fast_math',
                            '-fmad', '-ftz', '-maxrregcount',
                            '-prec-div', '-prec-sqrt', '-use_fast_math']:
                if pa.startswith(pattern):
                    preargs1.append(pa)
        preargs2 = [pa for pa in preargs
                    if pa not in preargs1]  # other arguments

        cmd = [nvcc_path, '-shared', '-g'] + preargs1
        if config.nvcc.compiler_bindir:
            cmd.extend(['--compiler-bindir', config.nvcc.compiler_bindir])

        if sys.platform == 'win32':
            # add flags for Microsoft compiler to create .pdb files
            preargs2.extend(['/Zi', '/MD'])
            cmd.extend(['-Xlinker', '/DEBUG'])

        if local_bitwidth() == 64:
            cmd.append('-m64')
        else:
            cmd.append('-m32')

        if len(preargs2) > 0:
            cmd.extend(['-Xcompiler', ','.join(preargs2)])

        # We should not use rpath if possible. If the user provided
        # provided an cuda.root flag, we need to add one, but
        # otherwise, we don't add it. See gh-1540 and
        # https://wiki.debian.org/RpathIssue for details.
        if (user_provided_cuda_root and
            os.path.exists(os.path.join(config.cuda.root, 'lib'))):

            rpaths.append(os.path.join(config.cuda.root, 'lib'))
            if sys.platform != 'darwin':
                # the CUDA libs are universal (contain both 32-bit and 64-bit)
                rpaths.append(os.path.join(config.cuda.root, 'lib64'))
        if sys.platform != 'win32':
            # the -rpath option is not understood by the Microsoft linker
            for rpath in rpaths:
                cmd.extend(['-Xlinker', ','.join(['-rpath', rpath])])
        cmd.extend('-I%s' % idir for idir in include_dirs)
        cmd.extend(['-o', lib_filename])
        cmd.append(os.path.split(cppfilename)[-1])
        cmd.extend(['-L%s' % ldir for ldir in lib_dirs])
        cmd.extend(['-l%s' % l for l in libs])
        if sys.platform == 'darwin':
            # This tells the compiler to use the already-loaded python
            # symbols (which should always be the right ones).
            cmd.extend(['-Xcompiler', '-undefined,dynamic_lookup'])

        # Remove "-u Symbol" arguments, since they are usually not
        # relevant for the new compilation, even if they were used for
        # compiling python.  If they are necessary, the nvcc syntax is
        # "-U Symbol" with a capital U.
        done = False
        while not done:
            try:
                indexof = cmd.index('-u')
                cmd.pop(indexof)  # Remove -u
                cmd.pop(indexof)  # Remove argument to -u
            except ValueError, e:
                done = True
示例#6
0
    def compile_str(
            module_name, src_code,
            location=None, include_dirs=[], lib_dirs=[], libs=[], preargs=[],
            rpaths=rpath_defaults, py_module=True, hide_symbols=True):
        """

        Parameters
        ----------
        module_name: str
             This has been embedded in the src_code.
        src_code
            A complete c or c++ source listing for the module.
        location
            A pre-existing filesystem directory where the
            cpp file and .so will be written.
        include_dirs
            A list of include directory names (each gets prefixed with -I).
        lib_dirs
            A list of library search path directory names (each gets
            prefixed with -L).
        libs
            A list of libraries to link with (each gets prefixed with -l).
        preargs
            A list of extra compiler arguments.
        rpaths
            List of rpaths to use with Xlinker. Defaults to `rpath_defaults`.
        py_module
            If False, compile to a shared library, but
            do not import as a Python module.
        hide_symbols
            If True (the default), hide all symbols from the library symbol
            table unless explicitely exported.

        Returns
        -------
        module
            Dynamically-imported python module of the compiled code.
            (unless py_module is False, in that case returns None.)

        Notes
        -----
        On Windows 7 with nvcc 3.1 we need to compile in the real directory
        Otherwise nvcc never finish.

        """
        # Remove empty string directory
        include_dirs = [d for d in include_dirs if d]
        lib_dirs = [d for d in lib_dirs if d]

        rpaths = list(rpaths)

        if sys.platform == "win32":
            # Remove some compilation args that cl.exe does not understand.
            # cl.exe is the compiler used by nvcc on Windows.
            for a in ["-Wno-write-strings", "-Wno-unused-label",
                      "-Wno-unused-variable", "-fno-math-errno"]:
                if a in preargs:
                    preargs.remove(a)
        if preargs is None:
            preargs = []
        else:
            preargs = list(preargs)
        if sys.platform != 'win32':
            preargs.append('-fPIC')
        if config.cmodule.remove_gxx_opt:
            preargs = [p for p in preargs if not p.startswith('-O')]

        cuda_root = config.cuda.root

        # The include dirs gived by the user should have precedence over
        # the standards ones.
        include_dirs = include_dirs + std_include_dirs()
        if os.path.abspath(os.path.split(__file__)[0]) not in include_dirs:
            include_dirs.append(os.path.abspath(os.path.split(__file__)[0]))

        libs = libs + std_libs()
        if 'cudart' not in libs:
            libs.append('cudart')

        lib_dirs = lib_dirs + std_lib_dirs()

        if sys.platform != 'darwin':
            # config.dnn.include_path add this by default for cudnn in the
            # new back-end. This should not be used in this back-end. So
            # just remove them.
            lib_dirs = [ld for ld in lib_dirs if
                        not(ld == os.path.join(cuda_root, 'lib') or
                            ld == os.path.join(cuda_root, 'lib64'))]

        if sys.platform != 'darwin':
            # sometimes, the linker cannot find -lpython so we need to tell it
            # explicitly where it is located
            # this returns somepath/lib/python2.x
            python_lib = distutils.sysconfig.get_python_lib(plat_specific=1,
                                                            standard_lib=1)
            python_lib = os.path.dirname(python_lib)
            if python_lib not in lib_dirs:
                lib_dirs.append(python_lib)

        if (config.nvcc.cudafe == 'heuristic' and not
            any(marker in src_code for marker in ("__global__", "__device__",
                                                  "__host__", "<<<",
                                                  "nvmatrix.cuh"))):
            # only calls existing CUDA functions, can compile much faster
            cppfilename = os.path.join(location, 'mod.cpp')
            src_code = ("#include <cuda.h>\n"
                        "#include <cuda_runtime_api.h>\n" +
                        src_code)
        else:
            # contains CUDA host code or device functions, needs .cu extension
            cppfilename = os.path.join(location, 'mod.cu')

        with open(cppfilename, 'w') as cppfile:

            _logger.debug('Writing module C++ code to %s', cppfilename)
            cppfile.write(src_code)

        lib_filename = os.path.join(
            location, '%s.%s' %
            (module_name, get_lib_extension()))

        _logger.debug('Generating shared lib %s', lib_filename)
        # TODO: Why do these args cause failure on gtx285 that has 1.3
        # compute capability? '--gpu-architecture=compute_13',
        # '--gpu-code=compute_13',
        # nvcc argument
        preargs1 = []
        preargs2 = []
        for pa in preargs:
            if pa.startswith('-Wl,'):
                # the -rpath option is not understood by the Microsoft linker
                if sys.platform != 'win32' or not pa.startswith('-Wl,-rpath'):
                    preargs1.append('-Xlinker')
                    preargs1.append(pa[4:])
                continue
            for pattern in ['-O', '-arch=', '-ccbin=', '-G', '-g', '-I',
                            '-L', '--fmad', '--ftz', '--maxrregcount',
                            '--prec-div', '--prec-sqrt', '--use_fast_math',
                            '-fmad', '-ftz', '-maxrregcount',
                            '-prec-div', '-prec-sqrt', '-use_fast_math',
                            '--use-local-env', '--cl-version=', '-std=']:

                if pa.startswith(pattern):
                    preargs1.append(pa)
                    break
            else:
                preargs2.append(pa)

        # Don't put -G by default, as it slow things down.
        # We aren't sure if -g slow things down, so we don't put it by default.
        cmd = [nvcc_path, '-shared'] + preargs1
        if config.nvcc.compiler_bindir:
            cmd.extend(['--compiler-bindir', config.nvcc.compiler_bindir])

        if sys.platform == 'win32':
            # add flags for Microsoft compiler to create .pdb files
            preargs2.extend(['/Zi', '/MD'])
            cmd.extend(['-Xlinker', '/DEBUG'])
            # remove the complaints for the duplication of `double round(double)`
            # in both math_functions.h and pymath.h,
            # by not including the one in pymath.h
            cmd.extend(['-D HAVE_ROUND'])
        else:
            if hide_symbols:
                preargs2.append('-fvisibility=hidden')

        if local_bitwidth() == 64:
            cmd.append('-m64')
        else:
            cmd.append('-m32')

        if len(preargs2) > 0:
            cmd.extend(['-Xcompiler', ','.join(preargs2)])

        # We should not use rpath if possible. If the user provided
        # provided an cuda.root flag, we need to add one, but
        # otherwise, we don't add it. See gh-1540 and
        # https://wiki.debian.org/RpathIssue for details.

        if (not type(config.cuda).root.is_default and
                os.path.exists(os.path.join(config.cuda.root, 'lib'))):

            rpaths.append(os.path.join(config.cuda.root, 'lib'))
            if sys.platform != 'darwin':
                # the CUDA libs are universal (contain both 32-bit and 64-bit)
                rpaths.append(os.path.join(config.cuda.root, 'lib64'))
        if sys.platform != 'win32':
            # the -rpath option is not understood by the Microsoft linker
            for rpath in rpaths:
                cmd.extend(['-Xlinker', ','.join(['-rpath', rpath])])
        # to support path that includes spaces, we need to wrap it with double quotes on Windows
        path_wrapper = "\"" if os.name == 'nt' else ""
        cmd.extend(['-I%s%s%s' % (path_wrapper, idir, path_wrapper) for idir in include_dirs])
        cmd.extend(['-L%s%s%s' % (path_wrapper, ldir, path_wrapper) for ldir in lib_dirs])
        cmd.extend(['-o', lib_filename])
        cmd.append(os.path.split(cppfilename)[-1])
        cmd.extend(['-l%s' % l for l in libs])
        if sys.platform == 'darwin':
            # This tells the compiler to use the already-loaded python
            # symbols (which should always be the right ones).
            cmd.extend(['-Xcompiler', '-undefined,dynamic_lookup'])

        # Remove "-u Symbol" arguments, since they are usually not
        # relevant for the new compilation, even if they were used for
        # compiling python.  If they are necessary, the nvcc syntax is
        # "-U Symbol" with a capital U.
        done = False
        while not done:
            try:
                indexof = cmd.index('-u')
                cmd.pop(indexof)  # Remove -u
                cmd.pop(indexof)  # Remove argument to -u
            except ValueError:
                done = True

        # CUDA Toolkit v4.1 Known Issues:
        # Host linker on Mac OS 10.7 (and 10.6 for me) passes -no_pie option
        # to nvcc this option is not recognized and generates an error
        # http://stackoverflow.com/questions/9327265/nvcc-unknown-option-no-pie
        # Passing -Xlinker -pie stops -no_pie from getting passed
        if sys.platform == 'darwin' and nvcc_version >= '4.1':
            cmd.extend(['-Xlinker', '-pie'])

        # cmd.append("--ptxas-options=-v") #uncomment this to see
        # register and shared-mem requirements
        _logger.debug('Running cmd %s', ' '.join(cmd))
        orig_dir = os.getcwd()
        try:
            os.chdir(location)
            p = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            nvcc_stdout_raw, nvcc_stderr_raw = p.communicate()[:2]
            console_encoding = getpreferredencoding()
            nvcc_stdout = decode_with(nvcc_stdout_raw, console_encoding)
            nvcc_stderr = decode_with(nvcc_stderr_raw, console_encoding)
        finally:
            os.chdir(orig_dir)

        for eline in nvcc_stderr.split('\n'):
            if not eline:
                continue
            if 'skipping incompatible' in eline:
                # ld is skipping an incompatible library
                continue
            if 'declared but never referenced' in eline:
                continue
            if 'statement is unreachable' in eline:
                continue
            _logger.info("NVCC: %s", eline)

        if p.returncode:
            for i, l in enumerate(src_code.split('\n')):
                print(i + 1, l, file=sys.stderr)
            print('===============================', file=sys.stderr)
            # filter the output from the compiler
            for l in nvcc_stderr.split('\n'):
                if not l:
                    continue
                # filter out the annoying declaration warnings

                try:
                    if l[l.index(':'):].startswith(': warning: variable'):
                        continue
                    if l[l.index(':'):].startswith(': warning: label'):
                        continue
                except Exception:
                    pass
                print(l, file=sys.stderr)
            print(nvcc_stdout)
            print(cmd)
            raise Exception('nvcc return status', p.returncode,
                            'for cmd', ' '.join(cmd))
        elif config.cmodule.compilation_warning and nvcc_stdout:
            print(nvcc_stdout)

        # On Windows, nvcc print useless stuff by default
        if sys.platform != 'win32' and nvcc_stdout:
            # this doesn't happen to my knowledge
            print("DEBUG: nvcc STDOUT", nvcc_stdout, file=sys.stderr)

        if py_module:
            # touch the __init__ file
            open(os.path.join(location, "__init__.py"), 'w').close()
            return dlimport(lib_filename)
示例#7
0
    def compile_str(
        module_name,
        src_code,
        location=None,
        include_dirs=[],
        lib_dirs=[],
        libs=[],
        preargs=[],
        rpaths=rpath_defaults,
        py_module=True,
    ):
        """:param module_name: string (this has been embedded in the src_code
        :param src_code: a complete c or c++ source listing for the module
        :param location: a pre-existing filesystem directory where the
                         cpp file and .so will be written
        :param include_dirs: a list of include directory names
                             (each gets prefixed with -I)
        :param lib_dirs: a list of library search path directory names
                         (each gets prefixed with -L)
        :param libs: a list of libraries to link with
                     (each gets prefixed with -l)
        :param preargs: a list of extra compiler arguments
        :param rpaths: list of rpaths to use with Xlinker.
                       Defaults to `rpath_defaults`.
        :param py_module: if False, compile to a shared library, but
            do not import as a Python module.

        :returns: dynamically-imported python module of the compiled code.
            (unless py_module is False, in that case returns None.)

        :note 1: On Windows 7 with nvcc 3.1 we need to compile in the
                 real directory Otherwise nvcc never finish.

        """

        rpaths = list(rpaths)

        if sys.platform == "win32":
            # Remove some compilation args that cl.exe does not understand.
            # cl.exe is the compiler used by nvcc on Windows.
            for a in ["-Wno-write-strings", "-Wno-unused-label", "-Wno-unused-variable", "-fno-math-errno"]:
                if a in preargs:
                    preargs.remove(a)
        if preargs is None:
            preargs = []
        else:
            preargs = list(preargs)
        if sys.platform != "win32":
            preargs.append("-fPIC")
        cuda_root = config.cuda.root

        # The include dirs gived by the user should have precedence over
        # the standards ones.
        include_dirs = include_dirs + std_include_dirs()
        if os.path.abspath(os.path.split(__file__)[0]) not in include_dirs:
            include_dirs.append(os.path.abspath(os.path.split(__file__)[0]))

        libs = std_libs() + libs
        if "cudart" not in libs:
            libs.append("cudart")

        lib_dirs = std_lib_dirs() + lib_dirs
        if any(ld == os.path.join(cuda_root, "lib") or ld == os.path.join(cuda_root, "lib64") for ld in lib_dirs):
            warnings.warn(
                "You have the cuda library directory in your "
                "lib_dirs. This has been known to cause problems "
                "and should not be done."
            )

        if sys.platform != "darwin":
            # sometimes, the linker cannot find -lpython so we need to tell it
            # explicitly where it is located
            # this returns somepath/lib/python2.x
            python_lib = distutils.sysconfig.get_python_lib(plat_specific=1, standard_lib=1)
            python_lib = os.path.dirname(python_lib)
            if python_lib not in lib_dirs:
                lib_dirs.append(python_lib)

        cppfilename = os.path.join(location, "mod.cu")
        cppfile = open(cppfilename, "w")

        _logger.debug("Writing module C++ code to %s", cppfilename)

        cppfile.write(src_code)
        cppfile.close()
        lib_filename = os.path.join(location, "%s.%s" % (module_name, get_lib_extension()))

        _logger.debug("Generating shared lib %s", lib_filename)
        # TODO: Why do these args cause failure on gtx285 that has 1.3
        # compute capability? '--gpu-architecture=compute_13',
        # '--gpu-code=compute_13',
        # nvcc argument
        preargs1 = []
        for pa in preargs:
            for pattern in [
                "-O",
                "-arch=",
                "-ccbin=",
                "-G",
                "-g",
                "-I",
                "-L",
                "--fmad",
                "--ftz",
                "--maxrregcount",
                "--prec-div",
                "--prec-sqrt",
                "--use_fast_math",
                "-fmad",
                "-ftz",
                "-maxrregcount",
                "-prec-div",
                "-prec-sqrt",
                "-use_fast_math",
                "--use-local-env",
                "--cl-version=",
            ]:
                if pa.startswith(pattern):
                    preargs1.append(pa)
        preargs2 = [pa for pa in preargs if pa not in preargs1]  # other arguments

        # Don't put -G by default, as it slow things down.
        # We aren't sure if -g slow things down, so we don't put it by default.
        cmd = [nvcc_path, "-shared"] + preargs1
        if config.nvcc.compiler_bindir:
            cmd.extend(["--compiler-bindir", config.nvcc.compiler_bindir])

        if sys.platform == "win32":
            # add flags for Microsoft compiler to create .pdb files
            preargs2.extend(["/Zi", "/MD"])
            cmd.extend(["-Xlinker", "/DEBUG"])
            # remove the complaints for the duplication of `double round(double)`
            # in both math_functions.h and pymath.h,
            # by not including the one in pymath.h
            cmd.extend(["-D HAVE_ROUND"])

        if local_bitwidth() == 64:
            cmd.append("-m64")
        else:
            cmd.append("-m32")

        if len(preargs2) > 0:
            cmd.extend(["-Xcompiler", ",".join(preargs2)])

        # We should not use rpath if possible. If the user provided
        # provided an cuda.root flag, we need to add one, but
        # otherwise, we don't add it. See gh-1540 and
        # https://wiki.debian.org/RpathIssue for details.
        if user_provided_cuda_root and os.path.exists(os.path.join(config.cuda.root, "lib")):

            rpaths.append(os.path.join(config.cuda.root, "lib"))
            if sys.platform != "darwin":
                # the CUDA libs are universal (contain both 32-bit and 64-bit)
                rpaths.append(os.path.join(config.cuda.root, "lib64"))
        if sys.platform != "win32":
            # the -rpath option is not understood by the Microsoft linker
            for rpath in rpaths:
                cmd.extend(["-Xlinker", ",".join(["-rpath", rpath])])
        cmd.extend("-I%s" % idir for idir in include_dirs)
        cmd.extend(["-o", lib_filename])
        cmd.append(os.path.split(cppfilename)[-1])
        cmd.extend(["-L%s" % ldir for ldir in lib_dirs])
        cmd.extend(["-l%s" % l for l in libs])
        if sys.platform == "darwin":
            # This tells the compiler to use the already-loaded python
            # symbols (which should always be the right ones).
            cmd.extend(["-Xcompiler", "-undefined,dynamic_lookup"])

        # Remove "-u Symbol" arguments, since they are usually not
        # relevant for the new compilation, even if they were used for
        # compiling python.  If they are necessary, the nvcc syntax is
        # "-U Symbol" with a capital U.
        done = False
        while not done:
            try:
                indexof = cmd.index("-u")
                cmd.pop(indexof)  # Remove -u
                cmd.pop(indexof)  # Remove argument to -u
            except ValueError, e:
                done = True
示例#8
0
    def compile_str(module_name,
                    src_code,
                    location=None,
                    include_dirs=[],
                    lib_dirs=[],
                    libs=[],
                    preargs=[],
                    rpaths=rpath_defaults):
        """:param module_name: string (this has been embedded in the src_code
        :param src_code: a complete c or c++ source listing for the module
        :param location: a pre-existing filesystem directory where the
                         cpp file and .so will be written
        :param include_dirs: a list of include directory names
                             (each gets prefixed with -I)
        :param lib_dirs: a list of library search path directory names
                         (each gets prefixed with -L)
        :param libs: a list of libraries to link with
                     (each gets prefixed with -l)
        :param preargs: a list of extra compiler arguments
        :param rpaths: list of rpaths to use with Xlinker.
                       Defaults to `rpath_defaults`.

        :returns: dynamically-imported python module of the compiled code.

        :note 1: On Windows 7 with nvcc 3.1 we need to compile in the
                 real directory Otherwise nvcc never finish.

        """

        rpaths = list(rpaths)

        if sys.platform == "win32":
            # Remove some compilation args that cl.exe does not understand.
            # cl.exe is the compiler used by nvcc on Windows.
            for a in [
                    "-Wno-write-strings", "-Wno-unused-label",
                    "-Wno-unused-variable", "-fno-math-errno"
            ]:
                if a in preargs:
                    preargs.remove(a)
        if preargs is None:
            preargs = []
        else:
            preargs = list(preargs)
        if sys.platform != 'win32':
            preargs.append('-fPIC')
        no_opt = False
        cuda_root = config.cuda.root

        #The include dirs gived by the user should have precedence over
        #the standards ones.
        include_dirs = include_dirs + std_include_dirs()
        if os.path.abspath(os.path.split(__file__)[0]) not in include_dirs:
            include_dirs.append(os.path.abspath(os.path.split(__file__)[0]))

        libs = std_libs() + libs
        if 'cudart' not in libs:
            libs.append('cudart')

        lib_dirs = std_lib_dirs() + lib_dirs
        if cuda_root:
            lib_dirs.append(os.path.join(cuda_root, 'lib'))

            # from Benjamin Schrauwen April 14 2010
            if sys.platform != 'darwin':
                # No 64 bit CUDA libraries available on the mac, yet..
                lib_dirs.append(os.path.join(cuda_root, 'lib64'))

        if sys.platform == 'darwin':
            # On the mac, nvcc is not able to link using -framework
            # Python, so we have manually add the correct library and
            # paths
            darwin_python_lib = commands.getoutput('python-config --ldflags')
        else:
            # sometimes, the linker cannot find -lpython so we need to tell it
            # explicitly where it is located
            # this returns somepath/lib/python2.x
            python_lib = distutils.sysconfig.get_python_lib(plat_specific=1, \
                            standard_lib=1)
            python_lib = os.path.dirname(python_lib)
            if python_lib not in lib_dirs:
                lib_dirs.append(python_lib)

        cppfilename = os.path.join(location, 'mod.cu')
        cppfile = file(cppfilename, 'w')

        _logger.debug('Writing module C++ code to %s', cppfilename)
        ofiles = []
        rval = None

        cppfile.write(src_code)
        cppfile.close()
        lib_filename = os.path.join(
            location, '%s.%s' % (module_name, get_lib_extension()))

        _logger.debug('Generating shared lib %s', lib_filename)
        # TODO: Why do these args cause failure on gtx285 that has 1.3
        # compute capability? '--gpu-architecture=compute_13',
        # '--gpu-code=compute_13',
        #nvcc argument
        preargs1 = [
            pa for pa in preargs if pa.startswith('-O')
            or pa.startswith('--maxrregcount=') or pa.startswith('-arch=')
        ]
        preargs2 = [pa for pa in preargs
                    if pa not in preargs1]  # other arguments

        cmd = [nvcc_path, '-shared', '-g'] + preargs1
        if config.nvcc.compiler_bindir:
            cmd.extend(['--compiler-bindir', config.nvcc.compiler_bindir])

        if sys.platform == 'win32':
            # add flags for Microsoft compiler to create .pdb files
            preargs2.append('/Zi')
            cmd.extend(['-Xlinker', '/DEBUG'])

        if local_bitwidth() == 64:
            cmd.append('-m64')
        else:
            cmd.append('-m32')

        if len(preargs2) > 0:
            cmd.extend(['-Xcompiler', ','.join(preargs2)])

        if config.cuda.root and os.path.exists(
                os.path.join(config.cuda.root, 'lib')):
            rpaths.append(os.path.join(config.cuda.root, 'lib'))
            if sys.platform != 'darwin':
                # the 64bit CUDA libs are in the same files as are
                # named by the function above
                rpaths.append(os.path.join(config.cuda.root, 'lib64'))
        if sys.platform != 'win32':
            # the -rpath option is not understood by the Microsoft linker
            for rpath in rpaths:
                cmd.extend(['-Xlinker', ','.join(['-rpath', rpath])])
        cmd.extend('-I%s' % idir for idir in include_dirs)
        cmd.extend(['-o', lib_filename])
        cmd.append(os.path.split(cppfilename)[-1])
        cmd.extend(['-L%s' % ldir for ldir in lib_dirs])
        cmd.extend(['-l%s' % l for l in libs])
        if module_name != 'cuda_ndarray':
            cmd.append("-lcuda_ndarray")
        if sys.platform == 'darwin':
            cmd.extend(darwin_python_lib.split())

        if sys.platform == 'darwin':
            done = False
            while not done:
                try:
                    indexof = cmd.index('-framework')
                    newarg = '-Xcompiler', ','.join(cmd[indexof:(indexof + 2)])
                    cmd.pop(indexof)  # Remove -framework
                    cmd.pop(indexof)  # Remove argument to -framework
                    cmd.extend(newarg)
                except ValueError, e:
                    done = True