def compile_str(
    module_name,
    src_code,
    location=None,
    include_dirs=[],
    lib_dirs=[],
    libs=[],
    preargs=[],
    rpaths=rpath_defaults,
    py_module=True,
    hide_symbols=True,
):
    """
    Compile a CUDA/C++ source listing into a shared library with nvcc.

    Parameters
    ----------
    module_name: str
        This has been embedded in the src_code.
    src_code
        A complete c or c++ source listing for the module.
    location
        A pre-existing filesystem directory where the cpp file and .so will
        be written. Although the default is None, a directory is required:
        it is joined with file names and used as the working directory.
    include_dirs
        A list of include directory names (each gets prefixed with -I).
    lib_dirs
        A list of library search path directory names (each gets prefixed
        with -L).
    libs
        A list of libraries to link with (each gets prefixed with -l).
    preargs
        A list of extra compiler arguments. None is treated as an empty
        list. The list passed in is never modified.
    rpaths
        List of rpaths to use with Xlinker. Defaults to `rpath_defaults`.
    py_module
        If False, compile to a shared library, but do not import as a Python
        module.
    hide_symbols
        If True (the default), hide all symbols from the library symbol table
        unless explicitly exported.

    Returns
    -------
    module
        Dynamically-imported python module of the compiled code.
        (unless py_module is False, in that case returns None.)

    Raises
    ------
    Exception
        If nvcc exits with a non-zero status. The numbered source and the
        filtered compiler output are printed to stderr first.

    Notes
    -----
    On Windows 7 with nvcc 3.1 we need to compile in the real directory
    Otherwise nvcc never finishes.

    """
    rpaths = list(rpaths)

    # Copy before touching anything: the list defaults in the signature are
    # shared between calls and the caller's own list must not be modified.
    # Doing this first also makes ``preargs=None`` safe on every platform.
    preargs = [] if preargs is None else list(preargs)
    if sys.platform == "win32":
        # Remove some compilation args that cl.exe does not understand.
        # cl.exe is the compiler used by nvcc on Windows.
        cl_unsupported = (
            "-Wno-write-strings",
            "-Wno-unused-label",
            "-Wno-unused-variable",
            "-fno-math-errno",
        )
        preargs = [pa for pa in preargs if pa not in cl_unsupported]
    else:
        preargs.append("-fPIC")

    cuda_root = config.cuda.root

    # The include dirs given by the user should have precedence over
    # the standard ones.
    include_dirs = include_dirs + std_include_dirs()
    this_dir = os.path.abspath(os.path.split(__file__)[0])
    if this_dir not in include_dirs:
        include_dirs.append(this_dir)

    libs = std_libs() + libs
    if "cudart" not in libs:
        libs.append("cudart")

    lib_dirs = std_lib_dirs() + lib_dirs
    cuda_lib_dirs = (
        os.path.join(cuda_root, "lib"),
        os.path.join(cuda_root, "lib64"),
    )
    if any(ld in cuda_lib_dirs for ld in lib_dirs):
        warnings.warn(
            "You have the cuda library directory in your "
            "lib_dirs. This has been known to cause problems "
            "and should not be done."
        )

    if sys.platform != "darwin":
        # sometimes, the linker cannot find -lpython so we need to tell it
        # explicitly where it is located
        # this returns somepath/lib/python2.x
        python_lib = distutils.sysconfig.get_python_lib(plat_specific=1, standard_lib=1)
        python_lib = os.path.dirname(python_lib)
        if python_lib not in lib_dirs:
            lib_dirs.append(python_lib)

    cppfilename = os.path.join(location, "mod.cu")
    # ``with`` guarantees the handle is closed even if the write fails.
    with open(cppfilename, "w") as cppfile:
        _logger.debug("Writing module C++ code to %s", cppfilename)
        cppfile.write(src_code)

    lib_filename = os.path.join(location, "%s.%s" % (module_name, get_lib_extension()))
    _logger.debug("Generating shared lib %s", lib_filename)

    # TODO: Why do these args cause failure on gtx285 that has 1.3
    # compute capability? '--gpu-architecture=compute_13',
    # '--gpu-code=compute_13',

    # Arguments with one of these prefixes are handed to nvcc itself; every
    # other argument is forwarded to the host compiler through -Xcompiler.
    nvcc_prefixes = (
        "-O",
        "-arch=",
        "-ccbin=",
        "-G",
        "-g",
        "-I",
        "-L",
        "--fmad",
        "--ftz",
        "--maxrregcount",
        "--prec-div",
        "--prec-sqrt",
        "--use_fast_math",
        "-fmad",
        "-ftz",
        "-maxrregcount",
        "-prec-div",
        "-prec-sqrt",
        "-use_fast_math",
        "--use-local-env",
        "--cl-version=",
    )
    preargs1 = [pa for pa in preargs if pa.startswith(nvcc_prefixes)]  # nvcc arguments
    preargs2 = [pa for pa in preargs if not pa.startswith(nvcc_prefixes)]  # other arguments

    # Don't put -G by default, as it slows things down.
    # We aren't sure if -g slows things down, so we don't put it by default.
    cmd = [nvcc_path, "-shared"] + preargs1
    if config.nvcc.compiler_bindir:
        cmd.extend(["--compiler-bindir", config.nvcc.compiler_bindir])

    if sys.platform == "win32":
        # add flags for Microsoft compiler to create .pdb files
        preargs2.extend(["/Zi", "/MD"])
        cmd.extend(["-Xlinker", "/DEBUG"])
        # remove the complaints for the duplication of `double round(double)`
        # in both math_functions.h and pymath.h,
        # by not including the one in pymath.h
        cmd.extend(["-D HAVE_ROUND"])
    elif hide_symbols:
        preargs2.append("-fvisibility=hidden")

    cmd.append("-m64" if local_bitwidth() == 64 else "-m32")

    if preargs2:
        cmd.extend(["-Xcompiler", ",".join(preargs2)])

    # We should not use rpath if possible. If the user provided
    # a cuda.root flag, we need to add one, but
    # otherwise, we don't add it. See gh-1540 and
    # https://wiki.debian.org/RpathIssue for details.
    if user_provided_cuda_root and os.path.exists(os.path.join(config.cuda.root, "lib")):
        rpaths.append(os.path.join(config.cuda.root, "lib"))
        if sys.platform != "darwin":
            # the CUDA libs are universal (contain both 32-bit and 64-bit)
            rpaths.append(os.path.join(config.cuda.root, "lib64"))
    if sys.platform != "win32":
        # the -rpath option is not understood by the Microsoft linker
        for rpath in rpaths:
            cmd.extend(["-Xlinker", ",".join(["-rpath", rpath])])
    cmd.extend("-I%s" % idir for idir in include_dirs)
    cmd.extend(["-o", lib_filename])
    # Only the base name: nvcc is run from inside ``location`` below.
    cmd.append(os.path.split(cppfilename)[-1])
    cmd.extend(["-L%s" % ldir for ldir in lib_dirs])
    cmd.extend(["-l%s" % l for l in libs])
    if sys.platform == "darwin":
        # This tells the compiler to use the already-loaded python
        # symbols (which should always be the right ones).
        cmd.extend(["-Xcompiler", "-undefined,dynamic_lookup"])

    # Remove "-u Symbol" arguments, since they are usually not
    # relevant for the new compilation, even if they were used for
    # compiling python. If they are necessary, the nvcc syntax is
    # "-U Symbol" with a capital U.
    # Slice deletion also copes with a trailing "-u" that has no argument.
    while "-u" in cmd:
        indexof = cmd.index("-u")
        del cmd[indexof:indexof + 2]

    # CUDA Toolkit v4.1 Known Issues:
    # Host linker on Mac OS 10.7 (and 10.6 for me) passes -no_pie option
    # to nvcc this option is not recognized and generates an error
    # http://stackoverflow.com/questions/9327265/nvcc-unknown-option-no-pie
    # Passing -Xlinker -pie stops -no_pie from getting passed
    if sys.platform == "darwin":
        # Compare numerically: as plain strings "10.0" sorts before "4.1".
        # NOTE(review): assumes nvcc_version is a dotted string like "7.5";
        # anything else falls back to the previous plain comparison.
        try:
            needs_pie = tuple(int(part) for part in nvcc_version.split(".")) >= (4, 1)
        except (AttributeError, ValueError):
            needs_pie = nvcc_version >= "4.1"
        if needs_pie:
            cmd.extend(["-Xlinker", "-pie"])

    # cmd.append("--ptxas-options=-v") #uncomment this to see
    # register and shared-mem requirements
    _logger.debug("Running cmd %s", " ".join(cmd))
    orig_dir = os.getcwd()
    try:
        os.chdir(location)
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        nvcc_stdout, nvcc_stderr = decode_iter(p.communicate()[:2])
    finally:
        # Always restore the caller's working directory.
        os.chdir(orig_dir)

    # Messages that are not worth logging ("skipping incompatible" is ld
    # skipping an incompatible library).
    ignorable = (
        "skipping incompatible",
        "declared but never referenced",
        "statement is unreachable",
    )
    for eline in nvcc_stderr.split("\n"):
        if not eline or any(marker in eline for marker in ignorable):
            continue
        _logger.info("NVCC: %s", eline)

    if p.returncode:
        for i, l in enumerate(src_code.split("\n")):
            print(i + 1, l, file=sys.stderr)
        print("===============================", file=sys.stderr)
        # filter the output from the compiler
        for l in nvcc_stderr.split("\n"):
            if not l:
                continue
            # filter out the annoying declaration warnings; lines without
            # a colon are always printed
            _, colon, after_colon = l.partition(":")
            if colon and after_colon.startswith((" warning: variable", " warning: label")):
                continue
            print(l, file=sys.stderr)
        print(nvcc_stdout)
        print(cmd)
        raise Exception("nvcc return status", p.returncode, "for cmd", " ".join(cmd))
    elif config.cmodule.compilation_warning and nvcc_stdout:
        print(nvcc_stdout)

    if nvcc_stdout:
        # this doesn't happen to my knowledge
        print("DEBUG: nvcc STDOUT", nvcc_stdout, file=sys.stderr)

    if py_module:
        # touch the __init__ file
        open(os.path.join(location, "__init__.py"), "w").close()
        return dlimport(lib_filename)
    return None
def compile_str(module_name, src_code, location=None, include_dirs=[],
                lib_dirs=[], libs=[], preargs=[], rpaths=rpath_defaults,
                py_module=True):
    """Write CUDA source to disk and assemble the nvcc command line for it.

    :param module_name: string (this has been embedded in the src_code)
    :param src_code: a complete c or c++ source listing for the module
    :param location: a pre-existing filesystem directory where the cpp
        file and .so will be written. A directory is required even though
        the default is None, because it is joined with file names.
    :param include_dirs: a list of include directory names (each gets
        prefixed with -I)
    :param lib_dirs: a list of library search path directory names (each
        gets prefixed with -L)
    :param libs: a list of libraries to link with (each gets prefixed
        with -l)
    :param preargs: a list of extra compiler arguments. None is treated as
        an empty list; the list passed in is never modified.
    :param rpaths: list of rpaths to use with Xlinker. Defaults to
        `rpath_defaults`.
    :param py_module: if False, compile to a shared library, but do not
        import as a Python module. Not read by the code visible here.

    :returns: None.

    NOTE(review): this definition ends once the command list has been
    assembled; nvcc is not invoked and nothing is imported here. It looks
    like a truncated copy of a longer function -- confirm before relying
    on it.

    :note 1: On Windows 7 with nvcc 3.1 we need to compile in the
        real directory Otherwise nvcc never finishes.
    """
    rpaths = list(rpaths)

    # Copy before touching anything: the list defaults in the signature are
    # shared between calls and the caller's own list must not be modified.
    # Doing this first also makes ``preargs=None`` safe on every platform.
    preargs = [] if preargs is None else list(preargs)
    if sys.platform == "win32":
        # Remove some compilation args that cl.exe does not understand.
        # cl.exe is the compiler used by nvcc on Windows.
        cl_unsupported = (
            "-Wno-write-strings",
            "-Wno-unused-label",
            "-Wno-unused-variable",
            "-fno-math-errno",
        )
        preargs = [pa for pa in preargs if pa not in cl_unsupported]
    else:
        preargs.append('-fPIC')

    cuda_root = config.cuda.root

    # The include dirs given by the user should have precedence over
    # the standard ones.
    include_dirs = include_dirs + std_include_dirs()
    this_dir = os.path.abspath(os.path.split(__file__)[0])
    if this_dir not in include_dirs:
        include_dirs.append(this_dir)

    libs = std_libs() + libs
    if 'cudart' not in libs:
        libs.append('cudart')

    lib_dirs = std_lib_dirs() + lib_dirs
    if cuda_root:
        lib_dirs.append(os.path.join(cuda_root, 'lib'))

        # from Benjamin Schrauwen April 14 2010
        if sys.platform != 'darwin':
            # OS X uses universal libraries
            lib_dirs.append(os.path.join(cuda_root, 'lib64'))

    if sys.platform != 'darwin':
        # sometimes, the linker cannot find -lpython so we need to tell it
        # explicitly where it is located
        # this returns somepath/lib/python2.x
        python_lib = distutils.sysconfig.get_python_lib(plat_specific=1,
                                                        standard_lib=1)
        python_lib = os.path.dirname(python_lib)
        if python_lib not in lib_dirs:
            lib_dirs.append(python_lib)

    cppfilename = os.path.join(location, 'mod.cu')
    # ``with`` guarantees the handle is closed even if the write fails.
    with open(cppfilename, 'w') as cppfile:
        _logger.debug('Writing module C++ code to %s', cppfilename)
        cppfile.write(src_code)

    lib_filename = os.path.join(
        location, '%s.%s' % (module_name, get_lib_extension()))
    _logger.debug('Generating shared lib %s', lib_filename)

    # TODO: Why do these args cause failure on gtx285 that has 1.3
    # compute capability? '--gpu-architecture=compute_13',
    # '--gpu-code=compute_13',

    # Arguments with one of these prefixes are handed to nvcc itself; every
    # other argument is forwarded to the host compiler through -Xcompiler.
    # '-ccbin=' and '--fmad' are separate entries: without the comma the two
    # literals were concatenated into the single pattern '-ccbin=--fmad'.
    nvcc_prefixes = (
        '-O', '-arch=', '-ccbin=',
        '--fmad', '--ftz', '--maxrregcount',
        '--prec-div', '--prec-sqrt', '--use_fast_math',
        '-fmad', '-ftz', '-maxrregcount',
        '-prec-div', '-prec-sqrt', '-use_fast_math',
    )
    preargs1 = [pa for pa in preargs
                if pa.startswith(nvcc_prefixes)]  # nvcc arguments
    preargs2 = [pa for pa in preargs
                if not pa.startswith(nvcc_prefixes)]  # other arguments

    cmd = [nvcc_path, '-shared', '-g'] + preargs1
    if config.nvcc.compiler_bindir:
        cmd.extend(['--compiler-bindir', config.nvcc.compiler_bindir])

    if sys.platform == 'win32':
        # add flags for Microsoft compiler to create .pdb files
        preargs2.extend(['/Zi', '/MD'])
        cmd.extend(['-Xlinker', '/DEBUG'])

    cmd.append('-m64' if local_bitwidth() == 64 else '-m32')

    if preargs2:
        cmd.extend(['-Xcompiler', ','.join(preargs2)])

    # We should not use rpath if possible. If the user provided
    # a cuda.root flag, we need to add one, but
    # otherwise, we don't add it. See gh-1540 and
    # https://wiki.debian.org/RpathIssue for details.
    if (user_provided_cuda_root and
            os.path.exists(os.path.join(config.cuda.root, 'lib'))):
        rpaths.append(os.path.join(config.cuda.root, 'lib'))
        if sys.platform != 'darwin':
            # the CUDA libs are universal (contain both 32-bit and 64-bit)
            rpaths.append(os.path.join(config.cuda.root, 'lib64'))
    if sys.platform != 'win32':
        # the -rpath option is not understood by the Microsoft linker
        for rpath in rpaths:
            cmd.extend(['-Xlinker', ','.join(['-rpath', rpath])])
    cmd.extend('-I%s' % idir for idir in include_dirs)
    cmd.extend(['-o', lib_filename])
    cmd.append(os.path.split(cppfilename)[-1])
    cmd.extend(['-L%s' % ldir for ldir in lib_dirs])
    cmd.extend(['-l%s' % l for l in libs])
    if sys.platform == 'darwin':
        # This tells the compiler to use the already-loaded python
        # symbols (which should always be the right ones).
        cmd.extend(['-Xcompiler', '-undefined,dynamic_lookup'])

    # Remove "-u Symbol" arguments, since they are usually not
    # relevant for the new compilation, even if they were used for
    # compiling python. If they are necessary, the nvcc syntax is
    # "-U Symbol" with a capital U.
    # Slice deletion also copes with a trailing "-u" that has no argument.
    while '-u' in cmd:
        indexof = cmd.index('-u')
        del cmd[indexof:indexof + 2]
def compile_str(
        module_name, src_code,
        location=None, include_dirs=[], lib_dirs=[], libs=[], preargs=[],
        rpaths=rpath_defaults, py_module=True, hide_symbols=True):
    """Compile a CUDA/C++ source listing into a shared library with nvcc.

    :param module_name: string (this has been embedded in the src_code)
    :param src_code: a complete c or c++ source listing for the module
    :param location: a pre-existing filesystem directory where the
        cpp file and .so will be written. A directory is required even
        though the default is None: it is joined with file names and used
        as the working directory.
    :param include_dirs: a list of include directory names
        (each gets prefixed with -I)
    :param lib_dirs: a list of library search path directory names
        (each gets prefixed with -L)
    :param libs: a list of libraries to link with
        (each gets prefixed with -l)
    :param preargs: a list of extra compiler arguments. None is treated as
        an empty list; the list passed in is never modified.
    :param rpaths: list of rpaths to use with Xlinker.
        Defaults to `rpath_defaults`.
    :param py_module: if False, compile to a shared library, but
        do not import as a Python module.
    :param hide_symbols: if True (the default), hide all symbols
        from the library symbol table unless explicitly exported.

    :returns: dynamically-imported python module of the compiled code.
        (unless py_module is False, in that case returns None.)

    :raises Exception: if nvcc exits with a non-zero status. The numbered
        source and the filtered compiler output are printed to stderr
        first.

    :note 1: On Windows 7 with nvcc 3.1 we need to compile in the
        real directory Otherwise nvcc never finishes.
    """
    rpaths = list(rpaths)

    # Copy before touching anything: the list defaults in the signature are
    # shared between calls and the caller's own list must not be modified.
    # Doing this first also makes ``preargs=None`` safe on every platform.
    preargs = [] if preargs is None else list(preargs)
    if sys.platform == "win32":
        # Remove some compilation args that cl.exe does not understand.
        # cl.exe is the compiler used by nvcc on Windows.
        cl_unsupported = ("-Wno-write-strings", "-Wno-unused-label",
                          "-Wno-unused-variable", "-fno-math-errno")
        preargs = [pa for pa in preargs if pa not in cl_unsupported]
    else:
        preargs.append('-fPIC')

    cuda_root = config.cuda.root

    # The include dirs given by the user should have precedence over
    # the standard ones.
    include_dirs = include_dirs + std_include_dirs()
    this_dir = os.path.abspath(os.path.split(__file__)[0])
    if this_dir not in include_dirs:
        include_dirs.append(this_dir)

    libs = std_libs() + libs
    if 'cudart' not in libs:
        libs.append('cudart')

    lib_dirs = std_lib_dirs() + lib_dirs
    cuda_lib_dirs = (os.path.join(cuda_root, 'lib'),
                     os.path.join(cuda_root, 'lib64'))
    if any(ld in cuda_lib_dirs for ld in lib_dirs):
        warnings.warn("You have the cuda library directory in your "
                      "lib_dirs. This has been known to cause problems "
                      "and should not be done.")

    if sys.platform != 'darwin':
        # sometimes, the linker cannot find -lpython so we need to tell it
        # explicitly where it is located
        # this returns somepath/lib/python2.x
        python_lib = distutils.sysconfig.get_python_lib(plat_specific=1,
                                                        standard_lib=1)
        python_lib = os.path.dirname(python_lib)
        if python_lib not in lib_dirs:
            lib_dirs.append(python_lib)

    cppfilename = os.path.join(location, 'mod.cu')
    # ``with`` guarantees the handle is closed even if the write fails.
    with open(cppfilename, 'w') as cppfile:
        _logger.debug('Writing module C++ code to %s', cppfilename)
        cppfile.write(src_code)

    lib_filename = os.path.join(
        location, '%s.%s' % (module_name, get_lib_extension()))
    _logger.debug('Generating shared lib %s', lib_filename)

    # TODO: Why do these args cause failure on gtx285 that has 1.3
    # compute capability? '--gpu-architecture=compute_13',
    # '--gpu-code=compute_13',

    # Arguments with one of these prefixes are handed to nvcc itself; every
    # other argument is forwarded to the host compiler through -Xcompiler.
    nvcc_prefixes = (
        '-O', '-arch=', '-ccbin=', '-G', '-g', '-I', '-L',
        '--fmad', '--ftz', '--maxrregcount',
        '--prec-div', '--prec-sqrt', '--use_fast_math',
        '-fmad', '-ftz', '-maxrregcount',
        '-prec-div', '-prec-sqrt', '-use_fast_math',
        '--use-local-env', '--cl-version=',
    )
    preargs1 = [pa for pa in preargs
                if pa.startswith(nvcc_prefixes)]  # nvcc arguments
    preargs2 = [pa for pa in preargs
                if not pa.startswith(nvcc_prefixes)]  # other arguments

    # Don't put -G by default, as it slows things down.
    # We aren't sure if -g slows things down, so we don't put it by default.
    cmd = [nvcc_path, '-shared'] + preargs1
    if config.nvcc.compiler_bindir:
        cmd.extend(['--compiler-bindir', config.nvcc.compiler_bindir])

    if sys.platform == 'win32':
        # add flags for Microsoft compiler to create .pdb files
        preargs2.extend(['/Zi', '/MD'])
        cmd.extend(['-Xlinker', '/DEBUG'])
        # remove the complaints for the duplication of `double round(double)`
        # in both math_functions.h and pymath.h,
        # by not including the one in pymath.h
        cmd.extend(['-D HAVE_ROUND'])
    elif hide_symbols:
        preargs2.append('-fvisibility=hidden')

    cmd.append('-m64' if local_bitwidth() == 64 else '-m32')

    if preargs2:
        cmd.extend(['-Xcompiler', ','.join(preargs2)])

    # We should not use rpath if possible. If the user provided
    # a cuda.root flag, we need to add one, but
    # otherwise, we don't add it. See gh-1540 and
    # https://wiki.debian.org/RpathIssue for details.
    if (user_provided_cuda_root and
            os.path.exists(os.path.join(config.cuda.root, 'lib'))):
        rpaths.append(os.path.join(config.cuda.root, 'lib'))
        if sys.platform != 'darwin':
            # the CUDA libs are universal (contain both 32-bit and 64-bit)
            rpaths.append(os.path.join(config.cuda.root, 'lib64'))
    if sys.platform != 'win32':
        # the -rpath option is not understood by the Microsoft linker
        for rpath in rpaths:
            cmd.extend(['-Xlinker', ','.join(['-rpath', rpath])])
    cmd.extend('-I%s' % idir for idir in include_dirs)
    cmd.extend(['-o', lib_filename])
    # Only the base name: nvcc is run from inside ``location`` below.
    cmd.append(os.path.split(cppfilename)[-1])
    cmd.extend(['-L%s' % ldir for ldir in lib_dirs])
    cmd.extend(['-l%s' % l for l in libs])
    if sys.platform == 'darwin':
        # This tells the compiler to use the already-loaded python
        # symbols (which should always be the right ones).
        cmd.extend(['-Xcompiler', '-undefined,dynamic_lookup'])

    # Remove "-u Symbol" arguments, since they are usually not
    # relevant for the new compilation, even if they were used for
    # compiling python. If they are necessary, the nvcc syntax is
    # "-U Symbol" with a capital U.
    # Slice deletion also copes with a trailing "-u" that has no argument.
    while '-u' in cmd:
        indexof = cmd.index('-u')
        del cmd[indexof:indexof + 2]

    # CUDA Toolkit v4.1 Known Issues:
    # Host linker on Mac OS 10.7 (and 10.6 for me) passes -no_pie option
    # to nvcc this option is not recognized and generates an error
    # http://stackoverflow.com/questions/9327265/nvcc-unknown-option-no-pie
    # Passing -Xlinker -pie stops -no_pie from getting passed
    if sys.platform == 'darwin':
        # Compare numerically: as plain strings '10.0' sorts before '4.1'.
        # NOTE(review): assumes nvcc_version is a dotted string like '7.5';
        # anything else falls back to the previous plain comparison.
        try:
            needs_pie = tuple(
                int(part) for part in nvcc_version.split('.')) >= (4, 1)
        except (AttributeError, ValueError):
            needs_pie = nvcc_version >= '4.1'
        if needs_pie:
            cmd.extend(['-Xlinker', '-pie'])

    # cmd.append("--ptxas-options=-v") #uncomment this to see
    # register and shared-mem requirements
    _logger.debug('Running cmd %s', ' '.join(cmd))
    orig_dir = os.getcwd()
    try:
        os.chdir(location)
        p = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        nvcc_stdout, nvcc_stderr = decode_iter(p.communicate()[:2])
    finally:
        # Always restore the caller's working directory.
        os.chdir(orig_dir)

    # Messages that are not worth logging ('skipping incompatible' is ld
    # skipping an incompatible library).
    ignorable = ('skipping incompatible',
                 'declared but never referenced',
                 'statement is unreachable')
    for eline in nvcc_stderr.split('\n'):
        if not eline or any(marker in eline for marker in ignorable):
            continue
        _logger.info("NVCC: %s", eline)

    if p.returncode:
        for i, l in enumerate(src_code.split('\n')):
            print(i + 1, l, file=sys.stderr)
        print('===============================', file=sys.stderr)
        # filter the output from the compiler
        for l in nvcc_stderr.split('\n'):
            if not l:
                continue
            # filter out the annoying declaration warnings; lines without
            # a colon are always printed
            _, colon, after_colon = l.partition(':')
            if colon and after_colon.startswith(
                    (' warning: variable', ' warning: label')):
                continue
            print(l, file=sys.stderr)
        print(nvcc_stdout)
        print(cmd)
        raise Exception('nvcc return status', p.returncode,
                        'for cmd', ' '.join(cmd))
    elif config.cmodule.compilation_warning and nvcc_stdout:
        print(nvcc_stdout)

    if nvcc_stdout:
        # this doesn't happen to my knowledge
        print("DEBUG: nvcc STDOUT", nvcc_stdout, file=sys.stderr)

    if py_module:
        # touch the __init__ file
        open(os.path.join(location, "__init__.py"), 'w').close()
        return dlimport(lib_filename)
    return None
def compile_str(
        module_name, src_code,
        location=None, include_dirs=[], lib_dirs=[], libs=[], preargs=[],
        rpaths=rpath_defaults, py_module=True):
    """Write CUDA source to disk and assemble the nvcc command line for it.

    :param module_name: string (this has been embedded in the src_code)
    :param src_code: a complete c or c++ source listing for the module
    :param location: a pre-existing filesystem directory where the
        cpp file and .so will be written. A directory is required even
        though the default is None, because it is joined with file names.
    :param include_dirs: a list of include directory names
        (each gets prefixed with -I)
    :param lib_dirs: a list of library search path directory names
        (each gets prefixed with -L)
    :param libs: a list of libraries to link with
        (each gets prefixed with -l)
    :param preargs: a list of extra compiler arguments. None is treated as
        an empty list; the list passed in is never modified.
    :param rpaths: list of rpaths to use with Xlinker.
        Defaults to `rpath_defaults`.
    :param py_module: if False, compile to a shared library, but
        do not import as a Python module. Not read by the code visible
        here.

    :returns: None.

    NOTE(review): this definition ends once the command list has been
    assembled; nvcc is not invoked and nothing is imported here. It looks
    like a truncated copy of a longer function -- confirm before relying
    on it.

    :note 1: On Windows 7 with nvcc 3.1 we need to compile in the
        real directory Otherwise nvcc never finishes.
    """
    # Local import: only needed for the python-config call on darwin.
    import subprocess

    rpaths = list(rpaths)

    # Copy before touching anything: the list defaults in the signature are
    # shared between calls and the caller's own list must not be modified.
    # Doing this first also makes ``preargs=None`` safe on every platform.
    preargs = [] if preargs is None else list(preargs)
    if sys.platform == "win32":
        # Remove some compilation args that cl.exe does not understand.
        # cl.exe is the compiler used by nvcc on Windows.
        cl_unsupported = ("-Wno-write-strings", "-Wno-unused-label",
                          "-Wno-unused-variable", "-fno-math-errno")
        preargs = [pa for pa in preargs if pa not in cl_unsupported]
    else:
        preargs.append('-fPIC')

    cuda_root = config.cuda.root

    # The include dirs given by the user should have precedence over
    # the standard ones.
    include_dirs = include_dirs + std_include_dirs()
    this_dir = os.path.abspath(os.path.split(__file__)[0])
    if this_dir not in include_dirs:
        include_dirs.append(this_dir)

    libs = std_libs() + libs
    if 'cudart' not in libs:
        libs.append('cudart')

    lib_dirs = std_lib_dirs() + lib_dirs
    if cuda_root:
        lib_dirs.append(os.path.join(cuda_root, 'lib'))

        # from Benjamin Schrauwen April 14 2010
        if sys.platform != 'darwin':
            # No 64 bit CUDA libraries available on the mac, yet..
            lib_dirs.append(os.path.join(cuda_root, 'lib64'))

    if sys.platform == 'darwin':
        # On the mac, nvcc is not able to link using -framework
        # Python, so we have to manually add the correct library and
        # paths.
        # subprocess stands in for the Python-2-only commands.getoutput:
        # the fixed command still runs through the shell with stderr
        # folded into stdout, and the output is read as text.
        python_config = subprocess.Popen(
            'python-config --ldflags', shell=True,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
            universal_newlines=True)
        darwin_python_lib = python_config.communicate()[0]
    else:
        # sometimes, the linker cannot find -lpython so we need to tell it
        # explicitly where it is located
        # this returns somepath/lib/python2.x
        python_lib = distutils.sysconfig.get_python_lib(plat_specific=1,
                                                        standard_lib=1)
        python_lib = os.path.dirname(python_lib)
        if python_lib not in lib_dirs:
            lib_dirs.append(python_lib)

    cppfilename = os.path.join(location, 'mod.cu')
    # open() instead of the Python-2-only file() builtin; ``with``
    # guarantees the handle is closed even if the write fails.
    with open(cppfilename, 'w') as cppfile:
        _logger.debug('Writing module C++ code to %s', cppfilename)
        cppfile.write(src_code)

    lib_filename = os.path.join(
        location, '%s.%s' % (module_name, get_lib_extension()))
    _logger.debug('Generating shared lib %s', lib_filename)

    # TODO: Why do these args cause failure on gtx285 that has 1.3
    # compute capability? '--gpu-architecture=compute_13',
    # '--gpu-code=compute_13',

    # Arguments with one of these prefixes are handed to nvcc itself; every
    # other argument is forwarded to the host compiler through -Xcompiler.
    nvcc_prefixes = (
        '-O', '-arch=',
        '--fmad', '--ftz', '--maxrregcount',
        '--prec-div', '--prec-sqrt', '--use_fast_math',
        '-fmad', '-ftz', '-maxrregcount',
        '-prec-div', '-prec-sqrt', '-use_fast_math',
    )
    preargs1 = [pa for pa in preargs
                if pa.startswith(nvcc_prefixes)]  # nvcc arguments
    preargs2 = [pa for pa in preargs
                if not pa.startswith(nvcc_prefixes)]  # other arguments

    cmd = [nvcc_path, '-shared', '-g'] + preargs1
    if config.nvcc.compiler_bindir:
        cmd.extend(['--compiler-bindir', config.nvcc.compiler_bindir])

    if sys.platform == 'win32':
        # add flags for Microsoft compiler to create .pdb files
        preargs2.append('/Zi')
        cmd.extend(['-Xlinker', '/DEBUG'])

    cmd.append('-m64' if local_bitwidth() == 64 else '-m32')

    if preargs2:
        cmd.extend(['-Xcompiler', ','.join(preargs2)])

    if config.cuda.root and os.path.exists(
            os.path.join(config.cuda.root, 'lib')):
        rpaths.append(os.path.join(config.cuda.root, 'lib'))
        if sys.platform != 'darwin':
            # the 64bit CUDA libs are in the same files as are
            # named by the function above
            rpaths.append(os.path.join(config.cuda.root, 'lib64'))
    if sys.platform != 'win32':
        # the -rpath option is not understood by the Microsoft linker
        for rpath in rpaths:
            cmd.extend(['-Xlinker', ','.join(['-rpath', rpath])])
    cmd.extend('-I%s' % idir for idir in include_dirs)
    cmd.extend(['-o', lib_filename])
    cmd.append(os.path.split(cppfilename)[-1])
    cmd.extend(['-L%s' % ldir for ldir in lib_dirs])
    cmd.extend(['-l%s' % l for l in libs])
    if module_name != 'cuda_ndarray':
        cmd.append("-lcuda_ndarray")
    if sys.platform == 'darwin':
        cmd.extend(darwin_python_lib.split())

    if sys.platform == 'darwin':
        # Rewrite every "-framework NAME" pair as
        # "-Xcompiler -framework,NAME", appended at the end of the command.
        # The joined form is a different string, so the loop terminates.
        while '-framework' in cmd:
            indexof = cmd.index('-framework')
            # Pops -framework first, then its argument (now at the same
            # index).
            framework_pair = [cmd.pop(indexof), cmd.pop(indexof)]
            cmd.extend(['-Xcompiler', ','.join(framework_pair)])
def compile_str(
        module_name, src_code,
        location=None, include_dirs=[], lib_dirs=[], libs=[], preargs=[],
        rpaths=rpath_defaults, py_module=True):
    """Write CUDA source to disk and assemble the nvcc command line for it.

    :param module_name: string (this has been embedded in the src_code)
    :param src_code: a complete c or c++ source listing for the module
    :param location: a pre-existing filesystem directory where the
        cpp file and .so will be written. A directory is required even
        though the default is None, because it is joined with file names.
    :param include_dirs: a list of include directory names
        (each gets prefixed with -I)
    :param lib_dirs: a list of library search path directory names
        (each gets prefixed with -L)
    :param libs: a list of libraries to link with
        (each gets prefixed with -l)
    :param preargs: a list of extra compiler arguments. None is treated as
        an empty list; the list passed in is never modified.
    :param rpaths: list of rpaths to use with Xlinker.
        Defaults to `rpath_defaults`.
    :param py_module: if False, compile to a shared library, but
        do not import as a Python module. Not read by the code visible
        here.

    :returns: None.

    NOTE(review): this definition ends once the command list has been
    assembled; nvcc is not invoked and nothing is imported here. It looks
    like a truncated copy of a longer function -- confirm before relying
    on it.

    :note 1: On Windows 7 with nvcc 3.1 we need to compile in the
        real directory Otherwise nvcc never finishes.
    """
    rpaths = list(rpaths)

    # Copy before touching anything: the list defaults in the signature are
    # shared between calls and the caller's own list must not be modified.
    # Doing this first also makes ``preargs=None`` safe on every platform.
    preargs = [] if preargs is None else list(preargs)
    if sys.platform == "win32":
        # Remove some compilation args that cl.exe does not understand.
        # cl.exe is the compiler used by nvcc on Windows.
        cl_unsupported = ("-Wno-write-strings", "-Wno-unused-label",
                          "-Wno-unused-variable", "-fno-math-errno")
        preargs = [pa for pa in preargs if pa not in cl_unsupported]
    else:
        preargs.append('-fPIC')

    cuda_root = config.cuda.root

    # The include dirs given by the user should have precedence over
    # the standard ones.
    include_dirs = include_dirs + std_include_dirs()
    this_dir = os.path.abspath(os.path.split(__file__)[0])
    if this_dir not in include_dirs:
        include_dirs.append(this_dir)

    libs = std_libs() + libs
    if 'cudart' not in libs:
        libs.append('cudart')

    lib_dirs = std_lib_dirs() + lib_dirs
    if cuda_root:
        lib_dirs.append(os.path.join(cuda_root, 'lib'))

        # from Benjamin Schrauwen April 14 2010
        if sys.platform != 'darwin':
            # OS X uses universal libraries
            lib_dirs.append(os.path.join(cuda_root, 'lib64'))

    if sys.platform != 'darwin':
        # sometimes, the linker cannot find -lpython so we need to tell it
        # explicitly where it is located
        # this returns somepath/lib/python2.x
        python_lib = distutils.sysconfig.get_python_lib(plat_specific=1,
                                                        standard_lib=1)
        python_lib = os.path.dirname(python_lib)
        if python_lib not in lib_dirs:
            lib_dirs.append(python_lib)

    cppfilename = os.path.join(location, 'mod.cu')
    # ``with`` guarantees the handle is closed even if the write fails.
    with open(cppfilename, 'w') as cppfile:
        _logger.debug('Writing module C++ code to %s', cppfilename)
        cppfile.write(src_code)

    lib_filename = os.path.join(
        location, '%s.%s' % (module_name, get_lib_extension()))
    _logger.debug('Generating shared lib %s', lib_filename)

    # TODO: Why do these args cause failure on gtx285 that has 1.3
    # compute capability? '--gpu-architecture=compute_13',
    # '--gpu-code=compute_13',

    # Arguments with one of these prefixes are handed to nvcc itself; every
    # other argument is forwarded to the host compiler through -Xcompiler.
    nvcc_prefixes = (
        '-O', '-arch=', '-ccbin=',
        '--fmad', '--ftz', '--maxrregcount',
        '--prec-div', '--prec-sqrt', '--use_fast_math',
        '-fmad', '-ftz', '-maxrregcount',
        '-prec-div', '-prec-sqrt', '-use_fast_math',
    )
    preargs1 = [pa for pa in preargs
                if pa.startswith(nvcc_prefixes)]  # nvcc arguments
    preargs2 = [pa for pa in preargs
                if not pa.startswith(nvcc_prefixes)]  # other arguments

    cmd = [nvcc_path, '-shared', '-g'] + preargs1
    if config.nvcc.compiler_bindir:
        cmd.extend(['--compiler-bindir', config.nvcc.compiler_bindir])

    if sys.platform == 'win32':
        # add flags for Microsoft compiler to create .pdb files
        preargs2.extend(['/Zi', '/MD'])
        cmd.extend(['-Xlinker', '/DEBUG'])

    cmd.append('-m64' if local_bitwidth() == 64 else '-m32')

    if preargs2:
        cmd.extend(['-Xcompiler', ','.join(preargs2)])

    # We should not use rpath if possible. If the user provided
    # a cuda.root flag, we need to add one, but
    # otherwise, we don't add it. See gh-1540 and
    # https://wiki.debian.org/RpathIssue for details.
    if (user_provided_cuda_root and
            os.path.exists(os.path.join(config.cuda.root, 'lib'))):
        rpaths.append(os.path.join(config.cuda.root, 'lib'))
        if sys.platform != 'darwin':
            # the CUDA libs are universal (contain both 32-bit and 64-bit)
            rpaths.append(os.path.join(config.cuda.root, 'lib64'))
    if sys.platform != 'win32':
        # the -rpath option is not understood by the Microsoft linker
        for rpath in rpaths:
            cmd.extend(['-Xlinker', ','.join(['-rpath', rpath])])
    cmd.extend('-I%s' % idir for idir in include_dirs)
    cmd.extend(['-o', lib_filename])
    cmd.append(os.path.split(cppfilename)[-1])
    cmd.extend(['-L%s' % ldir for ldir in lib_dirs])
    cmd.extend(['-l%s' % l for l in libs])
    if sys.platform == 'darwin':
        # This tells the compiler to use the already-loaded python
        # symbols (which should always be the right ones).
        cmd.extend(['-Xcompiler', '-undefined,dynamic_lookup'])

    # Remove "-u Symbol" arguments, since they are usually not
    # relevant for the new compilation, even if they were used for
    # compiling python. If they are necessary, the nvcc syntax is
    # "-U Symbol" with a capital U.
    # Slice deletion also copes with a trailing "-u" that has no argument.
    while '-u' in cmd:
        indexof = cmd.index('-u')
        del cmd[indexof:indexof + 2]
def compile_str(
        module_name, src_code,
        location=None, include_dirs=[], lib_dirs=[], libs=[], preargs=[],
        rpaths=rpath_defaults, py_module=True, hide_symbols=True):
    """
    Compile `src_code` with nvcc into a shared library and optionally
    import it.

    The source is written to ``mod.cpp`` or ``mod.cu`` inside `location`,
    nvcc is run from that directory, and the compiler diagnostics are
    filtered and forwarded to the logger / stderr.

    Parameters
    ----------
    module_name: str
        This has been embedded in the src_code.
    src_code
        A complete c or c++ source listing for the module.
    location
        A pre-existing filesystem directory where the cpp file and .so will
        be written. Although the default is None, it is passed straight to
        ``os.path.join`` and ``os.chdir``, so a real directory is required.
    include_dirs
        A list of include directory names (each gets prefixed with -I).
        Empty strings are dropped.
    lib_dirs
        A list of library search path directory names (each gets prefixed
        with -L). Empty strings are dropped.
    libs
        A list of libraries to link with (each gets prefixed with -l).
    preargs
        A list of extra compiler arguments, or None. The list is copied;
        the caller's object is never modified.
    rpaths
        List of rpaths to use with Xlinker. Defaults to `rpath_defaults`.
    py_module
        If False, compile to a shared library, but do not import as a
        Python module.
    hide_symbols
        If True (the default), hide all symbols from the library symbol
        table unless explicitly exported. Ignored on Windows.

    Returns
    -------
    module
        The result of ``dlimport(lib_filename)`` when `py_module` is true,
        otherwise None.

    Raises
    ------
    Exception
        If nvcc exits with a non-zero return code. The numbered source and
        the filtered compiler output are printed to stderr first.

    Notes
    -----
    On Windows 7 with nvcc 3.1 we need to compile in the real directory
    Otherwise nvcc never finish.

    The list-valued parameters keep their historical ``[]`` defaults for
    interface compatibility; this function only ever works on copies of
    them, so the shared default objects are never modified.

    """
    # Remove empty string directory
    include_dirs = [d for d in include_dirs if d]
    lib_dirs = [d for d in lib_dirs if d]

    rpaths = list(rpaths)

    # Copy first, filter second: the copy protects the caller's list (and
    # the shared default), and doing the None check up front keeps the
    # Windows filtering from failing on preargs=None.
    preargs = [] if preargs is None else list(preargs)
    if sys.platform == "win32":
        # Remove some compilation args that cl.exe does not understand.
        # cl.exe is the compiler used by nvcc on Windows.
        cl_unsupported = ("-Wno-write-strings", "-Wno-unused-label",
                          "-Wno-unused-variable", "-fno-math-errno")
        preargs = [pa for pa in preargs if pa not in cl_unsupported]
    else:
        preargs.append('-fPIC')
    if config.cmodule.remove_gxx_opt:
        preargs = [p for p in preargs if not p.startswith('-O')]

    cuda_root = config.cuda.root

    # The include dirs given by the user should have precedence over
    # the standards ones.
    include_dirs = include_dirs + std_include_dirs()
    this_dir = os.path.abspath(os.path.split(__file__)[0])
    if this_dir not in include_dirs:
        include_dirs.append(this_dir)

    libs = libs + std_libs()
    if 'cudart' not in libs:
        libs.append('cudart')

    lib_dirs = lib_dirs + std_lib_dirs()

    if sys.platform != 'darwin':
        # config.dnn.include_path add this by default for cudnn in the
        # new back-end. This should not be used in this back-end. So
        # just remove them.
        cuda_lib_dirs = (os.path.join(cuda_root, 'lib'),
                         os.path.join(cuda_root, 'lib64'))
        lib_dirs = [ld for ld in lib_dirs if ld not in cuda_lib_dirs]

        # sometimes, the linker cannot find -lpython so we need to tell it
        # explicitly where it is located
        # this returns somepath/lib/python2.x
        python_lib = distutils.sysconfig.get_python_lib(plat_specific=1,
                                                        standard_lib=1)
        python_lib = os.path.dirname(python_lib)
        if python_lib not in lib_dirs:
            lib_dirs.append(python_lib)

    cuda_markers = ("__global__", "__device__", "__host__", "<<<",
                    "nvmatrix.cuh")
    if (config.nvcc.cudafe == 'heuristic' and
            not any(marker in src_code for marker in cuda_markers)):
        # only calls existing CUDA functions, can compile much faster
        cppfilename = os.path.join(location, 'mod.cpp')
        src_code = ("#include <cuda.h>\n"
                    "#include <cuda_runtime_api.h>\n" +
                    src_code)
    else:
        # contains CUDA host code or device functions, needs .cu extension
        cppfilename = os.path.join(location, 'mod.cu')

    with open(cppfilename, 'w') as cppfile:
        _logger.debug('Writing module C++ code to %s', cppfilename)
        cppfile.write(src_code)

    lib_filename = os.path.join(
        location, '%s.%s' % (module_name, get_lib_extension()))

    _logger.debug('Generating shared lib %s', lib_filename)
    # TODO: Why do these args cause failure on gtx285 that has 1.3
    # compute capability? '--gpu-architecture=compute_13',
    # '--gpu-code=compute_13',

    # Split the extra arguments: preargs1 goes to nvcc itself, preargs2 is
    # forwarded to the host compiler through -Xcompiler.
    nvcc_patterns = (
        '-O', '-arch=', '-ccbin=', '-G', '-g', '-I', '-L',
        '--fmad', '--ftz', '--maxrregcount', '--prec-div', '--prec-sqrt',
        '--use_fast_math', '-fmad', '-ftz', '-maxrregcount', '-prec-div',
        '-prec-sqrt', '-use_fast_math', '--use-local-env',
        '--cl-version=', '-std=')
    preargs1 = []
    preargs2 = []
    for pa in preargs:
        if pa.startswith('-Wl,'):
            # Linker options are re-spelled for nvcc as "-Xlinker <opt>".
            # the -rpath option is not understood by the Microsoft linker
            if sys.platform != 'win32' or not pa.startswith('-Wl,-rpath'):
                preargs1.append('-Xlinker')
                preargs1.append(pa[4:])
        elif pa.startswith(nvcc_patterns):
            preargs1.append(pa)
        else:
            preargs2.append(pa)

    # Don't put -G by default, as it slow things down.
    # We aren't sure if -g slow things down, so we don't put it by default.
    cmd = [nvcc_path, '-shared'] + preargs1
    if config.nvcc.compiler_bindir:
        cmd.extend(['--compiler-bindir', config.nvcc.compiler_bindir])

    if sys.platform == 'win32':
        # add flags for Microsoft compiler to create .pdb files
        preargs2.extend(['/Zi', '/MD'])
        cmd.extend(['-Xlinker', '/DEBUG'])
        # remove the complaints for the duplication of `double round(double)`
        # in both math_functions.h and pymath.h,
        # by not including the one in pymath.h
        cmd.extend(['-D HAVE_ROUND'])
    elif hide_symbols:
        preargs2.append('-fvisibility=hidden')

    if local_bitwidth() == 64:
        cmd.append('-m64')
    else:
        cmd.append('-m32')

    if preargs2:
        cmd.extend(['-Xcompiler', ','.join(preargs2)])

    # We should not use rpath if possible. If the user provided
    # a cuda.root flag, we need to add one, but
    # otherwise, we don't add it. See gh-1540 and
    # https://wiki.debian.org/RpathIssue for details.
    if (not type(config.cuda).root.is_default and
            os.path.exists(os.path.join(config.cuda.root, 'lib'))):
        rpaths.append(os.path.join(config.cuda.root, 'lib'))
        if sys.platform != 'darwin':
            # the CUDA libs are universal (contain both 32-bit and 64-bit)
            rpaths.append(os.path.join(config.cuda.root, 'lib64'))
    if sys.platform != 'win32':
        # the -rpath option is not understood by the Microsoft linker
        for rpath in rpaths:
            cmd.extend(['-Xlinker', ','.join(['-rpath', rpath])])

    # to support path that includes spaces, we need to wrap it with
    # double quotes on Windows
    path_wrapper = '"' if os.name == 'nt' else ''
    cmd.extend(['-I%s%s%s' % (path_wrapper, idir, path_wrapper)
                for idir in include_dirs])
    cmd.extend(['-L%s%s%s' % (path_wrapper, ldir, path_wrapper)
                for ldir in lib_dirs])
    cmd.extend(['-o', lib_filename])
    # Only the file name is passed: nvcc is run from inside `location`.
    cmd.append(os.path.split(cppfilename)[-1])
    cmd.extend(['-l%s' % lib for lib in libs])
    if sys.platform == 'darwin':
        # This tells the compiler to use the already-loaded python
        # symbols (which should always be the right ones).
        cmd.extend(['-Xcompiler', '-undefined,dynamic_lookup'])

    # Remove "-u Symbol" arguments, since they are usually not
    # relevant for the new compilation, even if they were used for
    # compiling python.  If they are necessary, the nvcc syntax is
    # "-U Symbol" with a capital U.
    # Slice deletion tolerates a trailing '-u' that has no argument.
    while '-u' in cmd:
        indexof = cmd.index('-u')
        del cmd[indexof:indexof + 2]

    # CUDA Toolkit v4.1 Known Issues:
    # Host linker on Mac OS 10.7 (and 10.6 for me) passes -no_pie option
    # to nvcc this option is not recognized and generates an error
    # http://stackoverflow.com/questions/9327265/nvcc-unknown-option-no-pie
    # Passing -Xlinker -pie stops -no_pie from getting passed
    # NOTE(review): this compares against the string '4.1'; if nvcc_version
    # is a string the ordering is lexicographic (e.g. '10.0' sorts before
    # '4.1') -- confirm this is intended before changing it.
    if sys.platform == 'darwin' and nvcc_version >= '4.1':
        cmd.extend(['-Xlinker', '-pie'])

    # cmd.append("--ptxas-options=-v") #uncomment this to see
    # register and shared-mem requirements
    _logger.debug('Running cmd %s', ' '.join(cmd))
    orig_dir = os.getcwd()
    try:
        os.chdir(location)
        p = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        nvcc_stdout_raw, nvcc_stderr_raw = p.communicate()[:2]
        console_encoding = getpreferredencoding()
        nvcc_stdout = decode_with(nvcc_stdout_raw, console_encoding)
        nvcc_stderr = decode_with(nvcc_stderr_raw, console_encoding)
    finally:
        # Always restore the working directory, even if nvcc cannot start.
        os.chdir(orig_dir)

    # Forward compiler diagnostics to the logger, minus known noise.
    ignored_fragments = (
        'skipping incompatible',  # ld is skipping an incompatible library
        'declared but never referenced',
        'statement is unreachable',
    )
    for eline in nvcc_stderr.split('\n'):
        if not eline:
            continue
        if any(fragment in eline for fragment in ignored_fragments):
            continue
        _logger.info("NVCC: %s", eline)

    if p.returncode:
        # Dump the numbered source so compiler line numbers can be matched.
        for lineno, code_line in enumerate(src_code.split('\n')):
            print(lineno + 1, code_line, file=sys.stderr)
        print('===============================', file=sys.stderr)
        # filter the output from the compiler
        for err_line in nvcc_stderr.split('\n'):
            if not err_line:
                continue
            # filter out the annoying declaration warnings
            colon = err_line.find(':')
            if colon >= 0 and err_line[colon:].startswith(
                    (': warning: variable', ': warning: label')):
                continue
            print(err_line, file=sys.stderr)
        print(nvcc_stdout)
        print(cmd)
        raise Exception('nvcc return status', p.returncode,
                        'for cmd', ' '.join(cmd))
    elif config.cmodule.compilation_warning and nvcc_stdout:
        print(nvcc_stdout)

    # On Windows, nvcc print useless stuff by default
    if sys.platform != 'win32' and nvcc_stdout:
        # this doesn't happen to my knowledge
        print("DEBUG: nvcc STDOUT", nvcc_stdout, file=sys.stderr)

    if py_module:
        # touch the __init__ file
        with open(os.path.join(location, "__init__.py"), 'w'):
            pass
        return dlimport(lib_filename)
    return None
def compile_str(
    module_name, src_code, location=None, include_dirs=[], lib_dirs=[],
    libs=[], preargs=[], rpaths=rpath_defaults, py_module=True,
):
    """Write `src_code` to ``mod.cu`` and assemble the nvcc command line.

    :param module_name: string (this has been embedded in the src_code)
    :param src_code: a complete c or c++ source listing for the module
    :param location: a pre-existing filesystem directory where the cpp
        file and .so will be written. It is passed straight to
        ``os.path.join``, so a real directory is required despite the
        None default.
    :param include_dirs: a list of include directory names (each gets
        prefixed with -I)
    :param lib_dirs: a list of library search path directory names (each
        gets prefixed with -L)
    :param libs: a list of libraries to link with (each gets prefixed
        with -l)
    :param preargs: a list of extra compiler arguments, or None. The list
        is copied; the caller's object is never modified.
    :param rpaths: list of rpaths to use with Xlinker. Defaults to
        `rpath_defaults`.
    :param py_module: if False, compile to a shared library, but do not
        import as a Python module. Not read by the part of the function
        present in this block.

    :returns: None as written.
        NOTE(review): the historical docstring promised the
        dynamically-imported python module of the compiled code (None
        when py_module is False), but this block ends once the command
        line has been built -- confirm whether the nvcc invocation and
        import step were lost.

    :note 1: On Windows 7 with nvcc 3.1 we need to compile in the real
        directory Otherwise nvcc never finish.

    :note 2: The list-valued parameters keep their historical ``[]``
        defaults for interface compatibility; only copies are modified.

    """
    rpaths = list(rpaths)

    # Copy first, filter second: the copy protects the caller's list (and
    # the shared default), and doing the None check up front keeps the
    # Windows filtering from failing on preargs=None.
    preargs = [] if preargs is None else list(preargs)
    if sys.platform == "win32":
        # Remove some compilation args that cl.exe does not understand.
        # cl.exe is the compiler used by nvcc on Windows.
        cl_unsupported = ("-Wno-write-strings", "-Wno-unused-label",
                          "-Wno-unused-variable", "-fno-math-errno")
        preargs = [pa for pa in preargs if pa not in cl_unsupported]
    else:
        preargs.append("-fPIC")

    cuda_root = config.cuda.root

    # The include dirs given by the user should have precedence over
    # the standards ones.
    include_dirs = include_dirs + std_include_dirs()
    this_dir = os.path.abspath(os.path.split(__file__)[0])
    if this_dir not in include_dirs:
        include_dirs.append(this_dir)

    libs = std_libs() + libs
    if "cudart" not in libs:
        libs.append("cudart")

    lib_dirs = std_lib_dirs() + lib_dirs
    cuda_lib_dirs = (os.path.join(cuda_root, "lib"),
                     os.path.join(cuda_root, "lib64"))
    if any(ld in cuda_lib_dirs for ld in lib_dirs):
        warnings.warn(
            "You have the cuda library directory in your "
            "lib_dirs.  This has been known to cause problems "
            "and should not be done."
        )

    if sys.platform != "darwin":
        # sometimes, the linker cannot find -lpython so we need to tell it
        # explicitly where it is located
        # this returns somepath/lib/python2.x
        python_lib = distutils.sysconfig.get_python_lib(plat_specific=1,
                                                        standard_lib=1)
        python_lib = os.path.dirname(python_lib)
        if python_lib not in lib_dirs:
            lib_dirs.append(python_lib)

    cppfilename = os.path.join(location, "mod.cu")
    # The context manager closes the file even if the write fails.
    with open(cppfilename, "w") as cppfile:
        _logger.debug("Writing module C++ code to %s", cppfilename)
        cppfile.write(src_code)

    lib_filename = os.path.join(
        location, "%s.%s" % (module_name, get_lib_extension()))

    _logger.debug("Generating shared lib %s", lib_filename)
    # TODO: Why do these args cause failure on gtx285 that has 1.3
    # compute capability? '--gpu-architecture=compute_13',
    # '--gpu-code=compute_13',

    # Split the extra arguments: preargs1 goes to nvcc itself, preargs2 is
    # forwarded to the host compiler through -Xcompiler.
    nvcc_patterns = (
        "-O", "-arch=", "-ccbin=", "-G", "-g", "-I", "-L",
        "--fmad", "--ftz", "--maxrregcount", "--prec-div", "--prec-sqrt",
        "--use_fast_math", "-fmad", "-ftz", "-maxrregcount", "-prec-div",
        "-prec-sqrt", "-use_fast_math", "--use-local-env",
        "--cl-version=",
    )
    preargs1 = [pa for pa in preargs if pa.startswith(nvcc_patterns)]
    preargs2 = [pa for pa in preargs if not pa.startswith(nvcc_patterns)]

    # Don't put -G by default, as it slow things down.
    # We aren't sure if -g slow things down, so we don't put it by default.
    cmd = [nvcc_path, "-shared"] + preargs1
    if config.nvcc.compiler_bindir:
        cmd.extend(["--compiler-bindir", config.nvcc.compiler_bindir])

    if sys.platform == "win32":
        # add flags for Microsoft compiler to create .pdb files
        preargs2.extend(["/Zi", "/MD"])
        cmd.extend(["-Xlinker", "/DEBUG"])
        # remove the complaints for the duplication of `double round(double)`
        # in both math_functions.h and pymath.h,
        # by not including the one in pymath.h
        cmd.extend(["-D HAVE_ROUND"])

    if local_bitwidth() == 64:
        cmd.append("-m64")
    else:
        cmd.append("-m32")

    if preargs2:
        cmd.extend(["-Xcompiler", ",".join(preargs2)])

    # We should not use rpath if possible. If the user provided
    # a cuda.root flag, we need to add one, but
    # otherwise, we don't add it. See gh-1540 and
    # https://wiki.debian.org/RpathIssue for details.
    if (user_provided_cuda_root and
            os.path.exists(os.path.join(config.cuda.root, "lib"))):
        rpaths.append(os.path.join(config.cuda.root, "lib"))
        if sys.platform != "darwin":
            # the CUDA libs are universal (contain both 32-bit and 64-bit)
            rpaths.append(os.path.join(config.cuda.root, "lib64"))
    if sys.platform != "win32":
        # the -rpath option is not understood by the Microsoft linker
        for rpath in rpaths:
            cmd.extend(["-Xlinker", ",".join(["-rpath", rpath])])

    cmd.extend(["-I%s" % idir for idir in include_dirs])
    cmd.extend(["-o", lib_filename])
    # Only the file name is passed, not the full path under `location`.
    cmd.append(os.path.split(cppfilename)[-1])
    cmd.extend(["-L%s" % ldir for ldir in lib_dirs])
    cmd.extend(["-l%s" % lib for lib in libs])
    if sys.platform == "darwin":
        # This tells the compiler to use the already-loaded python
        # symbols (which should always be the right ones).
        cmd.extend(["-Xcompiler", "-undefined,dynamic_lookup"])

    # Remove "-u Symbol" arguments, since they are usually not
    # relevant for the new compilation, even if they were used for
    # compiling python.  If they are necessary, the nvcc syntax is
    # "-U Symbol" with a capital U.
    # Slice deletion tolerates a trailing '-u' that has no argument.
    while "-u" in cmd:
        indexof = cmd.index("-u")
        del cmd[indexof:indexof + 2]

    # NOTE(review): `cmd` is fully assembled here but nothing in this
    # block runs it or imports the result -- see the :returns: note.
def compile_str(module_name, src_code, location=None, include_dirs=[], lib_dirs=[], libs=[], preargs=[], rpaths=rpath_defaults): """:param module_name: string (this has been embedded in the src_code :param src_code: a complete c or c++ source listing for the module :param location: a pre-existing filesystem directory where the cpp file and .so will be written :param include_dirs: a list of include directory names (each gets prefixed with -I) :param lib_dirs: a list of library search path directory names (each gets prefixed with -L) :param libs: a list of libraries to link with (each gets prefixed with -l) :param preargs: a list of extra compiler arguments :param rpaths: list of rpaths to use with Xlinker. Defaults to `rpath_defaults`. :returns: dynamically-imported python module of the compiled code. :note 1: On Windows 7 with nvcc 3.1 we need to compile in the real directory Otherwise nvcc never finish. """ rpaths = list(rpaths) if sys.platform == "win32": # Remove some compilation args that cl.exe does not understand. # cl.exe is the compiler used by nvcc on Windows. for a in [ "-Wno-write-strings", "-Wno-unused-label", "-Wno-unused-variable", "-fno-math-errno" ]: if a in preargs: preargs.remove(a) if preargs is None: preargs = [] else: preargs = list(preargs) if sys.platform != 'win32': preargs.append('-fPIC') no_opt = False cuda_root = config.cuda.root #The include dirs gived by the user should have precedence over #the standards ones. include_dirs = include_dirs + std_include_dirs() if os.path.abspath(os.path.split(__file__)[0]) not in include_dirs: include_dirs.append(os.path.abspath(os.path.split(__file__)[0])) libs = std_libs() + libs if 'cudart' not in libs: libs.append('cudart') lib_dirs = std_lib_dirs() + lib_dirs if cuda_root: lib_dirs.append(os.path.join(cuda_root, 'lib')) # from Benjamin Schrauwen April 14 2010 if sys.platform != 'darwin': # No 64 bit CUDA libraries available on the mac, yet.. 
lib_dirs.append(os.path.join(cuda_root, 'lib64')) if sys.platform == 'darwin': # On the mac, nvcc is not able to link using -framework # Python, so we have manually add the correct library and # paths darwin_python_lib = commands.getoutput('python-config --ldflags') else: # sometimes, the linker cannot find -lpython so we need to tell it # explicitly where it is located # this returns somepath/lib/python2.x python_lib = distutils.sysconfig.get_python_lib(plat_specific=1, \ standard_lib=1) python_lib = os.path.dirname(python_lib) if python_lib not in lib_dirs: lib_dirs.append(python_lib) cppfilename = os.path.join(location, 'mod.cu') cppfile = file(cppfilename, 'w') _logger.debug('Writing module C++ code to %s', cppfilename) ofiles = [] rval = None cppfile.write(src_code) cppfile.close() lib_filename = os.path.join( location, '%s.%s' % (module_name, get_lib_extension())) _logger.debug('Generating shared lib %s', lib_filename) # TODO: Why do these args cause failure on gtx285 that has 1.3 # compute capability? 
'--gpu-architecture=compute_13', # '--gpu-code=compute_13', #nvcc argument preargs1 = [ pa for pa in preargs if pa.startswith('-O') or pa.startswith('--maxrregcount=') or pa.startswith('-arch=') ] preargs2 = [pa for pa in preargs if pa not in preargs1] # other arguments cmd = [nvcc_path, '-shared', '-g'] + preargs1 if config.nvcc.compiler_bindir: cmd.extend(['--compiler-bindir', config.nvcc.compiler_bindir]) if sys.platform == 'win32': # add flags for Microsoft compiler to create .pdb files preargs2.append('/Zi') cmd.extend(['-Xlinker', '/DEBUG']) if local_bitwidth() == 64: cmd.append('-m64') else: cmd.append('-m32') if len(preargs2) > 0: cmd.extend(['-Xcompiler', ','.join(preargs2)]) if config.cuda.root and os.path.exists( os.path.join(config.cuda.root, 'lib')): rpaths.append(os.path.join(config.cuda.root, 'lib')) if sys.platform != 'darwin': # the 64bit CUDA libs are in the same files as are # named by the function above rpaths.append(os.path.join(config.cuda.root, 'lib64')) if sys.platform != 'win32': # the -rpath option is not understood by the Microsoft linker for rpath in rpaths: cmd.extend(['-Xlinker', ','.join(['-rpath', rpath])]) cmd.extend('-I%s' % idir for idir in include_dirs) cmd.extend(['-o', lib_filename]) cmd.append(os.path.split(cppfilename)[-1]) cmd.extend(['-L%s' % ldir for ldir in lib_dirs]) cmd.extend(['-l%s' % l for l in libs]) if module_name != 'cuda_ndarray': cmd.append("-lcuda_ndarray") if sys.platform == 'darwin': cmd.extend(darwin_python_lib.split()) if sys.platform == 'darwin': done = False while not done: try: indexof = cmd.index('-framework') newarg = '-Xcompiler', ','.join(cmd[indexof:(indexof + 2)]) cmd.pop(indexof) # Remove -framework cmd.pop(indexof) # Remove argument to -framework cmd.extend(newarg) except ValueError, e: done = True