def compile_str( module_name, src_code, location=None, include_dirs=[], lib_dirs=[], libs=[], preargs=[], rpaths=rpath_defaults, py_module=True, hide_symbols=True, ): """ Parameters ---------- module_name: str This has been embedded in the src_code. src_code A complete c or c++ source listing for the module. location A pre-existing filesystem directory where the cpp file and .so will be written. include_dirs A list of include directory names (each gets prefixed with -I). lib_dirs A list of library search path directory names (each gets prefixed with -L). libs A list of libraries to link with (each gets prefixed with -l). preargs A list of extra compiler arguments. rpaths List of rpaths to use with Xlinker. Defaults to `rpath_defaults`. py_module If False, compile to a shared library, but do not import as a Python module. hide_symbols If True (the default), hide all symbols from the library symbol table unless explicitely exported. Returns ------- module Dynamically-imported python module of the compiled code. (unless py_module is False, in that case returns None.) Notes ----- On Windows 7 with nvcc 3.1 we need to compile in the real directory Otherwise nvcc never finish. """ rpaths = list(rpaths) if sys.platform == "win32": # Remove some compilation args that cl.exe does not understand. # cl.exe is the compiler used by nvcc on Windows. for a in ["-Wno-write-strings", "-Wno-unused-label", "-Wno-unused-variable", "-fno-math-errno"]: if a in preargs: preargs.remove(a) if preargs is None: preargs = [] else: preargs = list(preargs) if sys.platform != "win32": preargs.append("-fPIC") cuda_root = config.cuda.root # The include dirs gived by the user should have precedence over # the standards ones. include_dirs = include_dirs + std_include_dirs() if os.path.abspath(os.path.split(__file__)[0]) not in include_dirs: include_dirs.append(os.path.abspath(os.path.split(__file__)[0])) libs = std_libs() + libs if "cudart" not in libs: libs.append("cudart") lib_dirs = std_lib_dirs() + lib_dirs if any(ld == os.path.join(cuda_root, "lib") or ld == os.path.join(cuda_root, "lib64") for ld in lib_dirs): warnings.warn( "You have the cuda library directory in your " "lib_dirs. This has been known to cause problems " "and should not be done." ) if sys.platform != "darwin": # sometimes, the linker cannot find -lpython so we need to tell it # explicitly where it is located # this returns somepath/lib/python2.x python_lib = distutils.sysconfig.get_python_lib(plat_specific=1, standard_lib=1) python_lib = os.path.dirname(python_lib) if python_lib not in lib_dirs: lib_dirs.append(python_lib) cppfilename = os.path.join(location, "mod.cu") cppfile = open(cppfilename, "w") _logger.debug("Writing module C++ code to %s", cppfilename) cppfile.write(src_code) cppfile.close() lib_filename = os.path.join(location, "%s.%s" % (module_name, get_lib_extension())) _logger.debug("Generating shared lib %s", lib_filename) # TODO: Why do these args cause failure on gtx285 that has 1.3 # compute capability? '--gpu-architecture=compute_13', # '--gpu-code=compute_13', # nvcc argument preargs1 = [] for pa in preargs: for pattern in [ "-O", "-arch=", "-ccbin=", "-G", "-g", "-I", "-L", "--fmad", "--ftz", "--maxrregcount", "--prec-div", "--prec-sqrt", "--use_fast_math", "-fmad", "-ftz", "-maxrregcount", "-prec-div", "-prec-sqrt", "-use_fast_math", "--use-local-env", "--cl-version=", ]: if pa.startswith(pattern): preargs1.append(pa) preargs2 = [pa for pa in preargs if pa not in preargs1] # other arguments # Don't put -G by default, as it slow things down. # We aren't sure if -g slow things down, so we don't put it by default. cmd = [nvcc_path, "-shared"] + preargs1 if config.nvcc.compiler_bindir: cmd.extend(["--compiler-bindir", config.nvcc.compiler_bindir]) if sys.platform == "win32": # add flags for Microsoft compiler to create .pdb files preargs2.extend(["/Zi", "/MD"]) cmd.extend(["-Xlinker", "/DEBUG"]) # remove the complaints for the duplication of `double round(double)` # in both math_functions.h and pymath.h, # by not including the one in pymath.h cmd.extend(["-D HAVE_ROUND"]) else: if hide_symbols: preargs2.append("-fvisibility=hidden") if local_bitwidth() == 64: cmd.append("-m64") else: cmd.append("-m32") if len(preargs2) > 0: cmd.extend(["-Xcompiler", ",".join(preargs2)]) # We should not use rpath if possible. If the user provided # provided an cuda.root flag, we need to add one, but # otherwise, we don't add it. See gh-1540 and # https://wiki.debian.org/RpathIssue for details. if user_provided_cuda_root and os.path.exists(os.path.join(config.cuda.root, "lib")): rpaths.append(os.path.join(config.cuda.root, "lib")) if sys.platform != "darwin": # the CUDA libs are universal (contain both 32-bit and 64-bit) rpaths.append(os.path.join(config.cuda.root, "lib64")) if sys.platform != "win32": # the -rpath option is not understood by the Microsoft linker for rpath in rpaths: cmd.extend(["-Xlinker", ",".join(["-rpath", rpath])]) cmd.extend("-I%s" % idir for idir in include_dirs) cmd.extend(["-o", lib_filename]) cmd.append(os.path.split(cppfilename)[-1]) cmd.extend(["-L%s" % ldir for ldir in lib_dirs]) cmd.extend(["-l%s" % l for l in libs]) if sys.platform == "darwin": # This tells the compiler to use the already-loaded python # symbols (which should always be the right ones). cmd.extend(["-Xcompiler", "-undefined,dynamic_lookup"]) # Remove "-u Symbol" arguments, since they are usually not # relevant for the new compilation, even if they were used for # compiling python. If they are necessary, the nvcc syntax is # "-U Symbol" with a capital U. done = False while not done: try: indexof = cmd.index("-u") cmd.pop(indexof) # Remove -u cmd.pop(indexof) # Remove argument to -u except ValueError as e: done = True # CUDA Toolkit v4.1 Known Issues: # Host linker on Mac OS 10.7 (and 10.6 for me) passes -no_pie option # to nvcc this option is not recognized and generates an error # http://stackoverflow.com/questions/9327265/nvcc-unknown-option-no-pie # Passing -Xlinker -pie stops -no_pie from getting passed if sys.platform == "darwin" and nvcc_version >= "4.1": cmd.extend(["-Xlinker", "-pie"]) # cmd.append("--ptxas-options=-v") #uncomment this to see # register and shared-mem requirements _logger.debug("Running cmd %s", " ".join(cmd)) orig_dir = os.getcwd() try: os.chdir(location) p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) nvcc_stdout, nvcc_stderr = decode_iter(p.communicate()[:2]) finally: os.chdir(orig_dir) for eline in nvcc_stderr.split("\n"): if not eline: continue if "skipping incompatible" in eline: # ld is skipping an incompatible library continue if "declared but never referenced" in eline: continue if "statement is unreachable" in eline: continue _logger.info("NVCC: %s", eline) if p.returncode: for i, l in enumerate(src_code.split("\n")): print(i + 1, l, file=sys.stderr) print("===============================", file=sys.stderr) # filter the output from the compiler for l in nvcc_stderr.split("\n"): if not l: continue # filter out the annoying declaration warnings try: if l[l.index(":") :].startswith(": warning: variable"): continue if l[l.index(":") :].startswith(": warning: label"): continue except Exception: pass print(l, file=sys.stderr) print(nvcc_stdout) print(cmd) raise Exception("nvcc return status", p.returncode, "for cmd", " ".join(cmd)) elif config.cmodule.compilation_warning and nvcc_stdout: print(nvcc_stdout) if nvcc_stdout: # this doesn't happen to my knowledge print("DEBUG: nvcc STDOUT", nvcc_stdout, file=sys.stderr) if py_module: # touch the __init__ file open(os.path.join(location, "__init__.py"), "w").close() return dlimport(lib_filename)
def compile_str( module_name, src_code, location=None, include_dirs=[], lib_dirs=[], libs=[], preargs=[], rpaths=rpath_defaults, py_module=True, hide_symbols=True): """:param module_name: string (this has been embedded in the src_code :param src_code: a complete c or c++ source listing for the module :param location: a pre-existing filesystem directory where the cpp file and .so will be written :param include_dirs: a list of include directory names (each gets prefixed with -I) :param lib_dirs: a list of library search path directory names (each gets prefixed with -L) :param libs: a list of libraries to link with (each gets prefixed with -l) :param preargs: a list of extra compiler arguments :param rpaths: list of rpaths to use with Xlinker. Defaults to `rpath_defaults`. :param py_module: if False, compile to a shared library, but do not import as a Python module. :param hide_symbols: if True (the default), hide all symbols from the library symbol table unless explicitely exported. :returns: dynamically-imported python module of the compiled code. (unless py_module is False, in that case returns None.) :note 1: On Windows 7 with nvcc 3.1 we need to compile in the real directory Otherwise nvcc never finish. """ rpaths = list(rpaths) if sys.platform == "win32": # Remove some compilation args that cl.exe does not understand. # cl.exe is the compiler used by nvcc on Windows. for a in ["-Wno-write-strings", "-Wno-unused-label", "-Wno-unused-variable", "-fno-math-errno"]: if a in preargs: preargs.remove(a) if preargs is None: preargs = [] else: preargs = list(preargs) if sys.platform != 'win32': preargs.append('-fPIC') cuda_root = config.cuda.root # The include dirs gived by the user should have precedence over # the standards ones. include_dirs = include_dirs + std_include_dirs() if os.path.abspath(os.path.split(__file__)[0]) not in include_dirs: include_dirs.append(os.path.abspath(os.path.split(__file__)[0])) libs = std_libs() + libs if 'cudart' not in libs: libs.append('cudart') lib_dirs = std_lib_dirs() + lib_dirs if any(ld == os.path.join(cuda_root, 'lib') or ld == os.path.join(cuda_root, 'lib64') for ld in lib_dirs): warnings.warn("You have the cuda library directory in your " "lib_dirs. This has been known to cause problems " "and should not be done.") if sys.platform != 'darwin': # sometimes, the linker cannot find -lpython so we need to tell it # explicitly where it is located # this returns somepath/lib/python2.x python_lib = distutils.sysconfig.get_python_lib(plat_specific=1, \ standard_lib=1) python_lib = os.path.dirname(python_lib) if python_lib not in lib_dirs: lib_dirs.append(python_lib) cppfilename = os.path.join(location, 'mod.cu') cppfile = open(cppfilename, 'w') _logger.debug('Writing module C++ code to %s', cppfilename) cppfile.write(src_code) cppfile.close() lib_filename = os.path.join(location, '%s.%s' % (module_name, get_lib_extension())) _logger.debug('Generating shared lib %s', lib_filename) # TODO: Why do these args cause failure on gtx285 that has 1.3 # compute capability? '--gpu-architecture=compute_13', # '--gpu-code=compute_13', # nvcc argument preargs1 = [] for pa in preargs: for pattern in ['-O', '-arch=', '-ccbin=', '-G', '-g', '-I', '-L', '--fmad', '--ftz', '--maxrregcount', '--prec-div', '--prec-sqrt', '--use_fast_math', '-fmad', '-ftz', '-maxrregcount', '-prec-div', '-prec-sqrt', '-use_fast_math', '--use-local-env', '--cl-version=']: if pa.startswith(pattern): preargs1.append(pa) preargs2 = [pa for pa in preargs if pa not in preargs1] # other arguments # Don't put -G by default, as it slow things down. # We aren't sure if -g slow things down, so we don't put it by default. cmd = [nvcc_path, '-shared'] + preargs1 if config.nvcc.compiler_bindir: cmd.extend(['--compiler-bindir', config.nvcc.compiler_bindir]) if sys.platform == 'win32': # add flags for Microsoft compiler to create .pdb files preargs2.extend(['/Zi', '/MD']) cmd.extend(['-Xlinker', '/DEBUG']) # remove the complaints for the duplication of `double round(double)` # in both math_functions.h and pymath.h, # by not including the one in pymath.h cmd.extend(['-D HAVE_ROUND']) else: if hide_symbols: preargs2.append('-fvisibility=hidden') if local_bitwidth() == 64: cmd.append('-m64') else: cmd.append('-m32') if len(preargs2) > 0: cmd.extend(['-Xcompiler', ','.join(preargs2)]) # We should not use rpath if possible. If the user provided # provided an cuda.root flag, we need to add one, but # otherwise, we don't add it. See gh-1540 and # https://wiki.debian.org/RpathIssue for details. if (user_provided_cuda_root and os.path.exists(os.path.join(config.cuda.root, 'lib'))): rpaths.append(os.path.join(config.cuda.root, 'lib')) if sys.platform != 'darwin': # the CUDA libs are universal (contain both 32-bit and 64-bit) rpaths.append(os.path.join(config.cuda.root, 'lib64')) if sys.platform != 'win32': # the -rpath option is not understood by the Microsoft linker for rpath in rpaths: cmd.extend(['-Xlinker', ','.join(['-rpath', rpath])]) cmd.extend('-I%s' % idir for idir in include_dirs) cmd.extend(['-o', lib_filename]) cmd.append(os.path.split(cppfilename)[-1]) cmd.extend(['-L%s' % ldir for ldir in lib_dirs]) cmd.extend(['-l%s' % l for l in libs]) if sys.platform == 'darwin': # This tells the compiler to use the already-loaded python # symbols (which should always be the right ones). cmd.extend(['-Xcompiler', '-undefined,dynamic_lookup']) # Remove "-u Symbol" arguments, since they are usually not # relevant for the new compilation, even if they were used for # compiling python. If they are necessary, the nvcc syntax is # "-U Symbol" with a capital U. done = False while not done: try: indexof = cmd.index('-u') cmd.pop(indexof) # Remove -u cmd.pop(indexof) # Remove argument to -u except ValueError as e: done = True # CUDA Toolkit v4.1 Known Issues: # Host linker on Mac OS 10.7 (and 10.6 for me) passes -no_pie option # to nvcc this option is not recognized and generates an error # http://stackoverflow.com/questions/9327265/nvcc-unknown-option-no-pie # Passing -Xlinker -pie stops -no_pie from getting passed if sys.platform == 'darwin' and nvcc_version >= '4.1': cmd.extend(['-Xlinker', '-pie']) # cmd.append("--ptxas-options=-v") #uncomment this to see # register and shared-mem requirements _logger.debug('Running cmd %s', ' '.join(cmd)) orig_dir = os.getcwd() try: os.chdir(location) p = subprocess.Popen( cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) nvcc_stdout, nvcc_stderr = decode_iter(p.communicate()[:2]) finally: os.chdir(orig_dir) for eline in nvcc_stderr.split('\n'): if not eline: continue if 'skipping incompatible' in eline: # ld is skipping an incompatible library continue if 'declared but never referenced' in eline: continue if 'statement is unreachable' in eline: continue _logger.info("NVCC: %s", eline) if p.returncode: for i, l in enumerate(src_code.split('\n')): print(i + 1, l, file=sys.stderr) print('===============================', file=sys.stderr) # filter the output from the compiler for l in nvcc_stderr.split('\n'): if not l: continue # filter out the annoying declaration warnings try: if l[l.index(':'):].startswith(': warning: variable'): continue if l[l.index(':'):].startswith(': warning: label'): continue except Exception: pass print(l, file=sys.stderr) print(nvcc_stdout) print(cmd) raise Exception('nvcc return status', p.returncode, 'for cmd', ' '.join(cmd)) elif config.cmodule.compilation_warning and nvcc_stdout: print(nvcc_stdout) if nvcc_stdout: # this doesn't happen to my knowledge print("DEBUG: nvcc STDOUT", nvcc_stdout, file=sys.stderr) if py_module: # touch the __init__ file open(os.path.join(location, "__init__.py"), 'w').close() return dlimport(lib_filename)
# Host linker on Mac OS 10.7 (and 10.6 for me) passes -no_pie option # to nvcc this option is not recognized and generates an error # http://stackoverflow.com/questions/9327265/nvcc-unknown-option-no-pie # Passing -Xlinker -pie stops -no_pie from getting passed if sys.platform == 'darwin' and nvcc_version >= '4.1': cmd.extend(['-Xlinker', '-pie']) #cmd.append("--ptxas-options=-v") #uncomment this to see #register and shared-mem requirements _logger.debug('Running cmd %s', ' '.join(cmd)) orig_dir = os.getcwd() try: os.chdir(location) p = subprocess.Popen( cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) nvcc_stdout, nvcc_stderr = decode_iter(p.communicate()[:2]) finally: os.chdir(orig_dir) for eline in nvcc_stderr.split('\n'): if not eline: continue if 'skipping incompatible' in eline: #ld is skipping an incompatible library continue if 'declared but never referenced' in eline: continue if 'statement is unreachable' in eline: continue _logger.info("NVCC: %s", eline)
class NVCC_compiler(object): @staticmethod def version_str(): return "nvcc " + nvcc_version @staticmethod def compile_args(): """ This args will be received by compile_str() in the preargs paramter. They will also be included in the "hard" part of the key module. """ flags = [flag for flag in config.nvcc.flags.split(' ') if flag] if config.nvcc.fastmath: flags.append('-use_fast_math') cuda_ndarray_cuh_hash = hash_from_file( os.path.join(os.path.split(__file__)[0], 'cuda_ndarray.cuh')) flags.append('-DCUDA_NDARRAY_CUH=' + cuda_ndarray_cuh_hash) # numpy 1.7 deprecated the following macro but the didn't # existed in the past numpy_ver = [int(n) for n in numpy.__version__.split('.')[:2]] if bool(numpy_ver < [1, 7]): flags.append("-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY") flags.append("-D NPY_ARRAY_ALIGNED=NPY_ALIGNED") flags.append("-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE") flags.append("-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL") flags.append("-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS") flags.append("-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS") # If the user didn't specify architecture flags add them if not any(['-arch=sm_' in f for f in flags]): # We compile cuda_ndarray.cu during import. # We should not add device properties at that time. # As the device is not selected yet! # TODO: re-compile cuda_ndarray when we bind to a GPU? import theano.sandbox.cuda if hasattr(theano.sandbox, 'cuda'): n = theano.sandbox.cuda.use.device_number if n is None: _logger.warn( "We try to get compilation arguments for CUDA" " code, but the GPU device is not initialized." " This is probably caused by an Op that work on" " the GPU that don't inherit from GpuOp." " We Initialize the GPU now.") theano.sandbox.cuda.use( "gpu", force=True, default_to_move_computation_to_gpu=False, move_shared_float32_to_gpu=False, enable_cuda=False) n = theano.sandbox.cuda.use.device_number p = theano.sandbox.cuda.device_properties(n) flags.append('-arch=sm_' + str(p['major']) + str(p['minor'])) return flags @staticmethod def compile_str(module_name, src_code, location=None, include_dirs=[], lib_dirs=[], libs=[], preargs=[], rpaths=rpath_defaults, py_module=True): """:param module_name: string (this has been embedded in the src_code :param src_code: a complete c or c++ source listing for the module :param location: a pre-existing filesystem directory where the cpp file and .so will be written :param include_dirs: a list of include directory names (each gets prefixed with -I) :param lib_dirs: a list of library search path directory names (each gets prefixed with -L) :param libs: a list of libraries to link with (each gets prefixed with -l) :param preargs: a list of extra compiler arguments :param rpaths: list of rpaths to use with Xlinker. Defaults to `rpath_defaults`. :param py_module: if False, compile to a shared library, but do not import as a Python module. :returns: dynamically-imported python module of the compiled code. (unless py_module is False, in that case returns None.) :note 1: On Windows 7 with nvcc 3.1 we need to compile in the real directory Otherwise nvcc never finish. """ rpaths = list(rpaths) if sys.platform == "win32": # Remove some compilation args that cl.exe does not understand. # cl.exe is the compiler used by nvcc on Windows. for a in [ "-Wno-write-strings", "-Wno-unused-label", "-Wno-unused-variable", "-fno-math-errno" ]: if a in preargs: preargs.remove(a) if preargs is None: preargs = [] else: preargs = list(preargs) if sys.platform != 'win32': preargs.append('-fPIC') no_opt = False cuda_root = config.cuda.root #The include dirs gived by the user should have precedence over #the standards ones. include_dirs = include_dirs + std_include_dirs() if os.path.abspath(os.path.split(__file__)[0]) not in include_dirs: include_dirs.append(os.path.abspath(os.path.split(__file__)[0])) libs = std_libs() + libs if 'cudart' not in libs: libs.append('cudart') lib_dirs = std_lib_dirs() + lib_dirs if cuda_root: lib_dirs.append(os.path.join(cuda_root, 'lib')) # from Benjamin Schrauwen April 14 2010 if sys.platform != 'darwin': # OS X uses universal libraries lib_dirs.append(os.path.join(cuda_root, 'lib64')) if sys.platform != 'darwin': # sometimes, the linker cannot find -lpython so we need to tell it # explicitly where it is located # this returns somepath/lib/python2.x python_lib = distutils.sysconfig.get_python_lib(plat_specific=1, \ standard_lib=1) python_lib = os.path.dirname(python_lib) if python_lib not in lib_dirs: lib_dirs.append(python_lib) cppfilename = os.path.join(location, 'mod.cu') cppfile = open(cppfilename, 'w') _logger.debug('Writing module C++ code to %s', cppfilename) cppfile.write(src_code) cppfile.close() lib_filename = os.path.join( location, '%s.%s' % (module_name, get_lib_extension())) _logger.debug('Generating shared lib %s', lib_filename) # TODO: Why do these args cause failure on gtx285 that has 1.3 # compute capability? '--gpu-architecture=compute_13', # '--gpu-code=compute_13', #nvcc argument preargs1 = [] for pa in preargs: for pattern in [ '-O', '-arch=', '-ccbin=' '--fmad', '--ftz', '--maxrregcount', '--prec-div', '--prec-sqrt', '--use_fast_math', '-fmad', '-ftz', '-maxrregcount', '-prec-div', '-prec-sqrt', '-use_fast_math' ]: if pa.startswith(pattern): preargs1.append(pa) preargs2 = [pa for pa in preargs if pa not in preargs1] # other arguments cmd = [nvcc_path, '-shared', '-g'] + preargs1 if config.nvcc.compiler_bindir: cmd.extend(['--compiler-bindir', config.nvcc.compiler_bindir]) if sys.platform == 'win32': # add flags for Microsoft compiler to create .pdb files preargs2.extend(['/Zi', '/MD']) cmd.extend(['-Xlinker', '/DEBUG']) if local_bitwidth() == 64: cmd.append('-m64') else: cmd.append('-m32') if len(preargs2) > 0: cmd.extend(['-Xcompiler', ','.join(preargs2)]) # We should not use rpath if possible. If the user provided # provided an cuda.root flag, we need to add one, but # otherwise, we don't add it. See gh-1540 and # https://wiki.debian.org/RpathIssue for details. if (user_provided_cuda_root and os.path.exists(os.path.join(config.cuda.root, 'lib'))): rpaths.append(os.path.join(config.cuda.root, 'lib')) if sys.platform != 'darwin': # the CUDA libs are universal (contain both 32-bit and 64-bit) rpaths.append(os.path.join(config.cuda.root, 'lib64')) if sys.platform != 'win32': # the -rpath option is not understood by the Microsoft linker for rpath in rpaths: cmd.extend(['-Xlinker', ','.join(['-rpath', rpath])]) cmd.extend('-I%s' % idir for idir in include_dirs) cmd.extend(['-o', lib_filename]) cmd.append(os.path.split(cppfilename)[-1]) cmd.extend(['-L%s' % ldir for ldir in lib_dirs]) cmd.extend(['-l%s' % l for l in libs]) if sys.platform == 'darwin': # This tells the compiler to use the already-loaded python # symbols (which should always be the right ones). cmd.extend(['-Xcompiler', '-undefined,dynamic_lookup']) # Remove "-u Symbol" arguments, since they are usually not # relevant for the new compilation, even if they were used for # compiling python. If they are necessary, the nvcc syntax is # "-U Symbol" with a capital U. done = False while not done: try: indexof = cmd.index('-u') cmd.pop(indexof) # Remove -u cmd.pop(indexof) # Remove argument to -u except ValueError, e: done = True # CUDA Toolkit v4.1 Known Issues: # Host linker on Mac OS 10.7 (and 10.6 for me) passes -no_pie option # to nvcc this option is not recognized and generates an error # http://stackoverflow.com/questions/9327265/nvcc-unknown-option-no-pie # Passing -Xlinker -pie stops -no_pie from getting passed if sys.platform == 'darwin' and nvcc_version >= '4.1': cmd.extend(['-Xlinker', '-pie']) #cmd.append("--ptxas-options=-v") #uncomment this to see #register and shared-mem requirements _logger.debug('Running cmd %s', ' '.join(cmd)) orig_dir = os.getcwd() try: os.chdir(location) p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) nvcc_stdout, nvcc_stderr = decode_iter(p.communicate()[:2]) finally: os.chdir(orig_dir) for eline in nvcc_stderr.split('\n'): if not eline: continue if 'skipping incompatible' in eline: #ld is skipping an incompatible library continue if 'declared but never referenced' in eline: continue if 'statement is unreachable' in eline: continue _logger.info("NVCC: %s", eline) if p.returncode: for i, l in enumerate(src_code.split('\n')): print >> sys.stderr, i + 1, l print >> sys.stderr, '===============================' # filter the output from the compiler for l in nvcc_stderr.split('\n'): if not l: continue # filter out the annoying declaration warnings try: if l[l.index(':'):].startswith(': warning: variable'): continue if l[l.index(':'):].startswith(': warning: label'): continue except Exception: pass print >> sys.stderr, l print nvcc_stdout print cmd raise Exception('nvcc return status', p.returncode, 'for cmd', ' '.join(cmd)) elif config.cmodule.compilation_warning and nvcc_stdout: print nvcc_stdout if nvcc_stdout: # this doesn't happen to my knowledge print >> sys.stderr, "DEBUG: nvcc STDOUT", nvcc_stdout if py_module: #touch the __init__ file open(os.path.join(location, "__init__.py"), 'w').close() return dlimport(lib_filename)
# to nvcc this option is not recognized and generates an error # http://stackoverflow.com/questions/9327265/nvcc-unknown-option-no-pie # Passing -Xlinker -pie stops -no_pie from getting passed if sys.platform == 'darwin' and nvcc_version >= '4.1': cmd.extend(['-Xlinker', '-pie']) #cmd.append("--ptxas-options=-v") #uncomment this to see #register and shared-mem requirements _logger.debug('Running cmd %s', ' '.join(cmd)) orig_dir = os.getcwd() try: os.chdir(location) p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) nvcc_stdout, nvcc_stderr = decode_iter(p.communicate()[:2]) finally: os.chdir(orig_dir) for eline in nvcc_stderr.split('\n'): if not eline: continue if 'skipping incompatible' in eline: #ld is skipping an incompatible library continue if 'declared but never referenced' in eline: continue if 'statement is unreachable' in eline: continue _logger.info("NVCC: %s", eline)