def prepare_temp_files_for_error_msg(self):
    """Dump the kernel source to a temporary file for use in error messages.

    The kernel string is written to a newly obtained temporary file whose
    suffix is taken from ``self.kernel_source.get_suffix()``.

    :returns: The name of the new temporary file, followed by the file names
        stored as values in ``self.temp_files``.
    :rtype: list(string)
    """
    suffix = self.kernel_source.get_suffix()
    dump_name = util.get_temp_filename(suffix=suffix)
    util.write_file(dump_name, self.kernel_string)

    # the freshly written dump always comes first in the returned list
    return [dump_name] + list(self.temp_files.values())
def prepare_list_of_files(self, kernel_name, params, grid, threads, block_size_names):
    """Prepare the kernel string along with any additional files.

    The first file in the list is allowed to include or read in the others.
    The files beyond the first are considered additional files that may also
    contain tunable parameters.

    For each file beyond the first this function creates a temporary file
    with preprocessor statements inserted. Occurrences of the original
    filenames in the first file are replaced with their temporary
    counterparts.

    :param kernel_name: A string specifying the kernel name.
    :type kernel_name: string

    :param params: A dictionary with the tunable parameters for this
        particular instance.
    :type params: dict()

    :param grid: The grid dimensions for this instance. The grid dimensions
        are also inserted into the code as if they are tunable parameters
        for convenience.
    :type grid: tuple()

    :param threads: The thread block dimensions for this instance. The
        thread block dimensions are also inserted into the code as if they
        are tunable parameters for convenience.
    :type threads: tuple()

    :param block_size_names: A list of strings that denote the names for
        the thread block dimensions.
    :type block_size_names: list(string)

    :returns: The name and the prepared kernel string of the primary kernel
        source, and a dict that maps each secondary source filename to the
        temporary file that was written for it.
    :rtype: tuple(string, string, dict)

    :raises ValueError: If a secondary kernel source does not look like a
        filename.
    """
    # local import: only needed to derive the suffix of the temporary files
    import os

    temp_files = dict()

    for i, f in enumerate(self.kernel_sources):
        if i > 0 and not util.looks_like_a_filename(f):
            raise ValueError('When passing multiple kernel sources, the secondary entries must be filenames')

        ks = self.get_kernel_string(i, params)
        # add preprocessor statements
        n, ks = util.prepare_kernel_string(kernel_name, ks, params, grid, threads, block_size_names, self.lang)

        if i == 0:
            # primary kernel source
            name = n
            kernel_string = ks
            continue

        # save secondary kernel sources to temporary files

        # generate temp filename with the same extension; splitext only looks
        # at the last path component, so a dot in a directory name or a file
        # without extension no longer produces a suffix with path separators
        temp_file = util.get_temp_filename(suffix=os.path.splitext(f)[1])
        temp_files[f] = temp_file
        util.write_file(temp_file, ks)

        # replace occurrences of the additional file's name in the first
        # kernel_string with the name of the temp file
        kernel_string = kernel_string.replace(f, temp_file)

    return name, kernel_string, temp_files
def compile_and_benchmark(self, gpu_args, params, kernel_options, tuning_options):
    """Compile and benchmark a kernel instance based on kernel strings and parameters.

    :param gpu_args: Kernel arguments, passed on to the correctness check
        and to the benchmark.

    :param params: The tunable parameters of this instance; used to create
        the kernel instance and the instance string that is logged.

    :param kernel_options: Kernel options; used to create the kernel
        instance, ``cmem_args`` is read from it.

    :param tuning_options: Tuning options; ``verbose``, ``answer``,
        ``atol`` and ``verify`` are read from it.

    :returns: The result of ``self.benchmark``, or None when no kernel
        instance could be created or compilation yielded no function.

    :raises Exception: Any exception raised while compiling, verifying or
        benchmarking is re-raised after the kernel source has been dumped
        to a temporary file.
    """
    instance_string = util.get_instance_string(params)

    logging.debug('compile_and_benchmark ' + instance_string)
    mem_usage = round(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0, 1)
    logging.debug('Memory usage : %2.2f MB', mem_usage)

    verbose = tuning_options.verbose

    instance = self.create_kernel_instance(kernel_options, params, verbose)
    if instance is None:
        return None

    # stays None when compile_kernel yields no function
    elapsed = None

    try:
        # compile the kernel
        func = self.compile_kernel(instance, verbose)

        # no early return here: a missing function must still reach the
        # temporary file clean up at the end of this method
        if func is not None:
            # add constant memory arguments to compiled module
            if kernel_options.cmem_args is not None:
                self.dev.copy_constant_memory_args(kernel_options.cmem_args)

            # test kernel for correctness and benchmark
            if tuning_options.answer is not None:
                self.check_kernel_correctness(func, gpu_args, instance, tuning_options.answer,
                                              tuning_options.atol, tuning_options.verify, verbose)

            # benchmark
            elapsed = self.benchmark(func, gpu_args, instance, verbose)

    except Exception:
        # dump kernel_string to temp file
        temp_filename = util.get_temp_filename(suffix=".c")
        util.write_file(temp_filename, instance.kernel_string)
        # join all names with a space, so the dump file name does not run
        # into the name of the first secondary temp file
        source_files = [temp_filename] + list(instance.temp_files.values())
        print("Error while compiling or benchmarking, see source files: " + " ".join(source_files))
        # bare raise keeps the original traceback intact
        raise

    # clean up any temporary files, if no error occurred
    for v in instance.temp_files.values():
        util.delete_temp_file(v)

    return elapsed
def compile_and_benchmark(self, gpu_args, params, kernel_options, tuning_options):
    """Compile and benchmark a kernel instance based on kernel strings and parameters.

    :param gpu_args: Kernel arguments, passed on to the output check and to
        the benchmark.

    :param params: The tunable parameters of this instance; used to create
        the kernel instance and the instance string that is logged.

    :param kernel_options: Kernel options; used to create the kernel
        instance, ``cmem_args`` and ``texmem_args`` are read from it.

    :param tuning_options: Tuning options; ``verbose``, ``answer``,
        ``atol``, ``verify`` and ``times`` are read from it.

    :returns: The result of ``self.benchmark``, or None when no kernel
        instance could be created or compilation yielded no function.

    :raises Exception: Any exception raised while compiling, verifying or
        benchmarking is re-raised after the kernel source has been dumped
        to a temporary file.
    """
    instance_string = util.get_instance_string(params)

    logging.debug('compile_and_benchmark ' + instance_string)
    mem_usage = round(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0, 1)
    logging.debug('Memory usage : %2.2f MB', mem_usage)

    verbose = tuning_options.verbose

    instance = self.create_kernel_instance(kernel_options, params, verbose)
    if instance is None:
        return None

    # stays None when compile_kernel yields no function
    elapsed = None

    try:
        # compile the kernel
        func = self.compile_kernel(instance, verbose)

        # no early return here: a missing function must still reach the
        # temporary file clean up at the end of this method
        if func is not None:
            # add constant memory arguments to compiled module
            if kernel_options.cmem_args is not None:
                self.dev.copy_constant_memory_args(kernel_options.cmem_args)

            # add texture memory arguments to compiled module
            if kernel_options.texmem_args is not None:
                self.dev.copy_texture_memory_args(kernel_options.texmem_args)

            # test kernel for correctness and benchmark
            if tuning_options.answer is not None:
                self.check_kernel_output(func, gpu_args, instance, tuning_options.answer,
                                         tuning_options.atol, tuning_options.verify, verbose)

            # benchmark
            elapsed = self.benchmark(func, gpu_args, instance, tuning_options.times, verbose)

    except Exception:
        # dump kernel_string to temp file
        temp_filename = util.get_temp_filename(suffix=".c")
        util.write_file(temp_filename, instance.kernel_string)
        # join all names with a space, so the dump file name does not run
        # into the name of the first secondary temp file
        source_files = [temp_filename] + list(instance.temp_files.values())
        print("Error while compiling or benchmarking, see source files: " + " ".join(source_files))
        # bare raise keeps the original traceback intact
        raise

    # clean up any temporary files, if no error occurred
    for v in instance.temp_files.values():
        util.delete_temp_file(v)

    return elapsed
def compile(self, kernel_name, kernel_string):
    """call the C compiler to compile the kernel, return the function

    The source is written to a temporary file, compiled to an object file
    and linked into a shared library, which is then loaded. All files
    created along the way are deleted again before returning.

    :param kernel_name: The name of the kernel to be compiled, used to lookup the
        function after compilation.
    :type kernel_name: string

    :param kernel_string: The C code that contains the function `kernel_name`
    :type kernel_string: string

    :returns: A ctypes function that can be called directly; its return
        type is set to ``c_float``.
    :rtype: ctypes._FuncPtr

    :raises subprocess.CalledProcessError: If compiling or linking fails.
    """
    logging.debug('compiling ' + kernel_name)

    if self.lib is not None:
        self.cleanup_lib()

    compiler_options = ["-fPIC"]

    # detect openmp
    if "#include <omp.h>" in kernel_string or "use omp_lib" in kernel_string:
        logging.debug('set using_openmp to true')
        self.using_openmp = True
        if self.compiler == "pgfortran":
            compiler_options.append("-mp")
        else:
            compiler_options.append("-fopenmp")

    # detect whether to use nvcc as default instead of g++, may overrule an explicitly passed g++
    if ("#include <cuda" in kernel_string) or ("__global__" in kernel_string):
        if self.compiler == "g++" and self.nvcc_available:
            self.compiler = "nvcc"

    # select right suffix based on compiler
    suffix = ".cc"
    if self.compiler in ["gfortran", "pgfortran", "ftn", "ifort"]:
        suffix = ".F90"
    if self.compiler == "nvcc":
        # turns ".cc" into ".cu"
        suffix = suffix[:-1] + "u"
        compiler_options = ["-Xcompiler=" + c for c in compiler_options]

    # only true for the C-like suffixes, not for ".F90"
    if ".c" in suffix:
        if "extern \"C\"" not in kernel_string:
            kernel_string = "extern \"C\" {\n" + kernel_string + "\n}"

    # copy user specified compiler options to current list
    if self.compiler_options:
        compiler_options += self.compiler_options

    lib_args = []
    if "CL/cl.h" in kernel_string:
        lib_args = ["-lOpenCL"]

    logging.debug('using compiler ' + self.compiler)
    logging.debug('compiler_options ' + " ".join(compiler_options))
    logging.debug('lib_args ' + " ".join(lib_args))

    source_file = get_temp_filename(suffix=suffix)
    # name of the source file without its extension
    filename = ".".join(source_file.split(".")[:-1])

    # detect Fortran modules; the name pattern accepts digits after the first
    # character, so a module such as "vector_add2" is no longer truncated
    match = re.search(r"\s*module\s+([a-zA-Z_][a-zA-Z0-9_]*)", kernel_string)
    if match:
        # the symbol name of a module procedure depends on the compiler
        if self.compiler == "gfortran":
            kernel_name = "__" + match.group(1) + "_MOD_" + kernel_name
        elif self.compiler in ["ftn", "ifort"]:
            kernel_name = match.group(1) + "_mp_" + kernel_name + "_"
        elif self.compiler == "pgfortran":
            kernel_name = match.group(1) + "_" + kernel_name + "_"

    try:
        write_file(source_file, kernel_string)

        lib_extension = ".so"
        if platform.system() == "Darwin":
            lib_extension = ".dylib"

        subprocess.check_call([self.compiler, "-c", source_file] + compiler_options + ["-o", filename + ".o"])
        subprocess.check_call([self.compiler, filename + ".o"] + compiler_options +
                              ["-shared", "-o", filename + lib_extension] + lib_args)

        self.lib = numpy.ctypeslib.load_library(filename, '.')
        func = getattr(self.lib, kernel_name)
        func.restype = C.c_float

    finally:
        # both library extensions are tried, only one of them was created
        delete_temp_file(source_file)
        delete_temp_file(filename + ".o")
        delete_temp_file(filename + ".so")
        delete_temp_file(filename + ".dylib")

    return func
def compile(self, kernel_instance):
    """call the C compiler to compile the kernel, return the function

    The source is written to a temporary file, compiled to an object file
    and linked into a shared library, which is then loaded. All files
    created along the way are deleted again before returning.

    :param kernel_instance: An object representing the specific instance of the tunable kernel
        in the parameter space.
    :type kernel_instance: kernel_tuner.core.KernelInstance

    :returns: A ctypes function that can be called directly; its return
        type is set to ``c_float``.
    :rtype: ctypes._FuncPtr

    :raises subprocess.CalledProcessError: If compiling or linking fails.
    """
    logging.debug('compiling ' + kernel_instance.name)

    kernel_string = kernel_instance.kernel_string
    kernel_name = kernel_instance.name

    if self.lib is not None:
        self.cleanup_lib()

    compiler_options = ["-fPIC"]

    # detect openmp
    if "#include <omp.h>" in kernel_string or "use omp_lib" in kernel_string:
        logging.debug('set using_openmp to true')
        self.using_openmp = True
        if self.compiler == "pgfortran":
            compiler_options.append("-mp")
        else:
            compiler_options.append("-fopenmp")

    # if filename is known, use that one
    suffix = kernel_instance.kernel_source.get_user_suffix()

    # if code contains device code, suffix .cu is required
    device_code_signals = ["__global", "__syncthreads()", "threadIdx"]
    if any(snippet in kernel_string for snippet in device_code_signals):
        suffix = ".cu"

    # detect whether to use nvcc as default instead of g++, may overrule an explicitly passed g++
    if (suffix == ".cu") or ("#include <cuda" in kernel_string) or ("cudaMemcpy" in kernel_string):
        if self.compiler == "g++" and self.nvcc_available:
            self.compiler = "nvcc"

    if suffix is None:
        # select right suffix based on compiler
        suffix = ".cc"

        if self.compiler in ["gfortran", "pgfortran", "ftn", "ifort"]:
            suffix = ".F90"

    if self.compiler == "nvcc":
        compiler_options = ["-Xcompiler=" + c for c in compiler_options]

    # this basically checks if we aren't compiling Fortran
    # at the moment any C, C++, or CUDA code is assumed to use extern "C" linkage
    if ".c" in suffix:
        if "extern \"C\"" not in kernel_string:
            kernel_string = "extern \"C\" {\n" + kernel_string + "\n}"

    # copy user specified compiler options to current list
    if self.compiler_options:
        compiler_options += self.compiler_options

    lib_args = []
    if "CL/cl.h" in kernel_string:
        lib_args = ["-lOpenCL"]

    logging.debug('using compiler ' + self.compiler)
    logging.debug('compiler_options ' + " ".join(compiler_options))
    logging.debug('lib_args ' + " ".join(lib_args))

    source_file = get_temp_filename(suffix=suffix)
    # name of the source file without its extension
    filename = ".".join(source_file.split(".")[:-1])

    # detect Fortran modules; the name pattern accepts digits after the first
    # character, so a module such as "vector_add2" is no longer truncated
    match = re.search(r"\s*module\s+([a-zA-Z_][a-zA-Z0-9_]*)", kernel_string)
    if match:
        # the symbol name of a module procedure depends on the compiler
        if self.compiler == "gfortran":
            kernel_name = "__" + match.group(1) + "_MOD_" + kernel_name
        elif self.compiler in ["ftn", "ifort"]:
            kernel_name = match.group(1) + "_mp_" + kernel_name + "_"
        elif self.compiler == "pgfortran":
            kernel_name = match.group(1) + "_" + kernel_name + "_"
    else:
        # for functions outside of modules
        if self.compiler in ["gfortran", "ftn", "ifort", "pgfortran"]:
            kernel_name = kernel_name + "_"

    try:
        write_file(source_file, kernel_string)

        lib_extension = ".so"
        if platform.system() == "Darwin":
            lib_extension = ".dylib"

        subprocess.check_call([self.compiler, "-c", source_file] + compiler_options + ["-o", filename + ".o"])
        subprocess.check_call([self.compiler, filename + ".o"] + compiler_options +
                              ["-shared", "-o", filename + lib_extension] + lib_args)

        self.lib = numpy.ctypeslib.load_library(filename, '.')
        func = getattr(self.lib, kernel_name)
        func.restype = C.c_float

    finally:
        # both library extensions are tried, only one of them was created
        delete_temp_file(source_file)
        delete_temp_file(filename + ".o")
        delete_temp_file(filename + ".so")
        delete_temp_file(filename + ".dylib")

    return func
def compile(self, kernel_name, kernel_string):
    """Compile the kernel with the C compiler and return the loaded function.

    The source is written to a temporary file, compiled to an object file
    and linked into a shared library, which is then loaded. The temporary
    source, object and library files are deleted again before returning.

    :param kernel_name: The name of the kernel to be compiled, used to lookup the
        function after compilation.
    :type kernel_name: string

    :param kernel_string: The C code that contains the function `kernel_name`
    :type kernel_string: string

    :returns: A ctypes function that can be called directly; its return
        type is set to ``c_float``.
    :rtype: ctypes._FuncPtr
    """
    logging.debug('compiling ' + kernel_name)

    if self.lib != None:
        self.cleanup_lib()

    # wrap the code in an extern "C" block unless it already mentions one
    if "extern \"C\"" not in kernel_string:
        kernel_string = "extern \"C\" {\n" + kernel_string + "\n}"

    flags = ["-fPIC"]
    if "#include <omp.h>" in kernel_string:
        logging.debug('set using_openmp to true')
        self.using_openmp = True
        flags.append("-fopenmp")

    # CUDA code switches the compiler to nvcc whenever nvcc is available
    has_cuda_code = "#include <cuda" in kernel_string or "__global__" in kernel_string
    if has_cuda_code and self.nvcc_available:
        self.compiler = "nvcc"

    if self.compiler == "nvcc":
        suffix = ".cu"
        flags = ["-Xcompiler=" + flag for flag in flags]
    else:
        suffix = ".cc"

    # user specified options go after the generated ones
    if self.compiler_options:
        flags.extend(self.compiler_options)

    lib_args = ["-lOpenCL"] if "CL/cl.h" in kernel_string else []

    logging.debug('using compiler ' + self.compiler)
    logging.debug('compiler_options ' + " ".join(flags))
    logging.debug('lib_args ' + " ".join(lib_args))

    source_file = get_temp_filename(suffix=suffix)
    # everything in front of the last dot of the source file name
    filename = source_file.rpartition(".")[0]
    object_file = filename + ".o"
    shared_lib = filename + ".so"

    try:
        write_file(source_file, kernel_string)

        compile_cmd = [self.compiler, "-c", source_file] + flags + ["-o", object_file]
        subprocess.check_call(compile_cmd)

        link_cmd = [self.compiler, object_file] + flags + ["-shared", "-o", shared_lib] + lib_args
        subprocess.check_call(link_cmd)

        self.lib = numpy.ctypeslib.load_library(filename, '.')
        func = getattr(self.lib, kernel_name)
        func.restype = C.c_float
    finally:
        for leftover in (source_file, object_file, shared_lib):
            delete_temp_file(leftover)

    return func
def compile(self, kernel_name, kernel_string):
    """call the C compiler to compile the kernel, return the function

    The source is written to a temporary file, compiled to an object file
    and linked into a shared library, which is then loaded. All files
    created along the way are deleted again before returning.

    :param kernel_name: The name of the kernel to be compiled, used to lookup the
        function after compilation.
    :type kernel_name: string

    :param kernel_string: The C code that contains the function `kernel_name`
    :type kernel_string: string

    :returns: A ctypes function that can be called directly; its return
        type is set to ``c_float``.
    :rtype: ctypes._FuncPtr

    :raises subprocess.CalledProcessError: If compiling or linking fails.
    """
    logging.debug('compiling ' + kernel_name)

    if self.lib is not None:
        self.cleanup_lib()

    compiler_options = ["-fPIC"]

    # detect openmp
    if "#include <omp.h>" in kernel_string or "use omp_lib" in kernel_string:
        logging.debug('set using_openmp to true')
        self.using_openmp = True
        if self.compiler == "pgfortran":
            compiler_options.append("-mp")
        else:
            compiler_options.append("-fopenmp")

    # select right suffix based on compiler
    suffix = ".cc"

    # detect whether to use nvcc as default instead of g++, may overrule an explicitly passed g++
    if ("#include <cuda" in kernel_string) or ("cudaMemcpy" in kernel_string):
        if self.compiler == "g++" and self.nvcc_available:
            self.compiler = "nvcc"

    # if contains device code suffix .cu is required by nvcc
    if self.compiler == "nvcc" and "__global__" in kernel_string:
        suffix = ".cu"
    if self.compiler in ["gfortran", "pgfortran", "ftn", "ifort"]:
        suffix = ".F90"

    if self.compiler == "nvcc":
        compiler_options = ["-Xcompiler=" + c for c in compiler_options]

    # only true for the C-like suffixes, not for ".F90"
    if ".c" in suffix:
        if "extern \"C\"" not in kernel_string:
            kernel_string = "extern \"C\" {\n" + kernel_string + "\n}"

    # copy user specified compiler options to current list
    if self.compiler_options:
        compiler_options += self.compiler_options

    lib_args = []
    if "CL/cl.h" in kernel_string:
        lib_args = ["-lOpenCL"]

    logging.debug('using compiler ' + self.compiler)
    logging.debug('compiler_options ' + " ".join(compiler_options))
    logging.debug('lib_args ' + " ".join(lib_args))

    source_file = get_temp_filename(suffix=suffix)
    # name of the source file without its extension
    filename = ".".join(source_file.split(".")[:-1])

    # detect Fortran modules; the name pattern accepts digits after the first
    # character, so a module such as "vector_add2" is no longer truncated
    match = re.search(r"\s*module\s+([a-zA-Z_][a-zA-Z0-9_]*)", kernel_string)
    if match:
        # the symbol name of a module procedure depends on the compiler
        if self.compiler == "gfortran":
            kernel_name = "__" + match.group(1) + "_MOD_" + kernel_name
        elif self.compiler in ["ftn", "ifort"]:
            kernel_name = match.group(1) + "_mp_" + kernel_name + "_"
        elif self.compiler == "pgfortran":
            kernel_name = match.group(1) + "_" + kernel_name + "_"

    try:
        write_file(source_file, kernel_string)

        lib_extension = ".so"
        if platform.system() == "Darwin":
            lib_extension = ".dylib"

        subprocess.check_call([self.compiler, "-c", source_file] + compiler_options + ["-o", filename + ".o"])
        subprocess.check_call([self.compiler, filename + ".o"] + compiler_options +
                              ["-shared", "-o", filename + lib_extension] + lib_args)

        self.lib = numpy.ctypeslib.load_library(filename, '.')
        func = getattr(self.lib, kernel_name)
        func.restype = C.c_float

    finally:
        # both library extensions are tried, only one of them was created
        delete_temp_file(source_file)
        delete_temp_file(filename + ".o")
        delete_temp_file(filename + ".so")
        delete_temp_file(filename + ".dylib")

    return func