def cl_kernel_info(self, arg_to_dtype_set=frozenset(), all_kwargs=None):
    """Generate, optionally dump/edit, and build the OpenCL code for the kernel.

    :arg arg_to_dtype_set: forwarded unchanged to
        :meth:`get_typed_and_scheduled_kernel`.
    :arg all_kwargs: accepted for interface compatibility; not used here.
    :returns: a :class:`_CLKernelInfo` holding the typed and scheduled
        kernel, the built OpenCL kernel object, the implementation argument
        info reported by code generation, and the generated invoker.
    """
    kernel = self.get_typed_and_scheduled_kernel(arg_to_dtype_set)

    from loopy.codegen import generate_code
    code, impl_arg_info = generate_code(kernel)

    # The write/highlight/edit switches are read from the *untyped* kernel
    # stored on this object, not from the typed and scheduled copy.
    user_options = self.kernel.options

    write_target = user_options.write_cl
    if write_target:
        if user_options.highlight_cl:
            text = get_highlighted_cl_code(code)
        else:
            text = code

        # write_cl is either literally True (print to stdout) or a file name.
        if write_target is True:
            print(text)
        else:
            with open(write_target, "w") as outf:
                outf.write(text)

    if user_options.edit_cl:
        from pytools import invoke_editor
        code = invoke_editor(code, "code.cl")

    import pyopencl as cl

    built_program = cl.Program(self.context, code).build(
            options=kernel.options.cl_build_options)
    cl_kernel = getattr(built_program, kernel.name)

    invoker = generate_invoker(
            kernel, cl_kernel, impl_arg_info, self.kernel.options)

    return _CLKernelInfo(
            kernel=kernel,
            cl_kernel=cl_kernel,
            impl_arg_info=impl_arg_info,
            invoker=invoker)
def program_info(self, entrypoint, arg_to_dtype_set=frozenset(),
        all_kwargs=None):
    """Generate code for *entrypoint* and wrap each device program for C.

    :arg entrypoint: key identifying the entrypoint inside the program; it
        is used to index both ``self.program`` and the per-entrypoint
        implemented data infos of the code generation result.
    :arg arg_to_dtype_set: forwarded unchanged to
        :meth:`get_typed_and_scheduled_translation_unit`.
    :arg all_kwargs: accepted for interface compatibility; not used here.
    :returns: a :class:`_KernelInfo` holding the typed and scheduled
        program, one :class:`CompiledCKernel` per device program, the
        implemented data info for *entrypoint*, and the invoker.
    """
    program = self.get_typed_and_scheduled_translation_unit(
            entrypoint, arg_to_dtype_set)

    from loopy.codegen import generate_code_v2
    codegen_result = generate_code_v2(program)

    dev_code = codegen_result.device_code()
    host_code = codegen_result.host_code()
    all_code = "\n".join([dev_code, "", host_code])

    entry_options = self.program[entrypoint].options

    write_target = entry_options.write_cl
    if write_target:
        if entry_options.highlight_cl:
            text = get_highlighted_code(all_code)
        else:
            text = all_code

        # write_cl is either literally True (print to stdout) or a file name.
        if write_target is True:
            print(text)
        else:
            with open(write_target, "w") as outf:
                outf.write(text)

    if entry_options.edit_cl:
        from pytools import invoke_editor
        dev_code = invoke_editor(dev_code, "code.c")
        # Re-assemble the combined source so that the edit is what gets
        # handed to the compiler below.
        all_code = "\n".join([dev_code, "", host_code])

    c_kernels = [
        CompiledCKernel(
            dev_prog,
            codegen_result.implemented_data_infos[entrypoint],
            all_code,
            self.program.target,
            self.compiler)
        for dev_prog in codegen_result.device_programs]

    return _KernelInfo(
            program=program,
            c_kernels=c_kernels,
            implemented_data_info=codegen_result.implemented_data_infos[
                entrypoint],
            invoker=self.get_invoker(program, entrypoint, codegen_result))
def translation_unit_info(self, entrypoint, arg_to_dtype_set=frozenset(),
        all_kwargs=None):
    """Generate and build the OpenCL code for a translation unit.

    :arg entrypoint: key identifying the entrypoint inside the program; it
        is used to look up the options and the implemented data info.
    :arg arg_to_dtype_set: forwarded unchanged to
        :meth:`get_typed_and_scheduled_translation_unit`.
    :arg all_kwargs: accepted for interface compatibility; not used here.
    :returns: a :class:`_KernelInfo` holding the typed and scheduled
        program, a :class:`_Kernels` namespace with one attribute per
        kernel name reported by the built OpenCL program, the implemented
        data info for *entrypoint*, and the invoker.
    """
    program = self.get_typed_and_scheduled_translation_unit(
            entrypoint, arg_to_dtype_set)

    # FIXME: now just need to add the types to the arguments
    from loopy.codegen import generate_code_v2
    from loopy.target.execution import get_highlighted_code
    codegen_result = generate_code_v2(program)

    dev_code = codegen_result.device_code()

    typed_options = program[entrypoint].options

    if typed_options.write_cl:
        # FIXME: redirect to "translation unit" level option as well.
        # NOTE(review): whether to write is decided by the typed program's
        # options, but how and where to write is read from self.program's
        # options. Both lookups are kept as they were -- confirm whether the
        # mix is intentional.
        stored_options = self.program[entrypoint].options

        if stored_options.highlight_cl:
            text = get_highlighted_code(dev_code)
        else:
            text = dev_code

        write_target = stored_options.write_cl
        if write_target is True:
            print(text)
        else:
            with open(write_target, "w") as outf:
                outf.write(text)

    if typed_options.edit_cl:
        # FIXME: redirect to "translation unit" level option as well.
        from pytools import invoke_editor
        dev_code = invoke_editor(dev_code, "code.cl")

    import pyopencl as cl

    # FIXME: redirect to "translation unit" level option as well.
    build_options = program[entrypoint].options.cl_build_options
    cl_program = cl.Program(self.context, dev_code).build(
            options=build_options)

    # Expose every kernel of the built program as an attribute.
    cl_kernels = _Kernels()
    for kernel_name in cl_program.kernel_names.split(";"):
        setattr(cl_kernels, kernel_name, getattr(cl_program, kernel_name))

    return _KernelInfo(
            program=program,
            cl_kernels=cl_kernels,
            implemented_data_info=codegen_result.implemented_data_infos[
                entrypoint],
            invoker=self.get_invoker(program, entrypoint, codegen_result))
def cl_kernel_info(self, arg_to_dtype_set=frozenset(), all_kwargs=None):
    """Generate, optionally dump/edit, and build the OpenCL device code.

    :arg arg_to_dtype_set: forwarded unchanged to
        :meth:`get_typed_and_scheduled_kernel`.
    :arg all_kwargs: accepted for interface compatibility; not used here.
    :returns: a :class:`_CLKernelInfo` holding the typed and scheduled
        kernel, a :class:`_CLKernels` namespace with one attribute per
        device program, the implemented data info, and the invoker.
    """
    kernel = self.get_typed_and_scheduled_kernel(arg_to_dtype_set)

    from loopy.codegen import generate_code_v2
    codegen_result = generate_code_v2(kernel)

    dev_code = codegen_result.device_code()

    # The write/highlight/edit switches are read from the *untyped* kernel
    # stored on this object, not from the typed and scheduled copy.
    user_options = self.kernel.options

    write_target = user_options.write_cl
    if write_target:
        if user_options.highlight_cl:
            text = get_highlighted_cl_code(dev_code)
        else:
            text = dev_code

        # write_cl is either literally True (print to stdout) or a file name.
        if write_target is True:
            print(text)
        else:
            with open(write_target, "w") as outf:
                outf.write(text)

    if user_options.edit_cl:
        from pytools import invoke_editor
        dev_code = invoke_editor(dev_code, "code.cl")

    import pyopencl as cl

    logger.info("%s: opencl compilation start" % self.kernel.name)

    unbuilt_program = cl.Program(self.context, dev_code)
    cl_program = unbuilt_program.build(
            options=kernel.options.cl_build_options)

    # Expose the built kernel of every device program as an attribute.
    cl_kernels = _CLKernels()
    for dev_prog in codegen_result.device_programs:
        prog_name = dev_prog.name
        setattr(cl_kernels, prog_name, getattr(cl_program, prog_name))

    logger.info("%s: opencl compilation done" % self.kernel.name)

    return _CLKernelInfo(
            kernel=kernel,
            cl_kernels=cl_kernels,
            implemented_data_info=codegen_result.implemented_data_info,
            invoker=generate_invoker(kernel, codegen_result))
def kernel_info(self, arg_to_dtype_set=frozenset(), all_kwargs=None):
    """Generate code for the kernel and wrap each device program for C.

    :arg arg_to_dtype_set: forwarded unchanged to
        :meth:`get_typed_and_scheduled_kernel`.
    :arg all_kwargs: accepted for interface compatibility; not used here.
    :returns: a :class:`_KernelInfo` holding the typed and scheduled
        kernel, one :class:`CompiledCKernel` per device program, the
        implemented data info, and the invoker.
    """
    kernel = self.get_typed_and_scheduled_kernel(arg_to_dtype_set)

    from loopy.codegen import generate_code_v2
    codegen_result = generate_code_v2(kernel)

    dev_code = codegen_result.device_code()
    host_code = codegen_result.host_code()
    all_code = "\n".join([dev_code, "", host_code])

    # The write/highlight/edit switches are read from the *untyped* kernel
    # stored on this object, not from the typed and scheduled copy.
    user_options = self.kernel.options

    write_target = user_options.write_cl
    if write_target:
        if user_options.highlight_cl:
            text = get_highlighted_code(all_code)
        else:
            text = all_code

        # write_cl is either literally True (print to stdout) or a file name.
        if write_target is True:
            print(text)
        else:
            with open(write_target, "w") as outf:
                outf.write(text)

    if user_options.edit_cl:
        from pytools import invoke_editor
        dev_code = invoke_editor(dev_code, "code.c")
        # Re-assemble the combined source so that the edit is what gets
        # handed to the compiler below.
        all_code = "\n".join([dev_code, "", host_code])

    c_kernels = [
        CompiledCKernel(
            dev_prog,
            codegen_result.implemented_data_info,
            all_code,
            self.kernel.target,
            self.compiler)
        for dev_prog in codegen_result.device_programs]

    return _KernelInfo(
            kernel=kernel,
            c_kernels=c_kernels,
            implemented_data_info=codegen_result.implemented_data_info,
            invoker=self.get_invoker(kernel, codegen_result))
def main():
    """Command-line entry point of the stand-alone loopy frontend.

    Parses the command line, reads a kernel description (loopy-flavored
    Python or Fortran) from a file or from standard input (``-``), generates
    code for the selected target and writes it to the output file or to
    standard output (``-``, the default).

    The generated code is passed through an editor before being written if
    ``--edit-code`` is given, or if the value of the ``LOOPY_EDIT_KERNEL``
    environment variable is a case-insensitive substring of the name of one
    of the kernels.

    :raises ValueError: if the requested target is not recognized.
    :raises RuntimeError: if the input language cannot be deduced or is
        unknown, if a loopy-language input does not define ``lp_knl``, or if
        no kernels are found in a Fortran input.
    """
    from argparse import ArgumentParser

    parser = ArgumentParser(description="Stand-alone loopy frontend")

    parser.add_argument("infile", metavar="INPUT_FILE")
    parser.add_argument("outfile", default="-", metavar="OUTPUT_FILE",
            help="Defaults to stdout ('-').", nargs="?")
    parser.add_argument("--lang", metavar="LANGUAGE", help="loopy|fortran")
    parser.add_argument("--target", choices=(
        "opencl", "ispc", "ispc-occa", "c", "c-fortran", "cuda"),
        default="opencl")
    parser.add_argument("--name")
    parser.add_argument("--transform")
    parser.add_argument("--edit-code", action="store_true")
    parser.add_argument("--occa-defines")
    parser.add_argument("--occa-add-dummy-arg", action="store_true")
    parser.add_argument("--print-ir", action="store_true")

    args = parser.parse_args()

    # {{{ target selection

    # Target modules are imported lazily so that only the selected backend
    # needs to be importable.
    if args.target == "opencl":
        from loopy.target.opencl import OpenCLTarget
        target = OpenCLTarget()
    elif args.target == "ispc":
        from loopy.target.ispc import ISPCTarget
        target = ISPCTarget()
    elif args.target == "ispc-occa":
        from loopy.target.ispc import ISPCTarget
        target = ISPCTarget(occa_mode=True)
    elif args.target == "c":
        from loopy.target.c import CTarget
        target = CTarget()
    elif args.target == "c-fortran":
        from loopy.target.c import CTarget
        target = CTarget(fortran_abi=True)
    elif args.target == "cuda":
        from loopy.target.cuda import CudaTarget
        target = CudaTarget()
    else:
        # 'target' is not bound on this path; report the requested name.
        raise ValueError("unknown target: %s" % args.target)

    lp.set_default_target(target)

    # }}}

    # {{{ read input, deduce language

    lang = None
    if args.infile == "-":
        infile_content = sys.stdin.read()
    else:
        from os.path import splitext
        _, ext = splitext(args.infile)

        lang = {
                ".py": "loopy",
                ".loopy": "loopy",
                ".floopy": "fortran",
                ".f90": "fortran",
                ".fpp": "fortran",
                ".f": "fortran",
                ".f77": "fortran",
                }.get(ext)
        with open(args.infile) as infile_fd:
            infile_content = infile_fd.read()

    # An explicit --lang always wins over the file-extension guess.
    if args.lang is not None:
        lang = args.lang

    if lang is None:
        raise RuntimeError("unable to deduce input language "
                "(wrong input file extension? --lang flag?)")

    # }}}

    if lang == "loopy":
        # {{{ path wrangling

        # Make modules living next to the input file importable from it.
        from os.path import dirname, abspath
        from os import getcwd

        infile_dirname = dirname(args.infile)
        if infile_dirname:
            infile_dirname = abspath(infile_dirname)
        else:
            infile_dirname = getcwd()

        sys.path.append(infile_dirname)

        # }}}

        # NOTE: the input, defines and transform files are executed as
        # Python code in this process; only use with trusted input.
        data_dic = {}
        data_dic["lp"] = lp
        data_dic["np"] = np

        if args.occa_defines:
            with open(args.occa_defines) as defines_fd:
                occa_define_code = defines_to_python_code(defines_fd.read())
            exec(compile(occa_define_code, args.occa_defines, "exec"),
                    data_dic)

        # The input was already read into infile_content above. Do not
        # re-open args.infile here: that fails when reading from stdin ("-").
        exec(compile(infile_content, args.infile, "exec"), data_dic)

        if args.transform:
            with open(args.transform) as xform_fd:
                xform_code = xform_fd.read()
            exec(compile(xform_code, args.transform, "exec"), data_dic)

        try:
            kernel = data_dic["lp_knl"]
        except KeyError:
            raise RuntimeError("loopy-lang requires 'lp_knl' "
                    "to be defined on exit")

        if args.name is not None:
            kernel = kernel.copy(name=args.name)

        kernels = [kernel]

    elif lang in ["fortran", "floopy", "fpp"]:
        pre_transform_code = None
        if args.transform:
            with open(args.transform) as xform_fd:
                pre_transform_code = xform_fd.read()

        if args.occa_defines:
            if pre_transform_code is None:
                pre_transform_code = ""

            # The defines must come before the user's transform code.
            with open(args.occa_defines) as defines_fd:
                pre_transform_code = (
                        defines_to_python_code(defines_fd.read())
                        + pre_transform_code)

        kernels = lp.parse_transformed_fortran(
                infile_content, pre_transform_code=pre_transform_code,
                filename=args.infile)

        if args.name is not None:
            kernels = [
                    kernel for kernel in kernels
                    if kernel.name == args.name
                    ]

        if not kernels:
            raise RuntimeError("no kernels found (name specified: %s)"
                    % args.name)

    else:
        raise RuntimeError("unknown language: '%s'" % lang)

    if args.print_ir:
        for kernel in kernels:
            print(kernel, file=sys.stderr)

    if args.occa_add_dummy_arg:
        new_kernels = []
        for kernel in kernels:
            new_args = [
                    lp.ArrayArg("occa_info", np.int32, shape=None)
                    ] + kernel.args
            new_kernels.append(kernel.copy(args=new_args))

        kernels = new_kernels
        del new_kernels

    # {{{ generate code

    codes = []
    from loopy.codegen import generate_code
    for kernel in kernels:
        kernel = lp.preprocess_kernel(kernel)
        # Only the code is needed; the implementation argument info is not.
        code, _ = generate_code(kernel)
        codes.append(code)

    # }}}

    if args.outfile is not None:
        outfile = args.outfile
    else:
        outfile = "-"

    code = "\n\n".join(codes)

    # {{{ edit code if requested

    import os
    edit_kernel_env = os.environ.get("LOOPY_EDIT_KERNEL")
    need_edit = args.edit_code
    if not need_edit and edit_kernel_env is not None:
        # Do not replace with "any()"--Py2.6/2.7 bug doesn't like
        # comprehensions in functions with exec().

        for k in kernels:
            if edit_kernel_env.lower() in k.name.lower():
                need_edit = True
                break

    if need_edit:
        from pytools import invoke_editor
        code = invoke_editor(code, filename="edit.cl")

    # }}}

    if outfile == "-":
        sys.stdout.write(code)
    else:
        with open(outfile, "w") as outfile_fd:
            outfile_fd.write(code)
def main():
    """Command-line entry point of the stand-alone loopy frontend.

    Parses the command line, reads a kernel description (loopy-flavored
    Python or Fortran) from a file or from standard input (``-``), generates
    code for the selected target and writes it to the output file or to
    standard output (``-``, the default).

    The generated code is passed through an editor before being written if
    ``--edit-code`` is given, or if the value of the ``LOOPY_EDIT_KERNEL``
    environment variable is a case-insensitive substring of the name of one
    of the kernels.

    :raises ValueError: if the requested target is not recognized.
    :raises RuntimeError: if the input language cannot be deduced or is
        unknown, if a loopy-language input does not define ``lp_knl``, or if
        no kernels are found in a Fortran input.
    """
    from argparse import ArgumentParser

    parser = ArgumentParser(description="Stand-alone loopy frontend")

    parser.add_argument("infile", metavar="INPUT_FILE")
    parser.add_argument("outfile", default="-", metavar="OUTPUT_FILE",
            help="Defaults to stdout ('-').", nargs="?")
    parser.add_argument("--lang", metavar="LANGUAGE", help="loopy|fortran")
    parser.add_argument("--target", choices=(
        "opencl", "ispc", "ispc-occa", "c", "c-fortran", "cuda"),
        default="opencl")
    parser.add_argument("--name")
    parser.add_argument("--transform")
    parser.add_argument("--edit-code", action="store_true")
    parser.add_argument("--occa-defines")
    parser.add_argument("--occa-add-dummy-arg", action="store_true")
    parser.add_argument("--print-ir", action="store_true")

    args = parser.parse_args()

    # {{{ target selection

    # Target modules are imported lazily so that only the selected backend
    # needs to be importable.
    if args.target == "opencl":
        from loopy.target.opencl import OpenCLTarget
        target = OpenCLTarget()
    elif args.target == "ispc":
        from loopy.target.ispc import ISPCTarget
        target = ISPCTarget()
    elif args.target == "ispc-occa":
        from loopy.target.ispc import ISPCTarget
        target = ISPCTarget(occa_mode=True)
    elif args.target == "c":
        from loopy.target.c import CTarget
        target = CTarget()
    elif args.target == "c-fortran":
        from loopy.target.c import CTarget
        target = CTarget(fortran_abi=True)
    elif args.target == "cuda":
        from loopy.target.cuda import CudaTarget
        target = CudaTarget()
    else:
        # 'target' is not bound on this path; report the requested name.
        raise ValueError("unknown target: %s" % args.target)

    lp.set_default_target(target)

    # }}}

    # {{{ read input, deduce language

    lang = None
    if args.infile == "-":
        infile_content = sys.stdin.read()
    else:
        from os.path import splitext
        _, ext = splitext(args.infile)

        lang = {
                ".py": "loopy",
                ".loopy": "loopy",
                ".floopy": "fortran",
                ".f90": "fortran",
                ".fpp": "fortran",
                ".f": "fortran",
                ".f77": "fortran",
                }.get(ext)
        with open(args.infile, "r") as infile_fd:
            infile_content = infile_fd.read()

    # An explicit --lang always wins over the file-extension guess.
    if args.lang is not None:
        lang = args.lang

    if lang is None:
        raise RuntimeError("unable to deduce input language "
                "(wrong input file extension? --lang flag?)")

    # }}}

    if lang == "loopy":
        # {{{ path wrangling

        # Make modules living next to the input file importable from it.
        from os.path import dirname, abspath
        from os import getcwd

        infile_dirname = dirname(args.infile)
        if infile_dirname:
            infile_dirname = abspath(infile_dirname)
        else:
            infile_dirname = getcwd()

        sys.path.append(infile_dirname)

        # }}}

        # NOTE: the input, defines and transform files are executed as
        # Python code in this process; only use with trusted input.
        data_dic = {}
        data_dic["lp"] = lp
        data_dic["np"] = np

        if args.occa_defines:
            with open(args.occa_defines, "r") as defines_fd:
                occa_define_code = defines_to_python_code(defines_fd.read())
            exec(compile(occa_define_code, args.occa_defines, "exec"),
                    data_dic)

        # The input was already read into infile_content above. Do not
        # re-open args.infile here: that fails when reading from stdin ("-").
        exec(compile(infile_content, args.infile, "exec"), data_dic)

        if args.transform:
            with open(args.transform, "r") as xform_fd:
                xform_code = xform_fd.read()
            exec(compile(xform_code, args.transform, "exec"), data_dic)

        try:
            kernel = data_dic["lp_knl"]
        except KeyError:
            raise RuntimeError("loopy-lang requires 'lp_knl' "
                    "to be defined on exit")

        if args.name is not None:
            kernel = kernel.copy(name=args.name)

        kernels = [kernel]

    elif lang in ["fortran", "floopy", "fpp"]:
        pre_transform_code = None
        if args.transform:
            with open(args.transform, "r") as xform_fd:
                pre_transform_code = xform_fd.read()

        if args.occa_defines:
            if pre_transform_code is None:
                pre_transform_code = ""

            # The defines must come before the user's transform code.
            with open(args.occa_defines, "r") as defines_fd:
                pre_transform_code = (
                        defines_to_python_code(defines_fd.read())
                        + pre_transform_code)

        kernels = lp.parse_transformed_fortran(
                infile_content, pre_transform_code=pre_transform_code,
                filename=args.infile)

        if args.name is not None:
            kernels = [
                    kernel for kernel in kernels
                    if kernel.name == args.name
                    ]

        if not kernels:
            raise RuntimeError("no kernels found (name specified: %s)"
                    % args.name)

    else:
        raise RuntimeError("unknown language: '%s'" % lang)

    if args.print_ir:
        for kernel in kernels:
            print(kernel, file=sys.stderr)

    if args.occa_add_dummy_arg:
        new_kernels = []
        for kernel in kernels:
            new_args = [
                    lp.GlobalArg("occa_info", np.int32, shape=None)
                    ] + kernel.args
            new_kernels.append(kernel.copy(args=new_args))

        kernels = new_kernels
        del new_kernels

    # {{{ generate code

    codes = []
    from loopy.codegen import generate_code
    for kernel in kernels:
        kernel = lp.preprocess_kernel(kernel)
        # Only the code is needed; the implementation argument info is not.
        code, _ = generate_code(kernel)
        codes.append(code)

    # }}}

    if args.outfile is not None:
        outfile = args.outfile
    else:
        outfile = "-"

    code = "\n\n".join(codes)

    # {{{ edit code if requested

    import os
    edit_kernel_env = os.environ.get("LOOPY_EDIT_KERNEL")
    need_edit = args.edit_code
    if not need_edit and edit_kernel_env is not None:
        # Do not replace with "any()"--Py2.6/2.7 bug doesn't like
        # comprehensions in functions with exec().

        for k in kernels:
            if edit_kernel_env.lower() in k.name.lower():
                need_edit = True
                break

    if need_edit:
        from pytools import invoke_editor
        code = invoke_editor(code, filename="edit.cl")

    # }}}

    if outfile == "-":
        sys.stdout.write(code)
    else:
        with open(outfile, "w") as outfile_fd:
            outfile_fd.write(code)