Exemplo n.º 1
0
 def transform(self, tree, program_cfg):
     """Lower ``tree`` to C, fold constants, and name the result.

     ``program_cfg`` is unpacked as ``(arg_cfg, tune_cfg)``; only ``arg_cfg``
     is used, passed to ``Backend`` together with ``self.symbol_table``.
     The returned tree takes its name from the first statement of
     ``self.original_tree``.
     """
     arg_cfg, _tune_cfg = program_cfg
     converted = PyBasicConversions().visit(tree)
     lowered = Backend(arg_cfg, self.symbol_table).visit(converted)
     folded = ConstantFold().visit(lowered)
     folded.name = self.original_tree.body[0].name
     return folded
Exemplo n.º 2
0
 def transform(self, tree, program_config):
     """Convert the first statement of ``tree`` to C and wrap it in a CFile.

     Type constructors are read from ``program_config[0]['arg_typesig']``.
     The first one is instantiated as the return type, and the sequence is
     paired positionally with the converted function's parameters.
     """
     signature = program_config[0]['arg_typesig']
     c_func = PyBasicConversions().visit(tree.body[0])
     c_func.return_type = signature[0]()
     # zip() stops at the shorter sequence, so surplus entries are ignored.
     for c_param, make_type in zip(c_func.params, signature):
         c_param.type = make_type()
     return [CFile(c_func.name, [c_func])]
    def transform(self, tree, program_config):
        """Convert ``tree`` to C and type the function named "apply".

        The type constructor comes from
        ``program_config.args_subconfig['arg_type']``; separate instances are
        assigned to the return type and to the first parameter.

        :return: a one-element list holding a ``CFile`` named "generated".
        """
        c_tree = PyBasicConversions().visit(tree)

        apply_fn = c_tree.find(FunctionDecl, name="apply")
        make_type = program_config.args_subconfig['arg_type']
        apply_fn.return_type = make_type()
        apply_fn.params[0].type = make_type()

        return [CFile("generated", [c_tree])]
    def visit_Lambda(self, node):
        """Lift a lambda into a ``CppDefine`` macro and return a reference to it.

        The macro is appended to ``self.lifted_functions``; the lambda node is
        replaced by a ``SymbolRef`` carrying the macro's name.
        """
        self.generic_visit(node)
        macro_name = "LAMBDA_%s" % (self.lambda_counter,)
        # The increment is applied to the class attribute, not the instance.
        LambdaLifter.lambda_counter += 1
        c_lambda = PyBasicConversions().visit(node)
        c_lambda.name = macro_name
        self.lifted_functions.append(
            CppDefine(macro_name, c_lambda.params, c_lambda.defn[0].value))

        return SymbolRef(macro_name)
Exemplo n.º 5
0
 def transform(self, tree, program_cfg):
     """Inline the environment, convert to C, and prepend support code.

     ``program_cfg`` must unpack into two items; neither is used further.
     The result is a one-element list holding the converted tree, whose body
     starts with ``#include <math.h>`` followed by the inliner's ``files``.
     """
     arg_cfg, tune_cfg = program_cfg
     env_inliner = InlineEnvironment(self.symbol_table)
     inlined = env_inliner.visit(tree)
     c_tree = PyBasicConversions().visit(inlined)
     # Whatever the inliner collected in ``files`` goes ahead of the body.
     c_tree.body = env_inliner.files + c_tree.body
     c_tree.name = self.original_tree.body[0].name
     c_tree.body.insert(0, StringTemplate("#include <math.h>"))
     return [c_tree]
Exemplo n.º 6
0
 def transform(self, tree, program_cfg):
     """Run the environment inliner and the basic Python-to-C conversion.

     The converted tree's body is prefixed with the inliner's ``files`` and a
     ``math.h`` include, and the tree is named after the first statement of
     ``self.original_tree``. ``program_cfg`` is only unpacked as a pair.

     :return: a list containing the single converted tree.
     """
     arg_cfg, tune_cfg = program_cfg
     env_inliner = InlineEnvironment(self.symbol_table)
     c_tree = PyBasicConversions().visit(env_inliner.visit(tree))
     c_tree.body = env_inliner.files + c_tree.body
     c_tree.name = self.original_tree.body[0].name
     # The include has to be the very first entry of the body.
     c_tree.body.insert(0, StringTemplate("#include <math.h>"))
     return [c_tree]
 def transform(self, tree, program_config):
     """Convert the first statement of ``tree`` into a typed C function.

     ``program_config`` is unpacked as ``(args_subconfig, tuning_config)``.
     The first statement of the converted body is dropped, the first two
     parameters share the ctype derived from ``args_subconfig``, and an extra
     ``arr`` parameter (pointer to ``c_int32``) is appended.

     :return: a ``CFile`` whose body is the converted function.
     """
     args_subconfig, tuning_config = program_config
     kernel = PyBasicConversions().visit(tree.body[0])
     kernel.defn = kernel.defn[1:]
     kernel.params[0].type = ctree.types.get_ctype(args_subconfig)
     # The second parameter reuses the very same type object as the first.
     kernel.params[1].type = kernel.params[0].type
     arr_param = SymbolRef('arr', ctypes.POINTER(ctypes.c_int32)())
     kernel.params.append(arr_param)
     return CFile(body=[kernel])
Exemplo n.º 8
0
 def transform(self, tree, program_config):
     """Turn the first statement of ``tree`` into a typed C function file.

     After conversion the leading statement of the body is removed, both
     existing leading parameters get the ctype obtained from
     ``args_subconfig``, and a trailing ``arr`` parameter of type pointer to
     ``c_int32`` is added. ``tuning_config`` is unpacked but not used.

     :return: a ``CFile`` containing only the converted function.
     """
     args_subconfig, tuning_config = program_config
     kernel = PyBasicConversions().visit(tree.body[0])
     kernel.defn = kernel.defn[1:]
     kernel.params[0].type = ctree.types.get_ctype(args_subconfig)
     # Same type object for both parameters, as read back from the first.
     kernel.params[1].type = kernel.params[0].type
     int32_pointer = ctypes.POINTER(ctypes.c_int32)()
     kernel.params.append(SymbolRef('arr', int32_pointer))
     return CFile(body=[kernel])
Exemplo n.º 9
0
    def transform(self, tree, program_config):
        """Build an OpenCL kernel file from an inline Python snippet.

        The kernel body is written below as Python source, parsed with
        ``ast.parse`` and converted by ``PyBasicConversions``, which receives
        a name mapping for ``output`` and constants for ``len_x``/``len_y``.

        :param tree: the incoming value is not read; the name is rebound to
            the generated ``FunctionDecl``.
        :param program_config: only ``program_config[0]`` is read.
        :return: the converted ``OclFile``. The kernel's name is stored in
            ``self.entry_point``.
        """
        arg_cfg = program_config[0]
        self.entry_point = unique_kernel_name()
        # One c_float instance, used as the declared type of ``temp`` below.
        ctypeObject = c_float()
        ctype = c_float
        # presumably arg_cfg[0][1] is the input array's shape — TODO confirm
        len_x = arg_cfg[0][1][0]
        len_y = arg_cfg[0][1][1]
        output = unique_name()
        params = [
            SymbolRef("input", POINTER(ctype)(), _global=True, _const=True),
            SymbolRef(output, POINTER(ctype)(), _global=True)
        ]
        defn = []
        # Declarations placed ahead of the parsed snippet.
        defn.extend([
            Assign(SymbolRef('x', c_int()), get_global_id(0)),
            Assign(SymbolRef('y', c_int()), get_global_id(1)),
            Assign(SymbolRef('temp', ctypeObject), Constant(0)),
        ])
        # Python source for the kernel body; it must stay at column 0 so
        # that ast.parse accepts it.
        body = \
            """
temp = .5 * input[clamp(x/2, 0, (len_x / 2) - 1) * len_y +
                  clamp(y/2, 0, (len_y / 2) - 1)]
if (x & 0x1):
    temp += .25 * input[clamp(x/2 + 1, 0, (len_x / 2) - 1) * len_y +
                        clamp(y/2, 0, (len_y /  2) - 1)]
else:
    temp += .25 * input[clamp(x/2 - 1, 0, (len_x / 2) - 1) * len_y +
                        clamp(y/2, 0, (len_y / 2) - 1)]
if (y & 0x1):
    temp += .25 * input[clamp(x/2, 0, (len_x / 2) - 1) * len_y +
                        clamp(y/2 + 1, 0, (len_y / 2) - 1)]
else:
    temp += .25 * input[clamp(x/2, 0, (len_x / 2) - 1) *len_y +
                        clamp(y/2 - 1, 0, (len_y / 2) - 1)]
output[x * len_y + y] = temp
"""
        body = ast.parse(body).body
        # Handed to PyBasicConversions as its first argument.
        name_dict = {
            'output': output
        }
        # Handed to PyBasicConversions as its second argument.
        const_dict = {
            'len_x': len_x,
            'len_y': len_y,
        }
        transformation = PyBasicConversions(name_dict, const_dict)
        defn.extend(body)
        tree = FunctionDecl(None, self.entry_point, params, defn)
        tree.set_kernel()
        kernel = OclFile("kernel", [tree])
        # The conversion runs over the whole file, so the still-Python
        # statements appended above are converted here.
        kernel = transformation.visit(kernel)
        return kernel
Exemplo n.º 10
0
    def test_fib(self):
        py_ast = get_ast(fib).body[0]
        c_ast = PyBasicConversions().visit(py_ast)
        filled_ast = DeclarationFiller().visit(c_ast)
        print(filled_ast)
        expected = """
void fib(n) {

    double a = 0.0;
    double b = 1.0;


    char* k = "hello";

    while (n > 0) {

        double ____temp__a = b;
        double ____temp__b = a + b;
        a = ____temp__a;
        b = ____temp__b;

        n -= 1;
    }
    return a;
}"""
        stripped_actual = str(filled_ast).replace(" ", "").replace("\n", "")
        stripped_expected = expected.replace(" ", "").replace("\n", "")
        self.assertEqual(stripped_actual, stripped_expected)
Exemplo n.º 11
0
    def transform(self, tree, program_config):
        """Lower a stencil kernel to C, unroll its innermost loop, and type it.

        ``program_config[0]`` supplies one entry per argument, from which an
        ``NdPointer`` is built; ``program_config[1]['unroll_factor']`` is the
        exponent of the power-of-two unroll factor.

        :return: a ``(tree, ctype)`` pair, where ``ctype`` is derived from
            the function named "kernel".
        """
        param_types = [NdPointer(arg[1], arg[2], arg[3])
                       for arg in program_config[0]]
        kernel_sig = FuncType(Void(), param_types)

        # Only unrolling is applied here; no blocking step is performed.
        unroll_factor = 2**program_config[1]['unroll_factor']

        passes = [
            StencilTransformer(self.input_grids,
                               self.output_grid,
                               self.constants),
            PyBasicConversions(),
        ]
        for c_pass in passes:
            tree = c_pass.visit(tree)

        outer_loop = tree.find(For)
        innermost_loop = FindInnerMostLoop().find(outer_loop)
        self.unroll(innermost_loop, unroll_factor)

        # Drop the leading parameter (``self`` of the Python method).
        kernel_params = tree.find(FunctionDecl, name="kernel").params
        kernel_params.pop(0)
        self.gen_array_macro_definition(tree, kernel_params)

        kernel_decl = tree.find(FunctionDecl, name="kernel")
        kernel_decl.set_typesig(kernel_sig)
        return tree, kernel_decl.get_type().as_ctype()
Exemplo n.º 12
0
    def transform(self, py_ast, program_cfg):
        """Wrap the converted function body in a single ``ivdep`` for-loop.

        ``program_cfg`` is unpacked as ``(arg_cfg, tune_cfg)``. A ``retval``
        parameter typed from ``arg_cfg[0]`` is appended, parameters are typed
        positionally from ``arg_cfg``, and the loop bound is the product of
        ``arg_cfg[0]._shape_``.

        :return: a one-element list holding a ``CFile`` named "generated".
        """
        arg_cfg, tune_cfg = program_cfg
        tree = PyBasicConversions().visit(py_ast)
        func = tree.body[0]
        func.params.append(C.SymbolRef("retval", arg_cfg[0]()))

        # Annotate arguments and remember each parameter's ``_dtype_``.
        param_dict = {}
        for param, make_ctype in zip(func.params, arg_cfg):
            param.type = make_ctype()
            param_dict[param.name] = make_ctype._dtype_

        length = np.prod(arg_cfg[0]._shape_)
        transformer = MapTransformer("i", param_dict, "retval")
        loop_body = [transformer.visit(stmt) for stmt in func.defn]

        loop_var = C.Assign(C.SymbolRef("i", ct.c_int()), C.Constant(0))
        loop_test = C.Lt(C.SymbolRef("i"), C.Constant(length))
        loop_step = C.PostInc(C.SymbolRef("i"))
        func.defn = [
            C.For(loop_var, loop_test, loop_step,
                  body=loop_body, pragma="ivdep")
        ]

        tree = DeclarationFiller().visit(tree)
        # HwachaVectorize receives this list and its contents are emitted
        # ahead of the tree in the generated file.
        defns = []
        tree = HwachaVectorize(param_dict, defns).visit(tree)
        preamble = [
            StringTemplate("#include <stdlib.h>"),
            StringTemplate("#include <stdint.h>"),
            StringTemplate("#include <assert.h>"),
            StringTemplate("extern \"C\" void __hwacha_body(void);"),
        ]
        return [CFile("generated", preamble + defns + [tree])]
Exemplo n.º 13
0
    def test_multiple_assign_constant(self):
        """A tuple assignment of two constants renders as two C assignments."""
        targets = ast.Tuple(elts=(ast.Name(id="x", ctx=None),
                                  ast.Name(id="y", ctx=None)))
        values = ast.Tuple(elts=(Constant(1), Constant(2)))
        assignment = ast.Assign([targets], values)

        rendered = str(PyBasicConversions().visit(assignment))

        self.assertEqual(rendered, "\nx = 1;\ny = 2;\n")
Exemplo n.º 14
0
    def mini_transform(self, node):
        """
        Stand in for a specializer's transform() with the least work possible.

        :param node: the node to transform
        :return: the PyBasicConversions output, renamed to "apply", with a
            c_float return type and a c_float type on every parameter.
        """
        c_node = PyBasicConversions().visit(node)
        c_node.name = "apply"
        c_node.return_type = ct.c_float()
        # Each parameter receives its own c_float instance.
        for c_param in c_node.params:
            c_param.type = ct.c_float()
        return c_node
Exemplo n.º 15
0
 def eval_with_loop(self, elts):
     """Reduce every element of ``elts`` to a value and return them as a tuple.

     Each element is deep-copied, passed through
     ``replace_loopvars_as_constants``, converted with ``PyBasicConversions``
     and folded with ``ConstantFold``; the ``value`` attribute of the folded
     node is collected.
     """
     def fold(elt):
         # A deep copy is substituted, not the element itself.
         substituted = self.replace_loopvars_as_constants(copy.deepcopy(elt))
         c_node = PyBasicConversions().visit(substituted)
         return ConstantFold().visit(c_node).value

     return tuple([fold(elt) for elt in elts])
Exemplo n.º 16
0
    def mini_transform(self, node):
        """
        Minimal imitation of what a specializer writer's transform() must do.

        :param node: the node to transform
        :return: the node after PyBasicConversions, named "apply", returning
            c_float and taking only c_float parameters.
        """
        result = PyBasicConversions().visit(node)

        result.name = "apply"
        result.return_type = ct.c_float()
        for parameter in result.params:
            # A fresh c_float instance per parameter.
            parameter.type = ct.c_float()

        return result
Exemplo n.º 17
0
 def emit(cls, sources, sinks, keywords, symbol_table):
     """Render the body of ``cls.fn`` as C statements, one per line.

     Parameter names of the converted function are mapped positionally to
     ``sources``; that mapping and ``sinks[0]`` configure a
     ``MapTransformer`` applied to every body statement. ``keywords`` and
     ``symbol_table`` are not used.

     :return: the statements joined by newlines, each followed by ";".
     """
     c_func = PyBasicConversions().visit(get_ast(cls.fn)).body[0]
     statements = c_func.defn
     mapping = {}
     for param, source in zip(c_func.params, sources):
         mapping[param.name] = source
     visitor = MapTransformer(mapping, sinks[0])
     # Visit everything first, then stringify, as two separate phases.
     visited = [visitor.visit(stmt) for stmt in statements]
     return "\n".join(str(stmt) + ";" for stmt in visited)
Exemplo n.º 18
0
 def visit_AugAssign(self, node):
     """Translate ``target += value`` or ``target -= value`` into a C node.

     :param node: the ``ast.AugAssign`` node to translate.
     :return: ``AddAssign`` for ``+=`` and ``SubAssign`` for ``-=``.
     :raises NotImplementedError: for any other augmented operator.
         Previously such nodes fell through and ``None`` was returned
         without any diagnostic.
     """
     # TODO: Handle all types?
     # Reject unsupported operators up front, before visiting any child.
     if not isinstance(node.op, (ast.Add, ast.Sub)):
         raise NotImplementedError(
             "Unsupported augmented assignment operator: %s"
             % type(node.op).__name__)
     value = self.visit(node.value)
     # HACK to get this to work, PyBasicConversions will skip this AugAssign node
     # TODO Figure out why
     value = PyBasicConversions().visit(value)
     assign_cls = AddAssign if isinstance(node.op, ast.Add) else SubAssign
     return assign_cls(self.visit(node.target), value)
Exemplo n.º 19
0
    def transform(self, py_ast, program_config):
        """Generate an OpenMP C file applying ``apply`` to an array and a scalar.

        ``program_config[0]`` supplies ``dtype``, ``ndim``, ``shape``,
        ``size`` and ``scalar_type``. The first statement of ``py_ast`` is
        converted and renamed to ``apply``; a driver named ``FUNC_NAME``
        loops over the array and calls it.

        :return: a one-element list holding the generated ``CFile``.
        """
        input_data = program_config[0]
        length = np.prod(input_data.size)
        pointer = np.ctypeslib.ndpointer(input_data.dtype, input_data.ndim, input_data.shape)
        data_type = get_c_type_from_numpy_dtype(input_data.dtype)()
        scalar_data_type = get_c_type_from_numpy_dtype(np.dtype(input_data.scalar_type))()

        # Per-element kernel: (element, scalar) -> element type.
        apply_one = PyBasicConversions().visit(py_ast.body[0])
        apply_one.name = 'apply'
        apply_one.params[0].type = data_type
        apply_one.params[1].type = scalar_data_type
        apply_one.return_type = data_type  # TODO: figure out which data type to actually preserve

        # TODO: MAKE A CLASS THAT HANDLES SUPPORTED TYPES (INT, FLOAT, DOUBLE)

        loop = StringTemplate(r"""
            #pragma omp parallel for
            for (int i = 0; i < $length; i++) {
                output[i] = apply(arr[i], scalar);
            }
        """, {'length': Constant(length)})

        driver_params = [
            SymbolRef("arr", pointer()),
            SymbolRef("scalar", scalar_data_type),
            SymbolRef("output", pointer()),
        ]
        driver = FunctionDecl(None, FUNC_NAME, params=driver_params, defn=[loop])
        contents = [CppInclude("omp.h"), CppInclude("stdio.h"), apply_one, driver]

        return [CFile("generated", contents, 'omp')]
Exemplo n.º 20
0
    def transform(self, py_ast, program_config):
        """Build the OpenMP C file for an element-wise array/scalar operation.

        The array description is ``program_config[0]``. Its first statement
        of ``py_ast`` becomes the C function ``apply``; a second function
        named ``FUNC_NAME`` iterates over ``arr`` and stores results in
        ``output``.

        :return: ``[CFile]`` with the includes, ``apply`` and the driver.
        """
        array_info = program_config[0]
        length = np.prod(array_info.size)
        pointer = np.ctypeslib.ndpointer(array_info.dtype, array_info.ndim,
                                         array_info.shape)
        data_type = get_c_type_from_numpy_dtype(array_info.dtype)()
        scalar_data_type = get_c_type_from_numpy_dtype(
            np.dtype(array_info.scalar_type))()

        apply_one = PyBasicConversions().visit(py_ast.body[0])
        apply_one.name = 'apply'
        apply_one.params[0].type = data_type
        apply_one.params[1].type = scalar_data_type
        apply_one.return_type = data_type  # TODO: figure out which data type to actually preserve

        # TODO: MAKE A CLASS THAT HANDLES SUPPORTED TYPES (INT, FLOAT, DOUBLE)

        substitutions = {'length': Constant(length)}
        loop_template = StringTemplate(
            r"""
            #pragma omp parallel for
            for (int i = 0; i < $length; i++) {
                output[i] = apply(arr[i], scalar);
            }
        """, substitutions)

        driver = FunctionDecl(
            None,
            FUNC_NAME,
            params=[
                SymbolRef("arr", pointer()),
                SymbolRef("scalar", scalar_data_type),
                SymbolRef("output", pointer()),
            ],
            defn=[loop_template])
        file_body = [CppInclude("omp.h"), CppInclude("stdio.h"), apply_one, driver]

        return [CFile("generated", file_body, 'omp')]
Exemplo n.º 21
0
    def test_multiple_assign_dependent(self):
        """``x, y = y, x`` is rendered through ``____temp__`` variables."""

        def name_tuple(*ids):
            return ast.Tuple(
                elts=tuple(ast.Name(id=ident, ctx=None) for ident in ids))

        swap = ast.Assign([name_tuple("x", "y")], name_tuple("y", "x"))
        rendered = str(PyBasicConversions().visit(swap))

        self.assertEqual(
            rendered,
            "\n____temp__x = x;\n____temp__y = y;\ny = ____temp__y;\nx = ____temp__x;\n"
        )
Exemplo n.º 22
0
    def transform(self, py_ast, program_config):
        """Generate an OpenMP C file applying ``apply`` to two arrays element-wise.

        ``program_config[0]`` supplies ``dtype``, ``ndim``, ``shape`` and
        ``size``. The first statement of ``py_ast`` becomes ``apply``, with
        both parameters and the return value sharing one ctype instance; a
        driver named ``FUNC_NAME`` loops over ``input1`` and ``input2``.

        :return: a one-element list holding the generated ``CFile``.
        """
        input_data = program_config[0]
        length = np.prod(input_data.size)
        pointer = np.ctypeslib.ndpointer(input_data.dtype, input_data.ndim, input_data.shape)
        data_type = get_c_type_from_numpy_dtype(input_data.dtype)()

        apply_one = PyBasicConversions().visit(py_ast.body[0])
        apply_one.name = 'apply'
        apply_one.params[0].type = data_type
        apply_one.params[1].type = data_type
        apply_one.return_type = data_type

        loop = StringTemplate(r"""
            #pragma omp parallel for
            for (int i = 0; i < $length; i++) {
                output[i] = apply(input1[i], input2[i]);
            }
        """, {'length': Constant(length)})

        driver_params = [
            SymbolRef("input1", pointer()),
            SymbolRef("input2", pointer()),
            SymbolRef("output", pointer()),
        ]
        driver = FunctionDecl(None, FUNC_NAME, params=driver_params, defn=[loop])
        contents = [CppInclude("omp.h"), CppInclude("stdio.h"), apply_one, driver]

        return [CFile("generated", contents, 'omp')]
Exemplo n.º 23
0
    def test_multiple_assign_dependent(self):
        """A tuple assignment of two function calls goes through temporaries."""

        def square_call():
            return FunctionCall(func='square',
                                args=[Constant(5), Constant(5)])

        targets = ast.Tuple(elts=(ast.Name(id="x", ctx=None),
                                  ast.Name(id="y", ctx=None)))
        values = ast.Tuple(elts=(square_call(), square_call()))
        rendered = str(PyBasicConversions().visit(ast.Assign([targets], values)))

        self.assertEqual(
            rendered,
            "\n____temp__x = square(5, 5);\n____temp__y = square(5, 5);\nx = ____temp__x;\ny = ____temp__y;\n"
        )
Exemplo n.º 24
0
 def visit_FunctionCall(self, node):
     """Handle a call node: rename min/max, otherwise emit the callee too.

     Calls to ``min``/``max`` are renamed in place to ``fmin``/``fmax``,
     ``math.h`` is added to ``self.includes``, and the node is visited
     generically. For any other call, the callee is looked up in
     ``self.symbol_table``, converted to C, marked static and inline,
     appended to ``self.defns`` and typed as ``c_float`` throughout.

     :param node: the call node; its ``args`` are replaced in place.
     :return: the result of ``generic_visit`` for min/max, else ``node``.
     """
     if node.func.name in {'min', 'max'}:
         # Prefix with "f": min -> fmin, max -> fmax.
         node.func.name = "f" + node.func.name
         # TODO: Add support for all math funcs
         self.includes.add("math.h")
         return super(Backend, self).generic_visit(node)
     # FIXME: This is specific for handling a map function
     # do we have to generalize?
     node.args = [self.visit(arg) for arg in node.args]
     # Fetch the callee's Python AST and convert its first statement to C.
     func_tree = get_ast(self.symbol_table[node.func.name])
     func_tree = PyBasicConversions().visit(func_tree).body[0]
     # Visit the callee with this same visitor, so calls inside it are
     # handled by this method as well.
     func_tree = self.visit(func_tree)
     func_tree.name = C.SymbolRef(node.func.name)
     func_tree.set_static()
     func_tree.set_inline()
     # Appended on every call reached here; no de-duplication is done.
     self.defns.append(func_tree)
     # FIXME: Infer type
     for p in func_tree.params:
         p.type = ct.c_float()
     func_tree.return_type = ct.c_float()
     return node
Exemplo n.º 25
0
    def test_fmin(self):
        """Check declaration filling for a function that calls ``fmax``.

        The nested ``func`` is the test input: its AST is obtained with
        ``get_ast``, converted to C and passed through ``DeclarationFiller``.
        Spaces and newlines are removed before comparing with ``expected``.
        """
        def func():
            a = 3.0
            b = 4.0
            c = fmax(a + b, 0.0)
            return c
        py_ast = get_ast(func).body[0]
        c_ast = PyBasicConversions().visit(py_ast)
        filled_ast = DeclarationFiller().visit(c_ast)
        expected = """
void func() {
    double a = 3.0;
    double b = 4.0;
    double c = fmax(a + b, 0.0);
    return c;
}"""
        # Compare with all spaces and newlines removed from both sides.
        stripped_actual = str(filled_ast).replace(" ", "").replace("\n", "")
        stripped_expected = expected.replace(" ", "").replace("\n", "")
        self.assertEqual(stripped_actual, stripped_expected)
    def test_simple_transform(self):
        """Run a small stencil kernel through ``StencilOmpTransformer``.

        The nested ``Kernel.kernel`` method is the test input; its AST is
        taken with ``ctree.get_ast`` and converted before being transformed.
        """
        class Kernel(StencilKernel):
            def kernel(self, in_img, out_img):
                for x in out_img.interior_points():
                    for y in in_img.neighbors(x, 1):
                        out_img[x] += in_img[y]

        kernel = Kernel()
        kernel.should_unroll = False
        # NOTE(review): ``radius`` is not defined in this method; presumably
        # a module-level or enclosing-scope name — confirm.
        out_grid = StencilGrid([5])
        out_grid.ghost_depth = radius
        in_grid = StencilGrid([5])
        in_grid.ghost_depth = radius
        # Fill the five input cells with 1.
        for x in range(0, 5):
            in_grid.data[x] = 1

        tree1 = ctree.get_ast(Kernel.kernel)
        tree2 = PyBasicConversions().visit(tree1)
        actual = StencilOmpTransformer([in_grid], out_grid,
                                       kernel).visit(tree2)

        # NOTE(review): ``second`` is not defined in this method either;
        # the expected value must come from outside — confirm.
        self.assertEqual(actual, second)
    def transform(self, py_ast, program_config):
        """Generate an OpenMP C file that subtracts per-segment averages.

        ``program_config[0]`` supplies ``num_frames``, ``data_height``,
        ``size``, ``segment_length``, ``dtype``, ``ndim`` and ``shape``.
        The generated driver, named ``REDUCTION_FUNC_NAME``, reads
        ``input_arr`` and writes ``output_arr``.

        :param py_ast: Python AST containing the kernel function.
        :param program_config: sequence whose first item describes the input.
        :return: a one-element list holding the generated ``CFile``.
        """

        # Get the initial data
        input_data = program_config[0]

        num_2d_layers = np.prod(input_data.num_frames)
        data_height = np.prod(input_data.data_height)
        layer_length = np.prod(input_data.size // num_2d_layers)
        segment_length = np.prod(input_data.segment_length)

        inp_type = get_c_type_from_numpy_dtype(input_data.dtype)()

        input_pointer = np.ctypeslib.ndpointer(input_data.dtype, input_data.ndim, input_data.shape)
        output_pointer = np.ctypeslib.ndpointer(input_data.dtype, 1, (input_data.size, 1))

        # Get the kernel function, apply_one
        apply_one = PyBasicConversions().visit(py_ast).find(FunctionDecl)

        # Return value and both parameters share the single inp_type instance.
        apply_one.return_type = inp_type
        apply_one.params[0].type = inp_type
        apply_one.params[1].type = inp_type

        # Naming our kernel method
        apply_one.name = 'apply'
        # Number of segments per layer, truncated to an int.
        num_pfovs = int(layer_length / segment_length)
        # print ("num layers: ", num_2d_layers)
        # print ("input size: ", input_data.size)
        # print ("layer length: ", layer_length)

        # TODO: TIME TO START CPROFILING THINGS!
        # For each (level, i): the first inner loop averages the segment,
        # the second writes input minus that average. The template does not
        # call ``apply``, although ``apply_one`` is emitted into the file.
        reduction_template = StringTemplate(r"""
            #pragma omp parallel for collapse(2)
            for (int level = 0; level < $num_2d_layers; level++) {
                for (int i=0; i<$num_pfovs ; i++) {
                    int level_offset = level * $layer_length;
                    double avg = 0.0;
                    // #pragma omp parallel for reduction (+:avg)
                    for (int j=0; j<$pfov_length; j++) {
                        int in_layer_offset = ($pfov_length * i + j) /
                            ($layer_length / $data_height);

                        int index = (in_layer_offset + ($pfov_length * i + j) * $data_height)
                                     % $layer_length;
                        // printf ("Index: %i, I: %i, J: %i\n", index, i, j);
                        avg += input_arr[level_offset + index];
                    }
                    avg = avg / $pfov_length;

                    // #pragma omp parallel for
                    for (int j=0; j<$pfov_length; j++) {
                        int in_layer_offset = ($pfov_length * i + j) /
                            ($layer_length / $data_height);

                        int index = (in_layer_offset + ($pfov_length * i + j) * $data_height)
                                     % $layer_length;
                        output_arr[level_offset + index] = input_arr[level_offset + index] - avg;
                    }
                }
            }
        """, {
            'num_2d_layers': Constant(num_2d_layers),
            'layer_length': Constant(layer_length),
            'num_pfovs': Constant(num_pfovs),
            'pfov_length': Constant(segment_length),
            'data_height': Constant(data_height),
        })

        # Includes, then the kernel, then the driver, compiled as 'omp'.
        reducer = CFile("generated", [
            CppInclude("omp.h"),
            CppInclude("stdio.h"),
            apply_one,
            FunctionDecl(None, REDUCTION_FUNC_NAME,
                         params=[
                             SymbolRef("input_arr", input_pointer()),
                             SymbolRef("output_arr", output_pointer())
                         ],
                         defn=[
                             reduction_template
                         ])
        ], 'omp')

        return [reducer]
Exemplo n.º 28
0
 def test_List(self):
     """A Python list literal is rendered as a C brace initializer."""
     module = ast.parse("[1, 5, 7, 3]")
     c_array = PyBasicConversions().visit(module).find(Array)
     rendered = str(c_array)
     self.assertEqual(rendered, "{1, 5, 7, 3}")
Exemplo n.º 29
0
 def visit_Assign(self, node):
     """Build a C ``Assign`` from the first target and the value of ``node``.

     Both sides are visited by this visitor first and then converted with a
     fresh ``PyBasicConversions``. Only ``node.targets[0]`` is used.
     """
     visited_target = self.visit(node.targets[0])
     c_target = PyBasicConversions().visit(visited_target)
     visited_value = self.visit(node.value)
     c_value = PyBasicConversions().visit(visited_value)
     return Assign(c_target, c_value)
Exemplo n.º 30
0
 def test_Equals(self):
     """A chained ``==`` comparison is rendered as two tests joined by ``&&``."""
     module = ast.parse("5 == foo == 6")
     binary_op = PyBasicConversions().visit(module).find(BinaryOp)
     rendered = str(binary_op)
     self.assertEqual(rendered, "5 == foo && foo == 6")
Exemplo n.º 31
0
 def test_minus(self):
     """Unary minus on a name is checked against ``- foo``."""
     module = ast.parse("- foo")
     unary_op = PyBasicConversions().visit(module).find(UnaryOp)
     self._check(unary_op, "- foo")
Exemplo n.º 32
0
 def test_not(self):
     """Python ``not foo`` is checked against ``! foo``."""
     module = ast.parse("not foo")
     unary_op = PyBasicConversions().visit(module).find(UnaryOp)
     self._check(unary_op, "! foo")
Exemplo n.º 33
0
 def test_LessThan(self):
     """A chained ``<`` comparison is rendered as two tests joined by ``&&``."""
     module = ast.parse("5 < foo < 6")
     binary_op = PyBasicConversions().visit(module).find(BinaryOp)
     rendered = str(binary_op)
     self.assertEqual(rendered, "5 < foo && foo < 6")
Exemplo n.º 34
0
 def test_CUnaryOp(self):
     """A C ``Not`` node passed through the converter still renders as ``! foo``."""
     negation = Not(SymbolRef("foo"))
     unary_op = PyBasicConversions().visit(negation).find(UnaryOp)
     rendered = str(unary_op)
     self._check(rendered, "! foo")
Exemplo n.º 35
0
    def transform(self, tree, program_config):
        """Build the OpenCL reduction kernel and its C launcher for one array.

        The first ``FunctionDecl`` found in the converted ``tree`` becomes the
        binary function ``apply``. ``apply_kernel`` folds strided elements of
        ``A`` into ``localResult``, stores it in ``localData``, and the work
        item with ``globalId == 0`` folds ``localData`` into ``output_buf[0]``.

        :param tree: Python AST containing the binary function to apply.
        :param program_config: sequence whose first item is the input array
            ``A`` (its ``dtype``, ``ndim`` and ``shape`` are read).
        :return: ``[OclFile, CFile]`` — the kernel file and the launcher.
        """
        # The result is not used below; the call is kept as in the original.
        dirname = self.config_to_dirname(program_config)
        A = program_config[0]
        len_A = np.prod(A.shape)
        # ctype class (not an instance) for the array's element dtype.
        data_type = get_c_type_from_numpy_dtype(A.dtype)
        pointer = np.ctypeslib.ndpointer(A.dtype, A.ndim, A.shape)
        apply_one = PyBasicConversions().visit(tree).find(FunctionDecl)

        # Name the kernel function and give each slot its own type instance.
        apply_one.name = 'apply'
        apply_one.return_type = data_type()
        apply_one.params[0].type = data_type()
        apply_one.params[1].type = data_type()

        # Upper bound of the ``offset`` loop in the kernel.
        responsible_size = int(len_A / WORK_GROUP_SIZE)

        # ``A.dtype`` is a numpy.dtype instance, so it has to be compared
        # with ``==``. The previous identity test against the scalar type
        # ``np.int32`` was never true, so c_float was always chosen.
        local_result_type = ct.c_int() if A.dtype == np.int32 else ct.c_float()

        # localResult = apply(localResult, A[globalId + offset * WORK_GROUP_SIZE])
        accumulate_loop = For(
            Assign(SymbolRef('offset', ct.c_int()), Constant(1)),
            Lt(SymbolRef('offset'), Constant(responsible_size)),
            PostInc(SymbolRef('offset')),
            [
                Assign(SymbolRef('localResult'),
                       FunctionCall(apply_one.name, [
                           SymbolRef('localResult'),
                           ArrayRef(SymbolRef('A'),
                                    Add(SymbolRef('globalId'),
                                        Mul(SymbolRef('offset'),
                                            Constant(WORK_GROUP_SIZE)))),
                       ])),
            ]
        )

        # Unrolled fold over localData[1 .. WORK_GROUP_SIZE - 1], followed by
        # the store of the final value into output_buf[0].
        final_fold = [
            Assign(SymbolRef('localResult'),
                   FunctionCall(SymbolRef(apply_one.name), [
                       SymbolRef('localResult'),
                       ArrayRef(SymbolRef('localData'), Constant(x)),
                   ]))
            for x in range(1, WORK_GROUP_SIZE)
        ]
        final_fold.append(
            Assign(ArrayRef(SymbolRef('output_buf'), Constant(0)),
                   SymbolRef('localResult')))

        # Controller kernel ("apply_kernel") expressed with ctree nodes.
        apply_kernel = FunctionDecl(
            None, "apply_kernel",
            params=[
                SymbolRef("A", pointer()).set_global(),
                SymbolRef("output_buf", pointer()).set_global(),
                SymbolRef("localData", pointer()).set_local(),
            ],
            defn=[
                Assign(SymbolRef('groupId', ct.c_int()), get_group_id(0)),
                Assign(SymbolRef('globalId', ct.c_int()), get_global_id(0)),
                Assign(SymbolRef('localId', ct.c_int()), get_local_id(0)),
                Assign(SymbolRef('localResult', local_result_type),
                       ArrayRef(SymbolRef('A'), SymbolRef('globalId'))),
                accumulate_loop,
                Assign(ArrayRef(SymbolRef('localData'), SymbolRef('globalId')),
                       SymbolRef('localResult')),
                barrier(CLK_LOCAL_MEM_FENCE()),
                If(Eq(SymbolRef('globalId'), Constant(0)), final_fold),
            ]
        ).set_kernel()

        # Hardcoded host-side C code that sets the kernel arguments and
        # enqueues the kernel. ``$n`` is supplied but not referenced.
        control = StringTemplate(r"""
        #ifdef __APPLE__
        #include <OpenCL/opencl.h>
        #else
        #include <CL/cl.h>
        #endif

        #include <stdio.h>

        void apply_all(cl_command_queue queue, cl_kernel kernel, cl_mem buf, cl_mem out_buf) {
            size_t global = $local;
            size_t local = $local;
            intptr_t len = $length;
            clSetKernelArg(kernel, 0, sizeof(cl_mem), &buf);
            clSetKernelArg(kernel, 1, sizeof(cl_mem), &out_buf);
            clSetKernelArg(kernel, 2, local * sizeof(int), NULL);
            clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
        }
        """, {'local': Constant(WORK_GROUP_SIZE),
              'n': Constant((len_A + WORK_GROUP_SIZE - (len_A % WORK_GROUP_SIZE))/2),
              'length': Constant(len_A),
        })

        ocl_kernel = OclFile("kernel", [apply_one, apply_kernel])
        c_controller = CFile("generated", [control])
        return [ocl_kernel, c_controller]
Exemplo n.º 36
0
    def transform(self, tree, program_config):
        """Generate, build and wrap the OpenCL init and compute kernels.

        Both kernel bodies are written as Python source, parsed with
        ``ast.parse`` and converted with one shared ``PyBasicConversions``
        instance. The generated source and codegen output are printed.

        NOTE(review): the init body formats ``len_x`` from ``base_shape[0]``
        while the compute body formats ``len_x`` from ``base_shape[1]`` —
        confirm this is intended for non-square shapes.

        :param tree: not used by this method.
        :param program_config: sequence whose first item provides
            ``base_shape`` and ``border``.
        :return: the result of ``OclMatrixPowers().finalize(...)``.
        """
        call_args = program_config[0]

        base_size = call_args.base_shape[0] * call_args.base_shape[1]
        border = call_args.border

        c_float_type = c_float

        transformer = PyBasicConversions()

        output = unique_name()

        # Init kernel: copies ``input`` into the output buffer.
        init_entry_point = unique_kernel_name()
        init_params = [
            SymbolRef('input', POINTER(c_float_type)(), _global=True, _const=True),
            SymbolRef(output, POINTER(c_float_type)(), _global=True),
        ]

        init_defn = []
        init_defn.extend([
            Assign(SymbolRef('x', c_int()), get_global_id(0)),
            Assign(SymbolRef('y', c_int()), get_global_id(1)),
        ])

        body = """{output}[y * {len_x} + x] = input[y * {len_x} + x]""".format(
            output=output, len_x=call_args.base_shape[0]
        )
        print(body)
        tree_body = ast.parse(body).body

        init_defn.extend(tree_body)

        init_tree = FunctionDecl(None, init_entry_point, init_params, init_defn)
        init_tree.set_kernel()
        init_kernel = OclFile('kernel', [init_tree])
        init_kernel = transformer.visit(init_kernel)
        print("init kernel codegen")
        print(init_kernel.codegen())

        # Compute kernel: writes slice ``power + 1`` from slice ``power``.
        compute_entry_point = unique_kernel_name()
        compute_params = [
            SymbolRef(output, POINTER(c_float_type)(), _global=True),
            SymbolRef('power', c_int(), _const=True),
        ]
        compute_defn = []
        compute_defn.extend([
            Assign(SymbolRef('x', c_int()), get_global_id(0)),
            Assign(SymbolRef('y', c_int()), get_global_id(1)),
        ])

        body = """{matrix}[(power+1) * {base_size} + y * {len_x} + x] =
                    0.1 * {matrix}[
                        power * {base_size} + clamp(y-1, {border}, {len_y}-{border}-1) * {len_x} +  clamp(x, {border}, {len_x}-{border}-1)
                    ] +
                    0.1 * {matrix}[
                        power * {base_size} + clamp(y+1, {border}, {len_y}-{border}-1) * {len_x} +  clamp(x, {border}, {len_x}-{border}-1)
                    ] +
                    0.4 * {matrix}[
                        power * {base_size} + clamp(y, {border}, {len_y}-{border}-1) * {len_x} +  clamp(x-1, {border}, {len_x}-{border}-1)
                    ] +
                    0.4 * {matrix}[
                        power * {base_size} + clamp(y, {border}, {len_y}-{border}-1) * {len_x} +  clamp(x+1, {border}, {len_x}-{border}-1)
                    ] +
                    1.0 * {matrix}[
                        power * {base_size} + clamp(y, {border}, {len_y}-{border}-1) * {len_x} +  clamp(x, {border}, {len_x}-{border}-1)
                    ]
        """.format(
            matrix=output,
            base_size=base_size,
            len_y=call_args.base_shape[0],
            len_x=call_args.base_shape[1],
            border=border,
        )

        # Collapse every whitespace run (including newlines) to one space so
        # the statement parses as a single line. The pattern is a raw string:
        # "\s" in a normal string literal is an invalid escape sequence.
        body = re.sub(r"\s+", " ", body)
        print(body)
        tree_body = ast.parse(body).body

        compute_defn.extend(tree_body)

        compute_tree = FunctionDecl(None, compute_entry_point, compute_params, compute_defn)
        compute_tree.set_kernel()
        compute_kernel = OclFile('kernel', [compute_tree])
        compute_kernel = transformer.visit(compute_kernel)
        print("compute kernel codegen")
        print(compute_kernel.codegen())

        # Build both programs in the context owned by the callable wrapper.
        fn = OclMatrixPowers()
        init_program = clCreateProgramWithSource(fn.context, init_kernel.codegen()).build()
        init_ptr = init_program[init_entry_point]

        compute_program = clCreateProgramWithSource(fn.context, compute_kernel.codegen()).build()
        compute_ptr = compute_program[compute_entry_point]

        return fn.finalize(init_ptr, compute_ptr, (call_args.base_shape[1], call_args.base_shape[0]))
Exemplo n.º 37
0
    def transform(self, tree, program_config):
        """Build the OpenCL kernel file and C driver for a one-group reduction.

        ``tree.body[0]`` is lowered with ``PyBasicConversions`` and given the
        C element type of ``A`` for its return value and first two parameters.
        The generated ``apply_kernel`` has three phases:

        1. every work item folds ``A[globalId + i * WORK_GROUP_SIZE]`` for
           ``i`` in ``range(1, responsible_size)`` into ``localResult`` by
           calling ``apply``;
        2. each work item stores its ``localResult`` in ``localData`` and
           waits on a local-memory barrier;
        3. the work item with ``globalId == 0`` folds
           ``localData[1 .. WORK_GROUP_SIZE - 1]`` and writes the result to
           ``output_buf[0]``.

        NOTE(review): ``responsible_size`` is ``int(len_A / WORK_GROUP_SIZE)``,
        so any remainder elements do not appear to be visited -- confirm that
        callers only pass lengths that are a multiple of ``WORK_GROUP_SIZE``.

        :param tree: AST whose first body element is the function to lower.
        :param program_config: sequence whose first element is the array ``A``.
        :return: ``[OclFile, CFile]`` -- the kernel file and the C driver.
        """
        A = program_config[0]
        len_A = np.prod(A.shape)
        inner_type = get_c_type_from_numpy_dtype(A.dtype)()
        pointer = np.ctypeslib.ndpointer(A.dtype, A.ndim, A.shape)

        # Lower the Python function and type its signature.
        apply_one = PyBasicConversions().visit(tree.body[0])
        apply_one.return_type = inner_type
        for position in (0, 1):
            apply_one.params[position].type = inner_type

        responsible_size = int(len_A / WORK_GROUP_SIZE)

        kernel_params = [
            SymbolRef("A", pointer()).set_global(),
            SymbolRef("output_buf", pointer()).set_global(),
            SymbolRef("localData", pointer()).set_local(),
        ]

        # Phase 0: work-item ids and the accumulator seeded with A[globalId].
        kernel_body = [
            Assign(SymbolRef('groupId', ct.c_int()), get_group_id(0)),
            Assign(SymbolRef('globalId', ct.c_int()), get_global_id(0)),
            Assign(SymbolRef('localId', ct.c_int()), get_local_id(0)),
            Assign(SymbolRef('localResult', ct.c_int()),
                   ArrayRef(SymbolRef('A'), SymbolRef('globalId'))),
        ]

        # Phase 1: unrolled strided accumulation into localResult.
        for i in range(1, responsible_size):
            kernel_body.append(
                Assign(SymbolRef('localResult'),
                       FunctionCall(SymbolRef('apply'),
                                    [SymbolRef('localResult'),
                                     ArrayRef(SymbolRef('A'),
                                              Add(SymbolRef('globalId'),
                                                  Constant(i * WORK_GROUP_SIZE)))])))

        # Phase 2: publish the per-item result and synchronise.
        kernel_body.append(
            Assign(ArrayRef(SymbolRef('localData'), SymbolRef('globalId')),
                   SymbolRef('localResult')))
        kernel_body.append(barrier(CLK_LOCAL_MEM_FENCE()))

        # Phase 3: a single work item combines all published results.
        is_first_item = Eq(SymbolRef('globalId'), Constant(0))
        final_fold = []
        for x in range(1, WORK_GROUP_SIZE):
            final_fold.append(
                Assign(SymbolRef('localResult'),
                       FunctionCall(SymbolRef('apply'),
                                    [SymbolRef('localResult'),
                                     ArrayRef(SymbolRef('localData'), Constant(x))])))
        final_fold.append(
            Assign(ArrayRef(SymbolRef('output_buf'), Constant(0)),
                   SymbolRef('localResult')))
        kernel_body.append(If(is_first_item, final_fold))

        apply_kernel = FunctionDecl(
            None, "apply_kernel", params=kernel_params, defn=kernel_body
        ).set_kernel()

        kernel = OclFile("kernel", [apply_one, apply_kernel])

        # Host-side driver: one enqueue with global size == local size.
        driver_source = r"""
        #ifdef __APPLE__
        #include <OpenCL/opencl.h>
        #else
        #include <CL/cl.h>
        #endif

        #include <stdio.h>

        void apply_all(cl_command_queue queue, cl_kernel kernel, cl_mem buf, cl_mem out_buf) {
            size_t global = $local;
            size_t local = $local;
            intptr_t len = $length;
            cl_mem swap;
            clSetKernelArg(kernel, 0, sizeof(cl_mem), &buf);
            clSetKernelArg(kernel, 1, sizeof(cl_mem), &out_buf);
            clSetKernelArg(kernel, 2, local * sizeof(int), NULL);
            clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
        }
        """
        driver_values = {
            'local': Constant(WORK_GROUP_SIZE),
            # 'n' is supplied although the template above never references $n.
            'n': Constant((len_A + WORK_GROUP_SIZE - (len_A % WORK_GROUP_SIZE))/2),
            'length': Constant(len_A),
        }
        control = StringTemplate(driver_source, driver_values)

        return [kernel, CFile("generated", [control])]
Exemplo n.º 38
0
    def transform(self, tree, program_config):
        """Build and finalize the iterated OpenCL reduction over ``A``.

        ``tree.body[0]`` is lowered with ``PyBasicConversions`` and typed with
        ``A.dtype.type()``; the generated kernel calls it as ``apply``.

        The generated ``apply_kernel(A, output_buf, len)`` runs, for
        ``i = 1, 2, 4, ... < WORK_GROUP_SIZE``::

            if (globalId % (i * 2) == 0 && globalId + i < len)
                A[globalId] = apply(A[globalId], A[globalId + i]);
            barrier(CLK_GLOBAL_MEM_FENCE);

        and then lets the work item with ``localId == 0`` copy ``A[globalId]``
        to ``output_buf[groupId]``.  The C driver ``apply_all`` enqueues the
        kernel ``run_limit`` times, swapping ``buf``/``out_buf`` and shrinking
        ``len`` to ``ceil(len / local)`` after every run.

        :param tree: AST whose first body element is the function to lower.
        :param program_config: sequence whose first element is the array ``A``.
        :return: the result of ``ConcreteXorReduction().finalize`` for the
            built kernel, the project, the entry name and the entry type.
        """
        A = program_config[0]
        len_A = np.prod(A.shape)
        inner_type = A.dtype.type()
        pointer = np.ctypeslib.ndpointer(A.dtype, A.ndim, A.shape)
        apply_one = PyBasicConversions().visit(tree.body[0])
        apply_one.return_type = inner_type
        apply_one.params[0].type = inner_type
        apply_one.params[1].type = inner_type

        apply_kernel = FunctionDecl(
            None, "apply_kernel",
            params=[
                SymbolRef("A", pointer()).set_global(),
                SymbolRef("output_buf", pointer()).set_global(),
                SymbolRef("len", ct.c_int()),
            ],
            defn=[
                # Work-group id, global work-item id and local work-item id.
                Assign(SymbolRef('groupId', ct.c_int()), get_group_id(0)),
                Assign(SymbolRef('globalId', ct.c_int()), get_global_id(0)),
                Assign(SymbolRef('localId', ct.c_int()), get_local_id(0)),
                # for (int i = 1; i < WORK_GROUP_SIZE; i *= 2)
                For(
                    Assign(SymbolRef('i', ct.c_int()), Constant(1)),
                    Lt(SymbolRef('i'), Constant(WORK_GROUP_SIZE)),
                    MulAssign(SymbolRef('i'), Constant(2)),
                    [
                        # Only items aligned to the current stride whose
                        # partner index is still inside the data combine.
                        If(
                            And(
                                Eq(Mod(SymbolRef('globalId'),
                                       Mul(SymbolRef('i'), Constant(2))),
                                   Constant(0)),
                                Lt(Add(SymbolRef('globalId'), SymbolRef('i')),
                                   SymbolRef("len")),
                            ),
                            [
                                # A[globalId] = apply(A[globalId], A[globalId + i])
                                Assign(
                                    ArrayRef(SymbolRef('A'), SymbolRef('globalId')),
                                    FunctionCall(
                                        SymbolRef('apply'),
                                        [
                                            ArrayRef(SymbolRef('A'),
                                                     SymbolRef('globalId')),
                                            ArrayRef(SymbolRef('A'),
                                                     Add(SymbolRef('globalId'),
                                                         SymbolRef('i'))),
                                        ],
                                    ),
                                ),
                            ],
                        ),
                        # The next iteration reads elements of the __global
                        # buffer A written by other work items, so the barrier
                        # must fence global memory; CLK_LOCAL_MEM_FENCE only
                        # orders accesses to local memory.
                        FunctionCall(SymbolRef('barrier'),
                                     [SymbolRef('CLK_GLOBAL_MEM_FENCE')]),
                    ],
                ),
                # One item per group publishes the group's partial result.
                If(
                    Eq(SymbolRef('localId'), Constant(0)),
                    [
                        Assign(ArrayRef(SymbolRef('output_buf'), SymbolRef('groupId')),
                               ArrayRef(SymbolRef('A'), SymbolRef('globalId'))),
                    ],
                ),
            ],
        ).set_kernel()

        kernel = OclFile("kernel", [apply_one, apply_kernel])

        # The kernel declares ``len`` as a C int, so the driver must pass an
        # int-sized value: clSetKernelArg rejects an arg_size that differs
        # from the parameter's type size (intptr_t is 8 bytes on LP64 hosts).
        control = StringTemplate(r"""
        #ifdef __APPLE__
        #include <OpenCL/opencl.h>
        #else
        #include <CL/cl.h>
        #endif

        #include <stdio.h>

        void apply_all(cl_command_queue queue, cl_kernel kernel, cl_mem buf, cl_mem out_buf) {
            size_t global = $n;
            size_t local = $local;
            int len = $length;
            cl_mem swap;
            for (int runs = 0; runs < $run_limit ; runs++){
                clSetKernelArg(kernel, 0, sizeof(cl_mem), &buf);
                clSetKernelArg(kernel, 1, sizeof(cl_mem), &out_buf);
                clSetKernelArg(kernel, 2, sizeof(int), &len);
                clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
                swap = buf;
                buf = out_buf;
                out_buf = swap;
                len  = len/local + (len % local != 0);
            }
        }
        """, {'local': Constant(WORK_GROUP_SIZE),
              # Global size: len_A rounded up past the next multiple of
              # WORK_GROUP_SIZE; surplus items are masked by the len test.
              'n': Constant(len_A + WORK_GROUP_SIZE - (len_A % WORK_GROUP_SIZE)),
              'length': Constant(len_A),
              # Each run shrinks the data by a factor of WORK_GROUP_SIZE.
              'run_limit': Constant(ceil(log(len_A, WORK_GROUP_SIZE)))
        })

        proj = Project([kernel, CFile("generated", [control])])
        fn = ConcreteXorReduction()

        program = cl.clCreateProgramWithSource(fn.context, kernel.codegen()).build()
        apply_kernel_ptr = program['apply_kernel']

        # NOTE(review): apply_all takes four arguments (queue, kernel, buf,
        # out_buf) but only three argument types are declared here -- confirm
        # against the caller before extending the prototype.
        entry_type = ct.CFUNCTYPE(None, cl.cl_command_queue, cl.cl_kernel, cl.cl_mem)
        return fn.finalize(apply_kernel_ptr, proj, "apply_all", entry_type)
Exemplo n.º 39
0
 def test_GreaterThan(self):
     """A chained ``>=`` comparison renders as two tests joined by ``&&``."""
     parsed = ast.parse("5 >= foo >= 6")
     converted = PyBasicConversions().visit(parsed)
     # Inspect the first BinaryOp that find() yields from the converted tree.
     comparison = converted.find(BinaryOp)
     self.assertEqual(str(comparison), "5 >= foo && foo >= 6")