Exemplo n.º 1
0
def insert_free(name):

    return (StringTemplate(
        "$free;", {
            "free": C.FunctionCall(C.SymbolRef('_mm_free'),
                                   [C.SymbolRef(name)]),
        }))
Exemplo n.º 2
0
 def visit_AugAssign(self, node):
     node.value = self.visit(node.value)
     if not self.vectorize:
         node.target = self.visit(node.target)
         return node
     if util.contains_symbol(node.target, self.loop_var):
         return simd_macros.mm256_store_ps(
             node.target,
             C.BinaryOp(self.visit(node.target), node.op, node.value))
     elif isinstance(node.op, C.Op.Add) and isinstance(node.value, C.BinaryOp) and \
             isinstance(node.value.op, C.Op.Mul):
         # if not isinstance(node.target, C.SymbolRef):
         #     node.value = C.FunctionCall(C.SymbolRef("vsum"), [node.value])
         #     return node
         # else:
         return C.Assign(
             node.target,
             C.FunctionCall(
                 C.SymbolRef("_mm256_fmadd_ps"),
                 [node.value.left, node.value.right, node.target]))
     elif isinstance(node.op, C.Op.Add) and isinstance(
             node.value, C.FunctionCall):
         # TODO: Verfiy it's a vector intrinsic
         return C.Assign(
             node.target,
             C.FunctionCall(C.SymbolRef("_mm256_add_ps"),
                            [node.value, node.target]))
     elif isinstance(node.target, C.BinaryOp) and isinstance(
             node.target.op, C.Op.ArrayRef):
         raise NotImplementedError()
     node.target = self.visit(node.target)
     return node
Exemplo n.º 3
0
 def gen_loop_index(self, loopvars, shape):
     curr = C.SymbolRef(loopvars[-1])
     for i in reversed(range(len(loopvars) - 1)):
         curr = C.Add(
             C.Mul(C.SymbolRef(loopvars[i]),
                   C.Constant(np.prod(shape[i + 1:]))), curr)
     return curr
Exemplo n.º 4
0
    def transform(self, py_ast, program_cfg):
        arg_cfg, tune_cfg = program_cfg
        tree = PyBasicConversions().visit(py_ast)
        param_dict = {}
        tree.body[0].params.append(C.SymbolRef("retval", arg_cfg[0]()))
        # Annotate arguments
        for param, type in zip(tree.body[0].params, arg_cfg):
            param.type = type()
            param_dict[param.name] = type._dtype_

        length = np.prod(arg_cfg[0]._shape_)
        transformer = MapTransformer("i", param_dict, "retval")
        body = list(map(transformer.visit, tree.body[0].defn))

        tree.body[0].defn = [C.For(
                C.Assign(C.SymbolRef("i", ct.c_int()), C.Constant(0)),
                C.Lt(C.SymbolRef("i"), C.Constant(length)),
                C.PostInc(C.SymbolRef("i")),
                body=body,
                pragma="ivdep"
            )]

        tree = DeclarationFiller().visit(tree)
        defns = []
        tree = HwachaVectorize(param_dict, defns).visit(tree)
        file_body = [
            StringTemplate("#include <stdlib.h>"),
            StringTemplate("#include <stdint.h>"),
            StringTemplate("#include <assert.h>"),
            StringTemplate("extern \"C\" void __hwacha_body(void);"),
        ]
        file_body.extend(defns)
        file_body.append(tree)
        return [CFile("generated", file_body)]
Exemplo n.º 5
0
 def rewrite_arg(self, arg):
     if isinstance(arg, C.UnaryOp) and isinstance(
             arg.op, C.Op.Ref) and isinstance(
                 arg.arg, C.BinaryOp) and isinstance(
                     arg.arg.op, C.Op.ArrayRef):
         curr_node = arg.arg
     elif isinstance(arg, C.BinaryOp) and isinstance(arg.op, C.Op.ArrayRef):
         curr_node = arg
     else:
         curr_node = None
     idx = self.dim
     num_zeroes = self.prefetch_num_zeroes
     while (idx + 1 != 0):
         if num_zeroes > 0:
             curr_node.right = C.Constant(0)
             num_zeroes -= 1
         curr_node = curr_node.left
         idx += 1
     old_expr = curr_node.right
     #if isinstance(old_expr, C.BinaryOp) and isinstance(old_expr.op, C.Op.Add):
     #  old_expr = old_expr.left
     #new_expr = C.Add(old_expr, C.Mul(C.Add(C.SymbolRef(self.prefetch_loop_var), C.SymbolRef(self.prefetch_constant)), C.SymbolRef(self.prefetch_multiplier)))
     new_expr = C.Mul(
         C.Add(C.SymbolRef(self.prefetch_loop_var),
               C.SymbolRef(self.prefetch_constant)),
         C.SymbolRef(self.prefetch_multiplier))
     curr_node.right = new_expr
     if isinstance(arg, C.BinaryOp) and isinstance(arg.op, C.Op.ArrayRef):
         return C.Ref(arg)
     return arg
Exemplo n.º 6
0
    def visit_If(self, node):
        check = [
            util.contains_symbol(node, var)
            for var in list(self.unrolled_vars) + [self.target_var]
        ]

        if any(check):
            body = []
            for i in range(self.factor):
                stmt = deepcopy(node)
                for var in self.unrolled_vars:
                    stmt = util.replace_symbol(var,
                                               C.SymbolRef(var + "_" + str(i)),
                                               stmt)
                if self.unroll_type == 0:
                    body.append(
                        util.replace_symbol(
                            self.target_var,
                            C.Add(C.SymbolRef(self.target_var), C.Constant(i)),
                            stmt))
                elif self.unroll_type == 1:
                    body.append(
                        util.replace_symbol(
                            self.target_var,
                            C.Add(
                                C.Mul(C.Constant(self.factor),
                                      C.SymbolRef(self.target_var)),
                                C.Constant(i)), stmt))
                else:
                    assert (false)

            return body
        return node
Exemplo n.º 7
0
 def visit_BinaryOp(self, node):
     if isinstance(node.op, C.Op.Assign):
         check = [
             util.contains_symbol(node.right, var)
             for var in list(self.unrolled_vars) + [self.target_var]
         ]
         if any(check):
             body = []
             if hasattr(node.left, 'type') and node.left.type is not None:
                 self.unrolled_vars.add(node.left.name)
             for i in range(self.factor):
                 stmt = deepcopy(node)
                 for var in self.unrolled_vars:
                     stmt = util.replace_symbol(
                         var, C.SymbolRef(var + "_" + str(i)), stmt)
                 if self.unroll_type == 0:
                     body.append(
                         util.replace_symbol(
                             self.target_var,
                             C.Add(C.SymbolRef(self.target_var),
                                   C.Constant(i)), stmt))
                 elif self.unroll_type == 1:
                     body.append(
                         util.replace_symbol(
                             self.target_var,
                             C.Add(
                                 C.Mul(C.Constant(self.factor),
                                       C.SymbolRef(self.target_var)),
                                 C.Constant(i)), stmt))
                 else:
                     assert (false)
             return body
     return node
Exemplo n.º 8
0
 def _gen_reduce_for_loop(self, loop, var, size):
     looplen1 = loop.test.right
     loopincr = loop.incr.value.value
     return StringTemplate("""
         //{
         //  ContinueNode *$reduce_node = new ContinueNode(&graph, [=]() {
           parallel_for(0,$size,
             [=](int low, int high) {
               #pragma simd
               for (int x = low; x < high; ++x) {
                 float sum = _$arr[x];
                 #pragma unroll
                 for (int i = 1; i < $batch_size; ++ i) {
                   sum += _$arr[i * $size + x];
                 }
                 _$arr[x] = sum;
               }
             });
         //  });
         //  for (int i = 0; i < $looplen1; i+=$loopincr) {
         //    make_edge($node_list[i], $reduce_node);
         //  }
         //};
         """, {'size': C.Constant(size),
               'batch_size': C.Constant(self.batch_size),
               'arr': C.SymbolRef(var),
               'node_list': C.SymbolRef("node_list_"),
               'reduce_node': C.SymbolRef("reduce_node_"),
               'looplen1': C.Constant(looplen1.value),  
               'loopincr': C.Constant(loopincr)
               })
Exemplo n.º 9
0
 def _gen_reduce_for_loop(self, loop, var, size):
     looplen1 = loop.test.right
     loopincr = loop.incr.value.value
     kernel_name = self._gen_unique_kernel_name()
     kernel_src = StringTemplate("""
       __kernel void $kernel_name(__global float * $arr) {
         int x = get_global_id(0);
         float sum = $arr[x];
         #pragma unroll
         for (int i = 1; i < $batch_size; ++ i) {
           sum += $arr[i * $size + x];
         }
         $arr[x] = sum;
       }
     """, {'batch_size': C.Constant(self.batch_size),
           'arr': C.SymbolRef(var),
           'size': C.Constant(size),
           'kernel_name': C.SymbolRef(kernel_name)})
     program = cl.clCreateProgramWithSource(
         latte.config.cl_ctx, kernel_src.codegen()).build()
     kernel = program[kernel_name]
     self.kernels[kernel_name] = kernel
     kernel.setarg(0, self.cl_buffers[var], ctypes.sizeof(cl.cl_mem))
     return StringTemplate(
         """
         size_t global_size_{kernel_name}[1] = {{{looplen1}}};
         clEnqueueNDRangeKernel(queue, {kernel_name}, 1, NULL, global_size_{kernel_name}, NULL, 0, NULL, NULL);
         clFinish(queue);
         """.format(
             kernel_name=kernel_name, 
             looplen1=size)
     )
Exemplo n.º 10
0
    def test_add_zero(self):
        tree = C.Add(C.SymbolRef("a"), C.Constant(0))
        tree = ConstantFold().visit(tree)
        self.assertEqual(tree, C.SymbolRef("a"))

        tree = C.Add(C.Constant(0), C.SymbolRef("a"))
        tree = ConstantFold().visit(tree)
        self.assertEqual(tree, C.SymbolRef("a"))
Exemplo n.º 11
0
    def test_sub_zero(self):
        tree = C.Sub(C.SymbolRef("a"), C.Constant(0))
        tree = ConstantFold().visit(tree)
        self.assertEqual(tree, C.SymbolRef("a"))

        tree = C.Sub(C.Constant(0), C.SymbolRef("a"))
        tree = ConstantFold().visit(tree)
        self.assertEqual(str(tree), str(C.Sub(C.SymbolRef("a"))))
Exemplo n.º 12
0
    def test_mul_by_1(self):
        tree = C.Mul(C.Constant(1), C.SymbolRef("b"))
        tree = ConstantFold().visit(tree)
        self.assertEqual(tree, C.SymbolRef("b"))

        tree = C.Mul(C.SymbolRef("b"), C.Constant(1))
        tree = ConstantFold().visit(tree)
        self.assertEqual(tree, C.SymbolRef("b"))
Exemplo n.º 13
0
 def test_recursive_fold(self):
     tree = C.Assign(
         C.SymbolRef("c"),
         C.Add(C.Add(C.Constant(2), C.Constant(-2)),
               C.SymbolRef("b")))
     tree = ConstantFold().visit(tree)
     self.assertEqual(
         str(tree),
         str(C.Assign(C.SymbolRef("c"), C.SymbolRef("b"))))
Exemplo n.º 14
0
    def visit_For(self, node):
        node.body = util.flatten([s for s in node.body])
        new_body = []
        for stmt in node.body:
          if isinstance(stmt, C.FunctionCall) and "_mm" in stmt.func.name \
             and "_store" in stmt.func.name and inReplaceMapSource(stmt.args[0], self.replace_map):
                  
                  if isinstance(stmt.args[1], C.SymbolRef):
                    sym_arr_ref = extract_reference(stmt.args)  
                    store_in_du_map(sym_arr_ref)  
                    reg = stmt.args[1]
                    self.seen[reg.name] = None
                    new_body.append(stmt)

                  elif isinstance(stmt.args[1], C.FunctionCall) and "_mm" in stmt.func.name:
                      tmp = self._gen_register()
                      new_body.append(C.Assign(C.SymbolRef(tmp, get_simd_type()()), deepcopy(stmt.args[1])))
                      new_body.append(C.FunctionCall(C.SymbolRef(stmt.func.name),  [stmt.args[0],C.SymbolRef(tmp, None)]))
                      sym_arr_ref = extract_reference(C.FunctionCall(C.SymbolRef(stmt.func.name),  [stmt.args[0],C.SymbolRef(tmp, None)]).args)  
                      store_in_du_map(sym_arr_ref)
                  # if stmt.args[0].type:
                  #    self.seen[reg.name] = stmt.args[0].type     
                  #else:
                      self.seen[tmp] = None

          elif isinstance(stmt, C.BinaryOp) and \
             isinstance(stmt.op, C.Op.Assign) and \
             isinstance(stmt.left, C.SymbolRef) and \
             isinstance(stmt.right, C.FunctionCall) and "_mm" in stmt.right.func.name and "_load" in stmt.right.func.name and inReplaceMapSink(stmt.right.args[0], self.replace_map): 
                  #print(stmt.right.args[0])                         
                  source = get_alias(stmt.right.args, self.replace_map)
                  #print(source)      
                  if (source is not None):
                    sym_arr_ref = construct_arr_reference(source, deepcopy(stmt.right.args))
                    if in_du_map(sym_arr_ref):
                       reg = get_register(sym_arr_ref)
                       #print(reg.name)   
                       if str(reg.name) in self.seen: 
                          #print(reg.name)  
                          sym_map[stmt.left.name] = reg
                       else:
                          new_body.append(stmt) 
                    else:
                       new_body.append(stmt)    
                  else:
                      new_body.append(stmt)
                            
          else:
              new_body.append(stmt)  
        node.body = util.flatten([self.visit(s) for s in new_body])
        return node
Exemplo n.º 15
0
 def visit_AugAssign(self, node):
     node.value = self.visit(node.value)
     if util.contains_symbol(node.target, self.loop_var):
         if not util.contains_symbol(node.target.right, self.loop_var):
             target = self.visit(deepcopy(node.target))
             curr_node = node.target
             idx = 1
             while curr_node.left.right.name != self.loop_var:
                 curr_node = curr_node.left
                 idx += 1
             curr_node.left = curr_node.left.left
             node.target = C.ArrayRef(node.target,
                                      C.SymbolRef(self.loop_var))
             while not isinstance(curr_node, C.SymbolRef):
                 curr_node = curr_node.left
             if curr_node.name in self.transposed_buffers and self.transposed_buffers[
                     curr_node.name] != idx:
                 raise NotImplementedError()
             self.transposed_buffers[curr_node.name] = idx
             curr_node.name += "_transposed"
             if isinstance(node.target.right,
                           C.Constant) and node.target.value == 0.0:
                 return store_ps(node.target.left,
                                 C.BinaryOp(target, node.op, node.value))
             else:
                 return store_ps(C.Ref(node.target),
                                 C.BinaryOp(target, node.op, node.value))
         else:
             if isinstance(node.target.right,
                           C.Constant) and node.target.value == 0.0:
                 return store_ps(
                     node.target.left,
                     C.BinaryOp(self.visit(node.target), node.op,
                                node.value))
             else:
                 return store_ps(
                     C.Ref(node.target),
                     C.BinaryOp(self.visit(node.target), node.op,
                                node.value))
     elif isinstance(node.op, C.Op.Add) and isinstance(
             node.value, C.FunctionCall):
         # TODO: Verfiy it's a vector intrinsic
         return C.Assign(
             node.target,
             C.FunctionCall(C.SymbolRef("_mm256_add_ps"),
                            [node.value, node.target]))
     elif isinstance(node.target, C.BinaryOp) and isinstance(
             node.target.op, C.Op.ArrayRef):
         raise NotImplementedError(node)
     node.target = self.visit(node.target)
     return node
Exemplo n.º 16
0
    def visit(self, node):
        node = super().visit(node)
        if hasattr(node, 'body'):
            # [collector.visit(s) for s in node.body]
            newbody = []
            for s in node.body:
                if isinstance(s, C.BinaryOp) and isinstance(s.op, C.Op.Assign):
                    # Anand - needs more work 27th June 2017
                    if isinstance(s.left, C.SymbolRef) and (s.left.type is not None) and s.left.name in self.variables \
                         and s.left.name not in self.defs:
                        y = self._gen_register()

                        new_stmt = C.Assign(
                            C.SymbolRef(y,
                                        get_simd_type(s.left.type)()),
                            broadcast_ss(C.SymbolRef(s.left.name, None),
                                         s.left.type))
                        newbody.append(s)
                        newbody.append(new_stmt)
                        self.defs[s.left.name] = C.SymbolRef(y, None)
                        self.symbol_table[y] = get_simd_type(s.left.type)()
                    else:
                        for i in self.defs:
                            s = replace_symbol(i, self.defs[i], s)

                        if (isinstance(s.left.type,
                                       get_simd_type(ctypes.c_int()))
                                or isinstance(
                                    s.left.type, get_simd_type(
                                        ctypes.c_float()))) and isinstance(
                                            s.right, C.SymbolRef):
                            s.right = broadcast_ss(
                                C.SymbolRef(s.right.name, None), s.left.type)

                        elif isinstance(s.left, C.SymbolRef) and s.left.name in self.symbol_table and\
                             (isinstance(self.symbol_table[s.left.name], get_simd_type(ctypes.c_int())) or isinstance(self.symbol_table[s.left.name], get_simd_type(ctypes.c_float()))) and isinstance(s.right, C.SymbolRef):
                            s.right = broadcast_ss(
                                C.SymbolRef(s.right.name, None),
                                self.symbol_table[s.left.name])

                        newbody.append(s)

                else:

                    for i in self.defs:
                        s = replace_symbol(i, self.defs[i], s)

                    newbody.append(s)
            node.body = util.flatten(newbody)
        return node
Exemplo n.º 17
0
    def transform(self, tree, program_cfg):
        arg_cfg, tune_cfg = program_cfg
        channels, height, width = arg_cfg[0]
        cfg = {
            'pad_h': C.Constant(self.pad_h),
            'pad_w': C.Constant(self.pad_w),
            'stride_h': C.Constant(self.stride_h),
            'stride_w': C.Constant(self.stride_w),
            'kernel_h': C.Constant(self.kernel_h),
            'kernel_w': C.Constant(self.kernel_w),
            'channels': C.Constant(channels),
            'height': C.Constant(height),
            'width': C.Constant(width),
        }
        im2col = C.FunctionDecl(
            None,
            C.SymbolRef("im2col"),
            [C.SymbolRef("data_im", arg_cfg[1]()),
             C.SymbolRef("data_col", arg_cfg[1]())],
            [StringTemplate("""
int stride_h = $stride_h;
int stride_w = $stride_w;
int pad_h = $pad_h;
int pad_w = $pad_w;
int kernel_h = $kernel_h;
int kernel_w = $kernel_w;
int channels = $channels;
int height = $height;
int width = $width;
int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
int channels_col = channels * kernel_h * kernel_w;
for (int c = 0; c < channels_col; ++c) {
    int w_offset = c % kernel_w;
    int h_offset = (c / kernel_w) % kernel_h;
    int c_im = c / kernel_h / kernel_w;
    for (int h = 0; h < height_col; ++h) {
        for (int w = 0; w < width_col; ++w) {
            int h_pad = h * stride_h - pad_h + h_offset;
            int w_pad = w * stride_w - pad_w + w_offset;
            if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
            data_col[(c * height_col + h) * width_col + w] =
                data_im[(c_im * height + h_pad) * width + w_pad];
            else
                data_col[(c * height_col + h) * width_col + w] = 0;
        }
    }
} """, cfg)])
        return [C.CFile('im2col', [im2col])]
Exemplo n.º 18
0
def store_epi32(target, value):
    return C.FunctionCall(
        C.SymbolRef({
            "AVX": "_mm256_store_epi32",
            "AVX-2": "_mm256_store_epi32",
            "AVX-512": "_mm512_store_epi32",
        }[latte.config.vec_config]), [target, value])
Exemplo n.º 19
0
def load_ps(arg):
    return C.FunctionCall(
        C.SymbolRef({
            "AVX": "_mm256_load_ps",
            "AVX-2": "_mm256_load_ps",
            "AVX-512": "_mm512_load_ps",
        }[latte.config.vec_config]), [arg])
Exemplo n.º 20
0
def set_zero_ps():
    return C.FunctionCall(
        C.SymbolRef({
            "AVX": "_mm256_setzero_ps",
            "AVX-2": "_mm256_setzero_ps",
            "AVX-512": "_mm512_setzero_ps"
        }[latte.config.vec_config]), [])
Exemplo n.º 21
0
    def visit_BinaryOp(self, node):
        if isinstance(node.op, C.Op.ArrayRef):
            if util.contains_symbol(node, self.loop_var):
                idx = 0
                curr_node = node
                while not isinstance(curr_node.right, C.SymbolRef) or \
                        curr_node.right.name != self.loop_var:
                    idx += 1
                    curr_node = curr_node.left
                while not isinstance(curr_node, C.SymbolRef):
                    curr_node = curr_node.left
                self.vectorized_buffers[curr_node.name] = idx
                if self.vectorize:
                    return simd_macros.mm256_load_ps(node)
                else:
                    return C.ArrayRef(node,
                                      C.SymbolRef("_neuron_index_1_inner"))
            else:
                if self.vectorize:
                    return simd_macros.mm256_set1_ps(node)
                else:
                    return node

        node.left = self.visit(node.left)
        node.right = self.visit(node.right)
        return node
Exemplo n.º 22
0
def simd_add(left, right):
    func = {
        "AVX": "_mm256_add_ps",
        "AVX-2": "_mm256_add_ps",
        "AVX-512": "_mm512_add_ps",
    }[latte.config.vec_config]
    return C.FunctionCall(C.SymbolRef(func), [left, right])
Exemplo n.º 23
0
def get_register(sym_arr_ref):
    for i in range(len(du_map)):
        if ref_equal(du_map[i][0], sym_arr_ref):
            return C.SymbolRef(du_map[i][1])
            
 
    return None
Exemplo n.º 24
0
 def visit_For(self, node):
     node.body = [self.visit(s) for s in node.body]
     if node.init.left.name == self.target_var:
         if self.unroll_type == 0:
             node.incr = C.AddAssign(C.SymbolRef(self.target_var),
                                     C.Constant(self.factor))
             node.incr = C.AddAssign(C.SymbolRef(self.target_var),
                                     C.Constant(self.factor))
         elif self.unroll_type == 1:
             assert (node.test.right.value % self.factor == 0)
             node.test.right.value = node.test.right.value // self.factor
         else:
             assert (0)
         visitor = UnrollStatements(self.target_var, self.factor,
                                    self.unroll_type)
         node.body = util.flatten([visitor.visit(s) for s in node.body])
     return node
Exemplo n.º 25
0
def simd_fma(*args):
    assert len(args) == 3
    fma_func = {
        "AVX": "_mm256_fmadd_ps",
        "AVX-2": "_mm256_fmadd_ps",
        "AVX-512": "_mm512_fmadd_ps",
    }[latte.config.vec_config]
    return C.FunctionCall(C.SymbolRef(fma_func), list(args))
Exemplo n.º 26
0
 def test_no_folding(self):
     trees = [
         C.Add(C.SymbolRef("a"), C.SymbolRef("b")),
         C.Sub(C.SymbolRef("a"), C.SymbolRef("b")),
         C.Mul(C.SymbolRef("a"), C.SymbolRef("b")),
         C.Div(C.SymbolRef("a"), C.SymbolRef("b")),
     ]
     for tree in trees:
         new_tree = ConstantFold().visit(tree)
         self.assertEqual(tree, new_tree)
Exemplo n.º 27
0
def insert_malloc(body, shape, name, dtype, _global=False):
    shape_str = "".join("[{}]".format(d) for d in shape[1:])
    size = 1
    for d in shape:
        size *= d

    body.insert(
        0,
        StringTemplate(
            "$global$type (* $arg_name0)$shape = ($global$type (*)$cast) $arg_name1;",
            {
                "arg_name0":
                C.SymbolRef(name),
                "arg_name1":
                C.FunctionCall(C.SymbolRef('_mm_malloc'), [
                    C.Mul(
                        C.Constant(size),
                        C.FunctionCall(C.SymbolRef('sizeof'), [
                            ctree.types.codegen_type(
                                ctree.types.get_c_type_from_numpy_dtype(dtype)
                                ())
                        ])),
                    C.Constant(64)
                ]),
                "shape":
                C.SymbolRef(shape_str),
                "cast":
                C.SymbolRef(shape_str),
                "type":
                C.SymbolRef(
                    ctree.types.codegen_type(
                        ctree.types.get_c_type_from_numpy_dtype(dtype)())),
                "global":
                C.SymbolRef("__global " if _global else "")
            }))
Exemplo n.º 28
0
 def visit_Attribute(self, node):
     if isinstance(node.ctx, ast.Load):
         # Lifts attributes that can be declared as constants in the
         # current scope
         value = node.value.id
         if value in self.symbol_table:
             name = "_".join((value, node.attr))
             self.decls[name] = getattr(self.symbol_table[value], node.attr)
             return C.SymbolRef(name)
     return node
Exemplo n.º 29
0
    def visit_FunctionCall(self, node):
        if isinstance(node.func, C.SymbolRef):
            if node.func.name == "rand":
                if "OPENCL" in latte.config.parallel_strategy:
                    import struct
                    platform_c_maxint = 2 ** (struct.Struct('i').size * 8 - 1) - 1
                    return StringTemplate("((({} + get_global_id(0)) * 0x5DEECE66DL + 0xBL) & ((1L << 48) - 1)) >> 16".format(int(random.random() * platform_c_maxint)))
                return C.Div(node, C.Cast(ctypes.c_float(), C.SymbolRef("RAND_MAX")))
        #ANAND: 10/11/2016 Adding following uutility to convert python max to c max
        if isinstance(node.func, C.SymbolRef):
            if node.func.name == "max":
                return C.FunctionCall(C.SymbolRef("MAX"),
                [node.args[0],node.args[1]])
 




        return node
Exemplo n.º 30
0
 def collect_args_and_insert_casts(self, kernel_args, body):
     [CollectArrayReferences(kernel_args).visit(s) for s in body]
     params = []
     for arg in kernel_args:
         buf = self.buffers[arg]
         typ = np.ctypeslib.ndpointer(buf.dtype, buf.ndim, buf.shape)
         params.append(C.SymbolRef("_" + arg, typ()))
         params[-1].set_global()
         util.insert_cast(body, buf.shape[1:], arg, buf.dtype, _global=True)
     return params