示例#1
0
 def visit_For(self, node):
     """Attach OpenMP pragmas to a ``For`` node that is flagged as parallel.

     A node counts as parallel when it has a truthy ``parallel`` attribute.
     Non-parallel nodes are returned unchanged.

     For a parallel node:

     * If every statement in ``node.body`` is itself a parallel ``C.For``,
       one new ``C.For`` is emitted per body statement. Each reuses
       ``node.init``/``node.test``/``node.incr`` (the same objects, not
       copies), wraps that single statement, and carries an
       ``omp parallel for collapse(N)`` pragma.
     * Otherwise ``node`` itself is tagged ``omp parallel for``.

     One reduction loop, built by ``self._gen_reduce_for_loop``, is appended
     for every entry of ``node.reduce_vars`` (when that attribute exists).

     :param node: the loop node being visited.
     :returns: ``node`` itself when it is not parallel, otherwise a list of
         nodes replacing it.
     """
     if not getattr(node, 'parallel', False):
         return node

     def is_parallel_for(stmt):
         # A statement qualifies when it is a C.For with a truthy `parallel`.
         return isinstance(stmt, C.For) and getattr(stmt, 'parallel', False)

     # Walk down the chain of singly nested parallel loops, counting levels.
     collapse_count = 1
     inner = node.body
     while len(inner) == 1 and is_parallel_for(inner[0]):
         collapse_count += 1
         inner = inner[0].body
     # A level made only of (several) sibling parallel loops adds one more.
     # NOTE(review): all() is also True for an empty body, so an empty
     # innermost body bumps the count as well -- confirm that is intended.
     # NOTE(review): when the sibling loops sit below the first nesting
     # level the nest is not perfectly nested; confirm the resulting
     # collapse depth is valid for the generated code.
     if all(is_parallel_for(s) for s in inner):
         collapse_count += 1

     # Supports depth one nesting with collapse
     if all(is_parallel_for(s) for s in node.body):
         pragma = "omp parallel for collapse({})".format(collapse_count)
         to_return = []
         for s in node.body:
             loop = C.For(node.init, node.test, node.incr, [s])
             loop.pragma = pragma
             to_return.append(loop)
     else:
         node.pragma = "omp parallel for"
         to_return = [node]

     for var in getattr(node, 'reduce_vars', ()):
         # Product of every dimension of the buffer's shape but the first.
         size = np.prod(self.buffers[var].shape[1:])
         to_return.append(self._gen_reduce_for_loop(node, var, size))
     return to_return
示例#2
0
    def transform(self, py_ast, program_cfg):
        """Lower a Python AST into a single generated C file.

        The first definition in the converted tree gets an extra ``retval``
        parameter, every zipped parameter is typed from ``arg_cfg``, and the
        definition's statements are rewritten by ``MapTransformer`` and
        wrapped in one ``for (i = 0; i < length; i++)`` loop tagged with the
        ``ivdep`` pragma. The tree is then passed through
        ``DeclarationFiller`` and ``HwachaVectorize``.

        :param py_ast: Python AST to convert.
        :param program_cfg: pair ``(arg_cfg, tune_cfg)``; only ``arg_cfg``
            is used here.
        :returns: a one-element list holding the ``CFile`` named
            ``"generated"``.
        """
        arg_cfg, _tune_cfg = program_cfg
        tree = PyBasicConversions().visit(py_ast)
        func = tree.body[0]
        ret_type = arg_cfg[0]

        # The output parameter shares the type of the first argument.
        func.params.append(C.SymbolRef("retval", ret_type()))

        # Annotate arguments
        param_dict = {}
        for param, arg_type in zip(func.params, arg_cfg):
            param.type = arg_type()
            param_dict[param.name] = arg_type._dtype_

        length = np.prod(ret_type._shape_)
        transformer = MapTransformer("i", param_dict, "retval")
        loop_body = [transformer.visit(stmt) for stmt in func.defn]

        loop_init = C.Assign(C.SymbolRef("i", ct.c_int()), C.Constant(0))
        loop_test = C.Lt(C.SymbolRef("i"), C.Constant(length))
        loop_incr = C.PostInc(C.SymbolRef("i"))
        func.defn = [
            C.For(loop_init, loop_test, loop_incr,
                  body=loop_body, pragma="ivdep")
        ]

        tree = DeclarationFiller().visit(tree)
        # HwachaVectorize receives this list; whatever it holds afterwards
        # is emitted ahead of the tree.
        defns = []
        tree = HwachaVectorize(param_dict, defns).visit(tree)

        preamble = (
            "#include <stdlib.h>",
            "#include <stdint.h>",
            "#include <assert.h>",
            "extern \"C\" void __hwacha_body(void);",
        )
        file_body = [StringTemplate(line) for line in preamble]
        file_body += defns
        file_body.append(tree)
        return [CFile("generated", file_body)]
示例#3
0
 def gen_loop_nest(self, loopvars, cfg):
     """Build a nest of counted ``for`` loops, one per loop variable.

     Loop ``k`` runs variable ``loopvars[k]`` from 0 up to (excluding)
     ``cfg.shape[k]`` with a post-increment; each loop after the first is
     appended to the body list of the previous one. ``cfg.shape`` is also
     recorded in ``self.loop_shape_map`` under the key ``loopvars``.

     :param loopvars: indexable, hashable sequence of loop variable names.
     :param cfg: object whose ``shape`` gives the loop extents.
     :returns: ``(outermost_loop, innermost_loop)``; both are the same node
         when only one loop is generated.
     """
     def counted_loop(var, extent, stmts):
         # for (int var = 0; var < extent; var++) { stmts }
         return C.For(
             C.Assign(C.SymbolRef(var, ct.c_int()), C.Constant(0)),
             C.Lt(C.SymbolRef(var), C.Constant(extent)),
             C.PostInc(C.SymbolRef(var)), stmts)

     # The list handed to the outer loop is appended to afterwards, so the
     # nesting relies on C.For keeping a reference to that same list.
     enclosing_body = []
     outermost = counted_loop(loopvars[0], cfg.shape[0], enclosing_body)
     innermost = outermost
     for var, extent in zip(loopvars[1:], cfg.shape[1:]):
         innermost = counted_loop(var, extent, [])
         enclosing_body.append(innermost)
         enclosing_body = innermost.body
     self.loop_shape_map[loopvars] = cfg.shape
     return outermost, innermost
示例#4
0
 def block_loop(self, node):
     """Block (tile) the loop ``node`` by ``self.block_factor``.

     A new loop is inserted at the front of ``self.nest``. Its variable is
     the original variable name repeated twice (``i`` becomes ``ii``); it
     keeps the original start and upper bound and advances by
     ``self.block_factor``. Its body is the placeholder list ``[None]``.

     ``node`` is then rewritten in place to start at the new variable and
     to stop at ``fmin(new_var + block_factor, original_bound)``.

     :param node: loop node whose ``init`` and ``test`` are modified.
     :returns: None.
     """
     inner_name = node.init.left.name
     tile_name = inner_name + inner_name
     upper_bound = node.test.right

     tile_loop = C.For(
         C.Assign(C.SymbolRef(tile_name, node.init.left.type),
                  node.init.right),
         C.Lt(C.SymbolRef(tile_name), upper_bound),
         C.AddAssign(C.SymbolRef(tile_name),
                     C.Constant(self.block_factor)),
         [None])
     self.nest.insert(0, tile_loop)

     # The original bound object is shared by the tile loop's test and the
     # fmin() call below, exactly as before.
     node.init.right = C.SymbolRef(tile_name)
     tile_end = C.Add(C.SymbolRef(tile_name), C.Constant(self.block_factor))
     node.test.right = C.FunctionCall(C.SymbolRef("fmin"),
                                      [tile_end, upper_bound])
示例#5
0
def gen_for(loopvar, start, end, body, pragma=""):
    """Build ``for (int loopvar = start; loopvar < end; loopvar++) body``.

    :param loopvar: name of the loop variable.
    :param start: value wrapped in ``C.Constant`` as the initial value.
    :param end: value wrapped in ``C.Constant`` as the exclusive bound.
    :param body: statements forming the loop body.
    :param pragma: pragma passed through to ``C.For`` (default ``""``).
    :returns: the new ``C.For`` node.
    """
    init = C.Assign(C.SymbolRef(loopvar, ctypes.c_int()), C.Constant(start))
    test = C.Lt(C.SymbolRef(loopvar), C.Constant(end))
    incr = C.PostInc(C.SymbolRef(loopvar))
    return C.For(init, test, incr, body, pragma)
示例#6
0
    def visit_For(self, node):
        """Collect the unrolled copies of the statements in a loop body.

        The copies live in the class-level dict
        ``UnrollStatementsNoJam.new_body``, keyed by copy index
        ``1 .. self.factor - 1``. The buckets for this loop level are reset
        on entry.

        For each visited body statement ``t``:

        * If ``t`` is not a ``C.For``, copy ``i`` is a deep copy of ``t``
          with ``self.target_var`` replaced by ``target_var + i``
          (``unroll_type == 0``) or ``factor * target_var + i``
          (``unroll_type == 1``), appended to bucket ``i``.
        * If ``t`` is a ``C.For``, the statements gathered in bucket ``j``
          while visiting it are wrapped in a new loop over the same
          variable, and that loop is appended to the bucket contents
          snapshotted before the visit.

        ``node.body`` is replaced by the list of visited statements.

        :param node: loop node to process.
        :returns: ``node``, with its body replaced.
        :raises ValueError: if ``self.unroll_type`` is neither 0 nor 1 when
            a copy has to be generated.
        """
        # Fresh buckets for this loop level.
        for j in range(1, self.factor):
            UnrollStatementsNoJam.new_body[j] = []

        newbody = []

        for s in node.body:
            # Snapshot taken *before* visiting: visiting a nested loop
            # re-enters this method, which resets the shared buckets.
            temp = deepcopy(UnrollStatementsNoJam.new_body)

            t = self.visit(s)
            template = deepcopy(t)

            # NOTE(review): the visited statement is kept as-is; the
            # previous "+ 0" rewrite was computed on a copy and discarded.
            # Confirm the base copy is meant to stay unscaled for
            # unroll_type == 1.
            newbody.append(t)

            if not isinstance(t, C.For):
                for i in range(1, self.factor):
                    if self.unroll_type == 0:
                        index = C.Add(C.SymbolRef(self.target_var),
                                      C.Constant(i))
                    elif self.unroll_type == 1:
                        index = C.Add(
                            C.Mul(C.Constant(self.factor),
                                  C.SymbolRef(self.target_var)),
                            C.Constant(i))
                    else:
                        # Was `assert (false)`: `false` is undefined, so it
                        # raised NameError rather than a meaningful error.
                        raise ValueError(
                            "unsupported unroll_type: {!r}".format(
                                self.unroll_type))

                    unrolled = util.replace_symbol(
                        self.target_var, index, deepcopy(template))
                    UnrollStatementsNoJam.new_body.setdefault(
                        i, []).append(unrolled)

            else:
                var = t.init.left.name

                # NOTE(review): the wrapper loop always starts at 0 and
                # reads its bound/stride from `.value`, i.e. it assumes
                # constant bounds -- confirm against callers.
                for j in range(1, self.factor):
                    temp[j].append(
                        C.For(
                            C.Assign(C.SymbolRef(var, ctypes.c_int()),
                                     C.Constant(0)),
                            C.Lt(C.SymbolRef(var),
                                 C.Constant(t.test.right.value)),
                            C.AddAssign(C.SymbolRef(var),
                                        C.Constant(t.incr.value.value)),
                            UnrollStatementsNoJam.new_body[j]))

                UnrollStatementsNoJam.new_body = deepcopy(temp)

        node.body = newbody
        return node
示例#7
0
    def visit_RangeDim(self, node):
        """Lower a ``range_dim`` loop into a counted C ``for`` loop.

        ``dim`` is read from the second argument of the ``range_dim(...)``
        call; ``length`` is ``len(node.mapping.shape[dim])`` and ``step`` is
        ``node.mapping.get_step(dim)``.

        The dimension counts as *tiled* when ``dim`` appears as the first
        element of an entry in ``self.ensemble.tiling_info["inputs"]`` (for
        direction ``"forward"``) or ``["grad_inputs"]`` (for ``"backward"``
        / ``"update_internal"``). Three outcomes follow:

        * not tiled: ``loop_var`` is rewritten to ``loop_var * step`` in the
          body and a plain ``0 .. length`` loop is returned;
        * tiled, LRN ensemble and ``length < SIMDWIDTH``: the ``_outer`` /
          ``_inner`` indices are rewritten in terms of ``loop_var`` and a
          plain ``0 .. length`` loop is returned;
        * tiled otherwise: an ``_outer`` loop of ``length // SIMDWIDTH``
          iterations is appended to ``self.tiled_loops`` and the ``_inner``
          loop holding the body is returned.

        :param node: RangeDim node providing ``child_for`` and ``mapping``.
        :returns: the generated ``C.For`` node.
        :raises NotImplementedError: if the loop iterable is not a call to
            ``range_dim``.
        """
        range_call = node.child_for.iter
        dim = range_call.args[1].n
        step = node.mapping.get_step(dim)
        length = len(node.mapping.shape[dim])
        if not (isinstance(range_call, ast.Call)
                and range_call.func.id == "range_dim"):
            raise NotImplementedError()

        loop_var = node.child_for.target.id
        simd_width = latte.config.SIMDWIDTH
        body = [self.visit(s) for s in node.child_for.body]

        def counted_loop(var, bound, stmts):
            # for (int var = 0; var < bound; var += 1) { stmts }
            return C.For(
                C.Assign(C.SymbolRef(var, ctypes.c_int()), C.Constant(0)),
                C.Lt(C.SymbolRef(var), C.Constant(bound)),
                C.AddAssign(C.SymbolRef(var), C.Constant(1)),
                stmts,
                # "unroll_and_jam({})".format(length)
                # "unroll"
            )

        def rewrite_index(stmts, name, make_replacement):
            # A fresh replacement expression is built for every statement so
            # no AST node is shared between statements.
            return [
                UpdateInputIndices(name, make_replacement()).visit(s)
                for s in stmts
            ]

        # FIXME: This check does not cover general cases
        # ANAND - special casing for LRN, needs refactoring
        short_lrn = (isinstance(self.ensemble, latte.ensemble.LRNEnsemble)
                     and length < simd_width)

        if self.direction == "forward":
            tiling_key = "inputs"
        elif self.direction in ("backward", "update_internal"):
            tiling_key = "grad_inputs"
        else:
            tiling_key = None
        is_tiled = (
            tiling_key is not None
            and tiling_key in self.ensemble.tiling_info
            and any(dim == x[0]
                    for x in self.ensemble.tiling_info[tiling_key]))

        if not is_tiled:
            body = rewrite_index(
                body, loop_var,
                lambda: C.Mul(C.SymbolRef(loop_var), C.Constant(step)))
            return counted_loop(loop_var, length, body)

        if short_lrn:
            offset_name = "_input_offset_{}_inner".format(dim + 1)

            def shifted_index():
                return C.Add(C.SymbolRef(loop_var), C.SymbolRef(offset_name))

            # The three passes run in this order over the whole body: the
            # offset symbol introduced by the first pass is zeroed by the
            # second, while the one introduced by the third is kept.
            body = rewrite_index(
                body, loop_var + "_outer",
                lambda: C.Div(shifted_index(), C.Constant(simd_width)))
            body = rewrite_index(body, offset_name, lambda: C.Constant(0))
            body = rewrite_index(
                body, loop_var + "_inner",
                lambda: C.Mod(shifted_index(), C.Constant(simd_width)))
            return counted_loop(loop_var, length, body)

        # NOTE(review): when length < SIMDWIDTH the outer bound is 0 --
        # confirm how consumers of self.tiled_loops handle that.
        outer_loop = counted_loop(loop_var + "_outer",
                                  length // simd_width, [])
        self.tiled_loops.append(outer_loop)
        if self.direction == "forward" and length < simd_width:
            inner_bound = length
        else:
            inner_bound = simd_width
        return counted_loop(loop_var + "_inner", inner_bound, body)