def visit_AugAssign(self, node):
    """Vectorize an augmented assignment ``target op= value``.

    ``node.value`` is always visited first.  What is returned then depends
    on the target:

    * Target mentions ``self.loop_var``: the result of
      ``store_ps(address, C.BinaryOp(current, node.op, node.value))``,
      where ``current`` is the visited target.  When the loop variable does
      not appear in the innermost (right-most) index, the target is first
      rewritten in place: the index named ``self.loop_var`` is spliced out,
      re-applied as a new innermost index, and the base symbol receives a
      ``_transposed`` suffix.  The depth the index was taken from is
      remembered per buffer name in ``self.transposed_buffers``.
    * ``+=`` whose visited value is a ``C.FunctionCall``: a ``C.Assign`` of
      ``_mm256_add_ps(value, target)`` to the target.
    * Any other target that is an array reference is rejected.
    * Otherwise the target is visited and ``node`` itself is returned.

    Raises:
        NotImplementedError: if a buffer was already transposed at a
            different depth, or for an array-reference target that neither
            involves the loop variable nor is a ``+=`` of a function call.
    """
    node.value = self.visit(node.value)

    if util.contains_symbol(node.target, self.loop_var):
        needs_transpose = not util.contains_symbol(node.target.right,
                                                   self.loop_var)
        if needs_transpose:
            # Visit a copy: the real target is restructured just below.
            current = self.visit(deepcopy(node.target))

            # Find the ArrayRef whose child is indexed by the loop variable.
            cursor = node.target
            depth = 1
            while cursor.left.right.name != self.loop_var:
                cursor = cursor.left
                depth += 1
            # Splice that index out and re-apply it as the innermost index.
            cursor.left = cursor.left.left
            node.target = C.ArrayRef(node.target,
                                     C.SymbolRef(self.loop_var))

            # Walk down to the base symbol of the reference.
            while not isinstance(cursor, C.SymbolRef):
                cursor = cursor.left
            if (cursor.name in self.transposed_buffers
                    and self.transposed_buffers[cursor.name] != depth):
                raise NotImplementedError()
            self.transposed_buffers[cursor.name] = depth
            cursor.name += "_transposed"

        # For a constant zero index, pass the indexed expression itself
        # instead of a reference to its element.  The constant is the index
        # (``right``) of the array reference, so that is what is compared.
        innermost = node.target.right
        if isinstance(innermost, C.Constant) and innermost.value == 0.0:
            address = node.target.left
        else:
            address = C.Ref(node.target)
        if not needs_transpose:
            current = self.visit(node.target)
        return store_ps(address, C.BinaryOp(current, node.op, node.value))

    if isinstance(node.op, C.Op.Add) and isinstance(node.value,
                                                     C.FunctionCall):
        # TODO: Verify it's a vector intrinsic
        return C.Assign(
            node.target,
            C.FunctionCall(C.SymbolRef("_mm256_add_ps"),
                           [node.value, node.target]))

    if isinstance(node.target, C.BinaryOp) and isinstance(
            node.target.op, C.Op.ArrayRef):
        raise NotImplementedError(node)

    node.target = self.visit(node.target)
    return node
def rewrite_arg(self, arg):
    """Replace one index of an array-reference argument with a prefetch index.

    ``arg`` is expected to be either an array reference or a ``Ref`` of an
    array reference.  Starting from the outermost reference, the method
    walks ``.left`` until a counter started at ``self.dim`` reaches ``-1``
    (so ``self.dim`` is assumed to be a negative integer -- TODO confirm).
    While walking, the indices of the first ``self.prefetch_num_zeroes``
    references passed are overwritten with ``C.Constant(0)``.  The index of
    the reference reached is then replaced by
    ``(prefetch_loop_var + prefetch_constant) * prefetch_multiplier``, all
    three emitted as symbol references.

    The tree is modified in place.  A bare array reference is returned
    wrapped in ``C.Ref``; anything else is returned as passed in.  For any
    other kind of ``arg`` no reference is selected and the attribute access
    on ``None`` raises ``AttributeError``.
    """
    is_array_ref = isinstance(arg, C.BinaryOp) and isinstance(
        arg.op, C.Op.ArrayRef)

    if is_array_ref:
        cursor = arg
    elif (isinstance(arg, C.UnaryOp)
          and isinstance(arg.op, C.Op.Ref)
          and isinstance(arg.arg, C.BinaryOp)
          and isinstance(arg.arg.op, C.Op.ArrayRef)):
        cursor = arg.arg
    else:
        cursor = None

    # Count up from self.dim to -1, descending one reference per step.
    position = self.dim
    zeroes_left = self.prefetch_num_zeroes
    while position + 1 != 0:
        if zeroes_left > 0:
            cursor.right = C.Constant(0)
            zeroes_left -= 1
        cursor = cursor.left
        position += 1

    # The previous index is read but not reused: it is replaced outright.
    # (An earlier variant added the prefetch offset to it instead.)
    replaced_index = cursor.right
    offset = C.Add(C.SymbolRef(self.prefetch_loop_var),
                   C.SymbolRef(self.prefetch_constant))
    cursor.right = C.Mul(offset, C.SymbolRef(self.prefetch_multiplier))

    return C.Ref(arg) if is_array_ref else arg
def rewrite_arg(self, arg):
    """Offset one index of an array-reference argument by a constant.

    ``arg`` is expected to be either an array reference or a ``Ref`` of an
    array reference.  Starting from the outermost reference, the method
    walks ``.left`` until a counter started at ``self.dim`` reaches ``-1``
    (so ``self.dim`` is assumed to be a negative integer -- TODO confirm).
    The index found there becomes
    ``old_index + C.Constant(self.prefetch_constant)``.

    The tree is modified in place.  A bare array reference is returned
    wrapped in ``C.Ref``; anything else is returned as passed in.  For any
    other kind of ``arg`` no reference is selected and the attribute access
    on ``None`` raises ``AttributeError``.
    """
    is_array_ref = isinstance(arg, C.BinaryOp) and isinstance(
        arg.op, C.Op.ArrayRef)

    if is_array_ref:
        cursor = arg
    elif (isinstance(arg, C.UnaryOp)
          and isinstance(arg.op, C.Op.Ref)
          and isinstance(arg.arg, C.BinaryOp)
          and isinstance(arg.arg.op, C.Op.ArrayRef)):
        cursor = arg.arg
    else:
        cursor = None

    # Count up from self.dim to -1, descending one reference per step.
    position = self.dim
    while position + 1 != 0:
        cursor = cursor.left
        position += 1

    shifted_index = C.Add(cursor.right, C.Constant(self.prefetch_constant))
    cursor.right = shifted_index

    return C.Ref(arg) if is_array_ref else arg
def _transpose_array_ref(self, ref):
    """Move the ``self.loop_var`` index of ``ref`` to the innermost position.

    ``ref`` is a nested array reference whose innermost (right-most) index
    is not the loop variable.  The index named ``self.loop_var`` is spliced
    out of the chain, the base symbol is renamed with a ``_transposed``
    suffix, and the depth the index was found at is stored in
    ``self.transposed_buffers`` under the original buffer name.

    Returns:
        A new ``C.ArrayRef`` indexing the (modified) ``ref`` by the loop
        variable.

    Raises:
        NotImplementedError: if the buffer was already transposed at a
            different depth.
    """
    cursor = ref
    depth = 1
    while cursor.left.right.name != self.loop_var:
        cursor = cursor.left
        depth += 1
    # Drop the loop-variable index from its current position ...
    cursor.left = cursor.left.left
    # ... and re-apply it as the new innermost index.
    transposed = C.ArrayRef(ref, C.SymbolRef(self.loop_var))

    # Walk down to the base symbol of the reference.
    while not isinstance(cursor, C.SymbolRef):
        cursor = cursor.left
    if (cursor.name in self.transposed_buffers
            and self.transposed_buffers[cursor.name] != depth):
        raise NotImplementedError()
    self.transposed_buffers[cursor.name] = depth
    cursor.name += "_transposed"
    return transposed


def visit_BinaryOp(self, node):
    """Vectorize array references and assignments for ``self.loop_var``.

    * Array reference that does not mention the loop variable: returns
      ``broadcast_ss`` of a reference to it.
    * Array reference that mentions the loop variable: returns ``load_ps``
      of its address, after moving the loop-variable index to the innermost
      position when it is not already part of that index.
    * Assignment: the right-hand side is visited first.  If it became a
      call whose name contains ``load_ps`` or ``broadcast_ss`` and the
      left-hand side is a typed symbol, the symbol's type is replaced by an
      instance of ``get_simd_type()`` (also recorded in
      ``self.symbol_table``) and the assignment is returned.  If the
      left-hand side is an array reference mentioning the loop variable, a
      store call is returned instead: ``store_epi32`` when
      ``self.get_type`` yields a ``ctypes.c_int`` instance, ``store_ps``
      otherwise.  In all remaining cases only the left-hand side is visited.
    * Any other operator: both operands are visited.

    Raises:
        NotImplementedError: if a buffer was already transposed at a
            different depth.
    """
    if isinstance(node.op, C.Op.ArrayRef):
        if not util.contains_symbol(node, self.loop_var):
            return broadcast_ss(C.Ref(node))
        if not util.contains_symbol(node.right, self.loop_var):
            node = self._transpose_array_ref(node)
        # For a constant zero index, pass the indexed expression itself.
        # The constant is this reference's own index (``node.right``).
        if isinstance(node.right, C.Constant) and node.right.value == 0.0:
            return load_ps(node.left)
        return load_ps(C.Ref(node))

    if isinstance(node.op, C.Op.Assign):
        node.right = self.visit(node.right)

        if (isinstance(node.right, C.FunctionCall)
                and ("load_ps" in node.right.func.name
                     or "broadcast_ss" in node.right.func.name)
                and isinstance(node.left, C.SymbolRef)
                and node.left.type is not None):
            node.left.type = get_simd_type()()
            self.symbol_table[node.left.name] = node.left.type
            return node

        if isinstance(node.left, C.BinaryOp) and util.contains_symbol(
                node.left, self.loop_var):
            if node.left.right.name != self.loop_var:
                # Transpose the assignment *target* (not the assignment
                # node), so the depth recorded for a buffer agrees with the
                # one recorded when the same access is loaded.
                node.left = self._transpose_array_ref(node.left)

            element_type = self.get_type(node.left)
            # Only c_int selects the integer store; c_float and anything
            # unrecognised use the float store.
            if isinstance(element_type, ctypes.c_int):
                store = store_epi32
            else:
                store = store_ps

            target_index = node.left.right
            if (isinstance(target_index, C.Constant)
                    and target_index.value == 0.0):
                return store(node.left.left, node.right)
            return store(C.Ref(node.left), node.right)

        node.left = self.visit(node.left)
        return node

    node.left = self.visit(node.left)
    node.right = self.visit(node.right)
    return node
def visit_Call(self, node):
    """Translate a Python call expression.

    * ``len(x)``: ``x`` is evaluated with ``self.eval_in_table`` and its
      length is returned as a ``C.Constant``.
    * Callee not known to ``self.table_contains``: the arguments are
      visited in place and ``node`` is returned.
    * Otherwise the callee is evaluated, its specializer is asked for a
      transform result for the collected parameters, and the generated
      function is inlined: every other statement of the generated file is
      appended to ``self.files`` and a ``C.Block`` holding the function
      body is returned, with one ``param = arg`` assignment per collected
      argument inserted at the front (each insert goes to position 0, so
      they end up in reverse parameter order).

    Parameter/argument collection per call argument:

    * subscript ``a[i]`` / ``a[i, j]``: the indexed element of the evaluated
      ``a`` is a parameter; the visited expression is the argument, wrapped
      in ``C.Ref`` when the element is an ``Array``;
    * attribute: its evaluated value is a parameter only (no argument);
    * anything else is visited; a resulting ``C.SymbolRef`` contributes
      ``self.decls[name]`` and itself, a resulting ``ast.Tuple`` contributes
      a tuple parameter only, and any other result contributes nothing.
    """
    if isinstance(node.func, ast.Name) and node.func.id == 'len':
        target = self.eval_in_table(node.args[0])
        return C.Constant(len(target))

    if not self.table_contains(node.func):
        node.args = [self.visit(arg) for arg in node.args]
        return node

    fn = self.eval_in_table(node.func)
    params = []
    args = []
    # Before Python 3.9 a simple subscript is wrapped in ast.Index; from 3.9
    # on Subscript.slice is the index expression itself.  Handle both.
    index_wrapper = getattr(ast, "Index", None)
    for arg in node.args:
        if isinstance(arg, ast.Subscript):
            value = self.eval_in_table(arg.value)
            subscript = arg.slice
            if index_wrapper is not None and isinstance(subscript,
                                                        index_wrapper):
                subscript = subscript.value
            if isinstance(subscript, ast.Tuple):
                index = self.eval_with_loop(subscript.elts)
            else:
                index = self.eval_with_loop([subscript])
            element = value[index]
            params.append(element)
            arg = self.visit(arg)
            if isinstance(element, Array):
                arg = C.Ref(arg)
            args.append(arg)
        elif isinstance(arg, ast.Attribute):
            params.append(self.eval_in_table(arg))
        else:
            arg = self.visit(arg)
            if isinstance(arg, C.SymbolRef):
                params.append(self.decls[arg.name])
                args.append(arg)
            elif isinstance(arg, ast.Tuple):
                params.append(tuple(
                    self.eval_in_table(elt)
                    if isinstance(elt, C.SymbolRef) else elt
                    for elt in arg.elts))

    if hasattr(fn, 'specialized_dispatch'):
        # The dispatcher receives every parameter; only the first
        # ``num_args`` (when set) are used for the program config.
        trimmed = params[:fn.num_args] if fn.num_args else params
        fn = fn.fn(*params)
        params = trimmed

    cfg = fn._specializer.get_program_config(params, {})
    dir_name = fn._specializer.config_to_dirname(cfg)
    result = fn._specializer.get_transform_result(cfg, dir_name,
                                                  cache=False)
    cfile = result[0]
    func = cfile.find(C.FunctionDecl, name=cfile.name)
    # Keep everything but the entry function as supporting definitions.
    cfile.body = [stmt for stmt in cfile.body if stmt is not func]
    self.files.extend(cfile.body)

    block = C.Block()
    block.body = func.defn
    for arg, param in zip(args, func.params):
        block.body.insert(0, C.Assign(param, arg))
    return block