x = True else: assert isinstance(t, IntT) x = 1 return Const(x, type = t) true = one(Bool) one_i32 = one(Int32) one_i64 = one(Int64) one_f32 = one(Float32) one_f64 = one(Float64) none_t = NoneType none = Const(None, type = none_t) slice_none_t = array_type.make_slice_type(none_t, none_t, none_t) slice_none = syntax.Slice(none, none, none, type = slice_none_t) def is_python_int(x): return isinstance(x, (int, long)) def is_python_float(x): return isinstance(x, float) def is_python_bool(x): return isinstance(x, bool) def is_python_scalar(x): return isinstance(x, (bool, int, long, float)) def is_python_constant(x):
def expr_Slice():
    """Build the typed counterpart of the slice expression `expr`.

    `expr` and `annotate_child` are taken from the enclosing scope. The
    start, stop and step children are annotated in that order and the
    resulting node is tagged with the slice type derived from their types.
    """
    typed_parts = [annotate_child(child)
                   for child in (expr.start, expr.stop, expr.step)]
    part_types = [part.type for part in typed_parts]
    return typed_ast.Slice(*typed_parts,
                           type=array_type.make_slice_type(*part_types))
def transform_Slice(self, expr):
    """Rebuild a Slice node from its transformed start/stop/step children.

    Each child is passed through `self.transform_expr` (start first, then
    stop, then step); the new node's type is recomputed from the types of
    the transformed children.
    """
    new_start = self.transform_expr(expr.start)
    new_stop = self.transform_expr(expr.stop)
    new_step = self.transform_expr(expr.step)
    return syntax.Slice(
        new_start,
        new_stop,
        new_step,
        type=array_type.make_slice_type(new_start.type,
                                        new_stop.type,
                                        new_step.type))
def transform_TiledReduce(self, expr):
    """Lower a TiledReduce expression into explicit loops over tiles.

    Statements are emitted into `self.blocks`. The return value is the
    variable holding the reduction result: `self.output_var` when one is
    set and the accumulator is an array, otherwise whatever
    `self._create_output_array` returns.

    Reads `expr.args`, `expr.axes`, `expr.fn`, `expr.combine`, `expr.init`
    and `expr.fixed_tile_size`.

    Side effects on `self`: increments `self.fixed_idx` when the tile size
    is fixed; otherwise sets `self.tiling` and `self.fn.has_tiles` and
    increments `self.nesting_idx`.
    """
    args = expr.args
    axes = expr.axes

    # TODO: Should make sure that all the shapes conform here,
    # but we don't yet have anything like assertions or error handling.
    # The trip count is the extent of the first argument along its axis.
    niters = self.shape(args[0], syntax_helpers.unwrap_constant(axes[0]))

    if expr.fixed_tile_size:
        # Fixed tile sizes are taken from self.fixed_tile_sizes, in order.
        self.fixed_idx += 1
        tile_size = syntax_helpers.const(self.fixed_tile_sizes[self.fixed_idx])
    else:
        # Otherwise the tile size is read out of the tile-sizes parameter at
        # the current nesting index.
        self.tiling = True
        self.fn.has_tiles = True
        self.nesting_idx += 1
        tile_size = self.index(self.tile_sizes_param, self.nesting_idx,
                               temp = True, name = "tilesize")

    slice_t = array_type.make_slice_type(Int64, Int64, Int64)

    untiled_map_fn = self.get_fn(expr.fn)

    # Anything that is not a scalar is treated as an array accumulator.
    acc_type = untiled_map_fn.return_type
    acc_is_array = not isinstance(acc_type, ScalarT)

    tiled_map_fn = self.transform_TypedFn(untiled_map_fn)
    map_closure_args = [self.get_closure_arg(e)
                        for e in self.closure_elts(expr.fn)]

    untiled_combine = self.get_fn(expr.combine)
    # NOTE(review): closure arguments of the combiner are never collected
    # here; the list stays empty for the whole function.
    combine_closure_args = []

    tiled_combine = self.transform_TypedFn(untiled_combine, acc_is_array)
    if self.output_var and acc_is_array:
        result = self.output_var
    else:
        shape_args = map_closure_args + args
        result = self._create_output_array(untiled_map_fn, shape_args,
                                           [], "loop_result")
    init = result
    rslt_t = result.type

    if not acc_is_array:
        # Scalar accumulators get a separate "before" variable, which is
        # what gets initialized and is later registered in `merge`.
        result_before = self.fresh_var(rslt_t, "result_before")
        init = result_before

    # Lift the initial value and fill it.
    def init_unpack(i, cur):
        # Emits `i` nested loops over axis 0 of `cur`; the innermost level
        # assigns zero_f64.
        # NOTE(review): the fill value is always zero_f64, independent of
        # expr.init and of the accumulator's element type -- confirm intended.
        if i == 0:
            return syntax.Assign(cur, syntax_helpers.zero_f64)
        else:
            j = self.fresh_i64("j")
            start = zero_i64
            stop = self.shape(cur, 0)

            self.blocks.push()
            n = self.index_along_axis(cur, 0, j)
            self.blocks += init_unpack(i-1, n)
            body = self.blocks.pop()

            return syntax.ForLoop(j, start, stop, one_i64, body, {})
    # Number of loop levels needed: rank difference between the variable
    # being initialized and the declared initial value.
    num_exps = array_type.get_rank(init.type) - \
               array_type.get_rank(expr.init.type)

    # TODO: Get rid of this when safe to do so.
    # The trailing `or True` makes this condition always true, so the
    # initialization code is emitted unconditionally.
    if not expr.fixed_tile_size or True:
        self.comment("TiledReduce in %s: init_unpack" % self.fn.name)
        self.blocks += init_unpack(num_exps, init)

    # Loop over the remaining tiles.
    merge = {}

    if not acc_is_array:
        # presumably (value entering the loop, value produced by an
        # iteration) for the loop-carried scalar -- verify against ForLoop.
        result_after = self.fresh_var(rslt_t, "result_after")
        merge[result.name] = (result_before, result_after)

    def make_loop(start, stop, step, do_min = True):
        # Builds one ForLoop from `start` to `stop` in increments of `step`.
        # Each iteration maps over the slice [i, i + step) of every argument
        # and combines the partial result into `result`. With do_min the
        # slice's upper bound is clamped to `stop`.
        i = self.fresh_var(niters.type, "i")

        self.blocks.push()
        slice_stop = self.add(i, step, "next_bound")
        slice_stop_min = self.min(slice_stop, stop) if do_min \
                         else slice_stop

        tile_bounds = syntax.Slice(i, slice_stop_min, one_i64, type = slice_t)
        nested_args = [self.index_along_axis(arg, axis, tile_bounds)
                       for arg, axis in zip(args, axes)]

        new_acc = self.fresh_var(tiled_map_fn.return_type, "new_acc")
        self.comment("TiledReduce in %s: map_fn " % self.fn.name)
        do_inline(tiled_map_fn,
                  map_closure_args + nested_args,
                  self.type_env,
                  self.blocks.top(),
                  result_var = new_acc)

        loop_body = self.blocks.pop()
        if acc_is_array:
            # Array accumulators: the combiner additionally receives a
            # full-range slice of `result` and no result variable is bound.
            outidx = self.tuple([syntax_helpers.slice_none] * result.type.rank)
            result_slice = self.index(result, outidx, temp = False)
            self.comment("")
            do_inline(tiled_combine,
                      combine_closure_args + [result, new_acc, result_slice],
                      self.type_env,
                      loop_body,
                      result_var = None)
        else:
            # Scalar accumulators: the combined value is bound to
            # result_after, which `merge` ties back to `result`.
            do_inline(tiled_combine,
                      combine_closure_args + [result, new_acc],
                      self.type_env, loop_body,
                      result_var = result_after)
        return syntax.ForLoop(i, start, stop, step, loop_body, merge)

    assert isinstance(tile_size, syntax.Expr), "%s not an expr" % tile_size

    self.comment("TiledReduce in %s: combine" % self.fn.name)

    if expr.fixed_tile_size and \
       config.opt_reg_tiles_not_tile_size_dependent and \
       syntax_helpers.unwrap_constant(tile_size) > 1:
        # Two loops: the first covers num_tiles * tile_size iterations in
        # steps of tile_size with no clamping; the second continues from the
        # first loop's variable up to niters with step one.
        num_tiles = self.div(niters, tile_size, "num_tiles")
        tile_stop = self.mul(num_tiles, tile_size, "tile_stop")
        loop1 = make_loop(zero_i64, tile_stop, tile_size, False)
        self.blocks.append(loop1)
        loop2_start = self.assign_temp(loop1.var, "loop2_start")
        self.blocks.append(make_loop(loop2_start, niters, one_i64,
                                     False))
    else:
        # Single loop; each tile's upper bound is clamped to niters.
        self.blocks.append(make_loop(zero_i64, niters, tile_size))
    return result
def transform_TiledMap(self, expr):
    """Lower a TiledMap expression into explicit loops over tiles.

    Statements are emitted into `self.blocks`. The return value is the
    array receiving the mapped output: `self.output_var` when one is set
    and the mapped function returns a non-scalar, otherwise whatever
    `self._create_output_array` returns.

    Reads `expr.args`, `expr.axes`, `expr.fn` and `expr.fixed_tile_size`.

    Side effects on `self`: increments `self.fixed_idx` when the tile size
    is fixed; otherwise sets `self.tiling` and `self.fn.has_tiles` and
    increments `self.nesting_idx`. In both cases `self.nesting_idx` is
    further advanced by the nested function's `num_tiles`.
    """
    args = expr.args
    axes = expr.axes

    # TODO: Should make sure that all the shapes conform here,
    # but we don't yet have anything like assertions or error handling
    # The trip count is the extent of the first argument along its axis.
    niters = self.shape(expr.args[0], syntax_helpers.unwrap_constant(axes[0]))

    # Create the tile size variable and find the number of tiles
    if expr.fixed_tile_size:
        self.fixed_idx += 1
        tile_size = syntax_helpers.const(self.fixed_tile_sizes[self.fixed_idx])
    else:
        self.tiling = True
        self.fn.has_tiles = True
        self.nesting_idx += 1
        tile_size = self.index(self.tile_sizes_param, self.nesting_idx,
                               temp = True, name = "tilesize")

    untiled_inner_fn = self.get_fn(expr.fn)
    # Functions with a non-scalar result are transformed with
    # preallocate_output = True; the loop below passes them an output region.
    if isinstance(untiled_inner_fn.return_type, ScalarT):
        tiled_inner_fn = self.transform_TypedFn(untiled_inner_fn)
    else:
        tiled_inner_fn = self.transform_TypedFn(untiled_inner_fn,
                                                preallocate_output = True)

    nested_has_tiles = tiled_inner_fn.has_tiles

    # Increase the nesting_idx by the number of tiles in the nested fn
    self.nesting_idx += tiled_inner_fn.num_tiles

    slice_t = array_type.make_slice_type(Int64, Int64, Int64)

    closure_args = [self.get_closure_arg(e)
                    for e in self.closure_elts(expr.fn)]

    if self.output_var and \
       not isinstance(untiled_inner_fn.return_type, ScalarT):
        array_result = self.output_var
    else:
        shape_args = closure_args + expr.args
        array_result = self._create_output_array(untiled_inner_fn, shape_args,
                                                 [], "array_result")

    # Exact class check (not isinstance): subclasses of ArrayT are rejected.
    assert self.output_var is None or \
           self.output_var.type.__class__ is ArrayT, \
           "Invalid output var %s : %s" % \
           (self.output_var, self.output_var.type)

    def make_loop(start, stop, step, do_min = True):
        # Builds one ForLoop from `start` to `stop` in increments of `step`.
        # Each iteration applies the tiled function to the slice
        # [i, i + step) of every argument. With do_min the slice's upper
        # bound is clamped to niters (not to `stop`).
        i = self.fresh_var(niters.type, "i")

        self.blocks.push()
        slice_stop = self.add(i, step, "slice_stop")
        slice_stop_min = self.min(slice_stop, niters, "slice_min") if do_min \
                         else slice_stop

        tile_bounds = syntax.Slice(i, slice_stop_min, one_i64, type = slice_t)
        nested_args = [self.index_along_axis(arg, axis, tile_bounds)
                       for arg, axis in zip(args, axes)]
        # NOTE(review): the tile index counter is passed where
        # index_along_axis takes an axis for the inputs -- confirm that
        # using fixed_idx / nesting_idx as the output axis is intended.
        out_idx = self.fixed_idx if expr.fixed_tile_size else self.nesting_idx
        output_region = self.index_along_axis(array_result, out_idx, tile_bounds)
        nested_args.append(output_region)

        # Nested tiled functions also receive the tile-sizes parameter.
        if nested_has_tiles:
            nested_args.append(self.tile_sizes_param)
        body = self.blocks.pop()
        do_inline(tiled_inner_fn,
                  closure_args + nested_args,
                  self.type_env,
                  body,
                  result_var = None)
        return syntax.ForLoop(i, start, stop, step, body, {})

    assert isinstance(tile_size, syntax.Expr)
    self.comment("TiledMap in %s" % self.fn.name)

    if expr.fixed_tile_size and \
       config.opt_reg_tiles_not_tile_size_dependent and \
       syntax_helpers.unwrap_constant(tile_size) > 1:
        # Two loops: the first covers num_tiles * tile_size iterations in
        # steps of tile_size with no clamping; the second continues from the
        # first loop's variable up to niters with step one.
        num_tiles = self.div(niters, tile_size, "num_tiles")
        tile_stop = self.mul(num_tiles, tile_size, "tile_stop")
        loop1 = make_loop(zero_i64, tile_stop, tile_size, False)
        self.blocks.append(loop1)
        loop2_start = self.assign_temp(loop1.var, "loop2_start")
        self.blocks.append(make_loop(loop2_start, niters, one_i64, False))
    else:
        # Single loop; each tile's upper bound is clamped to niters.
        self.blocks.append(make_loop(zero_i64, niters, tile_size))
    return array_result
def slice_value(self, start, stop, step):
    """Return a Slice node over (start, stop, step).

    The node's type is the slice type built from the three bounds' own
    types.
    """
    bounds = (start, stop, step)
    bound_types = [bound.type for bound in bounds]
    return Slice(*bounds, type=array_type.make_slice_type(*bound_types))
def gen_par_work_function(adverb_class, f, nonlocals, nonlocal_types,
                          args_t, arg_types, dont_slice_position = -1):
    """Build (or fetch from cache) the lowered parallel work function for `f`.

    The generated wrapper takes (start, stop, args, tile_sizes). It unpacks
    the fields "arg0", "arg1", ... of the args struct, slices array
    arguments to [start:stop] along their first axis, calls the tiled
    payload produced by `gen_tiled_wrapper`, and stores the result into the
    corresponding slice of the struct's "output" field.

    Args:
      adverb_class: passed to `gen_tiled_wrapper`; its `node_type()` is used
        as the prefix of the wrapper's name.
      f: function to wrap; `f.name` is part of the cache key.
      nonlocals: not used by this function; kept for interface
        compatibility.
      nonlocal_types: types of the leading "arg%d" fields, which are passed
        through unsliced.
      args_t: type of the packed argument struct.
      arg_types: types of the remaining "arg%d" fields.
      dont_slice_position: index in the combined "arg%d" numbering
        (nonlocals counted first) of an array argument that must not be
        sliced. The default -1 matches no position.

    Returns:
      The lowered wrapper, with `num_tiles`, `dl_tile_estimates` and
      `ml_tile_estimates` copied over from the tiled payload function.
    """
    # dont_slice_position decides which arguments the wrapper slices, so it
    # has to be part of the key. Without it, a second request that differs
    # only in that position would be handed the wrong cached wrapper.
    key = (adverb_class, f.name, tuple(arg_types), config.opt_tile,
           dont_slice_position)
    if key in _par_wrapper_cache:
        return _par_wrapper_cache[key]

    fn = gen_tiled_wrapper(adverb_class, f, arg_types, nonlocal_types)
    num_tiles = fn.num_tiles

    # Construct a typed parallel wrapper function that unpacks the args
    # struct and calls the (possibly tiled) payload function with its
    # slices of the arguments.
    start_var = syntax.Var(names.fresh("start"), type = Int64)
    stop_var = syntax.Var(names.fresh("stop"), type = Int64)
    args_var = syntax.Var(names.fresh("args"), type = args_t)
    tile_type = tuple_type.make_tuple_type([Int64] * num_tiles)
    tile_sizes_var = syntax.Var(names.fresh("tile_sizes"), type = tile_type)
    inputs = [start_var, stop_var, args_var, tile_sizes_var]

    # Manually unpack the args into typed Vars and slice into them.
    slice_t = array_type.make_slice_type(Int64, Int64, Int64)
    arg_slice = syntax.Slice(start_var, stop_var, syntax_helpers.one_i64,
                             type = slice_t)

    def slice_arg(arg, t):
        # Index `arg` with [start:stop] on the first axis and a full slice
        # on each remaining axis.
        indices = [arg_slice] + \
                  [syntax_helpers.slice_none] * (arg.type.rank - 1)
        tuple_t = tuple_type.make_tuple_type(syntax_helpers.get_types(indices))
        index_tuple = syntax.Tuple(indices, tuple_t)
        result_t = t.index_type(tuple_t)
        return syntax.Index(arg, index_tuple, type = result_t)

    unpacked_args = []
    i = 0
    # Nonlocal (closure) values come first in the struct and are passed
    # through whole.
    for t in nonlocal_types:
        unpacked_args.append(syntax.Attribute(args_var, ("arg%d" % i),
                                              type = t))
        i += 1
    for t in arg_types:
        attr = syntax.Attribute(args_var, ("arg%d" % i), type = t)

        if isinstance(t, array_type.ArrayT) and i != dont_slice_position:
            # TODO: Handle axis.
            unpacked_args.append(slice_arg(attr, t))
        else:
            unpacked_args.append(attr)
        i += 1

    # If tiling, pass in the tile params array.
    if config.opt_tile:
        unpacked_args.append(tile_sizes_var)

    # Make a typed closure that calls the payload function with the arg
    # slices.
    closure_t = closure_type.make_closure_type(fn, [])
    nested_closure = syntax.Closure(fn, [], type = closure_t)
    return_t = fn.return_type
    call = syntax.Call(nested_closure, unpacked_args, type = return_t)

    output_name = names.fresh("output")
    output_attr = syntax.Attribute(args_var, "output", type = return_t)
    output_var = syntax.Var(output_name, type = output_attr.type)
    output_slice = slice_arg(output_var, return_t)
    body = [syntax.Assign(output_var, output_attr),
            syntax.Assign(output_slice, call),
            syntax.Return(syntax_helpers.none)]
    # NOTE(review): the output variable is declared with output_attr.type
    # but registered here under output_slice.type -- confirm the two always
    # coincide.
    type_env = {output_name: output_slice.type}
    for arg in inputs:
        type_env[arg.name] = arg.type

    # Construct the typed wrapper.
    wrapper_name = adverb_class.node_type() + fn.name + "_par"
    parallel_wrapper = \
        syntax.TypedFn(name = names.fresh(wrapper_name),
                       arg_names = [var.name for var in inputs],
                       input_types = syntax_helpers.get_types(inputs),
                       body = body,
                       return_type = core_types.NoneType,
                       type_env = type_env)
    lowered = lowering(parallel_wrapper)
    lowered.num_tiles = num_tiles
    lowered.dl_tile_estimates = fn.dl_tile_estimates
    lowered.ml_tile_estimates = fn.ml_tile_estimates

    _par_wrapper_cache[key] = lowered
    return lowered