示例#1
0
  def alloc_array(self, elt_t, dims, name = "array", explicit_struct = False):
    """
    Emit code which allocates an array with element type `elt_t` and return
    the temporary that the array expression gets assigned to.

    `dims` is either a tuple expression (as recognized by self.is_tuple), a
    Python list/tuple of per-dimension size expressions, or one lone size
    expression. With `explicit_struct` set, the data pointer, shape, strides,
    a zero_i64 field and the element count are packed into a Struct by hand;
    otherwise a single AllocArray node is built from the shape. Strides are
    computed under a row-major assumption; the layout is not configurable yet.
    """

    if not self.is_tuple(dims):
      # a lone dimension is promoted to a one-element list before packing
      if not isinstance(dims, (list, tuple)):
        dims = [dims]
      shape = self.tuple(dims, "shape", explicit_struct = explicit_struct)
    else:
      shape = dims
      dims = self.tuple_elts(shape)

    array_t = array_type.make_array_type(elt_t, len(dims))

    if not explicit_struct:
      return self.assign_temp(AllocArray(shape, type = array_t), name)

    nelts = self.prod(dims, name = "nelts")
    ptr_t = core_types.ptr_type(elt_t)
    data_ptr = self.assign_temp(Alloc(elt_t, nelts, type = ptr_t), "data_ptr")

    # row-major: the last stride is 1 and every earlier stride is the
    # previous one multiplied by the next trailing dimension
    stride_elts = [syntax_helpers.const(1)]
    for trailing_dim in reversed(dims[1:]):
      stride_elts.insert(0, self.mul(stride_elts[0], trailing_dim, "dim"))
    strides = self.tuple(stride_elts, "strides", explicit_struct = True)

    fields = [data_ptr, shape, strides, zero_i64, nelts]
    return self.assign_temp(Struct(fields, type = array_t), name)
示例#2
0
  def _create_wrapper(self, n_pos, static_pairs, dynamic_keywords):
    """
    Build a syntax.Fn whose body returns the result of calling self.f.

    n_pos            -- number of positional inputs; each gets a fresh
                        "input_<i>" formal and is passed positionally
    static_pairs     -- iterable of (name, value) pairs passed as keywords;
                        a syntax.Expr value must be a syntax.Const and is used
                        as-is, None is skipped, anything else must satisfy
                        syntax_helpers.is_python_constant and is wrapped with
                        syntax_helpers.const
    dynamic_keywords -- visible keyword names; each gets a fresh formal
                        registered under its visible name and is passed to
                        self.f as a keyword
    """
    args = FormalArgs()

    pos_vars = []
    for i in xrange(n_pos):
      fresh_input = names.fresh("input_%d" % i)
      args.add_positional(fresh_input)
      pos_vars.append(syntax.Var(fresh_input))

    keyword_vars = {}
    for visible_name in dynamic_keywords:
      fresh_kw = names.fresh(visible_name)
      args.add_positional(fresh_kw, visible_name)
      keyword_vars[visible_name] = syntax.Var(fresh_kw)

    for (static_name, value) in static_pairs:
      if isinstance(value, syntax.Expr):
        assert isinstance(value, syntax.Const)
        static_value = value
      elif value is None:
        # None-valued static arguments are not forwarded at all
        continue
      else:
        assert syntax_helpers.is_python_constant(value), \
            "Unexpected type for static/staged value: %s : %s" % \
            (value, type(value))
        static_value = syntax_helpers.const(value)
      keyword_vars[static_name] = static_value

    # self.f is invoked before the wrapper's own name is drawn from names.fresh
    call_result = self.f(*pos_vars, **keyword_vars)
    base_name = "%s_wrapper_%d_%d" % (self.name, n_pos,
                                      len(dynamic_keywords))
    return syntax.Fn(name = names.fresh(base_name),
                     args = args,
                     body = [syntax.Return(call_result)])
示例#3
0
def value_to_syntax(v):
  """
  Convert a Python value into a syntax node.

  Python constants go through syntax_helpers.const. A numpy dtype becomes a
  one-argument syntax.Fn whose body casts its argument to the type given by
  core_types.from_dtype. Any other value must satisfy is_function_value and
  is handed to translate_function_value.
  """
  if syntax_helpers.is_python_constant(v):
    return syntax_helpers.const(v)

  if not isinstance(v, np.dtype):
    assert is_function_value(v), "Can't make value %s : %s into static syntax" % (v, type(v))
    return translate_function_value(v)

  # dtype case: synthesize `cast(x) = Cast(x, <type of dtype>)`
  arg_name = names.fresh("x")
  cast_name = names.fresh("cast")
  cast_formals = FormalArgs()
  cast_formals.add_positional(arg_name, "x")
  target_t = core_types.from_dtype(v)
  cast_expr = syntax.Cast(syntax.Var(arg_name), type=target_t)
  return syntax.Fn(cast_name, cast_formals, [syntax.Return(cast_expr)])
示例#4
0
 def transform_Array(self, expr):
   """
   Rewrite an array literal.

   Rank-1 literals are rebuilt as a syntax.Array whose elements are coerced
   to the array's element type. For higher ranks an output array is
   allocated (outer dimension = number of elements, remaining dimensions
   read from the shape of the first element) and each element is assigned
   into its slot; the allocated array is returned.
   """
   array_t = expr.type
   elt_t = array_t.elt_type
   assert array_t.rank > 0

   if array_t.rank == 1:
     coerced = [self.coerce_expr(e, elt_t) for e in expr.elts]
     return syntax.Array(coerced, type = array_t)

   # the inner dimensions are taken from the first element only
   first_elt = self.assign_temp(expr.elts[0], "first_elt")
   inner_dims = tuple(self.shape(first_elt, axis)
                      for axis in xrange(array_t.rank - 1))
   all_dims = (syntax_helpers.const(len(expr.elts)),) + inner_dims

   array = self.alloc_array(elt_t, all_dims, "array_literal")
   for i, elt in enumerate(expr.elts):
     # store each element at index i along the outermost axis
     self.assign(self.index(array, i, temp = False), elt)
   return array
示例#5
0
  def transform_TiledReduce(self, expr):
    """
    Lower a TiledReduce expression into explicit loops appended to
    self.blocks and return the variable holding the reduction result.

    Steps, in the order the code performs them:
      1. Pick the tile size: a constant from self.fixed_tile_sizes when
         expr.fixed_tile_size is set, otherwise an element read from
         self.tile_sizes_param at the current nesting index.
      2. Transform the map and combine functions via transform_TypedFn.
      3. Choose the result variable (self.output_var for array accumulators
         when one is set, otherwise a freshly created output array).
      4. Emit initialization code which writes zero_f64 into the accumulator.
      5. Emit either one clamped loop over tiles, or a pair of loops
         (full tiles, then the remainder with step one).
    """
    args = expr.args
    axes = expr.axes

    # TODO: Should make sure that all the shapes conform here,
    # but we don't yet have anything like assertions or error handling.
    # Iteration count = size of the first argument along its first axis.
    niters = self.shape(args[0], syntax_helpers.unwrap_constant(axes[0]))

    if expr.fixed_tile_size:
      # Statically known tile size: advance the cursor into fixed_tile_sizes.
      self.fixed_idx += 1
      tile_size = syntax_helpers.const(self.fixed_tile_sizes[self.fixed_idx])
    else:
      # Tile size is read from the tile-sizes parameter at the (incremented)
      # nesting index; the flags record that this function uses tiles.
      self.tiling = True
      self.fn.has_tiles = True
      self.nesting_idx += 1
      tile_size = self.index(self.tile_sizes_param, self.nesting_idx,
                             temp = True, name = "tilesize")

    slice_t = array_type.make_slice_type(Int64, Int64, Int64)

    untiled_map_fn = self.get_fn(expr.fn)

    # Any non-scalar return type of the map function is treated as an array
    # accumulator.
    acc_type = untiled_map_fn.return_type
    acc_is_array = not isinstance(acc_type, ScalarT)

    tiled_map_fn = self.transform_TypedFn(untiled_map_fn)
    map_closure_args = [self.get_closure_arg(e)
                        for e in self.closure_elts(expr.fn)]

    untiled_combine = self.get_fn(expr.combine)
    # NOTE(review): closure arguments of expr.combine are not collected here;
    # the combine function is always inlined with an empty closure list.
    combine_closure_args = []

    # acc_is_array is passed as the second positional argument of
    # transform_TypedFn (presumably an output-preallocation flag — confirm
    # against its signature).
    tiled_combine = self.transform_TypedFn(untiled_combine, acc_is_array)
    if self.output_var and acc_is_array:
      # Reuse the output variable supplied by the caller.
      result = self.output_var
    else:
      shape_args = map_closure_args + args
      result = self._create_output_array(untiled_map_fn, shape_args,
                                         [], "loop_result")
    init = result
    rslt_t = result.type

    if not acc_is_array:
      # Scalar accumulators go through a loop merge: result_before is the
      # value entering the loop and is what gets initialized below.
      result_before = self.fresh_var(rslt_t, "result_before")
      init = result_before

    # Lift the initial value and fill it.
    # init_unpack(i, cur) wraps the assignment `cur = zero_f64` in i nested
    # ForLoops, each iterating over axis 0 of the current value.
    # NOTE(review): the stored value is always zero_f64; expr.init is only
    # consulted for its rank (see num_exps below).
    def init_unpack(i, cur):
      if i == 0:
        return syntax.Assign(cur, syntax_helpers.zero_f64)
      else:
        j = self.fresh_i64("j")
        start = zero_i64
        stop = self.shape(cur, 0)

        # Build the loop body in its own block so nested code is captured.
        self.blocks.push()
        n = self.index_along_axis(cur, 0, j)
        self.blocks += init_unpack(i-1, n)
        body = self.blocks.pop()

        return syntax.ForLoop(j, start, stop, one_i64, body, {})
    # Number of loop levels = rank difference between the accumulator being
    # initialized and the declared initial value.
    num_exps = array_type.get_rank(init.type) - \
               array_type.get_rank(expr.init.type)

    # TODO: Get rid of this when safe to do so.
    # NOTE: `or True` makes this condition always hold, so the initialization
    # is emitted for fixed and non-fixed tile sizes alike.
    if not expr.fixed_tile_size or True:
      self.comment("TiledReduce in %s: init_unpack" % self.fn.name)
      self.blocks += init_unpack(num_exps, init)

    # Loop over the remaining tiles.
    # merge is shared by every loop built through make_loop below.
    merge = {}

    if not acc_is_array:
      # Loop-carried scalar: (value before the loop, value after the body).
      result_after = self.fresh_var(rslt_t, "result_after")
      merge[result.name] = (result_before, result_after)

    # Build one ForLoop running i from start to stop in increments of step.
    # Each iteration slices every argument to [i, i + step) along its axis
    # (upper bound clamped to `stop` when do_min is true), inlines the tiled
    # map function on those slices and then inlines the combine function.
    def make_loop(start, stop, step, do_min = True):
      i = self.fresh_var(niters.type, "i")
      self.blocks.push()
      slice_stop = self.add(i, step, "next_bound")
      slice_stop_min = self.min(slice_stop, stop) if do_min \
                       else slice_stop

      tile_bounds = syntax.Slice(i, slice_stop_min, one_i64, type = slice_t)
      nested_args = [self.index_along_axis(arg, axis, tile_bounds)
                     for arg, axis in zip(args, axes)]

      new_acc = self.fresh_var(tiled_map_fn.return_type, "new_acc")
      self.comment("TiledReduce in %s: map_fn " % self.fn.name)
      do_inline(tiled_map_fn,
                map_closure_args + nested_args,
                self.type_env,
                self.blocks.top(),
                result_var = new_acc)

      # From here on the loop body is a detached block; the combine call is
      # inlined into it explicitly.
      loop_body = self.blocks.pop()
      if acc_is_array:
        # Index the result with slice_none on every axis and pass that as an
        # extra third argument to the combine function.
        # NOTE(review): the loop body has already been popped, so whatever
        # self.tuple / self.index / self.comment emit presumably lands in the
        # enclosing block rather than in the loop — confirm.
        outidx = self.tuple([syntax_helpers.slice_none] * result.type.rank)
        result_slice = self.index(result, outidx, temp = False)
        self.comment("")
        do_inline(tiled_combine,
                  combine_closure_args + [result, new_acc, result_slice],
                  self.type_env,
                  loop_body,
                  result_var = None)
      else:
        # Scalar case: the combined value is bound to result_after, which the
        # merge dictionary ties back to result.
        do_inline(tiled_combine,
                  combine_closure_args + [result, new_acc],
                  self.type_env, loop_body,
                  result_var = result_after)
      return syntax.ForLoop(i, start, stop, step, loop_body, merge)

    assert isinstance(tile_size, syntax.Expr), "%s not an expr" % tile_size

    self.comment("TiledReduce in %s: combine" % self.fn.name)

    if expr.fixed_tile_size and \
       config.opt_reg_tiles_not_tile_size_dependent and \
       syntax_helpers.unwrap_constant(tile_size) > 1:
      # Two-loop form: the first loop covers num_tiles * tile_size iterations
      # in steps of tile_size without clamping; the second starts from the
      # first loop's variable and steps by one up to niters.
      num_tiles = self.div(niters, tile_size, "num_tiles")
      tile_stop = self.mul(num_tiles, tile_size, "tile_stop")
      loop1 = make_loop(zero_i64, tile_stop, tile_size, False)
      self.blocks.append(loop1)
      loop2_start = self.assign_temp(loop1.var, "loop2_start")
      self.blocks.append(make_loop(loop2_start, niters, one_i64, False))
    else:
      # Single loop; the last tile is clamped to niters inside make_loop.
      self.blocks.append(make_loop(zero_i64, niters, tile_size))

    return result
示例#6
0
  def transform_TiledMap(self, expr):
    """
    Lower a TiledMap expression into explicit loops appended to self.blocks
    and return the variable holding the output array.

    The tile size is either a constant taken from self.fixed_tile_sizes or an
    element of self.tile_sizes_param. The nested function is transformed via
    transform_TypedFn and inlined into each loop body, receiving slices of
    every argument plus the matching region of the output array.
    """
    args = expr.args
    axes = expr.axes

    # TODO: Should make sure that all the shapes conform here,
    # but we don't yet have anything like assertions or error handling
    # Iteration count = size of the first argument along its first axis.
    niters = self.shape(expr.args[0],
                        syntax_helpers.unwrap_constant(axes[0]))

    # Create the tile size variable and find the number of tiles
    if expr.fixed_tile_size:
      # Statically known tile size: advance the cursor into fixed_tile_sizes.
      self.fixed_idx += 1
      tile_size = syntax_helpers.const(self.fixed_tile_sizes[self.fixed_idx])
    else:
      # Tile size is read from the tile-sizes parameter at the (incremented)
      # nesting index; the flags record that this function uses tiles.
      self.tiling = True
      self.fn.has_tiles = True
      self.nesting_idx += 1
      tile_size = self.index(self.tile_sizes_param, self.nesting_idx,
                             temp = True, name = "tilesize")

    untiled_inner_fn = self.get_fn(expr.fn)
    # Only functions with a non-scalar return type are transformed with
    # preallocate_output = True.
    if isinstance(untiled_inner_fn.return_type, ScalarT):
      tiled_inner_fn = self.transform_TypedFn(untiled_inner_fn)
    else:
      tiled_inner_fn = self.transform_TypedFn(untiled_inner_fn,
                                              preallocate_output = True)

    nested_has_tiles = tiled_inner_fn.has_tiles

    # Increase the nesting_idx by the number of tiles in the nested fn
    self.nesting_idx += tiled_inner_fn.num_tiles

    slice_t = array_type.make_slice_type(Int64, Int64, Int64)

    closure_args = [self.get_closure_arg(e)
                    for e in self.closure_elts(expr.fn)]

    if self.output_var and \
       not isinstance(untiled_inner_fn.return_type, ScalarT):
      # Reuse the output variable supplied by the caller.
      array_result = self.output_var
    else:
      shape_args = closure_args + expr.args
      array_result = self._create_output_array(untiled_inner_fn, shape_args,
                                               [], "array_result")

    # NOTE: this check runs after self.output_var may already have been
    # selected as array_result above.
    assert self.output_var is None or \
           self.output_var.type.__class__ is ArrayT, \
           "Invalid output var %s : %s" % \
           (self.output_var, self.output_var.type)

    # Build one ForLoop running i from start to stop in increments of step.
    # Each iteration slices every argument to [i, i + step) along its axis
    # and inlines the tiled inner function on those slices.
    def make_loop(start, stop, step, do_min = True):
      i = self.fresh_var(niters.type, "i")

      self.blocks.push()
      slice_stop = self.add(i, step, "slice_stop")
      # The clamp is against niters, not the `stop` argument.
      slice_stop_min = self.min(slice_stop, niters, "slice_min") if do_min \
                       else slice_stop

      tile_bounds = syntax.Slice(i, slice_stop_min, one_i64, type = slice_t)
      nested_args = [self.index_along_axis(arg, axis, tile_bounds)
                     for arg, axis in zip(args, axes)]
      # NOTE(review): the tile-size index (fixed_idx or nesting_idx) is used
      # here as the axis along which the output array is sliced — confirm
      # that this is the intended axis.
      out_idx = self.fixed_idx if expr.fixed_tile_size else self.nesting_idx
      output_region = self.index_along_axis(array_result, out_idx, tile_bounds)
      # The output region is passed as an extra trailing argument.
      nested_args.append(output_region)

      if nested_has_tiles:
        # A nested function that tiles also receives the tile-sizes parameter.
        nested_args.append(self.tile_sizes_param)
      # The inner function is inlined into the popped block, which becomes
      # the loop body; no result variable is bound.
      body = self.blocks.pop()
      do_inline(tiled_inner_fn,
                closure_args + nested_args,
                self.type_env,
                body,
                result_var = None)
      return syntax.ForLoop(i, start, stop, step, body, {})

    assert isinstance(tile_size, syntax.Expr)
    self.comment("TiledMap in %s" % self.fn.name)

    if expr.fixed_tile_size and \
       config.opt_reg_tiles_not_tile_size_dependent and \
       syntax_helpers.unwrap_constant(tile_size) > 1:
      # Two-loop form: the first loop covers num_tiles * tile_size iterations
      # in steps of tile_size without clamping; the second starts from the
      # first loop's variable and steps by one up to niters.
      num_tiles = self.div(niters, tile_size, "num_tiles")
      tile_stop = self.mul(num_tiles, tile_size, "tile_stop")
      loop1 = make_loop(zero_i64, tile_stop, tile_size, False)
      self.blocks.append(loop1)
      loop2_start = self.assign_temp(loop1.var, "loop2_start")
      self.blocks.append(make_loop(loop2_start, niters, one_i64, False))
    else:
      # Single loop; the last tile is clamped to niters inside make_loop.
      self.blocks.append(make_loop(zero_i64, niters, tile_size))
    return array_result
示例#7
0
 def visit_Const(self, v):
   """Pass the `value` attribute of `v` to syntax_helpers.const and return the result."""
   python_value = v.value
   return syntax_helpers.const(python_value)