def test_basics2():
    """Build several integer-indexed references into a clamped Func.

    The test passes when every indexing expression can be constructed
    without the bindings raising.
    """
    src = hl.ImageParam(hl.Float(32), 3, 'input')
    # A default value is needed when no executable is being generated.
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    # In the C++ version this is supplied at code-generation time.
    s_sigma = 8
    x, y, z, c = hl.Var('x'), hl.Var('y'), hl.Var('z'), hl.Var('c')

    # Boundary condition: clamp the coordinates into the input's extent.
    clamped = hl.Func('clamped')
    clamp_x = hl.clamp(x, 0, src.width() - 1)
    clamp_y = hl.clamp(y, 0, src.height() - 1)
    clamped[x, y] = src[clamp_x, clamp_y, 0]

    # Bilateral-grid style indexing.
    r = hl.RDom(0, s_sigma, 0, s_sigma, 'r')
    half = s_sigma // 2
    one = hl.cast(hl.Int(32), 1)
    half_expr = hl.cast(hl.Int(32), half)

    plain = clamped[x * s_sigma, y * s_sigma]
    times_cast = clamped[x * s_sigma * one, y * s_sigma * one]
    # Indexing with s_sigma / 2 (true division) is expected to fail, so it
    # is intentionally not exercised here.
    minus_cast = clamped[x * s_sigma - half_expr, y * s_sigma - half_expr]
    minus_int = clamped[x * s_sigma - half, y * s_sigma - half]
    with_rdom = clamped[x * s_sigma + r.x - half, y * s_sigma + r.y - half]
def test_basics2():
    """Check integer indexing of a clamped Func and rejection of float indices."""
    src = hl.ImageParam(hl.Float(32), 3, 'input')
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    s_sigma = 8
    x, y, z, c = hl.Var('x'), hl.Var('y'), hl.Var('z'), hl.Var('c')

    # Boundary condition: clamp the coordinates into the input's extent.
    clamped = hl.Func('clamped')
    clamp_x = hl.clamp(x, 0, src.width() - 1)
    clamp_y = hl.clamp(y, 0, src.height() - 1)
    clamped[x, y] = src[clamp_x, clamp_y, 0]

    # Bilateral-grid style indexing.
    r = hl.RDom([(0, s_sigma), (0, s_sigma)], 'r')
    half = s_sigma // 2
    one = hl.cast(hl.Int(32), 1)
    half_expr = hl.cast(hl.Int(32), half)

    plain = clamped[x * s_sigma, y * s_sigma]
    times_cast = clamped[x * s_sigma * one, y * s_sigma * one]
    minus_cast = clamped[x * s_sigma - half_expr, y * s_sigma - half_expr]
    minus_int = clamped[x * s_sigma - half, y * s_sigma - half]
    with_rdom = clamped[x * s_sigma + r.x - half, y * s_sigma + r.y - half]

    # True division yields a float; using it as an index must be refused.
    try:
        float_index = clamped[x * s_sigma - s_sigma / 2,
                              y * s_sigma - s_sigma / 2]
    except RuntimeError as err:
        assert 'Implicit cast from float32 to int' in str(err)
    else:
        assert False, 'Did not see expected exception!'
def test_basics():
    """Smoke-test expression building, a two-stage blur definition and JIT compilation."""
    img = hl.ImageParam(hl.UInt(16), 2, 'input')
    x = hl.Var('x')
    y = hl.Var('y')

    blur_x = hl.Func('blur_x')
    blur_xx = hl.Func('blur_xx')
    blur_y = hl.Func('blur_y')

    one = hl.cast(hl.Int(32), 1)
    assert one.type() == hl.Int(32)

    # Each bare expression below only has to be constructible.
    shifted = x + 1
    img[x, y]
    img[0, 0]
    img[shifted, y]
    img[x + 1, y]
    img[x, y] + img[x + 1, y]

    if False:
        # Disabled in the original test: arithmetic on references to the
        # still-undefined blur_x.
        first = blur_x[x, y]
        second = blur_x[x, y + 1]
        first + second
        blur_x[x, y] + blur_x[x, y + 1]

    (img[x, y] + img[x + 1, y]) / 2
    blur_x[x, y]

    blur_xx[x, y] = img[x, y]
    blur_x[x, y] = (img[x, y] + img[x + 1, y] + img[x + 2, y]) / 3
    blur_y[x, y] = (blur_x[x, y] + blur_x[x, y + 1] + blur_x[x, y + 2]) / 3

    # Schedule, then compile.
    xi = hl.Var('xi')
    yi = hl.Var('yi')
    blur_y.tile(x, y, xi, yi, 8, 4).parallel(y).vectorize(xi, 8)
    blur_x.compute_at(blur_y, x).vectorize(x, 8)

    blur_y.compile_jit()
def test_image_to_ndarray():
    """Convert Image objects to ndarrays and check dtype and contents.

    The test is skipped when `image_to_ndarray` is not defined at module
    level.
    """
    if "image_to_ndarray" not in globals():
        print("Skipping test_image_to_ndarray")
        return

    import numpy

    # float32 image -> float32 array
    float_image = Image(hl.Float(32), 50, 50)
    assert float_image.type() == hl.Float(32)

    float_array = image_to_ndarray(float_image)
    print("a0.shape", float_array.shape)
    print("a0.dtype", float_array.dtype)
    assert float_array.dtype == numpy.float32

    # int16 image with one known element -> int16 array carrying that element
    short_image = Image(hl.Int(16), 50, 50)
    assert short_image.type() == hl.Int(16)
    short_image[24, 24] = 42
    assert short_image(24, 24) == 42

    short_array = image_to_ndarray(short_image)
    print("a1.shape", short_array.shape)
    print("a1.dtype", short_array.dtype)
    assert short_array.dtype == numpy.int16
    assert short_array[24, 24] == 42
def merge_temporal(images, alignment):
    """Build the Funcs that blend alternate frames into the reference frame.

    Frame 0 along dimension 2 of `images` is used as the reference. Every
    other frame is given a per-tile weight derived from an L1 distance to the
    reference, and the output is the weighted average.

    Args:
        images: 3-D image stack; dimension 2 indexes frames (frame 0 is read
            as the reference).
        alignment: Func indexed as [tx, ty, n]; its value is wrapped in a
            Point and used as a per-tile offset.

    Returns:
        The scheduled `merge_temporal_output` Func, indexed [ix, iy, tx, ty].
    """
    weight = hl.Func("merge_temporal_weights")
    total_weight = hl.Func("merge_temporal_total_weights")
    output = hl.Func("merge_temporal_output")

    ix, iy, tx, ty, n = hl.Var('ix'), hl.Var('iy'), hl.Var('tx'), hl.Var('ty'), hl.Var('n')

    # rdom0: 16x16 reduction used for the per-tile distance.
    rdom0 = hl.RDom([(0, 16), (0, 16)])
    # rdom1: reduction over the alternate frames (starts at 1, skipping frame 0).
    rdom1 = hl.RDom([(1, images.dim(2).extent() - 1)])

    imgs_mirror = hl.BoundaryConditions.mirror_interior(images, [(0, images.width()), (0, images.height())])

    # NOTE(review): box_down2 is defined elsewhere; presumably a 2x box
    # downsample, which would explain halving the offsets below -- confirm.
    layer = box_down2(imgs_mirror, "merge_layer")

    offset = Point(alignment[tx, ty, n]).clamp(Point(MINIMUM_OFFSET, MINIMUM_OFFSET),
                                               Point(MAXIMUM_OFFSET, MAXIMUM_OFFSET))

    al_x = idx_layer(tx, rdom0.x) + offset.x / 2
    al_y = idx_layer(ty, rdom0.y) + offset.y / 2

    ref_val = layer[idx_layer(tx, rdom0.x), idx_layer(ty, rdom0.y), 0]
    alt_val = layer[al_x, al_y, n]

    factor = 8.0
    min_distance = 10
    max_distance = 300  # max L1 distance, otherwise the value is not used

    # Summed absolute difference between reference and alternate values, scaled by 1/256.
    distance = hl.sum(hl.abs(hl.cast(hl.Int(32), ref_val) - hl.cast(hl.Int(32), alt_val))) / 256

    # Shift by min_distance, scale by 1/factor, and keep the result at least 1.
    normal_distance = hl.max(1, hl.cast(hl.Int(32), distance) / factor - min_distance / factor)

    # Weight for the alternate frame
    weight[tx, ty, n] = hl.select(normal_distance > (max_distance - min_distance), 0.0, 1.0 / normal_distance)

    # The "+ 1" accounts for the reference frame, which is added with weight 1 below.
    total_weight[tx, ty] = hl.sum(weight[tx, ty, rdom1]) + 1

    # From here on the same local names are rebound for the full-resolution
    # pass: the offset is used unclamped and unhalved, and the frame index is
    # the reduction variable rdom1 rather than the pure Var n.
    offset = Point(alignment[tx, ty, rdom1])

    al_x = idx_im(tx, ix) + offset.x
    al_y = idx_im(ty, iy) + offset.y

    ref_val = imgs_mirror[idx_im(tx, ix), idx_im(ty, iy), 0]
    alt_val = imgs_mirror[al_x, al_y, rdom1]

    # Sum all values according to their weight, and divide by total weight to obtain average
    output[ix, iy, tx, ty] = hl.sum(weight[tx, ty, rdom1] * alt_val / total_weight[tx, ty]) + ref_val / total_weight[
        tx, ty]

    weight.compute_root().parallel(ty).vectorize(tx, 16)
    total_weight.compute_root().parallel(ty).vectorize(tx, 16)
    output.compute_root().parallel(ty).vectorize(ix, 32)

    return output
def test_types():
    """Check inequality of differently sized Int types and that neither is a float."""
    int32_t = hl.Int(32)
    int16_t = hl.Int(16)

    assert int32_t != int16_t
    assert not int32_t.is_float()
    assert not int16_t.is_float()

    print("hl.Int(32) type:", hl.Int(32))
    print("hl.Int(16) type:", hl.Int(16))
def test_basics():
    """Smoke-test expression building, a two-stage blur and JIT compilation.

    Progress markers are printed between steps so a failure can be located
    from the output.
    """
    img = hl.ImageParam(hl.UInt(16), 2, 'input')
    x = hl.Var('x')
    y = hl.Var('y')

    blur_x = hl.Func('blur_x')
    blur_xx = hl.Func('blur_xx')
    blur_y = hl.Func('blur_y')

    one = hl.cast(hl.Int(32), 1)
    assert one.type() == hl.Int(32)
    print("yy type:", one.type())

    # Each bare expression below only has to be constructible.
    shifted = x + 1
    img[x, y]
    img[0, 0]
    img[shifted, y]
    img[x + 1, y]
    print("ping 0.2")
    img[x, y] + img[x + 1, y]

    if False:
        # Disabled in the original test: arithmetic on references to the
        # still-undefined blur_x.
        first = blur_x[x, y]
        second = blur_x[x, y + 1]
        first + second
        blur_x[x, y] + blur_x[x, y + 1]

    print("ping 0.3")
    (img[x, y] + img[x + 1, y]) / 2
    print("ping 0.4")
    blur_x[x, y]
    print("ping 0.4.1")
    blur_xx[x, y] = img[x, y]
    print("ping 0.5")
    blur_x[x, y] = (img[x, y] + img[x + 1, y] + img[x + 2, y]) / 3
    print("ping 1")
    blur_y[x, y] = (blur_x[x, y] + blur_x[x, y + 1] + blur_x[x, y + 2]) / 3

    xi = hl.Var('xi')
    yi = hl.Var('yi')
    print("ping 2")
    blur_y.tile(x, y, xi, yi, 8, 4).parallel(y).vectorize(xi, 8)
    blur_x.compute_at(blur_y, x).vectorize(x, 8)

    blur_y.compile_jit()
    print("Compiled to jit")
def test_buffer_to_ndarray():
    """Check shared and copied ndarray views of a Buffer, including a cropped one."""
    int16_t = hl.Int(16)
    buf = hl.Buffer(int16_t, [4, 4])
    assert buf.type() == int16_t
    buf.fill(0)
    buf[1, 2] = 42
    assert buf[1, 2] == 42

    # copy=False: the array is expected to alias buf's storage.
    view = np.array(buf, copy=False)
    assert view.shape == (4, 4)
    assert view.dtype == np.int16
    assert view[1, 2] == 42

    # copy=True: the array is expected to own independent storage.
    snapshot = np.array(buf, copy=True)
    assert snapshot.shape == (4, 4)
    assert snapshot.dtype == np.int16
    assert snapshot[1, 2] == 42

    # A write through buf shows up in the view but not in the snapshot.
    buf[1, 2] = 3
    assert view[1, 2] == 3
    assert snapshot[1, 2] == 42

    # Buffers with a nonzero min must convert correctly as well, because the
    # Python buffer protocol has no notion of 'min'.
    cropped = buf.copy()
    cropped.crop(dimension=0, min=1, extent=2)

    # Aliases cropped's storage; array index 0 corresponds to buffer coordinate 1.
    cropped_view = np.array(cropped, copy=False)
    assert cropped_view.shape == (2, 4)
    assert cropped_view.dtype == np.int16
    assert cropped_view[0, 2] == 3

    # Independent of everything else.
    cropped_snapshot = np.array(cropped, copy=True)
    assert cropped_snapshot.shape == (2, 4)
    assert cropped_snapshot.dtype == np.int16
    assert cropped_snapshot[0, 2] == 3

    # Writing to cropped leaves buf and its arrays untouched (asserted below)
    # and is visible only through cropped's own shared view.
    cropped[1, 2] = 5

    assert buf[1, 2] == 3
    assert view[1, 2] == 3
    assert snapshot[1, 2] == 42

    assert cropped[1, 2] == 5
    assert cropped_view[0, 2] == 5
    assert cropped_snapshot[0, 2] == 3
def resize_scale(input, fx, fy):
    """Return a Func that samples `input` at coordinates divided by (fx, fy).

    The source index for each output coordinate is the coordinate divided by
    the matching factor and cast to Int(32). Stores to the two index Funcs
    are traced.
    """
    # NOTE(review): this Func is created but never used afterwards.
    shr = hl.Func('resize')

    x = hl.Var("x")
    y = hl.Var("y")
    c = hl.Var("c")
    int32_t = hl.Int(32)

    index_x = hl.Func("index_x")
    index_y = hl.Func("index_y")
    for lookup in (index_x, index_y):
        lookup.trace_stores()

    index_x[x] = hl.cast(int32_t, x / fx)
    index_y[y] = hl.cast(int32_t, y / fy)

    final = hl.Func("final")
    src_x = index_x[x]
    src_y = index_y[y]
    final[x, y, c] = input[src_x, src_y, c]
    return final
def test_basics2():
    """Check result types of integer/float arithmetic on Vars and integer indexing.

    Diagnostic values are printed first, then the expression types are
    asserted, and finally several indexing expressions into a clamped Func
    are built (they only have to construct without raising).
    """
    input = hl.ImageParam(hl.Float(32), 3, 'input')
    # Value needed if not generating an executable
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    # This is passed during code generation in the C++ version
    s_sigma = 8

    x = hl.Var('x')
    y = hl.Var('y')
    z = hl.Var('z')
    c = hl.Var('c')

    # Add a boundary condition
    clamped = hl.Func('clamped')
    clamped[x, y] = input[hl.clamp(x, 0, input.width() - 1),
                          hl.clamp(y, 0, input.height() - 1), 0]

    # Diagnostic output
    print("s_sigma", s_sigma)
    print("s_sigma/2", s_sigma / 2)
    print("s_sigma//2", s_sigma // 2)
    print()
    print("x * s_sigma", x * s_sigma)
    print("x * 8", x * 8)
    print("x * 8 + 4", x * 8 + 4)
    print("x * 8 * 4", x * 8 * 4)
    print()
    print("x", x)
    # The original printed only the label here; the value was missing.
    print("(x * s_sigma).type()", (x * s_sigma).type())
    print("(x * 8).type()", (x * 8).type())
    print("(x * 8 + 4).type()", (x * 8 + 4).type())
    print("(x * 8 * 4).type()", (x * 8 * 4).type())
    print("(x * 8 / 4).type()", (x * 8 / 4).type())
    print("((x * 8) * 4).type()", ((x * 8) * 4).type())
    print("(x * (8 * 4)).type()", (x * (8 * 4)).type())

    assert (x * 8).type() == hl.Int(32)
    assert (x * 8 * 4).type() == hl.Int(32)  # yes this did fail at some point
    assert ((x * 8) / 4).type() == hl.Int(32)
    # 8 / 4 is evaluated by Python first and yields a float under
    # Python 3 division rules.
    assert (x * (8 / 4)).type() == hl.Float(32)
    assert (x * (8 // 4)).type() == hl.Int(32)
    # assert (x * 8 // 4).type() == hl.Int(32)  # not yet implemented

    # Construct the bilateral grid
    r = hl.RDom(0, s_sigma, 0, s_sigma, 'r')
    val0 = clamped[x * s_sigma, y * s_sigma]
    val00 = clamped[x * s_sigma * hl.cast(hl.Int(32), 1),
                    y * s_sigma * hl.cast(hl.Int(32), 1)]
    # val1 = clamped[x * s_sigma - s_sigma/2, y * s_sigma - s_sigma/2]  # should fail
    val22 = clamped[x * s_sigma - hl.cast(hl.Int(32), s_sigma // 2),
                    y * s_sigma - hl.cast(hl.Int(32), s_sigma // 2)]
    val2 = clamped[x * s_sigma - s_sigma // 2, y * s_sigma - s_sigma // 2]
    val3 = clamped[x * s_sigma + r.x - s_sigma // 2,
                   y * s_sigma + r.y - s_sigma // 2]
def test_basics3():
    """Exercise select() and in-place accumulation on a FuncRef of a histogram Func."""
    src = hl.ImageParam(hl.Float(32), 3, 'input')
    # A default value is needed when no executable is being generated.
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    # In the C++ version this is supplied at code-generation time.
    s_sigma = 8
    x, y, z, c = hl.Var('x'), hl.Var('y'), hl.Var('z'), hl.Var('c')

    # Boundary condition: clamp the coordinates into the input's extent.
    clamped = hl.Func('clamped')
    clamp_x = hl.clamp(x, 0, src.width() - 1)
    clamp_y = hl.clamp(y, 0, src.height() - 1)
    clamped[x, y] = src[clamp_x, clamp_y, 0]

    # Bilateral-grid construction.
    r = hl.RDom(0, s_sigma, 0, s_sigma, 'r')
    half = s_sigma // 2
    sample = clamped[x * s_sigma + r.x - half, y * s_sigma + r.y - half]
    sample = hl.clamp(sample, 0.0, 1.0)
    # Bin index: value / r_sigma, plus 0.5, cast to Int(32).
    zi = hl.cast(hl.Int(32), (sample / r_sigma) + 0.5)

    histogram = hl.Func('histogram')
    histogram[x, y, z, c] = 0.0

    contribution = hl.select(c == 0, sample, 1.0)
    print("hl.select(c == 0, val, 1.0)", contribution)

    cell = histogram[x, y, zi, c]
    print("histogram[x, y, zi, c]", histogram[x, y, zi, c])
    print("histogram[x, y, zi, c]", cell)
    cell += 5
    print("histogram[x, y, zi, c] after += 5", cell)
    cell += contribution
def test_ndarray_to_buffer():
    """Wrap an int32 ndarray in a Buffer and check type, geometry and shared storage."""
    rows, cols = 200, 300
    array = np.ones((rows, cols), dtype=np.int32)

    # The Buffer keeps the array's shape. As asserted below, dimension 0 has
    # stride 300 and dimension 1 has stride 1.
    buf = hl.Buffer(array, "float32_test_buffer")
    assert buf.type() == hl.Int(32)
    assert buf.name() == "float32_test_buffer"
    assert buf.all_equal(1)

    dim0 = buf.dim(0)
    assert dim0.min() == 0
    assert dim0.max() == 199
    assert dim0.extent() == 200
    assert dim0.stride() == 300

    dim1 = buf.dim(1)
    assert dim1.min() == 0
    assert dim1.max() == 299
    assert dim1.extent() == 300
    assert dim1.stride() == 1

    # Storage is shared, so writes are visible in both directions.
    array[12, 34] = 56
    assert buf[12, 34] == 56

    buf[56, 34] = 12
    assert array[56, 34] == 12
def test_nobuildmethod():
    """Run the `nobuildmethod` generator on a constant buffer and check the output."""
    x = hl.Var()
    y = hl.Var()
    c = hl.Var()
    target = hl.get_jit_target_from_environment()

    shape = [2, 2]
    source = hl.Buffer(hl.Float(32), shape)
    source.fill(123)
    result = hl.Buffer(hl.Int(32), shape)

    pipeline = nobuildmethod.generate(target, source, 1.0)
    pipeline.realize(result)

    assert result.all_equal(123)
def __init__(self, x=None, y=None):
    """Initialise the x/y components as Int(16) expressions.

    Accepted call forms:
      * no arguments: both components are 0;
      * one argument that is an ``hl.FuncRef`` or a ``tuple``: its elements
        0 and 1 become x and y;
      * two arguments: they become x and y directly.

    Raises:
        TypeError: if a single argument of any other type is given.
            Previously this case silently left ``self.x`` and ``self.y``
            unset, so the failure only surfaced later as an AttributeError.
    """
    int16_t = hl.Int(16)

    if x is None and y is None:
        first, second = 0, 0
    elif x is not None and y is None:
        if isinstance(x, hl.FuncRef):
            # Kept from the original: the conversion result is discarded, so
            # this presumably only verifies that the Func is tuple-valued.
            hl.Tuple(x)
        elif not isinstance(x, tuple):
            raise TypeError(
                "single-argument form expects an hl.FuncRef or a tuple, "
                "got %s" % type(x).__name__)
        first, second = x[0], x[1]
    else:
        first, second = x, y

    self.x = hl.cast(int16_t, first)
    self.y = hl.cast(int16_t, second)
def get_erode(input):
    """Build a separable 5x5 erosion: a 5-tap minimum along x, then along y.

    Returns the scheduled `erode_y` Func.
    """
    x, y, c = hl.Var("x"), hl.Var("y"), hl.Var("c")
    int32_t = hl.Int(32)

    input_clamped = hl.Func("input_clamped")
    erode_x = hl.Func("erode_x")
    erode_y = hl.Func("erode_y")

    # Clamp coordinates into the input so the stencil can read past the edges.
    clamp_x = hl.clamp(x, hl.cast(int32_t, 0), hl.cast(int32_t, input.width() - 1))
    clamp_y = hl.clamp(y, hl.cast(int32_t, 0), hl.cast(int32_t, input.height() - 1))
    input_clamped[x, y, c] = input[clamp_x, clamp_y, c]

    # Horizontal pass: fold the five taps left to right with min().
    row_taps = [
        input_clamped[x - 2, y, c],
        input_clamped[x - 1, y, c],
        input_clamped[x, y, c],
        input_clamped[x + 1, y, c],
        input_clamped[x + 2, y, c],
    ]
    row_min = hl.min(row_taps[0], row_taps[1])
    for tap in row_taps[2:]:
        row_min = hl.min(row_min, tap)
    erode_x[x, y, c] = row_min

    # Vertical pass over the horizontal result, folded the same way.
    col_taps = [
        erode_x[x, y - 2, c],
        erode_x[x, y - 1, c],
        erode_x[x, y, c],
        erode_x[x, y + 1, c],
        erode_x[x, y + 2, c],
    ]
    col_min = hl.min(col_taps[0], col_taps[1])
    for tap in col_taps[2:]:
        col_min = hl.min(col_min, tap)
    erode_y[x, y, c] = col_min

    # CPU schedule: split y by 8 and run the outer loop in parallel.
    yi = hl.Var("yi")
    for stage in (erode_x, erode_y):
        stage.compute_root().split(y, y, yi, 8).parallel(y)

    return erode_y
def test_partialbuildmethod():
    """Expect `partialbuildmethod.generate` to be rejected with a RuntimeError."""
    x = hl.Var()
    y = hl.Var()
    c = hl.Var()
    target = hl.get_jit_target_from_environment()

    shape = [2, 2]
    source = hl.Buffer(hl.Float(32), shape)
    source.fill(123)
    result = hl.Buffer(hl.Int(32), shape)

    try:
        pipeline = partialbuildmethod.generate(target, source, 1)
    except RuntimeError as err:
        assert "Generators that use build() (instead of generate()+Output<>) are not supported in the Python bindings." in str(err)
    else:
        assert False, 'Did not see expected exception!'
def align_layer(layer, prev_alignment, prev_min, prev_max):
    """Build the per-tile alignment Func for one pyramid layer.

    For every tile (tx, ty) and frame n, candidate offsets around the scaled
    alignment of the previous layer are scored by an L1 distance against
    frame 0, and the offset with the minimum score is selected.

    Args:
        layer: Func indexed [x, y, n]; frame 0 is read as the reference.
        prev_alignment: Func indexed [tile_x, tile_y, n] holding the
            alignment of the previous (coarser) layer.
        prev_min: lower clamp bound passed to Point.clamp for the previous
            alignment.
        prev_max: upper clamp bound passed to Point.clamp for the previous
            alignment.

    Returns:
        The scheduled alignment Func, indexed [tx, ty, n].
    """
    scores = hl.Func(layer.name() + "_scores")
    alignment = hl.Func(layer.name() + "_alignment")

    xi, yi, tx, ty, n = hl.Var("xi"), hl.Var("yi"), hl.Var('tx'), hl.Var(
        'ty'), hl.Var('n')

    # rdom0: 16x16 reduction used when summing the per-tile distance.
    rdom0 = hl.RDom([(0, 16), (0, 16)])
    # rdom1: search window of candidate offsets, min -4 and extent 8 on each axis.
    rdom1 = hl.RDom([(-4, 8), (-4, 8)])

    # Alignment of the previous (more coarse) layer scaled to this (finer) layer
    prev_offset = DOWNSAMPLE_RATE * Point(
        prev_alignment[prev_tile(tx), prev_tile(ty), n]).clamp(
        prev_min, prev_max)

    x0 = idx_layer(tx, rdom0.x)
    y0 = idx_layer(ty, rdom0.y)

    # (x,y) coordinates in the search region relative to the offset obtained from the alignment of the previous layer
    x = x0 + prev_offset.x + xi
    y = y0 + prev_offset.y + yi

    ref_val = layer[x0, y0, 0]  # Value of reference frame (the first frame)
    alt_val = layer[x, y, n]  # alternate frame value

    # L1 distance between reference frame and alternate frame
    d = hl.abs(hl.cast(hl.Int(32), ref_val) - hl.cast(hl.Int(32), alt_val))

    # One score per candidate offset (xi, yi), tile and frame.
    scores[xi, yi, tx, ty, n] = hl.sum(d)

    # Alignment for each tile, where L1 distances are minimum
    alignment[tx, ty, n] = Point(hl.argmin(scores[rdom1.x, rdom1.y, tx, ty, n])) + prev_offset

    scores.compute_at(alignment, tx).vectorize(xi, 8)
    alignment.compute_root().parallel(ty).vectorize(tx, 16)

    return alignment
def test_division():
    """Check result types and values of `/` and `//` across Halide scalar types."""
    float32_t, float64_t = hl.Float(32), hl.Float(64)
    int16_t, int32_t = hl.Int(16), hl.Int(32)
    uint16_t, uint32_t = hl.UInt(16), hl.UInt(32)

    f32 = hl.Param(float32_t, 'f32', -32.0)
    f64 = hl.Param(float64_t, 'f64', 64.0)
    i16 = hl.Param(int16_t, 'i16', -16)
    i32 = hl.Param(int32_t, 'i32', 32)
    u16 = hl.Param(uint16_t, 'u16', 16)
    u32 = hl.Param(uint32_t, 'u32', 32)

    # --- Result types follow the rules in match_types() ---
    # Same family: the wider operand wins.
    assert (f32 / f64).type() == float64_t
    assert (f32 // f64).type() == float64_t

    assert (i16 / i32).type() == int32_t
    assert (i16 // i32).type() == int32_t

    assert (u16 / u32).type() == uint32_t
    assert (u16 // u32).type() == uint32_t

    # Mixing int and uint gives int.
    assert (u16 / i32).type() == int32_t
    assert (i32 // u16).type() == int32_t

    # Mixing anything with float gives float.
    assert (u16 / f32).type() == float32_t
    assert (u16 // f32).type() == float32_t

    assert (i16 / f64).type() == float64_t
    assert (i16 // f64).type() == float64_t

    # --- Values follow Halide semantics, not Python's ---
    # int / int stays integral (Python 3 would give a float), and // always
    # floors the result, even for floats.
    assert _evaluate(f32 / f64) == -0.5
    assert _evaluate(f32 // f64) == -1.0

    assert _evaluate(i16 / i32) == -1
    assert _evaluate(i16 // i32) == -1
    assert _evaluate(i32 / i16) == -2

    assert _evaluate(u16 / u32) == 0
    assert _evaluate(u16 // u32) == 0

    assert _evaluate(u16 / i32) == 0
    assert _evaluate(i32 // u16) == 2

    assert _evaluate(u16 / f32) == -0.5
    assert _evaluate(u16 // f32) == -1.0

    assert _evaluate(i16 / f64) == -0.25
    assert _evaluate(i16 // f64) == -1.0
def buffer_t_to_buffer_struct(buffer):
    """Copy the fields of a Halide buffer into a ctypes ``BufferStruct``.

    Args:
        buffer: a Halide buffer of type Int(32); it must provide
            ``raw_buffer()`` and ``host_ptr_as_int()``.

    Returns:
        A new ``BufferStruct`` whose ``host`` pointer, ``dev``,
        ``elem_size``, dirty flags and the first four ``extent`` /
        ``stride`` / ``min`` entries mirror the raw buffer.

    Raises:
        AssertionError: if the buffer's type is not Int(32).
    """
    assert buffer.type() == hl.Int(32)
    raw = buffer.raw_buffer()
    result = BufferStruct()

    # The host pointer is exposed as a plain integer address; reinterpret it
    # as a byte pointer. This must be ctypes.cast -- the previous
    # `ctypes.hl.cast` was left over from a mechanical `cast` -> `hl.cast`
    # rewrite and raised AttributeError.
    uint8_p_t = ctypes.POINTER(ctypes.c_ubyte)
    host_address = buffer.host_ptr_as_int()
    result.host = ctypes.cast(host_address, uint8_p_t)

    result.dev = raw.dev
    result.elem_size = raw.elem_size
    result.host_dirty = raw.host_dirty
    result.dev_dirty = raw.dev_dirty

    for i in range(4):
        result.extent[i] = raw.extent[i]
        result.stride[i] = raw.stride[i]
        # Same rewrite artifact as above: the field is `min`, not `hl.min`.
        result.min[i] = raw.min[i]

    return result
def test_float_or_int():
    """Check that Var/Expr arithmetic yields Int(32) or Float(32) as expected."""
    x = hl.Var('x')
    int32_t, float32_t = hl.Int(32), hl.Float(32)

    def is_int(expr):
        return expr.type() == int32_t

    def is_float(expr):
        return expr.type() == float32_t

    assert is_int((x // 2) - 1 + 2 * (x % 2))
    assert is_int((x / 2) - 1 + 2 * (x % 2))
    assert is_int(x / 2)
    assert is_float(x / 2.0)
    assert is_int(x // 2)
    assert is_int((x // 2) - 1)
    assert is_int(x % 2)
    assert is_int(2 * (x % 2))
    assert is_int((x // 2) - 1 + 2 * (x % 2))

    assert type(x) == hl.Var
    assert is_int(x.as_expr())
    assert is_float(hl.Expr(2.0))
    assert is_int(hl.Expr(2))
    assert is_int(x + 2)
    assert is_int(2 + x)
    assert is_int(hl.Expr(2) + hl.Expr(3))
    assert is_float(hl.Expr(2.0) + hl.Expr(3))
    assert is_float(hl.Expr(2) + 3.0)
    assert is_int(hl.Expr(2) + 3)
    assert is_int(x.as_expr() + 2)  # yes this failed at some point
    assert is_int(2 + x.as_expr())
    assert is_int(2 * (x + 2))  # yes this failed at some point
    assert is_int(x + 0)
    assert is_int(x % 2)
    assert is_int(2 * x)
    assert is_int(x * 2)
    assert is_int(x * 2)
    assert is_int(x % 2)
    assert is_int((x % 2) * 2)
    # assert is_int(2 * (x % 2))  # yes this failed at some point
    assert is_int((x + 2) * 2)
def test_float_or_int():
    """Check that Var/Expr arithmetic yields Int(32) or Float(32) as expected."""
    x = hl.Var('x')
    int32_t, float32_t = hl.Int(32), hl.Float(32)

    def is_int(expr):
        return expr.type() == int32_t

    def is_float(expr):
        return expr.type() == float32_t

    assert is_int(hl.Expr(x))
    assert is_int(x * 2)
    assert is_int(x / 2)

    assert is_int((x // 2) - 1 + 2 * (x % 2))
    assert is_int((x / 2) - 1 + 2 * (x % 2))
    assert is_int(x / 2)
    assert is_float(x / 2.0)
    assert is_int(x // 2)
    assert is_int((x // 2) - 1)
    assert is_int(x % 2)
    assert is_int(2 * (x % 2))
    assert is_int((x // 2) - 1 + 2 * (x % 2))

    assert type(x) == hl.Var
    assert is_int(hl.Expr(x))
    assert is_float(hl.Expr(2.0))
    assert is_int(hl.Expr(2))
    assert is_int(x + 2)
    assert is_int(2 + x)
    assert is_int(hl.Expr(2) + hl.Expr(3))
    assert is_float(hl.Expr(2.0) + hl.Expr(3))
    assert is_float(hl.Expr(2) + 3.0)
    assert is_int(hl.Expr(2) + 3)
    assert is_int(hl.Expr(x) + 2)
    assert is_int(2 + hl.Expr(x))
    assert is_int(2 * (x + 2))
    assert is_int(x + 0)
    assert is_int(x % 2)
    assert is_int(2 * x)
    assert is_int(x * 2)
    assert is_int(x * 2)
    assert is_int(x % 2)
    assert is_int((x % 2) * 2)
    assert is_int(2 * (x % 2))
    assert is_int((x + 2) * 2)
def contrast(input, strength, black_point):
    """Apply a sine-shaped contrast curve, then subtract and rescale for the black point.

    Returns the scheduled `contrast_output` Func, indexed [x, y, c], whose
    values are saturated to 16-bit unsigned.
    """
    output = hl.Func("contrast_output")
    x = hl.Var("x")
    y = hl.Var("y")
    c = hl.Var("c")

    # Constants of the sine curve.
    scale = strength
    inner_constant = math.pi / (2 * scale)
    sin_constant = hl.sin(inner_constant)
    slope = 65535 / (2 * sin_constant)
    constant = slope * sin_constant
    factor = math.pi / (scale * 65535)

    # Pure definition: the contrast curve.
    pixel = hl.cast(hl.Float(32), input[x, y, c])
    val = factor * pixel
    curved = slope * hl.sin(val - inner_constant) + constant
    output[x, y, c] = hl.u16_sat(curved)

    # Update definition: remove the black point and stretch to the full range.
    white_scale = 65535 / (65535 - black_point)
    lifted = hl.cast(hl.Int(32), output[x, y, c]) - black_point
    output[x, y, c] = hl.u16_sat(lifted * white_scale)

    output.compute_root().parallel(y).vectorize(x, 16)
    return output
# TODO: This allows you to use "true" div (vs floordiv) in Python2 for the / operator; # unfortunately it appears to also replace the overloads we've carefully added for Halide. # Figure out if it's possible to allow this to leave our Halide stuff unaffected. # # from __future__ import division import time, sys import halide as hl from datetime import datetime from scipy.misc import imread, imsave import numpy as np import os.path int_t = hl.Int(32) float_t = hl.Float(32) def get_interpolate(input, levels): """ Build function, schedules it, and invokes jit compiler :return: halide.hl.Func """ # THE ALGORITHM downsampled = [hl.Func('downsampled%d' % i) for i in range(levels)] downx = [hl.Func('downx%d' % l) for l in range(levels)] interpolated = [hl.Func('interpolated%d' % i) for i in range(levels)] # level_widths = [hl.Param(int_t,'level_widths%d'%i) for i in range(levels)]
def demosaic(input, width, height):
    """Build a demosaic pipeline from four 5x5 integer filters.

    Each filter is convolved with a mirrored view of `input`, divided by the
    sum of its coefficients, and saturated to 16 bits. The output selects one
    of the four filtered values, or the raw input sample, depending on the
    channel and on the row/column parity.

    Args:
        input: 2-D image indexed [x, y].
        width: extent in x used for the mirror boundary condition.
        height: extent in y used for the mirror boundary condition.

    Returns:
        The scheduled `demosaic_output` Func, indexed [x, y, c].
    """
    print(f'width: {width}, height: {height}')

    # Filter kernels: 5x5 buffers translated so their coordinates span -2..2.
    f0 = hl.Buffer(hl.Int(32), [5, 5], "demosaic_f0")
    f1 = hl.Buffer(hl.Int(32), [5, 5], "demosaic_f1")
    f2 = hl.Buffer(hl.Int(32), [5, 5], "demosaic_f2")
    f3 = hl.Buffer(hl.Int(32), [5, 5], "demosaic_f3")

    f0.translate([-2, -2])
    f1.translate([-2, -2])
    f2.translate([-2, -2])
    f3.translate([-2, -2])

    d0 = hl.Func("demosaic_0")
    d1 = hl.Func("demosaic_1")
    d2 = hl.Func("demosaic_2")
    d3 = hl.Func("demosaic_3")

    output = hl.Func("demosaic_output")

    x, y, c = hl.Var("x"), hl.Var("y"), hl.Var("c")
    # Reduction over the 5x5 kernel footprint (min -2, extent 5 on each axis).
    rdom0 = hl.RDom([(-2, 5), (-2, 5)])
    # rdom1 = hl.RDom([(0, width / 2), (0, height / 2)])

    input_mirror = hl.BoundaryConditions.mirror_interior(input, [(0, width), (0, height)])

    # Start from all-zero kernels; only the nonzero taps are set below.
    f0.fill(0)
    f1.fill(0)
    f2.fill(0)
    f3.fill(0)

    # Normalisation divisors; each equals the sum of that kernel's taps.
    f0_sum = 8
    f1_sum = 16
    f2_sum = 16
    f3_sum = 16

    # f0: plus-shaped kernel along both axes.
    f0[0, -2] = -1
    f0[0, -1] = 2
    f0[-2, 0] = -1
    f0[-1, 0] = 2
    f0[0, 0] = 4
    f0[1, 0] = 2
    f0[2, 0] = -1
    f0[0, 1] = 2
    f0[0, 2] = -1

    # f1: the positive neighbour taps lie along the x axis.
    f1[0, -2] = 1
    f1[-1, -1] = -2
    f1[1, -1] = -2
    f1[-2, 0] = -2
    f1[-1, 0] = 8
    f1[0, 0] = 10
    f1[1, 0] = 8
    f1[2, 0] = -2
    f1[-1, 1] = -2
    f1[1, 1] = -2
    f1[0, 2] = 1

    # f2: f1 with the roles of x and y exchanged.
    f2[0, -2] = -2
    f2[-1, -1] = -2
    f2[0, -1] = 8
    f2[1, -1] = -2
    f2[-2, 0] = 1
    f2[0, 0] = 10
    f2[2, 0] = 1
    f2[-1, 1] = -2
    f2[0, 1] = 8
    f2[1, 1] = -2
    f2[0, 2] = -2

    # f3: the positive neighbour taps lie on the diagonals.
    f3[0, -2] = -3
    f3[-1, -1] = 4
    f3[1, -1] = 4
    f3[-2, 0] = -3
    f3[0, 0] = 12
    f3[2, 0] = -3
    f3[-1, 1] = 4
    f3[1, 1] = 4
    f3[0, 2] = -3

    # Convolve in 32-bit integers, normalise, and saturate to 16 bits.
    d0[x, y] = hl.u16_sat(hl.sum(hl.i32(input_mirror[x + rdom0.x, y + rdom0.y]) * f0[rdom0.x, rdom0.y]) / f0_sum)
    d1[x, y] = hl.u16_sat(hl.sum(hl.i32(input_mirror[x + rdom0.x, y + rdom0.y]) * f1[rdom0.x, rdom0.y]) / f1_sum)
    d2[x, y] = hl.u16_sat(hl.sum(hl.i32(input_mirror[x + rdom0.x, y + rdom0.y]) * f2[rdom0.x, rdom0.y]) / f2_sum)
    d3[x, y] = hl.u16_sat(hl.sum(hl.i32(input_mirror[x + rdom0.x, y + rdom0.y]) * f3[rdom0.x, rdom0.y]) / f3_sum)

    # Parity predicates: "R" rows/columns are the even ones, "B" the odd ones.
    R_row = y % 2 == 0
    B_row = y % 2 != 0
    R_col = x % 2 == 0
    B_col = x % 2 != 0
    # Output channel predicates.
    at_R = c == 0
    at_G = c == 1
    at_B = c == 2

    # Pick the filtered value for each (channel, row parity, column parity)
    # combination listed; any combination not listed falls through to the
    # raw input sample.
    output[x, y, c] = hl.select(at_R & R_row & B_col, d1[x, y],
                                at_R & B_row & R_col, d2[x, y],
                                at_R & B_row & B_col, d3[x, y],
                                at_G & R_row & R_col, d0[x, y],
                                at_G & B_row & B_col, d0[x, y],
                                at_B & B_row & R_col, d1[x, y],
                                at_B & R_row & B_col, d2[x, y],
                                at_B & R_row & R_col, d3[x, y],
                                input[x, y])

    d0.compute_root().parallel(y).vectorize(x, 16)
    d1.compute_root().parallel(y).vectorize(x, 16)
    d2.compute_root().parallel(y).vectorize(x, 16)
    d3.compute_root().parallel(y).vectorize(x, 16)

    # Bounds are aligned to 2 and both axes unrolled by 2, matching the
    # period of the parity predicates above.
    output.compute_root().parallel(y).align_bounds(x, 2).unroll(x, 2).align_bounds(y, 2).unroll(y, 2).vectorize(x, 16)

    return output
def test_fill_all_equal():
    """Check that all_equal() holds after fill() and fails once one element differs."""
    fill_value = 3
    grid = hl.Buffer(hl.Int(32), [3, 4])

    grid.fill(fill_value)
    assert grid.all_equal(fill_value)

    # A single differing element must make all_equal() report False.
    grid[1, 2] = 4
    assert not grid.all_equal(fill_value)
def test_schedules(verbose=False, test_random=False):
    """Exercise schedule enumeration and random schedule generation for f -> g.

    NOTE(review): this block is Python 2 (print statements) and targets a
    legacy `halide` API (UniformImage, chunk, root). The source was collapsed
    onto one line, so the nesting below is reconstructed from the statements;
    confirm it against the upstream file.

    Args:
        verbose: when true, print the enumerated and generated schedules and
            the elapsed time.
        test_random: when true, skip the enumeration phase and exit the
            process with "Success" after the first random schedule has been
            applied and evaluated.
    """
    #random_module.seed(int(sys.argv[1]) if len(sys.argv)>1 else 0)
    halide.exit_on_signal()
    f = halide.Func('f')
    x = halide.Var('x')
    y = halide.Var('y')
    c = halide.Var('c')
    g = halide.Func('g')
    v = halide.Var('v')
    input = halide.UniformImage(halide.UInt(16), 3)
    int_t = halide.Int(32)
    # f reads the input with all three coordinates clamped (c to 0..2).
    f[x, y, c] = input[
        halide.clamp(x, halide.cast(int_t, 0), halide.cast(int_t, input.width() - 1)),
        halide.clamp(y, halide.cast(int_t, 0), halide.cast(int_t, input.height() - 1)),
        halide.clamp(c, halide.cast(int_t, 0), halide.cast(int_t, 2))]
    #g[v] = f[v,v]
    g[x, y, c] = f[x, y, c] + 1
    assert sorted(halide.all_vars(g).keys()) == sorted(['x', 'y', 'c'])  #, 'v'])

    if verbose:
        print halide.func_varlist(f)
        print 'caller_vars(f) =', caller_vars(g, f)
        print 'caller_vars(g) =', caller_vars(g, g)

    # validL = list(valid_schedules(g, f, 4))
    # validL = [repr(_x) for _x in validL]
    #
    # for L in sorted(validL):
    #     print repr(L)

    T0 = time.time()
    if not test_random:
        random = True  #False
        # Count the schedules produced by the non-random enumeration.
        nvalid_determ = 0
        for L in schedules_func(g, f, 0, 3):
            nvalid_determ += 1
            if verbose:
                print L
        # Count the schedules produced by 100 random enumerations; the
        # assertion at the end requires the total to be exactly 100.
        nvalid_random = 0
        for i in range(100):
            for L in schedules_func(
                    g, f, 0, DEFAULT_MAX_DEPTH, random=True
            ):  #sorted([repr(_x) for _x in valid_schedules(g, f, 3)]):
                if verbose and 0:
                    print L  #repr(L)
                nvalid_random += 1

    # Generate, apply and evaluate random schedules, collecting their text.
    s = []
    for i in range(400):
        d = random_schedule(g, 0, DEFAULT_MAX_DEPTH)
        si = str(d)
        s.append(si)
        if verbose:
            print 'Schedule:', si

        d.apply()
        evaluate = d.test((36, 36, 3), input)
        print 'evaluate'
        evaluate()
        if test_random:
            print 'Success'
            sys.exit()
    T1 = time.time()

    # Across the 400 random schedules, each of these forms must occur at least once.
    s = '\n'.join(s)
    assert 'f.chunk(_c0)' in s
    assert 'f.root().vectorize' in s
    assert 'f.root().unroll' in s
    assert 'f.root().split' in s
    assert 'f.root().tile' in s
    assert 'f.root().parallel' in s
    assert 'f.root().transpose' in s

    assert nvalid_random == 100
    if verbose:
        print 'generated in %.3f secs' % (T1 - T0)

    print 'random_schedule: OK'
def main():
    """Realize a Func into caller-provided buffers, including one with a nonzero origin.

    This lesson is an interlude before scheduling multi-stage pipelines: it
    shows how to evaluate a Func over a rectangle that does not start at
    (0, 0). Returns 0 on success.
    """
    # The familiar gradient function.
    gradient = hl.Func("gradient")
    x = hl.Var("x")
    y = hl.Var("y")
    gradient[x, y] = x + y

    # Trace stores so the evaluation order is visible in the output.
    gradient.trace_stores()

    # Earlier lessons called gradient.realize(8, 8). Internally that does
    # four things:
    # 1) generates code that can evaluate gradient over an arbitrary
    #    rectangle,
    # 2) allocates a new 8 x 8 image,
    # 3) runs the generated code for all x, y from (0, 0) to (7, 7) and
    #    stores the results in the image,
    # 4) returns the new image.
    #
    # When memory is managed by hand and Halide should not allocate the
    # image, realize() can instead be given an existing image to fill in.
    print("Evaluating gradient from (0, 0) to (7, 7)")
    result = hl.Buffer(hl.Int(32), [8, 8])
    gradient.realize(result)

    # Verify the contents.
    for row in range(8):
        for col in range(8):
            assert result[col, row] == col + row, "Something went wrong!"

    # Next, evaluate gradient over a 5 x 7 rectangle whose top-left corner is
    # at (100, 50), so x and y run from (100, 50) to (104, 56) inclusive.
    # The constructor takes the size; set_min() then moves the origin.
    shifted = hl.Buffer(hl.Int(32), [5, 7])
    shifted.set_min([100, 50])

    print("Evaluating gradient from (100, 50) to (104, 56)")

    # No new code has to be compiled here: the code generated by the first
    # realize() already handles an arbitrary rectangle.
    gradient.realize(shifted)

    # The buffer is addressed with coordinates that start at (100, 50).
    for row in range(50, 57):
        for col in range(100, 105):
            assert shifted[col, row] == col + row, "Something went wrong!"

    # 'shifted' holds the Func's values over a domain starting at (100, 50),
    # so reading shifted(0, 0) would be out of bounds and would probably
    # crash.

    # Evaluating over a non-rectangular region is not possible: Halide only
    # does rectangles.

    print("Success!")
    return 0
def main():
    """Tutorial: Funcs that evaluate to Tuples instead of a single Expr.

    Builds and realizes several small pipelines -- a two-valued Func, an
    argmax tuple reduction, and a Mandelbrot set built on a Tuple-backed
    Complex helper -- checking results with asserts and printing the
    Mandelbrot set as ASCII art.

    Returns:
        0 once every check has passed.

    Raises:
        AssertionError: if a realized value or type differs from the
            expected one.
    """
    # So far Funcs (such as the one below) have evaluated to a single
    # scalar value for each point in their domain.
    single_valued = hl.Func()
    x, y = hl.Var("x"), hl.Var("y")
    single_valued[x, y] = x + y

    # One way to write a hl.Func that returns a collection of values is
    # to add an additional dimension which indexes that collection. This
    # is how we typically deal with color. For example, the hl.Func below
    # represents a collection of three values for every x, y coordinate,
    # indexed by c.
    color_image = hl.Func()
    c = hl.Var("c")
    color_image[x, y, c] = hl.select(
        c == 0, 245,  # Red value
        c == 1, 42,   # Green value
        132)          # Blue value

    # Since this pattern appears quite often, Halide provides syntactic
    # sugar to write the code above as the following, using the "mux"
    # function.
    # color_image[x, y, c] = hl.mux(c, [245, 42, 132])

    # This method is often convenient because it makes it easy to
    # operate on this hl.Func in a way that treats each item in the
    # collection equally:
    brighter = hl.Func()
    brighter[x, y, c] = color_image[x, y, c] + 10

    # However this method is also inconvenient for three reasons.
    #
    # 1) Funcs are defined over an infinite domain, so users of this
    # hl.Func can for example access color_image[x, y, -17], which is
    # not a meaningful value and is probably indicative of a bug.
    #
    # 2) It requires a hl.select, which can impact performance if not
    # bounded and unrolled:
    # brighter.bound(c, 0, 3).unroll(c)
    #
    # 3) With this method, all values in the collection must have the
    # same type. While the above two issues are merely inconvenient,
    # this one is a hard limitation that makes it impossible to
    # express certain things in this way.

    # It is also possible to represent a collection of values as a
    # collection of Funcs:
    func_array = [hl.Func() for _ in range(3)]
    func_array[0][x, y] = x + y
    func_array[1][x, y] = hl.sin(x)
    func_array[2][x, y] = hl.cos(y)

    # This method avoids the three problems above, but introduces a
    # new annoyance. Because these are separate Funcs, it is
    # difficult to schedule them so that they are all computed
    # together inside a single loop over x, y.

    # A third alternative is to define a hl.Func as evaluating to a
    # Tuple instead of an hl.Expr. A Tuple is a fixed-size collection of
    # Exprs which may have different types. The following function
    # evaluates to an integer value (x+y), and a floating point value
    # (hl.sin(x*y)).
    multi_valued = hl.Func("multi_valued")
    multi_valued[x, y] = (x + y, hl.sin(x * y))

    # Realizing a tuple-valued hl.Func returns a collection of
    # Buffers. We call this a Realization. It's equivalent to a
    # std::vector of hl.Buffer/Image objects:
    if True:
        im1, im2 = multi_valued.realize([80, 60])
        assert im1.type() == hl.Int(32)
        assert im2.type() == hl.Float(32)
        assert im1[30, 40] == 30 + 40
        assert np.isclose(im2[30, 40], math.sin(30 * 40))

    # You can also pass a tuple of pre-allocated buffers to realize()
    # rather than having new ones created. (The Buffers must have the
    # correct types and have identical sizes.)
    if True:
        im1 = hl.Buffer(hl.Int(32), [80, 60])
        im2 = hl.Buffer(hl.Float(32), [80, 60])
        multi_valued.realize((im1, im2))
        assert im1[30, 40] == 30 + 40
        assert np.isclose(im2[30, 40], math.sin(30 * 40))

    # All Tuple elements are evaluated together over the same domain
    # in the same loop nest, but stored in distinct allocations. The
    # equivalent plain-Python code to the above is:
    if True:
        width, height = 80, 60
        multi_valued_0 = np.empty(width * height, dtype=np.int32)
        # The second element holds sin() values, so it must be a float
        # array; an integer array would truncate them.
        multi_valued_1 = np.empty(width * height, dtype=np.float32)
        # x spans the 80-wide dimension and y the 60-high one, matching
        # realize([80, 60]) above.
        for yy in range(height):
            for xx in range(width):
                multi_valued_0[xx + width * yy] = xx + yy
                multi_valued_1[xx + width * yy] = math.sin(xx * yy)

    # When compiling ahead-of-time, a Tuple-valued hl.Func evaluates
    # into multiple distinct output halide_buffer_t structs. These
    # appear in order at the end of the function signature:
    # int multi_valued(...input buffers and params..., halide_buffer_t
    # *output_1, halide_buffer_t *output_2)

    # In Python a Tuple is written as an ordinary Python tuple of Exprs,
    # exactly as was done for multi_valued above:
    multi_valued_2 = hl.Func("multi_valued_2")
    multi_valued_2[x, y] = (x + y, hl.sin(x * y))

    # Calls to a multi-valued hl.Func cannot be treated as Exprs. The
    # following is an error:
    # consumer = hl.Func()
    # consumer[x, y] = multi_valued_2[x, y] + 10

    # Instead you must index the returned object with square brackets
    # to retrieve the individual Exprs:
    integer_part = multi_valued_2[x, y][0]
    floating_part = multi_valued_2[x, y][1]
    assert type(integer_part) is hl.FuncTupleElementRef
    assert type(floating_part) is hl.FuncTupleElementRef

    consumer = hl.Func()
    consumer[x, y] = (integer_part + 10, floating_part + 10.0)

    # Tuple reductions.
    if True:
        # Tuples are particularly useful in reductions, as they allow
        # the reduction to maintain complex state as it walks along
        # its domain. The simplest example is an argmax.

        # First we create an image to take the argmax over.
        input_func = hl.Func()
        input_func[x] = hl.sin(x)
        input_buf = input_func.realize([100])
        assert input_buf.type() == hl.Float(32)

        # Then we define a 2-valued Tuple which tracks the index of the
        # maximum value and the maximum value itself.
        arg_max = hl.Func()

        # Pure definition.
        # (using [()] for zero-dimensional Funcs is a convention of this
        # python interface)
        arg_max[()] = (0, input_buf[0])

        # Update definition. The index only moves when the current
        # element is strictly greater than the running maximum, so on
        # ties the first index found is kept.
        r = hl.RDom([(1, 99)])
        old_index = arg_max[()][0]
        old_max = arg_max[()][1]
        new_index = hl.select(old_max < input_buf[r], r, old_index)
        new_max = hl.max(input_buf[r], old_max)
        arg_max[()] = (new_index, new_max)

        # The equivalent plain-Python code is:
        arg_max_0 = 0
        arg_max_1 = float(input_buf[0])
        for i in range(1, 100):
            prev_index = arg_max_0
            prev_max = arg_max_1
            next_index = i if (prev_max < input_buf[i]) else prev_index
            next_max = max(input_buf[i], prev_max)
            # In a tuple update definition, all loads and computation
            # are done before any stores, so that all Tuple elements
            # are updated atomically with respect to recursive calls
            # to the same hl.Func.
            arg_max_0 = next_index
            arg_max_1 = next_max

        # Let's verify that Halide and the plain-Python loop found the
        # same maximum value and index.
        if True:
            r0, r1 = arg_max.realize()

            assert r0.type() == hl.Int(32)
            assert r1.type() == hl.Float(32)
            assert arg_max_0 == r0[()]
            assert np.isclose(arg_max_1, r1[()])

        # Halide provides argmax and hl.argmin as built-in reductions
        # similar to sum, product, maximum, and minimum. They return
        # a Tuple consisting of the point in the reduction domain
        # corresponding to that value, and the value itself. In the
        # case of ties they return the first value found. We'll use
        # one of these in the following section.

    # Tuples for user-defined types.
    if True:
        # Tuples can also be a convenient way to represent compound
        # objects such as complex numbers. Defining an object that
        # can be converted to and from a Tuple is one way to extend
        # Halide's type system with user-defined types.
        class Complex:
            """A complex number stored as a (real, imag) pair."""

            def __init__(self, r, i=None):
                """Build from two floats, from two values, or from one pair.

                With a single argument, ``r`` is indexed as ``r[0]`` and
                ``r[1]`` to obtain the real and imaginary parts.
                """
                if isinstance(r, float) and isinstance(i, float):
                    # Python floats are wrapped so both parts are Exprs.
                    self.real = hl.Expr(r)
                    self.imag = hl.Expr(i)
                elif i is not None:
                    self.real = r
                    self.imag = i
                else:
                    self.real = r[0]
                    self.imag = r[1]

            def as_tuple(self):
                """Convert to a Tuple."""
                return (self.real, self.imag)

            def __add__(self, other):
                """Complex addition."""
                return Complex(self.real + other.real,
                               self.imag + other.imag)

            def __mul__(self, other):
                """Complex multiplication."""
                return Complex(
                    self.real * other.real - self.imag * other.imag,
                    self.real * other.imag + self.imag * other.real)

            def __getitem__(self, idx):
                """Return the real part for index 0, the imaginary for 1."""
                return (self.real, self.imag)[idx]

            def __len__(self):
                """A Complex always has two components."""
                return 2

            def magnitude(self):
                """Return the squared magnitude (real^2 + imag^2)."""
                return (self.real * self.real) + (self.imag * self.imag)

            # Other complex operators would go here. The above are
            # sufficient for this example.

        # Let's use the Complex class to compute a Mandelbrot set.
        mandelbrot = hl.Func()

        # The initial complex value corresponding to an x, y coordinate
        # in our hl.Func.
        initial = Complex(x / 15.0 - 2.5, y / 6.0 - 2.0)

        # Pure definition.
        t = hl.Var("t")
        mandelbrot[x, y, t] = Complex(0.0, 0.0)

        # We'll use an update definition to take 12 steps.
        r = hl.RDom([(1, 12)])
        current = Complex(mandelbrot[x, y, r - 1])

        # The following line uses the complex multiplication and
        # addition we defined above.
        mandelbrot[x, y, r] = (Complex(current * current) + initial)

        # We'll use another tuple reduction to compute the iteration
        # number where the value first escapes a circle of radius 4
        # (magnitude() is the squared magnitude, hence the 16.0).
        # This can be expressed as an hl.argmin of a boolean - we want
        # the index of the first time the given boolean expression is
        # false (we consider false to be less than true). The argmax
        # would return the index of the first time the expression is
        # true.
        escape_condition = Complex(mandelbrot[x, y, r]).magnitude() < 16.0
        first_escape = hl.argmin(escape_condition)
        assert type(first_escape) is tuple

        # We only want the index, not the value, but hl.argmin returns
        # both, so we'll index the hl.argmin Tuple expression using
        # square brackets to get the hl.Expr representing the index.
        escape = hl.Func()
        escape[x, y] = first_escape[0]

        # Realize the pipeline and print the result as ascii art.
        result = escape.realize([61, 25])
        assert result.type() == hl.Int(32)
        code = " .:-~*={&%#@"
        for yy in range(result.height()):
            for xx in range(result.width()):
                index = result[xx, yy]
                # Indices past the end of `code` have no glyph and are
                # skipped, so nothing is printed for them.
                if index < len(code):
                    print("%c" % code[index], end="")
            print("")

    print("Success!")

    return 0
def test_target():
    """Exercise hl.Target construction, string forms and feature queries.

    Covers host/default targets, to_string and validate_target_string,
    feature setters and with/without variants, natural vector sizes, GPU
    and large-buffer queries, device-API and type support, and rejection
    of a non-convertible feature list.
    """
    feat = hl.TargetFeature
    is_valid = hl.Target.validate_target_string

    def linux_x86(bits, *feature_arg):
        # Forward the optional feature list untouched so calls with and
        # without one hit the same constructor shapes as written inline.
        return hl.Target(hl.TargetOS.Linux, hl.TargetArch.X86, bits,
                         *feature_arg)

    def osx_x86_64(*feature_arg):
        return hl.Target(hl.TargetOS.OSX, hl.TargetArch.X86, 64,
                         *feature_arg)

    # Target("") should be exactly like get_host_target().
    host = hl.get_host_target()
    from_empty = hl.Target("")
    assert host == from_empty, "Default ctor failure"
    assert host.supported()

    # to_string of a default-constructed Target.
    unknown = hl.Target()
    text = unknown.to_string()
    assert text == "arch_unknown-0-os_unknown"
    # This should *not* validate: validate_target_string returns false
    # if any of arch-bits-os are undefined.
    assert not is_valid(text)

    # No round-trip through hl.Target(text) is attempted here: creating a
    # Target with unknown portions will assert-fail.

    # repr() and str()
    assert str(unknown) == "arch_unknown-0-os_unknown"
    assert repr(unknown) == "<halide.Target arch_unknown-0-os_unknown>"

    assert unknown.os == hl.TargetOS.OSUnknown
    assert unknown.arch == hl.TargetArch.ArchUnknown
    assert unknown.bits == 0

    # Full specification round-trip.
    text = linux_x86(32, [feat.SSE41]).to_string()
    assert text == "x86-32-linux-sse41"
    assert is_valid(text)

    # Full specification (without features) round-trip.
    text = linux_x86(32).to_string()
    assert text == "x86-32-linux"
    assert is_valid(text)

    # Full specification round-trip, crazy features.
    android_arm = hl.Target(
        hl.TargetOS.Android,
        hl.TargetArch.ARM,
        32,
        [
            feat.JIT,
            feat.SSE41,
            feat.AVX,
            feat.AVX2,
            feat.CUDA,
            feat.OpenCL,
            feat.OpenGL,
            feat.OpenGLCompute,
            feat.Debug,
        ],
    )
    text = android_arm.to_string()
    assert text == "arm-32-android-avx-avx2-cuda-debug-jit-opencl-opengl-openglcompute-sse41"
    assert is_valid(text)

    # Expected failures.
    rejected = (
        "host-unknowntoken",
        "x86-23",
        # bits == 0 is allowed only if arch_unknown and os_unknown are
        # specified, and no features are set.
        "x86-0",
        "0-arch_unknown-os_unknown-sse41",
        # "host" is only supported as the first token.
        "opencl-host",
    )
    for bad in rejected:
        assert not is_valid(bad)

    # set_feature
    target = linux_x86(32, [feat.SSE41])
    assert target.has_feature(feat.SSE41)
    assert not target.has_feature(feat.AVX)
    target.set_feature(feat.AVX)
    target.set_feature(feat.SSE41, False)
    assert target.has_feature(feat.AVX)
    assert not target.has_feature(feat.SSE41)

    # set_features
    target = linux_x86(32, [feat.SSE41])
    assert target.has_feature(feat.SSE41)
    assert not target.has_feature(feat.AVX)
    target.set_features([feat.SSE41], False)
    target.set_features([feat.AVX, feat.AVX2], True)
    assert target.has_feature(feat.AVX)
    assert target.has_feature(feat.AVX2)
    assert not target.has_feature(feat.SSE41)

    # with_feature
    base = linux_x86(32, [feat.SSE41])
    extended = base.with_feature(feat.NoAsserts)
    extended = extended.with_feature(feat.NoBoundsQuery)
    assert extended.to_string() == "x86-32-linux-no_asserts-no_bounds_query-sse41"

    # without_feature
    base = linux_x86(32, [feat.SSE41, feat.NoAsserts])
    # NoBoundsQuery wasn't set here, so removing it is a no-op.
    reduced = base.without_feature(feat.NoAsserts)
    reduced = reduced.without_feature(feat.NoBoundsQuery)
    assert reduced.to_string() == "x86-32-linux-sse41"

    # natural_vector_size: SSE4.1 is 16 bytes wide.
    sse41 = linux_x86(32, [feat.SSE41])
    lanes_by_type = [
        (hl.UInt(8), 16),
        (hl.Int(16), 8),
        (hl.UInt(32), 4),
        (hl.Float(32), 4),
    ]
    for element_type, lanes in lanes_by_type:
        assert sse41.natural_vector_size(element_type) == lanes

    # has_gpu_feature
    with_gpu = linux_x86(32, [feat.OpenCL])
    without_gpu = linux_x86(32, [])
    assert with_gpu.has_gpu_feature()
    assert not without_gpu.has_gpu_feature()

    # has_large_buffers & maximum_buffer_size
    large = linux_x86(64, [feat.LargeBuffers])
    small = linux_x86(64, [])
    assert large.has_large_buffers()
    assert large.maximum_buffer_size() == 9223372036854775807
    assert not small.has_large_buffers()
    assert small.maximum_buffer_size() == 2147483647

    # supports_device_api
    cuda = linux_x86(64, [feat.CUDA])
    plain = linux_x86(64)
    assert cuda.supports_device_api(hl.DeviceAPI.CUDA)
    assert not plain.supports_device_api(hl.DeviceAPI.CUDA)

    # supports_type (deprecated version)
    metal = osx_x86_64([feat.Metal])
    no_metal = osx_x86_64()
    assert not metal.supports_type(hl.Float(64))
    assert no_metal.supports_type(hl.Float(64))

    # supports_type (preferred version)
    metal = osx_x86_64([feat.Metal])
    no_metal = osx_x86_64()
    assert not metal.supports_type(hl.Float(64), hl.DeviceAPI.Metal)
    assert not no_metal.supports_type(hl.Float(64), hl.DeviceAPI.Metal)

    # target_feature_for_device_api
    mapped = hl.target_feature_for_device_api(hl.DeviceAPI.OpenCL)
    assert mapped == feat.OpenCL

    # A feature list whose items cannot be converted must be rejected.
    try:
        linux_x86(32, ["this is a string"])
    except TypeError as e:
        assert "incompatible constructor arguments" in str(e)
    else:
        assert False, 'Did not see expected exception!'
def test_buffer_to_str():
    """Check str() of an undefined Buffer and of a 128 x 256 int32 Buffer."""
    undefined = hl.Buffer()
    assert str(undefined) == '<undefined halide.Buffer>'

    sized = hl.Buffer(hl.Int(32), [128, 256])
    expected = '<halide.Buffer of type int32 shape:[[0,128,1],[0,256,128]]>'
    assert str(sized) == expected