def test_pattern(c):
    """Compare a float64 Halide pipeline against the same arithmetic in Python.

    Defines ``f[x] = x * c * (0.1 + 0.2)`` with every constant wrapped in
    ``hl.f64``, realizes 10 elements, and asserts that each realized value
    is ``math.isclose`` to ``i * c * (0.1 + 0.2)`` computed in plain Python.

    Args:
        c: Python float used as the multiplier on both sides.
    """
    var = hl.Var("x")
    func = hl.Func("f")
    func[var] = var * hl.f64(c) * (hl.f64(0.1) + hl.f64(0.2))

    realized = numpy.asarray(func.realize(10))
    for index, actual in enumerate(realized):
        # Same operation order as the Halide expression above.
        expected = index * c * (0.1 + 0.2)
        assert math.isclose(actual, expected), "{}[{}]: {} != {}".format(
            index, c, actual, expected)
def test():
    """Exercise float64 arithmetic and the float-literal RuntimeWarning.

    First runs a float64 pipeline for two coefficients and checks the
    realized values against Python arithmetic.  Then checks, via
    ``AssertWarnsContext``, which ``Var + <float>`` expressions trigger a
    ``RuntimeWarning``.
    """

    def test_pattern(c):
        """Assert Halide and Python agree on ``x * c * (0.1 + 0.2)``."""
        var = hl.Var("x")
        func = hl.Func("f")
        func[var] = var * hl.f64(c) * (hl.f64(0.1) + hl.f64(0.2))

        realized = numpy.asarray(func.realize(10))
        for index, actual in enumerate(realized):
            expected = index * c * (0.1 + 0.2)
            assert math.isclose(actual, expected), "{}[{}]: {} != {}".format(
                index, c, actual, expected)

    for coefficient in (0.123456789012345678, 0.987654321098765432):
        test_pattern(coefficient)

    x = hl.Var("x")

    # A bare Python float with many significant digits is expected to warn.
    with AssertWarnsContext(RuntimeWarning) as lossy_literal:
        _ = x + 0.123456789012345678
    assert lossy_literal.occurred, "RuntimeWarning didn't occur."

    # The same value wrapped explicitly in hl.f64 must not warn.
    with AssertWarnsContext(RuntimeWarning) as explicit_f64:
        _ = x + hl.f64(0.123456789012345678)
    assert not explicit_f64.occurred, "RuntimeWarning occurred."

    # 0.75 == 0.5 + 0.25; this literal must not warn (presumably because it
    # survives narrowing without loss -- confirm against the bindings).
    with AssertWarnsContext(RuntimeWarning) as exact_literal:
        _ = x + 0.75
    assert not exact_literal.occurred, "RuntimeWarning occurred."
def gen_outputs(self):
    ''' define the outputs

    Creates two Funcs, "rv" and "g_fock_out", and records both in
    self.funcs and self.outputs.  "g_fock_out" is a copy of g_fock;
    "rv" is zero everywhere and then has rv[0] updated to half of the
    sum of g_fock * g_dens over an nbfn-by-nbfn reduction domain.
    '''
    extent = self.nbfn
    i = self.vars["i"]
    j = self.vars["j"]
    fock = self.funcs["g_fock"]
    dens = self.clamps["g_dens"]

    # output scalars
    rv = hl.Func("rv")
    # output matrix
    g_fock_out = hl.Func("g_fock_out")

    # Publish both Funcs under the same keys in each registry.
    for registry in (self.funcs, self.outputs):
        registry.update({"rv": rv, "g_fock_out": g_fock_out})

    g_fock_out[i, j] = fock[i, j]

    # Pure definition first, then the two update stages on element 0.
    rv[i] = hl.f64(0.0)
    r_rv = hl.RDom([(0, extent), (0, extent)])
    rv[0] += fock[r_rv] * dens[r_rv]
    rv[0] *= hl.f64(0.5)
def test_typed_funcs():
    """Check the type/dimension constraints of typed and untyped hl.Func.

    Covers, in order:
      * an untyped, undefined Func: querying output_type(), outputs() or
        dimensions() must raise RuntimeError mentioning 'it is undefined';
      * a Func declared with a single type and dimensionality: the
        queries succeed even though the Func is still undefined;
      * a Func declared with two types: output_type() must raise
        ('it returns a Tuple'), the other queries succeed;
      * defining a typed Func with the wrong dimensionality or the wrong
        value type(s) must raise RuntimeError with the expected message.
    """
    x = hl.Var('x')
    y = hl.Var('y')

    # --- untyped and undefined: every query is expected to raise ---------
    f = hl.Func('f')
    assert not f.defined()

    try:
        # The call on the left raises before the comparison happens; the
        # right-hand side still has to be a valid name so that a missing
        # exception is reported by the `else` branch, not by a NameError.
        assert f.output_type() == hl.Int(32)
    except RuntimeError as e:
        assert 'it is undefined' in str(e)
    else:
        assert False, 'Did not see expected exception!'

    try:
        assert f.outputs() == 0
    except RuntimeError as e:
        assert 'it is undefined' in str(e)
    else:
        assert False, 'Did not see expected exception!'

    try:
        assert f.dimensions() == 0
    except RuntimeError as e:
        assert 'it is undefined' in str(e)
    else:
        assert False, 'Did not see expected exception!'

    # --- single declared type: queries work without a definition ---------
    f = hl.Func(hl.Int(32), 2, 'f')
    assert not f.defined()
    assert f.output_type() == hl.Int(32)
    assert f.output_types() == [hl.Int(32)]
    assert f.outputs() == 1
    assert f.dimensions() == 2

    # --- two declared types: output_type() is rejected for tuples --------
    f = hl.Func([hl.Int(32), hl.Float(64)], 3, 'f')
    assert not f.defined()

    try:
        assert f.output_type() == hl.Int(32)
    except RuntimeError as e:
        assert 'it returns a Tuple' in str(e)
    else:
        assert False, 'Did not see expected exception!'

    assert f.output_types() == [hl.Int(32), hl.Float(64)]
    assert f.outputs() == 2
    assert f.dimensions() == 3

    # --- definition with the wrong number of dimensions ------------------
    f = hl.Func(hl.Int(32), 1, 'f')
    try:
        f[x, y] = hl.i32(0)
        f.realize([10, 10])
    except RuntimeError as e:
        assert 'is constrained to have exactly 1 dimensions, but is defined with 2 dimensions' in str(
            e)
    else:
        assert False, 'Did not see expected exception!'

    # --- definition with the wrong scalar type ---------------------------
    f = hl.Func(hl.Int(32), 2, 'f')
    try:
        f[x, y] = hl.i16(0)
        f.realize([10, 10])
    except RuntimeError as e:
        assert 'is constrained to only hold values of type int32 but is defined with values of type int16' in str(
            e)
    else:
        assert False, 'Did not see expected exception!'

    # --- definition with the wrong tuple types ---------------------------
    f = hl.Func((hl.Int(32), hl.Float(32)), 2, 'f')
    try:
        f[x, y] = (hl.i16(0), hl.f64(0))
        f.realize([10, 10])
    except RuntimeError as e:
        assert 'is constrained to only hold values of type (int32, float32) but is defined with values of type (int16, float64)' in str(
            e)
    else:
        assert False, 'Did not see expected exception!'
def gen_g(self):
    ''' define g() function

    Builds a chain of intermediate Funcs (registered through
    self.add_funcs_by_name in the same groups as they are created) and
    finally the four-index Func "g".  When both self.tracing and
    self.tracing_g are set, an extra ImageParam "g_trace_in" is created,
    wrapped in a constant-exterior boundary condition, registered in
    self.inputs / self.clamps, and added to g.
    '''
    # vars
    i, j, k, l = (self.vars[name] for name in "ijkl")

    # clamped inputs
    x, y, z, expnt, fm, rnorm = (
        self.clamps[name] for name in ("x", "y", "z", "expnt", "fm", "rnorm")
    )

    # unclamped input (for sizing)
    fm_in = self.inputs["fm_in"]

    # scalar inputs
    delo2, delta, rdelta = (
        self.inputs[name] for name in ("delo2", "delta", "rdelta")
    )

    # ---- two-index quantities: coordinate differences and exponents ----
    pair_funcs = [
        hl.Func(label)
        for label in ("dx", "dy", "dz", "g_r2", "expnt2", "expnt_inv")
    ]
    dx, dy, dz, r2, expnt2, expnt_inv = pair_funcs
    self.add_funcs_by_name(pair_funcs)

    for diff, coord in ((dx, x), (dy, y), (dz, z)):
        diff[i, j] = coord[i] - coord[j]

    r2[i, j] = (dx[i, j] * dx[i, j]
                + dy[i, j] * dy[i, j]
                + dz[i, j] * dz[i, j])

    expnt2[i, j] = expnt[i] + expnt[j]
    expnt_inv[i, j] = hl.f64(1.0) / expnt2[i, j]

    # ---- exponential factor and denominators ----
    factor_funcs = [
        hl.Func(label)
        for label in ("fac2", "ex_arg", "ex", "denom", "fac4d")
    ]
    fac2, ex_arg, ex, denom, fac4d = factor_funcs
    self.add_funcs_by_name(factor_funcs)

    fac2[i, j] = expnt[i] * expnt[j] * expnt_inv[i, j]

    ex_arg[i, j, k, l] = -fac2[i, j] * r2[i, j] - fac2[k, l] * r2[k, l]
    exponent = ex_arg[i, j, k, l]
    # Arguments below -37 are replaced by 0 instead of calling exp().
    ex[i, j, k, l] = hl.select(exponent < hl.f64(-37.0),
                               hl.f64(0.0),
                               hl.exp(exponent))

    sum_ij = expnt2[i, j]
    sum_kl = expnt2[k, l]
    denom[i, j, k, l] = sum_ij * sum_kl * hl.sqrt(sum_ij + sum_kl)
    fac4d[i, j, k, l] = sum_ij * sum_kl / (sum_ij + sum_kl)

    # ---- exponent-weighted coordinates and their squared separation ----
    center_funcs = [
        hl.Func(label) for label in ("g_x2", "g_y2", "g_z2", "rpq2")
    ]
    x2, y2, z2, rpq2 = center_funcs
    self.add_funcs_by_name(center_funcs)

    for center, coord in ((x2, x), (y2, y), (z2, z)):
        center[i, j] = ((coord[i] * expnt[i] + coord[j] * expnt[j])
                        * expnt_inv[i, j])

    sep_x = x2[i, j] - x2[k, l]
    sep_y = y2[i, j] - y2[k, l]
    sep_z = z2[i, j] - z2[k, l]
    rpq2[i, j, k, l] = (sep_x * sep_x + sep_y * sep_y + sep_z * sep_z)

    # ---- f0: table lookup in fm with a polynomial correction ----
    f0_funcs = [hl.Func(label) for label in ("f0t", "f0n", "f0x", "f0val")]
    f0t, f0n, f0x, f0val = f0_funcs
    self.add_funcs_by_name(f0_funcs)

    f0t[i, j, k, l] = fac4d[i, j, k, l] * rpq2[i, j, k, l]
    t = f0t[i, j, k, l]

    # Table row index, clamped to the first dimension of fm_in.
    f0n[i, j, k, l] = hl.clamp(hl.i32((t + delo2) * rdelta),
                               fm_in.dim(0).min(),
                               fm_in.dim(0).max())
    row = f0n[i, j, k, l]

    f0x[i, j, k, l] = delta * row - t
    offset = f0x[i, j, k, l]

    # For t >= 28 a closed form is used; otherwise columns 0..4 of the
    # selected fm row are combined in nested (Horner-like) form.
    f0val[i, j, k, l] = hl.select(
        t >= hl.f64(28.0),
        hl.f64(0.88622692545276) / hl.sqrt(t),
        fm[row, 0]
        + offset * (fm[row, 1]
                    + offset * hl.f64(0.5) * (fm[row, 2]
                                              + offset * hl.f64(1. / 3.) * (fm[row, 3]
                                                                            + offset * hl.f64(0.25) * fm[row, 4]))))

    # ---- final g, optionally with an added trace input ----
    g = hl.Func("g")
    self.add_funcs_by_name([g])

    if self.tracing and self.tracing_g:
        g_trace_in = hl.ImageParam(hl.Float(64), 4, "g_trace_in")
        g_trace = hl.BoundaryConditions.constant_exterior(g_trace_in, 0)
        self.inputs["g_trace_in"] = g_trace_in
        self.clamps["g_trace"] = g_trace
        g_trace.compute_root()
    else:
        g_trace = None

    integral = ((hl.f64(2.00) * hl.f64(pow(pi, 2.50)) / denom[i, j, k, l])
                * ex[i, j, k, l] * f0val[i, j, k, l]
                * rnorm[i] * rnorm[j] * rnorm[k] * rnorm[l])

    if g_trace is None:
        g[i, j, k, l] = integral
    else:
        g[i, j, k, l] = integral + g_trace[i, j, k, l]
def generate_halide(self, app, sizes):
    '''produce a Halide func implementing this loopnest

    The zone is implemented as one hl.Func named after self.name.  It is
    initialised to 0.0 and then given a single update definition over an
    RDom whose first dimension selects which of self.updates is applied
    (via hl.mux) and whose remaining dimensions are the loop iterators.

    Args:
        app: object providing the ``vars``, ``funcs``, ``clamps`` and
            ``loopnest_funcs`` mappings; the new Func is registered in
            ``funcs`` and ``loopnest_funcs`` under self.name.
        sizes: extents for the iterators, one per entry of
            self.indexes() (extra entries are ignored).  sizes[0] is
            also passed as ``N`` to each condition's generate().

    Returns:
        The generated hl.Func.

    Raises:
        ValueError: if fewer sizes than indexes are supplied.
        KeyError: if an update element names a Func that is in neither
            app.clamps nor app.funcs.
    '''
    if len(sizes) < len(self.indexes()):
        raise ValueError("called without enough sizes")

    name = self.name
    # NOTE(review): g and g_dens are not used below.  The lookups are kept
    # because, if these registries are plain dicts, they fail early when
    # the prerequisites have not been generated yet -- confirm and drop
    # if that is not intended.
    g = app.funcs["g"]
    g_dens = app.clamps["g_dens"]

    self.simplify()
    logging.info("generating zone %s", name)

    # each symmetry zone has its own iteration space, implemented as an
    # RDom with a where() clause.
    distinct_iters = self.indexes()
    logging.debug("distinct iters: %s", distinct_iters)
    piece_count = len(self.updates)

    # Collapse iterators tied together by "==" conditions onto a common
    # representative; repeat until no mapping changes.
    iter_name_mapping = {k: k for k in app.vars}
    doing_something_useful = True
    while doing_something_useful:
        doing_something_useful = False
        for condition in self.conditions:
            if condition.op == "==":
                for lhs, rhs in zip(condition.lhs, condition.rhs):
                    if iter_name_mapping[rhs] != iter_name_mapping[lhs]:
                        doing_something_useful = True
                        iter_name_mapping[rhs] = iter_name_mapping[lhs]
    logging.debug("iter_name_mapping: %s", iter_name_mapping)
    logging.debug("piece_count: %s", piece_count)

    # Dimension 0 enumerates the updates; one more dimension per index.
    rdom_iters = [(0, piece_count)]
    rdom_iters.extend((0, size) for size, _ in zip(sizes, self.indexes()))
    logging.debug("rdom iters: %s", rdom_iters)
    r = hl.RDom(rdom_iters, name + "_dom")

    # set local variables for RVars
    expanded_iters = {}
    distinct_iters = [r[i] for i in range(len(r))]
    assigned_already = {}
    ru = distinct_iters.pop(0)
    for a, b in iter_name_mapping.items():
        if b in assigned_already:
            # Iterators mapped to the same representative share one RVar.
            expanded_iters[a] = assigned_already[b]
        else:
            iterator = distinct_iters.pop(0)
            expanded_iters[a] = iterator
            assigned_already[b] = iterator
    logging.debug("expanded_iters: %s", expanded_iters)

    # Every non-equality condition becomes a where() predicate.
    for condition in self.conditions:
        if condition.op == "==":
            continue
        logging.debug("generating where condition %s", condition)
        expression = condition.generate(expanded_iters, N=sizes[0])
        if expression is not None:
            r.where(expression)
    logging.debug("resulting where clause: %s", r)

    for update in self.updates:
        logging.debug(update)

    # generate this nested loop
    def maybe_mux(s):
        '''wrap multiple Exprs in mux()'''
        if len(set(s)) == 1:
            return s[0]
        return hl.mux(hl.Expr(ru), s)

    zone_func = hl.Func(name)
    zone_func_initial_params = list(app.vars.values())[0:len(self.updates[0].out.indexes)]
    zone_func[zone_func_initial_params] = hl.f64(0.0)

    left_hand_sides = [[] for _ in self.updates[0].out.indexes]
    right_hand_sides = []
    for update in self.updates:
        # LHS indexes
        for position, index in enumerate(update.out.indexes):
            left_hand_sides[position].append(expanded_iters[index])

        # RHS: product of all elements, scaled by the coefficient
        rhs = None
        for element in update.elements:
            if element.name in app.clamps:
                func = app.clamps[element.name]
            elif element.name in app.funcs:
                func = app.funcs[element.name]
            else:
                # Previously execution continued with `func` unbound or
                # left over from the preceding element.
                logging.critical("func %s not found", element.name)
                raise KeyError("func %s not found" % element.name)
            func_args = [expanded_iters[x] for x in element.indexes]
            value = func[func_args]
            if rhs is None:
                rhs = value
            else:
                rhs *= value
        if update.coeff != 1.0:
            rhs *= update.coeff
        right_hand_sides.append(rhs)

    left_hand_sides = [maybe_mux(x) for x in left_hand_sides]
    right_hand_sides = maybe_mux(right_hand_sides)

    lhs = zone_func[left_hand_sides]
    zone_func[left_hand_sides] = lhs + right_hand_sides
    logging.debug("%s[%s] += %s", name, left_hand_sides, right_hand_sides)

    app.funcs[name] = zone_func
    app.loopnest_funcs[name] = {
        "func": zone_func,
        "loopnest": self,
        "iters": expanded_iters,
        "rdom": r,
        "unroll": ru,
    }
    return zone_func