def test_diff(ctx_factory):
    """Validate the output of ``diff_kernel`` against finite differences.

    A kernel computing ``z`` from ``x`` and ``y`` is built, then passed
    through ``diff_kernel(..., "z", "x")``.  The differentiated kernel's
    result, applied to a random direction ``dx``, is compared with forward
    differences of the original kernel at two step sizes.  The relative
    error must shrink roughly in proportion to the step size.
    """
    from loopy.transform.diff import diff_kernel

    n = 50
    step_ratio = 1e-1
    coarse_step = 1e-4
    fine_step = coarse_step * step_ratio

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    knl = lp.make_kernel(
        """{ [i,j]: 0<=i,j<n }""",
        """
        <> a = 1/(1+sinh(x[i] + y[j])**2)
        z[i] = sum(j, exp(a * x[j]))
        """,
        name="diff")
    knl = lp.fix_parameters(knl, n=n)

    # FIXME: Is this the correct interface? Does it make sense to take the
    # entire translation unit?
    differentiated, _ = diff_kernel(knl["diff"], "z", "x")
    dknl = knl.with_kernel(differentiated)
    dknl = lp.remove_unused_arguments(dknl)
    dknl = lp.add_inames_to_insn(dknl, "diff_i0", "writes:a_dx or writes:a")
    print(dknl)

    x = np.random.randn(n)
    y = np.random.randn(n)
    dx = np.random.randn(n)

    def evaluate(x_val):
        # Run the undifferentiated kernel and return its single output.
        _, (z,) = knl(queue, x=x_val, y=y)
        return z

    z_base = evaluate(x)
    z_coarse = evaluate(x + coarse_step*dx)
    z_fine = evaluate(x + fine_step*dx)

    dknl = lp.set_options(dknl, write_cl=True)
    _, (df,) = dknl(queue, x=x, y=y)

    def relative_error(z_step, step):
        # Mismatch between the observed change in z and the change predicted
        # by the differentiated kernel, relative to the observed change.
        observed = z_step - z_base
        predicted = df.dot(step*dx)
        return la.norm(observed - predicted) / la.norm(observed)

    err_coarse = relative_error(z_coarse, coarse_step)
    err_fine = relative_error(z_fine, fine_step)
    print(err_coarse, err_fine)

    # The 1.1 factor leaves some slack on the expected error reduction.
    assert (err_fine < err_coarse * step_ratio * 1.1).all()
def test_diff(ctx_factory):
    """Check ``diff_kernel`` output against forward finite differences.

    The kernel computes ``z`` from ``x`` and ``y``; ``diff_kernel`` is asked
    for ``z`` differentiated with respect to ``x``.  For a random direction
    ``dx`` and two step sizes ``h1 > h2``, the change in ``z`` predicted by
    the differentiated kernel is compared with the observed change.  The
    test passes if the relative error drops by about the ratio ``h2/h1``.
    """
    from loopy.transform.diff import diff_kernel

    # Single source of truth for the problem size (used both to specialize
    # the kernel and to size the input arrays).
    n = 50

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    # The kernel is given an explicit name so that it can be looked up in
    # the result of make_kernel below.
    knl = lp.make_kernel(
        """{ [i,j]: 0<=i,j<n }""",
        """
        <> a = 1/(1+sinh(x[i] + y[j])**2)
        z[i] = sum(j, exp(a * x[j]))
        """,
        name="diff")
    knl = lp.fix_parameters(knl, n=n)

    # diff_kernel is handed the individual kernel rather than the whole
    # program; the differentiated kernel is then re-attached via with_kernel
    # so that the subsequent transformations and the launch work on a
    # callable program object.
    diff_knl, _ = diff_kernel(knl["diff"], "z", "x")
    dknl = knl.with_kernel(diff_knl)
    dknl = lp.remove_unused_arguments(dknl)
    dknl = lp.add_inames_to_insn(dknl, "diff_i0", "writes:a_dx or writes:a")
    print(dknl)

    x = np.random.randn(n)
    y = np.random.randn(n)
    dx = np.random.randn(n)

    fac = 1e-1
    h1 = 1e-4
    h2 = h1 * fac

    # Reference value plus the two perturbed evaluations.
    _, (z0,) = knl(queue, x=x, y=y)
    _, (z1,) = knl(queue, x=(x + h1*dx), y=y)
    _, (z2,) = knl(queue, x=(x + h2*dx), y=y)

    dknl = lp.set_options(dknl, write_cl=True)
    _, (df,) = dknl(queue, x=x, y=y)

    diff1 = z1 - z0
    diff2 = z2 - z0

    diff1_predicted = df.dot(h1*dx)
    diff2_predicted = df.dot(h2*dx)

    err1 = la.norm(diff1 - diff1_predicted) / la.norm(diff1)
    err2 = la.norm(diff2 - diff2_predicted) / la.norm(diff2)
    print(err1, err2)

    # Allow 10% slack on the expected error reduction.
    assert (err2 < err1 * fac * 1.1).all()
def test_equality_constraints(ctx_factory):
    """Compare a split and tagged kernel against its untransformed version.

    The kernel's second domain ties ``k`` to ``i`` through the equality
    ``k = i+5``.  The transformed kernel is checked against the original
    with ``lp.auto_test_vs_ref`` for ``n = 10``.
    """
    ctx = ctx_factory()

    size = 10
    elem_dtype = np.float32
    layout = "C"

    domains = [
        "[n] -> {[i,j]: 0<=i,j<n }",
        "{[k]: k =i+5 and k < n}",
    ]
    instructions = [
        "a[i,j] = 5 {id=set_all}",
        "b[i,k] = 22 {id=set_b, dep=set_all}",
    ]
    kernel_args = [
        lp.GlobalArg("a,b", elem_dtype, shape="n, n", order=layout),
        lp.ValueArg("n", np.int32, approximately=1000),
    ]

    # The untransformed kernel doubles as the reference implementation.
    seq_knl = lp.make_kernel(
        domains, instructions, kernel_args,
        name="equality_constraints", assumptions="n>=1")

    knl = seq_knl
    for iname, group_tag, local_tag in (
            ("i", "g.0", "l.0"),
            ("j", "g.1", "l.1")):
        knl = lp.split_iname(
            knl, iname, 16, outer_tag=group_tag, inner_tag=local_tag)

    knl = lp.add_inames_to_insn(knl, "j_inner, j_outer", "id:set_b")

    # Debugging aids:
    #   print(knl)
    #   print(knl.domains[0].detect_equalities())

    lp.auto_test_vs_ref(
        seq_knl, ctx, knl,
        parameters={"n": size},
        print_ref_code=True)
def variant_1(knl):
    """Return *knl* with prefetches of ``a`` and ``b`` and loop order ``i, j``.

    Both arrays are prefetched with ``default_tag="l.auto"``, the loops are
    prioritized as ``i`` then ``j``, and ``i`` is added to the instructions
    matching ``writes:b_fetch``.
    """
    for array_name in ("a", "b"):
        knl = lp.add_prefetch(knl, array_name, default_tag="l.auto")

    knl = lp.prioritize_loops(knl, ["i", "j"])
    return lp.add_inames_to_insn(knl, "i", "writes:b_fetch")
def variant_1(knl):
    """Return *knl* with prefetches of ``a`` and ``b`` and loop order ``i, j``.

    :arg knl: the kernel to transform.
    :returns: the transformed kernel, with ``a`` and ``b`` prefetched, the
        loop priority ``i`` before ``j`` requested, and ``i`` added to the
        instructions matching ``writes:b_fetch``.
    """
    # The prefetch iname tag is spelled out rather than left to the library
    # default, so the result does not depend on what that default is.
    knl = lp.add_prefetch(knl, "a", default_tag="l.auto")
    knl = lp.add_prefetch(knl, "b", default_tag="l.auto")

    # prioritize_loops replaces the deprecated set_loop_priority.
    # NOTE(review): set_loop_priority is documented as overwriting previously
    # set priorities; confirm that callers never pass a kernel that already
    # carries loop priorities they expect to be discarded.
    knl = lp.prioritize_loops(knl, ["i", "j"])

    knl = lp.add_inames_to_insn(knl, "i", "writes:b_fetch")
    return knl