def test_conv2d_winograd_cuda():
    """Replay a fixed set of sampling decisions for the CUDA Winograd conv2d.

    The test wraps ``conv2d_winograd_cuda`` in an ``IRModule``, asks
    ``PostOrderApply`` (configured with the ``DefaultCUDA`` schedule rules)
    for its single design space, pins every ``Sample*`` instruction of the
    resulting trace to a hard-coded decision, re-applies that trace to a
    fresh ``Schedule`` and checks the scheduled module against ``_get_mod()``.
    """
    mod = IRModule({"main": conv2d_winograd_cuda})
    context = TuneContext(
        mod=mod,
        target=Target("nvidia/geforce-rtx-3090", host="llvm"),
        task_name="Custom Search Space Task",
        sch_rules=DefaultCUDA._sch_rules(),  # pylint: disable=protected-access
    )
    for rule in context.sch_rules:
        rule.initialize_with_tune_context(context)

    space_generator = PostOrderApply()
    space_generator.initialize_with_tune_context(context)
    # The one-element unpacking fails unless exactly one design space is returned.
    (design,) = space_generator.generate_design_space(mod)

    # Sampling instructions, in trace order; each one is paired positionally
    # with the entry at the same index in ``pinned_choices``.
    sampling_insts = [
        inst for inst in design.trace.insts if inst.kind.name.startswith("Sample")
    ]
    pinned_choices = [
        # data_pack
        [3, 3],
        [64, 2],
        2,
        # inverse
        [3, 3],
        [2, 64],
        2,
        # bgemm
        [1, 1, 1, 1, 6],
        [1, 1, 1, 3, 2],
        [3, 1, 1, 1, 3],
        [4, 2, 1, 4, 4],
        [32, 1, 4],
        1,
        1,
        # root anno
        2,
        # conv2d
        2,
    ]
    decisions = dict(zip(sampling_insts, pinned_choices))

    # Re-run the recorded instructions on an unscheduled copy of the module,
    # this time with the pinned decisions.
    pinned_trace = Trace(design.trace.insts, decisions=decisions)
    replayed = Schedule(mod=mod)
    pinned_trace.apply_to_schedule(replayed, remove_postproc=False)

    tvm.ir.assert_structural_equal(replayed.mod, _get_mod())
def test_conv2d_winograd_cpu():
    """Replay a fixed set of sampling decisions for the CPU Winograd conv2d.

    The test wraps ``conv2d_winograd_cpu`` in an ``IRModule``, asks
    ``PostOrderApply`` (configured with the ``DefaultLLVM`` schedule rules)
    for its single design space, pins the ``Sample*`` instructions of the
    trace (minus its last four instructions) to hard-coded decisions,
    re-applies that truncated trace to a fresh ``Schedule`` and checks the
    scheduled module against ``_get_mod()``.
    """
    mod = IRModule({"main": conv2d_winograd_cpu})
    context = TuneContext(
        mod=mod,
        target=Target("llvm"),
        task_name="Custom Search Space Task",
        sch_rules=DefaultLLVM._sch_rules(),  # pylint: disable=protected-access
    )

    space_generator = PostOrderApply()
    space_generator.initialize_with_tune_context(context)
    # The one-element unpacking fails unless exactly one design space is returned.
    (design,) = space_generator.generate_design_space(mod)

    # Only the trace without its final four instructions is replayed.
    # NOTE(review): the reason for dropping exactly four is not visible
    # here -- confirm against the schedule rules that emit them.
    kept_insts = design.trace.insts[:-4]

    # Sampling instructions, in trace order; each one is paired positionally
    # with the entry at the same index in ``pinned_choices``.
    sampling_insts = [
        inst for inst in kept_insts if inst.kind.name.startswith("Sample")
    ]
    pinned_choices = [
        # data_pack
        [9, 1],
        [32, 4],
        # input_tile
        4,
        # data_pad
        -2,
        # inverse
        [1, 9],
        [2, 64],
        # bgemm
        [1, 2, 3, 1],
        [1, 1, 1, 6],
        [1, 1, 1, 9],
        [2, 1, 16, 4],
        [16, 8],
    ]
    decisions = dict(zip(sampling_insts, pinned_choices))

    # Re-run the kept instructions on an unscheduled copy of the module,
    # this time with the pinned decisions.
    pinned_trace = Trace(kept_insts, decisions=decisions)
    replayed = Schedule(mod=mod)
    pinned_trace.apply_to_schedule(replayed, remove_postproc=False)

    tvm.ir.assert_structural_equal(replayed.mod, _get_mod())