Пример #1
0
 def data_pack(sch: Schedule):
     """Schedule the block named "data_pack" on ``sch``.

     The block must be nested under exactly six loops. Loops 0, 1, 4 and 5
     are unrolled; loops 2 and 3 are each split in two using factors from
     ``sample_perfect_tile``; finally the eight resulting loops are
     reordered so the four split loops come first.

     Args:
         sch: The schedule to transform in place.
     """
     block = sch.get_block(name="data_pack")
     ax0, ax1, ax2, ax3, ax4, ax5 = sch.get_loops(block=block)

     # Unroll the two leading loops before any splitting happens.
     for axis in (ax0, ax1):
         sch.unroll(loop=axis)

     # Split loop 2 into (outer, inner); the decision passed is [3, 3].
     ax2_outer_factor, ax2_inner_factor = sch.sample_perfect_tile(
         loop=ax2,
         n=2,
         max_innermost_factor=64,
         decision=[3, 3],
     )
     ax2_outer, ax2_inner = sch.split(
         loop=ax2,
         factors=[ax2_outer_factor, ax2_inner_factor],
     )

     # Split loop 3 into (outer, inner); the decision passed is [64, 2].
     ax3_outer_factor, ax3_inner_factor = sch.sample_perfect_tile(
         loop=ax3,
         n=2,
         max_innermost_factor=64,
         decision=[64, 2],
     )
     ax3_outer, ax3_inner = sch.split(
         loop=ax3,
         factors=[ax3_outer_factor, ax3_inner_factor],
     )

     # Unroll the two trailing loops.
     for axis in (ax4, ax5):
         sch.unroll(loop=axis)

     # Outer split loops, then inner split loops, then the unrolled loops.
     sch.reorder(
         ax2_outer,
         ax3_outer,
         ax2_inner,
         ax3_inner,
         ax0,
         ax1,
         ax4,
         ax5,
     )
Пример #2
0
 def inverse(sch: Schedule):
     """Schedule the block named "inverse" on ``sch``.

     The block must be nested under exactly six loops. Loops 0, 1, 4 and 5
     are unrolled; loops 2 and 3 are each split in two using factors from
     ``sample_perfect_tile``; finally the eight resulting loops are
     reordered so the four split loops come first.

     Args:
         sch: The schedule to transform in place.
     """
     block = sch.get_block(name="inverse")
     ax0, ax1, ax2, ax3, ax4, ax5 = sch.get_loops(block=block)

     # Unroll the two leading loops before any splitting happens.
     for axis in (ax0, ax1):
         sch.unroll(loop=axis)

     # Split loop 2 into (outer, inner); the decision passed is [3, 3].
     ax2_outer_factor, ax2_inner_factor = sch.sample_perfect_tile(
         loop=ax2,
         n=2,
         max_innermost_factor=64,
         decision=[3, 3],
     )
     ax2_outer, ax2_inner = sch.split(
         loop=ax2,
         factors=[ax2_outer_factor, ax2_inner_factor],
     )

     # Split loop 3 into (outer, inner); the decision passed is [2, 64].
     ax3_outer_factor, ax3_inner_factor = sch.sample_perfect_tile(
         loop=ax3,
         n=2,
         max_innermost_factor=64,
         decision=[2, 64],
     )
     ax3_outer, ax3_inner = sch.split(
         loop=ax3,
         factors=[ax3_outer_factor, ax3_inner_factor],
     )

     # Unroll the two trailing loops.
     for axis in (ax4, ax5):
         sch.unroll(loop=axis)

     # Outer split loops, then inner split loops, then the unrolled loops.
     sch.reorder(
         ax2_outer,
         ax3_outer,
         ax2_inner,
         ax3_inner,
         ax0,
         ax1,
         ax4,
         ax5,
     )
Пример #3
0
 def data_pack(sch: Schedule):
     """Schedule the block named "data_pack" on ``sch``.

     The block must be nested under exactly six loops. Loops 0, 1, 4 and 5
     are unrolled; loops 2 and 3 are each split in two using factors from
     ``sample_perfect_tile``; finally the eight resulting loops are
     reordered so the four split loops come first.

     Args:
         sch: The schedule to transform in place.
     """
     target = sch.get_block(name="data_pack")
     loop_a, loop_b, loop_c, loop_d, loop_e, loop_f = sch.get_loops(block=target)

     # Leading pair of loops is unrolled first.
     for unrolled in (loop_a, loop_b):
         sch.unroll(loop=unrolled)

     # Tile loop_c in two; the decision passed is [9, 1].
     c_factor_hi, c_factor_lo = sch.sample_perfect_tile(
         loop=loop_c,
         n=2,
         max_innermost_factor=64,
         decision=[9, 1],
     )
     c_hi, c_lo = sch.split(loop=loop_c, factors=[c_factor_hi, c_factor_lo])

     # Tile loop_d in two; the decision passed is [32, 4].
     d_factor_hi, d_factor_lo = sch.sample_perfect_tile(
         loop=loop_d,
         n=2,
         max_innermost_factor=64,
         decision=[32, 4],
     )
     d_hi, d_lo = sch.split(loop=loop_d, factors=[d_factor_hi, d_factor_lo])

     # Trailing pair of loops is unrolled after the splits.
     for unrolled in (loop_e, loop_f):
         sch.unroll(loop=unrolled)

     # Final order: both outer tiles, both inner tiles, then unrolled loops.
     sch.reorder(c_hi, d_hi, c_lo, d_lo, loop_a, loop_b, loop_e, loop_f)
Пример #4
0
 def inverse(sch: Schedule):
     """Schedule the block named "inverse" on ``sch``.

     The block must be nested under exactly six loops. Loops 0, 1, 4 and 5
     are unrolled; loops 2 and 3 are each split in two using factors from
     ``sample_perfect_tile``; finally the eight resulting loops are
     reordered so the four split loops come first.

     Args:
         sch: The schedule to transform in place.
     """
     target = sch.get_block(name="inverse")
     loop_a, loop_b, loop_c, loop_d, loop_e, loop_f = sch.get_loops(block=target)

     # Leading pair of loops is unrolled first.
     for unrolled in (loop_a, loop_b):
         sch.unroll(loop=unrolled)

     # Tile loop_c in two; the decision passed is [1, 9].
     c_factor_hi, c_factor_lo = sch.sample_perfect_tile(
         loop=loop_c,
         n=2,
         max_innermost_factor=64,
         decision=[1, 9],
     )
     c_hi, c_lo = sch.split(loop=loop_c, factors=[c_factor_hi, c_factor_lo])

     # Tile loop_d in two; the decision passed is [2, 64].
     d_factor_hi, d_factor_lo = sch.sample_perfect_tile(
         loop=loop_d,
         n=2,
         max_innermost_factor=64,
         decision=[2, 64],
     )
     d_hi, d_lo = sch.split(loop=loop_d, factors=[d_factor_hi, d_factor_lo])

     # Trailing pair of loops is unrolled after the splits.
     for unrolled in (loop_e, loop_f):
         sch.unroll(loop=unrolled)

     # Final order: both outer tiles, both inner tiles, then unrolled loops.
     sch.reorder(c_hi, d_hi, c_lo, d_lo, loop_a, loop_b, loop_e, loop_f)
 def inverse(sch: Schedule):
     """Schedule the block named "inverse" and bind it to GPU thread axes.

     The block must be nested under exactly six loops. Loops 0, 1, 4 and 5
     are unrolled, loops 2 and 3 are each split in two using factors from
     ``sample_perfect_tile``, and the loops are reordered so the four split
     loops come first. Those four loops are then fused, the fused loop is
     split by a factor drawn with ``sample_categorical``, and the two
     resulting loops are bound to "blockIdx.x" and "threadIdx.x".

     Args:
         sch: The schedule to transform in place.
     """
     block = sch.get_block(name="inverse")
     ax0, ax1, ax2, ax3, ax4, ax5 = sch.get_loops(block=block)

     # Unroll the two leading loops before any splitting happens.
     for axis in (ax0, ax1):
         sch.unroll(loop=axis)

     # Split loop 2 into (outer, inner); the decision passed is [3, 3].
     ax2_outer_factor, ax2_inner_factor = sch.sample_perfect_tile(
         loop=ax2,
         n=2,
         max_innermost_factor=64,
         decision=[3, 3],
     )
     ax2_outer, ax2_inner = sch.split(
         loop=ax2,
         factors=[ax2_outer_factor, ax2_inner_factor],
     )

     # Split loop 3 into (outer, inner); the decision passed is [2, 64].
     ax3_outer_factor, ax3_inner_factor = sch.sample_perfect_tile(
         loop=ax3,
         n=2,
         max_innermost_factor=64,
         decision=[2, 64],
     )
     ax3_outer, ax3_inner = sch.split(
         loop=ax3,
         factors=[ax3_outer_factor, ax3_inner_factor],
     )

     # Unroll the two trailing loops.
     for axis in (ax4, ax5):
         sch.unroll(loop=axis)

     # Outer split loops, then inner split loops, then the unrolled loops.
     sch.reorder(
         ax2_outer,
         ax3_outer,
         ax2_inner,
         ax3_inner,
         ax0,
         ax1,
         ax4,
         ax5,
     )

     # Collapse the four split loops into one loop.
     fused = sch.fuse(ax2_outer, ax3_outer, ax2_inner, ax3_inner)

     # Draw the inner split factor from six equally weighted candidates;
     # the decision passed is index 2.
     thread_candidates = [32, 64, 128, 256, 512, 1024]
     uniform_probs = [0.16666666666666666] * 6
     inner_extent = sch.sample_categorical(
         candidates=thread_candidates,
         probs=uniform_probs,
         decision=2,
     )

     # The outer factor is left as None; only the inner factor is given.
     block_loop, thread_loop = sch.split(loop=fused, factors=[None, inner_extent])
     sch.bind(loop=block_loop, thread_axis="blockIdx.x")
     sch.bind(loop=thread_loop, thread_axis="threadIdx.x")