def _vrpsort16(instance: tik.Tik, dst, src, cnt, dst_offset=0, src_offset=0):
    """
    Issue ``instance.vrpsort16`` over ``cnt`` entries, at most 255 repeats per call.

    ``cnt`` is split into full batches of ``16 * 255`` entries plus a tail of
    whole groups of 16. Any remainder smaller than 16 is not processed.

    Args:
        instance: object providing ``for_range`` and ``vrpsort16``.
        dst: destination buffer, indexed by element offset.
        src: source buffer, indexed by element offset.
        cnt: number of entries to process.
        dst_offset: starting element offset into ``dst``.
        src_offset: starting element offset into ``src``.
    """
    group_size = 16
    max_repeat = 255
    batch_entries = group_size * max_repeat
    # Both buffers advance by 16 * 8 elements per repeat
    # (presumably 8 values per entry -- TODO confirm against the TIK reference).
    batch_stride = max_repeat * group_size * 8

    full_batches = cnt // batch_entries
    tail_repeat = (cnt - full_batches * batch_entries) // group_size

    if full_batches > 0:
        with instance.for_range(0, full_batches) as batch:
            start = batch * batch_stride
            instance.vrpsort16(dst[dst_offset + start],
                               src[src_offset + start],
                               max_repeat)

    if tail_repeat > 0:
        # The tail begins right after the last full batch.
        tail_start = batch_stride * full_batches
        instance.vrpsort16(dst[dst_offset + tail_start],
                           src[src_offset + tail_start],
                           tail_repeat)
def _merge_region(instance: tik.Tik, out_ub, dst, src, rows, cols):
    """
    Run ``_merge_recur`` once per row inside a loop named ``merge_i0``.

    Each row is addressed through ``region_offset``, which steps by the row
    width rounded up to a multiple of 16 and then multiplied by 8.

    Args:
        instance: object providing ``for_range``; forwarded to ``_merge_recur``.
        out_ub: forwarded unchanged to ``_merge_recur``.
        dst: forwarded unchanged to ``_merge_recur``.
        src: forwarded unchanged to ``_merge_recur``.
        rows: upper bound of the row loop.
        cols: number of columns per row.

    Returns:
        The value returned by the ``_merge_recur`` call in the loop body.
    """
    groups_per_row = (cols + 15) // 16
    # Padded row width (multiple of 16) times 8 elements per column entry.
    row_stride = groups_per_row * 16 * 8

    with instance.for_range(0, rows, name='merge_i0') as row:
        # The literal 1 is passed through as-is; its meaning is defined by
        # _merge_recur, which is not visible here.
        merged_ub = _merge_recur(instance, out_ub, dst, src, cols,
                                 groups_per_row, 1,
                                 region_offset=row * row_stride)
    return merged_ub
def _vconcat(instance: tik.Tik, dst, src, mode, cnt, dst_offset=0, src_offset=0):
    """
    Issue ``instance.vconcat`` over ``cnt`` entries, at most 255 repeats per call.

    ``cnt`` is split into full batches of ``16 * 255`` entries plus a tail of
    whole groups of 16. Any remainder smaller than 16 is not processed.
    Per repeat, ``src`` advances by 16 elements and ``dst`` by ``16 * 8``.

    Args:
        instance: object providing ``if_scope``, ``for_range`` and ``vconcat``.
        dst: destination buffer, indexed by element offset.
        src: source buffer, indexed by element offset.
        mode: forwarded unchanged as the last argument of ``vconcat``.
        cnt: number of source entries to process.
        dst_offset: starting element offset into ``dst``.
        src_offset: starting element offset into ``src``.
    """
    group_size = 16
    max_repeat = 255
    src_batch_stride = max_repeat * group_size
    dst_batch_stride = max_repeat * group_size * 8

    full_batches = cnt // src_batch_stride
    tail_repeat = (cnt - full_batches * src_batch_stride) // group_size

    # NOTE(review): this guard goes through instance.if_scope while the tail
    # guard below is a plain Python `if`; confirm whether cnt can ever be a
    # TIK scalar, and align the two guards accordingly.
    with instance.if_scope(full_batches > 0):
        with instance.for_range(0, full_batches) as batch:
            instance.vconcat(dst[dst_offset + batch * dst_batch_stride],
                             src[src_offset + batch * src_batch_stride],
                             max_repeat,
                             mode)

    if tail_repeat > 0:
        # The tail begins right after the last full batch in both buffers.
        instance.vconcat(dst[dst_offset + dst_batch_stride * full_batches],
                         src[src_offset + src_batch_stride * full_batches],
                         tail_repeat,
                         mode)