Пример #1
0
def scalar_affine(
        op: ScalarAffine,
        constants_layout: MemoryLayout,
        variables_layout: MemoryLayout,
        metabuffer_injector: MetaBufferInjector = None) -> List[Kernel]:
    """Build the kernel for a ScalarAffine operator.

    Looks up the allocations of input "x" and output "y" in
    ``variables_layout``, registers their offsets, the element count of "y"
    and the operator's ``scale`` / ``bias`` with the meta-buffer injector,
    and injects them into the module-level ``template``.

    Args:
        op: operator to generate code for.
        constants_layout: not read by this function.
        variables_layout: layout queried for "x" and "y".
        metabuffer_injector: injector to register into; a fresh
            ``MetaBufferInjector`` is created when ``None`` is given.

    Returns:
        A single-element list holding the generated kernel.

    Raises:
        AssertionError: if the shapes of "x" and "y" differ.
    """
    src = variables_layout[op.inputs["x"]]
    dst = variables_layout[op.outputs["y"]]
    assert src.variable.shape == dst.variable.shape

    injector = (MetaBufferInjector()
                if metabuffer_injector is None else metabuffer_injector)
    meta_values = {
        "affine_transform_X_offset": src.offset,
        "affine_transform_Y_offset": dst.offset,
        "affine_transform_N": dst.variable.size,
        "affine_transform_scale": op.scale,
        "affine_transform_bias": op.bias
    }
    injector.register(meta_values)

    code = injector.inject(template)
    # The final function name is derived from the injected source and then
    # substituted for the %%FUNC_NAME%% placeholder.
    kernel_name = util.add_canonical_suffix("scalar_affine", code)
    code = code.replace("%%FUNC_NAME%%", kernel_name)

    return [Kernel({kernel_name: code}, kernel_name,
                   injector.generate_buffer())]
Пример #2
0
def axiswise_scale_same_order(op: AxiswiseScale,
                              memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the AxiswiseScale kernel from ``template_same_order``.

    The shape of input "x" is split around ``op.axis`` into three values:
    ``mul`` of the dimensions before the axis (D1), the extent of the axis
    itself (D2) and ``mul`` of the dimensions after it (D3).

    Args:
        op: operator to generate code for.
        memory_layout: layout queried for "x", "s" and "y".

    Returns:
        A single-element list holding the generated kernel.
    """
    src = memory_layout[op.inputs["x"]]
    scale = memory_layout[op.inputs["s"]]
    dst = memory_layout[op.outputs["y"]]

    # Position of the scaled axis inside x's axis order.
    pivot = src.variable.order.axes_dict[op.axis]
    shape = src.variable.shape
    outer = mul(shape[:pivot])
    extent = shape[pivot]
    inner = mul(shape[pivot + 1:])

    injector = BufferInjector()
    injector.register({
        "axiswise_scale_X": src,
        "axiswise_scale_S": scale,
        "axiswise_scale_Y": dst,
        "axiswise_scale_D1": outer,
        "axiswise_scale_D2": extent,
        "axiswise_scale_D3": inner
    })

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(template_same_order))

    return [Kernel({namer.name: code}, namer.name,
                   injector.buffer,
                   injector.unresolved_value_list)]
Пример #3
0
def max_handler(op: Max, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel for a Max operator from the module-level template.

    Registers the allocations of "x" and "y", the stride and shape of "y",
    the strides of "x" listed in y's axis order, and the extent and stride
    of "x" along ``op.parameters["axis"]``.

    Args:
        op: operator to generate code for.
        memory_layout: layout queried for "x" and "y".

    Returns:
        A single-element list holding the generated kernel.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]
    target = op.parameters["axis"]

    injector = BufferInjector()
    injector.register({
        "max_X": memory_layout[src],
        "max_Y": memory_layout[dst],
        "max_y_stride": dst.stride,
        "max_y_shape": dst.shape,
        # x's strides, looked up for every axis that appears in y.
        "max_x_stride": [src.stride_dict[a] for a in dst.order.axes],
        "max_D": dst.ndim,
        "max_N": src.shape_dict[target],
        "max_MAX_GID": dst.size,
        "max_x_target_axis_stride": src.stride_dict[target]
    })

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(template))

    return [
        Kernel({namer.name: code}, namer.name, injector.buffer,
               injector.unresolved_value_list)
    ]
Пример #4
0
def tile(op: Tile, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel for a Tile operator from the module-level template.

    Registers the allocations of "x" and "y", the stride of "y", and the
    strides and extents of "x" listed in y's axis order.

    Args:
        op: operator to generate code for.
        memory_layout: layout queried for "x" and "y".

    Returns:
        A single-element list holding the generated kernel.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]

    injector = BufferInjector()
    injector.register({
        "tile_x": memory_layout[src],
        "tile_y": memory_layout[dst],
        "tile_y_stride": dst.stride,
        # Both lists follow the axis order of y, not of x.
        "tile_x_stride": [src.stride_dict[a] for a in dst.order.axes],
        "tile_x_shape": [src.shape_dict[a] for a in dst.order.axes],
        "tile_D": src.ndim,
        "tile_MAX_GID": dst.size,
    })

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(template))

    return [
        Kernel({namer.name: code}, namer.name, injector.buffer,
               injector.unresolved_value_list)
    ]
Пример #5
0
def linear(op: Linear, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel for a Linear operator from the module-level template.

    The three registered sizes are the N extent of "y" (``linear_M``), the
    size of "y" divided by that extent (``linear_N``) and the size of "x"
    divided by its N extent (``linear_K``).

    Args:
        op: operator to generate code for.
        memory_layout: layout queried for "x", "w" and "y".

    Returns:
        A single-element list holding the generated kernel.

    Raises:
        AssertionError: if "x"/"y" are not in NC or NHWC order, "w" is not
            in CN or HWCN order, or "w" and "x" differ in rank.
    """
    src = op.inputs["x"]
    weight = op.inputs["w"]
    dst = op.outputs["y"]

    # Only these layout combinations are accepted.
    assert src.order == OrderNC or src.order == OrderNHWC
    assert weight.order == OrderCN or weight.order == OrderHWCN
    assert dst.order == OrderNC or dst.order == OrderNHWC
    assert weight.ndim == src.ndim

    injector = BufferInjector()
    injector.register({
        "linear_X": memory_layout[src],
        "linear_Y": memory_layout[dst],
        "linear_W": memory_layout[weight],
        "linear_M": dst.shape_dict[Axis.N],
        "linear_N": dst.size // dst.shape_dict[Axis.N],
        "linear_K": src.size // src.shape_dict[Axis.N],
    })

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(template))

    return [
        Kernel({namer.name: code}, namer.name, injector.buffer,
               injector.unresolved_value_list)
    ]
Пример #6
0
def embedding(op: Embedding, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel for an Embedding operator from the module template.

    Registers the allocations of "x", "y" and "w" together with the C and N
    extents of "w" and the T and N extents of "x".

    Args:
        op: operator to generate code for.
        memory_layout: layout queried for "x", "w" and "y".

    Returns:
        A single-element list holding the generated kernel.

    Raises:
        AssertionError: if "x" is not in NT order, "w" is not in CN order
            or "y" is not in NTC order.
    """
    indices = op.inputs["x"]
    table = op.inputs["w"]
    dst = op.outputs["y"]

    assert indices.order == OrderNT
    assert table.order == OrderCN
    assert dst.order == OrderNTC

    injector = BufferInjector()
    injector.register({
        "embedding_X": memory_layout[indices],
        "embedding_Y": memory_layout[dst],
        "embedding_W": memory_layout[table],
        "embedding_vocabulary": table.shape_dict[Axis.C],
        "embedding_sequence_len": indices.shape_dict[Axis.T],
        "embedding_batch_size": indices.shape_dict[Axis.N],
        "embedding_dim": table.shape_dict[Axis.N]
    })

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(template))

    return [Kernel({namer.name: code}, namer.name,
                   injector.buffer,
                   injector.unresolved_value_list)]
Пример #7
0
def softmax(op: Softmax, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel for a Softmax operator from the module template.

    Registers the allocations of "x" and "y", the extent of "y" along the
    softmax axis (``softmax_C``) and the size of "y" divided by that extent
    (``softmax_N``).

    Args:
        op: operator to generate code for.
        memory_layout: layout queried for "x" and "y".

    Returns:
        A single-element list holding the generated kernel.

    Raises:
        AssertionError: if "x" and "y" differ in order or shape, or if
            ``op.parameters["axis"]`` is not the last axis of "x".
    """
    src = memory_layout[op.inputs["x"]]
    dst = memory_layout[op.outputs["y"]]

    assert dst.variable.order == src.variable.order
    assert dst.variable.shape == src.variable.shape

    axis = op.parameters["axis"]
    assert axis == src.variable.order.axes[-1], (
        "[Webassembly] Softmax supports only for aggregating last axis.")

    injector = BufferInjector()
    injector.register({
        "softmax_X": src,
        "softmax_Y": dst,
        "softmax_N": dst.variable.size // dst.variable.shape_dict[axis],
        "softmax_C": dst.variable.shape_dict[axis],
    })

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(template))

    return [Kernel({namer.name: code}, namer.name,
                   injector.buffer,
                   injector.unresolved_value_list)]
Пример #8
0
def tanh(op: Tanh,
         constants_layout: MemoryLayout,
         variables_layout: MemoryLayout,
         metabuffer_injector: MetaBufferInjector = None) -> List[Kernel]:
    """Build the kernel for a Tanh operator from the module-level template.

    Registers the offsets of "x" and "y" and the element count of "y" with
    the meta-buffer injector.

    Args:
        op: operator to generate code for.
        constants_layout: not read by this function.
        variables_layout: layout queried for "x" and "y".
        metabuffer_injector: injector to register into; a fresh
            ``MetaBufferInjector`` is created when ``None`` is given.

    Returns:
        A single-element list holding the generated kernel.

    Raises:
        AssertionError: if "x" and "y" differ in order or shape.
    """
    src = variables_layout[op.inputs["x"]]
    dst = variables_layout[op.outputs["y"]]

    assert src.variable.order == dst.variable.order
    assert src.variable.shape == dst.variable.shape

    injector = (MetaBufferInjector()
                if metabuffer_injector is None else metabuffer_injector)
    # NOTE(review): the keys carry a "relu_" prefix although this is the
    # tanh handler; they presumably match placeholders in ``template`` --
    # confirm before renaming.
    meta_values = {
        "relu_X_offset": src.offset,
        "relu_Y_offset": dst.offset,
        "relu_N": dst.variable.size
    }
    injector.register(meta_values)

    code = injector.inject(template)
    kernel_name = util.add_canonical_suffix("tanh", code)
    code = code.replace("%%FUNC_NAME%%", kernel_name)

    return [Kernel({kernel_name: code}, kernel_name,
                   injector.generate_buffer())]
Пример #9
0
def reshape(op: Reshape, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel for a Reshape operator from the module template.

    Only reshapes that need no transposition are currently supported.

    Args:
        op: operator to generate code for.
        memory_layout: layout queried for "x" and "y".

    Returns:
        An empty list when "x" and "y" map to equal allocations (the
        operation is in-place), otherwise a single-element list holding the
        generated kernel.

    Raises:
        AssertionError: if the variables' orders do not match the
            operator's ``in_order`` / ``out_order`` parameters, or the size
            of "y" differs from ``mul`` of ``out_shape``.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]

    # In-place case: nothing to generate.
    if memory_layout[src] == memory_layout[dst]:
        return []

    params = op.parameters
    assert src.order == params["in_order"]
    assert dst.order == params["out_order"]
    assert dst.size == mul(params["out_shape"])

    injector = BufferInjector()
    injector.register({
        "reshape_x": memory_layout[src],
        "reshape_y": memory_layout[dst],
        "reshape_N": dst.size,
    })

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(template))

    return [Kernel({namer.name: code}, namer.name,
                   injector.buffer,
                   injector.unresolved_value_list)]
Пример #10
0
def reinterpret_axis(op: ReinterpretAxis,
                     memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel for a ReinterpretAxis operator.

    Only operations that need no transposition are currently supported.

    Args:
        op: operator to generate code for.
        memory_layout: layout queried for "x" and "y".

    Returns:
        A single-element list holding the generated kernel.

    Raises:
        AssertionError: if the variables' orders do not match the
            operator's ``in_order`` / ``out_order`` parameters.
    """
    src = memory_layout[op.inputs["x"]]
    dst = memory_layout[op.outputs["y"]]

    assert src.variable.order == op.parameters["in_order"]
    assert dst.variable.order == op.parameters["out_order"]

    injector = BufferInjector()
    injector.register({
        "reinterpret_axis_x": src,
        "reinterpret_axis_y": dst,
        "reinterpret_axis_N": dst.variable.size,
    })

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(template))

    return [Kernel({namer.name: code}, namer.name,
                   injector.buffer,
                   injector.unresolved_value_list)]
Пример #11
0
def space2depth(op: Space2Depth, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel for a Space2Depth operator from the module template.

    Registers the allocations of "x" and "y", the operator parameter ``r``,
    the N extent of "x" and the C/H/W extents of both variables (suffix 1
    for "x", suffix 2 for "y").

    Args:
        op: operator to generate code for.
        memory_layout: layout queried for "x" and "y".

    Returns:
        A single-element list holding the generated kernel.

    Raises:
        AssertionError: if "x" or "y" is not in NHWC order.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]
    block = op.parameters['r']

    assert src.order == OrderNHWC
    assert dst.order == OrderNHWC

    injector = BufferInjector()
    injector.register({
        "space2depth_x": memory_layout[src],
        "space2depth_y": memory_layout[dst],
        'space2depth_r': block,
        "space2depth_N": src.shape_dict[Axis.N],
        "space2depth_C1": src.shape_dict[Axis.C],
        "space2depth_C2": dst.shape_dict[Axis.C],
        "space2depth_H1": src.shape_dict[Axis.H],
        "space2depth_H2": dst.shape_dict[Axis.H],
        "space2depth_W1": src.shape_dict[Axis.W],
        "space2depth_W2": dst.shape_dict[Axis.W],
    })

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(template))

    return [Kernel({namer.name: code}, namer.name,
                   injector.buffer,
                   injector.unresolved_value_list)]
Пример #12
0
def axiswise_scale_general(op: AxiswiseScale,
                           memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the AxiswiseScale kernel from ``template_general``.

    Besides the D1/D2/D3 split of x's shape around ``op.axis``, this variant
    registers x's rank, the index of the target axis, x's shape and, for
    every axis of "x", the stride that axis has inside "y".

    Args:
        op: operator to generate code for.
        memory_layout: layout queried for "x", "s" and "y".

    Returns:
        A single-element list holding the generated kernel.
    """
    src = memory_layout[op.inputs["x"]]
    scale = memory_layout[op.inputs["s"]]
    dst = memory_layout[op.outputs["y"]]

    src_shape = src.variable.shape

    pivot = src.variable.order.axes_dict[op.axis]
    outer = mul(src_shape[:pivot])
    extent = src_shape[pivot]
    inner = mul(src_shape[pivot + 1:])

    # Strides of y derived from its shape: the last dimension gets stride 1
    # and every earlier one the product of the extents after it.
    reversed_strides = []
    running = 1
    for dim in reversed(dst.variable.shape):
        reversed_strides.append(running)
        running *= dim
    dst_strides = reversed_strides[::-1]

    # For each axis of x (in x's order), the stride of the same axis in y.
    src_stride_in_dst = [
        dst_strides[dst.variable.order.axes_dict[axis]]
        for axis in src.variable.order.axes
    ]

    injector = BufferInjector()
    injector.register({
        "axiswise_scale_X": src,
        "axiswise_scale_S": scale,
        "axiswise_scale_Y": dst,
        "axiswise_scale_D1": outer,
        "axiswise_scale_D2": extent,
        "axiswise_scale_D3": inner,
        "axiswise_scale_D": src.variable.ndim,
        "axiswise_scale_d_target": src.variable.order.axes_dict[op.axis],
        "axiswise_scale_x_shape": src_shape,
        "axiswise_scale_x_stride_in_y": src_stride_in_dst,
    })

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(template_general))

    return [Kernel({namer.name: code}, namer.name,
                   injector.buffer,
                   injector.unresolved_value_list)]
Пример #13
0
def tensordot(op: Tensordot, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel for a Tensordot operator using an sgemm template.

    The matrix sizes are derived from the variables themselves: K is ``mul``
    of A's extents along the reduced axes ``op.axes[0]``, M is ``A.size // K``
    and N is ``B.size // K``.

    Args:
        op: operator to generate code for.
        memory_layout: layout queried for "A", "B" and "C".

    Returns:
        A single-element list holding the generated kernel.

    Raises:
        AssertionError: if the reduced axes are not the trailing axes of A
            and B, or if C's axes are not A's remaining axes followed by
            B's remaining axes.
    """
    A = op.inputs["A"]
    B = op.inputs["B"]
    C = op.outputs["C"]
    axes = op.axes

    # Reduced axes must be located on inside of input variables.
    assert A.order.axes[-len(axes[0]):] == axes[0]
    assert B.order.axes[-len(axes[1]):] == axes[1]

    # Output variable's axes order must be
    # [*a_remained_axes, *b_remained_axes].
    assert C.order.axes[:A.ndim - len(axes[0])] == A.order.axes[:-len(axes[0])]
    assert C.order.axes[-(B.ndim -
                          len(axes[1])):] == B.order.axes[:-len(axes[1])]
    assert C.ndim == A.ndim - len(axes[0]) + B.ndim - len(axes[1])

    K = mul(A.shape_dict[a] for a in axes[0])
    M = A.size // K
    N = B.size // K

    # Register every value exactly once. The previous code re-registered the
    # A/B/C entries in both branches and, in the non-eigen branch, replaced
    # the sizes computed above with op.M / op.N / op.K -- attributes this
    # handler never otherwise relies on. This assumes
    # BufferInjector.register is a keyed update, so one registration yields
    # the same mapping.
    buffer_injector = BufferInjector()
    buffer_injector.register({
        "sgemm_A": memory_layout[A],
        "sgemm_B": memory_layout[B],
        "sgemm_C": memory_layout[C],
        "sgemm_M": M,
        "sgemm_N": N,
        "sgemm_K": K
    })

    # The (True, False) arguments are presumably the transpose flags for A
    # and B -- TODO confirm against the template generators.
    if op.has_attribute(UseEigenAttribute):
        source = generate_template_eigen(True, False)
    else:
        source = generate_template(True, False)

    name_injector = KernelNameInjector(op)

    source = buffer_injector.inject(source)
    source = name_injector.inject(source)

    kernel = Kernel({name_injector.name: source}, name_injector.name,
                    buffer_injector.buffer,
                    buffer_injector.unresolved_value_list)

    return [kernel]
Пример #14
0
def average_pooling_2d(op: AveragePooling2D,
                       memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel for an AveragePooling2D operator.

    Before the buffer values are injected, the template is specialised by
    replacing every key of
    ``statement_divide_without_padding[op.parameters["divide_without_padding"]]``
    with its associated statement.

    Args:
        op: operator to generate code for.
        memory_layout: layout queried for "x" and "y".

    Returns:
        A single-element list holding the generated kernel.

    Raises:
        AssertionError: if "x" or "y" is not in NHWC order.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]

    assert src.order == OrderNHWC
    assert dst.order == OrderNHWC

    params = op.parameters

    injector = BufferInjector()
    injector.register({
        "average_pooling_2d_X": memory_layout[src],
        "average_pooling_2d_Y": memory_layout[dst],
        "average_pooling_2d_N": src.shape_dict[Axis.N],
        "average_pooling_2d_H1": src.shape_dict[Axis.H],
        "average_pooling_2d_W1": src.shape_dict[Axis.W],
        "average_pooling_2d_C": src.shape_dict[Axis.C],
        "average_pooling_2d_H2": dst.shape_dict[Axis.H],
        "average_pooling_2d_W2": dst.shape_dict[Axis.W],
        "average_pooling_2d_KH": params["ksize"][0],
        "average_pooling_2d_KW": params["ksize"][1],
        "average_pooling_2d_SH": params["stride"][0],
        "average_pooling_2d_SW": params["stride"][1],
        "average_pooling_2d_PH": params["padding"][0],
        "average_pooling_2d_PW": params["padding"][1],
    })

    namer = KernelNameInjector(op)

    # Select the set of textual substitutions for the configured division
    # mode and apply them to the raw template.
    substitutions = statement_divide_without_padding[
        params["divide_without_padding"]]
    code = template
    for placeholder, replacement in substitutions.items():
        code = code.replace(placeholder, replacement)
    code = namer.inject(injector.inject(code))

    return [Kernel({namer.name: code}, namer.name,
                   injector.buffer,
                   injector.unresolved_value_list)]
Пример #15
0
def concat(op: Concat,
           constants_layout: MemoryLayout,
           variables_layout: MemoryLayout,
           metabuffer_injector: MetaBufferInjector = None) -> List[Kernel]:
    """Build the kernel for a Concat operator (meta-buffer variant).

    Inputs are read from ``op.inputs`` under the keys "x0", "x1", ... For
    each input the function records its offset, its shape, the strides its
    axes have inside "y", and the offset inside "y" at which its data
    starts along ``op.axis``. The list-valued entries are packed as int32
    byte strings.

    Args:
        op: operator to generate code for.
        constants_layout: not read by this function.
        variables_layout: layout queried for the inputs and "y".
        metabuffer_injector: injector to register into; a fresh
            ``MetaBufferInjector`` is created when ``None`` is given.

    Returns:
        A single-element list holding the generated kernel.
    """
    inputs = [variables_layout[op.inputs[f"x{str(i)}"]] for i in range(len(op.inputs))]
    out = variables_layout[op.outputs["y"]]
    concat_axis = op.axis

    input_offsets = [x.offset for x in inputs]
    input_shapes = [x.variable.shape for x in inputs]

    # Strides of y derived from its shape: the last dimension gets stride 1
    # and every earlier one the product of the extents after it.
    reversed_strides = []
    running = 1
    for dim in reversed(out.variable.shape):
        reversed_strides.append(running)
        running *= dim
    out_strides = reversed_strides[::-1]

    # input_strides_in_out[i][j] is the stride, inside y, of the j-th axis
    # of the i-th input.
    input_strides_in_out = [
        [out_strides[out.variable.order.axes_dict[axis]]
         for axis in x.variable.order.axes]
        for x in inputs
    ]

    # out_offsets[i] is the offset inside y at which the i-th input starts.
    out_offsets = []
    consumed = 0
    for x in inputs:
        out_offsets.append(consumed * out_strides[out.variable.order.axes_dict[concat_axis]])
        consumed += x.variable.shape_dict[concat_axis]

    injector = (MetaBufferInjector()
                if metabuffer_injector is None else metabuffer_injector)

    injector.register({
        "concat_y_offset": out.offset,
        "concat_D": len(out.variable.shape),
        "concat_N": len(inputs),
        "concat_x_offsets": np.array(input_offsets, dtype=np.int32).tobytes(),
        "concat_x_strides_in_y": np.array(input_strides_in_out, dtype=np.int32).tobytes(),
        "concat_x_shapes": np.array(input_shapes, dtype=np.int32).tobytes(),
        "concat_y_offsets": np.array(out_offsets, dtype=np.int32).tobytes(),
    })

    code = injector.inject(template)
    kernel_name = util.add_canonical_suffix("concat", code)
    code = code.replace("%%FUNC_NAME%%", kernel_name)

    return [Kernel({kernel_name: code}, kernel_name,
                   injector.generate_buffer())]
Пример #16
0
def concat(op: Concat, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel for a Concat operator (buffer-injector variant).

    Inputs are read from ``op.inputs`` under the keys "x0", "x1", ... For
    each input the function records its shape, the strides its axes have
    inside "y", and the offset inside "y" at which its data starts along
    ``op.axis``.

    Args:
        op: operator to generate code for.
        memory_layout: layout queried for the inputs and "y".

    Returns:
        A single-element list holding the generated kernel.
    """
    xs = [
        memory_layout[op.inputs[f"x{str(i)}"]] for i in range(len(op.inputs))
    ]
    y = memory_layout[op.outputs["y"]]
    target_axis = op.axis

    # The unused ``x_offsets`` list from the previous revision is gone: the
    # allocations themselves are registered under "concat_xs".
    x_shapes = [x.variable.shape for x in xs]

    # Strides of y derived from its shape: the last dimension gets stride 1
    # and every earlier one the product of the extents after it. Built by
    # appending and reversing once instead of inserting at the front.
    reversed_strides = []
    stride = 1
    for s in reversed(y.variable.shape):
        reversed_strides.append(stride)
        stride *= s
    y_strides = reversed_strides[::-1]

    # x_strides_in_y[i][j] is the stride, inside y, of xs[i].order.axes[j].
    x_strides_in_y = [
        [y_strides[y.variable.order.axes_dict[axis]]
         for axis in x.variable.order.axes]
        for x in xs
    ]

    # y_offsets[i] is the offset inside y at which xs[i]'s data starts.
    y_offsets = []
    target_axis_offset = 0
    for x in xs:
        y_offsets.append(target_axis_offset *
                         y_strides[y.variable.order.axes_dict[target_axis]])
        target_axis_offset += x.variable.shape_dict[target_axis]

    buffer_injector = BufferInjector()
    buffer_injector.register({
        "concat_y": y,
        "concat_D": len(y.variable.shape),
        "concat_N": len(xs),
        "concat_xs": xs,
        "concat_x_strides_in_y": x_strides_in_y,
        "concat_x_shapes": x_shapes,
        "concat_y_offsets": y_offsets
    })

    name_injector = KernelNameInjector(op)

    source = template
    source = buffer_injector.inject(source)
    source = name_injector.inject(source)

    kernel = Kernel({name_injector.name: source}, name_injector.name,
                    buffer_injector.buffer,
                    buffer_injector.unresolved_value_list)

    return [kernel]
Пример #17
0
def elementwise_kernel_base(op: Elementwise, command_buffer: CommandBuffer,
                            buffer_injector: BufferInjector):
    """Turn an encoded command buffer into a single kernel.

    The source is produced by ``encode_command(command_buffer)`` and then
    passed through the given buffer injector and a ``KernelNameInjector``
    created for ``op``.

    Args:
        op: operator the kernel name is derived from.
        command_buffer: commands to encode into source.
        buffer_injector: injector whose buffer and unresolved values are
            attached to the kernel.

    Returns:
        A single-element list holding the generated kernel.
    """
    namer = KernelNameInjector(op)
    code = namer.inject(buffer_injector.inject(encode_command(command_buffer)))

    return [Kernel({namer.name: code}, namer.name,
                   buffer_injector.buffer,
                   buffer_injector.unresolved_value_list)]
Пример #18
0
def average_pooling_2d(op: AveragePooling2D,
                       memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel for an AveragePooling2D operator.

    Registers the allocations of "x" and "y", the N/H/W/C extents of "x",
    the H/W extents of "y", and both components of the operator's
    ``ksize``, ``stride`` and ``padding`` parameters.

    Args:
        op: operator to generate code for.
        memory_layout: layout queried for "x" and "y".

    Returns:
        A single-element list holding the generated kernel.

    Raises:
        AssertionError: if "x" or "y" is not in NHWC order.
    """
    src = memory_layout[op.inputs["x"]]
    dst = memory_layout[op.outputs["y"]]

    assert src.variable.order == OrderNHWC
    assert dst.variable.order == OrderNHWC

    params = op.parameters

    injector = BufferInjector()
    injector.register({
        "average_pooling_2d_X": src,
        "average_pooling_2d_Y": dst,
        "average_pooling_2d_N": src.variable.shape_dict[Axis.N],
        "average_pooling_2d_H1": src.variable.shape_dict[Axis.H],
        "average_pooling_2d_W1": src.variable.shape_dict[Axis.W],
        "average_pooling_2d_C": src.variable.shape_dict[Axis.C],
        "average_pooling_2d_H2": dst.variable.shape_dict[Axis.H],
        "average_pooling_2d_W2": dst.variable.shape_dict[Axis.W],
        "average_pooling_2d_KH": params["ksize"][0],
        "average_pooling_2d_KW": params["ksize"][1],
        "average_pooling_2d_SH": params["stride"][0],
        "average_pooling_2d_SW": params["stride"][1],
        "average_pooling_2d_PH": params["padding"][0],
        "average_pooling_2d_PW": params["padding"][1],
    })

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(template))

    return [Kernel({namer.name: code}, namer.name,
                   injector.buffer,
                   injector.unresolved_value_list)]
Пример #19
0
def sgemm(op: Sgemm,
          constants_layout: MemoryLayout,
          variables_layout: MemoryLayout,
          metabuffer_injector: MetaBufferInjector = None) -> List[Kernel]:
    """Build the kernel for an Sgemm operator (meta-buffer variant).

    Inputs "A" and "B" are taken from ``variables_layout`` when present
    there and from ``constants_layout`` otherwise; output "C" always comes
    from ``variables_layout``. The template is chosen by
    ``op.parameters["eigen"]``.

    Args:
        op: operator to generate code for.
        constants_layout: fallback layout for "A" and "B".
        variables_layout: layout queried first for "A"/"B", and for "C".
        metabuffer_injector: injector to register into; a fresh
            ``MetaBufferInjector`` is created when ``None`` is given.

    Returns:
        A single-element list holding the generated kernel.
    """
    a_var = op.inputs["A"]
    if a_var in variables_layout:
        A = variables_layout[a_var]
    else:
        A = constants_layout[a_var]

    b_var = op.inputs["B"]
    if b_var in variables_layout:
        B = variables_layout[b_var]
    else:
        B = constants_layout[b_var]

    C = variables_layout[op.outputs["C"]]

    injector = (MetaBufferInjector()
                if metabuffer_injector is None else metabuffer_injector)

    injector.register({
        "sgemm_A_offset": A.offset,
        "sgemm_B_offset": B.offset,
        "sgemm_C_offset": C.offset,
        "sgemm_M": op.M,
        "sgemm_N": op.N,
        "sgemm_K": op.K
    })

    # Each branch registers again the same keys with the same expressions
    # as above; the calls are kept so the sequence of injector calls is
    # unchanged.
    if op.parameters["eigen"]:
        code = generate_template_eigen(op.transpose_A, op.transpose_B, op.M,
                                       op.N, op.K)

        injector.register({
            "sgemm_A_offset": A.offset,
            "sgemm_B_offset": B.offset,
            "sgemm_C_offset": C.offset
        })
    else:
        code = generate_template(op.transpose_A, op.transpose_B)

        injector.register({
            "sgemm_A_offset": A.offset,
            "sgemm_B_offset": B.offset,
            "sgemm_C_offset": C.offset,
            "sgemm_M": op.M,
            "sgemm_N": op.N,
            "sgemm_K": op.K
        })

    code = injector.inject(code)
    kernel_name = util.add_canonical_suffix("sgemm", code)
    code = code.replace("%%FUNC_NAME%%", kernel_name)

    return [Kernel({kernel_name: code}, kernel_name,
                   injector.generate_buffer())]
Пример #20
0
def elementwise_kernel(op: Elementwise,
                       memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel for a registered elementwise operator.

    The entry for ``op.__class__`` in ``_registered_items`` supplies the
    kernel code and a mapping of parameter names to functions; each function
    is called with ``op`` to obtain the parameter value. Inputs are read
    from ``op.inputs`` under the keys "x0", "x1", ...

    Args:
        op: operator to generate code for.
        memory_layout: layout queried for the inputs and "y".

    Returns:
        A single-element list holding the generated kernel.
    """
    inputs = [
        memory_layout[op.inputs[f"x{str(i)}"]] for i in range(len(op.inputs))
    ]
    out = memory_layout[op.outputs["y"]]
    entry = _registered_items[op.__class__]

    param_values = {key: fn(op) for key, fn in entry.parameters.items()}

    input_shapes = [x.variable.shape for x in inputs]

    # Strides of y derived from its shape: the last dimension gets stride 1
    # and every earlier one the product of the extents after it.
    reversed_strides = []
    running = 1
    for dim in reversed(out.variable.shape):
        reversed_strides.append(running)
        running *= dim
    out_strides = reversed_strides[::-1]

    # input_strides_in_out[i][j] is the stride, inside y, of the j-th axis
    # of the i-th input.
    input_strides_in_out = [
        [out_strides[out.variable.order.axes_dict[axis]]
         for axis in x.variable.order.axes]
        for x in inputs
    ]

    injector = BufferInjector()
    injector.register({
        "elementwise_Y": out,
        "elementwise_D": len(out.variable.shape),
        "elementwise_N": inputs[0].variable.size,
        "elementwise_Xs": inputs,
        "elementwise_X_strides_in_Y": input_strides_in_out,
        "elementwise_X_shapes": input_shapes
    })
    # Operator-specific parameters get their own prefixed keys.
    injector.register({
        f"elementwise_parameters_{key}": val
        for key, val in param_values.items()
    })

    namer = KernelNameInjector(op)

    code = _generate_source(inputs, out, entry.code, param_values)
    code = namer.inject(injector.inject(code))

    return [Kernel({namer.name: code}, namer.name,
                   injector.buffer,
                   injector.unresolved_value_list)]
Пример #21
0
def slice_handler(op: Slice, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel for a Slice operator from the module-level template.

    Axes present in both "x" and "y" must be indexed by a ``slice``; each
    contributes ``stride * start`` to the base offset into "x" and
    ``stride * step`` as its effective stride. Axes present only in "x"
    must be indexed by an ``int`` and contribute ``stride * index`` to the
    base offset.

    Args:
        op: operator to generate code for.
        memory_layout: layout queried for "x" and "y".

    Returns:
        A single-element list holding the generated kernel.

    Raises:
        AssertionError: if an index in ``op.indices`` has the wrong type
            for its axis.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]

    # Axes kept by the slice, listed in y's order, and axes that vanish.
    kept_axes = [a for a in dst.order.axes if a in src.order.axes]
    dropped_axes = [a for a in src.order.axes if a not in dst.order.axes]

    base_offset = 0
    src_strides = []

    for axis in kept_axes:
        selector = op.indices[axis]
        assert isinstance(selector, slice)
        window = normalize_slice(selector, src.shape_dict[axis])
        base_offset += src.stride_dict[axis] * window.start
        src_strides.append(src.stride_dict[axis] * window.step)

    for axis in dropped_axes:
        position = op.indices[axis]
        assert isinstance(position, int)
        base_offset += src.stride_dict[axis] * position

    injector = BufferInjector()
    injector.register({
        "slice_ndim": len(kept_axes),

        "slice_X": memory_layout[src],
        "slice_x_stride_in_y_order": src_strides,
        "slice_x_index_offset": base_offset,

        "slice_Y": memory_layout[dst],
        "slice_y_size": dst.size,
        "slice_y_shape": [dst.shape_dict[a] for a in kept_axes],
        "slice_y_stride": [dst.stride_dict[a] for a in kept_axes]
    })

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(template))

    return [
        Kernel({namer.name: code}, namer.name, injector.buffer,
               injector.unresolved_value_list)
    ]
Пример #22
0
def sgemm(op: Sgemm, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel for an Sgemm operator (buffer-injector variant).

    Registers the allocations of "A", "B" and "C" and the operator's
    ``M`` / ``N`` / ``K``; the template is chosen by
    ``op.parameters["eigen"]``.

    Args:
        op: operator to generate code for.
        memory_layout: layout queried for "A", "B" and "C".

    Returns:
        A single-element list holding the generated kernel.
    """
    lhs = memory_layout[op.inputs["A"]]
    rhs = memory_layout[op.inputs["B"]]
    out = memory_layout[op.outputs["C"]]

    injector = BufferInjector()
    injector.register({
        "sgemm_A": lhs,
        "sgemm_B": rhs,
        "sgemm_C": out,
        "sgemm_M": op.M,
        "sgemm_N": op.N,
        "sgemm_K": op.K
    })

    # Each branch registers again the same keys with the same expressions
    # as above; the calls are kept so the sequence of injector calls is
    # unchanged.
    if op.parameters["eigen"]:
        code = generate_template_eigen(op.transpose_A, op.transpose_B, op.M, op.N, op.K)
        injector.register({
            "sgemm_A": lhs,
            "sgemm_B": rhs,
            "sgemm_C": out
        })
    else:
        code = generate_template(op.transpose_A, op.transpose_B)
        injector.register({
            "sgemm_A": lhs,
            "sgemm_B": rhs,
            "sgemm_C": out,
            "sgemm_M": op.M,
            "sgemm_N": op.N,
            "sgemm_K": op.K
        })

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(code))

    return [
        Kernel({namer.name: code}, namer.name, injector.buffer,
               injector.unresolved_value_list)
    ]
Пример #23
0
def average_pooling_2d(
        op: AveragePooling2D,
        constants_layout: MemoryLayout,
        variables_layout: MemoryLayout,
        metabuffer_injector: MetaBufferInjector = None) -> List[Kernel]:
    """Build the kernel for an AveragePooling2D operator (meta-buffer variant).

    Registers the offsets of "x" and "y", the N/H/W/C extents of "x", the
    H/W extents of "y" and the first component of the operator's ``ksize``,
    ``stride`` and ``padding`` parameters.

    Args:
        op: operator to generate code for.
        constants_layout: not read by this function.
        variables_layout: layout queried for "x" and "y".
        metabuffer_injector: injector to register into; a fresh
            ``MetaBufferInjector`` is created when ``None`` is given.

    Returns:
        A single-element list holding the generated kernel.

    Raises:
        AssertionError: if "x" or "y" is not in NHWC order.
    """
    src = variables_layout[op.inputs["x"]]
    dst = variables_layout[op.outputs["y"]]

    assert src.variable.order == OrderNHWC
    assert dst.variable.order == OrderNHWC

    injector = (MetaBufferInjector()
                if metabuffer_injector is None else metabuffer_injector)

    params = op.parameters
    # NOTE(review): only index 0 of ksize/stride/padding is used, so this
    # presumably expects equal values for both spatial directions -- confirm.
    injector.register({
        "average_pooling_2d_X_offset": src.offset,
        "average_pooling_2d_Y_offset": dst.offset,
        "average_pooling_2d_N": src.variable.shape_dict[Axis.N],
        "average_pooling_2d_H1": src.variable.shape_dict[Axis.H],
        "average_pooling_2d_W1": src.variable.shape_dict[Axis.W],
        "average_pooling_2d_C": src.variable.shape_dict[Axis.C],
        "average_pooling_2d_H2": dst.variable.shape_dict[Axis.H],
        "average_pooling_2d_W2": dst.variable.shape_dict[Axis.W],
        "average_pooling_2d_K": params["ksize"][0],
        "average_pooling_2d_S": params["stride"][0],
        "average_pooling_2d_P": params["padding"][0],
    })

    code = injector.inject(template)
    kernel_name = util.add_canonical_suffix("average_pooling_2d", code)
    code = code.replace("%%FUNC_NAME%%", kernel_name)

    return [Kernel({kernel_name: code}, kernel_name,
                   injector.generate_buffer())]
Пример #24
0
def split_axis(op: SplitAxis, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel for a SplitAxis operator from the module template.

    Outputs are read from ``op.outputs`` under the keys "y0", "y1", ... For
    each output the function records its shape, the strides its axes have
    inside "x", and the offset inside "x" at which its data starts along
    ``op.parameters["axis"]``.

    Args:
        op: operator to generate code for.
        memory_layout: layout queried for "x" and the outputs.

    Returns:
        A single-element list holding the generated kernel.
    """
    src = memory_layout[op.inputs["x"]]
    outputs = [
        memory_layout[op.outputs[f"y{str(i)}"]] for i in range(len(op.outputs))
    ]
    split_axis_name = op.parameters["axis"]

    output_shapes = [y.variable.shape for y in outputs]

    # output_strides_in_src[i][j] is the stride, inside x, of the j-th axis
    # of the i-th output.
    output_strides_in_src = [
        [src.variable.stride[src.variable.order.axes_dict[axis]]
         for axis in y.variable.order.axes]
        for y in outputs
    ]

    # src_offsets[i] is the offset inside x at which the i-th output starts.
    src_offsets = []
    consumed = 0
    for y in outputs:
        src_offsets.append(
            consumed *
            src.variable.stride[src.variable.order.axes_dict[split_axis_name]])
        consumed += y.variable.shape_dict[split_axis_name]

    injector = BufferInjector()
    injector.register({
        "split_axis_x": src,
        "split_axis_D": len(src.variable.shape),
        "split_axis_N": len(outputs),
        "split_axis_ys": outputs,
        "split_axis_y_strides_in_x": output_strides_in_src,
        "split_axis_y_shapes": output_shapes,
        "split_axis_x_offsets": src_offsets
    })

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(template))

    return [Kernel({namer.name: code}, namer.name,
                   injector.buffer,
                   injector.unresolved_value_list)]
Пример #25
0
def local_response_normalization(
        op: LocalResponseNormalization,
        constants_layout: MemoryLayout,
        variables_layout: MemoryLayout,
        metabuffer_injector: MetaBufferInjector = None) -> List[Kernel]:
    """Build the kernel for a ``LocalResponseNormalization`` operator.

    Both the input ``x`` and the output ``y`` must be in ``OrderNHWC``. Their
    offsets, the N/H/W/C extents of ``x`` and the operator parameters
    (``n`` halved, ``k``, ``alpha`` and negated ``beta``) are registered in
    the meta buffer and injected into ``template``.

    Args:
        op: Operator providing ``inputs["x"]``, ``outputs["y"]`` and the
            parameters ``n``, ``k``, ``alpha`` and ``beta``.
        constants_layout: Not read by this function.
        variables_layout: Layout indexed by the operator's variables.
        metabuffer_injector: Injector to register into; a fresh
            ``MetaBufferInjector`` is created when ``None``.

    Returns:
        A one-element list containing the generated kernel.
    """
    x = variables_layout[op.inputs["x"]]
    y = variables_layout[op.outputs["y"]]

    assert x.variable.order == OrderNHWC
    assert y.variable.order == OrderNHWC

    if metabuffer_injector is None:
        metabuffer_injector = MetaBufferInjector()

    extents = x.variable.shape_dict
    params = op.parameters
    meta_values = {
        "local_response_normalization_X_offset": x.offset,
        "local_response_normalization_Y_offset": y.offset,
        "local_response_normalization_N": extents[Axis.N],
        "local_response_normalization_H": extents[Axis.H],
        "local_response_normalization_W": extents[Axis.W],
        "local_response_normalization_C": extents[Axis.C],
        "local_response_normalization_param_half_n": int(params["n"] // 2),
        "local_response_normalization_param_k": float(params["k"]),
        "local_response_normalization_param_alpha": float(params["alpha"]),
        # The exponent is stored already negated.
        "local_response_normalization_param_minus_beta":
        float(-params["beta"])
    }
    metabuffer_injector.register(meta_values)

    injected = metabuffer_injector.inject(template)
    # The function name is derived from the injected source, then substituted
    # into that same source.
    func_name = util.add_canonical_suffix("local_response_normalization",
                                          injected)
    source = injected.replace("%%FUNC_NAME%%", func_name)

    return [
        Kernel({func_name: source}, func_name,
               metabuffer_injector.generate_buffer())
    ]
Пример #26
0
def axiswise_bias(
        op: AxiswiseBias,
        constants_layout: MemoryLayout,
        variables_layout: MemoryLayout,
        metabuffer_injector: MetaBufferInjector = None) -> List[Kernel]:
    """Build the kernel for an ``AxiswiseBias`` operator.

    The input ``x`` and output ``y`` come from ``variables_layout`` and the
    bias ``b`` from ``constants_layout``. ``x`` must be in ``OrderNC``,
    ``OrderNHWC`` or ``OrderHWNC``, ``y`` must have the same shape as ``x``,
    and the bias axis must be ``Axis.C``.

    Args:
        op: Operator providing ``inputs["x"]``, ``inputs["b"]``,
            ``outputs["y"]`` and ``parameters["axis"]``; an optional
            ``parameters["inline_elementwise"]`` is used as the inline
            injector's delegate.
        constants_layout: Layout indexed by the bias variable.
        variables_layout: Layout indexed by the input/output variables.
        metabuffer_injector: Injector to register into; a fresh
            ``MetaBufferInjector`` is created when ``None``.

    Returns:
        A one-element list containing the generated kernel.
    """
    x = variables_layout[op.inputs["x"]]
    b = constants_layout[op.inputs["b"]]
    y = variables_layout[op.outputs["y"]]

    if metabuffer_injector is None:
        metabuffer_injector = MetaBufferInjector()

    x_order = x.variable.order
    assert x_order == OrderNC or x_order == OrderNHWC or x_order == OrderHWNC
    assert y.variable.shape == x.variable.shape

    bias_axis = op.parameters["axis"]
    assert bias_axis == Axis.C, "[Webassembly] AxiswiseBias supports only channelwise bias."

    y_var = y.variable
    metabuffer_injector.register({
        "axiswise_bias_X_offset": x.offset,
        "axiswise_bias_Y_offset": y.offset,
        "axiswise_bias_B_offset": b.offset,
        # Total element count of y divided by its channel extent.
        "axiswise_bias_N": y_var.size // y_var.shape_dict[Axis.C],
        "axiswise_bias_C": y_var.shape_dict[Axis.C],
    })

    inline_injector = InlineInjector()
    if "inline_elementwise" in op.parameters:
        inline_injector.delegate = op.parameters["inline_elementwise"]

    # Meta buffer values are injected first, then the inline code.
    injected = inline_injector.inject(metabuffer_injector.inject(template))
    func_name = util.add_canonical_suffix("axiswise_bias", injected)
    source = injected.replace("%%FUNC_NAME%%", func_name)

    return [
        Kernel({func_name: source}, func_name,
               metabuffer_injector.generate_buffer())
    ]
Пример #27
0
def sgemm(op: Sgemm, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel for an ``Sgemm`` operator.

    The operands ``A``, ``B`` and the result ``C`` are looked up in
    ``memory_layout`` and registered together with ``op.M``, ``op.N`` and
    ``op.K``. The template comes from ``generate_template_eigen`` when the
    operator carries the ``SgemmWithEigen`` attribute, otherwise from
    ``generate_template``; both receive the operator's transpose flags.

    Args:
        op: Operator providing ``inputs["A"]``, ``inputs["B"]``,
            ``outputs["C"]``, ``M``, ``N``, ``K``, ``transpose_A`` and
            ``transpose_B``.
        memory_layout: Layout indexed by the operator's variables.

    Returns:
        A one-element list containing the generated kernel.
    """
    lhs = op.inputs["A"]
    rhs = op.inputs["B"]
    out = op.outputs["C"]

    def operand_entries():
        # Buffer entries for the two operands and the result.
        return {
            "sgemm_A": memory_layout[lhs],
            "sgemm_B": memory_layout[rhs],
            "sgemm_C": memory_layout[out],
        }

    def dimension_entries():
        # Matrix dimension entries.
        return {
            "sgemm_M": op.M,
            "sgemm_N": op.N,
            "sgemm_K": op.K,
        }

    buffer_injector = BufferInjector()
    buffer_injector.register({**operand_entries(), **dimension_entries()})

    # NOTE(review): both branches register entries that were already
    # registered above; this looks redundant, but the calls are kept as-is
    # because BufferInjector.register is not visible here -- confirm before
    # removing.
    if op.has_attribute(SgemmWithEigen):
        source = generate_template_eigen(op.transpose_A, op.transpose_B)
        buffer_injector.register(operand_entries())
    else:
        source = generate_template(op.transpose_A, op.transpose_B)
        buffer_injector.register({**operand_entries(), **dimension_entries()})

    name_injector = KernelNameInjector(op)

    # Buffer values are injected first, then the kernel name.
    source = name_injector.inject(buffer_injector.inject(source))
    kernel_name = name_injector.name

    return [
        Kernel({kernel_name: source}, kernel_name, buffer_injector.buffer,
               buffer_injector.unresolved_value_list)
    ]
Пример #28
0
def im2col(op: Im2Col, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel for an ``Im2Col`` operator.

    The image ``im`` must be in ``OrderNHWC``. The column ``col`` must be in
    either (N, H, W, KH, KW, C) or (KH, KW, C, N, H, W) order; the latter
    selects ``template_KKCNHW``, the former ``template_NHWKKC``.

    Args:
        op: Operator providing ``inputs["im"]``, ``outputs["col"]`` and the
            attributes ``KH``, ``KW``, ``DH``, ``DW``, ``SH``, ``SW``, ``PH``
            and ``PW``.
        memory_layout: Layout indexed by the operator's variables.

    Returns:
        A one-element list containing the generated kernel.
    """
    im = op.inputs["im"]
    col = op.outputs["col"]

    assert im.order == OrderNHWC
    order_nhwkkc = Order([Axis.N, Axis.H, Axis.W, Axis.KH, Axis.KW, Axis.C])
    order_kkcnhw = Order([Axis.KH, Axis.KW, Axis.C, Axis.N, Axis.H, Axis.W])
    assert col.order in [order_nhwkkc, order_kkcnhw]

    buffer_injector = BufferInjector()
    buffer_values = {
        "im2col_im": memory_layout[im],
        "im2col_col": memory_layout[col],
        # N, H2 and W2 are read from col; C1, H1 and W1 from im.
        "im2col_N": col.shape_dict[Axis.N],
        "im2col_C1": im.shape_dict[Axis.C],
        "im2col_H1": im.shape_dict[Axis.H],
        "im2col_W1": im.shape_dict[Axis.W],
        "im2col_H2": col.shape_dict[Axis.H],
        "im2col_W2": col.shape_dict[Axis.W],
        "im2col_KH": op.KH,
        "im2col_KW": op.KW,
        "im2col_DH": op.DH,
        "im2col_DW": op.DW,
        "im2col_SH": op.SH,
        "im2col_SW": op.SW,
        "im2col_PH": op.PH,
        "im2col_PW": op.PW,
    }
    buffer_injector.register(buffer_values)

    name_injector = KernelNameInjector(op)

    if col.order == order_kkcnhw:
        source = template_KKCNHW
    else:
        source = template_NHWKKC

    # Buffer values are injected first, then the kernel name.
    source = name_injector.inject(buffer_injector.inject(source))
    kernel_name = name_injector.name

    return [
        Kernel({kernel_name: source}, kernel_name, buffer_injector.buffer,
               buffer_injector.unresolved_value_list)
    ]
def local_response_normalization(op: LocalResponseNormalization,
                                 memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel for a ``LocalResponseNormalization`` operator.

    Both the input ``x`` and the output ``y`` must be in ``OrderNHWC``. The
    two allocations, the N/H/W/C extents of ``x`` and the operator parameters
    (``n`` halved, ``k``, ``alpha`` and negated ``beta``) are registered in
    the buffer injector and injected into ``template``.

    Args:
        op: Operator providing ``inputs["x"]``, ``outputs["y"]`` and the
            parameters ``n``, ``k``, ``alpha`` and ``beta``.
        memory_layout: Layout indexed by the operator's variables.

    Returns:
        A one-element list containing the generated kernel.
    """
    x = memory_layout[op.inputs["x"]]
    y = memory_layout[op.outputs["y"]]

    assert x.variable.order == OrderNHWC
    assert y.variable.order == OrderNHWC

    extents = x.variable.shape_dict
    params = op.parameters

    buffer_injector = BufferInjector()
    buffer_values = {
        "local_response_normalization_X": x,
        "local_response_normalization_Y": y,
        "local_response_normalization_N": extents[Axis.N],
        "local_response_normalization_H": extents[Axis.H],
        "local_response_normalization_W": extents[Axis.W],
        "local_response_normalization_C": extents[Axis.C],
        "local_response_normalization_param_half_n": int(params["n"] // 2),
        "local_response_normalization_param_k": float(params["k"]),
        "local_response_normalization_param_alpha": float(params["alpha"]),
        # The exponent is stored already negated.
        "local_response_normalization_param_minus_beta":
        float(-params["beta"])
    }
    buffer_injector.register(buffer_values)

    name_injector = KernelNameInjector(op)

    # Buffer values are injected first, then the kernel name.
    source = name_injector.inject(buffer_injector.inject(template))
    kernel_name = name_injector.name

    return [
        Kernel({kernel_name: source}, kernel_name, buffer_injector.buffer,
               buffer_injector.unresolved_value_list)
    ]
Пример #30
0
def im2col(op: Im2Col,
           constants_layout: MemoryLayout,
           variables_layout: MemoryLayout,
           metabuffer_injector: MetaBufferInjector = None) -> List[Kernel]:
    """Build the kernel for an ``Im2Col`` operator.

    The image ``im`` must be in ``OrderNHWC`` and the column ``col`` in
    ``OrderNHWC`` or ``OrderCNHW``; ``OrderCNHW`` selects ``template_CNHW``,
    anything else ``template_NHWC``.

    Args:
        op: Operator providing ``inputs["im"]``, ``outputs["col"]`` and the
            attributes ``KH``, ``KW``, ``SH``, ``SW``, ``PH`` and ``PW``.
        constants_layout: Not read by this function.
        variables_layout: Layout indexed by the operator's variables.
        metabuffer_injector: Injector to register into; a fresh
            ``MetaBufferInjector`` is created when ``None``.

    Returns:
        A one-element list containing the generated kernel.
    """
    im = variables_layout[op.inputs["im"]]
    col = variables_layout[op.outputs["col"]]

    col_order = col.variable.order
    assert im.variable.order == OrderNHWC
    assert col_order == OrderNHWC or col_order == OrderCNHW

    if metabuffer_injector is None:
        metabuffer_injector = MetaBufferInjector()

    im_extents = im.variable.shape_dict
    col_extents = col.variable.shape_dict
    meta_values = {
        "im2col_im_offset": im.offset,
        "im2col_col_offset": col.offset,
        # N, H2 and W2 are read from col; C1, H1 and W1 from im.
        "im2col_N": col_extents[Axis.N],
        "im2col_C1": im_extents[Axis.C],
        "im2col_H1": im_extents[Axis.H],
        "im2col_W1": im_extents[Axis.W],
        "im2col_H2": col_extents[Axis.H],
        "im2col_W2": col_extents[Axis.W],
        "im2col_KH": op.KH,
        "im2col_KW": op.KW,
        "im2col_SH": op.SH,
        "im2col_SW": op.SW,
        "im2col_PH": op.PH,
        "im2col_PW": op.PW,
    }
    metabuffer_injector.register(meta_values)

    if col_order == OrderCNHW:
        chosen_template = template_CNHW
    else:
        chosen_template = template_NHWC

    injected = metabuffer_injector.inject(chosen_template)
    # The function name is derived from the injected source, then substituted
    # into that same source.
    func_name = util.add_canonical_suffix("im2col", injected)
    source = injected.replace("%%FUNC_NAME%%", func_name)

    return [
        Kernel({func_name: source}, func_name,
               metabuffer_injector.generate_buffer())
    ]