    def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
        flag_changed = False
        for op in traverse.filter_nodes(traverse.listup_operators(graph),
                                        Linear):
            x = op.inputs["x"]
            w = op.inputs["w"]
            y = op.outputs["y"]

            flag_changed = True
            op.remove_all()
            a_filter = Axis()

            if x.ndim == 2:
                w, = ReinterpretAxis(None,
                                     in_order=OrderNC,
                                     out_order=Order([Axis.C, a_filter]))(w)
                new_y, = Tensordot(None, axes=[Axis.C, a_filter])(x, w)

            elif x.ndim == 4:
                w, = ReinterpretAxis(
                    None,
                    in_order=OrderNHWC,
                    out_order=Order([Axis.C, Axis.H, Axis.W, a_filter]))(w)
                new_y, = Tensordot(None,
                                   axes=[[Axis.H, Axis.W, Axis.C],
                                         [Axis.H, Axis.W, a_filter]])(x, w)

            else:
                raise NotImplementedError

            OptimizeRule.replace_variable(graph, new_y.transpose_like(y), y)

        return graph, flag_changed
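
For intuition, the rewrite above holds because a fully-connected (Linear) layer is exactly a tensordot contraction over the shared feature axis. A minimal NumPy sketch, with arbitrary shapes:

import numpy as np

x = np.random.rand(8, 32).astype(np.float32)   # (batch, in_features)
w = np.random.rand(32, 16).astype(np.float32)  # (in_features, out_features)

# Contract the feature axis, the same contraction Tensordot performs above.
y = np.tensordot(x, w, axes=([1], [0]))
assert y.shape == (8, 16)
assert np.allclose(y, x @ w)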
Example #2
    def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
        flag_changed = False
        for op in traverse.filter_nodes(
                traverse.listup_operators(graph),
                Deconvolution2D):  # type: Deconvolution2D
            x = op.inputs["x"]
            w = op.inputs["w"]
            y = op.outputs["y"]
            flag_changed = True
            op.remove_all()

            a_filter, a_kh, a_kw = Axis(), Axis(), Axis()
            w, = ReinterpretAxis(None,
                                 in_order=OrderNHWC,
                                 out_order=Order(
                                     [Axis.C, a_kh, a_kw, a_filter]))(w)
            x, = ReinterpretAxis(None,
                                 in_order=OrderNHWC,
                                 out_order=Order(
                                     [Axis.N, Axis.H, Axis.W, a_filter]))(x)

            col, = Tensordot(None, axes=a_filter)(x, w)
            col = col.transpose(
                Order([Axis.N, Axis.H, Axis.W, a_kh, a_kw, Axis.C]))
            col = col.reshape(shape=[*col.shape[0:3],
                                     mul(col.shape[3:6])],
                              order=OrderNHWC)

            new_y, = Col2Im(None,
                            ksize=op.ksize,
                            stride=op.stride,
                            padding=op.padding)(col)
            OptimizeRule.replace_variable(graph, new_y.transpose_like(y), y)

        return graph, flag_changed
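
The reshape is the subtle step: after the tensordot, col carries axes [N, H, W, KH, KW, C], and Col2Im expects the last three merged into a single axis of size KH * KW * C, which is what mul(col.shape[3:6]) computes. A sketch of the shape arithmetic with made-up sizes:

import numpy as np

col = np.zeros((2, 5, 5, 3, 3, 4), dtype=np.float32)  # [N, H, W, KH, KW, C]
merged = col.reshape(*col.shape[0:3], int(np.prod(col.shape[3:6])))
assert merged.shape == (2, 5, 5, 36)  # 36 == 3 * 3 * 4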
Example #3
def _convert_gemm(converter: ONNXConverter, onnx_op: INodeProto):
    A = converter.get_variable(onnx_op.input[0])
    B = converter.get_variable(onnx_op.input[1])
    C = converter.get_variable(onnx_op.input[2])

    attrs = attribute_dict(onnx_op)
    alpha = attrs["alpha"].f
    beta = attrs["beta"].f
    broadcast = attrs["broadcast"].i if "broadcast" in attrs else 0

    trans_a = "transA" in attrs and attrs["transA"].i
    trans_b = "transB" in attrs and attrs["transB"].i
    y, = Tensordot(None,
                   axes=(A.order.axes[0 if trans_a else 1],
                         B.order.axes[1 if trans_b else 0]))(A, B)

    if broadcast:
        check_broadcast_constraints(y, C)
    else:
        y.order.unify(C.order)

    y = alpha * y + beta * C

    converter.set_variable(onnx_op.output[0], y)
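
For reference, the converter targets ONNX Gemm semantics. A hedged NumPy restatement (gemm_reference is a hypothetical helper name, not part of the converter):

import numpy as np

def gemm_reference(A, B, C, alpha=1.0, beta=1.0, transA=False, transB=False):
    # ONNX Gemm: Y = alpha * op(A) @ op(B) + beta * C, where op() optionally transposes.
    A = A.T if transA else A
    B = B.T if transB else B
    return alpha * (A @ B) + beta * C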
Example #4
def matmul_handler(converter: TensorFlowConverter, tf_op: "tf.Operation"):
    a = converter.get_variable(tf_op.inputs[0])
    b = converter.get_variable(tf_op.inputs[1])
    transposed_a = tf_op.get_attr("transpose_a")
    transposed_b = tf_op.get_attr("transpose_b")

    if a.ndim > 2 or b.ndim > 2:
        raise NotImplementedError(
            "[TensorFlowConverter] Currently, MatMul is supported only 2D * 2D case."
        )

    reduced_axes = []
    if transposed_a:
        reduced_axes.append(a.order.axes[0])

    else:
        reduced_axes.append(a.order.axes[1])

    if transposed_b:
        reduced_axes.append(b.order.axes[1])

    else:
        reduced_axes.append(b.order.axes[0])

    c, = Tensordot(None, axes=reduced_axes)(a, b)
    converter.set_variable(tf_op.outputs[0], c)
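
Choosing the contracted axis this way avoids materializing a transposed copy: with transpose_a set, the leading axis of a is contracted instead of the trailing one. A quick NumPy check of the equivalence:

import numpy as np

a = np.random.rand(4, 3).astype(np.float32)
b = np.random.rand(4, 5).astype(np.float32)
c = np.tensordot(a, b, axes=([0], [0]))  # contract a's leading axis
assert np.allclose(c, a.T @ b)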
Example #5
def _convert_mat_mul_var_var(
        converter: ChainerConverter,
        c_op: "chainer.functions.math.basic_math.MatMulVarVar"):
    x1 = converter.get_variable(c_op.inputs[0])
    x2 = converter.get_variable(c_op.inputs[1])
    y, = Tensordot(None, axes=[x1.order.axes[1], x2.order.axes[0]])(x1, x2)
    converter.set_variable(c_op.outputs[0](), y)
Example #6
def tensordot(op: Tensordot, memory_layout: MemoryLayout) -> List[Kernel]:
    A = op.inputs["A"]
    B = op.inputs["B"]
    C = op.outputs["C"]
    axes = op.axes

    # Reduced axes must be located on inside of input variables.
    assert A.order.axes[-len(axes[0]):] == axes[0]
    assert B.order.axes[-len(axes[1]):] == axes[1]

    # output variable's axes order must be as [*a_remained_axes, *b_remained_axes]
    assert C.order.axes[:A.ndim - len(axes[0])] == A.order.axes[:-len(axes[0])]
    assert C.order.axes[-(B.ndim -
                          len(axes[1])):] == B.order.axes[:-len(axes[1])]
    assert C.ndim == A.ndim - len(axes[0]) + B.ndim - len(axes[1])

    K = mul(A.shape_dict[a] for a in axes[0])
    M = A.size // K
    N = B.size // K

    buffer_injector = BufferInjector()
    buffer_injector.register({
        "sgemm_A": memory_layout[A],
        "sgemm_B": memory_layout[B],
        "sgemm_C": memory_layout[C],
        "sgemm_M": M,
        "sgemm_N": N,
        "sgemm_K": K
    })

    if op.has_attribute(UseEigenAttribute):
        source = generate_template_eigen(True, False)

    else:
        source = generate_template(True, False)

    name_injector = KernelNameInjector(op)

    source = buffer_injector.inject(source)
    source = name_injector.inject(source)

    kernel = Kernel({name_injector.name: source}, name_injector.name,
                    buffer_injector.buffer,
                    buffer_injector.unresolved_value_list)

    return [kernel]
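
The asserts above guarantee that the reduced axes are inner-most on both inputs, so the whole tensordot collapses to a single GEMM computing C[M, N] = A[M, K] @ B[N, K]^T. A NumPy sketch with arbitrary shapes:

import numpy as np

A = np.random.rand(2, 3, 4, 5).astype(np.float32)  # reduce over the last two axes
B = np.random.rand(6, 4, 5).astype(np.float32)     # reduce over the last two axes
K = 4 * 5
M = A.size // K  # 2 * 3
N = B.size // K  # 6
C = A.reshape(M, K) @ B.reshape(N, K).T
ref = np.tensordot(A, B, axes=([2, 3], [1, 2]))
assert np.allclose(C, ref.reshape(M, N))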
Example #7
    def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
        flag_changed = False
        for op in traverse.filter_nodes(traverse.listup_operators(graph),
                                        Convolution2D):  # type: Convolution2D
            x = op.inputs["x"]
            w = op.inputs["w"]
            y = op.outputs["y"]
            flag_changed = True
            op.remove_all()

            a_filter, a_kh, a_kw = Axis(), Axis(), Axis()
            w, = ReinterpretAxis(None,
                                 in_order=OrderNHWC,
                                 out_order=Order(
                                     [Axis.C, a_kh, a_kw, a_filter]))(w)

            if op.WH == 1 and op.WW == 1 and op.stride == (
                    1, 1) and op.padding == (0, 0):
                # Projection
                col, = ReinterpretAxis(
                    None,
                    in_order=OrderNHWC,
                    out_order=Order([Axis.N, Axis.H, Axis.W, a_filter]))(x)

                new_y, = Tensordot(None,
                                   [[a_filter], [a_kh, a_kw, a_filter]])(col,
                                                                         w)

            elif op.WH == x.shape_dict[Axis.H] and op.WW == x.shape_dict[
                    Axis.W] and op.padding == (0, 0):
                # Global convolution
                col, = ReinterpretAxis(None,
                                       in_order=OrderNHWC,
                                       out_order=Order(
                                           [Axis.N, a_kh, a_kw, a_filter]))(x)

                new_y, = Tensordot(
                    None, [[[a_kh, a_kw, a_filter], [a_kh, a_kw, a_filter]],
                           [a_kh, a_kw, a_filter]])(col, w)

            else:
                # General convolution
                col, = Im2Col(None,
                              ksize=op.ksize,
                              stride=op.stride,
                              padding=op.padding,
                              dilation_rate=op.dilation_rate)(x)
                col, = ReinterpretAxis(
                    None,
                    in_order=OrderNHWC,
                    out_order=Order([Axis.N, Axis.H, Axis.W, a_filter]))(col)

                new_y, = Tensordot(None,
                                   [[a_filter], [a_kh, a_kw, a_filter]])(col,
                                                                         w)

            new_y = new_y.transpose(y.order)
            OptimizeRule.replace_variable(graph, new_y, y)

        return graph, flag_changed
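
The projection branch exploits the fact that a 1x1 convolution with unit stride and no padding is a plain channel contraction. A NumPy sketch with arbitrary sizes:

import numpy as np

x = np.random.rand(2, 5, 5, 3).astype(np.float32)  # NHWC
w = np.random.rand(4, 1, 1, 3).astype(np.float32)  # [C_out, KH, KW, C_in]
y = np.tensordot(x, w[:, 0, 0, :], axes=([3], [1]))  # contract input channels
assert y.shape == (2, 5, 5, 4)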
Example #8
File: core.py Project: zhangaz1/webdnn
def _convert_dense(converter: KerasConverter, k_op: "keras.layers.Dense"):
    x = converter.get_variable(converter.get_input_tensor(k_op)[0])
    w = converter.convert_to_constant_variable(k_op.kernel, Order([None, None]))
    y, = Tensordot(None, axes=[x.order.axes[-1], w.order.axes[0]])(x, w)

    if k_op.use_bias:
        b = converter.convert_to_constant_variable(k_op.bias, Order([None]))
        b.order.axes[0].unify(w.order.axes[1])
        y = y + b

    y = do_activation(k_op.activation, y)
    converter.set_variable(converter.get_output_tensor(k_op)[0], y)
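
In NumPy terms, Dense contracts x's last axis with the kernel's first, then adds the bias. A hedged reference sketch (dense_reference is a hypothetical name):

import numpy as np

def dense_reference(x, kernel, bias=None):
    # Keras Dense: contract the feature axis, then optionally add the bias.
    y = np.tensordot(x, kernel, axes=([-1], [0]))
    return y if bias is None else y + bias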
Example #9
File: math.py Project: zhangaz1/webdnn
def _convert_mat_mul(converter: ChainerConverter,
                     c_op: "chainer.functions.MatMul"):
    x0 = converter.get_variable(c_op.inputs[0])
    x1 = converter.get_variable(c_op.inputs[1])
    if (x0.order.axes[1 if c_op.transa else 0]
            == x1.order.axes[0 if c_op.transb else 1]):
        x1 = x1.reinterpret_axes(Order([None, None]))

    y, = Tensordot(None,
                   axes=[
                       x0.order.axes[0 if c_op.transa else 1],
                       x1.order.axes[1 if c_op.transb else 0]
                   ])(x0, x1)
    converter.set_variable(c_op.outputs[0](), y)
Example #10
    def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
        flag_changed = False
        for op in traverse.filter_nodes(
                traverse.listup_operators(graph),
                Deconvolution2D):  # type: Deconvolution2D
            x = op.inputs["x"]
            w = op.inputs["w"]
            y = op.outputs["y"]
            flag_changed = True
            op.remove_all()

            a_filter = Axis()
            w, = ReinterpretAxis(
                None,
                in_order=Order([Axis.N, Axis.KH, Axis.KW, Axis.C]),
                out_order=Order([Axis.C, Axis.KH, Axis.KW, a_filter]))(w)

            if op.KH == 1 and op.KW == 1 and op.stride == (
                    1, 1) and op.padding == (0, 0):
                # Projection
                w = w.transpose(Order([Axis.C, Axis.KH, Axis.KW, a_filter]))
                w = w.reshape([w.shape_dict[Axis.C], w.shape_dict[a_filter]],
                              Order([Axis.C, a_filter]))
                new_y, = Tensordot(None, [Axis.C, a_filter])(x, w)

            else:
                # General deconvolution
                w = w.transpose(Order([a_filter, Axis.KH, Axis.KW, Axis.C]))
                col, = Tensordot(None, axes=[Axis.C, a_filter])(x, w)
                new_y, = Col2Im(None,
                                ksize=op.ksize,
                                stride=op.stride,
                                padding=op.padding)(col)

            OptimizeRule.replace_variable(graph, new_y.transpose_like(y), y)

        return graph, flag_changed
Example #11
def _convert_linear_function(
        converter: ChainerConverter,
        c_op: "chainer.functions.connection.linear.LinearFunction"):
    x = converter.get_variable(c_op.inputs[0])
    w = converter.get_variable(c_op.inputs[1])  # type: ConstantVariable

    y, = Tensordot(None, axes=[x.order.axes[1:], w.order.axes[1]])(x, w)

    if len(c_op.inputs) == 3:
        # with bias
        b = converter.get_variable(c_op.inputs[2])
        check_broadcast_constraints(y, b)
        y = y + b

    converter.set_variable(c_op.outputs[0](), y)
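
Conceptually, Chainer's LinearFunction flattens every non-batch axis of x before the product, which is why the Tensordot contracts x.order.axes[1:] all at once. A NumPy sketch with arbitrary shapes:

import numpy as np

x = np.random.rand(8, 3, 4, 4).astype(np.float32)
w = np.random.rand(10, 3 * 4 * 4).astype(np.float32)  # (out_size, in_size)
y = np.tensordot(x.reshape(len(x), -1), w, axes=([1], [1]))
assert y.shape == (8, 10)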
Example #12
def template(a_shape=(2, 3, 4, 5), b_shape=(3, 4, 5, 6), axes=((1, 2, 3), (0, 1, 2)), backend=None, description: str = ""):
    va = np.random.rand(*a_shape).astype(np.float32)
    vb = np.random.rand(*b_shape).astype(np.float32)
    vc = np.tensordot(va, vb, axes)

    a = Variable(a_shape, Order([None] * len(a_shape)))
    b = Variable(b_shape, Order([None] * len(b_shape)))
    c, = Tensordot(None, axes=[[v.order.axes[i] for i in axis_indices]
                               for v, axis_indices in zip([a, b], axes)])(a, b)

    generate_kernel_test_case(
        description=f"Tensordot {description}",
        backend=backend,
        graph=Graph([a, b], [c]),
        inputs={a: va, b: vb},
        expected={c: vc}
    )
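
Typical invocations might look as follows (the second shape choice is a made-up illustration, not taken from the test suite):

template()  # default: contract (2, 3, 4, 5) x (3, 4, 5, 6) over three shared axes
template(a_shape=(4, 8), b_shape=(8, 5), axes=((1,), (0,)),
         description="plain 2D matmul")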
Example #13
def _convert_separable_conv2d(converter: KerasConverter,
                              k_op: "keras.layers.SeparableConv2D"):
    x = converter.get_variable(converter.get_input_tensor(k_op)[0])
    check_data_format(x, k_op.data_format)
    axis_c_in = Axis.C
    axis_c_out = Axis()
    axis_depth_multiplier = Axis()

    w_depthwise = converter.convert_to_constant_variable(
        k_op.depthwise_kernel,
        Order([Axis.KH, Axis.KW, axis_c_in, axis_depth_multiplier]))

    w_pointwise = converter.convert_to_constant_variable(
        k_op.pointwise_kernel, Order([Axis.KH, Axis.KW, axis_c_in,
                                      axis_c_out]))
    w_pointwise = w_pointwise.reshape(
        shape=[
            x.shape_dict[axis_c_in], k_op.depth_multiplier,
            w_pointwise.shape_dict[axis_c_out]
        ],
        order=Order([axis_c_in, axis_depth_multiplier, axis_c_out]))

    ksize = tuple(k_op.kernel_size)
    stride = tuple(k_op.strides)
    dilation_rate = tuple(k_op.dilation_rate)
    padding = (parse_padding(k_op.padding, ksize[0], dilation_rate[0]),
               parse_padding(k_op.padding, ksize[1], dilation_rate[1]))
    if any(p[0] != p[1] for p in padding):
        raise NotImplementedError(
            "[KerasConverter] \"Different size padding\" is not supported yet")
    padding = tuple(p[0] for p in padding)

    h, = Im2Col(None,
                ksize=ksize,
                stride=stride,
                padding=padding,
                dilation_rate=dilation_rate)(x)

    # TODO: Support depth-wise convolution natively
    # Currently, depth-wise convolution is not supported natively, and emulated by composition of small convolution operations.
    ys = []
    for i in range(h.shape_dict[axis_c_in]):
        # 1. Depthwise convolution
        #
        # Ideal                             | Current implementation
        # ----------------------------------+----------------------------------------------------
        # h.axes=[N, H, W, KH, KW, C_in]    | h_sub.axes=[N, H, W, KH, KW]
        # w.axes=[KH, KW, C_in, DM]         | w_sub.axes=[KH, KW, DM]
        # g.axes=[N, H, W, C_in, DM]        | g_sub.axes=[N, H, W, DM]

        h_sub, = Slice(
            None,
            indices=AxisKeyDict(
                h.order.axes,
                [i if a == axis_c_in else slice(None)
                 for a in h.order.axes]))(h)
        w_depthwise_sub = w_depthwise[:, :, i, :]
        g_sub, = Tensordot(None, axes=((Axis.KH, Axis.KW),
                                       (Axis.KH, Axis.KW)))(h_sub,
                                                            w_depthwise_sub)

        # 2. Pointwise (projection) convolution
        #
        # Ideal                             | Current implementation
        # ----------------------------------+----------------------------------------------------
        # g.axes=[N, H, W, C_in, DM]        | g_sub.axes=[N, H, W, DM]
        # w.axes=[C_in, DM, C_out]          | w_sub.axes=[DM, C_out]
        # y.axes=[N, H, W, C_out]           | y_sub.axes=[N, H, W, C_out]

        w_pointwise_sub = w_pointwise[i, :, :]
        y_sub, = Tensordot(None,
                           axes=((axis_depth_multiplier, ),
                                 (axis_depth_multiplier, )))(g_sub,
                                                             w_pointwise_sub)
        ys.append(y_sub)

    # Sum all sub-convolution results into a single variable
    while len(ys) > 1:
        ys.append(ys.pop(0) + ys.pop(0))

    y = ys[0]

    # reinterpret axis "C_out" as C
    axes = list(y.order.axes)
    i = axes.index(axis_c_out)
    axes.pop(i)
    axes.insert(i, Axis.C)
    y = y.reinterpret_axes(Order(axes))

    if k_op.data_format == "channels_last":
        y = y.transpose(OrderNHWC)

    elif k_op.data_format == "channels_first":
        y = y.transpose(OrderNCHW)

    else:
        raise NotImplementedError(
            f"[KerasConverter] Unknown data format: {k_op.data_format}")

    if k_op.use_bias:
        b = converter.convert_to_constant_variable(k_op.bias, OrderC)
        y = y + b

    y = do_activation(k_op.activation, y)
    converter.set_variable(converter.get_output_tensor(k_op)[0], y)
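
One detail worth noting: the summation loop is a pairwise (tree) reduction rather than a left fold, so the chain of add operators stays shallow. The same loop on plain numbers:

ys = [1.0, 2.0, 3.0, 4.0]
while len(ys) > 1:
    ys.append(ys.pop(0) + ys.pop(0))  # add the two oldest partial sums
assert ys == [10.0]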
Example #14
def _split_tensordot(graph: Graph, op: Tensordot, v: Variable,
                     v_pair: Sequence[Variable], axis: Axis):
    s1 = v_pair[0].shape_dict[axis]
    s2 = v_pair[1].shape_dict[axis]
    A = op.inputs["A"]
    B = op.inputs["B"]
    C = op.outputs["C"]
    axes_M = tuple(filter(lambda a: a not in op.axes[0], A.order.axes))
    axes_N = tuple(filter(lambda a: a not in op.axes[1], B.order.axes))

    axes_K_A, axes_K_B = op.axes

    K = mul(A.shape_dict[a] for a in axes_K_A)
    M = A.size // K
    N = B.size // K

    shape_M = [A.shape_dict[a] for a in axes_M]
    shape_N = [B.shape_dict[a] for a in axes_N]

    op.remove_all()

    if v == A:
        A1, A2 = v_pair

        if axis in axes_K_A:
            split_axis_A = axis

            if (B.shape_dict[axes_K_B[0]] * s1) % (s1 + s2) == 0:
                split_axis_B = axes_K_B[0]

            else:
                # Factorize B's axes that make up K into A's corresponding axes
                B = B.transpose(Order(axes_N + axes_K_B))
                B = B.reshape(order=Order((Axis(), ) + axes_K_A),
                              shape=[N] + [A.shape_dict[a] for a in axes_K_A])
                split_axis_B = split_axis_A
                axes_K_B = axes_K_A

            B1, B2 = SplitAxis(None,
                               axis=split_axis_B,
                               sections=[(B.shape_dict[split_axis_B] * s1) //
                                         (s1 + s2)])(B)

            C1, = Tensordot(None, [axes_K_A, axes_K_B])(A1, B1)
            C2, = Tensordot(None, [axes_K_A, axes_K_B])(A2, B2)
            OptimizeRule.replace_variable(graph, (C1 + C2).reshape(
                shape_M + shape_N, Order(axes_M + axes_N)).transpose_like(C),
                                          C)

        else:
            C1, = Tensordot(None, op.axes)(A1, B)
            C2, = Tensordot(None, op.axes)(A2, B)

            for a1, a2 in zip(C1.order.axes, C2.order.axes):
                if a1 == a2 == axis:
                    continue
                a1.unify(a2)

            C_new, = Concat(None, axis=axis)(C1, C2)
            OptimizeRule.replace_variable(graph, C_new, C)

    elif v == B:
        B1, B2 = v_pair

        if axis in axes_K_B:
            split_axis_B = axis

            if (A.shape_dict[axes_K_A[0]] * s1) % (s1 + s2) == 0:
                split_axis_A = axes_K_A[0]

            else:
                # Factorize A's axes that make up K into B's corresponding axes
                A = A.transpose(Order(axes_M + axes_K_A))
                A = A.reshape(order=Order((Axis(), ) + axes_K_B),
                              shape=[M] + [B.shape_dict[a] for a in axes_K_B])
                split_axis_A = split_axis_B
                axes_K_A = axes_K_B

            A1, A2 = SplitAxis(None,
                               axis=split_axis_A,
                               sections=[(A.shape_dict[split_axis_A] * s1) //
                                         (s1 + s2)])(A)

            C1, = Tensordot(None, [axes_K_A, axes_K_B])(A1, B1)
            C2, = Tensordot(None, [axes_K_A, axes_K_B])(A2, B2)
            OptimizeRule.replace_variable(graph, (C1 + C2).reshape(
                shape_M + shape_N, Order(axes_M + axes_N)).transpose_like(C),
                                          C)

        else:
            C1, = Tensordot(None, op.axes)(A, B1)
            C2, = Tensordot(None, op.axes)(A, B2)

            for a1, a2 in zip(C1.order.axes, C2.order.axes):
                if a1 == a2 == axis:
                    continue
                a1.unify(a2)

            C_new, = Concat(None, axis=axis)(C1, C2)
            OptimizeRule.replace_variable(graph, C_new, C)

    elif v == C:
        """
        before)

            C[M, N] = A[M, K] @ B[K, N]

        after) In case `axis` is in `N`,

            C1[M, N1] = A[M, K] @ B1[K, N1]
            C2[M, N2] = A[M, K] @ B2[K, N2]
            C[M, N] = Concat(C1, C2)
        """
        raise NotImplementedError(
            f"Variable is too large to handle in WebGL backend: {v}")

    else:
        raise UnexpectedAndPleaseReportError
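
The two split strategies mirror basic block-matrix identities: splitting an input along a non-reduced axis splits C along that axis (the Concat branches above), while splitting along the reduced axis K turns the concat into a sum (the SplitAxis branches above). A NumPy check of both identities:

import numpy as np

A = np.random.rand(4, 6).astype(np.float32)
B = np.random.rand(6, 10).astype(np.float32)
C = A @ B

B1, B2 = B[:, :3], B[:, 3:]  # split along N
assert np.allclose(C, np.concatenate([A @ B1, A @ B2], axis=1))

A1, A2 = A[:, :3], A[:, 3:]  # split along K
Bk1, Bk2 = B[:3, :], B[3:, :]
assert np.allclose(C, A1 @ Bk1 + A2 @ Bk2)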