Example #1
def do_activation(activation: Any, x: Variable) -> Variable:
    if activation is keras.activations.relu:
        return Relu(None)(x)[0]

    elif activation is keras.activations.sigmoid:
        return Sigmoid(None)(x)[0]

    elif activation is keras.activations.hard_sigmoid:
        return HardSigmoid(None)(x)[0]

    elif activation is keras.activations.softplus:
        return Softplus(None, beta=1.0)(x)[0]

    elif activation is keras.activations.softsign:
        return Softsign(None)(x)[0]

    elif activation is keras.activations.softmax:
        return Softmax(None, axis=x.order.axes[-1])(x)[0]

    elif activation is keras.activations.elu:
        return Elu(None)(x)[0]

    elif activation is keras.activations.tanh:
        return Tanh(None)(x)[0]

    elif activation is keras.activations.linear:
        return x

    else:
        raise NotImplementedError(
            f"[KerasConverter] Unknown activation: {activation}")
Example #2
def _convert_softmax(converter: ONNXConverter, onnx_op: INodeProto):
    x = converter.get_variable(onnx_op.input[0])

    attrs = attribute_dict(onnx_op)
    axis = attrs["axis"].i

    y, = Softmax(None, axis=x.order.axes[axis])(x)
    converter.set_variable(onnx_op.output[0], y)
Example #3
def test_double_softmax():
    linear = Linear('linear')
    softmax1 = Softmax('softmax', axis=Axis.C)
    softmax2 = Softmax('softmax', axis=Axis.C)

    x = Variable([4, 5], OrderNC)
    w = Variable([4, 5], OrderNC)
    h, = linear(x, w)
    h, = softmax1(h)
    y, = softmax2(h)

    graph = Graph([x], [y])

    graph, _ = RemoveLastSoftmax().optimize(graph)

    ops = listup_operators(graph)
    assert len(ops) == 1 and isinstance(ops[0], Linear)
    assert len(graph.outputs) == 1 and ops[0].outputs["y"] == graph.outputs[0]
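
The RemoveLastSoftmax rule exercised above relies on the fact that a trailing Softmax never changes which entry of each row is largest: softmax is a strictly increasing transformation along the reduced axis, so removing the last Softmax preserves the argmax of the network output. A minimal NumPy check of that property (illustrative only, not part of the test suite):

import numpy as np

logits = np.random.rand(4, 5) - 0.5
probs = np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True)

# Dropping the final softmax leaves the per-row argmax unchanged.
assert np.array_equal(np.argmax(logits, axis=1), np.argmax(probs, axis=1))
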
Example #4
def softmax_handler(converter: TensorFlowConverter, tf_op: "tf.Operation"):
    x = converter.get_variable(tf_op.inputs[0])
    y, = Softmax(None, axis=x.order.axes[-1])(x)

    if flags.AGGRESSIVE_ORDER_INFERENCE:
        # Assumption: Softmax is computed along Axis.C
        unify(x.order.axes[-1], Axis.C)

    converter.set_variable(tf_op.outputs[0], y)
Example #5
def _convert_softmax(converter: ChainerConverter,
                     c_op: "chainer.functions.Softmax"):
    x = converter.get_variable(c_op.inputs[0])
    y, = Softmax(None, axis=x.order.axes[c_op.axis])(x)

    if flags.AGGRESSIVE_ORDER_INFERENCE:
        # In most cases, softmax is performed along Axis.C
        unify(y.order.axes[c_op.axis], Axis.C)

    converter.set_variable(c_op.outputs[0](), y)
Example #6
def _convert_softmax(converter: ONNXConverter, onnx_op: INodeProto):
    x = converter.get_variable(onnx_op.input[0])

    attrs = attribute_dict(onnx_op)
    axis = attrs["axis"].i if "axis" in attrs else 1
    new_shape = [mul(x.shape[:axis]), mul(x.shape[axis:])]
    new_order = Order([None, None])

    x = x.reshape(shape=new_shape, order=new_order)
    y, = Softmax(None, axis=x.order.axes[1])(x)

    converter.set_variable(onnx_op.output[0], y)
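
The reshape in this converter follows the flatten-to-2-D semantics of older ONNX Softmax versions: all dimensions before axis form the rows, all remaining dimensions form the columns, and softmax is taken over the columns (the converter keeps the result in that flattened layout). A reference NumPy computation of the same semantics, as an illustrative sketch only (onnx_style_softmax is not a WebDNN or ONNX function):

import numpy as np

def onnx_style_softmax(v, axis=1):
    # Flatten to 2-D around `axis`, softmax over columns, restore the shape.
    rows = int(np.prod(v.shape[:axis]))
    cols = int(np.prod(v.shape[axis:]))
    flat = v.reshape(rows, cols)
    e = np.exp(flat - flat.max(axis=1, keepdims=True))
    return (e / e.sum(axis=1, keepdims=True)).reshape(v.shape)

y_ref = onnx_style_softmax(np.random.rand(2, 3, 4), axis=1)
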
Example #7
def test_general():
    vx = np.random.rand(2, 3, 4, 5) - 0.5
    vy = np.exp(vx) / np.sum(np.exp(vx), axis=3, keepdims=True)

    x = Variable(vx.shape, order=OrderNHWC)
    y, = Softmax(None, axis=Axis.C)(x)

    generate_kernel_test_case(description=f"Softmax",
                              backend=["webgpu", "webassembly", "fallback"],
                              graph=Graph([x], [y]),
                              inputs={x: vx},
                              expected={y: vy})
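
The reference value vy above uses a plain exp/sum, which is fine for the small random inputs in this test; a numerically robust reference would subtract the per-row maximum first, which is mathematically equivalent. A short sketch (stable_softmax is an illustrative helper, not part of the test utilities):

import numpy as np

def stable_softmax(v, axis=-1):
    # Subtracting the row maximum avoids overflow without changing the result.
    e = np.exp(v - v.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

vx = np.random.rand(2, 3, 4, 5) - 0.5
assert np.allclose(stable_softmax(vx, axis=3),
                   np.exp(vx) / np.sum(np.exp(vx), axis=3, keepdims=True))
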
Example #8
def test_internal_softmax():
    linear1 = Linear('linear')
    softmax1 = Softmax('softmax', axis=Axis.C)
    linear2 = Linear('linear')
    softmax2 = Softmax('softmax', axis=Axis.C)

    x = Variable([4, 5], OrderNC)
    w1 = Variable([4, 5], OrderNC)
    w2 = Variable([3, 4], OrderNC)
    h, = linear1(x, w1)
    h, = softmax1(h)
    h, = linear2(h, w2)
    y, = softmax2(h)

    graph = Graph([x], [y])

    graph, _ = RemoveLastSoftmax().optimize(graph)

    ops = listup_operators(graph)
    assert len(ops) == 3 and isinstance(ops[0], Linear) and isinstance(ops[1], Softmax) and isinstance(ops[2], Linear)
    assert len(graph.outputs) == 1 and ops[2].outputs["y"] == graph.outputs[0]
Example #9
def test_every_order():
    orders = [
        OrderC, OrderNC, OrderCN, OrderNHWC, OrderHWNC, OrderHWCN, OrderNCHW,
        OrderCNHW, OrderCHWN
    ]

    for order in orders:
        op = Softmax("op", axis=order.axes[-1])

        x = Variable(np.arange(order.ndim) + 1, order)
        y, = op(x)
        for axis in y.order.axes:
            assert y.shape_dict[axis] == x.shape_dict[axis]
Example #10
def _convert_softmax(converter: ChainerConverter,
                     c_op: "chainer.functions.Softmax"):
    x = converter.get_variable(c_op.inputs[0])

    # chainer.functions.softmax has supported the "axis" parameter since v1.24
    if chainer.__version__ < "1.24":
        axis = 1
    else:
        axis = c_op.axis

    y, = Softmax(None, axis=x.order.axes[axis])(x)

    converter.set_variable(c_op.outputs[0](), y)
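
Note that the lexicographic comparison chainer.__version__ < "1.24" is fragile: for example "1.3" < "1.24" evaluates to False even though Chainer 1.3 predates 1.24. If the check were rewritten, a more robust variant could parse the version first. A sketch, assuming the third-party packaging module is available (_softmax_axis is a hypothetical helper, not part of the converter):

from packaging.version import Version  # extra dependency, not imported by the original converter
import chainer

def _softmax_axis(c_op) -> int:
    # Robust version of the axis-selection branch above.
    if Version(chainer.__version__) < Version("1.24"):
        return 1  # the "axis" parameter did not exist yet
    return c_op.axis
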
Example #11
def template(x_order=OrderNC,
             y_order=OrderNC,
             axis=Axis.C,
             description: str = ""):
    shape = (np.arange(x_order.ndim) + 2).tolist()
    vx = np.random.rand(*shape) - 0.5
    vy = np.exp(vx) / np.sum(
        np.exp(vx), axis=x_order.axes_dict[axis], keepdims=True)

    x = Variable(vx.shape, order=x_order)
    y, = Softmax(None, axis=axis)(x)

    y.change_order(y_order)

    generate_kernel_test_case(
        description=f"Softmax {description}",
        graph=Graph([x], [y]),
        inputs={x: vx},
        backend=["webgpu", "webassembly"],
        expected={
            y: np.transpose(vy, [x_order.axes_dict[a] for a in y.order.axes])
        },
    )
Example #12
def generate_graph_model2(caption_net, hidden_num):
    # inputs
    var_input_img = Variable([1, 1, hidden_num], OrderNTC)
    var_input_word = Variable([1, 1], OrderNT)
    var_switch_img = Variable([1, 1, hidden_num], OrderNTC)
    var_switch_word = Variable([1, 1, hidden_num], OrderNTC)
    var_last_h = Variable([1, hidden_num], OrderNC)
    var_last_c = Variable([1, hidden_num], OrderNC)

    # prepare for lstm
    var_emb_word, = Embedding(None)(var_input_word,
                                    ConstantVariable(
                                        caption_net.word_vec.W.data,
                                        OrderCN))  # OrderNTC
    var_lstm_input = (var_emb_word * var_switch_word) + \
        (var_input_img * var_switch_img)

    # lstm
    lstm_opr = LSTM(None,
                    use_bias=True,
                    return_sequences=False,
                    activation="tanh",
                    recurrent_activation="sigmoid",
                    use_initial_h=True,
                    use_initial_c=True)
    w_input = _convert_lstm_to_webdnn_order(caption_net.lstm.upward.W.data.T)
    w_hidden = _convert_lstm_to_webdnn_order(caption_net.lstm.lateral.W.data.T)
    b = _convert_lstm_to_webdnn_order(
        caption_net.lstm.upward.b.data[None, :])[0]
    var_lstm_h, var_lstm_c = lstm_opr(
        x=var_lstm_input,
        w_input=ConstantVariable(w_input, OrderCN),
        w_hidden=ConstantVariable(w_hidden, OrderCN),
        b=ConstantVariable(b, OrderC),
        initial_h=var_last_h,
        initial_c=var_last_c)

    # word probability
    var_word_score, = Linear(None)(var_lstm_h,
                                   ConstantVariable(
                                       caption_net.out_word.W.data.T, OrderCN))
    var_word_score_biased, = AxiswiseBias(None, axis=Axis.C)(
        var_word_score, ConstantVariable(caption_net.out_word.b.data, OrderC))
    var_word_prob, = Softmax(None, axis=Axis.C)(var_word_score_biased)

    return Graph([
        var_input_img, var_input_word, var_switch_img, var_switch_word,
        var_last_h, var_last_c
    ], [var_word_prob, var_lstm_h, var_lstm_c])
Example #13
def template(x_order=OrderNC,
             y_order=OrderNC,
             axis=Axis.C,
             description: str = ""):
    vx = np.random.rand(2, 3) - 0.5
    vy = np.exp(vx) / np.sum(
        np.exp(vx), axis=OrderNC.axes_dict[axis], keepdims=True)

    x = Variable(vx.shape, order=OrderNC)
    y, = Softmax(None, axis=axis)(x)

    x.change_order(x_order)
    y.change_order(y_order)

    generate_kernel_test_case(
        description=f"Softmax {description}",
        graph=Graph([x], [y]),
        inputs={
            x: np.transpose(vx, [OrderNC.axes_dict[a] for a in x.order.axes])
        },
        expected={
            y: np.transpose(vy, [OrderNC.axes_dict[a] for a in y.order.axes])
        },
    )
Example #14
    def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
        flag_changed = False
        for op in traverse.listup_operators(graph):
            if isinstance(op, Reshape):
                flag_changed |= _replace_input(op, "x",
                                               op.parameters["in_order"])
                flag_changed |= _replace_output(op, "y",
                                                op.parameters["out_order"])
                continue

            elif isinstance(op, (Convolution2D, MaxPooling2D, AveragePooling2D,
                                 Deconvolution2D, Space2Depth, Depth2Space)):
                flag_changed |= _replace_input(op, "x", OrderNHWC)
                flag_changed |= _replace_output(op, "y", OrderNHWC)
                continue

            elif isinstance(op, Softmax):
                x = op.inputs["x"]
                y = op.outputs["y"]
                target_axis = op.parameters["axis"]

                if not (x.ndim == 2
                        and x.order.axes_dict[target_axis] == x.ndim - 1):
                    """
                    Before)
                    | x   |              | y   |
                    |-----| -{softmax}-> |-----|
                    | XYZ |   axis=Y     | XYZ |
                    
                    After)
                    | x   |                | hx1 |              | hx2 |              | hy1 |              | hy2 |                | y   |
                    |-----| -{transpose}-> |-----| -{reshape}-> |-----| -{softmax}-> |-----| -{reshape}-> |-----| -{transpose}-> |-----|
                    | XYZ |                | XZY |              | NC  |   axis=C     | NC  |              | XZY |                | XYZ |
                                              :                    :
                                        order_nd = XZY       order_2d = NC
                    """
                    op.remove_all()

                    axes_nd = list(x.order.axes)
                    axes_nd.remove(target_axis)
                    axes_nd.append(target_axis)
                    order_nd = Order(axes_nd)
                    shape_nd = tuple([x.shape_dict[axis] for axis in axes_nd])

                    order_2d = OrderNC
                    shape_2d = tuple([
                        x.size // x.shape_dict[target_axis],
                        x.shape_dict[target_axis]
                    ])

                    if x.order == order_nd:
                        hx1 = x

                    else:
                        hx1, = Transpose(None)(x)
                        hx1.change_order(order_nd)
                        flag_changed = True

                    if hx1.order == order_2d and hx1.shape == shape_2d:
                        hx2 = hx1

                    else:
                        hx2, = Reshape(None,
                                       in_order=hx1.order,
                                       out_order=order_2d,
                                       out_shape=shape_2d)(hx1)
                        flag_changed = True

                    hy1, = Softmax(None, axis=Axis.C)(hx2)

                    if hy1.order == order_nd and hy1.shape == shape_nd:
                        hy2 = hy1

                    else:
                        hy2, = Reshape(None,
                                       in_order=hy1.order,
                                       out_order=order_nd,
                                       out_shape=shape_nd)(hy1)
                        flag_changed = True

                    if hy2.order == y.order:
                        y_dummy = hy2

                    else:
                        y_dummy, = Transpose(None)(hy2)
                        y_dummy.change_order(y.order)
                        flag_changed = True

                    y_dummy.replace(y)

                    continue

        return graph, flag_changed
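
The transformation sketched in the docstring (move the target axis to the end, flatten to 2-D, apply a channel-last softmax, then undo the reshape and transpose) produces exactly the same values as a direct softmax along the original axis. A small NumPy check of that equivalence, independent of the WebDNN classes used above (softmax here is an illustrative helper):

import numpy as np

def softmax(v, axis):
    e = np.exp(v - v.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

v = np.random.rand(2, 3, 4)                  # shape (X, Y, Z), softmax along Y
direct = softmax(v, axis=1)

moved = v.transpose(0, 2, 1).reshape(-1, 3)  # order_nd = XZY, then order_2d = NC
two_d = softmax(moved, axis=1)               # softmax along Axis.C
roundtrip = two_d.reshape(2, 4, 3).transpose(0, 2, 1)  # back to XYZ

assert np.allclose(direct, roundtrip)
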
Example #15
    def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
        flag_changed = False
        for op in traverse.listup_operators(graph):
            if isinstance(op, Transpose):
                x = op.inputs["x0"]
                y = op.outputs["y"]

                if x.order == y.order:
                    op.remove_all()
                    OptimizeRule.replace_variable(graph, x, y)

                    if x in graph.inputs:
                        index = graph.inputs.index(x)
                        graph.inputs.remove(x)
                        graph.inputs.insert(index, y)

                    flag_changed = True
                    continue

                if y not in graph.outputs and all(
                        isinstance(op2, (Elementwise, SplitAxis))
                        for op2 in y.input_to):
                    op.remove_all()
                    for op2 in list(y.input_to):
                        name = op2.get_input_name(y)
                        op2.remove_input(y)
                        op2.append_input(name, x)

                    flag_changed = True
                    continue

            elif isinstance(op, Reshape):
                flag_changed |= _replace_input(op, "x",
                                               op.parameters["in_order"])
                flag_changed |= _replace_output(op, "y",
                                                op.parameters["out_order"])
                continue

            elif isinstance(op, (Tensordot, )):
                op = op  # type: Tensordot
                A = op.inputs["A"]
                B = op.inputs["B"]
                C = op.outputs["C"]

                # Reduced axes must be located on the inner side.
                a_axes = list(A.order.axes)
                for axis in op.axes[0]:
                    a_axes.remove(axis)
                    a_axes.append(axis)

                b_axes = list(B.order.axes)
                for axis in op.axes[1]:
                    b_axes.remove(axis)
                    b_axes.append(axis)

                # Remaining axes must keep the same relative order as in A and B.
                if all(axis in a_axes
                       for axis in C.order.axes[:A.ndim - len(op.axes[0])]):
                    # C's order is [*a_remaining_axes, *b_remaining_axes], so there is no need to transpose C.
                    for i, axis in enumerate(C.order.axes[:A.ndim -
                                                          len(op.axes[0])]):
                        a_axes.remove(axis)
                        a_axes.insert(i, axis)

                    for i, axis in enumerate(C.order.axes[A.ndim -
                                                          len(op.axes[0]):]):
                        b_axes.remove(axis)
                        b_axes.insert(i, axis)

                else:
                    c_axes = a_axes[:len(op.axes[0])] + b_axes[:len(op.axes[1]
                                                                    )]
                    flag_changed |= _replace_output(op, "C", Order(c_axes))

                flag_changed |= _replace_input(op, "A", Order(a_axes))
                flag_changed |= _replace_input(op, "B", Order(b_axes))
                continue

            elif isinstance(op, (Convolution2D, Deconvolution2D, MaxPooling2D,
                                 AveragePooling2D, Space2Depth, Depth2Space,
                                 LocalResponseNormalization, Unpooling2D)):
                flag_changed |= _replace_input(op, "x", OrderNHWC)
                flag_changed |= _replace_output(op, "y", OrderNHWC)
                continue

            elif isinstance(op, Softmax):
                x = op.inputs["x"]
                y = op.outputs["y"]
                target_axis = op.parameters["axis"]

                if not (x.ndim == 2
                        and x.order.axes_dict[target_axis] == x.ndim - 1):
                    """
                    Before)
                    | x   |              | y   |
                    |-----| -{softmax}-> |-----|
                    | XYZ |   axis=Y     | XYZ |

                    After)
                    | x   |                | hx1 |              | hx2 |              | hy1 |              | hy2 |                | y   |
                    |-----| -{transpose}-> |-----| -{reshape}-> |-----| -{softmax}-> |-----| -{reshape}-> |-----| -{transpose}-> |-----|
                    | XYZ |                | XZY |              | NC  |   axis=C     | NC  |              | XZY |                | XYZ |
                                              :                    :
                                        order_nd = XZY       order_2d = NC
                    """
                    op.remove_all()

                    axes_nd = list(x.order.axes)
                    axes_nd.remove(target_axis)
                    axes_nd.append(target_axis)
                    order_nd = Order(axes_nd)
                    shape_nd = tuple([x.shape_dict[axis] for axis in axes_nd])

                    order_2d = OrderNC
                    shape_2d = tuple([
                        x.size // x.shape_dict[target_axis],
                        x.shape_dict[target_axis]
                    ])

                    if x.order == order_nd:
                        hx1 = x

                    else:
                        hx1 = x.transpose(order_nd)
                        flag_changed = True

                    if hx1.order == order_2d and hx1.shape == shape_2d:
                        hx2 = hx1

                    else:
                        hx2 = hx1.reshape(shape_2d, order_2d)
                        flag_changed = True

                    hy1, = Softmax(None, axis=Axis.C)(hx2)

                    if hy1.order == order_nd and hy1.shape == shape_nd:
                        hy2 = hy1

                    else:
                        hy2 = hy1.reshape(shape_nd, order_nd)
                        flag_changed = True

                    if hy2.order == y.order:
                        y_dummy = hy2

                    else:
                        y_dummy = hy2.transpose(y.order)
                        flag_changed = True

                    OptimizeRule.replace_variable(graph, y_dummy, y)

                    continue

        return graph, flag_changed
Example #16
def _convert_softmax(converter: ChainerConverter,
                     c_op: "chainer.functions.Softmax"):
    x = converter.get_variable(c_op.inputs[0])
    y, = Softmax(None, axis=x.order.axes[c_op.axis])(x)

    converter.set_variable(c_op.outputs[0](), y)
Example #17
def softmax_handler(converter: TensorFlowConverter, tf_op: "tf.Operation"):
    x = converter.get_variable(tf_op.inputs[0])
    y, = Softmax(None, axis=x.order.axes[-1])(x)

    converter.set_variable(tf_op.outputs[0], y)
Example #18
    def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
        flag_changed = False
        for op in traverse.listup_operators(graph):
            if isinstance(op, (Reshape, ReinterpretAxis)):
                flag_changed |= _replace_input(graph, op, "x",
                                               op.parameters["in_order"])
                flag_changed |= _replace_output(graph, op, "y",
                                                op.parameters["out_order"])
                continue

            elif isinstance(op, LSTM):
                flag_changed |= _replace_input(graph, op, "x", OrderNTC)
                flag_changed |= _replace_input(graph, op, "w_input", OrderCN)
                flag_changed |= _replace_input(graph, op, "w_hidden", OrderCN)
                flag_changed |= _replace_output(
                    graph, op, "y",
                    OrderNTC if op.parameters["return_sequences"] else OrderNC)
                flag_changed |= _replace_output(graph, op, "final_c", OrderNC)
                continue

            elif isinstance(op, Embedding):
                flag_changed |= _replace_input(graph, op, "x", OrderNT)
                flag_changed |= _replace_input(graph, op, "w", OrderCN)
                flag_changed |= _replace_output(graph, op, "y", OrderNTC)
                continue

            elif isinstance(op, Im2Col):
                flag_changed |= _replace_input(graph, op, "im", OrderNHWC)
                flag_changed |= _replace_output(graph, op, "col", [
                    Order([Axis.N, Axis.H, Axis.W, Axis.KH, Axis.KW, Axis.C]),
                    Order([Axis.KH, Axis.KW, Axis.C, Axis.N, Axis.H, Axis.W])
                ])
                continue

            elif isinstance(op, Col2Im):
                flag_changed |= _replace_input(graph, op, "col", [
                    Order([Axis.N, Axis.H, Axis.W, Axis.KH, Axis.KW, Axis.C])
                ])
                flag_changed |= _replace_output(graph, op, "im", OrderNHWC)
                continue

            elif isinstance(op, (Tensordot, )):
                op = op  # type: Tensordot
                A = op.inputs["A"]
                B = op.inputs["B"]
                C = op.outputs["C"]

                # Reduced axes must be located on the inner side.
                a_axes = list(A.order.axes)
                for axis in op.axes[0]:
                    a_axes.remove(axis)
                    a_axes.append(axis)

                b_axes = list(B.order.axes)
                for axis in op.axes[1]:
                    b_axes.remove(axis)
                    b_axes.append(axis)

                # Remaining axes must keep the same relative order as in A and B.
                if all(axis in op.axes[0]
                       for axis in C.order.axes[:A.ndim - len(op.axes[0])]):
                    # C's order is [*a_remaining_axes, *b_remaining_axes], so there is no need to transpose C.
                    for i, axis in enumerate(C.order.axes[:A.ndim -
                                                          len(op.axes[0])]):
                        a_axes.remove(axis)
                        a_axes.insert(i, axis)

                    for i, axis in enumerate(C.order.axes[A.ndim -
                                                          len(op.axes[0]):]):
                        b_axes.remove(axis)
                        b_axes.insert(i, axis)

                else:
                    c_axes = a_axes[:(A.ndim - len(op.axes[0]))] + b_axes[:(
                        B.ndim - len(op.axes[1]))]
                    flag_changed |= _replace_output(graph, op, "C",
                                                    Order(c_axes))

                flag_changed |= _replace_input(graph, op, "A", Order(a_axes))
                flag_changed |= _replace_input(graph, op, "B", Order(b_axes))
                continue

            elif isinstance(op, (Convolution2D, Deconvolution2D, MaxPooling2D,
                                 AveragePooling2D, Space2Depth, Depth2Space,
                                 LocalResponseNormalization, Unpooling2D)):
                flag_changed |= _replace_input(graph, op, "x", OrderNHWC)
                flag_changed |= _replace_output(graph, op, "y", OrderNHWC)
                continue

            elif isinstance(op, Softmax):
                x = op.inputs["x"]
                y = op.outputs["y"]
                target_axis = op.parameters["axis"]

                if not (x.ndim == 2
                        and x.order.axes_dict[target_axis] == x.ndim - 1):
                    """
                    Before)
                    | x   |              | y   |
                    |-----| -{softmax}-> |-----|
                    | XYZ |   axis=Y     | XYZ |

                    After)
                    | x   |                | hx1 |              | hx2 |              | hy1 |              | hy2 |                | y   |
                    |-----| -{transpose}-> |-----| -{reshape}-> |-----| -{softmax}-> |-----| -{reshape}-> |-----| -{transpose}-> |-----|
                    | XYZ |                | XZY |              | NC  |   axis=C     | NC  |              | XZY |                | XYZ |
                                              :                    :
                                        order_nd = XZY       order_2d = NC
                    """
                    op.remove_all()

                    axes_nd = list(x.order.axes)
                    axes_nd.remove(target_axis)
                    axes_nd.append(target_axis)
                    order_nd = Order(axes_nd)
                    shape_nd = tuple([x.shape_dict[axis] for axis in axes_nd])

                    order_2d = OrderNC
                    shape_2d = tuple([
                        x.size // x.shape_dict[target_axis],
                        x.shape_dict[target_axis]
                    ])

                    if x.order == order_nd:
                        hx1 = x

                    else:
                        hx1 = x.transpose(order_nd)
                        flag_changed = True

                    if hx1.order == order_2d and hx1.shape == shape_2d:
                        hx2 = hx1

                    else:
                        hx2 = hx1.reshape(shape_2d, order_2d)
                        flag_changed = True

                    hy1, = Softmax(None, axis=Axis.C)(hx2)

                    if hy1.order == order_nd and hy1.shape == shape_nd:
                        hy2 = hy1

                    else:
                        hy2 = hy1.reshape(shape_nd, order_nd)
                        flag_changed = True

                    if hy2.order == y.order:
                        y_dummy = hy2

                    else:
                        y_dummy = hy2.transpose(y.order)
                        flag_changed = True

                    OptimizeRule.replace_variable(graph, y_dummy, y)

                    continue

            else:
                # "op" accepts any order. Remove redundant transpose operations if exist.
                for key in op.inputs:
                    flag_changed |= _optimize_redundant_transposed_input(
                        graph, op, key, None)
                for key in op.outputs:
                    flag_changed |= _optimize_redundant_transposed_output(
                        graph, op, key, None)
                continue

        return graph, flag_changed
Example #19
    def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
        flag_changed = False
        for op in traverse.listup_operators(graph):
            if isinstance(op, Transpose):
                x = op.inputs["x0"]
                y = op.outputs["y"]
                if x.order == y.order:
                    op.remove_all()
                    x.replace(y)
                    flag_changed = True

                if all(isinstance(op2, (Elementwise, SplitAxis)) for op2 in y.input_to):
                    op.remove_all()
                    for op2 in list(y.input_to):
                        name = op2._get_input_name(y)
                        op2.remove_input(y)
                        op2.append_input(name, x)
                    flag_changed = True

            elif isinstance(op, Reshape):
                flag_changed |= _replace_input(op, "x", op.parameters["in_order"])
                flag_changed |= _replace_output(op, "y", op.parameters["out_order"])

            elif isinstance(op, (Convolution2D, MaxPooling2D, AveragePooling2D, Deconvolution2D)):
                flag_changed |= _replace_input(op, "x", OrderNHWC)
                flag_changed |= _replace_output(op, "y", OrderNHWC)

            elif isinstance(op, Softmax):
                x = op.inputs["x"]
                y = op.outputs["y"]

                if x.ndim > 2:
                    """
                    Before)
                    | x    |              | y    |
                    |------| -{softmax}-> |------|
                    | NCHW |              | NCHW |

                    After)
                    | x    |                | hx1  |              | hx2 |              | hy1 |              | hy2  |                | y    |
                    |------| -{transpose}-> |------| -{reshape}-> |-----| -{softmax}-> |-----| -{reshape}-> |------| -{transpose}-> |------|
                    | NCHW |                | NHWC |              | NC  |              | NC  |              | NHWC |                | NCHW |
                    """
                    op.remove_all()

                    target_axis = op.parameters["axis"]
                    axes_nd = list(x.order.axes)
                    axes_nd.remove(target_axis)
                    axes_nd.append(target_axis)
                    order_nd = Order(axes_nd)
                    shape_nd = [x.shape_dict[axis] for axis in axes_nd]

                    order_2d = OrderNC
                    shape_2d = [x.size // x.shape_dict[target_axis], x.shape_dict[target_axis]]

                    hx1, = Transpose(None)(x)
                    hx1.change_order(order_nd)

                    hx2, = Reshape(None, in_order=hx1.order, out_order=order_2d, out_shape=shape_2d)(hx1)

                    hy1, = Softmax(None, axis=Axis.C)(hx2)

                    hy2, = Reshape(None, in_order=hy1.order, out_order=order_nd, out_shape=shape_nd)(hy1)

                    y_dummy, = Transpose(None)(hy2)
                    y_dummy.change_order(y.order)

                    y_dummy.replace(y)
                    flag_changed = True

                else:
                    flag_changed |= _replace_input(op, "x", OrderNC)
                    flag_changed |= _replace_output(op, "y", OrderNC)

        return graph, flag_changed