def do_activation(activation: any, x: Variable) -> Variable:
    if activation is keras.activations.relu:
        return Relu(None)(x)[0]
    elif activation is keras.activations.sigmoid:
        return Sigmoid(None)(x)[0]
    elif activation is keras.activations.hard_sigmoid:
        return HardSigmoid(None)(x)[0]
    elif activation is keras.activations.softplus:
        return Softplus(None, beta=1.0)(x)[0]
    elif activation is keras.activations.softsign:
        return Softsign(None)(x)[0]
    elif activation is keras.activations.softmax:
        return Softmax(None, axis=x.order.axes[-1])(x)[0]
    elif activation is keras.activations.elu:
        return Elu(None)(x)[0]
    elif activation is keras.activations.tanh:
        return Tanh(None)(x)[0]
    elif activation is keras.activations.linear:
        return x
    else:
        raise NotImplementedError(f"[KerasConverter] Unknown activation: {activation}")
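# A hedged usage sketch, not part of the converter: do_activation() dispatches on the
# activation *function object*, which works because Keras (2.x, the version WebDNN
# targeted) resolves activation names to module-level functions, so identity
# comparison with e.g. keras.activations.softmax succeeds. "keras_layer" below is a
# hypothetical Dense layer instance; softmax is the only case that needs extra
# information (the axis), taken from the innermost axis of x (x.order.axes[-1]).
import keras

assert keras.activations.get("relu") is keras.activations.relu
assert keras.activations.get("softmax") is keras.activations.softmax
# e.g.:  y = do_activation(keras_layer.activation, y)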
def _convert_softmax(converter: ONNXConverter, onnx_op: INodeProto):
    x = converter.get_variable(onnx_op.input[0])

    attrs = attribute_dict(onnx_op)
    axis = attrs["axis"].i

    y, = Softmax(None, axis=x.order.axes[axis])(x)
    converter.set_variable(onnx_op.output[0], y)
def test_double_softmax():
    linear = Linear('linear')
    softmax1 = Softmax('softmax', axis=Axis.C)
    softmax2 = Softmax('softmax', axis=Axis.C)

    x = Variable([4, 5], OrderNC)
    w = Variable([4, 5], OrderNC)
    h, = linear(x, w)
    h, = softmax1(h)
    y, = softmax2(h)

    graph = Graph([x], [y])
    graph, _ = RemoveLastSoftmax().optimize(graph)

    ops = listup_operators(graph)
    assert len(ops) == 1 and isinstance(ops[0], Linear)
    assert len(graph.outputs) == 1 and ops[0].outputs["y"] == graph.outputs[0]
def softmax_handler(converter: TensorFlowConverter, tf_op: "tf.Operation"):
    x = converter.get_variable(tf_op.inputs[0])
    y, = Softmax(None, axis=x.order.axes[-1])(x)

    if flags.AGGRESSIVE_ORDER_INFERENCE:
        # Assumption: softmax is computed along Axis.C
        unify(x.order.axes[-1], Axis.C)

    converter.set_variable(tf_op.outputs[0], y)
def _convert_softmax(converter: ChainerConverter, c_op: "chainer.functions.Softmax"):
    x = converter.get_variable(c_op.inputs[0])
    y, = Softmax(None, axis=x.order.axes[c_op.axis])(x)

    if flags.AGGRESSIVE_ORDER_INFERENCE:
        # In most cases, softmax is performed along Axis.C
        unify(y.order.axes[c_op.axis], Axis.C)

    converter.set_variable(c_op.outputs[0](), y)
def _convert_softmax(converter: ONNXConverter, onnx_op: INodeProto):
    x = converter.get_variable(onnx_op.input[0])

    attrs = attribute_dict(onnx_op)
    axis = attrs["axis"].i if "axis" in attrs else 1

    # Coerce the input into a 2-D matrix: dimensions before "axis" become rows,
    # the rest become columns, and softmax is applied along the columns.
    new_shape = [mul(x.shape[:axis]), mul(x.shape[axis:])]
    new_order = Order([None, None])
    x = x.reshape(shape=new_shape, order=new_order)

    y, = Softmax(None, axis=x.order.axes[1])(x)
    converter.set_variable(onnx_op.output[0], y)
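# A minimal NumPy sketch (assumption: ONNX Softmax semantics before opset 13) of what the
# reshape above emulates: flatten to a 2-D matrix at "axis", normalize each row, and keep
# the original shape. onnx_softmax_reference is a hypothetical helper for illustration.
import numpy as np

def onnx_softmax_reference(x: np.ndarray, axis: int = 1) -> np.ndarray:
    rows = int(np.prod(x.shape[:axis]))  # mul(x.shape[:axis]) in the converter
    cols = int(np.prod(x.shape[axis:]))  # mul(x.shape[axis:]) in the converter
    x2d = x.reshape(rows, cols)
    e = np.exp(x2d - x2d.max(axis=1, keepdims=True))
    return (e / e.sum(axis=1, keepdims=True)).reshape(x.shape)

v = np.random.rand(2, 3, 4).astype(np.float32)
out = onnx_softmax_reference(v, axis=1)
assert np.allclose(out.reshape(2, 12).sum(axis=1), 1.0, atol=1e-6)  # each row sums to 1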
def test_general():
    vx = np.random.rand(2, 3, 4, 5) - 0.5
    vy = np.exp(vx) / np.sum(np.exp(vx), axis=3, keepdims=True)

    x = Variable(vx.shape, order=OrderNHWC)
    y, = Softmax(None, axis=Axis.C)(x)

    generate_kernel_test_case(
        description=f"Softmax",
        backend=["webgpu", "webassembly", "fallback"],
        graph=Graph([x], [y]),
        inputs={x: vx},
        expected={y: vy}
    )
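# A hedged side note on the expected value above: it uses the textbook definition
# exp(x) / sum(exp(x)), which is fine for inputs in [-0.5, 0.5). For large inputs a
# max-subtracted form is usually preferred; it is mathematically identical but avoids
# overflow in exp(). stable_softmax below is an illustrative helper, not WebDNN code.
import numpy as np

def stable_softmax(v: np.ndarray, axis: int = -1) -> np.ndarray:
    shifted = v - v.max(axis=axis, keepdims=True)  # shifting cancels out in the ratio
    e = np.exp(shifted)
    return e / e.sum(axis=axis, keepdims=True)

v = np.random.rand(2, 3, 4, 5) - 0.5
naive = np.exp(v) / np.sum(np.exp(v), axis=3, keepdims=True)
assert np.allclose(stable_softmax(v, axis=3), naive)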
def test_internal_softmax():
    linear1 = Linear('linear')
    softmax1 = Softmax('softmax', axis=Axis.C)
    linear2 = Linear('linear')
    softmax2 = Softmax('softmax', axis=Axis.C)

    x = Variable([4, 5], OrderNC)
    w1 = Variable([4, 5], OrderNC)
    w2 = Variable([3, 4], OrderNC)
    h, = linear1(x, w1)
    h, = softmax1(h)
    h, = linear2(h, w2)
    y, = softmax2(h)

    graph = Graph([x], [y])
    graph, _ = RemoveLastSoftmax().optimize(graph)

    ops = listup_operators(graph)
    assert len(ops) == 3 and isinstance(ops[0], Linear) and isinstance(ops[1], Softmax) and isinstance(ops[2], Linear)
    assert len(graph.outputs) == 1 and ops[2].outputs["y"] == graph.outputs[0]
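# Why RemoveLastSoftmax is safe only for the *final* softmax (a hedged illustration, not
# part of the test suite): softmax is strictly monotonic within each row, so dropping the
# last softmax preserves the per-sample argmax, which is what classification consumers
# read. An internal softmax feeding another layer (as in test_internal_softmax) changes
# downstream values and therefore must be kept.
import numpy as np

logits = np.random.randn(4, 5)
probs = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)
assert (logits.argmax(axis=1) == probs.argmax(axis=1)).all()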
def test_every_order():
    orders = [
        OrderC,
        OrderNC, OrderCN,
        OrderNHWC, OrderHWNC, OrderHWCN, OrderNCHW, OrderCNHW, OrderCHWN
    ]

    for order in orders:
        op = Softmax("op", axis=order.axes[-1])
        x = Variable(np.arange(order.ndim) + 1, order)
        y, = op(x)
        for axis in y.order.axes:
            assert y.shape_dict[axis] == x.shape_dict[axis]
def _convert_softmax(converter: ChainerConverter, c_op: "chainer.functions.Softmax"):
    x = converter.get_variable(c_op.inputs[0])

    # chainer.functions.softmax has supported the "axis" parameter since v1.24
    if chainer.__version__ < "1.24":
        axis = 1
    else:
        axis = c_op.axis

    y, = Softmax(None, axis=x.order.axes[axis])(x)
    converter.set_variable(c_op.outputs[0](), y)
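# A hedged caveat on the version check above: comparing version strings
# lexicographically misorders some releases (e.g. "1.9.0" > "1.24.0" as strings, even
# though 1.9 predates 1.24). A sketch of a numeric comparison, assuming dotted integer
# version strings; version_at_least is a hypothetical helper, not part of WebDNN.
def version_at_least(version: str, minimum: str) -> bool:
    def parse(s: str):
        return tuple(int(p) for p in s.split(".")[:3] if p.isdigit())
    return parse(version) >= parse(minimum)

assert version_at_least("1.24.0", "1.24")
assert not version_at_least("1.9.0", "1.24")
assert "1.9.0" > "1.24.0"  # the lexicographic comparison that goes wrong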
def template(x_order=OrderNC, y_order=OrderNC, axis=Axis.C, description: str = ""):
    shape = (np.arange(x_order.ndim) + 2).tolist()
    vx = np.random.rand(*shape) - 0.5
    vy = np.exp(vx) / np.sum(np.exp(vx), axis=x_order.axes_dict[axis], keepdims=True)

    x = Variable(vx.shape, order=x_order)
    y, = Softmax(None, axis=axis)(x)
    y.change_order(y_order)

    generate_kernel_test_case(
        description=f"Softmax {description}",
        graph=Graph([x], [y]),
        inputs={x: vx},
        backend=["webgpu", "webassembly"],
        expected={y: np.transpose(vy, [x_order.axes_dict[a] for a in y.order.axes])},
    )
def generate_graph_model2(caption_net, hidden_num):
    # inputs
    var_input_img = Variable([1, 1, hidden_num], OrderNTC)
    var_input_word = Variable([1, 1], OrderNT)
    var_switch_img = Variable([1, 1, hidden_num], OrderNTC)
    var_switch_word = Variable([1, 1, hidden_num], OrderNTC)
    var_last_h = Variable([1, hidden_num], OrderNC)
    var_last_c = Variable([1, hidden_num], OrderNC)

    # prepare for lstm
    var_emb_word, = Embedding(None)(var_input_word, ConstantVariable(caption_net.word_vec.W.data, OrderCN))  # OrderNTC
    var_lstm_input = (var_emb_word * var_switch_word) + (var_input_img * var_switch_img)

    # lstm
    lstm_opr = LSTM(None,
                    use_bias=True,
                    return_sequences=False,
                    activation="tanh",
                    recurrent_activation="sigmoid",
                    use_initial_h=True,
                    use_initial_c=True)
    w_input = _convert_lstm_to_webdnn_order(caption_net.lstm.upward.W.data.T)
    w_hidden = _convert_lstm_to_webdnn_order(caption_net.lstm.lateral.W.data.T)
    b = _convert_lstm_to_webdnn_order(caption_net.lstm.upward.b.data[None, :])[0]

    var_lstm_h, var_lstm_c = lstm_opr(
        x=var_lstm_input,
        w_input=ConstantVariable(w_input, OrderCN),
        w_hidden=ConstantVariable(w_hidden, OrderCN),
        b=ConstantVariable(b, OrderC),
        initial_h=var_last_h,
        initial_c=var_last_c)

    # word probability
    var_word_score, = Linear(None)(var_lstm_h, ConstantVariable(caption_net.out_word.W.data.T, OrderCN))
    var_word_score_biased, = AxiswiseBias(None, axis=Axis.C)(var_word_score, ConstantVariable(caption_net.out_word.b.data, OrderC))
    var_word_prob, = Softmax(None, axis=Axis.C)(var_word_score_biased)

    return Graph(
        [var_input_img, var_input_word, var_switch_img, var_switch_word, var_last_h, var_last_c],
        [var_word_prob, var_lstm_h, var_lstm_c])
def template(x_order=OrderNC, y_order=OrderNC, axis=Axis.C, description: str = ""):
    vx = np.random.rand(2, 3) - 0.5
    vy = np.exp(vx) / np.sum(np.exp(vx), axis=OrderNC.axes_dict[axis], keepdims=True)

    x = Variable(vx.shape, order=OrderNC)
    y, = Softmax(None, axis=axis)(x)

    x.change_order(x_order)
    y.change_order(y_order)

    generate_kernel_test_case(
        description=f"Softmax {description}",
        graph=Graph([x], [y]),
        inputs={x: np.transpose(vx, [OrderNC.axes_dict[a] for a in x.order.axes])},
        expected={y: np.transpose(vy, [OrderNC.axes_dict[a] for a in y.order.axes])},
    )
def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    flag_changed = False
    for op in traverse.listup_operators(graph):
        if isinstance(op, Reshape):
            flag_changed |= _replace_input(op, "x", op.parameters["in_order"])
            flag_changed |= _replace_output(op, "y", op.parameters["out_order"])
            continue

        elif isinstance(op, (Convolution2D, MaxPooling2D, AveragePooling2D, Deconvolution2D,
                             Space2Depth, Depth2Space)):
            flag_changed |= _replace_input(op, "x", OrderNHWC)
            flag_changed |= _replace_output(op, "y", OrderNHWC)
            continue

        elif isinstance(op, Softmax):
            x = op.inputs["x"]
            y = op.outputs["y"]
            target_axis = op.parameters["axis"]

            if not (x.ndim == 2 and x.order.axes_dict[target_axis] == x.ndim - 1):
                """
                Before)

                    | x  |              | y  |
                    |----| -{softmax}-> |----|
                    |XYZ |    axis=Y    |XYZ |

                After)

                    | x  |                |hx1 |              |hx2 |              |hy1 |              |hy2 |                | y  |
                    |----| -{transpose}-> |----| -{reshape}-> |----| -{softmax}-> |----| -{reshape}-> |----| -{transpose}-> |----|
                    |XYZ |                |XZY |              | NC |    axis=C    | NC |              |XZY |                |XYZ |
                                            :                   :
                                      order_nd = XZY      order_2d = NC
                """
                op.remove_all()

                axes_nd = list(x.order.axes)
                axes_nd.remove(target_axis)
                axes_nd.append(target_axis)
                order_nd = Order(axes_nd)
                shape_nd = tuple([x.shape_dict[axis] for axis in axes_nd])

                order_2d = OrderNC
                shape_2d = tuple([x.size // x.shape_dict[target_axis], x.shape_dict[target_axis]])

                if x.order == order_nd:
                    hx1 = x
                else:
                    hx1, = Transpose(None)(x)
                    hx1.change_order(order_nd)
                    flag_changed = True

                if hx1.order == order_2d and hx1.shape == shape_2d:
                    hx2 = hx1
                else:
                    hx2, = Reshape(None, in_order=hx1.order, out_order=order_2d, out_shape=shape_2d)(hx1)
                    flag_changed = True

                hy1, = Softmax(None, axis=Axis.C)(hx2)

                if hy1.order == order_nd and hy1.shape == shape_nd:
                    hy2 = hy1
                else:
                    hy2, = Reshape(None, in_order=hy1.order, out_order=order_nd, out_shape=shape_nd)(hy1)
                    flag_changed = True

                if hy2.order == y.order:
                    y_dummy = hy2
                else:
                    y_dummy, = Transpose(None)(hy2)
                    y_dummy.change_order(y.order)
                    flag_changed = True

                y_dummy.replace(y)

            continue

    return graph, flag_changed
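# A NumPy sketch (an illustration under assumptions, not the optimizer itself) of why the
# transpose -> reshape -> 2-D softmax -> reshape -> transpose rewrite above is exact:
# softmax along an arbitrary axis equals moving that axis to the innermost position,
# flattening the remaining axes into rows, and applying a row-wise 2-D softmax.
import numpy as np

def softmax_2d(m: np.ndarray) -> np.ndarray:
    e = np.exp(m - m.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

x = np.random.rand(2, 3, 4, 5)
target_axis = 1  # e.g. softmax along the "C" axis of an NCHW-like tensor

direct = np.exp(x) / np.exp(x).sum(axis=target_axis, keepdims=True)

perm = [a for a in range(x.ndim) if a != target_axis] + [target_axis]   # transpose
hx1 = np.transpose(x, perm)
hx2 = hx1.reshape(-1, x.shape[target_axis])                             # reshape to 2-D
hy1 = softmax_2d(hx2)                                                   # softmax, axis=C
hy2 = hy1.reshape(hx1.shape)                                            # reshape back
rewritten = np.transpose(hy2, np.argsort(perm))                         # transpose back

assert np.allclose(direct, rewritten)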
def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    flag_changed = False
    for op in traverse.listup_operators(graph):
        if isinstance(op, Transpose):
            x = op.inputs["x0"]
            y = op.outputs["y"]

            if x.order == y.order:
                op.remove_all()
                OptimizeRule.replace_variable(graph, x, y)

                if x in graph.inputs:
                    index = graph.inputs.index(x)
                    graph.inputs.remove(x)
                    graph.inputs.insert(index, y)

                flag_changed = True
                continue

            if y not in graph.outputs and all(isinstance(op2, (Elementwise, SplitAxis)) for op2 in y.input_to):
                op.remove_all()
                for op2 in list(y.input_to):
                    name = op2.get_input_name(y)
                    op2.remove_input(y)
                    op2.append_input(name, x)

                flag_changed = True
                continue

        elif isinstance(op, Reshape):
            flag_changed |= _replace_input(op, "x", op.parameters["in_order"])
            flag_changed |= _replace_output(op, "y", op.parameters["out_order"])
            continue

        elif isinstance(op, (Tensordot,)):
            op = op  # type: Tensordot
            A = op.inputs["A"]
            B = op.inputs["B"]
            C = op.outputs["C"]

            # Reduced axes must be located on the inner side.
            a_axes = list(A.order.axes)
            for axis in op.axes[0]:
                a_axes.remove(axis)
                a_axes.append(axis)

            b_axes = list(B.order.axes)
            for axis in op.axes[1]:
                b_axes.remove(axis)
                b_axes.append(axis)

            # Remaining axes must be kept in the same order as A's and B's axes.
            if all(axis in a_axes for axis in C.order.axes[:A.ndim - len(op.axes[0])]):
                # C's order is [*a_remained_axes, *b_remained_axes], so there is no need to transpose C.
                for i, axis in enumerate(C.order.axes[:A.ndim - len(op.axes[0])]):
                    a_axes.remove(axis)
                    a_axes.insert(i, axis)

                for i, axis in enumerate(C.order.axes[A.ndim - len(op.axes[0]):]):
                    b_axes.remove(axis)
                    b_axes.insert(i, axis)

            else:
                c_axes = a_axes[:len(op.axes[0])] + b_axes[:len(op.axes[1])]
                flag_changed |= _replace_output(op, "C", Order(c_axes))

            flag_changed |= _replace_input(op, "A", Order(a_axes))
            flag_changed |= _replace_input(op, "B", Order(b_axes))
            continue

        elif isinstance(op, (Convolution2D, Deconvolution2D, MaxPooling2D, AveragePooling2D,
                             Space2Depth, Depth2Space, LocalResponseNormalization, Unpooling2D)):
            flag_changed |= _replace_input(op, "x", OrderNHWC)
            flag_changed |= _replace_output(op, "y", OrderNHWC)
            continue

        elif isinstance(op, Softmax):
            x = op.inputs["x"]
            y = op.outputs["y"]
            target_axis = op.parameters["axis"]

            if not (x.ndim == 2 and x.order.axes_dict[target_axis] == x.ndim - 1):
                """
                Before)

                    | x  |              | y  |
                    |----| -{softmax}-> |----|
                    |XYZ |    axis=Y    |XYZ |

                After)

                    | x  |                |hx1 |              |hx2 |              |hy1 |              |hy2 |                | y  |
                    |----| -{transpose}-> |----| -{reshape}-> |----| -{softmax}-> |----| -{reshape}-> |----| -{transpose}-> |----|
                    |XYZ |                |XZY |              | NC |    axis=C    | NC |              |XZY |                |XYZ |
                                            :                   :
                                      order_nd = XZY      order_2d = NC
                """
                op.remove_all()

                axes_nd = list(x.order.axes)
                axes_nd.remove(target_axis)
                axes_nd.append(target_axis)
                order_nd = Order(axes_nd)
                shape_nd = tuple([x.shape_dict[axis] for axis in axes_nd])

                order_2d = OrderNC
                shape_2d = tuple([x.size // x.shape_dict[target_axis], x.shape_dict[target_axis]])

                if x.order == order_nd:
                    hx1 = x
                else:
                    hx1 = x.transpose(order_nd)
                    flag_changed = True

                if hx1.order == order_2d and hx1.shape == shape_2d:
                    hx2 = hx1
                else:
                    hx2 = hx1.reshape(shape_2d, order_2d)
                    flag_changed = True

                hy1, = Softmax(None, axis=Axis.C)(hx2)

                if hy1.order == order_nd and hy1.shape == shape_nd:
                    hy2 = hy1
                else:
                    hy2 = hy1.reshape(shape_nd, order_nd)
                    flag_changed = True

                if hy2.order == y.order:
                    y_dummy = hy2
                else:
                    y_dummy = hy2.transpose(y.order)
                    flag_changed = True

                OptimizeRule.replace_variable(graph, y_dummy, y)

            continue

    return graph, flag_changed
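# A hedged NumPy sketch of the rationale for moving the reduced axes to the inner side of
# A and B in the Tensordot branch above: once the contracted axes are innermost, the
# tensordot collapses to a plain 2-D matrix multiply over the flattened remaining axes,
# which is what the backend kernels ultimately execute. The shapes below are illustrative
# assumptions, not taken from the optimizer.
import numpy as np

A = np.random.rand(2, 3, 4)   # contract over the last axis (size 4)
B = np.random.rand(4, 5)      # contract over the first axis (size 4)

reference = np.tensordot(A, B, axes=([2], [0]))          # shape (2, 3, 5)

A_inner = A                   # A's reduced axis is already innermost
B_inner = B.transpose(1, 0)   # move B's reduced axis to the inner side: (5, 4)
lowered = (A_inner.reshape(-1, 4) @ B_inner.reshape(-1, 4).T).reshape(2, 3, 5)

assert np.allclose(reference, lowered)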
def _convert_softmax(converter: ChainerConverter, c_op: "chainer.functions.Softmax"):
    x = converter.get_variable(c_op.inputs[0])
    y, = Softmax(None, axis=x.order.axes[c_op.axis])(x)
    converter.set_variable(c_op.outputs[0](), y)
def softmax_handler(converter: TensorFlowConverter, tf_op: "tf.Operation"):
    x = converter.get_variable(tf_op.inputs[0])
    y, = Softmax(None, axis=x.order.axes[-1])(x)
    converter.set_variable(tf_op.outputs[0], y)
def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    flag_changed = False
    for op in traverse.listup_operators(graph):
        if isinstance(op, (Reshape, ReinterpretAxis)):
            flag_changed |= _replace_input(graph, op, "x", op.parameters["in_order"])
            flag_changed |= _replace_output(graph, op, "y", op.parameters["out_order"])
            continue

        elif isinstance(op, LSTM):
            flag_changed |= _replace_input(graph, op, "x", OrderNTC)
            flag_changed |= _replace_input(graph, op, "w_input", OrderCN)
            flag_changed |= _replace_input(graph, op, "w_hidden", OrderCN)
            flag_changed |= _replace_output(graph, op, "y", OrderNTC if op.parameters["return_sequences"] else OrderNC)
            flag_changed |= _replace_output(graph, op, "final_c", OrderNC)
            continue

        elif isinstance(op, Embedding):
            flag_changed |= _replace_input(graph, op, "x", OrderNT)
            flag_changed |= _replace_input(graph, op, "w", OrderCN)
            flag_changed |= _replace_output(graph, op, "y", OrderNTC)
            continue

        elif isinstance(op, Im2Col):
            flag_changed |= _replace_input(graph, op, "im", OrderNHWC)
            flag_changed |= _replace_output(graph, op, "col", [
                Order([Axis.N, Axis.H, Axis.W, Axis.KH, Axis.KW, Axis.C]),
                Order([Axis.KH, Axis.KW, Axis.C, Axis.N, Axis.H, Axis.W])
            ])
            continue

        elif isinstance(op, Col2Im):
            flag_changed |= _replace_input(graph, op, "col", [
                Order([Axis.N, Axis.H, Axis.W, Axis.KH, Axis.KW, Axis.C])
            ])
            flag_changed |= _replace_output(graph, op, "im", OrderNHWC)
            continue

        elif isinstance(op, (Tensordot,)):
            op = op  # type: Tensordot
            A = op.inputs["A"]
            B = op.inputs["B"]
            C = op.outputs["C"]

            # Reduced axes must be located on the inner side.
            a_axes = list(A.order.axes)
            for axis in op.axes[0]:
                a_axes.remove(axis)
                a_axes.append(axis)

            b_axes = list(B.order.axes)
            for axis in op.axes[1]:
                b_axes.remove(axis)
                b_axes.append(axis)

            # Remaining axes must be kept in the same order as A's and B's axes.
            if all(axis in op.axes[0] for axis in C.order.axes[:A.ndim - len(op.axes[0])]):
                # C's order is [*a_remained_axes, *b_remained_axes], so there is no need to transpose C.
                for i, axis in enumerate(C.order.axes[:A.ndim - len(op.axes[0])]):
                    a_axes.remove(axis)
                    a_axes.insert(i, axis)

                for i, axis in enumerate(C.order.axes[A.ndim - len(op.axes[0]):]):
                    b_axes.remove(axis)
                    b_axes.insert(i, axis)

            else:
                c_axes = a_axes[:(A.ndim - len(op.axes[0]))] + b_axes[:(B.ndim - len(op.axes[1]))]
                flag_changed |= _replace_output(graph, op, "C", Order(c_axes))

            flag_changed |= _replace_input(graph, op, "A", Order(a_axes))
            flag_changed |= _replace_input(graph, op, "B", Order(b_axes))
            continue

        elif isinstance(op, (Convolution2D, Deconvolution2D, MaxPooling2D, AveragePooling2D,
                             Space2Depth, Depth2Space, LocalResponseNormalization, Unpooling2D)):
            flag_changed |= _replace_input(graph, op, "x", OrderNHWC)
            flag_changed |= _replace_output(graph, op, "y", OrderNHWC)
            continue

        elif isinstance(op, Softmax):
            x = op.inputs["x"]
            y = op.outputs["y"]
            target_axis = op.parameters["axis"]

            if not (x.ndim == 2 and x.order.axes_dict[target_axis] == x.ndim - 1):
                """
                Before)

                    | x  |              | y  |
                    |----| -{softmax}-> |----|
                    |XYZ |    axis=Y    |XYZ |

                After)

                    | x  |                |hx1 |              |hx2 |              |hy1 |              |hy2 |                | y  |
                    |----| -{transpose}-> |----| -{reshape}-> |----| -{softmax}-> |----| -{reshape}-> |----| -{transpose}-> |----|
                    |XYZ |                |XZY |              | NC |    axis=C    | NC |              |XZY |                |XYZ |
                                            :                   :
                                      order_nd = XZY      order_2d = NC
                """
                op.remove_all()

                axes_nd = list(x.order.axes)
                axes_nd.remove(target_axis)
                axes_nd.append(target_axis)
                order_nd = Order(axes_nd)
                shape_nd = tuple([x.shape_dict[axis] for axis in axes_nd])

                order_2d = OrderNC
                shape_2d = tuple([x.size // x.shape_dict[target_axis], x.shape_dict[target_axis]])

                if x.order == order_nd:
                    hx1 = x
                else:
                    hx1 = x.transpose(order_nd)
                    flag_changed = True

                if hx1.order == order_2d and hx1.shape == shape_2d:
                    hx2 = hx1
                else:
                    hx2 = hx1.reshape(shape_2d, order_2d)
                    flag_changed = True

                hy1, = Softmax(None, axis=Axis.C)(hx2)

                if hy1.order == order_nd and hy1.shape == shape_nd:
                    hy2 = hy1
                else:
                    hy2 = hy1.reshape(shape_nd, order_nd)
                    flag_changed = True

                if hy2.order == y.order:
                    y_dummy = hy2
                else:
                    y_dummy = hy2.transpose(y.order)
                    flag_changed = True

                OptimizeRule.replace_variable(graph, y_dummy, y)

            continue

        else:
            # "op" accepts any order. Remove redundant transpose operations if they exist.
            for key in op.inputs:
                flag_changed |= _optimize_redundant_transposed_input(graph, op, key, None)
            for key in op.outputs:
                flag_changed |= _optimize_redundant_transposed_output(graph, op, key, None)
            continue

    return graph, flag_changed
def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    flag_changed = False
    for op in traverse.listup_operators(graph):
        if isinstance(op, Transpose):
            x = op.inputs["x0"]
            y = op.outputs["y"]

            if x.order == y.order:
                op.remove_all()
                x.replace(y)
                flag_changed = True

            if all(isinstance(op2, (Elementwise, SplitAxis)) for op2 in y.input_to):
                op.remove_all()
                for op2 in list(y.input_to):
                    name = op2._get_input_name(y)
                    op2.remove_input(y)
                    op2.append_input(name, x)

        elif isinstance(op, Reshape):
            flag_changed |= _replace_input(op, "x", op.parameters["in_order"])
            flag_changed |= _replace_output(op, "y", op.parameters["out_order"])

        elif isinstance(op, (Convolution2D, MaxPooling2D, AveragePooling2D, Deconvolution2D)):
            flag_changed |= _replace_input(op, "x", OrderNHWC)
            flag_changed |= _replace_output(op, "y", OrderNHWC)

        elif isinstance(op, Softmax):
            x = op.inputs["x"]
            y = op.outputs["y"]

            if x.ndim > 2:
                """
                Before)

                    | x  |              | y  |
                    |----| -{softmax}-> |----|
                    |NCHW|              |NCHW|

                After)

                    | x  |                |hx1 |              |hx2 |              |hy1 |              |hy2 |                | y  |
                    |----| -{transpose}-> |----| -{reshape}-> |----| -{softmax}-> |----| -{reshape}-> |----| -{transpose}-> |----|
                    |NCHW|                |NHWC|              | NC |              | NC |              |NHWC|                |NCHW|
                """
                op.remove_all()

                target_axis = op.parameters["axis"]
                axes_nd = list(x.order.axes)
                axes_nd.remove(target_axis)
                axes_nd.append(target_axis)
                order_nd = Order(axes_nd)
                shape_nd = [x.shape_dict[axis] for axis in axes_nd]

                order_2d = OrderNC
                shape_2d = [x.size // x.shape_dict[target_axis], x.shape_dict[target_axis]]

                hx1, = Transpose(None)(x)
                hx1.change_order(order_nd)

                hx2, = Reshape(None, in_order=hx1.order, out_order=order_2d, out_shape=shape_2d)(hx1)

                hy1, = Softmax(None, axis=Axis.C)(hx2)

                hy2, = Reshape(None, in_order=hy1.order, out_order=order_nd, out_shape=shape_nd)(hy1)

                y_dummy, = Transpose(None)(hy2)
                y_dummy.change_order(y.order)
                y_dummy.replace(y)

                flag_changed = True

            else:
                flag_changed |= _replace_input(op, "x", OrderNC)
                flag_changed |= _replace_output(op, "y", OrderNC)

    return graph, flag_changed