def _make_fake_loop_op(self,
                       body_nodes,   # type: Sequence[NodeProto]
                       input_types,  # type: Sequence[Tuple[TensorProto.DataType, Sequence[int], Text]]
                       output_types  # type: Sequence[Tuple[TensorProto.DataType, Sequence[int], Text]]
                       ):  # type: (...) -> List[NodeProto]
    """Wrap body_nodes in a Loop node driven by a constant trip count of 10
    and a constant-true condition, returning the Constant + Loop node list."""
    trip_count_tensor = helper.make_tensor("trip_count_value", TensorProto.INT32, (), [10])
    cond_tensor = helper.make_tensor("condition", TensorProto.BOOL, (), [True])
    # The body graph always carries the implicit iteration counter and the
    # loop condition first; user-supplied values get a "_" prefix inside it.
    graph_inputs = [
        helper.make_tensor_value_info("i", TensorProto.INT32, ()),
        helper.make_tensor_value_info("cond", TensorProto.BOOL, ()),
    ] + [
        helper.make_tensor_value_info("_" + name, elem_type, shape)
        for elem_type, shape, name in input_types
    ]
    graph_outputs = [
        helper.make_tensor_value_info("cond", TensorProto.BOOL, ()),
    ] + [
        helper.make_tensor_value_info("_" + name, elem_type, shape)
        for elem_type, shape, name in output_types
    ]
    body_graph = helper.make_graph(body_nodes, "body_graph",
                                   graph_inputs, graph_outputs)
    loop_inputs = ["trip_count", "condition"] + [name for _, _, name in input_types]
    # TODO: fix checker to accept 0-input variadic inputs; until then pad
    # with an empty name so the Loop has at least one loop-carried slot.
    if len(loop_inputs) == 2:
        loop_inputs.append("")
    loop_outputs = [name for _, _, name in output_types]
    return [
        helper.make_node("Constant", [], ["trip_count"], value=trip_count_tensor),
        helper.make_node("Constant", [], ["condition"], value=cond_tensor),
        helper.make_node("Loop", loop_inputs, loop_outputs, body=body_graph),
    ]
def test_make_tensor(self):  # type: () -> None
    """make_tensor must round-trip list data, raw bytes, and string data."""
    np_array = np.random.randn(2, 3).astype(np.float32)
    tensor = helper.make_tensor(
        name='test',
        data_type=TensorProto.FLOAT,
        dims=(2, 3),
        vals=np_array.reshape(6).tolist()
    )
    self.assertEqual(tensor.name, 'test')
    np.testing.assert_equal(np_array, numpy_helper.to_array(tensor))

    # use raw_data field to store the data
    tensor = helper.make_tensor(
        name='test',
        data_type=TensorProto.FLOAT,
        dims=(2, 3),
        vals=np_array.reshape(6).tobytes(),
        raw=True,
    )
    np.testing.assert_equal(np_array, numpy_helper.to_array(tensor))

    # STRING tensors keep their values in the repeated string_data field.
    string_list = [name.encode('ascii') for name in ['Amy', 'Billy', 'Cindy', 'David']]
    tensor = helper.make_tensor(
        name='test',
        data_type=TensorProto.STRING,
        dims=(2, 2),
        vals=string_list,
        raw=False,
    )
    self.assertEqual(string_list, list(tensor.string_data))
def test_onnx_to_caffe2_zipfile(self):
    """Run a model loaded from a zip archive whose initializer data is stored
    as separate archive entries, and verify the MatMul result.

    Fix: the original created two extra NamedTemporaryFile objects
    (`output`, `init_net_output`) that were never used; they are removed.
    """
    buf = tempfile.NamedTemporaryFile()
    onnx_model = zipfile.ZipFile(buf, 'w')
    node_def = helper.make_node(
        "MatMul", ["X", "W"], ["Y"])
    X = np.random.rand(2, 3).astype(np.float32)
    W = np.random.rand(3, 2).flatten().astype(np.float32)
    graph_def = helper.make_graph(
        [node_def],
        "test",
        [helper.make_tensor_value_info("X", TensorProto.FLOAT, (2, 3)),
         helper.make_tensor_value_info("W", TensorProto.FLOAT, (3, 2))],
        [helper.make_tensor_value_info("Y", TensorProto.FLOAT, (2, 2))],
        # b'__EXTERNAL' marks W's payload as living in a separate zip entry.
        initializer=[helper.make_tensor("W",
                                        TensorProto.FLOAT,
                                        [3, 2],
                                        b'__EXTERNAL',
                                        raw=True)])
    model_def = helper.make_model(graph_def, producer_name='onnx-to-caffe2-test')
    # The archive holds the serialized model plus one entry per external tensor.
    onnx_model.writestr('__MODEL_PROTO', model_def.SerializeToString())
    onnx_model.writestr('W', W.tobytes())
    onnx_model.close()

    W = W.reshape((3, 2))
    Y_expect = np.matmul(X, W)

    c2_model = c2.prepare_zip_archive(buf)
    Y = c2_model.run(X).Y
    np.testing.assert_allclose(Y, Y_expect)
def test_tensor_filling_ops(self):
    """_create_tensor_filling_op must reproduce the original values for every
    supported ONNX element type when the generated op is executed."""
    supported_dtypes = [
        onnx.TensorProto.FLOAT,
        onnx.TensorProto.DOUBLE,
        onnx.TensorProto.BOOL,
        onnx.TensorProto.INT8,
        onnx.TensorProto.INT16,
        onnx.TensorProto.INT32,
        onnx.TensorProto.INT64,
        onnx.TensorProto.UINT8,
        onnx.TensorProto.UINT16,
        onnx.TensorProto.UINT32,
    ]
    shape = (1, 2, 3)
    for dtype in supported_dtypes:
        vals = np.random.randn(*shape)
        # Spread values out for non-bool types so casting is meaningful.
        if dtype != onnx.TensorProto.BOOL:
            vals *= 5
        vals = vals.astype(mapping.TENSOR_TYPE_TO_NP_TYPE[dtype])
        tensor = make_tensor(
            name='test-tensor-{}'.format(dtype),
            data_type=dtype,
            dims=[1, 2, 3],
            vals=vals.flatten().tolist(),
        )
        op = c2.Caffe2Backend._create_tensor_filling_op(tensor)
        self.assertEqual(len(op.input), 0)
        self.assertEqual(op.output, [tensor.name])
        ws, output = c2_native_run_op(op, inputs=[])
        self.assertEqual(len(output), 1)
        np.testing.assert_almost_equal(output[0], vals)
        np.testing.assert_almost_equal(ws.FetchBlob(op.output[0]), vals)
def test_initializer(self):
    """A graph input backed by an initializer ('weight') must run without the
    caller feeding that input explicitly."""
    X = np.array([[1, 2], [3, 4]]).astype(np.float32)
    Y = np.array([[1, 2], [3, 4]]).astype(np.float32)
    weight = np.array([[1, 0], [0, 1]])
    nodes = [
        make_node("Add", ["X", "Y"], ["Z0"]),
        make_node("Cast", ["Z0"], ["Z"], to="float"),
        make_node("Mul", ["Z", "weight"], ["W0"]),
        make_node("Tanh", ["W0"], ["W1"]),
        make_node("Sigmoid", ["W1"], ["W2"]),
        make_node("Scale", ["W2"], ["W3"], scale=-1.0),
    ]
    graph_def = make_graph(
        nodes,
        name="test_initializer",
        inputs=[
            make_tensor_value_info("X", onnx.TensorProto.FLOAT, (2, 2)),
            make_tensor_value_info("Y", onnx.TensorProto.FLOAT, (2, 2)),
            make_tensor_value_info("weight", onnx.TensorProto.FLOAT, (2, 2)),
        ],
        outputs=[
            make_tensor_value_info("W3", onnx.TensorProto.FLOAT, (2, 2))
        ],
        initializer=[make_tensor("weight",
                                 onnx.TensorProto.FLOAT,
                                 [2, 2],
                                 weight.flatten().astype(float))]
    )

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    # Reference result of the Add -> Mul -> Tanh -> Sigmoid -> Scale(-1) chain.
    W_ref = -sigmoid(np.tanh((X + Y) * weight))
    c2_rep = c2.prepare(make_model(graph_def, producer_name='caffe2-ref-test'))
    output = c2_rep.run({"X": X, "Y": Y})
    np.testing.assert_almost_equal(output["W3"], W_ref)
def test_onnx_to_caffe2_loop(self):
    """A Loop running MatMul 10 times must match 10 successive numpy matmuls."""
    body_nodes = [helper.make_node("MatMul", ["_X", "W"], ["_Y"])]
    nodes = self._make_fake_loop_op(body_nodes,
                                    [(TensorProto.FLOAT, (2, 2), "X")],
                                    [(TensorProto.FLOAT, (2, 2), "Y")])
    X = np.random.rand(2, 2).astype(np.float32)
    W = np.random.rand(2, 2).flatten().astype(np.float32)
    graph_def = helper.make_graph(
        nodes,
        "test",
        [helper.make_tensor_value_info("X", TensorProto.FLOAT, (2, 2)),
         helper.make_tensor_value_info("W", TensorProto.FLOAT, (2, 2))],
        [helper.make_tensor_value_info("Y", TensorProto.FLOAT, (2, 2))],
        initializer=[helper.make_tensor("W",
                                        TensorProto.FLOAT,
                                        [2, 2],
                                        W.tolist())]
    )
    model_def = helper.make_model(graph_def, producer_name='onnx-to-caffe2-test')
    # Reference: apply W ten times (the fake loop's fixed trip count).
    W_mat = W.reshape(2, 2)
    Y = X
    for _ in range(10):
        Y = np.matmul(Y, W_mat)
    prepared = c2.prepare(model_def)
    out = prepared.run(X)
    np.testing.assert_allclose(out.Y, Y)
def test_extract_constant_to_initializer(self):  # type: () -> None
    """The pass must turn the Constant node's tensor into an initializer 'A'
    that is also exposed as a graph input, dropping the Constant node."""
    bias_tensor = helper.make_tensor(
        name="bias",
        data_type=TensorProto.FLOAT,
        dims=(16,),
        vals=np.random.randn(16).astype(np.float32).tolist())
    conv = helper.make_node("Conv", ["X", "Y"], ["Z"])
    constant = helper.make_node("Constant", [], ["A"], value=bias_tensor)
    add = helper.make_node("Add", ["Z", "A"], ["B"])
    graph = helper.make_graph(
        [conv, constant, add],
        "test",
        [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 3, 3)),
         helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 3, 3))],
        [helper.make_tensor_value_info("B", TensorProto.FLOAT, (1, 16, 3, 3))],
    )
    optimized_model = self._optimized(graph, ["extract_constant_to_initializer"])

    self.assertEqual({vi.name for vi in optimized_model.graph.input},
                     {'X', 'Y', 'A'})
    self.assertEqual(len(optimized_model.graph.initializer), 1)
    init = optimized_model.graph.initializer[0]
    self.assertEqual(init.name, 'A')
    self.assertEqual(init.dims, [16])
    self.assertEqual(init.data_type, TensorProto.FLOAT)
    self.assertEqual([n.op_type for n in optimized_model.graph.node],
                     ['Conv', 'Add'])
def caffe2_init_net_to_initializer(cls, init_net):
    """Translate a Caffe2 init_net's GivenTensor*Fill ops into ONNX
    TensorProto initializers; raises RuntimeError on unsupported op types."""
    # Supported fill ops -> (ONNX element type, argument field holding data).
    fill_op_table = {
        'GivenTensorFill': (TensorProto.FLOAT, 'floats'),
        'GivenTensorInt64Fill': (TensorProto.INT64, 'ints'),
        'GivenTensorIntFill': (TensorProto.INT32, 'ints'),
        'GivenTensorBoolFill': (TensorProto.BOOL, 'ints'),
        'GivenTensorStringFill': (TensorProto.STRING, 'strings'),
    }
    initializer = []
    for op in init_net.op:
        assert not op.input
        try:
            data_type, field_name = fill_op_table[op.type]
        except KeyError:
            raise RuntimeError(
                "Can not translate init_net with operator '{}' "
                "to initializer".format(op.type)
            )
        # Strings stay as repeated bytes; numeric data is packed into raw bytes.
        raw = (data_type != TensorProto.STRING)
        args = {a.name: a for a in op.arg}
        vals = getattr(args['values'], field_name)
        if raw:
            vals = np.asarray(
                vals,
                dtype=mapping.TENSOR_TYPE_TO_NP_TYPE[data_type]).tobytes()
        initializer.append(make_tensor(
            name=op.output[0],
            data_type=data_type,
            dims=args['shape'].ints,
            vals=vals,
            raw=raw,
        ))
    return initializer
def test_eliminate_unused_initializer_no_eliminate_used(self):  # type: () -> None
    """An initializer consumed both by the main graph and a Loop body must
    survive the eliminate_unused_initializer pass."""
    nodes = [helper.make_node("Add", ["X", "A"], ["Z"])]
    nodes += self._make_fake_loop_op(
        [helper.make_node("Add", ["_X", "_A"], ["_Z2"])],
        [(TensorProto.FLOAT, (1, 2), "X"),
         (TensorProto.FLOAT, (1, 2), "A")],
        [(TensorProto.FLOAT, (1, 2), "Z2")])
    graph = helper.make_graph(
        nodes,
        "test",
        [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 2)),
         helper.make_tensor_value_info("A", TensorProto.FLOAT, (1, 2))],
        [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 2))],
        [helper.make_tensor("A", TensorProto.FLOAT,
                            dims=(1, 2),
                            vals=np.random.randn(1, 2).astype(np.float32).tobytes(),
                            raw=True)])
    optimized_model = self._optimized(graph, ["eliminate_unused_initializer"])

    # Add, Constant (trip count), Constant (cond), Loop
    assert len(list(optimized_model.graph.node)) == 4
    assert optimized_model.graph.node[0].op_type == "Add"
    assert optimized_model.graph.output[0].name == "Z"

    # Add
    loop_body = optimized_model.graph.node[3].attribute[0].g
    assert len(loop_body.node) == 1
    assert loop_body.node[0].op_type == 'Add'
    assert loop_body.output[1].name == '_Z2'

    assert len(list(optimized_model.graph.initializer)) == 1
def test_fuse_add_bias_into_conv_use_move_constant(self):  # type: () -> None
    """fuse_add_bias_into_conv should move the Constant bias through a
    Squeeze into Conv's bias input when its shape is known via value_info."""
    bias_tensor = helper.make_tensor(
        name="bias",
        data_type=TensorProto.FLOAT,
        dims=(16,),
        vals=np.random.randn(16).astype(np.float32).tolist())
    conv = helper.make_node("Conv", ["X", "Y"], ["Z"])
    constant = helper.make_node("Constant", [], ["A"], value=bias_tensor)
    add = helper.make_node("Add", ["Z", "A"], ["B"])
    graph = helper.make_graph(
        [conv, constant, add],
        "test",
        [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 3, 3)),
         helper.make_tensor_value_info("Y", TensorProto.FLOAT, (16, 5, 3, 3))],
        [helper.make_tensor_value_info("B", TensorProto.FLOAT, (1, 16, 3, 3))],
        value_info=[
            helper.make_tensor_value_info("A", TensorProto.FLOAT, (16, 1, 1)),
        ]
    )
    optimized_model = self._optimized(graph, ["fuse_add_bias_into_conv"])

    assert [n.op_type for n in optimized_model.graph.node] == \
        ['Constant', 'Squeeze', 'Conv']
    graph_output = optimized_model.graph.output[0]
    assert graph_output.name == 'Z'
    assert graph_output.type.tensor_type.elem_type == TensorProto.FLOAT
    assert len(graph_output.type.tensor_type.shape.dim) == 4
def _sample_float_tensor(self):  # type: () -> TensorProto
    """Build a 2x3 float32 TensorProto named 'test' with random values."""
    data = np.random.randn(2, 3).astype(np.float32)
    return helper.make_tensor(
        name='test',
        data_type=TensorProto.FLOAT,
        dims=(2, 3),
        vals=data.flatten().tolist(),
    )
def _simple_tensor(self):  # type: () -> TensorProto
    """Build a fixed 2x3x4 float TensorProto with values 0.5, 1.5, ..., 23.5."""
    return helper.make_tensor(
        name='test-tensor',
        data_type=TensorProto.FLOAT,
        dims=(2, 3, 4),
        vals=[index + 0.5 for index in range(24)],
    )
def test_attr_repeated_tensor_proto(self):  # type: () -> None
    """make_attribute must store a list of TensorProtos verbatim and pass
    the checker."""
    tensors = [
        helper.make_tensor(
            name=tensor_name,
            data_type=TensorProto.FLOAT,
            dims=(1,),
            vals=np.ones(1).tolist()
        )
        for tensor_name in ('a', 'b')
    ]
    attr = helper.make_attribute("tensors", tensors)
    self.assertEqual(attr.name, "tensors")
    self.assertEqual(list(attr.tensors), tensors)
    checker.check_attribute(attr)
def test_eliminate_unused_initializer_no_eliminate_used_default(self):  # type: () -> None
    """An initializer consumed by a node must not be removed by the pass."""
    add = helper.make_node("Add", ["X", "A"], ["Z"])
    used_initializer = helper.make_tensor(
        "A", TensorProto.FLOAT,
        dims=(1, 2),
        vals=np.random.randn(1, 2).astype(np.float32).tobytes(),
        raw=True)
    graph = helper.make_graph(
        [add],
        "test",
        [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 2)),
         helper.make_tensor_value_info("A", TensorProto.FLOAT, (1, 2))],
        [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (1, 2))],
        [used_initializer])
    optimized_model = self._optimized(graph, ["eliminate_unused_initializer"])
    assert len(list(optimized_model.graph.initializer)) == 1
def _make_fake_loop_op(self, body_nodes, input_types, output_types):
    """Wrap body_nodes in a Loop node with a constant trip count of 10 and a
    constant-true condition; returns the Constant + Loop node list."""
    trip_count_tensor = helper.make_tensor("trip_count_value", TensorProto.INT64, (1,), [10])
    cond_tensor = helper.make_tensor("condition", TensorProto.BOOL, (1,), [True])
    # The body graph always carries the implicit iteration counter and the
    # loop condition first; user-supplied values get a "_" prefix inside it.
    graph_inputs = [
        helper.make_tensor_value_info("i", TensorProto.INT32, ()),
        helper.make_tensor_value_info("cond", TensorProto.BOOL, ()),
    ] + [
        helper.make_tensor_value_info("_" + name, elem_type, shape)
        for elem_type, shape, name in input_types
    ]
    graph_outputs = [
        helper.make_tensor_value_info("cond", TensorProto.BOOL, ()),
    ] + [
        helper.make_tensor_value_info("_" + name, elem_type, shape)
        for elem_type, shape, name in output_types
    ]
    body_graph = helper.make_graph(body_nodes, "body_graph",
                                   graph_inputs, graph_outputs)
    loop_inputs = ["trip_count", "condition"] + [name for _, _, name in input_types]
    loop_outputs = [name for _, _, name in output_types]
    return [
        helper.make_node("Constant", [], ["trip_count"], value=trip_count_tensor),
        helper.make_node("Constant", [], ["condition"], value=cond_tensor),
        helper.make_node("Loop", loop_inputs, loop_outputs, body=body_graph),
    ]
def _make_fake_if_op(self, true_nodes, false_nodes, output_types):
    """Wrap the two branch node lists into an If gated by a constant-true
    condition; both branches expose a single (2, 2) float output '_Y'."""
    cond_tensor = helper.make_tensor("condition", TensorProto.BOOL, (), [True])
    true_graph = helper.make_graph(true_nodes, "true_graph", [], [
        helper.make_tensor_value_info("_Y", TensorProto.FLOAT, (2, 2)),
    ])
    false_graph = helper.make_graph(false_nodes, "false_graph", [], [
        helper.make_tensor_value_info("_Y", TensorProto.FLOAT, (2, 2)),
    ])
    if_outputs = [name for _, _, name in output_types]
    return [
        helper.make_node("Constant", [], ["condition"], value=cond_tensor),
        helper.make_node("If", ["condition"], if_outputs,
                         then_branch=true_graph, else_branch=false_graph),
    ]
def _make_fake_if_op(self,
                     true_nodes,   # type: Sequence[NodeProto]
                     false_nodes,  # type: Sequence[NodeProto]
                     output_types  # type: Sequence[Tuple[TensorProto.DataType, Sequence[int], Text]]
                     ):  # type: (...) -> List[NodeProto]
    """Wrap branch node lists into an If gated by a constant-true condition.

    Note the branch subgraphs declare no explicit inputs or outputs here.
    """
    cond_tensor = helper.make_tensor("condition", TensorProto.BOOL, (), [True])
    true_graph = helper.make_graph(true_nodes, "true_graph", [], [])
    false_graph = helper.make_graph(false_nodes, "false_graph", [], [])
    if_outputs = [name for _, _, name in output_types]
    return [
        helper.make_node("Constant", [], ["condition"], value=cond_tensor),
        helper.make_node("If", ["condition"], if_outputs,
                         then_branch=true_graph, else_branch=false_graph),
    ]
def test_fuse_bn_into_conv_simple(self):  # type: () -> None
    """Conv + BatchNormalization must fold into one Conv whose weight and
    bias absorb the BN statistics, for both float32 and float64."""
    for (tensor_type, np_type) in [(TensorProto.FLOAT, np.float32),
                                   (TensorProto.DOUBLE, np.float64)]:
        conv = helper.make_node("Conv", ["X", "W", "B"], ["Y"])
        bn = helper.make_node("BatchNormalization",
                              ["Y", "scale", "b", "mean", "var"], ["Z"])

        # Shift everything by +2 so values are well away from zero.
        W = np.random.randn(3, 2, 5, 5).astype(np_type) + 2
        B = np.random.randn(3,).astype(np_type) + 2
        scale = np.random.randn(3,).astype(np_type) + 2
        b = np.random.randn(3,).astype(np_type) + 2
        mean = np.random.randn(3,).astype(np_type) + 2
        var = np.abs(np.random.randn(3,).astype(np_type)) + 2

        named_arrays = [('W', W), ('B', B), ('scale', scale),
                        ('b', b), ('mean', mean), ('var', var)]
        initializers = [
            helper.make_tensor(name, tensor_type, npa.shape, npa.tobytes(), raw=True)
            for name, npa in named_arrays
        ]
        graph = helper.make_graph(
            [conv, bn],
            "test",
            [helper.make_tensor_value_info("X", tensor_type, (5, 2, 28, 28)),
             helper.make_tensor_value_info("W", tensor_type, (3, 2, 5, 5)),
             helper.make_tensor_value_info("B", tensor_type, (3,)),
             helper.make_tensor_value_info("scale", tensor_type, (3,)),
             helper.make_tensor_value_info("b", tensor_type, (3,)),
             helper.make_tensor_value_info("mean", tensor_type, (3,)),
             helper.make_tensor_value_info("var", tensor_type, (3,))],
            [helper.make_tensor_value_info("Z", tensor_type, (3,))],
            initializer=initializers,
            value_info=[
                helper.make_tensor_value_info("Y", tensor_type, (3,))
            ]
        )
        optimized_model = self._optimized(graph, ["fuse_bn_into_conv"])

        self.assertEqual(len(optimized_model.graph.node), 1)
        self.assertEqual(optimized_model.graph.node[0].op_type, 'Conv')
        self.assertEqual(len(optimized_model.graph.initializer), 2)
        new_W = numpy_helper.to_array(optimized_model.graph.initializer[0])
        new_b = numpy_helper.to_array(optimized_model.graph.initializer[1])

        # Folding: f = scale / sqrt(var + eps); W' = W * f; B' = (B - mean) * f + b.
        f = scale / np.sqrt(var + 1e-5)
        np.testing.assert_almost_equal((B - mean) * f + b, new_b)
        np.testing.assert_almost_equal(
            W * f[:, np.newaxis, np.newaxis, np.newaxis], new_W)
def test_model_with_initializer(self):
    """Expand driven by a shape initializer ('Y') followed by Scale must run
    end-to-end and broadcast the input correctly."""
    X = helper.make_tensor_value_info('X', TensorProto.FLOAT, [3, 1])
    Z2 = helper.make_tensor_value_info('Z2', TensorProto.FLOAT, [2, 3, 6])
    expand_node_def = helper.make_node('Expand', ['X', 'Y'], ['Z1'])
    cast_node_def = helper.make_node('Scale', ['Z1'], ['Z2'])
    shape_initializer = helper.make_tensor('Y', TensorProto.INT64, (3,), (2, 1, 6))
    graph_def = helper.make_graph([expand_node_def, cast_node_def],
                                  "test-node", [X], [Z2],
                                  initializer=[shape_initializer])
    onnx_model = helper.make_model(graph_def, producer_name='onnx-example')

    model = Model()
    model.BuildFromOnnxModel(onnx_model)
    input_data = np.random.rand(3, 1)
    outputs = model.run([input_data])
    # Broadcasting (3, 1) against ones of shape (2, 1, 6) yields (2, 3, 6).
    expected = input_data * np.ones([2, 1, 6], dtype=np.float32)
    np.testing.assert_allclose(expected, outputs[0])
def test_tensors_rank_zero(self):
    """Rank-0 tensors must work both as a node output (Size) and as a
    scalar initializer passed straight through to a graph output."""
    X = helper.make_tensor_value_info('X', TensorProto.FLOAT, [3, 2])
    S1 = helper.make_tensor_value_info('S1', TensorProto.INT64, [])
    S2 = helper.make_tensor_value_info('S2', TensorProto.FLOAT, [])
    size_node = helper.make_node('Size', ['X'], ['S1'])
    scalar_initializer = helper.make_tensor('S2', TensorProto.FLOAT, (), (3.14,))
    graph_def = helper.make_graph([size_node], "rank_zero_test",
                                  [X], [S1, S2],
                                  initializer=[scalar_initializer])
    onnx_model = helper.make_model(graph_def, producer_name='onnx-example')

    model = Model()
    model.BuildFromOnnxModel(onnx_model)
    outputs = model.run([np.random.rand(3, 2)])
    self.assertEqual(6, outputs[0])
    self.assertAlmostEqual(3.14, outputs[1])
def test_onnx_to_caffe2(self):
    """The onnx_to_caffe2 CLI must emit a predict net with the Mul op and an
    init net that produces exactly the initializer 'W'."""
    onnx_model = tempfile.NamedTemporaryFile()
    output = tempfile.NamedTemporaryFile()
    init_net_output = tempfile.NamedTemporaryFile()

    node_def = helper.make_node("Mul", ["X", "W"], ["Y"])
    graph_def = helper.make_graph(
        [node_def],
        "test",
        [helper.make_tensor_value_info("X", TensorProto.FLOAT, (2, 3)),
         helper.make_tensor_value_info("W", TensorProto.FLOAT, (3, 2))],
        [helper.make_tensor_value_info("Y", TensorProto.FLOAT, (2, 2))],
        initializer=[helper.make_tensor("W",
                                        TensorProto.FLOAT,
                                        [3, 2],
                                        np.zeros((3, 2)).flatten().astype(float))])
    model_def = helper.make_model(graph_def, producer_name='onnx-to-caffe2-test')
    onnx_model.write(model_def.SerializeToString())
    onnx_model.flush()

    result = self._run_command(
        onnx_to_caffe2, [
            onnx_model.name,
            '--output', output.name,
            '--init-net-output', init_net_output.name,
        ])

    caffe2_net = caffe2_pb2.NetDef()
    caffe2_net.ParseFromString(output.read())
    self.assertEqual(len(caffe2_net.op), 1)
    self.assertEqual(caffe2_net.op[0].type, 'Mul')

    caffe2_init_net = caffe2_pb2.NetDef()
    caffe2_init_net.ParseFromString(init_net_output.read())
    self.assertEqual(len(caffe2_init_net.op), 1)
    # Collect every output name the init net produces.
    init_outputs = {name
                    for init_op in caffe2_init_net.op
                    for name in init_op.output}
    self.assertEqual(init_outputs, {'W'})
def make_graph(node, inputs):
    """Create an ONNX GraphProto wrapping a single node.

    Every input/output gets a FLOAT value_info with placeholder shape [1].
    An input named 'W' is treated as a weight and additionally emitted as an
    initializer shaped like the corresponding entry of `inputs`.
    """
    initializer = []
    tensor_input_info = []
    for index, input_name in enumerate(node.input):
        tensor_input_info.append(
            helper.make_tensor_value_info(str(input_name), TensorProto.FLOAT, [1]))
        # Weight params are assumed to be named 'W'.
        if input_name == 'W':
            initializer.append(helper.make_tensor(
                name=input_name,
                data_type=TensorProto.FLOAT,
                dims=inputs[index].shape,
                vals=inputs[index].flatten()))
    tensor_output_info = [
        helper.make_tensor_value_info(str(output_name), TensorProto.FLOAT, [1])
        for output_name in node.output
    ]
    return helper.make_graph(
        [node],
        "test",
        tensor_input_info,
        tensor_output_info,
        initializer=initializer)
def test_onnx_to_caffe2_if(self):
    """An If with a constant-true condition must run the then-branch
    (MatMul), never the else-branch (Slice)."""
    true_nodes = [helper.make_node("MatMul", ["X", "W"], ["_Y"])]
    false_nodes = [helper.make_node("Slice", ["X"], ["_Y"],
                                    axes=[0, 1], starts=[0, 0], ends=[0, 2])]
    nodes = self._make_fake_if_op(true_nodes, false_nodes,
                                  [(TensorProto.FLOAT, (2, 2), "Y")])
    X = np.random.rand(2, 3).astype(np.float32)
    W = np.random.rand(3, 2).flatten().astype(np.float32)
    graph_def = helper.make_graph(
        nodes,
        "test",
        [helper.make_tensor_value_info("X", TensorProto.FLOAT, (2, 3)),
         helper.make_tensor_value_info("W", TensorProto.FLOAT, (3, 2))],
        [helper.make_tensor_value_info("Y", TensorProto.FLOAT, (2, 2))],
        initializer=[helper.make_tensor("W",
                                        TensorProto.FLOAT,
                                        [3, 2],
                                        W.tolist())]
    )
    model_def = helper.make_model(graph_def, producer_name='onnx-to-caffe2-test')
    prepared = c2.prepare(model_def)
    expected = np.matmul(X, W.reshape(3, 2))
    out = prepared.run(X)
    np.testing.assert_allclose(out.Y, expected)
def create_gpt2_attention(hidden_size=64, num_heads=4, max_seq_len=32, switch_add_inputs=False):
    """Build a GPT-2 style attention subgraph as an ONNX ModelProto.

    The graph contains LayerNorm -> fused QKV projection -> split heads ->
    causal-masked attention with past/present key-value state -> output
    projection -> skip connection -> LayerNorm.

    Args:
        hidden_size: model hidden dimension (assumed divisible by num_heads).
        num_heads: number of attention heads.
        max_seq_len: side length of the precomputed unidirectional mask.
        switch_add_inputs: if True, reverse the operand order of several Add
            nodes via reverse_if (exercises order-insensitive pattern matching).

    Returns:
        An onnx ModelProto; batch/sequence dims are symbolic strings.
    """
    # unsqueeze in opset version 13 has two inputs (axis is moved from attribute to input).
    is_opset_13_or_newer = version.parse(
        onnx.__version__) >= version.parse("1.8.0")
    # nodes in attention subgraph
    nodes = [
        helper.make_node("Add", ["input_1", "input_2"], ["layernorm_input"], "add_layernorm"),
        helper.make_node(
            "LayerNormalization",
            ["layernorm_input", "layer_norm_weight", "layer_norm_bias"],
            ["layernorm_out"],
            "layernorm",
            # NOTE(review): "epsion" looks like a typo for "epsilon"; as written
            # it becomes a custom attribute of that name. Confirm downstream
            # fusion tolerates/expects this before renaming it.
            epsion=0.000009999999747378752,
        ),
        # fully connection nodes
        helper.make_node(
            "MatMul",
            ["layernorm_out", "matmul_fc_weight"],
            ["matmul_fc_out"],
            "matmul_fc",
        ),
        helper.make_node(
            "Add",
            reverse_if(["matmul_fc_out", "add_fc_weight"], switch_add_inputs),
            ["fc_out"],
            "add_fc",
        ),
        # opset >= 13 passes the split sizes as a tensor input; older opsets
        # use the `split` attribute.
        helper.make_node("Split", ["fc_out", "split_q_k_v"], ["q", "k", "v"], "split_qkv", axis=2)
        if is_opset_13_or_newer else helper.make_node(
            "Split",
            ["fc_out"],
            ["q", "k", "v"],
            "split_qkv",
            axis=2,
            split=[hidden_size, hidden_size, hidden_size],
        ),
        # q nodes
        helper.make_node("Reshape", ["q", "reshape_x_shape"], ["reshape_q_out"], "reshape_q"),
        helper.make_node(
            "Transpose",
            ["reshape_q_out"],
            ["transpose_q_out"],
            "transpose_q",
            perm=[0, 2, 1, 3],
        ),
        # k nodes
        helper.make_node("Reshape", ["k", "reshape_x_shape"], ["reshape_k_out"], "reshape_k"),
        helper.make_node(
            "Transpose",
            ["reshape_k_out"],
            ["transpose_k_out"],
            "transpose_k",
            perm=[0, 2, 1, 3],
        ),
        # v nodes
        helper.make_node("Reshape", ["v", "reshape_x_shape"], ["reshape_v_out"], "reshape_v"),
        helper.make_node(
            "Transpose",
            ["reshape_v_out"],
            ["transpose_v_out"],
            "transpose_v",
            perm=[0, 2, 1, 3],
        ),
        # past
        helper.make_node("Split", ["past", "split_1_1"], ["split_k", "split_v"], "split_past", axis=0)
        if is_opset_13_or_newer else helper.make_node(
            "Split",
            ["past"],
            ["split_k", "split_v"],
            "split_past",
            axis=0,
            split=[1, 1],
        ),
        helper.make_node("Squeeze", ["split_k", "axes_0"], ["past_k"], "squeeze_past_k")
        if is_opset_13_or_newer else helper.make_node(
            "Squeeze", ["split_k"], ["past_k"], "squeeze_past_k", axes=[0]),
        helper.make_node(
            "Concat",
            ["past_k", "transpose_k_out"],
            ["concat_k_out"],
            "concat_k",
            axis=-2,
        ),
        helper.make_node(
            "Transpose",
            ["concat_k_out"],
            ["concat_k_transpose_out"],
            "transpose_concat_k",
            perm=[0, 1, 3, 2],
        ),
        helper.make_node("Squeeze", ["split_v", "axes_0"], ["past_v"], "squeeze_past_v")
        if is_opset_13_or_newer else helper.make_node(
            "Squeeze", ["split_v"], ["past_v"], "squeeze_past_v", axes=[0]),
        helper.make_node(
            "Concat",
            ["past_v", "transpose_v_out"],
            ["concat_v_out"],
            "concat_v",
            axis=-2,
        ),
        # present
        helper.make_node(
            "Unsqueeze",
            ["concat_k_out", "axes_0"],
            ["concat_k_unsqueeze_out"],
            "concat_k_unsqueeze",
        ) if is_opset_13_or_newer else helper.make_node(
            "Unsqueeze",
            ["concat_k_out"],
            ["concat_k_unsqueeze_out"],
            "concat_k_unsqueeze",
            axes=[0],
        ),
        helper.make_node(
            "Unsqueeze",
            ["concat_v_out", "axes_0"],
            ["concat_v_unsqueeze_out"],
            "concat_v_unsqueeze",
        ) if is_opset_13_or_newer else helper.make_node(
            "Unsqueeze",
            ["concat_v_out"],
            ["concat_v_unsqueeze_out"],
            "concat_v_unsqueeze",
            axes=[0],
        ),
        helper.make_node(
            "Concat",
            ["concat_k_unsqueeze_out", "concat_v_unsqueeze_out"],
            ["present"],
            "concat_present",
            axis=0,
        ),
        helper.make_node("Shape", ["transpose_q_out"], ["transpose_q_shape_out"], "transpose_q_shape"),
        helper.make_node(
            "Slice",
            ["transpose_q_shape_out", "starts_n2", "ends_n1", "axes_0"],
            ["transpose_q_shape_slice_out"],
            "transpose_q_shape_slice",
        ),
        helper.make_node(
            "Squeeze",
            ["transpose_q_shape_slice_out", "axes_0"],
            ["transpose_q_shape_slice_squeeze_out"],
            "transpose_q_shape_slice_squeeze",
        ) if is_opset_13_or_newer else helper.make_node(
            "Squeeze",
            ["transpose_q_shape_slice_out"],
            ["transpose_q_shape_slice_squeeze_out"],
            "transpose_q_shape_slice_squeeze",
            axes=[0],
        ),
        helper.make_node("Shape", ["concat_k_out"], ["concat_k_shape_out"], "concat_k_shape"),
        helper.make_node(
            "Slice",
            ["concat_k_shape_out", "starts_n2", "ends_n1", "axes_0"],
            ["concat_k_shape_slice_out"],
            "concat_k_shape_slice",
        ),
        helper.make_node(
            "Squeeze",
            ["concat_k_shape_slice_out", "axes_0"],
            ["concat_k_shape_slice_squeeze_out"],
            "concat_k_shape_slice_squeeze",
        ) if is_opset_13_or_newer else helper.make_node(
            "Squeeze",
            ["concat_k_shape_slice_out"],
            ["concat_k_shape_slice_squeeze_out"],
            "concat_k_shape_slice_squeeze",
            axes=[0],
        ),
        helper.make_node(
            "Sub",
            [
                "concat_k_shape_slice_squeeze_out",
                "transpose_q_shape_slice_squeeze_out"
            ],
            ["sub_out"],
            "sub",
        ),
        helper.make_node("Unsqueeze", ["sub_out", "axes_0"], ["sub_unsqueeze_out"], "sub_unsqueeze")
        if is_opset_13_or_newer else helper.make_node(
            "Unsqueeze", ["sub_out"], ["sub_unsqueeze_out"], "sub_unsqueeze", axes=[0]),
        helper.make_node(
            "Unsqueeze",
            ["concat_k_shape_slice_squeeze_out", "axes_0"],
            ["concat_k_shape_slice_squeeze_unsqueeze_out"],
            "concat_k_shape_slice_squeeze_unsqueeze",
        ) if is_opset_13_or_newer else helper.make_node(
            "Unsqueeze",
            ["concat_k_shape_slice_squeeze_out"],
            ["concat_k_shape_slice_squeeze_unsqueeze_out"],
            "concat_k_shape_slice_squeeze_unsqueeze",
            axes=[0],
        ),
        helper.make_node(
            "Slice",
            [
                "undir_mask",
                "sub_unsqueeze_out",
                "concat_k_shape_slice_squeeze_unsqueeze_out",
                "axes_2",
                "steps_1",
            ],
            ["undir_mask_slice_out"],
            "undir_mask_slice",
        ),
        helper.make_node(
            "Slice",
            [
                "undir_mask_slice_out",
                "starts_0",
                "concat_k_shape_slice_squeeze_unsqueeze_out",
                "axes_3",
                "steps_1",
            ],
            ["mask_slice_slice_out"],
            "mask_slice_slice",
        ),
        helper.make_node(
            "Cast",
            ["mask_slice_slice_out"],
            ["undir_mask_out"],
            "undir_mask_cast",
            to=9,  # 9 == TensorProto.BOOL; the result feeds Where's condition
        ),
        # mask nodes
        helper.make_node(
            "Reshape",
            ["input_mask", "input_mask_shape"],
            ["input_mask_reshape_out"],
            "input_mask_reshape",
        ),
        helper.make_node(
            "Unsqueeze",
            ["input_mask_reshape_out", "axes_1"],
            ["unsqueeze0_out"],
            "unsqueeze0",
        ) if is_opset_13_or_newer else helper.make_node(
            "Unsqueeze",
            ["input_mask_reshape_out"],
            ["unsqueeze0_out"],
            "unsqueeze0",
            axes=[1],
        ),
        helper.make_node("Unsqueeze", ["unsqueeze0_out", "axes_2"], ["unsqueeze1_out"], "unsqueeze1")
        if is_opset_13_or_newer else helper.make_node(
            "Unsqueeze", ["unsqueeze0_out"], ["unsqueeze1_out"], "unsqueeze1", axes=[2]),
        helper.make_node("Sub", ["sub_weight", "unsqueeze1_out"], ["mask_sub_out"], "sub_mask"),
        helper.make_node("Mul", ["mask_sub_out", "mul_weight"], ["mul_mask_out"], "mul_mask"),
        # qk nodes
        helper.make_node(
            "MatMul",
            ["transpose_q_out", "concat_k_transpose_out"],
            ["qk_out"],
            "matmul_qk",
        ),
        helper.make_node("Div", ["qk_out", "div_weight"], ["qk_norm_out"], "qk_norm"),
        helper.make_node(
            "Where",
            ["undir_mask_out", "qk_norm_out", "where_weight"],
            ["where_out"],
            "where",
        ),
        helper.make_node(
            "Add",
            reverse_if(["where_out", "mul_mask_out"], switch_add_inputs),
            ["add_mask_out"],
            "add_mask",
        ),
        helper.make_node("Softmax", ["add_mask_out"], ["softmax_out"], "softmax", axis=3),
        # qkv nodes
        helper.make_node(
            "MatMul",
            ["softmax_out", "concat_v_out"],
            ["matmul_qkv_1_out"],
            "matmul_qk_v",
        ),
        helper.make_node(
            "Transpose",
            ["matmul_qkv_1_out"],
            ["transpose_qkv_out"],
            "transpose_qkv",
            perm=[0, 2, 1, 3],
        ),
        helper.make_node(
            "Reshape",
            ["transpose_qkv_out", "reshape_weight_qkv"],
            ["reshape_qkv_out"],
            "reshape_qkv",
        ),
        helper.make_node("Shape", ["reshape_qkv_out"], ["qkv_shape"], "shape_qkv"),
        helper.make_node(
            "Slice",
            ["qkv_shape", "starts_n1", "ends_inf", "axes_0"],
            ["qkv_shape_slice_out"],
            "qkv_shape_slice",
        ),
        helper.make_node(
            "Squeeze",
            ["qkv_shape_slice_out", "axes_0"],
            ["qkv_shape_slice_squeeze_out"],
            "qkv_shape_slice_squeeze",
        ) if is_opset_13_or_newer else helper.make_node(
            "Squeeze",
            ["qkv_shape_slice_out"],
            ["qkv_shape_slice_squeeze_out"],
            "qkv_shape_slice_squeeze",
            axes=[0],
        ),
        helper.make_node(
            "Unsqueeze",
            ["qkv_shape_slice_squeeze_out", "axes_0"],
            ["qkv_shape_slice_squeeze_unsqueeze_out"],
            "qkv_shape_slice_squeeze_unsqueeze",
        ) if is_opset_13_or_newer else helper.make_node(
            "Unsqueeze",
            ["qkv_shape_slice_squeeze_out"],
            ["qkv_shape_slice_squeeze_unsqueeze_out"],
            "qkv_shape_slice_squeeze_unsqueeze",
            axes=[0],
        ),
        helper.make_node(
            "Concat",
            ["concat_n1", "qkv_shape_slice_squeeze_unsqueeze_out"],
            ["qkv_shape_slice_squeeze_unsqueeze_concat_out"],
            "qkv_shape_slice_squeeze_unsqueeze_concat",
            axis=0,
        ),
        helper.make_node(
            "Reshape",
            [
                "reshape_qkv_out",
                "qkv_shape_slice_squeeze_unsqueeze_concat_out"
            ],
            ["qkv_reshape_out"],
            "qkv_reshape",
        ),
        helper.make_node(
            "Gemm",
            ["qkv_reshape_out", "gemm_weight", "gemm_bias"],
            ["gemm_out"],
            "gemm",
            alpha=1.0,
            beta=1.0,
            transA=0,
            transB=0,
        ),
        helper.make_node(
            "Gather",
            ["qkv_shape", "indices_1"],
            ["qkv_shape_1"],
            "shape_qkv_gather_1",
            axis=0,
        ),
        helper.make_node(
            "Gather",
            ["qkv_shape", "indices_0"],
            ["qkv_shape_0"],
            "shape_qkv_gather_0",
            axis=0,
        ),
        helper.make_node(
            "Unsqueeze",
            ["qkv_shape_1", "axes_0"],
            ["qkv_shape_1_unsqueeze_out"],
            "qkv_shape_1_unsqueeze",
        ) if is_opset_13_or_newer else helper.make_node(
            "Unsqueeze",
            ["qkv_shape_1"],
            ["qkv_shape_1_unsqueeze_out"],
            "qkv_shape_1_unsqueeze",
            axes=[0],
        ),
        helper.make_node(
            "Unsqueeze",
            ["qkv_shape_0", "axes_0"],
            ["qkv_shape_0_unsqueeze_out"],
            "qkv_shape_0_unsqueeze",
        ) if is_opset_13_or_newer else helper.make_node(
            "Unsqueeze",
            ["qkv_shape_0"],
            ["qkv_shape_0_unsqueeze_out"],
            "qkv_shape_0_unsqueeze",
            axes=[0],
        ),
        helper.make_node(
            "Concat",
            [
                "qkv_shape_0_unsqueeze_out", "qkv_shape_1_unsqueeze_out",
                "qkv_hidden"
            ],
            ["shape_qkv_concat_out"],
            "shape_qkv_concat",
            axis=0,
        ),
        helper.make_node(
            "Reshape",
            ["gemm_out", "shape_qkv_concat_out"],
            ["gemm_reshape_out"],
            "gemm_reshape",
        ),
        helper.make_node(
            "Add",
            reverse_if(["gemm_reshape_out", "layernorm_input"], switch_add_inputs),
            ["skip_output"],
            "add_skip",
        ),
        helper.make_node(
            "LayerNormalization",
            ["skip_output", "layer_norm_weight", "layer_norm_bias"],
            ["output"],
            "layernorm2",
            # NOTE(review): same "epsion" spelling as the first LayerNormalization.
            epsion=0.000009999999747378752,
        ),
    ]

    head_size = int(hidden_size // num_heads)
    # Lower-triangular (causal) mask flattened to 1-D uint8.
    unidir_mask = (numpy.tril(numpy.ones(
        (max_seq_len, max_seq_len))).reshape([max_seq_len * max_seq_len
                                              ]).astype(numpy.uint8))
    initializers = [  # initializers
        float_tensor("layer_norm_weight", [hidden_size]),
        float_tensor("layer_norm_bias", [hidden_size]),
        float_tensor("matmul_fc_weight", [hidden_size, 3 * hidden_size]),
        float_tensor("add_fc_weight", [3 * hidden_size]),
        float_tensor("gemm_weight", [hidden_size, hidden_size]),
        float_tensor("gemm_bias", [hidden_size]),
        helper.make_tensor(
            "undir_mask",
            TensorProto.UINT8,
            [1, 1, max_seq_len, max_seq_len],
            unidir_mask.tolist(),
        ),
        helper.make_tensor("div_weight", TensorProto.FLOAT, [], [math.sqrt(head_size)]),
        helper.make_tensor("sub_weight", TensorProto.FLOAT, [], [1.0]),
        helper.make_tensor("where_weight", TensorProto.FLOAT, [], [-10000.0]),
        helper.make_tensor("mul_weight", TensorProto.FLOAT, [], [-10000]),
        helper.make_tensor("input_mask_shape", TensorProto.INT64, [2], [0, -1]),
        helper.make_tensor("starts_0", TensorProto.INT64, [1], [0]),
        helper.make_tensor("concat_n1", TensorProto.INT64, [1], [-1]),
        helper.make_tensor("starts_n1", TensorProto.INT64, [1], [-1]),
        helper.make_tensor("ends_inf", TensorProto.INT64, [1], [9223372036854775807]),
        helper.make_tensor("starts_n2", TensorProto.INT64, [1], [-2]),
        helper.make_tensor("ends_n1", TensorProto.INT64, [1], [-1]),
        helper.make_tensor("axes_0", TensorProto.INT64, [1], [0]),
        helper.make_tensor("axes_2", TensorProto.INT64, [1], [2]),
        helper.make_tensor("axes_3", TensorProto.INT64, [1], [3]),
        helper.make_tensor("steps_1", TensorProto.INT64, [1], [1]),
        helper.make_tensor("indices_0", TensorProto.INT64, [], [0]),
        helper.make_tensor("indices_1", TensorProto.INT64, [], [1]),
        helper.make_tensor("qkv_hidden", TensorProto.INT64, [1], [hidden_size]),
        helper.make_tensor("reshape_x_shape", TensorProto.INT64, [4],
                           [0, 0, num_heads, head_size]),
        helper.make_tensor("reshape_weight_qkv", TensorProto.INT64, [3],
                           [0, 0, hidden_size]),
    ]
    if is_opset_13_or_newer:
        # opset >= 13 needs the axis/split values as tensors instead of attributes.
        initializers.append(
            helper.make_tensor("split_1_1", TensorProto.INT64, [2], [1, 1]))
        initializers.append(
            helper.make_tensor(
                "split_q_k_v",
                TensorProto.INT64,
                [3],
                [hidden_size, hidden_size, hidden_size],
            ))
        initializers.append(
            helper.make_tensor("axes_1", TensorProto.INT64, [1], [1]))

    # NOTE(review): these three locals are unused below -- the graph declares
    # its batch/sequence dimensions as symbolic strings instead.
    batch_size = 1
    sequence_length = 3
    past_sequence_length = 2

    graph = helper.make_graph(
        [node for node in nodes if node],
        "GPT2",  # name
        [  # inputs
            helper.make_tensor_value_info(
                "input_1",
                TensorProto.FLOAT,
                ["batch_size", "sequence_length", hidden_size],
            ),
            helper.make_tensor_value_info(
                "input_2",
                TensorProto.FLOAT,
                ["batch_size", "sequence_length", hidden_size],
            ),
            helper.make_tensor_value_info(
                "input_mask",
                TensorProto.FLOAT,
                ["batch_size", "past_sequence_length + sequence_length"],
            ),
            helper.make_tensor_value_info(
                "past",
                TensorProto.FLOAT,
                [
                    2, "batch_size", num_heads, "past_sequence_length",
                    head_size
                ],
            ),
        ],
        [  # outputs
            helper.make_tensor_value_info(
                "output",
                TensorProto.FLOAT,
                ["batch_size", "sequence_length", hidden_size],
            ),
            helper.make_tensor_value_info(
                "present",
                TensorProto.FLOAT,
                [
                    2,
                    "batch_size",
                    num_heads,
                    "past_sequence_length + sequence_length",
                    head_size,
                ],
            ),
        ],
        initializers,
    )
    model = helper.make_model(graph)
    return model
def version_9(cls, ctx, node, **kwargs):
    """Rewrite a TF Segment* op (SegmentSum/Mean/SqrtN/Prod/Max/Min, optionally
    Sparse*/Unsorted*/ *WithNumSegments variants) into ONNX ops.

    Strategy: build a one-hot matrix of shape [s, n] mapping each of the n data
    rows to its segment (s = number of segments), then either
      * MatMul the one-hot with the flattened data (fast path for sum-like ops), or
      * broadcast a Where(one_hot, data, identity) and reduce over axis 1.

    ctx is the converter graph context and node the TF node being replaced;
    the node is removed and re-created in place (same name/outputs).
    NOTE(review): ctx/utils/GraphBuilder semantics are taken from the calling
    framework (tf2onnx-style API) — confirm against its docs.
    """
    node_inputs = node.input
    num_segments_specified = False
    # Variants that carry an explicit num_segments as their last input.
    if node.type.endswith("WithNumSegments") or node.type.startswith("Unsorted"):
        num_segments_specified = True
        num_segments = node_inputs.pop()
        node.type = node.type.replace("WithNumSegments", "")
        node.type = node.type.replace("Unsorted", "")
    if node.type.startswith("Sparse"):
        # Sparse variants take (data, indices, segment_ids): gather the selected
        # rows first, then treat as the dense case.
        data_inp, indices_inp, segment_inp = node_inputs
        gather_node = ctx.make_node("Gather", [data_inp, indices_inp], attr={'axis': 0})
        data_inp = gather_node.output[0]
        node.type = node.type.replace("Sparse", "")
    else:
        data_inp, segment_inp = node_inputs
    # Data has shape [n, a, b, ..., c]
    data_shape = ctx.get_shape(data_inp)
    data_rank = len(data_shape) if data_shape is not None else None
    data_dtype = ctx.get_dtype(data_inp)
    data_np_dtype = utils.map_onnx_to_numpy_type(data_dtype)
    seg_np_dtype = utils.map_onnx_to_numpy_type(ctx.get_dtype(segment_inp))
    # num_segments must share the segment ids' dtype for OneHot below.
    if num_segments_specified and ctx.get_dtype(segment_inp) != ctx.get_dtype(num_segments):
        num_segments = ctx.make_node("Cast", [num_segments], attr={
            "to": ctx.get_dtype(segment_inp)
        }).output[0]
    data_is_float = np.dtype(data_np_dtype).kind == 'f'
    data_is_int = np.dtype(data_np_dtype).kind == 'i'
    utils.make_sure(data_is_float or data_is_int,
                    "dtype for Segment ops must be float or int")
    # Pick the ONNX reduction and the identity element used to fill positions
    # that do not belong to a segment (so they don't affect the reduction).
    if node.type in ["SegmentSum", "SegmentMean", "SegmentSqrtN"]:
        onnx_op = "ReduceSum"
        identity_value = np.array(0, dtype=data_np_dtype)
    elif node.type == "SegmentProd":
        onnx_op = "ReduceProd"
        identity_value = np.array(1, dtype=data_np_dtype)
    elif node.type == "SegmentMax":
        onnx_op = "ReduceMax"
        if data_is_float:
            identity_value = np.array('-inf', dtype=data_np_dtype)
        else:
            identity_value = np.iinfo(data_np_dtype).min
    elif node.type == "SegmentMin":
        onnx_op = "ReduceMin"
        if data_is_float:
            identity_value = np.array('inf', dtype=data_np_dtype)
        else:
            identity_value = np.iinfo(data_np_dtype).max
    if not num_segments_specified:
        # Segment ids are sorted for the non-Unsorted variants, so the segment
        # count is max(segment_ids) + 1.
        max_segment = ctx.make_node("ReduceMax", [segment_inp],
                                    attr={
                                        'axes': [0],
                                        'keepdims': 0
                                    })
        one_const = ctx.make_const(utils.make_name("const_one"),
                                   np.array(1, dtype=seg_np_dtype))
        num_segments = ctx.make_node(
            "Add", [max_segment.output[0], one_const.output[0]]).output[0]
    # ORT doesn't support bool for OneHot so we use float32 and cast to bool
    onehot_values = ctx.make_const(utils.make_name("onehot_values"),
                                   np.array([0, 1], dtype=np.float32))
    # one_hot_node has shape [s, n] (s is # segments)
    one_hot_node = ctx.make_node(
        "OneHot", [segment_inp, num_segments, onehot_values.output[0]],
        attr={'axis': 0})
    # Mean/SqrtN divide the per-segment sum by |segment| resp. sqrt(|segment|);
    # the per-segment counts are row sums of the one-hot matrix.
    if node.type == "SegmentMean":
        scaling_node_output = GraphBuilder(ctx).make_reduce_sum({
            "data": one_hot_node.output[0],
            "axes": [1],
            "keepdims": 0,
            "noop_with_empty_axes": 1
        })
    elif node.type == "SegmentSqrtN":
        seg_cnts_node_output = GraphBuilder(ctx).make_reduce_sum({
            "data": one_hot_node.output[0],
            "axes": [1],
            "keepdims": 0,
            "noop_with_empty_axes": 1
        })
        scaling_node_output = ctx.make_node(
            "Sqrt", [seg_cnts_node_output]).output[0]
    else:
        scaling_node_output = None
    if scaling_node_output is not None and num_segments_specified:
        # If empty segments are possible, we must avoid division by zero
        const_one_float = ctx.make_const(
            utils.make_name("const_one_float"),
            np.array(1, dtype=np.float32))
        scaling_node_output = ctx.make_node(
            "Max", [scaling_node_output, const_one_float.output[0]]).output[0]
    if onnx_op == "ReduceSum":
        # If the op is a summation, we can use MatMul instead of Where, which is faster
        # Data shape is [n, a, b, ..., c]
        data_shape_node = ctx.make_node("Shape", [data_inp])
        new_shape = ctx.make_const(utils.make_name("reshape_const"),
                                   np.array([0, -1], dtype=np.int64))
        # Reshape the data from [n, a, b, ..., c] to [n, P]
        data_reshape = ctx.make_node("Reshape", [data_inp, new_shape.output[0]])
        one_hot_cast = one_hot_node
        if data_dtype != onnx_pb.TensorProto.FLOAT:
            one_hot_cast = ctx.make_node("Cast", [one_hot_node.output[0]],
                                         attr={'to': data_dtype})
        # Shapes [s, n] * [n, P] => [s, P]
        product = ctx.make_node(
            "MatMul", [one_hot_cast.output[0], data_reshape.output[0]],
            op_name_scope=node.name)
        if scaling_node_output is not None:
            # Divide each segment row by its (possibly sqrt'ed) element count.
            scaling_node_unsqueeze = ctx.make_node("Unsqueeze",
                                                   [scaling_node_output],
                                                   attr={'axes': [1]})
            product = ctx.make_node(
                "Div", [product.output[0], scaling_node_unsqueeze.output[0]])
        # Create new shape [0, a, b, ..., c]
        max_int64 = int(utils.get_max_value(np.int64))
        new_shape_slice = GraphBuilder(ctx).make_slice({
            "data": data_shape_node.output[0],
            "ends": [max_int64],
            "starts": [1],
            "axes": [0]
        })
        zero_const = ctx.make_const(utils.make_name("zero_const"),
                                    np.array([0], dtype=np.int64))
        new_shape = ctx.make_node("Concat",
                                  [zero_const.output[0], new_shape_slice],
                                  attr={'axis': 0})
        shapes = node.output_shapes
        dtypes = node.output_dtypes
        ctx.remove_node(node.name)
        # Reshape result from [s, P] to [s, a, b, ..., c]
        ctx.make_node("Reshape", [product.output[0], new_shape.output[0]],
                      name=node.name,
                      outputs=node.output,
                      shapes=shapes,
                      dtypes=dtypes)
        return
    # Non-sum reductions: mask the data with Where and reduce over axis 1.
    identity_const = ctx.make_const(utils.make_name("const_identity"),
                                    identity_value)
    one_hot_bool = ctx.make_node("Cast", [one_hot_node.output[0]],
                                 attr={"to": onnx_pb.TensorProto.BOOL})
    one_hot_unsqueeze = one_hot_bool
    # Make one_hot_unsqueeze have shape [s, n, 1, 1, ..., 1]
    if data_rank is None:
        # Unsqueeze requires known rank, but we can use Reshape if rank is unknown
        shape_node = ctx.make_node("Shape", [data_inp])
        rank_node = ctx.make_node("Shape", [shape_node.output[0]])
        one_const_int64 = ctx.make_const(utils.make_name("const_one"),
                                         np.array([1], dtype=np.int64))
        num_unsqueeze_dims = ctx.make_node(
            "Sub", [rank_node.output[0], one_const_int64.output[0]])
        one_tensor = helper.make_tensor("value", onnx_pb.TensorProto.INT64,
                                        dims=[1], vals=[1])
        unsqueeze_dims = ctx.make_node(
            "ConstantOfShape",
            inputs=[num_unsqueeze_dims.output[0]],
            attr={"value": one_tensor})
        # Zero indicates a dimension should be unchanged
        double_zero_const = ctx.make_const(
            utils.make_name("double_zero"), np.array([0, 0], dtype=np.int64))
        expanded_shape = ctx.make_node(
            "Concat", [double_zero_const.output[0], unsqueeze_dims.output[0]],
            attr={'axis': 0})
        one_hot_unsqueeze = ctx.make_node(
            "Reshape", [one_hot_bool.output[0], expanded_shape.output[0]])
    elif data_rank > 1:
        new_dims = list(range(2, 2 + data_rank - 1))
        one_hot_unsqueeze = ctx.make_node("Unsqueeze",
                                          [one_hot_bool.output[0]],
                                          attr={'axes': new_dims})
    # Shape of data: [n, a, b, ..., c]
    # Shape of one_hot: [s, n, 1, 1, ..., 1]
    # Broadcast left-pads shape with 1s, so result is shape: [s, n, a, b, ..., c]
    where_node = ctx.make_node(
        "Where",
        [one_hot_unsqueeze.output[0], data_inp, identity_const.output[0]])
    shapes = node.output_shapes
    dtypes = node.output_dtypes
    ctx.remove_node(node.name)
    # After reduction over axis 1, shape is: [s, a, b, ..., c]
    ctx.make_node(onnx_op, [where_node.output[0]],
                  attr={
                      'axes': [1],
                      'keepdims': 0
                  },
                  name=node.name,
                  outputs=node.output,
                  shapes=shapes,
                  dtypes=dtypes)
def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node):
    """Match a multi-head attention subgraph ending at ``normalize_node`` and
    replace it with a single fused Attention node.

    Matches the Q/K/V projection paths (with BERT, Albert/Einsum, DistilBERT
    and flaubert variants), resolves the shared root input and mask subgraph,
    then emits the fused node via ``create_attention_node``. Returns early
    (without fusing) whenever any sub-pattern fails to match.
    """
    # Sometimes we can not fuse skiplayernormalization since the add before layernorm has an output that used by nodes outside skiplayernorm
    # Conceptually we treat add before layernorm as skiplayernorm node since they share the same pattern
    start_node = normalize_node
    if normalize_node.op_type == 'LayerNormalization':
        add_before_layernorm = self.model.match_parent(normalize_node, 'Add', 0)
        if add_before_layernorm is not None:
            start_node = add_before_layernorm
        else:
            return
    # SkipLayerNormalization has two inputs, and one of them is the root input for attention.
    qkv_nodes = self.model.match_parent_path(start_node,
                                             ['Add', 'MatMul', 'Reshape', 'Transpose', 'MatMul'],
                                             [None, None, 0, 0, 0])
    einsum_node = None
    if qkv_nodes is not None:
        # NOTE(review): matmul_qkv appears twice in this unpack; the second
        # (rightmost 'MatMul' of the path) wins, which is the one used below.
        # The second path element is effectively discarded.
        (_, matmul_qkv, reshape_qkv, transpose_qkv, matmul_qkv) = qkv_nodes
    else:
        # Match Albert
        qkv_nodes = self.model.match_parent_path(start_node,
                                                 ['Add', 'Einsum', 'Transpose', 'MatMul'],
                                                 [1, None, 0, 0])
        if qkv_nodes is not None:
            (_, einsum_node, transpose_qkv, matmul_qkv) = qkv_nodes
        else:
            return
    # The root input is the start node's other parent (not the QKV branch).
    other_inputs = []
    for i, input in enumerate(start_node.input):
        if input not in output_name_to_node:
            continue
        if input == qkv_nodes[0].output[0]:
            continue
        other_inputs.append(input)
    if len(other_inputs) != 1:
        return
    root_input = other_inputs[0]
    """
    Match flaubert                     Mask
                                        |
    Mul --> LayerNormalization -->  Attention --> MatMul --> Add
     |                                                        |
     |                                                        |
     +---------------------------------------------------------
    """
    mul_before_layernorm = self.model.match_parent(start_node, 'Mul', 0)
    if mul_before_layernorm is not None:
        mul_children = input_name_to_nodes[mul_before_layernorm.output[0]]
        if mul_children is not None and len(mul_children) == 2:
            layernorm_node = mul_children[1]
            if layernorm_node.op_type == 'LayerNormalization':
                root_input = layernorm_node.output[0]
            else:
                return
        elif mul_children is not None and len(mul_children) == 5:
            root_input = mul_before_layernorm.output[0]
        else:
            return
    elif normalize_node.op_type == 'LayerNormalization':
        children = input_name_to_nodes[root_input]
        for child in children:
            if child.op_type == "LayerNormalization":
                root_input = child.output[0]
    # An attention root feeds exactly the three Q/K/V projection MatMuls.
    children = input_name_to_nodes[root_input]
    children_types = [child.op_type for child in children]
    if children_types.count('MatMul') != 3:
        return
    v_nodes = self.model.match_parent_path(matmul_qkv,
                                           ['Transpose', 'Reshape', 'Add', 'MatMul'],
                                           [1, 0, 0, None])
    if v_nodes is None:
        logger.debug("fuse_attention: failed to match v path")
        return
    (_, _, add_v, matmul_v) = v_nodes
    # path3/path4 are DistilBERT-style graphs (Where-based masking).
    is_distill = False
    is_distill_add = False
    qk_paths = {
        "path1": (['Softmax', 'Add', 'Div', 'MatMul'], [0, 0, None, 0]),
        "path2": (['Softmax', 'Add', 'Mul', 'MatMul'], [0, 0, None, 0]),
        "path3": (['Softmax', 'Where', 'MatMul', 'Div'], [0, 0, 2, 0]),
        "path4": (['Softmax', 'Add', 'Where', 'MatMul'], [0, 0, 0, 2])
    }
    qk_nodes = None
    for k, v in qk_paths.items():
        qk_nodes = self.model.match_parent_path(matmul_qkv, v[0], v[1])
        if qk_nodes is None:
            continue
        if k == "path3":
            is_distill = True
        if k == "path4":
            is_distill_add = True
        break
    if qk_nodes is None:
        logger.debug("fuse_attention: failed to match qk path")
        return
    add_qk = None
    matmul_qk = None
    where_qk = None
    if is_distill:
        (_, where_qk, matmul_qk, _) = qk_nodes
    elif is_distill_add:
        (_, add_qk, where_qk, matmul_qk) = qk_nodes
    else:
        (_, add_qk, _, matmul_qk) = qk_nodes
    q_nodes = self.model.match_parent_path(matmul_qk,
                                           ['Transpose', 'Reshape', 'Add', 'MatMul'],
                                           [0, 0, 0, None])
    if q_nodes is None:
        # Variant with an explicit scaling Div before the Q transpose.
        q_nodes = self.model.match_parent_path(matmul_qk,
                                               ['Div', 'Transpose', 'Reshape', 'Add', 'MatMul'],
                                               [0, 0, 0, 0, None])
        if q_nodes is None:
            logger.debug("fuse_attention: failed to match q path")
            return
    # Negative indexing works for both q path lengths above.
    reshape_q = q_nodes[-3]
    add_q = q_nodes[-2]
    matmul_q = q_nodes[-1]
    k_nodes = self.model.match_parent_path(matmul_qk,
                                           ['Transpose', 'Reshape', 'Add', 'MatMul'],
                                           [1, 0, 0, None])
    if k_nodes is None:
        k_nodes = self.model.match_parent_path(matmul_qk,
                                               ['Transpose', 'Transpose', 'Reshape', 'Add', 'MatMul'],
                                               [1, 0, 0, 0, None])
        if k_nodes is None:
            logger.debug("fuse_attention: failed to match k path")
            return
    add_k = k_nodes[-2]
    matmul_k = k_nodes[-1]
    # Note that Cast might be removed by OnnxRuntime so we match two patterns here.
    mask_nodes = None
    add_qk_str = None
    if is_distill:
        _, mask_nodes, _ = self.model.match_parent_paths(where_qk,
                                                         [(['Expand', 'Reshape', 'Equal'], [0, 0, 0]),
                                                          (['Cast', 'Expand', 'Reshape', 'Equal'], [0, 0, 0, 0])],
                                                         output_name_to_node)
    elif is_distill_add:
        _, mask_nodes, _ = self.model.match_parent_paths(
            where_qk,
            [(['Cast', 'Equal', 'Unsqueeze', 'Unsqueeze'], [0, 0, 0, 0]),
             (['Equal', 'Unsqueeze', 'Unsqueeze'], [0, 0, 0])],
            output_name_to_node)
        if add_qk is not None:
            add_qk_str = self.get_add_qk_str(add_qk)
            if add_qk_str is None:
                logger.debug(f"fuse_attention: failed to verify shape inference of {add_qk}")
                return
    else:
        _, mask_nodes, _ = self.model.match_parent_paths(
            add_qk,
            [(['Mul', 'Sub', 'Cast', 'Unsqueeze', 'Unsqueeze'], [None, 0, 1, 0, 0]),
             (['Mul', 'Sub', 'Unsqueeze', 'Unsqueeze'], [None, 0, 1, 0])],
            output_name_to_node)
    if mask_nodes is None:
        logger.debug("fuse_attention: failed to match mask path")
        return
    # Fuse only if Q, K and V projections all read the same root input.
    if matmul_v.input[0] == root_input and matmul_q.input[0] == root_input and matmul_k.input[0] == root_input:
        mask_index = self.attention_mask.process_mask(mask_nodes[-1].input[0])
        attention_last_node = reshape_qkv if einsum_node is None else transpose_qkv
        q_num_heads, q_hidden_size = self.get_num_heads_and_hidden_size(reshape_q)
        # number of heads are same for all the paths, hence to create attention node, we pass the q_num_heads
        # the input_hidden_size represents the input hidden size, this is used as needed but hidden sizes for Q, K are extracted appropriately
        new_node = self.create_attention_node(mask_index, matmul_q, matmul_k, matmul_v, add_q, add_k, add_v,
                                              q_num_heads, self.hidden_size, root_input,
                                              attention_last_node.output[0], add_qk_str)
        if new_node is None:
            return
        self.nodes_to_add.append(new_node)
        self.node_name_to_graph_name[new_node.name] = self.this_graph_name
        if einsum_node is not None:
            # Albert path: re-route the Einsum through a Reshape back to
            # [batch, seq, heads, head_size].
            unique_index = einsum_node.input[0]
            new_edge = "edge_modified_" + unique_index
            shape_tensor = helper.make_tensor(name="shape_modified_tensor" + unique_index,
                                              data_type=TensorProto.INT64,
                                              dims=[4],
                                              vals=np.int64([0, 0, q_num_heads, int(q_hidden_size / q_num_heads)]).tobytes(),
                                              raw=True)
            self.model.add_initializer(shape_tensor, self.this_graph_name)
            self.model.add_node(
                helper.make_node("Reshape", [attention_last_node.output[0], shape_tensor.name], [new_edge],
                                 "reshape_modified_" + unique_index), self.this_graph_name)
            einsum_node.input[0] = new_edge
        self.nodes_to_remove.extend([attention_last_node, transpose_qkv, matmul_qkv])
        self.nodes_to_remove.extend(qk_nodes)
        self.nodes_to_remove.extend(q_nodes)
        self.nodes_to_remove.extend(k_nodes)
        self.nodes_to_remove.extend(v_nodes)
        # Use prune graph to remove mask nodes since they are shared by all attention nodes.
        #self.nodes_to_remove.extend(mask_nodes)
        self.prune_graph = True
def create_net_const(self, shape, precision, ir_version):
    """Build an ONNX model with a floored constant and its reference IR net.

    ONNX net                                  IR net

    Input->Concat(+floored const)->Output  => Input->Concat(+const)

    :param shape: shape of the input tensor and of the random constant.
    :param precision: 'FP16' casts the reference constant to float16.
    :param ir_version: IR version; the reference net is only built when
        ``check_ir_version(10, None, ir_version)`` passes.
    :return: tuple ``(onnx_net, ref_net)``; ``ref_net`` may be None.
    """
    #
    #   Create ONNX model
    #

    import onnx
    from onnx import helper
    from onnx import TensorProto
    import numpy as np

    concat_axis = 0
    output_shape = shape.copy()
    output_shape[concat_axis] *= 2

    input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape)
    output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape)

    # FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24; it was an
    # alias of the builtin float (i.e. float64), so np.float64 is the exact
    # behavior-preserving replacement.
    constant = np.random.randn(*shape).astype(np.float64)

    node_const_def = onnx.helper.make_node(
        'Constant',
        inputs=[],
        outputs=['const1'],
        value=helper.make_tensor(
            name='const_tensor',
            data_type=TensorProto.FLOAT,
            dims=constant.shape,
            vals=constant.flatten(),
        ),
    )

    node_def = onnx.helper.make_node('Floor', inputs=['const1'], outputs=['floor'])

    node_concat_def = onnx.helper.make_node('Concat',
                                            inputs=['input', 'floor'],
                                            outputs=['output'],
                                            axis=concat_axis)

    # Create the graph (GraphProto)
    graph_def = helper.make_graph(
        [node_const_def, node_def, node_concat_def],
        'test_model',
        [input],
        [output],
    )

    # Create the model (ModelProto)
    onnx_net = helper.make_model(graph_def, producer_name='test_model')

    #
    #   Create reference IR net
    #

    # The reference constant is the floored value the ONNX graph will produce.
    constant = np.floor(constant)
    if precision == 'FP16':
        constant = constant.astype(np.float16)

    ref_net = None
    if check_ir_version(10, None, ir_version):
        nodes_attributes = {
            'input': {
                'kind': 'op',
                'type': 'Parameter'
            },
            'input_data': {
                'shape': shape,
                'kind': 'data'
            },
            'input_const_data': {
                'kind': 'data',
                'value': constant.flatten()
            },
            'const': {
                'kind': 'op',
                'type': 'Const'
            },
            'const_data': {
                'shape': shape,
                'kind': 'data'
            },
            'concat': {
                'kind': 'op',
                'type': 'Concat',
                'axis': concat_axis
            },
            'concat_data': {
                'shape': output_shape,
                'kind': 'data'
            },
            'result': {
                'kind': 'op',
                'type': 'Result'
            }
        }
        ref_net = build_graph(nodes_attributes, [('input', 'input_data'),
                                                 ('input_const_data', 'const'),
                                                 ('const', 'const_data'),
                                                 ('input_data', 'concat'),
                                                 ('const_data', 'concat'),
                                                 ('concat', 'concat_data'),
                                                 ('concat_data', 'result')])

    return onnx_net, ref_net
def export_onnx(graph):
    """Convert a XFlow graph into an in-memory ONNX ModelProto.

    Walks the operator list once, translating each operator into an ONNX
    node; Input/Weight source edges become graph inputs (weights also become
    initializers). Any operator output never consumed by a downstream
    operator becomes a graph output.

    @params graph is a XFlow graph
    @return A in-memory ONNX graph
    """
    onnx_nodes = []
    input_infos = []
    initializers = []
    # (producer guid, output index) -> producing op; entries are removed as
    # soon as a downstream operator consumes them, so what remains at the end
    # is exactly the set of graph outputs.
    pending_outputs = {}

    for op in graph.get_operator_list():
        op_type = graph.get_operator_type(op)

        # Gather this operator's input tensor names.
        op_inputs = []
        for edge in graph.get_input_edges(op):
            src_type = graph.get_operator_type(edge['srcOp'])
            tensor_name = _input_tensor_name(graph, edge, op)
            op_inputs.append(tensor_name)
            pending_outputs.pop((edge['srcOp']['guid'], edge['srcIdx']), None)
            if src_type in ('Input', 'Weight'):
                dims = graph.get_input_dims(op, edge['dstIdx'])
                input_infos.append(
                    helper.make_tensor_value_info(tensor_name, TensorProto.FLOAT, dims))
                if src_type == 'Weight':
                    initializers.append(
                        helper.make_tensor(tensor_name, TensorProto.FLOAT, dims,
                                           graph.get_weight_value(edge['srcOp'])))

        # ONNX Reshape takes its target shape as a second, constant input.
        if op_type == 'Reshape':
            shape_input_name = 'Reshape_attr{}'.format(op['guid'])
            op_inputs.append(shape_input_name)
            target_shape = graph.get_output_dims(op, 0)
            input_infos.append(
                helper.make_tensor_value_info(shape_input_name, TensorProto.INT64,
                                              [len(target_shape)]))
            initializers.append(
                helper.make_tensor(shape_input_name, TensorProto.INT64,
                                   [len(target_shape)], target_shape))

        # Register every output; each starts out as a graph-output candidate.
        op_outputs = []
        for out_idx in range(graph.get_num_outputs(op)):
            op_outputs.append(_output_tensor_name(graph, op, out_idx))
            pending_outputs[(op['guid'], out_idx)] = op

        onnx_node = helper.make_node(op_type, op_inputs, op_outputs,
                                     '{}{}'.format(op_type, op['guid']))
        _add_node_attribute(graph, onnx_node, op, op_type)
        onnx_nodes.append(onnx_node)

    # Whatever no downstream operator consumed is a model output.
    output_infos = [
        helper.make_tensor_value_info(_output_tensor_name(graph, producer, idx),
                                      TensorProto.FLOAT,
                                      graph.get_output_dims(producer, idx))
        for (guid, idx), producer in pending_outputs.items()
    ]

    onnx_graph = helper.make_graph(onnx_nodes, 'main', input_infos,
                                   output_infos, initializers)
    return helper.make_model(onnx_graph, producer_name='TASO Optimized Model')
def create_unsqueeze_net_const(self, axes, input_shape, output_shape, ir_version):
    """Build an ONNX model concatenating the input with an unsqueezed constant.

    ONNX net                                 IR net

    Input->Concat(+unsqueezed const)->Output  =>  Input->Concat(+const)

    :param axes: axes to unsqueeze the random constant over.
    :param input_shape: shape of the constant before Unsqueeze.
    :param output_shape: shape of the model input (and of the unsqueezed
        constant); its second dimension is doubled by Concat.
    :param ir_version: unused here; the reference net is left as None.
    :return: tuple ``(onnx_net, None)``.
    """
    #
    #   Create ONNX model
    #

    import onnx
    from onnx import helper
    from onnx import TensorProto
    import numpy as np

    concat_axis = 1
    concat_output_shape = output_shape.copy()
    concat_output_shape[concat_axis] *= 2

    input = helper.make_tensor_value_info('input', TensorProto.FLOAT, output_shape)
    output = helper.make_tensor_value_info('output', TensorProto.FLOAT, concat_output_shape)

    const_number = np.prod(input_shape)
    # FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24; it was an
    # alias of the builtin float (float64), so np.float64 preserves behavior.
    constant = np.random.randint(-127, 127, const_number).astype(np.float64)
    constant = np.reshape(constant, input_shape)

    node_const_def = onnx.helper.make_node(
        'Constant',
        inputs=[],
        outputs=['const1'],
        value=helper.make_tensor(
            name='const_tensor',
            data_type=TensorProto.FLOAT,
            dims=constant.shape,
            vals=constant.flatten(),
        ),
    )

    node_squeeze_def = onnx.helper.make_node('Unsqueeze',
                                             inputs=['const1'],
                                             outputs=['unsqueeze1'],
                                             axes=axes)

    node_concat_def = onnx.helper.make_node('Concat',
                                            inputs=['input', 'unsqueeze1'],
                                            outputs=['output'],
                                            axis=concat_axis)

    # Create the graph (GraphProto)
    graph_def = helper.make_graph(
        [node_const_def, node_squeeze_def, node_concat_def],
        'test_unsqueeze_model',
        [input],
        [output],
    )

    # Create the model (ModelProto)
    onnx_net = helper.make_model(graph_def, producer_name='test_unsqueeze_model')

    #
    #   Create reference IR net
    #   Please, specify 'type': 'Input' for input node
    #   Moreover, do not forget to validate ALL layer attributes!!!
    #

    ref_net = None

    return onnx_net, ref_net
], "Loop_body", [ helper.make_tensor_value_info('iteration_num', TensorProto.INT64, [1]), helper.make_tensor_value_info('subgraph_keep_going_in', TensorProto.BOOL, [1]), helper.make_tensor_value_info('loop_state_in', TensorProto.FLOAT, [1]) ], [ helper.make_tensor_value_info('subgraph_keep_going_out', TensorProto.BOOL, [1]), helper.make_tensor_value_info('loop_state_out', TensorProto.FLOAT, [1]), ], [ helper.make_tensor('sub_graph_initializer', TensorProto.FLOAT, [1], [1.]) ]) # Create the main graph graph_proto = helper.make_graph([ helper.make_node("Loop", ["max_trip_count", "keep_going", "state_var_in"], ["state_var_out"], "Loop1", body=body) ], "Main_graph", [ helper.make_tensor_value_info('state_var_in', TensorProto.FLOAT, [1]), ], [ helper.make_tensor_value_info('state_var_out', TensorProto.FLOAT, [1]), ], [ helper.make_tensor('max_trip_count', TensorProto.INT64, [1], [1]), helper.make_tensor('main_graph_initializer', TensorProto.FLOAT, [1], [1.]),
def split_graph(model, split_edge_groups):
    """Cut an ONNX graph at each group of edges for pipeline partitioning.

    For every cut, inserts a paired Send/Recv node (com.microsoft domain)
    together with the boolean signal inputs/outputs and rank initializers
    they require, and rewires every consumer of a cut edge to read from the
    Recv output instead.

    :param model: ONNX ModelProto mutated in place.
    :param split_edge_groups: list of edge-name groups; group i becomes cut i.
    :return: (new Send nodes, new Recv nodes), one pair per cut.

    NOTE(review): the final ``model = onnx.shape_inference.infer_shapes(model)``
    rebinds only the local name, so the caller's model does not receive the
    inferred shapes, and the returned nodes belong to the pre-inference proto —
    confirm this is intended.
    """
    ms_domain = 'com.microsoft'
    new_send_nodes = []
    new_recv_nodes = []
    for cut_index in range(len(split_edge_groups)):
        edgeIds = split_edge_groups[cut_index]
        # split the graph based on edgeIds
        upstream_nodes = []
        upstream_nodes_output_index = []
        output_shapes = []
        element_types = []
        # Locate the producer (node, output index) of every cut edge, and its
        # recorded value_info type if shape inference has produced one.
        for id in edgeIds:
            for node in model.graph.node:
                if len(node.output) >= 1:
                    for i, j in enumerate(node.output):
                        if j == id:
                            upstream_nodes.append(node)
                            upstream_nodes_output_index.append(i)
                            # assuming all tensors are of type float
                            element_types.append(1)
            for info in model.graph.value_info:
                if info.name == id:
                    output_shapes.append(info.type)
        # Boolean "go" signal consumed by Send; the name is re-used both as a
        # graph input (value_info) and as a True-valued initializer.
        send_input_signal_name = 'send_input_signal' + str(cut_index)
        send_signal = model.graph.input.add()
        send_signal.CopyFrom(helper.make_tensor_value_info(
            send_input_signal_name, onnx.TensorProto.BOOL, None))
        send_signal = helper.make_tensor(
            send_input_signal_name, TensorProto.BOOL, (), (True,))
        model.graph.initializer.extend([send_signal])
        # Matching "go" signal for Recv.
        recv_input_signal_name = 'recv_input_signal' + str(cut_index)
        recv_signal = model.graph.input.add()
        recv_signal.CopyFrom(helper.make_tensor_value_info(
            recv_input_signal_name, onnx.TensorProto.BOOL, None))
        recv_signal = helper.make_tensor(
            recv_input_signal_name, TensorProto.BOOL, (), (True,))
        model.graph.initializer.extend([recv_signal])
        # Destination rank for Send: the partition after this cut.
        send_dst_rank_name = 'send_dst_rank' + str(cut_index)
        send_dst_rank = model.graph.input.add()
        send_dst_rank.CopyFrom(helper.make_tensor_value_info(
            send_dst_rank_name, onnx.TensorProto.INT64, None))
        send_dst_rank = helper.make_tensor(
            send_dst_rank_name, TensorProto.INT64, (), (cut_index + 1,))
        model.graph.initializer.extend([send_dst_rank])
        # Source rank for Recv: the partition before this cut.
        recv_src_rank_name = 'recv_src_rank' + str(cut_index)
        recv_src_rank = model.graph.input.add()
        recv_src_rank.CopyFrom(helper.make_tensor_value_info(
            recv_src_rank_name, onnx.TensorProto.INT64, None))
        recv_src_rank = helper.make_tensor(
            recv_src_rank_name, TensorProto.INT64, (), (cut_index,))
        model.graph.initializer.extend([recv_src_rank])
        # output signal from send after cut
        send_output_signal = model.graph.output.add()
        send_output_signal.CopyFrom(helper.make_tensor_value_info(
            'send_output_signal' + str(cut_index), onnx.TensorProto.BOOL, None))
        # output signal from receive after cut
        receive_output_signal = model.graph.output.add()
        receive_output_signal.CopyFrom(helper.make_tensor_value_info(
            'receive_output_signal' + str(cut_index), onnx.TensorProto.BOOL, None))
        new_send = model.graph.node.add()
        new_send.CopyFrom(helper.make_node(
            'Send',
            inputs=[send_input_signal_name, send_dst_rank_name],
            outputs=['send_output_signal' + str(cut_index)],
            tag=0,
            domain=ms_domain,
            element_types=element_types,
            name='send'))
        new_receive = model.graph.node.add()
        new_receive.CopyFrom(helper.make_node(
            'Recv',
            inputs=[recv_input_signal_name, recv_src_rank_name],
            outputs=['receive_output_signal' + str(cut_index)],
            tag=0,
            domain=ms_domain,
            element_types=element_types,
            name='receive'))
        # Route every cut edge through the Send/Recv pair and repoint consumers.
        for i in range(len(upstream_nodes)):
            n = upstream_nodes[i]
            idx = upstream_nodes_output_index[i]
            output_type = output_shapes[i]
            output_edge_name = n.output[idx]
            output_nodes = find_all_output_nodes_by_edge(
                model, output_edge_name)
            # deal with shape inference for newly added edge
            new_send_input_name = output_edge_name + '_send' + str(cut_index)
            add_expand_type(model, new_send_input_name, output_type)
            new_receive_output_name = output_edge_name + \
                '_recv' + str(cut_index)
            add_expand_type(model, new_receive_output_name, output_type)
            # the order of data flow is: node-output -> record -> send -> recv -> wait -> node-input
            new_send.input.extend([output_edge_name])
            new_receive.output.extend([new_receive_output_name])
            for output_node in output_nodes:
                # NOTE(review): this inner `i` shadows the outer loop index;
                # harmless in Python (the outer for re-binds each iteration)
                # but easy to misread.
                for i in range(len(output_node.input)):
                    for edgeId in edgeIds:
                        if output_node.input[i] == edgeId:
                            output_node.input[i] = new_receive_output_name
        new_send_nodes.append(new_send)
        new_recv_nodes.append(new_receive)
    model = onnx.shape_inference.infer_shapes(model)
    return new_send_nodes, new_recv_nodes
def create_dequanize_linear(self, shape, y_scale: np.array, y_zero_point=None, axis=None, opset=10, ir_version='10'): """ ONNX net IR net Input->DequantizeLinear->Output => Input->Sub->Mul """ # # Create ONNX model # import onnx from onnx import helper from onnx import TensorProto self.inp_type = y_zero_point.dtype if y_zero_point is not None else np.uint8 onnx_type = TensorProto.UINT8 if self.inp_type == np.uint8 else TensorProto.INT8 input = helper.make_tensor_value_info('input', onnx_type, shape) output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) nodes = [] inputs = ['input', 'y_scale'] node_scale_def = onnx.helper.make_node( 'Constant', inputs=[], outputs=['y_scale'], value=helper.make_tensor( name='const_tensor', data_type=TensorProto.FLOAT, dims=y_scale.shape, vals=y_scale.flatten(), ), ) nodes.append(node_scale_def) if y_zero_point is not None: node_zero_point_def = onnx.helper.make_node( 'Constant', inputs=[], outputs=['y_zero_point'], value=helper.make_tensor( name='const_tensor', data_type=onnx_type, dims=y_zero_point.shape, vals=y_zero_point.flatten(), ), ) inputs.append('y_zero_point') nodes.append(node_zero_point_def) args = dict() if axis is not None: args['axis'] = axis node_def = onnx.helper.make_node( 'DequantizeLinear', inputs=inputs, outputs=['output'], **args ) nodes.append(node_def) # Create the graph (GraphProto) graph_def = helper.make_graph( nodes, 'test_model', [input], [output], ) # Create the model (ModelProto) onnx_net = helper.make_model(graph_def, producer_name='test_model', opset_imports=[helper.make_opsetid("", opset)]) onnx.checker.check_model(onnx_net) # # Create reference IR net # Please, specify 'type': 'Input' for input node # Moreover, do not forget to validate ALL layer attributes!!! 
# nodes_attributes = { 'input': {'kind': 'op', 'type': 'Parameter'}, 'input_data': {'shape': shape, 'kind': 'data'}, 'input_scale_data': {'kind': 'data', 'value': y_scale}, 'scale_const': {'kind': 'op', 'type': 'Const'}, 'scale_data': {'shape': np.ones(len(shape)), 'kind': 'data'}, 'mul': {'kind': 'op', 'type': 'Multiply'}, 'mul_data': {'shape': shape, 'kind': 'data'}, 'result': {'kind': 'op', 'type': 'Result'} } edges = [('input', 'input_data'), ('input_data', 'mul'), ('input_scale_data', 'scale_const'), ('scale_const', 'scale_data'), ('scale_data', 'mul'), ('mul', 'mul_data')] if y_zero_point is not None: nodes_attributes.update({ 'input_zero_data': {'kind': 'data', 'value': -y_scale * y_zero_point}, 'zero_const': {'kind': 'op', 'type': 'Const'}, 'zero_data': {'shape': np.ones(len(shape)), 'kind': 'data'}, 'sub': {'kind': 'op', 'type': 'Add'}, 'sub_data': {'shape': shape, 'kind': 'data'}, }) edges.extend([('mul_data', 'sub'), ('input_zero_data', 'zero_const'), ('zero_const', 'zero_data'), ('zero_data', 'sub'), ('sub', 'sub_data'), ('sub_data', 'result')]) else: edges.append(('mul_data', 'result')) ref_net = None if check_ir_version(10, None, ir_version): ref_net = build_graph(nodes_attributes, edges) return onnx_net, ref_net
def fuse(self, node, input_name_to_nodes, output_name_to_node):
    """Fuse word + position + segment embedding subgraphs feeding ``node``
    (a normalization node) into a single EmbedLayerNormalization node.

    Matches the three embedding Gather paths (with several position-embedding
    graph variants), casts the id inputs to int32, removes the matched nodes
    and appends the fused node to ``self.nodes_to_add``.
    """
    # already fused. Assumes that only one embedding layer in a transformer model.
    if self.nodes_to_add:
        return

    if self.model.match_parent_path(node, ['Add', 'Gather'], [0, 0]) is None:
        return

    if self.model.find_first_child_by_type(
            node, 'Attention', input_name_to_nodes, recursive=False) is None:
        # In case user disables attention fusion, check whether subgraph looks like Attention.
        if node.output[0] not in input_name_to_nodes:
            return
        children = input_name_to_nodes[node.output[0]]
        children_types = sorted([child.op_type for child in children])
        if children_types != [
                'MatMul', 'MatMul', 'MatMul', 'SkipLayerNormalization'
        ]:
            return

    # Assume the order of embeddings are word_embedding + position_embedding + segment_embedding
    normalize_node = node
    word_embedding_path = self.model.match_parent_path(
        normalize_node, ['Add', 'Gather'], [0, 0])
    if word_embedding_path is None:
        logger.info("Failed to find word embedding")
        return
    add_node, word_embedding_gather = word_embedding_path
    input_ids = word_embedding_gather.input[1]

    position_embedding_expand = None
    position_embedding_shape = None

    # Position embedding variant 1: Reshape<-Slice directly on the normalize node.
    position_embedding_path = self.model.match_parent_path(
        normalize_node, ['Reshape', 'Slice'], [1, 0])
    if position_embedding_path is not None:
        _, position_embedding_weight_node = position_embedding_path
    else:
        # Variant 2: Gather<-Expand<-Shape on the Add node.
        position_embedding_path = self.model.match_parent_path(
            add_node, ['Gather', 'Expand', 'Shape'], [1, 1, 1])
        if position_embedding_path is not None:
            position_embedding_weight_node, position_embedding_expand, position_embedding_shape = position_embedding_path
        else:
            # Variant 3: longer Concat/Unsqueeze shape-derivation chain.
            position_embedding_path = self.model.match_parent_path(
                add_node, [
                    'Gather', 'Expand', 'Concat', 'Unsqueeze', 'Gather',
                    'Shape'
                ], [1, 1, 1, 1, 0, 0])
            if position_embedding_path is not None:
                position_embedding_weight_node, position_embedding_expand, _, _, _, position_embedding_shape = position_embedding_path
            else:
                # Here we will not try to get exact match. Instead, we only try identify position embedding weights.
                position_embedding_path = self.model.match_parent_path(
                    add_node, ['Gather', 'Expand'], [1, 1])
                if position_embedding_path is not None:
                    position_embedding_weight_node, position_embedding_expand = position_embedding_path
                else:
                    logger.info("Failed to find position embedding")
                    return

    if position_embedding_shape is not None and position_embedding_shape.input[
            0] != input_ids:
        logger.info(
            "position and word embedding is expected to be applied on same input"
        )
        return

    segment_embedding_path = self.model.match_parent_path(
        normalize_node, ['Gather'], [1])
    if segment_embedding_path is None:
        segment_embedding_path = self.model.match_parent_path(
            normalize_node, ['Add', 'Gather'], [0, 1])
        if segment_embedding_path is None:
            logger.info("Failed to find segment embedding")
            return
        _, segment_embedding_gather = segment_embedding_path
    else:
        segment_embedding_gather = segment_embedding_path[0]

    segment_ids = segment_embedding_gather.input[1]

    if position_embedding_expand and position_embedding_shape:
        # Remove the whole shape-derivation subgraph feeding the Expand.
        input_parent = self.model.get_parent(position_embedding_shape, 0,
                                             output_name_to_node)
        subgraph_nodes = self.model.get_parent_subgraph_nodes(
            position_embedding_expand,
            [input_parent] if input_parent else [], output_name_to_node)
        self.nodes_to_remove.extend(subgraph_nodes)

    self.nodes_to_remove.extend(word_embedding_path)
    self.nodes_to_remove.extend(position_embedding_path)
    self.nodes_to_remove.extend(segment_embedding_path)
    self.nodes_to_remove.extend([normalize_node])

    # store inputs for further processing
    if self.model.find_graph_input(input_ids):
        self.model.bert_inputs = [
            input_ids, segment_ids
        ] if self.model.find_graph_input(segment_ids) else [input_ids]

    # Cast input_ids and segment_ids to int32.
    input_ids_cast_node = None
    if self.model.find_graph_input(input_ids):
        casted, input_ids = self.utils.cast_graph_input_to_int32(input_ids)
    else:
        input_ids, input_ids_cast_node = self.utils.cast_input_to_int32(
            input_ids)

    if self.model.find_graph_input(segment_ids):
        casted, segment_ids = self.utils.cast_graph_input_to_int32(
            segment_ids)
    else:
        segment_ids, segment_ids_cast_node = self.utils.cast_input_to_int32(
            segment_ids)

    # Cast might be removed by OnnxRuntime.
    # NOTE(review): if segment_ids was a graph input (branch above),
    # segment_ids_cast_node is never assigned and the call below would raise
    # NameError — confirm that path is unreachable for the matched graphs.
    _, segment_id_path, _ = self.model.match_parent_paths(
        segment_ids_cast_node,
        [([
            'ConstantOfShape', 'Concat', 'Unsqueeze', 'Gather', 'Shape',
            'Cast'
        ], [0, 0, 1, 0, 0, 0]),
         ([
             'ConstantOfShape', 'Concat', 'Unsqueeze', 'Gather', 'Shape'
         ], [0, 0, 1, 0, 0])], output_name_to_node)

    if segment_id_path and input_ids_cast_node and input_ids_cast_node.input[
            0] == segment_id_path[-1].input[0]:
        # Segment ids are all-zero and derived from the input shape: replace
        # the chain with Shape + ConstantOfShape on the original input.
        logger.debug("Simplify semgent id path...")
        self.model.add_node(
            helper.make_node('Shape',
                             inputs=[input_ids_cast_node.input[0]],
                             outputs=["input_shape"]))
        self.model.add_node(
            helper.make_node('ConstantOfShape',
                             inputs=["input_shape"],
                             outputs=["zeros_for_input_shape"],
                             value=helper.make_tensor(
                                 "value", onnx.TensorProto.INT32, [1], [1])))
        segment_ids = "zeros_for_input_shape"

    embed_node = helper.make_node(
        'EmbedLayerNormalization',
        inputs=[
            input_ids, segment_ids, word_embedding_gather.input[0],
            position_embedding_weight_node.input[0],
            segment_embedding_gather.input[0], normalize_node.input[2],
            normalize_node.input[3]  # gamma and beta
        ],
        outputs=["embed_output", "dummy_mask_index"],
        name="EmbedLayer")

    embed_node.domain = "com.microsoft"

    # Pass attribute "epsilon" from normalize node to EmbedLayerNormalization.
    for att in normalize_node.attribute:
        if att.name == 'epsilon':
            embed_node.attribute.extend([att])

    # Set default value to 1e-12 if no attribute is found.
    if len(embed_node.attribute) == 0:
        embed_node.attribute.extend(
            [onnx.helper.make_attribute("epsilon", 1.0E-12)])

    self.model.replace_input_of_all_nodes(normalize_node.output[0],
                                          'embed_output')
    self.nodes_to_add.append(embed_node)
def convert_sklearn_ada_boost_classifier(scope, operator, container):
    """
    Converter for AdaBoost classifier.

    This function goes through the list of estimators and uses
    TreeEnsembleClassifer op to calculate class probabilities for
    each estimator. Then it calculates the weighted sum across all
    the estimators depending on the algorithm picked during training
    (SAMME.R or SAMME) and normalises the probability score for the
    final result. Label is calculated by simply doing an argmax of
    the probability scores.
    """
    # The 'nocl' (no class labels) option is not supported by this converter.
    if scope.get_options(operator.raw_operator, dict(nocl=False))['nocl']:
        raise RuntimeError(
            "Option 'nocl' is not implemented for operator '{}'.".format(
                operator.raw_operator.__class__.__name__))
    op = operator.raw_operator
    op_type = 'TreeEnsembleClassifier'
    options = container.get_options(op, dict(raw_scores=False))
    use_raw_scores = options['raw_scores']
    classes = op.classes_
    # Map the sklearn label dtype onto an ONNX tensor type:
    # float and signed-int labels are stored as INT32, everything
    # else is utf-8 encoded and stored as STRING.
    class_type = onnx_proto.TensorProto.STRING
    if np.issubdtype(classes.dtype, np.floating):
        class_type = onnx_proto.TensorProto.INT32
        classes = classes.astype('int')
    elif np.issubdtype(classes.dtype, np.signedinteger):
        class_type = onnx_proto.TensorProto.INT32
    else:
        classes = np.array([s.encode('utf-8') for s in classes])

    argmax_output_name = scope.get_unique_variable_name('argmax_output')
    array_feature_extractor_result_name = scope.get_unique_variable_name(
        'array_feature_extractor_result')

    classes_name = scope.get_unique_variable_name('classes')
    container.add_initializer(classes_name, class_type,
                              classes.shape, classes)

    proba_names_list = []
    # Shared constant tensors, created lazily on first use inside the loop
    # so they are only emitted when the SAMME (>= sklearn 0.22) path runs.
    classes_ind_name = None
    zero_name = None
    one_name = None
    # NOTE(review): redundant — classes_ind_name was already set to None
    # three statements above; this second assignment is dead code.
    classes_ind_name = None
    for i_est, estimator in enumerate(op.estimators_):
        label_name = scope.declare_local_variable('elab_name_%d' % i_est)
        proba_name = scope.declare_local_variable('eprob_name_%d' % i_est)

        # Dispatch each sub-estimator to its own registered converter
        # (this intentionally overwrites the initial op_type default).
        op_type = sklearn_operator_name_map[type(estimator)]
        this_operator = scope.declare_local_operator(op_type)
        this_operator.raw_operator = estimator
        this_operator.inputs = operator.inputs
        this_operator.outputs.extend([label_name, proba_name])

        if op.algorithm == 'SAMME.R':
            cur_proba_name = _samme_r_proba(scope, container,
                                            proba_name.onnx_name,
                                            len(classes))
        else:
            # SAMME
            if _scikit_learn_before_022() and not use_raw_scores:
                # Older sklearn: simply weight each estimator's
                # probability vector by its boosting weight.
                weight_name = scope.get_unique_variable_name('weight')
                samme_proba_name = scope.get_unique_variable_name(
                    'samme_proba')
                container.add_initializer(weight_name,
                                          onnx_proto.TensorProto.FLOAT,
                                          [], [op.estimator_weights_[i_est]])
                apply_mul(scope, [proba_name.onnx_name, weight_name],
                          samme_proba_name, container, broadcast=1)
                cur_proba_name = samme_proba_name
            else:
                # Lazily create the class-index row vector and the
                # all-zeros / all-ones tensors shared by every estimator.
                if classes_ind_name is None:
                    classes_ind_name = scope.get_unique_variable_name(
                        'classes_ind3')
                    container.add_initializer(classes_ind_name,
                                              onnx_proto.TensorProto.INT64,
                                              (1, len(classes)),
                                              list(range(len(classes))))
                if zero_name is None:
                    shape_name = scope.get_unique_variable_name('shape')
                    container.add_node(
                        'Shape', proba_name.onnx_name, shape_name,
                        name=scope.get_unique_operator_name('Shape'))
                    zero_name = scope.get_unique_variable_name('zero')
                    container.add_node(
                        'ConstantOfShape', shape_name, zero_name,
                        name=scope.get_unique_operator_name('CoSA'),
                        value=make_tensor("value",
                                          onnx_proto.TensorProto.FLOAT,
                                          (1, ), [0]))
                    one_name = scope.get_unique_variable_name('one')
                    container.add_node(
                        'ConstantOfShape', shape_name, one_name,
                        name=scope.get_unique_operator_name('CoSB'),
                        value=make_tensor("value",
                                          onnx_proto.TensorProto.FLOAT,
                                          (1, ), [1.]))
                cur_proba_name = _samme_proba(scope, container,
                                              proba_name.onnx_name,
                                              op.estimator_weights_[i_est],
                                              zero_name, classes_ind_name,
                                              one_name)
        proba_names_list.append(cur_proba_name)

    # Aggregate per-estimator scores into the final probability output.
    function = (_generate_raw_scores
                if use_raw_scores else _normalise_probability)
    class_prob_name = function(scope, container, operator,
                               proba_names_list, op)
    # Predicted label = argmax over the aggregated class scores.
    container.add_node('ArgMax', class_prob_name,
                       argmax_output_name,
                       name=scope.get_unique_operator_name('ArgMax'), axis=1)
    container.add_node(
        'ArrayFeatureExtractor', [classes_name, argmax_output_name],
        array_feature_extractor_result_name, op_domain='ai.onnx.ml',
        name=scope.get_unique_operator_name('ArrayFeatureExtractor'))

    if class_type == onnx_proto.TensorProto.INT32:
        # Integer labels: flatten, then cast to the INT64 output type.
        reshaped_result_name = scope.get_unique_variable_name(
            'reshaped_result')

        apply_reshape(scope, array_feature_extractor_result_name,
                      reshaped_result_name, container,
                      desired_shape=(-1, ))
        apply_cast(scope, reshaped_result_name,
                   operator.outputs[0].full_name, container,
                   to=onnx_proto.TensorProto.INT64)
    else:
        # String labels can be written to the output directly.
        apply_reshape(scope, array_feature_extractor_result_name,
                      operator.outputs[0].full_name, container,
                      desired_shape=(-1, ))
def create_net(self, shape1, shape2, precision, ir_version): """ ONNX net IR net Input->MatMul with const->Output => Input->FullyConnected """ # # Create ONNX model # import onnx from onnx import helper from onnx import TensorProto max_len = max([len(shape1), len(shape2)]) extended_shape1 = np.concatenate([np.ones(max_len - len(shape1)), shape1], axis=0) extended_shape2 = np.concatenate([np.ones(max_len - len(shape2)), shape2], axis=0) output_shape = np.concatenate( [np.maximum(*[extended_shape1[0:-2], extended_shape2[0:-2]]), [shape1[-2], shape2[-1]]], axis=0).astype(np.int).tolist() input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape1) output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) const = np.random.randn(*shape2).astype(np.float32) node_const_def = onnx.helper.make_node( 'Constant', inputs=[], outputs=['const'], value=helper.make_tensor( name='const_tensor', data_type=TensorProto.FLOAT, dims=const.shape, vals=const.flatten(), ), ) node_def = onnx.helper.make_node( 'MatMul', inputs=['input', 'const'], outputs=['mm_output'] ) # to avoid mapping problems node_elu_def = onnx.helper.make_node( 'Elu', inputs=['mm_output'], outputs=['output'] ) # Create the graph (GraphProto) graph_def = helper.make_graph( [node_const_def, node_def, node_elu_def], 'test_model', [input], [output], ) # Create the model (ModelProto) onnx_net = helper.make_model(graph_def, producer_name='test_model') # # Create reference IR net # Please, spesify 'type': 'Input' for inpit node # Moreover, do not forget to validate ALL layer attributes!!! # if precision == 'FP16': const = const.astype(np.float16) ref_net = None return onnx_net, ref_net
def generate_qat_model(model_names):
    """
    Build two small QAT (quantize-aware-training style) test models, save
    them to the paths given in *model_names*, and return them together with
    the weight/bias initializers generated for each.

    :param model_names: sequence of at least two output file paths
    :return: (list of ModelProto, list of per-model initializer lists)
    """
    test_models = []
    test_initializers = []
    """
    TEST_MODEL_CONFIG_1
    """
    # Main graph:
    #
    #    [A]  [input_bias]
    #      \      /
    #        Add     [scale_zp_const]   [input_weight]
    #         |                 \        /
    #         |               QuantizeLinear_1
    #   QuantizeLinear_0            |
    #         |               DequantizeLinear_1
    #         |                   /
    #   DequantizeLinear_0   Transpose
    #          \            /
    #           \          /   <--- (actual graph: this branch is folded)
    #             Matmul
    #               |
    #               |
    #              [B]
    graph = helper.make_graph(
        [  # nodes
            helper.make_node("Add", ["A", "input_bias"], ["add_out"],
                             "add0"),
            helper.make_node(
                "QuantizeLinear",
                ["add_out", "quant0_scale_const", "quant0_zp_const"],
                ["quant0_out"],
                "qlinear0",
            ),
            helper.make_node(
                "DequantizeLinear",
                ["quant0_out", "dequant0_scale_const", "dequant0_zp_const"],
                ["dequant0_out"],
                "dqlinear0",
            ),
            helper.make_node("MatMul", ["dequant0_out", "trans_out"], ["B"],
                             "matmul"),
        ],
        "QAT_model_1",  # name
        [helper.make_tensor_value_info("A", TensorProto.FLOAT, ["unk_1"])
         ],  # input
        [helper.make_tensor_value_info("B", TensorProto.FLOAT, [1024])
         ],  # output
        [  # initializers
            helper.make_tensor("quant0_scale_const", TensorProto.FLOAT, [],
                               [0.01961481384932995]),
            helper.make_tensor("quant0_zp_const", TensorProto.INT8, [], [0]),
            helper.make_tensor("dequant0_scale_const", TensorProto.FLOAT, [],
                               [0.01961481384932995]),
            helper.make_tensor("dequant0_zp_const", TensorProto.INT8, [],
                               [0]),
        ],
    )
    input_weight_1 = generate_input_initializer([1024, 1024], np.float32,
                                                "trans_out")
    input_bias_1 = generate_input_initializer([1024], np.float32,
                                              "input_bias")
    graph.initializer.add().CopyFrom(input_weight_1)
    graph.initializer.add().CopyFrom(input_bias_1)
    model_1 = onnx.helper.make_model(
        graph, opset_imports=[helper.make_opsetid("", 13)])
    model_1.ir_version = 7  # use stable onnx ir version
    onnx.save(model_1, model_names[0])
    test_models.extend([model_1])
    # FIX: variable was misspelled 'initiazliers_1'.
    initializers_1 = [input_weight_1, input_bias_1]
    test_initializers.append(initializers_1)
    """
    TEST_MODEL_CONFIG_2
    """
    # Main graph:
    #
    #               [A]
    #                |
    #             MaxPool
    #             /      \
    #  QuantizeLinear_0   QuantizeLinear_1
    #         |                 |
    #  DequantizeLinear_0  DequantizeLinear_1
    #         |                 |
    #  Conv_0-[weight,bias]  Conv_1-[weight,bias]
    #          \               /
    #           \             /
    #                 Add
    #                  |
    #                 [B]
    graph = helper.make_graph(
        [  # nodes
            helper.make_node("MaxPool", ["A"], ["maxpool_out"], "maxpool",
                             kernel_shape=[1, 1]),
            helper.make_node(
                "QuantizeLinear",
                ["maxpool_out", "quant0_scale_const", "quant0_zp_const"],
                ["quant0_out"],
                "qlinear0",
            ),
            helper.make_node(
                "DequantizeLinear",
                ["quant0_out", "dequant0_scale_const", "dequant0_zp_const"],
                ["dequant0_out"],
                "dqlinear0",
            ),
            helper.make_node(
                "Conv",
                ["dequant0_out", "conv_weight_0", "conv_bias_0"],
                ["conv0_out"],
                "conv0",
            ),
            helper.make_node(
                "QuantizeLinear",
                ["maxpool_out", "quant1_scale_const", "quant1_zp_const"],
                ["quant1_out"],
                "qlinear1",
            ),
            helper.make_node(
                "DequantizeLinear",
                ["quant1_out", "dequant1_scale_const", "dequant1_zp_const"],
                ["dequant1_out"],
                "dqlinear1",
            ),
            helper.make_node(
                "Conv",
                ["dequant1_out", "conv_weight_1", "conv_bias_1"],
                ["conv1_out"],
                "conv1",
            ),
            helper.make_node("Add", ["conv0_out", "conv1_out"], ["B"],
                             "add"),
        ],
        "QAT_model_2",  # name
        [
            helper.make_tensor_value_info("A", TensorProto.FLOAT,
                                          [1, 64, 256, 256])
        ],  # input
        [
            helper.make_tensor_value_info("B", TensorProto.FLOAT,
                                          [1, 256, 256, 256])
        ],  # output
        [  # initializers
            helper.make_tensor("quant0_scale_const", TensorProto.FLOAT, [],
                               [0.2062656134366989]),
            helper.make_tensor("quant0_zp_const", TensorProto.UINT8, [],
                               [165]),
            helper.make_tensor("dequant0_scale_const", TensorProto.FLOAT, [],
                               [0.2062656134366989]),
            helper.make_tensor("dequant0_zp_const", TensorProto.UINT8, [],
                               [165]),
            helper.make_tensor("quant1_scale_const", TensorProto.FLOAT, [],
                               [0.10088317096233368]),
            helper.make_tensor("quant1_zp_const", TensorProto.UINT8, [],
                               [132]),
            helper.make_tensor("dequant1_scale_const", TensorProto.FLOAT, [],
                               [0.10088317096233368]),
            helper.make_tensor("dequant1_zp_const", TensorProto.UINT8, [],
                               [132]),
        ],
    )
    conv_weight_0 = generate_input_initializer([256, 64, 1, 1], np.float32,
                                               "conv_weight_0")
    conv_bias_0 = generate_input_initializer([256], np.float32,
                                             "conv_bias_0")
    graph.initializer.add().CopyFrom(conv_weight_0)
    graph.initializer.add().CopyFrom(conv_bias_0)
    conv_weight_1 = generate_input_initializer([256, 64, 1, 1], np.float32,
                                               "conv_weight_1")
    conv_bias_1 = generate_input_initializer([256], np.float32,
                                             "conv_bias_1")
    graph.initializer.add().CopyFrom(conv_weight_1)
    graph.initializer.add().CopyFrom(conv_bias_1)
    model_2 = onnx.helper.make_model(
        graph, opset_imports=[helper.make_opsetid("", 13)])
    model_2.ir_version = 7  # use stable onnx ir version
    onnx.save(model_2, model_names[1])
    test_models.extend([model_2])
    # FIX: the last element was conv_weight_1 repeated; the list is meant to
    # mirror the four initializers added to the graph, so it must end with
    # conv_bias_1.
    initializers_2 = [conv_weight_0, conv_bias_0, conv_weight_1, conv_bias_1]
    test_initializers.append(initializers_2)

    return test_models, test_initializers
def _create_shape_tensor(cls, shape): return make_tensor(name=dummy_name(), data_type=TensorProto.INT64, dims=[len(shape)], vals=np.asarray(shape, dtype=np.int64).tobytes(), raw=True)
def to_onnx_model(inputs, y, model_name="sonnx"):
    """
    Get an ONNX model from a SINGA computational graph.

    Args:
        inputs: a list of input tensors (each is initialized with a name)
        y: a list of tensors, usually the outputs of the graph
    Return:
        the onnx model
    """
    assert len(y) == 1  # assume there is only one output
    y = y[0]

    node = []
    dependency, _ = autograd.infer_dependency(y.creator)

    input_ids = set(id(x) for x in inputs)

    X = []
    for x in inputs:
        dtype = TensorProto.FLOAT
        # NOTE(review): this checks y.dtype inside a loop over x — it looks
        # like x.dtype was intended.  Also TensorProto has no 'INT' field
        # (INT32/INT64 exist); confirm before relying on the int path.
        if y.dtype == tensor.int32:
            dtype = TensorProto.INT
        X.append(helper.make_tensor_value_info(x.name, dtype, x.shape))
    Y = [helper.make_tensor_value_info(y.name, TensorProto.FLOAT, y.shape)]

    # Walk the autograd graph backwards from the output, emitting one ONNX
    # node per SINGA operator; nodes are collected in reverse order and
    # flipped (node[::-1]) at the end.
    ready = deque([y.creator])

    while len(ready) > 0:
        op = ready.pop()
        assert not isinstance(op, autograd.Dummy)
        # Shadowing note: 'inputs' (the function parameter) is reused here
        # for the per-op input names; input_ids above preserves what we need.
        outputs = [op.output_name(idx) for yid, idx in op.y_id2idx.items()]
        inputs = [
            srcop.output_name(srcop.y_id2idx[yid])
            for (srcop, yid, _, _) in op.src
        ]
        opname = op.name
        # Derive the op type from the repr, e.g. '<...autograd.ReLU object...>'.
        optype = str(op).split(".")[-1].split(" ")[0]

        if isinstance(op, autograd.Concat):
            node.append(
                helper.make_node(
                    "Concat",
                    inputs=inputs,
                    outputs=outputs,
                    name=opname,
                    axis=op.axis,
                )
            )
        elif isinstance(op, autograd._Conv2d):
            # ONNX pads are [begin_h, begin_w, end_h, end_w]; SINGA stores
            # symmetric pads so both ends reuse pad_h/pad_w.
            pads = [
                op.handle.pad_h,
                op.handle.pad_w,
                op.handle.pad_w,
                op.handle.pad_h,
            ]
            stride = [op.handle.stride_h, op.handle.stride_w]
            k = [op.handle.kernel_h, op.handle.kernel_w]
            node.append(
                helper.make_node(
                    "Conv",
                    inputs=inputs,
                    outputs=outputs,
                    name=opname,
                    kernel_shape=k,
                    pads=pads,
                    strides=stride,
                    group=op.handle.group,
                )
            )
        elif isinstance(op, autograd._Pooling2d):
            k = [op.handle.kernel_h, op.handle.kernel_w]
            s = [op.handle.stride_h, op.handle.stride_w]
            p = [
                op.handle.pad_h,
                op.handle.pad_w,
                op.handle.pad_w,
                op.handle.pad_h,
            ]
            if op.handle.is_max_pooling:
                node.append(
                    helper.make_node(
                        "MaxPool",
                        inputs=inputs,
                        outputs=outputs,
                        name=opname,
                        kernel_shape=k,
                        pads=p,
                        strides=s,
                    )
                )
            else:
                node.append(
                    helper.make_node(
                        "AveragePool",
                        inputs=inputs,
                        outputs=outputs,
                        name=opname,
                        kernel_shape=k,
                        pads=p,
                        strides=s,
                    )
                )
        elif isinstance(op, autograd._BatchNorm2d):
            node.append(
                helper.make_node(
                    "BatchNormalization",
                    inputs=inputs,
                    outputs=outputs,
                    name=opname,
                    momentum=op.handle.factor,
                )
            )
            # [(<singa.autograd.Sigmoid object at 0x7fd5ec09cb90>, 140556764852432, None, False),
            # (<singa.autograd.Dummy object at 0x7fd5ec09c390>, 140556764824208,
            # <singa.tensor.Tensor object at 0x7fd5ec09c290>, True),
            # (<singa.autograd.Dummy object at 0x7fd5ec09c490>, 140556764824528,
            # <singa.tensor.Tensor object at 0x7fd5ec09c3d0>, True),
            # (<singa.autograd.Dummy object at 0x7fd5ec09c590>, 140556764824784, None, False),
            # (<singa.autograd.Dummy object at 0x7fd5ec09c690>, 140556764825040, None, False)])
            # two dummy operators do not have values, so take the values from
            # handle
            """
            dummy0 = tensor.to_numpy(
                tensor.Tensor(
                    device=op.running_mean.device(), data=op.running_mean
                )
            )
            dummy1 = tensor.to_numpy(
                tensor.Tensor(
                    device=op.running_var.device(), data=op.running_var
                )
            )
            dummy0 = helper.make_node(
                "Constant",
                inputs=[],
                outputs=[inputs[3]],
                value=numpy_helper.from_array(dummy0),
            )
            dummy1 = helper.make_node(
                "Constant",
                inputs=[],
                outputs=[inputs[4]],
                value=numpy_helper.from_array(dummy1),
            )
            node.append(dummy0)
            node.append(dummy1)
            """
        else:
            # Simple one-to-one op name translations.
            singa2onnx = {
                "SoftMax": "Softmax",
                "AddBias": "Add",
                "Add": "Add",
                "Matmul": "MatMul",
                "ReLU": "Relu",
                "ElemMatmul": "Mul",
                "Flatten": "Flatten",
                "Tanh": "Tanh",
                "Sigmoid": "Sigmoid"
            }
            assert optype in singa2onnx, "Unsupported op:{}".format(optype)
            onnx_op = singa2onnx[optype]
            node.append(
                helper.make_node(
                    onnx_op, inputs=inputs, outputs=outputs, name=opname
                )
            )

        # Decrement dependency counts; when a source op is fully consumed,
        # either materialize it as a Constant (Dummy holding a value that is
        # not a graph input) or schedule it for traversal.
        # Shadowing note: 'y' (the graph output) is reused as the loop
        # variable here — it is not needed again after this point.
        for srcop, yid, y, _ in op.src:
            dependency[srcop] -= 1
            if dependency[srcop] == 0:
                if isinstance(srcop, autograd.Dummy):
                    if yid not in input_ids:
                        tmp = helper.make_node(
                            "Constant",
                            inputs=[],
                            outputs=[srcop.output_name(0)],
                            value=helper.make_tensor(
                                name=opname,
                                data_type=TensorProto.FLOAT,
                                dims=y.shape,
                                vals=tensor.to_numpy(y)
                                .flatten()
                                .astype(float),
                            ),
                        )
                        node.append(tmp)
                else:
                    ready.append(srcop)

    # print(node)
    onnx_model = helper.make_model(
        helper.make_graph(node[::-1], model_name, X, Y)
    )
    checker.check_model(onnx_model)
    return onnx_model
    def create_net(self, shape, weights_shape, dilations, group, pads,
                   strides, bias, ir_version, auto_pad=None):
        """
        Build an ONNX model Input->Conv->Output and the matching reference
        IR net (Input->Convolution / GroupConvolution).

        :param shape: input tensor shape (N, C, spatial...)
        :param weights_shape: convolution weight shape
        :param dilations: per-axis dilations
        :param group: number of convolution groups
        :param pads: flat [begin..., end...] padding list
        :param strides: per-axis strides
        :param bias: whether to add a bias constant and a third Conv input
        :param ir_version: IR version used to pick the reference net layout
        :param auto_pad: optional ONNX auto_pad mode
        :return: (onnx model, reference net or None)
        """
        #
        #   Create ONNX model
        #

        import onnx
        from onnx import helper
        from onnx import TensorProto

        # Compute the expected Conv output shape from the standard formula:
        # out = (in + pad_begin + pad_end - dilated_kernel_extent)/stride + 1.
        output_shape = np.array(shape)
        output_shape[1] = group
        _pads = np.array(pads).reshape([2, -1])
        kernel_extent = np.array(dilations) * (np.array(weights_shape[2:]) - 1) + 1
        spatial_val_wo_stride = shape[2:] + np.add(_pads[0, :], _pads[1, :]) - kernel_extent
        # NOTE(review): np.float and np.int were removed in NumPy 1.24;
        # these should become float / int (or np.float64 / np.int64).
        output_shape[2:] = (spatial_val_wo_stride.astype(np.float) / strides + 1).astype(np.int64)
        output_shape = output_shape.astype(np.int).tolist()
        input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape)
        output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape)

        weights_const = np.random.randn(*weights_shape).astype(np.float32)

        node_weights_def = onnx.helper.make_node(
            'Constant',
            inputs=[],
            outputs=['weights'],
            value=helper.make_tensor(
                name='const_tensor',
                data_type=TensorProto.FLOAT,
                dims=weights_const.shape,
                vals=weights_const.flatten(),
            ),
        )

        conv_args = dict(kernel_shape=weights_shape[2:],
                         dilations=dilations,
                         group=group,
                         strides=strides)
        # Explicit pads and SAME_* auto_pad are mutually exclusive in ONNX.
        if pads and auto_pad not in ['SAME_UPPER', 'SAME_LOWER']:
            conv_args['pads'] = pads
        if auto_pad:
            conv_args['auto_pad'] = auto_pad
        if bias:
            bias_const = np.random.randint(-10, 10, weights_shape[0]).astype(np.float32)

            node_bias_def = onnx.helper.make_node(
                'Constant',
                inputs=[],
                outputs=['bias'],
                value=helper.make_tensor(
                    name='const_tensor',
                    data_type=TensorProto.FLOAT,
                    dims=bias_const.shape,
                    vals=bias_const.flatten(),
                ),
            )
            node_def = onnx.helper.make_node(
                'Conv',
                inputs=['input', 'weights', 'bias'],
                outputs=['output'],
                **conv_args
            )
            nodes = [node_weights_def, node_bias_def, node_def]
        else:
            node_def = onnx.helper.make_node(
                'Conv',
                inputs=['input', 'weights'],
                outputs=['output'],
                **conv_args
            )
            nodes = [node_weights_def, node_def]

        # Create the graph (GraphProto)
        graph_def = helper.make_graph(
            nodes,
            'test_model',
            [input],
            [output],
        )

        # Create the model (ModelProto)
        onnx_net = helper.make_model(graph_def, producer_name='test_model')

        #
        #   Create reference IR net
        #
        ref_net = None
        if check_ir_version(10, None, ir_version):
            if len(shape) == 3:
                # 1-D convolution case: the converter inserts Reshape nodes
                # before/after the Convolution to lift it to 2-D.
                input_shape = shape.copy()
                input_shape.insert(2, 1)
                node_shape = output_shape.copy()
                node_shape.insert(2, 1)
                nodes_attributes = {
                    'input': {'kind': 'op', 'type': 'Parameter'},
                    'input_data': {'shape': shape, 'kind': 'data'},
                    'before_shape_const_indata': {'shape': [len(input_shape)], 'value': input_shape,
                                                  'kind': 'data'},
                    'before_shape_const': {'kind': 'op', 'type': 'Const'},
                    'before_shape_const_data': {'shape': [len(input_shape)], 'kind': 'data'},
                    'reshape_before': {'kind': 'op', 'type': 'Reshape'},
                    'reshape_before_data': {'shape': input_shape, 'kind': 'data'},
                    'kernel_indata': {'kind': 'data', 'shape': [len(weights_const.flatten())]},
                    'kernel': {'kind': 'op', 'type': 'Const'},
                    'kernel_data': {'kind': 'data', 'value': None},
                    'node': {'kind': 'op', 'type': 'Convolution' if group == 1 else 'GroupConvolution',
                             'dilations': [1, dilations[0]],
                             'pads_begin': [0, _pads[0, 0]], 'pads_end': [0, _pads[1, 0]]},
                    'node_data': {'shape': node_shape, 'kind': 'data'},
                    'after_shape_const_indata': {'shape': [len(output_shape)], 'value': output_shape,
                                                 'kind': 'data'},
                    'after_shape_const': {'kind': 'op', 'type': 'Const'},
                    'after_shape_const_data': {'shape': [len(output_shape)], 'kind': 'data'},
                    'reshape_after': {'kind': 'op', 'type': 'Reshape'},
                    'reshape_after_data': {'shape': output_shape, 'kind': 'data'},
                    'result': {'kind': 'op', 'type': 'Result'}}
                edges = [('input', 'input_data'),
                         ('input_data', 'reshape_before'),
                         ('before_shape_const_indata', 'before_shape_const'),
                         ('before_shape_const', 'before_shape_const_data'),
                         ('before_shape_const_data', 'reshape_before'),
                         ('reshape_before', 'reshape_before_data'),
                         ('reshape_before_data', 'node'),
                         ('kernel_indata', 'kernel'),
                         ('kernel', 'kernel_data'),
                         ('kernel_data', 'node'),
                         ('node', 'node_data'),
                         ('node_data', 'reshape_after'),
                         ('after_shape_const_indata', 'after_shape_const'),
                         ('after_shape_const', 'after_shape_const_data'),
                         ('after_shape_const_data', 'reshape_after'),
                         ('reshape_after', 'reshape_after_data')]
                if bias:
                    nodes_attributes.update({'const_indata': {'kind': 'data', 'value': bias_const.flatten()},
                                             'const': {'kind': 'op', 'type': 'Const'},
                                             'const_data': {'kind': 'data', 'shape': None},
                                             'bias': {'type': 'Add', 'kind': 'op'},
                                             'bias_data': {'kind': 'data', 'shape': output_shape}})
                    edges += [('reshape_after_data', 'bias'),
                              ('const_indata', 'const'),
                              ('const', 'const_data'),
                              ('const_data', 'bias'),
                              ('bias', 'bias_data'),
                              ('bias_data', 'result')]
                else:
                    edges += [('reshape_after_data', 'result')]
                ref_net = build_graph(nodes_attributes, edges)
            else:
                # N-D convolution: GroupConvolution weights gain an extra
                # group axis in the reference layout.
                _weights_shape = weights_shape.copy()
                if group != 1:
                    _weights_shape.insert(1, 1)
                nodes_attributes = {
                    'input': {'kind': 'op', 'type': 'Parameter'},
                    'input_data': {'shape': shape, 'kind': 'data'},
                    'kernel_indata': {'kind': 'data', 'value': weights_const.flatten()},
                    'kernel': {'kind': 'op', 'type': 'Const'},
                    'kernel_data': {'kind': 'data', 'shape': _weights_shape},
                    'node': {'kind': 'op', 'type': 'Convolution' if group == 1 else 'GroupConvolution',
                             'dilations': dilations,
                             'pads_begin': _pads[0, :], 'pads_end': _pads[1, :]},
                    'node_data': {'shape': output_shape, 'kind': 'data'},
                    'result': {'kind': 'op', 'type': 'Result'}}
                edges = [('input', 'input_data'),
                         ('input_data', 'node'),
                         ('kernel_indata', 'kernel'),
                         ('kernel', 'kernel_data'),
                         ('kernel_data', 'node'),
                         ('node', 'node_data')]
                if bias:
                    nodes_attributes.update({'const_indata': {'kind': 'data', 'value': bias_const.flatten()},
                                             'const': {'kind': 'op', 'type': 'Const'},
                                             'const_data': {'kind': 'data', 'shape': None},
                                             'bias': {'type': 'Add', 'kind': 'op'},
                                             'bias_data': {'kind': 'data', 'shape': output_shape}})
                    edges += [('node_data', 'bias'),
                              ('const_indata', 'const'),
                              ('const', 'const_data'),
                              ('const_data', 'bias'),
                              ('bias', 'bias_data'),
                              ('bias_data', 'result')]
                else:
                    edges += [('node_data', 'result')]
                ref_net = build_graph(nodes_attributes, edges)

        return onnx_net, ref_net
    def construct_model_conv_resize(
        self,
        output_model_path,
        conv_input_shape,
        conv_weight_shape,
        resize_input_shape,
        resize_output_shape,
        resize_attrs,
        resize_roi,
        resize_scales,
        resize_sizes,
    ):
        """
        Build and save a test model of the shape below; roi/scales/sizes are
        each optional and wired as Resize inputs only when provided.

              (input)
                 \
                 Conv
                /    \
           Identity  Resize
              /        \
        (identity_out) (output)
        """
        input_tensor = helper.make_tensor_value_info("input",
                                                     TensorProto.FLOAT,
                                                     conv_input_shape)
        conv_weight_arr = np.random.randint(-1, 2, conv_weight_shape).astype(
            np.float32)
        conv_weight_initializer = onnx.numpy_helper.from_array(
            conv_weight_arr, name="conv1_weight")
        conv_node = onnx.helper.make_node("Conv", ["input", "conv1_weight"],
                                          ["conv_output"],
                                          name="conv_node")

        identity_out = helper.make_tensor_value_info("identity_out",
                                                     TensorProto.FLOAT,
                                                     resize_input_shape)
        identity_node = helper.make_node("Identity", ["conv_output"],
                                         ["identity_out"],
                                         name="IdentityNode")
        initializers = [conv_weight_initializer]

        output_tensor = helper.make_tensor_value_info("output",
                                                      TensorProto.FLOAT,
                                                      resize_output_shape)
        # Optional inputs (roi, scales, sizes) are appended below, in order;
        # omitted ones before a provided one are filled with "" placeholders.
        resize_inputs = [
            "conv_output"
        ]  # resize_roi_name, resize_scales_name, resize_sizes_name]
        resize_node = helper.make_node("Resize",
                                       resize_inputs, ["output"],
                                       name="resize_node",
                                       **resize_attrs)

        if resize_roi is not None:
            resize_roi_name = "resize_roi"
            resize_roi_initializer = helper.make_tensor(
                resize_roi_name, TensorProto.FLOAT, [len(resize_roi)],
                resize_roi)
            initializers.extend([resize_roi_initializer])
            resize_node.input.extend([resize_roi_name])
        else:
            # Keep input positions aligned: empty name = "input not given".
            resize_node.input.extend([""])

        if resize_scales is not None:
            resize_scales_name = "resize_scales"
            resize_scales_initializer = helper.make_tensor(
                resize_scales_name,
                TensorProto.FLOAT,
                [len(resize_scales)],
                resize_scales,
            )
            initializers.extend([resize_scales_initializer])
            resize_node.input.extend([resize_scales_name])
        else:
            resize_node.input.extend([""])

        if resize_sizes is not None:
            resize_sizes_name = "resize_sizes"
            resize_sizes_initializer = helper.make_tensor(
                resize_sizes_name, TensorProto.INT64, [len(resize_sizes)],
                resize_sizes)
            initializers.extend([resize_sizes_initializer])
            resize_node.input.extend([resize_sizes_name])

        graph = helper.make_graph(
            [conv_node, identity_node, resize_node],
            "TestOpQuantizerResize_test_model",
            [input_tensor],
            [identity_out, output_tensor],
            initializer=initializers,
        )
        model = helper.make_model(graph,
                                  opset_imports=[helper.make_opsetid("", 13)])
        model.ir_version = 7  # use stable onnx ir version

        onnx.save(model, output_model_path)
def convert_voting_classifier(scope, operator, container):
    """
    Converts a *VotingClassifier* into *ONNX* format.

    *predict_proba* is not defined by *scikit-learn* when
    ``voting='hard'``. The converted model still defines a probability
    vector equal to the highest probability obtained for each class over
    all estimators.

    *scikit-learn* enables both modes, transformer and predictor for the
    voting classifier. *ONNX* does not make this distinction and always
    creates two outputs, labels and probabilities.
    """
    # The 'nocl' (no class labels) option is not supported by this converter.
    if scope.get_options(operator.raw_operator, dict(nocl=False))['nocl']:
        raise RuntimeError(
            "Option 'nocl' is not implemented for operator '{}'.".format(
                operator.raw_operator.__class__.__name__))
    op = operator.raw_operator
    n_classes = len(op.classes_)
    classes_ind_name = scope.get_unique_variable_name('classes_ind')
    container.add_initializer(classes_ind_name,
                              onnx_proto.TensorProto.INT64, (1, n_classes),
                              list(range(n_classes)))

    probs_names = []
    # Shared all-ones/all-zeros constants for hard voting, created lazily on
    # the first estimator (zero_name stays in scope for later iterations).
    one_name = None
    for i, estimator in enumerate(op.estimators_):
        if estimator is None:
            continue

        # Dispatch each sub-estimator to its own registered converter.
        op_type = sklearn_operator_name_map[type(estimator)]

        this_operator = scope.declare_local_operator(op_type)
        this_operator.raw_operator = estimator
        this_operator.inputs = operator.inputs

        label_name = scope.declare_local_variable('label_%d' % i)
        prob_name = scope.declare_local_variable('proba_%d' % i,
                                                 FloatTensorType())
        this_operator.outputs.append(label_name)
        this_operator.outputs.append(prob_name)

        if op.voting == 'hard':
            if one_name is None:
                shape_name = scope.get_unique_variable_name('shape')
                container.add_node(
                    'Shape', prob_name.onnx_name, shape_name,
                    name=scope.get_unique_operator_name('Shape'))
                zero_name = scope.get_unique_variable_name('zero')
                container.add_node(
                    'ConstantOfShape', shape_name, zero_name,
                    name=scope.get_unique_operator_name('CoSA'),
                    value=make_tensor("value", onnx_proto.TensorProto.FLOAT,
                                      (1, ), [0.]), op_version=9)
                one_name = scope.get_unique_variable_name('one')
                container.add_node(
                    'ConstantOfShape', shape_name, one_name,
                    name=scope.get_unique_operator_name('CoSB'),
                    value=make_tensor("value", onnx_proto.TensorProto.FLOAT,
                                      (1, ), [1.]), op_version=9)

            # Hard voting: turn the probability vector into a one-hot of its
            # argmax via ArgMax -> Equal(class indices) -> Where(1, 0).
            argmax_output_name = scope.get_unique_variable_name(
                'argmax_output')
            container.add_node(
                'ArgMax', prob_name.onnx_name, argmax_output_name,
                name=scope.get_unique_operator_name('ArgMax'), axis=1)

            equal_name = scope.get_unique_variable_name('equal')
            container.add_node(
                'Equal', [argmax_output_name, classes_ind_name],
                equal_name, name=scope.get_unique_operator_name('Equal'))

            max_proba_name = scope.get_unique_variable_name('probsmax')
            container.add_node(
                'Where', [equal_name, one_name, zero_name],
                max_proba_name, name=scope.get_unique_operator_name('Where'))
            prob_name = max_proba_name
        else:
            prob_name = prob_name.onnx_name

        # Weight each estimator's (possibly one-hot) probabilities: either
        # the normalized user weights or a uniform 1/n.
        if op.weights is not None:
            val = op.weights[i] / op.weights.sum()
        else:
            val = 1. / len(op.estimators_)

        weights_name = scope.get_unique_variable_name('w%d' % i)
        container.add_initializer(
            weights_name, onnx_proto.TensorProto.FLOAT, [1], [val])
        wprob_name = scope.get_unique_variable_name('wprob_name')
        apply_mul(scope, [prob_name, weights_name],
                  wprob_name, container, broadcast=1)
        probs_names.append(wprob_name)

    if op.flatten_transform in (False, None):
        container.add_node('Sum', probs_names,
                           operator.outputs[1].full_name,
                           name=scope.get_unique_operator_name('Sum'))
    else:
        raise NotImplementedError(
            "flatten_transform==True is not implemented yet. "
            "You may raise an issue at "
            "https://github.com/onnx/sklearn-onnx/issues.")

    # labels
    label_name = scope.get_unique_variable_name('label_name')
    container.add_node('ArgMax', operator.outputs[1].full_name, label_name,
                       name=scope.get_unique_operator_name('ArgMax'), axis=1)
    _finalize_converter_classes(scope, label_name,
                                operator.outputs[0].full_name, container,
                                op.classes_)
def create_net(self, shape1, shape2, op, precision, ir_version, opset=None): """ ONNX net IR net Input->Add/Mul with const->Output => Input->Eltwise """ # # Create ONNX model # from onnx import helper from onnx import TensorProto if op not in ['Add', 'Sub', 'Mul', 'Div']: raise ValueError( "Operation has to be either Add or Mul or Sub or Div") input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape1) output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape1) min_val = 1 if op == 'Div' else -127 if shape2: const = np.random.randint(min_val, 127, shape2).astype(np.float) else: const = np.random.randint(min_val, 127, 1).astype(np.float) # TODO: add check when MO remove redundant layer (as Add/Sub if const = 0 or Mul/Div if const = 1) if const in [0, 1]: const = np.array([2], dtype=np.float) node_const_def = helper.make_node( 'Constant', inputs=[], outputs=['const'], value=helper.make_tensor( name='const_tensor', data_type=TensorProto.FLOAT, dims=const.shape, vals=const.flatten(), ), ) node_def = helper.make_node(op, inputs=['input', 'const'], outputs=['output']) # Create the graph (GraphProto) graph_def = helper.make_graph( [node_const_def, node_def], 'test_model', [input], [output], ) # Create the model (ModelProto) args = dict(producer_name='test_model') if opset: args['opset_imports'] = [helper.make_opsetid("", opset)] onnx_net = helper.make_model(graph_def, **args) # Create reference IR net if op == 'Div': const = np.power(const, -1) elif op == 'Sub': const = -const ref_net = None return onnx_net, ref_net
    def fuse_attention(self):
        """
        Find the multi-head-attention subgraph pattern (Q/K/V MatMul+Add
        paths feeding Softmax(QK)·V) around each SkipLayerNormalization node
        and replace it with a single fused Attention node.
        """
        output_name_to_node = self.output_name_to_node()

        nodes_to_remove = []
        attention_count = 0

        skip_layer_norm_nodes = self.get_nodes_by_op_type(
            "SkipLayerNormalization")
        for normalize_node in skip_layer_norm_nodes:
            # SkipLayerNormalization has two inputs, and one of them is the root input for attention.
            parent = self.get_parent(normalize_node, 1)
            if parent is None or parent.op_type not in [
                    "SkipLayerNormalization", "LayerNormalization", "Reshape"
            ]:
                parent = self.get_parent(normalize_node, 0)
                if parent is None or parent.op_type not in [
                        "SkipLayerNormalization", "LayerNormalization",
                        "Reshape"
                ]:
                    logger.debug("Failed to match parent of normalize_node")
                    continue

            # Output projection path: (optional Add bias) -> MatMul ->
            # Reshape -> Transpose -> MatMul(QK,V).
            qkv_nodes = self.match_parent_path(
                normalize_node,
                ['Add', 'MatMul', 'Reshape', 'Transpose', 'MatMul'],
                [0, 0, 0, 0, 0])
            if qkv_nodes is None:
                qkv_nodes = self.match_parent_path(
                    normalize_node, ['MatMul', 'Reshape', 'Transpose', 'MatMul'],
                    [1, 0, 0, 0])
                if qkv_nodes is None:
                    logger.debug("Failed to match qkv nodes")
                    continue
            (reshape_qkv, transpose_qkv, matmul_qkv) = qkv_nodes[-3:]

            # Value path.
            v_nodes = self.match_parent_path(
                matmul_qkv, ['Transpose', 'Reshape', 'Add', 'MatMul'],
                [1, 0, 0, 0])
            if v_nodes is None:
                logger.debug("Failed to match v path")
                continue
            (transpose_v, reshape_v, add_v, matmul_v) = v_nodes

            # Scaled attention scores: Softmax(mask + scale * QK).
            qk_nodes = self.match_parent_path(
                matmul_qkv, ['Softmax', 'Add', "Mul", 'MatMul'], [0, 0, 0, 0])
            if qk_nodes is None:
                logger.debug("Failed to match qk_paths")
                continue
            (softmax_qk, add_qk, mul_qk, matmul_qk) = qk_nodes

            # Query path.
            q_nodes = self.match_parent_path(
                matmul_qk, ['Transpose', 'Reshape', 'Add', 'MatMul'],
                [0, 0, 0, 0])
            if q_nodes is None:
                logger.debug("Failed to match q path")
                continue
            (transpose_q, reshape_q, add_q, matmul_q) = q_nodes

            # Key path.
            k_nodes = self.match_parent_path(
                matmul_qk, ['Transpose', 'Reshape', 'Add', 'MatMul'],
                [1, 0, 0, 0])
            if k_nodes is None:
                logger.debug("Failed to match k path")
                continue
            (transpose_k, reshape_k, add_k, matmul_k) = k_nodes

            # Attention-mask path feeding the Add before Softmax; the longer
            # 5-node variant carries a 3-d mask that needs squeezing.
            mask_nodes = self.match_parent_path(add_qk,
                                                ['Mul', 'Sub', 'Unsqueeze'],
                                                [1, 0, 1])
            if mask_nodes is None:
                mask_nodes = self.match_parent_path(
                    add_qk, ['Mul', 'Sub', 'Cast', 'Unsqueeze', 'Mul'],
                    [1, 0, 1, 0, 0])
                if mask_nodes is None:
                    logger.debug("Failed to match mask path")
                    continue
            if not self.has_constant_input(mask_nodes[1], 1):
                logger.debug(
                    "Sub node expected to have an input with constant value 1.0."
                )
                continue

            # add a squeeze node to convert a 3-d mask to 2-d
            squeeze_node = self.match_parent_path(mask_nodes[-1], ['Squeeze'],
                                                  [0])
            squeeze_node_name = "Squeeze_3d_to_2d_mask"
            squeeze_output_name = squeeze_node_name + "_output"
            if squeeze_node is None and len(mask_nodes) == 5:
                mask_input = mask_nodes[-1].input[1]
                self.add_node(
                    helper.make_node("Squeeze", [mask_input],
                                     [squeeze_output_name],
                                     squeeze_node_name,
                                     axes=[1]))
                mask_nodes[-1].input[0] = squeeze_output_name
            # NOTE(review): squeeze_output_name is passed to process_mask
            # below even when no Squeeze node was inserted here — confirm
            # process_mask tolerates a name with no producing node.

            is_same_root = self.check_attention_input(matmul_q, matmul_k,
                                                      matmul_v, parent,
                                                      output_name_to_node)
            if is_same_root:
                mask_index = self.attention_mask.process_mask(
                    squeeze_output_name)
                logger.debug("Create an Attention node.")
                attention_node = self.attention_fusion.create_attention_node(
                    mask_index, matmul_q, matmul_k, matmul_v, add_q, add_k,
                    add_v, parent.output[0], reshape_qkv.output[0])
                if parent.op_type == 'Reshape':
                    # Temporary work around: we require the skiplayernorm and attention op be fed with 3-d input
                    hidden_size = numpy_helper.to_array(
                        self.get_initializer(parent.input[1]))[1]
                    tensor = helper.make_tensor(
                        name=parent.name + "_modified",
                        data_type=TensorProto.INT64,
                        dims=[3],
                        vals=np.int64([[1, -1, hidden_size]]).tobytes(),
                        raw=True)
                    self.add_initializer(tensor)
                    parent.input[1] = parent.name + "_modified"

                if attention_node is None:
                    continue

                self.add_node(attention_node)
                attention_count += 1

                # Queue the whole matched subgraph for removal; the fused
                # Attention node replaces it.
                nodes_to_remove.extend(
                    [reshape_qkv, transpose_qkv, matmul_qkv])
                nodes_to_remove.extend(qk_nodes)
                nodes_to_remove.extend(q_nodes)
                nodes_to_remove.extend(k_nodes)
                nodes_to_remove.extend(v_nodes)
                nodes_to_remove.extend(mask_nodes)
            else:
                logger.debug("Root node not matched.")
                continue
        self.remove_nodes(nodes_to_remove)
        self.update_graph()
        logger.info(f"Fused Attention count:{attention_count}")
def create_net_const(self, shape1, shape2, op, precision, ir_version, opset=None):
    """Build an ONNX net whose constant-folding result feeds a Concat.

    ONNX net                                              IR net

    Input->Concat with two added/multiplied consts->Output   =>   Input->Concat

    :param shape1: shape of the first constant and of 'input'.
    :param shape2: shape of the second constant; falsy -> scalar-like const of size 1.
    :param op: one of 'Add', 'Sub', 'Mul', 'Div'.
    :param precision: 'FP16' casts the reference constant to float16.
    :param ir_version: IR version forwarded to the reference-net machinery.
    :param opset: optional ONNX opset to pin in the model.
    :return: (onnx model, reference net or None).
    :raises ValueError: if ``op`` is not one of the four supported ops.
    """
    #
    #   Create ONNX model
    #

    from onnx import helper
    from onnx import TensorProto

    if op not in ['Add', 'Sub', 'Mul', 'Div']:
        # Fixed message: the original said "either Add or Mul" although
        # Sub and Div are accepted by the check above.
        raise ValueError("op has to be one of Add, Sub, Mul or Div")

    concat_axis = 0
    output_shape = list(shape1)
    output_shape[concat_axis] *= 2

    input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape1)
    output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape)

    # np.float was removed in NumPy 1.24; np.float64 is its exact equivalent.
    const1 = np.random.randint(-127, 127, shape1).astype(np.float64)
    # For Div, keep the divisor strictly positive to avoid division by zero.
    min_val = 1 if op == 'Div' else -127
    if shape2:
        const2 = np.random.randint(min_val, 127, shape2).astype(np.float64)
    else:
        const2 = np.random.randint(min_val, 127, 1).astype(np.float64)

    node_const1_def = helper.make_node(
        'Constant',
        inputs=[],
        outputs=['const1'],
        value=helper.make_tensor(
            name='const_tensor',
            data_type=TensorProto.FLOAT,
            dims=const1.shape,
            vals=const1.flatten(),
        ),
    )

    node_const2_def = helper.make_node(
        'Constant',
        inputs=[],
        outputs=['const2'],
        value=helper.make_tensor(
            name='const_tensor',
            data_type=TensorProto.FLOAT,
            dims=const2.shape,
            vals=const2.flatten(),
        ),
    )

    node_def = helper.make_node(op, inputs=['const1', 'const2'], outputs=['node_out'])

    node_concat_def = helper.make_node('Concat',
                                       inputs=['input', 'node_out'],
                                       outputs=['output'],
                                       axis=concat_axis)

    # Create the graph (GraphProto)
    graph_def = helper.make_graph(
        [node_const1_def, node_const2_def, node_def, node_concat_def],
        'test_model',
        [input],
        [output],
    )

    # Create the model (ModelProto)
    args = dict(producer_name='test_model')
    if opset:
        args['opset_imports'] = [helper.make_opsetid("", opset)]
    onnx_net = helper.make_model(graph_def, **args)

    # Compute the expected constant-folded value for the reference IR net.
    if op == 'Add':
        constant_calculated = np.add(const1, const2)
    elif op == 'Sub':
        constant_calculated = np.subtract(const1, const2)
    elif op == 'Mul':
        constant_calculated = np.multiply(const1, const2)
    elif op == 'Div':
        constant_calculated = np.divide(const1, const2)

    if precision == 'FP16':
        constant_calculated = constant_calculated.astype(np.float16)

    ref_net = None

    return onnx_net, ref_net
helper.make_node("Reshape", ["SubgraphRoot", "concat_out"], ["Result"], "reshape"), ], "Reshape_Fusion", #name [ # inputs helper.make_tensor_value_info('SubgraphRoot', TensorProto.FLOAT, ['unk_0', 256, 'unk_2', 'unk_3']), ], [ # outputs helper.make_tensor_value_info( 'Result', TensorProto.FLOAT, ['unk_1', 128, 'unk_2', 'unk_3', 'unk_4']), helper.make_tensor_value_info('gather3_out', TensorProto.INT64, []), ], [ # initializers helper.make_tensor('a1', TensorProto.INT64, [1], [128]), helper.make_tensor('a4', TensorProto.INT64, [1], [-1]), helper.make_tensor('indices0', TensorProto.INT64, [], [0]), helper.make_tensor('indices2', TensorProto.INT64, [], [2]), helper.make_tensor('indices3', TensorProto.INT64, [], [3]), ]) save_model(graph, 'reshape_fusion_internal_nodes_reused.onnx') graph = helper.make_graph( [ # nodes helper.make_node("Shape", ["SubgraphRoot"], ["shape0_out"], "shape0"), helper.make_node("Shape", ["SubgraphRoot"], ["shape1_out"], "shape1"), helper.make_node("Gather", ["shape0_out", "indices0"], ["gather0_out"], "gather0", axis=0),
def version_9(cls, ctx, node, **kwargs):
    """Rewrite a TF Segment* op (SegmentSum/Prod/Max/Min) into ONNX ops.

    Strategy visible below: build a one-hot boolean mask from the segment
    ids, use Where to select data elements (or the reduction's identity
    value elsewhere), then reduce along axis 1 with the matching Reduce op.
    The original node is removed and the final Reduce node reuses its name
    and outputs, so graph connectivity is preserved.
    """
    data_inp = node.input[0]      # values to be segment-reduced
    segment_inp = node.input[1]   # per-row segment ids
    data_shape = ctx.get_shape(data_inp)
    # Rank may be unknown (None); that selects the Reshape-based path below.
    data_rank = len(data_shape) if data_shape is not None else None
    data_np_dtype = utils.map_onnx_to_numpy_type(ctx.get_dtype(data_inp))
    seg_np_dtype = utils.map_onnx_to_numpy_type(ctx.get_dtype(segment_inp))
    data_is_float = np.dtype(data_np_dtype).kind == 'f'
    data_is_int = np.dtype(data_np_dtype).kind == 'i'
    utils.make_sure(data_is_float or data_is_int,
                    "dtype for Segment ops must be float or int")

    # Pick the ONNX reduction and the identity element used to pad
    # positions that do not belong to a given segment.
    if node.type == "SegmentSum":
        onnx_op = "ReduceSum"
        identity_value = np.array(0, dtype=data_np_dtype)
    elif node.type == "SegmentProd":
        onnx_op = "ReduceProd"
        identity_value = np.array(1, dtype=data_np_dtype)
    elif node.type == "SegmentMax":
        onnx_op = "ReduceMax"
        if data_is_float:
            identity_value = np.array('-inf', dtype=data_np_dtype)
        else:
            identity_value = np.iinfo(data_np_dtype).min
    elif node.type == "SegmentMin":
        onnx_op = "ReduceMin"
        if data_is_float:
            identity_value = np.array('inf', dtype=data_np_dtype)
        else:
            identity_value = np.iinfo(data_np_dtype).max

    # Number of segments = max(segment_ids) + 1 (ids assumed 0-based,
    # per TF Segment* semantics — TODO confirm for this converter).
    max_segment = ctx.make_node("ReduceMax", [segment_inp],
                                attr={'axes': [0], 'keepdims': 0})
    one_const = ctx.make_const(utils.make_name("const_one"),
                               np.array(1, dtype=seg_np_dtype))
    identity_const = ctx.make_const(utils.make_name("const_identity"),
                                    identity_value)
    num_segments = ctx.make_node(
        "Add", [max_segment.output[0], one_const.output[0]])
    # ORT doesn't support bool for OneHot so we use float32 and cast to bool
    onehot_values = ctx.make_const(utils.make_name("onehot_values"),
                                   np.array([0, 1], dtype=np.float32))
    one_hot_node = ctx.make_node(
        "OneHot",
        [segment_inp, num_segments.output[0], onehot_values.output[0]],
        attr={'axis': 0})
    one_hot_bool = ctx.make_node("Cast", [one_hot_node.output[0]],
                                 attr={"to": onnx_pb.TensorProto.BOOL})
    one_hot_unsqueeze = one_hot_bool

    # The mask must broadcast against data's trailing dims, so append
    # (rank-1) singleton axes to the 2-D one-hot tensor.
    if data_rank is None:
        # Unsqueeze requires known rank, but we can use Reshape if rank is unknown
        shape_node = ctx.make_node("Shape", [data_inp])
        rank_node = ctx.make_node("Shape", [shape_node.output[0]])  # Shape-of-Shape == rank
        one_const_int64 = ctx.make_const(utils.make_name("const_one"),
                                         np.array([1], dtype=np.int64))
        num_unsqueeze_dims = ctx.make_node(
            "Sub", [rank_node.output[0], one_const_int64.output[0]])
        one_tensor = helper.make_tensor("value", onnx_pb.TensorProto.INT64,
                                        dims=[1], vals=[1])
        # A runtime-length vector of 1s: the trailing singleton dims.
        unsqueeze_dims = ctx.make_node(
            "ConstantOfShape", inputs=[num_unsqueeze_dims.output[0]],
            attr={"value": one_tensor})
        # [0, 0] keeps the two leading one-hot dims unchanged in Reshape.
        double_zero_const = ctx.make_const(
            utils.make_name("double_zero"), np.array([0, 0], dtype=np.int64))
        expanded_shape = ctx.make_node(
            "Concat", [double_zero_const.output[0], unsqueeze_dims.output[0]],
            attr={'axis': 0})
        one_hot_unsqueeze = ctx.make_node(
            "Reshape", [one_hot_bool.output[0], expanded_shape.output[0]])
    elif data_rank > 1:
        new_dims = list(range(2, 2 + data_rank - 1))
        one_hot_unsqueeze = ctx.make_node("Unsqueeze",
                                          [one_hot_bool.output[0]],
                                          attr={'axes': new_dims})

    # Select data where the mask is True, the identity value elsewhere.
    mul_node = ctx.make_node(
        "Where",
        [one_hot_unsqueeze.output[0], data_inp, identity_const.output[0]])

    # Replace the original node in place: same name, same outputs.
    shapes = node.output_shapes
    dtypes = node.output_dtypes
    ctx.remove_node(node.name)
    ctx.make_node(onnx_op, [mul_node.output[0]],
                  attr={
                      'axes': [1],
                      'keepdims': 0
                  },
                  name=node.name,
                  outputs=node.output,
                  shapes=shapes,
                  dtypes=dtypes)
def create_attention_node(self, mask_index: str, q_matmul: NodeProto, k_matmul: NodeProto, v_matmul: NodeProto,
                          q_add: NodeProto, k_add: NodeProto, v_add: NodeProto, num_heads: int, hidden_size: int,
                          input: str, output: str, add_qk_str: str) -> Union[NodeProto, None]:
    """ Create an Attention node.

    Args:
        mask_index (str): mask input
        q_matmul (NodeProto): MatMul node in fully connection for Q
        k_matmul (NodeProto): MatMul node in fully connection for K
        v_matmul (NodeProto): MatMul node in fully connection for V
        q_add (NodeProto): Add bias node in fully connection for Q
        k_add (NodeProto): Add bias node in fully connection for K
        v_add (NodeProto): Add bias node in fully connection for V
        num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
        hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
        input (str): input name
        output (str): output name
        add_qk_str (str): name of an optional tensor added to Q*K' scores (extra_add_qk input), or None

    Returns:
        Union[NodeProto, None]: the node created or None if failed.
    """
    assert num_heads > 0

    if hidden_size > 0 and (hidden_size % num_heads) != 0:
        logger.debug(f"input hidden size {hidden_size} is not a multiple of num of heads {num_heads}")
        return None

    q_weight = self.model.get_initializer(q_matmul.input[1])
    k_weight = self.model.get_initializer(k_matmul.input[1])
    v_weight = self.model.get_initializer(v_matmul.input[1])
    # Bias may be on either side of the Add node.
    q_bias = self.model.get_initializer(q_add.input[1]) or self.model.get_initializer(q_add.input[0])
    k_bias = self.model.get_initializer(k_add.input[1]) or self.model.get_initializer(k_add.input[0])
    v_bias = self.model.get_initializer(v_add.input[1]) or self.model.get_initializer(v_add.input[0])

    if q_weight is None:
        print(f"{q_matmul.input[1]} is not initializer. Please set do_constant_folding=True in torch.onnx.export")
        return None
    # Fixed: v_bias was missing from this check, which would crash below
    # in NumpyHelper.to_array(v_bias) when it is None.
    if not (k_weight and v_weight and q_bias and k_bias and v_bias):
        return None

    qw = NumpyHelper.to_array(q_weight)
    kw = NumpyHelper.to_array(k_weight)
    vw = NumpyHelper.to_array(v_weight)

    # assert q and k have same shape as expected
    assert qw.shape == kw.shape

    qw_in_size = qw.shape[0]
    kw_in_size = kw.shape[0]
    vw_in_size = vw.shape[0]

    assert qw_in_size == kw_in_size == vw_in_size

    if hidden_size > 0 and hidden_size != qw_in_size:
        logger.debug(
            f"Input hidden size {hidden_size} is not same as weight matrix dimension of q,k,v paths {qw_in_size}, provide correct input hidden size or pass 0"
        )
        return None

    is_qkv_diff_dims = False
    if qw.shape != vw.shape:
        is_qkv_diff_dims = True

    # All the matrices can have the same shape or q, k matrics can have the same shape with v being different
    # For 2d weights, the shapes would be [in_size, out_size].
    # For 3d weights, shape would be [in_size, a, b] where a*b = out_size
    qw_out_size = np.prod(qw.shape[1:])
    # Fixed: was np.prod(qw.shape[1:]); harmless only because qw.shape == kw.shape
    # is asserted above, but misleading and fragile.
    kw_out_size = np.prod(kw.shape[1:])
    vw_out_size = np.prod(vw.shape[1:])

    qkv_weight_dim = 0
    if is_qkv_diff_dims:
        qkv_weight = np.concatenate((qw, kw, vw), axis=1)
        qkv_weight_dim = qw_out_size + kw_out_size + vw_out_size
    else:
        qkv_weight = np.stack((qw, kw, vw), axis=1)
        qkv_weight_dim = 3 * qw_out_size

    qb = NumpyHelper.to_array(q_bias)
    kb = NumpyHelper.to_array(k_bias)
    vb = NumpyHelper.to_array(v_bias)

    q_bias_shape = np.prod(qb.shape)
    k_bias_shape = np.prod(kb.shape)
    v_bias_shape = np.prod(vb.shape)

    assert q_bias_shape == k_bias_shape == qw_out_size
    assert v_bias_shape == vw_out_size

    qkv_bias_dim = 0
    if is_qkv_diff_dims:
        qkv_bias = np.concatenate((qb, kb, vb), axis=0)
        qkv_bias_dim = q_bias_shape + k_bias_shape + v_bias_shape
    else:
        qkv_bias = np.stack((qb, kb, vb), axis=0)
        qkv_bias_dim = 3 * q_bias_shape

    attention_node_name = self.model.create_node_name('Attention')

    weight = helper.make_tensor(name=attention_node_name + '_qkv_weight',
                                data_type=TensorProto.FLOAT,
                                dims=[qw_in_size, qkv_weight_dim],
                                vals=qkv_weight.flatten().tolist())

    # Sometimes weights and bias are stored in fp16 (data_type 10 == FLOAT16).
    if q_weight.data_type == 10:
        weight.CopyFrom(numpy_helper.from_array(NumpyHelper.to_array(weight).astype(np.float16), weight.name))
    self.model.add_initializer(weight, self.this_graph_name)

    bias = helper.make_tensor(name=attention_node_name + '_qkv_bias',
                              data_type=TensorProto.FLOAT,
                              dims=[qkv_bias_dim],
                              vals=qkv_bias.flatten().tolist())
    if q_bias.data_type == 10:
        bias.CopyFrom(numpy_helper.from_array(NumpyHelper.to_array(bias).astype(np.float16), bias.name))
    self.model.add_initializer(bias, self.this_graph_name)

    attention_inputs = [input, attention_node_name + '_qkv_weight', attention_node_name + '_qkv_bias']
    if mask_index is not None:
        attention_inputs.append(mask_index)

    if add_qk_str is not None:
        # "" fills the optional 'past' input slot before extra_add_qk.
        # NOTE(review): if mask_index is None here, the inputs are misaligned
        # (add_qk lands in the mask/past slots) — confirm callers always pass
        # a mask when add_qk_str is used.
        attention_inputs.append("")
        attention_inputs.append(add_qk_str)

    attention_node = helper.make_node('Attention',
                                      inputs=attention_inputs,
                                      outputs=[output],
                                      name=attention_node_name)
    attention_node.domain = "com.microsoft"
    attention_node.attribute.extend([helper.make_attribute("num_heads", num_heads)])

    if is_qkv_diff_dims:
        attention_node.attribute.extend(
            [helper.make_attribute("qkv_hidden_sizes", [qw_out_size, kw_out_size, vw_out_size])])

    return attention_node
def _make_module(direction, hidden_size, seq_length, batch_size, input_size,
                 bias, sequence_lens, initial_h, initial_c, Y, Y_h, Y_c):
    """Build an ONNX ModelProto containing a single LSTM node.

    Optional inputs (bias, sequence_lens, initial_h, initial_c) and optional
    outputs (Y, Y_h, Y_c) are enabled when the corresponding argument is not
    None; disabled slots are wired as '' so positional input/output order is
    preserved, as the LSTM op requires.

    :param direction: None or one of the LSTM direction strings;
                      'bidirectional' doubles num_directions.
    :param hidden_size: LSTM hidden size attribute.
    :return: the assembled ModelProto.
    """
    nodes_inputs = []
    nodes_outputs = []
    initializers = []
    attributes_dict = {}
    graph_inputs = []
    graph_outputs = []

    num_directions = 2 if direction == 'bidirectional' else 1

    if direction is not None:
        attributes_dict['direction'] = direction
    attributes_dict['hidden_size'] = hidden_size

    def _add_random_initializer(name, shape):
        # Append a random float32 initializer and wire it as the next LSTM input.
        tensor = helper.make_tensor(name, TensorProto.FLOAT, dims=shape,
                                    vals=np.random.rand(*shape).astype(
                                        np.float32).flatten().tolist())
        nodes_inputs.append(name)
        initializers.append(tensor)

    def _add_optional_output(flag, name, shape):
        # Wire an optional output: '' when disabled, a graph output otherwise.
        if flag is None:
            nodes_outputs.append('')
        else:
            value_info = helper.make_tensor_value_info(name, TensorProto.FLOAT, shape)
            nodes_outputs.append(name)
            graph_outputs.append(value_info)

    # data input X: [seq_length, batch_size, input_size]
    input_shape = [seq_length, batch_size, input_size]
    input = helper.make_tensor_value_info('input', TensorProto.FLOAT, input_shape)
    nodes_inputs.append('input')
    graph_inputs.append(input)

    # W and R hold the 4 gate matrices stacked, hence the factor 4.
    _add_random_initializer('W', [num_directions, 4 * hidden_size, input_size])
    _add_random_initializer('R', [num_directions, 4 * hidden_size, hidden_size])

    # optional bias B (Wb and Rb concatenated, hence the factor 8)
    if bias is None:
        nodes_inputs.append('')
    else:
        _add_random_initializer('B', [num_directions, 8 * hidden_size])

    # optional sequence_lens (int32, filled with the full sequence length)
    if sequence_lens is None:
        nodes_inputs.append('')
    else:
        sequence_lens_shape = [batch_size]
        sequence_lens_tensor = helper.make_tensor(
            'sequence_lens', TensorProto.INT32, dims=sequence_lens_shape,
            vals=np.full(sequence_lens_shape, seq_length).flatten().tolist())
        nodes_inputs.append('sequence_lens')
        initializers.append(sequence_lens_tensor)

    # optional initial hidden / cell states
    if initial_h is None:
        nodes_inputs.append('')
    else:
        _add_random_initializer('initial_h',
                                [num_directions, batch_size, hidden_size])

    if initial_c is None:
        nodes_inputs.append('')
    else:
        _add_random_initializer('initial_c',
                                [num_directions, batch_size, hidden_size])

    # optional outputs Y, Y_h, Y_c
    _add_optional_output(Y, 'Y',
                         [seq_length, num_directions, batch_size, hidden_size])
    _add_optional_output(Y_h, 'Y_h', [num_directions, batch_size, hidden_size])
    _add_optional_output(Y_c, 'Y_c', [num_directions, batch_size, hidden_size])

    # lstm node
    node = onnx.helper.make_node('LSTM',
                                 inputs=nodes_inputs,
                                 outputs=nodes_outputs,
                                 **attributes_dict)

    # graph and model
    graph_def = helper.make_graph([node], 'test-model', graph_inputs,
                                  graph_outputs, initializer=initializers)
    model_def = helper.make_model(graph_def, producer_name='onnx')

    return model_def
def make_attribute(
        key,  # type: Text
        value,  # type: Any
        dtype=None,  # type: [np.float32, np.float64]
        domain='',  # type: Text
        doc_string=None  # type: Optional[Text]
):  # type: (...) -> AttributeProto
    """Makes an AttributeProto based on the value type.

    The attribute type is inferred from the Python/numpy type of *value*:
    scalars map to FLOAT/INT/STRING/TENSOR/SPARSE_TENSOR/GRAPH, iterables to
    the corresponding plural types. When dtype is np.float64 and domain is
    outside ('', 'ai.onnx.ml'), float64 values are stored losslessly as a
    DOUBLE tensor attribute instead of being narrowed to FLOAT.
    NOTE: branch order below is significant — e.g. np.float32 must be tested
    before float, because np.float64 is a subclass of float.
    """
    attr = AttributeProto()
    attr.name = key
    if doc_string:
        attr.doc_string = doc_string

    is_iterable = isinstance(value, collections.abc.Iterable)
    bytes_or_false = _to_bytes_or_false(value)
    # Only custom domains get the lossless DOUBLE-tensor treatment.
    use_float64 = dtype == np.float64 and domain not in ('', 'ai.onnx.ml')

    # --- scalar cases ---
    if isinstance(value, np.float32):
        attr.f = value
        attr.type = AttributeProto.FLOAT
    elif isinstance(value, (float, np.float64)):
        if use_float64:
            # Preserve full double precision via a 1-element DOUBLE tensor.
            attr.type = AttributeProto.TENSOR
            attr.t.CopyFrom(
                make_tensor(key, TensorProto.DOUBLE, (1, ), [value]))
        else:
            attr.f = value
            attr.type = AttributeProto.FLOAT
    elif isinstance(value, np.int32):
        attr.i = value
        attr.type = AttributeProto.INT
    elif isinstance(value, np.int64):
        attr.i = value
        attr.type = AttributeProto.INT
    elif isinstance(value, numbers.Integral):
        attr.i = value
        attr.type = AttributeProto.INT
    # string
    elif bytes_or_false is not False:
        assert isinstance(bytes_or_false, bytes)
        attr.s = bytes_or_false
        attr.type = AttributeProto.STRING
    elif isinstance(value, TensorProto):
        attr.t.CopyFrom(value)
        attr.type = AttributeProto.TENSOR
    elif (SparseTensorProto is not None
          and isinstance(value, SparseTensorProto)):
        # SparseTensorProto may be unavailable in old onnx versions.
        attr.sparse_tensor.CopyFrom(value)
        attr.type = AttributeProto.SPARSE_TENSOR
    elif isinstance(value, GraphProto):
        attr.g.CopyFrom(value)
        attr.type = AttributeProto.GRAPH
    # third, iterable cases
    elif is_iterable:
        byte_array = [_to_bytes_or_false(v) for v in value]
        if all(isinstance(v, np.float32) for v in value):
            attr.floats.extend(value)
            attr.type = AttributeProto.FLOATS
        elif all(isinstance(v, np.float64) for v in value):
            if use_float64:
                attr.type = AttributeProto.TENSOR
                attr.t.CopyFrom(
                    make_tensor(key, TensorProto.DOUBLE, (len(value), ),
                                value))
            else:
                attr.floats.extend(value)
                attr.type = AttributeProto.FLOATS
        elif all(isinstance(v, float) for v in value):
            if use_float64:
                attr.type = AttributeProto.TENSOR
                attr.t.CopyFrom(
                    make_tensor(key, TensorProto.DOUBLE, (len(value), ),
                                value))
            else:
                attr.floats.extend(value)
                attr.type = AttributeProto.FLOATS
        elif all(isinstance(v, np.int32) for v in value):
            attr.ints.extend(int(v) for v in value)
            attr.type = AttributeProto.INTS
        elif all(isinstance(v, np.int64) for v in value):
            attr.ints.extend(int(v) for v in value)
            attr.type = AttributeProto.INTS
        elif all(isinstance(v, numbers.Integral) for v in value):
            # Turn np.int32/64 into Python built-in int.
            attr.ints.extend(int(v) for v in value)
            attr.type = AttributeProto.INTS
        elif all(
                map(lambda bytes_or_false: bytes_or_false is not False,
                    byte_array)):
            # NOTE: the lambda parameter intentionally shadows the outer
            # bytes_or_false; it tests each element of byte_array.
            attr.strings.extend(cast(List[bytes], byte_array))
            attr.type = AttributeProto.STRINGS
        elif all(isinstance(v, TensorProto) for v in value):
            attr.tensors.extend(value)
            attr.type = AttributeProto.TENSORS
        elif (SparseTensorProto is not None
              and all(isinstance(v, SparseTensorProto) for v in value)):
            attr.sparse_tensors.extend(value)
            attr.type = AttributeProto.SPARSE_TENSORS
        elif all(isinstance(v, GraphProto) for v in value):
            attr.graphs.extend(value)
            attr.type = AttributeProto.GRAPHS
        else:
            # Only the first 5 element types are shown to keep the message short.
            raise ValueError(
                "You passed in an iterable attribute but I cannot figure out "
                "its applicable type, key='{}', type={}, dtype={}, "
                "types={}.".format(
                    key, type(value), dtype,
                    [type(_) for _, __ in zip(value, range(0, 5))]))
    else:
        raise ValueError(
            "Value '{}' is not valid attribute data type for attribute "
            "'{}'.".format(value, key))
    return attr
def convertToOnnx(net, onnxFile):
    """Convert a parsed DarkNet network description into an ONNX model file.

    Walks the layer list, emitting ONNX nodes per section type
    (convolutional / maxpool / route / upsample / reorg / shortcut and
    region/yolo outputs), then builds, checks and saves the model.
    Note: removed three unreachable ``if 0:`` experimental variants from the
    reorg handler; only the branch kept below ever executed.

    :param net: list of layer dicts produced by the DarkNet cfg/weights parser.
    :param onnxFile: path the resulting model is saved to.
    :raises NameError: on an unknown section type or unsupported activation.
    """
    inputShape = ['N', net[0]['c'], net[0]['h'], net[0]['w']]
    X = helper.make_tensor_value_info(
        'X0', TensorProto.FLOAT, inputShape)  # input tensor to the next layer
    inputs = [X]  # graph inputs
    nodes = []  # graph nodes
    inits = []  # graph parameters (weights, biases)
    outputs = []  # graph outputs
    tensors = {}  # temp storage for outputs in case they are used again

    for n, sec in enumerate(net):
        if sec['type'] in ['region', 'yolo']:
            outputs.append(X)
        elif sec['type'] == 'convolutional':
            # The conv and batch norm layers could be fused. The DarkNet formula for both layers is:
            # out = ((W*X-mean)/(sqrt(var)+.000001))*scale + bias
            # A fused conv layer would be:
            # out = W*X*S + bias-mean*S, S=scale/(sqrt(var)+.000001)
            W = helper.make_tensor(
                'W' + str(n), TensorProto.FLOAT,
                [sec['filters'], sec['c'], sec['size'], sec['size']],
                sec['weights'])
            B = helper.make_tensor('B' + str(n), TensorProto.FLOAT,
                                   [sec['filters']], sec['biases'])
            Y = helper.make_tensor_value_info(
                'Y' + str(n), TensorProto.FLOAT,
                ['N', sec['out_c'], sec['out_h'], sec['out_w']])
            nodeInputs = [X.name, W.name]
            # With batch norm, the bias is consumed by BatchNormalization below.
            if not sec['batch_normalize']:
                nodeInputs.append(B.name)
            node = helper.make_node('Conv',
                                    nodeInputs, [Y.name],
                                    'Conv' + str(n),
                                    kernel_shape=[sec['size'], sec['size']],
                                    pads=[
                                        sec['padding'], sec['padding'],
                                        sec['padding'], sec['padding']
                                    ],
                                    strides=[sec['stride'], sec['stride']])
            nodes.append(node)
            inits.extend([W, B])
            if sec['batch_normalize']:
                X = Y
                S = helper.make_tensor('scales' + str(n), TensorProto.FLOAT,
                                       [sec['filters']], sec['scales'])
                M = helper.make_tensor('mean' + str(n), TensorProto.FLOAT,
                                       [sec['filters']], sec['rolling_mean'])
                V = helper.make_tensor('var' + str(n), TensorProto.FLOAT,
                                       [sec['filters']],
                                       sec['rolling_variance'])
                Y = helper.make_tensor_value_info(
                    'Y' + str(n) + 'Norm', TensorProto.FLOAT,
                    ['N', sec['out_c'], sec['out_h'], sec['out_w']])
                node = onnx.helper.make_node(
                    'BatchNormalization',
                    inputs=[X.name, S.name, B.name, M.name, V.name],
                    outputs=[Y.name],
                    name='BatchNormalization' + str(n),
                    epsilon=0.000001)  # DarkNet default
                nodes.append(node)
                inits.extend([S, M, V])
        elif sec['type'] == 'maxpool':
            Y = helper.make_tensor_value_info(
                'Y' + str(n), TensorProto.FLOAT,
                ['N', sec['out_c'], sec['out_h'], sec['out_w']])
            # Split odd padding between the two sides of each spatial dim.
            pad1 = int(np.floor(sec['padding'] / 2))
            pad2 = int(np.ceil(sec['padding'] / 2))
            node = helper.make_node('MaxPool', [X.name], [Y.name],
                                    'MaxPool' + str(n),
                                    kernel_shape=[sec['size'], sec['size']],
                                    pads=[pad1, pad1, pad2, pad2],
                                    strides=[sec['stride'], sec['stride']])
            nodes.append(node)
        elif sec['type'] == 'route':
            if len(
                    sec['layers']
            ) == 1:  # if there's only one layer being routed, just assign the tensor
                layerNum = sec['layers'][0]
                Y = tensors[net[layerNum]['outputName']]
            else:  # if there are multiple layers, need to concatenate
                inputNames = []
                for layerNum in sec['layers']:
                    inputNames.append(net[layerNum]['outputName'])
                Y = helper.make_tensor_value_info(
                    'Y' + str(n), TensorProto.FLOAT,
                    ['N', sec['out_c'], sec['out_h'], sec['out_w']])
                node = helper.make_node('Concat',
                                        inputNames, [Y.name],
                                        'Concat' + str(n),
                                        axis=1)
                nodes.append(node)
        elif sec['type'] == 'upsample':
            Y = helper.make_tensor_value_info(
                'Y' + str(n), TensorProto.FLOAT,
                ['N', sec['out_c'], sec['out_h'], sec['out_w']])
            scale = sec['stride']
            # A negative stride means downscaling by its magnitude.
            if scale < 0:
                scale = 1.0 / -scale
            Scales = helper.make_tensor('Scales' + str(n), TensorProto.FLOAT,
                                        [4], [1, 1, scale, scale])
            node = helper.make_node(
                'Resize',  # this is the Resize-10 interface
                [X.name, Scales.name],
                [Y.name],
                'Resize' + str(n))
            nodes.append(node)
            inits.append(Scales)
        elif sec['type'] == 'reorg':
            # https://github.com/thtrieu/darkflow/issues/173
            # Equivalent Keras formulation:
            # channel_first = keras.layers.Permute((3, 1, 2))(input_tensor)
            # reshape_tensor = keras.layers.Reshape((c // (stride ** 2), h, stride, w, stride))(channel_first)
            # permute_tensor = keras.layers.Permute((3, 5, 1, 2, 4))(reshape_tensor)
            # target_tensor = keras.layers.Reshape((-1, h // stride, w // stride))(permute_tensor)
            # channel_last = keras.layers.Permute((2, 3, 1))(target_tensor)
            # return keras.layers.Reshape((h // stride, w // stride, -1))(channel_last)
            s = sec['stride']
            h = sec['h']
            w = sec['w']
            c = sec['c']
            # Implemented as an alternating Reshape/Transpose chain that
            # keeps every intermediate Transpose at rank <= 5.
            Shape1 = helper.make_tensor('Reorg' + str(n) + '_1Shape',
                                        TensorProto.INT64, [4],
                                        [-1, c // (s**2) * h * s, w, s])
            Shape3 = helper.make_tensor('Reorg' + str(n) + '_3Shape',
                                        TensorProto.INT64, [4],
                                        [-1, c // (s**2) * h, s, s * w])
            Shape5 = helper.make_tensor('Reorg' + str(n) + '_5Shape',
                                        TensorProto.INT64, [5],
                                        [-1, s, c // (s**2) * h, s, w])
            Shape7 = helper.make_tensor('Reorg' + str(n) + '_7Shape',
                                        TensorProto.INT64, [4],
                                        [-1, c * (s**2), h // s, w // s])
            Y1name = 'Reorg' + str(n) + '_1Y'
            Y2name = 'Reorg' + str(n) + '_2Y'
            Y3name = 'Reorg' + str(n) + '_3Y'
            Y4name = 'Reorg' + str(n) + '_4Y'
            Y5name = 'Reorg' + str(n) + '_5Y'
            Y6name = 'Reorg' + str(n) + '_6Y'
            Y = helper.make_tensor_value_info(
                'Reorg' + str(n) + '_7Y', TensorProto.FLOAT,
                ['N', sec['out_c'], sec['out_h'], sec['out_w']])
            node1 = helper.make_node('Reshape', [X.name, Shape1.name],
                                     [Y1name], 'Reorg' + str(n) + '_1Reshape')
            node2 = helper.make_node('Transpose', [Y1name], [Y2name],
                                     name='Reorg' + str(n) + '_2Transpose',
                                     perm=[0, 1, 3, 2])
            node3 = helper.make_node('Reshape', [Y2name, Shape3.name],
                                     [Y3name], 'Reorg' + str(n) + '_3Reshape')
            node4 = helper.make_node('Transpose', [Y3name], [Y4name],
                                     name='Reorg' + str(n) + '_4Transpose',
                                     perm=[0, 2, 1, 3])
            node5 = helper.make_node('Reshape', [Y4name, Shape5.name],
                                     [Y5name], 'Reorg' + str(n) + '_5Reshape')
            node6 = helper.make_node('Transpose', [Y5name], [Y6name],
                                     name='Reorg' + str(n) + '_6Transpose',
                                     perm=[0, 1, 3, 2, 4])
            node7 = helper.make_node('Reshape', [Y6name, Shape7.name],
                                     [Y.name], 'Reorg' + str(n) + '_7Reshape')
            nodes.extend([node1, node2, node3, node4, node5, node6, node7])
            inits.extend([Shape1, Shape3, Shape5, Shape7])
        elif sec['type'] == 'shortcut':
            # shortcut_cpu(l.batch, l.w, l.h, l.c, net.layers[l.index].output, l.out_w, l.out_h, l.out_c, l.alpha, l.beta, l.output);
            Y = helper.make_tensor_value_info(
                'Y' + str(n), TensorProto.FLOAT,
                ['N', sec['out_c'], sec['out_h'], sec['out_w']])
            node = helper.make_node('Add',
                                    [X.name, net[sec['index']]['outputName']],
                                    [Y.name], 'Add' + str(n))
            nodes.append(node)
        else:
            raise NameError('unknown section type: {}'.format(sec['type']))

        if 'activation' in sec:
            if sec['activation'] == 'leaky':
                X = Y
                Y = helper.make_tensor_value_info(
                    'Y' + str(n) + 'Relu', TensorProto.FLOAT,
                    ['N', sec['out_c'], sec['out_h'], sec['out_w']])
                node = onnx.helper.make_node(
                    'LeakyRelu',
                    inputs=[X.name],
                    outputs=[Y.name],
                    name='LeakyRelu' + str(n),
                    alpha=0.1)  # DarkNet default is 0.1
                nodes.append(node)
            elif sec['activation'] != 'linear':
                raise NameError('unsupported activation type: {}'.format(
                    sec['activation']))

        tensors[
            Y.name] = Y  # store this in case it is used again in a "route" or "shortcut" layer
        sec['outputName'] = Y.name
        X = Y  # this layer's output is next layer's input

    graph_def = helper.make_graph(nodes, 'darknet conversion', inputs,
                                  outputs, inits)
    opset = helper.make_operatorsetid('', 10)
    model_def = helper.make_model(graph_def,
                                  producer_name='darknetToOnnx',
                                  opset_imports=[opset])
    onnx.checker.check_model(model_def)
    onnx.save_model(model_def, onnxFile)
    print('saved onnx file: ' + onnxFile)
def _sample_0_elem_tensor(self):  # type: () -> TensorProto
    """Build a FLOAT TensorProto named 'test' with shape (0, 3): zero elements."""
    empty = np.random.randn(0, 3).astype(np.float32)
    values = empty.reshape(0).tolist()
    return helper.make_tensor(
        name='test',
        data_type=TensorProto.FLOAT,
        dims=(0, 3),
        vals=values,
    )
def create_resize_net(self, input_shape, output_shape, scales, sizes,
                      coordinate_transformation_mode, cubic_coeff_a,
                      mode, nearest_mode, precision, ir_version):
    """Build an ONNX model with a single Resize node plus its reference IR net.

    Either ``scales`` or ``sizes`` (or both) drives the resize; when scales
    is None it is derived from output_shape / input_shape. None-valued mode
    arguments fall back to the ONNX Resize defaults.

    :return: (onnx model, reference net or None).
    """
    import onnx
    from onnx import helper
    from onnx import TensorProto

    input_rank = len(input_shape)
    # roi is required positionally by Resize even when unused; a no-op
    # [zeros, ones] region covers the whole input.
    roi_node = onnx.helper.make_node(
        'Constant',
        inputs=[],
        outputs=['roi'],
        value=helper.make_tensor(
            name='roi_consts',
            data_type=TensorProto.FLOAT,
            dims=[2 * input_rank],
            vals=np.array([*np.zeros(input_rank), *np.ones(input_rank)])
        )
    )

    onnx_scales = scales
    if scales is None:
        # np.float was removed in NumPy 1.24; np.float64 is its exact equivalent.
        onnx_scales = np.array(output_shape).astype(np.float64) / np.array(input_shape).astype(
            np.float64)
    scales_node = onnx.helper.make_node(
        'Constant',
        inputs=[],
        outputs=['scales'],
        value=helper.make_tensor(
            name='scales_const',
            data_type=TensorProto.FLOAT,
            dims=[len(output_shape)],
            vals=onnx_scales
        )
    )

    nodes_list = [roi_node, scales_node]
    inputs_list = ['input', 'roi', 'scales']

    if sizes is not None:
        sizes_node = onnx.helper.make_node(
            'Constant',
            inputs=[],
            outputs=['sizes'],
            value=helper.make_tensor(
                name='sizes_const',
                data_type=TensorProto.INT64,
                dims=[len(output_shape)],
                vals=sizes
            )
        )

        nodes_list.append(sizes_node)
        inputs_list.append('sizes')

    args = dict()

    # Fall back to the ONNX Resize defaults for unspecified attributes.
    onnx_mode = mode or 'nearest'
    onnx_nearest_mode = nearest_mode or 'round_prefer_floor'
    cube_coeff = -0.75 if cubic_coeff_a is None else cubic_coeff_a
    onnx_coordinate_transformation_mode = coordinate_transformation_mode or 'half_pixel'

    args['nearest_mode'] = onnx_nearest_mode
    args['mode'] = onnx_mode
    args['cubic_coeff_a'] = cube_coeff
    args['coordinate_transformation_mode'] = onnx_coordinate_transformation_mode

    x = helper.make_tensor_value_info('input', TensorProto.FLOAT, input_shape)
    y = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape)

    resize_node = onnx.helper.make_node(
        'Resize',
        inputs=inputs_list,
        outputs=['output'],
        **args,
    )

    nodes_list.append(resize_node)

    graph_def = onnx.helper.make_graph(nodes_list, 'test_model', [x], [y])

    # Create the model (ModelProto)
    onnx_net = helper.make_model(graph_def, producer_name='test_model')
    onnx.checker.check_model(onnx_net)

    #
    #   Create reference IR net
    #
    ref_net = None

    if check_ir_version(10, None, ir_version):
        if sizes is None and scales is None:
            return onnx_net, ref_net

        input_shape_as_array = int64_array(input_shape)

        # When both are given, 'sizes' wins; otherwise derive the missing one.
        if sizes is not None and scales is not None:
            shape_calculation_mode = 'sizes'
            sizes_value = int64_array(sizes)
            scales_value = np.array(scales).astype(np.float64)
        elif sizes is not None and scales is None:
            shape_calculation_mode = 'sizes'
            sizes_value = int64_array(sizes)
            scales_value = sizes_value / input_shape_as_array
        else:
            shape_calculation_mode = 'scales'
            scales_value = np.array(scales).astype(np.float64)
            # +1e-5 guards against floor() dropping a dim on float round-off.
            sizes_value = np.floor(input_shape_as_array * scales_value + 1e-5).astype(np.int64)

        if precision == 'FP16':
            sizes_value = sizes_value.astype(np.float16)
            scales_value = scales_value.astype(np.float16)

        interp_mode = convert_onnx_mode(onnx_mode)

        interp_attrs = {
            'type': 'Interpolate',
            'kind': 'op',
            'mode': interp_mode,
            'shape_calculation_mode': shape_calculation_mode,
            'coordinate_transformation_mode': onnx_coordinate_transformation_mode,
            'nearest_mode': onnx_nearest_mode,
            'antialias': 0,
            'cube_coeff': cube_coeff,
            'pads_begin': np.zeros(input_rank).astype(np.int64),
            'pads_end': np.zeros(input_rank).astype(np.int64),
            'version': 'opset4'
        }

        if shape_calculation_mode == 'scales':
            ref_net = create_ref_net_in_scales_mode(precision, input_shape_as_array,
                                                    output_shape, sizes_value,
                                                    scales_value, interp_attrs)
        else:
            ref_net = create_ref_net_in_sizes_mode(precision, input_shape_as_array,
                                                   output_shape, sizes_value,
                                                   scales_value, interp_attrs)

    return onnx_net, ref_net
def make_init(name, dtype, tensor):
    """Wrap a numpy array into an ONNX TensorProto initializer.

    :param name: name given to the resulting tensor.
    :param dtype: ONNX data type (a TensorProto.DataType value).
    :param tensor: numpy array supplying both the dims and the values.
    """
    flat_values = tensor.reshape(tensor.size).tolist()
    return helper.make_tensor(name=name,
                              data_type=dtype,
                              dims=tensor.shape,
                              vals=flat_values)
def create_net_const(self, shape, op, precision, ir_version):
    """Build an ONNX model that applies a trigonometric op to a constant and
    concatenates the result with the input, plus the matching reference net.

    :param shape: shape of both the input tensor and the constant.
    :param op: ONNX op name; one of the sin/cos/tan family (see trig_funcs).
    :param precision: 'FP16' casts the reference constant to float16.
    :param ir_version: IR version used to decide whether a reference net is built.
    :return: (onnx ModelProto, reference net or None).
    """
    #
    #   Create ONNX model
    #
    import onnx
    from onnx import helper
    from onnx import TensorProto

    # Dispatch table replaces the former 9-branch if/elif chain and doubles
    # as the whitelist of supported ops.
    trig_funcs = {
        'Sin': np.sin,
        'Sinh': np.sinh,
        'Asin': np.arcsin,
        'Cos': np.cos,
        'Cosh': np.cosh,
        'Acos': np.arccos,
        'Tan': np.tan,
        'Tanh': np.tanh,
        'Atan': np.arctan,
    }
    assert op in trig_funcs

    concat_axis = 0
    output_shape = shape.copy()
    output_shape[concat_axis] *= 2

    input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape)
    output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape)

    # FIX: np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    constant = np.random.rand(*shape).astype(np.float64)

    node_const_def = onnx.helper.make_node(
        'Constant',
        inputs=[],
        outputs=['const'],
        value=helper.make_tensor(
            name='const_tensor',
            data_type=TensorProto.FLOAT,
            dims=constant.shape,
            vals=constant.flatten(),
        ),
    )

    node_def = onnx.helper.make_node(op, inputs=['const'], outputs=['res'])

    node_concat_def = onnx.helper.make_node('Concat',
                                            inputs=['input', 'res'],
                                            outputs=['output'],
                                            axis=concat_axis)

    # Create the graph (GraphProto)
    graph_def = helper.make_graph(
        [node_const_def, node_def, node_concat_def],
        'test_model',
        [input],
        [output],
    )

    # Create the model (ModelProto)
    onnx_net = helper.make_model(graph_def, producer_name='test_model')

    #
    #   Create reference IR net
    #
    # Apply the same op with numpy so the reference Const carries the
    # expected post-op values.
    constant = trig_funcs[op](constant)
    if precision == 'FP16':
        constant = constant.astype(np.float16)

    ref_net = None
    if check_ir_version(10, None, ir_version):
        nodes_attributes = {
            'input': {
                'kind': 'op',
                'type': 'Parameter'
            },
            'input_data': {
                'shape': shape,
                'kind': 'data'
            },
            'input_const_data': {
                'kind': 'data',
                'value': constant.flatten()
            },
            'const': {
                'kind': 'op',
                'type': 'Const'
            },
            'const_data': {
                'shape': shape,
                'kind': 'data'
            },
            'concat': {
                'kind': 'op',
                'type': 'Concat',
                'axis': concat_axis
            },
            'concat_data': {
                'shape': output_shape,
                'kind': 'data'
            },
            'result': {
                'kind': 'op',
                'type': 'Result'
            }
        }
        ref_net = build_graph(nodes_attributes,
                              [('input', 'input_data'),
                               ('input_const_data', 'const'),
                               ('const', 'const_data'),
                               ('input_data', 'concat'),
                               ('const_data', 'concat'),
                               ('concat', 'concat_data'),
                               ('concat_data', 'result')])

    return onnx_net, ref_net
def _sample_float_tensor(self):
    """Return a TensorProto named 'test' holding a random 2x3 float32 array."""
    values = np.random.randn(2, 3).astype(np.float32)
    flat = values.reshape(6).tolist()
    return helper.make_tensor(
        name='test',
        data_type=TensorProto.FLOAT,
        dims=(2, 3),
        vals=flat,
    )
# Options: # # https://github.com/onnx/onnx/blob/master/docs/Operators.md import onnx from onnx_tf.backend import prepare import onnx.helper as oh import numpy as np # build based on number of actions... cdfmat_arr=np.array([[1, 1, 1], [0, 1, 1], [0, 0, 1]]) cdfmat=oh.make_tensor("cdfmat", onnx.TensorProto.FLOAT, [3,3], cdfmat_arr.flatten().astype(float)) epsilon=oh.make_tensor("epsilon", onnx.TensorProto.FLOAT, [1], np.asarray([0.3])) input_tensors = [oh.make_tensor_value_info("scores", onnx.TensorProto.FLOAT, [1,3]), oh.make_tensor_value_info("cdfmat", onnx.TensorProto.FLOAT, [3,3]), oh.make_tensor_value_info("epsilon", onnx.TensorProto.FLOAT, [1])] output_tensors = [oh.make_tensor_value_info("pdf", onnx.TensorProto.FLOAT, [3]), # oh.make_tensor_value_info("top_action", onnx.TensorProto.INT32, [1]), oh.make_tensor_value_info("chosen_action", onnx.TensorProto.INT32, [1]), oh.make_tensor_value_info("cdf", onnx.TensorProto.FLOAT, [3])] one = oh.make_node( 'Constant', inputs=[], outputs=['one'], value=oh.make_tensor('one_tensor', onnx.TensorProto.FLOAT, [1], np.asarray([1]))) one_int = oh.make_node(