def _create_gemm(cls, n):
    # Gemm computes Y = alpha * (A @ B) + beta * C; Caffe2 has no single
    # equivalent op, so decompose into Scale, MatMul, and a broadcast Add.
    (A, B, C) = n.inputs
    (Y,) = n.outputs
    alpha = n.attrs.get('alpha', 1.)
    beta = n.attrs.get('beta', 1.)

    ops = []
    if alpha != 1:
        scaled_A = dummy_name()
        ops.append(core.CreateOperator('Scale', [A], [scaled_A], scale=alpha))
        A = scaled_A
    if beta != 1:
        scaled_C = dummy_name()
        ops.append(core.CreateOperator('Scale', [C], [scaled_C], scale=beta))
        C = scaled_C

    AB = dummy_name()
    ops.append(core.CreateOperator('MatMul', [A, B], [AB],
                                   trans_a=n.attrs.get('transA', 0),
                                   trans_b=n.attrs.get('transB', 0)))
    ops.append(core.CreateOperator('Add', [AB, C], [Y],
                                   broadcast=n.attrs.get('broadcast', 0)))
    return ops

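The decomposition above follows the ONNX Gemm definition, Y = alpha * (A @ B) + beta * C. A minimal numpy sketch of the same arithmetic (illustrative only; the sizes and names are made up):

import numpy as np

A = np.random.randn(2, 3)
B = np.random.randn(3, 4)
C = np.random.randn(2, 4)
alpha, beta = 0.5, 2.0

scaled_A = alpha * A    # the 'Scale' op on A
scaled_C = beta * C     # the 'Scale' op on C
AB = scaled_A @ B       # 'MatMul'
Y = AB + scaled_C       # 'Add' with broadcast

assert np.allclose(Y, alpha * (A @ B) + beta * C)
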
def _onnx_model_to_caffe2_net(cls, onnx_model, device, opset_version, include_initializers):
    device_option = get_device_option(Device(device))

    init_model = ModelProto()
    init_model.ParseFromString(cls.optimize_onnx(onnx_model.SerializeToString(), init=True))
    cls._inplace_rewrite(init_model.graph)

    predict_model = ModelProto()
    predict_model.ParseFromString(cls.optimize_onnx(onnx_model.SerializeToString(), predict=True))
    cls._inplace_rewrite(predict_model.graph)

    init_net = caffe2_pb2.NetDef()
    predict_net = caffe2_pb2.NetDef()

    init_net.name = onnx_model.graph.name + '_init'
    predict_net.name = onnx_model.graph.name + '_predict'

    if include_initializers:
        init_net.op.extend(cls._create_tensor_filling_op(tp)
                           for tp in onnx_model.graph.initializer)

    dummy_name(cls._all_names_in_graph(init_model.graph) |
               cls._all_names_in_graph(predict_model.graph))

    for net, model in ((init_net, init_model), (predict_net, predict_model)):
        net.device_option.CopyFrom(device_option)
        for node in model.graph.node:
            net.op.extend(cls._onnx_node_to_caffe2_op(node, opset_version))
        net.external_output.extend(
            value_info.name for value_info in model.graph.output)
        net.external_input.extend(
            value_info.name for value_info in model.graph.input)

    return init_net, predict_net

def _create_channel_shuffle(cls, op_def, shapes):
    x, = op_def.input
    y, = op_def.output
    n, c, h, w = shapes[x]
    args = {arg.name: arg for arg in op_def.arg}
    g = args['group'].i
    assert c % g == 0

    nodes = []

    # Split the channel dimension into (groups, channels-per-group).
    tmp1 = dummy_name()
    nodes.append(helper.make_node(
        'Reshape',
        inputs=[x],
        outputs=[tmp1],
        shape=[n, g, c // g, h, w],
    ))

    # Swap the group axis with the per-group channel axis.
    tmp2 = dummy_name()
    nodes.append(helper.make_node(
        'Transpose',
        inputs=[tmp1],
        outputs=[tmp2],
        perm=[0, 2, 1, 3, 4],
    ))

    # Flatten back to NCHW.
    nodes.append(helper.make_node(
        'Reshape',
        inputs=[tmp2],
        outputs=[y],
        shape=[n, c, h, w],
    ))
    return nodes

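Channel shuffle has no single ONNX op here, so it is emitted as the Reshape/Transpose/Reshape chain above. A numpy sketch of why that chain shuffles channels (dimensions are illustrative):

import numpy as np

n, c, h, w, g = 2, 6, 4, 4, 3
x = np.random.randn(n, c, h, w)

shuffled = (x.reshape(n, g, c // g, h, w)  # split channels into g groups
             .transpose(0, 2, 1, 3, 4)     # swap group and per-group axes
             .reshape(n, c, h, w))         # flatten back to NCHW

# Input channel i lands at output channel (i % (c // g)) * g + i // (c // g).
for i in range(c):
    j = (i % (c // g)) * g + i // (c // g)
    assert np.array_equal(shuffled[0, j], x[0, i])
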
def _create_gemm(cls, init_model, pred_model, n, opset_version):
    (A, B, C) = n.inputs
    (Y,) = n.outputs
    alpha = n.attrs.get('alpha', 1.)
    beta = n.attrs.get('beta', 1.)

    ops = []
    if alpha != 1:
        scaled_A = dummy_name()
        ops.append(
            core.CreateOperator('Scale', [A], [scaled_A], scale=alpha))
        A = scaled_A
    if beta != 1:
        scaled_C = dummy_name()
        ops.append(
            core.CreateOperator('Scale', [C], [scaled_C], scale=beta))
        C = scaled_C

    trans_a = n.attrs.get('transA', 0)
    trans_b = n.attrs.get('transB', 0)
    broadcast = n.attrs.get('broadcast', 0)
    # With trans_a=0, trans_b=1, broadcast=1, Gemm matches Caffe2's FC exactly,
    # so emit a single FC instead of MatMul + Add.
    if not trans_a and trans_b and broadcast:
        ops.append(core.CreateOperator('FC', [A, B, C], [Y]))
    else:
        AB = dummy_name()
        ops.append(
            core.CreateOperator('MatMul', [A, B], [AB],
                                trans_a=trans_a, trans_b=trans_b))
        ops.append(
            core.CreateOperator('Add', [AB, C], [Y], broadcast=broadcast))
    return ops

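The fast path exploits that Caffe2's FC computes Y = X @ W.T + b, which is exactly Gemm with trans_a=0, trans_b=1, broadcast=1 once the optional Scale ops have absorbed alpha and beta. A small numpy check of the layout convention (shapes are illustrative):

import numpy as np

X = np.random.randn(5, 8)   # (batch, in_features)
W = np.random.randn(3, 8)   # (out_features, in_features), i.e. transB=1 layout
b = np.random.randn(3)      # broadcast across the batch

fc_out = X @ W.T + b          # what a single FC op computes
matmul_add = (X @ W.T) + b    # MatMul(trans_b=1) followed by broadcast Add
assert np.allclose(fc_out, matmul_add)
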
def _create_lstm(cls, init_model, pred_model, n, opset_version):
    assert init_model is not None, "cannot convert LSTMs without access to the full model"
    assert pred_model is not None, "cannot convert LSTMs without access to the full model"

    attrs = dict(n.attrs)  # make a copy, which is safe to mutate
    hidden_size = attrs.pop('hidden_size')
    assert not attrs, "unsupported LSTM attributes: " + str(attrs.keys())

    input_blob, W, R, B, sequence_lens, initial_h, initial_c = n.inputs

    input_size = cls._rnn_shape_inference(init_model, pred_model, n, input_blob, W)
    if input_size is None:
        raise RuntimeError("best-effort shape inference for LSTM input failed")

    name = dummy_name()
    init_net = core.Net("init-net")
    pred_mh = ModelHelper()

    hidden_t_all, hidden_t_last, _, _, params = rnn_cell.LSTM(
        pred_mh,
        input_blob,
        sequence_lens,
        [initial_h, initial_c],
        input_size,
        hidden_size,
        name,
        forward_only=True,
        return_params=True
    )

    # input and recurrence biases are squashed together in onnx but not in caffe2
    Bi = name + "_bias_i2h"
    Br = name + "_bias_gates"
    init_net.Slice(B, Bi, starts=[0 * hidden_size], ends=[4 * hidden_size])
    init_net.Slice(B, Br, starts=[4 * hidden_size], ends=[8 * hidden_size])

    # caffe2 has a different order from onnx. We need to rearrange
    #  i o f c -> i f o c
    reforms = ((W, params['input']['weights'], [(0, input_size)]),
               (R, params['recurrent']['weights'], [(0, hidden_size)]),
               (Bi, params['input']['biases'], []),
               (Br, params['recurrent']['biases'], []))
    for name_from, name_to, extra_dims in reforms:
        xi, xo, xf, xc = [name_from + suffix for suffix in ("_i", "_o", "_f", "_c")]
        for i, x in enumerate([xi, xo, xf, xc]):
            dim0 = i * hidden_size, (i + 1) * hidden_size
            starts, ends = zip(dim0, *extra_dims)
            init_net.Slice(name_from, x, starts=starts, ends=ends)
        init_net.Concat([xi, xf, xo, xc], [name_to, dummy_name()], axis=0)

    pred_mh.net = pred_mh.net.Clone(
        "dummy-clone-net",
        blob_remap={hidden_t_all: n.outputs[0], hidden_t_last: n.outputs[1]}
    )

    return Caffe2Ops(list(pred_mh.Proto().op),
                     list(init_net.Proto().op),
                     list(pred_mh.Proto().external_input))

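The reordering loop is easier to see in numpy: ONNX stacks the four LSTM gate blocks along dim 0 as [i, o, f, c], while Caffe2's LSTM expects [i, f, o, c]. A sketch under made-up sizes:

import numpy as np

hidden_size, input_size = 4, 3
W_onnx = np.random.randn(4 * hidden_size, input_size)

i, o, f, c = np.split(W_onnx, 4, axis=0)         # the four Slice ops
W_caffe2 = np.concatenate([i, f, o, c], axis=0)  # the Concat op

assert np.array_equal(W_caffe2[hidden_size:2 * hidden_size], f)
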
def _create_gemm(cls, op_def, shapes):
    x, w, b = op_def.input
    args = {arg.name: arg for arg in op_def.arg}
    y, = op_def.output
    x_shape = list(shapes[x])

    nodes = []
    if 'axis' in args:
        axis = args['axis'].i
        outer = np.prod(x_shape[:axis]).astype(int)
        inner = np.prod(x_shape[axis:]).astype(int)
        reshaped_x = dummy_name()
        nodes.append(helper.make_node(
            'Reshape',
            inputs=[x],
            outputs=[reshaped_x],
            shape=[outer, inner],
        ))
        x = reshaped_x

    if 'axis_w' in args:
        axis_w = args['axis_w'].i
        w_shape = shapes[w]
        outer = np.prod(w_shape[:axis_w]).astype(int).item()
        inner = np.prod(w_shape[axis_w:]).astype(int).item()
        reshaped_w = dummy_name()
        nodes.append(helper.make_node(
            'Reshape',
            inputs=[w],
            outputs=[reshaped_w],
            shape=[outer, inner],
        ))
        w = reshaped_w

    gemm_y_output = dummy_name() if 'axis' in args else y
    nodes.append(helper.make_node(
        'Gemm',
        inputs=[x, w, b],
        outputs=[gemm_y_output],
        name=op_def.name,
        transB=1,
        broadcast=1,
    ))

    if 'axis' in args:
        axis = args['axis'].i
        nodes.append(helper.make_node(
            'Reshape',
            inputs=[gemm_y_output],
            outputs=[y],
            shape=x_shape[:axis] + [-1],
        ))

    return nodes

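This frontend direction undoes Caffe2 FC's axis/axis_w flattening, since ONNX Gemm only accepts 2-D operands: the input collapses to (prod(dims[:axis]), prod(dims[axis:])) before the Gemm, and the result is reshaped back. A numpy sketch with made-up shapes:

import numpy as np

x = np.random.randn(2, 3, 5)
axis = 2
outer = int(np.prod(x.shape[:axis]))   # 2 * 3 = 6
inner = int(np.prod(x.shape[axis:]))   # 5

w = np.random.randn(7, inner)
b = np.random.randn(7)

y2d = x.reshape(outer, inner) @ w.T + b        # Gemm with transB=1, broadcast=1
y = y2d.reshape(list(x.shape[:axis]) + [-1])   # the trailing Reshape

assert y.shape == (2, 3, 7)
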
def _create_slice(cls, op_def, shapes):
    node = cls._common_caffe2_op_to_onnx_node(op_def, shapes)
    attrs = {attr.name: attr for attr in node.attribute}

    nodes = []
    data = node.input[0]
    n_dims = len(shapes[data])
    if 'starts' in attrs:
        assert 'ends' in attrs
        assert len(node.input) == 1
        starts = dummy_name()
        ends = dummy_name()
        axes = dummy_name()
        # (The tensor `name` passed to make_tensor below is immaterial for
        # graph wiring; only the node outputs matter.)
        nodes.append(helper.make_node(
            'Constant',
            inputs=[],
            outputs=[starts],
            value=helper.make_tensor(
                name=axes,
                data_type=TensorProto.INT64,
                dims=(n_dims,),
                vals=attrs.pop('starts').ints,
            ),
        ))
        nodes.append(helper.make_node(
            'Constant',
            inputs=[],
            outputs=[ends],
            value=helper.make_tensor(
                name=axes,
                data_type=TensorProto.INT64,
                dims=(n_dims,),
                vals=attrs.pop('ends').ints,
            ),
        ))
    else:
        assert len(node.input) == 3
        starts, ends = node.input[1:]

    axes = dummy_name()
    nodes.append(helper.make_node(
        'Constant',
        inputs=[],
        outputs=[axes],
        value=helper.make_tensor(
            name=axes,
            data_type=TensorProto.INT32,
            dims=(n_dims,),
            vals=list(range(n_dims)),
        ),
    ))

    node.input[:] = [data, axes, starts, ends]
    del node.attribute[:]
    node.attribute.extend(attrs.values())
    nodes.append(node)
    return nodes

def onnx_graph_to_caffe2_net(cls, graph_def, device="CPU", opset_version=_known_opset_version):
    device_option = get_device_option(Device(device))
    cls._inplace_rewrite(graph_def)
    if graph_def.initializer:
        init_net = cls.onnx_initializer_to_caffe2_init_net(
            graph_def.initializer)
        initialized = {init.name for init in graph_def.initializer}
    else:
        init_net = caffe2_pb2.NetDef()
        initialized = set()

    dummy_name(cls._all_names_in_graph(graph_def) | initialized)

    predict_net = caffe2_pb2.NetDef()
    predict_net.name = graph_def.name
    for node in graph_def.node:
        predict_net.op.extend(
            cls._onnx_node_to_caffe2_op(node, opset_version))
    predict_net.external_input.extend(
        value_info.name for value_info in graph_def.input)
    predict_net.external_output.extend(
        value_info.name for value_info in graph_def.output)

    # Caffe2 predictor requires all input blobs (including the
    # real model inputs) to be initialized in init_net
    for value_info in graph_def.input:
        if value_info.name in initialized:
            continue
        op_def = caffe2_pb2.OperatorDef()
        op_def.output.extend([value_info.name])
        op_def.type = 'GivenTensorFill'

        shape = list(d.dim_value for d in value_info.type.tensor_type.shape.dim)
        # TODO: Putting this in the init net will make it run faster, but it
        # causes some tests to fail...
        # shape = (1,)

        shape_arg = op_def.arg.add()
        shape_arg.name = 'shape'
        shape_arg.ints.extend(shape)

        values_arg = op_def.arg.add()
        values_arg.name = 'values'
        values_arg.floats.extend(np.ones(shape).flatten().tolist())

        init_net.op.extend([op_def])

    # Set the device option for the init_net and predict_net.
    init_net.device_option.CopyFrom(device_option)
    predict_net.device_option.CopyFrom(device_option)

    return init_net, predict_net

def test_dummy_name(self):
    # Resetting with the same reserved set must reproduce the same sequence.
    dummy_name([])
    names_1 = [dummy_name() for _ in range(3)]
    dummy_name([])
    names_2 = [dummy_name() for _ in range(3)]
    self.assertEqual(names_1, names_2)

    # Reserving previously generated names must keep new names disjoint.
    dummy_name(names_1)
    names_3 = [dummy_name() for _ in range(3)]
    self.assertFalse(set(names_1) & set(names_3))

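The test pins down a small contract. A minimal sketch of a generator satisfying it (not the real implementation, which lives alongside the converter): passing a collection resets the counter and reserves those names; calling with no argument yields the next non-reserved name.

_counter = 0
_reserved = set()

def dummy_name(reserved=None):
    global _counter, _reserved
    if reserved is not None:
        _counter = 0                  # reset: future sequences repeat exactly
        _reserved = set(reserved)     # never hand these out again
        return None
    while True:
        _counter += 1
        name = 'OC2_DUMMY_{}'.format(_counter)
        if name not in _reserved:
            return name
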
def _create_concat(cls, n, opset_version):
    # TODO: Caffe2 Concat has an extra output. It should only be
    # used during training, so we should change Caffe2 to allow
    # 1 output.
    op = cls._common_onnx_node_to_caffe2_op(n, opset_version)
    assert len(op.output) == 1
    op.output.append(dummy_name())
    return op

def onnx_graph_to_caffe2_net(cls, graph_def):
    cls._inplace_rewrite(graph_def)
    if graph_def.initializer:
        init_net = cls.onnx_initializer_to_caffe2_init_net(
            graph_def.initializer)
        initialized = {init.name for init in graph_def.initializer}
    else:
        init_net = caffe2_pb2.NetDef()
        initialized = set()

    dummy_name(cls._all_names_in_graph(graph_def) | initialized)

    predict_net = caffe2_pb2.NetDef()
    predict_net.name = graph_def.name
    for node in graph_def.node:
        predict_net.op.extend(cls._onnx_node_to_caffe2_op(node))
    predict_net.external_input.extend(
        value_info.name for value_info in graph_def.input)
    predict_net.external_output.extend(
        value_info.name for value_info in graph_def.output)

    # Caffe2 predictor requires all input blobs (including the
    # real model inputs) to be initialized in init_net
    for value_info in graph_def.input:
        if value_info.name in initialized:
            continue
        op_def = caffe2_pb2.OperatorDef()
        op_def.output.extend([value_info.name])
        op_def.type = 'GivenTensorFill'

        shape = list(d.dim_value for d in value_info.type.tensor_type.shape.dim)

        shape_arg = op_def.arg.add()
        shape_arg.name = 'shape'
        shape_arg.ints.extend(shape)

        values_arg = op_def.arg.add()
        values_arg.name = 'values'
        values_arg.floats.extend(np.ones(shape).flatten().tolist())

        init_net.op.extend([op_def])

    return init_net, predict_net

def _create_logsoftmax(cls, init_model, pred_model, n, opset_version):
    # NB: this implementation is not backward stable.
    (A,) = n.inputs
    (Y,) = n.outputs
    axis = n.attrs.get('axis', 1)
    ops = []
    softmax_A = dummy_name()
    ops.append(core.CreateOperator('Softmax', [A], [softmax_A], axis=axis))
    ops.append(core.CreateOperator('Log', [softmax_A], [Y]))
    return ops

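The stability comment matters because composing Softmax and Log is exact in infinite precision but fragile in float: for very negative logits, softmax underflows to 0 and the log diverges, whereas the fused x - logsumexp(x) form stays finite (the backward pass compounds the same problem). A quick numpy illustration:

import numpy as np

x = np.array([0.0, -1000.0])

with np.errstate(divide='ignore'):
    naive = np.log(np.exp(x) / np.exp(x).sum())   # [0., -inf]

m = x.max()
stable = x - (m + np.log(np.exp(x - m).sum()))    # [0., -1000.]
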
def make_rnn(direction_offset):
    name = dummy_name()

    # input and recurrence biases are squashed together in
    # onnx but not in caffe2
    bias_offset = 2 * direction_offset * hidden_size
    init_net.Slice(B, name + "/i2h_b",
                   starts=[bias_offset + 0 * hidden_size],
                   ends=[bias_offset + 1 * hidden_size])
    init_net.Slice(B, name + "/gates_t_b",
                   starts=[bias_offset + 1 * hidden_size],
                   ends=[bias_offset + 2 * hidden_size])

    weight_offset = direction_offset * hidden_size
    init_net.Slice(W, name + '/i2h_w',
                   starts=[weight_offset + 0 * hidden_size, 0],
                   ends=[weight_offset + 1 * hidden_size, -1])
    init_net.Slice(R, name + '/gates_t_w',
                   starts=[weight_offset + 0 * hidden_size, 0],
                   ends=[weight_offset + 1 * hidden_size, -1])

    initial_h_sliced = name + '/initial_h'
    init_net.Slice(initial_h, initial_h_sliced,
                   starts=[direction_offset + 0, 0, 0],
                   ends=[direction_offset + 1, -1, -1])

    if direction_offset == 1:
        input = pred_mh.net.ReversePackedSegs(
            [input_blob, sequence_lens], name + "/input-reversed")
    else:
        input = input_blob

    hidden_t_all, hidden_t_last = rnn_cell.BasicRNN(
        pred_mh,
        input,
        sequence_lens,
        [initial_h_sliced],
        input_size,
        hidden_size,
        name,
        drop_states=True,
        forward_only=True,
        activation=activation)

    if direction_offset == 1:
        hidden_t_all = pred_mh.net.ReversePackedSegs(
            [hidden_t_all, sequence_lens], name + "/output-reversed")

    return hidden_t_all, hidden_t_last

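ReversePackedSegs is what lets the reverse direction reuse the forward cell: each sequence is flipped along time up to its own length, run through the RNN, and flipped back. A rough numpy model of that semantics (the operator itself is Caffe2's; this sketch only mimics its effect):

import numpy as np

def reverse_packed_segs(x, lengths):
    # x: (T, N, D) time-major batch; lengths: one entry per sequence
    out = x.copy()
    for n, l in enumerate(lengths):
        out[:l, n] = x[:l, n][::-1]
    return out

x = np.arange(24, dtype=float).reshape(4, 2, 3)   # T=4, N=2, D=3
rev = reverse_packed_segs(x, [4, 2])
assert np.array_equal(rev[0, 1], x[1, 1])   # steps 0 and 1 of sequence 1 swap
assert np.array_equal(rev[2, 1], x[2, 1])   # padding beyond length 2 untouched
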
def _create_rnn(cls, init_model, pred_model, n, opset_version):
    assert init_model is not None, "cannot convert RNNs without access to the full model"
    assert pred_model is not None, "cannot convert RNNs without access to the full model"

    attrs = dict(n.attrs)  # make a copy, which is safe to mutate
    hidden_size = attrs.pop('hidden_size')
    activation = attrs.pop('activations')[0]
    assert not attrs, "unsupported RNN attributes: " + str(attrs.keys())

    input_blob, W, R, B, sequence_lens, initial_h = n.inputs
    if sequence_lens == "":
        sequence_lens = None

    input_size = cls._rnn_shape_inference(init_model, pred_model, n, input_blob, W)
    if input_size is None:
        raise RuntimeError("best-effort shape inference for RNN input failed")

    name = dummy_name()
    init_net = core.Net("init-net")
    pred_mh = ModelHelper()

    # input and recurrence biases are squashed together in onnx but not in caffe2
    Bi = name + "/i2h_b"
    Br = name + "/gates_t_b"
    init_net.Slice(B, Bi, starts=[0 * hidden_size], ends=[1 * hidden_size])
    init_net.Slice(B, Br, starts=[1 * hidden_size], ends=[2 * hidden_size])

    hidden_t_all, hidden_t_last = rnn_cell.BasicRNN(
        pred_mh,
        input_blob,
        sequence_lens,
        [initial_h],
        input_size,
        hidden_size,
        name,
        drop_states=True,
        forward_only=True,
        activation=activation)

    init_net.Copy(W, name + '/i2h_w')
    init_net.Copy(R, name + '/gates_t_w')

    pred_mh.net = pred_mh.net.Clone(
        "dummy-clone-net",
        blob_remap={hidden_t_all: n.outputs[0], hidden_t_last: n.outputs[1]}
    )

    return Caffe2Ops(list(pred_mh.Proto().op),
                     list(init_net.Proto().op),
                     list(pred_mh.Proto().external_input))

def _create_optimized_rnn(cls, n, opset_version):
    # TODO: we cheat and rely on the fact that ONNX weight layout matches
    # CuDNN's. Properly we should extract the weight tensor and invoke
    # RecurrentParamSet exposed by C2
    # TODO: fix Caffe2 to accept initial_h and initial_c as optional inputs
    assert len(n.inputs) == 4, 'All inputs need to be specified for now'
    assert len(n.outputs) == 3, 'All outputs need to be specified for now'

    (w, x, in_h, in_c) = n.inputs
    (y, out_h, out_c) = n.outputs

    op = core.CreateOperator(
        'Recurrent',
        [x, in_h, in_c, w],
        [y, out_h, out_c, dummy_name(), dummy_name()],
        rnn_mode=n.attrs['cell_type'],
        bidirectional=n.attrs.get('directions', 1) - 1,
        hidden_size=n.attrs['hidden_size'],
        num_layers=n.attrs.get('num_layers', 1),
        input_mode='skip' if n.attrs.get('skip_input_transform', 0) else 'linear')
    return op

def _create_reshape(cls, n):
    c2_op = cls._common_onnx_node_to_caffe2_op(n)
    # Caffe2 has an extra output
    c2_op.output.append(dummy_name())
    return c2_op

def test_dummy_name(self):
    n1 = dummy_name()
    n2 = dummy_name()
    assert n1 != n2, "Got same names in different calls: {}".format(n1)

def make_gru(direction_offset):
    name = dummy_name()

    # input and recurrence biases are squashed together in
    # onnx but not in caffe2
    bias_offset = 6 * direction_offset * hidden_size
    Bi = init_net.Slice(B, name + "_bias_i2h",
                        starts=[bias_offset + 0 * hidden_size],
                        ends=[bias_offset + 3 * hidden_size])
    Br = init_net.Slice(B, name + "_bias_gates",
                        starts=[bias_offset + 3 * hidden_size],
                        ends=[bias_offset + 6 * hidden_size])

    weight_offset = 3 * direction_offset * hidden_size
    W_ = init_net.Slice(W, name + '/i2h_w_pre',
                        starts=[weight_offset + 0 * hidden_size, 0],
                        ends=[weight_offset + 3 * hidden_size, -1])
    R_ = init_net.Slice(R, name + '/gates_t_w_pre',
                        starts=[weight_offset + 0 * hidden_size, 0],
                        ends=[weight_offset + 3 * hidden_size, -1])

    # caffe2 has a different order from onnx. We need to rearrange
    #  z r h -> r z h
    reforms = ((W_, 'i2h_w', True, [(0, -1)]),
               (R_, 'gate_t_w', False, [(0, -1)]),
               (Bi, 'i2h_b', True, []),
               (Br, 'gate_t_b', False, []))
    for name_from, name_to, do_concat, extra_dims in reforms:
        xz, xr, xh = ['%s/%s_%s' % (name, prefix, name_to)
                      for prefix in ('update', 'reset', 'output')]
        for i, x in enumerate([xz, xr, xh]):
            dim0 = i * hidden_size, (i + 1) * hidden_size
            starts, ends = zip(dim0, *extra_dims)
            init_net.Slice(name_from, x, starts=starts, ends=ends)
        if do_concat:
            init_net.Concat([xr, xz, xh],
                            ['%s/%s' % (name, name_to), dummy_name()],
                            axis=0)

    initial_h_sliced = name + '/initial_h'
    init_net.Slice(initial_h, initial_h_sliced,
                   starts=[direction_offset + 0, 0, 0],
                   ends=[direction_offset + 1, -1, -1])

    if direction_offset == 1:
        input = pred_mh.net.ReversePackedSegs(
            [input_blob, sequence_lens], name + "/input-reversed")
    else:
        input = input_blob

    hidden_t_all, hidden_t_last = gru_cell.GRU(
        pred_mh,
        input,
        sequence_lens,
        [initial_h_sliced],
        input_size,
        hidden_size,
        name,
        drop_states=True,
        forward_only=True,
        linear_before_reset=linear_before_reset
    )

    if direction_offset == 1:
        hidden_t_all = pred_mh.net.ReversePackedSegs(
            [hidden_t_all, sequence_lens], name + "/output-reversed")

    return hidden_t_all, hidden_t_last

def _create_gru(cls, init_model, pred_model, n, opset_version):
    assert init_model is not None, "cannot convert GRUs without access to the full model"
    assert pred_model is not None, "cannot convert GRUs without access to the full model"

    attrs = dict(n.attrs)  # make a copy, which is safe to mutate
    hidden_size = attrs.pop('hidden_size')
    linear_before_reset = attrs.pop('linear_before_reset', 0)
    direction = force_unicode(attrs.pop('direction', 'forward'))
    assert not attrs, "unsupported GRU attributes: " + str(attrs.keys())
    assert direction in ['forward', 'bidirectional'], "unsupported backwards GRU"

    input_blob, W, R, B, sequence_lens, initial_h = n.inputs
    if sequence_lens == "":
        sequence_lens = None

    input_size = cls._rnn_shape_inference(init_model, pred_model, n, input_blob, W)
    if input_size is None:
        raise RuntimeError("best-effort shape inference for GRU input failed")

    init_net = core.Net("init-net")
    pred_mh = ModelHelper()

    def make_gru(direction_offset):
        name = dummy_name()

        # input and recurrence biases are squashed together in
        # onnx but not in caffe2
        bias_offset = 6 * direction_offset * hidden_size
        Bi = init_net.Slice(B, name + "_bias_i2h",
                            starts=[bias_offset + 0 * hidden_size],
                            ends=[bias_offset + 3 * hidden_size])
        Br = init_net.Slice(B, name + "_bias_gates",
                            starts=[bias_offset + 3 * hidden_size],
                            ends=[bias_offset + 6 * hidden_size])

        weight_offset = 3 * direction_offset * hidden_size
        W_ = init_net.Slice(W, name + '/i2h_w_pre',
                            starts=[weight_offset + 0 * hidden_size, 0],
                            ends=[weight_offset + 3 * hidden_size, -1])
        R_ = init_net.Slice(R, name + '/gates_t_w_pre',
                            starts=[weight_offset + 0 * hidden_size, 0],
                            ends=[weight_offset + 3 * hidden_size, -1])

        # caffe2 has a different order from onnx. We need to rearrange
        #  z r h -> r z h
        reforms = ((W_, 'i2h_w', True, [(0, -1)]),
                   (R_, 'gate_t_w', False, [(0, -1)]),
                   (Bi, 'i2h_b', True, []),
                   (Br, 'gate_t_b', False, []))
        for name_from, name_to, do_concat, extra_dims in reforms:
            xz, xr, xh = ['%s/%s_%s' % (name, prefix, name_to)
                          for prefix in ('update', 'reset', 'output')]
            for i, x in enumerate([xz, xr, xh]):
                dim0 = i * hidden_size, (i + 1) * hidden_size
                starts, ends = zip(dim0, *extra_dims)
                init_net.Slice(name_from, x, starts=starts, ends=ends)
            if do_concat:
                init_net.Concat([xr, xz, xh],
                                ['%s/%s' % (name, name_to), dummy_name()],
                                axis=0)

        initial_h_sliced = name + '/initial_h'
        init_net.Slice(initial_h, initial_h_sliced,
                       starts=[direction_offset + 0, 0, 0],
                       ends=[direction_offset + 1, -1, -1])

        if direction_offset == 1:
            input = pred_mh.net.ReversePackedSegs(
                [input_blob, sequence_lens], name + "/input-reversed")
        else:
            input = input_blob

        hidden_t_all, hidden_t_last = gru_cell.GRU(
            pred_mh,
            input,
            sequence_lens,
            [initial_h_sliced],
            input_size,
            hidden_size,
            name,
            drop_states=True,
            forward_only=True,
            linear_before_reset=linear_before_reset
        )

        if direction_offset == 1:
            hidden_t_all = pred_mh.net.ReversePackedSegs(
                [hidden_t_all, sequence_lens], name + "/output-reversed")

        return hidden_t_all, hidden_t_last

    if direction == 'forward':
        hidden_t_all, hidden_t_last = make_gru(0)
        pred_mh.net = pred_mh.net.Clone(
            "dummy-clone-net",
            blob_remap={hidden_t_all: n.outputs[0], hidden_t_last: n.outputs[1]}
        )
    elif direction == 'bidirectional':
        hidden_t_all_f, hidden_t_last_f = make_gru(0)
        hidden_t_all_b, hidden_t_last_b = make_gru(1)
        pred_mh.net.Concat([hidden_t_all_f, hidden_t_all_b],
                           [n.outputs[0], dummy_name()], axis=2)
        pred_mh.net.Concat([hidden_t_last_f, hidden_t_last_b],
                           [n.outputs[1], dummy_name()], axis=2)

    return Caffe2Ops(list(pred_mh.Proto().op),
                     list(init_net.Proto().op),
                     list(pred_mh.Proto().external_input))

def make_lstm(direction_offset):
    name = dummy_name()

    # input and recurrence biases are squashed together in
    # onnx but not in caffe2
    bias_offset = 8 * direction_offset * hidden_size
    Bi = init_net.Slice(B, name + "_bias_i2h",
                        starts=[bias_offset + 0 * hidden_size],
                        ends=[bias_offset + 4 * hidden_size])
    Br = init_net.Slice(B, name + "_bias_gates",
                        starts=[bias_offset + 4 * hidden_size],
                        ends=[bias_offset + 8 * hidden_size])

    weight_offset = 4 * direction_offset * hidden_size
    W_ = init_net.Slice(W, name + '/i2h_w_pre',
                        starts=[weight_offset + 0 * hidden_size, 0],
                        ends=[weight_offset + 4 * hidden_size, -1])
    R_ = init_net.Slice(R, name + '/gates_t_w_pre',
                        starts=[weight_offset + 0 * hidden_size, 0],
                        ends=[weight_offset + 4 * hidden_size, -1])

    # caffe2 has a different order from onnx. We need to rearrange
    #  i o f c -> i f o c
    reforms = ((W_, 'i2h_w', [(0, -1)]),
               (R_, 'gates_t_w', [(0, -1)]),
               (Bi, 'i2h_b', []),
               (Br, 'gates_t_b', []))
    for name_from, name_to, extra_dims in reforms:
        xi, xo, xf, xc = [name_from + suffix for suffix in ("_i", "_o", "_f", "_c")]
        for i, x in enumerate([xi, xo, xf, xc]):
            dim0 = i * hidden_size, (i + 1) * hidden_size
            starts, ends = zip(dim0, *extra_dims)
            init_net.Slice(name_from, x, starts=starts, ends=ends)
        init_net.Concat([xi, xf, xo, xc],
                        ['%s/%s' % (name, name_to), dummy_name()],
                        axis=0)

    initial_h_sliced = name + '/initial_h'
    init_net.Slice(initial_h, initial_h_sliced,
                   starts=[direction_offset + 0, 0, 0],
                   ends=[direction_offset + 1, -1, -1])
    initial_c_sliced = name + '/initial_c'
    init_net.Slice(initial_c, initial_c_sliced,
                   starts=[direction_offset + 0, 0, 0],
                   ends=[direction_offset + 1, -1, -1])

    if direction_offset == 1:
        input = pred_mh.net.ReversePackedSegs(
            [input_blob, sequence_lens], name + "/input-reversed")
    else:
        input = input_blob

    hidden_t_all, hidden_t_last, _, _, params = rnn_cell.LSTM(
        pred_mh,
        input,
        sequence_lens,
        [initial_h_sliced, initial_c_sliced],
        input_size,
        hidden_size,
        name,
        drop_states=True,
        forward_only=True,
        return_params=True
    )

    if direction_offset == 1:
        hidden_t_all = pred_mh.net.ReversePackedSegs(
            [hidden_t_all, sequence_lens], name + "/output-reversed")

    return hidden_t_all, hidden_t_last

def _create_slice(cls, init_model, pred_model, n, opset_version):
    op = cls._common_onnx_node_to_caffe2_op(init_model, pred_model, n, opset_version)
    args = {arg.name: arg for arg in op.arg}
    starts_vals = np.array(args.pop('starts').ints, dtype=np.int64).tolist()
    ends_vals = np.array(
        [i - 1 if i < 0 else i for i in args.pop('ends').ints],
        dtype=np.int64).tolist()
    if 'axes' in args:
        axes_vals = np.array(args.pop('axes').ints, dtype=np.int32).tolist()
    else:
        ndims = len(starts_vals)
        axes_vals = np.array(range(ndims), dtype=np.int32).tolist()

    data, = op.input
    ops = []

    shape_tensor = dummy_name()
    ops.append(core.CreateOperator('Shape', [data], [shape_tensor]))

    axes_tensor = dummy_name()
    ops.extend([
        core.CreateOperator(
            'GivenTensorIntFill',
            [],
            [axes_tensor],
            shape=[len(axes_vals)],
            values=axes_vals,
        ),
    ])

    starts_vals_tensor = dummy_name()
    starts_tensor = dummy_name()
    casted_starts_tensor = dummy_name()
    ops.extend([
        core.CreateOperator(
            'GivenTensorInt64Fill',
            [],
            [starts_vals_tensor],
            shape=[len(starts_vals)],
            values=starts_vals,
        ),
        core.CreateOperator(
            'ConstantFill',
            [shape_tensor],
            [starts_tensor],
            dtype=caffe2_pb2.TensorProto.INT64,
            value=0,
        ),
        core.CreateOperator(
            'ScatterAssign',
            [starts_tensor, axes_tensor, starts_vals_tensor],
            [starts_tensor],
        ),
        # Slice only accepts starts as int
        core.CreateOperator(
            'Cast',
            [starts_tensor],
            [casted_starts_tensor],
            to=caffe2_pb2.TensorProto.INT32,
        ),
    ])

    ends_vals_tensor = dummy_name()
    ends_tensor = dummy_name()
    casted_ends_tensor = dummy_name()
    ops.extend([
        core.CreateOperator(
            'GivenTensorInt64Fill',
            [],
            [ends_vals_tensor],
            shape=[len(ends_vals)],
            values=ends_vals,
        ),
        core.CreateOperator(
            'ConstantFill',
            [shape_tensor],
            [ends_tensor],
            dtype=caffe2_pb2.TensorProto.INT64,
            value=-1,
        ),
        core.CreateOperator(
            'ScatterAssign',
            [ends_tensor, axes_tensor, ends_vals_tensor],
            [ends_tensor],
        ),
        # Slice only accepts ends as int
        core.CreateOperator(
            'Cast',
            [ends_tensor],
            [casted_ends_tensor],
            to=caffe2_pb2.TensorProto.INT32,
        ),
    ])

    op.input[:] = [data, casted_starts_tensor, casted_ends_tensor]
    del op.arg[:]
    op.arg.extend(args.values())
    ops.append(op)
    return ops

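The ConstantFill/ScatterAssign sequence builds full-rank starts and ends from the sparse ONNX attributes: defaults of 0 and -1 for untouched axes, with the given values scattered at the listed axes. In numpy terms (values shown after the converter's negative-end adjustment above):

import numpy as np

ndims = 4
axes = [1, 3]
starts_vals = [2, 0]
ends_vals = [5, -2]

starts = np.zeros(ndims, dtype=np.int64)   # ConstantFill(value=0)
ends = np.full(ndims, -1, dtype=np.int64)  # ConstantFill(value=-1)
starts[axes] = starts_vals                 # ScatterAssign
ends[axes] = ends_vals

assert starts.tolist() == [0, 2, 0, 0]
assert ends.tolist() == [-1, 5, -1, -2]
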
def caffe2_net_to_onnx_graph(cls, predict_net, init_net=None, value_info=None):
    if value_info is None:
        value_info = {}
    if not isinstance(value_info, dict):
        raise ValueError('Please pass value_info as a '
                         'name -> (type, shape) dictionary')

    cls._ssa_rewrite(predict_net, init_net, value_info)

    if init_net:
        initializer = cls.caffe2_init_net_to_initializer(init_net)
        value_info.update({init.name: (init.data_type, init.dims)
                           for init in initializer})
    else:
        initializer = []

    # Check whether we have got type and shape info for all inputs
    missing = (set(list(predict_net.external_input)) -
               set(value_info.keys()))
    if missing:
        raise RuntimeError('Could not find value info of inputs: {}'.format(
            ', '.join(missing)))

    inputs = {}
    for name in predict_net.external_input:
        elem_type, shape = value_info[name]
        inputs[name] = np.random.randn(*shape).astype(
            mapping.TENSOR_TYPE_TO_NP_TYPE[elem_type])

    ws, outputs = c2_native_run_net(init_net, predict_net, inputs)

    for name in predict_net.external_output:
        output = outputs[name]
        elem_type = mapping.NP_TYPE_TO_TENSOR_TYPE[output.dtype]
        shape = output.shape
        value_info[name] = (elem_type, shape)

    graph_def = GraphProto()
    graph_def.name = predict_net.name
    graph_def.initializer.extend(initializer)
    # This is a mapping from Caffe2 names to ONNX names
    graph_def.input.extend(
        make_tensor_value_info(name=name,
                               elem_type=value_info[name][0],
                               shape=value_info[name][1])
        for name in predict_net.external_input)

    dummy_name(cls._all_names_in_net(predict_net) |
               cls._all_names_in_net(init_net))

    for op in predict_net.op:
        shapes = {}
        for name in itertools.chain(op.input, op.output):
            blob = ws.FetchBlob(name)
            if hasattr(blob, 'shape'):
                shapes[name] = blob.shape
        graph_def.node.extend(cls.caffe2_op_to_onnx_node(op, shapes=shapes))

    all_output = set(sum((list(node.output) for node in graph_def.node),
                         [init.name for init in graph_def.initializer]))
    redundant_output = set(vi.name for vi in graph_def.output) - all_output
    if redundant_output:
        logger.warning(
            'There are graph outputs not produced by any node or initializer: {}'
            '! Will drop them.'.format(', '.join(redundant_output)))
    graph_def.output.extend(
        make_tensor_value_info(name=name,
                               elem_type=value_info[name][0],
                               shape=value_info[name][1])
        for name in predict_net.external_output
        if name in all_output)

    cls._annotate_consumed(graph_def)
    checker.check_graph(graph_def)
    return graph_def

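A hedged usage sketch: value_info must cover every external input that init_net does not initialize, as (elem_type, shape) pairs. The class and blob names below are illustrative, and the call assumes a working Caffe2 install, since the method executes the net once to infer output shapes.

from caffe2.python import core
from onnx import TensorProto

predict_net = core.Net('tiny').Proto()
predict_net.op.extend([core.CreateOperator('Relu', ['data'], ['out'])])
predict_net.external_input.append('data')
predict_net.external_output.append('out')

graph = Caffe2Frontend.caffe2_net_to_onnx_graph(  # stand-in class name
    predict_net,
    value_info={'data': (TensorProto.FLOAT, (1, 4))})
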
def _create_gru(cls, init_model, pred_model, n, opset_version):
    assert init_model is not None, "cannot convert GRUs without access to the full model"
    assert pred_model is not None, "cannot convert GRUs without access to the full model"

    attrs = dict(n.attrs)  # make a copy, which is safe to mutate
    hidden_size = attrs.pop('hidden_size')
    linear_before_reset = attrs.pop('linear_before_reset')
    assert not attrs, "unsupported GRU attributes: " + str(attrs.keys())

    input_blob, W, R, B, sequence_lens, initial_h = n.inputs
    if sequence_lens == "":
        sequence_lens = None

    input_size = cls._rnn_shape_inference(init_model, pred_model, n, input_blob, W)
    if input_size is None:
        raise RuntimeError("best-effort shape inference for GRU input failed")

    name = dummy_name()
    init_net = core.Net("init-net")
    pred_mh = ModelHelper()

    hidden_t_all, hidden_t_last = gru_cell.GRU(
        pred_mh,
        input_blob,
        sequence_lens,
        [initial_h],
        input_size,
        hidden_size,
        name,
        drop_states=True,
        forward_only=True,
        linear_before_reset=linear_before_reset)

    # input and recurrence biases are squashed together in onnx but not in caffe2
    Bi = name + "_bias_i2h"
    Br = name + "_bias_gates"
    init_net.Slice(B, Bi, starts=[0 * hidden_size], ends=[3 * hidden_size])
    init_net.Slice(B, Br, starts=[3 * hidden_size], ends=[6 * hidden_size])

    # caffe2 has a different order from onnx. We need to rearrange
    #  z r h -> r z h
    #
    # TODO implement support for return_params in gru_cell.GRU.
    # Until then, hardcode blob names.
    reforms = ((W, 'i2h_w', True, [(0, input_size)]),
               (R, 'gate_t_w', False, [(0, hidden_size)]),
               (Bi, 'i2h_b', True, []),
               (Br, 'gate_t_b', False, []))
    for name_from, name_to, do_concat, extra_dims in reforms:
        xz, xr, xh = ['%s/%s_%s' % (name, prefix, name_to)
                      for prefix in ('update', 'reset', 'output')]
        for i, x in enumerate([xz, xr, xh]):
            dim0 = i * hidden_size, (i + 1) * hidden_size
            starts, ends = zip(dim0, *extra_dims)
            init_net.Slice(name_from, x, starts=starts, ends=ends)
        if do_concat:
            init_net.Concat([xr, xz, xh],
                            ['%s/%s' % (name, name_to), dummy_name()],
                            axis=0)

    pred_mh.net = pred_mh.net.Clone(
        "dummy-clone-net",
        blob_remap={hidden_t_all: n.outputs[0], hidden_t_last: n.outputs[1]}
    )

    return Caffe2Ops(list(pred_mh.Proto().op),
                     list(init_net.Proto().op),
                     list(pred_mh.Proto().external_input))

def _create_rnn(cls, init_model, pred_model, n, opset_version):
    assert init_model is not None, "cannot convert RNNs without access to the full model"
    assert pred_model is not None, "cannot convert RNNs without access to the full model"

    attrs = dict(n.attrs)  # make a copy, which is safe to mutate
    hidden_size = attrs.pop('hidden_size')
    activation = force_unicode(attrs.pop('activations', ('tanh',))[0])
    direction = force_unicode(attrs.pop('direction', 'forward'))
    assert not attrs, "unsupported RNN attributes: " + str(attrs.keys())
    assert direction in ['forward', 'bidirectional'], "unsupported backwards RNN"

    input_blob, W, R, B, sequence_lens, initial_h = n.inputs
    if sequence_lens == "":
        sequence_lens = None

    input_size = cls._rnn_shape_inference(init_model, pred_model, n, input_blob, W)
    if input_size is None:
        raise RuntimeError("best-effort shape inference for RNN input failed")

    init_net = core.Net("init-net")
    pred_mh = ModelHelper()

    def make_rnn(direction_offset):
        name = dummy_name()

        # input and recurrence biases are squashed together in
        # onnx but not in caffe2
        bias_offset = 2 * direction_offset * hidden_size
        init_net.Slice(B, name + "/i2h_b",
                       starts=[bias_offset + 0 * hidden_size],
                       ends=[bias_offset + 1 * hidden_size])
        init_net.Slice(B, name + "/gates_t_b",
                       starts=[bias_offset + 1 * hidden_size],
                       ends=[bias_offset + 2 * hidden_size])

        weight_offset = direction_offset * hidden_size
        init_net.Slice(W, name + '/i2h_w',
                       starts=[weight_offset + 0 * hidden_size, 0],
                       ends=[weight_offset + 1 * hidden_size, -1])
        init_net.Slice(R, name + '/gates_t_w',
                       starts=[weight_offset + 0 * hidden_size, 0],
                       ends=[weight_offset + 1 * hidden_size, -1])

        initial_h_sliced = name + '/initial_h'
        init_net.Slice(initial_h, initial_h_sliced,
                       starts=[direction_offset + 0, 0, 0],
                       ends=[direction_offset + 1, -1, -1])

        if direction_offset == 1:
            input = pred_mh.net.ReversePackedSegs(
                [input_blob, sequence_lens], name + "/input-reversed")
        else:
            input = input_blob

        hidden_t_all, hidden_t_last = rnn_cell.BasicRNN(
            pred_mh,
            input,
            sequence_lens,
            [initial_h_sliced],
            input_size,
            hidden_size,
            name,
            drop_states=True,
            forward_only=True,
            activation=activation
        )

        if direction_offset == 1:
            hidden_t_all = pred_mh.net.ReversePackedSegs(
                [hidden_t_all, sequence_lens], name + "/output-reversed")

        return hidden_t_all, hidden_t_last

    if direction == 'forward':
        hidden_t_all, hidden_t_last = make_rnn(0)
        pred_mh.net = pred_mh.net.Clone(
            "dummy-clone-net",
            blob_remap={hidden_t_all: n.outputs[0], hidden_t_last: n.outputs[1]}
        )
    elif direction == 'bidirectional':
        hidden_t_all_f, hidden_t_last_f = make_rnn(0)
        hidden_t_all_b, hidden_t_last_b = make_rnn(1)
        pred_mh.net.Concat([hidden_t_all_f, hidden_t_all_b],
                           [n.outputs[0], dummy_name()], axis=2)
        pred_mh.net.Concat([hidden_t_last_f, hidden_t_last_b],
                           [n.outputs[1], dummy_name()], axis=2)

    return Caffe2Ops(list(pred_mh.Proto().op),
                     list(init_net.Proto().op),
                     list(pred_mh.Proto().external_input))

def _create_reshape(cls, init_model, pred_model, n, opset_version):
    c2_op = cls._common_onnx_node_to_caffe2_op(init_model, pred_model, n, opset_version)
    # Caffe2 has an extra output
    c2_op.output.append(dummy_name())
    return c2_op

def _create_slice(cls, n):
    op = cls._common_onnx_node_to_caffe2_op(n)
    data, axes, orig_starts, orig_ends = op.input

    ops = []

    data_shape = dummy_name()
    ops.append(core.CreateOperator(
        'Shape', [data], [data_shape]
    ))

    tmp_starts = dummy_name()
    starts = dummy_name()
    ops.extend([
        core.CreateOperator(
            'ConstantFill',
            [data_shape],
            [tmp_starts],
            dtype=caffe2_pb2.TensorProto.INT64,
            value=0,
        ),
        core.CreateOperator(
            'ScatterAssign',
            [tmp_starts, axes, orig_starts],
            [tmp_starts],
        ),
        # Slice only accepts starts as int
        core.CreateOperator(
            'Cast',
            [tmp_starts],
            [starts],
            to=caffe2_pb2.TensorProto.INT32,
        ),
    ])

    tmp_ends = dummy_name()
    ends = dummy_name()
    ops.extend([
        core.CreateOperator(
            'ConstantFill',
            [data_shape],
            [tmp_ends],
            dtype=caffe2_pb2.TensorProto.INT64,
            value=-1,
        ),
        core.CreateOperator(
            'ScatterAssign',
            [tmp_ends, axes, orig_ends],
            [tmp_ends],
        ),
        # Slice only accepts ends as int
        core.CreateOperator(
            'Cast',
            [tmp_ends],
            [ends],
            to=caffe2_pb2.TensorProto.INT32,
        ),
    ])

    op.input[:] = [data, starts, ends]
    ops.append(op)
    return ops