def convert_ops(self, graph_def):
    for n in graph_def.node:
        node = OnnxNode(n)
        mace_check(node.op_type in self._op_converters,
                   "Mace does not support onnx op type %s yet"
                   % node.op_type)
        self._op_converters[node.op_type](node)
def convert_folded_batchnorm(self, caffe_op):
    op = self.convert_general_op(caffe_op)
    op.type = MaceOp.BatchNorm.name

    scale_op = None
    for consumer in self._caffe_net.get_consumers(caffe_op.layer.top[0]):
        if consumer.type == 'Scale':
            scale_op = consumer
    mace_check(scale_op is not None, "batchnorm is not followed by scale")
    self._skip_ops.append(scale_op)

    epsilon_value = caffe_op.layer.batch_norm_param.eps
    mace_check(caffe_op.blobs[2][0] != 0, "batchnorm scalar is zero")

    mean_value = (1. / caffe_op.blobs[2][0]) * caffe_op.blobs[0]
    var_value = (1. / caffe_op.blobs[2][0]) * caffe_op.blobs[1]
    gamma_value = scale_op.blobs[0]

    beta_value = np.zeros_like(mean_value)
    if len(scale_op.blobs) == 2:
        beta_value = scale_op.blobs[1]

    scale_value = (
        (1.0 / np.vectorize(math.sqrt)(var_value + epsilon_value)) *
        gamma_value).reshape(-1)
    offset_value = ((-mean_value * scale_value) + beta_value).reshape(-1)

    input_names = [op.name + '_scale', op.name + '_offset']
    self.add_tensor(input_names[0], scale_value.reshape(-1).shape,
                    mace_pb2.DT_FLOAT, scale_value)
    self.add_tensor(input_names[1], offset_value.reshape(-1).shape,
                    mace_pb2.DT_FLOAT, offset_value)
    op.input.extend([name for name in input_names])
    op.output[:] = scale_op.layer.top[:]
def add_min_max_const_node(self, this_op, tensor_name,
                           add_min=True, add_max=True, diff_port=True):
    op, port = get_op_and_port_from_tensor(tensor_name)
    mace_check(port == 0, 'port should be 0 when adding min/max tensors.')
    if tensor_name in self._quantize_activation_info:
        quantize_info = self._quantize_activation_info[tensor_name]
        minval = quantize_info.minval
        maxval = quantize_info.maxval
        is_activation = True
    elif tensor_name in self._consts:
        tensor = self._consts[tensor_name]
        minval = tensor.minval
        maxval = tensor.maxval
        is_activation = False
    else:
        raise Exception('Quantize info not found: ', tensor_name)

    if add_min:
        if is_activation and diff_port:
            min_tensor_name = op + ':1'
        else:
            min_tensor_name = op + '_min:0'
        self.add_min_max(min_tensor_name, minval)
        this_op.input.extend([min_tensor_name])
    if add_max:
        if is_activation and diff_port:
            max_tensor_name = op + ':2'
        else:
            max_tensor_name = op + '_max:0'
        self.add_min_max(max_tensor_name, maxval)
        this_op.input.extend([max_tensor_name])
def convert_tensors(self, graph_def):
    initializer = graph_def.initializer
    if initializer:
        for init in initializer:
            tensor = self._mace_net_def.tensors.add()
            tensor.name = init.name

            onnx_tensor = numpy_helper.to_array(init)
            tensor.dims.extend(list(init.dims))
            data_type = onnx_dtype(init.data_type)

            if data_type == np.float32 or data_type == np.float64:
                tensor.data_type = mace_pb2.DT_FLOAT
                tensor.float_data.extend(
                    onnx_tensor.astype(np.float32).flat)
            elif data_type == np.int32:
                tensor.data_type = mace_pb2.DT_INT32
                tensor.int32_data.extend(
                    onnx_tensor.astype(np.int32).flat)
            elif data_type == np.int64:
                tensor.data_type = mace_pb2.DT_INT32
                tensor.int32_data.extend(
                    onnx_tensor.astype(np.int32).flat)
            else:
                mace_check(False,
                           "Not supported tensor type: %s" % data_type)
            self._consts[tensor.name] = tensor
def convert_ops(self):
    for tf_op in self._tf_graph.get_operations():
        mace_check(tf_op.type in self._op_converters,
                   "Mace does not support tensorflow op type %s yet"
                   % tf_op.type)
        self._op_converters[tf_op.type](tf_op)
    self.convert_tensors()
def convert_folded_batchnorm(self, caffe_op):
    op = self.convert_general_op(caffe_op)
    op.type = MaceOp.FoldedBatchNorm.name

    scale_op = None
    for consumer in self._caffe_net.get_consumers(caffe_op.layer.top[0]):
        if consumer.type == 'Scale':
            scale_op = consumer
    mace_check(scale_op is not None, "batchnorm is not followed by scale")
    self._skip_ops.append(scale_op)

    epsilon_value = caffe_op.layer.batch_norm_param.eps
    mace_check(caffe_op.blobs[2][0] != 0, "batchnorm scalar is zero")

    mean_value = (1. / caffe_op.blobs[2][0]) * caffe_op.blobs[0]
    var_value = (1. / caffe_op.blobs[2][0]) * caffe_op.blobs[1]
    gamma_value = scale_op.blobs[0]

    beta_value = np.zeros_like(mean_value)
    if len(scale_op.blobs) == 2:
        beta_value = scale_op.blobs[1]

    scale_value = (
        (1.0 / np.vectorize(math.sqrt)(var_value + epsilon_value)) *
        gamma_value).reshape(-1)
    offset_value = ((-mean_value * scale_value) + beta_value).reshape(-1)

    input_names = [op.name + '_scale', op.name + '_offset']
    self.add_tensor(input_names[0], scale_value.reshape(-1).shape,
                    mace_pb2.DT_FLOAT, scale_value)
    self.add_tensor(input_names[1], offset_value.reshape(-1).shape,
                    mace_pb2.DT_FLOAT, offset_value)
    op.input.extend([name for name in input_names])
    op.output[:] = scale_op.layer.top[:]
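# --- Illustrative sketch (not part of the converter) ---
# A minimal numpy check of the batch-norm folding used above: with
# scale = gamma / sqrt(var + eps) and offset = beta - mean * scale,
# the folded y = scale * x + offset matches BatchNorm followed by Scale.
# The helper name and toy values below are hypothetical.
import numpy as np


def check_folded_batchnorm():
    rng = np.random.RandomState(0)
    x = rng.randn(4)                              # one value per channel
    mean, var = rng.randn(4), rng.rand(4) + 0.5
    gamma, beta = rng.rand(4) + 0.5, rng.randn(4)
    eps = 1e-5

    # reference: normalize, then apply the Scale layer
    y_ref = gamma * (x - mean) / np.sqrt(var + eps) + beta

    # folded form, computed the same way as in convert_folded_batchnorm
    scale = (1.0 / np.sqrt(var + eps)) * gamma
    offset = (-mean * scale) + beta
    y_folded = scale * x + offset

    assert np.allclose(y_ref, y_folded)


check_folded_batchnorm()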
def add_input_output_node(self):
    mace_check(
        self._model.op[0].type == HexagonOp.QuantizeINPUT_f_to_8.name,
        "Not started with Quantize op.")
    quantize_input_op = self._model.op[0]
    del quantize_input_op.input[:]

    mace_check(
        self._model.op[-1].type == HexagonOp.DequantizeOUTPUT_8tof.name,
        "Not ended with Dequantize op.")
    dequantize_output_op = self._model.op[-1]
    del dequantize_output_op.output_shape[:]
    del dequantize_output_op.output_type[:]
    del dequantize_output_op.out_max_byte_size[:]

    if self._option.device == DeviceType.HTA.value:
        # replace QuantizeINPUT_f_to_8 with INPUT
        quantize_input_op.type = HexagonOp.INPUT.name
        del quantize_input_op.output_shape[1:]
        del quantize_input_op.output_type[1:]
        del quantize_input_op.out_max_byte_size[1:]

        # replace first op's input min max with constant
        self.add_constant_min_max_for_first_op(self._model.op[1])

        # replace DequantizeOUTPUT_8tof with OUTPUT
        dequantize_output_op.type = HexagonOp.OUTPUT.name
        del dequantize_output_op.input[1:]
def transpose_const(tensor):
    shape = tensor.dims
    mace_check(len(shape) == 2, "gemm only supports 2-dim input.")
    tensor_data = np.array(tensor.float_data).reshape(shape[0], shape[1])
    tensor_data = tensor_data.transpose(1, 0)
    tensor.float_data[:] = tensor_data.flat
    tensor.dims[:] = tensor_data.shape
def convert_fused_batchnorm(self, node):
    op = self.convert_general_op(node)
    op.type = MaceOp.BatchNorm.name

    if "epsilon" in node.attrs:
        epsilon_value = node.attrs["epsilon"]
    else:
        epsilon_value = 1e-5

    mace_check(len(node.inputs) == 5, "batch norm should have 5 inputs.")

    gamma_value = np.array(self._consts[node.inputs[1]].float_data)
    beta_value = np.array(self._consts[node.inputs[2]].float_data)
    mean_value = np.array(self._consts[node.inputs[3]].float_data)
    var_value = np.array(self._consts[node.inputs[4]].float_data)

    scale_name = node.name + 'scale'
    offset_name = node.name + 'offset'
    scale_value = ((1.0 / np.sqrt(var_value + epsilon_value)) *
                   gamma_value)
    offset_value = (-mean_value * scale_value) + beta_value
    self.add_tensor(scale_name, scale_value.shape, mace_pb2.DT_FLOAT,
                    scale_value)
    self.add_tensor(offset_name, offset_value.shape, mace_pb2.DT_FLOAT,
                    offset_value)
    del op.input[1:]
    op.input.extend([scale_name, offset_name])
    del op.output[1:]
    del op.output_shape[1:]
def convert_conv2d(self, node):
    op = self.convert_general_op(node)
    self.add_stride_pad_kernel_arg(node.attrs, op)
    group_arg = op.arg.add()
    group_arg.name = MaceKeyword.mace_group_str
    if 'group' in node.attrs:
        group_val = node.attrs["group"]
    else:
        group_val = 1
    group_arg.i = group_val

    is_depthwise = False
    if group_val > 1:
        filter_shape = self._graph_shapes_dict[node.inputs[1]]
        mace_check(group_val == filter_shape[0] and filter_shape[1] == 1,
                   "Mace does not support group convolution yet")
        filter_tensor = self._consts[node.inputs[1]]
        new_shape = [filter_shape[1], filter_shape[0],
                     filter_shape[2], filter_shape[3]]
        del filter_tensor.dims[:]
        filter_tensor.dims.extend(new_shape)
        is_depthwise = True
    if is_depthwise:
        op.type = MaceOp.DepthwiseConv2d.name
    else:
        op.type = MaceOp.Conv2D.name

    dilation_arg = op.arg.add()
    dilation_arg.name = MaceKeyword.mace_dilations_str
    if 'dilations' in node.attrs:
        dilation_val = node.attrs["dilations"]
    else:
        dilation_val = [1, 1]
    dilation_arg.ints.extend(dilation_val)
def convert_general_op(self, tf_op):
    op = self._mace_net_def.op.add()
    op.name = tf_op.name
    op.type = tf_op.type
    op.input.extend([tf_input.name for tf_input in tf_op.inputs])
    op.output.extend([tf_output.name for tf_output in tf_op.outputs])
    for tf_output in tf_op.outputs:
        output_shape = op.output_shape.add()
        output_shape.dims.extend(self.infer_tensor_shape(tf_output))

    data_type_arg = op.arg.add()
    data_type_arg.name = 'T'
    try:
        dtype = tf_op.get_attr('T')
        if dtype == tf.int32:
            data_type_arg.i = mace_pb2.DT_INT32
        elif dtype == tf.float32:
            data_type_arg.i = self._option.data_type
        else:
            mace_check(False, "data type %s not supported" % dtype)
    except ValueError:
        data_type_arg.i = self._option.data_type

    ConverterUtil.add_data_format_arg(op, DataFormat.NHWC)
    return op
def convert_general_op(self, tf_op):
    op = self._mace_net_def.op.add()
    op.name = tf_op.name
    op.type = tf_op.type
    op.input.extend([tf_input.name for tf_input in tf_op.inputs])
    op.output.extend([tf_output.name for tf_output in tf_op.outputs])
    for tf_output in tf_op.outputs:
        output_shape = op.output_shape.add()
        self.infer_tensor_shape(output_shape, tf_output)

    data_type_arg = op.arg.add()
    data_type_arg.name = 'T'
    try:
        dtype = tf_op.get_attr('T')
        if dtype == tf.int32:
            data_type_arg.i = mace_pb2.DT_INT32
        elif dtype == tf.float32:
            data_type_arg.i = self._option.data_type
        else:
            mace_check(False, "data type %s not supported" % dtype)
    except ValueError:
        try:
            dtype = tf_op.get_attr('SrcT')
            if dtype == tf.int32 or dtype == tf.bool:
                data_type_arg.i = mace_pb2.DT_INT32
            elif dtype == tf.float32:
                data_type_arg.i = self._option.data_type
            else:
                mace_check(False, "data type %s not supported" % dtype)
        except ValueError:
            data_type_arg.i = self._option.data_type

    ConverterUtil.add_data_format_arg(op, DataFormat.NHWC)
    return op
def infer_shape_general(self, op):
    if len(op.input) > 0:
        mace_check(op.input[0] in self._output_shape_cache,
                   "Op %s input %s does not exist"
                   % (op.name, op.input[0]))
        input_shape = self._output_shape_cache[op.input[0]]
        self.add_output_shape(op, [input_shape])
def infer_shape_conv_pool_shape(self, op):
    input_shape = self._output_shape_cache[op.input[0]]
    output_shape = np.zeros_like(input_shape)
    if op.type == MaceOp.Pooling:
        filter_shape = list(
            ConverterUtil.get_arg(op, MaceKeyword.mace_kernel_str).ints)
        if ConverterUtil.data_format(op) == DataFormat.NCHW:
            filter_shape = [input_shape[1], input_shape[1]] + filter_shape
            if ConverterUtil.get_arg(
                    op, MaceKeyword.mace_global_pooling_str) is not None:
                filter_shape[2] = input_shape[2]
                filter_shape[3] = input_shape[3]
        else:  # NHWC
            filter_shape = filter_shape + [input_shape[1], input_shape[1]]
            if ConverterUtil.get_arg(
                    op, MaceKeyword.mace_global_pooling_str) is not None:
                filter_shape[0] = input_shape[1]
                filter_shape[1] = input_shape[2]
    else:
        filter_shape = self._output_shape_cache[op.input[1]]

    paddings = ConverterUtil.get_arg(
        op, MaceKeyword.mace_padding_values_str).ints
    strides = ConverterUtil.get_arg(op, MaceKeyword.mace_strides_str).ints
    dilations_arg = ConverterUtil.get_arg(op,
                                          MaceKeyword.mace_dilations_str)
    if dilations_arg is not None:
        dilations = dilations_arg.ints
    else:
        dilations = [1, 1]
    if op.type == MaceOp.Pooling:
        round_func = math.ceil
    else:
        round_func = math.floor

    output_shape[0] = input_shape[0]
    if ConverterUtil.data_format(op) == DataFormat.NCHW \
            and ConverterUtil.filter_format(self._net) == FilterFormat.OIHW:  # noqa
        # filter format: OIHW
        if op.type == MaceOp.DepthwiseConv2d.name:
            output_shape[1] = filter_shape[0] * filter_shape[1]
        else:
            output_shape[1] = filter_shape[0]
        output_shape[2] = int(
            round_func((input_shape[2] + paddings[0] - filter_shape[2] -
                        (filter_shape[2] - 1) * (dilations[0] - 1)) /
                       float(strides[0]))) + 1
        output_shape[3] = int(
            round_func((input_shape[3] + paddings[1] - filter_shape[3] -
                        (filter_shape[3] - 1) * (dilations[1] - 1)) /
                       float(strides[1]))) + 1
    else:
        mace_check(False, "Mace can only infer shape for"
                          " NCHW input and OIHW filter")
    self.add_output_shape(op, [output_shape])
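# --- Illustrative sketch (not part of the shape inference code) ---
# The spatial output size computed above follows
#   out = round_func((in + pad - k - (k - 1) * (d - 1)) / stride) + 1
# with floor for convolution and ceil for pooling. The standalone helper
# below is a hypothetical restatement of that formula for a single axis.
import math


def conv_pool_out_size(in_size, kernel, pad, stride, dilation=1,
                       is_pooling=False):
    round_func = math.ceil if is_pooling else math.floor
    effective_kernel = kernel + (kernel - 1) * (dilation - 1)
    return int(round_func((in_size + pad - effective_kernel) /
                          float(stride))) + 1


# e.g. a 3x3 convolution, stride 1, total padding 2 keeps a 224 input at 224
assert conv_pool_out_size(224, kernel=3, pad=2, stride=1) == 224
# the same kernel with stride 2 halves it to 112
assert conv_pool_out_size(224, kernel=3, pad=2, stride=2) == 112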
def convert_ops(self):
    for layer in self._caffe_layers.layer:
        caffe_op = self._caffe_net.get_op(layer.name)
        if caffe_op not in self._skip_ops:
            mace_check(layer.type in self._op_converters,
                       "Mace does not support caffe op type %s yet"
                       % layer.type)
            self._op_converters[layer.type](caffe_op)
def infer_shape_crop(self, op):
    mace_check(len(op.input) == 2, "crop layer needs two inputs")
    output_shape = self._output_shape_cache[op.input[0]]
    input1_shape = self._output_shape_cache[op.input[1]]
    offsets = ConverterUtil.get_arg(op, MaceKeyword.mace_offset_str).ints
    for i in range(len(offsets)):
        if offsets[i] >= 0:
            output_shape[i] = input1_shape[i]
    self.add_output_shape(op, [output_shape])
def infer_shape_fully_connected(self, op):
    input_shape = self._output_shape_cache[op.input[0]]
    weight_shape = self._output_shape_cache[op.input[1]]
    if ConverterUtil.data_format(op) == DataFormat.NCHW:
        output_shape = [input_shape[0], weight_shape[0], 1, 1]
    else:
        mace_check(False, "format %s is not supported"
                   % ConverterUtil.data_format(op))
    self.add_output_shape(op, [output_shape])
def add_output_shape(self, op, shapes):
    mace_check(len(op.output) == len(shapes),
               "Op %s (%s) output count is different from "
               "output shape count" % (op.name, op.type))
    for i in six.moves.range(len(shapes)):
        output_name = op.output[i]
        output_shape = op.output_shape.add()
        output_shape.dims.extend(shapes[i])
        self._output_shape_cache[output_name] = shapes[i]
def convert_concat(self, node):
    op = self.convert_general_op(node)
    op.type = MaceOp.Concat.name
    mace_check('axis' in node.attrs,
               'Concat op should have axis attribute.')
    axis_arg = op.arg.add()
    axis_arg.name = MaceKeyword.mace_axis_str
    axis_arg.i = node.attrs['axis']
    axis_arg.i = 4 + axis_arg.i if axis_arg.i < 0 else axis_arg.i
    mace_check(axis_arg.i == 1, "only support concat at channel dimension")
def add_int_list_tensor_from_arg(self, op, keyword):
    list_value_arg = ConverterUtil.get_arg(op, keyword)
    mace_check(list_value_arg.ints is not None,
               op.name + ': ' + keyword + ' value ints should not be None')
    list_value_tensor = self._model.tensors.add()
    list_value_tensor.name = op.name + '/' + keyword + ':0'
    list_value_tensor.data_type = mace_pb2.DT_INT32
    list_value_tensor.dims.extend([len(list_value_arg.ints)])
    list_value_tensor.int32_data.extend(list_value_arg.ints)
    op.input.extend([list_value_tensor.name])
def add_int_tensor_from_arg(self, op, keyword):
    int_value_arg = ConverterUtil.get_arg(op, keyword)
    mace_check(int_value_arg.i is not None,
               op.name + ': ' + keyword + ' value i should not be None')
    int_value_tensor = self._model.tensors.add()
    int_value_tensor.name = op.name + '/' + keyword + ':0'
    int_value_tensor.data_type = mace_pb2.DT_INT32
    int_value_tensor.dims.extend([1])
    int_value_tensor.int32_data.extend([int_value_arg.i])
    op.input.extend([int_value_tensor.name])
def add_output_shape(self, op, shapes):
    mace_check(len(op.output) == len(shapes),
               "Op %s (%s) output count is different from "
               "output shape count" % (op.name, op.type))
    for i in xrange(len(shapes)):
        output_name = op.output[i]
        output_shape = op.output_shape.add()
        output_shape.dims.extend(shapes[i])
        self._output_shape_cache[output_name] = shapes[i]
def convert_deconv2d(self, caffe_op):
    op = self.convert_general_op(caffe_op)
    param = caffe_op.layer.convolution_param
    is_depthwise = False
    if param.HasField(caffe_group_str) and param.group > 1:
        filter_data = caffe_op.blobs[0]
        mace_check(param.group == filter_data.shape[0] and
                   filter_data.shape[1] == 1,
                   "Mace does not support group deconvolution yet")
        is_depthwise = True
        caffe_op.blobs[0] = filter_data.reshape(1,
                                                filter_data.shape[0],
                                                filter_data.shape[2],
                                                filter_data.shape[3])
    # mace_check(is_depthwise is False,
    #            "Mace do not support depthwise deconvolution yet")

    if is_depthwise:
        op.type = MaceOp.DepthwiseDeconv2d.name
    else:
        op.type = MaceOp.Deconv2D.name

    from_caffe_arg = op.arg.add()
    from_caffe_arg.name = MaceKeyword.mace_from_caffe_str
    from_caffe_arg.i = 1

    self.add_stride_pad_kernel_arg(param, op)
    # dilation is specific for convolution in caffe
    dilations = [1, 1]
    if len(param.dilation) > 0:
        dilation_arg = op.arg.add()
        dilation_arg.name = MaceKeyword.mace_dilations_str
        if len(param.dilation) == 1:
            dilations = [param.dilation[0], param.dilation[0]]
        elif len(param.dilation) == 2:
            dilations = [param.dilation[0], param.dilation[1]]
        mace_check(dilations[0] == 1 and dilations[1] == 1,
                   "Mace only supports dilation == 1 deconvolution.")
        dilation_arg.ints.extend(dilations)

    filter_tensor_name = op.name + '_filter'
    filter_data = caffe_op.blobs[0]
    self.add_tensor(filter_tensor_name, filter_data.shape,
                    mace_pb2.DT_FLOAT, filter_data)
    op.input.extend([filter_tensor_name])

    if len(caffe_op.blobs) == 2:
        bias_tensor_name = op.name + '_bias'
        bias_data = caffe_op.blobs[1]
        # caffe of old version has 4-dimension bias, so reshape it
        # to single dimension
        self.add_tensor(bias_tensor_name, bias_data.reshape(-1).shape,
                        mace_pb2.DT_FLOAT, bias_data)
        op.input.extend([bias_tensor_name])
def add_size_tensor_from_arg(self, op, keyword):
    size_value_arg = ConverterUtil.get_arg(op, keyword)
    mace_check(len(size_value_arg.ints) == 2,
               op.name + ': ' + keyword + ' value does not have size 2')
    size_value_tensor = self._model.tensors.add()
    size_value_tensor.name = op.name + '/' + keyword + ':0'
    size_value_tensor.data_type = mace_pb2.DT_INT32
    size_value_tensor.dims.extend([2])
    size_value_tensor.int32_data.extend(size_value_arg.ints)
    op.input.extend([size_value_tensor.name])
def convert_depth_space(self, node):
    op = self.convert_general_op(node)
    if op.type == OnnxOpType.DepthToSpace.name:
        op.type = MaceOp.DepthToSpace.name
    else:
        op.type = MaceOp.SpaceToDepth.name
    mace_check('block_size' in node.attrs,
               "depth to space op should have block size attribute.")
    block_size = node.attrs['block_size']
    size_arg = op.arg.add()
    size_arg.name = MaceKeyword.mace_space_depth_block_size_str
    size_arg.i = block_size
def convert_interp(self, caffe_op):
    op = self.convert_general_op(caffe_op)
    param = caffe_op.layer.interp_param
    mace_check(param.HasField("height") and param.HasField("width"),
               'Only support bilinear interp with height and width')
    op.type = MaceOp.ResizeBilinear.name

    size_arg = op.arg.add()
    size_arg.name = MaceKeyword.mace_resize_size_str
    size_value = np.array([param.height, param.width], dtype=np.int32)
    size_arg.ints.extend(size_value)
def convert_dynamic_lstm(self, node):
    op = self.convert_general_op(node)
    op.type = MaceOp.DynamicLSTM.name

    if 'prev_out_delay' in node.attrs:
        prev_out_delay = node.attrs['prev_out_delay']
        mace_check(prev_out_delay < 0,
                   "dynamic's prev_out_delay should be < 0.")
        prev_out_delay_arg = op.arg.add()
        prev_out_delay_arg.name = 'prev_out_delay'
        prev_out_delay_arg.i = prev_out_delay
    if 'prev_cell_delay' in node.attrs:
        prev_cell_delay = node.attrs['prev_cell_delay']
        mace_check(prev_cell_delay < 0,
                   "dynamic's prev_cell_delay should be < 0.")
        prev_cell_delay_arg = op.arg.add()
        prev_cell_delay_arg.name = 'prev_cell_delay'
        prev_cell_delay_arg.i = prev_cell_delay
    if 'prev_out_offset' in node.attrs:
        prev_out_offset = node.attrs['prev_out_offset']
        mace_check(prev_out_offset >= 0,
                   "dynamic's prev_out_offset should be >= 0.")
        prev_out_offset_arg = op.arg.add()
        prev_out_offset_arg.name = 'prev_out_offset'
        prev_out_offset_arg.i = prev_out_offset
    if 'prev_out_dim' in node.attrs:
        prev_out_dim = node.attrs['prev_out_dim']
        mace_check(prev_out_dim > 0,
                   "dynamic's prev_out_dim should be > 0.")
        prev_out_dim_arg = op.arg.add()
        prev_out_dim_arg.name = 'prev_out_dim'
        prev_out_dim_arg.i = prev_out_dim
    if 'prev_cell_dim' in node.attrs:
        prev_cell_dim = node.attrs['prev_cell_dim']
        mace_check(prev_cell_dim > 0,
                   "dynamic's prev_cell_dim should be > 0.")
        prev_cell_dim_arg = op.arg.add()
        prev_cell_dim_arg.name = 'prev_cell_dim'
        prev_cell_dim_arg.i = prev_cell_dim
    if 'bias_a' in node.attrs:
        bias_a = node.attrs['bias_a']
        bias_a_arg = op.arg.add()
        bias_a_arg.name = 'bias_a'
        bias_a_arg.i = bias_a
    if 'bias_b' in node.attrs:
        bias_b = node.attrs['bias_b']
        bias_b_arg = op.arg.add()
        bias_b_arg.name = 'bias_b'
        bias_b_arg.i = bias_b
    if 'scale' in node.attrs:
        scale = node.attrs['scale']
        scale_arg = op.arg.add()
        scale_arg.name = 'scale'
        scale_arg.f = scale
def add_padding_tensor_from_arg(self, op):
    padding_value_arg = ConverterUtil.get_arg(
        op, MaceKeyword.mace_padding_values_str)
    mace_check(len(padding_value_arg.ints) == 4,
               op.name + ': padding value does not have size 4')
    padding_value_tensor = self._model.tensors.add()
    padding_value_tensor.name = op.name + '/padding:0'
    padding_value_tensor.data_type = mace_pb2.DT_INT32
    padding_value_tensor.dims.extend([4])
    padding_value_tensor.int32_data.extend(padding_value_arg.ints)
    op.input.extend([padding_value_tensor.name])
def convert_timeoffset(self, node):
    op = self.convert_general_op(node)
    mace_check('offset' in node.attrs,
               'Offset attribute required in Offset Node.')
    offset = node.attrs['offset']
    if offset == 0:
        op.type = MaceOp.Identity.name
    else:
        op.type = MaceOp.TimeOffset.name

    offset_arg = op.arg.add()
    offset_arg.name = 'offset'
    offset_arg.i = offset
def infer_shape_resize_bilinear(self, op):
    input_shape = self._output_shape_cache[op.input[0]]
    size = ConverterUtil.get_arg(
        op, MaceKeyword.mace_resize_size_str).ints
    if ConverterUtil.data_format(op) == DataFormat.NCHW:
        output_shape = [input_shape[0], input_shape[1], size[0], size[1]]
    elif ConverterUtil.data_format(op) == DataFormat.NHWC:
        output_shape = [input_shape[0], size[0], size[1], input_shape[3]]
    else:
        output_shape = []
        mace_check(False, "format %s is not supported"
                   % ConverterUtil.data_format(op))
    self.add_output_shape(op, [output_shape])
def convert_ops(self):
    layer_names = set()
    for layer in self._caffe_layers.layer:
        caffe_op = self._caffe_net.get_op(layer.name)
        if caffe_op not in self._skip_ops:
            mace_check(layer.name not in layer_names,
                       "There is duplicate layer name '%s' in your model"
                       % layer.name)
            mace_check(layer.type in self._op_converters,
                       "Mace does not support caffe op type %s yet"
                       % layer.type)
            layer_names.add(layer.name)
            self._op_converters[layer.type](caffe_op)
def convert_slice(self, caffe_op):
    op = self.convert_general_op(caffe_op)
    op.type = MaceOp.Split.name

    if caffe_op.layer.HasField('slice_param'):
        param = caffe_op.layer.slice_param
        mace_check(not param.HasField('axis') or param.axis == 1
                   or param.axis == -3,
                   "Mace does not support slice with axis %d" % param.axis)
        mace_check(len(param.slice_point) == 0,
                   "Mace does not support slice with slice_point")

    axis_arg = op.arg.add()
    axis_arg.name = MaceKeyword.mace_axis_str
    axis_arg.i = 1
def convert_concat(self, caffe_op):
    op = self.convert_general_op(caffe_op)
    param = caffe_op.layer.concat_param
    op.type = MaceOp.Concat.name
    axis_arg = op.arg.add()
    axis_arg.name = MaceKeyword.mace_axis_str
    axis_arg.i = 1
    if param.HasField('axis'):
        axis_arg.i = param.axis
    elif param.HasField('concat_dim'):
        axis_arg.i = param.concat_dim
    axis_arg.i = 4 + axis_arg.i if axis_arg.i < 0 else axis_arg.i
    mace_check(axis_arg.i == 1, "only support concat at channel dimension")
def convert_cast(self, tf_op):
    op = self.convert_general_op(tf_op)
    op.type = MaceOp.Cast.name

    try:
        dtype = tf_op.get_attr('DstT')
        if dtype == tf.int32:
            op.output_type.extend([mace_pb2.DT_INT32])
        elif dtype == tf.float32:
            op.output_type.extend([self._option.data_type])
        else:
            mace_check(False, "data type %s not supported" % dtype)
    except ValueError:
        op.output_type.extend([self._option.data_type])
def convert_slice(self, caffe_op):
    op = self.convert_general_op(caffe_op)
    op.type = MaceOp.Slice.name

    if caffe_op.layer.HasField('slice_param'):
        param = caffe_op.layer.slice_param
        mace_check(not param.HasField('axis') or param.axis == 1
                   or param.axis == -3,
                   "Mace does not support slice with axis %d" % param.axis)
        mace_check(len(param.slice_point) == 0,
                   "Mace does not support slice with slice_point")

    axis_arg = op.arg.add()
    axis_arg.name = MaceKeyword.mace_axis_str
    axis_arg.i = 1
def convert_conv2d(self, caffe_op):
    op = self.convert_general_op(caffe_op)
    param = caffe_op.layer.convolution_param
    is_depthwise = False
    if param.HasField(caffe_group_str):
        filter_data = caffe_op.blobs[0]
        mace_check(param.group == filter_data.shape[0] and
                   filter_data.shape[1] == 1,
                   "Mace does not support group convolution yet")
        is_depthwise = True
        caffe_op.blobs[0] = filter_data.reshape(1,
                                                filter_data.shape[0],
                                                filter_data.shape[2],
                                                filter_data.shape[3])

    if is_depthwise:
        op.type = MaceOp.DepthwiseConv2d.name
    else:
        op.type = MaceOp.Conv2D.name

    self.add_stride_pad_kernel_arg(param, op)
    # dilation is specific for convolution in caffe
    dilations = [1, 1]
    if len(param.dilation) > 0:
        dilation_arg = op.arg.add()
        dilation_arg.name = MaceKeyword.mace_dilations_str
        if len(param.dilation) == 1:
            dilations = [param.dilation[0], param.dilation[0]]
        elif len(param.dilation) == 2:
            dilations = [param.dilation[0], param.dilation[1]]
        dilation_arg.ints.extend(dilations)

    filter_tensor_name = op.name + '_filter'
    filter_data = caffe_op.blobs[0]
    self.add_tensor(filter_tensor_name, filter_data.shape,
                    mace_pb2.DT_FLOAT, filter_data)
    op.input.extend([filter_tensor_name])

    if len(caffe_op.blobs) == 2:
        bias_tensor_name = op.name + '_bias'
        bias_data = caffe_op.blobs[1]
        # caffe of old version has 4-dimension bias, so reshape it
        # to single dimension
        self.add_tensor(bias_tensor_name, bias_data.reshape(-1).shape,
                        mace_pb2.DT_FLOAT, bias_data)
        op.input.extend([bias_tensor_name])
def convert_tensors(self):
    for tf_op in self._tf_graph.get_operations():
        if tf_op.type != TFOpType.Const.name:
            continue
        output_name = tf_op.outputs[0].name
        if output_name not in self._skip_tensor:
            tensor = self._mace_net_def.tensors.add()
            tensor.name = tf_op.outputs[0].name
            tf_tensor = tf_op.outputs[0].eval()
            tensor.dims.extend(list(tf_tensor.shape))

            tf_dt = tf_op.get_attr('dtype')
            if tf_dt == tf.float32:
                tensor.data_type = mace_pb2.DT_FLOAT
                tensor.float_data.extend(tf_tensor.astype(np.float32).flat)
            elif tf_dt == tf.int32:
                tensor.data_type = mace_pb2.DT_INT32
                tensor.int32_data.extend(tf_tensor.astype(np.int32).flat)
            else:
                mace_check(False,
                           "Not supported tensor type: %s" % tf_dt.name)
def convert_fully_connected(self, caffe_op):
    op = self.convert_general_op(caffe_op)
    param = caffe_op.layer.inner_product_param
    op.type = MaceOp.FullyConnected.name

    mace_check((param.axis == 1 or param.axis == -3)
               and not param.transpose,
               "Do not support non-default axis and transpose")
    mace_check(caffe_op.blobs[0].ndim in [2, 4],
               "Unexpected fc weight ndim.")
    if caffe_op.blobs[0].ndim == 4:
        mace_check(list(caffe_op.blobs[0].shape[:2]) == [1, 1],
                   "Only support 4D weight with shape [1, 1, *, *]")
    weight_tensor_name = op.name + '_weight'
    weight_data = caffe_op.blobs[0].reshape(param.num_output, -1)
    self.add_tensor(weight_tensor_name, weight_data.shape,
                    mace_pb2.DT_FLOAT, weight_data)
    op.input.extend([weight_tensor_name])

    if len(caffe_op.blobs) == 2:
        bias_tensor_name = op.name + '_bias'
        bias_data = caffe_op.blobs[1]
        self.add_tensor(bias_tensor_name, bias_data.reshape(-1).shape,
                        mace_pb2.DT_FLOAT, bias_data)
        op.input.extend([bias_tensor_name])
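# --- Illustrative sketch (not part of the converter) ---
# convert_fully_connected flattens the Caffe InnerProduct weight blob to
# [num_output, input_dim] so the layer can run as y = W.dot(x) + b.
# The toy shapes and names below are hypothetical.
import numpy as np

num_output, input_dim = 3, 8
w_blob = np.arange(num_output * input_dim, dtype=np.float32).reshape(
    1, 1, num_output, input_dim)          # old-style 4D weight blob
w = w_blob.reshape(num_output, -1)        # same reshape as above
assert w.shape == (num_output, input_dim)

x = np.ones(input_dim, dtype=np.float32)
b = np.zeros(num_output, dtype=np.float32)
y = w.dot(x) + b                          # FullyConnected output
assert y.shape == (num_output,)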
def run(self):
    ops = self._tf_graph.get_operations()
    dsp_ops = DspOps()
    resolved_ops = set()

    mace_check(len(self._option.input_nodes) == 1
               and len(self._option.output_nodes) == 1,
               'dsp only support single input and output')
    input_node = self._option.input_nodes.values()[0].name
    output_node = self._option.output_nodes.values()[0].name

    # convert const node
    unresolved_ops = [op for op in ops if op.type == 'Const']
    with tf.Session() as session:
        while len(unresolved_ops) > 0:
            convert_ops(unresolved_ops, resolved_ops,
                        self._mace_net_def, dsp_ops)

    # convert op node
    unresolved_ops = [op for op in ops if op.type != 'Const']
    while len(unresolved_ops) > 0:
        convert_ops(unresolved_ops, resolved_ops,
                    self._mace_net_def, dsp_ops)

    add_output_node(self._mace_net_def, output_node)
    net_def = reverse_batch_to_space_and_biasadd(self._mace_net_def)
    net_def = fuse_quantize(net_def)

    sorted_net_def = graph_util.sort_mace_graph(net_def, '__output__')
    net_def_with_node_id = add_node_id(sorted_net_def)

    dtype = mace_pb2.DT_FLOAT
    final_net_def = add_input_output_info(
        net_def_with_node_id, input_node, output_node,
        self._tf_graph, dtype)

    return final_net_def
def main(unused_args):
    if not os.path.isfile(FLAGS.model_file):
        print("Input graph file '" + FLAGS.model_file +
              "' does not exist!")
        sys.exit(-1)

    model_checksum = file_checksum(FLAGS.model_file)
    if FLAGS.model_checksum != "" and \
            FLAGS.model_checksum != model_checksum:
        print("Model checksum mismatch: %s != %s"
              % (model_checksum, FLAGS.model_checksum))
        sys.exit(-1)

    weight_checksum = None
    if FLAGS.platform == 'caffe':
        if not os.path.isfile(FLAGS.weight_file):
            print("Input weight file '" + FLAGS.weight_file +
                  "' does not exist!")
            sys.exit(-1)

        weight_checksum = file_checksum(FLAGS.weight_file)
        if FLAGS.weight_checksum != "" and \
                FLAGS.weight_checksum != weight_checksum:
            print("Weight checksum mismatch: %s != %s"
                  % (weight_checksum, FLAGS.weight_checksum))
            sys.exit(-1)

    if FLAGS.platform not in ['tensorflow', 'caffe']:
        print("platform %s is not supported." % FLAGS.platform)
        sys.exit(-1)
    if FLAGS.runtime not in ['cpu', 'gpu', 'dsp', 'cpu+gpu']:
        print("runtime %s is not supported." % FLAGS.runtime)
        sys.exit(-1)

    option = cvt.ConverterOption()
    if FLAGS.graph_optimize_options:
        option.transformer_option = FLAGS.graph_optimize_options.split(',')
    option.winograd = FLAGS.winograd
    option.quantize = FLAGS.quantize
    option.quantize_range_file = FLAGS.quantize_range_file

    input_node_names = FLAGS.input_node.split(',')
    input_node_shapes = FLAGS.input_shape.split(':')
    if len(input_node_names) != len(input_node_shapes):
        raise Exception('input node count and shape count do not match.')
    for i in xrange(len(input_node_names)):
        input_node = cvt.NodeInfo()
        input_node.name = input_node_names[i]
        input_node.shape = parse_int_array_from_str(input_node_shapes[i])
        option.add_input_node(input_node)

    output_node_names = FLAGS.output_node.split(',')
    for i in xrange(len(output_node_names)):
        output_node = cvt.NodeInfo()
        output_node.name = output_node_names[i]
        option.add_output_node(output_node)

    option.build()

    print("Transform model to one that can better run on device")
    if FLAGS.runtime == 'dsp':
        mace_check(FLAGS.platform == 'tensorflow',
                   'DSP only supports tensorflow')
        from mace.python.tools.converter_tool import tf_dsp_converter
        converter = tf_dsp_converter.TensorflowDspConverter(
            option, FLAGS.model_file)
        output_graph_def = converter.run()
    else:
        if FLAGS.platform == 'tensorflow':
            from mace.python.tools.converter_tool import tensorflow_converter
            converter = tensorflow_converter.TensorflowConverter(
                option, FLAGS.model_file)
        elif FLAGS.platform == 'caffe':
            from mace.python.tools.converter_tool import caffe_converter
            converter = caffe_converter.CaffeConverter(option,
                                                       FLAGS.model_file,
                                                       FLAGS.weight_file)
        else:
            print("Mace does not support platform %s yet." % FLAGS.platform)
            exit(1)
        output_graph_def = converter.run()

        if FLAGS.runtime == 'cpu+gpu':
            cpu_graph_def = copy.deepcopy(output_graph_def)

            option.device = cvt.DeviceType.GPU.value
            option.data_type = parse_data_type(
                FLAGS.data_type, cvt.DeviceType.GPU.value)
            mace_gpu_transformer = transformer.Transformer(
                option, output_graph_def)
            output_graph_def = mace_gpu_transformer.run()
            print("start optimize gpu memory.")
            memory_optimizer.optimize_gpu_memory(output_graph_def)
            print("GPU memory optimization done.")

            option.device = cvt.DeviceType.CPU.value
            option.data_type = parse_data_type(
                FLAGS.data_type, cvt.DeviceType.CPU.value)
            option.disable_transpose_filters()
            mace_cpu_transformer = transformer.Transformer(
                option, cpu_graph_def)
            cpu_graph_def = mace_cpu_transformer.run()
            print("start optimize cpu memory.")
            memory_optimizer.optimize_cpu_memory(cpu_graph_def)
            print("CPU memory optimization done.")

            print("Merge cpu and gpu ops together")
            output_graph_def.op.extend(cpu_graph_def.op)
            output_graph_def.mem_arena.mem_block.extend(
                cpu_graph_def.mem_arena.mem_block)
            output_graph_arg_names = set()
            for arg in output_graph_def.arg:
                output_graph_arg_names.add(arg.name)

            for arg in cpu_graph_def.arg:
                if arg.name not in output_graph_arg_names:
                    output_graph_def.arg.extend([arg])
            print("Merge done")
        else:
            option.device = device_type_map[FLAGS.runtime]
            option.data_type = parse_data_type(
                FLAGS.data_type, option.device)
            mace_transformer = transformer.Transformer(
                option, output_graph_def)
            output_graph_def = mace_transformer.run()

            print("start optimize memory.")
            if FLAGS.runtime == 'gpu':
                memory_optimizer.optimize_gpu_memory(output_graph_def)
            elif FLAGS.runtime == 'cpu':
                memory_optimizer.optimize_cpu_memory(output_graph_def)
            else:
                mace_check(False, "runtime only support [gpu|cpu|dsp]")
            print("Memory optimization done.")

    model_saver.save_model(
        output_graph_def, model_checksum, weight_checksum,
        FLAGS.template_dir, FLAGS.obfuscate, FLAGS.model_tag,
        FLAGS.output_dir, FLAGS.runtime, FLAGS.embed_model_data,
        FLAGS.winograd, FLAGS.data_type, FLAGS.model_graph_format)
def run(self):
    for op in self._net.op:
        mace_check(op.type in self._op_shape_inference,
                   "Mace does not support caffe op type %s yet" % op.type)
        self._op_shape_inference[op.type](op)
def infer_shape_general(self, op):
    if len(op.input) > 0:
        mace_check(op.input[0] in self._output_shape_cache,
                   "%s does not exist" % op.input[0])
        input_shape = self._output_shape_cache[op.input[0]]
        self.add_output_shape(op, [input_shape])
def get_blob(self, index):
    mace_check(index < len(self._blobs), "blob out of index")
    return self._blobs[index]