def testNestedNumpyArrayWithDType(self):
    """A list mixing Python floats and a 0-d ndarray round-trips as float32."""
    mixed_values = [10.0, 20.0, np.array(30.0)]
    proto = tensor_util.make_tensor_proto(mixed_values, dtype=dtypes.float32)
    decoded = tensor_util.MakeNdarray(proto)

    expected = np.array([10.0, 20.0, 30.0], dtype=np.float32)
    self.assertEqual(np.float32, decoded.dtype)
    self.assertAllClose(expected, decoded)
def _convert_layers_batchnorm(self, source_node):
    """Emit a 'BatchNorm' IR node for `source_node` and record its weights.

    Two graph layouts are handled, chosen by the type of the parent reached
    through path [0]:

    * parent is a 'Mul' ("transformed" form, Ax - (Au - b)): weights 'A' and
      'b' are read from constant parents and stored with `set_weight`.
    * otherwise: epsilon, moving variance ('var'), optional gamma ('scale'),
      beta ('bias') and moving mean ('mean') are collected by walking the
      neighbouring nodes with `get_parent` / `get_son`.

    In both cases the input node's real name is appended to the IR node's
    inputs and its '_output_shapes' attribute is merged into the IR node.

    NOTE(review): the meaning of the integer paths passed to `get_parent` /
    `get_son` is defined outside this block -- confirm against those helpers.
    """
    IR_node = self.IR_graph.node.add()
    TensorflowParser2._copy_and_reop(source_node, IR_node, 'BatchNorm')

    is_transformed = False
    test = self.get_parent(source_node.name, [0])

    if test.type == 'Mul':
        is_transformed = True

    # ssd model is transformed
    if is_transformed:
        # Ax - (Au - b)

        # A: multiplier constant, read from the parent at path [0, 1]
        input_mul_A = self.get_parent(source_node.name, [0, 1])
        tensor_content = input_mul_A.get_attr('value')
        A_content = tensor_util.MakeNdarray(tensor_content)
        self.set_weight(source_node.name, 'A', A_content)

        # b: subtracted constant, read from the parent at path [1]
        input_sub = self.get_parent(source_node.name, [1])
        tensor_content = input_sub.get_attr('value')
        sub_content = tensor_util.MakeNdarray(tensor_content)
        # print(sub_content)
        self.set_weight(source_node.name, 'b', sub_content)

        input_node = self.get_parent(source_node.name, [0])
        IR_node.input.append(input_node.real_name)
        # Drop the last shape entry on the IR node, then merge in the shapes
        # recorded on the input node.
        IR_node.attr["_output_shapes"].list.shape.pop()
        IR_node.attr["_output_shapes"].MergeFromString(input_node.layer.attr['_output_shapes'].SerializeToString())

    else:
        # epsilon: first float value of the constant parent at path [1]
        epsilon = self.get_parent(source_node.name, [1])
        IR_node.attr['epsilon'].f = epsilon.layer.attr['value'].tensor.float_val[0]

        # moving variance (var) /read
        moving_variance = self.get_parent(source_node.name, [0])

        if moving_variance.type == 'Identity':
            moving_variance_read = self.src_graph.get_parent(moving_variance.name, [0])
            tensor_content = moving_variance_read.get_attr('value')
            moving_variance_content = tensor_util.MakeNdarray(tensor_content)
            self.set_weight(source_node.name, 'var', moving_variance_content)

        else:
            # Any other producer is unsupported: dump the layer and abort.
            print(moving_variance.layer)
            assert False

        # gamma (scale)
        Rsqrt = self.get_son(source_node.name, [0], True)
        # print(Rsqrt.out_edges)

        if len(Rsqrt.out_edges) == 2:
            # Two consumers of Rsqrt: treated as "no scale"; only the output
            # node and the Mul leading to the moving mean are located.
            IR_node.attr['scale'].b = False
            output_node = self.get_son(Rsqrt.name, [0, 0], True)
            if output_node.type == 'Sub':
                output_node = self.get_son(Rsqrt.name, [1, 0], True)
                Mul = self.get_son(Rsqrt.name, [0], True)
            else:
                Mul = self.get_son(Rsqrt.name, [1], True)
        else:
            # Otherwise a scale constant is expected; it is resolved through
            # check_const and stored as the 'scale' weight.
            IR_node.attr['scale'].b = True
            son = self.get_son(Rsqrt.name, [0, 0], True)
            gamma_from = self.get_parent(son.name, [1, 1], True)
            gamma = self.check_const(gamma_from)
            gamma_tensor = gamma.get_attr('value')
            scale = tensor_util.MakeNdarray(gamma_tensor)
            self.set_weight(source_node.name, 'scale', scale)
            output_node = self.get_son(source_node.name, [0, 0, 0, 0], True)
            if output_node.type == 'Sub':
                output_node = self.get_son(source_node.name, [0, 0, 0, 0, 0], True)
                Mul = self.get_son(Rsqrt.name, [0, 0], True)
            else:
                Mul = self.get_son(Rsqrt.name, [0, 1], True)

        # beta  (bias)
        beta = self.get_parent(output_node.name, [1, 0, 0], True).get_attr('value')
        bias = tensor_util.MakeNdarray(beta)
        IR_node.attr['bias'].b = True
        self.set_weight(source_node.name, 'bias', bias)

        # moving mean (mean)
        moving_mean = self.get_parent(Mul.name, [0, 0]).get_attr('value')
        mean = tensor_util.MakeNdarray(moving_mean)
        self.set_weight(source_node.name, 'mean', mean)

        # input node
        assert output_node.type == 'Add'
        input_node = self.get_parent(output_node.name, [0, 0])
        IR_node.input.append(input_node.real_name)
        IR_node.attr["_output_shapes"].list.shape.pop()
        IR_node.attr["_output_shapes"].MergeFromString(input_node.layer.attr['_output_shapes'].SerializeToString())
        # The final Add now answers to the batch-norm node's name.
        output_node.real_name = source_node.name
import os

import tensorflow as tf
from tensorflow.core.util import event_pb2
from tensorflow.python.framework import tensor_util
from tensorflow.python.lib.io import tf_record

# Directory that receives the event files and is read back below.
summary_dir = 'tmp/summaries'
summary_writer = tf.summary.create_file_writer(summary_dir)

# Write four 'loss' scalars at consecutive steps 42..45.
with summary_writer.as_default():
    for step, loss in zip(range(42, 46), (0.1, 0.2, 0.3, 0.4)):
        tf.summary.scalar('loss', loss, step=step)


def my_summary_iterator(path):
    """Yield one parsed `Event` proto per record stored in the file at `path`."""
    for record in tf_record.tf_record_iterator(path):
        yield event_pb2.Event.FromString(record)


# Decode every summary value of every event file back into a numpy array.
for filename in os.listdir(summary_dir):
    event_path = os.path.join(summary_dir, filename)
    for event in my_summary_iterator(event_path):
        for value in event.summary.value:
            t = tensor_util.MakeNdarray(value.tensor)
            print(value.tag, event.step, t, type(t))
# [1, None, None, 3] image_np_expanded = np.expand_dims(image_np, axis=0) # Actual detection. start_time = time.time() request = predict_pb2.PredictRequest() request.model_spec.name = 'face_detector' request.model_spec.signature_name = 'predict_output' request.inputs['image_tensor'].CopyFrom( tf.contrib.util.make_tensor_proto(image_np_expanded, shape=list(image_np_expanded.shape))) result = stub.Predict(request, 10.0) # 5 seconds boxes = tensor_util.MakeNdarray(result.outputs['boxes']) scores = tensor_util.MakeNdarray(result.outputs['scores']) classes = tensor_util.MakeNdarray(result.outputs['classes']) num_detections = tensor_util.MakeNdarray(result.outputs['num_detections']) # print(boxes.shape) box = find_face_bounding_box(boxes[0], scores[0]) elapsed_time = time.time() - start_time print('face_detector time cost: {}'.format(elapsed_time)) if box is not None: ymin, xmin, ymax, xmax = box # print('box found: {} {} {} {}'.format(ymin, xmin, ymax, xmax)) (left, right, top, bottom) = (xmin * frame_width, xmax * frame_width, ymin * frame_height, ymax * frame_height)
def generate_output_graph(self, input_graph_def, input_node_map,
                          fuse_op_list):
    """Build a new GraphDef in which selected ops are fused with Requantize.

    Args:
        input_graph_def: GraphDef whose nodes are walked in order.
        input_node_map: mapping from node name to node, used to look up the
            inputs of the nodes being fused.
        fuse_op_list: indices (into `input_graph_def.node`) of the nodes to
            fuse.

    Returns:
        A new `graph_pb2.GraphDef`. For every fused index the node is replaced
        by an "<op>AndRequantize" node that takes the name of the node at
        `index + 3`; the nodes at `index + 1 .. index + 3` are added to a skip
        list, and the two frozen min/max nodes are re-emitted right after the
        fused node. The float bias constant is rewritten in place as qint32.

    NOTE(review): the code assumes the three nodes following a fused op are,
    in order, the frozen min, frozen max and requantize nodes -- confirm with
    the pass that produces `fuse_op_list`.
    """
    output_graph_def = graph_pb2.GraphDef()
    skip_list = []
    # Never populated in this function; kept for the membership test below.
    skip_node_name = []
    uint8_type = dtypes.quint8.as_datatype_enum
    qint32_type = dtypes.qint32.as_datatype_enum
    for index, node in enumerate(input_graph_def.node):
        if index in fuse_op_list:
            input_node = input_node_map[node.input[0]]
            if input_node.op == 'QuantizeV2':
                # Input comes straight from a QuantizeV2: its min/max are
                # inputs 1 and 2 of that node.
                new_node = node_def_pb2.NodeDef()

                new_node.op = node.op + "AndRequantize"
                for _, value in enumerate(node.input):
                    new_node.input.append(value)
                weights_node_name = node.input[1]
                bias_node_name = node.input[2]
                min_input_node = input_node_map[
                    self.get_node_name_from_input(input_node.input[1])]
                max_input_node = input_node_map[
                    self.get_node_name_from_input(input_node.input[2])]
                requantize_node = input_graph_def.node[index + 3]
                frozen_max_node = input_graph_def.node[index + 2]
                frozen_min_node = input_graph_def.node[index + 1]
                new_node.name = requantize_node.name
                min_filter_node_name = node.input[5]
                max_filter_node_name = node.input[6]
                new_node.input.append(frozen_min_node.name)
                new_node.input.append(frozen_max_node.name)
                min_filter_node = input_node_map[min_filter_node_name]
                max_filter_node = input_node_map[max_filter_node_name]

                new_node.attr["T1"].CopyFrom(node.attr['T1'])
                new_node.attr["T2"].CopyFrom(node.attr['T2'])
                min_input_value = (tensor_util.MakeNdarray(
                    min_input_node.attr['value'].tensor))
                max_input_value = (tensor_util.MakeNdarray(
                    max_input_node.attr['value'].tensor))
                min_filter_value = (tensor_util.MakeNdarray(
                    min_filter_node.attr['value'].tensor))
                max_filter_value = (tensor_util.MakeNdarray(
                    max_filter_node.attr['value'].tensor))
                weights_tensor = tensor_util.MakeNdarray(
                    input_node_map[weights_node_name].attr['value'].tensor)
                bias_tensor = tensor_util.MakeNdarray(
                    input_node_map[bias_node_name].attr['value'].tensor)
                # Scale that maps the float bias into the int32 accumulator
                # domain, derived from the input and filter ranges.
                bias_scale = 255.0 * 127.0 / (
                    max(abs(max_input_value), abs(min_input_value)) *
                    max(abs(max_filter_value), abs(min_filter_value)))
                QaAmin = 255 * min_input_value / (max_input_value -
                                                  min_input_value)
                int32_bias = []
                # Each bias entry also receives a compensation term: the
                # axis-0 sum of the int32-cast weights times QaAmin.
                for bias_index, value in enumerate(
                        np.sum(np.array(weights_tensor, dtype=np.int32),
                               axis=0,
                               dtype=np.int32)):
                    int32_bias.append(
                        int(bias_tensor[bias_index] * bias_scale +
                            value * QaAmin))
                # Prefer the copy already placed in the output graph, else
                # fall back to the node from the input map.
                # NOTE(review): nesting reconstructed from a flattened source;
                # the attribute updates below are taken to run in both cases.
                bias_node = self.check_node_existence(
                    output_graph_def, bias_node_name)
                if not bias_node:
                    bias_node = input_node_map[bias_node_name]
                bias_node.attr['dtype'].CopyFrom(
                    attr_value_pb2.AttrValue(type=qint32_type))
                bias_node.attr['value'].CopyFrom(
                    attr_value_pb2.AttrValue(
                        tensor=tensor_util.make_tensor_proto(
                            int32_bias, dtypes.int32, bias_tensor.shape)))
                # The proto is built as int32 and then relabelled qint32.
                bias_node.attr['value'].tensor.dtype = qint32_type

                new_node.attr["Tbias"].CopyFrom(
                    attr_value_pb2.AttrValue(type=qint32_type))
                new_node.attr["Toutput"].CopyFrom(
                    attr_value_pb2.AttrValue(type=uint8_type))
                skip_list.append(index + 1)
                skip_list.append(index + 2)
                skip_list.append(index + 3)
                output_graph_def.node.extend(
                    [new_node, frozen_max_node, frozen_min_node])
            elif input_node.op == "Requantize":
                # Input comes from a Requantize: its min/max are inputs 3 and
                # 4 of that node, and no weight compensation term is added.
                new_node = node_def_pb2.NodeDef()
                new_node.op = node.op + "AndRequantize"
                new_node.name = input_graph_def.node[index + 3].name
                for _, value in enumerate(node.input):
                    new_node.input.append(value)

                weights_node_name = node.input[1]
                bias_node_name = node.input[2]
                min_input_node = input_node_map[
                    self.get_node_name_from_input(input_node.input[3])]
                max_input_node = input_node_map[
                    self.get_node_name_from_input(input_node.input[4])]

                requantize_node = input_graph_def.node[index + 3]
                frozen_max_node = input_graph_def.node[index + 2]
                frozen_min_node = input_graph_def.node[index + 1]

                skip_list.append(index + 1)
                skip_list.append(index + 2)
                skip_list.append(index + 3)
                new_node.input.append(frozen_min_node.name)
                new_node.input.append(frozen_max_node.name)
                min_filter_node_name = node.input[5]
                max_filter_node_name = node.input[6]
                min_filter_node = input_node_map[min_filter_node_name]
                max_filter_node = input_node_map[max_filter_node_name]

                new_node.attr["T1"].CopyFrom(node.attr['T1'])
                new_node.attr["T2"].CopyFrom(node.attr['T2'])

                min_input_value = (tensor_util.MakeNdarray(
                    min_input_node.attr['value'].tensor))
                max_input_value = (tensor_util.MakeNdarray(
                    max_input_node.attr['value'].tensor))
                min_filter_value = (tensor_util.MakeNdarray(
                    min_filter_node.attr['value'].tensor))
                max_filter_value = (tensor_util.MakeNdarray(
                    max_filter_node.attr['value'].tensor))

                bias_tensor = tensor_util.MakeNdarray(
                    input_node_map[new_node.input[2]].attr['value'].tensor)
                bias_scale = 255.0 * 127.0 / (
                    max(abs(max_input_value), abs(min_input_value)) *
                    max(abs(max_filter_value), abs(min_filter_value)))
                bias_int32 = [int(i * bias_scale) for i in bias_tensor]
                # Same lookup-then-rewrite of the bias constant as above.
                bias_node = self.check_node_existence(
                    output_graph_def, bias_node_name)
                if not bias_node:
                    bias_node = input_node_map[bias_node_name]
                bias_node.attr['dtype'].CopyFrom(
                    attr_value_pb2.AttrValue(type=qint32_type))
                bias_node.attr['value'].CopyFrom(
                    attr_value_pb2.AttrValue(
                        tensor=tensor_util.make_tensor_proto(
                            bias_int32, dtypes.int32, bias_tensor.shape)))
                bias_node.attr['value'].tensor.dtype = qint32_type
                new_node.attr["Tbias"].CopyFrom(
                    attr_value_pb2.AttrValue(type=qint32_type))
                new_node.attr["Toutput"].CopyFrom(
                    attr_value_pb2.AttrValue(type=uint8_type))

                output_graph_def.node.extend(
                    [new_node, frozen_max_node, frozen_min_node])
            else:
                # Listed for fusion but fed by neither op: copy unchanged.
                new_node = node_def_pb2.NodeDef()
                new_node.CopyFrom(node)
                output_graph_def.node.extend([new_node])
        elif index in skip_list or node.name in skip_node_name:
            # Already absorbed into (or re-emitted with) a fused node.
            continue
        else:
            new_node = node_def_pb2.NodeDef()
            new_node.CopyFrom(node)
            output_graph_def.node.extend([new_node])
    return output_graph_def
def Apply(self):
    """Run the prepared request through the stub and post-process detections.

    Sends `self.internal_request` via `self.istub.Predict`, decodes the seven
    'predictions<i>' / 'localisations<i>' outputs (i = 0..6) plus 'bbox_img',
    then selects, clips, sorts, NMS-filters and resizes the boxes with the
    `np_methods` helpers.

    Side effects: sets `self.internal_result`, `self.rclasses`,
    `self.rscores` and `self.rbboxes`.
    """
    self.internal_result = self.istub.Predict(self.internal_request, 10.0)
    outputs = self.internal_result.outputs

    # One prediction tensor and one localisation tensor per numeric suffix
    # 0..6; built in a loop instead of seven copy-pasted calls each.
    num_feature_outputs = 7
    rpredictions = [
        tensor_util.MakeNdarray(outputs['predictions%d' % idx])
        for idx in range(num_feature_outputs)
    ]
    rlocalisations = [
        tensor_util.MakeNdarray(outputs['localisations%d' % idx])
        for idx in range(num_feature_outputs)
    ]
    rbbox_img = tensor_util.MakeNdarray(outputs['bbox_img'])

    self.rclasses, self.rscores, self.rbboxes = np_methods.ssd_bboxes_select(
        rpredictions,
        rlocalisations,
        SSD.ssd_anchors,
        select_threshold=SSD.thres,
        img_shape=SSD.net_shape,
        num_classes=SSD.total_classes,
        decode=True)
    self.rbboxes = np_methods.bboxes_clip(rbbox_img, self.rbboxes)
    self.rclasses, self.rscores, self.rbboxes = np_methods.bboxes_sort(
        self.rclasses, self.rscores, self.rbboxes, top_k=400)
    self.rclasses, self.rscores, self.rbboxes = np_methods.bboxes_nms(
        self.rclasses,
        self.rscores,
        self.rbboxes,
        nms_threshold=SSD.nms_thres)
    self.rbboxes = np_methods.bboxes_resize(rbbox_img, self.rbboxes)
def rename_Transpose(self, source_node):
    """Convert a Transpose node and attach its permutation as attribute 'perm'.

    The permutation is read from the 'value' tensor of the parent reached
    through path [1] and stored on the IR node as a plain Python list.
    """
    IR_node = self._convert_identity_operation(source_node, 1)

    perm_parent = self.get_parent(source_node.name, [1])
    perm_proto = perm_parent.layer.attr['value'].tensor
    permutation = tensor_util.MakeNdarray(perm_proto).tolist()

    assign_IRnode_values(IR_node, {'perm': permutation})
def _update_bias(self):
    """
    Convert the bias from float to int.

    For every node whose op is in `self.fused_requantized_bias_op`, walk up
    the chain of first inputs until a node whose op is in `self.offset_map`
    is found. The frozen min/max inputs of that node, together with this
    node's filter min/max, give the scale used to rewrite the float bias
    constant as a qint32 constant in place.
    """
    for node_name in self.node_mapping:
        current_node = self.node_mapping[node_name]
        current_node_op = current_node.op
        if current_node_op in self.fused_requantized_bias_op:
            done = False
            another_conv_node = None
            original_conv_node = current_node
            # Follow input[0] upwards. The walk continues through
            # QuantizedMaxPool / QuantizedAvgPool and through
            # QuantizedConcatV2 nodes listed in rerange_concat_node; any
            # other op stops it without a match.
            while not done:
                current_node = self.node_mapping[
                    self.get_node_name_from_input(current_node.input[0])]
                if current_node.op in self.offset_map:
                    another_conv_node = current_node
                    done = True
                elif current_node.op == "QuantizedConcatV2":
                    if current_node.name not in self.rerange_concat_node:
                        done = True
                elif current_node.op not in ("QuantizedMaxPool",
                                             "QuantizedAvgPool"):
                    done = True

            if not another_conv_node:
                continue

            bias_node = self.node_mapping[self.get_node_name_from_input(
                original_conv_node.input[2])]
            bias_node_type = original_conv_node.attr['Tbias']
            # Only float32 biases are converted.
            # NOTE(review): the second clause is implied by the first (a
            # qint32 type is already != float32); left untouched here.
            if bias_node_type.type != dtypes.float32 or bias_node_type.type == dtypes.qint32:
                continue
            min_filter_node = self.node_mapping[
                original_conv_node.input[5]]
            max_filter_node = self.node_mapping[
                original_conv_node.input[6]]

            # A filter-min constant with no shape dims is treated as one
            # channel; otherwise the size of its first dim is used.
            channel_size = 1 if not min_filter_node.attr[
                'value'].tensor.tensor_shape.dim else min_filter_node.attr[
                    'value'].tensor.tensor_shape.dim[0].size

            if channel_size == 1:
                max_filter_tensor = []
                min_filter_tensor = []
                max_filter_tensor.append(
                    (max_filter_node.attr['value'].tensor.float_val)[0])
                min_filter_tensor.append(
                    (min_filter_node.attr['value'].tensor.float_val)[0])
            else:
                max_filter_tensor = tensor_util.MakeNdarray(
                    max_filter_node.attr['value'].tensor)
                min_filter_tensor = tensor_util.MakeNdarray(
                    min_filter_node.attr['value'].tensor)

            # offset_map gives, per op, the input index of the frozen min;
            # the frozen max is the next input.
            offset_value = self.offset_map[another_conv_node.op]
            min_freezed_output_node = self.node_mapping[
                another_conv_node.input[offset_value]]
            max_freezed_output_node = self.node_mapping[
                another_conv_node.input[offset_value + 1]]
            min_input = min_freezed_output_node.attr[
                'value'].tensor.float_val[0]
            max_input = max_freezed_output_node.attr[
                'value'].tensor.float_val[0]

            bias_tensor = (tensor_util.MakeNdarray(
                bias_node.attr['value'].tensor))
            bias_length = bias_tensor.shape[0]
            scales = []
            # current_node is the matched upstream node at this point.
            activation_range = 127.0 if current_node.attr['out_type'].type == dtypes.qint8 \
                else 255.0
            weights_range = 127.0
            for i in range(channel_size):
                scales.append(activation_range * weights_range /
                              (max(abs(max_input), abs(min_input)) *
                               max(abs(max_filter_tensor[i]),
                                   abs(min_filter_tensor[i]))))
            int32_bias = []
            # One scale per bias entry when there are several channels,
            # otherwise the single scale is shared.
            if channel_size > 1:
                for i in range(bias_length):
                    int32_bias.append(int(bias_tensor[i] * scales[i]))
            else:
                for i in range(bias_length):
                    int32_bias.append(int(bias_tensor[i] * scales[0]))
            original_conv_node.attr['Tbias'].CopyFrom(
                attr_value_pb2.AttrValue(
                    type=dtypes.qint32.as_datatype_enum))
            bias_node.attr['dtype'].CopyFrom(
                attr_value_pb2.AttrValue(
                    type=dtypes.qint32.as_datatype_enum))
            bias_node.attr['value'].CopyFrom(
                attr_value_pb2.AttrValue(
                    tensor=tensor_util.make_tensor_proto(
                        int32_bias, dtypes.int32, bias_tensor.shape)))
            # The proto is built as int32 and then relabelled qint32.
            bias_node.attr[
                'value'].tensor.dtype = dtypes.qint32.as_datatype_enum
def Apply(self):
    """Query the 'actdet_acam' model and format the result into `self.output`.

    When `self.has_input` is false no request is made and `self.probs` is an
    empty list. Otherwise a PredictRequest is filled from `self.frames`,
    `self.temporal_rois`, `self.norm_rois` and two index arrays sized by
    `len(self.actor_boxes)`, sent through `self.istub`, and the 'output'
    tensor is decoded into `self.probs`.

    `self.output` is "<boxes>@<results>":
      * boxes:   "x|y|z|w|tid" per actor box, joined with '-'
      * results: the PRINT_TOP_K highest-probability "label|prob" pairs per
                 actor joined with '|', actors joined with '-'
    Both halves are the literal "None" when there are no probabilities.
    """
    if not self.has_input:
        self.probs = []
    else:
        self.internal_request = predict_pb2.PredictRequest()
        self.internal_request.model_spec.name = 'actdet_acam'
        self.internal_request.model_spec.signature_name = 'predict_images'

        tube_num = len(self.actor_boxes)
        nptmp1 = np.zeros(tube_num)
        nptmp2 = np.arange(tube_num)

        self.internal_request.inputs['updated_frames'].CopyFrom(
            tf.contrib.util.make_tensor_proto(self.frames,
                                              dtype=tf.float32,
                                              shape=self.frames.shape))
        self.internal_request.inputs['temporal_rois'].CopyFrom(
            tf.contrib.util.make_tensor_proto(
                self.temporal_rois,
                dtype=tf.float32,
                shape=self.temporal_rois.shape))
        self.internal_request.inputs[
            'temporal_roi_batch_indices'].CopyFrom(
                tf.contrib.util.make_tensor_proto(nptmp1,
                                                  dtype=tf.int32,
                                                  shape=nptmp1.shape))
        self.internal_request.inputs['rois'].CopyFrom(
            tf.contrib.util.make_tensor_proto(self.norm_rois,
                                              dtype=tf.float32,
                                              shape=self.norm_rois.shape))
        self.internal_request.inputs['roi_batch_indices'].CopyFrom(
            tf.contrib.util.make_tensor_proto(nptmp2,
                                              dtype=tf.int32,
                                              shape=nptmp2.shape))

        self.internal_result = self.istub.Predict(self.internal_request,
                                                  10.0)
        self.probs = tensor_util.MakeNdarray(
            self.internal_result.outputs['output'])

    # len() rather than truthiness: probs may be a numpy array.
    if not len(self.probs):
        abstr = "None"
        resstr = "None"
    else:
        abstr = "-".join(
            "%d|%d|%d|%d|%d" % (ab['box'][0][0], ab['box'][0][1],
                                ab['box'][0][2], ab['box'][0][3], ab['tid'])
            for ab in self.actor_boxes)

        resstr = ""
        # `range` instead of the Python-2-only `xrange`, matching the inner
        # loop and keeping the method importable on Python 3.
        for i in range(len(self.actor_boxes)):
            act_probs = self.probs[i]
            # Indices of the probabilities, highest first.
            order = np.argsort(act_probs)[::-1]
            for pp in range(PRINT_TOP_K):
                resstr += "%s|%s|" % (str(act.ACTION_STRINGS[order[pp]]),
                                      str(act_probs[order[pp]]))
            # Swap the trailing '|' of this actor for the '-' separator.
            resstr = resstr[:-1]
            resstr += '-'
        # Drop the separator after the last actor.
        resstr = resstr[:-1]

    self.output = "%s@%s" % (abstr, resstr)
def _bf16_convert(self, bf16_node_name):
    """Switch the named node to bfloat16 and patch its neighbourhood.

    The node name is appended to `self.converted_ops` and the node's "T"
    attribute is set to bfloat16. Each input and each output is then handled
    by one of several cases (constant folding, removal of a redundant Cast,
    recursive conversion of list-approved neighbours, or insertion of a Cast
    node), mutating `self.input_graph` in place.

    Args:
        bf16_node_name: key into `self.node_name_mapping` for the node to
            convert.
    """
    self._parse_graph()
    self.converted_ops.append(bf16_node_name)
    bf16_node_detail = self.node_name_mapping[bf16_node_name]
    bf16_node = bf16_node_detail.node
    # Snapshot the inputs: bf16_node.input is rewritten inside the loop.
    bf16_node_inputs = list(bf16_node.input)
    for each_input in bf16_node_inputs:
        each_input_detail = self.node_name_mapping[each_input]
        each_input_node = each_input_detail.node
        # Const + Cast => Const optimization
        # (a float32 constant is re-encoded as bfloat16 in place)
        if each_input_node.op == "Const":
            if each_input_node.attr["dtype"] == attr_value_pb2.AttrValue(
                    type=dtypes.float32.as_datatype_enum):
                fp32_value = tensor_util.MakeNdarray(
                    each_input_node.attr.get('value').tensor)
                helper.set_attr_dtype(each_input_node, "dtype",
                                      dtypes.bfloat16)
                each_input_node.attr['value'].CopyFrom(
                    attr_value_pb2.
                    AttrValue(tensor=tensor_util.make_tensor_proto(
                        fp32_value, dtypes.bfloat16, fp32_value.shape)))
                self.converted_ops.append(each_input)
        # Cast + Cast => O optimization
        # (an input Cast whose source is already bfloat16 is bypassed)
        elif (each_input_node.op == "Cast"
              and each_input_node.attr["SrcT"] == attr_value_pb2.AttrValue(
                  type=dtypes.bfloat16.as_datatype_enum)):
            cast_input_name = each_input_node.input[0]
            for index, input_name in enumerate(bf16_node.input):
                if input_name == each_input_node.name:
                    bf16_node.input[index] = cast_input_name
            # The Cast is removed only when this node was its sole consumer.
            if len(each_input_detail.output) == 1:
                self.input_graph.node.remove(each_input_node)
                del each_input_node
        elif (each_input not in self.expand_fp32_ops + self.converted_ops
              and each_input_node.op in BF16Convert.WHITE_LIST +
              BF16Convert.GRAY_LIST + BF16Convert.CLEAR_LIST):
            # Convertible producer not yet handled: recurse, but only when
            # it has a single consumer.
            if len(each_input_detail.output) == 1:
                self._bf16_convert(each_input)
            # TODO: Consider multi-output case
        elif each_input in self.converted_ops:
            pass
        else:
            # Everything else gets an FP32 -> BF16 Cast, reusing one that is
            # already registered under the same name.
            if each_input + "_FP32toBF16" not in list(
                    self.node_name_mapping.keys()):
                input_cast_node = helper.create_node(
                    "Cast", each_input + "_FP32toBF16", [each_input])
                helper.set_attr_dtype(input_cast_node, "DstT",
                                      dtypes.bfloat16)
                helper.set_attr_dtype(input_cast_node, "SrcT",
                                      dtypes.float32)
                helper.set_attr_bool(input_cast_node, "Truncate", False)
            else:
                input_cast_node = self.node_name_mapping[
                    each_input + "_FP32toBF16"].node
            for index, input_name in enumerate(bf16_node.input):
                if input_name == each_input:
                    bf16_node.input[index] = input_cast_node.name
            # NOTE(review): indentation reconstructed from a flattened
            # source; as placed, the cast node is appended to the graph even
            # when it was looked up rather than created -- confirm.
            self.input_graph.node.extend([input_cast_node])

    # TODO: Need consider different op type
    helper.set_attr_dtype(bf16_node, "T", dtypes.bfloat16)

    bf16_node_outputs = bf16_node_detail.output
    for each_output in bf16_node_outputs:
        each_output_detail = self.node_name_mapping[each_output]
        each_output_node = each_output_detail.node
        # Need consider output node op type

        # A consumer Cast that targets bfloat16 is bypassed: its own
        # consumers are rewired to read from this node directly.
        if (each_output_node.op == "Cast"
                and each_output_node.attr["DstT"] == attr_value_pb2.AttrValue(
                    type=dtypes.bfloat16.as_datatype_enum)):
            for cast_output in each_output_detail.output:
                cast_output_node = self.node_name_mapping[cast_output].node
                for index, input_name in enumerate(cast_output_node.input):
                    if each_output == input_name:
                        cast_output_node.input[index] = bf16_node.name
            del each_output_node
        elif (each_output not in self.expand_fp32_ops + self.converted_ops
              and each_output_node.op in BF16Convert.WHITE_LIST +
              BF16Convert.GRAY_LIST + BF16Convert.CLEAR_LIST):
            # TODO: Consider multi node inputs case, check others inputs whether converted to BF16
            self._bf16_convert(each_output)
        elif each_output in self.converted_ops:
            pass
        else:
            # Remaining consumers read through a BF16 -> FP32 Cast, reusing
            # one that is already registered under the same name.
            if bf16_node_name + "_BF16toFP32" not in list(
                    self.node_name_mapping.keys()):
                output_cast_node = helper.create_node(
                    "Cast", bf16_node_name + "_BF16toFP32",
                    [bf16_node_name])
                helper.set_attr_dtype(output_cast_node, "DstT",
                                      dtypes.float32)
                helper.set_attr_dtype(output_cast_node, "SrcT",
                                      dtypes.bfloat16)
                helper.set_attr_bool(output_cast_node, "Truncate", False)
            else:
                output_cast_node = self.node_name_mapping[
                    bf16_node_name + "_BF16toFP32"].node
            for index, input_name in enumerate(each_output_node.input):
                if bf16_node_name == input_name:
                    each_output_node.input[index] = output_cast_node.name
            # NOTE(review): same reconstructed placement as the input-side
            # cast above -- confirm.
            self.input_graph.node.extend([output_cast_node])

    return
def getValues(self, op):
    """
    Function to find underlying constants/variables representing operation

    Shape-only operations (Identity, Squeeze, ExpandDims, Reshape, ConcatV2,
    Split, StridedSlice) and Const are evaluated directly with numpy on the
    values of their inputs, found recursively. Operations that need new
    variables are delegated to `self.opToVarArray`.

    Arguments:
        op: (tf.op) to get values of
    Returns:
        values: (np array) of scalars or variable numbers depending on op
    Raises:
        NotImplementedError: if the op type is not handled.
        ValueError: if an ExpandDims axis is out of range.
    """
    input_ops = [i.op for i in op.inputs]

    ### Operations not requiring new variables ###
    if op.node_def.op == 'Identity':
        return self.getValues(input_ops[0])

    if op.node_def.op == 'Squeeze':
        print("Squeeze inputs_ops = ", input_ops)
        prevValues = self.getValues(input_ops[0])
        print("Squeeze prevValues = ", prevValues)
        print("Squeeze prevValues.shape = ", prevValues.shape)
        squeeze_dims = op.node_def.attr["squeeze_dims"].list.i
        print("Squeeze squeeze_dims = ", squeeze_dims)
        axis = op.node_def.attr["axis"].list.i
        print("Squeeze axis = ", axis)
        assert (len(axis) == 0 or len(squeeze_dims) == 0)
        prevValues_shape = prevValues.shape
        rank = len(prevValues_shape)
        squeeze = axis if len(axis) > 0 else squeeze_dims
        if len(squeeze) > 0:
            # Negative axes count backward from the end.
            squeeze = {d + rank if d < 0 else d for d in squeeze}
        else:
            # No axis listed: every size-1 dimension is removed.
            squeeze = {k for k, val in enumerate(prevValues_shape) if val == 1}
        new_shape = []
        for i, val in enumerate(prevValues_shape):
            print("i", i, "val", val)
            if i in squeeze:
                print("removing", i, "val", val)
                continue
            new_shape.append(val)
        print(new_shape)
        return prevValues.reshape(new_shape)

    if op.node_def.op == 'ExpandDims':
        # print ("ExpandDims inputs_ops = ",input_ops[1])
        dim = self.getValues(input_ops[1])
        prevValues = self.getValues(input_ops[0])
        print("ExpandDims inputs[1] (dim) = ", dim)
        print("ExpandDims inputs[0] (values) = ", prevValues)
        print("ExpandDims values shape = ", prevValues.shape)
        prevValues_shape = prevValues.shape
        print("ExpandDims op.inputs[0].shape.dims = ",
              op.inputs[0].shape.dims)
        rank = len(prevValues_shape)
        # dim may arrive as a 0-d or 1-element array; take its single value.
        insert_at = int(np.asarray(dim).ravel()[0])
        if insert_at < 0:
            # Negative axes count backward from the end of the *output*
            # shape, hence the +1.
            insert_at += rank + 1
        if not 0 <= insert_at <= rank:
            raise ValueError(
                "ExpandDims axis %s out of range for rank %d" % (dim, rank))
        # insert_at == rank appends a trailing axis.
        new_shape = list(prevValues_shape)
        new_shape.insert(insert_at, 1)
        print(new_shape)
        return prevValues.reshape(new_shape)

    if op.node_def.op in ['Reshape']:
        if input_ops[1].node_def.op == 'Pack':
            # Target shape is built dynamically: flatten everything but the
            # leading dimension, using the static input shape.
            prevValues = self.getValues(input_ops[0])
            input_dims = op.inputs[0].shape.dims
            input_size = np.prod(
                np.array([d.value for d in input_dims])[1:])
            shape = (-1, input_size)
            # Reshape the whole array, not only its first row, so a leading
            # dimension larger than one is preserved.
            return np.reshape(prevValues, shape)
        prevValues = [self.getValues(i) for i in input_ops]
        shape = prevValues[1]
        return np.reshape(prevValues[0], shape)

    if op.node_def.op == 'ConcatV2':
        prevValues = [self.getValues(i) for i in input_ops]
        N = op.node_def.attr["N"].i
        values = prevValues[0:N]
        print("ConcatV2 values = ", prevValues)
        print("concat attr.N = ", N)
        # The axis is the input that follows the N values.
        axis = prevValues[N]
        # print ("axis = ",axis)
        return np.concatenate(values, axis=axis)

    if op.node_def.op == 'Split':
        prevValues = [self.getValues(i) for i in input_ops]
        # NOTE(review): the split count (2) and axis (1) are hard-coded and
        # the first input is ignored -- confirm this matches every Split in
        # the supported graphs.
        return np.split(prevValues[1], indices_or_sections=2, axis=1)

    if op.node_def.op == 'Const':
        tproto = op.node_def.attr['value'].tensor
        return tensor_util.MakeNdarray(tproto)

    if op.node_def.op in ['StridedSlice']:
        prevValues = [self.getValues(i) for i in input_ops]
        assert (len(prevValues) == 4)  ## or len(prevValues) == 3)
        input_ = prevValues[0]
        # Print the static shape; building a tf.shape op here would add a
        # node to the graph being inspected.
        print(op.inputs[0].shape)
        print("inputs (actual name = " + input_ops[0].name + ")")
        print(input_)
        input_shape = input_.shape
        print(input_shape)
        begin = prevValues[1]
        print("begin (actual name = " + input_ops[1].name + ")")
        print(begin)
        assert (len(begin) == 3)  # Todo: support any shape
        end = prevValues[2]
        print("end (actual name = " + input_ops[2].name + ")")
        print(end)
        assert (len(end) == 3)
        strides = prevValues[3]
        print("strides (actual name = " + input_ops[3].name + ")")
        print(strides)
        assert (len(strides) == 3)
        for stride in strides:
            assert (stride == 1)  # only stride = 1 is supported

        def to_reversed_bit_array(num):
            # Bit k of the mask ends up at string position k.
            return (format(num, '03b')[::-1])

        begin_mask = op.node_def.attr["begin_mask"].i
        print("begin_mask =", begin_mask)
        begin_mask_ba = to_reversed_bit_array(begin_mask)
        print("begin_mask_ba =", begin_mask_ba)
        # ellipsis_mask and new_axis_mask are only reported, not applied.
        ellipsis_mask = op.node_def.attr["ellipsis_mask"].i
        print("ellipsis_mask =", ellipsis_mask)
        ellipsis_mask_ba = to_reversed_bit_array(ellipsis_mask)
        print("ellipsis_mask_ba =", ellipsis_mask_ba)
        end_mask = op.node_def.attr["end_mask"].i
        print("end_mask =", end_mask)
        end_mask_ba = to_reversed_bit_array(end_mask)
        print("end_mask_ba =", end_mask_ba)
        new_axis_mask = op.node_def.attr["new_axis_mask"].i
        print("new_axis_mask =", new_axis_mask)
        new_axis_mask_ba = to_reversed_bit_array(new_axis_mask)
        print("new_axis_mask_ba =", new_axis_mask_ba)
        shrink_axis_mask = op.node_def.attr["shrink_axis_mask"].i
        print("shrink_axis_mask =", shrink_axis_mask)
        shrink_axis_mask_ba = to_reversed_bit_array(shrink_axis_mask)
        print("shrink_axis_mask_ba =", shrink_axis_mask_ba)
        print()

        actual_begin = begin.copy()
        actual_end = end.copy()
        shrink_axes = set()
        for i in range(len(begin)):
            if begin_mask_ba[i] == '1':
                actual_begin[i] = 0
            if end_mask_ba[i] == '1':
                actual_end[i] = input_shape[i]
            if shrink_axis_mask_ba[i] == '1':
                # A shrunk axis selects exactly one index; write it into the
                # per-axis slot rather than replacing the whole array.
                shrink_axes.add(i)
                if begin[i] >= 0:
                    actual_begin[i] = begin[i]
                else:
                    actual_begin[i] = input_shape[i] + begin[i]
                actual_end[i] = actual_begin[i] + 1
        print("actual_begin", actual_begin)
        print("actual_end", actual_end)
        values = input_[actual_begin[0]:actual_end[0],
                        actual_begin[1]:actual_end[1],
                        actual_begin[2]:actual_end[2]]
        print(values)
        if not shrink_axes:
            return values
        # Drop exactly the shrunk axes, wherever they are.
        selector = tuple(0 if k in shrink_axes else slice(None)
                         for k in range(3))
        return values[selector]
    ### END operations not requiring new variables ###

    if op.node_def.op in [
            'MatMul', 'BiasAdd', 'Add', 'Sub', 'Relu', 'MaxPool', 'Conv2D',
            'Placeholder', 'Mul'
    ]:
        # need to create variables for these
        return self.opToVarArray(op)

    raise NotImplementedError
def parse_tflite_graph(tflite_g, opcodes_map, model, input_prefix=''):
    """
    Returns a Graph object along with some op count stats. All tflite op types are prefixed with "TFL_".
    Names of graph inputs are optionally prefixed with a string to prevent name conflicts in subgraphs.
    Quantized tensors are surrounded with quantize/dequantize ops

    Args:
        tflite_g: tflite subgraph object to read tensors and operators from.
        opcodes_map: mapping from an operator's opcode index to its op type name.
        model: tflite model object; used to read tensor buffers and subgraph names.
        input_prefix: string prepended to the names of the graph's input tensors.

    Returns:
        Tuple (onnx_nodes, op_cnt, attr_cnt, output_shapes, dtypes, inputs, outputs, graph_name).
    """
    op_cnt = collections.Counter()
    attr_cnt = collections.Counter()
    onnx_nodes = []
    output_shapes = {}
    dtypes = {}
    tensor_names = {}
    # Map tensor name to tflite Tensor object so we can fetch quantization info as needed
    name_to_tensor = {}
    # If a node takes a quantized tensor as input, we must add a dequantize op after it.
    # Store a mapping so we only need to make at most one dequantize op per tensor.
    tensor_name_to_dequant_output = {}

    # tflite uses generic names (arg0, arg1, etc.) for inputs but full names for other tensors, so
    # prefixing just the inputs should be fine. Other tensors are prefixed when we do inlining.
    input_indices = {
        tflite_g.Inputs(i)
        for i in range(tflite_g.InputsLength())
    }

    # Pass 1: register every tensor (name, shape, dtype) and emit a Const
    # node for each tensor that carries buffer data.
    for i in range(tflite_g.TensorsLength()):
        tensor = tflite_g.Tensors(i)
        name = tensor.Name().decode()
        if i in input_indices:
            name = input_prefix + name
        tensor_names[i] = name
        name_to_tensor[name] = tensor

        if tensor.ShapeIsNone():
            output_shapes[name] = None
        elif tensor.ShapeSignatureIsNone():
            # The shape signature uses -1 to signify unknown dims. Old models don't have this and use Shape instead.
            output_shapes[name] = tensor.ShapeAsNumpy().tolist()
        else:
            output_shapes[name] = tensor.ShapeSignatureAsNumpy().tolist()
        buf = model.Buffers(tensor.Buffer())
        dtypes[name] = map_tflite_dtype_to_onnx(tensor.Type())
        if not buf.DataIsNone():
            # For const values we use TF to decode the binary data from the buffer
            t = tensor_pb2.TensorProto()
            t.tensor_content = buf.DataAsNumpy().tobytes()
            # A constant with no recorded shape is given an empty shape list.
            if output_shapes[name] is None:
                output_shapes[name] = []
            for d in output_shapes[name]:
                t.tensor_shape.dim.add().size = d
            t.dtype = map_tflite_dtype_to_tf(tensor.Type())
            np_data = tensor_util.MakeNdarray(t)
            onnx_tensor = numpy_helper.from_array(np_data, name=name)
            onnx_node = helper.make_node("Const", [],
                                         outputs=[name],
                                         name=name,
                                         value=onnx_tensor)
            onnx_nodes.append(onnx_node)
            op_cnt["Const"] += 1

    def get_dequant(tensor_name):
        """Creates a dequantize op for the provided tensor if needed and returns the output of the op, or
        the original tensor name if no dequantization is needed"""
        quant = name_to_tensor[tensor_name].Quantization()
        if quant is None or quant.ScaleIsNone() or quant.ZeroPointIsNone():
            return tensor_name
        # Reuse the dequantize op already created for this tensor.
        if tensor_name in tensor_name_to_dequant_output:
            return tensor_name_to_dequant_output[tensor_name]
        dequant_name = tensor_name + "_dequant"
        attr = {}
        attr['scale'] = quant.ScaleAsNumpy().tolist()
        attr['zero_point'] = quant.ZeroPointAsNumpy().tolist()
        attr['quantized_dimension'] = quant.QuantizedDimension()
        onnx_node = helper.make_node("TFL_DEQUANTIZE", [tensor_name],
                                     [dequant_name],
                                     name=dequant_name,
                                     **attr)
        onnx_nodes.append(onnx_node)
        tensor_name_to_dequant_output[tensor_name] = dequant_name
        # NOTE(review): output_shapes[tensor_name] can be None for a tensor
        # without shape and without buffer data; .copy() would then raise.
        output_shapes[dequant_name] = output_shapes[tensor_name].copy()
        dtypes[dequant_name] = onnx_pb.TensorProto.FLOAT
        return dequant_name

    def get_prequant(tensor_name):
        """Called by nodes with the name of the tensor they must output.
        If the output is supposed to be quantized, creates a Quantize op outputting the tensor.
        Returns the name that should be used for the "prequantized" tensor, or the original tensor if no quantization
        is needed"""
        quant = name_to_tensor[tensor_name].Quantization()
        if quant is None or quant.ScaleIsNone() or quant.ZeroPointIsNone():
            return tensor_name
        prequant_name = tensor_name + "_prequant"
        quantize_name = tensor_name + "_quantize"
        attr = {}
        attr['scale'] = quant.ScaleAsNumpy().tolist()
        attr['zero_point'] = quant.ZeroPointAsNumpy().tolist()
        attr['quantized_dimension'] = quant.QuantizedDimension()
        # The Quantize op consumes the "_prequant" tensor and produces the
        # tensor under its original name.
        onnx_node = helper.make_node("TFL_QUANTIZE", [prequant_name],
                                     [tensor_name],
                                     name=quantize_name,
                                     **attr)
        onnx_nodes.append(onnx_node)
        # NOTE(review): same possible None.copy() as in get_dequant.
        output_shapes[prequant_name] = output_shapes[tensor_name].copy()
        dtypes[prequant_name] = onnx_pb.TensorProto.FLOAT
        return prequant_name

    # Pass 2: translate every operator into a "TFL_<optype>" node.
    for i in range(tflite_g.OperatorsLength()):
        op = tflite_g.Operators(i)
        optype = opcodes_map[op.OpcodeIndex()]
        op_cnt[optype] += 1
        attr = {}
        options_type_name = lookup_enum(op.BuiltinOptionsType(),
                                        'BuiltinOptions')
        option_class = get_options_class(options_type_name)
        wants_dequantized_input = True
        has_prequantized_output = True
        # QUANTIZE / DEQUANTIZE ops carry the quantization parameters
        # themselves, so no extra op is wrapped around that side.
        if optype == 'QUANTIZE':
            out_tensor = tflite_g.Tensors(op.Outputs(0))
            quant = out_tensor.Quantization()
            has_prequantized_output = False
            if quant is not None and not quant.ScaleIsNone(
            ) and not quant.ZeroPointIsNone():
                attr['scale'] = quant.ScaleAsNumpy().tolist()
                attr['zero_point'] = quant.ZeroPointAsNumpy().tolist()
                attr['quantized_dimension'] = quant.QuantizedDimension()
        elif optype == 'DEQUANTIZE':
            in_tensor = tflite_g.Tensors(op.Inputs(0))
            quant = in_tensor.Quantization()
            wants_dequantized_input = False
            if quant is not None and not quant.ScaleIsNone(
            ) and not quant.ZeroPointIsNone():
                attr['scale'] = quant.ScaleAsNumpy().tolist()
                attr['zero_point'] = quant.ZeroPointAsNumpy().tolist()
                attr['quantized_dimension'] = quant.QuantizedDimension()
        if option_class is not None:
            options = option_class()
            options.Init(op.BuiltinOptions().Bytes, op.BuiltinOptions().Pos)
            # All flatbuffer objects have these properties.
            block_list = [
                options_type_name + 'BufferHasIdentifier', 'Init',
                'GetRootAs' + options_type_name
            ]
            # The rest of the properties of the options class provide its attribute names
            attr_names = {
                opt
                for opt in dir(options)
                if not opt.startswith('_') and opt not in block_list
            }
            # Iterate over a copy: the set is modified inside the loop.
            for a in list(attr_names):
                # Flatbuffer list properties have 3 functions: *Length, *IsNone, and *AsNumpy
                if a + 'Length' in attr_names:
                    attr_names.remove(a + 'Length')
                    attr_names.remove(a + 'IsNone')
                    attr_names.remove(a)
            for a in attr_names:
                if a.endswith('AsNumpy'):
                    value = getattr(options, a)().tolist()
                    a = a[:-len('AsNumpy')]
                else:
                    # For enums we use a string with the value name, not enum index
                    value = getattr(options, a)()
                    if a in NODE_ATTR_NAME_TO_ENUM_TYPE:
                        value = lookup_enum(value,
                                            NODE_ATTR_NAME_TO_ENUM_TYPE[a])
                    elif a in FUNCTION_ATTRS:
                        # Subgraph index -> subgraph name.
                        value = model.Subgraphs(value).Name().decode()
                attr_cnt[a] += 1
                attr[proper_to_snake_case(a)] = value
        # Inputs/outputs with index -1 are skipped.
        input_names = [
            tensor_names[op.Inputs(i)] for i in range(op.InputsLength())
            if op.Inputs(i) != -1
        ]
        if wants_dequantized_input:
            input_names = [get_dequant(inp) for inp in input_names]
        output_names = [
            tensor_names[op.Outputs(i)] for i in range(op.OutputsLength())
            if op.Outputs(i) != -1
        ]
        if has_prequantized_output:
            output_names = [get_prequant(out) for out in output_names]
        # The node is named after its first output.
        onnx_node = helper.make_node("TFL_" + optype,
                                     input_names,
                                     output_names,
                                     name=output_names[0],
                                     **attr)
        onnx_nodes.append(onnx_node)

    inputs = [
        tensor_names[tflite_g.Inputs(i)]
        for i in range(tflite_g.InputsLength())
    ]
    outputs = [
        tensor_names[tflite_g.Outputs(i)]
        for i in range(tflite_g.OutputsLength())
    ]
    # TODO: Allow input/outputs to be overridden

    # Every graph input gets a Placeholder node.
    for inp in inputs:
        onnx_node = helper.make_node("Placeholder", [],
                                     outputs=[inp],
                                     name=inp)
        onnx_nodes.append(onnx_node)

    graph_name = (tflite_g.Name() or b'tflite graph').decode()
    return onnx_nodes, op_cnt, attr_cnt, output_shapes, dtypes, inputs, outputs, graph_name
def _intel_cpu_quantize_weight_eightbit(self,
                                        parent,
                                        input_node,
                                        per_channel,
                                        quantization_mode=b"SCALED"):
    """Quantize a float weight constant to qint8 and add the result nodes.

    Three constant nodes are created and handed to
    ``self.add_output_graph_node``: the qint8 weights, and the min / max
    range values (scalars or per-channel arrays, depending on the path).

    Args:
        parent: op type of the node consuming the weights. Supported values
            are "Conv2D", "MatMul" and "DepthwiseConv2dNative".
        input_node: node whose ``attr["value"].tensor`` holds the float
            weights.
        per_channel: for "Conv2D"/"MatMul", whether to compute one range per
            entry of the last axis (the reduction runs over axes 0, 1, 2)
            instead of a single range for the whole tensor.
        quantization_mode: mode forwarded to ``array_ops.quantize_v2`` on the
            per-tensor path.

    Returns:
        A tuple ``(qint8_const_name, min_name, max_name)`` with the names of
        the three created nodes.

    Raises:
        ValueError: if ``parent`` is not one of the supported op types.
    """
    base_name = input_node.name + "_"
    qint8_const_name = base_name + "qint8_const"
    min_name = base_name + "min"
    max_name = base_name + "max"
    float_tensor = tensor_util.MakeNdarray(input_node.attr["value"].tensor)
    epsilon = 1e-4  # Needs to be set empirically if accuracy is not satisfactory

    if parent in ("Conv2D", "MatMul"):
        if per_channel:
            ranges = np.abs(float_tensor).max(axis=(0, 1, 2))
            min_value = -ranges
            # NOTE: max_value deliberately aliases ranges, so the nudge
            # applied to ranges below is visible through max_value as well.
            max_value = ranges
            # nudging min-max values outside epsilon radius around zero
            ranges[ranges < epsilon] = epsilon
            min_value[np.abs(min_value) < epsilon] = -epsilon
            max_value[np.abs(max_value) < epsilon] = epsilon
            qint8_tensor = (float_tensor * 127.0 / ranges).astype(np.int8)
        else:
            min_value = np.min(float_tensor.flatten())
            max_value = np.max(float_tensor.flatten())
            # Same processing of min-max as in quantize_weight_eightbit
            # function.
            if min_value > 0.0:
                min_value = 0.0
            if min_value == max_value:
                if abs(min_value) < 0.000001:
                    max_value = min_value + 1.0
                elif min_value > 0:
                    max_value = 2 * min_value
                else:
                    max_value = min_value / 2.0

            sess = tf.compat.v1.Session()
            try:
                with sess.as_default():
                    quantize_op = array_ops.quantize_v2(
                        float_tensor,
                        min_value,
                        max_value,
                        dtypes.qint8,
                        mode=quantization_mode,
                        round_mode="HALF_TO_EVEN")
                    qint8_tensor = quantize_op[0].numpy(
                    ) if tf.executing_eagerly() else quantize_op[0].eval()
                    # Updated min-max values should be passed to the next
                    # feeding node.
                    min_value = quantize_op[1].numpy(
                    ) if tf.executing_eagerly() else quantize_op[1].eval()
                    max_value = quantize_op[2].numpy(
                    ) if tf.executing_eagerly() else quantize_op[2].eval()
            finally:
                # Release the session even when quantization or eval fails.
                sess.close()
    elif parent == "DepthwiseConv2dNative":
        # get the max values based on dim 0 and 1 for depthwise conv
        # since, the output channel will be dim 2 * dim 3
        ranges = np.abs(float_tensor).max(axis=(0, 1))
        ranges = ranges.flatten()
        min_value = -ranges
        max_value = ranges
        # nudging min-max values outside epsilon radius around zero
        ranges[ranges < epsilon] = epsilon
        min_value[np.abs(min_value) < epsilon] = -epsilon
        max_value[np.abs(max_value) < epsilon] = epsilon
        # Since output channel will be 1 dim which is dim 2 * dim 3
        # When divide by range, qint8_tensor needs to be 3 dim
        # where, 3rd dim should be same dim of ranges
        a, b, c, d = float_tensor.shape
        qint8_tensor = (float_tensor.reshape(a, b, c * d) * 127.0 /
                        ranges).astype(np.int8)
        # get the shape back to 4 dim
        qint8_tensor = qint8_tensor.reshape(a, b, c, d)
    else:
        # Without this guard the code below fails with an opaque
        # UnboundLocalError on qint8_tensor.
        raise ValueError(
            "Unsupported parent op type for weight quantization: "
            "{!r}".format(parent))

    shape = tensor_util.TensorShapeProtoToList(
        input_node.attr["value"].tensor.tensor_shape)
    qint8_const_node = helper.create_constant_node(qint8_const_name,
                                                   qint8_tensor,
                                                   dtypes.qint8,
                                                   shape=shape)
    min_node = helper.create_constant_node(min_name,
                                           min_value,
                                           dtypes.float32,
                                           device=self.device)
    max_node = helper.create_constant_node(max_name,
                                           max_value,
                                           dtypes.float32,
                                           device=self.device)
    self.add_output_graph_node(qint8_const_node)
    self.add_output_graph_node(min_node)
    self.add_output_graph_node(max_node)
    return qint8_const_node.name, min_node.name, max_node.name
def _convert_layers_batchnorm(self, source_node):
    """Emit a 'BatchNorm' IR node for the subgraph rooted at ``source_node``.

    The epsilon, moving variance, optional gamma (scale), beta (bias) and
    moving mean are looked up by walking fixed parent/son paths from
    ``source_node`` and are registered through ``self.set_weight`` under the
    keys 'var', 'scale', 'bias' and 'mean'. The node that ends the pattern
    has its ``real_name`` set to ``source_node.name``.

    NOTE(review): the index paths passed to get_parent/get_son presumably
    encode one specific unfused batch-norm layout - confirm against the
    graphs this parser is expected to handle.
    """
    # name, op
    ir_node = self.IR_graph.node.add()
    TensorflowParser2._copy_and_reop(source_node, ir_node, 'BatchNorm')

    # epsilon: first float of the constant found at parent path [1]
    eps_node = self.get_parent(source_node.name, [1])
    ir_node.attr['epsilon'].f = eps_node.layer.attr['value'].tensor.float_val[0]

    # moving variance (var), expected behind an Identity "/read" node
    variance_node = self.get_parent(source_node.name, [0])
    if variance_node.type == 'Identity':
        variance_src = self.src_graph.get_parent(variance_node.name, [0])
        variance = tensor_util.MakeNdarray(variance_src.get_attr('value'))
        self.set_weight(source_node.name, 'var', variance)
    else:
        print(variance_node.layer)
        assert False

    # gamma (scale): taken to be absent when the first son has two out edges
    rsqrt_node = self.get_son(source_node.name, [0], True)
    has_scale = len(rsqrt_node.out_edges) != 2
    ir_node.attr['scale'].b = has_scale
    if has_scale:
        scale_mul = self.get_son(source_node.name, [0, 0, 0], True)
        gamma_source = self.get_parent(scale_mul.name, [1, 1], True)
        gamma_node = self.check_const(gamma_source)
        gamma = tensor_util.MakeNdarray(gamma_node.get_attr('value'))
        self.set_weight(source_node.name, 'scale', gamma)
        tail_node = self.get_son(source_node.name, [0, 0, 0, 0], True)
        mean_mul = self.get_son(source_node.name, [0, 0, 1], True)
    else:
        tail_node = self.get_son(source_node.name, [0, 0, 0], True)
        mean_mul = self.get_son(source_node.name, [0, 1], True)

    # beta (bias)
    beta_proto = self.get_parent(tail_node.name, [1, 0, 0], True).get_attr('value')
    beta = tensor_util.MakeNdarray(beta_proto)
    ir_node.attr['bias'].b = True
    self.set_weight(source_node.name, 'bias', beta)

    # moving mean (mean)
    mean_proto = self.get_parent(mean_mul.name, [0, 0]).get_attr('value')
    moving_mean = tensor_util.MakeNdarray(mean_proto)
    self.set_weight(source_node.name, 'mean', moving_mean)

    # input node: the pattern must end in an Add
    assert tail_node.type == 'Add'
    data_input = self.get_parent(tail_node.name, [0, 0])
    ir_node.input.append(data_input.real_name)

    # output node: the closing Add now answers to the batch-norm's name
    tail_node.real_name = source_node.name
def get_tf_tensor_data(tensor):
    """Convert a ``TensorProto`` into a numpy array.

    Both the input type and the converted result are checked through
    ``make_sure`` (presumably it raises when the condition is false - see
    its definition).
    """
    is_tensor_proto = isinstance(tensor, tensor_pb2.TensorProto)
    make_sure(is_tensor_proto, "Require TensorProto")

    array = tensor_util.MakeNdarray(tensor)
    is_ndarray = isinstance(array, np.ndarray)
    make_sure(is_ndarray, "{} isn't ndarray".format(array))
    return array
def training_visualization(paths):
    """Plot loss and variable curves from event files and save the figure.

    Each entry of ``paths`` is joined onto the "logs" directory and read with
    ``summary_iterator``. For every run a dict is built that maps "step" to
    the list of distinct steps and every summary tag to the list of its
    scalar values. A 4x2 figure is drawn and written to
    "training_visualisation.png".

    Returns:
        The list of per-run dicts, in the order of ``paths``.
    """
    log_root = "logs"
    mpl.style.use('seaborn-darkgrid')

    runs = []
    for path in paths:
        history = {}
        for event in summary_iterator(os.path.join(log_root, path)):
            if "step" not in history:
                # The step list is only started by an event with non-zero step.
                if event.step != 0:
                    history["step"] = [event.step]
            elif event.step not in history["step"]:
                history["step"].append(event.step)
            for value in event.summary.value:
                scalar = tensor_util.MakeNdarray(value.tensor)
                history.setdefault(value.tag, []).append(scalar.item())
        runs.append(history)

    # plotting
    fig, axes = plt.subplots(nrows=4,
                             ncols=2,
                             sharex=True,
                             num=1,
                             figsize=(6.2, 10))
    label_font = dict(fontsize=15, fontweight='bold', fontname='Calibri')

    # (axes position, tag, title, y-label or None); drawn on a log scale.
    loss_panels = (
        ((0, 0), "training/loss_u", r"$L_C$", r"$MSE$"),
        ((0, 1), "training/loss_r", r"$L_r$", None),
        ((1, 0), "training/loss_b", r"$L_b$", r"$MSE$"),
        ((1, 1), "training/loss_reg", r"$L_{reg}$", None),
    )
    for run in runs:
        for position, tag, title, y_label in loss_panels:
            panel = axes[position]
            panel.plot(run["step"], run[tag], lw=2)
            panel.set_yscale("log")
            panel.set_title(title, **label_font)
            if y_label is not None:
                panel.set_ylabel(y_label, **label_font)
            panel.tick_params(axis="y", labelsize=12)

    # Shared upper y-limit per variable: largest value over all runs + 10%.
    var_tags = ("vars/f", "vars/vp", "vars/ve", "vars/ps")
    upper = dict.fromkeys(var_tags, 0.0)
    for run in runs:
        for tag in var_tags:
            upper[tag] = max(upper[tag], max(run[tag]))
    for tag in var_tags:
        upper[tag] += 0.1 * upper[tag]

    # (axes position, tag, title, y-label or None, x-label or None)
    var_panels = (
        ((2, 0), "vars/f", r"$F_p$", r"$\mu_{value}$", None),
        ((2, 1), "vars/vp", r"$v_{p}$", None, None),
        ((3, 1), "vars/ve", r"$v_{e}$", None, "epoch"),
        ((3, 0), "vars/ps", r"$PS$", r"$\mu_{value}$", "epoch"),
    )
    for run in runs:
        for position, tag, title, y_label, x_label in var_panels:
            panel = axes[position]
            panel.plot(run["step"], run[tag], lw=2)
            panel.set_ylim(ymin=0.0, ymax=upper[tag])
            panel.set_title(title, **label_font)
            if y_label is not None:
                panel.set_ylabel(y_label, **label_font)
            if x_label is not None:
                panel.set_xlabel(x_label, **label_font)
            panel.tick_params(axis="y", labelsize=12)

    fig.subplots_adjust(wspace=0.35, hspace=0.2)
    fig.savefig(
        "training_visualisation.png",
        dpi=300,
        format="png",
        bbox_inches="tight",
    )

    # Applied after the file is written, so only the in-memory figure changes.
    for panel in axes[:2, :2].flat:
        panel.set_ylim(ymin=0.0)
    return runs
def do_transformation(self):
    """Fuse the quantized op with the following requantize op.

    The transformation has two stages: the first fuses the patterns defined
    in self.fuse_patterns, the second fuses the patterns defined in
    self.sum_pattern.

    Returns:
        [graphdef]: the optimized graphdef object
    """
    int8_type = dtypes.qint8.as_datatype_enum
    uint8_type = dtypes.quint8.as_datatype_enum
    float32_type = dtypes.float32.as_datatype_enum
    qint32_type = dtypes.qint32.as_datatype_enum

    # Stage 1: the graph is re-queried after every fusion and the loop stops
    # once no match is left.
    while True:
        target_nodes = self.graph_analyzer.query_fusion_pattern_nodes(
            self.fuse_patterns)
        if len(target_nodes) == 0:
            break

        match = target_nodes[0]
        quantized_node_name = match[0]
        quantized_node = self.graph_info[quantized_node_name].node
        requantize_node_name = match[1]
        requantize_node = self.graph_info[requantize_node_name].node
        requested_output_min_name = requantize_node.input[3]
        requested_output_max_name = requantize_node.input[4]
        quantized_node_op = match[-1][0]

        # The fused node takes over the requantize node's name.
        new_node = node_def_pb2.NodeDef()
        new_node.op = quantized_node_op + "AndRequantize"
        new_node.name = requantize_node_name
        for value in quantized_node.input:
            new_node.input.append(value)
        new_node.input.append(requested_output_min_name)
        new_node.input.append(requested_output_max_name)

        for attr_name in ('Tinput', 'Tfilter', 'strides', 'padding'):
            if attr_name in quantized_node.attr:
                new_node.attr[attr_name].CopyFrom(
                    quantized_node.attr[attr_name])

        parent_node_name = Helper.node_name_from_input(
            quantized_node.input[0])
        max_filter_node = self.graph_info[new_node.input[6]].node
        min_filter_node = self.graph_info[new_node.input[5]].node
        last_node = self.graph_info[new_node.input[0]].node

        if last_node.op.find('Requantize') != -1:
            # The producer is itself requantized, so its constant min/max
            # inputs are available and the float bias can be rescaled.
            on_gpu = self.device == 'gpu'
            bias_type = float32_type if on_gpu else qint32_type

            bias_node = self.graph_info[new_node.input[2]].node
            max_input_node = self.graph_info[last_node.input[-1]].node
            min_input_node = self.graph_info[last_node.input[-2]].node
            min_input = (min_input_node.attr['value'].tensor.float_val)[0]
            max_input = (max_input_node.attr['value'].tensor.float_val)[0]

            if 'Depthwise' in quantized_node_op or requantize_node.op.find(
                    'PerChannel') != -1:
                channel_size = max_filter_node.attr[
                    'value'].tensor.tensor_shape.dim[0].size
                # Fixed: the max tensor was previously read from
                # min_filter_node (copy-paste slip).
                max_filter_tensor = tensor_util.MakeNdarray(
                    max_filter_node.attr['value'].tensor)
                min_filter_tensor = tensor_util.MakeNdarray(
                    min_filter_node.attr['value'].tensor)
            else:
                channel_size = 1
                max_filter_tensor = [
                    (max_filter_node.attr['value'].tensor.float_val)[0]
                ]
                min_filter_tensor = [
                    (min_filter_node.attr['value'].tensor.float_val)[0]
                ]

            bias_tensor = tensor_util.MakeNdarray(
                bias_node.attr['value'].tensor)
            bias_length = bias_tensor.shape[0]

            input_range = max(abs(max_input), abs(min_input))
            scales = [
                255.0 * 127.0 /
                (input_range * max(abs(max_filter_tensor[channel]),
                                   abs(min_filter_tensor[channel])))
                for channel in range(channel_size)
            ]

            if channel_size > 1:
                int32_bias = [
                    int(bias_tensor[k] * scales[k])
                    for k in range(bias_length)
                ]
            else:
                int32_bias = [
                    int(bias_tensor[k] * scales[0])
                    for k in range(bias_length)
                ]

            # On 'gpu' the bias is kept as float32; otherwise it is stored as
            # the rescaled integers tagged qint32.
            bias_node.attr['dtype'].CopyFrom(
                attr_value_pb2.AttrValue(type=bias_type))
            bias_node.attr['value'].CopyFrom(
                attr_value_pb2.AttrValue(
                    tensor=tensor_util.make_tensor_proto(
                        bias_tensor if on_gpu else int32_bias,
                        dtypes.float32 if on_gpu else dtypes.int32,
                        bias_tensor.shape)))
            bias_node.attr['value'].tensor.dtype = bias_type
            new_node.attr["Tbias"].CopyFrom(
                attr_value_pb2.AttrValue(type=bias_type))
        else:
            new_node.attr["Tbias"].CopyFrom(
                attr_value_pb2.AttrValue(type=float32_type))

        if "padding_list" in quantized_node.attr:
            new_node.attr["padding_list"].CopyFrom(
                quantized_node.attr['padding_list'])
        if "dilations" in quantized_node.attr:
            new_node.attr["dilations"].CopyFrom(
                quantized_node.attr['dilations'])

        if quantized_node.op == "QuantizedConv2D" or \
                quantized_node.op == "QuantizedConv2DWithBias":
            new_node.attr["out_type"].CopyFrom(
                attr_value_pb2.AttrValue(type=int8_type))
        else:
            new_node.attr["out_type"].CopyFrom(
                attr_value_pb2.AttrValue(type=uint8_type))

        self.graph_analyzer.replace_single_node(
            new_node, [parent_node_name], quantized_node_name,
            [self.graph_info[requantize_node_name].outputs[0]],
            requantize_node_name)
        self.graph_analyzer.remove_node(quantized_node_name)

    # Stage 2: matches are queried once and consumed one by one.
    target_nodes = self.graph_analyzer.query_fusion_pattern_nodes(
        self.sum_pattern)
    while target_nodes:
        match = target_nodes[0]
        quantized_node_name = match[0]
        quantized_node = self.graph_info[quantized_node_name].node
        requantize_node_name = match[1]
        requantize_node = self.graph_info[requantize_node_name].node
        requested_output_min_name = requantize_node.input[3]
        requested_output_max_name = requantize_node.input[4]
        quantized_node_op = match[-1][0]

        new_node = node_def_pb2.NodeDef()
        new_node.op = quantized_node_op + "AndRequantize"
        new_node.name = requantize_node_name
        # The last input is the summand; it is re-attached further below.
        for value in quantized_node.input[:-1]:
            new_node.input.append(value)

        new_node.attr["Tinput"].CopyFrom(quantized_node.attr['Tinput'])
        new_node.attr["Tfilter"].CopyFrom(quantized_node.attr['Tfilter'])
        new_node.attr["strides"].CopyFrom(quantized_node.attr['strides'])
        new_node.attr["padding"].CopyFrom(quantized_node.attr['padding'])
        new_node.input.append(requested_output_min_name)
        new_node.input.append(requested_output_max_name)

        deq_node = self.graph_info[Helper.node_name_from_input(
            quantized_node.input[-1])].node
        # NOTE(review): as written, only a summand produced by a node whose
        # op is exactly 'Dequantize' passes this check - confirm that this
        # is the intended filter.
        if deq_node.op != 'Dequantize' or deq_node.op.find(
                "Quantize") != -1:
            self.logger.debug(
                'Dropping fusion due to unsupported pattern..... {}'.
                format(match))
            target_nodes.remove(match)
            continue

        if deq_node.op == 'Dequantize':
            original_summand_node = self.graph_info[
                Helper.node_name_from_input(deq_node.input[0])].node
        else:
            original_summand_node = deq_node
        summand_op_type = uint8_type if dtypes.as_dtype(
            deq_node.attr["T"].type) == uint8_type else int8_type

        # Feed outputs 0, 1 and 2 of the summand's producer into the fused op.
        for j in range(3):
            new_node.input.append(original_summand_node.name +
                                  ':{}'.format(j))

        if "padding_list" in quantized_node.attr:
            new_node.attr["padding_list"].CopyFrom(
                quantized_node.attr['padding_list'])
        if "dilations" in quantized_node.attr:
            new_node.attr["dilations"].CopyFrom(
                quantized_node.attr['dilations'])
        new_node.attr["out_type"].CopyFrom(
            attr_value_pb2.AttrValue(type=uint8_type))
        new_node.attr["Tbias"].CopyFrom(
            attr_value_pb2.AttrValue(type=float32_type))

        if summand_op_type == int8_type:
            new_node.op = "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize"
        new_node.attr["Tsummand"].CopyFrom(
            attr_value_pb2.AttrValue(type=summand_op_type))

        self.graph_analyzer.replace_single_node(
            new_node, [quantized_node.input[0], original_summand_node.name],
            quantized_node.name,
            self.graph_info[requantize_node_name].outputs,
            requantize_node_name)
        self.graph_analyzer.remove_node(quantized_node_name)

        if deq_node.op == 'Dequantize':
            self.graph_analyzer.remove_node_with_single_input_output(
                deq_node.name)
        target_nodes.remove(match)

    return self.graph_analyzer.dump_graph()
def to_numpy(summary_value):
    """Return the tensor carried by ``summary_value`` as a numpy array."""
    tensor_proto = summary_value.tensor
    return tensor_util.MakeNdarray(tensor_proto)
def _get_value(input_node):
    """Return the tensor in ``input_node``'s "value" attribute as an ndarray."""
    return tensor_util.MakeNdarray(input_node.attr["value"].tensor)
def generate_output_graph(self, input_graph_def, input_node_map,
                          fuse_op_name):
    """Build a new GraphDef with the listed batch-norm nodes folded away.

    For every node whose name is a key of ``fuse_op_name`` the weights
    constant is multiplied element-wise by the scales returned from
    ``self.get_scale_and_offset_values`` and a Const + BiasAdd pair carrying
    the offsets is emitted; the BiasAdd takes over the batch-norm node's
    name. Nodes recorded as skipped are dropped, all others are copied.

    Args:
        input_graph_def: graph whose nodes are walked in order.
        input_node_map: mapping from node name to node.
        fuse_op_name: mapping from the name of a node to fold into to the
            name of its batch-norm node.

    Returns:
        The newly assembled ``GraphDef``.
    """
    folded_graph = graph_pb2.GraphDef()
    skip_list = []  # never filled in this method; kept for the index check
    skip_node_name = []

    for position, node in enumerate(input_graph_def.node):
        if node.name not in fuse_op_name:
            if position in skip_list or node.name in skip_node_name:
                continue
            passthrough = node_def_pb2.NodeDef()
            passthrough.CopyFrom(node)
            folded_graph.node.extend([passthrough])
            continue

        conv_node = input_node_map[node.name]
        bn_node = input_node_map[fuse_op_name[node.name]]
        scales, offsets = self.get_scale_and_offset_values(
            input_node_map, bn_node)
        weights_node = input_node_map[conv_node.input[1]]

        # The batch-norm node and all of its inputs are dropped if they show
        # up later in the walk.
        skip_node_name.extend(bn_node.input)
        skip_node_name.append(bn_node.name)

        fused_conv = node_def_pb2.NodeDef()
        fused_conv.op = conv_node.op
        fused_conv.name = conv_node.name
        fused_conv.input.extend(node.input)

        # Number of scale entries, read from the weights shape per op type.
        shape_dims = weights_node.attr['value'].tensor.tensor_shape.dim
        if conv_node.op == 'Conv2D':
            weights_cols = shape_dims[3].size
        elif conv_node.op == "DepthwiseConv2dNative":
            weights_cols = shape_dims[2].size * shape_dims[3].size
        else:
            weights_cols = shape_dims[1].size

        weights_tensor = tensor_util.MakeNdarray(
            weights_node.attr['value'].tensor)
        # Flat iteration: element k is scaled by scales[k % weights_cols].
        new_weights = [
            weight * scales[flat_pos % weights_cols]
            for flat_pos, weight in enumerate(weights_tensor.flat)
        ]
        new_bn = [offsets[col] for col in range(weights_cols)]

        # The weights node is updated in place and re-emitted below.
        weights_node.attr['value'].CopyFrom(
            attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(
                new_weights, dtypes.float32, weights_tensor.shape)))

        bias_offset_node = node_def_pb2.NodeDef()
        bias_offset_node.op = "Const"
        bias_offset_node.name = conv_node.name + "_bn_offset"
        bias_offset_node.attr["dtype"].CopyFrom(
            attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum))
        bias_offset_node.attr["value"].CopyFrom(
            attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(
                new_bn, dtypes.float32, [weights_cols])))

        # The BiasAdd reuses the batch-norm node's name.
        biasadd_node = node_def_pb2.NodeDef()
        biasadd_node.op = "BiasAdd"
        biasadd_node.name = bn_node.name
        if "data_format" in conv_node.attr:
            biasadd_node.attr["data_format"].CopyFrom(
                conv_node.attr['data_format'])
        biasadd_node.attr["T"].CopyFrom(conv_node.attr['T'])
        biasadd_node.input.append(conv_node.name)
        biasadd_node.input.append(bias_offset_node.name)

        for key in conv_node.attr:
            fused_conv.attr[key].CopyFrom(conv_node.attr[key])

        folded_graph.node.extend(
            [weights_node, bias_offset_node, biasadd_node, fused_conv])

    return folded_graph
def get_interactive_infer_results(model, model_in):
    """Run inference for ``model_in`` through a prediction service over gRPC.

    The feed dict built by the model's data layer supplies the ``audio``,
    ``audio_length`` and ``x_id`` inputs of a Predict request that is sent to
    the model named 'speech2text' (signature 'predict_output') at
    0.0.0.0:8500. The three returned sparse-tensor components are
    reassembled into a ``tf.SparseTensorValue`` and handed to
    ``model.infer`` together with the data layer's input tensors.

    Args:
        model: model object providing ``get_data_layer()`` and ``infer()``.
        model_in: raw input forwarded to the data layer's
            ``create_feed_dict``.

    Returns:
        Whatever ``model.infer(inputs, outputs)`` returns.

    Raises:
        Any error reported by the RPC: it is printed first and then
        re-raised when the result is read.
    """
    feed_dict = model.get_data_layer().create_feed_dict(model_in)
    input_tensors = model.get_data_layer().input_tensors

    source_tensors = input_tensors["source_tensors"]
    audio = feed_dict[source_tensors[0]]
    audio_length = feed_dict[source_tensors[1]]
    x_id = feed_dict[input_tensors["source_ids"][0]]

    print('audio shape: ', audio.shape)
    print('audio_length shape: ', audio_length.shape)

    # The channel is a context manager, so it is closed on every exit path.
    with grpc.insecure_channel('0.0.0.0:8500') as channel:
        stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

        request = predict_pb2.PredictRequest()
        request.model_spec.name = 'speech2text'
        request.model_spec.signature_name = 'predict_output'
        request.inputs['audio'].CopyFrom(
            tf.contrib.util.make_tensor_proto(audio, shape=list(audio.shape)))
        request.inputs['audio_length'].CopyFrom(
            tf.contrib.util.make_tensor_proto(
                audio_length, shape=list(audio_length.shape)))
        request.inputs['x_id'].CopyFrom(
            tf.contrib.util.make_tensor_proto(x_id, shape=list(x_id.shape)))

        result_future = stub.Predict.future(request, 5.0)  # 5 seconds
        exception = result_future.exception()
        if exception:
            print(exception)
        else:
            print('Result returned from rpc')

        # result() re-raises a failed RPC's error; fetch the response once
        # and convert it while the channel is still open.
        response_outputs = result_future.result().outputs
        indices_decoded_sequence = tensor_util.MakeNdarray(
            response_outputs['indices_decoded_sequence'])
        values_decoded_sequence = tensor_util.MakeNdarray(
            response_outputs['values_decoded_sequence'])
        dense_shape_decoded_sequence = tensor_util.MakeNdarray(
            response_outputs['dense_shape_decoded_sequence'])

    outputs = [
        tf.SparseTensorValue(indices=indices_decoded_sequence,
                             values=values_decoded_sequence,
                             dense_shape=dense_shape_decoded_sequence)
    ]
    return model.infer(input_tensors, outputs)
def testQuantizedTypes(self):
    """Round-trip each quantized dtype through make_tensor_proto/MakeNdarray.

    For qint32, quint8, qint8, quint16 and qint16 the generated proto is
    compared with the expected text form (byte order dependent for the
    multi-byte types), then the proto is converted back and its dtype and
    contents are checked.
    """
    # Test with array.
    data = [(21, ), (22, ), (23, )]

    t = tensor_util.make_tensor_proto(data, dtype=dtypes.qint32)
    if sys.byteorder == "big":
        self.assertProtoEquals(
            """ dtype: DT_QINT32 tensor_shape { dim { size: 3 } } tensor_content: "\000\000\000\025\000\000\000\026\000\000\000\027" """, t)
    else:
        self.assertProtoEquals(
            """ dtype: DT_QINT32 tensor_shape { dim { size: 3 } } tensor_content: "\025\000\000\000\026\000\000\000\027\000\000\000" """, t)
    a = tensor_util.MakeNdarray(t)
    self.assertEqual(dtypes.qint32.as_numpy_dtype, a.dtype)
    self.assertAllEqual(np.array(data, dtype=a.dtype), a)

    # Single-byte types have no byte-order variants.
    t = tensor_util.make_tensor_proto(data, dtype=dtypes.quint8)
    self.assertProtoEquals(
        """ dtype: DT_QUINT8 tensor_shape { dim { size: 3 } } tensor_content: "\025\026\027" """, t)
    a = tensor_util.MakeNdarray(t)
    self.assertEqual(dtypes.quint8.as_numpy_dtype, a.dtype)
    self.assertAllEqual(np.array(data, dtype=a.dtype), a)

    t = tensor_util.make_tensor_proto(data, dtype=dtypes.qint8)
    self.assertProtoEquals(
        """ dtype: DT_QINT8 tensor_shape { dim { size: 3 } } tensor_content: "\025\026\027" """, t)
    a = tensor_util.MakeNdarray(t)
    self.assertEqual(dtypes.qint8.as_numpy_dtype, a.dtype)
    self.assertAllEqual(np.array(data, dtype=a.dtype), a)

    t = tensor_util.make_tensor_proto(data, dtype=dtypes.quint16)
    if sys.byteorder == "big":
        self.assertProtoEquals(
            """ dtype: DT_QUINT16 tensor_shape { dim { size: 3 } } tensor_content: "\000\025\000\026\000\027" """, t)
    else:
        self.assertProtoEquals(
            """ dtype: DT_QUINT16 tensor_shape { dim { size: 3 } } tensor_content: "\025\000\026\000\027\000" """, t)
    a = tensor_util.MakeNdarray(t)
    self.assertEqual(dtypes.quint16.as_numpy_dtype, a.dtype)
    self.assertAllEqual(np.array(data, dtype=a.dtype), a)

    t = tensor_util.make_tensor_proto(data, dtype=dtypes.qint16)
    if sys.byteorder == "big":
        self.assertProtoEquals(
            """ dtype: DT_QINT16 tensor_shape { dim { size: 3 } } tensor_content: "\000\025\000\026\000\027" """, t)
    else:
        self.assertProtoEquals(
            """ dtype: DT_QINT16 tensor_shape { dim { size: 3 } } tensor_content: "\025\000\026\000\027\000" """, t)
    a = tensor_util.MakeNdarray(t)
    self.assertEqual(dtypes.qint16.as_numpy_dtype, a.dtype)
    self.assertAllEqual(np.array(data, dtype=a.dtype), a)
def generate_output_graph(self, input_graph_def, input_node_map,
                          fuse_op_name):
    """Build a new GraphDef with the listed Mul nodes folded into weights.

    For every node whose name is a key of ``fuse_op_name`` the weights
    constant is multiplied element-wise by the values of the Mul node's
    second input, and a copy of the original op is emitted under the Mul
    node's name. The node that directly follows a fused node in the input
    order is skipped, as is any later node named like the weights node; all
    other nodes are copied.

    Args:
        input_graph_def: graph whose nodes are walked in order.
        input_node_map: mapping from node name to node.
        fuse_op_name: mapping from the name of a node to fuse into to the
            name of its Mul node.

    Returns:
        The newly assembled ``GraphDef``.
    """
    fused_graph = graph_pb2.GraphDef()
    skip_list = []
    skip_node_name = []

    for position, node in enumerate(input_graph_def.node):
        if node.name not in fuse_op_name:
            if position in skip_list or node.name in skip_node_name:
                continue
            passthrough = node_def_pb2.NodeDef()
            passthrough.CopyFrom(node)
            fused_graph.node.extend([passthrough])
            continue

        # Drop the node that comes right after the fused one.
        skip_list.append(position + 1)

        original_node = input_node_map[node.name]
        mul_node = input_node_map[fuse_op_name[node.name]]
        weights_node = input_node_map[original_node.input[1]]
        mul_value_node = input_node_map[mul_node.input[1]]

        # The replacement keeps the original op but takes the Mul's name.
        fused_node = node_def_pb2.NodeDef()
        fused_node.op = original_node.op
        fused_node.name = mul_node.name
        fused_node.input.extend(node.input)

        mul_tensor_proto = mul_value_node.attr['value'].tensor
        weights_tensor_proto = weights_node.attr['value'].tensor

        # Expected multiplier count, read from the weights shape per op type.
        weight_dims = weights_tensor_proto.tensor_shape.dim
        if original_node.op == "DepthwiseConv2dNative":
            weights_col = weight_dims[2].size * weight_dims[3].size
        elif original_node.op == "Conv2D":
            weights_col = weight_dims[3].size
        else:
            weights_col = weight_dims[1].size

        # A mismatch is only logged; the fusion still goes ahead.
        mul_dims = mul_tensor_proto.tensor_shape.dim
        if len(mul_dims) != 1 or mul_dims[0].size != weights_col:
            self.logger.info("Invalid Mul OP fusion.")

        multipliers = list(tensor_util.MakeNdarray(mul_tensor_proto).flat)
        weights_array = tensor_util.MakeNdarray(weights_tensor_proto)
        # Flat iteration: element k is scaled by multipliers[k % count].
        new_weights = [
            weight * multipliers[flat_pos % len(multipliers)]
            for flat_pos, weight in enumerate(weights_array.flat)
        ]

        # The weights node is updated in place and re-emitted below.
        weights_node.attr['value'].CopyFrom(
            attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(
                new_weights, dtypes.float32, weights_array.shape)))
        skip_node_name.append(weights_node.name)
        fused_graph.node.extend([weights_node])

        for key in original_node.attr:
            fused_node.attr[key].CopyFrom(original_node.attr[key])
        fused_graph.node.extend([fused_node])

    return fused_graph
def testStringWithImplicitRepeat(self):
    """A scalar string with an explicit shape is repeated to fill it."""
    t = tensor_util.make_tensor_proto("f", shape=[3, 4])
    a = tensor_util.MakeNdarray(t)
    # `np.object` was removed in NumPy 1.24; the builtin `object` is the
    # same dtype and works on every NumPy version.
    self.assertAllEqual(np.array([[b"f"] * 4] * 3, dtype=object), a)
def _requantize_bias_node(new_node, input_node_map, per_channel, device):
    """Rewrite the bias Const feeding ``new_node`` and set ``new_node``'s Tbias.

    The bias node is ``new_node.input[2]``. The input min/max are read from
    inputs 3 and 4 of the node producing ``new_node.input[0]`` (with the
    two-character output suffix stripped), and the filter min/max from
    ``new_node.input[5]`` / ``new_node.input[6]``.

    Args:
        new_node: fused NodeDef whose inputs are already populated.
        input_node_map: mapping from node name to NodeDef.
        per_channel: when True one scale per filter channel is used,
            otherwise a single scale for the whole bias.
        device: 'gpu' keeps the bias as float32; anything else stores the
            scaled integer bias typed as qint32.

    Returns:
        The (mutated) bias NodeDef.
    """
    float32_type = dtypes.float32.as_datatype_enum
    qint32_type = dtypes.qint32.as_datatype_enum

    bias_node = input_node_map[new_node.input[2]]
    last_node = input_node_map[new_node.input[0]]
    # [:-2] drops the trailing two characters of the input name (":1"/":2"
    # style output suffix) to get the producing node's name.
    max_input_node = input_node_map[last_node.input[4][:-2]]
    min_input_node = input_node_map[last_node.input[3][:-2]]
    max_filter = input_node_map[new_node.input[6]]
    min_filter = input_node_map[new_node.input[5]]
    min_input = min_input_node.attr['value'].tensor.float_val[0]
    max_input = max_input_node.attr['value'].tensor.float_val[0]

    if per_channel:
        channel_size = max_filter.attr['value'].tensor.tensor_shape.dim[0].size
        max_filter_tensor = tensor_util.MakeNdarray(
            max_filter.attr['value'].tensor)
        min_filter_tensor = tensor_util.MakeNdarray(
            min_filter.attr['value'].tensor)
    else:
        channel_size = 1
        max_filter_tensor = [max_filter.attr['value'].tensor.float_val[0]]
        min_filter_tensor = [min_filter.attr['value'].tensor.float_val[0]]

    bias_tensor = tensor_util.MakeNdarray(bias_node.attr['value'].tensor)
    bias_length = bias_tensor.shape[0]

    scales = [
        255.0 * 127.0 /
        (max(abs(max_input), abs(min_input)) *
         max(abs(max_filter_tensor[i]), abs(min_filter_tensor[i])))
        for i in range(channel_size)
    ]
    if channel_size > 1:
        int32_bias = [
            int(bias_tensor[i] * scales[i]) for i in range(bias_length)
        ]
    else:
        int32_bias = [
            int(bias_tensor[i] * scales[0]) for i in range(bias_length)
        ]

    # (TODO) GPU not support qint32 bias tensor
    # float32 type should be removed after GPU support qint32 bias
    on_gpu = device == 'gpu'
    bias_type = float32_type if on_gpu else qint32_type
    bias_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=bias_type))
    bias_node.attr['value'].CopyFrom(
        attr_value_pb2.AttrValue(
            tensor=tensor_util.make_tensor_proto(
                bias_tensor if on_gpu else int32_bias,
                dtypes.float32 if on_gpu else dtypes.int32,
                bias_tensor.shape)))
    # The proto is built as int32 and then relabelled (qint32 unless on GPU).
    bias_node.attr['value'].tensor.dtype = bias_type
    new_node.attr["Tbias"].CopyFrom(attr_value_pb2.AttrValue(type=bias_type))
    return bias_node


def generate_output_graph(input_graph_def, input_node_map, output_node_map,
                          fuse_op_list, fuse_op_deq_list, device):
    """Return a new GraphDef with requantize nodes fused into their producers.

    Args:
        input_graph_def: GraphDef whose nodes are visited in order.
        input_node_map: mapping from node name to NodeDef.
        output_node_map: mapping from node name to an iterable of nodes
            (objects with an ``op`` attribute), used to detect a
            "RequantizePerChannel" consumer.
        fuse_op_list: indices of nodes to fuse with the requantize node found
            three positions later (``op + "AndRequantize"``).
        fuse_op_deq_list: indices of nodes fused the same way, but whose last
            input (the summand) is replaced by three outputs of the summand's
            own first input.
        device: 'gpu' keeps rewritten bias tensors in float32, otherwise they
            become qint32.

    Returns:
        The rewritten GraphDef.
    """
    output_graph_def = graph_pb2.GraphDef()
    skip_list = []
    skip_node_name = []
    int8_type = dtypes.qint8.as_datatype_enum
    uint8_type = dtypes.quint8.as_datatype_enum
    float32_type = dtypes.float32.as_datatype_enum
    copied_attrs = ("Tinput", "Tfilter", "strides", "padding")

    for index, node in enumerate(input_graph_def.node):
        if index in fuse_op_list:
            const_node_1 = input_graph_def.node[index + 1]
            const_node_2 = input_graph_def.node[index + 2]
            requantize_node = input_graph_def.node[index + 3]

            # The fused node takes the requantize node's name.
            new_node = node_def_pb2.NodeDef()
            new_node.op = node.op + "AndRequantize"
            new_node.name = requantize_node.name
            new_node.input.extend(node.input)
            new_node.input.append(const_node_1.name)
            new_node.input.append(const_node_2.name)
            for key in copied_attrs:
                new_node.attr[key].CopyFrom(node.attr[key])

            if input_node_map[new_node.input[0]].op.find("Requantize") != -1:
                per_channel = 'Depthwise' in node.op or any(
                    consumer.op == "RequantizePerChannel"
                    for consumer in output_node_map[node.name])
                bias_node = _requantize_bias_node(new_node, input_node_map,
                                                  per_channel, device)
                # The bias node is emitted here; skip it on its own turn.
                skip_node_name.append(bias_node.name)
                output_graph_def.node.extend([bias_node])
            else:
                new_node.attr["Tbias"].CopyFrom(
                    attr_value_pb2.AttrValue(type=float32_type))

            if "padding_list" in node.attr:
                new_node.attr["padding_list"].CopyFrom(
                    node.attr['padding_list'])
            if "dilations" in node.attr:
                new_node.attr["dilations"].CopyFrom(node.attr['dilations'])

            if node.op == "QuantizedConv2D" or node.op == "QuantizedConv2DWithBias":
                new_node.attr["out_type"].CopyFrom(
                    attr_value_pb2.AttrValue(type=int8_type))
            else:
                new_node.attr["out_type"].CopyFrom(
                    attr_value_pb2.AttrValue(type=uint8_type))

            # The two consts and the requantize node are emitted or replaced
            # here, so they are skipped when the loop reaches them.
            skip_list.append(index + 1)
            skip_list.append(index + 2)
            skip_list.append(index + 3)
            output_graph_def.node.extend(
                [new_node, const_node_1, const_node_2])
        elif index in skip_list or node.name in skip_node_name:
            continue
        elif node.op == "Dequantize":
            new_node = node_def_pb2.NodeDef()
            new_node.CopyFrom(node)
            new_node.attr["mode"].s = b"SCALED"
            p_node = input_node_map[new_node.input[0]]
            # Name of the first input of the Dequantize's producer.
            pp_node = input_node_map[p_node.name].input[0]
            if input_node_map[pp_node].op.find("Relu") != -1 or p_node.op in (
                    "QuantizedAvgPool", "QuantizedMaxPool",
                    "QuantizedConcatV2"):
                new_node.attr["T"].CopyFrom(
                    attr_value_pb2.AttrValue(type=uint8_type))
            elif input_node_map[pp_node].op.find(
                    "QuantizedMatMulWithBias") != -1 and p_node.op.find(
                        "Requantize") != -1:
                # Keep the original mode and type for this pattern.
                new_node.attr["mode"].s = node.attr["mode"].s
                new_node.attr["T"].CopyFrom(
                    attr_value_pb2.AttrValue(type=node.attr["T"].type))
            else:
                new_node.attr["T"].CopyFrom(
                    attr_value_pb2.AttrValue(type=int8_type))
            output_graph_def.node.extend([new_node])
        elif index in fuse_op_deq_list:
            original_summand_node = input_node_map[node.input[-1]]
            sum_const_node_1 = input_graph_def.node[index + 1]
            sum_const_node_2 = input_graph_def.node[index + 2]
            sum_requantize_node = input_graph_def.node[index + 3]

            new_node = node_def_pb2.NodeDef()
            new_node.op = node.op + "AndRequantize"
            new_node.name = sum_requantize_node.name
            # Every input except the trailing summand is kept as-is.
            new_node.input.extend(node.input[:-1])
            new_node.input.append(sum_const_node_1.name)
            new_node.input.append(sum_const_node_2.name)
            # The summand is replaced by outputs 0, 1 and 2 of its own first
            # input.
            summand_source = input_node_map[
                original_summand_node.name].input[0]
            new_node.input.append(summand_source)
            new_node.input.append(summand_source + ":1")
            new_node.input.append(summand_source + ":2")

            # Only the requantize node is skipped; the two const nodes are
            # not added to skip_list and are handled on their own iterations.
            skip_list.append(index + 3)

            for key in copied_attrs:
                new_node.attr[key].CopyFrom(node.attr[key])

            if input_node_map[new_node.input[0]].op.find("Requantize") != -1:
                per_channel = any(
                    consumer.op == "RequantizePerChannel"
                    for consumer in output_node_map[node.name])
                bias_node = _requantize_bias_node(new_node, input_node_map,
                                                  per_channel, device)
                skip_node_name.append(bias_node.name)
                output_graph_def.node.extend([bias_node])
            else:
                new_node.attr["Tbias"].CopyFrom(
                    attr_value_pb2.AttrValue(type=float32_type))

            if "padding_list" in node.attr:
                new_node.attr["padding_list"].CopyFrom(
                    node.attr['padding_list'])
            if "dilations" in node.attr:
                new_node.attr["dilations"].CopyFrom(node.attr['dilations'])

            new_node.attr["out_type"].CopyFrom(
                attr_value_pb2.AttrValue(type=uint8_type))

            summand_op_type = uint8_type if dtypes.as_dtype(
                original_summand_node.attr["T"].type
            ) == uint8_type else int8_type
            if summand_op_type == int8_type:
                new_node.op = "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize"
            new_node.attr["Tsummand"].CopyFrom(
                attr_value_pb2.AttrValue(type=summand_op_type))
            output_graph_def.node.extend([new_node])
        else:
            new_node = node_def_pb2.NodeDef()
            new_node.CopyFrom(node)
            output_graph_def.node.extend([new_node])
    return output_graph_def
def testSingleTensorFullTensorDebugModeWithCircularBufferBehavior(self):
    """FULL_TENSOR traces are held back until flushed, and only the newest survive.

    The traced function is called ``circular_buffer_size // 2 + 1`` times and
    emits two DebugIdentityV2 traces per call. After the flush the test
    expects exactly two Square/Sqrt pairs in the trace file.
    """
    full_tensor_mode = debug_event_pb2.TensorDebugMode.FULL_TENSOR

    @def_function.function
    def write_debug_trace(x):
        # DebugIdentityV2 is a stateful op. It ought to be included by auto
        # control dependency.
        square = math_ops.square(x)
        gen_debug_ops.debug_identity_v2(
            square,
            tfdbg_context_id="deadbeaf",
            op_name="Square",
            output_slot=0,
            tensor_debug_mode=full_tensor_mode,
            debug_urls=["file://%s" % self.dump_root])

        sqrt = math_ops.sqrt(x)
        gen_debug_ops.debug_identity_v2(
            sqrt,
            tfdbg_context_id="beafdead",
            op_name="Sqrt",
            output_slot=0,
            tensor_debug_mode=full_tensor_mode,
            debug_urls=["file://%s" % self.dump_root])
        return square + sqrt

    x = np.array([3.0, 4.0])
    call_count = self.circular_buffer_size // 2 + 1
    # One call more than half the buffer size: the traces of the earliest
    # call are expected to be evicted from the circular buffer.
    for _ in range(call_count):
        self.assertAllClose(
            write_debug_trace(x), [9.0 + np.sqrt(3.0), 16.0 + 2.0])

    with debug_events_reader.DebugEventsReader(self.dump_root) as reader:
        # The .metadata file must already exist, even before
        # FlushExecutionFiles() is called.
        metadata_event = next(reader.metadata_iterator()).debug_event
        self.assertGreater(metadata_event.wall_time, 0)
        self.assertTrue(metadata_event.debug_metadata.tensorflow_version)
        self.assertTrue(
            metadata_event.debug_metadata.file_version.startswith(
                "debug.Event:"))

        # Nothing has been flushed yet, so the trace file has no entries.
        trace_iter = reader.graph_execution_traces_iterator()
        with self.assertRaises(StopIteration):
            next(trace_iter)

        # Flush the circular buffer.
        self.writer.FlushExecutionFiles()
        trace_iter = reader.graph_execution_traces_iterator()

        # (context id, op name, expected tensor value), in emission order
        # within one call of write_debug_trace.
        expected_traces = (
            ("deadbeaf", "Square", [9.0, 16.0]),
            ("beafdead", "Sqrt", [np.sqrt(3.0), 2.0]),
        )
        # Two calls' worth of traces are expected to have been written
        # (the original test notes a circular buffer size of 4).
        for _ in range(2):
            for context_id, op_name, expected_value in expected_traces:
                event = next(trace_iter).debug_event
                self.assertGreater(event.wall_time, 0)
                trace = event.graph_execution_trace
                self.assertEqual(trace.tfdbg_context_id, context_id)
                self.assertEqual(trace.op_name, op_name)
                self.assertEqual(trace.output_slot, 0)
                self.assertEqual(trace.tensor_debug_mode, full_tensor_mode)
                tensor_value = tensor_util.MakeNdarray(trace.tensor_proto)
                self.assertAllClose(tensor_value, expected_value)

        # No further traces beyond those two pairs.
        with self.assertRaises(StopIteration):
            next(trace_iter)
prnet_image_cropper.PreProcess(prnet_request, stub) prnet_image_cropper.Apply() next_request = prnet_image_cropper.PostProcess() elapsed_time = time.time() - start_time print('prnet_image_cropper time cost: {}'.format(elapsed_time)) start_time = time.time() prn = PRNet() prn.PreProcess(next_request, stub) prn.Apply() final_request = prn.PostProcess() elapsed_time = time.time() - start_time print('prnet time cost: {}'.format(elapsed_time)) start_time = time.time() kpt = tensor_util.MakeNdarray(final_request.inputs["prnet_output"]) vertices = tensor_util.MakeNdarray(final_request.inputs["vertices"]) print(vertices.shape) q.put(vertices) # show_img = plot_vertices(np.zeros_like(image), vertices) # show_img = image elapsed_time = time.time() - start_time print('plot vertices time cost: {}'.format(elapsed_time)) else: q.put(None) # Display the resulting frame # cv2.imshow('frame',show_img)
def parse_tflite_graph(tflite_g, opcodes_map, model, input_prefix='', tensor_shapes_override=None):
    """
    Converts a tflite subgraph into a list of onnx-style nodes plus bookkeeping.

    All tflite op types are prefixed with "TFL_". Names of graph inputs are optionally prefixed with a string to
    prevent name conflicts in subgraphs. Quantized tensors are surrounded with quantize/dequantize ops.

    Args:
        tflite_g: the tflite subgraph object to convert
        opcodes_map: maps an operator's OpcodeIndex() to its op name
        model: the tflite model; used to read tensor buffers and subgraph names
        input_prefix: string prepended to the names of the graph's input tensors
        tensor_shapes_override: optional dict from tensor name to a shape used instead of the stored one

    Returns:
        A tuple (onnx_nodes, op_cnt, attr_cnt, output_shapes, dtypes, inputs, outputs, graph_name)
    """
    op_cnt = collections.Counter()
    attr_cnt = collections.Counter()
    onnx_nodes = []
    output_shapes = {}
    dtypes = {}
    tensor_names = {}
    if tensor_shapes_override is None:
        tensor_shapes_override = {}
    # Map tensor name to tflite Tensor object so we can fetch quantization info as needed
    name_to_tensor = {}
    # If a node takes a quantized tensor as input, we must add a dequantize op after it.
    # Store a mapping so we only need to make at most one dequantize op per tensor.
    tensor_name_to_dequant_output = {}

    # tflite uses generic names (arg0, arg1, etc.) for inputs but full names for other tensors, so
    # prefixing just the inputs should be fine. Other tensors are prefixed when we do inlining.
    input_indices = {tflite_g.Inputs(i) for i in range(tflite_g.InputsLength())}

    for i in range(tflite_g.TensorsLength()):
        tensor = tflite_g.Tensors(i)
        name = tensor.Name().decode()
        if i in input_indices:
            name = input_prefix + name
        tensor_names[i] = name
        name_to_tensor[name] = tensor

        if name in tensor_shapes_override:
            output_shapes[name] = tensor_shapes_override[name]
        elif tensor.ShapeIsNone():
            output_shapes[name] = None
        elif tensor.ShapeSignatureIsNone():
            # The shape signature uses -1 to signify unknown dims. Old models don't have this and use Shape instead.
            output_shapes[name] = tensor.ShapeAsNumpy().tolist()
        else:
            output_shapes[name] = tensor.ShapeSignatureAsNumpy().tolist()
        buf = model.Buffers(tensor.Buffer())
        dtypes[name] = map_tflite_dtype_to_onnx(tensor.Type())
        if not buf.DataIsNone() and tensor.Buffer() > 0:
            # For const values we use TF to decode the binary data from the buffer
            t = tensor_pb2.TensorProto()
            t.tensor_content = buf.DataAsNumpy().tobytes()
            if output_shapes[name] is None:
                output_shapes[name] = []
            for d in output_shapes[name]:
                t.tensor_shape.dim.add().size = d
            t.dtype = map_tflite_dtype_to_tf(tensor.Type())
            if t.dtype == tf.string:
                onnx_tensor = parse_tflite_string_tensor(t.tensor_content, output_shapes[name])
            else:
                np_data = tensor_util.MakeNdarray(t)
                onnx_tensor = numpy_helper.from_array(np_data, name=name)
            onnx_node = helper.make_node("Const", [], outputs=[name], name=name, value=onnx_tensor)
            onnx_nodes.append(onnx_node)
            op_cnt["Const"] += 1

    def copy_shape(tensor_name):
        """Returns a copy of the recorded shape of tensor_name, or None if the shape is unknown"""
        shape = output_shapes[tensor_name]
        # Shapes are None for tensors with no stored shape and for outputs whose scalar shape was replaced
        # with "unknown" below, so .copy() cannot be called unconditionally.
        return None if shape is None else shape.copy()

    def get_dequant(tensor_name):
        """Creates a dequantize op for the provided tensor if needed and returns the output of the op, or
        the original tensor name if no dequantization is needed"""
        quant = name_to_tensor[tensor_name].Quantization()
        if quant is None or quant.ScaleIsNone() or quant.ZeroPointIsNone():
            return tensor_name
        if tensor_name in tensor_name_to_dequant_output:
            return tensor_name_to_dequant_output[tensor_name]
        dequant_name = tensor_name + "_dequant"
        attr = get_quantization_attr(quant)
        onnx_node = helper.make_node("TFL_DEQUANTIZE", [tensor_name], [dequant_name], name=dequant_name, **attr)
        onnx_nodes.append(onnx_node)
        tensor_name_to_dequant_output[tensor_name] = dequant_name
        output_shapes[dequant_name] = copy_shape(tensor_name)
        dtypes[dequant_name] = onnx_pb.TensorProto.FLOAT
        return dequant_name

    def get_prequant(tensor_name):
        """Called by nodes with the name of the tensor they must output.
        If the output is supposed to be quantized, creates a Quantize op outputting the tensor.
        Returns the name that should be used for the "prequantized" tensor, or the original tensor if no quantization
        is needed"""
        quant = name_to_tensor[tensor_name].Quantization()
        if quant is None or quant.ScaleIsNone() or quant.ZeroPointIsNone():
            return tensor_name
        prequant_name = tensor_name + "_prequant"
        quantize_name = tensor_name + "_quantize"
        attr = get_quantization_attr(quant)
        onnx_node = helper.make_node("TFL_QUANTIZE", [prequant_name], [tensor_name], name=quantize_name, **attr)
        onnx_nodes.append(onnx_node)
        output_shapes[prequant_name] = copy_shape(tensor_name)
        dtypes[prequant_name] = onnx_pb.TensorProto.FLOAT
        return prequant_name

    for i in range(tflite_g.OperatorsLength()):
        op = tflite_g.Operators(i)
        optype = 'TFL_' + opcodes_map[op.OpcodeIndex()]
        op_cnt[optype] += 1
        attr = {}
        options_type_name = lookup_enum(op.BuiltinOptionsType(), 'BuiltinOptions')
        option_class = get_options_class(options_type_name)
        wants_dequantized_input = True
        has_prequantized_output = True
        if optype == 'TFL_QUANTIZE':
            # A quantize op already produces the quantized tensor, so no extra Quantize op is inserted after it
            out_tensor = tflite_g.Tensors(op.Outputs(0))
            quant = out_tensor.Quantization()
            has_prequantized_output = False
            if quant is not None and not quant.ScaleIsNone() and not quant.ZeroPointIsNone():
                attr.update(get_quantization_attr(quant))
        elif optype == 'TFL_DEQUANTIZE':
            # A dequantize op consumes the quantized tensor directly, so no extra Dequantize op is inserted before it
            in_tensor = tflite_g.Tensors(op.Inputs(0))
            quant = in_tensor.Quantization()
            wants_dequantized_input = False
            if quant is not None and not quant.ScaleIsNone() and not quant.ZeroPointIsNone():
                attr.update(get_quantization_attr(quant))
        # Index -1 entries are left out of the input/output name lists
        input_names = [tensor_names[op.Inputs(i)] for i in range(op.InputsLength()) if op.Inputs(i) != -1]
        output_names = [tensor_names[op.Outputs(i)] for i in range(op.OutputsLength()) if op.Outputs(i) != -1]
        if optype.startswith("TFL_Flex"):
            data = read_flexbuffer(op.CustomOptionsAsNumpy().tobytes(), decode_strings=False)
            utils.make_sure(isinstance(data, list), "Flex ops are expected to store data as a flexbuffer list")
            tf_op = data[0].decode("utf-8")
            tf_node_def = node_def_pb2.NodeDef()
            tf_node_def.ParseFromString(data[1])
            input_tf_dtypes = [map_tflite_dtype_to_tf(name_to_tensor[inp].Type()) for inp in input_names]

            def shape_to_tf_shape(dims):
                return [None if d < 0 else d for d in dims] if dims is not None else None

            input_shapes = [shape_to_tf_shape(output_shapes[inp]) for inp in input_names]
            tf_attrs, _ = read_tf_node_def_attrs(tf_node_def, input_tf_dtypes, input_shapes)
            attr.update(tf_attrs)
            optype = tf_op
        elif not op.CustomOptionsIsNone():
            custom_ops_format = lookup_enum(op.CustomOptionsFormat(), 'CustomOptionsFormat')
            if custom_ops_format == 'FLEXBUFFERS':
                data = None
                try:
                    data = read_flexbuffer(op.CustomOptionsAsNumpy().tobytes())
                except Exception as e:  # pylint: disable=broad-except
                    logger.warning("Could not parse attributes for custom op '%s': %s", optype, e)
                if isinstance(data, dict):
                    attr.update(data)
        if option_class is not None:
            options = option_class()
            options.Init(op.BuiltinOptions().Bytes, op.BuiltinOptions().Pos)
            # All flatbuffer objects have these properties.
            block_list = [options_type_name + 'BufferHasIdentifier', 'Init', 'GetRootAs' + options_type_name]
            # The rest of the properties of the options class provide its attribute names
            attr_names = {opt for opt in dir(options) if not opt.startswith('_') and opt not in block_list}
            for a in list(attr_names):
                # Flatbuffer list properties have 3 functions: *Length, *IsNone, and *AsNumpy
                # Keep only *AsNumpy for them
                if a + 'Length' in attr_names:
                    attr_names.remove(a + 'Length')
                    attr_names.remove(a + 'IsNone')
                    attr_names.remove(a)
            for a in attr_names:
                if a.endswith('AsNumpy'):
                    value = getattr(options, a)().tolist()
                    a = a[:-len('AsNumpy')]
                else:
                    # For enums we use a string with the value name, not enum index
                    value = getattr(options, a)()
                    if a in NODE_ATTR_NAME_TO_ENUM_TYPE:
                        value = lookup_enum(value, NODE_ATTR_NAME_TO_ENUM_TYPE[a])
                    elif a in FUNCTION_ATTRS:
                        value = model.Subgraphs(value).Name().decode()
                attr_cnt[a] += 1
                attr[proper_to_snake_case(a)] = value
        if wants_dequantized_input:
            input_names = [get_dequant(inp) for inp in input_names]
        if optype == "TFL_TFLite_Detection_PostProcess":
            # There's a bug in tflite for the output shapes of this op
            for out, shape in zip(output_names, [[-1, -1, 4], [-1, -1], [-1, -1], [-1]]):
                if len(output_shapes[out]) != len(shape):
                    output_shapes[out] = shape
        if all(output_shapes[out] == [] for out in output_names):
            # tflite uses [] to represent both scalars and completely unknown shapes
            # If an op has non-scalar inputs and all scalar outputs, it is very likely the shapes are actually unknown.
            inp_shapes = [output_shapes[inp] for inp in input_names]
            if not all(s == [] for s in inp_shapes):
                if any(s is None for s in inp_shapes) or not op_has_scalar_output(inp_shapes, optype, attr):
                    for out in output_names:
                        logger.warning("Replacing scalar output shape of %s with unknown shape", out)
                        output_shapes[out] = None
        if has_prequantized_output:
            output_names = [get_prequant(out) for out in output_names]
        onnx_node = helper.make_node(optype, input_names, output_names, name=output_names[0], **attr)
        onnx_nodes.append(onnx_node)

    inputs = [tensor_names[tflite_g.Inputs(i)] for i in range(tflite_g.InputsLength())]
    outputs = [tensor_names[tflite_g.Outputs(i)] for i in range(tflite_g.OutputsLength())]
    # TODO: Allow input/outputs to be overridden

    for inp in inputs:
        onnx_node = helper.make_node("Placeholder", [], outputs=[inp], name=inp)
        onnx_nodes.append(onnx_node)

    graph_name = (tflite_g.Name() or b'tflite graph').decode()
    return onnx_nodes, op_cnt, attr_cnt, output_shapes, dtypes, inputs, outputs, graph_name
def tf_to_hls(yamlConfig):
    """Translate a serialized TensorFlow GraphDef into an optimized HLSModel.

    Args:
        yamlConfig: configuration mapping; ``yamlConfig['TensorFlowModel']``
            is the path of the GraphDef file to load. The whole mapping is
            also passed on to ``HLSModel``.

    Returns:
        The ``HLSModel`` built from the parsed layers, after the optimizer
        passes listed at the end of this function have been applied.

    Raises:
        Exception: if the model file does not exist, cannot be loaded or
            imported, contains an unsupported op type, or contains an op this
            function cannot parse.
        NotImplementedError: for a ResizeNearestNeighbor op with
            ``align_corners`` or ``half_pixel_centers`` enabled.
    """
    ######################
    ##  Do translation
    ######################

    # This is a list of dictionaries to hold all the layer info we need to generate HLS
    layer_list = []

    if not os.path.exists(yamlConfig['TensorFlowModel']):
        raise Exception('The specified file does not exist: {}'.format(
            yamlConfig['TensorFlowModel']))

    graph_def = None
    graph = None

    # Extract model architecture from pb
    try:
        with tf.io.gfile.GFile(yamlConfig['TensorFlowModel'], "rb") as f:
            graph_def = tf.compat.v1.GraphDef()
            graph_def.ParseFromString(f.read())
    except BaseException as e:
        raise Exception('Error loading the graph definition: {}'.format(
            str(e))) from e

    try:
        assert graph_def is not None
        with tf.Graph().as_default() as graph:
            tf.import_graph_def(graph_def,
                                input_map=None,
                                return_elements=None,
                                name='',
                                producer_op_list=None)
    except BaseException as e:
        raise Exception('Error importing the graph: {}'.format(
            str(e))) from e

    # Define supported operations
    array_ops = ['ConcatV2', 'StridedSlice', 'Transpose']
    core_ops = ['Const', 'Identity', 'Placeholder']
    image_ops = ['ResizeNearestNeighbor']
    math_ops = ['Add', 'MatMul', 'Mul', 'Sigmoid']
    nn_ops = [
        'AvgPool', 'BiasAdd', 'Conv2D', 'Elu', 'FusedBatchNorm', 'MaxPool',
        'Relu', 'Selu', 'Softmax'
    ]
    supported_ops = array_ops + core_ops + image_ops + math_ops + nn_ops

    input_layers = []
    output_layers = _find_graph_outputs(graph)

    # Get input shape and check for unsupported layer type
    output_shape = None
    for tf_op in graph.get_operations():
        if tf_op.type not in supported_ops:
            raise Exception('ERROR: Unsupported layer type: {}'.format(
                tf_op.type))

    print('Topology:')
    for tf_op in graph.get_operations():
        handled = False

        layer = {}
        layer['name'] = tf_op.name

        if tf_op.type == 'Placeholder':
            if len(tf_op.inputs) == 0:  # Input
                output_shape = tf_op.outputs[0].shape.as_list()
                layer['class_name'] = 'InputLayer'
                # The leading dimension is dropped from the input shape.
                layer['input_shape'] = output_shape[1:]
                layer['outputs'] = _parse_tensor_names(tf_op.outputs)
                input_layers.append(layer['name'])
                handled = True

        elif tf_op.type == 'Const' or tf_op.type == 'Identity':
            # Nothing to do here, TFDataReader handles these
            continue

        elif tf_op.type == 'MatMul':
            input_shape = tf_op.inputs[0].shape.as_list()
            output_shape = tf_op.outputs[0].shape.as_list()
            layer['class_name'] = 'Dense'
            layer['inputs'] = _parse_tensor_names(tf_op.inputs[0])
            layer['outputs'] = _parse_tensor_names(tf_op.outputs[0])
            layer['n_in'] = input_shape[-1]
            layer['n_out'] = output_shape[-1]
            handled = True

        elif tf_op.type == 'BiasAdd':
            input_shape = tf_op.inputs[0].shape.as_list()
            output_shape = tf_op.outputs[0].shape.as_list()
            layer['class_name'] = 'BiasAdd'
            layer['op'] = 'Add'
            layer['inputs'] = _parse_tensor_names(tf_op.inputs[0])
            layer['outputs'] = _parse_tensor_names(tf_op.outputs[0])
            handled = True

        elif tf_op.type in ['Elu', 'Relu', 'Selu', 'Sigmoid', 'Softmax']:
            output_shape = tf_op.outputs[0].shape.as_list()
            layer['class_name'] = 'Activation'
            layer['activation'] = tf_op.type
            layer['inputs'] = _parse_tensor_names(tf_op.inputs[0])
            layer['outputs'] = _parse_tensor_names(tf_op.outputs[0])
            handled = True

        elif tf_op.type == 'Conv2D':
            input_shape = tf_op.inputs[0].shape.as_list()
            weights_shape = tf_op.inputs[1].shape.as_list()
            output_shape = tf_op.outputs[0].shape.as_list()
            layer['data_format'], c_idx, h_idx, w_idx = _parse_data_format(
                tf_op.get_attr('data_format').decode())
            dilations = tf_op.get_attr('dilations')
            strides = tf_op.get_attr('strides')
            layer['class_name'] = 'Conv2D'
            layer['inputs'] = _parse_tensor_names(tf_op.inputs[0])
            layer['outputs'] = _parse_tensor_names(tf_op.outputs[0])

            layer['n_chan'] = input_shape[c_idx]
            layer['in_height'] = input_shape[h_idx]
            layer['in_width'] = input_shape[w_idx]

            # weights_shape = (filter_height, filter_width, n_channels, n_filters)
            layer['filt_height'] = weights_shape[0]
            layer['filt_width'] = weights_shape[1]
            # Overwrites the value taken from the input shape above.
            layer['n_chan'] = weights_shape[2]
            layer['n_filt'] = weights_shape[3]

            layer['stride_height'] = strides[h_idx]
            layer['stride_width'] = strides[w_idx]
            layer['dilation_height'] = dilations[h_idx]
            layer['dilation_width'] = dilations[w_idx]

            layer['padding'] = tf_op.get_attr('padding').decode().lower()
            in_height = input_shape[h_idx]
            in_width = input_shape[w_idx]
            _compute_pads_2d(layer, in_height, in_width)
            handled = True

        elif tf_op.type == 'MaxPool':
            input_shape = tf_op.inputs[0].shape.as_list()
            output_shape = tf_op.outputs[0].shape.as_list()
            layer['data_format'], c_idx, h_idx, w_idx = _parse_data_format(
                tf_op.get_attr('data_format').decode())
            strides = tf_op.get_attr('strides')
            kernel_size = tf_op.get_attr('ksize')
            layer['class_name'] = 'MaxPooling2D'
            layer['inputs'] = _parse_tensor_names(tf_op.inputs[0])
            layer['outputs'] = _parse_tensor_names(tf_op.outputs[0])
            layer['padding'] = tf_op.get_attr('padding').decode().lower()

            layer['in_height'] = input_shape[h_idx]
            layer['in_width'] = input_shape[w_idx]
            layer['n_filt'] = input_shape[c_idx]

            layer['stride_height'] = strides[h_idx]
            layer['stride_width'] = strides[w_idx]
            layer['filt_height'] = layer['pool_height'] = kernel_size[h_idx]
            layer['filt_width'] = layer['pool_width'] = kernel_size[w_idx]

            in_height = input_shape[h_idx]
            in_width = input_shape[w_idx]
            _compute_pads_2d(layer, in_height, in_width)
            handled = True

        elif tf_op.type == 'FusedBatchNorm':
            input_shape = tf_op.inputs[0].shape.as_list()
            output_shape = tf_op.outputs[0].shape.as_list()
            layer['class_name'] = 'BatchNormalization'
            layer['inputs'] = _parse_tensor_names(tf_op.inputs[0])
            layer['outputs'] = _parse_tensor_names(tf_op.outputs[0])
            layer['data_format'], c_idx, h_idx, w_idx = _parse_data_format(
                tf_op.get_attr('data_format').decode())
            layer['n_in'] = np.prod(input_shape[1:])
            layer['epsilon'] = tf_op.get_attr('epsilon')
            # n_filt is -1 for inputs of rank below 4.
            if len(input_shape) < 4:
                layer['n_filt'] = -1
            else:
                layer['n_filt'] = input_shape[c_idx]
            handled = True

        elif tf_op.type == 'ConcatV2':
            layer['class_name'] = 'Concatenate'
            # The last input of ConcatV2 is not a data tensor; its constant
            # value is read as the axis below.
            layer['inputs'] = _parse_tensor_names(tf_op.inputs[:-1])
            layer['outputs'] = _parse_tensor_names(tf_op.outputs[0])
            output_shape = tf_op.outputs[0].shape.as_list()
            rank = tf_op.get_attr('N')
            if rank != 2:
                raise Exception(
                    'Unsupported number of inputs in Concat operation')
            layer['op'] = layer['class_name'].lower() + '{}d'.format(rank)
            layer['axis'] = tf_op.inputs[2].op.node_def.attr[
                'value'].tensor.int_val[0]  # Urgh!
            handled = True

        elif tf_op.type in ['Add', 'Mul']:
            layer['class_name'] = 'Merge'
            layer['inputs'] = _parse_tensor_names(list(tf_op.inputs))
            layer['outputs'] = _parse_tensor_names(tf_op.outputs[0])
            output_shape = tf_op.outputs[0].shape.as_list()
            layer['op'] = tf_op.type.lower()
            if layer['op'] == 'mul':
                layer['op'] = 'multiply'
            handled = True

        elif tf_op.type == 'Transpose':
            layer['class_name'] = 'Transpose'
            layer['inputs'] = _parse_tensor_names(tf_op.inputs[0])
            layer['outputs'] = _parse_tensor_names(tf_op.outputs[0])
            # The permutation is decoded from the constant feeding input 1.
            layer['perm'] = tensor_util.MakeNdarray(
                tf_op.inputs[1].op.node_def.attr['value'].tensor).tolist()
            output_shape = tf_op.outputs[0].shape.as_list()
            handled = True

        elif tf_op.type == 'ResizeNearestNeighbor':
            layer['class_name'] = 'Resize'
            layer['algorithm'] = 'nearest'
            layer['inputs'] = _parse_tensor_names(tf_op.inputs[0])
            layer['outputs'] = _parse_tensor_names(tf_op.outputs[0])
            input_shape = tf_op.inputs[0].shape.as_list()  # (B, H, W, C)
            output_shape = tf_op.outputs[0].shape.as_list()
            layer['height'] = input_shape[1]
            layer['width'] = input_shape[2]
            layer['n_chan'] = input_shape[3]
            layer['new_height'] = output_shape[1]
            layer['new_width'] = output_shape[2]

            # Check for currently unsupported operations
            align_corners = tf_op.get_attr('align_corners')
            if align_corners:
                raise NotImplementedError(
                    'Property "align_corners=True" is not supported.')
            # Read the attribute this check is actually about; it previously
            # re-read 'align_corners', so this branch could never trigger.
            half_pixel_centers = tf_op.get_attr('half_pixel_centers')
            if half_pixel_centers:
                raise NotImplementedError(
                    'Property "half_pixel_centers=True" is not supported.')
            handled = True

        if not handled:
            raise Exception('Unable to parse operation: {} - {}'.format(
                tf_op.type, tf_op.name))

        print('Layer name: {}, layer type: {}, current shape: {}'.format(
            layer['name'], layer['class_name'], output_shape))
        layer_list.append(layer)

    #################
    ## Generate HLS
    #################

    reader = TFDataReader(graph)
    print('Creating HLS model')
    hls_model = HLSModel(yamlConfig, reader, layer_list, input_layers,
                         output_layers)
    optimizers = [
        'eliminate_linear_activation', 'merge_batch_norm_quantized_tanh',
        'quantize_dense_output', 'fuse_biasadd', 'fuse_dense_batch_norm'
    ]
    optimize_model(hls_model, optimizers)
    return hls_model
def apply_matmul_biasadd_relu_fusion(self, match_node_name):
    """Rewrite a matched MatMul/BiasAdd/Relu chain as one quantized node.

    Args:
        match_node_name: Sequence of node names for the matched pattern.
            Index 0 is the MatMul node, index 1 is the node whose second
            input supplies the bias, and index 2 is the Relu node whose
            name is handed to the dequantize step.

    Returns:
        ``match_node_name`` when the fusion was applied. An empty list when
        the pattern is left untouched; in that case ``self.output_graph``
        is set to ``self.input_graph``.
    """
    matmul_name = match_node_name[0]
    skip_node_name = match_node_name[1:]
    matched_node = self.node_name_mapping[matmul_name]
    control_inputs, normal_inputs = self._get_node_input(
        matched_node.node.name)
    weight_name = normal_inputs[1]
    # Resolve the weight input to a node name once and reuse it below.
    # NOTE(review): presumably node_name_from_input strips a ":port" suffix
    # and a "^" prefix - confirm against the helper module.
    weight_node_name = helper.node_name_from_input(weight_name)
    weight_node = self.node_name_mapping[weight_node_name].node

    # FIXME We only quantize the MatMul op whose second input node type is
    # Const. This is a workaround for RNN models like LSTM.
    if weight_node.op != 'Const':
        self.output_graph = self.input_graph
        return []

    # Weights containing NaN are not quantized.
    weights_content = tensor_util.MakeNdarray(
        weight_node.attr['value'].tensor)
    if np.any(np.isnan(weights_content)):
        self.output_graph = self.input_graph
        return []

    # Leave the graph untouched when the weight node's name appears in the
    # output list of any mapped node.
    if any(weight_node.name in self.node_name_mapping[name].output
           for name in self.node_name_mapping):
        self.output_graph = self.input_graph
        return []

    # Pass the node resolved above instead of indexing the mapping a second
    # time with the raw input string, which fails for "name:0" style inputs.
    q_weights_name, q_weights_min_name, q_weights_max_name = \
        self._intel_cpu_quantize_weight_eightbit(
            matched_node.node.op, weight_node, self.per_channel)

    # Skip by node name so the original weight node is matched in the loop
    # below even when the input string carried a port suffix.
    skip_node_name.append(weight_node_name)
    skip_names = set(skip_node_name)

    for node in self.input_graph.node:
        if node.name in skip_names:
            # Replaced by the fused node; not copied to the output graph.
            continue

        if node.name != matmul_name:
            # Unrelated node: copy it through unchanged.
            new_node = node_def_pb2.NodeDef()
            new_node.CopyFrom(node)
            self.add_output_graph_node(new_node)
            continue

        self.logger.debug("matched node {} with input {}".format(
            node.name, node.input))
        self.logger.debug("apply_matmul_biasadd_relu_fusion")

        quantized_node_name = node.name + "_eightbit_quantized_mat_mul"
        bias_node_name = self.node_name_mapping[
            match_node_name[1]].node.input[1]
        relu_node_name = match_node_name[2]

        # Splice the quantized weight in as the second input and append its
        # min/max after the names returned by the prologue.
        all_input_names = self._add_eightbit_prologue_nodes(
            matched_node.node.name)
        all_input_names = all_input_names[:1] + [
            q_weights_name
        ] + all_input_names[1:]
        all_input_names.append(q_weights_min_name)
        all_input_names.append(q_weights_max_name)
        # The bias becomes the third input; control inputs go last.
        quantized_node_input_names = all_input_names[:2] + [
            bias_node_name
        ] + all_input_names[2:] + control_inputs

        quantized_matmul_node = helper.create_node(
            "QuantizedMatMulWithBiasAndRelu", quantized_node_name,
            quantized_node_input_names)

        helper.copy_attr(quantized_matmul_node, "transpose_a",
                         node.attr["transpose_a"])
        helper.copy_attr(quantized_matmul_node, "transpose_b",
                         node.attr["transpose_b"])
        helper.set_attr_dtype(quantized_matmul_node, "T1", dtypes.quint8)
        helper.set_attr_dtype(quantized_matmul_node, "T2", dtypes.qint8)
        helper.set_attr_dtype(quantized_matmul_node, "Toutput",
                              dtypes.qint32)
        helper.set_attr_string(
            quantized_matmul_node, 'input_quant_mode',
            b'MIN_FIRST' if self.is_asymmetric else b'SCALED')

        self.add_output_graph_node(quantized_matmul_node)

        quantize_down_name = self._add_quantize_down_nodes(
            node, quantized_node_name, dtypes.quint8, False)
        self._intel_cpu_add_dequantize_result_node(
            quantize_down_name, relu_node_name)

    return match_node_name