def print_operation_details(tf_op):
    """Print the attributes, inputs and outputs of a TensorFlow operation.

    Writes to stdout the operation name and type, every attribute with its
    Python type and value, and for each input and output tensor its name,
    C data type, rank and shape.

    Args:
        tf_op: the operation to describe. Its `node_def`, `inputs`,
            `outputs`, `name`, `type` and `get_attr` members are read.
    """
    # Inputs arriving through an Identity operation are reported as the
    # tensor which feeds that Identity operation.
    inputs = [
        op_input.op.inputs[0] if op_input.op.type == "Identity" else op_input
        for op_input in tf_op.inputs
    ]

    # NodeDef.attr is a string keyed map, so the attribute names can be read
    # from it directly instead of parsing the text dump of the NodeDef (which
    # relied on exact indentation and truncated long keys). Sorted to keep
    # the listing deterministic.
    attr_keys = sorted(tf_op.node_def.attr)

    print("Attr keys are : " + str(attr_keys))
    print("Details of operation \"%s\" "
          "type [%s] -------------------" % (tf_op.name, tf_op.type))

    if attr_keys:
        print("Attributes:")
        for key in attr_keys:
            value = tf_op.get_attr(key)
            print(" \"%s\"\t\ttype(%s)\t\tvalue(%s)" %
                  (key, str(type(value)), str(value)))

    print("%d inputs:" % len(inputs))
    for idx, tensor in enumerate(inputs):
        parent_op = tf_utils.get_parent_of_tensor(tensor)
        tensor_shape = tf_utils.np_tensor_shape(tensor)
        print(
            " [%2d] \"%s\" %s rank(%d) %s : source op (\"%s\" - %s)" %
            (idx,
             tensor.name,
             code_gen.get_c_dtype(tensor.dtype.base_dtype),
             len(tensor_shape),
             tensor_shape,
             parent_op.name,
             parent_op.type))

    print("%d outputs:" % len(tf_op.outputs))
    for idx, output in enumerate(tf_op.outputs):
        output_shape = tf_utils.np_tensor_shape(output)
        print(" [%2d] \"%s\" %s rank(%d) %s" %
              (idx,
               output.name,
               code_gen.get_c_dtype(output.dtype.base_dtype),
               len(output_shape),
               output_shape))
    print("--------------------------------------------------")
def gen_code(cls, tf_op, inputs):
    """Generate C++ which applies std::tanh to every element of the input.

    Args:
        tf_op: operation whose first output receives the result.
        inputs: input tensors of the operation, only inputs[0] is used.

    Returns:
        A cpp_gen.CodeBlock holding the output tensor definition followed by
        a for loop over all elements.
    """
    source = inputs[0]
    c_type = code_gen.get_c_dtype(source.dtype.base_dtype)
    source_name = code_gen.c_safe_identifier(source.name)
    target_name = code_gen.c_safe_identifier(tf_op.outputs[0].name)

    # total number of elements is the product of all input dimensions
    total = 1
    for extent in tf_utils.np_tensor_shape(source):
        total = total * extent

    # statement defining the output tensor
    block = cpp_gen.CodeBlock()
    output_definition = base_op.BaseOpKernel.output_assignment(
        tf_op, eval=True, assignment=False)
    block.add_statement(cpp_gen.Statement(output_definition))

    # element-wise loop writing tanh of each input value into the output
    loop = cpp_gen.LoopStatement("for", "int i=0; i<%d; ++i" % total)
    element_expression = (
        "((%s*)%s.data())[i] = std::tanh(((%s*)%s.data())[i])" %
        (c_type, target_name, c_type, source_name))
    loop.code.add_statement(cpp_gen.Statement(element_expression))
    block.add_statement(loop)

    return block
def gen_code(cls, tf_op, inputs):
    """Generate a C++ statement clamping the input below at zero.

    Args:
        tf_op: operation whose first output defines the C type and target.
        inputs: input tensors of the operation, only inputs[0] is used.

    Returns:
        A string of the form "<output assignment> <input>.cwiseMax((T)0);".
    """
    c_type = code_gen.get_c_dtype(tf_op.outputs[0].dtype.base_dtype)
    operand = code_gen.c_safe_identifier(inputs[0].name)
    target = base_op.BaseOpKernel.output_assignment(tf_op, True)
    return "%s %s.cwiseMax((%s)0);" % (target, operand, c_type)
def write_numpy_array_c(file, identifier, array, data=True):
    """Write a numpy array to `file` as a C array declaration or definition.

    Non floating point arrays are written as a multi-dimensional C array
    named `identifier`. Floating point arrays ("float", "double" or
    "long double") are instead written as a flat `unsigned int` array named
    `<identifier>Hex` holding the bit pattern of each value.

    Args:
        file: object with a `write(str)` method receiving the C source.
        identifier: name of the C array.
        array: numpy ndarray to write.
        data: when True the values are written as an initialiser, when
            False only the declaration is written.
    """
    c_type = code_gen.get_c_dtype(array.dtype)
    is_float = c_type in ("float", "double", "long double")

    if not is_float:
        declaration = "%s %s" % (c_type, identifier)
        for extent in array.shape:
            declaration = "%s[%d]" % (declaration, extent)

        if data:
            declaration = "%s = " % declaration
            # continuation lines are indented to line up under the initialiser
            indent = " " * len(declaration)
            file.write(declaration)
            write_numpy_array_data(file, array, indent, c_type)
            file.write(";\n\n")
        else:
            file.write(declaration + ";\n\n")
        return

    # reshape(-1) flattens arrays of any rank, including rank zero arrays
    # for which np.prod(array.shape) is the float 1.0 and is rejected by
    # reshape.
    array_flat = array.reshape(-1)
    count = array_flat.shape[0]
    file.write("\n\nunsigned int %sHex[%d]" % (identifier, count))

    if not data:
        file.write(";\n\n")
        return

    # NOTE(review): every value is packed as a 32 bit float, so "double" and
    # "long double" arrays are narrowed here - confirm this is what the
    # consumers of the Hex arrays expect.
    hex_words = [
        "0x%X" % struct.unpack('<I', struct.pack('<f', value))
        for value in array_flat
    ]
    file.write(" = { ")
    file.write(", ".join(hex_words))
    file.write("};\n\n")
def gen_code(cls, tf_op, inputs):
    """Generate C++ dividing the first input by the second.

    When the second input is a scalar (rank zero, or rank one with a single
    element) an explicit element loop is generated, dividing by either the
    constant value, a right shift for power of two integer constants, or the
    first element of the divisor tensor. Otherwise a single tensor division
    statement is generated.

    Args:
        tf_op: operation whose first output receives the result.
        inputs: input tensors, inputs[0] is the numerator and inputs[1]
            the denominator.

    Returns:
        A cpp_gen.CodeBlock for scalar denominators, otherwise a string
        holding one C++ statement.
    """
    output_identifier = code_gen.c_safe_identifier(tf_op.outputs[0].name)
    input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
    input1_identifier = code_gen.c_safe_identifier(inputs[1].name)

    input1_shape = tf_utils.np_tensor_shape(inputs[1])
    divisor_is_scalar = (
        len(input1_shape) == 0 or
        (len(input1_shape) == 1 and input1_shape[0] == 1))

    if not divisor_is_scalar:
        return "%s %s / %s;" % \
            (base_op.BaseOpKernel.output_assignment(tf_op, True),
             input0_identifier,
             input1_identifier)

    input0_size = np.prod(tf_utils.np_tensor_shape(inputs[0]))
    c_type = code_gen.get_c_dtype(inputs[0].dtype.base_dtype)

    code = cpp_gen.CodeBlock()
    target = "%s" % base_op.BaseOpKernel.output_assignment(
        tf_op, True, assignment=False)
    code.add_statement(
        cpp_gen.Statement(target.replace(";", "").replace('\n', '')))

    # Choose the right hand side of the element expression: a shift for
    # power of two constants, a division by a constant, or a division by
    # the first element of the denominator tensor.
    if inputs[1].op.type == 'Const':
        const_value = tf_utils.get_const_scalar(inputs[1].op)
        # A shift is only valid C++ on integer operands and needs a
        # non-negative shift count. The `>= 1` test also keeps math.log2
        # away from zero and negative constants, for which it raises.
        is_float = c_type in ("float", "double", "long double")
        use_shift = (not is_float and
                     const_value >= 1 and
                     math.log2(const_value).is_integer())
        if use_shift:
            expression = ">> %d" % int(math.log2(const_value))
        else:
            expression = "/ (%s)%f" % (c_type, const_value)
    else:
        expression = "/ %s(0)" % input1_identifier

    for_loop = cpp_gen.LoopStatement(
        "for", "int i=0; i<%d; ++i" % input0_size)
    for_loop.code.add_statement(
        cpp_gen.Statement(
            "((%s*)%s.data())[i] = ((%s*)%s.data())[i] %s" %
            (c_type, output_identifier,
             c_type, input0_identifier,
             expression)))
    code.add_statement(for_loop)
    return code
def gen_code(cls, tf_op, inputs):
    """Generate a C++ statement casting the input to the destination type.

    The destination type is read from the operation's "DstT" attribute.

    Args:
        tf_op: the cast operation.
        inputs: input tensors of the operation, only inputs[0] is used.

    Returns:
        A string of the form "<output assignment> <input>.cast<T>();".
    """
    destination_type = code_gen.get_c_dtype(tf_op.get_attr("DstT"))
    operand = code_gen.c_safe_identifier(inputs[0].name)
    target = base_op.BaseOpKernel.output_assignment(tf_op, eval=False)
    return "%s %s.cast<%s>();" % (target, operand, destination_type)
def gen_code(cls, tf_op, inputs):
    """Generate a C++ expression computing 1 / (1 + exp(0 - x)).

    Args:
        tf_op: operation whose output receives the result.
        inputs: input tensors of the operation, only inputs[0] is used.

    Returns:
        A string holding the output assignment followed by the expression.
    """
    operand = code_gen.c_safe_identifier(inputs[0].name)
    c_type = code_gen.get_c_dtype(inputs[0].dtype.base_dtype)
    target = base_op.BaseOpKernel.output_assignment(tf_op, True)

    # the literals 1.0, 1.0 and 0.0 are each cast to the input's C type
    template = "%s (%s)1.0 / ((%s)1.0 + ((%s)0.0 - %s).exp())"
    return template % (target, c_type, c_type, c_type, operand)
def gen_code(cls, tf_op, inputs):
    """Generate a C++ statement computing max(x, x * alpha) element-wise.

    The slope is read from the operation's "alpha" attribute.

    Args:
        tf_op: operation providing "alpha" and the output tensor.
        inputs: input tensors of the operation, only inputs[0] is used.

    Returns:
        A string holding a single C++ statement.
    """
    operand = code_gen.c_safe_identifier(inputs[0].name)
    c_type = code_gen.get_c_dtype(tf_op.outputs[0].dtype.base_dtype)
    slope = tf_op.get_attr("alpha")
    target = base_op.BaseOpKernel.output_assignment(tf_op, True)

    return "%s %s.cwiseMax(%s * (%s)%f);" % (
        target, operand, operand, c_type, slope)
def gen_code(cls, tf_op, inputs):
    """Generate a C++ statement multiplying the two inputs.

    Two forms are produced:
      1. "<output> a * b;" when neither input is a constant scalar.
      2. "<output> tensor * (T)value;" when one input is a constant scalar,
         with the value written as a literal.

    Args:
        tf_op: the multiply operation.
        inputs: the two input tensors of the operation.

    Returns:
        A string holding a single C++ statement.
    """

    def _is_scalar(tensor):
        # rank zero, or rank one holding a single element
        return (tensor.shape.ndims == 0 or
                (tensor.shape.ndims == 1 and tensor.shape.dims[0] == 1))

    c_type = code_gen.get_c_dtype(tf_op.outputs[0].dtype.base_dtype)

    # The second test previously indexed dims[1], which is out of range for
    # a rank one tensor.
    param_a_is_scalar = _is_scalar(inputs[0])
    param_b_is_scalar = _is_scalar(inputs[1])
    param_a_is_const = tf_utils.operation_is_constant(inputs[0].op)
    param_b_is_const = tf_utils.operation_is_constant(inputs[1].op)

    # if one of the inputs is a constant scalar then implement form 2
    if param_a_is_const and param_a_is_scalar:
        tensor_identifier = code_gen.c_safe_identifier(inputs[1].name)
        const_value = tf_utils.get_const_scalar(
            tf_utils.get_parent_of_tensor(tf_op.inputs[0]))
        return "%s %s * (%s)%s;" % \
            (base_op.BaseOpKernel.output_assignment(tf_op, True),
             tensor_identifier,
             c_type,
             str(const_value))

    if param_b_is_const and param_b_is_scalar:
        tensor_identifier = code_gen.c_safe_identifier(inputs[0].name)
        const_value = tf_utils.get_const_scalar(
            tf_utils.get_parent_of_tensor(tf_op.inputs[1]))
        return "%s %s * (%s)%s;" % \
            (base_op.BaseOpKernel.output_assignment(tf_op, True),
             tensor_identifier,
             c_type,
             str(const_value))

    # otherwise generate form 1; pass True explicitly, the builtin `eval`
    # function was being passed here by mistake
    input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
    input1_identifier = code_gen.c_safe_identifier(inputs[1].name)
    return "%s %s * %s;" % \
        (base_op.BaseOpKernel.output_assignment(tf_op, True),
         input0_identifier,
         input1_identifier)
def gen_code(cls, tf_op, inputs):
    """Generate a C++ statement clamping the input to the range [0, 6].

    Args:
        tf_op: operation whose first output defines the C type and target.
        inputs: input tensors of the operation, only inputs[0] is used.

    Returns:
        A string holding a single C++ statement.
    """
    # if this is operating on quantised data then the upper
    # limit will need multiplying by the correct power of 2.
    upper_limit = 6

    c_type = code_gen.get_c_dtype(tf_op.outputs[0].dtype.base_dtype)
    operand = code_gen.c_safe_identifier(inputs[0].name)
    target = base_op.BaseOpKernel.output_assignment(tf_op, True)

    return "%s %s.cwiseMax((%s)0).cwiseMin((%s)%d);" % (
        target, operand, c_type, c_type, upper_limit)
def gen_code(cls, tf_op, inputs):
    """Generate C++ finding the index of the largest element of the input.

    The generated code defines the output tensor, a running maximum and a
    loop over the first dimension of the input which stores the index of
    the largest value seen so far in element 0 of the output.

    Args:
        tf_op: operation whose first output receives the index.
        inputs: input tensors of the operation, only inputs[0] is used.

    Returns:
        A cpp_gen.CodeBlock holding the generated statements.
    """
    identifier = code_gen.c_safe_identifier(tf_op.outputs[0].name)
    input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
    c_type = code_gen.get_c_dtype(inputs[0].dtype.base_dtype)
    input_shape = tf_utils.np_tensor_shape(inputs[0])

    code = cpp_gen.CodeBlock()

    # output definition, without its trailing semi-colon and newlines
    assignment = base_op.BaseOpKernel.output_assignment(
        tf_op, True, assignment=False)
    if assignment.endswith(';'):
        assignment = assignment[:-1]
    assignment = assignment.replace('\n', '')
    code.add_statement(cpp_gen.Statement(str(assignment)))

    # Start from lowest(): for floating point types numeric_limits::min()
    # is the smallest positive value, so negative inputs would never
    # replace it.
    code.add_statement(
        cpp_gen.Statement("%s %s_max = std::numeric_limits<%s>::lowest()" %
                          (c_type, identifier, c_type)))
    code.add_statement(cpp_gen.Statement("%s(0) = 0" % identifier))

    # update the running maximum and the stored index when a larger
    # element is found
    if_statement = cpp_gen.IfStatement(
        "%s(%s_it) > %s_max" % (input0_identifier, identifier, identifier))
    if_statement.if_code.add_statement(
        cpp_gen.Statement("%s_max = %s(%s_it)" %
                          (identifier, input0_identifier, identifier)))
    if_statement.if_code.add_statement(
        cpp_gen.Statement("%s(0) = %s_it" % (identifier, identifier)))

    for_loop = cpp_gen.LoopStatement(
        "for",
        "long %s_it=0; %s_it<%d; ++%s_it" %
        (identifier, identifier, input_shape[0], identifier))
    for_loop.code.add_statement(if_statement)
    code.add_statement(for_loop)

    return code
def add_verification_to_class(self, class_obj, constructor):
    """Add a private TensorMap property for every operation output.

    Only acts when `self.validation_type` is 'Full'. For each output of each
    operation in `self.list_operations` a property named
    "<identifier>_val" is added to `class_obj`, and an initialiser mapping
    it onto the matching verification data literal is appended to
    `constructor.initialiser_list`.

    Args:
        class_obj: class receiving the properties, its `identifier` is used
            to build the name of the literal data.
        constructor: constructor whose `initialiser_list` is extended.
    """
    if self.validation_type != 'Full':
        return

    floating_types = ("float", "double", "long double")
    layout = "Eigen::" + self.data_layout

    for operation in self.list_operations:
        for tensor in operation.outputs:
            name = code_gen.c_safe_identifier(tensor.name)
            c_type = code_gen.get_c_dtype(tensor.dtype)

            # rank zero tensors are handled as rank one with one element
            dims = tf_utils.np_tensor_shape(tensor)
            if len(dims) == 0:
                dims = [1]

            # Eigen::TensorMap<Eigen::Tensor<type, rank, layout>>
            tensor_args = cpp_gen.TemplateInstance()
            tensor_args.add_element(cpp_gen.TypeDefinition(c_type))
            tensor_args.add_element(str(len(dims)))
            tensor_args.add_element(layout)

            map_args = cpp_gen.TemplateInstance()
            map_args.add_element(
                cpp_gen.TypeDefinition('Tensor',
                                       namespace='Eigen',
                                       template=tensor_args))
            map_type = cpp_gen.TypeDefinition('TensorMap',
                                              namespace='Eigen',
                                              template=map_args)

            member = cpp_gen.ClassProperty(name + "_val", map_type)
            member.access_modifier = "private"
            class_obj.add(member)

            # floating point data is referenced through its "Hex" literal
            suffix = "Hex" if c_type in floating_types else ""
            literal_name = (class_obj.identifier + "Weights::" +
                            name + "VerificationData" + suffix)

            dims_literal = code_gen.ndarray_1d_to_literal(
                dims, open='', close='')
            constructor.initialiser_list += [
                "%s((%s*)%s,%s)" %
                (name + "_val", c_type, literal_name, dims_literal)
            ]
def add_weights_to_class(self, class_obj, constructor):
    """Add a private TensorMap property for every stored training tensor.

    For each tensor in `self.list_training_tensors` a property named after
    the tensor is added to `class_obj`, and an initialiser mapping it onto
    the matching "...Flat" literal is appended to
    `constructor.initialiser_list`.

    Args:
        class_obj: class receiving the properties, its `identifier` is used
            to build the name of the literal data.
        constructor: constructor whose `initialiser_list` is extended.
    """
    floating_types = ("float", "double", "long double")
    layout = "Eigen::" + self.data_layout

    for tensor in self.list_training_tensors:
        name = code_gen.c_safe_identifier(tensor.name)
        c_type = code_gen.get_c_dtype(tensor.dtype.base_dtype)
        dims = tf_utils.np_tensor_shape(tensor)

        # Eigen::TensorMap<Eigen::Tensor<type, rank, layout>>, with rank
        # zero tensors declared as rank one
        tensor_args = cpp_gen.TemplateInstance()
        tensor_args.add_element(cpp_gen.TypeDefinition(c_type))
        tensor_args.add_element(str(max(1, len(dims))))
        tensor_args.add_element(layout)

        map_args = cpp_gen.TemplateInstance()
        map_args.add_element(
            cpp_gen.TypeDefinition('Tensor',
                                   namespace='Eigen',
                                   template=tensor_args))
        map_type = cpp_gen.TypeDefinition('TensorMap',
                                          namespace='Eigen',
                                          template=map_args)

        member = cpp_gen.ClassProperty(name, map_type)
        member.access_modifier = "private"
        class_obj.add(member)

        # For now just use literal values,
        # TODO add option to load weights from file as well
        literal_name = class_obj.identifier + "Weights::" + name + "Flat"
        if c_type in floating_types:
            literal_name += "Hex"

        dims_literal = code_gen.ndarray_1d_to_literal(dims,
                                                      open='',
                                                      close='')
        # convert rank zero tensor to rank 1 for eigen
        if dims_literal == ' ':
            dims_literal = ' 1 '

        constructor.initialiser_list += [
            "%s((%s*)%s,%s)" % (name, c_type, literal_name, dims_literal)
        ]
def gen_code(cls, tf_op, inputs):
    """Generate C++ dividing exp(x) by the sum of exp(x) over all elements.

    Three pieces are emitted: the exponential of the input, a rank zero
    tensor holding its sum, and the output assignment dividing the
    exponential by that sum.

    Args:
        tf_op: operation whose output receives the result.
        inputs: input tensors of the operation, only inputs[0] is used.

    Returns:
        A string holding the generated C++ statements.
    """
    operand = code_gen.c_safe_identifier(inputs[0].name)
    c_type = code_gen.get_c_dtype(inputs[0].dtype.base_dtype)

    exponential = "\nauto %sExp = %s.exp();" % (operand, operand)

    total = "\nEigen::Tensor<%s, 0, %s> %sExpSum = %sExp.sum();" % (
        c_type, base_op.BaseOpKernel.data_layout, operand, operand)

    normalised = "%s %sExp / %sExp.constant(%sExpSum(0));" % (
        base_op.BaseOpKernel.output_assignment(tf_op, True),
        operand, operand, operand)

    return exponential + total + normalised
def gen_code(cls, tf_op, inputs):
    """Generate C++ setting every element of the output to a constant.

    The constant is the scalar value of the operation producing inputs[1].

    Args:
        tf_op: operation whose first output is filled.
        inputs: input tensors of the operation, inputs[1] supplies the value.

    Returns:
        A cpp_gen.CodeBlock defining the output tensor and calling
        setConstant on it.
    """
    target_name = code_gen.c_safe_identifier(tf_op.outputs[0].name)
    c_type = code_gen.get_c_dtype(tf_op.outputs[0].dtype.base_dtype)

    value_op = tf_utils.get_parent_of_tensor(inputs[1])
    fill_value = tf_utils.get_const_scalar(value_op)

    block = cpp_gen.CodeBlock()
    output_definition = base_op.BaseOpKernel.output_assignment(
        tf_op, eval=True, assignment=False)
    block.add_statement(cpp_gen.Statement(output_definition))

    fill_call = "%s.setConstant((%s)%f)" % (target_name, c_type, fill_value)
    block.add_statement(cpp_gen.Statement(fill_call))

    return block
def add_parameters_to_methods(self, eval_method, validate_method,
                              timing_method, class_name):
    """Add input and output tensor parameters to the generated methods.

    For every input placeholder a pointer parameter is added to the eval and
    timing methods, together with a statement wrapping it in an
    Eigen::TensorMap. The validate method instead maps each input onto its
    verification data literal. For every output tensor a pointer parameter
    is added to the eval and timing methods, a local buffer is declared in
    the validate method, and the tensor is tagged with
    `tfmin_output_identifier` naming that parameter. Finally a `print`
    parameter is added to the timing method and a description of all
    parameters is appended to the eval and timing method comments.

    Args:
        eval_method: method receiving pointer parameters and tensor maps.
        validate_method: method receiving verification maps and buffers.
        timing_method: method receiving the same parameters as eval_method
            plus a trailing `print` parameter.
        class_name: name used as prefix of the "<class_name>Weights::"
            scope holding the verification data.
    """
    floating_types = ("float", "double", "long double")
    layout = "Eigen::" + self.data_layout

    parameter_comment = "Input tensors\n"
    for i, input_placeholder in enumerate(self.list_input_placeholders):
        tensor = input_placeholder.outputs[0]
        c_type = code_gen.get_c_dtype(tensor.dtype.base_dtype)
        identifier = code_gen.c_safe_identifier(tensor.name)

        # rank zero tensors are handled as rank one with one element
        shape = tf_utils.np_tensor_shape(tensor)
        if len(shape) == 0:
            shape = [1]
        shape_literal = code_gen.ndarray_1d_to_literal(shape,
                                                       open='',
                                                       close='')

        parameter_comment += "[%s] %s %s\n" % (
            c_type, identifier, str(tensor.shape[1:]))

        eval_method.parameter_list.add(
            cpp_gen.Parameter(identifier + "Param",
                              cpp_gen.TypeDefinition(c_type, ptr_levels=1)))
        timing_method.parameter_list.add(
            cpp_gen.Parameter(identifier + "Param",
                              cpp_gen.TypeDefinition(c_type, ptr_levels=1)))

        param_tensor_map = "Eigen::TensorMap<Eigen::Tensor" \
                           "<%s, %d, %s>> %s(%s,%s)" % \
                           (c_type,
                            len(shape),
                            layout,
                            identifier,
                            identifier + "Param",
                            shape_literal)

        # Only floating point data is referenced through a "Hex" suffixed
        # literal; appending the suffix unconditionally made non floating
        # point inputs refer to a differently named literal than the other
        # verification data does.
        hex_suffix = "Hex" if c_type in floating_types else ""
        val_data_identifier = (class_name + "Weights::" + identifier +
                               "VerificationData" + hex_suffix)
        val_tensor_map = (
            "Eigen::TensorMap<Eigen::Tensor"
            "<%s, %d, %s>> %s((%s*)%s,%s)" %
            (c_type,
             len(shape),
             layout,
             identifier,
             c_type,
             val_data_identifier,
             shape_literal))

        # only the first tensor map statement carries a comment
        comment = None
        if i == 0:
            comment = cpp_gen.Comment("Creating TensorMaps of inputs")

        eval_method.code_block.add_statement(
            cpp_gen.Statement(param_tensor_map, comment))
        timing_method.code_block.add_statement(
            cpp_gen.Statement(param_tensor_map, comment))
        validate_method.code_block.add_statement(
            cpp_gen.Statement(val_tensor_map, comment))

    parameter_comment += "Output tensors\n"
    for out in self.output_tensors:
        c_type = code_gen.get_c_dtype(out.dtype)
        identifier = code_gen.c_safe_identifier(out.name)
        shape = tf_utils.np_tensor_shape(out)

        parameter_comment += "[%s] %s [%s]\n" % \
            (c_type,
             identifier,
             code_gen.ndarray_1d_to_literal(shape, open='', close=''))

        eval_method.parameter_list.add(
            cpp_gen.Parameter(identifier + "Param",
                              cpp_gen.TypeDefinition(c_type, ptr_levels=1)))
        timing_method.parameter_list.add(
            cpp_gen.Parameter(identifier + "Param",
                              cpp_gen.TypeDefinition(c_type, ptr_levels=1)))

        # create buffers to hold final output tensors in the validate
        # method, which doesn't actually return anything to the calling
        # process
        dummy_param = "%s %s[%d]" % (c_type,
                                     identifier + "Param",
                                     np.prod(shape))
        dummy_param_comment = cpp_gen.Comment("Dummy parameter for output")
        validate_method.code_block.add_statement(
            cpp_gen.Statement(dummy_param, dummy_param_comment))

        # Tag this tensor as an output so that operation kernels will map
        # the output to the given function parameter instead of a block in
        # the memory map. When the output is produced by an Identity
        # operation the tensor feeding it is tagged instead.
        tagged = out
        if tagged.op.type == 'Identity':
            tagged = tagged.op.inputs[0]
        tagged.tfmin_output_identifier = identifier + "Param"

    timing_method.parameter_list.add(
        cpp_gen.Parameter('print',
                          cpp_gen.TypeDefinition('bool'),
                          default='true'))

    eval_method.comment.text += parameter_comment
    timing_method.comment.text += parameter_comment
def output_assignment(tf_op, eval=True, idx=0, assignment=True):
    """Generate the C++ left hand side which receives an operation's output.

    Depending on the state of the output tensor and of BaseOpKernel this is
    one of:
      * a TensorMap onto a model output buffer, when the output tensor has a
        `tfmin_output_identifier` attribute,
      * a TensorMap onto the memory block (or onto the data of the first
        input when no offset was precalculated) when evaluating with the
        memory map enabled,
      * an Eigen::Tensor definition when evaluating without the memory map,
      * an `auto` definition when the operation is not evaluated.

    Args:
        tf_op: operation whose output is being assigned.
        eval: request evaluation of the result; forced to True when
            BaseOpKernel.evaluate_all or tf_op.tfmin_concrete_needed is set.
        idx: index of the output of `tf_op` to use.
        assignment: when True the returned code ends with the start of an
            assignment to the output.

    Returns:
        A string of C++ code.
    """
    output = tf_op.outputs[idx]
    identifier = code_gen.c_safe_identifier(output.name)
    c_type = code_gen.get_c_dtype(output.dtype.base_dtype)
    shape_np = tf_utils.np_tensor_shape(output)
    rank = len(shape_np)
    shape = code_gen.ndarray_1d_to_literal(shape_np, open='', close='')

    # -- special case --
    # if the result of this operation is a model output then
    # create a tensor map to the output buffer
    if hasattr(output, 'tfmin_output_identifier'):
        code = "\nEigen::TensorMap<Eigen::Tensor<%s, %d, %s>>" % \
            (c_type, rank, BaseOpKernel.data_layout)
        code += " %s((%s*)%s, %s);" % \
            (identifier, c_type, output.tfmin_output_identifier, shape)
        if assignment:
            code += "\n%s = " % identifier
        return code

    # if this operation needs to be concrete or all ops are being evaluated
    if BaseOpKernel.evaluate_all or tf_op.tfmin_concrete_needed:
        eval = True

    # if this operation is not being evaluated then create an auto type so
    # that the Eigen library produces an evaluator object instead of a
    # concrete tensor.
    if not eval:
        return "\nauto %s = " % identifier

    if BaseOpKernel.use_memory_map:
        precalculated_offset = getattr(output, '_tfmin_memory_offset', None)
        if precalculated_offset is None:
            # If no precalculated offset was found then assume it is safe
            # to use the memory space of the input to this operation.
            # NOTE this will be safe in most cases but may well fail in
            # some rare cases.
            source = tf_op.inputs[0]
            if source.op.type == "Identity":
                source = source.op.inputs[0]
            tensor_map_pointer = "%s.data()" % \
                code_gen.c_safe_identifier(source.name)
        else:
            tensor_map_pointer = "(%s*)(memoryBlock + %s)" % \
                (c_type, precalculated_offset)

        code = ("\nEigen::TensorMap<Eigen::Tensor<%s, %d, %s>>" %
                (c_type, rank, BaseOpKernel.data_layout))
        code += " %s(%s, %s);" % (identifier, tensor_map_pointer, shape)
    else:
        # use the class level layout like the other branches; the bare name
        # `data_layout` is not defined within this function
        # NOTE(review): with assignment=True this definition is followed by
        # the ".device(d) =" line below - confirm the generated C++ for the
        # non memory map configuration.
        code = "\nEigen::Tensor<%s, %d, %s> %s =" % \
            (c_type, rank, BaseOpKernel.data_layout, identifier)

    if assignment:
        code += "\n%s.device(d) =" % identifier

    return code