def gen_code(cls, tf_op, inputs):
    # generate the identifiers and shapes used to build the MatMul statement
    input0_statement = code_gen.c_safe_identifier(inputs[0].name)
    input1_statement = code_gen.c_safe_identifier(inputs[1].name)
    input0_shape = tf_utils.np_tensor_shape(inputs[0])
    input1_shape = tf_utils.np_tensor_shape(inputs[1])

    # if the inputs include vectors then reshape them to rank 2
    reshaped = False
    if len(input0_shape) == 1:
        input0_statement += ".reshape(Eigen::array<int,2>({1,%d}))" % \
            input0_shape[0]
        reshaped = True
    if len(input1_shape) == 1:
        # a rank-1 tensor only has a dimension at index 0
        input1_statement += ".reshape(Eigen::array<int,2>({%d,1}))" % \
            input1_shape[0]
        reshaped = True

    # if either input was reshaped then restore the expected output rank
    final_reshape = ""
    if reshaped:
        output_shape = tf_utils.np_tensor_shape(tf_op.outputs[0])
        final_reshape = ".reshape(Eigen::array<int,1>({%d}))" % \
            output_shape[0]

    code = "%s %s.contract(%s, matMulDims)%s;" % \
           (base_op.BaseOpKernel.output_assignment(tf_op, True),
            input0_statement,
            input1_statement,
            final_reshape)
    return code
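
# A minimal sketch of the C++ this kernel would emit (identifiers and
# shapes assumed for illustration): contracting a rank-1 input of shape
# [4] with a [4,3] weight tensor produces roughly
#
#   auto dense_MatMul = input.reshape(Eigen::array<int,2>({1,4}))
#       .contract(weights, matMulDims)
#       .reshape(Eigen::array<int,1>({3}));
#
# where the exact left-hand side depends on output_assignment().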
def print_operation_details(tf_op):
    """
    print_operation_details, shows operation attributes, inputs and outputs.
    Prints all the attributes of an operation as well as the sizes and
    types of all its input and output tensors.
    """
    inputs = []
    for op_input in tf_op.inputs:
        if op_input.op.type == "Identity":
            inputs += [op_input.op.inputs[0]]
        else:
            inputs += [op_input]

    # Brutally hacky way of getting the list of attributes
    # from a tensorflow.core.framework.node_def_pb2.NodeDef
    lines = str(tf_op.node_def).split("\n")
    attr_keys = []
    for l in lines:
        if l.startswith("  key: \""):
            key = l[8:100].replace("\"", "")
            attr_keys += [key]
    print("Attr keys are : " + str(attr_keys))

    print("Details of operation \"%s\" "
          "type [%s] -------------------" % (tf_op.name, tf_op.type))
    if len(attr_keys) > 0:
        print("Attributes:")
        for key in attr_keys:
            value = tf_op.get_attr(key)
            print("  \"%s\"\t\ttype(%s)\t\tvalue(%s)" %
                  (key, str(type(value)), str(value)))

    print("%d inputs:" % len(inputs))
    for idx, inp in enumerate(inputs):
        input_parent_op = tf_utils.get_parent_of_tensor(inp)
        print("  [%2d] \"%s\" %s rank(%d) %s : source op (\"%s\" - %s)" %
              (idx,
               inp.name,
               code_gen.get_c_dtype(inp.dtype.base_dtype),
               len(tf_utils.np_tensor_shape(inp)),
               tf_utils.np_tensor_shape(inp),
               input_parent_op.name,
               input_parent_op.type))

    print("%d outputs:" % len(tf_op.outputs))
    for idx, output in enumerate(tf_op.outputs):
        print("  [%2d] \"%s\" %s rank(%d) %s" %
              (idx,
               output.name,
               code_gen.get_c_dtype(output.dtype.base_dtype),
               len(tf_utils.np_tensor_shape(output)),
               tf_utils.np_tensor_shape(output)))
    print("--------------------------------------------------")
def gen_code(cls, tf_op, inputs):
    output_identifier = code_gen.c_safe_identifier(tf_op.outputs[0].name)
    input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
    input1_identifier = code_gen.c_safe_identifier(inputs[1].name)

    # if the second argument is a scalar tensor
    input1_shape = tf_utils.np_tensor_shape(inputs[1])
    if len(input1_shape) == 0 or (len(input1_shape) == 1 and
                                  input1_shape[0] == 1):
        input0_shape = tf_utils.np_tensor_shape(inputs[0])
        input0_size = np.prod(input0_shape)
        type = code_gen.get_c_dtype(inputs[0].dtype.base_dtype)

        code = cpp_gen.CodeBlock()
        target = base_op.BaseOpKernel.output_assignment(
            tf_op, True, assignment=False)
        code.add_statement(
            cpp_gen.Statement(target.replace(";", "").replace('\n', '')))

        # determine the type of expression to use. Either a division by
        # the value of a rank zero tensor, a division by a constant or,
        # for integer tensors with power-of-two denominators, a shift by
        # a constant
        if inputs[1].op.type == 'Const':
            const_value = tf_utils.get_const_scalar(inputs[1].op)
            # a C++ bit-shift is only valid for integer element types
            is_integer = type not in ("float", "double", "long double")
            if (is_integer and const_value > 0 and
                    math.log2(const_value).is_integer()):
                expression = ">> %d" % int(math.log2(const_value))
            else:
                expression = "/ (%s)%f" % (type, const_value)
        else:
            expression = "/ %s(0)" % input1_identifier

        for_loop = cpp_gen.LoopStatement(
            "for", "int i=0; i<%d; ++i" % input0_size)
        for_loop.code.add_statement(
            cpp_gen.Statement(
                "((%s*)%s.data())[i] = ((%s*)%s.data())[i] %s" %
                (type, output_identifier,
                 type, input0_identifier,
                 expression)))
        code.add_statement(for_loop)
    else:
        code = "%s %s / %s;" % \
               (base_op.BaseOpKernel.output_assignment(tf_op, True),
                input0_identifier,
                input1_identifier)
    return code
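
# A sketch of the emitted division loop (names and dtype assumed): for an
# int32 tensor of 1024 elements divided by the constant 8, the generated
# code would use a shift, roughly
#
#   for (int i=0; i<1024; ++i) {
#     ((int*)out.data())[i] = ((int*)in.data())[i] >> 3;
#   }
#
# while a non-power-of-two or non-constant denominator falls back to "/".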
def gen_code(cls, tf_op, inputs):
    input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
    output_identifier = code_gen.c_safe_identifier(tf_op.outputs[0].name)
    type = code_gen.get_c_dtype(inputs[0].dtype.base_dtype)

    # calculate the number of elements in the input tensor
    input_shape = tf_utils.np_tensor_shape(inputs[0])
    element_count = 1
    for dim in input_shape:
        element_count *= dim

    # generate code to define the output tensor
    code = cpp_gen.CodeBlock()
    code.add_statement(
        cpp_gen.Statement(
            base_op.BaseOpKernel.output_assignment(tf_op,
                                                   eval=True,
                                                   assignment=False)))

    # generate a loop to perform a hyperbolic tan on each element,
    # placing the result in the output tensor
    for_loop = cpp_gen.LoopStatement("for",
                                     "int i=0; i<%d; ++i" % element_count)
    for_loop.code.add_statement(
        cpp_gen.Statement(
            "((%s*)%s.data())[i] = std::tanh(((%s*)%s.data())[i])" %
            (type, output_identifier, type, input0_identifier)))
    code.add_statement(for_loop)
    return code
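
# A sketch of the emitted element-wise loop (names assumed): for a float
# tensor of 256 elements the generated code looks roughly like
#
#   for (int i=0; i<256; ++i) {
#     ((float*)out.data())[i] = std::tanh(((float*)in.data())[i]);
#   }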
def add_weights_to_class(self, class_obj, constructor):
    # Add stored tensors to properties and constructor initialiser list
    for t in self.list_training_tensors:
        type = code_gen.get_c_dtype(t.dtype.base_dtype)
        rank = max(1, len(tf_utils.np_tensor_shape(t)))

        inner_template = cpp_gen.TemplateInstance()
        inner_template.add_element(cpp_gen.TypeDefinition(type))
        inner_template.add_element(str(rank))
        inner_template.add_element("Eigen::" + self.data_layout)
        template = cpp_gen.TemplateInstance()
        template.add_element(
            cpp_gen.TypeDefinition('Tensor',
                                   namespace='Eigen',
                                   template=inner_template))
        tensor_type = cpp_gen.TypeDefinition('TensorMap',
                                             namespace='Eigen',
                                             template=template)
        tensor_map_property = cpp_gen.ClassProperty(
            code_gen.c_safe_identifier(t.name), tensor_type)
        tensor_map_property.access_modifier = "private"
        class_obj.add(tensor_map_property)

        # For now just use literal values,
        # TODO add option to load weights from file as well
        literal_name = class_obj.identifier + "Weights::" + \
            code_gen.c_safe_identifier(t.name) + "Flat"
        if type == "float" or type == "double" or type == "long double":
            literal_name += "Hex"
        shape = code_gen.ndarray_1d_to_literal(tf_utils.np_tensor_shape(t),
                                               open='', close='')
        # convert rank zero tensor to rank 1 for eigen
        if shape == ' ':
            shape = ' 1 '
        constructor.initialiser_list += [
            "%s((%s*)%s,%s)" %
            (code_gen.c_safe_identifier(t.name), type, literal_name, shape)
        ]
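
# For illustration (tensor name, shape and RowMajor layout assumed), a
# float weight tensor "conv1_kernel" of shape [3,3,8] would add a private
# property and a constructor initialiser entry roughly like
#
#   Eigen::TensorMap<Eigen::Tensor<float, 3, Eigen::RowMajor>> conv1_kernel;
#   conv1_kernel((float*)ModelWeights::conv1_kernelFlatHex, 3, 3, 8)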
def gen_code(cls, tf_op, inputs):
    input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
    input1_identifier = code_gen.c_safe_identifier(inputs[1].name)

    # if the bias tensor needs to be cast into the same type as the input
    bias_cast = ""
    # if the bias tensor needs to be broadcast into the same shape as
    # the input
    bias_broadcast = ""

    input0_shape = tf_utils.np_tensor_shape(inputs[0])
    input1_shape = tf_utils.np_tensor_shape(inputs[1])
    shapes_match = np.array_equal(input0_shape, input1_shape)

    if not shapes_match:
        broadcast_shape = tf_utils.np_tensor_shape(inputs[0])
        broadcast_shape[-1] = 1
        reshape_shape = np.array(([1] * (len(broadcast_shape) - 1)) +
                                 [input1_shape[0]])
        bias_broadcast = "\n .reshape(Eigen::array<int, %d>(%s))" % \
                         (len(reshape_shape),
                          code_gen.ndarray_1d_to_literal(reshape_shape))
        bias_broadcast += "\n .broadcast(Eigen::array<int, %d>(%s))" % \
                          (len(broadcast_shape),
                           code_gen.ndarray_1d_to_literal(broadcast_shape))

    code = "%s %s + %s%s%s;" % \
           (base_op.BaseOpKernel.output_assignment(tf_op, False),
            input0_identifier,
            input1_identifier,
            bias_cast,
            bias_broadcast)
    return code
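
# A sketch of the emitted bias addition (names and shapes assumed): adding
# a rank-1 bias of shape [16] to activations of shape [32,16] emits roughly
#
#   auto out = act + bias
#       .reshape(Eigen::array<int, 2>({1,16}))
#       .broadcast(Eigen::array<int, 2>({32,1}));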
def gen_code(cls, tf_op, inputs):
    output_shape = tf_utils.np_tensor_shape(tf_op.outputs[0])
    input_identifier = code_gen.c_safe_identifier(inputs[0].name)

    code = "%s %s.reshape(Eigen::array<int, %d>(%s));" % \
           (base_op.BaseOpKernel.output_assignment(
               tf_op,
               base_op.BaseOpKernel.evaluate_all),
            input_identifier,
            len(output_shape),
            code_gen.ndarray_1d_to_literal(output_shape))
    return code
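
# For illustration (names assumed), flattening a [4,4] tensor to rank 1
# emits roughly
#
#   auto flat_Reshape = conv_out.reshape(Eigen::array<int, 1>({16}));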
def gen_code(cls, tf_op, inputs):
    # super().print_operation_details(tf_op)
    identifier = code_gen.c_safe_identifier(tf_op.outputs[0].name)
    input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
    type = code_gen.get_c_dtype(inputs[0].dtype.base_dtype)
    input_shape = tf_utils.np_tensor_shape(inputs[0])

    code = cpp_gen.CodeBlock()
    assignment = base_op.BaseOpKernel.output_assignment(tf_op,
                                                        True,
                                                        assignment=False)
    if assignment[-1] == ';':
        assignment = assignment[:-1]
    assignment = assignment.replace('\n', '')
    code.add_statement(cpp_gen.Statement(str(assignment)))
    # start the running maximum at the lowest representable value so that
    # all-negative inputs are handled correctly
    code.add_statement(
        cpp_gen.Statement("%s %s_max = std::numeric_limits<%s>::lowest()" %
                          (type, identifier, type)))
    code.add_statement(cpp_gen.Statement("%s(0) = 0" % identifier))

    if_statement = cpp_gen.IfStatement(
        "%s(%s_it) > %s_max" % (input0_identifier, identifier, identifier))
    if_statement.if_code.add_statement(
        cpp_gen.Statement("%s_max = %s(%s_it)" %
                          (identifier, input0_identifier, identifier)))
    if_statement.if_code.add_statement(
        cpp_gen.Statement("%s(0) = %s_it" % (identifier, identifier)))

    for_loop = cpp_gen.LoopStatement(
        "for",
        "long %s_it=0; %s_it<%d; ++%s_it" %
        (identifier, identifier, input_shape[0], identifier))
    for_loop.code.add_statement(if_statement)
    code.add_statement(for_loop)
    return code
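
# A sketch of the emitted argmax scan (names assumed) for a rank-1 float
# input of length 10:
#
#   float out_max = std::numeric_limits<float>::lowest();
#   out(0) = 0;
#   for (long out_it=0; out_it<10; ++out_it) {
#     if (in(out_it) > out_max) {
#       out_max = in(out_it);
#       out(0) = out_it;
#     }
#   }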
def add_verification_to_class(self, class_obj, constructor):
    if self.validation_type == 'Full':
        for op in self.list_operations:
            for out in op.outputs:
                identifier = code_gen.c_safe_identifier(out.name)
                shape = tf_utils.np_tensor_shape(out)
                if len(shape) == 0:
                    shape = [1]
                type = code_gen.get_c_dtype(out.dtype)

                inner_template = cpp_gen.TemplateInstance()
                inner_template.add_element(cpp_gen.TypeDefinition(type))
                inner_template.add_element(str(len(shape)))
                inner_template.add_element("Eigen::" + self.data_layout)
                template = cpp_gen.TemplateInstance()
                template.add_element(
                    cpp_gen.TypeDefinition('Tensor',
                                           namespace='Eigen',
                                           template=inner_template))
                tensor_type = cpp_gen.TypeDefinition('TensorMap',
                                                     namespace='Eigen',
                                                     template=template)
                tensor_map_property = cpp_gen.ClassProperty(
                    identifier + "_val", tensor_type)
                tensor_map_property.access_modifier = "private"
                class_obj.add(tensor_map_property)

                lit_suffix = ""
                if type == "float" or type == "double" or \
                        type == "long double":
                    lit_suffix = "Hex"
                literal_identifier = (class_obj.identifier + "Weights::" +
                                      identifier + "VerificationData" +
                                      lit_suffix)
                constructor.initialiser_list += [
                    "%s((%s*)%s,%s)" %
                    (identifier + "_val",
                     type,
                     literal_identifier,
                     code_gen.ndarray_1d_to_literal(shape,
                                                    open='',
                                                    close=''))
                ]
def gen_code(cls, tf_op, inputs):
    input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
    input1_identifier = code_gen.c_safe_identifier(inputs[1].name)

    # if the second argument is a scalar tensor
    input1_shape = tf_utils.np_tensor_shape(inputs[1])
    if len(input1_shape) == 0 or (len(input1_shape) == 1 and
                                  input1_shape[0] == 1):
        code = "%s %s / %s.constant(%s(0));" % \
               (base_op.BaseOpKernel.output_assignment(tf_op, True),
                input0_identifier,
                input0_identifier,
                input1_identifier)
    else:
        code = "%s %s / %s;" % \
               (base_op.BaseOpKernel.output_assignment(tf_op, True),
                input0_identifier,
                input1_identifier)
    return code
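
# For illustration (names assumed), dividing by a scalar tensor emits
# roughly
#
#   auto out = num / num.constant(den(0));
#
# i.e. the rank-0 denominator is expanded to a constant tensor matching
# the numerator's shape.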
def gen_code(cls, tf_op, inputs):
    input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
    input1_identifier = code_gen.c_safe_identifier(inputs[1].name)
    axis = tf_utils.get_const_scalar(
        tf_utils.get_parent_of_tensor(inputs[2]))

    # if there is an undefined batch dimension that has been collapsed
    # reduce the axis index by 1
    reduced_rank = len(tf_utils.np_tensor_shape(tf_op.outputs[0]))
    if reduced_rank != tf_op.outputs[0].shape.ndims:
        axis -= (tf_op.outputs[0].shape.ndims - reduced_rank)

    code = "%s %s.concatenate(%s, %d);" % \
           (base_op.BaseOpKernel.output_assignment(tf_op),
            input0_identifier,
            input1_identifier,
            axis)
    return code
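
# For illustration (names and axis assumed), concatenating two tensors
# along axis 1 emits roughly
#
#   auto out_concat = left.concatenate(right, 1);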
def add_parameters_to_methods(self, eval_method, validate_method,
                              timing_method, class_name):
    parameter_comment = "Input tensors\n"
    for i, input_placeholder in enumerate(self.list_input_placeholders):
        type = code_gen.get_c_dtype(
            input_placeholder.outputs[0].dtype.base_dtype)
        identifier = code_gen.c_safe_identifier(
            input_placeholder.outputs[0].name)
        shape = tf_utils.np_tensor_shape(input_placeholder.outputs[0])
        if len(shape) == 0:
            shape = [1]
        parameter_comment += "[%s] %s %s\n" % (
            type, identifier, str(input_placeholder.outputs[0].shape[1:]))

        eval_method.parameter_list.add(
            cpp_gen.Parameter(identifier + "Param",
                              cpp_gen.TypeDefinition(type, ptr_levels=1)))
        timing_method.parameter_list.add(
            cpp_gen.Parameter(identifier + "Param",
                              cpp_gen.TypeDefinition(type, ptr_levels=1)))

        param_tensor_map = "Eigen::TensorMap<Eigen::Tensor" \
                           "<%s, %d, %s>> %s(%s,%s)" % \
                           (type,
                            len(shape),
                            "Eigen::" + self.data_layout,
                            identifier,
                            identifier + "Param",
                            code_gen.ndarray_1d_to_literal(shape,
                                                           open='',
                                                           close=''))
        val_data_identifier = (class_name + "Weights::" + identifier +
                               "VerificationDataHex")
        val_tensor_map = ("Eigen::TensorMap<Eigen::Tensor"
                          "<%s, %d, %s>> %s((%s*)%s,%s)" %
                          (type,
                           len(shape),
                           "Eigen::" + self.data_layout,
                           identifier,
                           type,
                           val_data_identifier,
                           code_gen.ndarray_1d_to_literal(shape,
                                                          open='',
                                                          close='')))
        comment = None
        if i == 0:
            comment = cpp_gen.Comment("Creating TensorMaps of inputs")
        eval_method.code_block.add_statement(
            cpp_gen.Statement(param_tensor_map, comment))
        timing_method.code_block.add_statement(
            cpp_gen.Statement(param_tensor_map, comment))
        validate_method.code_block.add_statement(
            cpp_gen.Statement(val_tensor_map, comment))

    parameter_comment += "Output tensors\n"
    for out in self.output_tensors:
        type = code_gen.get_c_dtype(out.dtype)
        identifier = code_gen.c_safe_identifier(out.name)
        shape = tf_utils.np_tensor_shape(out)
        parameter_comment += "[%s] %s [%s]\n" % \
                             (type,
                              identifier,
                              code_gen.ndarray_1d_to_literal(shape,
                                                             open='',
                                                             close=''))
        eval_method.parameter_list.add(
            cpp_gen.Parameter(identifier + "Param",
                              cpp_gen.TypeDefinition(type, ptr_levels=1)))
        timing_method.parameter_list.add(
            cpp_gen.Parameter(identifier + "Param",
                              cpp_gen.TypeDefinition(type, ptr_levels=1)))

        # create buffers to hold final output tensors in the validate
        # method, which doesn't actually return anything to the calling
        # process
        dummy_param = "%s %s[%d]" % (type,
                                     identifier + "Param",
                                     np.prod(shape))
        dummy_param_comment = cpp_gen.Comment("Dummy parameter for output")
        validate_method.code_block.add_statement(
            cpp_gen.Statement(dummy_param, dummy_param_comment))

        # Tag this tensor as an output so that operation kernels will
        # map the output to the given function parameter instead of a
        # block in the memory map.
        # out.tfmin_is_output = True
        if out.op.type == 'Identity':
            out = out.op.inputs[0]
        out.tfmin_output_identifier = identifier + "Param"

    timing_method.parameter_list.add(
        cpp_gen.Parameter('print',
                          cpp_gen.TypeDefinition('bool'),
                          default='true'))
    eval_method.comment.text += parameter_comment
    timing_method.comment.text += parameter_comment
def gen_code(cls, tf_op, inputs):
    input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
    input1_identifier = code_gen.c_safe_identifier(inputs[1].name)

    # If the input tensor sizes match then this is a simple elementwise
    # addition. However, if one of the tensors is smaller than the other
    # then it will attempt to 'broadcast' the smaller tensor up to the
    # size of the larger one.
    input0_expression = input0_identifier
    input1_expression = input1_identifier
    input0_shape = tf_utils.np_tensor_shape(inputs[0])
    input1_shape = tf_utils.np_tensor_shape(inputs[1])
    if not np.array_equal(input0_shape, input1_shape):
        # print("Broadcasting needed in Add operation!")
        # print("Old input_0 (%s) input_1 (%s)" %
        #       (input0_shape, input1_shape))
        smaller = None

        # if one shape has lower rank than the other then pad the smaller
        # rank with size 1 dimensions
        if input1_shape.size < input0_shape.size:
            smaller = 1
            padding = np.ones(int(input0_shape.size - input1_shape.size),
                              int)
            input1_shape = np.concatenate((padding, input1_shape))
            input1_expression += ".reshape(Eigen::array<int, %d>(%s))" % \
                (input1_shape.size,
                 code_gen.ndarray_1d_to_literal(input1_shape))
        elif input0_shape.size < input1_shape.size:
            smaller = 0
            padding = np.ones(int(input1_shape.size - input0_shape.size),
                              int)
            input0_shape = np.concatenate((padding, input0_shape))
            input0_expression += ".reshape(Eigen::array<int, %d>(%s))" % \
                (input0_shape.size,
                 code_gen.ndarray_1d_to_literal(input0_shape))
        # print("New input_0 (%s) input_1 (%s)" %
        #       (input0_shape, input1_shape))

        broadcast_multiplier = np.ones(input1_shape.size, dtype=int)
        for d in range(input0_shape.size):
            if input0_shape[d] != input1_shape[d]:
                # check error cases where dimensions are not universally
                # smaller on one side
                if (smaller == 0 and input0_shape[d] > input1_shape[d]) or \
                        (smaller == 1 and
                         input1_shape[d] > input0_shape[d]):
                    print("Error: Add operation with non-broadcastable "
                          "sized input tensors!")
                    return "// Error generating Add operation, " \
                           "non-broadcastable sized input tensors."
                # check error case where dimensions are unequal and
                # neither of them is 1
                if (input0_shape[d] < input1_shape[d] and
                        input0_shape[d] != 1) or \
                        (input1_shape[d] < input0_shape[d] and
                         input1_shape[d] != 1):
                    print("Error: Add operation with non-broadcastable "
                          "sized input tensors!")
                    return "// Error generating Add operation, " \
                           "non-broadcastable sized input tensors."
                # check if this dimension defines the smallest tensor
                if smaller is None and input0_shape[d] < input1_shape[d]:
                    smaller = 0
                elif smaller is None and input1_shape[d] < input0_shape[d]:
                    smaller = 1
                # update the broadcast multiplier for this dimension
                if smaller == 0:
                    broadcast_multiplier[d] = input1_shape[d]
                else:
                    broadcast_multiplier[d] = input0_shape[d]

        broadcast_expression = ".broadcast(Eigen::array<int, %d>(%s))" % \
            (broadcast_multiplier.size,
             code_gen.ndarray_1d_to_literal(broadcast_multiplier))

        # update the expression for the smaller tensor
        if smaller == 0:
            input0_expression += broadcast_expression
        elif smaller == 1:
            input1_expression += broadcast_expression

    code = "%s %s + %s;" % \
           (base_op.BaseOpKernel.output_assignment(tf_op, True),
            input0_expression,
            input1_expression)
    return code
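
# A sketch of the emitted broadcast addition (names and shapes assumed):
# adding a [3] tensor to a [2,3] tensor pads the smaller shape to [1,3]
# and emits roughly
#
#   auto out = big + small.reshape(Eigen::array<int, 2>({1,3}))
#       .broadcast(Eigen::array<int, 2>({2,1}));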
def output_assignment(tf_op, eval=True, idx=0, assignment=True):
    """
    Generate the left-hand side of an operation's output assignment.

    Depending on the settings this is either an Eigen::TensorMap onto an
    output parameter or the shared memory block, a concrete Eigen::Tensor,
    or an `auto` declaration bound to a lazy Eigen evaluator.
    """
    identifier = code_gen.c_safe_identifier(tf_op.outputs[idx].name)
    type = code_gen.get_c_dtype(tf_op.outputs[idx].dtype.base_dtype)
    rank = len(tf_utils.np_tensor_shape(tf_op.outputs[idx]))
    shape_np = tf_utils.np_tensor_shape(tf_op.outputs[idx])
    shape = code_gen.ndarray_1d_to_literal(shape_np, open='', close='')

    # -- special case --
    # if the result of this operation is a model output then
    # create a tensor map to the output buffer
    if hasattr(tf_op.outputs[idx], 'tfmin_output_identifier'):
        code = "\nEigen::TensorMap<Eigen::Tensor<%s, %d, %s>>" % \
               (type, rank, BaseOpKernel.data_layout)
        code += " %s((%s*)%s, %s);" % \
                (identifier,
                 type,
                 tf_op.outputs[idx].tfmin_output_identifier,
                 shape)
        if assignment:
            code += "\n%s = " % identifier
        return code

    # if this operation needs to be concrete or all ops are being
    # evaluated
    if BaseOpKernel.evaluate_all or tf_op.tfmin_concrete_needed:
        eval = True

    # if evaluate is true then create a concrete tensor or
    # map of the operation's result
    if eval:
        if BaseOpKernel.use_memory_map:
            precalculated_offset = None
            if hasattr(tf_op.outputs[idx], '_tfmin_memory_offset'):
                precalculated_offset = \
                    tf_op.outputs[idx]._tfmin_memory_offset
                tensor_map_pointer = "(%s*)(memoryBlock + %s)" % \
                                     (type, precalculated_offset)

            # if no precalculated_offset was found then assume it is
            # safe to use the memory space of the input to this operation.
            # NOTE this will be safe in most cases but this may well
            # explode in some rare cases!! I apologise in advance if this
            # has just happened to you.
            if precalculated_offset is None:
                input = tf_op.inputs[0]
                if input.op.type == "Identity":
                    input = input.op.inputs[0]
                tensor_map_pointer = "%s.data()" % \
                    code_gen.c_safe_identifier(input.name)

            code = ("\nEigen::TensorMap<Eigen::Tensor<%s, %d, %s>>" %
                    (type, rank, BaseOpKernel.data_layout))
            code += " %s(%s, %s);" % \
                    (identifier, tensor_map_pointer, shape)
        else:
            code = "\nEigen::Tensor<%s, %d, %s> %s =" % \
                   (type, rank, BaseOpKernel.data_layout, identifier)
        if assignment:
            code += "\n%s.device(d) =" % identifier
        return code

    # if this operation is not being evaluated then create
    # an auto type so that the Eigen library produces an evaluator
    # object instead of a concrete tensor.
    else:
        code = "\nauto %s = " % identifier
        return code
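
# For illustration (identifiers assumed), the three kinds of assignment
# target this can emit are roughly:
#
#   // model output: TensorMap onto the output parameter
#   Eigen::TensorMap<Eigen::Tensor<float, 2, Eigen::RowMajor>>
#       result((float*)resultParam, 1, 10);
#   result =
#
#   // evaluated op with memory map: TensorMap into the memory block
#   Eigen::TensorMap<Eigen::Tensor<float, 2, Eigen::RowMajor>>
#       conv1((float*)(memoryBlock + 256), 1, 64);
#   conv1.device(d) =
#
#   // lazy op: auto declaration bound to an Eigen evaluator
#   auto relu1 =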
def gen_code(cls, tf_op, inputs):
    # base_op.BaseOpKernel.print_operation_details(tf_op)
    num_split = tf_op.get_attr("num_split")

    # This development version only supports the form where axis is
    # provided by a rank 0 constant operation
    if tf_utils.get_parent_of_tensor(inputs[0]).type != "Const":
        print("Error : Split operation doesn't support computed values "
              "for axis yet!")
        return "// Error : Couldn't produce split operation with a " \
               "computed axis dimension."

    # axis is provided by the first input tensor
    axis = tf_utils.get_const_scalar(
        tf_utils.get_parent_of_tensor(inputs[0]))

    # if there is an undefined batch dimension that has been collapsed
    # reduce the axis index by 1
    reduced_rank = len(tf_utils.np_tensor_shape(tf_op.outputs[0]))
    if reduced_rank != tf_op.outputs[0].shape.ndims:
        axis -= (tf_op.outputs[0].shape.ndims - reduced_rank)

    code = ""

    # if num_split is an integer then generate form 1 of this
    # operation where the input tensor is split into
    # num_split tensors, divided evenly along axis
    if type(num_split) is int:
        # verify that the size of dimension 'axis' is a multiple of
        # num_split
        input_axis_size = tf_utils.np_tensor_shape(inputs[1])[axis]
        if input_axis_size % num_split != 0:
            print("Error : Split operation trying to split dimension of "
                  "size %d into %d parts, leaves remainder." %
                  (input_axis_size, num_split))
            return "// Error : Couldn't produce split operation where " \
                   "tensor doesn't divide into num_split parts"

        # Calculate the size in 'axis' of each output slice
        # (integer division, so the emitted Eigen arrays hold ints)
        size = input_axis_size // num_split

        input1_identifier = code_gen.c_safe_identifier(inputs[1].name)
        rank = len(tf_utils.np_tensor_shape(inputs[1]))
        offset = np.zeros(rank, dtype=int)
        extents = tf_utils.np_tensor_shape(inputs[1])
        extents[axis] = size

        # generate code for each output tensor
        for idx in range(num_split):
            code += base_op.BaseOpKernel.output_assignment(tf_op, idx=idx)
            offset[axis] = idx * size
            code += " %s.slice(Eigen::array<int, %d>(%s), " \
                    "Eigen::array<int, %d>(%s));" % \
                    (input1_identifier,
                     rank,
                     code_gen.ndarray_1d_to_literal(offset),
                     rank,
                     code_gen.ndarray_1d_to_literal(extents))
    else:
        # TODO need to implement this
        code = "// Error Split operation does not currently " \
               "support arbitrary sized splits"
    return code
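
# A sketch of the emitted slices (names assumed): splitting a [4,6]
# tensor into two parts along axis 1 emits one slice per output, roughly
#
#   auto split_0 = in.slice(Eigen::array<int, 2>({0,0}),
#                           Eigen::array<int, 2>({4,3}));
#   auto split_1 = in.slice(Eigen::array<int, 2>({0,3}),
#                           Eigen::array<int, 2>({4,3}));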