def gen_code(cls, tf_op, inputs):
    """Generate C++ applying ``std::tanh`` to every element of the input.

    The emitted code declares the output tensor and then walks the flat
    data buffers of the input and output tensors in a single ``for`` loop.

    :param tf_op: operation whose first output receives the result.
    :param inputs: input tensors; only ``inputs[0]`` is read.
    :return: a ``cpp_gen.CodeBlock`` holding the declaration and the loop.
    """
    src_name = code_gen.c_safe_identifier(inputs[0].name)
    dst_name = code_gen.c_safe_identifier(tf_op.outputs[0].name)
    c_type = code_gen.get_c_dtype(inputs[0].dtype.base_dtype)

    # the loop bound is the product of all input dimensions
    element_count = 1
    for extent in tf_utils.np_tensor_shape(inputs[0]):
        element_count *= extent

    block = cpp_gen.CodeBlock()

    # statement defining the output tensor
    declaration = base_op.BaseOpKernel.output_assignment(
        tf_op, eval=True, assignment=False)
    block.add_statement(cpp_gen.Statement(declaration))

    # loop writing tanh of each input element into the output tensor
    loop = cpp_gen.LoopStatement(
        "for", "int i=0; i<%d; ++i" % element_count)
    tanh_assignment = (
        "((%s*)%s.data())[i] = std::tanh(((%s*)%s.data())[i])"
        % (c_type, dst_name, c_type, src_name))
    loop.code.add_statement(cpp_gen.Statement(tanh_assignment))
    block.add_statement(loop)

    return block
def gen_code(cls, tf_op, inputs):
    """Generate an Eigen ``contract`` statement implementing MatMul.

    Rank 1 operands are reshaped to rank 2 before the contraction: the
    first operand becomes a ``1 x N`` row and the second an ``N x 1``
    column. When either operand was reshaped the result is reshaped back
    to rank 1 using the first dimension of the output shape.

    The emitted statement refers to ``matMulDims``, which is not defined
    here (presumably declared elsewhere in the generated source).

    :param tf_op: the MatMul operation being generated.
    :param inputs: the two operand tensors.
    :return: a single C++ statement as a string.
    """
    # generate source information used to generate MatMul statement
    input0_statement = code_gen.c_safe_identifier(inputs[0].name)
    input1_statement = code_gen.c_safe_identifier(inputs[1].name)
    input0_shape = tf_utils.np_tensor_shape(inputs[0])
    input1_shape = tf_utils.np_tensor_shape(inputs[1])

    # if the inputs include vectors then reshape them to rank 2
    reshaped = False
    if len(input0_shape) == 1:
        input0_statement += (
            ".reshape(Eigen::array<int,2>({1,%d}))" % input0_shape[0])
        reshaped = True
    if len(input1_shape) == 1:
        # a rank 1 shape has a single entry, so the vector length lives at
        # index 0 (index 1 would be out of range)
        input1_statement += (
            ".reshape(Eigen::array<int,2>({%d,1}))" % input1_shape[0])
        reshaped = True

    # collapse the rank 2 result back to a vector if operands were vectors
    final_reshape = ""
    if reshaped:
        output_shape = tf_utils.np_tensor_shape(tf_op.outputs[0])
        final_reshape = (
            ".reshape(Eigen::array<int,1>({%d}))" % output_shape[0])

    code = "%s %s.contract(%s, matMulDims)%s;" % \
        (base_op.BaseOpKernel.output_assignment(tf_op, True),
         input0_statement,
         input1_statement,
         final_reshape)
    return code
def gen_code(cls, tf_op, inputs):
    """Generate a call to ``TFMin::DepthwiseConvFloatTFL::depthwiseConv``.

    The strides are read from the operation's ``strides`` attribute
    (elements 1 and 2) and passed to the C++ call in column, row order.

    :param tf_op: the operation being generated.
    :param inputs: ``inputs[0]`` is the data tensor, ``inputs[1]`` the filter.
    :return: the output declaration followed by the C++ call, as a string.
    """
    output_name = code_gen.c_safe_identifier(tf_op.outputs[0].name)
    input_name = code_gen.c_safe_identifier(inputs[0].name)
    filter_name = code_gen.c_safe_identifier(inputs[1].name)

    strides = np.array(tf_op.get_attr("strides"))
    row_stride, col_stride = strides[1], strides[2]

    declaration = base_op.BaseOpKernel.output_assignment(
        tf_op, eval=True, idx=0, assignment=False)
    call = ("TFMin::DepthwiseConvFloatTFL::depthwiseConv("
            "%s, %s, %s, %d, %d)"
            % (input_name, filter_name, output_name,
               col_stride, row_stride))
    return declaration + call
def gen_code(cls, tf_op, inputs):
    """Generate an Eigen ``cwiseMin`` statement over the two inputs.

    :param tf_op: operation whose output receives the result.
    :param inputs: the two operand tensors.
    :return: a single C++ statement as a string.
    """
    lhs_name = code_gen.c_safe_identifier(inputs[0].name)
    rhs_name = code_gen.c_safe_identifier(inputs[1].name)
    assignment = base_op.BaseOpKernel.output_assignment(tf_op, True)
    return "%s %s.cwiseMin(%s);" % (assignment, lhs_name, rhs_name)
def gen_code(cls, tf_op, inputs):
    """Generate C++ dividing the first input by the second.

    When the divisor is a scalar (rank 0, or rank 1 with one element) an
    explicit loop over the flat data buffers is generated, and the
    per-element expression is one of:

    * a right shift, for integer tensors divided by a constant that is a
      power of two greater than or equal to one,
    * a division by a literal, for any other constant divisor,
    * a division by element 0 of the divisor tensor when it is not constant.

    Otherwise a plain Eigen ``a / b`` statement is returned.

    :param tf_op: the division operation being generated.
    :param inputs: ``inputs[0]`` is the numerator, ``inputs[1]`` the divisor.
    :return: a ``cpp_gen.CodeBlock`` for scalar divisors, otherwise a string.
    """
    output_identifier = code_gen.c_safe_identifier(tf_op.outputs[0].name)
    input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
    input1_identifier = code_gen.c_safe_identifier(inputs[1].name)

    # if the second argument is a scalar tensor
    input1_shape = tf_utils.np_tensor_shape(inputs[1])
    divisor_is_scalar = (
        len(input1_shape) == 0 or
        (len(input1_shape) == 1 and input1_shape[0] == 1))

    if not divisor_is_scalar:
        return "%s %s / %s;" % \
            (base_op.BaseOpKernel.output_assignment(tf_op, True),
             input0_identifier,
             input1_identifier)

    input0_size = np.prod(tf_utils.np_tensor_shape(inputs[0]))
    base_dtype = inputs[0].dtype.base_dtype
    c_type = code_gen.get_c_dtype(base_dtype)

    code = cpp_gen.CodeBlock()
    target = "%s" % base_op.BaseOpKernel.output_assignment(
        tf_op, True, assignment=False)
    code.add_statement(
        cpp_gen.Statement(target.replace(";", "").replace('\n', '')))

    # determine the type of expression to use. Either a division by the
    # value of a rank zero tensor, a division by a constant or a shift by
    # a constant in the case of power of two denominators
    if inputs[1].op.type == 'Const':
        const_value = tf_utils.get_const_scalar(inputs[1].op)
        # A shift is only valid C++ for integer operands, and only a
        # non-negative shift count is meaningful. Checking the value first
        # also keeps math.log2 away from zero and negative constants,
        # where it raises ValueError.
        use_shift = (base_dtype.is_integer and
                     const_value >= 1 and
                     math.log2(const_value).is_integer())
        if use_shift:
            expression = ">> %d" % int(math.log2(const_value))
        else:
            expression = "/ (%s)%f" % (c_type, const_value)
    else:
        expression = "/ %s(0)" % input1_identifier

    for_loop = cpp_gen.LoopStatement(
        "for", "int i=0; i<%d; ++i" % input0_size)
    for_loop.code.add_statement(
        cpp_gen.Statement(
            "((%s*)%s.data())[i] = ((%s*)%s.data())[i] %s" %
            (c_type, output_identifier,
             c_type, input0_identifier,
             expression)))
    code.add_statement(for_loop)
    return code
def gen_code(cls, tf_op, inputs):
    """Generate an Eigen statement for an element-wise multiplication.

    Two forms are produced:

    1. ``a * b`` when neither operand is a constant scalar.
    2. ``tensor * (type)constant`` when one operand is a constant scalar
       (rank 0, or rank 1 with a single element); the constant's value is
       written into the statement as a literal.

    :param tf_op: the multiplication operation being generated.
    :param inputs: the two operand tensors.
    :return: a single C++ statement as a string.
    """
    c_type = code_gen.get_c_dtype(tf_op.outputs[0].dtype.base_dtype)

    def _is_scalar(tensor):
        # A rank 1 shape only has dims[0]; both operands use the same test.
        return (tensor.shape.ndims == 0 or
                (tensor.shape.ndims == 1 and tensor.shape.dims[0] == 1))

    param_a_is_scalar = _is_scalar(inputs[0])
    param_b_is_scalar = _is_scalar(inputs[1])
    param_a_is_const = tf_utils.operation_is_constant(inputs[0].op)
    param_b_is_const = tf_utils.operation_is_constant(inputs[1].op)

    # if one of the inputs is a constant scalar then implement form 2
    if param_a_is_const and param_a_is_scalar:
        tensor_identifier = code_gen.c_safe_identifier(inputs[1].name)
        const_value = tf_utils.get_const_scalar(
            tf_utils.get_parent_of_tensor(tf_op.inputs[0]))
        return "%s %s * (%s)%s;" % \
            (base_op.BaseOpKernel.output_assignment(tf_op, True),
             tensor_identifier,
             c_type,
             str(const_value))

    if param_b_is_const and param_b_is_scalar:
        tensor_identifier = code_gen.c_safe_identifier(inputs[0].name)
        const_value = tf_utils.get_const_scalar(
            tf_utils.get_parent_of_tensor(tf_op.inputs[1]))
        return "%s %s * (%s)%s;" % \
            (base_op.BaseOpKernel.output_assignment(tf_op, True),
             tensor_identifier,
             c_type,
             str(const_value))

    # if both inputs are either tensors or not constants then generate
    # form 1
    input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
    input1_identifier = code_gen.c_safe_identifier(inputs[1].name)
    # pass an explicit True, as the other branches do, rather than the
    # builtin ``eval`` function object
    code = "%s %s * %s;" % \
        (base_op.BaseOpKernel.output_assignment(tf_op, True),
         input0_identifier,
         input1_identifier)
    return code
def gen_code(cls, tf_op, inputs):
    """Generate an Eigen ``rsqrt`` statement for the first input.

    :param tf_op: operation whose output receives the result.
    :param inputs: input tensors; only ``inputs[0]`` is read.
    :return: a single C++ statement as a string.
    """
    source_name = code_gen.c_safe_identifier(inputs[0].name)
    assignment = base_op.BaseOpKernel.output_assignment(tf_op, True)
    return "%s %s.rsqrt();" % (assignment, source_name)
def gen_code(cls, tf_op, inputs):
    """Generate an Eigen statement clamping the input below at zero.

    The zero literal is cast to the C type of the operation's output.

    :param tf_op: operation whose output receives the result.
    :param inputs: input tensors; only ``inputs[0]`` is read.
    :return: a single C++ statement as a string.
    """
    source_name = code_gen.c_safe_identifier(inputs[0].name)
    c_type = code_gen.get_c_dtype(tf_op.outputs[0].dtype.base_dtype)
    assignment = base_op.BaseOpKernel.output_assignment(tf_op, True)
    return "%s %s.cwiseMax((%s)0);" % (assignment, source_name, c_type)
def add_weights_to_class(self, class_obj, constructor):
    """Add one private ``Eigen::TensorMap`` member per training tensor.

    For every tensor in ``self.list_training_tensors`` a class property of
    type ``Eigen::TensorMap<Eigen::Tensor<type, rank, layout>>`` is added
    to ``class_obj``, and an entry binding it to the matching weights
    literal is added to the constructor's initialiser list.

    :param class_obj: generated class receiving the properties.
    :param constructor: constructor whose ``initialiser_list`` is extended.
    :return: None
    """
    hex_literal_types = ("float", "double", "long double")

    # Add stored tensors to properties and constructor initialiser list
    for tensor in self.list_training_tensors:
        c_type = code_gen.get_c_dtype(tensor.dtype.base_dtype)
        # Eigen has no use for rank 0 here, so the rank is at least 1
        rank = max(1, len(tf_utils.np_tensor_shape(tensor)))
        member_name = code_gen.c_safe_identifier(tensor.name)

        # Eigen::Tensor<type, rank, layout>
        tensor_args = cpp_gen.TemplateInstance()
        tensor_args.add_element(cpp_gen.TypeDefinition(c_type))
        tensor_args.add_element(str(rank))
        tensor_args.add_element("Eigen::" + self.data_layout)

        # Eigen::TensorMap<Eigen::Tensor<...>>
        map_args = cpp_gen.TemplateInstance()
        map_args.add_element(
            cpp_gen.TypeDefinition('Tensor',
                                   namespace='Eigen',
                                   template=tensor_args))
        map_type = cpp_gen.TypeDefinition('TensorMap',
                                          namespace='Eigen',
                                          template=map_args)

        member = cpp_gen.ClassProperty(member_name, map_type)
        member.access_modifier = "private"
        class_obj.add(member)

        # For now just use literal values,
        # TODO add option to load weights from file as well
        literal_name = (class_obj.identifier + "Weights::" +
                        member_name + "Flat")
        if c_type in hex_literal_types:
            literal_name += "Hex"

        shape = code_gen.ndarray_1d_to_literal(
            tf_utils.np_tensor_shape(tensor), open='', close='')

        # convert rank zero tensor to rank 1 for eigen
        if shape == ' ':
            shape = ' 1 '

        constructor.initialiser_list += [
            "%s((%s*)%s,%s)" % (member_name, c_type, literal_name, shape)
        ]
def gen_code(cls, tf_op, inputs):
    """Generate C++ finding the index of the largest element of the input.

    The emitted code declares the output tensor, keeps a running maximum
    in ``<output>_max`` and loops over the first dimension of the input,
    storing the index of the largest element in element 0 of the output.

    :param tf_op: operation whose first output receives the index.
    :param inputs: input tensors; only ``inputs[0]`` is read.
    :return: a ``cpp_gen.CodeBlock`` with the generated statements.
    """
    identifier = code_gen.c_safe_identifier(tf_op.outputs[0].name)
    input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
    c_type = code_gen.get_c_dtype(inputs[0].dtype.base_dtype)
    input_shape = tf_utils.np_tensor_shape(inputs[0])

    code = cpp_gen.CodeBlock()

    # declaration of the output tensor, without a trailing ';' or newlines
    assignment = base_op.BaseOpKernel.output_assignment(tf_op, True,
                                                        assignment=False)
    if assignment.endswith(';'):
        assignment = assignment[:-1]
    assignment = assignment.replace('\n', '')
    code.add_statement(cpp_gen.Statement(str(assignment)))

    # Start the running maximum at the most negative finite value.
    # numeric_limits<T>::min() is the smallest *positive* value for
    # floating point types, which would never be exceeded by an input
    # made up of negative values; lowest() is correct for both integer
    # and floating point types.
    code.add_statement(
        cpp_gen.Statement("%s %s_max = std::numeric_limits<%s>::lowest()"
                          % (c_type, identifier, c_type)))
    code.add_statement(cpp_gen.Statement("%s(0) = 0" % identifier))

    # body of the loop: remember a new maximum and its index
    if_statement = cpp_gen.IfStatement(
        "%s(%s_it) > %s_max" % (input0_identifier, identifier, identifier))
    if_statement.if_code.add_statement(
        cpp_gen.Statement("%s_max = %s(%s_it)" %
                          (identifier, input0_identifier, identifier)))
    if_statement.if_code.add_statement(
        cpp_gen.Statement("%s(0) = %s_it" % (identifier, identifier)))

    for_loop = cpp_gen.LoopStatement(
        "for",
        "long %s_it=0; %s_it<%d; ++%s_it" %
        (identifier, identifier, input_shape[0], identifier))
    for_loop.code.add_statement(if_statement)
    code.add_statement(for_loop)

    return code
def gen_code(cls, tf_op, inputs):
    """Generate an Eigen statement dividing the first input by the second.

    A scalar divisor (rank 0, or rank 1 with one element) is expanded to
    the shape of the numerator with ``constant`` before dividing.

    :param tf_op: operation whose output receives the result.
    :param inputs: ``inputs[0]`` is the numerator, ``inputs[1]`` the divisor.
    :return: a single C++ statement as a string.
    """
    numerator = code_gen.c_safe_identifier(inputs[0].name)
    divisor = code_gen.c_safe_identifier(inputs[1].name)

    divisor_shape = tf_utils.np_tensor_shape(inputs[1])
    rank = len(divisor_shape)
    divisor_is_scalar = rank == 0 or (rank == 1 and divisor_shape[0] == 1)

    if divisor_is_scalar:
        # broadcast element 0 of the divisor across the numerator's shape
        divisor_expression = "%s.constant(%s(0))" % (numerator, divisor)
    else:
        divisor_expression = divisor

    assignment = base_op.BaseOpKernel.output_assignment(tf_op, True)
    return "%s %s / %s;" % (assignment, numerator, divisor_expression)
def gen_code(cls, tf_op, inputs):
    """Generate an Eigen ``concatenate`` statement joining two tensors.

    The axis is read from the constant feeding ``inputs[2]``. If the
    shape reported by ``tf_utils.np_tensor_shape`` for the output has a
    lower rank than the TensorFlow shape, the axis is reduced by the
    difference.

    :param tf_op: the concatenation operation being generated.
    :param inputs: two tensors to join followed by the axis tensor.
    :return: a single C++ statement as a string.
    """
    first_name = code_gen.c_safe_identifier(inputs[0].name)
    second_name = code_gen.c_safe_identifier(inputs[1].name)

    axis_op = tf_utils.get_parent_of_tensor(inputs[2])
    axis = tf_utils.get_const_scalar(axis_op)

    # if there is an undefined batch dimension that has been collapsed
    # shift the axis index down by the number of missing dimensions
    result = tf_op.outputs[0]
    reduced_rank = len(tf_utils.np_tensor_shape(result))
    if reduced_rank != result.shape.ndims:
        axis -= (result.shape.ndims - reduced_rank)

    assignment = base_op.BaseOpKernel.output_assignment(tf_op)
    return "%s %s.concatenate(%s, %d);" % (
        assignment, first_name, second_name, axis)
def gen_code(cls, tf_op, inputs):
    """Generate an Eigen ``cast`` statement to the operation's ``DstT`` type.

    :param tf_op: the cast operation; its ``DstT`` attribute gives the type.
    :param inputs: input tensors; only ``inputs[0]`` is read.
    :return: a single C++ statement as a string.
    """
    source_name = code_gen.c_safe_identifier(inputs[0].name)
    target_type = code_gen.get_c_dtype(tf_op.get_attr("DstT"))
    assignment = base_op.BaseOpKernel.output_assignment(tf_op, eval=False)
    return "%s %s.cast<%s>();" % (assignment, source_name, target_type)
def gen_code(cls, tf_op, inputs):
    """Generate an Eigen statement adding a bias tensor to the input.

    When the two shapes differ the bias is reshaped so that its length
    sits in the last dimension (all other dimensions 1) and is then
    broadcast by the input's shape with the last dimension set to 1.

    :param tf_op: operation whose output receives the result.
    :param inputs: ``inputs[0]`` is the value tensor, ``inputs[1]`` the bias.
    :return: a single C++ statement as a string.
    """
    value_name = code_gen.c_safe_identifier(inputs[0].name)
    bias_name = code_gen.c_safe_identifier(inputs[1].name)

    # If the bias tensor needs to be cast into the same type as the input
    # (currently never populated)
    bias_cast = ""
    # if the bias tensor needs to be broadcast into the same shape as the
    # input
    bias_broadcast = ""

    value_shape = tf_utils.np_tensor_shape(inputs[0])
    bias_shape = tf_utils.np_tensor_shape(inputs[1])

    shapes_match = (
        len(value_shape) == len(bias_shape) and
        all(value_shape[i] == bias_shape[i]
            for i in range(len(value_shape))))

    if not shapes_match:
        # multipliers: the input shape with the last dimension left alone
        broadcast_shape = tf_utils.np_tensor_shape(inputs[0])
        broadcast_shape[-1] = 1

        # bias laid out along the last dimension
        leading_ones = [1] * (len(broadcast_shape) - 1)
        reshape_shape = np.array(leading_ones + [bias_shape[0]])

        bias_broadcast = "\n .reshape(Eigen::array<int, %d>(%s))" % (
            len(reshape_shape),
            code_gen.ndarray_1d_to_literal(reshape_shape))
        bias_broadcast += "\n .broadcast(Eigen::array<int, %d>(%s))" % (
            len(broadcast_shape),
            code_gen.ndarray_1d_to_literal(broadcast_shape))

    assignment = base_op.BaseOpKernel.output_assignment(tf_op, False)
    return "%s %s + %s%s%s;" % (
        assignment, value_name, bias_name, bias_cast, bias_broadcast)
def gen_code(cls, tf_op, inputs):
    """Generate an Eigen ``reshape`` statement to the output's shape.

    Whether the result is evaluated is controlled by
    ``base_op.BaseOpKernel.evaluate_all``.

    :param tf_op: operation whose first output defines the target shape.
    :param inputs: input tensors; only ``inputs[0]`` is read.
    :return: a single C++ statement as a string.
    """
    target_shape = tf_utils.np_tensor_shape(tf_op.outputs[0])
    source_name = code_gen.c_safe_identifier(inputs[0].name)

    assignment = base_op.BaseOpKernel.output_assignment(
        tf_op, base_op.BaseOpKernel.evaluate_all)
    rank = len(target_shape)
    shape_literal = code_gen.ndarray_1d_to_literal(target_shape)

    return "%s %s.reshape(Eigen::array<int, %d>(%s));" % (
        assignment, source_name, rank, shape_literal)
def gen_code(cls, tf_op, inputs):
    """Generate an Eigen expression for the logistic sigmoid of the input.

    The expression is ``1 / (1 + exp(0 - x))`` with each literal cast to
    the C type of the input. No trailing ';' is emitted.

    :param tf_op: operation whose output receives the result.
    :param inputs: input tensors; only ``inputs[0]`` is read.
    :return: the C++ expression as a string.
    """
    source_name = code_gen.c_safe_identifier(inputs[0].name)
    c_type = code_gen.get_c_dtype(inputs[0].dtype.base_dtype)
    assignment = base_op.BaseOpKernel.output_assignment(tf_op, True)

    template = "%s (%s)1.0 / ((%s)1.0 + ((%s)0.0 - %s).exp())"
    return template % (assignment, c_type, c_type, c_type, source_name)
def gen_code(cls, tf_op, inputs):
    """Generate an Eigen statement taking the maximum of x and alpha * x.

    ``alpha`` is read from the operation's ``alpha`` attribute and written
    into the statement as a literal cast to the output's C type.

    :param tf_op: operation providing ``alpha`` and the output tensor.
    :param inputs: input tensors; only ``inputs[0]`` is read.
    :return: a single C++ statement as a string.
    """
    slope = tf_op.get_attr("alpha")
    source_name = code_gen.c_safe_identifier(inputs[0].name)
    c_type = code_gen.get_c_dtype(tf_op.outputs[0].dtype.base_dtype)
    assignment = base_op.BaseOpKernel.output_assignment(tf_op, True)

    return "%s %s.cwiseMax(%s * (%s)%f);" % (
        assignment, source_name, source_name, c_type, slope)
def gen_code(cls, tf_op, inputs):
    """Generate a call to ``TFMin::ConvTFL::conv`` for a convolution.

    The padding mode comes from the ``padding`` attribute, the strides
    from elements 1 and 2 of the ``strides`` attribute, and both
    dilations are fixed at 1. Strides and dilations are passed to the C++
    call in column, row order.

    :param tf_op: the convolution operation being generated.
    :param inputs: ``inputs[0]`` is the data tensor, ``inputs[1]`` the filter.
    :return: the output declaration followed by the C++ call, as a string.
    """
    input_name = code_gen.c_safe_identifier(inputs[0].name)
    filter_name = code_gen.c_safe_identifier(inputs[1].name)
    output_name = code_gen.c_safe_identifier(tf_op.outputs[0].name)

    # any other padding value leaves the bare "Eigen::" prefix in place
    padding_names = {b'SAME': "PADDING_SAME", b'VALID': "PADDING_VALID"}
    padding = "Eigen::" + padding_names.get(tf_op.get_attr("padding"), "")

    strides = np.array(tf_op.get_attr("strides"))
    row_stride, col_stride = strides[1], strides[2]
    row_dilation = col_dilation = 1

    declaration = base_op.BaseOpKernel.output_assignment(
        tf_op, eval=True, idx=0, assignment=False)
    call = "TFMin::ConvTFL::conv(%s, %s, %s, %s, %d, %d, %d, %d)" % (
        input_name, filter_name, output_name, padding,
        col_stride, row_stride, col_dilation, row_dilation)
    return declaration + call
def gen_code(cls, tf_op, inputs):
    """Generate an Eigen statement clamping the input to the range 0 to 6.

    Both bounds are cast to the C type of the operation's output.

    :param tf_op: operation whose output receives the result.
    :param inputs: input tensors; only ``inputs[0]`` is read.
    :return: a single C++ statement as a string.
    """
    source_name = code_gen.c_safe_identifier(inputs[0].name)
    c_type = code_gen.get_c_dtype(tf_op.outputs[0].dtype.base_dtype)

    # if this is operating on quantised data then the upper bound will
    # need multiplying by the correct power of 2.
    upper_bound = 6

    assignment = base_op.BaseOpKernel.output_assignment(tf_op, True)
    return "%s %s.cwiseMax((%s)0).cwiseMin((%s)%d);" % (
        assignment, source_name, c_type, c_type, upper_bound)
def add_verification_to_class(self, class_obj, constructor):
    """Add ``<tensor>_val`` TensorMap members holding verification data.

    Nothing is done unless ``self.validation_type`` is ``'Full'``. In that
    case every output of every operation in ``self.list_operations`` gets
    a private ``Eigen::TensorMap`` property named ``<identifier>_val`` and
    an initialiser binding it to the matching verification data literal.

    :param class_obj: generated class receiving the properties.
    :param constructor: constructor whose ``initialiser_list`` is extended.
    :return: None
    """
    if self.validation_type != 'Full':
        return

    hex_literal_types = ("float", "double", "long double")

    for op in self.list_operations:
        for out in op.outputs:
            identifier = code_gen.c_safe_identifier(out.name)
            member_name = identifier + "_val"

            # rank zero tensors are stored as rank 1 with one element
            shape = tf_utils.np_tensor_shape(out)
            if len(shape) == 0:
                shape = [1]
            c_type = code_gen.get_c_dtype(out.dtype)

            # Eigen::Tensor<type, rank, layout>
            tensor_args = cpp_gen.TemplateInstance()
            tensor_args.add_element(cpp_gen.TypeDefinition(c_type))
            tensor_args.add_element(str(len(shape)))
            tensor_args.add_element("Eigen::" + self.data_layout)

            # Eigen::TensorMap<Eigen::Tensor<...>>
            map_args = cpp_gen.TemplateInstance()
            map_args.add_element(
                cpp_gen.TypeDefinition('Tensor',
                                       namespace='Eigen',
                                       template=tensor_args))
            map_type = cpp_gen.TypeDefinition('TensorMap',
                                              namespace='Eigen',
                                              template=map_args)

            member = cpp_gen.ClassProperty(member_name, map_type)
            member.access_modifier = "private"
            class_obj.add(member)

            # floating point literals carry a "Hex" suffix on their name
            lit_suffix = "Hex" if c_type in hex_literal_types else ""
            literal_identifier = (class_obj.identifier + "Weights::" +
                                  identifier + "VerificationData" +
                                  lit_suffix)

            shape_literal = code_gen.ndarray_1d_to_literal(
                shape, open='', close='')
            constructor.initialiser_list += [
                "%s((%s*)%s,%s)" % (member_name, c_type,
                                    literal_identifier, shape_literal)
            ]
def gen_code(cls, tf_op, inputs):
    """Generate Eigen statements computing a softmax of the input.

    Three statements are emitted: the element-wise exponential, the sum
    of that exponential as a rank 0 tensor, and the division of the
    exponential by the sum.

    :param tf_op: operation whose output receives the result.
    :param inputs: input tensors; only ``inputs[0]`` is read.
    :return: the C++ statements as one string.
    """
    source_name = code_gen.c_safe_identifier(inputs[0].name)
    c_type = code_gen.get_c_dtype(inputs[0].dtype.base_dtype)

    exp_statement = "\nauto %sExp = %s.exp();" % (source_name, source_name)

    sum_statement = "\nEigen::Tensor<%s, 0, %s> %sExpSum = %sExp.sum();" % (
        c_type, base_op.BaseOpKernel.data_layout, source_name, source_name)

    divide_statement = "%s %sExp / %sExp.constant(%sExpSum(0));" % (
        base_op.BaseOpKernel.output_assignment(tf_op, True),
        source_name, source_name, source_name)

    return exp_statement + sum_statement + divide_statement
def gen_code(cls, tf_op, inputs):
    """Generate an Eigen ``slice`` statement for a Slice operation.

    Only constant ``begin`` and ``size`` tensors are supported; for
    anything else an error is printed and a C++ comment is returned in
    place of the statement. A size of -1 in any dimension is replaced by
    the number of elements remaining in the input from ``begin``.

    The rank of the generated ``Eigen::array`` arguments is taken from
    the length of the ``begin`` tensor rather than being fixed at 2.

    :param tf_op: the Slice operation being generated.
    :param inputs: the input tensor, the begin tensor and the size tensor.
    :return: a C++ statement, or a C++ comment describing the error.
    """
    input_identifier = code_gen.c_safe_identifier(inputs[0].name)

    begin_type = tf_utils.get_parent_of_tensor(inputs[1]).type
    if begin_type != "Const":
        print("Error generating 'Slice' Operation: op_kernel only "
              "supports Constant begin tensors.")
        return "// Error cannot generate Slice operation with " \
               "non-const begin tensor!"

    size_type = tf_utils.get_parent_of_tensor(inputs[2]).type
    if size_type != "Const":
        print("Error generating 'Slice' Operation: op_kernel only "
              "supports Constant size tensors.")
        return "// Error cannot generate Slice operation with " \
               "non-const size tensor!"

    begin = tf_utils.get_const_tensor(
        tf_utils.get_parent_of_tensor(inputs[1]))
    size = tf_utils.get_const_tensor(
        tf_utils.get_parent_of_tensor(inputs[2]))

    # if -1 was given for any size dimensions then set them to the size
    # required to fill the remainder of the input
    for si in range(len(size)):
        if size[si] == -1:
            size[si] = inputs[0].dim_size(si) - begin[si]

    # one offset and one extent per sliced dimension
    rank = len(begin)

    code = "%s %s.slice(Eigen::array<int, %d>(%s), " \
           "Eigen::array<int, %d>({%s}));" % \
           (base_op.BaseOpKernel.output_assignment(tf_op, True),
            input_identifier,
            rank,
            code_gen.ndarray_1d_to_literal(begin),
            rank,
            code_gen.ndarray_1d_to_literal(size))
    return code
def gen_code(cls, tf_op, inputs):
    """Generate C++ filling the output tensor with a constant value.

    The value is read from the constant feeding ``inputs[1]`` and written
    into a ``setConstant`` call, cast to the output's C type.

    :param tf_op: operation whose first output is filled.
    :param inputs: input tensors; ``inputs[1]`` supplies the fill value.
    :return: a ``cpp_gen.CodeBlock`` with the declaration and the fill.
    """
    output_name = code_gen.c_safe_identifier(tf_op.outputs[0].name)
    c_type = code_gen.get_c_dtype(tf_op.outputs[0].dtype.base_dtype)

    value_op = tf_utils.get_parent_of_tensor(inputs[1])
    fill_value = tf_utils.get_const_scalar(value_op)

    block = cpp_gen.CodeBlock()

    declaration = base_op.BaseOpKernel.output_assignment(
        tf_op, eval=True, assignment=False)
    block.add_statement(cpp_gen.Statement(declaration))

    fill_call = "%s.setConstant((%s)%f)" % (output_name, c_type, fill_value)
    block.add_statement(cpp_gen.Statement(fill_call))

    return block
def add_memory_trace(self, model_class, constructor): """ add_memory_trace method, adds the properties, calls and template instantiations required to run this model attached to the memory tracer utility and analyse it's memory use pattern. :param model_class: :param constructor: :return: """ # make the memory block pointer public model_class.element_by_identifier('memoryBlock').\ access_modifier = "public" # add safe write space incase the calling process doesn't initialize # the event location pointers safe_write_space = cpp_gen.ClassProperty( 'safeWriteSpace', type=cpp_gen.TypeDefinition('int')) safe_write_space.comment = cpp_gen.Comment( "Default location for event trace writes.") safe_write_space.access_modifier = 'private' model_class.add(safe_write_space) # add vector of trace pointers and operation names to class trace_events = cpp_gen.ClassProperty( 'traceEvents', type=cpp_gen.TypeDefinition('TFMin::MemoryTraceEvents')) model_class.add(trace_events) # add vector of memory areas to class memory_areas = cpp_gen.ClassProperty( 'memoryAreas', type=cpp_gen.TypeDefinition('TFMin::MemoryTraceAreas')) model_class.add(memory_areas) # populate events in class constructor for op in self.list_operations: constructor.code_block.add_statement( cpp_gen.Statement( "traceEvents.push_back(TFMin::MemoryTraceEvent" "(\"%s\", &safeWriteSpace))" % op.name)) # populate memore areas in class constructor for area in self.allocated_memory_areas: constructor.code_block.add_statement( cpp_gen.Statement( "memoryAreas.push_back(TFMin::MemoryTraceArea" "(%d, %d, \"%s\", \"%s\"))" % (area['offset'], area['size'], area['start_op'], area['end_op']))) # add memory map size property memory_map_size = cpp_gen.ClassProperty( 'memoryMapSize', type=cpp_gen.TypeDefinition('unsigned long')) model_class.add(memory_map_size) constructor.code_block.add_statement( cpp_gen.Statement("memoryMapSize = %d" % self.memory_map_size)) # add event trace pointers to each operation for op in self.list_operations: 
identifier = code_gen.c_safe_identifier(op.name) + '_TraceEvent' trace_pointer = cpp_gen.ClassProperty(identifier, type=cpp_gen.TypeDefinition( 'int', volatile=True, ptr_levels=1)) trace_pointer.access_modifier = 'public' model_class.add(trace_pointer) constructor.code_block.add_statement( cpp_gen.Statement("%s = &safeWriteSpace" % identifier)) # add 'Eigen::MemPreallocDevice' explicit instantiation to all # evaluation # explc_inst_pre_device = cpp_gen.TemplateInstance() # explc_inst_pre_thread_device = cpp_gen.TemplateInstance() # explc_inst_pre_device.add_element(cpp_gen.TypeDefinition('Eigen::MemPreallocDevice')) # explc_inst_pre_thread_device.add_element(cpp_gen.TypeDefinition('Eigen::ThreadPoolDevice')) # additional_explc_instationations = [explc_inst_pre_device, # explc_inst_pre_thread_device] """eval_method = model_class.element_by_identifier("eval")
def add_parameters_to_methods(self, eval_method, validate_method,
                              timing_method, class_name):
    """Add input/output parameters and TensorMaps to the generated methods.

    For each input placeholder a pointer parameter ``<identifier>Param``
    is added to the eval and timing methods together with a statement
    wrapping it in an ``Eigen::TensorMap``. The validate method instead
    maps the input onto the verification data literal in
    ``<class_name>Weights``.

    For each output tensor a pointer parameter is added to the eval and
    timing methods, a dummy buffer of the same name is declared in the
    validate method, and the tensor (or the input of its producing
    ``Identity`` operation) is tagged with ``tfmin_output_identifier``.

    Finally the timing method gets a ``print`` parameter defaulting to
    ``true`` and the eval and timing method comments are extended with a
    listing of the parameters.

    :param eval_method: generated eval method.
    :param validate_method: generated validate method.
    :param timing_method: generated timing method.
    :param class_name: name of the generated class, used to locate the
        verification data literals.
    :return: None
    """
    hex_literal_types = ("float", "double", "long double")

    parameter_comment = "Input tensors\n"
    for i, input_placeholder in enumerate(self.list_input_placeholders):
        type = code_gen.get_c_dtype(
            input_placeholder.outputs[0].dtype.base_dtype)
        identifier = code_gen.c_safe_identifier(
            input_placeholder.outputs[0].name)
        # rank zero tensors are mapped as rank 1 with one element
        shape = tf_utils.np_tensor_shape(input_placeholder.outputs[0])
        if len(shape) == 0:
            shape = [1]
        parameter_comment += "[%s] %s %s\n" % (
            type,
            identifier,
            str(input_placeholder.outputs[0].shape[1:]))

        eval_method.parameter_list.add(
            cpp_gen.Parameter(identifier + "Param",
                              cpp_gen.TypeDefinition(type, ptr_levels=1)))
        timing_method.parameter_list.add(
            cpp_gen.Parameter(identifier + "Param",
                              cpp_gen.TypeDefinition(type, ptr_levels=1)))

        param_tensor_map = "Eigen::TensorMap<Eigen::Tensor" \
                           "<%s, %d, %s>> %s(%s,%s)" % \
                           (type,
                            len(shape),
                            "Eigen::" + self.data_layout,
                            identifier,
                            identifier + "Param",
                            code_gen.ndarray_1d_to_literal(shape,
                                                           open='',
                                                           close=''))

        # Only floating point literals carry the "Hex" suffix, matching
        # the naming used when the verification members are added to the
        # class; an unconditional suffix would name a literal that does
        # not exist for integer inputs.
        lit_suffix = "Hex" if type in hex_literal_types else ""
        val_data_identifier = (class_name + "Weights::" + identifier +
                               "VerificationData" + lit_suffix)
        val_tensor_map = (
            "Eigen::TensorMap<Eigen::Tensor"
            "<%s, %d, %s>> %s((%s*)%s,%s)" %
            (type,
             len(shape),
             "Eigen::" + self.data_layout,
             identifier,
             type,
             val_data_identifier,
             code_gen.ndarray_1d_to_literal(shape, open='', close='')))

        # only the first generated statement carries the section comment
        comment = None
        if i == 0:
            comment = cpp_gen.Comment("Creating TensorMaps of inputs")

        eval_method.code_block.add_statement(
            cpp_gen.Statement(param_tensor_map, comment))
        timing_method.code_block.add_statement(
            cpp_gen.Statement(param_tensor_map, comment))
        validate_method.code_block.add_statement(
            cpp_gen.Statement(val_tensor_map, comment))

    parameter_comment += "Output tensors\n"
    for out in self.output_tensors:
        type = code_gen.get_c_dtype(out.dtype)
        identifier = code_gen.c_safe_identifier(out.name)
        shape = tf_utils.np_tensor_shape(out)
        parameter_comment += "[%s] %s [%s]\n" % \
                             (type,
                              identifier,
                              code_gen.ndarray_1d_to_literal(shape,
                                                             open='',
                                                             close=''))

        eval_method.parameter_list.add(
            cpp_gen.Parameter(identifier + "Param",
                              cpp_gen.TypeDefinition(type, ptr_levels=1)))
        timing_method.parameter_list.add(
            cpp_gen.Parameter(identifier + "Param",
                              cpp_gen.TypeDefinition(type, ptr_levels=1)))

        # create buffers to hold final output tensors in the validate
        # method which doesn't actually return anything to the calling
        # process
        dummy_param = "%s %s[%d]" % (type,
                                     identifier + "Param",
                                     np.prod(shape))
        dummy_param_comment = cpp_gen.Comment("Dummy parameter for output")
        validate_method.code_block.add_statement(
            cpp_gen.Statement(dummy_param, dummy_param_comment))

        # Tag this tensor as an output so that operation kernels will
        # map the output to the given function parameter instead of a
        # block in the memory map. When the output is produced by an
        # Identity operation the tag goes on that operation's input.
        if out.op.type == 'Identity':
            out = out.op.inputs[0]
        out.tfmin_output_identifier = identifier + "Param"

    timing_method.parameter_list.add(
        cpp_gen.Parameter('print',
                          cpp_gen.TypeDefinition('bool'),
                          default='true'))

    eval_method.comment.text += parameter_comment
    timing_method.comment.text += parameter_comment
def write_data_header(self, file_name, class_name,
                      validation_type='Full', validation_inputs=None):
    """Write the C++ header holding weight and verification literals.

    The header is wrapped in an include guard and a
    ``<class_name>Weights`` namespace. Every tensor in
    ``self.list_training_tensors`` is evaluated with ``self.sess`` and
    written flattened in the order matching ``self.data_layout``. When
    ``validation_type`` is ``"Full"`` the outputs of every operation are
    appended to ``self.list_verification_tensors`` and each tensor in
    that list is evaluated with ``validation_inputs`` and written too.

    :param file_name: path of the header file to write.
    :param class_name: name of the generated class.
    :param validation_type: ``"Full"`` to include verification data.
    :param validation_inputs: feed dictionary used for verification runs.
    :return: None
    """
    # write model training data file
    with open(file_name, "w") as data_file:
        guard = "__%s_WEIGHTS_H__" % class_name.upper()
        rule = "//" + "-" * 80 + "\n"

        # write file header
        data_file.write("#ifndef %s\n" % guard)
        data_file.write("#define %s\n" % guard)
        data_file.write(rule)
        data_file.write("// Training data literal declarations.\n")
        data_file.write("// Generated by TFMin, do not edit.\n")
        data_file.write(rule)

        # numpy flattening order matching the Eigen data layout
        data_order = 'C' if self.data_layout == 'RowMajor' else 'F'

        # Debugging aid to generate data header
        # without MBs of text so it is unloadable!
        export_data = True

        data_file.write("namespace %sWeights\n{\n\n" % class_name)

        # evaluate and write model weights
        for weight in self.list_training_tensors:
            # write flat version
            [weight_value] = self.sess.run([weight], {})
            literal_name = code_gen.c_safe_identifier(weight.name) + "Flat"
            flat_values = weight_value.reshape(weight_value.size,
                                               order=data_order)
            tf_utils.write_numpy_array_c(data_file,
                                         " " + literal_name,
                                         flat_values,
                                         export_data)

        # if required add verification data
        if validation_type == "Full":
            for op in self.list_operations:
                for produced in op.outputs:
                    self.list_verification_tensors += [produced]

            for checked in self.list_verification_tensors:
                [checked_value] = self.sess.run([checked],
                                                validation_inputs)
                literal_name = (code_gen.c_safe_identifier(checked.name) +
                                "VerificationData")
                flat_values = checked_value.reshape(
                    np.prod(checked_value.shape), order=data_order)
                tf_utils.write_numpy_array_c(data_file,
                                             " " + literal_name,
                                             flat_values,
                                             export_data)

        data_file.write("}\n\n")
        data_file.write("#endif // %s\n" % guard)
def gen_code(cls, tf_op, inputs):
    """Generate Eigen ``slice`` statements implementing a Split operation.

    Only the form with a constant axis (``inputs[0]``) and an integer
    ``num_split`` attribute is supported: the tensor ``inputs[1]`` is cut
    into ``num_split`` equal slices along the axis, one per output.
    Unsupported cases return a C++ comment describing the problem.

    :param tf_op: the Split operation being generated.
    :param inputs: the axis tensor followed by the tensor to split.
    :return: the generated C++ statements, or a C++ comment on error.
    """
    num_split = tf_op.get_attr("num_split")

    # This development version only supports the form where axis is
    # provided by a rank 0 constant operation
    if tf_utils.get_parent_of_tensor(inputs[0]).type != "Const":
        print("Error : Split operation doesn't support computed values "
              "for axis yet!")
        return "// Error : Couldn't produce split operation with a " \
               "computed axis dimension."

    # axis is provided by the first input tensor
    axis = tf_utils.get_const_scalar(
        tf_utils.get_parent_of_tensor(inputs[0]))

    # if there is an undefined batch dimension that has been collapsed
    # reduce the axis index by the number of missing dimensions
    reduced_rank = len(tf_utils.np_tensor_shape(tf_op.outputs[0]))
    if reduced_rank != tf_op.outputs[0].shape.ndims:
        axis -= (tf_op.outputs[0].shape.ndims - reduced_rank)

    # if num_split is an integer then generate form 1 of this
    # operation where the input tensor is split into
    # num_split tensors, divided evenly along axis
    if not isinstance(num_split, int):
        # TODO need to implement this
        return "// Error Split operation does not currently " \
               "support arbitrary sized splits"

    # verify that the size of dimension 'axis' is a multiple of num_split
    input_axis_size = tf_utils.np_tensor_shape(inputs[1])[axis]
    if input_axis_size % num_split != 0:
        print("Error : Split operation trying to split dimenson of "
              "size %d into %d parts, leaves remainder."
              % (input_axis_size, num_split))
        return "// Error : Couldn't produce split operation where " \
               "tensor doesn't divide into num_split parts"

    # Size in 'axis' of each output slice. Integer division keeps this an
    # exact element count; divisibility was checked above.
    size = input_axis_size // num_split
    input1_identifier = code_gen.c_safe_identifier(inputs[1].name)
    rank = len(tf_utils.np_tensor_shape(inputs[1]))

    offset = np.zeros(rank, dtype=int)
    extents = tf_utils.np_tensor_shape(inputs[1])
    extents[axis] = size

    # generate code for each output tensor
    code = ""
    for idx in range(num_split):
        code += base_op.BaseOpKernel.output_assignment(tf_op, idx=idx)
        offset[axis] = idx * size
        code += " %s.slice(Eigen::array<int, %d>(%s), " \
                "Eigen::array<int, %d>(%s));" % \
                (input1_identifier,
                 rank,
                 code_gen.ndarray_1d_to_literal(offset),
                 rank,
                 code_gen.ndarray_1d_to_literal(extents))
    return code
def gen_code(cls, tf_op, inputs):
    """Generate an Eigen statement adding two tensors, with broadcasting.

    When the shapes match this is a simple element-wise addition. When
    they differ, the lower rank operand is first reshaped with leading
    size 1 dimensions, and the smaller operand is then broadcast up to
    the size of the larger one. Shapes which cannot be broadcast print an
    error and return a C++ comment instead of a statement.

    :param tf_op: the Add operation being generated.
    :param inputs: the two operand tensors.
    :return: a C++ statement, or a C++ comment describing the error.
    """
    input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
    input1_identifier = code_gen.c_safe_identifier(inputs[1].name)

    input0_expression = input0_identifier
    input1_expression = input1_identifier

    input0_shape = tf_utils.np_tensor_shape(inputs[0])
    input1_shape = tf_utils.np_tensor_shape(inputs[1])

    if not np.array_equal(input0_shape, input1_shape):
        # index of the operand to be broadcast, once known
        smaller = None

        # if one shape has lower rank than the other then pad the smaller
        # rank with size 1 dimensions. The builtin ``int`` is used as the
        # dtype because the ``np.int`` alias no longer exists in current
        # NumPy releases.
        if input1_shape.size < input0_shape.size:
            smaller = 1
            padding = np.ones(int(input0_shape.size - input1_shape.size),
                              int)
            input1_shape = np.concatenate((padding, input1_shape))
            input1_expression += ".reshape(Eigen::array<int, %d>(%s))" % \
                                 (input1_shape.size,
                                  code_gen.ndarray_1d_to_literal(
                                      input1_shape))
        elif input0_shape.size < input1_shape.size:
            smaller = 0
            padding = np.ones(int(input1_shape.size - input0_shape.size),
                              int)
            input0_shape = np.concatenate((padding, input0_shape))
            input0_expression += ".reshape(Eigen::array<int, %d>(%s))" % \
                                 (input0_shape.size,
                                  code_gen.ndarray_1d_to_literal(
                                      input0_shape))

        # per-dimension repeat counts; 1 wherever the dimensions agree
        broadcast_multiplier = np.ones(input1_shape.size, dtype=int)

        for d in range(input0_shape.size):
            if input0_shape[d] != input1_shape[d]:

                # check error cases where dimensions are not universally
                # smaller on one side
                if (smaller == 0 and input0_shape[d] > input1_shape[d]) or \
                        (smaller == 1 and
                         input1_shape[d] > input0_shape[d]):
                    print(
                        "Error: Add operation with non-broadcastable sized input tensors!"
                    )
                    return "// Error generating Add operation, non-broadcastable sized input tensors."

                # check error case where dimensions are not equal or one
                # of them is 1
                if (input0_shape[d] < input1_shape[d] and
                        input0_shape[d] != 1) or \
                        (input1_shape[d] < input0_shape[d] and
                         input1_shape[d] != 1):
                    print(
                        "Error: Add operation with non-broadcastable sized input tensors!"
                    )
                    return "// Error generating Add operation, non-broadcastable sized input tensors."

                # check if this dimension defines the smallest tensor
                if smaller is None and input0_shape[d] < input1_shape[d]:
                    smaller = 0
                elif smaller is None and input1_shape[d] < input0_shape[d]:
                    smaller = 1

                # update the broadcast multiplier for this dimension
                if smaller == 0:
                    broadcast_multiplier[d] = input1_shape[d]
                else:
                    broadcast_multiplier[d] = input0_shape[d]

        broadcast_expression = ".broadcast(Eigen::array<int, %d>(%s))" % \
                               (broadcast_multiplier.size,
                                code_gen.ndarray_1d_to_literal(
                                    broadcast_multiplier))

        # update the expression for the smaller tensor
        if smaller == 0:
            input0_expression += broadcast_expression
        elif smaller == 1:
            input1_expression += broadcast_expression

    code = "%s %s + %s;" % \
           (base_op.BaseOpKernel.output_assignment(tf_op, True),
            input0_expression,
            input1_expression)
    return code
def add_operations_to_method(self, method, type='eval'):
    """Append the generated code of every operation to ``method``.

    Iterates over ``self.list_operations``, asks the matching op kernel to
    generate code for each operation and adds the result, wrapped with the
    extra statements required by the selected mode, to
    ``method.code_block``.

    Args:
        method: Object whose ``code_block`` receives the generated code.
        type: Generation mode.  ``'timing'`` surrounds each operation with
            timing and optional progress output and makes the method return
            ``result``.  ``'validate'`` sets
            ``base_op.BaseOpKernel.evaluate_all`` while generating, compares
            every output with its ``<identifier>_val`` counterpart and makes
            the method return ``true``/``false``.  Any other value (the
            default is ``'eval'``) emits only the operations.
    """
    if type == 'validate':
        base_op.BaseOpKernel.evaluate_all = True

    # The evaluate_all flag lives on the class, so it is reset in a finally
    # clause to avoid leaving it set when code generation raises.
    try:
        if type == 'timing':
            comment = cpp_gen.Comment("Timing working variables")
            method.code_block.add_statement(
                cpp_gen.Statement(self.lib_namespace + "TimingResult result",
                                  comment=comment))
            method.code_block.add_statement(cpp_gen.Statement("float start"))

        # Add operations, including validation and timing as required
        for idx, op in enumerate(self.list_operations):
            operation_code = cpp_gen.CodeBlock()

            if type == 'timing':
                operation_code.add_statement(
                    cpp_gen.Statement("start = getTime()"))
                print_if_statement = cpp_gen.IfStatement("print")
                print_if_statement.if_code.add_statement(
                    cpp_gen.Statement(
                        "std::cout << \"Starting %s [%s]\" << std::endl" %
                        (op.name, op.type)))
                operation_code.add_statement(print_if_statement)

            if type == 'validate':
                operation_code.add_statement(
                    cpp_gen.Statement("std::cout << \"About to perform "
                                      "%s operation [%s]\\n\"" %
                                      (op.type, op.name)))

            if self.export_memory_trace:
                # Write the first word of the first input placeholder to
                # this operation's trace event address.
                # NOTE(review): unlike the other statements this text
                # already ends with ';' - confirm cpp_gen.Statement does
                # not append a second one.
                first_parameter = self.list_input_placeholders[0]
                source_identifier = code_gen.c_safe_identifier(
                    first_parameter.outputs[0].name)
                operation_code.add_statement(
                    cpp_gen.Statement("*(traceEvents[%d].addr) = "
                                      "*(int*)%s.data();" %
                                      (idx, source_identifier)))

            # Find op_kernel for this operation type and generate code
            k = op_kernel_loader.find_op_kernel(op)
            if k is not None:
                op_code = k.generate(op)
                if isinstance(op_code, cpp_gen.CodeBlock):
                    operation_code.add_block(op_code)
                else:
                    # Kernels may return plain text; it is split into one
                    # Statement per ';' separated, non-empty fragment.
                    for fragment in op_code.split(";"):
                        fragment = fragment.strip()
                        if fragment:
                            operation_code.add_statement(
                                cpp_gen.Statement(fragment))

            if self.export_memory_trace:
                operation_code.add_statement(
                    cpp_gen.Statement("std::cout << *(traceEvents[%d].addr)"
                                      " << std::endl" % idx))

            if type == 'timing':
                operation_code.add_statement(
                    cpp_gen.Statement("result.push_back(TFMin::OperationTime("
                                      "\"%s\", getTime()-start))" % op.name))
                print_if_statement = cpp_gen.IfStatement("print")
                # A space separates "[type]" from "operation" in the message.
                print_if_statement.if_code.add_statement(
                    cpp_gen.Statement("std::cout << \"Completed %s [%s] "
                                      "operation\" << std::endl" %
                                      (op.name, op.type)))
                operation_code.add_statement(print_if_statement)

            if type == 'validate':
                for out in op.outputs:
                    identifier = code_gen.c_safe_identifier(out.name)
                    val_if = cpp_gen.IfStatement(
                        "!tensorsApproximatelyEqual("
                        "%s, %s_val, true)" % (identifier, identifier))
                    val_if.if_code.add_statement(
                        cpp_gen.Statement(
                            "std::cout << \"Validation failed at "
                            "operation [%s]\\n\"" % identifier))
                    val_if.if_code.add_statement(
                        cpp_gen.Statement("return false"))
                    operation_code.add_statement(val_if)

            # Attach a descriptive comment to the first generated statement.
            # NOTE(review): this assumes at least one statement was added
            # above - confirm what happens when no op kernel is found.
            op_comment = cpp_gen.Comment("Generated %s [%s] operation." %
                                         (op.name, op.type),
                                         style='//')
            operation_code.statements[0].comment = op_comment

            method.code_block.add_block(operation_code)

        if type == 'timing':
            if_print = cpp_gen.IfStatement("print")
            if_print.if_code.add_statement(
                cpp_gen.Statement("printTiming(result)"))
            method.code_block.add_statement(if_print)
            method.code_block.add_statement(
                cpp_gen.Statement("return result"))

        if type == 'validate':
            method.code_block.add_statement(cpp_gen.Statement("return true"))
    finally:
        base_op.BaseOpKernel.evaluate_all = False
def output_assignment(tf_op, eval=True, idx=0, assignment=True):
    """Generate the C++ text that declares (and assigns to) an op output.

    Args:
        tf_op: Operation whose output is being declared.
        eval: When true a concrete tensor or tensor map is declared,
            otherwise an ``auto`` variable is declared.  Forced to true when
            ``BaseOpKernel.evaluate_all`` or ``tf_op.tfmin_concrete_needed``
            is set.
        idx: Index into ``tf_op.outputs`` of the output to declare.
        assignment: When true the returned text ends with the start of an
            assignment to the declared identifier, so the caller can append
            the right-hand side expression.  Has no effect on the ``auto``
            form, which always ends with ``=``.

    Returns:
        The generated C++ text as a string.
    """
    output = tf_op.outputs[idx]
    identifier = code_gen.c_safe_identifier(output.name)
    c_type = code_gen.get_c_dtype(output.dtype.base_dtype)
    shape_np = tf_utils.np_tensor_shape(output)
    rank = len(shape_np)
    shape = code_gen.ndarray_1d_to_literal(shape_np, open='', close='')

    # -- special case --
    # if the result of this operation is a model output then
    # create a tensor map to the output buffer
    if hasattr(output, 'tfmin_output_identifier'):
        code = "\nEigen::TensorMap<Eigen::Tensor<%s, %d, %s>>" % \
               (c_type, rank, BaseOpKernel.data_layout)
        code += " %s((%s*)%s, %s);" % \
                (identifier, c_type, output.tfmin_output_identifier, shape)
        if assignment:
            code += "\n%s = " % identifier
        return code

    # if this operation needs to be concrete or all ops are being evaluated
    if BaseOpKernel.evaluate_all or tf_op.tfmin_concrete_needed:
        eval = True

    # if this operation is not being evaluated then create
    # an auto type so that the Eigen library produces an evaluator
    # object instead of a concrete tensor.
    if not eval:
        return "\nauto %s = " % identifier

    # evaluate is true, so create a concrete tensor or
    # a map of the operation's result
    if BaseOpKernel.use_memory_map:
        precalculated_offset = getattr(output, '_tfmin_memory_offset', None)

        if precalculated_offset is None:
            # No precalculated offset was found, so assume it is safe to
            # use the memory space of the input to this operation.
            # NOTE this will be safe in most cases but may fail in some
            # rare cases!
            source_tensor = tf_op.inputs[0]
            if source_tensor.op.type == "Identity":
                source_tensor = source_tensor.op.inputs[0]
            tensor_map_pointer = "%s.data()" % \
                                 code_gen.c_safe_identifier(source_tensor.name)
        else:
            tensor_map_pointer = "(%s*)(memoryBlock + %s)" % \
                                 (c_type, precalculated_offset)

        code = "\nEigen::TensorMap<Eigen::Tensor<%s, %d, %s>>" % \
               (c_type, rank, BaseOpKernel.data_layout)
        code += " %s(%s, %s);" % \
                (identifier, tensor_map_pointer, shape)
    else:
        # The layout is read from the class, like the tensor map
        # declarations above; a bare ``data_layout`` name is not defined
        # inside this function.
        # NOTE(review): this declaration ends with '=' and, when
        # ``assignment`` is true, the device assignment below is appended
        # after it - confirm the resulting C++ is what is intended.
        code = "\nEigen::Tensor<%s, %d, %s> %s =" % \
               (c_type, rank, BaseOpKernel.data_layout, identifier)

    if assignment:
        code += "\n%s.device(d) =" % identifier
    return code