示例#1
0
文件: nn_ops.py 项目: VoltaAI/TFMin
    def gen_code(cls, tf_op, inputs):
        """Build the statement contracting the first two inputs (MatMul).

        Rank 1 inputs are reshaped to rank 2 before the contraction: the
        first input becomes a row ({1, n}) and the second a column ({n, 1}).
        When either input was reshaped, the contraction result is reshaped
        to rank 1 using the first dimension of the operation's output shape.

        Args:
            cls: unused.
            tf_op: operation whose first output receives the result.
            inputs: input tensors; only the first two are read.

        Returns:
            str: the generated statement text.
        """
        # generate source information used to generate MatMul statement
        lhs_statement = code_gen.c_safe_identifier(inputs[0].name)
        rhs_statement = code_gen.c_safe_identifier(inputs[1].name)
        lhs_shape = tf_utils.np_tensor_shape(inputs[0])
        rhs_shape = tf_utils.np_tensor_shape(inputs[1])

        # if the inputs include vectors then reshape them to rank 2
        reshaped = False
        if len(lhs_shape) == 1:
            lhs_statement += (".reshape(Eigen::array<int,2>({1,%d}))" %
                              lhs_shape[0])
            reshaped = True
        if len(rhs_shape) == 1:
            # a rank 1 shape only has index 0; indexing [1] here raised
            # an IndexError for every vector right-hand operand
            rhs_statement += (".reshape(Eigen::array<int,2>({%d,1}))" %
                              rhs_shape[0])
            reshaped = True

        final_reshape = ""
        if reshaped:
            output_shape = tf_utils.np_tensor_shape(tf_op.outputs[0])
            final_reshape = (".reshape(Eigen::array<int,1>({%d}))" %
                             output_shape[0])

        # NOTE(review): `matMulDims` is emitted verbatim; presumably it is
        # declared elsewhere in the generated code - confirm.
        return "%s %s.contract(%s, matMulDims)%s;" % (
            base_op.BaseOpKernel.output_assignment(tf_op, True),
            lhs_statement,
            rhs_statement,
            final_reshape)
示例#2
0
    def print_operation_details(tf_op):
        """Print the attributes, inputs and outputs of an operation.

        Writes to stdout the attribute keys of the operation, each
        attribute's Python type and value, and for every input and output
        tensor its name, C data type, rank and shape. Inputs produced by an
        "Identity" operation are replaced by that operation's first input.

        Args:
            tf_op: the operation to describe.
        """
        # look through Identity operations to the tensor they forward
        inputs = [
            op_input.op.inputs[0]
            if op_input.op.type == "Identity" else op_input
            for op_input in tf_op.inputs
        ]

        # Read the keys from the NodeDef attribute map directly instead of
        # scraping its text representation, which truncated long keys and
        # depended on the exact text layout. Sorting keeps the output order
        # deterministic.
        attr_keys = sorted(tf_op.node_def.attr.keys())

        print("Attr keys are : " + str(attr_keys))

        print("Details of operation \"%s\" "
              "type [%s] -------------------" % (tf_op.name, tf_op.type))

        if attr_keys:
            print("Attributes:")
            for key in attr_keys:
                value = tf_op.get_attr(key)
                print("   \"%s\"\t\ttype(%s)\t\tvalue(%s)" %
                      (key, str(type(value)), str(value)))

        print("%d inputs:" % len(inputs))
        for idx, tensor in enumerate(inputs):
            parent_op = tf_utils.get_parent_of_tensor(tensor)
            tensor_shape = tf_utils.np_tensor_shape(tensor)
            print(
                "   [%2d] \"%s\" %s rank(%d) %s : source op (\"%s\" - %s)" %
                (idx, tensor.name,
                 code_gen.get_c_dtype(tensor.dtype.base_dtype),
                 len(tensor_shape), tensor_shape, parent_op.name,
                 parent_op.type))

        print("%d outputs:" % len(tf_op.outputs))
        for idx, output in enumerate(tf_op.outputs):
            output_shape = tf_utils.np_tensor_shape(output)
            print("   [%2d] \"%s\" %s rank(%d) %s" %
                  (idx, output.name,
                   code_gen.get_c_dtype(output.dtype.base_dtype),
                   len(output_shape), output_shape))
        print("--------------------------------------------------")
示例#3
0
文件: math_ops.py 项目: VoltaAI/TFMin
    def gen_code(cls, tf_op, inputs):
        """Generate code dividing the first input by the second.

        When the second input is a scalar (rank 0, or rank 1 with a single
        element) an explicit element loop is generated. The loop divides by
        the constant value, shifts right for power-of-two constants on
        non-floating types, or divides by element 0 of the second tensor
        when it is not a 'Const' operation. Otherwise a single element-wise
        division statement is returned.

        Args:
            cls: unused.
            tf_op: operation whose first output receives the result.
            inputs: input tensors; the first two are the operands.

        Returns:
            A cpp_gen.CodeBlock for the scalar case, or a str holding the
            division statement for the general case.
        """
        output_identifier = code_gen.c_safe_identifier(tf_op.outputs[0].name)
        input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
        input1_identifier = code_gen.c_safe_identifier(inputs[1].name)

        # if the second argument is not a scalar tensor use plain division
        input1_shape = tf_utils.np_tensor_shape(inputs[1])
        is_scalar = (len(input1_shape) == 0 or
                     (len(input1_shape) == 1 and input1_shape[0] == 1))
        if not is_scalar:
            return "%s %s / %s;" % (
                base_op.BaseOpKernel.output_assignment(tf_op, True),
                input0_identifier,
                input1_identifier)

        input0_size = np.prod(tf_utils.np_tensor_shape(inputs[0]))
        c_type = code_gen.get_c_dtype(inputs[0].dtype.base_dtype)

        code = cpp_gen.CodeBlock()
        target = base_op.BaseOpKernel.output_assignment(
            tf_op, True, assignment=False)
        code.add_statement(
            cpp_gen.Statement(target.replace(";", "").replace('\n', '')))

        # determine the type of expression to use. Either a division by the
        # value of a rank zero tensor, a division by a constant or a shift
        # by a constant in the case of power of two denominators
        if inputs[1].op.type == 'Const':
            const_value = tf_utils.get_const_scalar(inputs[1].op)

            # A shift is only valid C++ for non-floating operands and a
            # non-negative shift count; log2 is undefined for values <= 0.
            # NOTE(review): for negative signed numerators `>>` may round
            # differently from `/` - confirm this is acceptable.
            is_floating = c_type in ("float", "double", "long double")
            use_shift = (not is_floating and const_value >= 1 and
                         math.log2(const_value).is_integer())
            if use_shift:
                expression = ">> %d" % int(math.log2(const_value))
            else:
                expression = "/ (%s)%f" % (c_type, const_value)
        else:
            expression = "/ %s(0)" % input1_identifier

        for_loop = cpp_gen.LoopStatement(
            "for", "int i=0; i<%d; ++i" % input0_size)
        for_loop.code.add_statement(
            cpp_gen.Statement(
                "((%s*)%s.data())[i] = ((%s*)%s.data())[i] %s" %
                (c_type, output_identifier, c_type, input0_identifier,
                 expression)))
        code.add_statement(for_loop)

        return code
示例#4
0
文件: math_ops.py 项目: VoltaAI/TFMin
    def gen_code(cls, tf_op, inputs):
        """Generate a loop applying std::tanh to every input element.

        Args:
            cls: unused.
            tf_op: operation whose first output receives the result.
            inputs: input tensors; only the first one is read.

        Returns:
            A cpp_gen.CodeBlock holding the output declaration followed by
            the element loop.
        """
        source = inputs[0]
        src_name = code_gen.c_safe_identifier(source.name)
        dst_name = code_gen.c_safe_identifier(tf_op.outputs[0].name)
        c_type = code_gen.get_c_dtype(source.dtype.base_dtype)

        # total number of elements is the product of all dimensions
        total = 1
        for extent in tf_utils.np_tensor_shape(source):
            total *= extent

        block = cpp_gen.CodeBlock()

        # statement defining the output tensor
        declaration = base_op.BaseOpKernel.output_assignment(
            tf_op, eval=True, assignment=False)
        block.add_statement(cpp_gen.Statement(declaration))

        # element loop writing the hyperbolic tan of each input value
        # into the matching position of the output tensor
        element_statement = (
            "((%s*)%s.data())[i] = std::tanh(((%s*)%s.data())[i])" %
            (c_type, dst_name, c_type, src_name))
        tanh_loop = cpp_gen.LoopStatement("for",
                                          "int i=0; i<%d; ++i" % total)
        tanh_loop.code.add_statement(cpp_gen.Statement(element_statement))
        block.add_statement(tanh_loop)

        return block
示例#5
0
文件: exporter.py 项目: VoltaAI/TFMin
    def add_weights_to_class(self, class_obj, constructor):
        """Add one private TensorMap property per training tensor.

        For each tensor in self.list_training_tensors a private property of
        type Eigen::TensorMap<Eigen::Tensor<type, rank, layout>> is added to
        class_obj, and an initialiser mapping it onto the tensor's literal
        data is appended to the constructor's initialiser list.

        Args:
            class_obj: class object receiving the properties; its
                identifier prefixes the literal names.
            constructor: object whose initialiser_list is extended.
        """
        float_types = ("float", "double", "long double")

        # Add stored tensors to properties and constructor initialiser list
        for tensor in self.list_training_tensors:
            c_type = code_gen.get_c_dtype(tensor.dtype.base_dtype)
            # rank zero tensors are stored as rank 1
            rank = max(1, len(tf_utils.np_tensor_shape(tensor)))

            tensor_args = cpp_gen.TemplateInstance()
            tensor_args.add_element(cpp_gen.TypeDefinition(c_type))
            tensor_args.add_element(str(rank))
            tensor_args.add_element("Eigen::" + self.data_layout)

            map_args = cpp_gen.TemplateInstance()
            map_args.add_element(
                cpp_gen.TypeDefinition('Tensor',
                                       namespace='Eigen',
                                       template=tensor_args))
            map_type = cpp_gen.TypeDefinition('TensorMap',
                                              namespace='Eigen',
                                              template=map_args)

            member = cpp_gen.ClassProperty(
                code_gen.c_safe_identifier(tensor.name), map_type)
            member.access_modifier = "private"
            class_obj.add(member)

            # For now just use literal values, TODO add option to load
            # weights from file as well
            literal_name = "".join((class_obj.identifier, "Weights::",
                                    code_gen.c_safe_identifier(tensor.name),
                                    "Flat"))
            if c_type in float_types:
                literal_name += "Hex"

            dims = code_gen.ndarray_1d_to_literal(
                tf_utils.np_tensor_shape(tensor), open='', close='')
            # convert rank zero tensor to rank 1 for eigen
            if dims == '  ':
                dims = ' 1 '

            constructor.initialiser_list += [
                "%s((%s*)%s,%s)" % (code_gen.c_safe_identifier(tensor.name),
                                    c_type, literal_name, dims)
            ]
示例#6
0
文件: nn_ops.py 项目: VoltaAI/TFMin
    def gen_code(cls, tf_op, inputs):
        """Generate the statement adding the second input to the first.

        When the two shapes differ, the second input is reshaped so that
        its first dimension becomes the last dimension of a tensor with the
        first input's rank, then broadcast along all other dimensions.

        Args:
            cls: unused.
            tf_op: operation whose first output receives the result.
            inputs: input tensors; the first two are the operands.

        Returns:
            str: the generated addition statement.
        """
        value_name = code_gen.c_safe_identifier(inputs[0].name)
        bias_name = code_gen.c_safe_identifier(inputs[1].name)

        # Slot for casting the bias tensor to the same type as the input;
        # it is never filled in by this method.
        bias_cast = ""

        value_shape = tf_utils.np_tensor_shape(inputs[0])
        bias_shape = tf_utils.np_tensor_shape(inputs[1])
        same_shape = (len(value_shape) == len(bias_shape) and
                      all(a == b for a, b in zip(value_shape, bias_shape)))

        # if the bias tensor needs to be broadcast into the same shape as
        # the input
        bias_broadcast = ""
        if not same_shape:
            # repeat counts: the input's shape with the last dimension 1
            multiples = tf_utils.np_tensor_shape(inputs[0])
            multiples[-1] = 1

            padded = np.array([1] * (len(multiples) - 1) + [bias_shape[0]])
            bias_broadcast = "\n    .reshape(Eigen::array<int, %d>(%s))" % (
                len(padded), code_gen.ndarray_1d_to_literal(padded))
            bias_broadcast += (
                "\n        .broadcast(Eigen::array<int, %d>(%s))" %
                (len(multiples), code_gen.ndarray_1d_to_literal(multiples)))

        return "%s %s + %s%s%s;" % (
            base_op.BaseOpKernel.output_assignment(tf_op, False),
            value_name,
            bias_name,
            bias_cast,
            bias_broadcast)
示例#7
0
    def gen_code(cls, tf_op, inputs):
        """Generate the statement reshaping the first input.

        The target dimensions are taken from the shape of the operation's
        first output.

        Args:
            cls: unused.
            tf_op: operation whose first output receives the result.
            inputs: input tensors; only the first one is read.

        Returns:
            str: the generated reshape statement.
        """
        target_shape = tf_utils.np_tensor_shape(tf_op.outputs[0])
        source_name = code_gen.c_safe_identifier(inputs[0].name)

        assignment = base_op.BaseOpKernel.output_assignment(
            tf_op, base_op.BaseOpKernel.evaluate_all)
        dims_literal = code_gen.ndarray_1d_to_literal(target_shape)

        return "%s %s.reshape(Eigen::array<int, %d>(%s));" % (
            assignment, source_name, len(target_shape), dims_literal)
示例#8
0
文件: nn_ops.py 项目: VoltaAI/TFMin
    def gen_code(cls, tf_op, inputs):
        """Generate a loop finding the index of the largest input element.

        The generated code declares the output tensor, initialises a
        running maximum and output element 0, then scans the first input
        over its first dimension. Whenever an element exceeds the running
        maximum, its index is stored in output element 0.

        Args:
            cls: unused.
            tf_op: operation whose first output receives the index.
            inputs: input tensors; only the first one is read.

        Returns:
            A cpp_gen.CodeBlock holding the generated statements.
        """
        identifier = code_gen.c_safe_identifier(tf_op.outputs[0].name)
        input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
        c_type = code_gen.get_c_dtype(inputs[0].dtype.base_dtype)
        input_shape = tf_utils.np_tensor_shape(inputs[0])

        code = cpp_gen.CodeBlock()

        # declaration of the output tensor, without a trailing ';' or
        # embedded newlines
        assignment = base_op.BaseOpKernel.output_assignment(tf_op,
                                                            True,
                                                            assignment=False)
        if assignment.endswith(';'):
            assignment = assignment[:-1]
        assignment = assignment.replace('\n', '')
        code.add_statement(cpp_gen.Statement(str(assignment)))

        # numeric_limits<T>::min() is the smallest *positive* value for
        # floating point types, so all-negative inputs never exceeded it;
        # lowest() is the most negative finite value for every type.
        code.add_statement(
            cpp_gen.Statement("%s %s_max = std::numeric_limits<%s>::lowest()" %
                              (c_type, identifier, c_type)))

        code.add_statement(cpp_gen.Statement("%s(0) = 0" % identifier))

        # body of the scan: remember a new maximum and its index
        if_statement = cpp_gen.IfStatement(
            "%s(%s_it) > %s_max" % (input0_identifier, identifier, identifier))
        if_statement.if_code.add_statement(
            cpp_gen.Statement("%s_max = %s(%s_it)" %
                              (identifier, input0_identifier, identifier)))
        if_statement.if_code.add_statement(
            cpp_gen.Statement("%s(0) = %s_it" % (identifier, identifier)))

        # only the first dimension of the input shape bounds the loop
        for_loop = cpp_gen.LoopStatement(
            "for", "long %s_it=0; %s_it<%d; ++%s_it" %
            (identifier, identifier, input_shape[0], identifier))
        for_loop.code.add_statement(if_statement)

        code.add_statement(for_loop)

        return code
示例#9
0
文件: exporter.py 项目: VoltaAI/TFMin
    def add_verification_to_class(self, class_obj, constructor):
        """Add a private verification TensorMap for every operation output.

        Does nothing unless self.validation_type is 'Full'. Otherwise, for
        each output of each operation in self.list_operations, a private
        property named "<identifier>_val" is added to class_obj and an
        initialiser mapping it onto the matching verification data literal
        is appended to the constructor's initialiser list.

        Args:
            class_obj: class object receiving the properties; its
                identifier prefixes the literal names.
            constructor: object whose initialiser_list is extended.
        """
        if self.validation_type != 'Full':
            return

        for op in self.list_operations:
            for out in op.outputs:
                identifier = code_gen.c_safe_identifier(out.name)
                shape = tf_utils.np_tensor_shape(out)
                # rank zero tensors are stored as rank 1
                if len(shape) == 0:
                    shape = [1]
                # use the base dtype, as the other generators in this
                # project do, so reference dtypes map to their value type
                c_type = code_gen.get_c_dtype(out.dtype.base_dtype)

                inner_template = cpp_gen.TemplateInstance()
                inner_template.add_element(cpp_gen.TypeDefinition(c_type))
                inner_template.add_element(str(len(shape)))
                inner_template.add_element("Eigen::" + self.data_layout)
                template = cpp_gen.TemplateInstance()
                template.add_element(
                    cpp_gen.TypeDefinition('Tensor',
                                           namespace='Eigen',
                                           template=inner_template))
                tensor_type = cpp_gen.TypeDefinition('TensorMap',
                                                     namespace='Eigen',
                                                     template=template)
                tensor_map_property = cpp_gen.ClassProperty(
                    identifier + "_val", tensor_type)
                tensor_map_property.access_modifier = "private"
                class_obj.add(tensor_map_property)

                # floating point literals carry a "Hex" suffix
                lit_suffix = ""
                if c_type in ("float", "double", "long double"):
                    lit_suffix = "Hex"

                literal_identifier = (class_obj.identifier + "Weights::" +
                                      identifier + "VerificationData" +
                                      lit_suffix)

                constructor.initialiser_list += [
                    "%s((%s*)%s,%s)" %
                    (identifier + "_val", c_type, literal_identifier,
                     code_gen.ndarray_1d_to_literal(
                         shape, open='', close=''))
                ]
示例#10
0
文件: math_ops.py 项目: VoltaAI/TFMin
    def gen_code(cls, tf_op, inputs):
        """Generate the statement dividing the first input by the second.

        A scalar second input (rank 0, or rank 1 with a single element) is
        expanded with `.constant()` on the first input; otherwise the two
        tensors are divided directly.

        Args:
            cls: unused.
            tf_op: operation whose first output receives the result.
            inputs: input tensors; the first two are the operands.

        Returns:
            str: the generated division statement.
        """
        numerator = code_gen.c_safe_identifier(inputs[0].name)
        denominator = code_gen.c_safe_identifier(inputs[1].name)

        divisor_shape = tf_utils.np_tensor_shape(inputs[1])
        rank = len(divisor_shape)
        scalar_divisor = rank == 0 or (rank == 1 and divisor_shape[0] == 1)

        # if the second argument is a scalar tensor, spread its first
        # element over a tensor shaped like the numerator
        if scalar_divisor:
            divisor = "%s.constant(%s(0))" % (numerator, denominator)
        else:
            divisor = denominator

        return "%s %s / %s;" % (
            base_op.BaseOpKernel.output_assignment(tf_op, True),
            numerator,
            divisor)
示例#11
0
    def gen_code(cls, tf_op, inputs):
        """Generate the statement concatenating the first two inputs.

        The axis is the constant scalar produced by the parent operation of
        the third input. A negative axis is first converted to its
        non-negative equivalent. It is then reduced by the number of
        dimensions missing from the collapsed output shape.

        Args:
            cls: unused.
            tf_op: operation whose first output receives the result.
            inputs: input tensors; the first two are concatenated and the
                third supplies the axis.

        Returns:
            str: the generated concatenation statement.
        """
        input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
        input1_identifier = code_gen.c_safe_identifier(inputs[1].name)

        axis = tf_utils.get_const_scalar(
            tf_utils.get_parent_of_tensor(inputs[2]))

        full_rank = tf_op.outputs[0].shape.ndims

        # a negative axis counts from the last dimension; convert it to a
        # non-negative index before any adjustment, otherwise both the
        # batch correction and the generated code use a wrong dimension
        if axis < 0:
            axis += full_rank

        # if there is an undefined batch dimension that has been collapsed
        # reduce the axis index accordingly
        reduced_rank = len(tf_utils.np_tensor_shape(tf_op.outputs[0]))
        if reduced_rank != full_rank:
            axis -= (full_rank - reduced_rank)

        return "%s %s.concatenate(%s, %d);" % (
            base_op.BaseOpKernel.output_assignment(tf_op),
            input0_identifier,
            input1_identifier,
            axis)
示例#12
0
文件: exporter.py 项目: VoltaAI/TFMin
    def add_parameters_to_methods(self, eval_method, validate_method,
                                  timing_method, class_name):
        """Add input and output tensor parameters to the generated methods.

        For every input placeholder a pointer parameter is added to the
        eval and timing methods, together with a statement wrapping it in
        an Eigen::TensorMap. The validate method instead maps the
        placeholder's verification data literal.

        For every output tensor a pointer parameter is added to the eval
        and timing methods, and the validate method gets a local buffer of
        the same name. The output tensor (or the tensor feeding it, when it
        comes from an Identity operation) is tagged with
        `tfmin_output_identifier`.

        Finally a `print` parameter defaulting to true is added to the
        timing method, and the parameter description is appended to the
        eval and timing method comments.

        Args:
            eval_method: method object receiving parameters and statements.
            validate_method: method object receiving statements only.
            timing_method: method object receiving parameters and
                statements.
            class_name: prefix of the verification data literals.
        """
        parameter_comment = "Input tensors\n"
        for i, input_placeholder in enumerate(self.list_input_placeholders):
            tensor = input_placeholder.outputs[0]
            c_type = code_gen.get_c_dtype(tensor.dtype.base_dtype)
            identifier = code_gen.c_safe_identifier(tensor.name)
            shape = tf_utils.np_tensor_shape(tensor)
            # rank zero tensors are stored as rank 1
            if len(shape) == 0:
                shape = [1]
            shape_literal = code_gen.ndarray_1d_to_literal(shape,
                                                           open='',
                                                           close='')
            layout = "Eigen::" + self.data_layout

            parameter_comment += "[%s] %s %s\n" % (
                c_type, identifier, str(tensor.shape[1:]))

            # each method gets its own Parameter instance
            for method in (eval_method, timing_method):
                method.parameter_list.add(
                    cpp_gen.Parameter(
                        identifier + "Param",
                        cpp_gen.TypeDefinition(c_type, ptr_levels=1)))

            param_tensor_map = ("Eigen::TensorMap<Eigen::Tensor"
                                "<%s, %d, %s>> %s(%s,%s)" %
                                (c_type, len(shape), layout, identifier,
                                 identifier + "Param", shape_literal))

            val_data_identifier = (class_name + "Weights::" + identifier +
                                   "VerificationDataHex")

            val_tensor_map = ("Eigen::TensorMap<Eigen::Tensor"
                              "<%s, %d, %s>> %s((%s*)%s,%s)" %
                              (c_type, len(shape), layout, identifier,
                               c_type, val_data_identifier, shape_literal))

            # only the first TensorMap statement carries the comment
            comment = None
            if i == 0:
                comment = cpp_gen.Comment("Creating TensorMaps of inputs")

            eval_method.code_block.add_statement(
                cpp_gen.Statement(param_tensor_map, comment))
            timing_method.code_block.add_statement(
                cpp_gen.Statement(param_tensor_map, comment))
            validate_method.code_block.add_statement(
                cpp_gen.Statement(val_tensor_map, comment))

        parameter_comment += "Output tensors\n"
        for out in self.output_tensors:
            # use the base dtype like the input loop above, so the
            # parameter type matches the value type of the tensor
            c_type = code_gen.get_c_dtype(out.dtype.base_dtype)
            identifier = code_gen.c_safe_identifier(out.name)
            shape = tf_utils.np_tensor_shape(out)

            parameter_comment += "[%s] %s [%s]\n" % (
                c_type, identifier,
                code_gen.ndarray_1d_to_literal(shape, open='', close=''))

            for method in (eval_method, timing_method):
                method.parameter_list.add(
                    cpp_gen.Parameter(
                        identifier + "Param",
                        cpp_gen.TypeDefinition(c_type, ptr_levels=1)))

            # create buffers to hold final output tensors in the validate
            # method which doesn't actually return anything to the calling
            # process
            dummy_param = "%s %s[%d]" % (c_type, identifier + "Param",
                                         np.prod(shape))
            dummy_param_comment = cpp_gen.Comment("Dummy parameter for output")
            validate_method.code_block.add_statement(
                cpp_gen.Statement(dummy_param, dummy_param_comment))

            # Tag this tensor as an output so that operation kernels will
            # map the output to the given function parameter instead of a
            # block in the memory map. When the output comes from an
            # Identity operation, the tensor feeding it is tagged instead.
            if out.op.type == 'Identity':
                out = out.op.inputs[0]
            out.tfmin_output_identifier = identifier + "Param"

        timing_method.parameter_list.add(
            cpp_gen.Parameter('print',
                              cpp_gen.TypeDefinition('bool'),
                              default='true'))
        eval_method.comment.text += parameter_comment
        timing_method.comment.text += parameter_comment
示例#13
0
文件: math_ops.py 项目: VoltaAI/TFMin
    def gen_code(cls, tf_op, inputs):
        """Generate the statement adding the first two inputs.

        If the input shapes match this is a simple element-wise addition.
        Otherwise the smaller tensor is broadcast up to the size of the
        larger one: a lower rank shape is first padded with leading size 1
        dimensions via a reshape, then a broadcast multiplier is built for
        every dimension in which the two shapes differ.

        Args:
            cls: unused.
            tf_op: operation whose first output receives the result.
            inputs: input tensors; the first two are the operands.

        Returns:
            str: the generated addition statement, or a C++ comment
            describing the error when the shapes cannot be broadcast.
        """
        error_message = ("Error: Add operation with non-broadcastable "
                         "sized input tensors!")
        error_code = ("// Error generating Add operation, "
                      "non-broadcastable sized input tensors.")

        input0_expression = code_gen.c_safe_identifier(inputs[0].name)
        input1_expression = code_gen.c_safe_identifier(inputs[1].name)

        input0_shape = tf_utils.np_tensor_shape(inputs[0])
        input1_shape = tf_utils.np_tensor_shape(inputs[1])

        if not np.array_equal(input0_shape, input1_shape):

            # index (0 or 1) of the input that must be broadcast
            smaller = None

            # if one shape has lower rank than the other then pad the
            # smaller rank with size 1 dimensions. The builtin `int` is
            # used as dtype because the `np.int` alias no longer exists in
            # current NumPy releases.
            if input1_shape.size < input0_shape.size:
                smaller = 1
                padding = np.ones(int(input0_shape.size - input1_shape.size),
                                  int)
                input1_shape = np.concatenate((padding, input1_shape))
                input1_expression += ".reshape(Eigen::array<int, %d>(%s))" % \
                                     (input1_shape.size,
                                      code_gen.ndarray_1d_to_literal(input1_shape))
            elif input0_shape.size < input1_shape.size:
                smaller = 0
                padding = np.ones(int(input1_shape.size - input0_shape.size),
                                  int)
                input0_shape = np.concatenate((padding, input0_shape))
                input0_expression += ".reshape(Eigen::array<int, %d>(%s))" % \
                                     (input0_shape.size,
                                      code_gen.ndarray_1d_to_literal(input0_shape))

            broadcast_multiplier = np.ones(input1_shape.size, dtype=int)

            for d in range(input0_shape.size):
                dim0 = input0_shape[d]
                dim1 = input1_shape[d]
                if dim0 == dim1:
                    continue

                # error case where dimensions are not universally smaller
                # on one side
                wrong_side = ((smaller == 0 and dim0 > dim1) or
                              (smaller == 1 and dim1 > dim0))
                # error case where the dimensions are not equal and the
                # smaller of the two is not 1
                not_unit = ((dim0 < dim1 and dim0 != 1) or
                            (dim1 < dim0 and dim1 != 1))
                if wrong_side or not_unit:
                    print(error_message)
                    return error_code

                # check if this dimension defines the smallest tensor
                if smaller is None:
                    smaller = 0 if dim0 < dim1 else 1

                # update the broadcast multiplier for this dimension
                broadcast_multiplier[d] = dim1 if smaller == 0 else dim0

            broadcast_expression = ".broadcast(Eigen::array<int, %d>(%s))" % \
                                   (broadcast_multiplier.size,
                                    code_gen.ndarray_1d_to_literal(broadcast_multiplier))

            # update the expression for the smaller tensor
            if smaller == 0:
                input0_expression += broadcast_expression
            elif smaller == 1:
                input1_expression += broadcast_expression

        return "%s %s + %s;" % (
            base_op.BaseOpKernel.output_assignment(tf_op, True),
            input0_expression,
            input1_expression)
示例#14
0
    def output_assignment(tf_op, eval=True, idx=0, assignment=True):
        """Generate the code that declares an operation's output tensor.

        Three forms are produced:

        * If the output tensor carries a `tfmin_output_identifier`
          attribute, an Eigen::TensorMap over that buffer is declared.
        * Otherwise, if the result is evaluated (`eval` is true,
          BaseOpKernel.evaluate_all is set, or the operation is flagged
          `tfmin_concrete_needed`), either a TensorMap into the memory
          block or a concrete Eigen::Tensor is declared, depending on
          BaseOpKernel.use_memory_map.
        * Otherwise an `auto` declaration is produced, so that the Eigen
          library produces an evaluator object instead of a concrete
          tensor.

        Args:
            tf_op: operation whose output is being declared.
            eval: request a concrete result for this operation.
            idx: index of the output within tf_op.outputs.
            assignment: when true, append the start of an assignment to
                the declared tensor (not used by the `auto` form, which
                always ends in an assignment).

        Returns:
            str: the generated code, starting with a newline.
        """
        output = tf_op.outputs[idx]
        identifier = code_gen.c_safe_identifier(output.name)
        c_type = code_gen.get_c_dtype(output.dtype.base_dtype)
        shape_np = tf_utils.np_tensor_shape(output)
        rank = len(shape_np)
        shape = code_gen.ndarray_1d_to_literal(shape_np, open='', close='')

        # -- special case --
        # if the result of this operation is a model output then
        # create a tensor map to the output buffer
        if hasattr(output, 'tfmin_output_identifier'):
            code = "\nEigen::TensorMap<Eigen::Tensor<%s, %d, %s>>" % \
                   (c_type, rank, BaseOpKernel.data_layout)
            code += " %s((%s*)%s, %s);" % \
                    (identifier, c_type, output.tfmin_output_identifier,
                     shape)

            if assignment:
                code += "\n%s = " % identifier

            return code

        # if this operation needs to be concrete or all ops are being
        # evaluated
        if BaseOpKernel.evaluate_all or tf_op.tfmin_concrete_needed:
            eval = True

        # if this operation is not being evaluated then create an auto
        # type so that the Eigen library produces an evaluator object
        # instead of a concrete tensor.
        if not eval:
            return "\nauto %s = " % identifier

        # evaluate is true: create a concrete tensor or a map of the
        # operation's result
        if BaseOpKernel.use_memory_map:
            precalculated_offset = getattr(output, '_tfmin_memory_offset',
                                           None)

            if precalculated_offset is not None:
                tensor_map_pointer = "(%s*)(memoryBlock + %s)" % \
                                     (c_type, precalculated_offset)
            else:
                # if no precalculated offset was found then assume it is
                # safe to use the memory space of the input to this
                # operation. NOTE this will be safe in most cases but may
                # well break in some rare cases.
                source = tf_op.inputs[0]
                if source.op.type == "Identity":
                    source = source.op.inputs[0]
                tensor_map_pointer = "%s.data()" % \
                                     code_gen.c_safe_identifier(source.name)

            code = ("\nEigen::TensorMap<Eigen::Tensor<%s, %d, %s>>" %
                    (c_type, rank, BaseOpKernel.data_layout))
            code += " %s(%s, %s);" % (identifier, tensor_map_pointer, shape)
        else:
            # the layout lives on BaseOpKernel; the bare name
            # `data_layout` was undefined here and raised a NameError
            code = "\nEigen::Tensor<%s, %d, %s> %s =" % \
                   (c_type, rank, BaseOpKernel.data_layout, identifier)

        if assignment:
            code += "\n%s.device(d) =" % identifier

        return code
示例#15
0
    def gen_code(cls, tf_op, inputs):
        """Generate the slice statements implementing a Split operation.

        Only the form where the axis comes from a 'Const' operation and
        `num_split` is an integer is supported. The second input is then
        divided evenly along the axis into `num_split` slices, one per
        output of the operation.

        Args:
            cls: unused.
            tf_op: the split operation; its `num_split` attribute and
                outputs are read.
            inputs: input tensors; the first supplies the axis and the
                second is the tensor being split.

        Returns:
            str: the generated statements, or a C++ comment describing the
            error when the operation cannot be generated.
        """
        num_split = tf_op.get_attr("num_split")

        # This development version only supports the form where axis is
        # provided by a rank 0 constant operation
        if tf_utils.get_parent_of_tensor(inputs[0]).type != "Const":
            print("Error : Split operation doesn't support computed values "
                  "for axis yet!")
            return "// Error : Couldn't produce split operation with a " \
                   "computed axis dimension."

        # axis is provided by the first input tensor
        axis = tf_utils.get_const_scalar(
            tf_utils.get_parent_of_tensor(inputs[0]))

        full_rank = tf_op.outputs[0].shape.ndims

        # a negative axis counts from the last dimension; convert it to a
        # non-negative index before applying the batch correction
        if axis < 0:
            axis += full_rank

        # if there is an undefined batch dimension that has been collapsed
        # reduce the axis index accordingly
        reduced_rank = len(tf_utils.np_tensor_shape(tf_op.outputs[0]))
        if reduced_rank != full_rank:
            axis -= (full_rank - reduced_rank)

        # only form 1 of this operation is generated, where the input
        # tensor is split into num_split tensors, divided evenly along axis
        if not isinstance(num_split, int):  # TODO need to implement this
            return "// Error Split operation does not currently " \
                   "support arbitrary sized splits"

        # verify that the size of dimension 'axis' is a multiple of
        # num_split
        input_axis_size = tf_utils.np_tensor_shape(inputs[1])[axis]
        if input_axis_size % num_split != 0:
            print("Error : Split operation trying to split dimenson of "
                  "size %d into %d parts, leaves remainder." %
                  (input_axis_size, num_split))
            return "// Error : Couldn't produce split operation where " \
                   "tensor doesn't divide into num_split parts"

        # Size in 'axis' of each output slice. Integer division keeps the
        # offsets and extents integral; the remainder check above
        # guarantees the division is exact.
        size = input_axis_size // num_split

        input1_identifier = code_gen.c_safe_identifier(inputs[1].name)
        rank = len(tf_utils.np_tensor_shape(inputs[1]))

        offset = np.zeros(rank, dtype=int)
        extents = tf_utils.np_tensor_shape(inputs[1])
        extents[axis] = size

        # generate code for each output tensor
        statements = []
        for idx in range(num_split):
            offset[axis] = idx * size

            statements.append(
                base_op.BaseOpKernel.output_assignment(tf_op, idx=idx))
            statements.append(
                " %s.slice(Eigen::array<int, %d>(%s), "
                "Eigen::array<int, %d>(%s));" %
                (input1_identifier,
                 rank,
                 code_gen.ndarray_1d_to_literal(offset),
                 rank,
                 code_gen.ndarray_1d_to_literal(extents)))

        return "".join(statements)