Example #1
File: math_ops.py Project: VoltaAI/TFMin
    def gen_code(cls, tf_op, inputs):

        input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
        output_identifier = code_gen.c_safe_identifier(tf_op.outputs[0].name)

        type = code_gen.get_c_dtype(inputs[0].dtype.base_dtype)

        # calculate the number of elements in the input tensor
        input_shape = tf_utils.np_tensor_shape(inputs[0])
        element_count = 1
        for dim in input_shape:
            element_count *= dim

        # generate code to define the output tensor
        code = cpp_gen.CodeBlock()
        code.add_statement(
            cpp_gen.Statement(
                base_op.BaseOpKernel.output_assignment(tf_op,
                                                       eval=True,
                                                       assignment=False)))

        # generate a loop to perform a hyperbolic tan on each element,
        # placing the result in the output tensor
        for_loop = cpp_gen.LoopStatement("for",
                                         "int i=0; i<%d; ++i" % element_count)
        for_loop.code.add_statement(
            cpp_gen.Statement(
                "((%s*)%s.data())[i] = std::tanh(((%s*)%s.data())[i])" %
                (type, output_identifier, type, input0_identifier)))
        code.add_statement(for_loop)

        return code
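For reference, a minimal C++ sketch (not TFMin output; buffer names are hypothetical stand-ins for the generated identifiers) of the kind of loop this kernel emits: std::tanh applied element-wise through a raw pointer over the tensor's buffer.

    #include <cmath>
    #include <cstdio>

    int main() {
        // stand-ins for input0.data() and the mapped output buffer
        float input[4] = {-1.0f, 0.0f, 0.5f, 1.0f};
        float output[4];
        for (int i = 0; i < 4; ++i)
            output[i] = std::tanh(input[i]);  // same body as the generated loop
        for (int i = 0; i < 4; ++i)
            std::printf("%f\n", output[i]);
        return 0;
    }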
Example #2
File: nn_ops.py Project: VoltaAI/TFMin
    def gen_code(cls, tf_op, inputs):

        # generate source information used to generate MatMul statement
        input0_statement = code_gen.c_safe_identifier(inputs[0].name)
        input1_statement = code_gen.c_safe_identifier(inputs[1].name)
        input0_shape = tf_utils.np_tensor_shape(inputs[0])
        input1_shape = tf_utils.np_tensor_shape(inputs[1])

        # if the inputs include vectors then reshape them to rank 2
        reshaped = False
        if len(input0_shape) == 1:
            input0_statement += ".reshape(Eigen::array<int,2>({1,%d}))" % input0_shape[
                0]
            reshaped = True
        if len(input1_shape) == 1:
            input1_statement += ".reshape(Eigen::array<int,2>({%d,1}))" % input1_shape[
                1]
            reshaped = True

        final_reshape = ""
        if reshaped:
            output_shape = tf_utils.np_tensor_shape(tf_op.outputs[0])
            final_reshape = ".reshape(Eigen::array<int,1>({%d}))" % output_shape[
                0]

        code = "%s %s.contract(%s, matMulDims)%s;" % \
               (base_op.BaseOpKernel.output_assignment(tf_op, True),
                input0_statement,
                input1_statement,
                final_reshape)
        return code
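The matMulDims referenced in the generated statement is an Eigen contraction-dimension array. A minimal sketch, assuming Eigen's unsupported Tensor module, of how contract reproduces a matrix product:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
        Eigen::Tensor<float, 2, Eigen::RowMajor> a(2, 3), b(3, 2);
        a.setConstant(1.0f);
        b.setConstant(2.0f);
        // pair dim 1 of a with dim 0 of b: a standard MatMul
        Eigen::array<Eigen::IndexPair<int>, 1> matMulDims =
            {Eigen::IndexPair<int>(1, 0)};
        Eigen::Tensor<float, 2, Eigen::RowMajor> c = a.contract(b, matMulDims);
        std::cout << c << std::endl;  // every entry is 2+2+2 = 6
        return 0;
    }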
Example #3
File: beta_ops.py Project: VoltaAI/TFMin
    def gen_code(cls, tf_op, inputs):

        output_identifier = code_gen.c_safe_identifier(tf_op.outputs[0].name)
        input_identifier = code_gen.c_safe_identifier(inputs[0].name)
        filter_identifier = code_gen.c_safe_identifier(inputs[1].name)

        filter_stride = np.array(tf_op.get_attr("strides"))
        row_stride = filter_stride[1]
        col_stride = filter_stride[2]

        code = base_op.BaseOpKernel.output_assignment(tf_op,
                                                      eval=True,
                                                      idx=0,
                                                      assignment=False)

        code += "TFMin::DepthwiseConvFloatTFL::depthwiseConv(" \
                "%s, %s, %s, %d, %d)" % \
                (input_identifier,
                 filter_identifier,
                 output_identifier,
                 col_stride,
                 row_stride
                 )

        return code
Example #4
File: math_ops.py Project: VoltaAI/TFMin
    def gen_code(cls, tf_op, inputs):

        input1_identifier = code_gen.c_safe_identifier(inputs[0].name)
        input2_identifier = code_gen.c_safe_identifier(inputs[1].name)

        code = "%s %s.cwiseMin(%s);" % \
               (base_op.BaseOpKernel.output_assignment(tf_op, True),
                input1_identifier,
                input2_identifier)
        return code
Example #5
File: math_ops.py Project: VoltaAI/TFMin
    def gen_code(cls, tf_op, inputs):

        output_identifier = code_gen.c_safe_identifier(tf_op.outputs[0].name)
        input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
        input1_identifier = code_gen.c_safe_identifier(inputs[1].name)

        # if the second argument is a scalar tensor
        input1_shape = tf_utils.np_tensor_shape(inputs[1])
        if len(input1_shape) == 0 or (len(input1_shape) == 1
                                      and input1_shape[0] == 1):

            input0_shape = tf_utils.np_tensor_shape(inputs[0])
            input0_size = np.prod(input0_shape)
            type = code_gen.get_c_dtype(inputs[0].dtype.base_dtype)

            code = cpp_gen.CodeBlock()
            target = "%s" % base_op.BaseOpKernel.output_assignment(
                tf_op, True, assignment=False)
            code.add_statement(
                cpp_gen.Statement(target.replace(";", "").replace('\n', '')))

            # determine the type of expression to use. Either a division by the value of
            # a rank zero tensor, a division by a constant or a shift by a constant
            # in the case of power of two denominators

            if inputs[1].op.type == 'Const':
                const_value = tf_utils.get_const_scalar(inputs[1].op)

                # use a shift only for positive power-of-two divisors of
                # integer data; shifting a floating-point value would not
                # compile in the generated C++
                if (inputs[0].dtype.base_dtype.is_integer and const_value > 0
                        and math.log2(const_value).is_integer()):
                    expression = ">> %d" % int(math.log2(const_value))
                else:
                    expression = "/ (%s)%f" % (type, const_value)

            else:

                expression = "/ %s(0)" % input1_identifier

            for_loop = cpp_gen.LoopStatement(
                "for", "int i=0; i<%d; ++i" % input0_size)
            for_loop.code.add_statement(
                cpp_gen.Statement(
                    "((%s*)%s.data())[i] = ((%s*)%s.data())[i] %s" %
                    (type, output_identifier, type, input0_identifier,
                     expression)))

            code.add_statement(for_loop)
        else:
            code = "%s %s / %s;" % \
                   (base_op.BaseOpKernel.output_assignment(tf_op, True),
                    input0_identifier,
                    input1_identifier)

        return code
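A small sketch of why the shift form is valid: for non-negative integers, dividing by 2^n equals shifting right by n (floating-point data must keep the division form).

    #include <cassert>

    int main() {
        unsigned int x = 200;
        assert((x / 8) == (x >> 3));  // 8 == 2^3, so / 8 and >> 3 agree
        return 0;
    }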
Example #6
File: math_ops.py Project: VoltaAI/TFMin
    def gen_code(cls, tf_op, inputs):

        type = code_gen.get_c_dtype(tf_op.outputs[0].dtype.base_dtype)

        param_a_is_scalar = (inputs[0].shape.ndims == 0
                             or (inputs[0].shape.ndims == 1
                                 and inputs[0].shape.dims[0] == 1))
        param_b_is_scalar = (inputs[1].shape.ndims == 0
                             or (inputs[1].shape.ndims == 1
                                 and inputs[1].shape.dims[0] == 1))

        param_a_is_const = tf_utils.operation_is_constant(inputs[0].op)
        param_b_is_const = tf_utils.operation_is_constant(inputs[1].op)

        # if one of the inputs is a constant scalar then implement form 2
        if param_a_is_const and param_a_is_scalar:
            tensor_identifier = code_gen.c_safe_identifier(inputs[1].name)
            const_value = tf_utils.get_const_scalar(
                tf_utils.get_parent_of_tensor(tf_op.inputs[0]))

            return "%s %s * (%s)%s;" % \
                   (base_op.BaseOpKernel.output_assignment(tf_op, True),
                    tensor_identifier,
                    type,
                    str(const_value))

        if param_b_is_const and param_b_is_scalar:
            tensor_identifier = code_gen.c_safe_identifier(inputs[0].name)
            const_value = tf_utils.get_const_scalar(
                tf_utils.get_parent_of_tensor(tf_op.inputs[1]))

            return "%s %s * (%s)%s;" % \
                   (base_op.BaseOpKernel.output_assignment(tf_op, True),
                    tensor_identifier,
                    type,
                    str(const_value))

        # if both inputs are either tensors or not constants then generate form 1
        input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
        input1_identifier = code_gen.c_safe_identifier(inputs[1].name)

        code = "%s %s * %s;" % \
               (base_op.BaseOpKernel.output_assignment(tf_op, eval),
                input0_identifier,
                input1_identifier)
        return code
Example #7
File: math_ops.py Project: VoltaAI/TFMin
    def gen_code(cls, tf_op, inputs):

        input0_identifier = code_gen.c_safe_identifier(inputs[0].name)

        code = "%s %s.rsqrt();" % \
               (base_op.BaseOpKernel.output_assignment(tf_op, True),
                input0_identifier)

        return code
Example #8
    def gen_code(cls, tf_op, inputs):

        input_identifier = code_gen.c_safe_identifier(inputs[0].name)
        type = code_gen.get_c_dtype(tf_op.outputs[0].dtype.base_dtype)

        code = "%s %s.cwiseMax((%s)0);" % \
               (base_op.BaseOpKernel.output_assignment(tf_op, True),
                input_identifier,
                type)
        return code
Example #9
File: exporter.py Project: VoltaAI/TFMin
    def add_weights_to_class(self, class_obj, constructor):

        # Add stored tensors to properties and constructor initialiser list
        for t in self.list_training_tensors:

            type = code_gen.get_c_dtype(t.dtype.base_dtype)
            rank = max(1, len(tf_utils.np_tensor_shape(t)))

            inner_template = cpp_gen.TemplateInstance()
            inner_template.add_element(cpp_gen.TypeDefinition(type))
            inner_template.add_element(str(rank))
            inner_template.add_element("Eigen::" + self.data_layout)
            template = cpp_gen.TemplateInstance()
            template.add_element(
                cpp_gen.TypeDefinition('Tensor',
                                       namespace='Eigen',
                                       template=inner_template))
            tensor_type = cpp_gen.TypeDefinition('TensorMap',
                                                 namespace='Eigen',
                                                 template=template)
            tensor_map_property = cpp_gen.ClassProperty(
                code_gen.c_safe_identifier(t.name), tensor_type)
            tensor_map_property.access_modifier = "private"
            class_obj.add(tensor_map_property)

            # For now just use literal values, TODO add option to load weights from file as well
            literal_name = class_obj.identifier + "Weights::" + \
                           code_gen.c_safe_identifier(t.name) + "Flat"
            if type == "float" or type == "double" or type == "long double":
                literal_name += "Hex"
            shape = code_gen.ndarray_1d_to_literal(tf_utils.np_tensor_shape(t),
                                                   open='',
                                                   close='')
            # convert rank zero tensor to rank 1 for eigen
            if shape == '  ':
                shape = ' 1 '

            constructor.initialiser_list += [
                "%s((%s*)%s,%s)" %
                (code_gen.c_safe_identifier(t.name), type, literal_name, shape)
            ]
Example #10
File: nn_ops.py Project: VoltaAI/TFMin
    def gen_code(cls, tf_op, inputs):

        # super().print_operation_details(tf_op)

        identifier = code_gen.c_safe_identifier(tf_op.outputs[0].name)
        input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
        type = code_gen.get_c_dtype(inputs[0].dtype.base_dtype)
        input_shape = tf_utils.np_tensor_shape(inputs[0])

        code = cpp_gen.CodeBlock()

        assignment = base_op.BaseOpKernel.output_assignment(tf_op,
                                                            True,
                                                            assignment=False)
        if assignment[-1] == ';':
            assignment = assignment[:-1]
        assignment = assignment.replace('\n', '')
        code.add_statement(cpp_gen.Statement(str(assignment)))

        code.add_statement(
            cpp_gen.Statement("%s %s_max = std::numeric_limits<%s>::lowest()" %
                              (type, identifier, type)))

        code.add_statement(cpp_gen.Statement("%s(0) = 0" % identifier))

        if_statement = cpp_gen.IfStatement(
            "%s(%s_it) > %s_max" % (input0_identifier, identifier, identifier))
        if_statement.if_code.add_statement(
            cpp_gen.Statement("%s_max = %s(%s_it)" %
                              (identifier, input0_identifier, identifier)))
        if_statement.if_code.add_statement(
            cpp_gen.Statement("%s(0) = %s_it" % (identifier, identifier)))

        for_loop = cpp_gen.LoopStatement(
            "for", "long %s_it=0; %s_it<%d; ++%s_it" %
            (identifier, identifier, input_shape[0], identifier))
        for_loop.code.add_statement(if_statement)

        code.add_statement(for_loop)

        return code
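A sketch of the search loop this kernel emits, with hypothetical buffer names; initialising the running maximum with lowest() rather than min() matters because, for floating-point types, min() is the smallest positive value and would break all-negative inputs.

    #include <cstdio>
    #include <limits>

    int main() {
        float input[5] = {-3.0f, -1.5f, -2.0f, -7.0f, -1.6f};
        float best = std::numeric_limits<float>::lowest();
        long arg = 0;
        for (long i = 0; i < 5; ++i) {
            if (input[i] > best) {
                best = input[i];
                arg = i;
            }
        }
        std::printf("argmax = %ld\n", arg);  // prints 1
        return 0;
    }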
Example #11
File: math_ops.py Project: VoltaAI/TFMin
    def gen_code(cls, tf_op, inputs):

        input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
        input1_identifier = code_gen.c_safe_identifier(inputs[1].name)

        # if the second argument is a scalar tensor
        input1_shape = tf_utils.np_tensor_shape(inputs[1])
        if len(input1_shape) == 0 or (len(input1_shape) == 1
                                      and input1_shape[0] == 1):
            code = "%s %s / %s.constant(%s(0));" % \
               (base_op.BaseOpKernel.output_assignment(tf_op, True),
                input0_identifier,
                input0_identifier,
                input1_identifier)
        else:
            code = "%s %s / %s;" % \
                   (base_op.BaseOpKernel.output_assignment(tf_op, True),
                    input0_identifier,
                    input1_identifier)

        return code
Example #12
    def gen_code(cls, tf_op, inputs):

        input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
        input1_identifier = code_gen.c_safe_identifier(inputs[1].name)

        axis = tf_utils.get_const_scalar(
            tf_utils.get_parent_of_tensor(inputs[2]))

        # if there is an undefined batch dimension that has been collapsed
        # reduce the axis index by 1
        reduced_rank = len(tf_utils.np_tensor_shape(tf_op.outputs[0]))
        if reduced_rank != tf_op.outputs[0].shape.ndims:
            axis -= (tf_op.outputs[0].shape.ndims - reduced_rank)

        code = "%s %s.concatenate(%s, %d);" % \
               (base_op.BaseOpKernel.output_assignment(tf_op),
                input0_identifier,
                input1_identifier,
                axis)

        return code
Example #13
    def gen_code(cls, tf_op, inputs):

        # super().print_operation_details(tf_op)

        input_identifier = code_gen.c_safe_identifier(inputs[0].name)
        type = code_gen.get_c_dtype(tf_op.get_attr("DstT"))

        code = "%s %s.cast<%s>();" % \
               (base_op.BaseOpKernel.output_assignment(tf_op, eval=False),
                input_identifier,
                type)
        return code
Example #14
File: nn_ops.py Project: VoltaAI/TFMin
    def gen_code(cls, tf_op, inputs):

        input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
        input1_identifier = code_gen.c_safe_identifier(inputs[1].name)

        # if the bias tensor needs to be cast to the same type as the input
        bias_cast = ""

        # if the bias tensor needs to be broadcast into the same shape as the input
        bias_broadcast = ""
        input0_shape = tf_utils.np_tensor_shape(inputs[0])
        input1_shape = tf_utils.np_tensor_shape(inputs[1])
        shapes_match = False
        if len(input0_shape) == len(input1_shape):
            shapes_match = True
            for i in range(len(input0_shape)):
                if input0_shape[i] != input1_shape[i]:
                    shapes_match = False
        if not shapes_match:

            broadcast_shape = tf_utils.np_tensor_shape(inputs[0])
            broadcast_shape[len(broadcast_shape) - 1] = 1

            reshape_shape = np.array(([1] * (len(broadcast_shape) - 1)) +
                                     [input1_shape[0]])
            bias_broadcast = "\n    .reshape(Eigen::array<int, %d>(%s))" % \
                             (len(reshape_shape),
                              code_gen.ndarray_1d_to_literal(reshape_shape))
            bias_broadcast += "\n        .broadcast(Eigen::array<int, %d>(%s))" % \
                              (len(broadcast_shape),
                               code_gen.ndarray_1d_to_literal(broadcast_shape))

        code = "%s %s + %s%s%s;" % \
               (base_op.BaseOpKernel.output_assignment(tf_op, False),
                input0_identifier,
                input1_identifier,
                bias_cast,
                bias_broadcast)
        return code
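A minimal Eigen sketch of the reshape-then-broadcast pattern built above: a length-C bias is reshaped to {1, C}, broadcast across the leading dimension, then added element-wise.

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
        Eigen::Tensor<float, 2, Eigen::RowMajor> acts(2, 3);
        acts.setConstant(1.0f);
        Eigen::Tensor<float, 1, Eigen::RowMajor> bias(3);
        bias.setValues({10.0f, 20.0f, 30.0f});

        Eigen::Tensor<float, 2, Eigen::RowMajor> out =
            acts + bias.reshape(Eigen::array<int, 2>({1, 3}))
                       .broadcast(Eigen::array<int, 2>({2, 1}));
        std::cout << out << std::endl;  // each row is 11 21 31
        return 0;
    }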
Example #15
    def gen_code(cls, tf_op, inputs):

        output_shape = tf_utils.np_tensor_shape(tf_op.outputs[0])
        input_identifier = code_gen.c_safe_identifier(inputs[0].name)

        code = "%s %s.reshape(Eigen::array<int, %d>(%s));" % \
               (base_op.BaseOpKernel.output_assignment(
                 tf_op, base_op.BaseOpKernel.evaluate_all
               ),
                input_identifier,
                len(output_shape),
                code_gen.ndarray_1d_to_literal(output_shape))
        return code
Example #16
File: math_ops.py Project: VoltaAI/TFMin
    def gen_code(cls, tf_op, inputs):

        input0_identifier = code_gen.c_safe_identifier(inputs[0].name)

        dtype_string = code_gen.get_c_dtype(inputs[0].dtype.base_dtype)

        code ="%s (%s)1.0 / ((%s)1.0 + ((%s)0.0 - %s).exp())" %\
                (base_op.BaseOpKernel.output_assignment(tf_op, True),
                 dtype_string,
                 dtype_string,
                 dtype_string,
                 input0_identifier)

        return code
Example #17
    def gen_code(cls, tf_op, inputs):

        # super().print_operation_details(tf_op)
        alpha = tf_op.get_attr("alpha")

        input_identifier = code_gen.c_safe_identifier(inputs[0].name)
        type = code_gen.get_c_dtype(tf_op.outputs[0].dtype.base_dtype)

        code = "%s %s.cwiseMax(%s * (%s)%f);" % \
               (base_op.BaseOpKernel.output_assignment(tf_op, True),
                input_identifier,
                input_identifier,
                type,
                alpha)
        return code
Example #18
File: nn_ops.py Project: VoltaAI/TFMin
    def gen_code(cls, tf_op, inputs):

        input_identifier = code_gen.c_safe_identifier(inputs[0].name)
        filter_identifier = code_gen.c_safe_identifier(inputs[1].name)
        output_identifier = code_gen.c_safe_identifier(tf_op.outputs[0].name)

        padding = "Eigen::"
        if tf_op.get_attr("padding") == b'SAME':
            padding += "PADDING_SAME"
        elif tf_op.get_attr("padding") == b'VALID':
            padding += "PADDING_VALID"

        filter_stride = np.array(tf_op.get_attr("strides"))
        row_stride = filter_stride[1]
        col_stride = filter_stride[2]

        row_dilation = 1
        col_dilation = 1

        code = base_op.BaseOpKernel.output_assignment(tf_op,
                                                      eval=True,
                                                      idx=0,
                                                      assignment=False)

        code += "TFMin::ConvTFL::conv(%s, %s, %s, %s, %d, %d, %d, %d)" % \
                    (input_identifier,
                     filter_identifier,
                     output_identifier,
                     padding,
                     col_stride,
                     row_stride,
                     col_dilation,
                     row_dilation
                     )

        return code
Example #19
    def gen_code(cls, tf_op, inputs):

        input_identifier = code_gen.c_safe_identifier(inputs[0].name)
        type = code_gen.get_c_dtype(tf_op.outputs[0].dtype.base_dtype)

        six_constant = 6

        # if this is operating on quantised data then the
        # six_constant will need multiplying by the correct power of 2.

        code = "%s %s.cwiseMax((%s)0).cwiseMin((%s)%d);" % \
               (base_op.BaseOpKernel.output_assignment(tf_op, True),
                input_identifier,
                type,
                type,
                six_constant)
        return code
Example #20
File: exporter.py Project: VoltaAI/TFMin
    def add_verification_to_class(self, class_obj, constructor):
        if self.validation_type == 'Full':
            for op in self.list_operations:
                for out in op.outputs:

                    identifier = code_gen.c_safe_identifier(out.name)
                    shape = tf_utils.np_tensor_shape(out)
                    if len(shape) == 0:
                        shape = [1]
                    type = code_gen.get_c_dtype(out.dtype)

                    inner_template = cpp_gen.TemplateInstance()
                    inner_template.add_element(cpp_gen.TypeDefinition(type))
                    inner_template.add_element(str(len(shape)))
                    inner_template.add_element("Eigen::" + self.data_layout)
                    template = cpp_gen.TemplateInstance()
                    template.add_element(
                        cpp_gen.TypeDefinition('Tensor',
                                               namespace='Eigen',
                                               template=inner_template))
                    tensor_type = cpp_gen.TypeDefinition('TensorMap',
                                                         namespace='Eigen',
                                                         template=template)
                    tensor_map_property = cpp_gen.ClassProperty(
                        identifier + "_val", tensor_type)
                    tensor_map_property.access_modifier = "private"
                    class_obj.add(tensor_map_property)
                    lit_suffix = ""
                    if type == "float" or type == "double" or type == "long double":
                        lit_suffix = "Hex"

                    literal_identifier = (class_obj.identifier + "Weights::" +
                                          identifier + "VerificationData" +
                                          lit_suffix)

                    constructor.initialiser_list += [
                        "%s((%s*)%s,%s)" %
                        (identifier + "_val", type, literal_identifier,
                         code_gen.ndarray_1d_to_literal(
                             shape, open='', close=''))
                    ]
Example #21
File: nn_ops.py Project: VoltaAI/TFMin
    def gen_code(cls, tf_op, inputs):

        input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
        type = code_gen.get_c_dtype(inputs[0].dtype.base_dtype)

        code = "\nauto %sExp = %s.exp();" % \
               (input0_identifier,
                input0_identifier)

        code += "\nEigen::Tensor<%s, 0, %s> %sExpSum = %sExp.sum();" % \
                (type,
                 base_op.BaseOpKernel.data_layout,
                 input0_identifier,
                 input0_identifier)

        code += "%s %sExp / %sExp.constant(%sExpSum(0));" % \
                (base_op.BaseOpKernel.output_assignment(tf_op, True),
                 input0_identifier,
                 input0_identifier,
                 input0_identifier)
        return code
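A minimal Eigen sketch of the three generated statements: exponentiate, reduce to a rank-0 sum, then divide by a constant tensor holding that sum.

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
        Eigen::Tensor<float, 1, Eigen::RowMajor> logits(3);
        logits.setValues({1.0f, 2.0f, 3.0f});

        Eigen::Tensor<float, 1, Eigen::RowMajor> e = logits.exp();
        Eigen::Tensor<float, 0, Eigen::RowMajor> eSum = e.sum();
        Eigen::Tensor<float, 1, Eigen::RowMajor> probs = e / e.constant(eSum());
        std::cout << probs << std::endl;  // entries sum to 1
        return 0;
    }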
Example #22
    def gen_code(cls, tf_op, inputs):

        input_identifier = code_gen.c_safe_identifier(inputs[0].name)

        begin_type = tf_utils.get_parent_of_tensor(inputs[1]).type
        if begin_type != "Const":
            print("Error generating 'Slice' Operation: op_kernel only "
                  "supports Constant begin tensors.")
            return "// Error cannot generate Slice operation with " \
                   "non-const begin tensor!"

        size_type = tf_utils.get_parent_of_tensor(inputs[2]).type
        if size_type != "Const":
            print("Error generating 'Slice' Operation: op_kernel only "
                  "supports Constant size tensors.")
            return "// Error cannot generate Slice operation with " \
                   "non-const size tensor!"

        begin = tf_utils.get_const_tensor(
            tf_utils.get_parent_of_tensor(inputs[1]))
        size = tf_utils.get_const_tensor(
            tf_utils.get_parent_of_tensor(inputs[2]))

        # if -1 was given for any size dimensions then set them to the size
        # required to fill the remainder of the input
        input_shape = tf_utils.np_tensor_shape(inputs[0])
        for si in range(len(size)):
            if size[si] == -1:
                size[si] = input_shape[si] - begin[si]

        # build the slice expression using the input tensor's true rank
        rank = len(input_shape)
        code = "%s %s.slice(Eigen::array<int, %d>(%s), " \
               "Eigen::array<int, %d>(%s));" % \
               (base_op.BaseOpKernel.output_assignment(tf_op, True),
                input_identifier,
                rank,
                code_gen.ndarray_1d_to_literal(begin),
                rank,
                code_gen.ndarray_1d_to_literal(size))

        # print("Slice operation looks like this. . .")
        # super().print_operation_details(tf_op)

        return code
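A minimal Eigen sketch of the emitted slice call; the offset and extent arrays follow TensorFlow's begin/size convention.

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
        Eigen::Tensor<float, 2, Eigen::RowMajor> t(3, 4);
        t.setZero();
        t(1, 1) = 5.0f;

        Eigen::array<int, 2> begin = {1, 1};
        Eigen::array<int, 2> size = {2, 2};
        Eigen::Tensor<float, 2, Eigen::RowMajor> s = t.slice(begin, size);
        std::cout << s(0, 0) << std::endl;  // 5, taken from t(1, 1)
        return 0;
    }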
Example #23
    def gen_code(cls, tf_op, inputs):

        # output_shape = tf_utils.np_tensor_shape(tf_op.outputs[0])
        output_identifier = code_gen.c_safe_identifier(tf_op.outputs[0].name)

        # print("Fill operation looks like this. . .")
        # super().print_operation_details(tf_op)

        type = code_gen.get_c_dtype(tf_op.outputs[0].dtype.base_dtype)
        constant_value = tf_utils.get_const_scalar(
            tf_utils.get_parent_of_tensor(inputs[1]))

        code = cpp_gen.CodeBlock()
        code.add_statement(
            cpp_gen.Statement(
                base_op.BaseOpKernel.output_assignment(tf_op,
                                                       eval=True,
                                                       assignment=False)))

        code.add_statement(
            cpp_gen.Statement("%s.setConstant((%s)%f)" %
                              (output_identifier, type, constant_value)))
        return code
Example #24
File: exporter.py Project: VoltaAI/TFMin
    def add_memory_trace(self, model_class, constructor):
        """
        add_memory_trace method, adds the properties, calls and template
        instantiations required to run this model attached to the memory
        tracer utility and analyse its memory use pattern.
        :param model_class:
        :param constructor:
        :return:
        """

        # make the memory block pointer public
        model_class.element_by_identifier('memoryBlock').\
          access_modifier = "public"

        # add safe write space in case the calling process doesn't initialize
        # the event location pointers
        safe_write_space = cpp_gen.ClassProperty(
            'safeWriteSpace', type=cpp_gen.TypeDefinition('int'))
        safe_write_space.comment = cpp_gen.Comment(
            "Default location for event trace writes.")
        safe_write_space.access_modifier = 'private'
        model_class.add(safe_write_space)

        # add vector of trace pointers and operation names to class
        trace_events = cpp_gen.ClassProperty(
            'traceEvents',
            type=cpp_gen.TypeDefinition('TFMin::MemoryTraceEvents'))
        model_class.add(trace_events)

        # add vector of memory areas to class
        memory_areas = cpp_gen.ClassProperty(
            'memoryAreas',
            type=cpp_gen.TypeDefinition('TFMin::MemoryTraceAreas'))
        model_class.add(memory_areas)

        # populate events in class constructor
        for op in self.list_operations:
            constructor.code_block.add_statement(
                cpp_gen.Statement(
                    "traceEvents.push_back(TFMin::MemoryTraceEvent"
                    "(\"%s\", &safeWriteSpace))" % op.name))

        # populate memory areas in class constructor
        for area in self.allocated_memory_areas:
            constructor.code_block.add_statement(
                cpp_gen.Statement(
                    "memoryAreas.push_back(TFMin::MemoryTraceArea"
                    "(%d, %d, \"%s\", \"%s\"))" %
                    (area['offset'], area['size'], area['start_op'],
                     area['end_op'])))

        # add memory map size property
        memory_map_size = cpp_gen.ClassProperty(
            'memoryMapSize', type=cpp_gen.TypeDefinition('unsigned long'))
        model_class.add(memory_map_size)
        constructor.code_block.add_statement(
            cpp_gen.Statement("memoryMapSize = %d" % self.memory_map_size))

        # add event trace pointers to each operation
        for op in self.list_operations:
            identifier = code_gen.c_safe_identifier(op.name) + '_TraceEvent'
            trace_pointer = cpp_gen.ClassProperty(identifier,
                                                  type=cpp_gen.TypeDefinition(
                                                      'int',
                                                      volatile=True,
                                                      ptr_levels=1))
            trace_pointer.access_modifier = 'public'
            model_class.add(trace_pointer)

            constructor.code_block.add_statement(
                cpp_gen.Statement("%s = &safeWriteSpace" % identifier))

        # add 'Eigen::MemPreallocDevice' explicit instantiation to all
        # evaluation
        # explc_inst_pre_device = cpp_gen.TemplateInstance()
        # explc_inst_pre_thread_device = cpp_gen.TemplateInstance()
        # explc_inst_pre_device.add_element(cpp_gen.TypeDefinition('Eigen::MemPreallocDevice'))
        # explc_inst_pre_thread_device.add_element(cpp_gen.TypeDefinition('Eigen::ThreadPoolDevice'))

        # additional_explc_instationations = [explc_inst_pre_device,
        #                                     explc_inst_pre_thread_device]
        """eval_method = model_class.element_by_identifier("eval")
Example #25
File: exporter.py Project: VoltaAI/TFMin
    def add_parameters_to_methods(self, eval_method, validate_method,
                                  timing_method, class_name):
        parameter_comment = "Input tensors\n"
        for i, input_placeholder in enumerate(self.list_input_placeholders):
            type = code_gen.get_c_dtype(
                input_placeholder.outputs[0].dtype.base_dtype)
            identifier = code_gen.c_safe_identifier(
                input_placeholder.outputs[0].name)
            shape = tf_utils.np_tensor_shape(input_placeholder.outputs[0])
            if len(shape) == 0:
                shape = [1]

            parameter_comment += "[%s] %s %s\n" % (
                type, identifier, str(input_placeholder.outputs[0].shape[1:]))

            eval_method.parameter_list.add(
                cpp_gen.Parameter(identifier + "Param",
                                  cpp_gen.TypeDefinition(type, ptr_levels=1)))
            timing_method.parameter_list.add(
                cpp_gen.Parameter(identifier + "Param",
                                  cpp_gen.TypeDefinition(type, ptr_levels=1)))

            param_tensor_map = "Eigen::TensorMap<Eigen::Tensor" \
                               "<%s, %d, %s>> %s(%s,%s)" % \
                               (type,
                                len(shape),
                                "Eigen::"+self.data_layout,
                                identifier,
                                identifier+"Param",
                                code_gen.ndarray_1d_to_literal(shape,
                                                               open='',
                                                               close=''))

            val_data_identifier = (class_name + "Weights::" + identifier +
                                   "VerificationDataHex")

            val_tensor_map = (
                "Eigen::TensorMap<Eigen::Tensor"
                "<%s, %d, %s>> %s((%s*)%s,%s)" %
                (type, len(shape), "Eigen::" + self.data_layout, identifier,
                 type, val_data_identifier,
                 code_gen.ndarray_1d_to_literal(shape, open='', close='')))

            comment = None
            if i == 0:
                comment = cpp_gen.Comment("Creating TensorMaps of inputs")

            eval_method.code_block.add_statement(
                cpp_gen.Statement(param_tensor_map, comment))
            timing_method.code_block.add_statement(
                cpp_gen.Statement(param_tensor_map, comment))

            validate_method.code_block.add_statement(
                cpp_gen.Statement(val_tensor_map, comment))

        parameter_comment += "Output tensors\n"
        for out in self.output_tensors:
            type = code_gen.get_c_dtype(out.dtype)
            identifier = code_gen.c_safe_identifier(out.name)
            shape = tf_utils.np_tensor_shape(out)

            parameter_comment += "[%s] %s [%s]\n" % \
                                 (type,
                                  identifier,
                                  code_gen.ndarray_1d_to_literal(shape, open='', close=''))

            eval_method.parameter_list.add(
                cpp_gen.Parameter(identifier + "Param",
                                  cpp_gen.TypeDefinition(type, ptr_levels=1)))
            timing_method.parameter_list.add(
                cpp_gen.Parameter(identifier + "Param",
                                  cpp_gen.TypeDefinition(type, ptr_levels=1)))

            # create buffers to hold the final output tensors in the validate
            # method, which doesn't return anything to the calling process
            dummy_param = "%s %s[%d]" % (type, identifier + "Param",
                                         np.prod(shape))
            dummy_param_comment = cpp_gen.Comment("Dummy parameter for output")
            validate_method.code_block.add_statement(
                cpp_gen.Statement(dummy_param, dummy_param_comment))

            # Tag this tensor as an output so that operation kernels will
            # map the output to the given function parameter instead of a block in the memory map.
            # out.tfmin_is_output = True
            if out.op.type == 'Identity':
                out = out.op.inputs[0]
            out.tfmin_output_identifier = identifier + "Param"

        timing_method.parameter_list.add(
            cpp_gen.Parameter('print',
                              cpp_gen.TypeDefinition('bool'),
                              default='true'))
        eval_method.comment.text += parameter_comment
        timing_method.comment.text += parameter_comment
Example #26
File: exporter.py Project: VoltaAI/TFMin
    def write_data_header(self,
                          file_name,
                          class_name,
                          validation_type='Full',
                          validation_inputs=None):

        # write model training data file
        with open(file_name, "w") as data_file:

            # write file header
            data_file.write("#ifndef __%s_WEIGHTS_H__\n" % class_name.upper())
            data_file.write("#define __%s_WEIGHTS_H__\n" % class_name.upper())
            data_file.write("//" + "-" * 80 + "\n")
            data_file.write("// Training data literal declarations.\n")
            data_file.write("// Generated by TFMin, do not edit.\n")
            data_file.write("//" + "-" * 80 + "\n")

            data_order = 'F'
            if self.data_layout == 'RowMajor':
                data_order = 'C'

            export_data = True  # Debugging aid: set False to generate the
            #                     data header without the MBs of literal text
            #                     that would make it unloadable in an editor.

            data_file.write("namespace %sWeights\n{\n\n" % class_name)

            # evaluate and write model weights
            for tensor in self.list_training_tensors:
                # write flat version
                [var_value] = self.sess.run([tensor], {})

                identifier = code_gen.c_safe_identifier(tensor.name) + "Flat"
                flat_tensor_values = var_value.reshape(var_value.size,
                                                       order=data_order)

                tf_utils.write_numpy_array_c(data_file, "    " + identifier,
                                             flat_tensor_values, export_data)

            # if required add verification data
            if validation_type == "Full":

                # self.list_verification_tensors = self.output_tensors
                for op in self.list_operations:
                    for tensor in op.outputs:
                        self.list_verification_tensors += [tensor]

                for tensor in self.list_verification_tensors:

                    [verification_value] = self.sess.run([tensor],
                                                         validation_inputs)
                    identifier = (code_gen.c_safe_identifier(tensor.name) +
                                  "VerificationData")
                    flat_tensor_values = verification_value.reshape(
                        np.prod(verification_value.shape), order=data_order)
                    tf_utils.write_numpy_array_c(data_file,
                                                 "    " + identifier,
                                                 flat_tensor_values,
                                                 export_data)

            data_file.write("}\n\n")

            data_file.write("#endif  // __%s_WEIGHTS_H__\n" %
                            class_name.upper())
Example #27
    def gen_code(cls, tf_op, inputs):

        # base_op.BaseOpKernel.print_operation_details(tf_op)

        num_split = tf_op.get_attr("num_split")

        # This development version only supports the form where axis is
        # provided by a rank 0 constant operation
        if tf_utils.get_parent_of_tensor(inputs[0]).type != "Const":
            print("Error : Split operation doesn't support computed values "
                  "for axis yet!")
            return "// Error : Couldn't produce split operation with a " \
                   "computed axis dimension."

        # axis is provided by the first input tensor
        axis = tf_utils.get_const_scalar(
            tf_utils.get_parent_of_tensor(inputs[0]))

        # if there is an undefined batch dimension that has been collapsed
        # reduce the axis index by 1
        reduced_rank = len(tf_utils.np_tensor_shape(tf_op.outputs[0]))
        if reduced_rank != tf_op.outputs[0].shape.ndims:
            axis -= (tf_op.outputs[0].shape.ndims - reduced_rank)

        code = ""

        # if num_split is an integer then generate form 1 of this
        # operation where the input tensor is split into
        # num_split tensors, divided evenly along axis
        if type(num_split) is int:

            # verify that the size of dimension 'axis' is a multiple of num_split
            input_axis_size = tf_utils.np_tensor_shape(inputs[1])[axis]
            if input_axis_size % num_split != 0:
                print("Error : Split operation trying to split a dimension of "
                      "size %d into %d parts, which leaves a remainder." %
                      (input_axis_size, num_split))
                return "// Error : Couldn't produce split operation where " \
                       "tensor doesn't divide into num_split parts"

            # Calculate the size in 'axis' of each output slice
            size = input_axis_size // num_split

            input1_identifier = code_gen.c_safe_identifier(inputs[1].name)
            rank = len(tf_utils.np_tensor_shape(inputs[1]))

            offset = np.zeros(rank, dtype=int)
            extents = tf_utils.np_tensor_shape(inputs[1])
            extents[axis] = size

            # generate code for each output tensor
            for idx in range(num_split):
                code += base_op.BaseOpKernel.output_assignment(tf_op, idx=idx)

                offset[axis] = idx * size

                code += " %s.slice(Eigen::array<int, %d>(%s), " \
                        "Eigen::array<int, %d>(%s));" % \
                        (input1_identifier,
                         rank,
                         code_gen.ndarray_1d_to_literal(offset),
                         rank,
                         code_gen.ndarray_1d_to_literal(extents)
                         )

        else:  # TODO need to implement this
            code = "// Error Split operation does not currently " \
                   "support arbitrary sized splits"

        return code
Example #28
File: math_ops.py Project: VoltaAI/TFMin
    def gen_code(cls, tf_op, inputs):

        input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
        input1_identifier = code_gen.c_safe_identifier(inputs[1].name)

        # If the input tensor sizes match then this is a simple element-wise
        # addition; if one of the tensors is smaller than the other then the
        # smaller tensor is 'broadcast' up to the size of the larger one
        input0_expression = input0_identifier
        input1_expression = input1_identifier

        input0_shape = tf_utils.np_tensor_shape(inputs[0])
        input1_shape = tf_utils.np_tensor_shape(inputs[1])

        if not np.array_equal(input0_shape, input1_shape):
            # print("Broadcasting needed in Add operation!")

            # print("Old input_0 (%s) input_1 (%s)" %
            #      (input0_shape, input1_shape))

            smaller = None
            # if one shape has lower rank than the other then pad the smaller rank
            # with size 1 dimensions
            if input1_shape.size < input0_shape.size:
                smaller = 1
                padding = np.ones(int(input0_shape.size - input1_shape.size),
                                  dtype=int)
                input1_shape = np.concatenate((padding, input1_shape))
                input1_expression += ".reshape(Eigen::array<int, %d>(%s))" % \
                                       (input1_shape.size,
                                        code_gen.ndarray_1d_to_literal(input1_shape))
            elif input0_shape.size < input1_shape.size:
                smaller = 0
                padding = np.ones(int(input1_shape.size - input0_shape.size),
                                  dtype=int)
                input0_shape = np.concatenate((padding, input0_shape))
                input0_expression += ".reshape(Eigen::array<int, %d>(%s))" % \
                                       (input0_shape.size,
                                        code_gen.ndarray_1d_to_literal(input0_shape))

            # print("New input_0 (%s) input_1 (%s)" %
            #      (input0_shape, input1_shape))

            broadcast_multiplier = np.ones(input1_shape.size, dtype=int)

            for d in range(input0_shape.size):

                if input0_shape[d] != input1_shape[d]:

                    # check error cases where dimensions are not universally smaller on one side
                    if (smaller == 0 and input0_shape[d] > input1_shape[d]) or\
                            (smaller == 1 and input1_shape[d] > input0_shape[d]):
                        print(
                            "Error: Add operation with non-broadcastable sized input tensors!"
                        )
                        return "// Error generating Add operation, non-broadcastable sized input tensors."

                    # check error case where dimensions are not equal or one of them is 1
                    if (input0_shape[d] < input1_shape[d] and input0_shape[d] != 1) or \
                            (input1_shape[d] < input0_shape[d] and input1_shape[d] != 1):
                        print(
                            "Error: Add operation with non-broadcastable sized input tensors!"
                        )
                        return "// Error generating Add operation, non-broadcastable sized input tensors."

                    # check if this dimension defines the smallest tensor
                    if smaller is None and input0_shape[d] < input1_shape[d]:
                        smaller = 0
                    elif smaller is None and input1_shape[d] < input0_shape[d]:
                        smaller = 1

                    # update the broadcast multiplier for this dimension
                    if smaller == 0:
                        broadcast_multiplier[d] = input1_shape[d]
                    else:
                        broadcast_multiplier[d] = input0_shape[d]

            broadcast_expression = ".broadcast(Eigen::array<int, %d>(%s))" % \
                                   (broadcast_multiplier.size,
                                    code_gen.ndarray_1d_to_literal(broadcast_multiplier))

            # update the expression for the smaller tensor
            if smaller == 0:
                input0_expression += broadcast_expression
            elif smaller == 1:
                input1_expression += broadcast_expression

        code = "%s %s + %s;" % \
               (base_op.BaseOpKernel.output_assignment(tf_op, True),
                input0_expression,
                input1_expression)

        return code
Example #29
File: exporter.py Project: VoltaAI/TFMin
    def add_operations_to_method(self, method, type='eval'):

        if type == 'validate':
            base_op.BaseOpKernel.evaluate_all = True

        if type == 'timing':
            comment = cpp_gen.Comment("Timing working variables")
            method.code_block.add_statement(
                cpp_gen.Statement(self.lib_namespace + "TimingResult result",
                                  comment=comment))
            method.code_block.add_statement(cpp_gen.Statement("float start"))

        # Add operations, including validation and timing as required
        for idx, op in enumerate(self.list_operations):

            operation_code = cpp_gen.CodeBlock()

            if type == 'timing':
                operation_code.add_statement(
                    cpp_gen.Statement("start = getTime()"))

                print_if_statement = cpp_gen.IfStatement("print")
                print_if_statement.if_code.add_statement(
                    cpp_gen.Statement(
                        "std::cout << \"Starting %s [%s]\" << std::endl" %
                        (op.name, op.type)))
                operation_code.add_statement(print_if_statement)

            if type == 'validate':
                operation_code.add_statement(
                    cpp_gen.Statement("std::cout << \"About to perform "
                                      "%s operation [%s]\\n\"" %
                                      (op.type, op.name)))

            if self.export_memory_trace:

                first_parameter = self.list_input_placeholders[0]
                source_identifier = code_gen.c_safe_identifier(
                    first_parameter.outputs[0].name)
                operation_code.add_statement(
                    cpp_gen.Statement("*(traceEvents[%d].addr) = "
                                      "*(int*)%s.data();" %
                                      (idx, source_identifier)))
                #identifier = code_gen.c_safe_identifier(op.name) + '_TraceEvent'
                #first_parameter = self.list_input_placeholders[0]
                #source_identifier = code_gen.c_safe_identifier(first_parameter.outputs[0].name)

                #operation_code.add_statement(
                #  cpp_gen.Statement("*(%s) = *(int*)%s.data()" %
                #                    (identifier,
                #                     source_identifier))
                #)

            # Find op_kernel for this operation type and generate code
            k = op_kernel_loader.find_op_kernel(op)
            if k is not None:
                op_code = k.generate(op)

                if isinstance(op_code, cpp_gen.CodeBlock):
                    operation_code.add_block(op_code)
                else:
                    op_statements = op_code.split(";")

                    for s in op_statements:
                        if s.strip() != "":
                            operation_code.add_statement(
                                cpp_gen.Statement(s.strip()))

            if self.export_memory_trace:

                # identifier = code_gen.c_safe_identifier(op.name)+'_TraceEvent'

                operation_code.add_statement(
                    cpp_gen.Statement("std::cout << *(traceEvents[%d].addr)"
                                      " << std::endl" % idx))

            if type == 'timing':
                operation_code.add_statement(
                    cpp_gen.Statement("result.push_back(TFMin::OperationTime("
                                      "\"%s\", getTime()-start))" % op.name))

                print_if_statement = cpp_gen.IfStatement("print")
                print_if_statement.if_code.add_statement(
                    cpp_gen.Statement("std::cout << \"Completed %s [%s]"
                                      "operation\" << std::endl" %
                                      (op.name, op.type)))
                operation_code.add_statement(print_if_statement)

            if type == 'validate':
                for out in op.outputs:
                    identifier = code_gen.c_safe_identifier(out.name)
                    val_if = cpp_gen.IfStatement("!tensorsApproximatelyEqual("
                                                 "%s, %s_val, true)" %
                                                 (identifier, identifier))
                    val_if.if_code.add_statement(
                        cpp_gen.Statement(
                            "std::cout << \"Validation failed at "
                            "operation [%s]\\n\"" % identifier))
                    val_if.if_code.add_statement(
                        cpp_gen.Statement("return false"))
                    operation_code.add_statement(val_if)

            op_comment = cpp_gen.Comment("Generated %s [%s] operation." %
                                         (op.name, op.type),
                                         style='//')
            operation_code.statements[0].comment = op_comment
            method.code_block.add_block(operation_code)

        if type == 'timing':

            if_print = cpp_gen.IfStatement("print")
            if_print.if_code.add_statement(
                cpp_gen.Statement("printTiming(result)"))
            method.code_block.add_statement(if_print)

            method.code_block.add_statement(cpp_gen.Statement("return result"))

        if type == 'validate':
            method.code_block.add_statement(cpp_gen.Statement("return true"))
            base_op.BaseOpKernel.evaluate_all = False
Example #30
    def output_assignment(tf_op, eval=True, idx=0, assignment=True):
        """ Words."""

        identifier = code_gen.c_safe_identifier(tf_op.outputs[idx].name)
        type = code_gen.get_c_dtype(tf_op.outputs[idx].dtype.base_dtype)
        rank = len(tf_utils.np_tensor_shape(tf_op.outputs[idx]))
        shape_np = tf_utils.np_tensor_shape(tf_op.outputs[idx])
        shape = code_gen.ndarray_1d_to_literal(shape_np, open='', close='')

        # -- special case --
        # if the result of this operation is a model output then
        # create a tensor map to the output buffer
        if hasattr(tf_op.outputs[idx], 'tfmin_output_identifier'):
            code = "\nEigen::TensorMap<Eigen::Tensor<%s, %d, %s>>" % \
                    (type,
                     rank,
                     BaseOpKernel.data_layout)
            code += " %s((%s*)%s, %s);" % \
                    (identifier,
                     type,
                     tf_op.outputs[idx].tfmin_output_identifier,
                     shape)

            if assignment:
                code += "\n%s = " % identifier

            return code

        # if this operation needs to be concrete or all ops are being evaluated
        if BaseOpKernel.evaluate_all or tf_op.tfmin_concrete_needed:
            eval = True

        # if evaluate is true then create a concrete tensor or
        # map of the operations result
        if eval:

            if BaseOpKernel.use_memory_map:

                precalculated_offset = None
                if hasattr(tf_op.outputs[idx], '_tfmin_memory_offset'):
                    precalculated_offset = tf_op.outputs[
                        idx]._tfmin_memory_offset

                tensor_map_pointer = "(%s*)(memoryBlock + %s)" % \
                                     (type,
                                      precalculated_offset)

                # if no precalculated_offset was found then assume it is
                # safe to use the memory space of the input to this operation.
                # NOTE this will be safe in most cases but this may well explode
                # in some rare cases!! I apologise in advance if this has just
                # happened to you.
                if precalculated_offset is None:
                    input = tf_op.inputs[0]
                    if input.op.type == "Identity":
                        input = input.op.inputs[0]
                    tensor_map_pointer = "%s.data()" % \
                                         code_gen.c_safe_identifier(input.name)

                code = ("\nEigen::TensorMap<Eigen::Tensor<%s, %d, %s>>" %
                        (type, rank, BaseOpKernel.data_layout))

                code += " %s(%s, %s);" % \
                        (identifier,
                         tensor_map_pointer,
                         shape)
            else:
                code = "\nEigen::Tensor<%s, %d, %s> %s =" % \
                        (type,
                         rank,
                         data_layout,
                         identifier)

            if assignment:
                code += "\n%s.device(d) =" % identifier

            return code

        # if this operation is not being evaluated then create
        # an auto type so that the Eigen library produces an evaluator
        # object instead of a concrete tensor.
        else:
            code = "\nauto %s = " % identifier

            return code
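A minimal sketch of the TensorMap idiom this method builds on: the map wraps an existing buffer (here a stand-in for memoryBlock plus a precalculated offset) without owning or copying it, which is how several intermediate tensors can alias one preallocated block.

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
        alignas(16) unsigned char memoryBlock[64] = {};
        Eigen::TensorMap<Eigen::Tensor<float, 2, Eigen::RowMajor>>
            out((float*)(memoryBlock + 0), 2, 2);
        out.setConstant(3.0f);
        std::cout << out(1, 1) << std::endl;  // 3, stored inside memoryBlock
        return 0;
    }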