Example #1
class TensorflowEmitter(Emitter):

    dtype_map = {
        graph_pb2.DT_FLOAT16 : "tf.float16",
        graph_pb2.DT_FLOAT32 : "tf.float32",
        graph_pb2.DT_FLOAT64 : "tf.float64",
        graph_pb2.DT_INT16 : "tf.int16",
        graph_pb2.DT_INT32 : "tf.int32",
        graph_pb2.DT_INT64 : "tf.int64",
        graph_pb2.DT_UINT8 : "tf.uint8",
        graph_pb2.DT_UINT16 : "tf.uint16"
    }


    @property
    def header_code(self):
        return """import tensorflow as tf

__weights_dict = dict()

is_train = {}

def load_weights(weight_file):
    import numpy as np

    if weight_file is None:
        return

    try:
        weights_dict = np.load(weight_file).item()
    except Exception:
        weights_dict = np.load(weight_file, encoding='bytes').item()

    return weights_dict


def KitModel(weight_file = None):
    global __weights_dict
    __weights_dict = load_weights(weight_file)
""".format(self.trainable)


    def __init__(self, model):
        super(TensorflowEmitter, self).__init__()

        from six import string_types as _string_types
        if isinstance(model, _string_types):
            network_path = model
        else:
            network_path = model[0]
            self._load_weights(model[1])

        self.IR_graph = IRGraph(network_path)
        super(TensorflowEmitter, self)._build()


    def gen_code(self, phase):
        self.trainable = (phase == 'train')
        self.add_body(0, self.header_code)

        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            node_type = current_node.type

            if hasattr(self, "emit_" + node_type):
                func = getattr(self, "emit_" + node_type)
                func(current_node)
            else:
                print("TensorflowEmitter does not support operator [%s]." % (node_type))
                self.emit_UNKNOWN(current_node)

        self.add_body(1, "return {}, {}".format(
            ', '.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.input_layers]),
            ', '.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.output_layers])))

        self.add_body(0, "")
        for i in self.used_layers:
            func = getattr(self, "_layer_" + i)
            func()

        return self.body_code


    @staticmethod
    def _shapeToStr(shapes):
        ret = [dim.size if dim.size != -1 else 'None' for dim in shapes.dim]
        return ', '.join('%s' % i for i in ret)


    def emit_Conv(self, IR_node):
        self.used_layers.add(IR_node.type)
        strides_str = ', '.join('%s' % i for i in IR_node.get_attr('strides')[1:-1])
        input_node, padding = self._defuse_padding(IR_node)
        self.add_body(1, "{:<15} = convolution({}, group={}, strides=[{}], padding='{}', name='{}')".format(
            IR_node.variable_name,
            input_node,
            IR_node.get_attr('group', 1),
            strides_str,
            padding,
            IR_node.name))


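    # Note: _defuse_padding splits IR padding semantics in two. 'SAME'/'VALID'
    # auto-padding is forwarded to the TensorFlow op unchanged, while explicit
    # per-dimension pads are emitted as a separate tf.pad() call followed by a
    # 'VALID' convolution/pooling op.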
    def _defuse_padding(self, IR_node, extra_str=""):
        auto_pad = IR_node.get_attr('auto_pad')
        if auto_pad:
            input_node = self.parent_variable_name(IR_node)
            if auto_pad == 'VALID':
                padding = 'VALID'
            elif auto_pad.startswith("SAME"):
                padding = 'SAME'
            else:
                raise ValueError("Unknown padding type [{}].".format(auto_pad))

            return input_node, padding

        else:
            padding = IR_node.get_attr("pads")
            padding = convert_onnx_pad_to_tf(padding)
            if not is_valid_padding(padding):
                input_node = IR_node.variable_name + '_pad'
                self.add_body(1, "{:<15} = tf.pad({}, paddings = {}{})".format(
                    input_node,
                    self.parent_variable_name(IR_node),
                    padding,
                    extra_str
                    ))
            else:
                input_node = self.parent_variable_name(IR_node)

            return input_node, 'VALID'


    def emit_Pool(self, IR_node):
        pooling_type = IR_node.get_attr('pooling_type')
        if pooling_type == 'MAX':
            op = 'max_pool'
            padding_const = ", constant_values=float('-Inf')"
        elif pooling_type == 'AVG':
            op = 'avg_pool'
            padding_const = ""
        else:
            raise ValueError("Unknown pooling type [{}].".format(pooling_type))

        arrlen = len(IR_node.get_attr('strides'))
        dim_str = '3d' if arrlen == 5 else ""

        if IR_node.layer.attr['global_pooling'].b:
            self.add_body(1, "{:<15} = tf.nn.{}{}({}, [1] + {}.get_shape().as_list()[1:-1] + [1], strides = [1] * {}, padding = 'VALID', name = '{}')".format(
                IR_node.variable_name,
                op,
                dim_str,
                self.parent_variable_name(IR_node),
                self.parent_variable_name(IR_node),
                arrlen,
                IR_node.name))

        else:
            kernel_shape_str = ', '.join('%s' % i for i in IR_node.get_attr('kernel_shape'))
            strides_str = ', '.join('%s' % i for i in IR_node.get_attr('strides'))

            input_node, padding = self._defuse_padding(IR_node, padding_const)

            self.add_body(1, "{:<15} = tf.nn.{}{}({}, [{}], [{}], padding='{}', name='{}')".format(
                IR_node.variable_name,
                op,
                dim_str,
                input_node,
                kernel_shape_str,
                strides_str,
                padding,
                IR_node.name))


    def emit_UNKNOWN(self, IR_node):
        print(IR_node.name)


    def emit_DataInput(self, IR_node):
        assert not IR_node.in_edges
        shape_str = self._shapeToStr(IR_node.layer.attr["shape"].shape)

        if 'dtype' in IR_node.layer.attr:
            dtype_str = self.dtype_map[IR_node.layer.attr['dtype'].type]
        else:
            dtype_str = "tf.float32"

        code = "{:<15} = tf.placeholder({}, shape = ({}), name = '{}')".format(
            IR_node.variable_name, dtype_str, shape_str, IR_node.name
        )

        self.add_body(1, code)


    def emit_Dropout(self, IR_node):
        parent = self.IR_graph.get_parent(IR_node.name, [0])
        if self.trainable:
            # Emit a native tf.nn.dropout call; the original referenced an
            # undefined Keras-style Dropout in the generated code.
            self.add_body(1, "{:<15} = tf.nn.dropout({}, keep_prob = {}, name = '{}')".format(
                IR_node.variable_name,
                parent.real_variable_name,
                IR_node.IR_layer.attr["keep_prob"].f,
                IR_node.name))
        else:
            IR_node.real_name = parent.real_name


    def emit_FullyConnected(self, IR_node):
        if IR_node.name in self.weights_dict and 'weights' in self.weights_dict[IR_node.name]:
            kernel_str = "kernel_initializer = tf.constant_initializer(__weights_dict['{}']['weights']), ".format(IR_node.name)
        else:
            kernel_str = ""

        if IR_node.name in self.weights_dict and 'bias' in self.weights_dict[IR_node.name]:
            bias_str = "bias_initializer = tf.constant_initializer(__weights_dict['{}']['bias']), ".format(IR_node.name)
        else:
            bias_str = ""

        code = "{:<15} = tf.layers.dense({}, {}, {}{}use_bias = {})".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.layer.attr['units'].i,
            kernel_str,
            bias_str,
            IR_node.layer.attr['use_bias'].b)
        self.add_body(1, code)


    def emit_Flatten(self, IR_node):
        #self._emit_unary_operation(IR_node, "contrib.layers.flatten")
        self.add_body(1, "{:<15} = tf.contrib.layers.flatten({})".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node)))


    def emit_Reshape(self, IR_node):
        self.add_body(1, "{:<15} = tf.reshape({}, [{}], '{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            ', '.join('%s' % i for i in IR_node.get_attr('shape')),
            IR_node.name))


    def _emit_unary_operation(self, IR_node, op_name):
        self.add_body(1, "{:<15} = tf.{}({}, name = '{}')".format(
            IR_node.variable_name,
            op_name,
            self.parent_variable_name(IR_node),
            IR_node.name))


    def emit_Tanh(self, IR_node):
        self._emit_unary_operation(IR_node, 'tanh')

    def emit_Elu(self, IR_node):
        self._emit_unary_operation(IR_node, 'nn.elu')


    def emit_Relu(self, IR_node):
        self._emit_unary_operation(IR_node, 'nn.relu')


    def emit_Relu6(self, IR_node):
        self._emit_unary_operation(IR_node, 'nn.relu6')


    def emit_CRelu(self, IR_node):
        self._emit_unary_operation(IR_node, 'nn.crelu')


    def emit_Softmax(self, IR_node):
        self._emit_unary_operation(IR_node, 'nn.softmax')


    def emit_Sigmoid(self, IR_node):
        self._emit_unary_operation(IR_node, 'sigmoid')


    def emit_Embedding(self, IR_node):
        raise NotImplementedError()
        ret = "{:<15} = Embedding(input_dim = {}, output_dim = {}, mask_zero = {})({})".format(
                IR_node.name,
                IR_node.IR_layer.attr['input_dim'].i,
                IR_node.IR_layer.attr['output_dim'].i,
                IR_node.IR_layer.attr['mask_zero'].b,
                IR_node.in_edges[0])

        return ret


    def emit_RNNs(self, IR_node, func):
        assert False


    def emit_LSTM(self, IR_node):
        return self.emit_RNNs(IR_node, "LSTM")


    def emit_GRU(self, IR_node):
        return self.emit_RNNs(IR_node, "GRU")


    def emit_Add(self, IR_node):
        self.add_body(1, "{:<15} = {}".format(
            IR_node.variable_name,
            ' + '.join('%s' % self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges)))


    def emit_Concat(self, IR_node):
        self.add_body(1, "{:<15} = tf.concat([{}], {}, name = '{}')".format(
            IR_node.variable_name,
            ', '.join(self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges),
            IR_node.layer.attr['axis'].i,
            IR_node.name))


    def emit_BatchNorm(self, IR_node):
        self.used_layers.add(IR_node.type)
        self.add_body(1, "{:<15} = batch_normalization({}, variance_epsilon={}, name='{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.get_attr('epsilon'),
            IR_node.name))


    def emit_Pad(self, IR_node):
        padding = IR_node.get_attr('pads')
        padding = convert_onnx_pad_to_tf(padding)

        mode = IR_node.get_attr('mode', 'constant')
        if mode == 'constant' or mode == 'reflect':
            mode = mode.upper()
        elif mode == 'edge':
            mode = 'SYMMETRIC'
        else:
            raise NotImplementedError("Unsupported padding mode {}.".format(mode))

        self.add_body(1, "{:<15} = tf.pad({}, {}, '{}', name='{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            padding,
            mode,
            IR_node.name))


    def emit_Squeeze(self, IR_node):
        self.add_body(1, "{:<15} = tf.squeeze({}, [{}], name = '{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            ', '.join('%s' % axis for axis in IR_node.layer.attr['axes'].list.i),
            IR_node.name))


    def emit_ReduceMean(self, IR_node):
        self.add_body(1, "{:<15} = tf.reduce_mean({}, [{}], {}, name = '{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            ','.join('%s' % i for i in IR_node.get_attr('axes')),
            IR_node.get_attr('keepdims'),
            IR_node.name))


    def emit_LRN(self, IR_node):
        self.add_body(1, "{:<15} = tf.nn.lrn({}, {}, alpha = {}, beta = {}, name = '{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.get_attr('size') - 1,
            IR_node.layer.attr['alpha'].f / (IR_node.layer.attr['size'].i * 2 - 1),
            IR_node.get_attr('beta'),
            IR_node.name))


    def emit_SeparableConv(self, IR_node):
        self.used_layers.add(IR_node.type)
        strides_str = ', '.join('%s' % i for i in IR_node.get_attr('strides'))
        input_node, padding = self._defuse_padding(IR_node)
        self.add_body(1, "{:<15} = separable_convolution({}, strides = [{}], padding = '{}', name = '{}')".format(
            IR_node.variable_name,
            input_node,
            strides_str,
            padding,
            IR_node.name))


    def emit_DepthwiseConv(self, IR_node):
        self.used_layers.add(IR_node.type)
        strides_str = ', '.join('%s' % i for i in IR_node.layer.attr['strides'].list.i)
        input_node, padding = self._defuse_padding(IR_node)
        self.add_body(1, "{:<15} = depthwise_convolution({}, strides = [{}], padding = '{}', name = '{}')".format(
            IR_node.variable_name,
            input_node,
            strides_str,
            padding,
            IR_node.name))


    def _layer_Conv(self):
        self.add_body(0, """
def convolution(input, name, group, **kwargs):
    w = tf.Variable(__weights_dict[name]['weights'], trainable=is_train, name=name + "_weight")
    if group == 1:
        layer = tf.nn.convolution(input, w, **kwargs)
    else:
        weight_groups = tf.split(w, num_or_size_splits=group, axis=-1)
        xs = tf.split(input, num_or_size_splits=group, axis=-1)
        convolved = [tf.nn.convolution(x, weight, **kwargs) for
                    (x, weight) in zip(xs, weight_groups)]
        layer = tf.concat(convolved, axis=-1)

    if 'bias' in __weights_dict[name]:
        b = tf.Variable(__weights_dict[name]['bias'], trainable=is_train, name=name + "_bias")
        layer = layer + b
    return layer""")


    def _layer_BatchNorm(self):
        self.add_body(0, """
def batch_normalization(input, name, **kwargs):
    mean = tf.Variable(__weights_dict[name]['mean'], name = name + "_mean", trainable = is_train)
    variance = tf.Variable(__weights_dict[name]['var'], name = name + "_var", trainable = is_train)
    offset = tf.Variable(__weights_dict[name]['bias'], name = name + "_bias", trainable = is_train) if 'bias' in __weights_dict[name] else None
    scale = tf.Variable(__weights_dict[name]['scale'], name = name + "_scale", trainable = is_train) if 'scale' in __weights_dict[name] else None
    return tf.nn.batch_normalization(input, mean, variance, offset, scale, name = name, **kwargs)
""")


    def _layer_SeparableConv(self):
        self.add_body(0, """
def separable_convolution(input, name, **kwargs):
    depthwise = tf.Variable(__weights_dict[name]['depthwise_filter'], trainable = is_train, name = name + "_df")
    pointwise = tf.Variable(__weights_dict[name]['pointwise_filter'], trainable = is_train, name = name + "_pf")
    layer = tf.nn.separable_conv2d(input, depthwise, pointwise, **kwargs)
    if 'bias' in __weights_dict[name]:
        b = tf.Variable(__weights_dict[name]['bias'], trainable = is_train, name = name + "_bias")
        layer = layer + b
    return layer""")


    def _layer_DepthwiseConv(self):
        self.add_body(0, """
def depthwise_convolution(input, name, **kwargs):
    depthwise = tf.Variable(__weights_dict[name]['weights'], trainable = is_train, name = name + "_df")
    layer = tf.nn.depthwise_conv2d(input, depthwise, **kwargs)
    if 'bias' in __weights_dict[name]:
        b = tf.Variable(__weights_dict[name]['bias'], trainable = is_train, name = name + "_bias")
        layer = layer + b
    return layer""")
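
A minimal usage sketch. Hedged assumptions: the base Emitter class wires up _build() and weight loading as shown in __init__ above, and 'converted_ir.pb' / 'converted_ir.npy' are hypothetical paths produced by an earlier MMdnn conversion step.

# Hypothetical inputs: an IR graph file plus its weight file.
emitter = TensorflowEmitter(('converted_ir.pb', 'converted_ir.npy'))

# gen_code returns the source of a self-contained module defining KitModel().
code = emitter.gen_code(phase='test')
with open('tf_kit_model.py', 'w') as f:
    f.write(code)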
Example #2
class CoreMLEmitter(Emitter):

    def __init__(self, architecture, weight):
        super(CoreMLEmitter, self).__init__()
        if not os.path.exists(architecture):
            raise ValueError("IR architecture file [{}] not found.".format(architecture))
        else:
            self.IR_graph = IRGraph(architecture)
            self.IR_graph.build()

        if not os.path.exists(weight):
            raise ValueError("IR weight file [{}] not found.".format(weight))
        else:
            self._load_weights(weight)


    def _get_inout(self):
        input_features = []
        output_features = []
        for input_node in self.IR_graph.input_layers:
            shape = shape_to_list(self.IR_graph.get_node(input_node).get_attr('shape'))
            shape = _infer_coreml_input_shape(shape)
            input_features.append((str(input_node), shape))
            print("CoreML Model Input Layer: [{}] {}".format(input_node, shape))

        for output_node in self.IR_graph.output_layers:
            node = self.IR_graph.get_node(output_node)
            node.out_edges.append(node.name)
            shape = node.get_attr('_output_shapes')
            if shape:
                shape = shape_to_list(shape[0])
            else:
                shape = [1]


            if shape == []:
                pre_output_node = self.IR_graph.get_node(node.in_edges[0])
                pre_output_node.out_edges.append(pre_output_node.name)
                shape = pre_output_node.get_attr('_output_shapes')
                shape = shape_to_list(shape[0])

            shape = _infer_coreml_input_shape(shape)


            output_features.append((str(node.in_edges[0]), shape))
            print("CoreML Model Output Layer: [{}] {}".format(output_node, shape))

        return list(input_features), list(output_features)

    def _connect_coreml_layers(self):
        for layer in self.builder.nn_spec.layers:
            # for i, in_node in enumerate(layer.input):
            #     layer.input[i] = self.IR_graph.get_node(in_node).real_name

            for i, out_node in enumerate(layer.output):
                layer.output[i] = self.IR_graph.get_node(out_node).real_name

    def gen_model(self,
                  input_names=None,
                  output_names=None,
                  image_input_names=None,
                  is_bgr=False,
                  red_bias=0.0,
                  green_bias=0.0,
                  blue_bias=0.0,
                  gray_bias=0.0,
                  image_scale=1.0,
                  class_labels=None,
                  predicted_feature_name=None,
                  predicted_probabilities_output=''):

        input_features, output_features = self._get_inout()
        # assert False
        is_classifier = class_labels is not None
        mode = 'classifier' if is_classifier else None
        self.builder = _NeuralNetworkBuilder(input_features, output_features, mode=mode)

        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            print("Converting layer {}({})".format(current_node.name, current_node.type))
            node_type = current_node.type

            if hasattr(self, "emit_" + node_type):
                func = getattr(self, "emit_" + node_type)
                func(current_node)
            else:
                print("CoreMLEmitter does not support operator [%s]." % (node_type))
                self.emit_UNKNOWN(current_node)
                assert False

        # self._connect_coreml_layers()
        # Add classifier classes (if applicable)
        if is_classifier:
            classes_in = class_labels
            if isinstance(classes_in, _string_types):
                if not os.path.isfile(classes_in):
                    raise ValueError("Path to class labels [{}] does not exist.".format(classes_in))
                with open(classes_in, 'r') as f:
                    classes = f.read()
                classes = classes.splitlines()
            elif isinstance(classes_in, list):  # list of ints or strings
                classes = classes_in
            else:
                raise ValueError('Class labels must be a list of integers / strings, or a file path')

            if predicted_feature_name is not None:
                self.builder.set_class_labels(classes, predicted_feature_name = predicted_feature_name,
                    prediction_blob = predicted_probabilities_output)
            else:
                self.builder.set_class_labels(classes)

        # Set pre-processing parameters
        self.builder.set_pre_processing_parameters(
            image_input_names=[input_features[0][0]],
            #image_input_names,
            is_bgr=is_bgr,
            red_bias=red_bias,
            green_bias=green_bias,
            blue_bias=blue_bias,
            gray_bias=gray_bias,
            image_scale=image_scale)

        # Return the protobuf spec
        # model = _MLModel(self.builder.spec)

        print(self.builder.spec.description)

        return self.builder.spec, input_features, output_features


    @staticmethod
    def _get_padding(IR_node):
        auto_pads = IR_node.get_attr('auto_pad')
        if auto_pads is not None:
            if auto_pads == 'VALID':
                return auto_pads
            else:
                return 'SAME'

        pads = IR_node.get_attr('pads')
        if is_valid_padding(pads):
            return 'VALID'
        else:
            return 'SAME'

    def _emit_merge(self, IR_node, func):
        """
        Convert concat layer to coreml.
        """
        # Get input and output names
        input_names = [self.IR_graph.get_node(inp).real_name for inp in IR_node.in_edges]

        self.builder.add_elementwise(name=IR_node.name, input_names=input_names,
            output_name=IR_node.name, mode=func)

    def emit_Conv(self, IR_node):
        """
        Convert convolution layer to coreml.
        """
        has_bias = IR_node.get_attr('use_bias', False)
        is_deconv = False # TODO: Deconv

        # Get the weights.
        output_channels = IR_node.get_attr('kernel_shape')[-1]

        # Dimensions and weights
        if is_deconv:
            # Deconvolution is not wired up yet; the lines below only sketch
            # the intended weight layout and are unreachable until it is.
            raise NotImplementedError()
            height, width, n_filters, channels = weightList[0].shape
            W = weightList[0].transpose([0, 1, 3, 2])
            output_shape = output_blob_shape[:-1]
        else:
            W = self.weights_dict[IR_node.name]['weights']
            height, width, channels, n_filters = W.shape
            output_shape = None
        b = self.weights_dict[IR_node.name]['bias'] if has_bias else None

        stride_height, stride_width = IR_node.get_attr('strides')[1], IR_node.get_attr('strides')[2]

        # Dilations
        dilations = IR_node.get_attr('dilations', [1, 1])
        if is_deconv and not dilations == [1, 1]:
            raise ValueError("Unsupported non-unity dilation for Deconvolution layer")

        groups = IR_node.get_attr('group', 1)
        kernel_channels = channels

        padding = self._get_padding(IR_node).lower()

        input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name
        # print(self.IR_graph.get_parent(IR_node.name, [0]).layer)
        # print(input_name)
        # print(IR_node.real_name)

        self.builder.add_convolution(name=IR_node.real_name,
                                     kernel_channels=kernel_channels,
                                     output_channels=output_channels,
                                     height=height,
                                     width=width,
                                     stride_height=stride_height,
                                     stride_width=stride_width,
                                     border_mode=padding,
                                     groups=groups,
                                     W=W,
                                     b=b,
                                     has_bias=has_bias,
                                     is_deconv=is_deconv,
                                     output_shape=output_shape,
                                     input_name=input_name,
                                     output_name=IR_node.real_name,
                                     dilation_factors=dilations)


    def emit_DepthwiseConv(self, IR_node):
        # depth-wise convolution

        input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name
        kernel_channels = 1
        is_deconv = False
        has_bias = IR_node.get_attr('use_bias', False)

        depth_multiplier = IR_node.get_attr('kernel_shape')[-1]

        W = self.weights_dict[IR_node.name]['weights']
        height, width, channels, n_filters = W.shape
        output_shape = None
        W = np.reshape(W, (height, width, 1, channels * depth_multiplier))
        b = self.weights_dict[IR_node.name]['bias'] if has_bias else None

        # Dilations
        dilations = IR_node.get_attr('dilations', [1, 1])

        padding = self._get_padding(IR_node).lower()
        output_channels = W.shape[-1]
        groups = W.shape[-1]
        stride_height, stride_width = IR_node.get_attr('strides')[1], IR_node.get_attr('strides')[2]

        self.builder.add_convolution(name=IR_node.real_name,
                                     kernel_channels=kernel_channels,
                                     output_channels=output_channels,
                                     height=height,
                                     width=width,
                                     stride_height=stride_height,
                                     stride_width=stride_width,
                                     border_mode=padding,
                                     groups=groups,
                                     W=W,
                                     b=b,
                                     has_bias=has_bias,
                                     is_deconv=is_deconv,
                                     output_shape=output_shape,
                                     input_name=input_name,
                                     output_name=IR_node.real_name,
                                     dilation_factors=dilations)


    def emit_Pool(self, IR_node):
        """
        Convert pooling layer to coreml.
        """
        # Get input and output names
        input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name

        # Pooling layer type
        pooling_type = IR_node.get_attr('pooling_type')
        if pooling_type == 'MAX':
            layer_type_str = 'MAX'
        elif pooling_type == 'AVG':
            layer_type_str = 'AVERAGE'
        else:
            raise TypeError("Pooling type %s not supported" % pooling_type)

        # if it's global, set the global flag
        global_pooling = IR_node.get_attr('global_pooling', False)
        dim = len(IR_node.get_attr('strides')) - 2
        if global_pooling:
            if dim == 2:
                height, width = (0, 0)
                stride_height = stride_width = 0
                padding_type = 'VALID'
            elif dim == 1:
                # 1-D global pooling is not implemented; the lines below are
                # an unreachable sketch carried over from the Keras converter.
                raise NotImplementedError()
                global_pooling = False
                _, width, channels = keras_layer.input_shape
                height = 1
                stride_height, stride_width = height, width
                padding_type = 'VALID'
            else:
                raise NotImplementedError()

        else:
            height, width = tuple(IR_node.get_attr('kernel_shape')[1:-1])
            stride_height, stride_width = tuple(IR_node.get_attr('strides')[1:-1])

            # Padding
            padding_type = self._get_padding(IR_node)

        self.builder.add_pooling(name=IR_node.name,
                                    height=height,
                                    width=width,
                                    stride_height=stride_height,
                                    stride_width=stride_width,
                                    layer_type=layer_type_str,
                                    padding_type=padding_type,
                                    input_name=input_name,
                                    output_name=IR_node.name,
                                    exclude_pad_area=True,
                                    is_global=global_pooling)


    def emit_UNKNOWN(self, IR_node):
        print(IR_node.name)


    def emit_Crop(self, IR_node):
        input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name
        output_name = IR_node.real_name

        is_1d = False
        border = IR_node.get_attr('border')

        if is_1d:
            raise ValueError("Unrecognized padding option: %s" % (str(border)))
        else:
            if isinstance(border, int):
                top = left = bottom = right = border
            elif isinstance(border, list):
                top, left = border[1], border[0]
                bottom, right = border[2], border[3]
            else:
                raise ValueError("Unrecognized padding option: %s" % (str(border)))

        # Now add the layer
        self.builder.add_crop(name = IR_node.name,
            left = left, right=right, top=top, bottom=bottom, offset = [0,0],
            input_names = [input_name], output_name=output_name
            )
        # assert False



    def emit_DataInput(self, IR_node):
        """ Layers that can be skipped. """
        return


    def emit_Dropout(self, IR_node):
        """ Layers that can be skipped (because they are train-time only). """
        IR_node.real_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name


    def emit_FullyConnected(self, IR_node):
        """
        Convert a dense layer to coreml.
        """
        # Get input and output names
        input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name
        output_name = IR_node.out_edges[0]

        has_bias = IR_node.get_attr('use_bias')

        # Get the weights from keras
        W = self.weights_dict[IR_node.name]['weights'].T
        Wb = self.weights_dict[IR_node.name]['bias'].T if has_bias else None
        output_channels, input_channels = W.shape

        self.builder.add_inner_product(name=IR_node.name,
                                       W=W,
                                       b=Wb,
                                       input_channels=input_channels,
                                       output_channels=output_channels,
                                       has_bias=has_bias,
                                       input_name=input_name,
                                       output_name=IR_node.name)


    def emit_Flatten(self, IR_node):
        """
        Convert a flatten layer from keras to coreml.
        """
        # Get input and output names
        input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name
        output_name = IR_node.out_edges[0]

        """
        # blob_order == 0 if the input blob needs not be rearranged
        # blob_order == 1 if the input blob needs to be rearranged
        blob_order = 0

        # using keras_layer.input.shape have a "?" (Dimension[None] at the front),
        # making a 3D tensor with unknown batch size 4D
        if len(keras_layer.input.shape) == 4:
            blob_order = 1
        """

        self.builder.add_flatten(name=IR_node.name, mode=1,
                                 input_name=input_name, output_name=IR_node.name)


    def emit_Reshape(self, IR_node):
        def shape_str_to_tuple(string, batch_none=False):
            ls = [int(item) for item in string.split(', ')]
            if batch_none:
                ls.insert(0, None)
            return tuple(ls)

        last_node = self.IR_graph.get_node(IR_node.in_edges[0]).layer
        input_shape_dims = last_node.attr["_output_shapes"].list.shape
        target_shape_dims = IR_node.IR_layer.attr["_output_shapes"].list.shape

        input_shape = shape_str_to_tuple(IRGraph.shapeToStr(input_shape_dims[0]), True)
        target_shape = shape_str_to_tuple(IRGraph.shapeToStr(target_shape_dims[0]))

        def get_coreml_target_shape(target_shape):
            if len(target_shape) == 1: #(D,)
                coreml_shape = (1,target_shape[0],1,1)
            elif len(target_shape) == 2: #(S,D)
                coreml_shape = target_shape + (1,1)
            elif len(target_shape) == 3: #(H,W,C)
                coreml_shape = (1, target_shape[2], target_shape[0], target_shape[1])
            else:
                coreml_shape = None
            return coreml_shape

        def get_mode(input_shape, target_shape):
            in_shape = input_shape[1:]
            if len(in_shape) == 3 or len(target_shape) == 3:
                return 1
            else:
                return 0

        input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name
        new_shape = get_coreml_target_shape(target_shape)
        mode = get_mode(input_shape, target_shape)

        self.builder.add_reshape(
            name=IR_node.real_name,
            input_name=input_name,
            output_name=IR_node.real_name,
            target_shape=new_shape,
            mode=mode)



    def emit_Tanh(self, IR_node):
        assert False
        code = "{:<15} = Activation(name = '{}', activation = tanh)({})".format(
                IR_node.replace_scope(IR_node.name),
                IR_node.name,
                IR_node.replace_scope(IR_node.in_edges[0]))
        return code


    def _emit_activation(self, IR_node, act, params=None):
        # Get input and output names
        input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name
        output_name = IR_node.real_name
        self.builder.add_activation(name=IR_node.real_name,
            non_linearity=act,
            input_name=input_name,
            output_name=output_name,
            params=params)


    def emit_Relu(self, IR_node):
        self._emit_activation(IR_node, 'RELU')

    def emit_PRelu(self, IR_node):
        self._emit_activation(IR_node, 'PRELU', self.weights_dict[IR_node.name]['gamma'])


    def emit_Softmax(self, IR_node):
        # Get input and output names
        input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name
        output_name = IR_node.out_edges[0]
        self.builder.add_softmax(name=IR_node.name, input_name=input_name,
                                 output_name=IR_node.name)


    def emit_Sigmoid(self, IR_node):
        assert False
        code = "{:<15} = Activation(name = '{}', activation = 'sigmoid')({})".format(
                IR_node.replace_scope(IR_node.name),
                IR_node.name,
                IR_node.replace_scope(IR_node.in_edges[0]))
        return code

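    # ReLU6 is decomposed below as min(relu(x), 6) = -max(-relu(x), -6):
    # apply RELU, negate via a LINEAR activation with scale -1, clip with a
    # 'threshold' unary (which computes max(x, alpha) with alpha = -6), then
    # negate back with a second LINEAR activation.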
    def emit_Relu6(self, IR_node):
        layer = IR_node.real_name
        input_name, output_name = (IR_node.IR_layer.input[0], IR_node.IR_layer.name)
        relu_output_name = output_name + '_relu'
        self.builder.add_activation(layer, 'RELU', input_name, relu_output_name)
        # negate it
        neg_output_name = relu_output_name + '_neg'
        self.builder.add_activation(layer+'__neg__', 'LINEAR', relu_output_name,
                neg_output_name,[-1.0, 0])
        # apply threshold
        clip_output_name = relu_output_name + '_clip'
        self.builder.add_unary(layer+'__clip__', neg_output_name, clip_output_name,
                'threshold', alpha = -6.0)
        # negate it back
        self.builder.add_activation(
            layer + '_neg2',
            'LINEAR',
            clip_output_name,
            output_name,
            [-1.0, 0])

    def emit_Gather(self, IR_node):
        raise NotImplementedError()
        W = self.weights_dict[IR_node.name]['weights']
        if W.ndim == 2:
            vocab_size = W.shape[0]
            output_channels = W.shape[1]
            builder.add_embedding(
                name=IR_node.real_name,
                W = W,
                b = None,
                input_dim = vocab_size,
                output_channels = output_channels,
                has_bias=False,
                input_name=input_name,
                output_name=IR_node.real_name)
        else:
            raise NotImplementedError()

    def emit_RNNs(self, IR_node, func):
        assert False
        # for Keras
        if "dropout" in IR_node.IR_layer.attr:
            dropout_str = ",dropout = {}, recurrent_dropout = {}".format(
                    IR_node.IR_layer.attr['dropout'].f,
                    IR_node.IR_layer.attr['recurrent_dropout'].f)
        else:
            dropout_str = ""

        code = "{:<15} = {}(units = {}, use_bias = {} {})({})".format(
                IR_node.name,
                func,
                IR_node.IR_layer.attr['units'].i,
                IR_node.IR_layer.attr['use_bias'].b,
                dropout_str,
                IR_node.in_edges[0])

        return code


    def emit_LSTM(self, IR_node):
        return self.emit_RNNs(IR_node, "LSTM")


    def emit_GRU(self, IR_node):
        return self.emit_RNNs(IR_node, "GRU")


    def emit_Add(self, IR_node):
        self._emit_merge(IR_node, 'ADD')


    def emit_Concat(self, IR_node):
        self._emit_merge(IR_node, "CONCAT")


    def emit_BatchNorm(self, IR_node):
        """
        Convert a Batch Normalization layer.
        """

        # Get input and output names
        input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name
        # print(input_name)
        # print(IR_node.real_name)
        axis = IR_node.get_attr('axis', -1)
        nb_channels = IR_node.get_attr('_output_shapes')[0].dim[axis].size

        # Set parameters
        # Parameter arrangement in Keras: gamma, beta, mean, variance
        weights = self.weights_dict[IR_node.name]
        mean = weights['mean']
        std = weights['var']
        gamma = weights.get('scale', np.ones(mean.shape))
        beta = weights.get('bias', np.zeros(mean.shape))

        # Fold mean/variance into the scale and bias so the exported layer is
        # a plain affine transform: y = gamma1 * x + beta1. Note that 'std'
        # actually holds the stored variance here.
        variance = std * std
        f = 1.0 / np.sqrt(std + IR_node.get_attr('epsilon'))
        gamma1 = gamma * f
        beta1 = beta - gamma * mean * f
        mean[:] = 0.0                # folded into beta1
        variance[:] = 1.0 - .00001   # effectively identity
        self.builder.add_batchnorm(
            name=IR_node.real_name,
            channels = nb_channels,
            gamma = gamma1,
            beta = beta1,
            mean = mean,
            variance = variance,
            input_name = input_name,
            output_name=IR_node.real_name)
        # assert False


    def emit_Pad(self, IR_node):
        input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name
        output_name = IR_node.real_name
        is_1d = False
        padding = IR_node.get_attr('pads')

        if is_1d:
            raise ValueError("Unrecognized padding option: %s" % (str(padding)))
        else:
            if isinstance(padding, int):
                top = left = bottom = right = padding
            elif isinstance(padding, list):
                top, left = padding[1], padding[2]
                bottom, right = padding[5], padding[6]
            else:
                raise ValueError("Unrecognized padding option: %s" % (str(padding)))

        # Now add the layer
        self.builder.add_padding(name = IR_node.name,
            left = left, right=right, top=top, bottom=bottom, value = 0,
            input_name = input_name, output_name=output_name
            )


    def emit_Squeeze(self, IR_node):
        self.emit_Flatten(IR_node)
        # if IR_node.name != "MMdnn_Output" :
            # self.emit_Flatten(IR_node)
            # self.emit_Reshape(IR_node)


    def emit_SeparableConv(self, IR_node):

        input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name
        output_name = IR_node.real_name

        assert len(IR_node.get_attr("strides")) == 4
        strides = IR_node.get_attr('strides')
        stride_height, stride_width = (strides[1], strides[2])

        # Get the weights
        W0 = self.weights_dict[IR_node.name]['depthwise_filter']
        W1 = self.weights_dict[IR_node.name]['pointwise_filter']

        padding = IR_node.get_attr('auto_pad').split('_')[0].lower()
        has_bias = IR_node.get_attr('use_bias')
        b = self.weights_dict[IR_node.name]['bias'] if has_bias else None

        output_blob_shape = IR_node.get_attr('_output_shapes')
        shape = shape_to_list(output_blob_shape[0])
        output_channels = shape[-1]

        height, width, input_channels, depth_mult = W0.shape

        W0 = np.reshape(W0, (height, width, 1, input_channels * depth_mult))

        intermediate_name = input_name + '_intermin_'

        self.builder.add_convolution(name = IR_node.name + '_step_1',
             kernel_channels = 1,
             output_channels = input_channels * depth_mult,
             height = height,
             width = width,
             stride_height = stride_height,
             stride_width = stride_width,
             border_mode = padding,
             groups = input_channels,
             W = W0,
             b = None,
             has_bias = False,
             is_deconv = False,
             output_shape = None,
             input_name = input_name,
             output_name = intermediate_name,
             dilation_factors = [1,1])

        self.builder.add_convolution(name = IR_node.name + '_step_2',
                kernel_channels = input_channels * depth_mult,
                output_channels = output_channels,
                height = 1,
                width = 1,
                stride_height = 1,
                stride_width = 1,
                border_mode = padding,
                groups = 1,
                W = W1,
                b = b,
                has_bias = has_bias,
                is_deconv = False,
                output_shape = None,
                input_name = intermediate_name,
                output_name = output_name,
                dilation_factors = [1,1])
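
A minimal usage sketch. Hedged assumptions: coremltools is installed, and 'converted_ir.pb' / 'converted_ir.npy' are hypothetical IR files from an earlier MMdnn conversion step.

import coremltools

emitter = CoreMLEmitter('converted_ir.pb', 'converted_ir.npy')
spec, input_features, output_features = emitter.gen_model()

# Wrap the protobuf spec in an MLModel and serialize it.
model = coremltools.models.MLModel(spec)
model.save('kit_model.mlmodel')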
Example #3
class OnnxEmitter(Emitter):
    dtype_map = {graph_pb2.DT_FLOAT32: "TensorProto.FLOAT"}

    def __init__(self, architecture, weight):
        super(OnnxEmitter, self).__init__()
        if not os.path.exists(architecture):
            raise ValueError(
                "IR architecture file [{}] not found.".format(architecture))
        else:
            self.IR_graph = IRGraph(architecture)
            self.IR_graph.build()

        if not os.path.exists(weight):
            raise ValueError(
                "IR weight file [{}] not found.".format(weight))
        else:
            self._load_weights(weight)

    @property
    def header_code(self):
        return """import numpy as np
from onnx import helper, TensorProto
import onnx

__weights_dict = dict()

def load_weights(weight_file):
    if weight_file is None:
        return

    try:
        weights_dict = np.load(weight_file).item()
    except Exception:
        weights_dict = np.load(weight_file, encoding='bytes').item()

    return weights_dict


def KitModel(weight_file = None):
    global __weights_dict
    __weights_dict = load_weights(weight_file)

"""

    def gen_code(self, phase):
        self.phase = phase
        self.add_body(0, self.header_code)

        self.inputs = []
        self.outputs = []
        self.nodes = []
        self.initializer = []

        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            node_type = current_node.type

            if hasattr(self, "emit_" + node_type):
                func = getattr(self, "emit_" + node_type)
                func(current_node)
            else:
                print("OnnxEmitter does not support operator [%s]." %
                      (node_type))
                self.emit_UNKNOWN(current_node)

        self._process_output_layers()

        self.add_body(
            1, "graph = helper.make_graph([{}], 'mmdnn', [{}], [{}], [{}])".
            format(', '.join(self.nodes), ', '.join(self.inputs),
                   ', '.join(self.outputs), ', '.join(self.initializer)))
        self.add_body(1, "return helper.make_model(graph)")
        return self.body_code

    def run(self, dstNetworkPath, dstWeightPath=None, phase='test'):
        super(OnnxEmitter, self).run(dstNetworkPath, dstWeightPath, phase)
        self.save_weights(self.weights_dict, dstWeightPath)

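    # When a FullyConnected layer consumes a flattened feature map of more
    # than two dimensions, the IR flattening order (channels-last) differs
    # from the NCHW layout of the emitted ONNX graph. The Gemm weights are
    # therefore permuted: reshape to the source feature-map shape, move the
    # channel axis to the front, and reshape back to the 2-D weight shape.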
    def check_if_need_transpose(self, IR_node):
        parent = self.IR_graph.get_parent(IR_node.name, [0])
        while parent.type == 'Flatten':
            parent = self.IR_graph.get_parent(parent.name, [0])
        dim = len(parent.layer.attr['_output_shapes'].list.shape[0].dim)
        if dim > 2:
            original_dims = self.weights_dict[IR_node.name]['weights'].shape
            dims = [
                i.size for i in
                parent.layer.attr['_output_shapes'].list.shape[0].dim[1:]
            ] + [-1]
            weights = self.weights_dict[IR_node.name]['weights']
            weights = np.reshape(weights, dims)
            weights = np.transpose(
                weights, [dim - 2] + list(range(0, dim - 2)) + [dim - 1])
            self.weights_dict[IR_node.name]['weights'] = np.reshape(
                weights, original_dims)

    def _process_output_layers(self):
        for name in self.IR_graph.output_layers:
            IR_node = self.IR_graph.get_node(name)
            shape_str = IRGraph.shapeToStr(
                IR_node.layer.attr["_output_shapes"].list.shape[0])
            if IR_node.layer.attr['dtype'].type == graph_pb2.DT_UNDEFINED:
                IR_node.layer.attr['dtype'].type = graph_pb2.DT_FLOAT32
            dtype_str = self.dtype_map[IR_node.layer.attr['dtype'].type]
            self.add_body(
                1, "{:<15} = helper.make_tensor_value_info('{}', {}, ({},))".
                format(IR_node.variable_name + '_out', IR_node.variable_name,
                       dtype_str, shape_str))
            self.outputs.append(IR_node.variable_name + '_out')

    def emit_DataInput(self, IR_node):
        shape = [
            dim.size if dim.size != -1 else 1
            for dim in IR_node.IR_layer.attr["shape"].shape.dim
        ]
        shape_str = ', '.join('%s' % i for i in shape)
        if IR_node.layer.attr['dtype'].type == graph_pb2.DT_UNDEFINED:
            IR_node.layer.attr['dtype'].type = graph_pb2.DT_FLOAT32
        dtype_str = self.dtype_map[IR_node.layer.attr['dtype'].type]
        self.add_body(
            1,
            "{:<15} = helper.make_tensor_value_info('{}', {}, ({},))".format(
                IR_node.variable_name + '_orig',
                IR_node.variable_name + '_orig', dtype_str, shape_str))
        self.add_body(
            1,
            "{:15} = helper.make_node('Transpose', inputs=['{}'], outputs=['{}'], perm=[0, 3, 1, 2])"
            .format(IR_node.variable_name, IR_node.variable_name + '_orig',
                    IR_node.variable_name))
        self.inputs.append(IR_node.variable_name + '_orig')
        self.nodes.append(IR_node.variable_name)

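    # The IR stores tensors as NHWC while ONNX operators expect NCHW, hence
    # the Transpose node emitted for every graph input above and the
    # [3, 2, 0, 1] permutation (HWIO -> OIHW) applied to convolution weights
    # below.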
    def emit_Conv(self, IR_node):
        dilations = list(IR_node.get_attr('dilations'))[1:-1]
        group = IR_node.get_attr('group', 1)
        kernel_shape = list(IR_node.get_attr('kernel_shape'))[:2]
        pads = IR_node.get_attr('pads')
        pad_length = len(pads)
        pads = pads[1:pad_length // 2 - 1] + pads[pad_length // 2 +
                                                  1:pad_length - 1]
        strides = list(IR_node.get_attr('strides'))[1:-1]
        use_bias = IR_node.get_attr('use_bias')
        self.add_body(
            1, "{:15} = __weights_dict['{}']['weights']".format(
                IR_node.variable_name + '_weight_array', IR_node.name))
        self.add_body(
            1, "{} = {}.transpose([3,2,0,1])".format(
                IR_node.variable_name + '_weight_array',
                IR_node.variable_name + '_weight_array'))
        self.add_body(
            1,
            "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float)))"
            .format(IR_node.variable_name + '_weight',
                    IR_node.variable_name + '_weight',
                    IR_node.variable_name + '_weight_array',
                    IR_node.variable_name + '_weight_array',
                    IR_node.variable_name + '_weight_array'))
        if use_bias:
            self.add_body(
                1, "{:15} = __weights_dict['{}']['bias']".format(
                    IR_node.variable_name + '_bias_array', IR_node.name))
            self.add_body(
                1,
                "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float)))"
                .format(IR_node.variable_name + '_bias',
                        IR_node.variable_name + '_bias',
                        IR_node.variable_name + '_bias_array',
                        IR_node.variable_name + '_bias_array',
                        IR_node.variable_name + '_bias_array'))
            self.add_body(
                1,
                "{:15} = helper.make_node('Conv', inputs=['{}', '{}', '{}'],outputs=['{}'], dilations={}, group={}, kernel_shape={}, pads={}, strides={})"
                .format(IR_node.variable_name,
                        self.parent_variable_name(IR_node),
                        IR_node.variable_name + '_weight',
                        IR_node.variable_name + '_bias', IR_node.variable_name,
                        dilations, group, kernel_shape, pads, strides))
            self.nodes.append(IR_node.variable_name + '_bias')
        else:
            self.add_body(
                1,
                "{:15} = helper.make_node('Conv', inputs=['{}', '{}'],outputs=['{}'], dilations={}, group={}, kernel_shape={}, pads={}, strides={})"
                .format(IR_node.variable_name,
                        self.parent_variable_name(IR_node),
                        IR_node.variable_name + '_weight',
                        IR_node.variable_name, dilations, group, kernel_shape,
                        pads, strides))
        self.nodes.append(IR_node.variable_name + '_weight')
        self.nodes.append(IR_node.variable_name)

    def emit_BatchNorm(self, IR_node):
        epsilon = IR_node.get_attr('epsilon')
        if IR_node.get_attr('scale'):
            self.add_body(
                1, "{:15} = __weights_dict['{}']['scale']".format(
                    IR_node.variable_name + '_scale_array', IR_node.name))
        else:
            self.add_body(
                1,
                "{:15} = np.ndarray(__weights_dict['{}']['bias'].shape, dtype=__weights_dict['{}']['bias'].dtype)"
                .format(IR_node.variable_name + '_scale_array', IR_node.name,
                        IR_node.name))
            self.add_body(
                1,
                "{:15}.fill(1)".format(IR_node.variable_name + '_scale_array'))
        self.add_body(
            1,
            "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}))"
            .format(IR_node.variable_name + '_scale',
                    IR_node.variable_name + '_scale',
                    IR_node.variable_name + '_scale_array',
                    IR_node.variable_name + '_scale_array',
                    IR_node.variable_name + '_scale_array'))
        self.add_body(
            1, "{:15} = __weights_dict['{}']['bias']".format(
                IR_node.variable_name + '_bias_array', IR_node.name))
        self.add_body(
            1,
            "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}))"
            .format(IR_node.variable_name + '_bias',
                    IR_node.variable_name + '_bias',
                    IR_node.variable_name + '_bias_array',
                    IR_node.variable_name + '_bias_array',
                    IR_node.variable_name + '_bias_array'))
        self.add_body(
            1, "{:15} = __weights_dict['{}']['mean']".format(
                IR_node.variable_name + '_mean_array', IR_node.name))
        self.add_body(
            1,
            "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}))"
            .format(IR_node.variable_name + '_mean',
                    IR_node.variable_name + '_mean',
                    IR_node.variable_name + '_mean_array',
                    IR_node.variable_name + '_mean_array',
                    IR_node.variable_name + '_mean_array'))
        self.add_body(
            1, "{:15} = __weights_dict['{}']['var']".format(
                IR_node.variable_name + '_var_array', IR_node.name))
        self.add_body(
            1,
            "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}))"
            .format(IR_node.variable_name + '_var',
                    IR_node.variable_name + '_var',
                    IR_node.variable_name + '_var_array',
                    IR_node.variable_name + '_var_array',
                    IR_node.variable_name + '_var_array'))
        self.add_body(
            1,
            "{:15} = helper.make_node('BatchNormalization', inputs=['{}', '{}', '{}', '{}', '{}'],outputs=['{}'], epsilon={}, is_test={})"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    IR_node.variable_name + '_scale',
                    IR_node.variable_name + '_bias',
                    IR_node.variable_name + '_mean',
                    IR_node.variable_name + '_var', IR_node.variable_name,
                    epsilon, 0 if self.phase == 'train' else 1))
        self.nodes.append(IR_node.variable_name + '_scale')
        self.nodes.append(IR_node.variable_name + '_bias')
        self.nodes.append(IR_node.variable_name + '_mean')
        self.nodes.append(IR_node.variable_name + '_var')
        self.nodes.append(IR_node.variable_name)

    def emit_Relu(self, IR_node):
        self.add_body(
            1,
            "{:15} = helper.make_node('Relu', inputs=['{}'], outputs=['{}'])".
            format(IR_node.variable_name, self.parent_variable_name(IR_node),
                   IR_node.variable_name))
        self.nodes.append(IR_node.variable_name)

    def emit_Add(self, IR_node):
        input_layers = ', '.join(("'" + self.IR_graph.get_parent(
            IR_node.variable_name, [num]).real_variable_name) + "'"
                                 for num in range(0, len(IR_node.in_edges)))
        self.add_body(
            1, "{:15} = helper.make_node('Add', inputs=[{}], outputs=['{}'])".
            format(IR_node.variable_name, input_layers, IR_node.variable_name))
        self.nodes.append(IR_node.variable_name)

    def emit_Pool(self, IR_node):
        pooling_type = IR_node.get_attr('pooling_type')
        if IR_node.layer.attr['global_pooling'].b:
            if pooling_type == 'AVG':
                self.add_body(
                    1,
                    "{:15} = helper.make_node('GlobalAveragePool', inputs=['{}'], outputs=['{}'])"
                    .format(IR_node.variable_name,
                            self.parent_variable_name(IR_node),
                            IR_node.variable_name))
                self.nodes.append(IR_node.variable_name)
            else:
                print("OnnxEmitter does not support global pooling type [%s]." %
                      (pooling_type))
                self.emit_UNKNOWN(IR_node)
        else:
            if pooling_type in ['AVG', 'MAX']:
                if pooling_type == 'AVG':
                    op_name = 'AveragePool'
                elif pooling_type == 'MAX':
                    op_name = 'MaxPool'
                kernel_shape = list(IR_node.get_attr('kernel_shape')[1:-1])
                pads = IR_node.get_attr('pads')
                pad_length = len(pads)
                pads = pads[1:pad_length // 2 - 1] + pads[pad_length // 2 +
                                                          1:pad_length - 1]
                strides = list(IR_node.get_attr('strides')[1:-1])
                self.add_body(
                    1,
                    "{:15} = helper.make_node('{}', inputs=['{}'],outputs=['{}'], kernel_shape={}, pads={}, strides={})"
                    .format(IR_node.variable_name, op_name,
                            self.parent_variable_name(IR_node),
                            IR_node.variable_name, kernel_shape, pads,
                            strides))
                self.nodes.append(IR_node.variable_name)
            else:
                print("OnnxEmitter has not supported Pool type [%s]." %
                      (pooling_type))
                self.emit_UNKNOWN(IR_node)

    def emit_FullyConnected(self, IR_node):
        self.check_if_need_transpose(IR_node)
        self.add_body(
            1, "{:15} = __weights_dict['{}']['weights']".format(
                IR_node.variable_name + '_weight_array', IR_node.name))
        self.add_body(
            1,
            "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float)))"
            .format(IR_node.variable_name + '_weight',
                    IR_node.variable_name + '_weight',
                    IR_node.variable_name + '_weight_array',
                    IR_node.variable_name + '_weight_array',
                    IR_node.variable_name + '_weight_array'))
        self.add_body(
            1, "{:15} = __weights_dict['{}']['bias']".format(
                IR_node.variable_name + '_bias_array', IR_node.name))
        self.add_body(
            1,
            "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float)))"
            .format(IR_node.variable_name + '_bias',
                    IR_node.variable_name + '_bias',
                    IR_node.variable_name + '_bias_array',
                    IR_node.variable_name + '_bias_array',
                    IR_node.variable_name + '_bias_array'))
        self.add_body(
            1,
            "{:15} = helper.make_node('Gemm', inputs=['{}', '{}', '{}'],outputs=['{}'])"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    IR_node.variable_name + '_weight',
                    IR_node.variable_name + '_bias', IR_node.variable_name))
        self.nodes.append(IR_node.variable_name + '_weight')
        self.nodes.append(IR_node.variable_name + '_bias')
        self.nodes.append(IR_node.variable_name)

    def emit_Pad(self, IR_node):
        mode = IR_node.layer.attr['mode'].s.decode()
        pads = IR_node.get_attr('pads')
        pad_length = len(pads)
        pads = [0, 0] + pads[1:pad_length // 2 - 1] + [
            0, 0
        ] + pads[pad_length // 2 + 1:pad_length - 1]
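        # e.g. IR pads [0, 2, 3, 0, 0, 2, 3, 0] (NHWC) become ONNX pads
        # [0, 0, 2, 3, 0, 0, 2, 3]: batch and channel stay zero, with all
        # begins grouped before all ends (NCHW order).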
        self.add_body(
            1,
            "{:15} = helper.make_node('Pad', inputs=['{}'], outputs=['{}'], mode='{}', pads={})"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    IR_node.variable_name, mode, pads))
        self.nodes.append(IR_node.variable_name)

    def emit_Concat(self, IR_node):
        axis = IR_node.get_attr('axis') - 2
        inputs = ', '.join("'" + self.IR_graph.get_node(i).real_variable_name +
                           "'" for i in IR_node.in_edges)
        self.add_body(
            1,
            "{:15} = helper.make_node('Concat', inputs=[{}], outputs=['{}'], axis={})"
            .format(IR_node.variable_name, inputs, IR_node.variable_name,
                    axis))
        self.nodes.append(IR_node.variable_name)

    def emit_Flatten(self, IR_node):
        self.add_body(
            1,
            "{:15} = helper.make_node('Flatten', inputs=['{}'], outputs=['{}'])"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    IR_node.variable_name))
        self.nodes.append(IR_node.variable_name)

    def emit_Softmax(self, IR_node):
        self.add_body(
            1,
            "{:15} = helper.make_node('Softmax', inputs=['{}'], outputs=['{}'])"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    IR_node.variable_name))
        self.nodes.append(IR_node.variable_name)

    def emit_Constant(self, IR_node):
        self.add_body(
            1, "{:15} = __weights_dict['{}']['value']".format(
                IR_node.variable_name + '_value_array', IR_node.name))
        self.add_body(
            1,
            "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float)))"
            .format(IR_node.variable_name, IR_node.variable_name,
                    IR_node.variable_name + '_value_array',
                    IR_node.variable_name + '_value_array',
                    IR_node.variable_name + '_value_array'))
        self.nodes.append(IR_node.variable_name)

    def emit_Sub(self, IR_node):
        inputs = ', '.join("'" + self.IR_graph.get_node(i).real_variable_name +
                           "'" for i in IR_node.in_edges)
        self.add_body(
            1,
            "{:15} = helper.make_node('Sub', inputs=[{}], outputs=['{}'], broadcast=1)"
            .format(IR_node.variable_name, inputs, IR_node.variable_name))
        self.nodes.append(IR_node.variable_name)

    def emit_Mul(self, IR_node):
        inputs = ', '.join("'" + self.IR_graph.get_node(i).real_variable_name +
                           "'" for i in IR_node.in_edges)
        self.add_body(
            1,
            "{:15} = helper.make_node('Mul', inputs=[{}], outputs=['{}'], broadcast=1)"
            .format(IR_node.variable_name, inputs, IR_node.variable_name))
        self.nodes.append(IR_node.variable_name)

    def emit_Dropout(self, IR_node):
        self.add_body(
            1,
            "{:15} = helper.make_node('Dropout', inputs=['{}'], outputs=['{}'], is_test={}, ratio={})"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    IR_node.variable_name, 0 if self.phase == 'train' else 1,
                    1 - IR_node.get_attr('keep_prob')))
        self.nodes.append(IR_node.variable_name)

    def emit_UNKNOWN(self, IR_node):
        print(IR_node.IR_layer.name)
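
# Illustrative sketch (hypothetical names, not part of the emitter above):
# the code that OnnxEmitter generates builds nodes with helper.make_node
# and collects their names so a graph can be assembled afterwards, roughly:
from onnx import helper, TensorProto

x_info = helper.make_tensor_value_info('x', TensorProto.FLOAT, [1, 3, 224, 224])
y_info = helper.make_tensor_value_info('y', TensorProto.FLOAT, [1, 3, 224, 224])
relu_node = helper.make_node('Relu', inputs=['x'], outputs=['y'])
graph = helper.make_graph([relu_node], 'KitModel', [x_info], [y_info])
model = helper.make_model(graph)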
Exemplo n.º 4
0
class Keras2Emitter(Emitter):

    dtype_map = {
        graph_pb2.DT_FLOAT16 : "float16",
        graph_pb2.DT_FLOAT32 : "float32",
        graph_pb2.DT_FLOAT64 : "float64",
        graph_pb2.DT_INT16 : "int16",
        graph_pb2.DT_INT32 : "int32",
        graph_pb2.DT_INT64 : "int64",
        graph_pb2.DT_UINT8 : "uint8",
        graph_pb2.DT_UINT16 : "uint16"
    }


    def __init__(self, model):
        super(Keras2Emitter, self).__init__()
        from six import string_types as _string_types
        if isinstance(model, _string_types):
            network_path = model
        else:
            network_path = model[0]
            weight_path = model[1]

        self.IR_graph = IRGraph(network_path)
        self.IR_graph.build()


    @property
    def header_code(self):
        return """import keras
from keras.models import Model
from keras import layers
import keras.backend as K

def load_weights(model, weight_file):
    import numpy as np
    
    if weight_file == None:
        return
    
    try:
        weights_dict = np.load(weight_file).item()
    except:
        weights_dict = np.load(weight_file, encoding='bytes').item()                 

    for layer in model.layers:
        if layer.name in weights_dict:
            cur_dict = weights_dict[layer.name]
            current_layer_parameters = list()            
            if layer.__class__.__name__ == "BatchNormalization":
                if 'scale' in cur_dict:
                    current_layer_parameters.append(cur_dict['scale'])
                if 'bias' in cur_dict:
                    current_layer_parameters.append(cur_dict['bias'])
                current_layer_parameters.extend([cur_dict['mean'], cur_dict['var']])
            elif layer.__class__.__name__ == "SeparableConv2D":
                current_layer_parameters = [cur_dict['depthwise_filter'], cur_dict['pointwise_filter']]
                if 'bias' in cur_dict:
                    current_layer_parameters.append(cur_dict['bias'])
            else:
                # rot weights
                current_layer_parameters = [cur_dict['weights']]
                if 'bias' in cur_dict:
                    current_layer_parameters.append(cur_dict['bias'])
            model.get_layer(layer.name).set_weights(current_layer_parameters)

    return model

def KitModel(weight_file = None):
        """

    
    def gen_code(self, phase):
        self.add_body(0, self.header_code)
        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            node_type = current_node.type

            if hasattr(self, "emit_" + node_type):
                func = getattr(self, "emit_" + node_type)
                self.add_body(1, func(current_node))
            else:
                print("KerasEmitter has not supported operator [%s]." % (node_type))
                self.emit_UNKNOWN(current_node)

        self.add_body(1, "{:<15} = Model(inputs = [{}], outputs = [{}])".format(
            "model",
            ', '.join([self.IR_graph.get_node(i).real_variable_name for i in self.IR_graph.input_layers]),
            ', '.join([self.IR_graph.get_node(i).real_variable_name for i in self.IR_graph.output_layers])))
        self.add_body(1, ["load_weights(model, weight_file)", "return model"])

        for i in self.used_layers:
            func = getattr(self, "_layer_" + i)
            func()

        return self.body_code


    @staticmethod
    def shapeToStr(shapes):
        return ', '.join('%s' % i for i in filter(lambda x:x > 0, shapes))


    def _emit_merge(self, IR_node, func):
        inputs = ', '.join('%s' % self.IR_graph.get_node(i).real_variable_name for i in IR_node.in_edges)
        code = "{:<15} = layers.{}(name = '{}', inputs = [{}])".format(
            IR_node.replace_scope(IR_node.name),
            func,
            IR_node.name, 
            inputs)
        return code

    def _emit_convolution(self, IR_node, conv_type):
        filters = IR_node.IR_layer.attr["filter"].list.i[-1]
        filters_str = 'filters = {}'.format(filters) if conv_type.startswith('layer') else 'depth_multiplier = {}'.format(filters)
        kernel_size = ', '.join('%s' % i for i in IR_node.layer.attr['filter'].list.i[:-2])
        strides = ','.join('%s' % i for i in IR_node.IR_layer.attr["strides"].list.i[1:-1])
        use_bias = IR_node.IR_layer.attr["use_bias"].b 
        padding = IR_node.IR_layer.attr["padding"].s.decode('utf-8')
        padding = padding.lower()

        return "{:<15} = {}(name = '{}', {}, kernel_size = ({}), strides = ({}), padding = '{}', use_bias = {})({})".format(
            IR_node.variable_name,
            conv_type,
            IR_node.name,
            filters_str,
            kernel_size,
            strides,
            padding,
            use_bias,
            self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name)


    def emit_Convolution(self, IR_node):
        dim = len(IR_node.IR_layer.attr["strides"].list.i) - 2
        return self._emit_convolution(IR_node, 'layers.Conv{}D'.format(dim))


    def emit_Pool(self, IR_node):
        dim = len(IR_node.IR_layer.attr["strides"].list.i) - 2

        if IR_node.layer.attr['pooling_type'].s == b"MAX":
            pool_name = "MaxPooling{}D".format(dim)
        elif IR_node.layer.attr['pooling_type'].s == b"AVG":
            pool_name = "AveragePooling{}D".format(dim)
        else:
            assert False
        
        if IR_node.layer.attr['global_pooling'].b:
            ret = "{:<15} = layers.Global{}(name = \'{}\')({})".format(
                IR_node.replace_scope(IR_node.name),
                pool_name,
                IR_node.name,
                self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name)
        else:
            for e in IR_node.IR_layer.attr["dilation_rate"].list.i:
                assert e == 1
            
            padding = IR_node.IR_layer.attr["padding"].s.decode('utf-8')
            padding = padding.lower()

            pool_size = IR_node.IR_layer.attr['window_shape'].list.i[1:-1]            
            pool_size = ', '.join('%s' % i for i in pool_size)
            strides = IR_node.IR_layer.attr['strides'].list.i[1:-1]
            strides = ', '.join('%s' % i for i in strides)
            
            ret = "{:<15} = layers.{}(name = '{}', pool_size = ({}), strides = ({}), padding = '{}')({})".format(
                IR_node.replace_scope(IR_node.name),
                pool_name,
                IR_node.name,
                pool_size,
                strides,
                padding,
                self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name)

        return ret


    def emit_UNKNOWN(self, IR_node):
        print (IR_node.name)


    def emit_DataInput(self, IR_node):
        shape_str = IRGraph.shapeToStr(IR_node.IR_layer.attr["shape"].shape)
        dtype_str = ", dtype = '{}'".format(self.dtype_map[IR_node.layer.attr['dtype'].type]) if 'dtype' in IR_node.layer.attr else ""
        code = "{:<15} = layers.Input(name = '{}', shape = ({},) {})".format(
                IR_node.variable_name,
                IR_node.name,
                shape_str,
                dtype_str)
        return code


    def emit_Dropout(self, IR_node):
        seed = 'None'
        if 'seed' in IR_node.IR_layer.attr:
            seed = IR_node.IR_layer.attr['seed'].i

        ret = "{:<15} = layers.Dropout(name = '{}', rate = {}, seed = {})({})".format(
            IR_node.replace_scope(IR_node.name),
            IR_node.name,
            IR_node.IR_layer.attr["keep_prob"].f,
            seed,
            self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name)

        return ret
 

    def emit_FullyConnected(self, IR_node):
        return "{:<15} = layers.Dense(name = '{}', units = {}, use_bias = {})({})".format(
                IR_node.variable_name,
                IR_node.name,
                IR_node.layer.attr["units"].i,
                IR_node.layer.attr["use_bias"].b,
                self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name)


    def emit_Flatten(self, IR_node):
        self.used_layers.add('Flatten')
        return "{:<15} = __flatten(name = '{}', input = {})".format(
            IR_node.variable_name,            
            IR_node.name,
            self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name)
        

    def emit_Reshape(self, IR_node):
        shape_str = self.shapeToStr(IR_node.IR_layer.attr["shape"].list.i)
        return "{:<15} = layers.Reshape(name = '{}', target_shape = ({},))({})".format(
            IR_node.variable_name,
            IR_node.name,
            shape_str,
            self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name)


    def emit_Tanh(self, IR_node):
        return "{:<15} = layers.Activation(name = '{}', activation = 'tanh')({})".format(
                IR_node.variable_name,
                IR_node.name,
                self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name)


    def emit_Relu(self, IR_node):
        code = "{:<15} = layers.Activation(name = '{}', activation = 'relu')({})".format(
                IR_node.replace_scope(IR_node.name),
                IR_node.name,
                self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name)
        return code


    def emit_Softmax(self, IR_node):
        code = "{:<15} = layers.Activation(name = '{}', activation = 'softmax')({})".format(
                IR_node.replace_scope(IR_node.name), 
                IR_node.name,
                self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name)
        return code


    def emit_Sigmoid(self, IR_node):
        code = "{:<15} = layers.Activation(name = '{}', activation = 'sigmoid')({})".format(
                IR_node.replace_scope(IR_node.name), 
                IR_node.name,
                self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name)
        return code


    def emit_Embedding(self, IR_node):
        ret = "{:<15} = layers.Embedding(input_dim = {}, output_dim = {}, mask_zero = {})({})".format(
                IR_node.name, 
                IR_node.IR_layer.attr['input_dim'].i,
                IR_node.IR_layer.attr['output_dim'].i,
                IR_node.IR_layer.attr['mask_zero'].b,
                IR_node.in_edges[0])

        return ret


    def emit_RNNs(self, IR_node, func):
        # for Keras
        if "dropout" in IR_node.IR_layer.attr:
            dropout_str = ",dropout = {}, recurrent_dropout = {}".format(
                    IR_node.IR_layer.attr['dropout'].f,
                    IR_node.IR_layer.attr['recurrent_dropout'].f)
        else:
            dropout_str = ""
        
        code = "{:<15} = layers.{}(units = {}, use_bias = {} {})({})".format(
                IR_node.name, 
                func,
                IR_node.IR_layer.attr['units'].i,
                IR_node.IR_layer.attr['use_bias'].b,
                dropout_str,
                IR_node.in_edges[0])

        return code


    def emit_LSTM(self, IR_node):
        return self.emit_RNNs(IR_node, "LSTM")


    def emit_GRU(self, IR_node):
        return self.emit_RNNs(IR_node, "GRU")


    def emit_Add(self, IR_node):
        code = self._emit_merge(IR_node, "add")
        return code


    def emit_Concat(self, IR_node):
        code = self._emit_merge(IR_node, "concatenate")
        return code


    def emit_BatchNorm(self, IR_node):
        axis = IR_node.layer.attr['axis'].i if 'axis' in IR_node.layer.attr else -1        
        return "{:<15} = layers.BatchNormalization(name = '{}', axis = {}, epsilon = {}, center = {}, scale = {})({})".format(
                IR_node.variable_name,
                IR_node.name,                
                axis,
                IR_node.layer.attr['epsilon'].f,
                IR_node.layer.attr['bias'].b,
                IR_node.layer.attr['scale'].b,
                self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name)
    
    
    def emit_Pad(self, IR_node):
        if 'mode' not in IR_node.layer.attr or IR_node.IR_layer.attr['mode'].s == b"CONSTANT":
            func = "ZeroPadding"
        else:
            print (IR_node.IR_layer.attr['mode'].s)
            assert False

        dim = len(IR_node.IR_layer.attr['paddings'].list.i) // 2 - 2

        padding_str = ""
        for idx in range(1, dim + 1):
            padding_str += "({}, {}),".format(
                    IR_node.IR_layer.attr['paddings'].list.i[idx + idx],
                    IR_node.IR_layer.attr['paddings'].list.i[idx + idx + 1])

        return "{:<15} = layers.{}{}D(name = '{}', padding = ({}))({})".format(
                IR_node.variable_name,
                func,
                dim,
                IR_node.name,
                padding_str,
                self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name)

    def emit_Squeeze(self, IR_node):
        return self.emit_Flatten(IR_node)


    def emit_ReduceMean(self, IR_node):
        axes = ', '.join('%s' % i for i in IR_node.layer.attr['axes'].list.i)
        
        return "{:<15} = layers.Lambda(lambda x: K.mean(x, axis=[{}], keepdims = {}))({})".format(
                IR_node.replace_scope(IR_node.name),                
                axes,
                IR_node.layer.attr['keepdims'].b,
                self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name)


    def emit_LRN(self, IR_node):
        self.used_layers.add(IR_node.type)
        code = "{:<15} = LRN(size = {}, alpha = {}, beta = {}, k = {}, name = '{}')({})".format(
            IR_node.variable_name,
            IR_node.layer.attr['size'].i,
            IR_node.layer.attr['alpha'].f,
            IR_node.layer.attr['beta'].f,
            IR_node.layer.attr['k'].f,
            IR_node.name,
            self.IR_graph.get_parent(IR_node.name, [0]).variable_name)
        
        return code


    def emit_SeparableConv(self, IR_node):
        assert len(IR_node.layer.attr["strides"].list.i) == 4
        return self._emit_convolution(IR_node, "layers.SeparableConv2D")


    def emit_Relu6(self, IR_node):        
        return "{:<15} = layers.Activation(keras.applications.mobilenet.relu6, name = '{}')({})".format(
                IR_node.variable_name,
                IR_node.name,
                self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name)

    
    def emit_DepthwiseConv(self, IR_node):
        return self._emit_convolution(IR_node, 'keras.applications.mobilenet.DepthwiseConv2D')


    def _layer_Flatten(self):
        self.add_body(0, '''
def __flatten(name, input):
    if input.shape.ndims > 2: return layers.Flatten(name = name)(input)
    else: return input
''')

    def _layer_LRN(self):
        self.add_body(0, '''
from keras.layers.core import Layer
class LRN(Layer):
    
    def __init__(self, size=5, alpha=0.0005, beta=0.75, k=2, **kwargs):
        self.n = size
        self.alpha = alpha
        self.beta = beta
        self.k = k
        super(LRN, self).__init__(**kwargs)

    def build(self, input_shape):
        self.shape = input_shape
        super(LRN, self).build(input_shape)

    def call(self, x, mask=None):
        half_n = self.n - 1
        squared = K.square(x)
        scale = self.k
        norm_alpha = self.alpha / (2 * half_n + 1)
        if K.image_dim_ordering() == "th":
            b, f, r, c = self.shape
            squared = K.expand_dims(squared, 0)
            squared = K.spatial_3d_padding(squared, padding=((half_n, half_n), (0, 0), (0,0)))
            squared = K.squeeze(squared, 0)
            for i in range(half_n*2+1):
                scale += norm_alpha * squared[:, i:i+f, :, :]
        else:
            b, r, c, f = self.shape
            squared = K.expand_dims(squared, -1)
            squared = K.spatial_3d_padding(squared, padding=((0, 0), (0,0), (half_n, half_n)))
            squared = K.squeeze(squared, -1)
            for i in range(half_n*2+1):
                scale += norm_alpha * squared[:, :, :, i:i+f]

        scale = K.pow(scale, self.beta)
        return x / scale

    def compute_output_shape(self, input_shape):
        return input_shape''')
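
# Illustrative sketch (hypothetical layer names, not part of the emitter
# above): the functional-API code that Keras2Emitter generates looks
# roughly like this.
from keras.models import Model
from keras import layers

def example_KitModel(weight_file=None):
    data  = layers.Input(name='data', shape=(224, 224, 3,))
    conv1 = layers.Conv2D(name='conv1', filters=64, kernel_size=(7, 7),
                          strides=(2, 2), padding='valid', use_bias=True)(data)
    relu1 = layers.Activation(name='relu1', activation='relu')(conv1)
    return Model(inputs=[data], outputs=[relu1])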
Exemplo n.º 5
0
class MXNetEmitter(Emitter):

    dtype_map = {
        graph_pb2.DT_FLOAT16: "float16",
        graph_pb2.DT_FLOAT32: "float32",
        graph_pb2.DT_FLOAT64: "float64",
        graph_pb2.DT_INT32: "int32",
        graph_pb2.DT_UINT8: "uint8"
    }

    activation_map = {
        "relu": "Relu",
        "sigmoid": "Sigmoid",
        "tanh": "Tanh",
        "elu": "Elu"
    }

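    # maps NHWC axis indices (H = 1, W = 2, C = -1) to their NCHW positions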
    transpose_map = {1: 2, 2: 3, -1: 1}

    channels_last = ['NDHWC', 'NHWC']

    def __init__(self, model):
        super(MXNetEmitter, self).__init__()
        from six import string_types as _string_types

        if isinstance(model, _string_types):
            network_path = model
            self.weight_loaded = False
        elif len(model) == 4:
            network_path = model[0]
            weight_path = model[1]
            self.input_shape = model[2]
            self.output_weights_file = model[3]
            self.weights = np.load(weight_path).item()
            self.weight_loaded = True
            self.output_weights = dict()
        else:
            raise ValueError("the # of input arguments [{}] is not supported"
                             .format(len(model)))

        self.IR_graph = IRGraph(network_path)
        self.IR_graph.build()

    @property
    def header_code(self):
        return """import mxnet as mx
import numpy as np
import math

# MXNet CPU only supports channel-first layout; by default the model and weights are converted to channel-first

def RefactorModel():
"""

    def gen_code(self, phase):
        self.IR_layer_map = dict()
        self.add_body(0, self.header_code)
        for layer in self.IR_graph.topological_sort:
            self.IR_layer_map[layer] = self.IR_graph.get_node(layer)

        shape = dict()
        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            node_type = current_node.type

            if len(current_node.in_edges) == 0:
                current_node.in_edges.append('data')

            if node_type.lower() in MXNetEmitter.activation_map:
                func = getattr(self, "emit_Activation")
                line = func(current_node, node_type.lower())
                self.add_body(1, line)
            elif hasattr(self, "emit_" + node_type):
                func = getattr(self, "emit_" + node_type)
                line = func(current_node)
                self.add_body(1, line)
            else:
                print("MXNet Emitter has not supported operator [%s]." %
                      (node_type))
                self.emit_UNKNOWN(current_node)

            if node_type == "DataInput":
                cur_shape = list()
                first = True
                for dim in current_node.IR_layer.attr["shape"].shape.dim:
                    if dim.size == -1 and first:
                        cur_shape.append(1)
                        print(
                            "Detected input layer [{}] with an inferred batch size; setting it to the default value [1]"
                            .format(current_node.name))
                    else:
                        if dim.size == -1:
                            print(
                                "Warning: the input size should be changed manually"
                            )
                        cur_shape.append(dim.size)
                    first = False

                cur_shape.insert(1, cur_shape.pop())
                shape[current_node.name] = ', '.join('%s' % i
                                                     for i in cur_shape)

        if self.weight_loaded:
            dirname = os.path.dirname(self.output_weights_file)
            if not os.path.exists(dirname):
                os.makedirs(dirname)
            with open(self.output_weights_file, 'wb') as outfile:
                np.save(outfile, self.output_weights)

        comment = "\n    # if a GPU is available, change mx.cpu() to mx.gpu()"
        last_line = "{:<15} = mx.mod.Module(symbol = {}, context = mx.cpu(), data_names = ['{}'])".format(
            "model", ', '.join([
                self.IR_graph.get_node(name).real_variable_name
                for name in self.IR_graph.output_layers
            ]), ', '.join([
                self.IR_graph.get_node(name).real_variable_name
                for name in self.IR_graph.input_layers
            ]))

        self.add_body(1, comment)
        self.add_body(1, last_line)
        self.add_body(1, "return model")

        weight_code = ""
        if not self.weight_loaded:
            weight_code += "# the emitter did not detect any imported weights; you may need to generate the weights file manually\n"

        weight_code += self.gen_weight_code(shape, phase)

        main_code = "if __name__ == '__main__':\n    model = RefactorModel()\n"
        if self.weight_loaded:
            main_code += "    # remember to adjust params path\n    model = deploy_weight(model, '{}')\n".format(
                self.output_weights_file)

        if phase == 'train':
            train_code = """def train(model):
    import logging
    logging.getLogger().setLevel(logging.DEBUG)
    model.fit(train_iter, # train data
            eval_data = val_iter, # validation data
            optimizer = 'sgd', # Defaults to 'sgd'
            optimizer_params = {'learning_rate':0.01}, # use fixed learning rate
            eval_metric = 'acc', # report accuracy during training, other possible predefined metrics are: 'ce', 'f1', 'mae', 'mse', 'rmse', 'top_k_accuracy'
            batch_end_callback = mx.callback.Speedometer(batch_size, 100), # output progress for each 100 data batches
            num_epoch = 10) # train for at most 10 dataset passes\n\n
"""
            code = self.body_code + weight_code + train_code + main_code
        else:
            test_code = """import matplotlib.pyplot as plt
from collections import namedtuple
Batch = namedtuple('Batch', ['data'])


def get_image(url, show = False):
    import cv2
    # download and show the image
    fname = mx.test_utils.download(url)
    img = cv2.cvtColor(cv2.imread(fname), cv2.COLOR_BGR2RGB)
    if img is None:
        return None
    if show:
        plt.imshow(img)
        plt.axis('off')
    # convert into format (batch, RGB, width, height)
    img = cv2.resize(img, (224, 224))
    img = np.swapaxes(img, 0, 2)
    img = np.swapaxes(img, 1, 2)
    img = img[np.newaxis, :]
    return img


def predict(model, labels, url):
    # to show the image, change the argument show into True
    img = get_image(url, show = False)
    # compute the predict probabilities
    model.forward(Batch([mx.nd.array(img)]))
    prob = model.get_outputs()[0].asnumpy()
    # print the top-5
    prob = np.squeeze(prob)
    a = np.argsort(prob)[::-1]
    for i in a[0:5]:
        print('probability = %f, class = %s' %(prob[i], labels[i]))\n\n
"""

            main_code += """
    # # call function predict
    # with open('synset.txt', 'r') as f:
    #     labels = [l.rstrip() for l in f]
    # predict(model, labels, 'http://writm.com/wp-content/uploads/2016/08/Cat-hd-wallpapers.jpg')
"""

            code = self.body_code + weight_code + test_code + main_code

        return code

    def gen_weight_code(self, shape, phase):
        if len(shape) == 0:
            # var = raw_input("Input layer not detected, please type data shape manually(i.e. X, X, X, X): ")
            shape['data'] = ', '.join('%s' % i for i in self.input_shape)
        code = "def deploy_weight(model, weight_file):\n"
        code += """
    if weight_file == None:
        return

    try:
        weights_dict = np.load(weight_file).item()
    except:
        weights_dict = np.load(weight_file, encoding='bytes').item()

    arg_params = dict()
    aux_params = dict()
    for weight_name, weight_data in weights_dict.items():
        weight_name = str(weight_name)
        if "moving" in weight_name:
            aux_params[weight_name] = mx.nd.array(weight_data)
        else:
            arg_params[weight_name] = mx.nd.array(weight_data)

"""
        if phase == 'train':
            code += "    model.bind(for_training = True, data_shapes = ["
        else:
            code += "    model.bind(for_training = False, data_shapes = ["
        first = True
        for k, v in shape.items():
            if not first:
                code += ", "
            code += "('" + k + "', " + "(" + v + "))"
            first = False
        code += "])\n"
        code += "    model.set_params(arg_params = arg_params, aux_params = aux_params, allow_missing = True)\n\n    return model\n\n\n"
        return code

    @staticmethod
    def calculate_same_pad(data_shape, kernel, stride):
        if (data_shape % stride == 0):
            pad = max(kernel - stride, 0)
        else:
            pad = max(kernel - (data_shape % stride), 0)
        if pad % 2 == 0:
            return False, pad
        else:
            return True, pad
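        # Worked example (hypothetical sizes): calculate_same_pad(224, 7, 2)
        # gives pad = max(7 - 2, 0) = 5 because 224 % 2 == 0; 5 is odd, so
        # it returns (True, 5) and the caller must split it asymmetrically.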

    @staticmethod
    def transfer_pad(pad_list):
        defuse_pad = False
        pad = list()

        assert len(pad_list) % 2 == 0
        mid = int(len(pad_list) / 2)
        pad_first = pad_list[1:mid - 1]
        pad_second = pad_list[mid + 1:-1]

        for i in range(0, mid - 2):
            if not pad_first[i] == pad_second[i]:
                defuse_pad = True

        if defuse_pad:
            pad.extend([0] * 4)
            for i in range(0, mid - 2):
                pad.extend([pad_first[i], pad_second[i]])
        else:
            pad = pad_first

        return defuse_pad, pad
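        # Worked examples (hypothetical IR pad lists, NHWC order):
        #   transfer_pad([0, 1, 1, 0, 0, 1, 1, 0]) -> (False, [1, 1])
        #     symmetric padding folds into the op's own pad argument;
        #   transfer_pad([0, 1, 1, 0, 0, 2, 2, 0]) -> (True, [0, 0, 0, 0, 1, 2, 1, 2])
        #     asymmetric padding is defused into an explicit mx.sym.pad whose
        #     pad_width interleaves begin/end pairs in NCHW order.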

    @staticmethod
    def transpose(data, dim):
        if dim == 1:
            data = data.transpose((2, 1, 0))
        elif dim == 2:
            data = data.transpose((3, 2, 0, 1))
        elif dim == 3:
            data = data.transpose((4, 3, 0, 1, 2))
        else:
            raise ValueError("The weight of dim {} cannot be transposed".format(dim))

        return data
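        # e.g. a 2-D convolution kernel stored as HWIO is transposed with
        # (3, 2, 0, 1) into MXNet's OIHW layout.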

    def set_pad(self, IR_node, code, pad, _max_pool):
        if _max_pool:
            constant_value = "-math.inf"
        else:
            constant_value = "0.0"

        code = "{:<15} = mx.sym.pad(data = {}, mode = 'constant', pad_width={}, constant_value = {}, name = '{}')".format(
            IR_node.variable_name + "_pad", self.parent_variable_name(IR_node),
            tuple(pad), constant_value, IR_node.name + "_pad")

        for e in IR_node.in_edges:
            if e == 'data':
                continue
            self.IR_layer_map[e].out_edges = [
                x if not self.IR_layer_map[x].name == IR_node.variable_name
                else IR_node.variable_name + "_pad"
                for x in self.IR_layer_map[e].out_edges
            ]

        return code

    def emit_UNKNOWN(self, IR_node):
        print(IR_node.name)

    def emit_FullyConnected(self, IR_node):
        if self.weight_loaded:
            weight_dict = self.weights[IR_node.name]
            parent = self.IR_graph.get_parent(IR_node.name, [0])
            while parent.type == "Flatten":
                parent = self.IR_graph.get_parent(parent.name, [0])
            dim = len(parent.layer.attr['_output_shapes'].list.shape[0].dim)
            if dim > 2:
                original_dims = weight_dict['weights'].shape
                dims = [
                    i.size for i in
                    parent.layer.attr['_output_shapes'].list.shape[0].dim[1:]
                ] + [-1]
                weight_dict['weights'] = np.reshape(weight_dict['weights'],
                                                    dims)
                weight_dict['weights'] = np.transpose(
                    weight_dict['weights'],
                    [dim - 2] + list(range(0, dim - 2)) + [dim - 1])
                weight_dict['weights'] = np.reshape(weight_dict['weights'],
                                                    original_dims)
            self.output_weights[IR_node.name +
                                "_weight"] = weight_dict['weights'].transpose(
                                    (1, 0))

        num_hidden = IR_node.IR_layer.attr["units"].i
        no_bias = not IR_node.IR_layer.attr["use_bias"].b
        if not no_bias and self.weight_loaded:
            self.output_weights[IR_node.name + "_bias"] = weight_dict['bias']

        code = "{:<15} = mx.sym.FullyConnected(data = {}, num_hidden = {}, no_bias = {}, name = '{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            num_hidden, no_bias, IR_node.name)

        return code

    def _emit_convolution(self, IR_node, pattern):
        if self.weight_loaded:
            weight_dict = self.weights[IR_node.name]
            weights = weight_dict['weights']

        dim = len(IR_node.IR_layer.attr["kernel_shape"].list.i) - 2

        kernel = list()
        for idx in range(0, dim):
            kernel.append(IR_node.IR_layer.attr["kernel_shape"].list.i[idx])

        stride = list()
        for e in IR_node.IR_layer.attr["strides"].list.i[1:-1]:
            stride.append(e)

        dilate = list()
        for e in IR_node.IR_layer.attr["dilations"].list.i[1:-1]:
            dilate.append(e)
        dilate = ', '.join('%s' % i for i in dilate)

        defuse_pad = False
        pad = list()
        if "pads" in IR_node.IR_layer.attr:
            output_shape = list()
            for e in IR_node.IR_layer.attr["_output_shapes"].list.shape[0].dim:
                output_shape.append(e.size)

            # print("Warning: MXNet Convolution Layer pad does not match IR Convolution Layer pad")
            defuse_pad, pad = MXNetEmitter.transfer_pad(
                IR_node.IR_layer.attr["pads"].list.i)

        num_filter = 0
        if pattern == "Deconvolution":
            num_filter = IR_node.IR_layer.attr["kernel_shape"].list.i[-2]
        else:
            num_filter = IR_node.IR_layer.attr["kernel_shape"].list.i[-1]

        no_bias = not IR_node.IR_layer.attr["use_bias"].b
        if not no_bias and self.weight_loaded:
            self.output_weights[IR_node.name + "_bias"] = weight_dict['bias']

        if pattern == "DepthwiseConv":
            num_group = num_filter
            pattern = "Convolution"
        else:
            num_group = IR_node.get_attr('group', 1)

        # layout = IR_node.IR_layer.attr["data_format"].s
        if dim == 1:
            layout = 'NCW'
        elif dim == 2:
            layout = 'NCHW'
        elif dim == 3:
            layout = 'NCDHW'

        if self.weight_loaded:
            # if layout not in MXNetEmitter.channels_last:
            weights = MXNetEmitter.transpose(weights, dim)
            self.output_weights[IR_node.name + "_weight"] = weights

        code = ""
        if not defuse_pad:
            code += "{:<15} = mx.sym.{}(data={}, kernel={}, stride={}, dilate = ({}), pad={}, num_filter = {}, num_group = {}, no_bias = {}, layout = '{}', name = '{}')".format(
                IR_node.variable_name, pattern,
                self.parent_variable_name(IR_node), tuple(kernel),
                tuple(stride), dilate, tuple(pad), num_filter, num_group,
                no_bias, layout, IR_node.name)
        else:
            code += self.set_pad(IR_node, code, pad, False)
            code += "\n    {:<15} = mx.sym.{}(data={}, kernel={}, stride={}, dilate = ({}), num_filter = {}, num_group = {}, no_bias = {}, layout = '{}', name = '{}')".format(
                IR_node.variable_name, pattern, IR_node.variable_name + "_pad",
                tuple(kernel), tuple(stride), dilate, num_filter, num_group,
                no_bias, layout, IR_node.name)

        return code

    def emit_Conv(self, IR_node):
        return self._emit_convolution(IR_node, "Convolution")

    def emit_DepthwiseConv(self, IR_node):
        return self._emit_convolution(IR_node, "DepthwiseConv")

    def emit_ConvTranspose(self, IR_node):
        return self._emit_convolution(IR_node, "Deconvolution")

    def emit_DataInput(self, IR_node):
        shape = list()
        shape.extend(IR_node.IR_layer.attr["shape"].list.i)

        code = "{:<15} = mx.sym.var('{}')".format(IR_node.variable_name,
                                                  IR_node.name)
        return code

    # Also handles LeakyReLU and Elu (slope is not supported)
    def emit_Activation(self, IR_node, act_type):

        if act_type == "elu":
            func_name = "LeakyReLU"
        else:
            func_name = "Activation"

        code = "{:<15} = mx.sym.{}(data = {}, act_type = '{}', name = '{}')".format(
            IR_node.variable_name, func_name,
            self.parent_variable_name(IR_node), act_type, IR_node.name)

        return code

    def emit_BatchNorm(self, IR_node):
        if self.weight_loaded:
            weight_dict = self.weights[IR_node.name]

        # axis = IR_node.IR_layer.attr["axis"].i
        axis = 1
        eps = IR_node.IR_layer.attr["epsilon"].f
        momentum = IR_node.IR_layer.attr["momentum"].f

        fix_gamma = not IR_node.IR_layer.attr["scale"].b

        if self.weight_loaded:
            if not fix_gamma:
                self.output_weights[IR_node.name +
                                    "_gamma"] = weight_dict['scale']
            self.output_weights[IR_node.name + "_beta"] = weight_dict['bias']

        # not supported yet
        use_global_stats = "False"
        if self.weight_loaded:
            self.output_weights[IR_node.name +
                                "_moving_var"] = weight_dict['var']
            self.output_weights[IR_node.name +
                                "_moving_mean"] = weight_dict['mean']

        code = "{:<15} = mx.sym.BatchNorm(data = {}, axis = {}, eps = {}, momentum = {}, fix_gamma = {}, use_global_stats = {}, name = '{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node), axis,
            eps, momentum, fix_gamma, use_global_stats, IR_node.name)

        return code

    def emit_Pool(self, IR_node):

        global_pool = IR_node.IR_layer.attr["global_pooling"].b

        kernel = list()
        if global_pool:
            kernel = [1] * (len(IR_node.IR_layer.attr["strides"].list.i) - 2)
        else:
            for e in IR_node.IR_layer.attr["kernel_shape"].list.i[1:-1]:
                kernel.append(e)

        pool_type = IR_node.get_attr('pooling_type').lower()

        stride = list()
        for e in IR_node.IR_layer.attr["strides"].list.i[1:-1]:
            stride.append(e)

        defuse_pad = False
        pad = list()
        if "pads" in IR_node.IR_layer.attr:
            output_shape = list()
            for e in IR_node.IR_layer.attr["_output_shapes"].list.shape[0].dim:
                output_shape.append(e.size)

            # print("Warning: MXNet Pooling Layer pad does not match IR Pooling Layer pad")
            defuse_pad, pad = MXNetEmitter.transfer_pad(
                IR_node.IR_layer.attr["pads"].list.i)
        code = ""
        if not defuse_pad:
            code += "{:<15} = mx.sym.Pooling(data = {}, global_pool = {}, kernel={}, pool_type = '{}', stride={}, pad={}, name = '{}')".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                global_pool, tuple(kernel), pool_type, tuple(stride),
                tuple(pad), IR_node.name)
        else:
            code += self.set_pad(IR_node, code, pad, pool_type == "max")
            code += "\n    {:<15} = mx.sym.Pooling(data = {}, global_pool = {}, kernel={}, pool_type = '{}', stride={}, name = '{}')".format(
                IR_node.variable_name, IR_node.variable_name + "_pad",
                global_pool, tuple(kernel), pool_type, tuple(stride),
                IR_node.name)

        return code

    def emit_SoftmaxOutput(self, IR_node):

        code = "{:<15} = mx.sym.SoftmaxOutput(data = {}, name = 'softmax')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node))

        return code

    def emit_Softmax(self, IR_node):

        code = ""

        if len(IR_node.out_edges) == 0:
            code = "{:<15} = mx.sym.SoftmaxOutput(data = {}, name = 'softmax')".format(
                IR_node.variable_name, self.parent_variable_name(IR_node))
        else:
            axis = IR_node.IR_layer.attr["dim"].i
            code = "{:<15} = mx.sym.softmax(data = {}, axis = {}, name = '{}')".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                axis, IR_node.name)

        return code

    def emit_Squeeze(self, IR_node):
        return self.emit_Flatten(IR_node)

    # def emit_ConvTranspose(self, IR_node):
    #     if self.weight_loaded:
    #         weight_dict = self.weights[IR_node.name]
    #         weights = weight_dict['weights']

    #     dim = len(IR_node.IR_layer.attr["kernel_shape"].list.i) - 2

    #     kernel = list()
    #     for idx in range(0, dim):
    #         kernel.append(IR_node.IR_layer.attr["kernel_shape"].list.i[idx])

    #     stride = list()
    #     for e in IR_node.IR_layer.attr["strides"].list.i[1:-1]:
    #         stride.append(e)

    #     dilate = list()
    #     for e in IR_node.IR_layer.attr["dilations"].list.i[1:-1]:
    #         dilate.append(e)
    #     dilate = ', '.join('%s' % i for i in dilate)

    #     defuse_pad = False
    #     pad = list()
    #     if "pads" in IR_node.IR_layer.attr:
    #         output_shape = list()
    #         for e in IR_node.IR_layer.attr["_output_shapes"].list.shape[0].dim:
    #             output_shape.append(e.size)

    #         # print("Warning: MXNet Deconvolution Layer pad does not match IR Deconvolution Layer pad")
    #         defuse_pad, pad = MXNetEmitter.transfer_pad(IR_node.IR_layer.attr["pads"].list.i)
    #     pad = ', '.join('%s' % i for i in pad)

    #     kernel = ', '.join('%s' % i for i in kernel)
    #     stride = ', '.join('%s' % i for i in stride)

    #     num_filter = IR_node.IR_layer.attr["kernel_shape"].list.i[-2]
    #     no_bias = not IR_node.IR_layer.attr["use_bias"].b
    #     if not no_bias and self.weight_loaded:
    #         self.output_weights[IR_node.replace_scope(IR_node.name) + "_bias"] = weight_dict['bias']

    #     # layout = IR_node.IR_layer.attr["data_format"].s
    #     if dim == 1:
    #         layout = 'NCW'
    #     elif dim == 2:
    #         layout = 'NCHW'
    #     elif dim == 3:
    #         layout = 'NCDHW'

    #     if self.weight_loaded:
    #         # if layout not in MXNetEmitter.channels_last:
    #         weights = MXNetEmitter.transpose(weights, dim)
    #         self.output_weights[IR_node.replace_scope(IR_node.name) + "_weight"] = weights

    #     code = ""
    #     if not defuse_pad:
    #         code = "{:<15} = mx.sym.Deconvolution(data = {}, kernel = ({}), stride = ({}), dilate = ({}), pad = ({}), num_filter = {}, no_bias = {}, layout = '{}', name = '{}')".format(
    #                 IR_node.replace_scope(IR_node.name),
    #                 IR_node.replace_scope(IR_node.in_edges[0]),
    #                 kernel,
    #                 stride,
    #                 dilate,
    #                 pad,
    #                 num_filter,
    #                 no_bias,
    #                 layout,
    #                 IR_node.replace_scope(IR_node.name))
    #     else:
    #         code = self.set_pad(IR_node, code, pad)
    #         code += "\n    {:<15} = mx.sym.Deconvolution(data = {}, kernel = ({}), stride = ({}), dilate = ({}), num_filter = {}, no_bias = {}, layout = '{}', name = '{}')".format(
    #                 IR_node.replace_scope(IR_node.name), IR_node.replace_scope(IR_node.name) + "_pad", kernel, stride, dilate, num_filter, no_bias, layout, IR_node.replace_scope(IR_node.name))

    #     return code

    def emit_Embedding(self, IR_node):

        input_dim = IR_node.IR_layer.attr["input_dim"].i
        output_dim = IR_node.IR_layer.attr["output_dim"].i
        dtype = MXNetEmitter.dtype_map.get(IR_node.layer.attr["dtype"].type,
                                           "float32")

        code = "{:<15} = mx.sym.Embedding(data = {}, input_dim = {}, output_dim = {}, dtype = {}, name = '{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            input_dim, output_dim, dtype, IR_node.name)

        return code

    # def emit_LeakyReLU(self, IR_node):

    #     # IR only support Elu, the same problem with func emit_Activation

    #     code = "{:<15} = mx.sym.LeakyReLU(data = {}, )".format()

    #     return code
    #     raise NotImplementedError

    def emit_Dropout(self, IR_node):
        p = IR_node.IR_layer.attr["keep_prob"].f
        mode = IR_node.IR_layer.attr["mode"].s.lower().decode(
        ) if 'mode' in IR_node.layer.attr else 'training'
        code = "{:<15} = mx.sym.Dropout(data = {}, p = {}, mode = '{}', name = '{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node), p, mode,
            IR_node.name)

        return code

    # reverse is not supported yet
    def emit_Reshape(self, IR_node):

        shape = list()
        for e in IR_node.IR_layer.attr["shape"].list.i:
            shape.append(e)
        shape = ', '.join('%s' % i for i in shape)
        reverse = False

        code = "{:<15} = mx.sym.reshape(data = {}, shape = ({}), reverse = {}, name = '{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node), shape,
            reverse, IR_node.name)

        return code

    def emit_Flatten(self, IR_node):
        # code = "{:<15} = mx.sym.transpose(data = {}, axes = (0, 2, 3, 1))\n".format("trans", self.parent_variable_name(IR_node))
        code = "{:<15} = mx.sym.flatten(data = {}, name = '{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            IR_node.name)

        return code

    @staticmethod
    def _convert_axis(IR_node, axis):
        ndim = len(IR_node.layer.attr['_output_shapes'].list.shape[0].dim)
        if axis == 0:
            return 0
        elif axis == ndim - 1:
            return 1
        else:
            return axis + 1
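        # e.g. for a 4-D NHWC tensor: batch axis 0 -> 0, channel axis 3 -> 1,
        # and spatial axes 1, 2 -> 2, 3 under MXNet's NCHW layout.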

    def emit_Concat(self, IR_node):
        dim = MXNetEmitter._convert_axis(IR_node,
                                         IR_node.IR_layer.attr["axis"].i)
        code = "{:<15} = mx.sym.concat({}, dim = {}, name = '{}')".format(
            IR_node.variable_name, ', '.join(
                self.IR_graph.get_node(s).real_variable_name
                for s in IR_node.in_edges), dim, IR_node.name)

        return code

    def emit_Cast(self, IR_node):

        # map the IR dtype enum to its string name so the generated
        # mx.sym.cast call receives a valid dtype argument
        dtype = MXNetEmitter.dtype_map.get(IR_node.IR_layer.attr["dtype"].type, "float32")

        code = "{:<15} = mx.sym.cast(data = {}, dtype = '{}', name = '{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node), dtype,
            IR_node.name)

        return code

    def emit_Expand_dims(self, IR_node):

        axis = IR_node.IR_layer.attr["axis"].i

        code = "{:<15} = mx.sym.expand_dims(data = {}, axis = {}, name = '{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node), axis,
            IR_node.name)

        return code

    def emit_Pad(self, IR_node):
        mode = IR_node.IR_layer.attr["mode"].s.lower().decode()
        pad_width = list()
        pad_width.extend([0] * 4)
        padding = convert_onnx_pad_to_tf(IR_node.get_attr("pads"))[1:-1]
        for padding_pair in padding:
            pad_width.extend(padding_pair)

        pad_width = ', '.join('%s' % i for i in pad_width)

        code = "{:<15} = mx.sym.pad(data = {}, mode = '{}', pad_width = ({}), name = '{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node), mode,
            pad_width, IR_node.name)

        return code

    def emit_Add(self, IR_node):
        code = "{:<15} = mx.sym.broadcast_add({}, {})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            self.parent_variable_name(IR_node, [1]))

        return code

    def emit_Mul(self, IR_node):

        code = "{:<15} = mx.sym.broadcast_mul({}, {})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            self.parent_variable_name(IR_node, [1]))

        return code

    def emit_ReduceMean(self, IR_node):
        axes = IR_node.layer.attr['axes'].list.i[:]
        axes = ','.join('%s' % MXNetEmitter.transpose_map[i] for i in axes)

        code = "{:<15} = mx.sym.mean(data = {}, axis = ({}), keepdims = {})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node), axes,
            IR_node.layer.attr['keepdims'].b)

        return code

    def emit_LRN(self, IR_node):
        code = "{:<15} = mx.sym.LRN(data = {}, alpha = {}, beta = {}, knorm = {}, nsize = {}, name = '{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            IR_node.layer.attr['alpha'].f, IR_node.layer.attr['beta'].f,
            IR_node.layer.attr['k'].f, IR_node.layer.attr['size'].i * 2 - 1,
            IR_node.name)

        return code
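
# Illustrative sketch (hypothetical layer names, not part of the emitter
# above): the channel-first symbol-building code that MXNetEmitter
# generates looks roughly like this.
import mxnet as mx

def example_RefactorModel():
    data  = mx.sym.var('data')
    conv1 = mx.sym.Convolution(data=data, kernel=(7, 7), stride=(2, 2),
                               pad=(3, 3), num_filter=64, no_bias=False,
                               layout='NCHW', name='conv1')
    relu1 = mx.sym.Activation(data=conv1, act_type='relu', name='relu1')
    return mx.mod.Module(symbol=relu1, context=mx.cpu(), data_names=['data'])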
Exemplo n.º 6
0
class CntkEmitter(Emitter):

    dtype_map = {
        graph_pb2.DT_FLOAT16: "np.float16",
        graph_pb2.DT_FLOAT32: "np.float32",
        graph_pb2.DT_FLOAT64: "np.float64",
        graph_pb2.DT_INT16: "np.int16",
        graph_pb2.DT_INT32: "np.int32",
        graph_pb2.DT_INT64: "np.int64",
        graph_pb2.DT_UINT8: "np.uint8",
        graph_pb2.DT_UINT16: "np.uint16"
    }

    def __init__(self, model):
        from six import string_types as _string_types
        super(CntkEmitter, self).__init__()
        if isinstance(model, _string_types):
            network_path = model
        else:
            network_path = model[0]
            self._load_weights(model[1])

        self.IR_graph = IRGraph(network_path)
        super(CntkEmitter, self)._build()

    @property
    def header_code(self):
        return """import numpy as np
import cntk
from cntk import ops, layers
from cntk.contrib.crosstalkcaffe.unimodel.cntkinstance import BlockApiSetup

__weights_dict = dict()

def load_weights(weight_file):
    if weight_file == None:
        return

    try:
        weights_dict = np.load(weight_file).item()
    except:
        weights_dict = np.load(weight_file, encoding='bytes').item()

    return weights_dict


def KitModel(weight_file = None):
    global __weights_dict
    __weights_dict = load_weights(weight_file)

"""

    def gen_code(self, phase='test'):
        self.phase = phase
        self.add_body(0, self.header_code)

        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            node_type = current_node.type

            if hasattr(self, "emit_" + node_type):
                func = getattr(self, "emit_" + node_type)
                func(current_node)
            else:
                print("CntkEmitter has not supported operator [%s]." %
                      (node_type))
                self.emit_UNKNOWN(current_node)

        self.add_body(
            1, "return {}".format(','.join([
                self.IR_graph.get_node(name).real_variable_name
                for name in self.IR_graph.output_layers
            ])))

        self.add_body(0, "")
        for i in self.used_layers:
            func = getattr(self, "_layer_" + i)
            func()

        return self.body_code

    @staticmethod
    def _shapeToStr(shapes):
        new_shape = filter(lambda x: x > -1, [dim.size for dim in shapes.dim])
        return ', '.join('%s' % i for i in new_shape)

    @staticmethod
    def is_valid_padding(auto_pad, pads):
        """
        different from utils.is_valid_padding
        """
        if auto_pad:
            if auto_pad == 'VALID':
                return True
            elif auto_pad.startswith('SAME'):
                return False
            else:
                raise ValueError("Unknown padding type [{}].".format(auto_pad))

        else:
            lens = len(pads)
            assert lens % 2 == 0
            for i in range(0, lens // 2):
                if pads[i] != 0:
                    return False
            return True

    @staticmethod
    def is_ceil_mode(pads):
        lens = len(pads)
        for i in range(lens // 2 + 1, lens - 1):
            if pads[i] == pads[i - lens // 2]:
                return False
        return True

    def _defuse_padding(self, IR_node):
        auto_pad = IR_node.get_attr('auto_pad')
        if auto_pad:
            input_node = self.parent_variable_name(IR_node)
            if auto_pad == 'VALID':
                padding = False
            elif auto_pad.startswith("SAME"):
                padding = True
            else:
                raise ValueError("Unknown padding type [{}].".format(auto_pad))

            return input_node, padding

        else:
            padding = IR_node.get_attr('pads')
            if not is_valid_padding(padding):
                dim = len(padding) // 2
                padding_str = list()
                for i in range(1, dim):
                    padding_str.append((padding[i], padding[i + dim]))
                input_node = IR_node.variable_name + '_pad'
                self.add_body(
                    1, "{:<15} = cntk.pad({}, pattern={})".format(
                        input_node, self.parent_variable_name(IR_node),
                        padding_str))

            else:
                input_node = self.parent_variable_name(IR_node)

            return input_node, False
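            # e.g. with IR pads [0, 1, 1, 0, 0, 2, 2, 0] (NHWC) this emits
            # cntk.pad(parent, pattern=[(1, 2), (1, 2), (0, 0)]) and the op
            # then runs with auto-padding disabled.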

    def emit_Conv(self, IR_node):
        if self.weight_loaded:
            self.used_layers.add('Conv')
            input_node, padding = self._defuse_padding(IR_node)

            dim = len(IR_node.get_attr('strides')) - 2
            padding = [False] + [padding] * dim

            self.add_body(
                1,
                "{:<15} = convolution({}, is_transpose={}, strides={}, auto_padding={}, dilation={}, groups={}, name='{}')"
                .format(IR_node.variable_name, input_node,
                        IR_node.type == 'ConvTranspose',
                        tuple(IR_node.get_attr('strides')[1:-1]), padding,
                        tuple(IR_node.get_attr('dilations', [1])),
                        IR_node.get_attr('group', 1), IR_node.name))

        else:
            self.add_body(
                1,
                "{:<15} = Convolution(name = '{}', num_filters = {}, filter_shape = ({}), strides = ({},), pad = {}, bias = {})({})\n"
                .format(
                    IR_node.variable_name, IR_node.name,
                    IR_node.get_attr('kernel_shape')[-1],
                    ', '.join('%s' % i for i in
                              IR_node.layer.attr["kernel_shape"].list.i[:-2]),
                    ', '.join(
                        '%s' % i
                        for i in IR_node.layer.attr['strides'].list.i[1:-1]),
                    IR_node.get_attr('auto_pad') != 'VALID',
                    IR_node.get_attr('use_bias'),
                    self.parent_variable_name(IR_node)))

    def emit_Pool(self, IR_node):
        input_node = self.IR_graph.get_node(
            IR_node.in_edges[0]).real_variable_name
        if IR_node.layer.attr['global_pooling'].b:
            self.used_layers.add('GlobalPooling')
            self.add_body(
                1, "{:<15} = global_pooling({}, '{}', name = '{}')".format(
                    IR_node.variable_name, input_node,
                    IR_node.get_attr('pooling_type'), IR_node.name))
        else:
            for e in IR_node.get_attr('dilations', []):
                assert e == 1

            dim = len(IR_node.get_attr('kernel_shape')) - 2
            padding = not self.is_valid_padding(IR_node.get_attr('auto_pad'),
                                                IR_node.get_attr('pads'))
            padding = [False] + [padding] * dim
            ceil_out_dim = self.is_ceil_mode(IR_node.get_attr('pads'))

            pooling_type = IR_node.get_attr('pooling_type')
            if pooling_type == 'MAX':
                pooling_type = 'cntk.MAX_POOLING'
            elif pooling_type == 'AVG':
                pooling_type = 'cntk.AVG_POOLING'
            else:
                raise ValueError(
                    "Unknown pooling type [{}].".format(pooling_type))

            if self.weight_loaded:
                self.used_layers.add(IR_node.type)
                self.add_body(
                    1,
                    "{:<15} = pooling({}, pooling_type={}, pooling_window_shape={}, strides={}, auto_padding={}, ceil_out_dim={})"
                    .format(IR_node.variable_name, input_node, pooling_type,
                            tuple(IR_node.get_attr('kernel_shape')[1:-1]),
                            tuple(IR_node.get_attr('strides')[1:-1]), padding,
                            ceil_out_dim))

            else:
                raise NotImplementedError

    def emit_UNKNOWN(self, IR_node):
        print(IR_node.IR_layer.name)

    def emit_DataInput(self, IR_node):
        shape_str = self._shapeToStr(IR_node.IR_layer.attr["shape"].shape)
        dtype_str = ", dtype = {}".format(
            self.dtype_map[IR_node.layer.attr['dtype'].
                           type]) if 'dtype' in IR_node.layer.attr else ""
        self.add_body(
            1, "{:<15} = cntk.input_variable(({},) {}, name='{}')".format(
                IR_node.variable_name, shape_str, dtype_str, IR_node.name))

    def emit_Dropout(self, IR_node):
        parent = self.IR_graph.get_parent(IR_node.name, [0])
        if self.phase == 'train':
            self.add_body(
                1, "{:<15} = Dropout({}, name = '{}')({})".format(
                    IR_node.variable_name, 1 - IR_node.get_attr('keep_prob'),
                    IR_node.name, parent.real_variable_name))
        else:
            IR_node.real_name = parent.real_name

    def emit_FullyConnected(self, IR_node):
        input_node = self.parent_variable_name(IR_node)
        if self.weight_loaded:
            self.used_layers.add(IR_node.type)
            self.add_body(
                1, "{:<15} = dense({}, name = '{}')".format(
                    IR_node.variable_name, input_node, IR_node.name))

        else:
            self.add_body(
                1, "{:<15} = Dense({}, bias = {}, name = '{}')({})".format(
                    IR_node.variable_name, IR_node.layer.attr["units"].i,
                    IR_node.layer.attr['use_bias'].b, IR_node.name,
                    input_node))

    def emit_Flatten(self, IR_node):
        self.add_body(
            1, "{:<15} = ops.reshape({}, (-1,), name = '{}')".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                IR_node.name))

    def emit_Reshape(self, IR_node):
        self.add_body(
            1, "{:<15} = cntk.reshape({}, shape={}, name='{}')".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                tuple(IR_node.get_attr('shape')), IR_node.name))

    def _emit_activation(self, IR_node, op_name):
        self.add_body(
            1, "{:<15} = layers.Activation(activation = {}, name = '{}')({})".
            format(IR_node.variable_name, op_name, IR_node.name,
                   self.parent_variable_name(IR_node)))

    def emit_Tanh(self, IR_node):
        self._emit_activation(IR_node, 'ops.tanh')

    def emit_Relu(self, IR_node):
        self._emit_activation(IR_node, 'ops.relu')

    def emit_Softmax(self, IR_node):
        self._emit_activation(IR_node, 'ops.softmax')

    def emit_Sigmoid(self, IR_node):
        self._emit_activation(IR_node, 'ops.sigmoid')

    def emit_RNNs(self, IR_node, func):
        raise NotImplementedError(
            "CntkEmitter does not support {} yet.".format(func))

    def emit_LSTM(self, IR_node):
        return self.emit_RNNs(IR_node, "LSTM")

    def emit_GRU(self, IR_node):
        return self.emit_RNNs(IR_node, "GRU")

    def emit_Add(self, IR_node):
        if len(IR_node.in_edges) > 1:
            inputs = ' + '.join(
                self.IR_graph.get_node(i).real_variable_name
                for i in IR_node.in_edges)
            self.add_body(1, "{:<15} = {}".format(IR_node.variable_name,
                                                  inputs))

    def emit_Sub(self, IR_node):
        if len(IR_node.in_edges) > 1:
            inputs = ' - '.join(
                self.IR_graph.get_node(i).real_variable_name
                for i in IR_node.in_edges)
            self.add_body(1, "{:<15} = {}".format(IR_node.variable_name,
                                                  inputs))

    def emit_Mul(self, IR_node):
        if len(IR_node.in_edges) > 1:
            inputs = ' * '.join(
                self.IR_graph.get_node(i).real_variable_name
                for i in IR_node.in_edges)
            self.add_body(1, "{:<15} = {}".format(IR_node.variable_name,
                                                  inputs))

    def emit_Constant(self, IR_node):
        self.add_body(
            1, "{:<15} = cntk.Constant(value=__weights_dict['{}']['value'])".
            format(IR_node.variable_name, IR_node.name))

    def emit_Concat(self, IR_node):
        inputs = ', '.join(
            self.IR_graph.get_node(i).real_variable_name
            for i in IR_node.in_edges)
        self.add_body(
            1, "{:<15} = cntk.splice({}, axis={}, name='{}')".format(
                IR_node.variable_name, inputs,
                IR_node.get_attr('axis') - 1, IR_node.name))

    def emit_BatchNorm(self, IR_node):
        self.used_layers.add(IR_node.type)
        self.add_body(
            1,
            "{:<15} = batch_normalization({}, epsilon={}, name='{}')".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                IR_node.get_attr('epsilon'), IR_node.name))

    def emit_Pad(self, IR_node):
        if IR_node.get_attr('mode') == 'constant':
            mode = 'mode = ops.CONSTANT_PAD, constant_value = {}'.format(
                IR_node.get_attr('constant_values', 0.0))
        elif IR_node.get_attr('mode') == 'reflect':
            mode = 'mode = ops.REFLECT_PAD'
        elif IR_node.get_attr('mode') == 'SYMMETRIC':
            mode = 'mode = ops.SYMMETRIC_PAD'
        else:
            raise ValueError(
                "Unknown padding mode [{}].".format(IR_node.get_attr('mode')))

        padding = IR_node.get_attr('pads')
        padding = convert_onnx_pad_to_tf(padding)[1:]

        self.add_body(
            1, "{:<15} = ops.pad({}, pattern={}, {})".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                padding, mode))

    def emit_Squeeze(self, IR_node):
        IR_node.real_name = self.IR_graph.get_node(
            IR_node.in_edges[0]).real_name

    def emit_Log(self, IR_node):
        self.add_body(
            1, "{:<15} = _cntk.log({}, name='{}')".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                IR_node.name))

    def emit_Exp(self, IR_node):
        self.add_body(
            1, "{:<15} = _cntk.exp({}, name='{}')".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                IR_node.name))

    def emit_Reciprocal(self, IR_node):
        self.add_body(
            1, "{:<15} = _cntk.reciprocal({}, name='{}')".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                IR_node.name))

    def emit_ReduceMean(self, IR_node):
        self.add_body(
            1, "{:<15} = ops.reduce_mean({}, axis = ({}), name = '{}')".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                ', '.join('%s' % (i - 1) for i in IR_node.get_attr('axes')),
                IR_node.name))

    def emit_LRN(self, IR_node):
        self.used_layers.add(IR_node.type)
        self.add_body(
            1,
            "{:<15} = lrn({}, k=1, n={}, alpha={}, beta={}, name='{}')".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                IR_node.layer.attr['size'].i, IR_node.layer.attr['alpha'].f,
                IR_node.layer.attr['beta'].f, IR_node.name))

    def emit_LeakyRelu(self, IR_node):
        self.add_body(
            1, "{:<15} = _cntk.relu({}) - {} * _cntk.relu(-{})".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                IR_node.get_attr('alpha'), self.parent_variable_name(IR_node)))

    def emit_ConvTranspose(self, IR_node):
        self.emit_Conv(IR_node)

    def emit_Crop(self, IR_node):
        self.used_layers.add(IR_node.type)
        output_shape = IR_node.get_attr('_output_shapes')[0]
        output_shape = shape_to_list(output_shape)[1:]
        self.add_body(
            1, "{:<15} = _crop({}, {}, {}, name='{}')".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                IR_node.get_attr('border')[:2], output_shape,
                IR_node.real_name))

    def _layer_Crop(self):
        self.add_body(
            0, '''
def _crop(input, border, output_shape, **kwargs):
    dim = len(output_shape)
    output_shape = [output_shape[-1]] + output_shape[:-1]
    ref_tensor = np.zeros(shape=output_shape, dtype=np.float32)

    input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1)))
    layer = cntk.crop_manual(node_input=input, node_referent=ref_tensor, offset_x=border[0], offset_y=border[1])
    layer = cntk.transpose(layer, list(range(1, dim)) + [0])
    return layer
''')

    def _layer_LRN(self):
        self.add_body(
            0, """
def lrn(input, **kwargs):
    dim = len(input.output.shape)
    input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1)))
    layer = BlockApiSetup.lrn(**kwargs)(input)
    layer = cntk.transpose(layer, list(range(1, dim)) + [0])
    return layer
""")

    def _layer_FullyConnected(self):
        self.add_body(
            0, """
def dense(input, name, **kwargs):
    w = __weights_dict[name]['weights']
    b = __weights_dict[name]['bias'] if 'bias' in __weights_dict[name] else None
    return BlockApiSetup.linear(output_shape=w.shape[1], input_shape=w.shape[0], scale_init=w, bias_init=b, name=name, **kwargs)(input)
""")

    def _layer_Conv(self):
        self.add_body(
            0, """
def convolution(input, is_transpose, name, **kwargs):
    dim = __weights_dict[name]['weights'].ndim

    if is_transpose:
        weight = np.transpose(__weights_dict[name]['weights'], [dim - 2, dim - 1] + list(range(0, dim - 2)))
        kwargs.pop('groups', None)
    else:
        weight = np.transpose(__weights_dict[name]['weights'], [dim - 1, dim - 2] + list(range(0, dim - 2)))
    w = cntk.Parameter(init=weight, name=name + '_weight')

    input = cntk.transpose(input, [dim - 2] + list(range(0, dim - 2)))

    if is_transpose:
        layer = ops.convolution_transpose(w, input, **kwargs)
    else:
        layer = ops.convolution(w, input, **kwargs)
    if 'bias' in __weights_dict[name]:
        bias = np.reshape(__weights_dict[name]['bias'], [-1] + [1] * (dim - 2))
        b = cntk.Parameter(init=bias, name=name + '_bias')
        layer = layer + b
    layer = cntk.transpose(layer, list(range(1, dim - 1)) + [0])
    return layer
""")

    def _layer_Pool(self):
        self.add_body(
            0, """
def pooling(input, **kwargs):
    dim = len(input.output.shape)
    input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1)))
    layer = ops.pooling(input, **kwargs)
    layer = cntk.transpose(layer, list(range(1, dim)) + [0])
    return layer
""")

    def _layer_GlobalPooling(self):
        self.add_body(
            0, """
def global_pooling(input, type, **kwargs):
    dim = len(input.output.shape)
    input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1)))
    layer = layers.GlobalMaxPooling(**kwargs)(input) if type == 'MAX' else layers.GlobalAveragePooling(**kwargs)(input)
    layer = cntk.transpose(layer, list(range(1, dim)) + [0])
    return layer
""")

    def _layer_BatchNorm(self):
        self.add_body(
            0, """
def batch_normalization(input, name, epsilon, **kwargs):
    mean = cntk.Parameter(init = __weights_dict[name]['mean'],
        name = name + "_mean")
    var = cntk.Parameter(init = __weights_dict[name]['var'],
        name = name + "_var")

    layer = (input - mean) / cntk.sqrt(var + epsilon)
    if 'scale' in __weights_dict[name]:
        scale = cntk.Parameter(init = __weights_dict[name]['scale'],
            name = name + "_scale")
        layer = scale * layer

    if 'bias' in __weights_dict[name]:
        bias = cntk.Parameter(init = __weights_dict[name]['bias'],
            name = name + "_bias")
        layer = layer + bias

    return layer
""")
Example #7
class MXNetEmitter(Emitter):

    dtype_map = {
        graph_pb2.DT_FLOAT16: "float16",
        graph_pb2.DT_FLOAT32: "float32",
        graph_pb2.DT_FLOAT64: "float64",
        graph_pb2.DT_INT32: "int32",
        graph_pb2.DT_UINT8: "uint8"
    }

    activation_map = {
        "relu": "Relu",
        "sigmoid": "Sigmoid",
        "tanh": "Tanh",
        "elu": "Elu"
    }

    transpose_map = {1: 2, 2: 3, -1: 1}

    naive_scope_pattern = []

    channels_last = ['NDHWC', 'NHWC']

    def __init__(self, model):
        super(MXNetEmitter, self).__init__()
        from six import string_types as _string_types

        if isinstance(model, _string_types):
            network_path = model
            self.weight_loaded = False
        elif len(model) == 3:
            network_path = model[0]
            weight_path = model[1]
            self.output_weights_file = model[2]
            self.weights = np.load(weight_path).item()
            self.weight_loaded = True
            self.output_weights = dict()
        else:
            raise ValueError("the # of input arguments [{}] is not supported" %
                             len(model))

        self.IR_graph = IRGraph(network_path)
        self.IR_graph.build()

        folder = Folder(self.IR_graph, self.weights)
        folder.fold()

    @property
    def header_code(self):
        return """import mxnet as mx
import numpy as np
import math

# MXNet CPU only supports channels-first; by default the model and weights are converted to channels-first

def RefactorModel():
"""

    def gen_code(self, phase):
        self.IR_layer_map = dict()
        self.add_body(0, self.header_code)
        for layer in self.IR_graph.topological_sort:
            self.IR_layer_map[layer] = self.IR_graph.get_node(layer)

        shape = dict()
        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            node_type = current_node.type

            if len(current_node.in_edges) == 0:
                current_node.in_edges.append('data')

            if node_type.lower() in MXNetEmitter.activation_map:
                func = getattr(self, "emit_Activation")
                line = func(
                    current_node,
                    MXNetEmitter.activation_map[node_type.lower()].lower())
                self.add_body(1, line)

            elif hasattr(self, "emit_" + node_type):
                func = getattr(self, "emit_" + node_type)
                line = func(current_node)
                if line is not None:
                    self.add_body(1, line)
            else:
                print("MXNet Emitter has not supported operator [%s]." %
                      (node_type))
                self.emit_UNKNOWN(current_node)

            if node_type == "DataInput":
                cur_shape = list()
                first = True
                for dim in current_node.IR_layer.attr["shape"].shape.dim:
                    if dim.size == -1 and first:
                        cur_shape.append(1)
                        print(
                            "Detected input layer [{}] with an inferred batch size; defaulting it to [1]"
                            .format(current_node.name))
                    else:
                        if dim.size == -1:
                            print(
                                "Warning: the input size should be changed manually"
                            )
                        cur_shape.append(dim.size)
                    first = False

                cur_shape.insert(1, cur_shape.pop())
                shape[current_node.name] = ', '.join('%s' % i
                                                     for i in cur_shape)
                self.input_name_shape = {current_node.name: tuple(cur_shape)}

        if self.weight_loaded:
            fullpath = os.path.abspath(self.output_weights_file)
            dirname = os.path.dirname(fullpath)
            if not os.path.exists(dirname):
                os.makedirs(dirname)
            with open(self.output_weights_file, 'wb') as outfile:
                np.save(outfile, self.output_weights)

        comment = "\n    # if a GPU is available, change mx.cpu() to mx.gpu()"
        # We use the real_name for specifying the input layer in data_names
        # since MXNet API wants the actual name of the layer. On the other
        # hand, the module API wants the last symbol in the symbol chain, so
        # for the output node we need to use the actual python variable name
        # of the last layer (real_variable_name).
        last_line = "{:<15} = mx.mod.Module(symbol = {}, context = mx.cpu(), data_names = ['{}'])".format(
            "model", ', '.join([
                self.IR_graph.get_node(name).real_variable_name
                for name in self.IR_graph.output_layers
                if self.IR_graph.get_node(name).type != 'Pack'
                and self.IR_graph.get_node(name).type != 'Shape'
            ]), ', '.join([
                self.IR_graph.get_node(name).real_name
                for name in self.IR_graph.input_layers
                if self.IR_graph.get_node(name).type != 'Const'
            ]))

        self.add_body(1, comment)
        self.add_body(1, last_line)
        self.add_body(1, "return model")

        self.add_body(0, "")
        for code in self.layers_codes.values():
            self.add_body(0, code)

        weight_code = ""
        if not self.weight_loaded:
            weight_code += "# emitter does not detect any import weights, you may generate weights file manually\n"

        weight_code += self.gen_weight_code(shape, phase)

        main_code = "if __name__ == '__main__':\n    model = RefactorModel()\n"
        if self.weight_loaded:
            main_code += "    # remember to adjust params path\n    model = deploy_weight(model, '{}')\n".format(
                self.output_weights_file)

        if phase == 'train':
            train_code = """def train(model):
    import logging
    logging.getLogger().setLevel(logging.DEBUG)
    model.fit(train_iter, # train data
            eval_data = val_iter, # validation data
            optimizer = 'sgd', # Defaults to 'sgd'
            optimizer_params = {'learning_rate':0.01}, # use fixed learning rate
            eval_metric = 'acc', # report accuracy during training, other possible predefined metrics are: 'ce', 'f1', 'mae', 'mse', 'rmse', 'top_k_accuracy'
            batch_end_callback = mx.callback.Speedometer(batch_size, 100), # output progress for each 100 data batches
            num_epoch = 10) # train for at most 10 dataset passes\n\n
"""
            code = self.body_code + weight_code + train_code + main_code
        else:
            test_code = """from collections import namedtuple
Batch = namedtuple('Batch', ['data'])


def get_image(url, show=False):
    import cv2
    # download and show the image
    fname = mx.test_utils.download(url)
    img = cv2.cvtColor(cv2.imread(fname), cv2.COLOR_BGR2RGB)
    if img is None:
        return None
    if show:
        import matplotlib.pyplot as plt
        plt.imshow(img)
        plt.axis('off')
    # convert into format (batch, RGB, width, height)
    img = cv2.resize(img, (224, 224))
    img = np.swapaxes(img, 0, 2)
    img = np.swapaxes(img, 1, 2)
    img = img[np.newaxis, :]
    return img


def predict(model, labels, url):
    # to show the image, change the argument show into True
    img = get_image(url, show = False)
    # compute the predict probabilities
    model.forward(Batch([mx.nd.array(img)]))
    prob = model.get_outputs()[0].asnumpy()
    # print the top-5
    prob = np.squeeze(prob)
    a = np.argsort(prob)[::-1]
    for i in a[0:5]:
        print('probability = %f, class = %s' % (prob[i], labels[i]))\n\n
"""

            main_code += """
    # # call function predict
    # with open('synset.txt', 'r') as f:
    #     labels = [l.rstrip() for l in f]
    # predict(model, labels, 'http://writm.com/wp-content/uploads/2016/08/Cat-hd-wallpapers.jpg')
"""

            code = self.body_code + weight_code + test_code + main_code

        return code

    def gen_weight_code(self, shape, phase):
        str = "def deploy_weight(model, weight_file):\n"
        str += """
    if weight_file == None:
        return

    try:
        weights_dict = np.load(weight_file).item()
    except:
        weights_dict = np.load(weight_file, encoding='bytes').item()

    arg_params = dict()
    aux_params = dict()
    for weight_name, weight_data in weights_dict.items():
        weight_name = str(weight_name)
        if "moving" in weight_name:
            aux_params[weight_name] = mx.nd.array(weight_data)
        else:
            arg_params[weight_name] = mx.nd.array(weight_data)

"""
        if phase == 'train':
            code += "    model.bind(for_training = True, data_shapes = ["
        else:
            code += "    model.bind(for_training = False, data_shapes = ["
        first = True
        for k, v in shape.items():
            if not first:
                code += ", "
            code += "('" + k + "', " + "(" + v + "))"
            first = False
        code += "])\n"
        code += "    model.set_params(arg_params = arg_params, aux_params = aux_params, allow_missing = True, allow_extra = True)\n\n    return model\n\n\n"
        return code
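    # For example, with shape == {'data': '1, 3, 224, 224'} and phase 'test'
    # (hypothetical values), the generated snippet contains the line:
    #     model.bind(for_training = False, data_shapes = [('data', (1, 3, 224, 224))])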

    @staticmethod
    def calculate_same_pad(data_shape, kernel, stride):
        if (data_shape % stride == 0):
            pad = max(kernel - stride, 0)
        else:
            pad = max(kernel - (data_shape % stride), 0)
        if pad % 2 == 0:
            return False, pad
        else:
            return True, pad
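    # Worked examples (hypothetical values): calculate_same_pad(7, 3, 2)
    # gives pad = max(3 - (7 % 2), 0) = 2, which is even, so it returns
    # (False, 2); calculate_same_pad(5, 2, 1) gives pad = 1, which is odd,
    # so it returns (True, 1).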

    @staticmethod
    def transfer_pad(pad_list):
        defuse_pad = False
        pad = list()

        assert len(pad_list) % 2 == 0
        mid = int(len(pad_list) / 2)
        pad_first = pad_list[1:mid - 1]
        pad_second = pad_list[mid + 1:-1]

        for i in range(0, mid - 2):
            if not pad_first[i] == pad_second[i]:
                defuse_pad = True

        if defuse_pad:
            pad.extend([0] * 4)
            for i in range(0, mid - 2):
                pad.extend([pad_first[i], pad_second[i]])
        else:
            pad = pad_first

        return defuse_pad, pad
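    # Worked examples, assuming the IR pad layout
    # [N_begin, H_begin, W_begin, C_begin, N_end, H_end, W_end, C_end]:
    #     transfer_pad([0, 1, 1, 0, 0, 1, 1, 0]) -> (False, [1, 1])
    #     transfer_pad([0, 1, 2, 0, 0, 3, 4, 0]) -> (True, [0, 0, 0, 0, 1, 3, 2, 4])
    # Asymmetric begin/end pads cannot be expressed by the symmetric `pad`
    # argument of mx.sym.Convolution/Pooling, so they are defused into an
    # explicit mx.sym.pad via set_pad below.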

    @staticmethod
    def transpose(data, dim):
        if dim == 1:
            data = data.transpose((2, 1, 0))
        elif dim == 2:
            data = data.transpose((3, 2, 0, 1))
        elif dim == 3:
            data = data.transpose((4, 3, 0, 1, 2))
        else:
            raise ValueError("The weight of dim {} cannot transpose" % dim)

        return data
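    # A worked sketch, assuming IR convolution weights use the TF-style
    # channels-last layout (e.g. HWIO for 2D): transpose(data, 2) applies
    # data.transpose((3, 2, 0, 1)), producing the OIHW layout that MXNet's
    # channels-first Convolution expects.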

    def set_pad(self, IR_node, code, pad, _max_pool):
        if _max_pool:
            constant_value = "float('-inf')"
        else:
            constant_value = "0.0"

        code = "{:<15} = mx.sym.pad(data = {}, mode = 'constant', pad_width={}, constant_value = {}, name = '{}')".format(
            IR_node.variable_name + "_pad", self.parent_variable_name(IR_node),
            tuple(pad), constant_value, IR_node.name + "_pad")

        for e in IR_node.in_edges:
            e = e.split(':')[0]
            if e == 'data':
                continue
            self.IR_layer_map[e].out_edges = [
                x if not self.IR_layer_map[x.split(':')[0]].name
                == IR_node.variable_name else IR_node.variable_name + "_pad"
                for x in self.IR_layer_map[e].out_edges
            ]

        return code

    def emit_UNKNOWN(self, IR_node):
        print(IR_node.name)

    def emit_FullyConnected(self, IR_node):
        if self.weight_loaded:
            weight_dict = self.weights[IR_node.name]
            parent = self.IR_graph.get_parent(IR_node.name, [0])
            while parent.type == "Flatten" or parent.type == 'Dropout':
                parent = self.IR_graph.get_parent(parent.name, [0])
            dim = len(parent.layer.attr['_output_shapes'].list.shape[0].dim)
            if dim > 2:
                original_dims = weight_dict['weights'].shape
                dims = [
                    i.size for i in
                    parent.layer.attr['_output_shapes'].list.shape[0].dim[1:]
                ] + [-1]
                weight_dict['weights'] = np.reshape(weight_dict['weights'],
                                                    dims)
                weight_dict['weights'] = np.transpose(
                    weight_dict['weights'],
                    [dim - 2] + list(range(0, dim - 2)) + [dim - 1])
                weight_dict['weights'] = np.reshape(weight_dict['weights'],
                                                    original_dims)
            self.output_weights[IR_node.name +
                                "_weight"] = weight_dict['weights'].transpose(
                                    (1, 0))

        num_hidden = IR_node.IR_layer.attr["units"].i
        no_bias = not IR_node.IR_layer.attr["use_bias"].b
        if not no_bias and self.weight_loaded:
            self.output_weights[IR_node.name + "_bias"] = weight_dict['bias']

        code = "{:<15} = mx.sym.FullyConnected(data = {}, num_hidden = {}, no_bias = {}, name = '{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            num_hidden, no_bias, IR_node.name)

        return code

    def _emit_convolution(self, IR_node, pattern):
        if self.weight_loaded:
            weight_dict = self.weights[IR_node.name]
            weights = weight_dict['weights']

        dim = len(IR_node.IR_layer.attr["kernel_shape"].list.i) - 2

        kernel = list()
        for idx in range(0, dim):
            kernel.append(IR_node.IR_layer.attr["kernel_shape"].list.i[idx])

        stride = list()
        for e in IR_node.IR_layer.attr["strides"].list.i[1:-1]:
            stride.append(e)

        dilate = list()
        for e in IR_node.IR_layer.attr["dilations"].list.i[1:-1]:
            dilate.append(e)
        if not dilate:
            dilate = [1, 1]
        dilate = ', '.join('%s' % i for i in dilate)

        defuse_pad = False
        pad = list()
        if "pads" in IR_node.IR_layer.attr:
            output_shape = list()
            for e in IR_node.IR_layer.attr["_output_shapes"].list.shape[0].dim:
                output_shape.append(e.size)

            # print("Warning: MXNet Convolution Layer pad does not match IR Convolution Layer pad")
            defuse_pad, pad = MXNetEmitter.transfer_pad(
                IR_node.IR_layer.attr["pads"].list.i)

        num_filter = 0
        if pattern == "Deconvolution":
            num_filter = IR_node.IR_layer.attr["kernel_shape"].list.i[-2]
        else:
            num_filter = IR_node.IR_layer.attr["kernel_shape"].list.i[-1]

        use_bias = IR_node.get_attr('use_bias', False)
        if use_bias and self.weight_loaded:
            self.output_weights[IR_node.name + "_bias"] = weight_dict['bias']

        if pattern == "DepthwiseConv":
            num_group = IR_node.IR_layer.attr["kernel_shape"].list.i[-2]
            num_filter = num_filter * num_group
            pattern = "Convolution"
            if self.weight_loaded:
                weights = np.swapaxes(weights, -1, -2)

        else:
            num_group = IR_node.get_attr('group', 1)

        # layout = IR_node.IR_layer.attr["data_format"].s
        if dim == 1:
            layout = 'NCW'
        elif dim == 2:
            layout = 'NCHW'
        elif dim == 3:
            layout = 'NCDHW'
        else:
            raise ValueError(
                "Unsupported convolution dimension [{}].".format(dim))

        if self.weight_loaded:
            # if layout not in MXNetEmitter.channels_last:
            weights = MXNetEmitter.transpose(weights, dim)
            self.output_weights[IR_node.name + "_weight"] = weights

        code = ""
        if not defuse_pad:
            code += "{:<15} = mx.sym.{}(data={}, kernel={}, stride={}, dilate = ({}), pad={}, num_filter = {}, num_group = {}, no_bias = {}, layout = '{}', name = '{}')".format(
                IR_node.variable_name, pattern,
                self.parent_variable_name(IR_node), tuple(kernel),
                tuple(stride), dilate, tuple(pad), num_filter, num_group,
                not use_bias, layout, IR_node.name)
        else:
            code += self.set_pad(IR_node, code, pad, False)
            code += "\n    {:<15} = mx.sym.{}(data={}, kernel={}, stride={}, dilate = ({}), num_filter = {}, num_group = {}, no_bias = {}, layout = '{}', name = '{}')".format(
                IR_node.variable_name, pattern, IR_node.variable_name + "_pad",
                tuple(kernel), tuple(stride), dilate, num_filter, num_group,
                not use_bias, layout, IR_node.name)

        return code

    def emit_Conv(self, IR_node):
        return self._emit_convolution(IR_node, "Convolution")

    def emit_DepthwiseConv(self, IR_node):
        return self._emit_convolution(IR_node, "DepthwiseConv")

    def emit_ConvTranspose(self, IR_node):
        return self._emit_convolution(IR_node, "Deconvolution")

    def emit_DataInput(self, IR_node):
        code = "{:<15} = mx.sym.var('{}')".format(IR_node.variable_name,
                                                  IR_node.name)
        return code

    # elu is emitted via LeakyReLU (a custom slope is not supported)
    def emit_Activation(self, IR_node, act_type):
        if act_type == "elu":
            func_name = "LeakyReLU"
        else:
            func_name = "Activation"

        code = "{:<15} = mx.sym.{}(data = {}, act_type = '{}', name = '{}')".format(
            IR_node.variable_name, func_name,
            self.parent_variable_name(IR_node), act_type, IR_node.name)

        return code

    def emit_BatchNorm(self, IR_node):
        IR_node_after = self.IR_graph.get_son(IR_node.name, [0])
        if IR_node_after.type == 'Scale':
            if self.weight_loaded:
                weight_dict = self.weights[IR_node.name]
                weight_dict_scale = self.weights[IR_node_after.name]

            # axis = IR_node.IR_layer.attr["axis"].i
            axis = 1
            eps = IR_node.IR_layer.attr["epsilon"].f
            momentum = IR_node.IR_layer.attr["momentum"].f

            fix_gamma = not IR_node.IR_layer.attr["scale"].b

            if self.weight_loaded:
                if not fix_gamma:
                    #     self.output_weights[IR_node.name + "_gamma"] = np.multiply(weight_dict['scale'], weight_dict_scale['scale'])
                    # self.output_weights[IR_node.name + "_beta"] = np.multiply(weight_dict['bias'], weight_dict_scale['scale']) + weight_dict_scale['bias']
                    self.output_weights[IR_node.name +
                                        "_gamma"] = weight_dict['scale']
                self.output_weights[IR_node.name +
                                    "_beta"] = weight_dict['bias']

            # not supported yet
            use_global_stats = "False"
            if self.weight_loaded:
                self.output_weights[IR_node.name +
                                    "_moving_var"] = weight_dict['var']
                self.output_weights[IR_node.name +
                                    "_moving_mean"] = weight_dict['mean']

            code = "{:<15} = mx.sym.BatchNorm(data = {}, axis = {}, eps = {}, momentum = {}, fix_gamma = {}, use_global_stats = {}, name = '{}')".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                axis, eps, momentum, fix_gamma, use_global_stats, IR_node.name)

            return code

        else:
            if self.weight_loaded:
                weight_dict = self.weights[IR_node.name]

            # axis = IR_node.IR_layer.attr["axis"].i
            axis = 1
            eps = IR_node.IR_layer.attr["epsilon"].f
            momentum = IR_node.IR_layer.attr["momentum"].f

            fix_gamma = not IR_node.IR_layer.attr["scale"].b

            if self.weight_loaded:
                if not fix_gamma:
                    self.output_weights[IR_node.name +
                                        "_gamma"] = weight_dict['scale']
                self.output_weights[IR_node.name +
                                    "_beta"] = weight_dict['bias']

            # not supported yet
            use_global_stats = "False"
            if self.weight_loaded:
                self.output_weights[IR_node.name +
                                    "_moving_var"] = weight_dict['var']
                self.output_weights[IR_node.name +
                                    "_moving_mean"] = weight_dict['mean']

            code = "{:<15} = mx.sym.BatchNorm(data = {}, axis = {}, eps = {}, momentum = {}, fix_gamma = {}, use_global_stats = {}, name = '{}')".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                axis, eps, momentum, fix_gamma, use_global_stats, IR_node.name)

            return code

    def emit_Scale(self, IR_node):
        if self.weight_loaded:
            weight_dict = self.weights[IR_node.name]

        # axis = IR_node.IR_layer.attr["axis"].i
        axis = 1
        eps = 0.0
        momentum = 0.0

        fix_gamma = not IR_node.IR_layer.attr["scale"].b

        if self.weight_loaded:
            if not fix_gamma:
                self.output_weights[IR_node.name +
                                    "_gamma"] = weight_dict['scale']
            self.output_weights[IR_node.name + "_beta"] = weight_dict['bias']

        # not supported yet
        use_global_stats = "False"
        if self.weight_loaded:
            self.output_weights[IR_node.name +
                                "_moving_var"] = weight_dict['scale_var']
            self.output_weights[IR_node.name +
                                "_moving_mean"] = weight_dict['scale_mean']

        code = "{:<15} = mx.sym.BatchNorm(data = {}, axis = {}, eps = {}, momentum = {}, fix_gamma = {}, use_global_stats = {}, name = '{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node), axis,
            eps, momentum, fix_gamma, use_global_stats, IR_node.name)

        return code

    def emit_Pool(self, IR_node):

        global_pool = IR_node.IR_layer.attr["global_pooling"].b

        kernel = list()
        if global_pool:
            kernel = [1] * (len(IR_node.IR_layer.attr["strides"].list.i) - 2)
        else:
            for e in IR_node.IR_layer.attr["kernel_shape"].list.i[1:-1]:
                kernel.append(e)

        pool_type = IR_node.get_attr('pooling_type').lower()

        stride = list()
        for e in IR_node.IR_layer.attr["strides"].list.i[1:-1]:
            stride.append(e)

        defuse_pad = False
        pad = list()
        if "pads" in IR_node.IR_layer.attr:
            output_shape = list()
            for e in IR_node.IR_layer.attr["_output_shapes"].list.shape[0].dim:
                output_shape.append(e.size)

            # print("Warning: MXNet Pooling Layer pad does not match IR Pooling Layer pad")
            defuse_pad, pad = MXNetEmitter.transfer_pad(
                IR_node.IR_layer.attr["pads"].list.i)
        code = ""
        if not defuse_pad:
            code += "{:<15} = mx.sym.Pooling(data = {}, global_pool = {}, kernel={}, pool_type = '{}', stride={}, pad={}, name = '{}')".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                global_pool, tuple(kernel), pool_type, tuple(stride),
                tuple(pad), IR_node.name)
        else:
            code += self.set_pad(IR_node, code, pad, pool_type == "max")
            code += "\n    {:<15} = mx.sym.Pooling(data = {}, global_pool = {}, kernel={}, pool_type = '{}', stride={}, name = '{}')".format(
                IR_node.variable_name, IR_node.variable_name + "_pad",
                global_pool, tuple(kernel), pool_type, tuple(stride),
                IR_node.name)

        return code

    def emit_SoftmaxOutput(self, IR_node):

        code = "{:<15} = mx.sym.SoftmaxOutput(data = {}, name = 'softmax')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node))

        return code

    def emit_Softmax(self, IR_node):

        code = ""

        if len(IR_node.out_edges) == 0:
            code = "{:<15} = mx.sym.SoftmaxOutput(data = {}, name = 'softmax')".format(
                IR_node.variable_name, self.parent_variable_name(IR_node))
        else:
            axis = IR_node.IR_layer.attr["dim"].i
            code = "{:<15} = mx.sym.softmax(data = {}, axis = {}, name = '{}')".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                axis, IR_node.name)

        return code

    def emit_Squeeze(self, IR_node):
        return self.emit_Flatten(IR_node)

    def emit_Embedding(self, IR_node):

        input_dim = IR_node.IR_layer.attr["input_dim"].i
        output_dim = IR_node.IR_layer.attr["output_dim"].i
        dtype = MXNetEmitter.dtype_map.get(IR_node.layer.attr["dtype"].type,
                                           "float32")

        if self.weight_loaded:
            weight_dict = self.weights[IR_node.name]
            self.output_weights[IR_node.name +
                                "_weight"] = weight_dict['weights']

        code = "{:<15} = mx.sym.Embedding(data = {}, input_dim = {}, output_dim = {}, dtype = '{}', name = '{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            input_dim, output_dim, dtype, IR_node.name)

        return code

    def emit_LeakyRelu(self, IR_node):
        alpha = IR_node.IR_layer.attr['alpha'].f
        code = "{:<15} = mx.sym.LeakyReLU(data = {}, slope = {}, name = '{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node), alpha,
            IR_node.name)
        return code

    def emit_PRelu(self, IR_node):
        slope = IR_node.get_attr('gamma')
        code = "{:<15} = mx.sym.LeakyReLU(data = {}, slope = {}, act_type = '{}', name = '{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node), slope,
            'prelu', IR_node.name)
        return code

    def emit_Elu(self, IR_node):
        alpha = IR_node.IR_layer.attr['alpha'].f
        code = "{:<15} = mx.sym.LeakyReLU(data = {}, slope = {}, act_type = {}, name = '{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node), alpha,
            'elu', IR_node.name)
        return code

    def emit_Dropout(self, IR_node):
        p = IR_node.IR_layer.attr["keep_prob"].f
        mode = IR_node.IR_layer.attr["mode"].s.lower().decode(
        ) if 'mode' in IR_node.layer.attr else 'training'
        code = "{:<15} = mx.sym.Dropout(data = {}, p = {}, mode = '{}', name = '{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node), p, mode,
            IR_node.name)

        return code

    # reverse is not supported yet
    def emit_Reshape(self, IR_node):
        shape = list()
        for e in IR_node.IR_layer.attr["shape"].list.i:
            shape.append(e)
        shape = ', '.join('%s' % i for i in shape)
        reverse = False

        code = "{:<15} = mx.sym.reshape(data = {}, shape = ({}), reverse = {}, name = '{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node), shape,
            reverse, IR_node.name)

        return code

    def emit_Flatten(self, IR_node):
        # code = "{:<15} = mx.sym.transpose(data = {}, axes = (0, 2, 3, 1))\n".format("trans", self.parent_variable_name(IR_node))
        code = "{:<15} = mx.sym.flatten(data = {}, name = '{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            IR_node.name)

        return code

    @staticmethod
    def _convert_axis(IR_node, axis):
        ndim = len(IR_node.layer.attr['_output_shapes'].list.shape[0].dim)
        if axis == 0:
            return 0
        elif axis == ndim - 1:
            return 1
        else:
            return axis + 1
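    # A worked sketch for a 4D channels-last tensor (ndim == 4): axis 0
    # (batch) stays 0, axis 3 (channels) maps to 1, and the spatial axes
    # 1 and 2 shift to 2 and 3, matching MXNet's NCHW ordering.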

    def emit_Concat(self, IR_node):
        dim = MXNetEmitter._convert_axis(IR_node,
                                         IR_node.IR_layer.attr["axis"].i)
        code = "{:<15} = mx.sym.concat({}, dim = {}, name = '{}')".format(
            IR_node.variable_name, ', '.join(
                self.parent_variable_name(IR_node, [idx])
                for idx in range(len(IR_node.in_edges))), dim, IR_node.name)

        return code

    def emit_Cast(self, IR_node):
        dtype = MXNetEmitter.dtype_map.get(IR_node.IR_layer.attr["dtype"].type,
                                           "float32")
        code = "{:<15} = mx.sym.cast(data = {}, dtype = '{}', name = '{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node), dtype,
            IR_node.name)

        return code

    def emit_Expand_dims(self, IR_node):
        axis = IR_node.IR_layer.attr["axis"].i
        code = "{:<15} = mx.sym.expand_dims(data = {}, axis = {}, name = '{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node), axis,
            IR_node.name)

        return code

    def emit_Pad(self, IR_node):
        mode = IR_node.IR_layer.attr["mode"].s.lower().decode()
        pad_width = list()
        pad_width.extend([0] * 4)
        padding = convert_onnx_pad_to_tf(IR_node.get_attr("pads"))[1:-1]
        for padding_pair in padding:
            pad_width.extend(padding_pair)

        pad_width = ', '.join('%s' % i for i in pad_width)

        code = "{:<15} = mx.sym.pad(data = {}, mode = '{}', pad_width = ({}), name = '{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node), mode,
            pad_width, IR_node.name)

        return code

    def emit_Add(self, IR_node):
        code = "{:<15} = mx.sym.broadcast_add({}, {})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            self.parent_variable_name(IR_node, [1]))

        return code

    def emit_Mul(self, IR_node):

        code = "{:<15} = mx.sym.broadcast_mul({}, {})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            self.parent_variable_name(IR_node, [1]))

        return code

    def emit_ReduceMean(self, IR_node):
        axes = IR_node.layer.attr['axes'].list.i[:]
        axes = ','.join('%s' % MXNetEmitter.transpose_map[i] for i in axes)

        code = "{:<15} = mx.sym.mean(data = {}, axis = ({}), keepdims = {})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node), axes,
            IR_node.layer.attr['keepdims'].b)

        return code

    def emit_LRN(self, IR_node):
        code = "{:<15} = mx.sym.LRN(data = {}, alpha = {}, beta = {}, knorm = {}, nsize = {}, name = '{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            IR_node.layer.attr['alpha'].f, IR_node.layer.attr['beta'].f,
            IR_node.layer.attr['k'].f, IR_node.layer.attr['size'].i * 2 - 1,
            IR_node.name)

        return code

    def emit_Constant(self, IR_node):
        # save the constant into weight dict
        if IR_node.get_attr('value'):
            value = IR_node.get_attr('value')
        else:
            value = self.weights[IR_node.name]['value']

        if not isinstance(value, list):
            self.output_weights[IR_node.name + '_weight'] = [
                value
            ]  # MXNet does not support scalar weights, so wrap the value in a list
            code = "{:<15} = mx.sym.var(name = '{}', shape=(1,))".format(
                IR_node.variable_name, IR_node.name + '_weight')
        else:
            shape = np.array(value).shape
            self.output_weights[IR_node.name + '_weight'] = value

            code = "{:<15} = mx.sym.var(name = '{}', shape={})".format(
                IR_node.variable_name, IR_node.name + '_weight', shape)

        return code

    def emit_Sub(self, IR_node):
        code = "{:<15} = mx.sym.broadcast_sub({}, {})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            self.parent_variable_name(IR_node, [1]))

        return code

    def emit_Relu6(self, IR_node):
        codes = list()
        codes.append(self.emit_Activation(IR_node, 'relu'))
        old_name = IR_node.variable_name
        IR_node.real_name = IR_node.real_name + "_clip"
        codes.append(
            "{:<15} = mx.sym.clip({}, a_min=0, a_max=6, name='{}')".format(
                IR_node.real_variable_name, old_name, IR_node.real_name))

        return codes

    def emit_Slice(self, IR_node):

        starts = IR_node.get_attr('starts')
        starts = [starts[0], starts[-1]] + starts[1:-1]
        ends = IR_node.get_attr('ends')
        ends = [ends[0], ends[-1]] + ends[1:-1]
        ends = [i if i else None for i in ends]
        strides = IR_node.get_attr('strides')
        if strides:
            strides = [strides[0], strides[-1]] + strides[1:-1]

        code = "{:<15} = mx.sym.slice({}, begin={}, end={}, step={}, name='{}')".format(
            IR_node.real_variable_name, self.parent_variable_name(IR_node),
            starts, ends, strides, IR_node.name)
        return code
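    # A worked sketch (hypothetical values): with channels-last IR attrs
    # starts = [0, 10, 10, 0], the reorder above yields [0, 0, 10, 10],
    # moving the channel bound next to the batch bound to match the
    # channels-first layout of the generated MXNet code.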

    def emit_Const(self, IR_node):
        pass

    def emit_Shape(self, IR_node):
        code = "{:<15} = mx.sym.var(init = mx.init.Constant({}.infer_shape({}={})[1][0]), name='{}')".format(
            IR_node.real_variable_name, self.parent_variable_name(IR_node),
            list(self.input_name_shape.keys())[0],
            list(self.input_name_shape.values())[0], IR_node.name)
        return code

    def emit_Pack(self, IR_node):
        pass

    def emit_Unsqueeze(self, IR_node):
        axis = IR_node.get_attr('axes')[0]
        code = "{:<15} = mx.sym.expand_dims(data = {}, axis = {}, name = '{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node), axis,
            IR_node.name)

        return code

    def emit_Unstack(self, IR_node):
        # unstack always removes the split axis, so squeeze_axis is fixed to True
        axis = IR_node.get_attr('axis')
        num = IR_node.get_attr('num')
        if num is None:
            args_str = ""
            for input_name in self.IR_graph.input_layers:
                if self.IR_graph.get_node(input_name).type != 'Const':
                    args_str += '{}={}, '.format(
                        self.IR_graph.get_node(input_name).real_variable_name,
                        self.input_name_shape[input_name])

            args_str = args_str[:-2]
            num_outputs = "{}.infer_shape({})[1][0][{}]".format(
                IR_node.variable_name, args_str, axis)
        else:
            num_outputs = num

        code = "{:<15} = mx.sym.split({}, num_outputs={}, axis={}, squeeze_axis={})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            num_outputs, axis, squeeze_axis)
        return code

    def emit_Fill(self, IR_node):
        value = IR_node.get_attr('value')
        code = "{:<15} = mx.sym.full({}, {})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node), value)
        return code

    def emit_Split(self, IR_node):
        axis = IR_node.get_attr('axis')
        num_outputs = IR_node.get_attr('split')

        if isinstance(num_outputs, list):
            raise NotImplementedError()
        code = "{:<15} = mx.sym.split({}, num_outputs={}, axis={})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            num_outputs, axis)

        return code

    def emit_Sigmoid(self, IR_node):
        code = "{:<15} = mx.sym.sigmoid(data={}, name='{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            IR_node.name)
        return code

    def emit_Tanh(self, IR_node):
        code = "{:<15} = mx.sym.tanh(data={}, name='{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            IR_node.name)
        return code

    def emit_Maxmum(self, IR_node):
        # the misspelled method name presumably mirrors the IR op type used
        # for dispatch; the generated symbol must be the real mx.sym.maximum
        code = "{:<15} = mx.sym.maximum({}, {}, name='{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            self.parent_variable_name(IR_node, [1]), IR_node.name)
        return code

    def emit_Minimum(self, IR_node):
        code = "{:<15} = mx.sym.minimum({}, {}, name='{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            self.parent_variable_name(IR_node, [1]), IR_node.name)
        return code

    def emit_Scope(self, IR_node):
        import re
        pattern = IR_node.pattern

        if pattern not in self.naive_scope_pattern and re.sub(
                r'(_\d+)*$', '',
                IR_node.pattern) not in self.naive_scope_pattern:
            origi_pattern = re.sub(r'(_\d+)*$', '', IR_node.pattern)
            func = getattr(self, "_emit_" + origi_pattern)
            code = func(IR_node)
        else:
            code = "{:<15} = __{}({})".format(
                IR_node.real_variable_name, IR_node.pattern, ', '.join(
                    self.parent_variable_name(IR_node, s)
                    for s in IR_node.in_edges))
            self._gen_scope_code(IR_node)
        return code

    def _gen_scope_code(self, scope_node):
        def _get_weight_related_op_name(node):
            weight_related_ops = [
                'Constant', 'Conv', 'FullyConnected', 'BatchNorm'
            ]
            op_type = node.type
            if op_type in weight_related_ops:
                return op_type, node.name

        def _scope_func(params, code, return_var):
            code = """
    def __call__(self, {}):
{}
        return {}
    """.format(params, code, ', '.join(return_var))
            return code

        class_inits = dict()

        body_code = str()
        for node_name in scope_node.topology_list:
            node = self.IR_graph.get_node(node_name)
            node_type = node.type

            if hasattr(self, "emit_" + node_type):
                func = getattr(self, "emit_" + node_type)
                line = func(node)
                if line is not None:
                    body_code += "        " + line + '\n'
                    inits = _get_weight_related_op_name(node)
                    if inits:
                        if class_inits.get(inits[0], None):
                            class_inits[inits[0]].append(inits[1])
                        else:
                            class_inits[inits[0]] = list([inits[1]])
            else:
                print("MXNetEmitter has not supported operator [%s]." %
                      (node_type))
                self.emit_UNKNOWN(node)

        # param_code does not need parameter slice.
        param_code = ', '.join('%s' %
                               self.IR_graph.get_node(s).real_variable_name
                               for s in scope_node.in_edges)
        function_code = _scope_func(param_code, body_code,
                                    scope_node.return_variables)

        return class_inits, function_code

    def _emit_gru_cell(self, IR_node):
        if not self.layers_codes.get(IR_node.pattern, None):
            class_inits, func_code = self._gen_scope_code(IR_node)
            variables, variable_codes, init_code, func_code = self.process_inits_func_code(
                class_inits, func_code)

            states = [
                self.IR_graph.get_node(s).real_variable_name
                for s in IR_node.in_edges
            ]
            states.pop(0)
            states_code = ', '.join(states)

            class_code = '''
class _{}(mx.rnn.BaseRNNCell):
    def __init__(self, {}):

{}

{}

            '''.format(IR_node.pattern, ', '.join(variables), init_code,
                       func_code)
            self.layers_codes[IR_node.pattern] = class_code

            if not hasattr(self, 'pattern_variables'):
                self.pattern_variables = {IR_node.pattern: variables}
            else:
                self.pattern_variables[IR_node.pattern] = variables

            code = variable_codes
            code.append("{:<15} = _{}({})({})".format(
                IR_node.real_variable_name, IR_node.pattern,
                ', '.join(variables), ', '.join(
                    self.parent_variable_name(IR_node, s)
                    for s in IR_node.in_edges)))
        else:
            code = "{:<15} = _{}({})({})".format(
                IR_node.real_variable_name, IR_node.pattern,
                ', '.join(self.pattern_variables[IR_node.pattern]), ', '.join(
                    self.parent_variable_name(IR_node, s)
                    for s in IR_node.in_edges))

        return code

    def _emit_h_zero(self, IR_node):
        code = "{:<15} = mx.sym.full((1, {}), {})".format(
            IR_node.variable_name, IR_node.get_attr('fill_size'),
            IR_node.get_attr('fill_value'))
        return code
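    # A sketch of the emitted line, assuming a node with fill_size=256 and
    # fill_value=0.0 (the variable name is illustrative):
    #   my_h_zero       = mx.sym.full((1, 256), 0.0)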

    def _emit_lstm_cell(self, IR_node):

        if not self.layers_codes.get(IR_node.pattern, None):
            class_inits, func_code = self._gen_scope_code(IR_node)
            variables, variable_codes, init_code, func_code = self.process_inits_func_code(
                class_inits, func_code)

            states = [
                self.IR_graph.get_node(s).real_variable_name
                for s in IR_node.in_edges
            ]
            states.pop(0)
            states_code = ', '.join(states)

            class_code = '''
class _{}(mx.rnn.BaseRNNCell):
    def __init__(self, {}):

{}

{}

            '''.format(IR_node.pattern, ', '.join(variables), init_code,
                       func_code)
            self.layers_codes[IR_node.pattern] = class_code

            if not hasattr(self, 'pattern_variables'):
                self.pattern_variables = {IR_node.pattern: variables}
            else:
                self.pattern_variables[IR_node.pattern] = variables

            code = variable_codes
            code.append("{:<15} = _{}({})({})".format(
                IR_node.real_variable_name, IR_node.pattern,
                ', '.join(variables), ', '.join(
                    self.parent_variable_name(IR_node, s)
                    for s in IR_node.in_edges)))
        else:
            code = "{:<15} = _{}({})({})".format(
                IR_node.real_variable_name, IR_node.pattern,
                ', '.join(self.pattern_variables[IR_node.pattern]), ', '.join(
                    self.parent_variable_name(IR_node, s)
                    for s in IR_node.in_edges))

        return code

    def process_inits_func_code(self, class_inits, func_code):
        init_code = str()
        variables = list()
        variable_codes = list()
        for k, v in class_inits.items():
            if k == 'FullyConnected':
                for i, name in enumerate(class_inits[k]):
                    variable_name = self.IR_graph.get_node(name).variable_name
                    variables.append("W_" + variable_name)
                    variable_codes.append(
                        "W_{:<15} = mx.sym.var(name='{}_weight')".format(
                            variable_name, name))
                    init_code += "        self.W_{} = W_{}\n".format(
                        variable_name, variable_name)

                    # A plain presence check: calling .any() on a missing bias
                    # would raise AttributeError.
                    if self.weight_loaded and self.weights[name].get(
                            'bias') is not None:
                        variable_codes.append(
                            "B_{:<15} = mx.sym.var(name='{}_bias')".format(
                                variable_name, name))
                        variables.append("B_" + variable_name)
                        init_code += "        self.B_{} = B_{}\n".format(
                            variable_name, variable_name)
                        func_code = func_code.replace(
                            "name = '{}'".format(name),
                            "name = '{}', weight = self.W_{}, bias = self.B_{}"
                            .format(name, variable_name, variable_name))
                    else:
                        func_code = func_code.replace(
                            "name = '{}'".format(name),
                            "name = '{}', weight = self.W_{}".format(
                                name, variable_name))
            elif k == 'Constant':
                for name in class_inits[k]:
                    variable_name = self.IR_graph.get_node(
                        name.replace('_weight', '')).variable_name
                    variables.append(variable_name)
                    constant_line = self.emit_Constant(
                        self.IR_graph.get_node(name.replace('_weight', '')))
                    variable_codes.append("{:<15} = {}".format(
                        variable_name, '='.join(constant_line.split('=')[1:])))
                    init_code += "        self.{} = {}\n".format(
                        variable_name, variable_name)
                    func_code = func_code.replace(
                        constant_line,
                        constant_line.split('=')[0] + ' = self.' +
                        constant_line.split('=')[0])
            else:
                raise NotImplementedError

        return variables, variable_codes, init_code, func_code
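
A minimal, runnable sketch of the _scope_func template used above, showing the
__call__ wrapper it produces; the parameter and variable names here are made
up for illustration:

def _scope_func(params, code, return_var):
    # Same template as in _gen_scope_code above.
    body = """
    def __call__(self, {}):
{}
        return {}
    """.format(params, code, ', '.join(return_var))
    return body

demo_body = "        hidden          = input + state_h\n"
print(_scope_func('input, state_h', demo_body, ['hidden']))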
Example #8
class CntkEmitter(Emitter):

    dtype_map = {
        graph_pb2.DT_FLOAT16 : "np.float16",
        graph_pb2.DT_FLOAT32 : "np.float32",
        graph_pb2.DT_FLOAT64 : "np.float64",
        graph_pb2.DT_INT16 : "np.int16",
        graph_pb2.DT_INT32 : "np.int32",
        graph_pb2.DT_INT64 : "np.int64",
        graph_pb2.DT_UINT8 : "np.uint8",
        graph_pb2.DT_UINT16 : "np.uint16"
    }


    def __init__(self, model):
        from six import string_types as _string_types
        super(CntkEmitter, self).__init__()
        if isinstance(model, _string_types):
            network_path = model
        else:
            network_path = model[0]
            self._load_weights(model[1])

        self.IR_graph = IRGraph(network_path)
        super(CntkEmitter, self)._build()


    @property
    def header_code(self):
        return """import numpy as np
import cntk
from cntk import ops, layers
from cntk.contrib.crosstalkcaffe.unimodel.cntkinstance import BlockApiSetup

__weights_dict = dict()

def load_weights(weight_file):
    if weight_file == None:
        return

    try:
        weights_dict = np.load(weight_file).item()
    except:
        weights_dict = np.load(weight_file, encoding='bytes').item()

    return weights_dict


def KitModel(weight_file = None):
    global __weights_dict
    __weights_dict = load_weights(weight_file)

"""


    def gen_code(self, phase = 'test'):
        self.phase = phase
        self.add_body(0, self.header_code)

        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            node_type = current_node.type

            if hasattr(self, "emit_" + node_type):
                func = getattr(self, "emit_" + node_type)
                func(current_node)
            else:
                print("CntkEmitter has not supported operator [%s]." % (node_type))
                self.emit_UNKNOWN(current_node)

        self.add_body(1, "return {}".format(
            ','.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.output_layers])))

        self.add_body(0, "")
        for i in self.used_layers:
            func = getattr(self, "_layer_" + i)
            func()

        return self.body_code


    @staticmethod
    def _shapeToStr(shapes):
        new_shape = filter(lambda x: x > -1, [dim.size for dim in shapes.dim])
        return ', '.join('%s' % i for i in new_shape)


    @staticmethod
    def is_valid_padding(auto_pad, pads):
        """
        different from utils.is_valid_padding
        """
        if auto_pad:
            if auto_pad == 'VALID':
                return True
            elif auto_pad.startswith('SAME'):
                return False
            else:
                raise ValueError("Unknown padding type [{}].".format(auto_pad))

        else:
            lens = len(pads)
            assert lens % 2 == 0
            for i in range(0, lens // 2):
                if pads[i] != 0:
                    return False
            return True
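    # A sketch of the expected behaviour (pads are in ONNX begin/end order;
    # only the begin half is inspected):
    #   is_valid_padding('VALID', None)                   -> True
    #   is_valid_padding('SAME_UPPER', None)              -> False
    #   is_valid_padding(None, [0, 0, 0, 0, 0, 0, 0, 0])  -> True
    #   is_valid_padding(None, [0, 1, 1, 0, 0, 1, 1, 0])  -> False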

    @staticmethod
    def is_ceil_mode(pads):
        lens = len(pads)
        for i in range(lens // 2 + 1, lens - 1):
            if pads[i] == pads[i - lens // 2]:
                return False
        return True
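    # Ceil mode is inferred from asymmetric padding: SAME-style padding
    # typically puts the extra pixel in the end pads, so equal begin/end pads
    # mean floor mode. For example:
    #   is_ceil_mode([0, 1, 1, 0, 0, 1, 1, 0]) -> False  (symmetric pads)
    #   is_ceil_mode([0, 0, 0, 0, 0, 1, 1, 0]) -> True   (end pads differ)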


    def emit_Conv(self, IR_node):
        if self.weight_loaded:
            self.used_layers.add(IR_node.type)
            dim = len(IR_node.get_attr('strides')) - 2
            padding = not self.is_valid_padding(IR_node.get_attr('auto_pad'), IR_node.get_attr('pads'))
            padding = [False] + [padding] * dim
            self.add_body(1, "{:<15} = convolution({}, strides={}, auto_padding={}, dilation={}, groups={}, name='{}')".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
                tuple(IR_node.get_attr('strides')[1:-1]),
                padding,
                tuple(IR_node.get_attr('dilations', [1])),
                IR_node.get_attr('group', 1),
                IR_node.name))

        else:
            self.add_body(1, "{:<15} = Convolution(name = '{}', num_filters = {}, filter_shape = ({}), strides = ({},), pad = {}, bias = {})({})\n".format(
                IR_node.variable_name,
                IR_node.name,
                IR_node.get_attr('kernel_shape')[-1],
                ', '.join('%s' % i for i in IR_node.layer.attr["kernel_shape"].list.i[:-2]),
                ', '.join('%s' % i for i in IR_node.layer.attr['strides'].list.i[1:-1]),
                IR_node.get_attr('auto_pad') != 'VALID',
                IR_node.get_attr('use_bias'),
                self.parent_variable_name(IR_node)))


    def emit_Pool(self, IR_node):
        input_node = self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name
        if IR_node.layer.attr['global_pooling'].b:
            self.used_layers.add('GlobalPooling')
            self.add_body(1, "{:<15} = global_pooling({}, '{}', name = '{}')".format(
                IR_node.variable_name,
                input_node,
                IR_node.get_attr('pooling_type'),
                IR_node.name))
        else:
            for e in IR_node.get_attr('dilations', []):
                assert e == 1

            dim = len(IR_node.get_attr('kernel_shape')) - 2
            padding = not self.is_valid_padding(IR_node.get_attr('auto_pad'), IR_node.get_attr('pads'))
            padding = [False] + [padding] * dim
            ceil_out_dim = self.is_ceil_mode(IR_node.get_attr('pads'))

            pooling_type = IR_node.get_attr('pooling_type')
            if pooling_type == 'MAX':
                pooling_type = cntk.MAX_POOLING
            elif pooling_type == 'AVG':
                pooling_type = cntk.AVG_POOLING
            else:
                raise ValueError

            if self.weight_loaded:
                self.used_layers.add(IR_node.type)
                self.add_body(1, "{:<15} = pooling({}, pooling_type={}, pooling_window_shape={}, strides={}, auto_padding={}, ceil_out_dim={})".format(
                    IR_node.variable_name,
                    input_node,
                    pooling_type,
                    tuple(IR_node.get_attr('kernel_shape')[1:-1]),
                    tuple(IR_node.get_attr('strides')[1:-1]),
                    padding,
                    ceil_out_dim
                    ))

            else:
                raise NotImplementedError


    def emit_UNKNOWN(self, IR_node):
        print(IR_node.IR_layer.name)


    def emit_DataInput(self, IR_node):
        shape_str = self._shapeToStr(IR_node.IR_layer.attr["shape"].shape)
        dtype_str = ", dtype = {}".format(self.dtype_map[IR_node.layer.attr['dtype'].type]) if 'dtype' in IR_node.layer.attr else ""
        self.add_body(1, "{:<15} = cntk.input_variable(({},) {}, name='{}')".format(
            IR_node.variable_name,
            shape_str,
            dtype_str,
            IR_node.name))


    def emit_Dropout(self, IR_node):
        parent = self.IR_graph.get_parent(IR_node.name, [0])
        if self.phase == 'train':
            self.add_body(1, "{:<15} = Dropout({}, name = '{}')({})".format(
                IR_node.variable_name,
                1 - IR_node.get_attr('keep_prob'),
                IR_node.name,
                parent.real_variable_name))
        else:
            IR_node.real_name = parent.real_name


    def emit_FullyConnected(self, IR_node):
        input_node = self.parent_variable_name(IR_node)
        if self.weight_loaded:
            self.used_layers.add(IR_node.type)
            self.add_body(1, "{:<15} = dense({}, name = '{}')".format(
                IR_node.variable_name,
                input_node,
                IR_node.name))

        else:
            self.add_body(1, "{:<15} = Dense({}, bias = {}, name = '{}')({})".format(
                IR_node.variable_name,
                IR_node.layer.attr["units"].i,
                IR_node.layer.attr['use_bias'].b,
                IR_node.name,
                input_node))


    def emit_Flatten(self, IR_node):
        self.add_body(1, "{:<15} = ops.reshape({}, (-1,), name = '{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.name))


    def emit_Reshape(self, IR_node):
        self.add_body(1, "{:<15} = cntk.reshape({}, shape={}, name='{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            tuple(IR_node.get_attr('shape')),
            IR_node.name))


    def _emit_activation(self, IR_node, op_name):
        self.add_body(1, "{:<15} = layers.Activation(activation = {}, name = '{}')({})".format(
            IR_node.variable_name,
            op_name,
            IR_node.name,
            self.parent_variable_name(IR_node)))


    def emit_Tanh(self, IR_node):
        self._emit_activation(IR_node, 'ops.tanh')


    def emit_Relu(self, IR_node):
        self._emit_activation(IR_node, 'ops.relu')


    def emit_Softmax(self, IR_node):
        self._emit_activation(IR_node, 'ops.softmax')


    def emit_Sigmoid(self, IR_node):
        self._emit_activation(IR_node, 'ops.sigmoid')


    def emit_RNNs(self, IR_node, func):
        assert False


    def emit_LSTM(self, IR_node):
        return self.emit_RNNs(IR_node, "LSTM")


    def emit_GRU(self, IR_node):
        return self.emit_RNNs(IR_node, "GRU")


    def emit_Add(self, IR_node):
        if len(IR_node.in_edges) > 1:
            inputs = ' + '.join(self.IR_graph.get_node(i).real_variable_name for i in IR_node.in_edges)
            self.add_body(1, "{:<15} = {}".format(
                IR_node.variable_name,
                inputs))

    def emit_Sub(self, IR_node):
        if len(IR_node.in_edges) > 1:
            inputs = ' - '.join(self.IR_graph.get_node(i).real_variable_name for i in IR_node.in_edges)
            self.add_body(1, "{:<15} = {}".format(
                IR_node.variable_name,
                inputs))


    def emit_Mul(self, IR_node):
        if len(IR_node.in_edges) > 1:
            inputs = ' * '.join(self.IR_graph.get_node(i).real_variable_name for i in IR_node.in_edges)
            self.add_body(1, "{:<15} = {}".format(
                IR_node.variable_name,
                inputs))


    def emit_Constant(self, IR_node):
        self.add_body(1, "{:<15} = cntk.Constant(value=__weights_dict['{}']['value'])".format(
            IR_node.variable_name, IR_node.name
        ))


    def emit_Concat(self, IR_node):
        inputs = ', '.join(self.IR_graph.get_node(i).real_variable_name for i in IR_node.in_edges)
        self.add_body(1, "{:<15} = cntk.splice({}, axis={}, name='{}')".format(
            IR_node.variable_name,
            inputs,
            IR_node.get_attr('axis') - 1,
            IR_node.name))


    def emit_BatchNorm(self, IR_node):
        self.used_layers.add(IR_node.type)
        self.add_body(1, "{:<15} = batch_normalization({}, epsilon={}, name='{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.get_attr('epsilon'),
            IR_node.name))


    def emit_Pad(self, IR_node):
        if IR_node.get_attr('mode') == 'constant':
            mode = 'mode = ops.CONSTANT_PAD, constant_value = {}'.format(IR_node.get_attr('constant_values', 0.0))
        elif IR_node.get_attr('mode') == 'reflect':
            mode = 'mode = ops.REFLECT_PAD'
        elif IR_node.get_attr('mode') == 'SYMMETRIC':
            mode = 'mode = ops.SYMMETRIC_PAD'
        else:
            assert False

        padding = IR_node.get_attr('pads')
        padding = convert_onnx_pad_to_tf(padding)[1:]

        self.add_body(1, "{:<15} = ops.pad({}, pattern={}, {})".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            padding,
            mode))


    def emit_Squeeze(self, IR_node):
        IR_node.real_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name


    def emit_Log(self, IR_node):
        self.add_body(1, "{:<15} = _cntk.log({}, name='{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.name))


    def emit_Exp(self, IR_node):
        self.add_body(1, "{:<15} = _cntk.exp({}, name='{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.name))


    def emit_Reciprocal(self, IR_node):
        self.add_body(1, "{:<15} = _cntk.reciprocal({}, name='{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.name))


    def emit_ReduceMean(self, IR_node):
        self.add_body(1, "{:<15} = ops.reduce_mean({}, axis = ({}), name = '{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            ', '.join('%s' % (i - 1) for i in IR_node.get_attr('axes')),
            IR_node.name))


    def emit_LRN(self, IR_node):
        self.used_layers.add(IR_node.type)
        self.add_body(1, "{:<15} = lrn({}, k=1, n={}, alpha={}, beta={}, name='{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.layer.attr['size'].i,
            IR_node.layer.attr['alpha'].f,
            IR_node.layer.attr['beta'].f,
            IR_node.name))


    def _layer_LRN(self):
        self.add_body(0, """
def lrn(input, **kwargs):
    dim = len(input.output.shape)
    input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1)))
    layer = BlockApiSetup.lrn(**kwargs)(input)
    layer = cntk.transpose(layer, list(range(1, dim)) + [0])
    return layer
""")


    def _layer_FullyConnected(self):
        self.add_body(0, """
def dense(input, name, **kwargs):
    w = __weights_dict[name]['weights']
    b = __weights_dict[name]['bias'] if 'bias' in __weights_dict[name] else None
    return BlockApiSetup.linear(output_shape=w.shape[1], input_shape=w.shape[0], scale_init=w, bias_init=b, name=name, **kwargs)(input)
""")


    def _layer_Conv(self):
        self.add_body(0, """
def convolution(input, name, **kwargs):
    dim = __weights_dict[name]['weights'].ndim

    weight = np.transpose(__weights_dict[name]['weights'], [dim - 1, dim - 2] + list(range(0, dim - 2)))
    w = cntk.Parameter(init=weight, name=name + '_weight')

    input = cntk.transpose(input, [dim - 2] + list(range(0, dim - 2)))

    layer = ops.convolution(w, input, **kwargs)
    if 'bias' in __weights_dict[name]:
        bias = np.reshape(__weights_dict[name]['bias'], [-1] + [1] * (dim - 2))
        b = cntk.Parameter(init=bias, name=name + '_bias')
        layer = layer + b
    layer = cntk.transpose(layer, list(range(1, dim - 1)) + [0])
    return layer
""")


    def _layer_Pool(self):
        self.add_body(0, """
def pooling(input, **kwargs):
    dim = len(input.output.shape)
    input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1)))
    layer = ops.pooling(input, **kwargs)
    layer = cntk.transpose(layer, list(range(1, dim)) + [0])
    return layer
""")


    def _layer_GlobalPooling(self):
        self.add_body(0, """
def global_pooling(input, type, **kwargs):
    dim = len(input.output.shape)
    input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1)))
    layer = layers.GlobalMaxPooling(**kwargs)(input) if type == 'MAX' else layers.GlobalAveragePooling(**kwargs)(input)
    layer = cntk.transpose(layer, list(range(1, dim)) + [0])
    return layer
""")


    def _layer_BatchNorm(self):
        self.add_body(0, """
def batch_normalization(input, name, epsilon, **kwargs):
    mean = cntk.Parameter(init = __weights_dict[name]['mean'],
        name = name + "_mean")
    var = cntk.Parameter(init = __weights_dict[name]['var'],
        name = name + "_var")

    layer = (input - mean) / cntk.sqrt(var + epsilon)
    if 'scale' in __weights_dict[name]:
        scale = cntk.Parameter(init = __weights_dict[name]['scale'],
            name = name + "_scale")
        layer = scale * layer

    if 'bias' in __weights_dict[name]:
        bias = cntk.Parameter(init = __weights_dict[name]['bias'],
            name = name + "_bias")
        layer = layer + bias

    return layer
""")
Example #9
class PytorchEmitter(Emitter):

    dtype_map = {
        graph_pb2.DT_FLOAT16: "float16",
        graph_pb2.DT_FLOAT32: "float32",
        graph_pb2.DT_FLOAT64: "float64",
        graph_pb2.DT_INT16: "int16",
        graph_pb2.DT_INT32: "int32",
        graph_pb2.DT_INT64: "int64",
        graph_pb2.DT_UINT8: "uint8",
        graph_pb2.DT_UINT16: "uint16"
    }

    # Base Functions
    def __init__(self, model):
        from six import string_types as _string_types
        super(PytorchEmitter, self).__init__()
        if isinstance(model, _string_types):
            network_path = model
            weight_path = None
        else:
            network_path = model[0]
            weight_path = model[1]

        self.init_code = str()
        self.IR_graph = IRGraph(network_path)
        self.IR_graph.build()
        self._load_weights(weight_path)

    def run(self, dstNetworkPath, dstWeightPath=None, phase='test'):
        super(PytorchEmitter, self).run(dstNetworkPath, dstWeightPath, phase)
        if self.weight_loaded:
            self.save_weights(self.weights_dict, dstWeightPath)

    def add_init(self, indent, codes):
        if isinstance(codes, _string_types):
            codes = [codes]
        for code in codes:
            self.init_code += ("    " * indent) + code + '\n'

    @property
    def header_code(self):
        return """import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

__weights_dict = dict()

def load_weights(weight_file):
    if weight_file == None:
        return

    try:
        weights_dict = np.load(weight_file).item()
    except:
        weights_dict = np.load(weight_file, encoding='bytes').item()

    return weights_dict

class KitModel(nn.Module):
"""

    def gen_code(self, phase):
        self.add_init(
            1, """
    def __init__(self, weight_file):
        super(KitModel, self).__init__()
        global __weights_dict
        __weights_dict = load_weights(weight_file)
""")

        self.add_body(1, "def forward(self, x):")

        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            node_type = current_node.type

            if hasattr(self, "emit_" + node_type):
                func = getattr(self, "emit_" + node_type)
                line = func(current_node)

            else:
                print("Pytorch Emitter has not supported operator [%s]." %
                      (node_type))
                self.emit_UNKNOWN(current_node)

        self.add_body(
            2, "return {}".format(','.join([
                self.IR_graph.get_node(name).real_variable_name
                for name in self.IR_graph.output_layers
            ])))

        self.add_body(0, "")
        for i in self.used_layers:
            func = getattr(self, "_layer_" + i)
            func()

        return self.header_code + '\n' + self.init_code + '\n' + self.body_code

    def _defuse_padding(self, IR_node, extra_str=""):
        input_node = self.parent_variable_name(IR_node)
        if IR_node.get_attr('auto_pad') == 'VALID':
            return input_node

        if is_valid_padding(IR_node.get_attr("pads")):
            return input_node

        padding = self._convert_padding(IR_node)
        input_node = IR_node.variable_name + '_pad'
        self.add_body(
            2, "{:<15} = F.pad({}, {}{})".format(
                input_node, self.parent_variable_name(IR_node), padding,
                extra_str))

        return input_node

    def emit_Conv(self, IR_node):
        self.used_layers.add('Conv')

        dim = len(IR_node.get_attr('strides')) - 2

        in_channels = IR_node.get_attr('kernel_shape')[-2]
        filter = IR_node.get_attr('kernel_shape')[-1]
        kernel = IR_node.get_attr('kernel_shape')[:-2]
        strides = IR_node.get_attr('strides')[1:-1]

        if IR_node.type == 'DepthwiseConv':
            group = in_channels
            filter *= group

        else:
            group = IR_node.get_attr('group', 1)

        self.add_init(
            2,
            "self.{} = self.__conv({}, name='{}', in_channels={}, out_channels={}, kernel_size={}, stride={}, groups={}, bias={})"
            .format(
                IR_node.variable_name,
                dim,
                IR_node.name,
                in_channels,
                filter,
                tuple(kernel),
                tuple(strides),
                # padding,
                group,
                IR_node.get_attr('use_bias')))

        input_node = self._defuse_padding(IR_node)
        self.add_body(
            2,
            "{:<15} = self.{}({})".format(IR_node.variable_name,
                                          IR_node.variable_name, input_node))

        if self.weight_loaded:
            if IR_node.type == 'DepthwiseConv':
                self.weights_dict[IR_node.name]['weights'] = np.swapaxes(
                    self.weights_dict[IR_node.name]['weights'], -1, -2)
            self.weights_dict[IR_node.name]['weights'] = np.transpose(
                self.weights_dict[IR_node.name]['weights'],
                [dim + 1, dim] + list(range(0, dim)))

    @staticmethod
    def is_ceil_mode(pads):
        lens = len(pads)
        for i in range(lens // 2 + 1, lens - 1):
            if pads[i] == pads[i - lens // 2]:
                return False
        return True

    def emit_Pool(self, IR_node):
        dim = len(IR_node.get_attr('strides')) - 2

        if IR_node.get_attr('pooling_type') == "MAX":
            pool_name = "max_pool{}d".format(dim)
            # exstr = ", value=float('-Inf')"
        elif IR_node.get_attr('pooling_type') == "AVG":
            pool_name = "avg_pool{}d".format(dim)
            # exstr = ""
        else:
            raise ValueError()

        if IR_node.layer.attr['global_pooling'].b:
            self.add_body(
                2, "{:<15} = F.{}(input = {}, kernel_size = {}.size()[2:])".
                format(IR_node.variable_name, pool_name,
                       self.parent_variable_name(IR_node),
                       self.parent_variable_name(IR_node)))

        else:
            for e in IR_node.get_attr('dilations', []):
                assert e == 1

            pool_size = IR_node.get_attr('kernel_shape')[1:-1]
            strides = IR_node.get_attr('strides')[1:-1]

            # One (begin) pad per spatial dimension; slicing to [1:dim] would
            # drop the last spatial dimension.
            padding = IR_node.get_attr('pads')[1:dim + 1]
            ceil_mode = self.is_ceil_mode(IR_node.get_attr('pads'))

            # input_node = self._defuse_padding(IR_node, exstr)
            self.add_body(
                2,
                "{:<15} = F.{}({}, kernel_size={}, stride={}, padding={}, ceil_mode={})"
                .format(IR_node.variable_name, pool_name,
                        self.parent_variable_name(IR_node), tuple(pool_size),
                        tuple(strides), tuple(padding), ceil_mode))

    def emit_UNKNOWN(self, IR_node):
        print(IR_node.name)

    def emit_DataInput(self, IR_node):
        # Ignore it in Pytorch
        IR_node.real_name = 'x'

    def emit_Dropout(self, IR_node):
        self.add_body(
            2,
            "{:<15} = F.dropout(input = {}, p = {}, training = self.training, inplace = True)"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    IR_node.layer.attr["keep_prob"].f))

    def check_if_need_transpose(self, IR_node):
        parent = self.IR_graph.get_parent(IR_node.name, [0])
        while parent.type == 'Flatten':
            parent = self.IR_graph.get_parent(parent.name, [0])
        dim = len(parent.layer.attr['_output_shapes'].list.shape[0].dim)
        if dim > 2:
            original_dims = self.weights_dict[IR_node.name]['weights'].shape
            dims = [
                i.size for i in
                parent.layer.attr['_output_shapes'].list.shape[0].dim[1:]
            ] + [-1]
            self.weights_dict[IR_node.name]['weights'] = np.reshape(
                self.weights_dict[IR_node.name]['weights'], dims)
            self.weights_dict[IR_node.name]['weights'] = np.transpose(
                self.weights_dict[IR_node.name]['weights'],
                [dim - 2] + list(range(0, dim - 2)) + [dim - 1])
            self.weights_dict[IR_node.name]['weights'] = np.reshape(
                self.weights_dict[IR_node.name]['weights'], original_dims)

    def emit_FullyConnected(self, IR_node):
        self.used_layers.add(IR_node.type)
        in_features = 1
        for i in self.IR_graph.get_parent(
                IR_node.name,
            [0]).layer.attr['_output_shapes'].list.shape[0].dim[1:]:
            in_features *= i.size

        self.add_init(
            2,
            "self.{} = self.__dense(name = '{}', in_features = {}, out_features = {}, bias = {})"
            .format(IR_node.variable_name, IR_node.name, in_features,
                    IR_node.layer.attr["units"].i,
                    IR_node.IR_layer.attr["use_bias"].b))

        input_node = self.parent_variable_name(IR_node)
        if len(
                self.IR_graph.get_parent(
                    IR_node.name, [0]).get_attr('_output_shapes')[0].dim) > 2:
            input_node = "{}.view({}.size(0), -1)".format(
                input_node, input_node)
        self.add_body(
            2,
            "{:<15} = self.{}({})".format(IR_node.variable_name,
                                          IR_node.variable_name, input_node))

        if self.weight_loaded:
            self.check_if_need_transpose(IR_node)
            self.weights_dict[IR_node.name]['weights'] = np.transpose(
                self.weights_dict[IR_node.name]['weights'], (1, 0))

    def emit_Flatten(self, IR_node):
        parent = self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name
        self.add_body(
            2,
            "{:<15} = {}.view({}.size(0), -1)".format(IR_node.variable_name,
                                                      parent, parent))

    def emit_Reshape(self, IR_node):
        shape_list = IR_node.get_attr('shape')
        shape_str = ','.join([str(int(i)) for i in shape_list])
        self.add_body(
            2, "{:<15} = torch.reshape(input = {}, shape = ({}))".format(
                IR_node.variable_name,
                self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name,
                shape_str))

    def emit_Tanh(self, IR_node):
        self.add_body(
            2, "{:<15} = F.tanh({})".format(
                IR_node.variable_name,
                self.IR_graph.get_parent(IR_node.name,
                                         [0]).real_variable_name))

    def emit_Relu(self, IR_node):
        self.add_body(
            2, "{:<15} = F.relu({})".format(
                IR_node.variable_name,
                self.IR_graph.get_parent(IR_node.name,
                                         [0]).real_variable_name))

    def emit_Relu6(self, IR_node):
        self.add_body(
            2, "{:<15} = F.relu6({})".format(
                IR_node.variable_name,
                self.IR_graph.get_parent(IR_node.name,
                                         [0]).real_variable_name))

    def emit_Softmax(self, IR_node):
        self.add_body(
            2, "{:<15} = F.softmax({})".format(
                IR_node.variable_name,
                self.IR_graph.get_parent(IR_node.name,
                                         [0]).real_variable_name))

    def emit_Sigmoid(self, IR_node):
        self.add_body(
            2, "{:<15} = F.sigmoid({})".format(
                IR_node.variable_name,
                self.IR_graph.get_parent(IR_node.name,
                                         [0]).real_variable_name))

    def emit_Embedding(self, IR_node):
        raise NotImplementedError()
        # Unreachable Keras-style reference code below:
        ret = "{:<15} = Embedding(input_dim = {}, output_dim = {}, mask_zero = {})({})".format(
            IR_node.name, IR_node.IR_layer.attr['input_dim'].i,
            IR_node.IR_layer.attr['output_dim'].i,
            IR_node.IR_layer.attr['mask_zero'].b, IR_node.in_edges[0])

        return ret

    def emit_RNNs(self, IR_node, func):
        raise NotImplementedError()
        # Unreachable Keras-style reference code below:
        if "dropout" in IR_node.IR_layer.attr:
            dropout_str = ",dropout = {}, recurrent_dropout = {}".format(
                IR_node.IR_layer.attr['dropout'].f,
                IR_node.IR_layer.attr['recurrent_dropout'].f)
        else:
            dropout_str = ""

        code = "{:<15} = {}(units = {}, use_bias = {} {})({})".format(
            IR_node.name, func, IR_node.IR_layer.attr['units'].i,
            IR_node.IR_layer.attr['use_bias'].b, dropout_str,
            IR_node.in_edges[0])

        return code

    def emit_LSTM(self, IR_node):
        return self.emit_RNNs(IR_node, "LSTM")

    def emit_GRU(self, IR_node):
        return self.emit_RNNs(IR_node, "GRU")

    def emit_Add(self, IR_node):
        self.add_body(
            2, "{:<15} = {}".format(
                IR_node.variable_name,
                ' + '.join('%s' % self.IR_graph.get_node(s).real_variable_name
                           for s in IR_node.in_edges)))

    def emit_Sub(self, IR_node):
        self.add_body(
            2, "{:<15} = {}".format(
                IR_node.variable_name,
                ' - '.join('%s' % self.IR_graph.get_node(s).real_variable_name
                           for s in IR_node.in_edges)))

    def emit_Mul(self, IR_node):
        self.add_body(
            2, "{:<15} = {}".format(
                IR_node.variable_name,
                ' * '.join('%s' % self.IR_graph.get_node(s).real_variable_name
                           for s in IR_node.in_edges)))

    def emit_Constant(self, IR_node):
        self.add_init(
            2,
            "self.{:<15} = torch.autograd.Variable(torch.Tensor(__weights_dict['{}']['value']), requires_grad=False)"
            .format(IR_node.variable_name, IR_node.name))

        # self.add_init(2, "self.{:<15} = torch.from_numpy(__weights_dict['{}']['value'])".format(
        #     IR_node.variable_name,
        #     IR_node.name))
        IR_node.real_name = "self." + IR_node.variable_name

    def _convert_axis(self, IR_node, axis):
        ndim = len(
            self.IR_graph.get_parent(IR_node.name,
                                     [0]).get_attr('_output_shapes')[0].dim)
        if axis == 0:
            return 0
        elif axis == ndim - 1:
            return 1
        else:
            return axis + 1
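    # _convert_axis maps an IR (channels-last, e.g. NHWC) axis to PyTorch's
    # channels-first (e.g. NCHW) layout. For a 4-D parent output:
    #   axis 0 (N) -> 0,  axis 3 (C) -> 1,  axis 1 (H) -> 2,  axis 2 (W) -> 3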

    def emit_Concat(self, IR_node):
        axis = self._convert_axis(IR_node, IR_node.get_attr('axis'))
        self.add_body(
            2, "{:<15} = torch.cat(({}), {})".format(
                IR_node.variable_name,
                ', '.join(
                    self.IR_graph.get_node(s).real_variable_name
                    for s in IR_node.in_edges),
                axis,
            ))

    def emit_BatchNorm(self, IR_node):
        self.used_layers.add(IR_node.type)
        dim = len(IR_node.layer.attr['_output_shapes'].list.shape[0].dim) - 2

        self.add_init(
            2,
            "self.{} = self.__batch_normalization({}, '{}', num_features={}, eps={}, momentum={})"
            .format(
                IR_node.variable_name,
                dim,
                IR_node.name,
                IR_node.layer.attr['_output_shapes'].list.shape[0].dim[-1].
                size,
                IR_node.layer.attr['epsilon'].f,
                IR_node.layer.attr['momentum'].f,
            ))

        self.add_body(
            2,
            "{:<15} = self.{}({})".format(IR_node.variable_name,
                                          IR_node.variable_name,
                                          self.parent_variable_name(IR_node)))

    def emit_Squeeze(self, IR_node):
        self.add_body(
            2, "{:<15} = torch.squeeze({})".format(
                IR_node.variable_name, self.parent_variable_name(IR_node)))

    @staticmethod
    def _convert_padding(IR_node):
        padding = IR_node.get_attr('pads')
        padding = convert_onnx_pad_to_tf(padding)[1:-1]
        new_padding = []
        for pad in padding:
            new_padding.insert(0, pad)
        return tuple(np.array(new_padding).reshape(-1).tolist())
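    # F.pad expects pad widths starting from the *last* dimension, so the
    # per-dimension (begin, end) pairs are reversed before flattening.
    # Assuming convert_onnx_pad_to_tf returns [begin, end] pairs per dimension,
    # IR pads that pad H by (1, 1) and W by (2, 2) would become (2, 2, 1, 1).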

    def emit_Pad(self, IR_node):
        if IR_node.get_attr('mode') == 'constant':
            mode = "mode = 'constant', value = {}".format(0)
        elif IR_node.get_attr('mode') == 'reflect':
            mode = "mode = 'reflect'"
        elif IR_node.get_attr('mode') == 'SYMMETRIC':
            mode = "mode = 'replicate'"
        else:
            assert False

        padding = self._convert_padding(IR_node)
        self.add_body(
            2, "{:<15} = F.pad({}, {}, {})".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                padding, mode))

    def emit_ReduceMean(self, IR_node):
        axes = [
            self._convert_axis(IR_node, x) for x in IR_node.get_attr('axes')
        ]
        input_node = self.parent_variable_name(IR_node)
        for axis in sorted(axes, reverse=True):
            self.add_body(
                2, "{:<15} = torch.mean({}, {}, {})".format(
                    IR_node.variable_name, input_node, axis,
                    IR_node.get_attr("keepdims")))
            input_node = IR_node.variable_name

    def emit_LRN(self, IR_node):
        self.used_layers.add(IR_node.type)
        self.add_body(
            2,
            "{:<15} = self.LRN(size = {}, alpha = {}, beta = {})({})".format(
                IR_node.variable_name, IR_node.layer.attr['size'].i * 2 - 1,
                IR_node.layer.attr['alpha'].f, IR_node.layer.attr['beta'].f,
                self.parent_variable_name(IR_node)))

    def emit_DepthwiseConv(self, IR_node):
        self.emit_Conv(IR_node)

    def emit_Slice(self, IR_node):
        starts = IR_node.get_attr('starts')
        starts = [starts[0], starts[-1]] + starts[1:-1]
        ends = IR_node.get_attr('ends')
        ends = [ends[0], ends[-1]] + ends[1:-1]
        extra_str = ""
        for idx, _ in enumerate(starts):
            if idx:
                extra_str += ", "
            extra_str += "{}:".format(starts[idx])
            if ends[idx]:
                extra_str += "{}".format(ends[idx])

        self.add_body(
            2, "{:<15} = {}[{}]".format(IR_node.variable_name,
                                        self.parent_variable_name(IR_node),
                                        extra_str))

    def _layer_Conv(self):
        self.add_body(
            0, """
    @staticmethod
    def __conv(dim, name, **kwargs):
        if   dim == 1:  layer = nn.Conv1d(**kwargs)
        elif dim == 2:  layer = nn.Conv2d(**kwargs)
        elif dim == 3:  layer = nn.Conv3d(**kwargs)
        else:           raise NotImplementedError()

        layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights']))
        if 'bias' in __weights_dict[name]:
            layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias']))
        return layer""")

    def _layer_FullyConnected(self):
        self.add_body(
            0, """
    @staticmethod
    def __dense(name, **kwargs):
        layer = nn.Linear(**kwargs)
        layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights']))
        if 'bias' in __weights_dict[name]:
            layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias']))
        return layer""")

    def _layer_BatchNorm(self):
        self.add_body(
            0, """
    @staticmethod
    def __batch_normalization(dim, name, **kwargs):
        if   dim == 1:  layer = nn.BatchNorm1d(**kwargs)
        elif dim == 2:  layer = nn.BatchNorm2d(**kwargs)
        elif dim == 3:  layer = nn.BatchNorm3d(**kwargs)
        else:           raise NotImplementedError()

        if 'scale' in __weights_dict[name]:
            layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['scale']))
        else:
            layer.weight.data.fill_(1)

        if 'bias' in __weights_dict[name]:
            layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias']))
        else:
            layer.bias.data.fill_(0)

        layer.state_dict()['running_mean'].copy_(torch.from_numpy(__weights_dict[name]['mean']))
        layer.state_dict()['running_var'].copy_(torch.from_numpy(__weights_dict[name]['var']))
        return layer""")

    def _layer_LRN(self):
        self.add_body(
            0, """
    class LRN(nn.Module):
        def __init__(self, size=1, alpha=1.0, beta=0.75, ACROSS_CHANNELS=False):
            super(KitModel.LRN, self).__init__()
            self.ACROSS_CHANNELS = ACROSS_CHANNELS
            if self.ACROSS_CHANNELS:
                self.average=nn.AvgPool3d(kernel_size=(size, 1, 1),
                        stride=1,
                        padding=(int((size-1.0)/2), 0, 0))
            else:
                self.average=nn.AvgPool2d(kernel_size=size,
                        stride=1,
                        padding=int((size-1.0)/2))
            self.alpha = alpha
            self.beta = beta

        def forward(self, x):
            if self.ACROSS_CHANNELS:
                div = x.pow(2).unsqueeze(1)
                div = self.average(div).squeeze(1)
                div = div.mul(self.alpha).add(1.0).pow(self.beta)
            else:
                div = x.pow(2)
                div = self.average(div)
                div = div.mul(self.alpha).add(1.0).pow(self.beta)
            x = x.div(div)
            return x""")
Example #10
class CntkEmitter(Emitter):

    dtype_map = {
        graph_pb2.DT_FLOAT16: "np.float16",
        graph_pb2.DT_FLOAT32: "np.float32",
        graph_pb2.DT_FLOAT64: "np.float64",
        graph_pb2.DT_INT16: "np.float16",  # Cntk does not support Int.
        graph_pb2.DT_INT32: "np.float32",  # Cntk does not support Int.
        graph_pb2.DT_INT64: "np.float64",  # Cntk does not support Int.
        graph_pb2.DT_UINT8: "np.uint8",
        graph_pb2.DT_UINT16: "np.uint16"
    }

    naive_scope_pattern = ['gru_cell', 'lstm_cell']

    def __init__(self, model):
        from six import string_types as _string_types
        super(CntkEmitter, self).__init__()
        if isinstance(model, _string_types):
            network_path = model
        else:
            network_path = model[0]
            self._load_weights(model[1])

        self.IR_graph = IRGraph(network_path)
        super(CntkEmitter, self)._build()
        self.yolo_parameter = []
        folder = Folder(self.IR_graph, self.weights_dict)
        folder.fold()

    @property
    def header_code(self):
        return """import numpy as np
import cntk
from cntk import ops, layers
from cntk.contrib.crosstalkcaffe.unimodel.cntkinstance import BlockApiSetup

__weights_dict = dict()

def load_weights(weight_file):
    if weight_file == None:
        return

    try:
        weights_dict = np.load(weight_file, allow_pickle=True).item()
    except:
        weights_dict = np.load(weight_file, allow_pickle=True, encoding='bytes').item()

    return weights_dict


def KitModel(weight_file = None):
    global __weights_dict
    __weights_dict = load_weights(weight_file)

"""

    def gen_code(self, phase='test'):
        self.phase = phase
        self.add_body(0, self.header_code)

        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            node_type = current_node.type

            if hasattr(self, "emit_" + node_type):
                func = getattr(self, "emit_" + node_type)
                line = func(current_node)
                if line:
                    self.add_body(1, line)
            else:
                print("CntkEmitter has not supported operator [%s]." %
                      (node_type))
                self.emit_UNKNOWN(current_node)

        self.add_body(
            1, "return {}".format(','.join([
                self.IR_graph.get_node(name).real_variable_name
                for name in self.IR_graph.output_layers
            ])))

        self.add_body(0, "")
        for i in self.used_layers:
            func = getattr(self, "_layer_" + i)
            func()

        self.add_body(0, "")
        for code in self.layers_codes.values():
            self.add_body(0, code)

        return self.body_code

    @staticmethod
    def _shapeToStr(shapes):
        new_shape = filter(lambda x: x > -1, [dim.size for dim in shapes.dim])
        return ', '.join('%s' % i for i in new_shape)

    @staticmethod
    def is_valid_padding(auto_pad, pads):
        """
        different from utils.is_valid_padding
        """
        if auto_pad:
            if auto_pad == 'VALID':
                return True
            elif auto_pad.startswith('SAME'):
                return False
            else:
                raise ValueError("Unknown padding type [{}].".format(auto_pad))

        else:
            lens = len(pads)
            assert lens % 2 == 0
            for i in range(0, lens // 2):
                if pads[i] != 0:
                    return False
            return True

    @staticmethod
    def is_ceil_mode(pads):
        lens = len(pads)
        for i in range(lens // 2 + 1, lens - 1):
            if pads[i] == pads[i - lens // 2]:
                return False
        return True

    def _defuse_padding(self, IR_node):
        auto_pad = IR_node.get_attr('auto_pad')
        if auto_pad:
            input_node = self.parent_variable_name(IR_node)
            if auto_pad == 'VALID':
                padding = False
            elif auto_pad.startswith("SAME"):
                padding = True
            else:
                raise ValueError("Unknown padding type [{}].".format(auto_pad))

            return input_node, padding

        else:
            padding = IR_node.get_attr('pads')
            if not is_valid_padding(padding):
                dim = len(padding) // 2
                padding_str = list()
                for i in range(1, dim):
                    padding_str.append((padding[i], padding[i + dim]))
                input_node = IR_node.variable_name + '_pad'
                self.add_body(
                    1, "{:<15} = cntk.pad({}, pattern={})".format(
                        input_node, self.parent_variable_name(IR_node),
                        padding_str))

            else:
                input_node = self.parent_variable_name(IR_node)

            return input_node, False
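    # A sketch of the explicit-pad branch, assuming ONNX-order pads for a 2-D
    # op [0, 1, 2, 0, 0, 1, 2, 0] (H padded by 1/1, W by 2/2): dim = 4 and the
    # emitted pattern is [(1, 1), (2, 2), (0, 0)] -- the leading batch pad is
    # skipped because CNTK shapes omit the batch axis.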

    def emit_Conv(self, IR_node):
        codes = list()
        if self.weight_loaded:
            self.used_layers.add('Conv')
            input_node, padding = self._defuse_padding(IR_node)

            dim = len(IR_node.get_attr('strides')) - 2
            padding = [False] + [padding] * dim

            if IR_node.type == 'DepthwiseConv':
                groups = IR_node.get_attr('kernel_shape')[-2]
                codes.append(
                    "__weights_dict['{}']['weights'] = np.swapaxes(__weights_dict['{}']['weights'], -1, -2)"
                    .format(IR_node.real_name, IR_node.real_name))
            else:
                groups = IR_node.get_attr('group', 1)

            codes.append(
                "{:<15} = convolution({}, is_transpose={}, strides={}, auto_padding={}, dilation={}, groups={}, name='{}')"
                .format(IR_node.variable_name, input_node,
                        IR_node.type == 'ConvTranspose',
                        tuple(IR_node.get_attr('strides')[1:-1]), padding,
                        tuple(IR_node.get_attr('dilations',
                                               [1])), groups, IR_node.name))

        else:
            codes.append(
                "{:<15} = Convolution(name = '{}', num_filters = {}, filter_shape = ({}), strides = ({},), pad = {}, bias = {})({})\n"
                .format(
                    IR_node.variable_name, IR_node.name,
                    IR_node.get_attr('kernel_shape')[-1],
                    ', '.join('%s' % i for i in
                              IR_node.layer.attr["kernel_shape"].list.i[:-2]),
                    ', '.join(
                        '%s' % i
                        for i in IR_node.layer.attr['strides'].list.i[1:-1]),
                    IR_node.get_attr('auto_pad') != 'VALID',
                    IR_node.get_attr('use_bias'),
                    self.parent_variable_name(IR_node)))
        return codes

    def emit_Pool(self, IR_node):
        input_node = self.IR_graph.get_node(
            IR_node.in_edges[0]).real_variable_name
        if IR_node.layer.attr['global_pooling'].b:
            self.used_layers.add('GlobalPooling')
            code = "{:<15} = global_pooling({}, '{}', name = '{}')".format(
                IR_node.variable_name, input_node,
                IR_node.get_attr('pooling_type'), IR_node.name)
        else:
            for e in IR_node.get_attr('dilations', []):
                assert e == 1

            dim = len(IR_node.get_attr('kernel_shape')) - 2
            padding = not self.is_valid_padding(IR_node.get_attr('auto_pad'),
                                                IR_node.get_attr('pads'))
            padding = [False] + [padding] * dim
            ceil_out_dim = self.is_ceil_mode(IR_node.get_attr('pads'))

            pooling_type = IR_node.get_attr('pooling_type')
            if pooling_type == 'MAX':
                pooling_type = cntk.MAX_POOLING
            elif pooling_type == 'AVG':
                pooling_type = cntk.AVG_POOLING
            else:
                raise ValueError

            if self.weight_loaded:
                self.used_layers.add(IR_node.type)
                code = "{:<15} = pooling({}, pooling_type={}, pooling_window_shape={}, strides={}, auto_padding={}, ceil_out_dim={})".format(
                    IR_node.variable_name, input_node, pooling_type,
                    tuple(IR_node.get_attr('kernel_shape')[1:-1]),
                    tuple(IR_node.get_attr('strides')[1:-1]), padding,
                    ceil_out_dim)
            else:
                raise NotImplementedError
        return code

    def emit_UNKNOWN(self, IR_node):
        print(IR_node.IR_layer.name)

    def emit_DataInput(self, IR_node):

        shape_str = self._shapeToStr(IR_node.IR_layer.attr["shape"].shape)

        dtype_str = ", dtype = {}".format(
            self.dtype_map[IR_node.layer.attr['dtype'].
                           type]) if 'dtype' in IR_node.layer.attr else ""
        code = "{:<15} = cntk.sequence.input_variable(({},) {}, name='{}')".format(
            IR_node.variable_name, shape_str, dtype_str, IR_node.name)
        return code

    def emit_Dropout(self, IR_node):
        parent = self.IR_graph.get_parent(IR_node.name, [0])
        if self.phase == 'train':
            code = "{:<15} = Dropout({}, name = '{}')({})".format(
                IR_node.variable_name, 1 - IR_node.get_attr('keep_prob'),
                IR_node.name, parent.real_variable_name)
            return code
        else:
            IR_node.real_name = parent.real_name

    def emit_FullyConnected(self, IR_node):
        input_node = self.parent_variable_name(IR_node)
        if self.weight_loaded:
            self.used_layers.add(IR_node.type)
            code = "{:<15} = dense({}, name = '{}')".format(
                IR_node.variable_name, input_node, IR_node.name)

        else:
            code = "{:<15} = Dense({}, bias = {}, name = '{}')({})".format(
                IR_node.variable_name, IR_node.layer.attr["units"].i,
                IR_node.layer.attr['use_bias'].b, IR_node.name, input_node)
        return code

    def emit_Flatten(self, IR_node):
        code = "{:<15} = ops.reshape({}, (-1,), name = '{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            IR_node.name)
        return code

    def emit_Reshape(self, IR_node):
        code = "{:<15} = cntk.reshape({}, shape={}, name='{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            tuple(IR_node.get_attr('shape')), IR_node.name)
        return code

    def _emit_activation(self, IR_node, op_name):
        code = "{:<15} = layers.Activation(activation = {}, name = '{}')({})".format(
            IR_node.variable_name, op_name, IR_node.name,
            self.parent_variable_name(IR_node))
        return code

    def emit_Tanh(self, IR_node):
        return self._emit_activation(IR_node, 'ops.tanh')

    def emit_Relu(self, IR_node):
        return self._emit_activation(IR_node, 'ops.relu')

    def emit_Softmax(self, IR_node):
        return self._emit_activation(IR_node, 'ops.softmax')

    def emit_Sigmoid(self, IR_node):
        return self._emit_activation(IR_node, 'ops.sigmoid')

    def emit_RNNs(self, IR_node, func):
        assert False

    def emit_LSTM(self, IR_node):
        return self.emit_RNNs(IR_node, "LSTM")

    def emit_GRU(self, IR_node):
        return self.emit_RNNs(IR_node, "GRU")

    def emit_Add(self, IR_node):
        if len(IR_node.in_edges) > 1:
            inputs = ' + '.join(
                self.parent_variable_name(IR_node, i)
                for i in IR_node.in_edges)
            code = "{:<15} = {}".format(IR_node.variable_name, inputs)
            return code

    def emit_Sub(self, IR_node):
        if len(IR_node.in_edges) > 1:
            inputs = ' - '.join(
                self.parent_variable_name(IR_node, i)
                for i in IR_node.in_edges)
            code = "{:<15} = {}".format(IR_node.variable_name, inputs)
            return code

    def emit_Mul(self, IR_node):
        if len(IR_node.in_edges) > 1:
            inputs = ' * '.join(
                self.parent_variable_name(IR_node, i)
                for i in IR_node.in_edges)
            code = "{:<15} = {}".format(IR_node.variable_name, inputs)
            return code

    def emit_Constant(self, IR_node):
        if IR_node.get_attr('value'):
            code = "{:<15} = cntk.Constant(value={})".format(
                IR_node.variable_name, IR_node.get_attr('value'))
        else:
            code = "{:<15} = cntk.Constant(value=__weights_dict['{}']['value'])".format(
                IR_node.variable_name, IR_node.name)
        return code

    def emit_Concat(self, IR_node):
        inputs = ', '.join(
            self.parent_variable_name(IR_node, i) for i in IR_node.in_edges)
        code = "{:<15} = cntk.splice({}, axis={}, name='{}')".format(
            IR_node.variable_name,
            inputs,
            IR_node.get_attr('axis') - 1,  # CNTK shapes omit the batch axis, so shift the IR axis down by one.
            IR_node.name)
        return code

    def emit_BatchNorm(self, IR_node):
        self.used_layers.add(IR_node.type)
        code = "{:<15} = batch_normalization({}, epsilon={}, name='{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            IR_node.get_attr('epsilon'), IR_node.name)
        return code

    def emit_Pad(self, IR_node):
        pad_mode = IR_node.get_attr('mode', 'constant').lower()
        if pad_mode == 'constant':
            mode = 'mode = ops.CONSTANT_PAD, constant_value = {}'.format(
                IR_node.get_attr('constant_values', 0.0))
        elif pad_mode == 'reflect':
            mode = 'mode = ops.REFLECT_PAD'
        elif pad_mode == 'symmetric':
            mode = 'mode = ops.SYMMETRIC_PAD'
        else:
            raise ValueError("Unknown padding mode [{}].".format(pad_mode))

        padding = IR_node.get_attr('pads')
        padding = convert_onnx_pad_to_tf(padding)[1:]

        code = "{:<15} = ops.pad({}, pattern={}, {})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node), padding,
            mode)
        return code

    def emit_Squeeze(self, IR_node):
        IR_node.real_name = self.IR_graph.get_node(
            IR_node.in_edges[0]).real_name

    def emit_Log(self, IR_node):
        code = "{:<15} = _cntk.log({}, name='{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            IR_node.name)
        return code

    def emit_Exp(self, IR_node):
        code = "{:<15} = _cntk.exp({}, name='{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            IR_node.name)
        return code

    def emit_Embedding(self, IR_node):

        codes = list()
        codes.append(
            "{}_P = cntk.one_hot({}, __weights_dict['{}']['weights'].shape[0])"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    IR_node.name))

        codes.append(
            "{:<15} = layers.Embedding(weights=__weights_dict['{}']['weights'])({}_P)"
            .format(
                IR_node.variable_name,
                # IR_node.get_attr('output_dim'),
                IR_node.name,
                IR_node.variable_name))

        return codes

    def emit_Reciprocal(self, IR_node):
        code = "{:<15} = _cntk.reciprocal({}, name='{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            IR_node.name)
        return code

    def emit_ReduceMean(self, IR_node):
        code = "{:<15} = ops.reduce_mean({}, axis = ({}), name = '{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            ', '.join('%s' % (i - 1) for i in IR_node.get_attr('axes')),
            IR_node.name)
        return code

    def emit_LRN(self, IR_node):
        self.used_layers.add(IR_node.type)
        output_name = IR_node.variable_name
        input_name = self.parent_variable_name(IR_node)
        IR_name = IR_node.name
        size = IR_node.get_attr('size')
        depth_radius = size // 2
        alpha = IR_node.get_attr('alpha')
        beta = IR_node.get_attr('beta')
        bias = IR_node.get_attr('bias')

        code = "{:<15} = lrn({}, k={}, n={}, alpha={}, beta={}, name='{}')".format(
            output_name, input_name, bias, depth_radius + 1, alpha, beta,
            IR_name)
        return code
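
    # Worked example (added for exposition) of the kwarg mapping above: an IR
    # LRN with size=5 and bias=1.0 gives depth_radius = 2, so the emitted call
    # is lrn(x, k=1.0, n=3, ...) -- the window parameter n is radius + 1, and
    # k takes the IR bias.
    @staticmethod
    def _sketch_lrn_kwargs(size=5, bias=1.0):
        depth_radius = size // 2                     # 5 -> 2
        return {'k': bias, 'n': depth_radius + 1}    # {'k': 1.0, 'n': 3}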

    # Functional leaky ReLU via the identity relu(x) - alpha * relu(-x);
    # kept under the 'LeakRelu' spelling because dispatch is by IR node type.
    def emit_LeakRelu(self, IR_node):
        code = "{:<15} = _cntk.relu({}) - {} * _cntk.relu(-{})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            IR_node.get_attr('alpha'), self.parent_variable_name(IR_node))
        return code
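
    # Illustrative check (added for exposition; NumPy only) of the identity
    # the emitter relies on: relu(x) - alpha * relu(-x) equals leaky ReLU.
    @staticmethod
    def _sketch_leaky_relu_identity():
        import numpy as np
        x = np.array([-2.0, -0.5, 0.0, 3.0], dtype=np.float32)
        alpha = 0.1
        left = np.maximum(x, 0) - alpha * np.maximum(-x, 0)
        right = np.where(x > 0, x, alpha * x)
        return bool(np.allclose(left, right))   # True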

    def emit_LeakyRelu(self, IR_node):
        self.used_layers.add(IR_node.type)
        code = "{:<15} = _leaky_relu({}, {}, name='{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            IR_node.get_attr('alpha'), IR_node.name)
        return code

    def emit_UpSampling2D(self, IR_node):
        self.used_layers.add(IR_node.type)
        code = "{:<15} = Upsampling2D({}, stride = {}, name = '{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            IR_node.get_attr('scales')[0], IR_node.name)
        return code

    def emit_ConvTranspose(self, IR_node):
        return self.emit_Conv(IR_node)

    def emit_yolo(self, IR_node):
        self.used_layers.add(IR_node.type)
        code = "{:<15} = {}".format(IR_node.variable_name,
                                    self.parent_variable_name(IR_node))
        # print(IR_node.layer)
        self.yolo_parameter = [
            IR_node.get_attr('anchors'),
            IR_node.get_attr('classes'),
            IR_node.get_attr("ignore_thresh"),
            IR_node.get_attr("jitter")
        ]
        # assert False
        return code

    def emit_Crop(self, IR_node):
        self.used_layers.add(IR_node.type)
        output_shape = IR_node.get_attr('_output_shapes')[0]
        output_shape = shape_to_list(output_shape)[1:]
        code = "{:<15} = _crop({}, {}, {}, name='{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            IR_node.get_attr('border')[:2], output_shape, IR_node.real_name)
        return code

    def emit_Relu6(self, IR_node):
        codes = list()
        codes.append(self.emit_Relu(IR_node))
        codes.append("{:<15} = cntk.clip({}, 0, 6, name='{}_clip')".format(
            IR_node.variable_name + "_clip", IR_node.variable_name,
            IR_node.name))
        IR_node.real_name = IR_node.name + '_clip'
        return codes

    def emit_DepthwiseConv(self, IR_node):
        return self.emit_Conv(IR_node)

    # def emit_Unstack(self, IR_node):
    # num_str = "{}.shape[{}]".format(self.parent_variable_name(IR_node), IR_node.get_attr('axis'))
    # axis = IR_node.get_attr('axis')
    # parent_variable_shape = "list({}.shape)".format(self.parent_variable_name(IR_node)
    #         if self.IR_graph.get_parent(IR_node.name, [0]).type != 'Embedding'
    #             else self.parent_variable_name(IR_node)+'.E')
    # if axis==1:
    #     shape_str = "tuple([{}[0]*{}[{}], 1].extend({}[{}+1:]))".format(
    #         parent_variable_shape,
    #         parent_variable_shape,
    #         str(axis),
    #         parent_variable_shape,
    #         str(axis))
    # else:
    #     shape_str = "tuple([{}[0]*{}[{}]].extend({}[1:{}]).append(1).extend({}[{}+1:]))".format(
    #         parent_variable_shape,
    #         parent_variable_shape,
    #         str(axis),
    #         parent_variable_shape,
    #         str(axis),
    #         parent_variable_shape,
    #         str(axis))
    # code = "{:<15} = cntk.reshape({}, {}, name='{}')".format(
    #     IR_node.variable_name,
    #     self.parent_variable_name(IR_node),
    #     shape_str,
    #     IR_node.variable_name)
    # code = "{: <15} = cntk.reshape({}, {}.shape, name='{}')".format(
    #     IR_node.variable_name,
    #     self.parent_variable_name(IR_node),
    #     self.parent_variable_name(IR_node),
    #     IR_node.name
    # )
    # return code

    def emit_Shape(self, IR_node):
        parent_node = self.IR_graph.get_parent(IR_node.name, [0])
        code = "{:<15} = {}.shape".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node)
            if parent_node.type != 'Embedding' else
            self.parent_variable_name(IR_node) + ".E")
        return code

    def emit_Slice(self, IR_node):
        starts = IR_node.get_attr('starts')
        if len(starts) > 1:
            starts = [starts[0], starts[-1]] + starts[1:-1]
        ends = IR_node.get_attr('ends')
        if len(ends) > 1:
            ends = [ends[0], ends[-1]] + ends[1:-1]
        extra_str = ""
        for idx, _ in enumerate(starts):
            if idx:
                extra_str += ", "
            extra_str += "{}:".format(starts[idx])
            if ends[idx]:
                extra_str += "{}".format(ends[idx])
        code = "{:<15} = {}[{}]".format(IR_node.variable_name,
                                        self.parent_variable_name(IR_node),
                                        extra_str)
        return code

    def emit_Split(self, IR_node):
        self.used_layers.add(IR_node.type)
        axis = IR_node.get_attr('axis')
        split_num = IR_node.get_attr('split')
        code = "{:<15} = split(input={}, axis={}, split_num={})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            str(axis), str(split_num))

        return code

    # def emit_Fill(self, IR_node):
    #     code = "{:<15} = cntk.Constant({}, {}, name='{}')".format(
    #         IR_node.variable_name,
    #         IR_node.get_attr('value'),
    #         self.parent_variable_name(IR_node),
    #         IR_node.name)
    #     return code

    def emit_Unsqueeze(self, IR_node):
        code = "{:<15} = cntk.expand_dims({}, axis={}, name='{}')".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            IR_node.get_attr('axes')[0], IR_node.name)
        return code

    def emit_Scope(self, IR_node):
        pattern = IR_node.pattern
        if pattern not in self.naive_scope_pattern and re.sub(
                r'(_\d+)*$', '',
                IR_node.pattern) not in self.naive_scope_pattern:
            func = getattr(self, "_emit_" + pattern)
            code = func(IR_node)
        else:
            code = "{:<15} = __{}({})".format(
                IR_node.real_variable_name, IR_node.pattern, ', '.join(
                    self.parent_variable_name(IR_node, s)
                    for s in IR_node.in_edges))
            self._gen_scope_code(IR_node)
        return code

    def _gen_scope_code(self, scope_node):
        def _scope_func(scope_name, params, code, return_var):
            code = """
def __{}({}):
{}
    return {}
    """.format(scope_name, params, code, ', '.join(return_var))
            return code

        if not self.layers_codes.get(scope_node.pattern, None):
            body_code = str()
            for node_name in scope_node.topology_list:
                node = self.IR_graph.get_node(node_name)
                node_type = node.type

                if hasattr(self, "emit_" + node_type):
                    func = getattr(self, "emit_" + node_type)
                    line = func(node)
                    if line is not None:
                        body_code += "    " + line + '\n'
                else:
                    print("CntkEmitter has not supported operator [%s]." %
                          (node_type))
                    self.emit_UNKNOWN(node)

            # param_code does not need parameter slice.
            input_params = scope_node.input_params
            param_code = ', '.join(input_params)
            function_code = _scope_func(scope_node.pattern, param_code,
                                        body_code, scope_node.return_variables)

            self.layers_codes[scope_node.pattern] = function_code

    def _emit_h_zero(self, IR_node):
        code = "{:<15} = cntk.Constant({}, (1, {}))".format(
            IR_node.variable_name, IR_node.get_attr('fill_value'),
            IR_node.get_attr('fill_size'))
        return code

    def _layer_Crop(self):
        self.add_body(
            0, '''
def _crop(input, border, output_shape, **kwargs):
    dim = len(output_shape)
    output_shape = [output_shape[-1]] + output_shape[:-1]
    ref_tensor = np.zeros(shape=output_shape, dtype=np.float32)

    input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1)))
    layer = cntk.crop_manual(node_input=input, node_referent=ref_tensor, offset_x=border[0], offset_y=border[1])
    layer = cntk.transpose(layer, list(range(1, dim)) + [0])
    return layer
''')

    def _layer_LeakyRelu(self):
        self.add_body(
            0, '''
def _leaky_relu(x, leak, name):
    return cntk.param_relu(cntk.constant((np.ones(x.shape)*leak).astype(np.float32)), x, name = name)
''')

    def _layer_yolo(self):
        self.add_body(
            0, '''
def yolo_parameter():
    return {}
'''.format(self.yolo_parameter))

    def _layer_upsample(self):
        self.add_body(
            0, '''
def Upsampling2D(x, stride, name):
    assert stride == 2
    xr = cntk.reshape(x, (x.shape[0], 1, x.shape[1], 1, x.shape[2]))
    xx = cntk.splice(xr, xr, axis = -2)
    xy = cntk.splice(xx, xx, axis = -4)
    r = cntk.reshape(xy, (x.shape[0] * 2, x.shape[1] * 2, x.shape[2]), name = name)
    return r
''')
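
    # NumPy sketch (added for exposition) of the interleave trick in the
    # string above; assumes the channels-last (H, W, C) per-sample layout the
    # helper operates on. Equivalent to repeating each row and column twice.
    @staticmethod
    def _sketch_upsample2x(x):
        import numpy as np
        h, w, c = x.shape
        xr = x.reshape(h, 1, w, 1, c)
        xx = np.concatenate([xr, xr], axis=-2)   # duplicate along width
        xy = np.concatenate([xx, xx], axis=-4)   # duplicate along height
        return xy.reshape(h * 2, w * 2, c)       # == np.repeat on both axes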

    def _layer_LRN(self):
        self.add_body(
            0, """
def lrn(input, **kwargs):
    dim = len(input.output.shape)
    input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1)))
    layer = BlockApiSetup.lrn(**kwargs)(input)
    layer = cntk.transpose(layer, list(range(1, dim)) + [0])
    return layer
""")

    def _layer_FullyConnected(self):
        self.add_body(
            0, """
def dense(input, name, **kwargs):
    w = __weights_dict[name]['weights']
    b = __weights_dict[name]['bias'] if 'bias' in __weights_dict[name] else None
    return BlockApiSetup.linear(output_shape=w.shape[1], input_shape=w.shape[0], scale_init=w, bias_init=b, name=name, **kwargs)(input)
""")

    def _layer_Conv(self):
        self.add_body(
            0, """
def convolution(input, is_transpose, name, **kwargs):
    dim = __weights_dict[name]['weights'].ndim

    if is_transpose:
        weight = np.transpose(__weights_dict[name]['weights'], [dim - 2, dim - 1] + list(range(0, dim - 2)))
        kwargs.pop('groups', None)
    else:
        weight = np.transpose(__weights_dict[name]['weights'], [dim - 1, dim - 2] + list(range(0, dim - 2)))
    w = cntk.Parameter(init=weight, name=name + '_weight')

    input = cntk.transpose(input, [dim - 2] + list(range(0, dim - 2)))

    if is_transpose:
        layer = ops.convolution_transpose(w, input, **kwargs)
    else:
        layer = ops.convolution(w, input, **kwargs)
    if 'bias' in __weights_dict[name]:
        bias = np.reshape(__weights_dict[name]['bias'], [-1] + [1] * (dim - 2))
        b = cntk.Parameter(init=bias, name=name + '_bias')
        layer = layer + b
    layer = cntk.transpose(layer, list(range(1, dim - 1)) + [0])
    return layer
""")

    def _layer_Pool(self):
        self.add_body(
            0, """
def pooling(input, **kwargs):
    dim = len(input.output.shape)
    input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1)))
    layer = ops.pooling(input, **kwargs)
    layer = cntk.transpose(layer, list(range(1, dim)) + [0])
    return layer
""")

    def _layer_GlobalPooling(self):
        self.add_body(
            0, """
def global_pooling(input, type, **kwargs):
    dim = len(input.output.shape)
    input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1)))
    layer = layers.GlobalMaxPooling(**kwargs)(input) if type == 'MAX' else layers.GlobalAveragePooling(**kwargs)(input)
    layer = cntk.transpose(layer, list(range(1, dim)) + [0])
    return layer
""")

    def _layer_BatchNorm(self):
        self.add_body(
            0, """
def batch_normalization(input, name, epsilon, **kwargs):
    mean = cntk.Parameter(init = __weights_dict[name]['mean'],
        name = name + "_mean")
    var = cntk.Parameter(init = __weights_dict[name]['var'],
        name = name + "_var")

    layer = (input - mean) / cntk.sqrt(var + epsilon)
    if 'scale' in __weights_dict[name]:
        scale = cntk.Parameter(init = __weights_dict[name]['scale'],
            name = name + "_scale")
        layer = scale * layer

    if 'bias' in __weights_dict[name]:
        bias = cntk.Parameter(init = __weights_dict[name]['bias'],
            name = name + "_bias")
        layer = layer + bias

    return layer
""")

    def _layer_Split(self):
        self.add_body(
            0, """
def split(input, axis, split_num):
        split_len = input.shape[axis]
        res = []
        st = 0
        for i in range(split_num):
            ed = st + split_len//split_num
            res.append(cntk.slice(input, axis, st, ed))
            st += split_len//split_num

        return res
        """)
Example no. 11
class CaffeEmitter(Emitter):

    def __init__(self, model):
        from six import string_types as _string_types
        super(CaffeEmitter, self).__init__()
        if isinstance(model, _string_types):
            network_path = model
        else:
            network_path = model[0]
            self._load_weights(model[1])

        self.IR_graph = IRGraph(network_path)
        super(CaffeEmitter, self)._build()


    @property
    def header_code(self):
        return """import numpy as np
import sys, argparse
import caffe
from caffe import layers as L
from caffe import params as P
from caffe import to_proto
from six import text_type as _text_type


__weights_dict = dict()

def load_weights(weight_file):
    if weight_file == None:
        return

    try:
        weights_dict = np.load(weight_file).item()
    except:
        weights_dict = np.load(weight_file, encoding='bytes').item()

    return weights_dict


def KitModel(weight_file = None):
    n = caffe.NetSpec()
"""

    @property
    def end_code(self):
        return """    return n

def make_net(prototxt):
    n = KitModel()
    with open(prototxt, 'w') as fpb:
        print(n.to_proto(), file=fpb)

def gen_weight(weight_file, model, prototxt):
    global __weights_dict
    __weights_dict = load_weights(weight_file)

    net = caffe.Net(prototxt, caffe.TRAIN)

    for key in __weights_dict:
        if 'weights' in __weights_dict[key]:
            net.params[key][0].data.flat = __weights_dict[key]['weights']
        elif 'mean' in __weights_dict[key]:
            net.params[key][0].data.flat = __weights_dict[key]['mean']
            net.params[key][1].data.flat = __weights_dict[key]['var']
            if 'scale' in __weights_dict[key]:
                net.params[key][2].data.flat = __weights_dict[key]['scale']
        elif 'scale' in __weights_dict[key]:
            net.params[key][0].data.flat = __weights_dict[key]['scale']
        if 'bias' in __weights_dict[key]:
            net.params[key][1].data.flat = __weights_dict[key]['bias']
    net.save(model)
    return net



if __name__=='__main__':
    parser = argparse.ArgumentParser(description='Generate caffe model and prototxt')
    parser.add_argument('--weight_file', '-w', type=_text_type, required=True, help='IR weight file')
    parser.add_argument('--prototxt', '-p', type=_text_type, default='caffe_converted.prototxt')
    parser.add_argument('--model', '-m', type=_text_type, default='caffe_converted.caffemodel')
    args = parser.parse_args()
    make_net(args.prototxt)
    gen_weight(args.weight_file, args.model, args.prototxt)

"""

    def gen_code(self, phase = 'test'):
        self.phase = phase
        self.add_body(0, self.header_code)

        # Debug dump of the IR graph, written to graph.txt for inspection
        with open("graph.txt", 'w') as f:
            for layer in self.IR_graph.topological_sort:
                current_node = self.IR_graph.get_node(layer)
                print("========current_node=========\n{}".format(current_node.layer), file=f)

        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            node_type = current_node.type
            #print("========current_node={}".format(current_node.layer))

            if hasattr(self, "emit_" + node_type):
                func = getattr(self, "emit_" + node_type)
                func(current_node)
            else:
                print("CaffeEmitter has not supported operator [%s]." % (node_type))
                self.emit_UNKNOWN(current_node)

        # self.add_body(1, "return n.{}".format(
        #     ','.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.output_layers])))

        self.add_body(0, "")
        self.add_body(0, self.end_code)

        return self.body_code


    def run(self, dstNetworkPath, dstWeightPath = None, phase = 'test'):
        super(CaffeEmitter, self).run(dstNetworkPath, dstWeightPath, phase)
        if self.weight_loaded:
            self.save_weights(self.weights_dict, dstWeightPath)



    @staticmethod
    def _shapeToStr(shapes):
        # Despite the name, returns a list of dims (unknown sizes mapped to 1).
        return [dim.size if dim.size > 0 else 1 for dim in shapes.dim]



    def check_if_need_transpose(self, IR_node):
        parent = self.IR_graph.get_parent(IR_node.name, [0])
        while parent.type == 'Flatten':
            parent = self.IR_graph.get_parent(parent.name, [0])
        dim = len(parent.layer.attr['_output_shapes'].list.shape[0].dim)
        if dim > 2:
            original_dims = self.weights_dict[IR_node.name]['weights'].shape
            dims = [i.size for i in parent.layer.attr['_output_shapes'].list.shape[0].dim[1:]] + [-1]
            self.weights_dict[IR_node.name]['weights'] = np.reshape(self.weights_dict[IR_node.name]['weights'], dims)
            self.weights_dict[IR_node.name]['weights'] = np.transpose(self.weights_dict[IR_node.name]['weights'], [dim - 2] + list(range(0, dim - 2)) + [dim - 1])
            self.weights_dict[IR_node.name]['weights'] = np.reshape(self.weights_dict[IR_node.name]['weights'], original_dims)
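
    # Worked sketch (added for exposition) of the reorder above: an FC layer
    # fed by an NHWC tensor of shape (N, 2, 2, 3) stores its weight rows in
    # H*W*C flattening order, while Caffe flattens NCHW, so the rows are
    # permuted channels-first before use.
    @staticmethod
    def _sketch_fc_weight_reorder():
        import numpy as np
        w = np.arange(12 * 4).reshape(12, 4)      # (H*W*C, units) = (2*2*3, 4)
        w4 = np.reshape(w, [2, 2, 3, -1])         # (H, W, C, units)
        w4 = np.transpose(w4, [2, 0, 1, 3])       # (C, H, W, units): NCHW order
        return np.reshape(w4, w.shape)            # back to (12, 4), rows permuted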


    def emit_Conv(self, IR_node):
        self.add_body(1, "n.{:<15} = L.Convolution(n.{}, kernel_size={}, stride={}, num_output={}, pad={}, group={}, \
bias_term={}, ntop=1)".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.get_attr('kernel_shape')[0],
            IR_node.get_attr('strides')[1],
            IR_node.get_attr('kernel_shape')[-1],
            IR_node.get_attr('pads')[1],
            IR_node.get_attr('group', 1),
            IR_node.get_attr('use_bias', False)))

        dim = len(IR_node.get_attr('strides')) - 2
        if self.weight_loaded:
            self.weights_dict[IR_node.name]['weights'] = np.transpose(self.weights_dict[IR_node.name]['weights'], [dim + 1, dim] + list(range(0, dim)))
            self.weights_dict[IR_node.variable_name] = self.weights_dict.pop(IR_node.name)

        # keys = []
        # for key in self.weights_dict[IR_node.name].keys():
        #     keys.append(key)
        # print("=======Layer: {}, keys: {}".format(IR_node.name, keys))

    def emit_Pool(self, IR_node):
        pooling_type = IR_node.get_attr('pooling_type')
        if pooling_type == 'MAX':
            pooling_type = P.Pooling.MAX
        elif pooling_type == 'AVG':
            pooling_type = P.Pooling.AVE
        elif pooling_type == 'STOCHASTIC':
            pooling_type = P.Pooling.STOCHASTIC
        else:
            raise ValueError

        if IR_node.layer.attr['global_pooling'].b:
            self.used_layers.add('GlobalPooling')
            self.add_body(1, "n.{:<15} = L.Pooling(n.{}, pool={}, stride={}, global_pooling=true, ntop=1)".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
                pooling_type,
                IR_node.get_attr('strides')[1]))
        else:
            self.add_body(1, "n.{:<15} = L.Pooling(n.{}, pool={}, kernel_size={}, pad_h={}, pad_w={}, stride={}, ntop=1)".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
                pooling_type,
                IR_node.get_attr('kernel_shape')[1],
                IR_node.get_attr('pads')[1],
                IR_node.get_attr('pads')[2],
                IR_node.get_attr('strides')[1]))


    def emit_UNKNOWN(self, IR_node):
        print(IR_node.IR_layer.name)


    def emit_DataInput(self, IR_node):
        shape = self._shapeToStr(IR_node.get_attr('shape'))
        shape = [shape[0], shape[-1]] + shape[1:-1]
        self.add_body(1, "n.{:<15} = L.Input(shape=[dict(dim={})], ntop=1)".format(
            IR_node.variable_name,
            shape))


    def emit_Dropout(self, IR_node):
        in_place = True
        self.add_body(1, "n.{:<15} = L.Dropout(n.{}, dropout_ratio={} , in_place={}, ntop=1)".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            1 - IR_node.get_attr('keep_prob'),
            in_place))


    def emit_FullyConnected(self, IR_node):
        self.add_body(1, "n.{:<15} = L.InnerProduct(n.{}, num_output={}, bias_term={}, ntop=1)".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.layer.attr["units"].i,
            IR_node.get_attr('use_bias', False)))
        if self.weight_loaded:
            self.check_if_need_transpose(IR_node)
            self.weights_dict[IR_node.name]['weights'] = np.transpose(self.weights_dict[IR_node.name]['weights'], (1, 0))
            self.weights_dict[IR_node.variable_name] = self.weights_dict.pop(IR_node.name)


    def emit_BatchNorm(self, IR_node):
        self.add_body(1, "n.{:<15} = L.BatchNorm(n.{}, eps={}, use_global_stats={}, ntop=1)".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.get_attr('epsilon'),
            self.phase == 'test'
        ))
        scale_layer_var_name = IR_node.variable_name + "_scale"
        self.add_body(1, "n.{:<15} = L.Scale(n.{}, bias_term={}, ntop=1)".format(
            scale_layer_var_name,
            IR_node.variable_name,
            IR_node.get_attr('bias', False)
        ))
        IR_node.real_name = IR_node.name + "_scale"
        if self.weight_loaded:
            self.weights_dict[scale_layer_var_name] = dict()
            self.weights_dict[scale_layer_var_name]['scale'] = self.weights_dict[IR_node.name]['scale']
            self.weights_dict[scale_layer_var_name]['bias'] = self.weights_dict[IR_node.name]['bias']
            self.weights_dict[IR_node.name].pop('scale', None)
            self.weights_dict[IR_node.name].pop('bias', None)
            self.weights_dict[IR_node.name]['scale'] = 1
            self.weights_dict[IR_node.variable_name] = self.weights_dict.pop(IR_node.name)


    def emit_LRN(self, IR_node):
        self.add_body(1, "n.{:<15} = L.LRN(n.{}, local_size={}, alpha={}, beta={}, k={})".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.get_attr('size') * 2 - 1,
            IR_node.get_attr('alpha'),
            IR_node.get_attr('beta'),
            IR_node.get_attr('k')
        ))


    def emit_Add(self, IR_node):
        input_layers = ', '.join(('n.' + self.IR_graph.get_parent(IR_node.name, [num]).real_variable_name) for num in range(0, len(IR_node.in_edges)))
        self.add_body(1, "n.{:<15} = L.Eltwise({}, operation=1, ntop=1)".format(
            IR_node.variable_name,
            input_layers,
        ))

    def emit_Flatten(self, IR_node):
        IR_node.real_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name


    def emit_Concat(self, IR_node):
        # Map the IR (NHWC) concat axis to Caffe's NCHW ordering; the common
        # channel case is IR axis 3 -> Caffe axis 1.
        axis_array = (2, 3, 1, 0)
        axis = axis_array.index(IR_node.get_attr('axis'))
        input_layers = ', '.join(('n.' + self.IR_graph.get_node(edge).real_variable_name) for edge in IR_node.in_edges)
        self.add_body(1, "n.{:<15} = L.Concat({}, axis={})".format(
            IR_node.variable_name,
            input_layers,
            axis
        ))

    # def emit_Tanh(self, IR_node):
    #     self._emit_activation(IR_node, 'ops.tanh')


    def emit_Relu(self, IR_node):
        in_place = True
        self.add_body(1, "n.{:<15} = L.ReLU(n.{}, in_place={}, ntop=1)".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            in_place))



    def emit_Softmax(self, IR_node):
        self.add_body(1, "n.{:<15} = L.Softmax(n.{}, ntop=1)".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node)))
Example no. 12
class OnnxEmitter(Emitter):
    dtype_map = {graph_pb2.DT_FLOAT32: "TensorProto.FLOAT"}

    transpose_map = {1: 2, 2: 3, -1: 1}

    def __init__(self, architecture, weight):
        super(OnnxEmitter, self).__init__()
        if not os.path.exists(architecture):
            raise ValueError(
                "IR architecture file [{}] is not found.".format(architecture))
        self.IR_graph = IRGraph(architecture)
        self.IR_graph.build()

        if not os.path.exists(weight):
            raise ValueError(
                "IR weight file [{}] is not found.".format(weight))
        self._load_weights(weight)

    @property
    def header_code(self):
        return """import numpy as np
from onnx import helper, TensorProto
import onnx

__weights_dict = dict()

def load_weights(weight_file):
    if weight_file == None:
        return

    try:
        weights_dict = np.load(weight_file, allow_pickle=True).item()
    except:
        weights_dict = np.load(weight_file, allow_pickle=True, encoding='bytes').item()

    return weights_dict


def KitModel(weight_file = None):
    global __weights_dict
    __weights_dict = load_weights(weight_file)

"""

    def gen_code(self, phase):
        self.phase = phase
        self.add_body(0, self.header_code)

        self.inputs = []
        self.outputs = []
        self.nodes = []
        self.initializer = []

        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            node_type = current_node.type

            if hasattr(self, "emit_" + node_type):
                func = getattr(self, "emit_" + node_type)
                func(current_node)
            else:
                print("OnnxEmitter has not supported operator [%s]." %
                      (node_type))
                self.emit_UNKNOWN(current_node)

        self._process_output_layers()

        self.add_body(
            1, "graph = helper.make_graph([{}], 'mmdnn', [{}], [{}], [{}])".
            format(', '.join(self.nodes), ', '.join(self.inputs),
                   ', '.join(self.outputs), ', '.join(self.initializer)))
        self.add_body(
            1,
            "return helper.make_model(graph, opset_imports=[helper.make_opsetid('', 6)])"
        )
        return self.body_code

    def run(self, dstNetworkPath, dstWeightPath=None, phase='test'):
        super(OnnxEmitter, self).run(dstNetworkPath, dstWeightPath, phase)
        self.save_weights(self.weights_dict, dstWeightPath)
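
    # Hypothetical driver for the emitted module (module/file names assumed;
    # KitModel returning an onnx ModelProto follows from gen_code above):
    @staticmethod
    def _sketch_generated_usage():
        import onnx
        import converted_onnx                     # the file this emitter writes (assumed name)
        model = converted_onnx.KitModel('ir_weights.npy')
        onnx.checker.check_model(model)           # validates graph and the opset-6 import
        onnx.save(model, 'converted.onnx')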

    def check_if_need_transpose(self, IR_node):
        parent = self.IR_graph.get_parent(IR_node.name, [0])
        while parent.type in ('Flatten', 'Dropout'):
            parent = self.IR_graph.get_parent(parent.name, [0])
        dim = len(parent.layer.attr['_output_shapes'].list.shape[0].dim)
        if dim > 2:
            original_dims = self.weights_dict[IR_node.name]['weights'].shape
            dims = [
                i.size for i in
                parent.layer.attr['_output_shapes'].list.shape[0].dim[1:]
            ] + [-1]
            self.weights_dict[IR_node.name]['weights'] = np.reshape(
                self.weights_dict[IR_node.name]['weights'], dims)
            self.weights_dict[IR_node.name]['weights'] = np.transpose(
                self.weights_dict[IR_node.name]['weights'],
                [dim - 2] + list(range(0, dim - 2)) + [dim - 1])
            self.weights_dict[IR_node.name]['weights'] = np.reshape(
                self.weights_dict[IR_node.name]['weights'], original_dims)

    def _process_output_layers(self):
        for name in self.IR_graph.output_layers:
            IR_node = self.IR_graph.get_node(
                self.IR_graph.get_node(name).real_name)
            # Skip output nodes of types that carry no tensor value
            if IR_node.type == 'Shape' or IR_node.type == 'Pack':
                continue
            shape_str = IRGraph.shapeToStr(
                IR_node.layer.attr["_output_shapes"].list.shape[0])

            if IR_node.layer.attr['dtype'].type == graph_pb2.DT_UNDEFINED:
                IR_node.layer.attr['dtype'].type = graph_pb2.DT_FLOAT32
            dtype_str = self.dtype_map[IR_node.layer.attr['dtype'].type]
            self.add_body(
                1, "{:<15} = helper.make_tensor_value_info('{}', {}, ({},))".
                format(IR_node.variable_name + '_out', IR_node.variable_name,
                       dtype_str, shape_str))
            self.outputs.append(IR_node.variable_name + '_out')

    def emit_DataInput(self, IR_node):
        shape = [
            dim.size if dim.size != -1 else 1
            for dim in IR_node.IR_layer.attr["shape"].shape.dim
        ]
        shape_str = ', '.join('%s' % i for i in shape)
        if IR_node.layer.attr['dtype'].type == graph_pb2.DT_UNDEFINED:
            IR_node.layer.attr['dtype'].type = graph_pb2.DT_FLOAT32
        dtype_str = self.dtype_map[IR_node.layer.attr['dtype'].type]
        self.add_body(
            1,
            "{:<15} = helper.make_tensor_value_info('{}', {}, ({},))".format(
                IR_node.variable_name + '_orig',
                IR_node.variable_name + '_orig', dtype_str, shape_str))
        self.add_body(
            1,
            "{:15} = helper.make_node('Transpose', inputs=['{}'], outputs=['{}'], perm=[0, 3, 1, 2], name='{}')"
            .format(IR_node.variable_name, IR_node.variable_name + '_orig',
                    IR_node.variable_name, IR_node.variable_name))
        self.inputs.append(IR_node.variable_name + '_orig')
        self.nodes.append(IR_node.variable_name)

    def emit_Conv(self, IR_node):
        kernel_shape = list(IR_node.get_attr('kernel_shape'))[:-2]
        dilations = list(
            IR_node.get_attr('dilations', [1] * (len(kernel_shape) + 2)))[1:-1]
        group = IR_node.get_attr('group', 1)
        if IR_node.type == 'DepthwiseConv':
            group = IR_node.IR_layer.attr["kernel_shape"].list.i[-2]
            self.weights_dict[IR_node.name]['weights'] = np.swapaxes(
                self.weights_dict[IR_node.name]['weights'], -1, -2)
        pads = IR_node.get_attr('pads')
        pad_length = len(pads)
        pads = pads[1:pad_length // 2 - 1] + pads[pad_length // 2 +
                                                  1:pad_length - 1]
        strides = list(IR_node.get_attr('strides'))[1:-1]
        use_bias = IR_node.get_attr('use_bias')
        self.add_body(
            1, "{:15} = __weights_dict['{}']['weights']".format(
                IR_node.variable_name + '_weight_array', IR_node.name))
        self.add_body(
            1, "{} = {}.transpose([3,2,0,1])".format(
                IR_node.variable_name + '_weight_array',
                IR_node.variable_name + '_weight_array'))
        self.add_body(
            1,
            "{:15} = helper.make_tensor_value_info('{}', onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], list({}.shape))"
            .format(IR_node.variable_name + '_weight',
                    IR_node.variable_name + '_weight',
                    IR_node.variable_name + '_weight_array',
                    IR_node.variable_name + '_weight_array'))

        self.add_body(
            1,
            "{:15} = helper.make_tensor(name='{}', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float))"
            .format(IR_node.variable_name + '_weight_init',
                    IR_node.variable_name + '_weight',
                    IR_node.variable_name + '_weight_array',
                    IR_node.variable_name + '_weight_array',
                    IR_node.variable_name + '_weight_array'))

        if use_bias:
            self.add_body(
                1, "{:15} = __weights_dict['{}']['bias'].squeeze()".format(
                    IR_node.variable_name + '_bias_array', IR_node.name))

            self.add_body(
                1,
                "{:15} = helper.make_tensor_value_info('{}', onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], list({}.shape))"
                .format(IR_node.variable_name + '_bias',
                        IR_node.variable_name + '_bias',
                        IR_node.variable_name + '_bias_array',
                        IR_node.variable_name + '_bias_array'))

            self.add_body(
                1,
                "{:15} = helper.make_tensor(name='{}', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float))"
                .format(IR_node.variable_name + '_bias_init',
                        IR_node.variable_name + '_bias',
                        IR_node.variable_name + '_bias_array',
                        IR_node.variable_name + '_bias_array',
                        IR_node.variable_name + '_bias_array'))
            self.add_body(
                1,
                "{:15} = helper.make_node('Conv', inputs=['{}', '{}', '{}'],outputs=['{}'], dilations={}, group={}, kernel_shape={}, pads={}, strides={}, name='{}')"
                .format(IR_node.variable_name,
                        self.parent_variable_name(IR_node),
                        IR_node.variable_name + '_weight',
                        IR_node.variable_name + '_bias', IR_node.variable_name,
                        dilations, group, kernel_shape, pads, strides,
                        IR_node.variable_name))
            # self.nodes.append(IR_node.variable_name + '_bias')
            self.initializer.append(IR_node.variable_name + '_bias_init')
            self.inputs.append(IR_node.variable_name + '_bias')
        else:
            self.add_body(
                1,
                "{:15} = helper.make_node('Conv', inputs=['{}', '{}'], outputs=['{}'], dilations={}, group={}, kernel_shape={}, pads={}, strides={}, name='{}')"
                .format(IR_node.variable_name,
                        self.parent_variable_name(IR_node),
                        IR_node.variable_name + '_weight',
                        IR_node.variable_name, dilations, group, kernel_shape,
                        pads, strides, IR_node.variable_name))
        # self.nodes.append(IR_node.variable_name + '_weight')
        self.initializer.append(IR_node.variable_name + '_weight_init')
        self.inputs.append(IR_node.variable_name + '_weight')
        self.nodes.append(IR_node.variable_name)
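
    # Shape sketch (added for exposition) of the [3, 2, 0, 1] transpose above:
    # MMdnn IR appears to keep conv kernels as (H, W, C_in, C_out), while ONNX
    # Conv expects (C_out, C_in, H, W).
    @staticmethod
    def _sketch_conv_weight_layout():
        import numpy as np
        w_hwio = np.zeros((3, 3, 16, 32), dtype=np.float32)  # (H, W, C_in, C_out)
        w_oihw = w_hwio.transpose([3, 2, 0, 1])
        return w_oihw.shape                                   # (32, 16, 3, 3)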

    def emit_BatchNorm(self, IR_node):
        epsilon = IR_node.get_attr('epsilon')
        if IR_node.get_attr('scale'):
            self.add_body(
                1, "{:15} = __weights_dict['{}']['scale'].squeeze()".format(
                    IR_node.variable_name + '_scale_array', IR_node.name))
        else:
            self.add_body(
                1,
                "{:15} = np.ndarray(__weights_dict['{}']['bias'].shape, dtype=__weights_dict['{}']['bias'].dtype).squeeze()"
                .format(IR_node.variable_name + '_scale_array', IR_node.name,
                        IR_node.name))
            self.add_body(
                1,
                "{:15}.fill(1)".format(IR_node.variable_name + '_scale_array'))

        self.add_body(
            1,
            "{:15} = helper.make_tensor_value_info('{}', onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], list({}.shape))"
            .format(IR_node.variable_name + '_scale',
                    IR_node.variable_name + '_scale',
                    IR_node.variable_name + '_scale_array',
                    IR_node.variable_name + '_scale_array'))

        self.add_body(
            1,
            "{:15} = helper.make_tensor(name='{}', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={})"
            .format(IR_node.variable_name + '_scale_init',
                    IR_node.variable_name + '_scale',
                    IR_node.variable_name + '_scale_array',
                    IR_node.variable_name + '_scale_array',
                    IR_node.variable_name + '_scale_array'))
        self.add_body(
            1, "{:15} = __weights_dict['{}']['bias'].squeeze()".format(
                IR_node.variable_name + '_bias_array', IR_node.name))
        self.add_body(
            1,
            "{:15} = helper.make_tensor_value_info('{}', onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], list({}.shape))"
            .format(IR_node.variable_name + '_bias',
                    IR_node.variable_name + '_bias',
                    IR_node.variable_name + '_bias_array',
                    IR_node.variable_name + '_bias_array'))

        self.add_body(
            1,
            "{:15} = helper.make_tensor(name='{}', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float))"
            .format(IR_node.variable_name + '_bias_init',
                    IR_node.variable_name + '_bias',
                    IR_node.variable_name + '_bias_array',
                    IR_node.variable_name + '_bias_array',
                    IR_node.variable_name + '_bias_array'))

        self.add_body(
            1, "{:15} = __weights_dict['{}']['mean'].squeeze()".format(
                IR_node.variable_name + '_mean_array', IR_node.name))
        self.add_body(
            1,
            "{:15} = helper.make_tensor_value_info('{}', onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], list({}.shape))"
            .format(IR_node.variable_name + '_mean',
                    IR_node.variable_name + '_mean',
                    IR_node.variable_name + '_mean_array',
                    IR_node.variable_name + '_mean_array'))

        self.add_body(
            1,
            "{:15} = helper.make_tensor(name='{}', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float))"
            .format(IR_node.variable_name + '_mean_init',
                    IR_node.variable_name + '_mean',
                    IR_node.variable_name + '_mean_array',
                    IR_node.variable_name + '_mean_array',
                    IR_node.variable_name + '_mean_array'))

        self.add_body(
            1, "{:15} = __weights_dict['{}']['var'].squeeze()".format(
                IR_node.variable_name + '_var_array', IR_node.name))
        self.add_body(
            1,
            "{:15} = helper.make_tensor_value_info('{}', onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], list({}.shape))"
            .format(IR_node.variable_name + '_var',
                    IR_node.variable_name + '_var',
                    IR_node.variable_name + '_var_array',
                    IR_node.variable_name + '_var_array'))

        self.add_body(
            1,
            "{:15} = helper.make_tensor(name='{}', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float))"
            .format(IR_node.variable_name + '_var_init',
                    IR_node.variable_name + '_var',
                    IR_node.variable_name + '_var_array',
                    IR_node.variable_name + '_var_array',
                    IR_node.variable_name + '_var_array'))

        self.add_body(
            1,
            "{:15} = helper.make_node('BatchNormalization', inputs=['{}', '{}', '{}', '{}', '{}'],outputs=['{}'], epsilon={}, is_test={}, name='{}')"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    IR_node.variable_name + '_scale',
                    IR_node.variable_name + '_bias',
                    IR_node.variable_name + '_mean',
                    IR_node.variable_name + '_var', IR_node.variable_name,
                    epsilon, 0 if self.phase == 'train' else 1,
                    IR_node.variable_name))
        self.initializer.append(IR_node.variable_name + '_scale_init')
        self.initializer.append(IR_node.variable_name + '_bias_init')
        self.initializer.append(IR_node.variable_name + '_mean_init')
        self.initializer.append(IR_node.variable_name + '_var_init')
        self.inputs.append(IR_node.variable_name + '_scale')
        self.inputs.append(IR_node.variable_name + '_bias')
        self.inputs.append(IR_node.variable_name + '_mean')
        self.inputs.append(IR_node.variable_name + '_var')
        self.nodes.append(IR_node.variable_name)

    def emit_Scale(self, IR_node):
        dims = [
            i.size
            for i in IR_node.layer.attr['_output_shapes'].list.shape[0].dim[1:]
        ]
        units = dims[-1]
        epsilon = 1e-5
        if IR_node.get_attr('scale'):
            self.add_body(
                1, "{:15} = __weights_dict['{}']['scale'].squeeze()".format(
                    IR_node.variable_name + '_scale_array', IR_node.name))
        else:
            self.add_body(
                1,
                "{:15} = np.ndarray(__weights_dict['{}']['bias'].shape, dtype=__weights_dict['{}']['bias'].dtype).squeeze()"
                .format(IR_node.variable_name + '_scale_array', IR_node.name,
                        IR_node.name))
            self.add_body(
                1,
                "{:15}.fill(1)".format(IR_node.variable_name + '_scale_array'))
        self.add_body(
            1,
            "{:15} = helper.make_tensor_value_info('{}', onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], list({}.shape))"
            .format(IR_node.variable_name + '_scale',
                    IR_node.variable_name + '_scale',
                    IR_node.variable_name + '_scale_array',
                    IR_node.variable_name + '_scale_array'))

        self.add_body(
            1,
            "{:15} = helper.make_tensor(name='{}', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={})"
            .format(IR_node.variable_name + '_scale_init',
                    IR_node.variable_name + '_scale',
                    IR_node.variable_name + '_scale_array',
                    IR_node.variable_name + '_scale_array',
                    IR_node.variable_name + '_scale_array'))
        self.add_body(
            1, "{:15} = __weights_dict['{}']['bias'].squeeze()".format(
                IR_node.variable_name + '_bias_array', IR_node.name))
        self.add_body(
            1,
            "{:15} = helper.make_tensor_value_info('{}', onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], list({}.shape))"
            .format(IR_node.variable_name + '_bias',
                    IR_node.variable_name + '_bias',
                    IR_node.variable_name + '_bias_array',
                    IR_node.variable_name + '_bias_array'))

        self.add_body(
            1,
            "{:15} = helper.make_tensor(name='{}', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float))"
            .format(IR_node.variable_name + '_bias_init',
                    IR_node.variable_name + '_bias',
                    IR_node.variable_name + '_bias_array',
                    IR_node.variable_name + '_bias_array',
                    IR_node.variable_name + '_bias_array'))
        self.add_body(
            1, "{:15} = np.zeros({}, dtype=np.float32)".format(
                IR_node.variable_name + '_mean_array', units))
        self.add_body(
            1,
            "{:15} = helper.make_tensor_value_info('{}', onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], list({}.shape))"
            .format(IR_node.variable_name + '_mean',
                    IR_node.variable_name + '_mean',
                    IR_node.variable_name + '_mean_array',
                    IR_node.variable_name + '_mean_array'))

        self.add_body(
            1,
            "{:15} = helper.make_tensor(name='{}', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float))"
            .format(IR_node.variable_name + '_mean_init',
                    IR_node.variable_name + '_mean',
                    IR_node.variable_name + '_mean_array',
                    IR_node.variable_name + '_mean_array',
                    IR_node.variable_name + '_mean_array'))
        self.add_body(
            1, "{:15} = np.ones({}, dtype=np.float32)".format(
                IR_node.variable_name + '_var_array', units))
        self.add_body(
            1,
            "{:15} = helper.make_tensor_value_info('{}', onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], list({}.shape))"
            .format(IR_node.variable_name + '_var',
                    IR_node.variable_name + '_var',
                    IR_node.variable_name + '_var_array',
                    IR_node.variable_name + '_var_array'))

        self.add_body(
            1,
            "{:15} = helper.make_tensor(name='{}', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float))"
            .format(IR_node.variable_name + '_var_init',
                    IR_node.variable_name + '_var',
                    IR_node.variable_name + '_var_array',
                    IR_node.variable_name + '_var_array',
                    IR_node.variable_name + '_var_array'))
        self.add_body(
            1,
            "{:15} = helper.make_node('BatchNormalization', inputs=['{}', '{}', '{}', '{}', '{}'],outputs=['{}'], epsilon={}, is_test={}, name='{}')"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    IR_node.variable_name + '_scale',
                    IR_node.variable_name + '_bias',
                    IR_node.variable_name + '_mean',
                    IR_node.variable_name + '_var', IR_node.variable_name,
                    epsilon, 0 if self.phase == 'train' else 1,
                    IR_node.variable_name))
        self.inputs.append(IR_node.variable_name + '_scale')
        self.inputs.append(IR_node.variable_name + '_bias')
        self.inputs.append(IR_node.variable_name + '_mean')
        self.inputs.append(IR_node.variable_name + '_var')
        self.initializer.append(IR_node.variable_name + '_scale_init')
        self.initializer.append(IR_node.variable_name + '_bias_init')
        self.initializer.append(IR_node.variable_name + '_mean_init')
        self.initializer.append(IR_node.variable_name + '_var_init')
        self.nodes.append(IR_node.variable_name)

    def emit_Relu(self, IR_node):
        self.add_body(
            1,
            "{:15} = helper.make_node('Relu', inputs=['{}'], outputs=['{}'], name='{}')"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    IR_node.variable_name, IR_node.variable_name))
        self.nodes.append(IR_node.variable_name)

    def emit_Add(self, IR_node):
        input_layers = ', '.join(
            "'" + self.IR_graph.get_parent(IR_node.name, [num]).real_variable_name + "'"
            for num in range(len(IR_node.in_edges)))
        self.add_body(
            1,
            "{:15} = helper.make_node('Add', inputs=[{}], outputs=['{}'], name='{}')"
            .format(IR_node.variable_name, input_layers, IR_node.variable_name,
                    IR_node.variable_name))
        self.nodes.append(IR_node.variable_name)

    def emit_Pool(self, IR_node):
        pooling_type = IR_node.get_attr('pooling_type')
        if IR_node.layer.attr['global_pooling'].b:
            if pooling_type == 'AVG':
                self.add_body(
                    1,
                    "{:15} = helper.make_node('GlobalAveragePool', inputs=['{}'], outputs=['{}'], name='{}')"
                    .format(IR_node.variable_name,
                            self.parent_variable_name(IR_node),
                            IR_node.variable_name, IR_node.variable_name))
                self.nodes.append(IR_node.variable_name)
            else:
                print("OnnxEmitter has not supported Global Pool type [%s]." %
                      (pooling_type))
                self.emit_UNKNOWN(IR_node)
        else:
            if pooling_type in ['AVG', 'MAX']:
                if pooling_type == 'AVG':
                    op_name = 'AveragePool'
                elif pooling_type == 'MAX':
                    op_name = 'MaxPool'
                kernel_shape = list(IR_node.get_attr('kernel_shape')[1:-1])
                pads = IR_node.get_attr('pads')
                pad_length = len(pads)
                pads = pads[1:pad_length // 2 - 1] + pads[pad_length // 2 +
                                                          1:pad_length - 1]
                strides = list(IR_node.get_attr('strides')[1:-1])
                self.add_body(
                    1,
                    "{:15} = helper.make_node('{}', inputs=['{}'],outputs=['{}'], kernel_shape={}, pads={}, strides={}, name='{}')"
                    .format(IR_node.variable_name, op_name,
                            self.parent_variable_name(IR_node),
                            IR_node.variable_name, kernel_shape, pads, strides,
                            IR_node.variable_name))
                self.nodes.append(IR_node.variable_name)
            else:
                print("OnnxEmitter has not supported Pool type [%s]." %
                      (pooling_type))
                self.emit_UNKNOWN(IR_node)

    def emit_FullyConnected(self, IR_node):
        self.check_if_need_transpose(IR_node)
        use_bias = IR_node.get_attr('use_bias', True)
        units = IR_node.get_attr('units')

        self.add_body(
            1, "{:15} = __weights_dict['{}']['weights']".format(
                IR_node.variable_name + '_weight_array', IR_node.name))

        self.add_body(
            1,
            "{:15} = helper.make_tensor_value_info('{}', onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], list({}.shape))"
            .format(IR_node.variable_name + '_weight',
                    IR_node.variable_name + '_weight',
                    IR_node.variable_name + '_weight_array',
                    IR_node.variable_name + '_weight_array'))

        self.add_body(
            1,
            "{:15} = helper.make_tensor(name='{}', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float))"
            .format(IR_node.variable_name + '_weight_init',
                    IR_node.variable_name + '_weight',
                    IR_node.variable_name + '_weight_array',
                    IR_node.variable_name + '_weight_array',
                    IR_node.variable_name + '_weight_array'))

        if use_bias:
            self.add_body(
                1, "{:15} = __weights_dict['{}']['bias'].squeeze()".format(
                    IR_node.variable_name + '_bias_array', IR_node.name))
        else:
            self.add_body(
                1, "{:15} = np.zeros({}, dtype=np.float32)".format(
                    IR_node.variable_name + '_bias_array', units))
        self.add_body(
            1,
            "{:15} = helper.make_tensor_value_info('{}', onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], list({}.shape))"
            .format(IR_node.variable_name + '_bias',
                    IR_node.variable_name + '_bias',
                    IR_node.variable_name + '_bias_array',
                    IR_node.variable_name + '_bias_array'))

        self.add_body(
            1,
            "{:15} = helper.make_tensor(name='{}', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float))"
            .format(IR_node.variable_name + '_bias_init',
                    IR_node.variable_name + '_bias',
                    IR_node.variable_name + '_bias_array',
                    IR_node.variable_name + '_bias_array',
                    IR_node.variable_name + '_bias_array'))
        self.add_body(
            1,
            "{:15} = helper.make_node('Gemm', inputs=['{}', '{}', '{}'], outputs=['{}'], name='{}')"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    IR_node.variable_name + '_weight',
                    IR_node.variable_name + '_bias', IR_node.variable_name,
                    IR_node.variable_name))
        self.initializer.append(IR_node.variable_name + '_weight_init')
        self.initializer.append(IR_node.variable_name + '_bias_init')
        self.inputs.append(IR_node.variable_name + '_weight')
        self.inputs.append(IR_node.variable_name + '_bias')
        self.nodes.append(IR_node.variable_name)

    def emit_Pad(self, IR_node):
        mode = IR_node.layer.attr['mode'].s.decode()
        pads = IR_node.get_attr('pads')
        pad_length = len(pads)
        pads = [0, 0] + pads[1:pad_length // 2 - 1] + [
            0, 0
        ] + pads[pad_length // 2 + 1:pad_length - 1]
        self.add_body(
            1,
            "{:15} = helper.make_node('Pad', inputs=['{}'], outputs=['{}'], mode='{}', pads={}, name='{}')"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    IR_node.variable_name, mode, pads, IR_node.variable_name))
        self.nodes.append(IR_node.variable_name)
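
    # Worked example (added for exposition) of the pad reshuffle above: IR
    # pads arrive in NHWC begin/end order; ONNX wants per-axis begins then
    # ends in NCHW order, with zero padding on batch and channel axes.
    @staticmethod
    def _sketch_pad_layout():
        ir_pads = [0, 1, 2, 0, 0, 3, 4, 0]   # NHWC begins (N,H,W,C) + ends
        half = len(ir_pads) // 2
        return [0, 0] + ir_pads[1:half - 1] + [0, 0] + ir_pads[half + 1:-1]
        # -> [0, 0, 1, 2, 0, 0, 3, 4]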

    def emit_Concat(self, IR_node):
        axis = IR_node.get_attr('axis') - 2
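        # IR axes follow NHWC while the emitted ONNX graph is NCHW, so the common
        # channel-concat case maps axis 3 -> 1 (hence the "- 2" for 4-D inputs).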
        inputs = ', '.join("'" + self.IR_graph.get_node(i).real_variable_name +
                           "'" for i in IR_node.in_edges)
        self.add_body(
            1,
            "{:15} = helper.make_node('Concat', inputs=[{}], outputs=['{}'], axis={}, name='{}')"
            .format(IR_node.variable_name, inputs, IR_node.variable_name, axis,
                    IR_node.variable_name))
        self.nodes.append(IR_node.variable_name)

    def emit_Flatten(self, IR_node):
        self.add_body(
            1,
            "{:15} = helper.make_node('Flatten', inputs=['{}'], outputs=['{}'], name='{}')"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    IR_node.variable_name, IR_node.variable_name))
        self.nodes.append(IR_node.variable_name)

    def emit_Softmax(self, IR_node):
        self.add_body(
            1,
            "{:15} = helper.make_node('Softmax', inputs=['{}'], outputs=['{}'], name='{}')"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    IR_node.variable_name, IR_node.variable_name))
        self.nodes.append(IR_node.variable_name)

    def emit_Constant(self, IR_node):
        if IR_node.get_attr('value'):
            value = 'np.array({}, dtype=np.float32)'.format(
                IR_node.get_attr('value'))
            self.add_body(
                1, "{:15} = {}".format(IR_node.variable_name + '_value_array',
                                       value))
        else:
            self.add_body(
                1, "{:15} = __weights_dict['{}']['value']".format(
                    IR_node.variable_name + '_value_array', IR_node.name))
        self.add_body(
            1,
            "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float)), name='{}')"
            .format(IR_node.variable_name, IR_node.variable_name,
                    IR_node.variable_name + '_value_array',
                    IR_node.variable_name + '_value_array',
                    IR_node.variable_name + '_value_array',
                    IR_node.variable_name))
        self.nodes.append(IR_node.variable_name)

    def emit_Sub(self, IR_node):
        inputs = ', '.join("'" + self.IR_graph.get_node(i).real_variable_name +
                           "'" for i in IR_node.in_edges)
        self.add_body(
            1,
            "{:15} = helper.make_node('Sub', inputs=[{}], outputs=['{}'], broadcast=1, name='{}')"
            .format(IR_node.variable_name, inputs, IR_node.variable_name,
                    IR_node.variable_name))
        self.nodes.append(IR_node.variable_name)

    def emit_Mul(self, IR_node):
        inputs = ', '.join("'" + self.IR_graph.get_node(i).real_variable_name +
                           "'" for i in IR_node.in_edges)

        if IR_node.name in self.weights_dict and 'weights' in self.weights_dict[
                IR_node.name]:
            self.add_body(
                1,
                "{:15} = np.array([__weights_dict['{}']['weights']])".format(
                    IR_node.variable_name + '_weight_array', IR_node.name))
            self.add_body(
                1,
                "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}), name='{}')"
                .format(IR_node.variable_name + '_weight',
                        IR_node.variable_name + '_weight',
                        IR_node.variable_name + '_weight_array',
                        IR_node.variable_name + '_weight_array',
                        IR_node.variable_name + '_weight_array',
                        IR_node.variable_name + '_weight'))
            inputs += ", '" + IR_node.variable_name + "_weight'"
            self.nodes.append(IR_node.variable_name + '_weight')

        self.add_body(
            1,
            "{:15} = helper.make_node('Mul', inputs=[{}], outputs=['{}'], broadcast=1, name='{}')"
            .format(IR_node.variable_name, inputs, IR_node.variable_name,
                    IR_node.variable_name))
        self.nodes.append(IR_node.variable_name)

    def emit_Dropout(self, IR_node):
        self.add_body(
            1,
            "{:15} = helper.make_node('Dropout', inputs=['{}'], outputs=['{}'], is_test={}, ratio={}, name='{}')"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    IR_node.variable_name, 0 if self.phase == 'train' else 1,
                    1 - IR_node.get_attr('keep_prob'), IR_node.variable_name))
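        # Older ONNX opsets expose Dropout's train/test switch via is_test; ratio
        # is the drop probability, i.e. 1 - keep_prob.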
        self.nodes.append(IR_node.variable_name)

    def emit_Squeeze(self, IR_node):
        IR_node.real_name = self.IR_graph.get_node(
            IR_node.in_edges[0]).real_name

    def emit_ReduceMean(self, IR_node):
        axes = IR_node.layer.attr['axes'].list.i[:]
        axes = ','.join('%s' % OnnxEmitter.transpose_map[i] for i in axes)
        self.add_body(
            1,
            "{:15} = helper.make_node('ReduceMean', inputs=['{}'], outputs=['{}'], axes=[{}], keepdims={}, name='{}')"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    IR_node.variable_name, axes,
                    1 if IR_node.layer.attr['keepdims'].b else 0,
                    IR_node.variable_name))
        self.nodes.append(IR_node.variable_name)

    def emit_Reshape(self, IR_node):
        shape = [
            item if item != -1 else 1 for item in IR_node.get_attr('shape')
        ]
        if len(shape) == 4:
            shape = [shape[i] for i in [0, 3, 1, 2]]
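        # 4-D target shapes are reordered from NHWC to NCHW above,
        # e.g. [1, 224, 224, 3] -> [1, 3, 224, 224].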
        shape_str = ', '.join('%s' % i for i in shape)
        self.add_body(
            1, "{:15} = np.array([{}], dtype=np.int64)".format(
                IR_node.variable_name + '_shape_array', shape_str))
        self.add_body(
            1,
            "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}), name='{}')"
            .format(IR_node.variable_name + '_shape',
                    IR_node.variable_name + '_shape',
                    IR_node.variable_name + '_shape_array',
                    IR_node.variable_name + '_shape_array',
                    IR_node.variable_name + '_shape_array',
                    IR_node.variable_name + '_shape'))
        self.add_body(
            1,
            "{:15} = helper.make_node('Reshape', inputs=['{}', '{}'], outputs=['{}'], name='{}')"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    IR_node.variable_name + '_shape', IR_node.variable_name,
                    IR_node.variable_name))
        self.nodes.append(IR_node.variable_name + '_shape')
        self.nodes.append(IR_node.variable_name)

    def emit_LRN(self, IR_node):
        alpha = IR_node.get_attr('alpha')
        beta = IR_node.get_attr('beta')
        bias = IR_node.get_attr('bias', 1.0)
        size = IR_node.get_attr('size') * 2 - 1
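        # The IR stores the half-width of the normalization window; ONNX LRN expects
        # the full width, e.g. size=3 -> 2*3-1 = 5 (mirroring the CoreML emitter below).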
        self.add_body(
            1,
            "{:15} = helper.make_node('LRN', inputs=['{}'], outputs=['{}'], alpha={}, beta={}, bias={}, size={}, name='{}')"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    IR_node.variable_name, alpha, beta, bias, size,
                    IR_node.variable_name))
        self.nodes.append(IR_node.variable_name)

    def emit_Relu6(self, IR_node):
        self.add_body(
            1,
            "{:15} = helper.make_node('Clip', inputs=['{}'], outputs=['{}'], min=0.0, max=6.0, name='{}')"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    IR_node.variable_name, IR_node.variable_name))
        self.nodes.append(IR_node.variable_name)

    def emit_DepthwiseConv(self, IR_node):
        self.emit_Conv(IR_node)

    def emit_Slice(self, IR_node):
        if self.IR_graph.get_parent(IR_node.name, [0]).type == 'Shape':
            pass
        else:
            starts = IR_node.get_attr('starts')
            starts = [starts[0], starts[-1]] + starts[1:-1]
            ends = IR_node.get_attr('ends')
            ends = [ends[0], ends[-1]] + ends[1:-1]
            ends = [i if i != 0 else sys.maxsize for i in ends]
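            # starts/ends arrive in NHWC order; moving the last entry to position 1
            # yields NCHW. An end of 0 appears to mean "to the end", so a large
            # sentinel is substituted for ONNX Slice.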
            self.add_body(
                1,
                "{:15} = helper.make_node('Slice', inputs=['{}'], outputs=['{}'], starts={}, ends={}, name='{}')"
                .format(IR_node.variable_name,
                        self.parent_variable_name(IR_node),
                        IR_node.variable_name, starts, ends,
                        IR_node.variable_name))
            self.nodes.append(IR_node.variable_name)

    def emit_LeakyRelu(self, IR_node):
        alpha = IR_node.get_attr('alpha')
        self.add_body(
            1,
            "{:15} = helper.make_node('LeakyRelu', inputs=['{}'], outputs=['{}'], alpha={}, name='{}')"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    IR_node.variable_name, alpha, IR_node.variable_name))
        self.nodes.append(IR_node.variable_name)

    def emit_PRelu(self, IR_node):
        slope = IR_node.get_attr('gamma')
        self.add_body(
            1,
            "{:15} = helper.make_node('PRelu', inputs=['{}'], outputs=['{}'], slope={}, name='{}')"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    IR_node.variable_name, slope, IR_node.variable_name))
        self.nodes.append(IR_node.variable_name)

    def emit_SpaceToDepth(self, IR_node):
        blocksize = IR_node.get_attr('blocksize')
        self.add_body(
            1,
            "{:15} = helper.make_node('SpaceToDepth', inputs=['{}'], outputs=['{}'], blocksize={}, name='{}')"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    IR_node.variable_name, blocksize, IR_node.variable_name))
        self.nodes.append(IR_node.variable_name)

    def emit_Sigmoid(self, IR_node):
        self.add_body(
            1,
            "{: <15} = helper.make_node('Sigmoid', inputs=['{}'], outputs=['{}'], name='{}')"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    IR_node.variable_name, IR_node.variable_name))
        self.nodes.append(IR_node.variable_name)

    def emit_UNKNOWN(self, IR_node):
        print(IR_node.IR_layer.name)
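
# Minimal usage sketch (illustrative, not part of the original listing; the
# constructor and gen_code signatures are assumed to mirror the other emitters
# in this file, taking IR architecture/weight paths and a phase):
# emitter = OnnxEmitter(('ir_model.pb', 'ir_model.npy'))
# onnx_code = emitter.gen_code(phase='test')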
Example No. 13
0
class CoreMLEmitter(Emitter):
    def __init__(self, architecture, weight):
        super(CoreMLEmitter, self).__init__()
        if not os.path.exists(architecture):
            raise ValueError(
                "IR architecture file [{}] is not found.".format(architecture))
        else:
            self.IR_graph = IRGraph(architecture)
            self.IR_graph.build()

        if not os.path.exists(weight):
            raise ValueError(
                "IR weight file [{}] is not found.".format(weight))
        else:
            self._load_weights(weight)

    def _get_inout(self):
        input_features = []
        output_features = []
        for input_node in self.IR_graph.input_layers:
            if self.IR_graph.get_node(input_node).type == 'Const':
                continue
            shape = shape_to_list(
                self.IR_graph.get_node(input_node).get_attr('shape'))
            shape = _infer_coreml_input_shape(shape)
            input_features.append((str(input_node), shape))
            print("CoreML Model Input Layer: [{}] {}".format(
                input_node, shape))

        for output_node in self.IR_graph.output_layers:

            node = self.IR_graph.get_node(output_node)

            if node.type == 'Pack':
                continue

            node.out_edges.append(node.name)
            shape = node.get_attr('_output_shapes')
            if shape:
                shape = shape_to_list(shape[0])
            else:
                shape = [1]

            shape = _infer_coreml_input_shape(shape)

            output_features.append((str(output_node), shape))

            print("CoreML Model Output Layer: [{}] {}".format(
                output_node, shape))

        return list(input_features), list(output_features)

    def _connect_coreml_layers(self):
        for layer in self.builder.nn_spec.layers:
            for i, out_node in enumerate(layer.output):
                layer.output[i] = self.IR_graph.get_node(out_node).real_name

    def gen_model(self,
                  input_names=None,
                  output_names=None,
                  image_input_names=None,
                  is_bgr=False,
                  red_bias=0.0,
                  green_bias=0.0,
                  blue_bias=0.0,
                  gray_bias=0.0,
                  image_scale=1.0,
                  class_labels=None,
                  predicted_feature_name=None,
                  predicted_probabilities_output=''):

        input_features, output_features = self._get_inout()
        is_classifier = class_labels is not None
        mode = 'classifier' if is_classifier else None
        self.builder = _NeuralNetworkBuilder(input_features,
                                             output_features,
                                             mode=mode)

        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            print("Converting layer {}({})".format(current_node.name,
                                                   current_node.type))
            node_type = current_node.type
            if hasattr(self, "emit_" + node_type):
                func = getattr(self, "emit_" + node_type)
                func(current_node)
            else:
                print("CoreMLEmitter has not supported operator [%s]." %
                      (node_type))
                self.emit_UNKNOWN(current_node)
                assert False

        # Add classifier classes (if applicable)
        if is_classifier:
            classes_in = class_labels
            if isinstance(classes_in, _string_types):
                if not os.path.isfile(classes_in):
                    raise ValueError(
                        "Path to class labels [{}] does not exist.".format(
                            classes_in))
                with open(classes_in, 'r') as f:
                    classes = f.read()
                classes = classes.splitlines()
            elif type(classes_in) is list:  # list[int or str]
                classes = classes_in
            else:
                raise ValueError(
                    'Class labels must be a list of integers / strings, or a file path'
                )

            if predicted_feature_name is not None:
                self.builder.set_class_labels(
                    classes,
                    predicted_feature_name=predicted_feature_name,
                    prediction_blob=predicted_probabilities_output)
            else:
                self.builder.set_class_labels(classes)

        # Set pre-processing parameters
        self.builder.set_pre_processing_parameters(
            image_input_names=[input_features[0][0]],
            # note: the image_input_names argument is not used; the first input feature is taken instead
            is_bgr=is_bgr,
            red_bias=red_bias,
            green_bias=green_bias,
            blue_bias=blue_bias,
            gray_bias=gray_bias,
            image_scale=image_scale)

        # Return the protobuf spec
        # model = _MLModel(self.builder.spec)

        print(self.builder.spec.description)

        return self.builder.spec, input_features, output_features

    @staticmethod
    def _get_padding(IR_node):

        auto_pad = IR_node.get_attr('auto_pad')
        if auto_pad is not None:
            if auto_pad == 'VALID':
                pass
            else:
                return 'SAME'

        pads = IR_node.get_attr('pads', [0, 0, 0, 0, 0, 0, 0, 0])

        return pads
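
    # _get_padding returns either the string 'SAME' or the raw IR pads list,
    # e.g. [N_b, H_b, W_b, C_b, N_e, H_e, W_e, C_e] for 4-D inputs (zeros when unset).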

    def _emit_merge(self, IR_node, func):
        """
        Convert concat layer to coreml.
        """
        # Get input and output names
        input_names = [
            self.IR_graph.get_node(inp).real_name for inp in IR_node.in_edges
        ]

        self.builder.add_elementwise(name=IR_node.name,
                                     input_names=input_names,
                                     output_name=IR_node.name,
                                     mode=func)

    def emit_Conv(self, IR_node):
        """
        Convert convolution layer to coreml.
        """
        has_bias = IR_node.get_attr('use_bias', False)
        is_deconv = False

        # Dimensions and weights
        kernel_shape = IR_node.get_attr('kernel_shape')

        if len(kernel_shape) == 4:
            height, width, input_channels, output_channels = kernel_shape
        elif len(kernel_shape) == 5:
            depth, height, width, input_channels, output_channels = kernel_shape
        else:
            raise NotImplementedError()

        output_shape = None

        # W should have shape (height, width, kernel_channels, output_channels), where kernel_channel = input_channels / groups
        W = self.weights_dict[IR_node.name]['weights']
        b = self.weights_dict[IR_node.name]['bias'] if has_bias else None

        stride_height, stride_width = IR_node.get_attr(
            'strides')[1], IR_node.get_attr('strides')[2]

        # Dilations
        dilations = IR_node.get_attr('dilations', [1, 1])
        if is_deconv and dilations != [1, 1]:
            raise ValueError(
                "Unsupported non-unity dilation for Deconvolution layer")

        groups = IR_node.get_attr('group', 1)

        kernel_channels = input_channels // groups
        padding = self._get_padding(IR_node)

        if isinstance(padding, list):
            border_mode = "valid"
            # see protobuf
            padding_top, padding_left, padding_bottom, padding_right = padding[
                1], padding[2], padding[5], padding[6]
        else:
            border_mode = "same"
            padding_top, padding_left, padding_bottom, padding_right = 0, 0, 0, 0
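        # In the explicit-pads case, indices 1/2 and 5/6 of the IR pads list are
        # the H/W begin and end amounts; batch and channel pads are always zero.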

        input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name

        self.builder.add_convolution(name=IR_node.real_name,
                                     kernel_channels=kernel_channels,
                                     output_channels=output_channels,
                                     height=height,
                                     width=width,
                                     stride_height=stride_height,
                                     stride_width=stride_width,
                                     border_mode=border_mode,
                                     groups=groups,
                                     W=W,
                                     b=b,
                                     has_bias=has_bias,
                                     is_deconv=is_deconv,
                                     output_shape=output_shape,
                                     input_name=input_name,
                                     padding_top=padding_top,
                                     padding_left=padding_left,
                                     padding_bottom=padding_bottom,
                                     padding_right=padding_right,
                                     output_name=IR_node.real_name,
                                     dilation_factors=dilations)

    def emit_ConvTranspose(self, IR_node):
        """
        Convert convolution layer to coreml.
        """

        # assert False
        has_bias = IR_node.get_attr('use_bias', False)
        is_deconv = True

        # Get the weights.

        kernel_shape = IR_node.get_attr('kernel_shape')

        if len(kernel_shape) == 4:
            height, width, output_channels, kernel_channels = kernel_shape
            W = self.weights_dict[IR_node.name]['weights']
            W = W.reshape(kernel_shape)
            W = W.transpose((0, 1, 3, 2))
        elif len(kernel_shape) == 5:
            depth, height, width, output_channels, kernel_channels = kernel_shape
            W = self.weights_dict[IR_node.name]['weights']
            W = W.reshape(kernel_shape)
            W = W.transpose((0, 1, 2, 4, 3))
        else:
            raise NotImplementedError()

        output_shape = None
        b = self.weights_dict[IR_node.name]['bias'] if has_bias else None

        stride_height, stride_width = IR_node.get_attr(
            'strides')[1], IR_node.get_attr('strides')[2]

        # Dilations
        dilations = IR_node.get_attr('dilations', [1, 1])
        if is_deconv and dilations != [1, 1]:
            raise ValueError(
                "Unsupported non-unity dilation for Deconvolution layer")

        groups = IR_node.get_attr('group', 1)

        padding = self._get_padding(IR_node)

        if isinstance(padding, list):
            border_mode = "valid"
            # see protobuf
            padding_top, padding_left, padding_bottom, padding_right = padding[
                1], padding[2], padding[5], padding[6]
        else:
            border_mode = "same"
            padding_top, padding_left, padding_bottom, padding_right = 0, 0, 0, 0

        input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name

        self.builder.add_convolution(name=IR_node.real_name,
                                     kernel_channels=kernel_channels,
                                     output_channels=output_channels,
                                     height=height,
                                     width=width,
                                     stride_height=stride_height,
                                     stride_width=stride_width,
                                     border_mode=border_mode,
                                     groups=groups,
                                     W=W,
                                     b=b,
                                     has_bias=has_bias,
                                     is_deconv=is_deconv,
                                     output_shape=output_shape,
                                     input_name=input_name,
                                     padding_top=padding_top,
                                     padding_left=padding_left,
                                     padding_bottom=padding_bottom,
                                     padding_right=padding_right,
                                     output_name=IR_node.real_name,
                                     dilation_factors=dilations)

    def emit_DepthwiseConv(self, IR_node):
        # depth-wise convolution

        input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name
        kernel_channels = 1
        is_deconv = False
        has_bias = IR_node.get_attr('use_bias', False)

        depth_multiplier = IR_node.get_attr('kernel_shape')[-1]

        W = self.weights_dict[IR_node.name]['weights']
        height, width, channels, n_filters = W.shape
        output_shape = None
        W = np.reshape(W, (height, width, 1, channels * depth_multiplier))
        b = self.weights_dict[IR_node.name]['bias'] if has_bias else None

        # Dilations
        dilations = IR_node.get_attr('dilations', [1, 1])

        padding = self._get_padding(IR_node)

        if isinstance(padding, list):
            border_mode = "valid"
            # see protobuf
            padding_top, padding_left, padding_bottom, padding_right = padding[
                1], padding[2], padding[5], padding[6]
        else:
            border_mode = "same"
            padding_top, padding_left, padding_bottom, padding_right = 0, 0, 0, 0

        output_channels = W.shape[-1]
        groups = W.shape[-1]
        stride_height, stride_width = IR_node.get_attr(
            'strides')[1], IR_node.get_attr('strides')[2]

        self.builder.add_convolution(name=IR_node.real_name,
                                     kernel_channels=kernel_channels,
                                     output_channels=output_channels,
                                     height=height,
                                     width=width,
                                     stride_height=stride_height,
                                     stride_width=stride_width,
                                     border_mode=border_mode,
                                     groups=groups,
                                     W=W,
                                     b=b,
                                     has_bias=has_bias,
                                     is_deconv=is_deconv,
                                     output_shape=output_shape,
                                     padding_top=padding_top,
                                     padding_left=padding_left,
                                     padding_bottom=padding_bottom,
                                     padding_right=padding_right,
                                     input_name=input_name,
                                     output_name=IR_node.real_name,
                                     dilation_factors=dilations)

    def emit_Pool(self, IR_node):
        """
        Convert pooling layer to coreml.
        """
        # Get input and output names
        input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name

        # Pooling layer type
        pooling_type = IR_node.get_attr('pooling_type')
        if pooling_type == 'MAX':
            layer_type_str = 'MAX'
        elif pooling_type == 'AVG':
            layer_type_str = 'AVERAGE'
        else:
            raise TypeError("Pooling type %s not supported" % pooling_type)

        # if it's global, set the global flag
        global_pooling = IR_node.get_attr('global_pooling', False)
        dim = len(IR_node.get_attr('strides')) - 2

        if global_pooling:
            if dim == 2:

                stride_height, stride_width = tuple(
                    IR_node.get_attr('strides')[1:-1])
                height, width = 1, 1

                # TODO  global pooling modification

                # Padding
                padding = self._get_padding(IR_node)

                if isinstance(padding, list):
                    padding_type = "VALID"
                    # see protobuf
                    padding_top, padding_left, padding_bottom, padding_right = padding[
                        1], padding[2], padding[5], padding[6]
                else:
                    padding_type = "SAME"
                    padding_top, padding_left, padding_bottom, padding_right = 0, 0, 0, 0

            elif dim == 1:
                # 1-D global pooling is not implemented; the commented lines
                # below are an unreachable sketch (keras_layer is undefined here).
                raise NotImplementedError()
                # global_pooling = False
                # _, width, channels = keras_layer.input_shape
                # height = 1
                # stride_height, stride_width = height, width
                # padding_type = 'VALID'
            else:
                raise NotImplementedError()

        else:

            height, width = tuple(IR_node.get_attr('kernel_shape')[1:-1])
            stride_height, stride_width = tuple(
                IR_node.get_attr('strides')[1:-1])

            # Padding
            padding = self._get_padding(IR_node)
            if isinstance(padding, list):

                padding_type = "VALID"
                # see protobuf
                padding_top, padding_left, padding_bottom, padding_right = padding[
                    1], padding[2], padding[5], padding[6]
            else:
                padding_type = "SAME"
                padding_top, padding_left, padding_bottom, padding_right = 0, 0, 0, 0

        self.builder.add_pooling(name=IR_node.name,
                                 height=height,
                                 width=width,
                                 stride_height=stride_height,
                                 stride_width=stride_width,
                                 layer_type=layer_type_str,
                                 padding_type=padding_type,
                                 padding_top=padding_top,
                                 padding_left=padding_left,
                                 padding_bottom=padding_bottom,
                                 padding_right=padding_right,
                                 input_name=input_name,
                                 output_name=IR_node.name,
                                 exclude_pad_area=True,
                                 is_global=global_pooling)

    def emit_Scale(self, IR_node):
        # Get input and output names
        input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name

        weights = self.weights_dict[IR_node.name]['scale']
        has_bias = IR_node.get_attr('use_bias', False)
        bias = self.weights_dict[IR_node.name]['bias'] if has_bias else None

        shape_scale = self.weights_dict[IR_node.name]['shapeScale']
        shape_bias = self.weights_dict[IR_node.name]['shapeBias'] if has_bias else None

        self.builder.add_scale(name=IR_node.real_name,
                               W=weights,
                               b=bias,
                               has_bias=has_bias,
                               input_name=input_name,
                               output_name=IR_node.name,
                               shape_scale=[shape_scale],
                               shape_bias=[shape_bias] if has_bias else None)

    def emit_UNKNOWN(self, IR_node):
        print(IR_node.name)

    def emit_Crop(self, IR_node):
        input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name
        output_name = IR_node.real_name

        is_1d = False
        border = IR_node.get_attr('border')

        if is_1d:
            raise ValueError("Unrecognized padding option: %s" % (str(border)))
        else:
            if type(border) is int:
                top = left = bottom = right = border
            elif type(border) is list:
                # type: "list(int). A 1-D values of (leftBorder, topBorder, rightBorder, bottomBorder)."
                # This is central crop
                top, left = border[1], border[0]
                bottom, right = border[1], border[0]
            else:
                raise ValueError("Unrecognized padding option: %s" %
                                 (str(border)))

        # Now add the layer
        self.builder.add_crop(name=IR_node.name,
                              left=left,
                              right=right,
                              top=top,
                              bottom=bottom,
                              offset=[0, 0],
                              input_names=[input_name],
                              output_name=output_name)

    def emit_ReduceMean(self, IR_node):
        """
        Convert ReduceMean layer to coreml.
        """

        axis = IR_node.get_attr('axes', [1, 2])

        #       Allowed values: 'CHW', 'HW', 'C', 'H', 'W'
        if len(axis) == 1:
            if axis[0] == 0:
                axis_str = 'C'
            elif axis[0] == 1:
                axis_str = 'H'
            elif axis[0] == 2:
                axis_str = 'W'
            else:
                raise ValueError("Unsupported reduce axis {}".format(axis[0]))
        elif len(axis) == 2:
            axis_str = 'HW'
        elif len(axis) == 3:
            axis_str = 'CHW'
        else:
            raise ValueError("Unsupported reduce axes {}".format(axis))

        # Get input and output names
        input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name

        self.builder.add_reduce(IR_node.name,
                                input_name=input_name,
                                output_name=IR_node.name,
                                axis=axis_str,
                                mode='avg',
                                epsilon=1e-6)

    def emit_DataInput(self, IR_node):
        """ Layers that can be skipped. """
        return

    def emit_Dropout(self, IR_node):
        """ Layers that can be skipped (because they are train time only. """
        IR_node.real_name = self.IR_graph.get_parent(IR_node.name,
                                                     [0]).real_name

    def emit_FullyConnected(self, IR_node):
        """
        Convert a dense layer to coreml.
        """
        # Get input and output names
        input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name
        output_name = IR_node.out_edges[0]

        has_bias = IR_node.get_attr('use_bias')

        # Get the weights from keras
        W = self.weights_dict[IR_node.name]['weights'].T
        Wb = self.weights_dict[IR_node.name]['bias'].T if has_bias else None
        output_channels, input_channels = W.shape

        self.builder.add_inner_product(name=IR_node.name,
                                       W=W,
                                       b=Wb,
                                       input_channels=input_channels,
                                       output_channels=output_channels,
                                       has_bias=has_bias,
                                       input_name=input_name,
                                       output_name=IR_node.name)

    def emit_Flatten(self, IR_node):
        """
        Convert a flatten layer from keras to coreml.
        """
        # Get input and output names
        input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name
        output_name = IR_node.out_edges[0]
        """
        # blob_order == 0 if the input blob needs not be rearranged
        # blob_order == 1 if the input blob needs to be rearranged
        blob_order = 0

        # using keras_layer.input.shape have a "?" (Dimension[None] at the front),
        # making a 3D tensor with unknown batch size 4D
        if len(keras_layer.input.shape) == 4:
            blob_order = 1
        """

        self.builder.add_flatten(name=IR_node.name,
                                 mode=1,
                                 input_name=input_name,
                                 output_name=IR_node.name)

    def emit_Reshape(self, IR_node):
        def ShapeStrToTuple(string, batch_none=False):
            ls = [int(item) for item in string.split(', ')]
            if batch_none:
                ls.insert(0, None)
            return tuple(ls)

        last_node = self.IR_graph.get_node(IR_node.in_edges[0]).layer
        input_shape_dims = last_node.attr["_output_shapes"].list.shape
        target_shape_dims = IR_node.IR_layer.attr["_output_shapes"].list.shape

        input_shape = ShapeStrToTuple(IRGraph.shapeToStr(input_shape_dims[0]), True)
        target_shape = ShapeStrToTuple(IRGraph.shapeToStr(target_shape_dims[0]))

        def get_coreml_target_shape(target_shape):
            if len(target_shape) == 1:  #(D,)
                coreml_shape = (1, target_shape[0], 1, 1)
            elif len(target_shape) == 2:  #(S,D)
                coreml_shape = target_shape + (1, 1)
            elif len(target_shape) == 3:  #(H,W,C)
                coreml_shape = (1, target_shape[2], target_shape[0],
                                target_shape[1])
            else:
                coreml_shape = None
            return coreml_shape

        def get_mode(input_shape, target_shape):
            in_shape = input_shape[1:]
            if len(in_shape) == 3 or len(target_shape) == 3:
                return 1
            else:
                return 0

        input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name
        new_shape = get_coreml_target_shape(target_shape)
        mode = get_mode(input_shape, target_shape)

        self.builder.add_reshape(name=IR_node.real_name,
                                 input_name=input_name,
                                 output_name=IR_node.real_name,
                                 target_shape=new_shape,
                                 mode=mode)

    def emit_Tanh(self, IR_node):
        assert False  # not implemented for CoreML; the code below is an unreachable Keras-style sketch
        code = "{:<15} = Activation(name = '{}', activation = tanh)({})".format(
            IR_node.replace_scope(IR_node.name), IR_node.name,
            IR_node.replace_scope(IR_node.in_edges[0]))
        return code

    def _emit_activation(self, IR_node, act, params=None):
        # Get input and output names
        input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name
        output_name = IR_node.real_name
        if not isinstance(params, list):
            params = [params]

        self.builder.add_activation(name=IR_node.real_name,
                                    non_linearity=act,
                                    input_name=input_name,
                                    output_name=output_name,
                                    params=params)

    # activation emit
    def emit_Relu(self, IR_node):
        self._emit_activation(IR_node, 'RELU')

    def emit_PRelu(self, IR_node):
        self._emit_activation(IR_node, 'PRELU', IR_node.get_attr('gamma', 0))

    def emit_LeakyRelu(self, IR_node):
        self._emit_activation(IR_node, 'LEAKYRELU',
                              IR_node.get_attr('alpha', 0))

    def emit_Elu(self, IR_node):
        self._emit_activation(IR_node, 'ELU', IR_node.get_attr('alpha', 0))

    def emit_ThresholdedRelu(self, IR_node):
        self._emit_activation(IR_node, 'THRESHOLDEDRELU',
                              IR_node.get_attr('alpha', 0))

    def emit_ScaledTanh(self, IR_node):
        self._emit_activation(
            IR_node, 'SCALED_TANH',
            [IR_node.get_attr('alpha', 0),
             IR_node.get_attr('beta', 0)])

    def emit_linear(self, IR_node):
        self._emit_activation(
            IR_node, 'LINEAR',
            [IR_node.get_attr('alpha', 0),
             IR_node.get_attr('beta', 0)])

    def emit_SigmoidHard(self, IR_node):
        self._emit_activation(
            IR_node, 'SIGMOID_HARD',
            [IR_node.get_attr('alpha', 0),
             IR_node.get_attr('beta', 0)])

    def emit_ParametricSoftplus(self, IR_node):
        self._emit_activation(
            IR_node, 'PARAMETRICSOFTPLUS',
            [IR_node.get_attr('alpha', 0),
             IR_node.get_attr('beta', 0)])

    def emit_Softmax(self, IR_node):
        # Get input and output names
        input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name
        output_name = IR_node.out_edges[0]
        self.builder.add_softmax(name=IR_node.name,
                                 input_name=input_name,
                                 output_name=IR_node.name)

    def emit_Sigmoid(self, IR_node):
        assert False  # not implemented for CoreML; the code below is an unreachable Keras-style sketch
        code = "{:<15} = Activation(name = '{}', activation = 'sigmoid')({})".format(
            IR_node.replace_scope(IR_node.name), IR_node.name,
            IR_node.replace_scope(IR_node.in_edges[0]))
        return code

    def emit_Relu6(self, IR_node):

        layer = IR_node.real_name
        input_name, output_name = (IR_node.IR_layer.input[0],
                                   IR_node.IR_layer.name)
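        # CoreML has no native Relu6, so build min(relu(x), 6) via the identity
        # min(y, 6) = -max(-y, -6): relu -> negate -> threshold at -6 -> negate back.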

        relu_output_name = output_name + '_relu'
        self.builder.add_activation(layer, 'RELU', input_name,
                                    relu_output_name)
        # negate it
        neg_output_name = relu_output_name + '_neg'
        self.builder.add_activation(layer + '__neg__', 'LINEAR',
                                    relu_output_name, neg_output_name,
                                    [-1.0, 0])
        # apply threshold
        clip_output_name = relu_output_name + '_clip'
        self.builder.add_unary(layer + '__clip__',
                               neg_output_name,
                               clip_output_name,
                               'threshold',
                               alpha=-6.0)
        # negate it back
        self.builder.add_activation(layer + '_neg2', 'LINEAR',
                                    clip_output_name, output_name, [-1.0, 0])

    def emit_Gather(self, IR_node):
        raise NotImplementedError()
        # Unreachable sketch of the intended embedding mapping:
        input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name
        W = self.weights_dict[IR_node.name]['weights']
        if W.ndim == 2:
            vocab_size = W.shape[0]
            output_channels = W.shape[1]
            self.builder.add_embedding(name=IR_node.real_name,
                                       W=W,
                                       b=None,
                                       input_dim=vocab_size,
                                       output_channels=output_channels,
                                       has_bias=False,
                                       input_name=input_name,
                                       output_name=IR_node.real_name)
        else:
            raise NotImplementedError()

    def emit_RNNs(self, IR_node, func):
        assert False
        # for Keras
        if "dropout" in IR_node.IR_layer.attr:
            dropout_str = ",dropout = {}, recurrent_dropout = {}".format(
                IR_node.IR_layer.attr['dropout'].f,
                IR_node.IR_layer.attr['recurrent_dropout'].f)
        else:
            dropout_str = ""

        code = "{:<15} = {}(units = {}, use_bias = {} {})({})".format(
            IR_node.name, func, IR_node.IR_layer.attr['units'].i,
            IR_node.IR_layer.attr['use_bias'].b, dropout_str,
            IR_node.in_edges[0])

        return code

    def emit_LSTM(self, IR_node):
        return self.emit_RNNs(IR_node, "LSTM")

    def emit_GRU(self, IR_node):
        return self.emit_RNNs(IR_node, "GRU")

    def emit_Add(self, IR_node):
        self._emit_merge(IR_node, 'ADD')

    def emit_Concat(self, IR_node):
        self._emit_merge(IR_node, "CONCAT")

    def emit_BatchNorm(self, IR_node):
        """
        Convert a Batch Normalization layer.
        """

        # Get input and output names
        input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name

        axis = IR_node.get_attr('axis', -1)
        nb_channels = IR_node.get_attr('_output_shapes')[0].dim[axis].size

        # Set parameters
        # Parameter arrangement in Keras: gamma, beta, mean, variance
        weights = self.weights_dict[IR_node.name]
        mean = weights['mean']
        variance = weights['var']
        gamma = weights.get('scale', np.ones(mean.shape))
        beta = weights.get('bias', np.zeros(mean.shape))

        # Fold the statistics into gamma/beta, then hand the CoreML layer
        # identity statistics (mean 0, variance ~1).
        # Reference: parameter transformation https://github.com/apple/coremltools/issues/153
        f = 1.0 / np.sqrt(variance + IR_node.get_attr('epsilon'))
        gamma1 = gamma * f
        beta1 = beta - gamma * mean * f
        mean = np.zeros(mean.shape)
        variance = np.ones(mean.shape) - .00001
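        # Worked example (illustrative numbers): gamma=2, beta=1, mean=0.5,
        # var=0.25, eps~0  =>  f = 1/sqrt(0.25) = 2, gamma1 = 4,
        # beta1 = 1 - 2*0.5*2 = -1, so the folded layer computes 4*x - 1,
        # exactly gamma*(x - mean)/sqrt(var + eps) + beta.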

        self.builder.add_batchnorm(name=IR_node.real_name,
                                   channels=nb_channels,
                                   gamma=gamma1,
                                   beta=beta1,
                                   mean=mean,
                                   variance=variance,
                                   input_name=input_name,
                                   output_name=IR_node.real_name)

    def emit_Pad(self, IR_node):
        input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name
        output_name = IR_node.real_name
        is_1d = False
        padding = IR_node.get_attr('pads')
        if is_1d:
            raise ValueError("Unrecognized padding option: %s" %
                             (str(padding)))
        else:
            if type(padding) is int:
                top = left = bottom = right = padding
            elif type(padding) is list:
                top, left = padding[1], padding[2]
                bottom, right = padding[5], padding[6]
            else:
                raise ValueError("Unrecognized padding option: %s" %
                                 (str(padding)))

        # padding type TODO
        # Type of the padding: one of 'constant', 'reflection', or 'replication'
        padding_type = IR_node.get_attr('mode', 'CONSTANT')
        if padding_type == 'CONSTANT':
            padding_type = 'constant'
        elif padding_type == 'REFLECT':
            padding_type = 'reflection'
        elif padding_type == 'SYMMETRIC':
            padding_type = 'replication'

        # Now add the layer
        self.builder.add_padding(name=IR_node.name,
                                 left=left,
                                 right=right,
                                 top=top,
                                 bottom=bottom,
                                 value=0,
                                 input_name=input_name,
                                 output_name=output_name,
                                 padding_type=padding_type)

    def emit_Squeeze(self, IR_node):
        input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name
        output_name = IR_node.real_name

        self.builder.add_bias(name=IR_node.name,
                              b=0,
                              input_name=input_name,
                              output_name=output_name,
                              shape_bias=[1])
        # self.emit_Flatten(IR_node)

    def emit_LRN(self, IR_node):
        input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name
        output_name = IR_node.real_name
        alpha = IR_node.get_attr('alpha')
        beta = IR_node.get_attr('beta')
        k = IR_node.get_attr('k')
        depth_radius = int(IR_node.get_attr('size'))
        #  depth_radius: half-width of the 1-D normalization window
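        # CoreML wants the full window width, so convert the IR half-width:
        # local_size = 2*size - 1 (e.g. size=3 -> 5), matching the ONNX emitter above.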
        self.builder.add_lrn(output_name,
                             input_name,
                             output_name,
                             alpha=alpha,
                             beta=beta,
                             local_size=2 * depth_radius - 1,
                             k=k)

    def emit_SeparableConv(self, IR_node):

        input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name
        output_name = IR_node.real_name

        strides = IR_node.get_attr('strides')
        stride_height, stride_width = (strides[1], strides[2])

        # Get the weights
        W0 = self.weights_dict[IR_node.name]['depthwise_filter']
        W1 = self.weights_dict[IR_node.name]['pointwise_filter']

        padding = IR_node.get_attr('auto_pad').split('_')[0].lower()
        has_bias = IR_node.get_attr('use_bias')
        b = self.weights_dict[IR_node.name]['bias'] if has_bias else None

        output_blob_shape = IR_node.get_attr('_output_shapes')
        shape = shape_to_list(output_blob_shape[0])
        output_channels = shape[-1]

        height, width, input_channels, depth_mult = W0.shape

        W0 = np.reshape(W0, (height, width, 1, input_channels * depth_mult))

        intermediate_name = input_name + '_intermin_'

        self.builder.add_convolution(name=IR_node.name + '_step_1',
                                     kernel_channels=1,
                                     output_channels=input_channels *
                                     depth_mult,
                                     height=height,
                                     width=width,
                                     stride_height=stride_height,
                                     stride_width=stride_width,
                                     border_mode=padding,
                                     groups=input_channels,
                                     W=W0,
                                     b=None,
                                     has_bias=False,
                                     is_deconv=False,
                                     output_shape=None,
                                     input_name=input_name,
                                     output_name=intermediate_name,
                                     dilation_factors=[1, 1])

        self.builder.add_convolution(name=IR_node.name + '_step_2',
                                     kernel_channels=input_channels *
                                     depth_mult,
                                     output_channels=output_channels,
                                     height=1,
                                     width=1,
                                     stride_height=1,
                                     stride_width=1,
                                     border_mode=padding,
                                     groups=1,
                                     W=W1,
                                     b=b,
                                     has_bias=has_bias,
                                     is_deconv=False,
                                     output_shape=None,
                                     input_name=intermediate_name,
                                     output_name=output_name,
                                     dilation_factors=[1, 1])

    def emit_Slice(self, IR_node):
        pass

    def emit_Const(self, IR_node):
        pass

    def emit_Shape(self, IR_node):
        pass

    def emit_Pack(self, IR_node):
        pass
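
# Minimal usage sketch (illustrative, not part of the original listing;
# wrapping the returned spec with coremltools is an assumption):
# emitter = CoreMLEmitter('ir_model.pb', 'ir_model.npy')
# spec, input_features, output_features = emitter.gen_model()
# import coremltools
# coremltools.models.MLModel(spec).save('converted.mlmodel')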
Example No. 14
0
class MXNetEmitter(Emitter):

    dtype_map = {
        graph_pb2.DT_FLOAT16    : "float16",
        graph_pb2.DT_FLOAT32    : "float32",
        graph_pb2.DT_FLOAT64    : "float64",
        graph_pb2.DT_INT32      : "int32",
        graph_pb2.DT_UINT8      : "uint8"
    }

    activation_map = {
        "relu"    : "Relu",
        "sigmoid" : "Sigmoid",
        "tanh"    : "Tanh",
        "elu"     : "Elu"
    }

    transpose_map = {
        1 : 2,
        2 : 3,
       -1 : 1
    }

    channels_last = ['NDHWC', 'NHWC']

    def __init__(self, model):
        super(MXNetEmitter, self).__init__()
        from six import string_types as _string_types

        if isinstance(model, _string_types):
            network_path = model
            self.weight_loaded = False
        elif len(model) == 3:
            network_path = model[0]
            weight_path = model[1]
            self.output_weights_file = model[2]
            self.weights = np.load(weight_path).item()
            self.weight_loaded = True
            self.output_weights = dict()
        else:
            raise ValueError("the # of input arguments [{}] is not supported" % len(model))

        self.IR_graph = IRGraph(network_path)
        self.IR_graph.build()


    @property
    def header_code(self):
        return """import mxnet as mx
import numpy as np
import math

# mxnet-cpu only supports channels-first; by default the model and weights are converted to channels-first

def RefactorModel():
"""


    def gen_code(self, phase):
        self.IR_layer_map = dict()
        self.add_body(0, self.header_code)
        for layer in self.IR_graph.topological_sort:
            self.IR_layer_map[layer] = self.IR_graph.get_node(layer)

        shape = dict()
        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            node_type = current_node.type

            if len(current_node.in_edges) == 0:
                current_node.in_edges.append('data')

            if node_type.lower() in MXNetEmitter.activation_map:
                func = getattr(self, "emit_Activation")
                line = func(current_node, MXNetEmitter.activation_map[node_type.lower()].lower())
                self.add_body(1, line)
            elif hasattr(self, "emit_" + node_type):
                func = getattr(self, "emit_" + node_type)
                line = func(current_node)
                self.add_body(1, line)
            else:
                print("MXNet Emitter has not supported operator [%s]." % (node_type))
                self.emit_UNKNOWN(current_node)

            if node_type == "DataInput":
                cur_shape = list()
                first = True
                for dim in current_node.IR_layer.attr["shape"].shape.dim:
                    if dim.size == -1 and first:
                        cur_shape.append(1)
                        print("Detect input layer [{}] using infer batch size, set it as default value [1]".format(current_node.name))
                    else:
                        if dim.size == -1:
                            print("Warning: user should change input size manually")
                        cur_shape.append(dim.size)
                    first = False

                cur_shape.insert(1, cur_shape.pop())
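                # the pop/insert above moves the channel dim to position 1
                # (NHWC -> NCHW), e.g. [1, 224, 224, 3] -> [1, 3, 224, 224]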
                shape[current_node.name] = ', '.join('%s' % i for i in cur_shape)


        if self.weight_loaded:
            fullpath = os.path.abspath(self.output_weights_file)
            dirname = os.path.dirname(fullpath)
            if not os.path.exists(dirname):
                os.makedirs(dirname)
            with open(self.output_weights_file, 'wb') as outfile:
                np.save(outfile, self.output_weights)

        comment = "\n    # if a GPU is available, change mx.cpu() to mx.gpu()"
        last_line = "{:<15} = mx.mod.Module(symbol = {}, context = mx.cpu(), data_names = ['{}'])".format(
            "model",
            ', '.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.output_layers]),
            ', '.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.input_layers]))

        self.add_body(1, comment)
        self.add_body(1, last_line)
        self.add_body(1, "return model")

        weight_code = ""
        if not self.weight_loaded:
            weight_code += "# emitter does not detect any import weights, you may generate weights file manually\n"

        weight_code += self.gen_weight_code(shape, phase)

        main_code = "if __name__ == '__main__':\n    model = RefactorModel()\n"
        if self.weight_loaded:
            main_code += "    # remember to adjust params path\n    model = deploy_weight(model, '{}')\n".format(self.output_weights_file)

        if phase == 'train':
            train_code = """def train(model):
    import logging
    logging.getLogger().setLevel(logging.DEBUG)
    model.fit(train_iter, # train data
            eval_data = val_iter, # validation data
            optimizer = 'sgd', # Defaults to 'sgd'
            optimizer_params = {'learning_rate':0.01}, # use fixed learning rate
            eval_metric = 'acc', # report accuracy during training, other possible predefined metrics are: 'ce', 'f1', 'mae', 'mse', 'rmse', 'top_k_accuracy'
            batch_end_callback = mx.callback.Speedometer(batch_size, 100), # output progress for each 100 data batches
            num_epoch = 10) # train for at most 10 dataset passes\n\n
"""
            code = self.body_code + weight_code + train_code + main_code
        else:
            test_code = """import matplotlib.pyplot as plt
from collections import namedtuple
Batch = namedtuple('Batch', ['data'])


def get_image(url, show = False):
    import cv2
    # download and show the image
    fname = mx.test_utils.download(url)
    img = cv2.cvtColor(cv2.imread(fname), cv2.COLOR_BGR2RGB)
    if img is None:
        return None
    if show:
        plt.imshow(img)
        plt.axis('off')
    # convert into format (batch, RGB, width, height)
    img = cv2.resize(img, (224, 224))
    img = np.swapaxes(img, 0, 2)
    img = np.swapaxes(img, 1, 2)
    img = img[np.newaxis, :]
    return img


def predict(model, labels, url):
    # to show the image, change the argument show into True
    img = get_image(url, show = False)
    # compute the predict probabilities
    model.forward(Batch([mx.nd.array(img)]))
    prob = model.get_outputs()[0].asnumpy()
    # print the top-5
    prob = np.squeeze(prob)
    a = np.argsort(prob)[::-1]
    for i in a[0:5]:
        print('probability = %f, class = %s' %(prob[i], labels[i]))\n\n
"""

            main_code += """
    # # call function predict
    # with open('synset.txt', 'r') as f:
    #     labels = [l.rstrip() for l in f]
    # predict(model, labels, 'http://writm.com/wp-content/uploads/2016/08/Cat-hd-wallpapers.jpg')
"""

            code = self.body_code + weight_code + test_code + main_code

        return code


    def gen_weight_code(self, shape, phase):
        str = "def deploy_weight(model, weight_file):\n"
        str += """
    if weight_file == None:
        return

    try:
        weights_dict = np.load(weight_file).item()
    except:
        weights_dict = np.load(weight_file, encoding='bytes').item()

    arg_params = dict()
    aux_params = dict()
    for weight_name, weight_data in weights_dict.items():
        weight_name = str(weight_name)
        if "moving" in weight_name:
            aux_params[weight_name] = mx.nd.array(weight_data)
        else:
            arg_params[weight_name] = mx.nd.array(weight_data)

"""
        if phase == 'train':
            code += "    model.bind(for_training = True, data_shapes = ["
        else:
            code += "    model.bind(for_training = False, data_shapes = ["
        first = True
        for k, v in shape.items():
            if not first:
                code += ", "
            code += "('" + k + "', " + "(" + v + "))"
            first = False
        code += "])\n"
        code += "    model.set_params(arg_params = arg_params, aux_params = aux_params, allow_missing = True)\n\n    return model\n\n\n"
        return code


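    # Total SAME padding for one spatial axis, following TensorFlow's rule.
    # Example: data_shape=7, kernel=3, stride=2 -> pad = max(3 - (7 % 2), 0) = 2.
    # Returns (needs_asymmetric_split, total_pad): an odd total cannot be split
    # evenly between the two sides, which the first element flags.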
    @staticmethod
    def calculate_same_pad(data_shape, kernel, stride):
        if (data_shape % stride == 0):
            pad = max(kernel - stride, 0)
        else:
            pad = max(kernel - (data_shape % stride), 0)
        if pad % 2 == 0:
            return False, pad
        else:
            return True, pad


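    # Splits an IR pad list (per-axis begin values followed by per-axis end
    # values) into MXNet form, dropping the batch and channel axes. Symmetric
    # spatial pads pass straight through; asymmetric ones set defuse_pad so the
    # caller emits an explicit mx.sym.pad first.
    # Example: [0,1,1,0, 0,2,2,0] -> (True, [0,0,0,0, 1,2, 1,2]).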
    @staticmethod
    def transfer_pad(pad_list):
        defuse_pad = False
        pad = list()

        assert len(pad_list) % 2 == 0
        mid = int(len(pad_list)/2)
        pad_first = pad_list[1:mid-1]
        pad_second = pad_list[mid+1:-1]

        for i in range(0, mid-2):
            if not pad_first[i] == pad_second[i]:
                defuse_pad = True

        if defuse_pad:
            pad.extend([0] * 4)
            for i in range(0, mid-2):
                pad.extend([pad_first[i], pad_second[i]])
        else:
            pad = pad_first

        return defuse_pad, pad


    @staticmethod
    def transpose(data, dim):
        if dim == 1:
            data = data.transpose((2, 1, 0))
        elif dim == 2:
            data = data.transpose((3, 2, 0, 1))
        elif dim == 3:
            data = data.transpose((4, 3, 0, 1, 2))
        else:
            raise ValueError("The weight of dim {} cannot transpose" % dim)

        return data


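    # Emits an explicit mx.sym.pad node ahead of an op whose padding could not
    # be fused, and rewires downstream edges to consume the padded tensor. Max
    # pooling pads with -inf so the padding can never win the max.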
    def set_pad(self, IR_node, code, pad, _max_pool):
        if _max_pool:
            constant_value = "float('-inf')"
        else:
            constant_value = "0.0"

        code = "{:<15} = mx.sym.pad(data = {}, mode = 'constant', pad_width={}, constant_value = {}, name = '{}')".format(
                IR_node.variable_name + "_pad",
                self.parent_variable_name(IR_node),
                tuple(pad),
                constant_value,
                IR_node.name + "_pad")

        for e in IR_node.in_edges:
            if e == 'data':
                continue
            self.IR_layer_map[e].out_edges = [x if not self.IR_layer_map[x].name == IR_node.variable_name else IR_node.variable_name + "_pad" for x in self.IR_layer_map[e].out_edges]

        return code


    def emit_UNKNOWN(self, IR_node):
        print(IR_node.name)


    def emit_FullyConnected(self, IR_node):
        if self.weight_loaded:
            weight_dict = self.weights[IR_node.name]
            parent = self.IR_graph.get_parent(IR_node.name, [0])
            while parent.type == "Flatten":
                parent = self.IR_graph.get_parent(parent.name, [0])
            dim = len(parent.layer.attr['_output_shapes'].list.shape[0].dim)
            if dim > 2:
                original_dims = weight_dict['weights'].shape
                dims = [i.size for i in parent.layer.attr['_output_shapes'].list.shape[0].dim[1:]] + [-1]
                weight_dict['weights'] = np.reshape(weight_dict['weights'], dims)
                weight_dict['weights'] = np.transpose(weight_dict['weights'], [dim - 2] + list(range(0, dim - 2)) + [dim - 1])
                weight_dict['weights'] = np.reshape(weight_dict['weights'], original_dims)
            self.output_weights[IR_node.name + "_weight"] = weight_dict['weights'].transpose((1, 0))

        num_hidden = IR_node.IR_layer.attr["units"].i
        no_bias = not IR_node.IR_layer.attr["use_bias"].b
        if not no_bias and self.weight_loaded:
            self.output_weights[IR_node.name + "_bias"] = weight_dict['bias']

        code = "{:<15} = mx.sym.FullyConnected(data = {}, num_hidden = {}, no_bias = {}, name = '{}')".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
                num_hidden,
                no_bias,
                IR_node.name)

        return code


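    # Shared emission path for Convolution / Deconvolution / DepthwiseConv.
    # Kernel weights are transposed from the IR's channels-last layout into
    # MXNet's channels-first layout, and depthwise convolutions are lowered to
    # grouped convolutions (num_group = input channels).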
    def _emit_convolution(self, IR_node, pattern):
        if self.weight_loaded:
            weight_dict = self.weights[IR_node.name]
            weights = weight_dict['weights']

        dim = len(IR_node.IR_layer.attr["kernel_shape"].list.i) - 2

        kernel = list()
        for idx in range(0, dim):
            kernel.append(IR_node.IR_layer.attr["kernel_shape"].list.i[idx])

        stride = list()
        for e in IR_node.IR_layer.attr["strides"].list.i[1:-1]:
            stride.append(e)

        dilate = list()
        for e in IR_node.IR_layer.attr["dilations"].list.i[1:-1]:
            dilate.append(e)
        dilate = ', '.join('%s' % i for i in dilate)

        defuse_pad = False
        pad = list()
        if "pads" in IR_node.IR_layer.attr:
            output_shape = list()
            for e in IR_node.IR_layer.attr["_output_shapes"].list.shape[0].dim:
                output_shape.append(e.size)

            # print("Warning: MXNet Convolution Layer pad does not match IR Convolution Layer pad")
            defuse_pad, pad = MXNetEmitter.transfer_pad(IR_node.IR_layer.attr["pads"].list.i)

        num_filter = 0
        if pattern == "Deconvolution":
            num_filter = IR_node.IR_layer.attr["kernel_shape"].list.i[-2]
        else:
            num_filter = IR_node.IR_layer.attr["kernel_shape"].list.i[-1]

        use_bias = IR_node.get_attr('use_bias', False)
        if use_bias and self.weight_loaded:
            self.output_weights[IR_node.name + "_bias"] = weight_dict['bias']

        if pattern == "DepthwiseConv":
            num_group = IR_node.IR_layer.attr["kernel_shape"].list.i[-2]
            num_filter = num_filter * num_group
            pattern = "Convolution"
            if self.weight_loaded:
                weights = np.swapaxes(weights, -1, -2)

        else:
            num_group = IR_node.get_attr('group', 1)

        # layout = IR_node.IR_layer.attr["data_format"].s
        if dim == 1:
            layout = 'NCW'
        elif dim == 2:
            layout = 'NCHW'
        elif dim == 3:
            layout = 'NCDHW'
        else:
            raise ValueError("Convolution of dim {} is not supported.".format(dim))

        if self.weight_loaded:
            # if layout not in MXNetEmitter.channels_last:
            weights = MXNetEmitter.transpose(weights, dim)
            self.output_weights[IR_node.name + "_weight"] = weights

        code = ""
        if not defuse_pad:
            code += "{:<15} = mx.sym.{}(data={}, kernel={}, stride={}, dilate = ({}), pad={}, num_filter = {}, num_group = {}, no_bias = {}, layout = '{}', name = '{}')".format(
                IR_node.variable_name,
                pattern,
                self.parent_variable_name(IR_node),
                tuple(kernel),
                tuple(stride),
                dilate,
                tuple(pad),
                num_filter,
                num_group,
                not use_bias,
                layout,
                IR_node.name)
        else:
            code += self.set_pad(IR_node, code, pad, False)
            code += "\n    {:<15} = mx.sym.{}(data={}, kernel={}, stride={}, dilate = ({}), num_filter = {}, num_group = {}, no_bias = {}, layout = '{}', name = '{}')".format(
                IR_node.variable_name,
                pattern,
                IR_node.variable_name + "_pad",
                tuple(kernel),
                tuple(stride),
                dilate,
                num_filter,
                num_group,
                not use_bias,
                layout,
                IR_node.name)

        return code


    def emit_Conv(self, IR_node):
        return self._emit_convolution(IR_node, "Convolution")


    def emit_DepthwiseConv(self, IR_node):
        return self._emit_convolution(IR_node, "DepthwiseConv")


    def emit_ConvTranspose(self, IR_node):
        return self._emit_convolution(IR_node, "Deconvolution")


    def emit_DataInput(self, IR_node):
        shape = list()
        shape.extend(IR_node.IR_layer.attr["shape"].list.i)

        code = "{:<15} = mx.sym.var('{}')".format(IR_node.variable_name, IR_node.name)
        return code


    # elu is emitted via mx.sym.LeakyReLU (its slope parameter is not supported
    # here); every other activation maps to mx.sym.Activation.
    def emit_Activation(self, IR_node, act_type):

        if act_type == "elu":
            func_name = "LeakyReLU"
        else:
            func_name = "Activation"

        code = "{:<15} = mx.sym.{}(data = {}, act_type = '{}', name = '{}')".format(
                IR_node.variable_name,
                func_name,
                self.parent_variable_name(IR_node),
                act_type,
                IR_node.name)

        return code


    def emit_BatchNorm(self, IR_node):
        if self.weight_loaded:
            weight_dict = self.weights[IR_node.name]

        # axis = IR_node.IR_layer.attr["axis"].i
        axis = 1
        eps = IR_node.IR_layer.attr["epsilon"].f
        momentum = IR_node.IR_layer.attr["momentum"].f

        fix_gamma = not IR_node.IR_layer.attr["scale"].b

        if self.weight_loaded:
            if not fix_gamma:
                self.output_weights[IR_node.name + "_gamma"] = weight_dict['scale']
            self.output_weights[IR_node.name + "_beta"] = weight_dict['bias']

        # not supported yet
        use_global_stats = "False"
        if self.weight_loaded:
            self.output_weights[IR_node.name + "_moving_var"] = weight_dict['var']
            self.output_weights[IR_node.name + "_moving_mean"] = weight_dict['mean']

        code = "{:<15} = mx.sym.BatchNorm(data = {}, axis = {}, eps = {}, momentum = {}, fix_gamma = {}, use_global_stats = {}, name = '{}')".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
                axis,
                eps,
                momentum,
                fix_gamma,
                use_global_stats,
                IR_node.name)

        return code


    def emit_Pool(self, IR_node):

        global_pool = IR_node.IR_layer.attr["global_pooling"].b

        kernel = list()
        if global_pool:
            kernel = [1] * (len(IR_node.IR_layer.attr["strides"].list.i) - 2)
        else:
            for e in IR_node.IR_layer.attr["kernel_shape"].list.i[1:-1]:
                kernel.append(e)

        pool_type = IR_node.get_attr('pooling_type').lower()

        stride = list()
        for e in IR_node.IR_layer.attr["strides"].list.i[1:-1]:
            stride.append(e)

        defuse_pad = False
        pad = list()
        if "pads" in IR_node.IR_layer.attr:
            output_shape = list()
            for e in IR_node.IR_layer.attr["_output_shapes"].list.shape[0].dim:
                output_shape.append(e.size)

            # print("Warning: MXNet Pooling Layer pad does not match IR Pooling Layer pad")
            defuse_pad, pad = MXNetEmitter.transfer_pad(IR_node.IR_layer.attr["pads"].list.i)
        code = ""
        if not defuse_pad:
            code += "{:<15} = mx.sym.Pooling(data = {}, global_pool = {}, kernel={}, pool_type = '{}', stride={}, pad={}, name = '{}')".format(
                    IR_node.variable_name,
                    self.parent_variable_name(IR_node),
                    global_pool,
                    tuple(kernel),
                    pool_type,
                    tuple(stride),
                    tuple(pad),
                    IR_node.name)
        else:
            code += self.set_pad(IR_node, code, pad, pool_type == "max")
            code += "\n    {:<15} = mx.sym.Pooling(data = {}, global_pool = {}, kernel={}, pool_type = '{}', stride={}, name = '{}')".format(
                    IR_node.variable_name,
                    IR_node.variable_name + "_pad",
                    global_pool,
                    tuple(kernel),
                    pool_type,
                    tuple(stride),
                    IR_node.name)

        return code


    def emit_SoftmaxOutput(self, IR_node):

        code = "{:<15} = mx.sym.SoftmaxOutput(data = {}, name = 'softmax')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node)
        )

        return code


    def emit_Softmax(self, IR_node):

        code = ""

        if len(IR_node.out_edges) == 0:
            code = "{:<15} = mx.sym.SoftmaxOutput(data = {}, name = 'softmax')".format(
                    IR_node.variable_name,
                    self.parent_variable_name(IR_node))
        else:
            axis = IR_node.IR_layer.attr["dim"].i
            code = "{:<15} = mx.sym.softmax(data = {}, axis = {}, name = '{}')".format(
                    IR_node.variable_name,
                    self.parent_variable_name(IR_node),
                    axis,
                    IR_node.name)

        return code


    def emit_Squeeze(self, IR_node):
        return self.emit_Flatten(IR_node)


    def emit_Embedding(self, IR_node):

        input_dim = IR_node.IR_layer.attr["input_dim"].i
        output_dim = IR_node.IR_layer.attr["output_dim"].i
        dtype = MXNetEmitter.dtype_map.get(IR_node.layer.attr["dtype"].type, "float32")

        code = "{:<15} = mx.sym.Embedding(data = {}, input_dim = {}, output_dim = {}, dtype = {}, name = '{}')".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
                input_dim,
                output_dim,
                dtype,
                IR_node.name)

        return code


    def emit_Dropout(self, IR_node):
        p = IR_node.IR_layer.attr["keep_prob"].f
        mode = IR_node.IR_layer.attr["mode"].s.lower().decode() if 'mode' in IR_node.layer.attr else 'training'
        code = "{:<15} = mx.sym.Dropout(data = {}, p = {}, mode = '{}', name = '{}')".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
                p,
                mode,
                IR_node.name)

        return code


    # reverse is not supported yet
    def emit_Reshape(self, IR_node):

        shape = list()
        for e in IR_node.IR_layer.attr["shape"].list.i:
            shape.append(e)
        shape = ', '.join('%s' % i for i in shape)
        reverse = False

        code = "{:<15} = mx.sym.reshape(data = {}, shape = ({}), reverse = {}, name = '{}')".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
                shape,
                reverse,
                IR_node.name)

        return code


    def emit_Flatten(self, IR_node):
        # code = "{:<15} = mx.sym.transpose(data = {}, axes = (0, 2, 3, 1))\n".format("trans", self.parent_variable_name(IR_node))
        code = "{:<15} = mx.sym.flatten(data = {}, name = '{}')".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
                IR_node.name)

        return code


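    # Maps a channels-last IR axis onto MXNet's channels-first layout: batch
    # stays 0, the last (channel) axis becomes 1, spatial axes shift by +1.
    # Example with ndim=4 (NHWC): axis 3 -> 1, axis 1 -> 2.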
    @staticmethod
    def _convert_axis(IR_node, axis):
        ndim = len(IR_node.layer.attr['_output_shapes'].list.shape[0].dim)
        if axis == 0:
            return 0
        elif axis == ndim - 1:
            return 1
        else:
            return axis + 1


    def emit_Concat(self, IR_node):
        dim = MXNetEmitter._convert_axis(IR_node, IR_node.IR_layer.attr["axis"].i)
        code = "{:<15} = mx.sym.concat({}, dim = {}, name = '{}')".format(
                IR_node.variable_name,
                ', '.join(self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges),
                dim,
                IR_node.name)

        return code


    def emit_Cast(self, IR_node):

        dtype = IR_node.IR_layer.attr["dtype"].type

        code = "{:<15} = mx.sym.cast(data = {}, dtype = {}, name = '{}')".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
                dtype,
                IR_node.name)

        return code


    def emit_Expand_dims(self, IR_node):

        axis = IR_node.IR_layer.attr["axis"].i

        code = "{:<15} = mx.sym.expand_dims(data = {}, axis = {}, name = '{}')".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
                axis,
                IR_node.name)

        return code


    def emit_Pad(self, IR_node):
        mode = IR_node.IR_layer.attr["mode"].s.lower().decode()
        pad_width = list()
        pad_width.extend([0]*4)
        padding = convert_onnx_pad_to_tf(IR_node.get_attr("pads"))[1:-1]
        for padding_pair in padding:
            pad_width.extend(padding_pair)

        pad_width = ', '.join('%s' % i for i in pad_width)

        code = "{:<15} = mx.sym.pad(data = {}, mode = '{}', pad_width = ({}), name = '{}')".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
                mode,
                pad_width,
                IR_node.name)

        return code


    def emit_Add(self, IR_node):
        code = "{:<15} = mx.sym.broadcast_add({}, {})".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
                self.parent_variable_name(IR_node, [1]))

        return code


    def emit_Mul(self, IR_node):

        code = "{:<15} = mx.sym.broadcast_mul({}, {})".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
                self.parent_variable_name(IR_node, [1]))

        return code


    def emit_ReduceMean(self, IR_node):
        axes = IR_node.layer.attr['axes'].list.i[:]
        axes = ','.join('%s' % MXNetEmitter.transpose_map[i] for i in axes)

        code = "{:<15} = mx.sym.mean(data = {}, axis = ({}), keepdims = {})".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
                axes,
                IR_node.layer.attr['keepdims'].b)

        return code


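    # MXNet's nsize is the full window width, so the IR's one-sided size is
    # expanded to size * 2 - 1.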
    def emit_LRN(self, IR_node):
        code = "{:<15} = mx.sym.LRN(data = {}, alpha = {}, beta = {}, knorm = {}, nsize = {}, name = '{}')".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
                IR_node.layer.attr['alpha'].f,
                IR_node.layer.attr['beta'].f,
                IR_node.layer.attr['k'].f,
                IR_node.layer.attr['size'].i * 2 - 1,
                IR_node.name)

        return code

    def emit_Constant(self, IR_node):
        raise NotImplementedError()
        # unreachable draft implementation, kept for reference:
        # code = "{:<15} = mx.sym.identity(name='{}')".format(IR_node.variable_name, IR_node.name)
        # self.output_weights[IR_node.name + '_data'] = self.weights[IR_node.name]['value']
        # return code

    def emit_Sub(self, IR_node):
        code = "{:<15} = mx.sym.broadcast_sub({}, {})".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
                self.parent_variable_name(IR_node, [1]))

        return code


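    # MXNet has no relu6 symbol: emit a plain relu followed by clip(0, 6), and
    # rename the node so downstream consumers read from the clipped output.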
    def emit_Relu6(self, IR_node):
        self.add_body(1, self.emit_Activation(IR_node, 'relu'))
        old_name = IR_node.variable_name
        IR_node.real_name = IR_node.real_name + "_clip"
        self.add_body(1, "{:<15} = mx.sym.clip({}, a_min=0, a_max=6, name='{}')".format(
            IR_node.real_variable_name,
            old_name,
            IR_node.real_name))

        return ""
Example no. 15
class PytorchEmitter(Emitter):

    dtype_map = {
        graph_pb2.DT_FLOAT16: "torch.float16",
        graph_pb2.DT_FLOAT32: "torch.float32",
        graph_pb2.DT_FLOAT64: "torch.float64",
        graph_pb2.DT_INT16: "torch.int16",
        graph_pb2.DT_INT32: "torch.int32",
        graph_pb2.DT_INT64: "torch.int64",
        graph_pb2.DT_UINT8: "torch.uint8",
        graph_pb2.DT_UINT16: "torch.uint16"
    }

    # Base Functions
    def __init__(self, model):
        super(PytorchEmitter, self).__init__()
        if isinstance(model, _string_types):
            network_path = model
        else:
            network_path = model[0]
            weight_path = model[1]

        self.init_code = str()
        self.IR_graph = IRGraph(network_path)
        self.IR_graph.build()
        self._load_weights(weight_path)

        folder = Folder(self.IR_graph, self.weights_dict)
        folder.fold()

    def run(self, dstNetworkPath, dstWeightPath=None, phase='test'):
        super(PytorchEmitter, self).run(dstNetworkPath, dstWeightPath, phase)
        if self.weight_loaded:
            self.save_weights(self.weights_dict, dstWeightPath)

    def add_init(self, indent, codes):
        if isinstance(codes, _string_types):
            codes = [codes]
        for code in codes:
            self.init_code += ("    " * indent) + code + '\n'

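    # Input-less nodes whose name appears in the weights dict (captured
    # constants) are read directly from _weights_dict as tensors instead of
    # referencing a parent variable.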
    def parent_variable_name(self, IR_node, path=[0], weight_type='weights'):
        if not IR_node.in_edges and IR_node.name in self.weights_dict:
            return "torch.from_numpy(_weights_dict['{}']['{}'])".format(
                IR_node.name, weight_type)

        return super(PytorchEmitter, self).parent_variable_name(IR_node, path)

    @property
    def header_code(self):
        return """import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

_weights_dict = dict()

def load_weights(weight_file):
    if weight_file is None:
        return

    try:
        weights_dict = np.load(weight_file, allow_pickle=True).item()
    except Exception:
        weights_dict = np.load(weight_file, allow_pickle=True, encoding='bytes').item()

    return weights_dict

class KitModel(nn.Module):
"""

    def gen_code(self, phase):
        self.add_init(
            1, """
    def __init__(self, weight_file):
        super(KitModel, self).__init__()
        global _weights_dict
        _weights_dict = load_weights(weight_file)
""")

        self.add_body(1, "def forward(self, x):")

        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            node_type = current_node.type

            if hasattr(self, "emit_" + node_type):
                func = getattr(self, "emit_" + node_type)
                line = func(current_node)
                if line:
                    self.add_body(2, line)

            else:
                print("Pytorch Emitter has not supported operator [%s]." %
                      (node_type))
                self.emit_UNKNOWN(current_node)

        self.add_body(
            2, "return {}".format(', '.join([
                self.IR_graph.get_node(name).real_variable_name
                for name in self.IR_graph.output_layers
                if self.IR_graph.get_node(name).type != 'Pack'
            ])))

        self.add_body(0, "")
        for i in self.used_layers:
            func = getattr(self, "_layer_" + i)
            func()

        self.add_body(0, "")
        for code in self.layers_codes.values():
            self.add_body(0, code)

        return self.header_code + '\n' + self.init_code + '\n' + self.body_code

    def _defuse_padding(self, IR_node, extra_str=""):
        input_node = self.parent_variable_name(IR_node)
        if IR_node.get_attr('auto_pad') == 'VALID':
            return input_node

        if is_valid_padding(IR_node.get_attr("pads")):
            return input_node

        padding = self._convert_padding(IR_node)
        input_node = IR_node.variable_name + '_pad'
        self.add_body(
            2, "{:<15} = F.pad({}, {}{})".format(
                input_node, self.parent_variable_name(IR_node), padding,
                extra_str))

        return input_node

    def emit_Conv(self, IR_node):
        self.used_layers.add('Conv')

        dim = len(IR_node.get_attr('strides')) - 2

        in_channels = IR_node.get_attr('kernel_shape')[-2]
        filter = IR_node.get_attr('kernel_shape')[-1]
        kernel = IR_node.get_attr('kernel_shape')[:-2]
        strides = IR_node.get_attr('strides')[1:-1]

        if IR_node.type == 'DepthwiseConv':
            group = in_channels
            filter *= group

        else:
            group = IR_node.get_attr('group', 1)

        self.add_init(
            2,
            "self.{} = self.__conv({}, name='{}', in_channels={}, out_channels={}, kernel_size={}, stride={}, groups={}, bias={})"
            .format(
                IR_node.variable_name,
                dim,
                IR_node.name,
                in_channels,
                filter,
                tuple(kernel),
                tuple(strides),
                # padding,
                group,
                IR_node.get_attr('use_bias')))

        input_node = self._defuse_padding(IR_node)

        code = "{:<15} = self.{}({})".format(IR_node.variable_name,
                                             IR_node.variable_name, input_node)

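        # reorder kernels from the IR's [*kernel, in, out] layout into PyTorch's
        # (out, in, *kernel); depthwise weights first swap their channel axes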
        if self.weight_loaded:
            if IR_node.type == 'DepthwiseConv':
                self.weights_dict[IR_node.name]['weights'] = np.swapaxes(
                    self.weights_dict[IR_node.name]['weights'], -1, -2)
            self.weights_dict[IR_node.name]['weights'] = np.transpose(
                self.weights_dict[IR_node.name]['weights'],
                [dim + 1, dim] + list(range(0, dim)))

        return code

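    # Heuristic: treat pooling as ceil_mode when every spatial axis has
    # asymmetric (begin != end) padding.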
    @staticmethod
    def is_ceil_mode(pads):
        lens = len(pads)
        for i in range(lens // 2 + 1, lens - 1):
            if pads[i] == pads[i - lens // 2]:
                return False
        return True

    def emit_Pool(self, IR_node):
        dim = len(IR_node.get_attr('strides')) - 2

        if IR_node.get_attr('pooling_type') == "MAX":
            pool_name = "max_pool{}d".format(dim)
            # exstr = ", value=float('-Inf')"
        elif IR_node.get_attr('pooling_type') == "AVG":
            pool_name = "avg_pool{}d".format(dim)
            # exstr = ""
        else:
            raise ValueError()

        if IR_node.layer.attr['global_pooling'].b:
            code = "{:<15} = F.{}(input = {}, kernel_size = {}.size()[2:])".format(
                IR_node.variable_name, pool_name,
                self.parent_variable_name(IR_node),
                self.parent_variable_name(IR_node))
            return code

        else:
            if IR_node.get_attr('pooling_type') == "MAX":
                # Change to padding defuse
                input_node = self._defuse_padding(IR_node,
                                                  ", value=float('-inf')")
                for e in IR_node.get_attr('dilations', []):
                    assert e == 1

                pool_size = IR_node.get_attr('kernel_shape')[1:-1]
                strides = IR_node.get_attr('strides')[1:-1]

                code = "{}, {}_idx = F.{}({}, kernel_size={}, stride={}, padding={}, ceil_mode={}, return_indices={})".format(
                    IR_node.variable_name,
                    IR_node.variable_name, pool_name, input_node,
                    tuple(pool_size), tuple(strides), 0, False, True)
                return code

            elif IR_node.get_attr('pooling_type') == "AVG":

                for e in IR_node.get_attr('dilations', []):
                    assert e == 1

                pool_size = IR_node.get_attr('kernel_shape')[1:-1]
                strides = IR_node.get_attr('strides')[1:-1]

                padding = IR_node.get_attr('pads')[1:dim]
                ceil_mode = self.is_ceil_mode(IR_node.get_attr('pads'))

                # input_node = self._defuse_padding(IR_node, exstr)
                code = "{:<15} = F.{}({}, kernel_size={}, stride={}, padding={}, ceil_mode={}, count_include_pad=False)".format(
                    IR_node.variable_name, pool_name,
                    self.parent_variable_name(IR_node), tuple(pool_size),
                    tuple(strides), tuple(padding), ceil_mode)
                return code
            else:
                raise ValueError()

    def emit_Unpool(self, IR_node):
        dim = len(IR_node.get_attr('strides')) - 2

        # Change to padding defuse
        input_node = self.parent_variable_name(IR_node)
        index_node = self.parent_variable_name(IR_node, [1])
        pool_name = "max_unpool{}d".format(dim)
        pool_size = IR_node.get_attr('kernel_shape')[1:-1]
        strides = IR_node.get_attr('strides')[1:-1]

        code = "{:<15} = F.{}({},{}_idx, kernel_size={}, stride={}, padding={})".format(
            IR_node.variable_name, pool_name, input_node, index_node,
            tuple(pool_size), tuple(strides), 0)
        return code

    def emit_UNKNOWN(self, IR_node):
        print(IR_node.name)

    def emit_DataInput(self, IR_node):
        # Ignore it in Pytorch
        IR_node.real_name = 'x'

    def emit_Dropout(self, IR_node):
        code = "{:<15} = F.dropout(input = {}, p = {}, training = self.training, inplace = True)".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            IR_node.layer.attr["keep_prob"].f)
        return code

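    # When a fully-connected layer consumes a flattened >2-D tensor, its weight
    # rows were laid out for a channels-last flatten; permute them to match the
    # channels-first order that PyTorch's .view() produces.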
    def check_if_need_transpose(self, IR_node):
        parent = self.IR_graph.get_parent(IR_node.name, [0])
        while parent.type == 'Flatten' or parent.type == 'Dropout':
            parent = self.IR_graph.get_parent(parent.name, [0])
        dim = len(parent.layer.attr['_output_shapes'].list.shape[0].dim)
        if dim > 2:
            original_dims = self.weights_dict[IR_node.name]['weights'].shape
            dims = [
                i.size for i in
                parent.layer.attr['_output_shapes'].list.shape[0].dim[1:]
            ] + [-1]
            self.weights_dict[IR_node.name]['weights'] = np.reshape(
                self.weights_dict[IR_node.name]['weights'], dims)
            self.weights_dict[IR_node.name]['weights'] = np.transpose(
                self.weights_dict[IR_node.name]['weights'],
                [dim - 2] + list(range(0, dim - 2)) + [dim - 1])
            self.weights_dict[IR_node.name]['weights'] = np.reshape(
                self.weights_dict[IR_node.name]['weights'], original_dims)

    def emit_FullyConnected(self, IR_node):
        self.used_layers.add(IR_node.type)
        in_features = 1
        for i in self.IR_graph.get_parent(
                IR_node.name,
            [0]).layer.attr['_output_shapes'].list.shape[0].dim[1:]:
            in_features *= i.size

        if IR_node.get_attr('in_features') is not None:
            in_features = IR_node.get_attr('in_features')

        self.add_init(
            2,
            "self.{} = self.__dense(name = '{}', in_features = {}, out_features = {}, bias = {})"
            .format(IR_node.variable_name, IR_node.name, in_features,
                    IR_node.layer.attr["units"].i,
                    IR_node.IR_layer.attr["use_bias"].b))

        input_node = self.parent_variable_name(IR_node)
        if len(
                self.IR_graph.get_parent(
                    IR_node.name, [0]).get_attr('_output_shapes')[0].dim) > 2:
            input_node = "{}.view({}.size(0), -1)".format(
                input_node, input_node)

        code = "{:<15} = self.{}({})".format(IR_node.variable_name,
                                             IR_node.variable_name, input_node)

        if self.weight_loaded:
            self.check_if_need_transpose(IR_node)
            self.weights_dict[IR_node.name]['weights'] = np.transpose(
                self.weights_dict[IR_node.name]['weights'], (1, 0))

        return code

    def emit_Flatten(self, IR_node):
        parent = self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name
        code = "{:<15} = {}.view({}.size(0), -1)".format(
            IR_node.variable_name, parent, parent)
        return code

    def emit_Reshape(self, IR_node):
        shape_list = IR_node.get_attr('shape')
        shape_str = ','.join([str(int(i)) for i in shape_list])
        code = "{:<15} = torch.reshape(input = {}, shape = ({}))".format(
            IR_node.variable_name,
            self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name,
            shape_str)
        return code

    def emit_Tanh(self, IR_node):
        code = "{:<15} = F.tanh({})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node, [0]))
        return code

    def emit_Relu(self, IR_node):
        code = "{:<15} = F.relu({})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node, [0]))
        return code

    def emit_LeakyRelu(self, IR_node):
        code = "{:<15} = F.leaky_relu({}, negative_slope={})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node, [0]),
            IR_node.get_attr('alpha'))
        return code

    def emit_Relu6(self, IR_node):
        code = "{:<15} = F.relu6({})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node, [0]))
        return code

    def emit_Softmax(self, IR_node):
        code = "{:<15} = F.softmax({})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node, [0]))
        return code

    def emit_Sigmoid(self, IR_node):
        code = "{:<15} = F.sigmoid({})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node))
        return code

    def emit_Embedding(self, IR_node):
        self.used_layers.add("Embedding")
        self.add_init(
            2,
            "self.{} = self.__embedding('{}', num_embeddings={}, embedding_dim={})"
            .format(
                IR_node.variable_name,
                IR_node.name,
                IR_node.get_attr('input_dim'),  #2-D
                IR_node.get_attr('output_dim')))

        code = "{:<15} = self.{}({})".format(
            IR_node.variable_name, IR_node.variable_name,
            "torch.LongTensor(np.array({}))".format(
                self.parent_variable_name(IR_node)))
        return code

    def emit_RNNs(self, IR_node, func):
        raise NotImplementedError()
        # unreachable Keras-style draft, kept for reference:
        # if "dropout" in IR_node.IR_layer.attr:
        #     dropout_str = ",dropout = {}, recurrent_dropout = {}".format(
        #         IR_node.IR_layer.attr['dropout'].f,
        #         IR_node.IR_layer.attr['recurrent_dropout'].f)
        # else:
        #     dropout_str = ""
        #
        # code = "{:<15} = {}(units = {}, use_bias = {} {})({})".format(
        #     IR_node.name, func, IR_node.IR_layer.attr['units'].i,
        #     IR_node.IR_layer.attr['use_bias'].b, dropout_str,
        #     IR_node.in_edges[0])
        #
        # return code

    def emit_LSTM(self, IR_node):
        return self.emit_RNNs(IR_node, "LSTM")

    def emit_GRU(self, IR_node):
        return self.emit_RNNs(IR_node, "GRU")

    def emit_Add(self, IR_node):
        code = "{:<15} = {} + {}".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            self.parent_variable_name(IR_node, [1]))
        return code

    def emit_Sub(self, IR_node):
        code = "{:<15} = {}".format(
            IR_node.variable_name, ' - '.join(
                self.parent_variable_name(IR_node, [idx])
                for idx in range(len(IR_node.in_edges))))
        return code

    def emit_Mul(self, IR_node):
        code = "{:<15} = {}".format(
            IR_node.variable_name, ' * '.join(
                self.parent_variable_name(IR_node, [idx])
                for idx in range(len(IR_node.in_edges))))
        return code

    def emit_MatMul(self, IR_node):
        code = "{:<15} = torch.matmul({})".format(
            IR_node.variable_name,
            ' , '.join('%s' % self.IR_graph.get_node(s).real_variable_name
                       for s in IR_node.in_edges))
        return code

    def emit_Constant(self, IR_node):
        if IR_node.get_attr('value'):
            value = IR_node.get_attr('value')
            if not isinstance(value, list):
                value = [value]
            code = "self.{:<15} = torch.autograd.Variable(torch.Tensor({}), requires_grad=False)".format(
                IR_node.variable_name, value)
        else:
            code = "self.{:<15} = torch.autograd.Variable(torch.from_numpy(_weights_dict['{}']['value']), requires_grad=False)".format(
                IR_node.variable_name, IR_node.name)

        # self.add_init(2, "self.{:<15} = torch.from_numpy(_weights_dict['{}']['value'])".format(
        #     IR_node.variable_name,
        #     IR_node.name))
        IR_node.real_name = "self." + IR_node.variable_name
        return code

    def _convert_axis(self, IR_node, axis):
        ndim = len(
            self.IR_graph.get_parent(IR_node.name,
                                     [0]).get_attr('_output_shapes')[0].dim)
        if axis == 0:
            return 0
        elif axis == ndim - 1:
            return 1
        else:
            return axis + 1

    def emit_Concat(self, IR_node):
        axis = self._convert_axis(IR_node, IR_node.get_attr('axis'))
        code = "{:<15} = torch.cat(({}), {})".format(
            IR_node.variable_name,
            ', '.join(
                self.parent_variable_name(IR_node, [idx])
                for idx in range(len(IR_node.in_edges))),
            axis,
        )
        return code

    def emit_BatchNorm(self, IR_node):
        self.used_layers.add(IR_node.type)
        dim = len(IR_node.layer.attr['_output_shapes'].list.shape[0].dim) - 2

        output_shape = IR_node.layer.attr['_output_shapes'].list.shape[0]
        if IR_node.get_attr('data_format', "NHWC") == "NCHW":
            num_features = output_shape.dim[1].size
        else:
            num_features = output_shape.dim[-1].size

        self.add_init(
            2,
            "self.{} = self.__batch_normalization({}, '{}', num_features={}, eps={}, momentum={})"
            .format(
                IR_node.variable_name,
                dim,
                IR_node.name,
                num_features,
                IR_node.layer.attr['epsilon'].f,
                IR_node.layer.attr['momentum'].f,
            ))

        code = "{:<15} = self.{}({})".format(
            IR_node.variable_name, IR_node.variable_name,
            self.parent_variable_name(IR_node))
        return code

    def emit_Scale(self, IR_node):
        self.used_layers.add(IR_node.type)
        dim = len(IR_node.layer.attr['_output_shapes'].list.shape[0].dim) - 2

        self.add_init(
            2, "self.{} = self.__scale({}, '{}', num_features={})".format(
                IR_node.variable_name, dim, IR_node.name, IR_node.layer.
                attr['_output_shapes'].list.shape[0].dim[-1].size))

        code = "{:<15} = self.{}({})".format(
            IR_node.variable_name, IR_node.variable_name,
            self.parent_variable_name(IR_node))
        return code

    def emit_Squeeze(self, IR_node):
        code = "{:<15} = torch.squeeze({})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node))
        return code

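    # F.pad takes (begin, end) pairs starting from the *last* dimension, so the
    # per-axis pairs from the IR (batch and channel stripped) are reversed.
    # Example: NHWC pads [0,1,2,0, 0,3,4,0] -> (2, 4, 1, 3).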
    @staticmethod
    def _convert_padding(IR_node):
        padding = IR_node.get_attr('pads')
        padding = convert_onnx_pad_to_tf(padding)[1:-1]
        new_padding = []
        for pad in padding:
            new_padding.insert(0, pad)
        return tuple(np.array(new_padding).reshape(-1).tolist())

    def emit_Pad(self, IR_node):
        if IR_node.get_attr('mode').lower() == 'constant':
            mode = "mode = 'constant', value = {}".format(0)
        elif IR_node.get_attr('mode').lower() == 'reflect':
            mode = "mode = 'reflect'"
        elif IR_node.get_attr('mode').upper() == 'SYMMETRIC':
            mode = "mode = 'replicate'"
        else:
            raise ValueError("Unsupported pad mode [{}].".format(IR_node.get_attr('mode')))

        padding = self._convert_padding(IR_node)
        code = "{:<15} = F.pad({}, {}, {})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node), padding,
            mode)
        return code

    def emit_ReduceMean(self, IR_node):
        axes = [
            self._convert_axis(IR_node, x) for x in IR_node.get_attr('axes')
        ]
        input_node = self.parent_variable_name(IR_node)
        codes = []
        for axis in sorted(axes, reverse=True):
            code = "{:<15} = torch.mean({}, {}, {})".format(
                IR_node.variable_name, input_node, axis,
                IR_node.get_attr("keepdims"))
            codes.append(code)
            input_node = IR_node.variable_name
        return codes

    def emit_LRN(self, IR_node):
        output_name = IR_node.variable_name
        input_name = self.parent_variable_name(IR_node)
        size = IR_node.get_attr('size')
        alpha = IR_node.get_attr('alpha')
        beta = IR_node.get_attr('beta')
        bias = IR_node.get_attr('bias', 1)

        code = "{:<15} = F.local_response_norm({}, size={}, alpha={}, beta={}, k={})".format(
            output_name, input_name, size, alpha, beta, bias)
        return code

    def emit_DepthwiseConv(self, IR_node):
        return self.emit_Conv(IR_node)

    def emit_Const(self, IR_node):
        if 'dtype' in IR_node.layer.attr:
            dtype_str = "dtype={}".format(
                self.dtype_map[IR_node.layer.attr['dtype'].type])
            if 'int' in dtype_str:
                code = "{:<15} = torch.tensor({}, {})".format(
                    IR_node.variable_name, IR_node.layer.attr['value'].i,
                    dtype_str)
            else:
                code = "{:<15} = torch.tensor({}, {})".format(
                    IR_node.variable_name, IR_node.layer.attr['value'].f,
                    dtype_str)

        else:
            dtype_str = "dtype=torch.float32"
            code = "{:<15} = torch.tensor({}, {})".format(
                IR_node.variable_name, IR_node.layer.attr['value'].f,
                dtype_str)
        return code

    def emit_Shape(self, IR_node):
        code = "{:<15} = torch.Tensor(list({}.size()))".format(
            IR_node.variable_name, self.parent_variable_name(IR_node))
        return code

    def emit_Pack(self, IR_node):
        code = "{:<15} = {}".format(
            IR_node.variable_name,
            '[' + ','.join('%s' % self.IR_graph.get_node(s).real_variable_name
                           for s in IR_node.in_edges) + ']',
        )
        return code

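    # starts/ends arrive in channels-last order; move the channel entry to
    # position 1 (NHWC -> NCHW) before building the slice expression, and apply
    # shrink_axis_mask by indexing the flagged axes with 0.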
    def emit_Slice(self, IR_node):
        starts = IR_node.get_attr('starts')
        if len(starts) > 1:
            starts = [starts[0], starts[-1]] + starts[1:-1]
        ends = IR_node.get_attr('ends')
        if len(ends) > 1:
            ends = [ends[0], ends[-1]] + ends[1:-1]
        extra_str = ""
        for idx, _ in enumerate(starts):
            if idx:
                extra_str += ", "
            extra_str += "{}:".format(starts[idx])
            if ends[idx]:
                extra_str += "{}".format(ends[idx])

        shrink_mask = IR_node.get_attr('shrink_axis_mask')

        if shrink_mask:
            mask = [int(s) for s in bin(shrink_mask)[2:][::-1]]
            shrink_str = '[' + ','.join(':' if bit == 0 else '0'
                                        for bit in mask) + ']'
        else:
            shrink_str = ''
        code = "{:<15} = {}[{}]{}".format(IR_node.variable_name,
                                          self.parent_variable_name(IR_node),
                                          extra_str, shrink_str)
        return code

    def emit_Split(self, IR_node):

        if isinstance(IR_node.get_attr('split'), list):
            split_str = IR_node.get_attr('split')
        else:
            num_split = IR_node.get_attr('split')
            split_str = "math.ceil({}.shape[{}]/{})".format(
                self.parent_variable_name(IR_node), IR_node.get_attr('axis'),
                num_split)
        code = "{:<15} = torch.split({}, {}, dim={})".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            split_str,
            IR_node.get_attr('axis'),
        )
        return code

    def emit_Unstack(self, IR_node):
        code = "{:<15} = torch.unbind({}, dim={})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            IR_node.get_attr('axis'))
        return code

    def emit_Fill(self, IR_node):
        code = "{:<15} = torch.full({}.int().numpy().tolist(), {})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            IR_node.get_attr('value'))
        return code

    def emit_Gather(self, IR_node):
        pass

    def emit_Unsqueeze(self, IR_node):
        code = "{:<15} = {}.unsqueeze({})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            IR_node.get_attr('axes')[0])
        return code

    def emit_Transpose(self, IR_node):
        code = "{:<15} = {}.permute({})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            self.parent_variable_name(IR_node, [1]))
        return code

    def emit_Minimum(self, IR_node):
        code = "{:<15} = torch.min({}, {})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            self.parent_variable_name(IR_node, [1]))
        return code

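    # note: the 'Maxmum' spelling matches the IR op-type name that gen_code's
    # dispatcher looks up via "emit_" + node_type.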
    def emit_Maxmum(self, IR_node):
        code = "{:<15} = torch.max({}, {})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            self.parent_variable_name(IR_node, [1]))
        return code

    def emit_Square(self, IR_node):
        code = "{:<15} = {}.pow(2)".format(IR_node.variable_name,
                                           self.parent_variable_name(IR_node))
        return code

    def emit_PRelu(self, IR_node):
        code = "{:<15} = F.prelu({}, torch.from_numpy(_weights_dict['{}']['weights']))".format(
            IR_node.variable_name, self.parent_variable_name(IR_node, [0]),
            IR_node.name)

        if self.weight_loaded:
            self.weights_dict[IR_node.name]['weights'] = self.weights_dict[
                IR_node.name]['gamma']

        return code

    def emit_Cast(self, IR_node):
        dstType = IR_node.get_attr('dstType')

        if dstType == 'float':
            dst = 'torch.FloatTensor'
        elif dstType == 'double':
            dst = 'torch.DoubleTensor'
        elif dstType == 'int':
            dst = 'torch.IntTensor'
        else:
            raise ValueError("Unsupported cast type [{}].".format(dstType))

        code = "{:<15} = {}.type({})".format(
            IR_node.real_variable_name, self.parent_variable_name(IR_node),
            dst)

        return code

    def emit_Scope(self, IR_node):
        input_vars = [
            self.parent_variable_name(IR_node, [idx])
            for idx in range(len(IR_node.in_edges))
        ]
        code = "{:<15} = self.__{}({})".format(IR_node.real_variable_name,
                                               IR_node.pattern,
                                               ', '.join(input_vars))
        self._gen_scope_code(IR_node)
        return code

    def _gen_scope_code(self, scope_node):
        def _scope_func(scope_name, params, code, return_var):
            code = """
    def __{}({}):
{}
        return {}
    """.format(scope_name, params, code, ', '.join(return_var))
            return code

        if not self.layers_codes.get(scope_node.pattern, None):
            body_code = str()
            for node_name in scope_node.topology_list:
                node = self.IR_graph.get_node(node_name)
                node_type = node.type

                if hasattr(self, "emit_" + node_type):
                    func = getattr(self, "emit_" + node_type)
                    line = func(node)
                    if line != None:
                        body_code += "        " + line + '\n'
                else:
                    print("PytorchEmitter has not supported operator [%s]." %
                          (node_type))
                    self.emit_UNKNOWN(node)

            # param_code does not need parameter slice.
            input_params = scope_node.input_params
            input_params.insert(0, "self")
            param_code = ', '.join(input_params)
            function_code = _scope_func(scope_node.pattern, param_code,
                                        body_code, scope_node.return_variables)

            self.layers_codes[scope_node.pattern] = function_code

    def _layer_Embedding(self):
        self.add_body(
            0, """
    @staticmethod
    def __embedding(name, **kwargs):
        layer = nn.Embedding(**kwargs) #shape
        layer.state_dict()['weight'].copy_(torch.from_numpy(_weights_dict[name]['weights']))
        return layer
        """)

    def _layer_Conv(self):
        self.add_body(
            0, """
    @staticmethod
    def __conv(dim, name, **kwargs):
        if   dim == 1:  layer = nn.Conv1d(**kwargs)
        elif dim == 2:  layer = nn.Conv2d(**kwargs)
        elif dim == 3:  layer = nn.Conv3d(**kwargs)
        else:           raise NotImplementedError()

        layer.state_dict()['weight'].copy_(torch.from_numpy(_weights_dict[name]['weights']))
        if 'bias' in _weights_dict[name]:
            layer.state_dict()['bias'].copy_(torch.from_numpy(_weights_dict[name]['bias']))
        return layer""")

    def _layer_FullyConnected(self):
        self.add_body(
            0, """
    @staticmethod
    def __dense(name, **kwargs):
        layer = nn.Linear(**kwargs)
        layer.state_dict()['weight'].copy_(torch.from_numpy(_weights_dict[name]['weights']))
        if 'bias' in _weights_dict[name]:
            layer.state_dict()['bias'].copy_(torch.from_numpy(_weights_dict[name]['bias']))
        return layer""")

    def _layer_BatchNorm(self):
        self.add_body(
            0, """
    @staticmethod
    def __batch_normalization(dim, name, **kwargs):
        if   dim == 0 or dim == 1:  layer = nn.BatchNorm1d(**kwargs)
        elif dim == 2:  layer = nn.BatchNorm2d(**kwargs)
        elif dim == 3:  layer = nn.BatchNorm3d(**kwargs)
        else:           raise NotImplementedError()

        if 'scale' in _weights_dict[name]:
            layer.state_dict()['weight'].copy_(torch.from_numpy(_weights_dict[name]['scale']))
        else:
            layer.weight.data.fill_(1)

        if 'bias' in _weights_dict[name]:
            layer.state_dict()['bias'].copy_(torch.from_numpy(_weights_dict[name]['bias']))
        else:
            layer.bias.data.fill_(0)

        layer.state_dict()['running_mean'].copy_(torch.from_numpy(_weights_dict[name]['mean']))
        layer.state_dict()['running_var'].copy_(torch.from_numpy(_weights_dict[name]['var']))
        return layer""")

    def _layer_Scale(self):
        self.add_body(
            0, """
    # from torch.nn.parameter import Parameter

    class _Scale(nn.Module):

        def __init__(self, num_features, affine=True):
            super(KitModel._Scale, self).__init__()
            self.num_features = num_features
            self.affine = affine

            self.running_mean = torch.zeros(num_features)
            self.running_var = torch.ones(num_features)
            self.training = False
            self.eps = 1e-5
            if self.affine:
                self.weight = nn.Parameter(torch.Tensor(num_features))
                self.bias = nn.Parameter(torch.Tensor(num_features))
            else:
                self.register_parameter('weight', None)
                self.register_parameter('bias', None)
            self.reset_parameters()


        def reset_parameters(self):
            if self.affine:
                self.weight.data.uniform_()
                self.bias.data.zero_()

        def _check_input_dim(self, input):
            raise NotImplementedError

        def forward(self, input):
            self._check_input_dim(input)

            return F.batch_norm(
                input, self.running_mean, self.running_var, self.weight, self.bias,
                self.training,
                0 , self.eps)


    class Scale1d(_Scale):

        def _check_input_dim(self, input):
            if input.dim() != 2 and input.dim() != 3:
                raise ValueError('expected 2D or 3D input (got {}D input)'
                                .format(input.dim()))



    class Scale2d(_Scale):


        def _check_input_dim(self, input):
            if input.dim() != 4:
                raise ValueError('expected 4D input (got {}D input)'
                                .format(input.dim()))


    class Scale3d(_Scale):

        def _check_input_dim(self, input):
            if input.dim() != 5:
                raise ValueError('expected 5D input (got {}D input)'
                                .format(input.dim()))


    @staticmethod
    def __scale(dim, name, **kwargs):
        if   dim == 1:  layer = KitModel.Scale1d(**kwargs)
        elif dim == 2:  layer = KitModel.Scale2d(**kwargs)
        elif dim == 3:  layer = KitModel.Scale3d(**kwargs)
        else:           raise NotImplementedError()

        if 'scale' in _weights_dict[name]:
            layer.state_dict()['weight'].copy_(torch.from_numpy(_weights_dict[name]['scale']))
        else:
            layer.weight.data.fill_(1)

        if 'bias' in _weights_dict[name]:
            layer.state_dict()['bias'].copy_(torch.from_numpy(_weights_dict[name]['bias']))
        else:
            layer.bias.data.fill_(0)

        return layer""")
Example no. 16
class CaffeEmitter(Emitter):

    def __init__(self, model):
        from six import string_types as _string_types
        super(CaffeEmitter, self).__init__()
        if isinstance(model, _string_types):
            network_path = model
        else:
            network_path = model[0]
            self._load_weights(model[1])

        self.IR_graph = IRGraph(network_path)
        super(CaffeEmitter, self)._build()


    @property
    def header_code(self):
        return """from __future__ import print_function
import numpy as np
import sys, argparse
import caffe
from caffe import layers as L
from caffe import params as P
from caffe import to_proto
from six import text_type as _text_type


__weights_dict = dict()

def load_weights(weight_file):
    if weight_file is None:
        return

    try:
        weights_dict = np.load(weight_file).item()
    except:
        weights_dict = np.load(weight_file, encoding='bytes').item()

    return weights_dict


def KitModel(weight_file = None):
    n = caffe.NetSpec()
"""

    @property
    def end_code(self):
        return """    return n

def make_net(prototxt):
    n = KitModel()
    with open(prototxt, 'w') as fpb:
        print(n.to_proto(), file=fpb)

def gen_weight(weight_file, model, prototxt):
    global __weights_dict
    __weights_dict = load_weights(weight_file)

    net = caffe.Net(prototxt, caffe.TRAIN)

    for key in __weights_dict:
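        # per-layer dicts use conventional keys: 'weights' for conv/fc,
        # 'mean'/'var' (plus optional 'scale') for BatchNorm, a bare 'scale'
        # for Scale layers, and 'bias' where present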
        if 'weights' in __weights_dict[key]:
            net.params[key][0].data.flat = __weights_dict[key]['weights']
        elif 'mean' in __weights_dict[key]:
            net.params[key][0].data.flat = __weights_dict[key]['mean']
            net.params[key][1].data.flat = __weights_dict[key]['var']
            if 'scale' in __weights_dict[key]:
                net.params[key][2].data.flat = __weights_dict[key]['scale']
        elif 'scale' in __weights_dict[key]:
            net.params[key][0].data.flat = __weights_dict[key]['scale']
        if 'bias' in __weights_dict[key]:
            net.params[key][1].data.flat = __weights_dict[key]['bias']
    net.save(model)
    return net



if __name__=='__main__':
    parser = argparse.ArgumentParser(description='Generate caffe model and prototxt')
    parser.add_argument('--weight_file', '-w', type=_text_type, help='IR weight file')
    parser.add_argument('--prototxt', '-p', type=_text_type, default='caffe_converted.prototxt')
    parser.add_argument('--model', '-m', type=_text_type, default='caffe_converted.caffemodel')
    args = parser.parse_args()
    make_net(args.prototxt)
    gen_weight(args.weight_file, args.model, args.prototxt)

"""

    def gen_code(self, phase = 'test'):
        self.phase = phase
        self.add_body(0, self.header_code)

        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            node_type = current_node.type

            if hasattr(self, "emit_" + node_type):
                func = getattr(self, "emit_" + node_type)
                func(current_node)
            else:
                print("CaffeEmitter has not supported operator [%s]." % (node_type))
                self.emit_UNKNOWN(current_node)

        self.add_body(0, "")
        self.add_body(0, self.end_code)

        return self.body_code


    def run(self, dstNetworkPath, dstWeightPath = None, phase = 'test'):
        super(CaffeEmitter, self).run(dstNetworkPath, dstWeightPath, phase)
        if self.weight_loaded:
            self.save_weights(self.weights_dict, dstWeightPath)



    @staticmethod
    def _shapeToStr(shapes):
        return [dim.size if dim.size > 0 else 1 for dim in shapes.dim]



    def check_if_need_transpose(self, IR_node):
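        # When an FC layer follows the flatten of an N-D channels-last tensor,
        # reorder its weight rows so the channel axis comes first, matching
        # Caffe's NCHW flattening order.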
        parent = self.IR_graph.get_parent(IR_node.name, [0])
        while parent.type == 'Flatten':
            parent = self.IR_graph.get_parent(parent.name, [0])
        dim = len(parent.layer.attr['_output_shapes'].list.shape[0].dim)
        if dim > 2:
            original_dims = self.weights_dict[IR_node.name]['weights'].shape
            dims = [i.size for i in parent.layer.attr['_output_shapes'].list.shape[0].dim[1:]] + [-1]
            self.weights_dict[IR_node.name]['weights'] = np.reshape(self.weights_dict[IR_node.name]['weights'], dims)
            self.weights_dict[IR_node.name]['weights'] = np.transpose(self.weights_dict[IR_node.name]['weights'], [dim - 2] + list(range(0, dim - 2)) + [dim - 1])
            self.weights_dict[IR_node.name]['weights'] = np.reshape(self.weights_dict[IR_node.name]['weights'], original_dims)


    def emit_Conv(self, IR_node):
        self.add_body(1, "n.{:<15} = L.Convolution(n.{}, kernel_size={}, stride={}, num_output={}, pad={}, group={}, \
bias_term={}, ntop=1)".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.get_attr('kernel_shape')[0],
            IR_node.get_attr('strides')[1],
            IR_node.get_attr('kernel_shape')[-1],
            IR_node.get_attr('pads')[1],
            IR_node.get_attr('group', 1),
            IR_node.get_attr('use_bias', False)))

        dim = len(IR_node.get_attr('strides')) - 2
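        # IR stores conv kernels as HWIO (channels-last); Caffe expects OIHW,
        # hence the transpose below.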
        if self.weight_loaded:
            self.weights_dict[IR_node.name]['weights'] = np.transpose(self.weights_dict[IR_node.name]['weights'], [dim + 1, dim] + list(range(0, dim)))
            self.weights_dict[IR_node.variable_name] = self.weights_dict.pop(IR_node.name)


    def emit_Pool(self, IR_node):
        pooling_type = IR_node.get_attr('pooling_type')
        if pooling_type == 'MAX':
            pooling_type = P.Pooling.MAX
        elif pooling_type == 'AVG':
            pooling_type = P.Pooling.AVE
        elif pooling_type == 'STOCHASTIC':
            pooling_type = P.Pooling.STOCHASTIC
        else:
            raise ValueError("Unknown pooling type [{}].".format(pooling_type))

        if IR_node.layer.attr['global_pooling'].b:
            self.used_layers.add('GlobalPooling')
            self.add_body(1, "n.{:<15} = L.Pooling(n.{}, pool={}, stride={}, global_pooling=True, ntop=1)".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
                pooling_type,
                IR_node.get_attr('strides')[1]))
        else:
            self.add_body(1, "n.{:<15} = L.Pooling(n.{}, pool={}, kernel_size={}, pad_h={}, pad_w={}, stride={}, ntop=1)".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
                pooling_type,
                IR_node.get_attr('kernel_shape')[1],
                IR_node.get_attr('pads')[1],
                IR_node.get_attr('pads')[2],
                IR_node.get_attr('strides')[1]))


    def emit_UNKNOWN(self, IR_node):
        print(IR_node.IR_layer.name)


    def emit_DataInput(self, IR_node):
        shape = self._shapeToStr(IR_node.get_attr('shape'))
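        # move channels to the front: the IR shape is NHWC, Caffe wants NCHW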
        shape = [shape[0], shape[-1]] + shape[1:-1]
        self.add_body(1, "n.{:<15} = L.Input(shape=[dict(dim={})], ntop=1)".format(
            IR_node.variable_name,
            shape))


    def emit_Dropout(self, IR_node):
        in_place = True
        self.add_body(1, "n.{:<15} = L.Dropout(n.{}, dropout_ratio={}, in_place={}, ntop=1)".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            1 - IR_node.get_attr('keep_prob'),
            in_place))


    def emit_FullyConnected(self, IR_node):
        self.add_body(1, "n.{:<15} = L.InnerProduct(n.{}, num_output={}, bias_term={}, ntop=1)".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.layer.attr["units"].i,
            IR_node.get_attr('use_bias', False)))
        if self.weight_loaded:
            self.check_if_need_transpose(IR_node)
            self.weights_dict[IR_node.name]['weights'] = np.transpose(self.weights_dict[IR_node.name]['weights'], (1, 0))
            self.weights_dict[IR_node.variable_name] = self.weights_dict.pop(IR_node.name)


    def emit_BatchNorm(self, IR_node):
        self.add_body(1, "n.{:<15} = L.BatchNorm(n.{}, eps={}, use_global_stats={}, ntop=1)".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.get_attr('epsilon'),
            self.phase == 'test'
        ))
        scale_layer_var_name = IR_node.variable_name + "_scale"
        self.add_body(1, "n.{:<15} = L.Scale(n.{}, bias_term={}, ntop=1)".format(
            scale_layer_var_name,
            IR_node.variable_name,
            IR_node.get_attr('bias', False)
        ))
        IR_node.real_name = IR_node.name + "_scale"
        if self.weight_loaded:
            self.weights_dict[scale_layer_var_name] = dict()
            if 'scale' in self.weights_dict[IR_node.name]:
                self.weights_dict[scale_layer_var_name]['scale'] = self.weights_dict[IR_node.name]['scale']
                self.weights_dict[IR_node.name]['scale'] = 1
            if 'bias' in self.weights_dict[IR_node.name]:
                self.weights_dict[scale_layer_var_name]['bias'] = self.weights_dict[IR_node.name]['bias']
                self.weights_dict[IR_node.name].pop('bias', None)
            self.weights_dict[IR_node.variable_name] = self.weights_dict.pop(IR_node.name)


    def emit_LRN(self, IR_node):
        self.add_body(1, "n.{:<15} = L.LRN(n.{}, local_size={}, alpha={}, beta={}, k={})".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.get_attr('size') * 2 - 1,
            IR_node.get_attr('alpha'),
            IR_node.get_attr('beta'),
            IR_node.get_attr('k')
        ))


    def emit_Add(self, IR_node):
        input_layers = ', '.join(('n.' + self.IR_graph.get_parent(IR_node.name, [num]).real_variable_name) for num in range(0, len(IR_node.in_edges)))
        self.add_body(1, "n.{:<15} = L.Eltwise({}, operation=1, ntop=1)".format(
            IR_node.variable_name,
            input_layers,
        ))

    def emit_Flatten(self, IR_node):
        IR_node.real_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name


    def emit_Concat(self, IR_node):
        axis_array = (2, 3, 1, 0)
        axis = axis_array.index(IR_node.get_attr('axis'))
        input_layers = ', '.join(('n.' + self.IR_graph.get_node(edge).real_variable_name) for edge in IR_node.in_edges)
        self.add_body(1, "n.{:<15} = L.Concat({}, axis={})".format(
            IR_node.variable_name,
            input_layers,
            axis
        ))

    # def emit_Tanh(self, IR_node):
    #     self._emit_activation(IR_node, 'ops.tanh')


    def emit_Relu(self, IR_node):
        in_place = True
        self.add_body(1, "n.{:<15} = L.ReLU(n.{}, in_place={}, ntop=1)".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            in_place))



    def emit_Softmax(self, IR_node):
        self.add_body(1, "n.{:<15} = L.Softmax(n.{}, ntop=1)".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node)))
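
# A minimal usage sketch (file names are hypothetical, not from this listing):
# point the emitter at an IR graph and weight file, emit the script, then use
# its make_net()/gen_weight() helpers.
#
#   emitter = CaffeEmitter(('kit_model.pb', 'kit_model.npy'))
#   emitter.run('converted_caffe.py', 'converted_caffe.npy')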
Example #17
class TensorflowEmitter(Emitter):

    dtype_map = {
        graph_pb2.DT_FLOAT16: "tf.float16",
        graph_pb2.DT_FLOAT32: "tf.float32",
        graph_pb2.DT_FLOAT64: "tf.float64",
        graph_pb2.DT_INT16: "tf.int16",
        graph_pb2.DT_INT32: "tf.int32",
        graph_pb2.DT_INT64: "tf.int64",
        graph_pb2.DT_UINT8: "tf.uint8",
        graph_pb2.DT_UINT16: "tf.uint16"
    }

    @property
    def header_code(self):
        return """import tensorflow as tf

__weights_dict = dict()

is_train = {}

def load_weights(weight_file):
    import numpy as np

    if weight_file is None:
        return

    try:
        weights_dict = np.load(weight_file).item()
    except:
        weights_dict = np.load(weight_file, encoding='bytes').item()

    return weights_dict


def KitModel(weight_file = None):
    global __weights_dict
    __weights_dict = load_weights(weight_file)
""".format(self.trainable)

    def __init__(self, model):
        super(TensorflowEmitter, self).__init__()

        from six import string_types as _string_types
        if isinstance(model, _string_types):
            network_path = model
        else:
            network_path = model[0]
            self._load_weights(model[1])

        self.IR_graph = IRGraph(network_path)
        super(TensorflowEmitter, self)._build()

    def gen_code(self, phase):
        self.trainable = (phase == 'train')
        self.add_body(0, self.header_code)

        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            node_type = current_node.type

            if hasattr(self, "emit_" + node_type):
                func = getattr(self, "emit_" + node_type)
                func(current_node)
            else:
                print("TensorflowEmitter has not supported operator [%s]." %
                      (node_type))
                self.emit_UNKNOWN(current_node)

        self.add_body(
            1, "return {}, {}".format(
                ', '.join([
                    self.IR_graph.get_node(name).real_variable_name
                    for name in self.IR_graph.input_layers
                    if self.IR_graph.get_node(name).type != 'Const'
                ]), ', '.join([
                    self.IR_graph.get_node(name).real_variable_name
                    for name in self.IR_graph.output_layers
                    if self.IR_graph.get_node(name).type != 'Pack'
                ])))

        self.add_body(0, "")
        for i in self.used_layers:
            func = getattr(self, "_layer_" + i)
            func()

        return self.body_code

    @staticmethod
    def _shapeToStr(shapes):
        ret = [dim.size if dim.size != -1 else 'None' for dim in shapes.dim]
        return ', '.join('%s' % i for i in ret)

    def emit_Conv(self, IR_node):
        self.used_layers.add(IR_node.type)
        strides_str = ', '.join('%s' % i
                                for i in IR_node.get_attr('strides')[1:-1])
        input_node, padding = self._defuse_padding(IR_node)
        self.add_body(
            1,
            "{:<15} = convolution({}, group={}, strides=[{}], padding='{}', name='{}')"
            .format(IR_node.variable_name, input_node,
                    IR_node.get_attr('group', 1), strides_str, padding,
                    IR_node.name))

    def _defuse_padding(self, IR_node, extra_str=""):
        auto_pad = IR_node.get_attr('auto_pad')
        if auto_pad:
            input_node = self.parent_variable_name(IR_node)
            if auto_pad == 'VALID':
                padding = 'VALID'
            elif auto_pad.startswith("SAME"):
                padding = 'SAME'
            else:
                raise ValueError("Unknown padding type [{}].".format(auto_pad))

            return input_node, padding

        else:
            padding = IR_node.get_attr("pads")
            padding = convert_onnx_pad_to_tf(padding)
            if not is_valid_padding(padding):
                input_node = IR_node.variable_name + '_pad'
                self.add_body(
                    1, "{:<15} = tf.pad({}, paddings = {}{})".format(
                        input_node, self.parent_variable_name(IR_node),
                        padding, extra_str))
            else:
                input_node = self.parent_variable_name(IR_node)

            return input_node, 'VALID'
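
    # A sketch of what convert_onnx_pad_to_tf is assumed to do (the helper is
    # defined elsewhere): IR pads come ONNX-style, all begins then all ends,
    # [b1, ..., bn, e1, ..., en], while tf.pad wants one [begin, end] pair per
    # dimension:
    #
    #   def convert_onnx_pad_to_tf(pads):
    #       half = len(pads) // 2
    #       return [list(p) for p in zip(pads[:half], pads[half:])]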

    def emit_Constant(self, IR_node):
        self.add_body(
            1,
            "{:<15} = tf.constant(__weights_dict['{}']['value'], name='{}')".
            format(IR_node.variable_name, IR_node.name, IR_node.name))

    def emit_Pool(self, IR_node):
        pooling_type = IR_node.get_attr('pooling_type')
        if pooling_type == 'MAX':
            op = 'max_pool'
            padding_const = ", constant_values=float('-Inf')"
        elif pooling_type == 'AVG':
            op = 'avg_pool'
            padding_const = ""
        else:
            raise ValueError("unknown pooling type [{}].".format(pooling_type))

        arrlen = len(IR_node.get_attr('strides'))
        dim_str = '3d' if arrlen == 5 else ""

        if IR_node.layer.attr['global_pooling'].b:
            self.add_body(
                1,
                "{:<15} = tf.nn.{}{}({}, [1] + {}.get_shape().as_list()[1:-1] + [1], strides = [1] * {}, padding = 'VALID', name = '{}')"
                .format(IR_node.variable_name, op, dim_str,
                        self.parent_variable_name(IR_node),
                        self.parent_variable_name(IR_node), arrlen,
                        IR_node.name))

        else:
            dim = len(IR_node.get_attr("strides")) - 2
            dilations = IR_node.get_attr('dilations')
            if dilations:
                for e in IR_node.get_attr('dilations'):
                    assert e == 1

            pool_size = IR_node.get_attr('kernel_shape')[1:-1]

            strides = IR_node.get_attr('strides')[1:-1]
            padding = IR_node.get_attr('pads')[1:dim + 1]  # spatial begin-pads

            # 'SAME' padding is equivalent exactly when: AVG pooling, a
            # uniform kernel, stride 1 everywhere, uniform symmetric padding,
            # and kernel_size == 2 * pad + 1.
            if (pooling_type == "AVG"
                    and pool_size.count(pool_size[0]) == len(pool_size)
                    and strides[0] == 1
                    and strides.count(strides[0]) == len(strides)
                    and padding.count(padding[0]) == len(padding)
                    and pool_size[0] == padding[0] * 2 + 1):
                kernel_shape_str = ', '.join(
                    '%s' % i for i in IR_node.get_attr('kernel_shape'))
                strides_str = ', '.join('%s' % i
                                        for i in IR_node.get_attr('strides'))

                self.add_body(
                    1,
                    "{:<15} = tf.nn.{}{}({}, [{}], [{}], padding='{}', name='{}')"
                    .format(IR_node.variable_name, op, dim_str,
                            self.parent_variable_name(IR_node),
                            kernel_shape_str, strides_str, 'SAME',
                            IR_node.name))

            else:

                kernel_shape_str = ', '.join(
                    '%s' % i for i in IR_node.get_attr('kernel_shape'))
                strides_str = ', '.join('%s' % i
                                        for i in IR_node.get_attr('strides'))

                input_node, padding = self._defuse_padding(
                    IR_node, padding_const)

                self.add_body(
                    1,
                    "{:<15} = tf.nn.{}{}({}, [{}], [{}], padding='{}', name='{}')"
                    .format(IR_node.variable_name, op, dim_str, input_node,
                            kernel_shape_str, strides_str, padding,
                            IR_node.name))

    def emit_UNKNOWN(self, IR_node):
        print(IR_node.name)

    def emit_Add(self, IR_node):
        self.add_body(
            1, "{:<15} = {}".format(
                IR_node.variable_name,
                ' + '.join('%s' % self.IR_graph.get_node(s).real_variable_name
                           for s in IR_node.in_edges)))

    def emit_DataInput(self, IR_node):
        assert not IR_node.in_edges
        shape_str = self._shapeToStr(IR_node.layer.attr["shape"].shape)

        if 'dtype' in IR_node.layer.attr:
            dtype_str = "{},".format(
                self.dtype_map[IR_node.layer.attr['dtype'].type])
        else:
            dtype_str = "tf.float32,"

        code = "{:<15} = tf.placeholder({} shape = ({}), name = '{}')".format(
            IR_node.variable_name, dtype_str, shape_str, IR_node.name)

        self.add_body(1, code)

    def emit_Dropout(self, IR_node):
        parent = self.IR_graph.get_parent(IR_node.name, [0])
        if self.trainable:
            self.add_body(
                1,
                "{:<15} = Dropout(name = '{}', dropout_rate = {})({})".format(
                    IR_node.variable_name, IR_node.name,
                    1 - IR_node.IR_layer.attr["keep_prob"].f,
                    parent.real_variable_name))
        else:
            IR_node.real_name = parent.real_name

    def emit_FullyConnected(self, IR_node):
        if IR_node.name in self.weights_dict and 'weights' in self.weights_dict[
                IR_node.name]:
            kernel_str = "kernel_initializer = tf.constant_initializer(__weights_dict['{}']['weights']), ".format(
                IR_node.name)
        else:
            kernel_str = ""

        if IR_node.name in self.weights_dict and 'bias' in self.weights_dict[
                IR_node.name]:
            bias_str = "bias_initializer = tf.constant_initializer(__weights_dict['{}']['bias']), ".format(
                IR_node.name)
        else:
            bias_str = ""

        # check whether flatten operator should be added
        parent = self.IR_graph.get_parent(IR_node.name, [0])
        parent_shape = shape_to_list(parent.get_attr('_output_shapes')[0])
        if len(parent_shape) > 2:
            # flatten is needed
            self.add_body(
                1, "{:<15} = tf.contrib.layers.flatten({})".format(
                    IR_node.variable_name + '_flatten',
                    self.parent_variable_name(IR_node)))

            code = "{:<15} = tf.layers.dense({}, {}, {}{}use_bias = {})".format(
                IR_node.variable_name, IR_node.variable_name + '_flatten',
                IR_node.layer.attr['units'].i, kernel_str, bias_str,
                IR_node.layer.attr['use_bias'].b)
            self.add_body(1, code)

        else:
            code = "{:<15} = tf.layers.dense({}, {}, {}{}use_bias = {})".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                IR_node.layer.attr['units'].i, kernel_str, bias_str,
                IR_node.layer.attr['use_bias'].b)
            self.add_body(1, code)

    def emit_Flatten(self, IR_node):
        #self._emit_unary_operation(IR_node, "contrib.layers.flatten")
        self.add_body(
            1, "{:<15} = tf.contrib.layers.flatten({})".format(
                IR_node.variable_name, self.parent_variable_name(IR_node)))

    def emit_Mul(self, IR_node):
        self.add_body(
            1, "{:<15} = {}".format(
                IR_node.variable_name,
                ' * '.join('%s' % self.IR_graph.get_node(s).real_variable_name
                           for s in IR_node.in_edges)))

    def emit_Const(self, IR_node):
        if 'dtype' in IR_node.layer.attr:
            dtype_str = "dtype={}".format(
                self.dtype_map[IR_node.layer.attr['dtype'].type])
            if 'int' in dtype_str:
                self.add_body(
                    1, "{:<15} = tf.constant({}, {}, shape=(1,))".format(
                        IR_node.variable_name, IR_node.layer.attr['value'].i,
                        dtype_str))
            else:
                self.add_body(
                    1, "{:<15} = tf.constant({}, {}, shape=(1,))".format(
                        IR_node.variable_name, IR_node.layer.attr['value'].f,
                        dtype_str))

        else:
            dtype_str = "dtype=tf.float32"
            self.add_body(
                1, "{:<15} = tf.constant({}, {}, shape=(1,))".format(
                    IR_node.variable_name, IR_node.layer.attr['value'].f,
                    dtype_str))

    def emit_Reshape(self, IR_node):
        self.add_body(
            1, "{:<15} = tf.reshape({}, [{}], '{}')".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                ', '.join('%s' % i for i in IR_node.get_attr('shape')),
                IR_node.name))

    def emit_Sub(self, IR_node):
        self.add_body(
            1, "{:<15} = {}".format(
                IR_node.variable_name,
                ' - '.join('%s' % self.IR_graph.get_node(s).real_variable_name
                           for s in IR_node.in_edges)))

    def _emit_unary_operation(self, IR_node, op_name):
        self.add_body(
            1, "{:<15} = tf.{}({}, name = '{}')".format(
                IR_node.variable_name, op_name,
                self.parent_variable_name(IR_node), IR_node.name))

    def emit_Tanh(self, IR_node):
        self._emit_unary_operation(IR_node, 'tanh')

    def emit_Elu(self, IR_node):
        self._emit_unary_operation(IR_node, 'nn.elu')

    def emit_Relu(self, IR_node):
        self._emit_unary_operation(IR_node, 'nn.relu')

    def emit_Relu6(self, IR_node):
        self._emit_unary_operation(IR_node, 'nn.relu6')

    def emit_CRelu(self, IR_node):
        self._emit_unary_operation(IR_node, 'nn.crelu')

    def emit_PRelu(self, IR_node):
        self.used_layers.add(IR_node.type)
        self.add_body(
            1, "{:<15} = prelu({}, name='{}')".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                IR_node.name))

    def emit_LeakyRelu(self, IR_node):
        self.add_body(
            1, "{:<15} = tf.nn.leaky_relu({}, alpha={}, name='{}')".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                IR_node.get_attr('alpha'), IR_node.name))

    def emit_Softmax(self, IR_node):
        self._emit_unary_operation(IR_node, 'nn.softmax')

    def emit_Sigmoid(self, IR_node):
        self._emit_unary_operation(IR_node, 'sigmoid')

    def emit_Embedding(self, IR_node):
        raise NotImplementedError()
        ret = "{:<15} = Embedding(input_dim = {}, output_dim = {}, mask_zero = {})({})".format(
            IR_node.name, IR_node.IR_layer.attr['input_dim'].i,
            IR_node.IR_layer.attr['output_dim'].i,
            IR_node.IR_layer.attr['mask_zero'].b, IR_node.in_edges[0])

        return ret

    def emit_LSTM(self, IR_node):
        return self.emit_RNNs(IR_node, "LSTM")

    def emit_GRU(self, IR_node):
        return self.emit_RNNs(IR_node, "GRU")

    def emit_Concat(self, IR_node):
        self.add_body(
            1, "{:<15} = tf.concat([{}], {}, name = '{}')".format(
                IR_node.variable_name, ', '.join(
                    self.IR_graph.get_node(s).real_variable_name
                    for s in IR_node.in_edges), IR_node.layer.attr['axis'].i,
                IR_node.name))

    def emit_BatchNorm(self, IR_node):
        self.used_layers.add(IR_node.type)
        self.add_body(
            1,
            "{:<15} = batch_normalization({}, variance_epsilon={}, name='{}')".
            format(IR_node.variable_name, self.parent_variable_name(IR_node),
                   IR_node.get_attr('epsilon'), IR_node.name))

    def emit_Scale(self, IR_node):
        self.used_layers.add(IR_node.type)
        self.add_body(
            1, "{:<15} = scale({}, name='{}')".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                IR_node.name))

    def emit_Pad(self, IR_node):
        padding = IR_node.get_attr('pads')
        padding = convert_onnx_pad_to_tf(padding)

        mode = IR_node.get_attr('mode', 'constant')
        mode = mode.lower()
        if mode == 'constant' or mode == 'reflect':
            mode = mode.upper()
        elif mode == 'edge':
            mode = 'SYMMETRIC'
        else:
            raise NotImplementedError(
                "Not support padding mode {}.".format(mode))

        self.add_body(
            1, "{:<15} = tf.pad({}, {}, '{}', name='{}')".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                padding, mode, IR_node.variable_name))

    def emit_Squeeze(self, IR_node):
        self.add_body(
            1, "{:<15} = tf.squeeze({}, [{}], name = '{}')".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                ', '.join('%s' % axis
                          for axis in IR_node.layer.attr['axes'].list.i),
                IR_node.name))

    def emit_ReduceMean(self, IR_node):
        self.add_body(
            1, "{:<15} = tf.reduce_mean({}, [{}], {}, name = '{}')".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                ','.join('%s' % i for i in IR_node.get_attr('axes')),
                IR_node.get_attr('keepdims'), IR_node.name))

    def emit_LRN(self, IR_node):
        self.add_body(
            1,
            "{:<15} = tf.nn.lrn({}, {}, alpha = {}, beta = {}, name = '{}')".
            format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                IR_node.get_attr('size') - 1, IR_node.layer.attr['alpha'].f /
                (IR_node.layer.attr['size'].i * 2 - 1),
                IR_node.get_attr('beta'), IR_node.name))

    def emit_SeparableConv(self, IR_node):
        self.used_layers.add(IR_node.type)
        strides_str = ', '.join('%s' % i for i in IR_node.get_attr('strides'))
        input_node, padding = self._defuse_padding(IR_node)
        self.add_body(
            1,
            "{:<15} = separable_convolution({}, strides = [{}], padding = '{}', name = '{}')"
            .format(IR_node.variable_name, input_node, strides_str, padding,
                    IR_node.name))

    def emit_DepthwiseConv(self, IR_node):
        self.used_layers.add(IR_node.type)
        strides_str = ', '.join('%s' % i
                                for i in IR_node.layer.attr['strides'].list.i)
        input_node, padding = self._defuse_padding(IR_node)
        self.add_body(
            1,
            "{:<15} = depthwise_convolution({}, strides = [{}], padding = '{}', name = '{}')"
            .format(IR_node.variable_name, input_node, strides_str, padding,
                    IR_node.name))

    def emit_Crop(self, IR_node):
        border = IR_node.get_attr('border')
        assert len(border) == 4

        output_shape = IR_node.get_attr('_output_shapes')[0]
        output_shape = shape_to_list(output_shape)

        self.add_body(
            1,
            "{:<15} = tf.image.crop_to_bounding_box({}, offset_height={}, offset_width={}, target_height={}, target_width={})"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    border[0], border[1], output_shape[1], output_shape[2]))

    def emit_ConvTranspose(self, IR_node):
        self.used_layers.add(IR_node.type)
        output_shape = [1] + shape_to_list(
            IR_node.get_attr('_output_shapes')[0])[1:]
        input_node, padding = self._defuse_padding(IR_node)
        self.add_body(
            1,
            "{:<15} = convolution_transpose({}, output_shape={}, strides={}, padding='{}', name='{}')"
            .format(IR_node.variable_name, input_node, output_shape,
                    IR_node.get_attr('strides'), padding, IR_node.name))

    def emit_Slice(self, IR_node):
        extra_str = ""
        if IR_node.get_attr('strides'):
            extra_str += ", strides={}".format(IR_node.get_attr('strides'))
        if IR_node.get_attr('begin_mask'):
            extra_str += ", begin_mask={}".format(
                IR_node.get_attr('begin_mask'))
        if IR_node.get_attr('end_mask'):
            extra_str += ", end_mask={}".format(IR_node.get_attr('end_mask'))
        self.add_body(
            1, "{:<15} = tf.strided_slice({}, {}, {}{}, name='{}')".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                IR_node.get_attr('starts'), IR_node.get_attr('ends'),
                extra_str, IR_node.name))

    def emit_Shape(self, IR_node):
        self.add_body(
            1, "{:<15} = tf.shape({}, name='{}')".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                IR_node.name))

    def emit_Pack(self, IR_node):
        self.add_body(
            1, "{:<15} = tf.stack({}, axis={}, name='{}')".format(
                IR_node.variable_name, '[' +
                ','.join('%s' % self.IR_graph.get_node(s).real_variable_name
                         for s in IR_node.in_edges) + ']',
                IR_node.get_attr('axis'), IR_node.name))

    def emit_Split(self, IR_node):
        self.add_body(
            1, "{:<15} = tf.split({}, {}, {}, name='{}')".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                IR_node.get_attr('split'), IR_node.get_attr('axis'),
                IR_node.name))

    def _layer_Conv(self):
        self.add_body(
            0, """
def convolution(input, name, group, **kwargs):
    w = tf.Variable(__weights_dict[name]['weights'], trainable=is_train, name=name + "_weight")
    if group == 1:
        layer = tf.nn.convolution(input, w, **kwargs)
    else:
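        # grouped convolution: split the input and the filters along the
        # channel axis, convolve each group, then concatenate the results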
        weight_groups = tf.split(w, num_or_size_splits=group, axis=-1)
        xs = tf.split(input, num_or_size_splits=group, axis=-1)
        convolved = [tf.nn.convolution(x, weight, **kwargs) for
                    (x, weight) in zip(xs, weight_groups)]
        layer = tf.concat(convolved, axis=-1)

    if 'bias' in __weights_dict[name]:
        b = tf.Variable(__weights_dict[name]['bias'], trainable=is_train, name=name + "_bias")
        layer = layer + b
    return layer""")

    def _layer_PRelu(self):
        self.add_body(
            0, """
def prelu(input, name):
    gamma = tf.Variable(__weights_dict[name]['gamma'], name=name + "_gamma", trainable=is_train)
    return tf.maximum(0.0, input) + gamma * tf.minimum(0.0, input)
    """)

    def _layer_BatchNorm(self):
        self.add_body(
            0, """
def batch_normalization(input, name, **kwargs):
    mean = tf.Variable(__weights_dict[name]['mean'], name = name + "_mean", trainable = is_train)
    variance = tf.Variable(__weights_dict[name]['var'], name = name + "_var", trainable = is_train)
    offset = tf.Variable(__weights_dict[name]['bias'], name = name + "_bias", trainable = is_train) if 'bias' in __weights_dict[name] else None
    scale = tf.Variable(__weights_dict[name]['scale'], name = name + "_scale", trainable = is_train) if 'scale' in __weights_dict[name] else None
    return tf.nn.batch_normalization(input, mean, variance, offset, scale, name = name, **kwargs)
""")

    def _layer_Scale(self):
        self.add_body(
            0, """
def scale(input, name, **kwargs):
    mean = tf.Variable(__weights_dict[name]['scale_mean'], name = name + "_mean", trainable = is_train)
    variance = tf.Variable(__weights_dict[name]['scale_var'], name = name + "_var", trainable = is_train)
    offset = tf.Variable(__weights_dict[name]['bias'], name = name + "_bias", trainable = is_train) if 'bias' in __weights_dict[name] else None
    scale = tf.Variable(__weights_dict[name]['scale'], name = name + "_scale", trainable = is_train) if 'scale' in __weights_dict[name] else None
    return tf.nn.batch_normalization(input, mean, variance, offset, scale, variance_epsilon = 0, name = name)
""")

    def _layer_SeparableConv(self):
        self.add_body(
            0, """
def separable_convolution(input, name, **kwargs):
    depthwise = tf.Variable(__weights_dict[name]['depthwise_filter'], trainable = is_train, name = name + "_df")
    pointwise = tf.Variable(__weights_dict[name]['pointwise_filter'], trainable = is_train, name = name + "_pf")
    layer = tf.nn.separable_conv2d(input, depthwise, pointwise, **kwargs)
    if 'bias' in __weights_dict[name]:
        b = tf.Variable(__weights_dict[name]['bias'], trainable = is_train, name = name + "_bias")
        layer = layer + b
    return layer""")

    def _layer_DepthwiseConv(self):
        self.add_body(
            0, """
def depthwise_convolution(input, name, **kwargs):
    depthwise = tf.Variable(__weights_dict[name]['weights'], trainable = is_train, name = name + "_df")
    layer = tf.nn.depthwise_conv2d(input, depthwise, **kwargs)
    if 'bias' in __weights_dict[name]:
        b = tf.Variable(__weights_dict[name]['bias'], trainable = is_train, name = name + "_bias")
        layer = layer + b
    return layer""")

    def _layer_ConvTranspose(self):
        self.add_body(
            0, """
def convolution_transpose(input, name, **kwargs):
    w = tf.Variable(__weights_dict[name]['weights'], trainable=is_train, name=name + "_weight")
    dim = __weights_dict[name]['weights'].ndim - 2
    if dim == 2:
        layer = tf.nn.conv2d_transpose(input, w, **kwargs)
    elif dim == 3:
        layer = tf.nn.conv3d_transpose(input, w, **kwargs)
    else:
        raise ValueError("Error dim number {} in ConvTranspose".format(dim))

    if 'bias' in __weights_dict[name]:
        b = tf.Variable(__weights_dict[name]['bias'], trainable=is_train, name=name + "_bias")
        layer = layer + b
    return layer""")
Example #18
class PytorchEmitter(Emitter):

    dtype_map = {
        graph_pb2.DT_FLOAT16 : "float16",
        graph_pb2.DT_FLOAT32 : "float32",
        graph_pb2.DT_FLOAT64 : "float64",
        graph_pb2.DT_INT16 : "int16",
        graph_pb2.DT_INT32 : "int32",
        graph_pb2.DT_INT64 : "int64",
        graph_pb2.DT_UINT8 : "uint8",
        graph_pb2.DT_UINT16 : "uint16"
    }

    # Base Functions
    def __init__(self, model):
        super(PytorchEmitter, self).__init__()
        from six import string_types as _string_types
        if isinstance(model, _string_types):
            network_path = model
            weight_path = None
        else:
            network_path = model[0]
            weight_path = model[1]

        self.init_code = str()
        self.IR_graph = IRGraph(network_path)
        self.IR_graph.build()
        if weight_path:
            self._load_weights(weight_path)


    def run(self, dstNetworkPath, dstWeightPath = None, phase = 'test'):
        super(PytorchEmitter, self).run(dstNetworkPath, dstWeightPath, phase)
        if self.weight_loaded:
            self.save_weights(self.weights_dict, dstWeightPath)


    def add_init(self, indent, codes):
        if isinstance(codes, _string_types):
            codes = [codes]
        for code in codes:
            self.init_code += ("    " * indent) + code + '\n'


    @property
    def header_code(self):
        return """import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

__weights_dict = dict()

def load_weights(weight_file):
    if weight_file is None:
        return

    try:
        weights_dict = np.load(weight_file).item()
    except:
        weights_dict = np.load(weight_file, encoding='bytes').item()

    return weights_dict

class KitModel(nn.Module):
"""

    def gen_code(self, phase):
        self.add_init(1, """
    def __init__(self, weight_file):
        super(KitModel, self).__init__()
        global __weights_dict
        __weights_dict = load_weights(weight_file)
""")

        self.add_body(1, "def forward(self, x):")

        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            node_type = current_node.type

            if hasattr(self, "emit_" + node_type):
                func = getattr(self, "emit_" + node_type)
                line = func(current_node)

            else:
                print("Pytorch Emitter has not supported operator [%s]." % (node_type))
                self.emit_UNKNOWN(current_node)

        self.add_body(2, "return {}".format(
            ','.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.output_layers])))

        self.add_body(0, "")
        for i in self.used_layers:
            func = getattr(self, "_layer_" + i)
            func()

        return self.header_code + '\n' + self.init_code + '\n' + self.body_code


    def _defuse_padding(self, IR_node, extra_str = ""):
        input_node = self.parent_variable_name(IR_node)
        if IR_node.get_attr('auto_pad') == 'VALID':
            return input_node

        if is_valid_padding(IR_node.get_attr("pads")):
            return input_node

        padding = self._convert_padding(IR_node)
        input_node = IR_node.variable_name + '_pad'
        self.add_body(2, "{:<15} = F.pad({}, {}{})".format(
            input_node,
            self.parent_variable_name(IR_node),
            padding,
            extra_str
        ))

        return input_node


    def emit_Conv(self, IR_node):
        self.used_layers.add(IR_node.type)

        dim = len(IR_node.get_attr('strides')) - 2

        in_channels = IR_node.get_attr('kernel_shape')[-2]
        filter = IR_node.get_attr('kernel_shape')[-1]
        kernel = IR_node.get_attr('kernel_shape')[:-2]
        strides = IR_node.get_attr('strides')[1:-1]

        self.add_init(2, "self.{} = self.__conv({}, name='{}', in_channels={}, out_channels={}, kernel_size={}, stride={}, groups={}, bias={})".format(
            IR_node.variable_name,
            dim,
            IR_node.name,
            in_channels,
            filter,
            tuple(kernel),
            tuple(strides),
            # padding,
            IR_node.get_attr('group', 1),
            IR_node.get_attr('use_bias')))

        input_node = self._defuse_padding(IR_node)
        self.add_body(2, "{:<15} = self.{}({})".format(
            IR_node.variable_name,
            IR_node.variable_name,
            input_node))

        if self.weight_loaded:
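            # IR conv kernels are HWIO (channels-last); PyTorch's nn.ConvNd
            # expects OIHW, hence this transpose.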
            self.weights_dict[IR_node.name]['weights'] = np.transpose(self.weights_dict[IR_node.name]['weights'], [dim + 1, dim] + list(range(0, dim)))


    def emit_Pool(self, IR_node):
        dim = len(IR_node.get_attr('strides')) - 2

        if IR_node.get_attr('pooling_type') == "MAX":
            pool_name = "max_pool{}d".format(dim)
            exstr = ", value=float('-Inf')"
        elif IR_node.get_attr('pooling_type') == "AVG":
            pool_name = "avg_pool{}d".format(dim)
            exstr = ""
        else:
            raise ValueError("Unknown pooling type [{}].".format(IR_node.get_attr('pooling_type')))

        if IR_node.layer.attr['global_pooling'].b:
            self.add_body(2, "{:<15} = F.{}(input = {}, kernel_size = {}.size()[2:])".format(
                IR_node.variable_name,
                pool_name,
                self.parent_variable_name(IR_node),
                self.parent_variable_name(IR_node)
            ))

        else:
            for e in IR_node.get_attr('dilations', []):
                assert e == 1

            pool_size = IR_node.get_attr('kernel_shape')[1:-1]
            strides = IR_node.get_attr('strides')[1:-1]

            input_node = self._defuse_padding(IR_node, exstr)
            self.add_body(2, "{:<15} = F.{}({}, kernel_size={}, stride={})".format(
                IR_node.variable_name,
                pool_name,
                input_node,
                tuple(pool_size),
                tuple(strides)
                ))


    def emit_UNKNOWN(self, IR_node):
        print(IR_node.name)


    def emit_DataInput(self, IR_node):
        # Ignore it in Pytorch
        IR_node.real_name = 'x'


    def emit_Dropout(self, IR_node):
        # F.dropout takes the drop probability, so convert from keep_prob.
        self.add_body(2, "{:<15} = F.dropout(input = {}, p = {}, training = self.training, inplace = True)".format(
            IR_node.variable_name,
            self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name,
            1 - IR_node.layer.attr["keep_prob"].f))

    def check_if_need_transpose(self, IR_node):
        parent = self.IR_graph.get_parent(IR_node.name, [0])
        while parent.type == 'Flatten':
            parent = self.IR_graph.get_parent(parent.name, [0])
        dim = len(parent.layer.attr['_output_shapes'].list.shape[0].dim)
        if dim > 2:
            original_dims = self.weights_dict[IR_node.name]['weights'].shape
            dims = [i.size for i in parent.layer.attr['_output_shapes'].list.shape[0].dim[1:]] + [-1]
            self.weights_dict[IR_node.name]['weights'] = np.reshape(self.weights_dict[IR_node.name]['weights'], dims)
            self.weights_dict[IR_node.name]['weights'] = np.transpose(self.weights_dict[IR_node.name]['weights'], [dim - 2] + list(range(0, dim - 2)) + [dim - 1])
            self.weights_dict[IR_node.name]['weights'] = np.reshape(self.weights_dict[IR_node.name]['weights'], original_dims)


    def emit_FullyConnected(self, IR_node):
        self.used_layers.add(IR_node.type)
        in_features = 1
        for i in self.IR_graph.get_parent(IR_node.name, [0]).layer.attr['_output_shapes'].list.shape[0].dim[1:]:
            in_features *= i.size

        self.add_init(2, "self.{} = self.__dense(name = '{}', in_features = {}, out_features = {}, bias = {})".format(
            IR_node.variable_name,
            IR_node.name,
            in_features,
            IR_node.layer.attr["units"].i,
            IR_node.IR_layer.attr["use_bias"].b))

        input_node = self.parent_variable_name(IR_node)
        if len(self.IR_graph.get_parent(IR_node.name, [0]).get_attr('_output_shapes')[0].dim) > 2:
            input_node = "{}.view({}.size(0), -1)".format(input_node, input_node)
        self.add_body(2, "{:<15} = self.{}({})".format(
            IR_node.variable_name,
            IR_node.variable_name,
            input_node))

        if self.weight_loaded:
            self.check_if_need_transpose(IR_node)
            self.weights_dict[IR_node.name]['weights'] = np.transpose(self.weights_dict[IR_node.name]['weights'], (1, 0))


    def emit_Flatten(self, IR_node):
        parent = self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name
        self.add_body(2, "{:<15} = {}.view({}.size(0), -1)".format(
            IR_node.variable_name,
            parent,
            parent))


    def emit_Reshape(self, IR_node):
        raise NotImplementedError
        shape_str = IRGraph.shapeToStr(IR_node.IR_layer.attr["shape"].shape, True)
        self.add_body(1, "{:<15} = Reshape(name = \"{}\", target_shape = ({}))({})".format(
            IR_node.variable_name,
            IR_node.name,
            shape_str,
            self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name))


    def emit_Tanh(self, IR_node):
        raise NotImplementedError()
        code = "{:<15} = Activation(name = '{}', activation = 'tanh')({})".format(
                IR_node.replace_scope(IR_node.name),
                IR_node.name,
                IR_node.replace_scope(IR_node.in_edges[0]))
        return code


    def emit_Relu(self, IR_node):
        self.add_body(2, "{:<15} = F.relu({})".format(
            IR_node.variable_name,
            self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name))


    def emit_Softmax(self, IR_node):
        self.add_body(2, "{:<15} = F.softmax({})".format(
            IR_node.variable_name,
            self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name))


    def emit_Sigmoid(self, IR_node):
        self.add_body(2, "{:<15} = torch.sigmoid({})".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node)))


    def emit_Embedding(self, IR_node):
        raise NotImplementedError()
        ret = "{:<15} = Embedding(input_dim = {}, output_dim = {}, mask_zero = {})({})".format(
                IR_node.name,
                IR_node.IR_layer.attr['input_dim'].i,
                IR_node.IR_layer.attr['output_dim'].i,
                IR_node.IR_layer.attr['mask_zero'].b,
                IR_node.in_edges[0])

        return ret


    def emit_RNNs(self, IR_node, func):
        raise NotImplementedError()
        # for Keras
        if "dropout" in IR_node.IR_layer.attr:
            dropout_str = ",dropout = {}, recurrent_dropout = {}".format(
                    IR_node.IR_layer.attr['dropout'].f,
                    IR_node.IR_layer.attr['recurrent_dropout'].f)
        else:
            dropout_str = ""

        code = "{:<15} = {}(units = {}, use_bias = {} {})({})".format(
                IR_node.name,
                func,
                IR_node.IR_layer.attr['units'].i,
                IR_node.IR_layer.attr['use_bias'].b,
                dropout_str,
                IR_node.in_edges[0])

        return code


    def emit_LSTM(self, IR_node):
        return self.emit_RNNs(IR_node, "LSTM")


    def emit_GRU(self, IR_node):
        return self.emit_RNNs(IR_node, "GRU")


    def emit_Add(self, IR_node):
        self.add_body(2, "{:<15} = {}".format(
            IR_node.variable_name,
            ' + '.join('%s' % self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges)))


    @staticmethod
    def _convert_axis(IR_node, axis):
        ndim = len(IR_node.get_attr('_output_shapes')[0].dim)
        if axis == 0:
            return 0
        elif axis == ndim - 1:
            return 1
        else:
            return axis + 1


    def emit_Concat(self, IR_node):
        axis = self._convert_axis(IR_node, IR_node.get_attr('axis'))
        self.add_body(2, "{:<15} = torch.cat(({}), {})".format(
            IR_node.variable_name,
            ', '.join(self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges),
            axis,
        ))


    def emit_BatchNorm(self, IR_node):
        self.used_layers.add(IR_node.type)
        dim = len(IR_node.layer.attr['_output_shapes'].list.shape[0].dim) - 2

        self.add_init(2, "self.{} = self.__batch_normalization({}, '{}', num_features={}, eps={}, momentum={})".format(
             IR_node.variable_name,
             dim,
             IR_node.name,
             IR_node.layer.attr['_output_shapes'].list.shape[0].dim[-1].size,
             IR_node.layer.attr['epsilon'].f,
             IR_node.layer.attr['momentum'].f,
        ))

        self.add_body(2, "{:<15} = self.{}({})".format(
            IR_node.variable_name,
            IR_node.variable_name,
            self.parent_variable_name(IR_node)
        ))


    def emit_Squeeze(self, IR_node):
        self.add_body(2, "{:<15} = torch.squeeze({})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node)
        ))


    @staticmethod
    def _convert_padding(IR_node):
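        # torch.nn.functional.pad lists (begin, end) pairs starting from the
        # *last* dimension, so the per-dim pairs are reversed here. E.g. 2-D
        # pads [0,1,2,0, 0,3,4,0] -> [[1,3],[2,4]] for H,W -> (2, 4, 1, 3).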
        padding = IR_node.get_attr('pads')
        padding = convert_onnx_pad_to_tf(padding)[1:-1]
        new_padding = []
        for pad in padding:
            new_padding.insert(0, pad)
        return tuple(np.array(new_padding).reshape(-1).tolist())


    def emit_Pad(self, IR_node):
        if IR_node.get_attr('mode') == 'constant':
            mode = "mode = 'constant', value = {}".format(0)
        elif IR_node.get_attr('mode') == 'reflect':
            mode = "mode = 'reflect'"
        elif IR_node.get_attr('mode') == 'SYMMETRIC':
            mode = "mode = 'replicate'"
        else:
            raise NotImplementedError("Unsupported padding mode [{}].".format(IR_node.get_attr('mode')))

        padding = self._convert_padding(IR_node)
        self.add_body(2, "{:<15} = F.pad({}, {}, {})".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            padding,
            mode))


    def emit_ReduceMean(self, IR_node):
        axes = [self._convert_axis(IR_node, x) for x in IR_node.get_attr('axes')]
        input_node = self.parent_variable_name(IR_node)
        for axis in sorted(axes, reverse=True):
            self.add_body(2, "{:<15} = torch.mean({}, {}, {})".format(
                IR_node.variable_name,
                input_node,
                axis,
                IR_node.get_attr("keepdims")
            ))
            input_node = IR_node.variable_name


    def emit_LRN(self, IR_node):
        self.used_layers.add(IR_node.type)
        self.add_body(2, "{:<15} = self.LRN(size = {}, alpha = {}, beta = {})({})".format(
            IR_node.variable_name,
            IR_node.layer.attr['size'].i * 2 - 1,
            IR_node.layer.attr['alpha'].f,
            IR_node.layer.attr['beta'].f,
            self.parent_variable_name(IR_node)
        ))


    def _layer_Conv(self):
        self.add_body(0, """
    @staticmethod
    def __conv(dim, name, **kwargs):
        if   dim == 1:  layer = nn.Conv1d(**kwargs)
        elif dim == 2:  layer = nn.Conv2d(**kwargs)
        elif dim == 3:  layer = nn.Conv3d(**kwargs)
        else:           raise NotImplementedError()

        layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights']))
        if 'bias' in __weights_dict[name]:
            layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias']))
        return layer""")


    def _layer_FullyConnected(self):
        self.add_body(0, """
    @staticmethod
    def __dense(name, **kwargs):
        layer = nn.Linear(**kwargs)
        layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights']))
        if 'bias' in __weights_dict[name]:
            layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias']))
        return layer""")


    def _layer_BatchNorm(self):
        self.add_body(0, """
    @staticmethod
    def __batch_normalization(dim, name, **kwargs):
        if   dim == 1:  layer = nn.BatchNorm1d(**kwargs)
        elif dim == 2:  layer = nn.BatchNorm2d(**kwargs)
        elif dim == 3:  layer = nn.BatchNorm3d(**kwargs)
        else:           raise NotImplementedError()

        if 'scale' in __weights_dict[name]:
            layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['scale']))
        else:
            layer.weight.data.fill_(1)

        if 'bias' in __weights_dict[name]:
            layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias']))
        else:
            layer.bias.data.fill_(0)

        layer.state_dict()['running_mean'].copy_(torch.from_numpy(__weights_dict[name]['mean']))
        layer.state_dict()['running_var'].copy_(torch.from_numpy(__weights_dict[name]['var']))
        return layer""")


    def _layer_LRN(self):
        self.add_body(0, """
    class LRN(nn.Module):
        def __init__(self, size=1, alpha=1.0, beta=0.75, ACROSS_CHANNELS=False):
            super(KitModel.LRN, self).__init__()
            self.ACROSS_CHANNELS = ACROSS_CHANNELS
            if self.ACROSS_CHANNELS:
                self.average=nn.AvgPool3d(kernel_size=(size, 1, 1),
                        stride=1,
                        padding=(int((size-1.0)/2), 0, 0))
            else:
                self.average=nn.AvgPool2d(kernel_size=size,
                        stride=1,
                        padding=int((size-1.0)/2))
            self.alpha = alpha
            self.beta = beta

        def forward(self, x):
            if self.ACROSS_CHANNELS:
                div = x.pow(2).unsqueeze(1)
                div = self.average(div).squeeze(1)
                div = div.mul(self.alpha).add(1.0).pow(self.beta)
            else:
                div = x.pow(2)
                div = self.average(div)
                div = div.mul(self.alpha).add(1.0).pow(self.beta)
            x = x.div(div)
            return x""")
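
# A minimal usage sketch (file names are hypothetical): the PyTorch emitter
# writes a standalone KitModel script plus converted weights.
#
#   emitter = PytorchEmitter(('kit_model.pb', 'kit_model.npy'))
#   emitter.run('kit_pytorch.py', 'kit_pytorch.npy')
#   # the generated module can then be imported and instantiated:
#   #   model = KitModel('kit_pytorch.npy')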
Example #19
class CaffeEmitter(Emitter):
    def __init__(self, model):
        from six import string_types as _string_types
        super(CaffeEmitter, self).__init__()
        if isinstance(model, _string_types):
            network_path = model
        else:
            network_path = model[0]
            self._load_weights(model[1])

        self.IR_graph = IRGraph(network_path)
        super(CaffeEmitter, self)._build()

    @property
    def header_code(self):
        return """from __future__ import print_function
import numpy as np
import sys, argparse
import caffe
from caffe import layers as L
from caffe import params as P
from caffe import to_proto
from six import text_type as _text_type


__weights_dict = dict()

def load_weights(weight_file):
    if weight_file is None:
        return

    try:
        weights_dict = np.load(weight_file).item()
    except:
        weights_dict = np.load(weight_file, encoding='bytes').item()

    return weights_dict


def KitModel(weight_file = None):
    n = caffe.NetSpec()
"""

    @property
    def end_code(self):
        return """    return n

def make_net(prototxt):
    n = KitModel()
    with open(prototxt, 'w') as fpb:
        print(n.to_proto(), file=fpb)

def gen_weight(weight_file, model, prototxt):
    global __weights_dict
    __weights_dict = load_weights(weight_file)

    net = caffe.Net(prototxt, caffe.TRAIN)

    for key in __weights_dict:
        if 'weights' in __weights_dict[key]:
            net.params[key][0].data.flat = __weights_dict[key]['weights']
        elif 'mean' in __weights_dict[key]:
            net.params[key][0].data.flat = __weights_dict[key]['mean']
            net.params[key][1].data.flat = __weights_dict[key]['var']
            if 'scale' in __weights_dict[key]:
                net.params[key][2].data.flat = __weights_dict[key]['scale']
        elif 'scale' in __weights_dict[key]:
            net.params[key][0].data.flat = __weights_dict[key]['scale']
        if 'bias' in __weights_dict[key]:
            net.params[key][1].data.flat = __weights_dict[key]['bias']
        if 'gamma' in __weights_dict[key]: # used for prelu, not sure if other layers use this too
            net.params[key][0].data.flat = __weights_dict[key]['gamma']
    net.save(model)
    return net



if __name__=='__main__':
    parser = argparse.ArgumentParser(description='Generate caffe model and prototxt')
    parser.add_argument('--weight_file', '-w', type=_text_type, required=True, help='IR weight file')
    parser.add_argument('--prototxt', '-p', type=_text_type, default='caffe_converted.prototxt')
    parser.add_argument('--model', '-m', type=_text_type, default='caffe_converted.caffemodel')
    args = parser.parse_args()
    # argparse gives us unicode, so convert to str first
    make_net(str(args.prototxt))
    gen_weight(str(args.weight_file), str(args.model), str(args.prototxt))

"""

    def gen_code(self, phase='test'):
        self.phase = phase
        self.add_body(0, self.header_code)


        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            node_type = current_node.type

            if hasattr(self, "emit_" + node_type):
                func = getattr(self, "emit_" + node_type)
                func(current_node)
            else:
                print("CaffeEmitter has not supported operator [%s]." %
                      (node_type))
                self.emit_UNKNOWN(current_node)

        self.add_body(0, "")
        self.add_body(0, self.end_code)

        return self.body_code

    def run(self, dstNetworkPath, dstWeightPath=None, phase='test'):
        super(CaffeEmitter, self).run(dstNetworkPath, dstWeightPath, phase)
        if self.weight_loaded:
            self.save_weights(self.weights_dict, dstWeightPath)

    @staticmethod
    def _shapeToStr(shapes):
        return [dim.size if dim.size > 0 else 1 for dim in shapes.dim]

    def _get_symmetric_padding(self, IR_node):
        stride_h = IR_node.get_attr('strides')[1]
        stride_w = IR_node.get_attr('strides')[2]

        # check whether the input comes from a separate Pad layer
        IR_parent_node = self.IR_graph.get_parent(IR_node.name, [0])
        if IR_parent_node.type == 'Pad':
            pads = IR_parent_node.get_attr('pads')
        else:
            pads = IR_node.get_attr('pads')

        # Asymmetric padding (begin != end) is widened by one stride; for Pool,
        # zero pads stay zero since Caffe requires pad < kernel (vgg19 caffe2caffe case)
        if IR_node.type == "Pool":
            if pads[1]:
                pad_h = pads[1] + (0 if pads[1] == pads[5] else stride_h)
            else:
                pad_h = 0
            if pads[2]:
                pad_w = pads[2] + (0 if pads[2] == pads[6] else stride_w)
            else:
                pad_w = 0
        else:
            pad_h = pads[1] + (0 if pads[1] == pads[5] else stride_h)
            pad_w = pads[2] + (0 if pads[2] == pads[6] else stride_w)

        return pad_h, pad_w
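
    # Worked example (illustrative numbers): with NHWC pads = [0, 1, 1, 0, 0, 2, 2, 0]
    # and strides = [1, 2, 2, 1], the padding is asymmetric (begin 1 vs end 2 on
    # both H and W), so for a Conv node:
    #   pad_h = pads[1] + stride_h = 1 + 2 = 3
    #   pad_w = pads[2] + stride_w = 1 + 2 = 3
    # The over-padded output is trimmed back afterwards by check_if_need_crop.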

    def check_if_need_transpose(self, IR_node):
        parent = self.IR_graph.get_parent(IR_node.name, [0])
        while parent.type in ('Flatten', 'Dropout', 'Reshape'):
            parent = self.IR_graph.get_parent(parent.name, [0])
        dim = len(parent.layer.attr['_output_shapes'].list.shape[0].dim)
        if dim > 2:
            original_dims = self.weights_dict[IR_node.name]['weights'].shape
            dims = [
                i.size for i in
                parent.layer.attr['_output_shapes'].list.shape[0].dim[1:]
            ] + [-1]
            self.weights_dict[IR_node.name]['weights'] = np.reshape(
                self.weights_dict[IR_node.name]['weights'], dims)
            self.weights_dict[IR_node.name]['weights'] = np.transpose(
                self.weights_dict[IR_node.name]['weights'],
                [dim - 2] + list(range(0, dim - 2)) + [dim - 1])
            self.weights_dict[IR_node.name]['weights'] = np.reshape(
                self.weights_dict[IR_node.name]['weights'], original_dims)
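
    # A minimal numpy sketch of the reorder above (shapes illustrative): an FC
    # weight flattened from NHWC activations is re-laid-out channel-first so it
    # matches Caffe's NCHW flatten order, then restored to its 2-D shape:
    #
    #   w = np.arange(2 * 3 * 4 * 5).reshape(2 * 3 * 4, 5)  # (H*W*C, out)
    #   w = w.reshape([2, 3, 4, -1])                         # (H, W, C, out)
    #   w = w.transpose([2, 0, 1, 3])                        # (C, H, W, out)
    #   w = w.reshape(2 * 3 * 4, 5)                          # back to 2-D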

    def emit_Conv(self, IR_node):
        # implement asymmetric paddings by applying symmetric padding then cropping
        pad_h, pad_w = self._get_symmetric_padding(IR_node)

        num_output = IR_node.get_attr('kernel_shape')[-1]
        if IR_node.type == "DepthwiseConv":
            num_group = IR_node.get_attr("kernel_shape")[-2]
            num_output = IR_node.get_attr("_output_shapes")[0].dim[3].size
        else:
            num_group = IR_node.get_attr("group", 1)

        self.add_body(
            1,
            "n.{:<15} = L.Convolution(n.{}, kernel_h={}, kernel_w={}, stride={}, "
            "num_output={}, pad_h={}, pad_w={}, group={}, bias_term={}, ntop=1)".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
                IR_node.get_attr('kernel_shape')[0],
                IR_node.get_attr('kernel_shape')[1],
                IR_node.get_attr('strides')[1],
                num_output, pad_h, pad_w, num_group,
                IR_node.get_attr('use_bias', False)))

        dim = len(IR_node.get_attr('strides')) - 2
        if self.weight_loaded:
            if IR_node.type == "DepthwiseConv":
                self.weights_dict[IR_node.name]['weights'] = np.swapaxes(
                    self.weights_dict[IR_node.name]['weights'], -1, -2)
            self.weights_dict[IR_node.name]['weights'] = np.transpose(
                self.weights_dict[IR_node.name]['weights'],
                [dim + 1, dim] + list(range(0, dim)))
            self.weights_dict[IR_node.variable_name] = self.weights_dict.pop(
                IR_node.name)

        self.check_if_need_crop(IR_node)

    def compute_output_shape(self, IR_node, kernel_h, kernel_w):
        parent_node = self.IR_graph.get_parent(IR_node.name, [0])

        if parent_node.get_attr('_output_shapes'):
            shape = parent_node.get_attr('_output_shapes')[0]
            shape = shape_to_list(shape)
            h_i = shape[1]
            w_i = shape[2]
            pad_h, pad_w = self._get_symmetric_padding(IR_node)
            stride_h = IR_node.get_attr('strides')[1]
            stride_w = IR_node.get_attr('strides')[2]

            if IR_node.type == 'Pool':
                h_o = (h_i + 2 * pad_h - kernel_h + stride_h -
                       1) // stride_h + 1
                w_o = (w_i + 2 * pad_w - kernel_w + stride_w -
                       1) // stride_w + 1
            else:
                h_o = (h_i + 2 * pad_h - kernel_h) // stride_h + 1
                w_o = (w_i + 2 * pad_w - kernel_w) // stride_w + 1
            return h_o, w_o
        else:
            assert False
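
    # Worked example (illustrative): h_i = 10, kernel_h = 3, pad_h = 1, stride_h = 2.
    #   Pooling rounds up:       (10 + 2*1 - 3 + 2 - 1) // 2 + 1 == 6
    #   Convolution rounds down: (10 + 2*1 - 3) // 2 + 1         == 5
    # This off-by-one versus the IR shape is what check_if_need_crop repairs.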

    def check_if_need_crop(self, IR_node):
        shape = IR_node.get_attr('_output_shapes')[0]
        shape = shape_to_list(shape)
        ir_ho = shape[1]
        ir_wo = shape[2]
        if ir_ho < 0 or ir_wo < 0:
            return
        if IR_node.type == 'Pool':
            k_h = IR_node.get_attr('kernel_shape')[1]
            k_w = IR_node.get_attr('kernel_shape')[2]
        else:
            k_h = IR_node.get_attr('kernel_shape')[0]
            k_w = IR_node.get_attr('kernel_shape')[1]

        caffe_ho, caffe_wo = self.compute_output_shape(IR_node, k_h, k_w)

        # if asymmetric padding, set offset to 1
        pads = IR_node.get_attr('pads')
        offset = [
            0 if pads[1] == pads[5] else 1, 0 if pads[2] == pads[6] else 1
        ]
        if caffe_ho > ir_ho or caffe_wo > ir_wo:
            crop_layer_variable_name = IR_node.variable_name + "_crop"
            self.add_body(
                1,
                "n.{:<15} = L.Crop(n.{}, L.DummyData(shape=[dict(dim=[1, {}, {}, {}])], "
                "ntop=1), ntop=1, offset={})".format(crop_layer_variable_name,
                                                     IR_node.variable_name,
                                                     shape[3], ir_ho, ir_wo,
                                                     offset))
            # Change the layer name
            IR_node.real_name = IR_node.real_name + "_crop"

    def emit_Pool(self, IR_node):
        pooling_type = IR_node.get_attr('pooling_type')
        if pooling_type == 'MAX':
            pooling_type = P.Pooling.MAX
        elif pooling_type == 'AVG':
            pooling_type = P.Pooling.AVE
        elif pooling_type == 'STOCHASTIC':
            pooling_type = P.Pooling.STOCHASTIC
        else:
            raise ValueError("Unknown pooling type [{}].".format(pooling_type))

        if IR_node.layer.attr['global_pooling'].b:
            self.add_body(
                1,
                "n.{:<15} = L.Pooling(n.{}, pool={}, stride={}, global_pooling=True, ntop=1)"
                .format(IR_node.variable_name,
                        self.parent_variable_name(IR_node), pooling_type,
                        IR_node.get_attr('strides')[1]))
        else:
            pad_h, pad_w = self._get_symmetric_padding(IR_node)
            pool_size = IR_node.get_attr('kernel_shape')[1:3]
            if pool_size[0] != pool_size[1]:
                self.add_body(
                    1,
                    "n.{:<15} = L.Pooling(n.{}, pool={}, kernel_h={}, kernel_w={}, pad_h={}, pad_w={}, stride={}, ntop=1)"
                    .format(IR_node.variable_name,
                            self.parent_variable_name(IR_node), pooling_type,
                            pool_size[0], pool_size[1], pad_h, pad_w,
                            IR_node.get_attr('strides')[1]))
            else:
                self.add_body(
                    1,
                    "n.{:<15} = L.Pooling(n.{}, pool={}, kernel_size={}, pad_h={}, pad_w={}, stride={}, ntop=1)"
                    .format(IR_node.variable_name,
                            self.parent_variable_name(IR_node), pooling_type,
                            pool_size[0], pad_h, pad_w,
                            IR_node.get_attr('strides')[1]))

            # check if need crop output shape
            self.check_if_need_crop(IR_node)

    def emit_ResizeBilinear(self, IR_node):
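        # Note: ResizeBilinear is not a stock BVLC Caffe layer; this assumes a
        # fork that provides it with height/width parameters.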
        shape = IR_node.get_attr("_output_shapes")[0]
        shape = shape_to_list(shape)
        self.add_body(
            1,
            "n.{:<15} = L.ResizeBilinear(n.{}, height={}, width={}, ntop=1)".
            format(IR_node.variable_name, self.parent_variable_name(IR_node),
                   shape[1], shape[2]))

    def emit_UNKNOWN(self, IR_node):
        print(IR_node.IR_layer.name)

    def emit_DataInput(self, IR_node):
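        # IR shapes are NHWC; Caffe's Input expects NCHW, so move channels forward.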
        shape = self._shapeToStr(IR_node.get_attr('shape'))
        shape = [shape[0], shape[-1]] + shape[1:-1]
        self.add_body(
            1, "n.{:<15} = L.Input(shape=[dict(dim={})], ntop=1)".format(
                IR_node.variable_name, shape))

    def emit_Dropout(self, IR_node):
        in_place = True
        self.add_body(
            1,
            "n.{:<15} = L.Dropout(n.{}, dropout_ratio={} , in_place={}, ntop=1)"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    1 - IR_node.get_attr('keep_prob'), in_place))

    def emit_FullyConnected(self, IR_node):
        self.add_body(
            1,
            "n.{:<15} = L.InnerProduct(n.{}, num_output={}, bias_term={}, ntop=1)"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    IR_node.layer.attr["units"].i,
                    IR_node.get_attr('use_bias', False)))
        if self.weight_loaded:
            self.check_if_need_transpose(IR_node)
            self.weights_dict[IR_node.name]['weights'] = np.transpose(
                self.weights_dict[IR_node.name]['weights'], (1, 0))
            self.weights_dict[IR_node.variable_name] = self.weights_dict.pop(
                IR_node.name)

    def emit_BatchNorm(self, IR_node):
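        # Caffe's BatchNorm layer only normalizes with the stored mean/var; the
        # learned gamma/beta affine lives in a separate in-place Scale layer, so
        # each IR BatchNorm is emitted as a BatchNorm + Scale pair.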

        self.add_body(
            1,
            "n.{:<15} = L.BatchNorm(n.{}, eps={}, use_global_stats={}, ntop=1)"
            .format(IR_node.variable_name, self.parent_variable_name(IR_node),
                    IR_node.get_attr('epsilon'), self.phase == 'test'))

        scale_layer_var_name = IR_node.variable_name + "_scale"
        self.add_body(
            1, "n.{:<15} = L.Scale(n.{}, bias_term={}, in_place=True, ntop=1)".
            format(scale_layer_var_name, IR_node.variable_name,
                   IR_node.get_attr('bias', False)))

        if self.weight_loaded:
            self.weights_dict[scale_layer_var_name] = dict()
            if 'scale' in self.weights_dict[IR_node.name]:
                self.weights_dict[scale_layer_var_name][
                    'scale'] = self.weights_dict[IR_node.name]['scale']
            else:
                self.weights_dict[scale_layer_var_name]['scale'] = 1

            self.weights_dict[IR_node.name]['scale'] = 1

            if 'bias' in self.weights_dict[IR_node.name]:
                self.weights_dict[scale_layer_var_name][
                    'bias'] = self.weights_dict[IR_node.name]['bias']
                self.weights_dict[IR_node.name].pop('bias', None)

            # Re-key from name to variable_name, in case the layer name
            # contains invalid characters.
            self.weights_dict[IR_node.variable_name] = self.weights_dict.pop(
                IR_node.name)

        IR_node.real_name = IR_node.name + "_scale"

    def emit_Scale(self, IR_node):
        self.add_body(
            1, "n.{:<15} = L.Scale(n.{}, bias_term={}, in_place=True, ntop=1)".
            format(IR_node.variable_name, self.parent_variable_name(IR_node),
                   IR_node.get_attr('use_bias', False)))
        if self.weight_loaded:
            self.weights_dict[IR_node.variable_name] = self.weights_dict.pop(
                IR_node.name)

    def emit_Constant(self, IR_node):
        if IR_node.get_attr('value'):
            value = IR_node.get_attr('value')
        else:
            value = self.weights_dict[IR_node.name]['value'][0]
        IR_node_after = self.IR_graph.get_son(IR_node.name, [0])
        shape = IR_node_after.get_attr("_output_shapes")[0]
        shape = shape_to_list(shape)
        if len(shape) == 4:
            # NHWC -> NCHW for Caffe, with a singleton batch dimension
            shape[1], shape[3] = shape[3], shape[1]
            shape[0] = 1
        shape = [str(x) for x in shape]

        self.add_body(
            1,
            "n.{:<15} = L.DummyData(shape=[dict(dim=[{}])], data_filler=dict(type='constant', value={}), ntop=1)"
            .format(IR_node.variable_name, ', '.join(shape), value))

    def emit_LRN(self, IR_node):
        self.add_body(
            1,
            "n.{:<15} = L.LRN(n.{}, local_size={}, alpha={}, beta={}, k={})".
            format(IR_node.variable_name, self.parent_variable_name(IR_node),
                   IR_node.get_attr('size') * 2 - 1, IR_node.get_attr('alpha'),
                   IR_node.get_attr('beta'), IR_node.get_attr('k')))

    def emit_Add(self, IR_node):
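        # Eltwise operation=1 is SUM in Caffe's EltwiseParameter enum.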
        input_layers = ', '.join(
            ('n.' +
             self.IR_graph.get_parent(IR_node.name, [num]).real_variable_name)
            for num in range(0, len(IR_node.in_edges)))
        self.add_body(
            1, "n.{:<15} = L.Eltwise({}, operation=1, ntop=1)".format(
                IR_node.variable_name,
                input_layers,
            ))

    def emit_Flatten(self, IR_node):
        self.add_body(
            1, "n.{:<15} = L.Flatten(n.{})".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
            ))

    def emit_Squeeze(self, IR_node):
        shape = IR_node.get_attr("_output_shapes")[0]
        shape = shape_to_list(shape)
        if shape:
            dim_str = "'dim': {}".format(shape)
            dim_str = " reshape_param={'shape': { " + dim_str + '} }'
            self.add_body(
                1, "n.{:<15} = L.Reshape(n.{}, {})".format(
                    IR_node.variable_name, self.parent_variable_name(IR_node),
                    dim_str))
        else:
            IR_node.real_name = self.IR_graph.get_parent(IR_node.name,
                                                         [0]).real_name

    def emit_Concat(self, IR_node):
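        # IR axes are NHWC, Caffe's are NCHW; index() inverts the permutation,
        # e.g. an IR channel concat (axis=3) maps to Caffe axis=1.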
        axis_array = (2, 3, 1, 0)
        axis = axis_array.index(IR_node.get_attr('axis'))
        input_layers = ', '.join(
            ('n.' + self.IR_graph.get_node(edge).real_variable_name)
            for edge in IR_node.in_edges)
        self.add_body(
            1,
            "n.{:<15} = L.Concat({}, axis={})".format(IR_node.variable_name,
                                                      input_layers, axis))

    def emit_Sigmoid(self, IR_node):
        self.add_body(
            1, "n.{:<15} = L.Sigmoid(n.{}, ntop=1)".format(
                IR_node.variable_name, self.parent_variable_name(IR_node)))

    def emit_Relu(self, IR_node):
        in_place = True
        self.add_body(
            1, "n.{:<15} = L.ReLU(n.{}, in_place={}, ntop=1)".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                in_place))

    def emit_Elu(self, IR_node):
        in_place = True
        self.add_body(
            1, "n.{:<15} = L.ELU(n.{}, in_place={}, ntop=1)".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                in_place))

    def emit_LeakyRelu(self, IR_node):
        in_place = True
        self.add_body(
            1,
            "n.{:<15} = L.ReLU(n.{}, in_place={}, negative_slope={}, ntop=1)".
            format(IR_node.variable_name, self.parent_variable_name(IR_node),
                   in_place, IR_node.IR_layer.attr['alpha'].f))

    def emit_PRelu(self, IR_node):
        in_place = True
        self.add_body(
            1, "n.{:<15} = L.PReLU(n.{}, in_place={}, ntop=1)".format(
                IR_node.variable_name, self.parent_variable_name(IR_node),
                in_place))

    def emit_Tanh(self, IR_node):
        self.add_body(
            1, "n.{:<15} = L.TanH(n.{}, ntop=1)".format(
                IR_node.variable_name, self.parent_variable_name(IR_node)))

    def emit_Softmax(self, IR_node):
        self.add_body(
            1, "n.{:<15} = L.Softmax(n.{}, ntop=1)".format(
                IR_node.variable_name, self.parent_variable_name(IR_node)))

    def emit_Pad(self, IR_node):
        IR_node.real_name = self.IR_graph.get_parent(IR_node.name,
                                                     [0]).real_name

    def reduction(self, IR_node, op, axes):
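        # Caffe's Reduction collapses every axis from `axis` through the last, so
        # only trailing-run reductions translate directly; keepdims is recovered
        # with an explicit Reshape afterwards.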
        # Convert NHWC (IR) to NCHW (Caffe): [0,1,2,3]->[0,3,1,2]
        if len(axes) == 1:
            assert (axes[0] == 2)
        elif len(axes) == 2:
            assert ((axes[0] == 1) and (axes[1] == 2))

        self.add_body(
            1, "n.{:<15} = L.Reduction(n.{}, operation={} , axis={} ,ntop=1)".
            format(IR_node.variable_name, self.parent_variable_name(IR_node),
                   op, len(axes)))

        if IR_node.get_attr('keepdims'):
            shape = IR_node.get_attr("_output_shapes")[0]
            shape = shape_to_list(shape)
            shape = [1] + [shape[-1]] + shape[1:-1]
            dim_str = "'dim': {}".format(shape)
            dim_str = "{'shape': { " + dim_str + '} }'
            self.add_body(
                1, "n.{:<15} = L.Reshape(n.{}, reshape_param={}) ".format(
                    IR_node.variable_name + "_reshape",
                    IR_node.real_variable_name, dim_str))
            IR_node.real_name = IR_node.real_name + '_reshape'

    def emit_ReduceMean(self, IR_node):
        self.reduction(IR_node, 4, IR_node.get_attr('axes'))

    def emit_ReduceSum(self, IR_node):
        self.reduction(IR_node, 1, IR_node.get_attr('axes'))

    def emit_Relu6(self, IR_node):
        self.emit_Relu(IR_node)

    def emit_DepthwiseConv(self, IR_node):
        self.emit_Conv(IR_node)

    def emit_Const(self, IR_node):
        pass

    def emit_Shape(self, IR_node):
        pass

    def emit_Reshape(self, IR_node):
        shape = IR_node.get_attr("_output_shapes")[0]
        shape = shape_to_list(shape)
        if shape:
            dim_str = "'dim': {}".format(shape)
            dim_str = " reshape_param={'shape': { " + dim_str + '} }'
            self.add_body(
                1, "n.{:<15} = L.Reshape(n.{}, {})".format(
                    IR_node.variable_name, self.parent_variable_name(IR_node),
                    dim_str))
        else:
            IR_node.real_name = self.IR_graph.get_parent(IR_node.name,
                                                         [0]).real_name

    def emit_Slice(self, IR_node):
        pass

    def emit_Pack(self, IR_node):
        pass

    def emit_Abs(self, IR_node):
        self.add_body(
            1, "n.{:<15} = L.AbsVal(n.{}, ntop=1)".format(
                IR_node.variable_name, self.parent_variable_name(IR_node)))

    def emit_Sub(self, IR_node):
        input_layers = ', '.join(
            ('n.' + self.IR_graph.get_node(edge).real_variable_name)
            for edge in IR_node.in_edges)
        # Eltwise defaults to SUM; coeff=[1, -1] turns it into subtraction.
        self.add_body(
            1, "n.{:<15} = L.Eltwise({}, coeff=[1, -1], ntop=1)".format(
                IR_node.variable_name, input_layers))

    def emit_Mul(self, IR_node):
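        # Eltwise operation=0 is PROD; a single-input Mul degenerates to a Scale layer.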
        if len(IR_node.in_edges) == 2:
            input_layers = ', '.join(
                ('n.' + self.IR_graph.get_node(edge).real_variable_name)
                for edge in IR_node.in_edges)
            self.add_body(
                1, "n.{:<15} = L.Eltwise({}, operation=0, ntop=1)".format(
                    IR_node.variable_name, input_layers))
        elif len(IR_node.in_edges) == 1:
            self.emit_Scale(IR_node)
        else:
            assert False

    def emit_UpSampling2D(self, IR_node):
        scales = IR_node.get_attr('scales')
        scale = tuple(scales)[0]  # assumes identical scale factors for H and W

        shape = IR_node.get_attr('_output_shapes')[0]
        shape = shape_to_list(shape)

        self.add_body(
            1,
            "n.{:<15} = L.Deconvolution(n.{}, convolution_param=dict(kernel_size={}, stride={}, pad={}, num_output={}, group={}, bias_term={}), param=[dict(lr_mult=0)], ntop=1)"
            .format(IR_node.variable_name,
                    self.parent_variable_name(IR_node),
                    2 * scale - scale % 2, scale,
                    int(math.ceil((scale - 1) / 2)),
                    shape[-1], shape[-1], False))
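
    # The Deconvolution above realizes fixed upsampling as a frozen (lr_mult=0),
    # grouped transposed convolution. Illustrative numbers: scale = 2 gives
    # kernel_size = 2*2 - 2%2 = 4, stride = 2, pad = ceil((2-1)/2) = 1, and the
    # deconv output (h-1)*2 - 2*1 + 4 = 2h doubles each spatial dimension.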
