Example #1
    def __init__(self, params):
        Model.check_parameters(params, {'name': 'InceptionV3'})
        BaseInceptionModel.__init__(self, params)

        v = mx.sym.Variable(name="data")
        # Input conv modules
        v = self.conv('conv', v, num_filters=32, kernel=(3, 3), stride=(2, 2), pad=(0, 0))
        v = self.conv('conv', v, num_filters=32, kernel=(3, 3), stride=(1, 1), pad=(0, 0))
        v = self.conv('conv', v, num_filters=64, kernel=(3, 3), stride=(1, 1), pad=(1, 1))
        v = mx.symbol.Pooling(name='pool1', data=v, pool_type="max", kernel=(3, 3), stride=(2, 2))
        v = self.conv('conv', v, num_filters=80, kernel=(1, 1), stride=(1, 1), pad=(0, 0))
        v = self.conv('conv', v, num_filters=192, kernel=(3, 3), stride=(1, 1), pad=(0, 0))
        v = mx.symbol.Pooling(name='pool2', data=v, pool_type="max", kernel=(3, 3), stride=(2, 2))
        # Three Type A inception modules
        for sz in (32, 64, 64):
            v = self.module_a(v, sz)
        # One Type B inception module
        v = self.module_b(v)
        # Four Type C inception modules
        for sz in (128, 160, 160, 192):
            v = self.module_c(v, sz)
        # One Type D inception module
        v = self.module_d(v)
        # Two Type E inception modules
        v = self.module_e(v, 'avg')
        v = self.module_e(v, 'max')
        # Final global pooling
        v = mx.symbol.Pooling(name='pool', data=v, pool_type="avg", kernel=(8, 8), stride=(1, 1))
        # And classifier
        self.__output = self.add_head_nodes(v)
Example #2
    def __init__(self, params):
        specs = ResNet.specs[params['model']]
        Model.check_parameters(
            params, {
                'name': specs['name'],
                'input_shape': (3, 224, 224),
                'num_classes': 1000,
                'phase': 'training',
                'dtype': 'float32'
            })
        Model.__init__(self, params)
        # Adapted from https://github.com/tornadomeet/ResNet/blob/master/train_resnet.py
        # Original author Wei Wu
        if specs['num_layers'] >= 50:
            filter_list = [64, 256, 512, 1024, 2048]
            bottle_neck = True
        else:
            filter_list = [64, 64, 128, 256, 512]
            bottle_neck = False

        self.__output = self.resnet(units=specs['units'],
                                    num_stages=4,
                                    filter_list=filter_list,
                                    bottle_neck=bottle_neck,
                                    workspace=256)
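
ResNet.specs itself is not shown in this listing, but its shape follows from how it is indexed above: each entry maps a model key to a display name, a layer count, and per-stage unit counts. A hypothetical entry as a reading aid (the key and exact layout are guesses; [3, 4, 6, 3] is the standard ResNet-50 per-stage block count):

    # Hypothetical sketch of one ResNet.specs entry; only the keys 'name',
    # 'num_layers' and 'units' are attested by the code above.
    specs = {
        'resnet50': {'name': 'ResNet50', 'num_layers': 50, 'units': [3, 4, 6, 3]},
    }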
Example #3
    def __init__(self, params):
        Model.check_parameters(params, {'name': 'InceptionV4'})
        BaseInceptionModel.__init__(self, params)

        v = self.add_data_node()
        # Input conv modules
        v = self.conv('conv', v, num_filters=32, kernel=(3, 3), stride=(2, 2), pad=(0, 0))
        v = self.conv('conv', v, num_filters=32, kernel=(3, 3), stride=(1, 1), pad=(0, 0))
        v = self.conv('conv', v, num_filters=64, kernel=(3, 3), stride=(1, 1), pad=(1, 1))
        # Stem modules
        v = self.inception_v4_sa(v)
        v = self.inception_v4_sb(v)
        v = self.inception_v4_sc(v)
        # Four Type A modules
        for _ in range(4):
            v = self.inception_v4_a(v)
        # One Type A Reduction module
        v = self.inception_v4_ra(v, 192, 224, 256, 384)
        # Seven Type B modules
        for _ in range(7):
            v = self.inception_v4_b(v)
        # One Type B Reduction module
        v = self.inception_v4_rb(v)
        # Three Type C modules
        for _ in range(3):
            v = self.inception_v4_c(v)
        # Final global pooling
        v = mx.symbol.Pooling(name='pool', data=v, pool_type="avg", kernel=(8, 8), stride=(1, 1))
        if self.phase == 'training':
            v = mx.symbol.Dropout(name='drop', data=v, p=0.2)
        # And classifier
        self.__output = self.add_head_nodes(v)
Example #4
    def __init__(self, params):
        Model.check_parameters(
            params,
            {'input_shape': (3, 299, 299), 'num_classes': 1000,
             'phase': 'training',
             'dtype': 'float32'}
        )
        Model.__init__(self, params)
        self.counts = defaultdict(lambda: 0)
Example #5
    def __init__(self, params):
        Model.check_parameters(
            params,
            {'name': 'DeepMNIST', 'input_shape': (784, ), 'num_classes': 10,
             'phase': 'training',
             'dtype': 'float32'}
        )
        Model.__init__(self, params)

        v = self.add_data_node()

        for layer_size in [2500, 2000, 1500, 1000, 500]:
            v = mx.sym.FullyConnected(data=v, num_hidden=layer_size)
            v = mx.symbol.Activation(data=v, act_type="relu")

        self.__output = self.add_head_nodes(v)
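
Since this is the simplest symbol in the listing, it is a convenient place to show how the resulting graph can be sanity-checked. A minimal sketch, assuming the enclosing class is named DeepMNIST, that Model.__init__ accepts the params dict shown above, and that the finished symbol is exposed as the output attribute (as in Example #11 below):

    import mxnet as mx

    # Hypothetical shape check for the DeepMNIST symbol defined above.
    m = DeepMNIST({'phase': 'training'})
    # infer_shape propagates a (batch, 784) input through the FC stack
    arg_shapes, out_shapes, aux_shapes = m.output.infer_shape(data=(32, 784))
    print(out_shapes)  # expected: [(32, 10)] -- one 10-way score vector per sample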
Example #6
    def __init__(self, params):
        Model.check_parameters(
            params,
            {'name': 'EngAcousticModel', 'input_shape': (540, ),
             'num_classes': 8192, 'phase': 'training',
             'dtype': 'float32'}
        )
        Model.__init__(self, params)

        v = self.add_data_node()

        for _ in range(5):
            v = mx.sym.FullyConnected(data=v, num_hidden=2048)
            v = mx.symbol.Activation(data=v, act_type="relu")

        self.__output = self.add_head_nodes(v)
Example #7
    def __init__(self, params):
        specs = VGG.specs[params['model']]
        Model.check_parameters(
            params, {
                'name': specs['name'],
                'input_shape': (3, 224, 224),
                'num_classes': 1000,
                'phase': 'training',
                'dtype': 'float32'
            })
        Model.__init__(self, params)
        training = self.phase == 'training'

        v = self.add_data_node()

        layers, filters = specs['specs']
        for i, num in enumerate(layers):
            for j in range(num):
                v = mx.symbol.Convolution(name='conv%d_%d' % (i + 1, j + 1),
                                          data=v,
                                          kernel=(3, 3),
                                          pad=(1, 1),
                                          num_filter=filters[i])
                v = mx.symbol.Activation(name='relu%d_%d' % (i + 1, j + 1),
                                         data=v,
                                         act_type="relu")
            v = mx.sym.Pooling(name='pool%d' % (i + 1),
                               data=v,
                               pool_type="max",
                               kernel=(2, 2),
                               stride=(2, 2))

        v = mx.sym.Flatten(name='flatten', data=v)

        for i in range(2):
            v = mx.sym.FullyConnected(name='fc%d' % (6 + i),
                                      data=v,
                                      num_hidden=4096)
            v = mx.symbol.Activation(name='relu%d' % (6 + i),
                                     data=v,
                                     act_type="relu")
            v = mx.symbol.Dropout(name='drop%d' % (6 + i),
                                  data=v, p=0.5) if training else v

        self.__output = self.add_head_nodes(v)
Example #8
    def __init__(self, params):
        Model.check_parameters(
            params,
            {'name': 'AlexNet', 'input_shape': (3, 227, 227), 'num_classes': 1000,
             'phase': 'training',
             'dtype': 'float32'}
        )
        Model.__init__(self, params)
        if self.dtype == 'float16':
            print("[WARNING] MxNet does not provide half precision kernel for LRN layer. It will be disabled. "\
                  "Thus, comparison with single precision version or other frameworks will not be totally fair.")

        training = self.phase == 'training'
        data = self.add_data_node()

        conv1 = mx.symbol.Convolution(name='conv1', data=data, kernel=(11, 11), stride=(4, 4), num_filter=96)
        relu1 = mx.symbol.Activation(name='relu1', data=conv1, act_type='relu')
        norm1 = self.maybe_lrn(relu1, 'norm1')
        pool1 = mx.symbol.Pooling(name='pool1', data=norm1, pool_type="max", kernel=(3, 3), stride=(2, 2))

        conv2 = mx.symbol.Convolution(name='conv2', data=pool1, kernel=(5, 5), pad=(2, 2), num_filter=256, num_group=1)
        relu2 = mx.symbol.Activation(name='relu2', data=conv2, act_type="relu")
        norm2 = self.maybe_lrn(relu2, 'norm2')
        pool2 = mx.symbol.Pooling(name='pool2', data=norm2, kernel=(3, 3), stride=(2, 2), pool_type="max")

        conv3 = mx.symbol.Convolution(name='conv3', data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=384)
        relu3 = mx.symbol.Activation(name='relu3', data=conv3, act_type="relu")

        conv4 = mx.symbol.Convolution(name='conv4', data=relu3, kernel=(3, 3), pad=(1, 1), num_filter=384, num_group=1)
        relu4 = mx.symbol.Activation(name='relu4', data=conv4, act_type="relu")

        conv5 = mx.symbol.Convolution(name='conv5', data=relu4, kernel=(3, 3), pad=(1, 1), num_filter=256, num_group=1)
        relu5 = mx.symbol.Activation(name='relu5', data=conv5, act_type="relu")
        pool5 = mx.symbol.Pooling(name='pool5', data=relu5, kernel=(3, 3), stride=(2, 2), pool_type="max")

        flatten = mx.symbol.Flatten(data=pool5)
        fc6 = mx.symbol.FullyConnected(name='fc6', data=flatten, num_hidden=4096)
        relu6 = mx.symbol.Activation(name='relu6', data=fc6, act_type="relu")
        drop6 = mx.symbol.Dropout(name='drop6', data=relu6, p=0.5) if training else relu6

        fc7 = mx.symbol.FullyConnected(name='fc7', data=drop6, num_hidden=4096)
        relu7 = mx.symbol.Activation(name='relu7', data=fc7, act_type="relu")
        drop7 = mx.symbol.Dropout(name='drop7', data=relu7, p=0.5) if training else relu7

        self.__output = self.add_head_nodes(drop7)
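
Symbols like this AlexNet are executed through MXNet's Module API; Example #11 below shows the complete pattern for DeepSpeech2. A condensed, hypothetical single training step for an image classifier, with the constructor argument and the output/input_shape/num_classes attribute names assumed from that example:

    import numpy as np
    import mxnet as mx

    # One synthetic training step, following the Module workflow of Example #11.
    m = AlexNet({'phase': 'training'})
    data = mx.nd.random.uniform(shape=(16,) + m.input_shape)
    label = mx.nd.array(np.random.randint(0, m.num_classes, (16,)))
    batch = mx.io.DataBatch(data=[data], label=[label])

    mod = mx.mod.Module(symbol=m.output, context=mx.cpu(), label_names=['softmax_label'])
    mod.bind(data_shapes=[('data', data.shape)],
             label_shapes=[('softmax_label', label.shape)],
             for_training=True)
    mod.init_params(initializer=mx.init.Xavier(magnitude=2.))
    mod.init_optimizer(optimizer='sgd', optimizer_params=(('learning_rate', 0.01),))
    mod.forward_backward(batch)
    mod.update()
    mx.nd.waitall()  # block until MXNet's asynchronous engine finishes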
Example #9
    def __init__(self, params):
        Model.check_parameters(
            params,
            {'name': 'AlexNet', 'input_shape': (3, 227, 227), 'num_classes': 1000,
             'phase': 'training',
             'dtype': 'float32'}
        )
        Model.__init__(self, params)

        training = self.phase == 'training'
        data = self.add_data_node()

        conv1 = mx.symbol.Convolution(name='conv1', data=data, kernel=(11, 11), stride=(4, 4), num_filter=96)
        relu1 = mx.symbol.Activation(name='relu1', data=conv1, act_type='relu')
        norm1 = self.maybe_lrn(relu1, 'norm1')
        pool1 = mx.symbol.Pooling(name='pool1', data=norm1, pool_type="max", kernel=(3, 3), stride=(2, 2))

        conv2 = mx.symbol.Convolution(name='conv2', data=pool1, kernel=(5, 5), pad=(2, 2), num_filter=256, num_group=1)
        relu2 = mx.symbol.Activation(name='relu2', data=conv2, act_type="relu")
        norm2 = self.maybe_lrn(relu2, 'norm2')
        pool2 = mx.symbol.Pooling(name='pool2', data=norm2, kernel=(3, 3), stride=(2, 2), pool_type="max")

        conv3 = mx.symbol.Convolution(name='conv3', data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=384)
        relu3 = mx.symbol.Activation(name='relu3', data=conv3, act_type="relu")

        conv4 = mx.symbol.Convolution(name='conv4', data=relu3, kernel=(3, 3), pad=(1, 1), num_filter=384, num_group=1)
        relu4 = mx.symbol.Activation(name='relu4', data=conv4, act_type="relu")

        conv5 = mx.symbol.Convolution(name='conv5', data=relu4, kernel=(3, 3), pad=(1, 1), num_filter=256, num_group=1)
        relu5 = mx.symbol.Activation(name='relu5', data=conv5, act_type="relu")
        pool5 = mx.symbol.Pooling(name='pool5', data=relu5, kernel=(3, 3), stride=(2, 2), pool_type="max")

        flatten = mx.symbol.Flatten(data=pool5)
        fc6 = mx.symbol.FullyConnected(name='fc6', data=flatten, num_hidden=4096)
        relu6 = mx.symbol.Activation(name='relu6', data=fc6, act_type="relu")
        drop6 = mx.symbol.Dropout(name='drop6', data=relu6, p=0.5) if training else relu6

        fc7 = mx.symbol.FullyConnected(name='fc7', data=drop6, num_hidden=4096)
        relu7 = mx.symbol.Activation(name='relu7', data=fc7, act_type="relu")
        drop7 = mx.symbol.Dropout(name='drop7', data=relu7, p=0.5) if training else relu7

        self.__output = self.add_head_nodes(drop7)
Example #10
    def __init__(self, params):
        specs = ResNet.specs[params['model']]
        Model.check_parameters(
            params, {
                'name': specs['name'],
                'num_classes': 1000,
                'phase': 'training',
                'dtype': 'float32',
                'input_layout': 'NCHW',
                'model_layout': 'NCHW',
                'nvidia_layers': False,
                'workspace': 1024
            })
        params['input_shape'] = Model.conv_shape(3, (224, 224),
                                                 params['input_layout'])

        Model.__init__(self, params)

        self.params = params
        self.layers = Layers(params)

        # Adapted from https://github.com/tornadomeet/ResNet/blob/master/train_resnet.py
        # Original author Wei Wu
        # Some optimizations are taken from NVIDIA code from NGC containers.
        if specs['num_layers'] >= 50:
            filter_list = [64, 256, 512, 1024, 2048]
            bottle_neck = True
        else:
            filter_list = [64, 64, 128, 256, 512]
            bottle_neck = False

        self.__output = self.resnet(units=specs['units'],
                                    num_stages=4,
                                    filter_list=filter_list,
                                    bottle_neck=bottle_neck,
                                    workspace=params['workspace'],
                                    fuse_bn_add_relu=params['nvidia_layers'],
                                    fuse_bn_relu=params['nvidia_layers'])
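
Model.conv_shape is not shown in this listing, but its role follows from the call above: it builds an input shape with the channel axis placed according to the layout string, so the same definition serves NCHW and NHWC inputs. A plausible reconstruction, offered purely as a reading aid (not the actual DLBS implementation):

    def conv_shape(num_channels, spatial_dims, layout='NCHW'):
        # Sketch: put the channel axis first (NCHW) or last (NHWC).
        if layout == 'NCHW':
            return (num_channels,) + tuple(spatial_dims)
        return tuple(spatial_dims) + (num_channels,)

    assert conv_shape(3, (224, 224), 'NCHW') == (3, 224, 224)
    assert conv_shape(3, (224, 224), 'NHWC') == (224, 224, 3)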
Example #11
        def __worker(q, conv_arch, num_rnn_layers, rnn_layer_size,
                     bidirectional, rnn_type, brnn_output):
            # Comment out the following two lines if you need MXNet's output
            sys.stdout = open(os.devnull, 'w')
            sys.stderr = open(os.devnull, 'w')
            os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0'
            device = mx.gpu(0)
            m = DeepSpeech2({
                'batch_size': 16,
                'model_opts': {
                    'conv_arch': conv_arch,
                    'num_rnn_layers': num_rnn_layers,
                    'rnn_layer_size': rnn_layer_size,
                    'bidirectional': bidirectional,
                    'rnn_type': rnn_type,
                    'brnn_output': brnn_output
                }
            })
            data_shape = (m.batch_size, ) + m.input_shape
            data = SyntheticDataIterator(m.num_classes,
                                         data_shape,
                                         max_iter=10,
                                         dtype=np.float32,
                                         label_shape=(m.batch_size,
                                                      m.output_length))
            mod = mx.mod.Module(symbol=m.output,
                                context=device,
                                label_names=['softmax_label'])
            mod.bind(data_shapes=data.provide_data,
                     label_shapes=data.provide_label,
                     for_training=True,
                     inputs_need_grad=False)
            mod.init_params(initializer=mx.init.Xavier(magnitude=2.))
            mod.init_optimizer(kvstore='local',
                               optimizer='sgd',
                               optimizer_params=(('learning_rate', 0.01), ))

            batch = next(data)
            mod.forward_backward(batch)
            mod.update()
            mx.nd.waitall()

            q.put(Model.num_parameters(mod))
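
__worker is written to run in a child process: it silences stdout/stderr, trains a single synthetic batch, and reports the model's parameter count through the queue q. A hedged sketch of a matching driver; the option values mirror the DeepSpeech2 defaults in Example #13, and it assumes __worker is visible in the calling scope (with fork-based multiprocessing on Linux, a locally defined target works):

    import multiprocessing as mp

    # Hypothetical driver: run __worker in a child process so that GPU/MXNet
    # state is torn down with the process, then read the parameter count back.
    q = mp.Queue()
    p = mp.Process(target=__worker,
                   args=(q, '2-layer-2D-v2', 3, 2048, True, 'rnn_relu', 'concat'))
    p.start()
    num_params = q.get()  # blocks until the worker calls q.put(...)
    p.join()
    print("DeepSpeech2 has %d parameters" % num_params)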
Example #12
    def __init__(self, params):
        Model.check_parameters(
            params, {
                'name': 'Overfeat',
                'input_shape': (3, 231, 231),
                'num_classes': 1000,
                'phase': 'training',
                'dtype': 'float32'
            })
        Model.__init__(self, params)
        training = self.phase == 'training'

        data = self.add_data_node()
        # Layer1
        conv1 = mx.symbol.Convolution(name='conv1',
                                      data=data,
                                      kernel=(11, 11),
                                      stride=(4, 4),
                                      num_filter=96)
        relu1 = mx.symbol.Activation(name='relu1', data=conv1, act_type='relu')
        pool1 = mx.symbol.Pooling(name='pool1',
                                  data=relu1,
                                  pool_type="max",
                                  kernel=(2, 2),
                                  stride=(2, 2))
        # Layer2
        conv2 = mx.symbol.Convolution(name='conv2',
                                      data=pool1,
                                      kernel=(5, 5),
                                      num_filter=256)
        relu2 = mx.symbol.Activation(name='relu2', data=conv2, act_type="relu")
        pool2 = mx.symbol.Pooling(name='pool2',
                                  data=relu2,
                                  kernel=(2, 2),
                                  stride=(2, 2),
                                  pool_type="max")
        # Layer3
        conv3 = mx.symbol.Convolution(name='conv3',
                                      data=pool2,
                                      kernel=(3, 3),
                                      pad=(1, 1),
                                      num_filter=512)
        relu3 = mx.symbol.Activation(name='relu3', data=conv3, act_type="relu")
        # Layer4
        conv4 = mx.symbol.Convolution(name='conv4',
                                      data=relu3,
                                      kernel=(3, 3),
                                      pad=(1, 1),
                                      num_filter=1024)
        relu4 = mx.symbol.Activation(name='relu4', data=conv4, act_type="relu")
        # Layer5
        conv5 = mx.symbol.Convolution(name='conv5',
                                      data=relu4,
                                      kernel=(3, 3),
                                      pad=(1, 1),
                                      num_filter=1024)
        relu5 = mx.symbol.Activation(name='relu5', data=conv5, act_type="relu")
        pool5 = mx.symbol.Pooling(name='pool5',
                                  data=relu5,
                                  kernel=(2, 2),
                                  stride=(2, 2),
                                  pool_type="max")
        # Layer6
        flatten = mx.symbol.Flatten(data=pool5)
        fc6 = mx.symbol.FullyConnected(name='fc6',
                                       data=flatten,
                                       num_hidden=3072)
        relu6 = mx.symbol.Activation(name='relu6', data=fc6, act_type="relu")
        drop6 = mx.symbol.Dropout(name='drop6', data=relu6,
                                  p=0.5) if training else relu6
        # Layer7
        fc7 = mx.symbol.FullyConnected(name='fc7', data=drop6, num_hidden=4096)
        relu7 = mx.symbol.Activation(name='relu7', data=fc7, act_type="relu")
        drop7 = mx.symbol.Dropout(name='drop7', data=relu7,
                                  p=0.5) if training else relu7

        self.__output = self.add_head_nodes(drop7)
Example #13
    def __init__(self, params):
        # Common parameters for all models
        Model.check_parameters(
            params,
            {
                'name': 'DeepSpeech2',
                'input_shape': (1, 200, 161),
                'num_classes': 29 * 29 + 1,  # Alphabet size + BLANK character (which is 0)
                'phase': 'training',
                'dtype': 'float32',
                'model_opts': {}
            })
        # Specific parameters for DeepSpeech2
        Model.check_parameters(
            params['model_opts'],
            {
                'conv_batch_norm': True,       # Use BatchNorm in Conv layers
                'conv_arch': '2-layer-2D-v2',  # Conv layers architecture
                'num_rnn_layers': 3,           # Number of RNN layers
                'rnn_layer_size': 2048,        # Hidden size of each RNN
                'bidirectional': True,         # Use bidirectional RNNs
                'rnn_type': 'rnn_relu',        # One of RNN_TYPES
                'rnn_batch_norm': False,       # Use BatchNorm in RNNs after the i2h matrix. DOES NOT WORK NOW.
                'brnn_share_i2h': False,       # Share i2h weights in BRNNs. DOES NOT WORK NOW.
                'brnn_output': 'concat',       # Aggregation method for BRNN outputs, one of BRNN_OUTPUT
                'rnn_dropout': 0.0,            # Dropout in RNNs, a value in [0, 1).
                'ctc_loss': 'mxnet_ctc_loss'   # CTC loss implementation, one of CTC_LOSSES
            })
        Model.__init__(self, params)
        self.__batch_size = params['batch_size']
        self.__output_length = 0  # [output] Length of output sequence
        self.__data_shape = (self.batch_size, ) + self.input_shape  # For debugging purposes
        self.__debug = logging.getLogger().isEnabledFor(logging.DEBUG) or \
            os.environ.get('DLBS_DEBUG', '0') == '1'

        if self.model_opts['conv_arch'] not in DeepSpeech2.CONV_ARCHS:
            raise ValueError("Invalid conv arch ('%s'), must be one of '%s'" %
                             (self.model_opts['conv_arch'], str(DeepSpeech2.CONV_ARCHS)))
        if self.model_opts['rnn_type'] not in DeepSpeech2.RNN_TYPES:
            raise ValueError("Invalid RNN type ('%s'), must be one of '%s'" %
                             (self.model_opts['rnn_type'], str(DeepSpeech2.RNN_TYPES)))
        if self.model_opts['brnn_output'] not in DeepSpeech2.BRNN_OUTPUT:
            raise ValueError("Invalid BRNN output function ('%s'), must be one of '%s'" %
                             (self.model_opts['brnn_output'], str(DeepSpeech2.BRNN_OUTPUT)))
        if self.model_opts['ctc_loss'] not in DeepSpeech2.CTC_LOSSES:
            raise ValueError("Invalid ctc loss ('%s'), must be one of '%s'" %
                             (self.model_opts['ctc_loss'], str(DeepSpeech2.CTC_LOSSES)))
        if self.model_opts['rnn_batch_norm'] is True:
            self.model_opts['rnn_batch_norm'] = False
            print("[WARNING] Batch norm is not supported in RNNs.")
        if self.model_opts['brnn_share_i2h'] is True:
            self.model_opts['brnn_share_i2h'] = False
            print("[WARNING] Sharing input2hidden weights in BRNNs is not supported.")

        print("Model options: " + str(self.model_opts))
        # This helps debugging shapes
        logging.debug("Batch size: %d", self.batch_size)
        logging.debug("Input length: %d", self.input_shape[1])
        logging.debug("Num input features: %d", self.input_shape[2])
        # Input data v is a spectrogram
        v = self.add_data_node()  # [Batch, 1, DatumLen, DatumFeatures]
        self.log_shape("Input shape: %s", v)
        # 1-3 layers of 1D or 2D convolutions
        v, length = self.add_conv_layers(v)  # [Batch, 1, CnnLen, CnnFeatures]
        # Add RNN layers
        v, nrnn_features = self.add_rnn_layers(v, length)  # [CnnLen, Batch, RnnFeatures]
        # Compute CTC loss
        v = mx.sym.Reshape(data=v, shape=(-1, nrnn_features))  # [CnnLen*Batch, RnnFeatures]
        self.log_shape("FC input shape: %s", v)
        v = mx.sym.FullyConnected(data=v, num_hidden=self.num_classes)  # [CnnLen*Batch, self.num_classes]
        self.log_shape("FC output shape: %s", v)
        if self.dtype == 'float16':
            print("Casting logits to np.float32")
            v = mx.sym.cast(data=v, dtype=np.float32)
        if self.phase == 'training':
            # [CnnLen, Batch, NumClasses(alphabet+1)]
            v_ctc = mx.sym.Reshape(data=v,
                                   shape=(length, self.batch_size,
                                          self.num_classes))
            labels = mx.sym.Variable(name="softmax_label",
                                     shape=(self.batch_size, length),
                                     init=mx.init.Zero())
            self.log_shape("CTC input shape: %s", v_ctc)
            if self.model_opts['ctc_loss'] == 'warp_ctc_loss':
                print("Using Baidu's Warp CTC Loss.")
                print("[WARNING] WarpCTC was not tested and may not work.")
                try:
                    v = mx.symbol.WarpCTC(data=v_ctc, label=labels)
                except AttributeError:
                    print("[ERROR] WarpCTC symbol is not available. Recompile MXNET with WarpCTC support.")
                    raise
            else:
                print("Using CTCLoss from mx.symbol.contrib.")
                # data:  (sequence_length, batch_size, alphabet_size + 1)
                #        The 0th element of this vector is reserved for the special blank character.
                # label: (batch_size, label_sequence_length)
                #        Is a tensor of integers between 1 and alphabet_size.
                # out:   (batch_size)
                #        Is a list of CTC loss values, one per example in the batch.
                ctc_loss = mx.sym.MakeLoss(
                    mx.symbol.contrib.CTCLoss(data=v_ctc,
                                              label=labels,
                                              name='ctc'))
                predictions = mx.sym.MakeLoss(
                    mx.sym.SoftmaxActivation(data=v, name='softmax'))
                v = mx.sym.Group([mx.sym.BlockGrad(predictions), ctc_loss])
        else:
            v = mx.symbol.softmax(data=v, name='softmax')
        self.log_shape("Output shape: %s", v)

        self.__output = v
        self.__output_length = length  # We have this many labels per input sequence.
        self._labels_shape = (self.__output_length, )  # K labels for every batch item
        self._labels_range = (1, self.num_classes)  # Class '0' is reserved for the BLANK character.

        self.__ctc_metrics = CtcMetrics(seq_len=self.__output_length)
        self._eval_metric = mx.metric.CustomMetric(
            feval=self.__ctc_metrics.accuracy,
            name='ctc_metric',
            allow_extra_outputs=True)
Example #14
    def __init__(self, params):
        """ Naming and topology according to: http://ethereon.github.io/netscope/#/gist/f2e4825a8d4f8a3609cefd7ffadc910a
            Based on: https://github.com/dmlc/mxnet/blob/master/example/image-classification/symbols/alexnet.py
        """
        Model.check_parameters(
            params, {
                'name': 'GoogleNet',
                'input_shape': (3, 224, 224),
                'num_classes': 1000,
                'phase': 'training',
                'dtype': 'float32'
            })
        Model.__init__(self, params)
        training = self.phase == 'training'

        data = self.add_data_node()

        conv1 = ConvFactory(data,
                            64,
                            kernel=(7, 7),
                            stride=(2, 2),
                            pad=(3, 3),
                            name="conv1/7x7_s2")
        pool1 = mx.sym.Pooling(conv1,
                               kernel=(3, 3),
                               stride=(2, 2),
                               pool_type="max",
                               name="pool1/3x3_s2")
        norm1 = mx.symbol.LRN(data=pool1,
                              alpha=0.0001,
                              beta=0.75,
                              knorm=2,
                              nsize=5,
                              name='pool1/norm1')

        conv2_reduce = ConvFactory(norm1,
                                   64,
                                   kernel=(1, 1),
                                   stride=(1, 1),
                                   name="conv2/3x3_reduce")

        conv2 = ConvFactory(conv2_reduce,
                            192,
                            kernel=(3, 3),
                            stride=(1, 1),
                            pad=(1, 1),
                            name="conv2/3x3")
        norm2 = mx.symbol.LRN(data=conv2,
                              alpha=0.0001,
                              beta=0.75,
                              knorm=2,
                              nsize=5,
                              name='conv2/norm2')
        pool2 = mx.sym.Pooling(norm2,
                               kernel=(3, 3),
                               stride=(2, 2),
                               pool_type="max",
                               name='pool2/3x3_s2')

        in3a = InceptionFactory(pool2,
                                64,
                                96,
                                128,
                                16,
                                32,
                                "max",
                                32,
                                name="inception_3a")
        in3b = InceptionFactory(in3a,
                                128,
                                128,
                                192,
                                32,
                                96,
                                "max",
                                64,
                                name="inception_3b")
        pool3 = mx.sym.Pooling(in3b,
                               kernel=(3, 3),
                               stride=(2, 2),
                               pool_type="max",
                               name='pool3/3x3_s2')

        in4a = InceptionFactory(pool3,
                                192,
                                96,
                                208,
                                16,
                                48,
                                "max",
                                64,
                                name="inception_4a")
        in4b = InceptionFactory(in4a,
                                160,
                                112,
                                224,
                                24,
                                64,
                                "max",
                                64,
                                name="inception_4b")
        in4c = InceptionFactory(in4b,
                                128,
                                128,
                                256,
                                24,
                                64,
                                "max",
                                64,
                                name="inception_4c")
        in4d = InceptionFactory(in4c,
                                112,
                                144,
                                288,
                                32,
                                64,
                                "max",
                                64,
                                name="inception_4d")
        in4e = InceptionFactory(in4d,
                                256,
                                160,
                                320,
                                32,
                                128,
                                "max",
                                128,
                                name="inception_4e")
        pool4 = mx.sym.Pooling(in4e,
                               kernel=(3, 3),
                               stride=(2, 2),
                               pad=(1, 1),
                               pool_type="max",
                               name='pool4/3x3_s2')

        in5a = InceptionFactory(pool4,
                                256,
                                160,
                                320,
                                32,
                                128,
                                "max",
                                128,
                                name="inception_5a")
        in5b = InceptionFactory(in5a,
                                384,
                                192,
                                384,
                                48,
                                128,
                                "max",
                                128,
                                name="inception_5b")
        pool5 = mx.sym.Pooling(in5b,
                               kernel=(7, 7),
                               stride=(1, 1),
                               pool_type="avg",
                               name='pool5/7x7_s1')
        flatten5 = mx.sym.Flatten(data=pool5)
        drop5 = mx.symbol.Dropout(
            data=flatten5, p=0.5,
            name='pool5/drop_7x7_s1') if training else flatten5

        self.__output = self.add_head_nodes(drop5)
Example #15
    def __init__(self, params):
        """ Naming and topology according to: http://ethereon.github.io/netscope/#/gist/f2e4825a8d4f8a3609cefd7ffadc910a
            Based on: https://github.com/dmlc/mxnet/blob/master/example/image-classification/symbols/alexnet.py
        """
        Model.check_parameters(
            params, {
                'name': 'GoogleNet',
                'input_shape': (3, 224, 224),
                'num_classes': 1000,
                'phase': 'training',
                'dtype': 'float32'
            })
        Model.__init__(self, params)

        if self.dtype == 'float16':
            print("[WARNING] MxNet does not provide half precision kernel for LRN layer. It will be disabled. "\
                  "Thus, comparison with single precision version or other frameworks will not be totally fair.")

        training = self.phase == 'training'
        data = self.add_data_node()

        conv1 = ConvFactory(data,
                            64,
                            kernel=(7, 7),
                            stride=(2, 2),
                            pad=(3, 3),
                            name="conv1/7x7_s2")
        pool1 = mx.sym.Pooling(conv1,
                               kernel=(3, 3),
                               stride=(2, 2),
                               pool_type="max",
                               name="pool1/3x3_s2")
        norm1 = self.maybe_lrn(pool1, 'pool1/norm1')

        conv2_reduce = ConvFactory(norm1,
                                   64,
                                   kernel=(1, 1),
                                   stride=(1, 1),
                                   name="conv2/3x3_reduce")

        conv2 = ConvFactory(conv2_reduce,
                            192,
                            kernel=(3, 3),
                            stride=(1, 1),
                            pad=(1, 1),
                            name="conv2/3x3")
        norm2 = self.maybe_lrn(conv2, 'conv2/norm2')
        pool2 = mx.sym.Pooling(norm2,
                               kernel=(3, 3),
                               stride=(2, 2),
                               pool_type="max",
                               name='pool2/3x3_s2')

        in3a = InceptionFactory(pool2,
                                64,
                                96,
                                128,
                                16,
                                32,
                                "max",
                                32,
                                name="inception_3a")
        in3b = InceptionFactory(in3a,
                                128,
                                128,
                                192,
                                32,
                                96,
                                "max",
                                64,
                                name="inception_3b")
        pool3 = mx.sym.Pooling(in3b,
                               kernel=(3, 3),
                               stride=(2, 2),
                               pool_type="max",
                               name='pool3/3x3_s2')

        in4a = InceptionFactory(pool3,
                                192,
                                96,
                                208,
                                16,
                                48,
                                "max",
                                64,
                                name="inception_4a")
        in4b = InceptionFactory(in4a,
                                160,
                                112,
                                224,
                                24,
                                64,
                                "max",
                                64,
                                name="inception_4b")
        in4c = InceptionFactory(in4b,
                                128,
                                128,
                                256,
                                24,
                                64,
                                "max",
                                64,
                                name="inception_4c")
        in4d = InceptionFactory(in4c,
                                112,
                                144,
                                288,
                                32,
                                64,
                                "max",
                                64,
                                name="inception_4d")
        in4e = InceptionFactory(in4d,
                                256,
                                160,
                                320,
                                32,
                                128,
                                "max",
                                128,
                                name="inception_4e")
        pool4 = mx.sym.Pooling(in4e,
                               kernel=(3, 3),
                               stride=(2, 2),
                               pad=(1, 1),
                               pool_type="max",
                               name='pool4/3x3_s2')

        in5a = InceptionFactory(pool4,
                                256,
                                160,
                                320,
                                32,
                                128,
                                "max",
                                128,
                                name="inception_5a")
        in5b = InceptionFactory(in5a,
                                384,
                                192,
                                384,
                                48,
                                128,
                                "max",
                                128,
                                name="inception_5b")
        pool5 = mx.sym.Pooling(in5b,
                               kernel=(7, 7),
                               stride=(1, 1),
                               pool_type="avg",
                               name='pool5/7x7_s1')
        flatten5 = mx.sym.Flatten(data=pool5)
        drop5 = mx.symbol.Dropout(
            data=flatten5, p=0.5,
            name='pool5/drop_7x7_s1') if training else flatten5

        self.__output = self.add_head_nodes(drop5)
Example #16
    def __init__(self, params):
        Model.check_parameters(
            params, {
                'name': 'AlexNetOWT',
                'num_classes': 1000,
                'phase': 'training',
                'dtype': 'float32',
                'input_layout': 'NCHW',
                'model_layout': 'NCHW',
                'nvidia_layers': False
            })
        params['input_shape'] = Model.conv_shape(3, (227, 227),
                                                 params['input_layout'])
        Model.__init__(self, params)

        layers = Layers(params)

        data = self.add_data_node()
        data = Layers.conv_transform_layout(data, params['input_layout'],
                                            params['model_layout'])

        conv1 = layers.Convolution(name='conv1',
                                   data=data,
                                   kernel=(11, 11),
                                   stride=(4, 4),
                                   num_filter=64)
        relu1 = layers.Activation(name='relu1', data=conv1, act_type='relu')
        pool1 = layers.Pooling(name='pool1',
                               data=relu1,
                               pool_type="max",
                               kernel=(3, 3),
                               stride=(2, 2))

        conv2 = layers.Convolution(name='conv2',
                                   data=pool1,
                                   kernel=(5, 5),
                                   pad=(2, 2),
                                   num_filter=192)
        relu2 = layers.Activation(name='relu2', data=conv2, act_type="relu")
        pool2 = layers.Pooling(name='pool2',
                               data=relu2,
                               kernel=(3, 3),
                               stride=(2, 2),
                               pool_type="max")

        conv3 = layers.Convolution(name='conv3',
                                   data=pool2,
                                   kernel=(3, 3),
                                   pad=(1, 1),
                                   num_filter=384)
        relu3 = layers.Activation(name='relu3', data=conv3, act_type="relu")

        conv4 = layers.Convolution(name='conv4',
                                   data=relu3,
                                   kernel=(3, 3),
                                   pad=(1, 1),
                                   num_filter=256)
        relu4 = layers.Activation(name='relu4', data=conv4, act_type="relu")

        conv5 = layers.Convolution(name='conv5',
                                   data=relu4,
                                   kernel=(3, 3),
                                   pad=(1, 1),
                                   num_filter=256)
        relu5 = layers.Activation(name='relu5', data=conv5, act_type="relu")
        pool5 = layers.Pooling(name='pool5',
                               data=relu5,
                               kernel=(3, 3),
                               stride=(2, 2),
                               pool_type="max")

        flatten = mx.symbol.Flatten(data=pool5)
        fc6 = mx.symbol.FullyConnected(name='fc6',
                                       data=flatten,
                                       num_hidden=4096)
        relu6 = layers.Activation(name='relu6', data=fc6, act_type="relu")
        drop6 = layers.Dropout(name='drop6', data=relu6, p=0.5)

        fc7 = mx.symbol.FullyConnected(name='fc7', data=drop6, num_hidden=4096)
        relu7 = layers.Activation(name='relu7', data=fc7, act_type="relu")
        drop7 = layers.Dropout(name='drop7', data=relu7, p=0.5)

        self.__output = self.add_head_nodes(drop7)
Example #17
    def __init__(self, params):
        """ """
        Model.check_parameters(
            params, {
                'name': 'InceptionResNetV2',
                'input_shape': (3, 299, 299),
                'num_classes': 1000,
                'phase': 'training',
                'dtype': 'float32'
            })
        Model.__init__(self, params)

        data = self.add_data_node()
        conv1a_3_3 = ConvFactory(data=data,
                                 num_filter=32,
                                 kernel=(3, 3),
                                 stride=(2, 2))
        conv2a_3_3 = ConvFactory(conv1a_3_3, 32, (3, 3))
        conv2b_3_3 = ConvFactory(conv2a_3_3, 64, (3, 3), pad=(1, 1))
        maxpool3a_3_3 = mx.symbol.Pooling(data=conv2b_3_3,
                                          kernel=(3, 3),
                                          stride=(2, 2),
                                          pool_type='max')

        conv3b_1_1 = ConvFactory(maxpool3a_3_3, 80, (1, 1))
        conv4a_3_3 = ConvFactory(conv3b_1_1, 192, (3, 3))
        maxpool5a_3_3 = mx.symbol.Pooling(data=conv4a_3_3,
                                          kernel=(3, 3),
                                          stride=(2, 2),
                                          pool_type='max')

        tower_conv = ConvFactory(maxpool5a_3_3, 96, (1, 1))
        tower_conv1_0 = ConvFactory(maxpool5a_3_3, 48, (1, 1))
        tower_conv1_1 = ConvFactory(tower_conv1_0, 64, (5, 5), pad=(2, 2))

        tower_conv2_0 = ConvFactory(maxpool5a_3_3, 64, (1, 1))
        tower_conv2_1 = ConvFactory(tower_conv2_0, 96, (3, 3), pad=(1, 1))
        tower_conv2_2 = ConvFactory(tower_conv2_1, 96, (3, 3), pad=(1, 1))

        tower_pool3_0 = mx.symbol.Pooling(data=maxpool5a_3_3,
                                          kernel=(3, 3),
                                          stride=(1, 1),
                                          pad=(1, 1),
                                          pool_type='avg')
        tower_conv3_1 = ConvFactory(tower_pool3_0, 64, (1, 1))
        tower_5b_out = mx.symbol.Concat(
            *[tower_conv, tower_conv1_1, tower_conv2_2, tower_conv3_1])
        net = repeat(tower_5b_out,
                     10,
                     block35,
                     scale=0.17,
                     input_num_channels=320)
        tower_conv = ConvFactory(net, 384, (3, 3), stride=(2, 2))
        tower_conv1_0 = ConvFactory(net, 256, (1, 1))
        tower_conv1_1 = ConvFactory(tower_conv1_0, 256, (3, 3), pad=(1, 1))
        tower_conv1_2 = ConvFactory(tower_conv1_1, 384, (3, 3), stride=(2, 2))
        tower_pool = mx.symbol.Pooling(net,
                                       kernel=(3, 3),
                                       stride=(2, 2),
                                       pool_type='max')
        net = mx.symbol.Concat(*[tower_conv, tower_conv1_2, tower_pool])
        net = repeat(net, 20, block17, scale=0.1, input_num_channels=1088)
        tower_conv = ConvFactory(net, 256, (1, 1))
        tower_conv0_1 = ConvFactory(tower_conv, 384, (3, 3), stride=(2, 2))
        tower_conv1 = ConvFactory(net, 256, (1, 1))
        tower_conv1_1 = ConvFactory(tower_conv1, 288, (3, 3), stride=(2, 2))
        tower_conv2 = ConvFactory(net, 256, (1, 1))
        tower_conv2_1 = ConvFactory(tower_conv2, 288, (3, 3), pad=(1, 1))
        tower_conv2_2 = ConvFactory(tower_conv2_1, 320, (3, 3), stride=(2, 2))
        tower_pool = mx.symbol.Pooling(net,
                                       kernel=(3, 3),
                                       stride=(2, 2),
                                       pool_type='max')
        net = mx.symbol.Concat(
            *[tower_conv0_1, tower_conv1_1, tower_conv2_2, tower_pool])

        net = repeat(net, 9, block8, scale=0.2, input_num_channels=2080)
        net = block8(net, with_act=False, input_num_channels=2080)

        net = ConvFactory(net, 1536, (1, 1))
        net = mx.symbol.Pooling(net,
                                kernel=(1, 1),
                                global_pool=True,
                                stride=(2, 2),
                                pool_type='avg')
        net = mx.symbol.Flatten(net)
        if self.phase == 'training':
            net = mx.symbol.Dropout(data=net, p=0.2)
        self.__output = self.add_head_nodes(net)
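
Finally, the parameter count used by Model.num_parameters in Example #11 can be approximated for any of these symbols from inferred argument shapes. A sketch under the usual assumptions about class and attribute names in this listing:

    import numpy as np

    # Hypothetical parameter count for the InceptionResNetV2 symbol above:
    # sum the sizes of all learnable arguments, skipping data and label inputs.
    m = InceptionResNetV2({'phase': 'training'})
    arg_shapes, _, _ = m.output.infer_shape(data=(1, 3, 299, 299))
    n_params = sum(int(np.prod(shape))
                   for name, shape in zip(m.output.list_arguments(), arg_shapes)
                   if name not in ('data', 'softmax_label'))
    print("InceptionResNetV2 parameters: %d" % n_params)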