def __init__(self, params):
    Model.check_parameters(params, {'name': 'InceptionV3'})
    BaseInceptionModel.__init__(self, params)
    v = mx.sym.Variable(name="data")
    # Input conv modules
    v = self.conv('conv', v, num_filters=32, kernel=(3, 3), stride=(2, 2), pad=(0, 0))
    v = self.conv('conv', v, num_filters=32, kernel=(3, 3), stride=(1, 1), pad=(0, 0))
    v = self.conv('conv', v, num_filters=64, kernel=(3, 3), stride=(1, 1), pad=(1, 1))
    v = mx.symbol.Pooling(name='pool1', data=v, pool_type="max", kernel=(3, 3), stride=(2, 2))
    v = self.conv('conv', v, num_filters=80, kernel=(1, 1), stride=(1, 1), pad=(0, 0))
    v = self.conv('conv', v, num_filters=192, kernel=(3, 3), stride=(1, 1), pad=(0, 0))
    v = mx.symbol.Pooling(name='pool2', data=v, pool_type="max", kernel=(3, 3), stride=(2, 2))
    # Three Type A inception modules
    for sz in (32, 64, 64):
        v = self.module_a(v, sz)
    # One Type B inception module
    v = self.module_b(v)
    # Four Type C inception modules
    for sz in (128, 160, 160, 192):
        v = self.module_c(v, sz)
    # One Type D inception module
    v = self.module_d(v)
    # Two Type E inception modules
    v = self.module_e(v, 'avg')
    v = self.module_e(v, 'max')
    # Final global pooling
    v = mx.symbol.Pooling(name='pool', data=v, pool_type="avg", kernel=(8, 8), stride=(1, 1))
    # And classifier
    self.__output = self.add_head_nodes(v)
def __init__(self, params):
    specs = ResNet.specs[params['model']]
    Model.check_parameters(
        params,
        {'name': specs['name'], 'input_shape': (3, 224, 224), 'num_classes': 1000,
         'phase': 'training', 'dtype': 'float32'}
    )
    Model.__init__(self, params)
    # Adapted from https://github.com/tornadomeet/ResNet/blob/master/train_resnet.py
    # Original author Wei Wu
    if specs['num_layers'] >= 50:
        filter_list = [64, 256, 512, 1024, 2048]
        bottle_neck = True
    else:
        filter_list = [64, 64, 128, 256, 512]
        bottle_neck = False
    self.__output = self.resnet(units=specs['units'], num_stages=4, filter_list=filter_list,
                                bottle_neck=bottle_neck, workspace=256)
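# Illustrative sketch, not part of the original source: the constructor above assumes a
# class-level ResNet.specs table keyed by the 'model' parameter. A plausible entry for
# ResNet-50, using the standard [3, 4, 6, 3] stage configuration, could look like this:
RESNET50_SPEC_EXAMPLE = {
    'resnet50': {'name': 'ResNet50', 'num_layers': 50, 'units': [3, 4, 6, 3]},
}
# With num_layers >= 50 the constructor selects bottleneck residual blocks and the wider
# filter_list [64, 256, 512, 1024, 2048]; shallower variants use plain blocks.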
def __init__(self, params):
    Model.check_parameters(params, {'name': 'InceptionV4'})
    BaseInceptionModel.__init__(self, params)
    v = self.add_data_node()
    # Input conv modules
    v = self.conv('conv', v, num_filters=32, kernel=(3, 3), stride=(2, 2), pad=(0, 0))
    v = self.conv('conv', v, num_filters=32, kernel=(3, 3), stride=(1, 1), pad=(0, 0))
    v = self.conv('conv', v, num_filters=64, kernel=(3, 3), stride=(1, 1), pad=(1, 1))
    # Stem modules
    v = self.inception_v4_sa(v)
    v = self.inception_v4_sb(v)
    v = self.inception_v4_sc(v)
    # Four Type A modules
    for _ in range(4):
        v = self.inception_v4_a(v)
    # One Type A Reduction module
    v = self.inception_v4_ra(v, 192, 224, 256, 384)
    # Seven Type B modules
    for _ in range(7):
        v = self.inception_v4_b(v)
    # One Type B Reduction module
    v = self.inception_v4_rb(v)
    # Three Type C modules
    for _ in range(3):
        v = self.inception_v4_c(v)
    # Final global pooling
    v = mx.symbol.Pooling(name='pool', data=v, pool_type="avg", kernel=(8, 8), stride=(1, 1))
    if self.phase == 'training':
        v = mx.symbol.Dropout(name='drop', data=v, p=0.2)
    # And classifier
    self.__output = self.add_head_nodes(v)
def __init__(self, params):
    Model.check_parameters(
        params,
        {'input_shape': (3, 299, 299), 'num_classes': 1000, 'phase': 'training',
         'dtype': 'float32'}
    )
    Model.__init__(self, params)
    self.counts = defaultdict(lambda: 0)
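# Illustrative sketch, not part of the original source: self.counts presumably lets the
# conv() helper, which is called repeatedly with the same base name (e.g. 'conv'), assign
# unique layer names. The helper below is a hypothetical stand-in showing that idea only.
def _unique_name_example(self, base):
    # e.g. 'conv' -> 'conv_1', 'conv_2', ... on successive calls
    self.counts[base] += 1
    return '%s_%d' % (base, self.counts[base])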
def __init__(self, params):
    Model.check_parameters(
        params,
        {'name': 'DeepMNIST', 'input_shape': (784, ), 'num_classes': 10,
         'phase': 'training', 'dtype': 'float32'}
    )
    Model.__init__(self, params)
    v = self.add_data_node()
    for layer_size in [2500, 2000, 1500, 1000, 500]:
        v = mx.sym.FullyConnected(data=v, num_hidden=layer_size)
        v = mx.symbol.Activation(data=v, act_type="relu")
    self.__output = self.add_head_nodes(v)
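# Illustrative usage sketch, not part of the original source. It mirrors the __worker()
# benchmark routine further below (same Module/bind/init/update calls). Passing only
# {'batch_size': 32}, using mx.cpu(), and omitting label_shape from SyntheticDataIterator
# are assumptions made for this example.
def _run_deep_mnist_example():
    m = DeepMNIST({'batch_size': 32})
    data_shape = (m.batch_size, ) + m.input_shape
    data = SyntheticDataIterator(m.num_classes, data_shape, max_iter=10, dtype=np.float32)
    mod = mx.mod.Module(symbol=m.output, context=mx.cpu())
    mod.bind(data_shapes=data.provide_data, label_shapes=data.provide_label,
             for_training=True, inputs_need_grad=False)
    mod.init_params(initializer=mx.init.Xavier(magnitude=2.))
    mod.init_optimizer(kvstore='local', optimizer='sgd',
                       optimizer_params=(('learning_rate', 0.01), ))
    mod.forward_backward(next(data))   # one synthetic training step
    mod.update()
    mx.nd.waitall()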
def __init__(self, params):
    Model.check_parameters(
        params,
        {'name': 'EngAcousticModel', 'input_shape': (540, ), 'num_classes': 8192,
         'phase': 'training', 'dtype': 'float32'}
    )
    Model.__init__(self, params)
    v = self.add_data_node()
    for _ in range(5):
        v = mx.sym.FullyConnected(data=v, num_hidden=2048)
        v = mx.symbol.Activation(data=v, act_type="relu")
    self.__output = self.add_head_nodes(v)
def __init__(self, params):
    specs = VGG.specs[params['model']]
    Model.check_parameters(
        params,
        {'name': specs['name'], 'input_shape': (3, 224, 224), 'num_classes': 1000,
         'phase': 'training', 'dtype': 'float32'}
    )
    Model.__init__(self, params)
    training = self.phase == 'training'
    v = self.add_data_node()
    layers, filters = specs['specs']
    for i, num in enumerate(layers):
        for j in range(num):
            v = mx.symbol.Convolution(name='conv%d_%d' % (i + 1, j + 1), data=v,
                                      kernel=(3, 3), pad=(1, 1), num_filter=filters[i])
            v = mx.symbol.Activation(name='relu%d_%d' % (i + 1, j + 1), data=v, act_type="relu")
        v = mx.sym.Pooling(name='pool%d' % (i + 1), data=v, pool_type="max",
                           kernel=(2, 2), stride=(2, 2))
    v = mx.sym.Flatten(name='flatten', data=v)
    for i in range(2):
        v = mx.sym.FullyConnected(name='fc%d' % (6 + i), data=v, num_hidden=4096)
        v = mx.symbol.Activation(name='relu%d' % (6 + i), data=v, act_type="relu")
        v = mx.symbol.Dropout(name='drop%d' % (6 + i), data=v, p=0.5) if training else v
    self.__output = self.add_head_nodes(v)
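# Illustrative sketch, not part of the original source: VGG.specs is assumed to map a model
# identifier to its display name and a (layers, filters) pair consumed by the loops above.
# For the standard VGG-16 configuration that pair would be:
VGG16_SPEC_EXAMPLE = {
    'vgg16': {'name': 'VGG16', 'specs': ([2, 2, 3, 3, 3], [64, 128, 256, 512, 512])},
}
# i.e. two 64-filter conv layers, two 128-filter layers, then three each with 256, 512 and
# 512 filters, with a 2x2 max pooling after every block and two 4096-unit FC layers on top.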
def __init__(self, params):
    Model.check_parameters(
        params,
        {'name': 'AlexNet', 'input_shape': (3, 227, 227), 'num_classes': 1000,
         'phase': 'training', 'dtype': 'float32'}
    )
    Model.__init__(self, params)
    if self.dtype == 'float16':
        print("[WARNING] MxNet does not provide half precision kernel for LRN layer. It will be disabled. "
              "Thus, comparison with single precision version or other frameworks will not be totally fair.")
    training = self.phase == 'training'
    data = self.add_data_node()
    conv1 = mx.symbol.Convolution(name='conv1', data=data, kernel=(11, 11), stride=(4, 4), num_filter=96)
    relu1 = mx.symbol.Activation(name='relu1', data=conv1, act_type='relu')
    norm1 = self.maybe_lrn(relu1, 'norm1')
    pool1 = mx.symbol.Pooling(name='pool1', data=norm1, pool_type="max", kernel=(3, 3), stride=(2, 2))
    conv2 = mx.symbol.Convolution(name='conv2', data=pool1, kernel=(5, 5), pad=(2, 2), num_filter=256, num_group=1)
    relu2 = mx.symbol.Activation(name='relu2', data=conv2, act_type="relu")
    norm2 = self.maybe_lrn(relu2, 'norm2')
    pool2 = mx.symbol.Pooling(name='pool2', data=norm2, kernel=(3, 3), stride=(2, 2), pool_type="max")
    conv3 = mx.symbol.Convolution(name='conv3', data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=384)
    relu3 = mx.symbol.Activation(name='relu3', data=conv3, act_type="relu")
    conv4 = mx.symbol.Convolution(name='conv4', data=relu3, kernel=(3, 3), pad=(1, 1), num_filter=384, num_group=1)
    relu4 = mx.symbol.Activation(name='relu4', data=conv4, act_type="relu")
    conv5 = mx.symbol.Convolution(name='conv5', data=relu4, kernel=(3, 3), pad=(1, 1), num_filter=256, num_group=1)
    relu5 = mx.symbol.Activation(name='relu5', data=conv5, act_type="relu")
    pool5 = mx.symbol.Pooling(name='pool5', data=relu5, kernel=(3, 3), stride=(2, 2), pool_type="max")
    flatten = mx.symbol.Flatten(data=pool5)
    fc6 = mx.symbol.FullyConnected(name='fc6', data=flatten, num_hidden=4096)
    relu6 = mx.symbol.Activation(name='relu6', data=fc6, act_type="relu")
    drop6 = mx.symbol.Dropout(name='drop6', data=relu6, p=0.5) if training else relu6
    fc7 = mx.symbol.FullyConnected(name='fc7', data=drop6, num_hidden=4096)
    relu7 = mx.symbol.Activation(name='relu7', data=fc7, act_type="relu")
    drop7 = mx.symbol.Dropout(name='drop7', data=relu7, p=0.5) if training else relu7
    self.__output = self.add_head_nodes(drop7)
def __init__(self, params):
    Model.check_parameters(
        params,
        {'name': 'AlexNet', 'input_shape': (3, 227, 227), 'num_classes': 1000,
         'phase': 'training', 'dtype': 'float32'}
    )
    Model.__init__(self, params)
    training = self.phase == 'training'
    data = self.add_data_node()
    conv1 = mx.symbol.Convolution(name='conv1', data=data, kernel=(11, 11), stride=(4, 4), num_filter=96)
    relu1 = mx.symbol.Activation(name='relu1', data=conv1, act_type='relu')
    norm1 = self.maybe_lrn(relu1, 'norm1')
    pool1 = mx.symbol.Pooling(name='pool1', data=norm1, pool_type="max", kernel=(3, 3), stride=(2, 2))
    conv2 = mx.symbol.Convolution(name='conv2', data=pool1, kernel=(5, 5), pad=(2, 2), num_filter=256, num_group=1)
    relu2 = mx.symbol.Activation(name='relu2', data=conv2, act_type="relu")
    norm2 = self.maybe_lrn(relu2, 'norm2')
    pool2 = mx.symbol.Pooling(name='pool2', data=norm2, kernel=(3, 3), stride=(2, 2), pool_type="max")
    conv3 = mx.symbol.Convolution(name='conv3', data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=384)
    relu3 = mx.symbol.Activation(name='relu3', data=conv3, act_type="relu")
    conv4 = mx.symbol.Convolution(name='conv4', data=relu3, kernel=(3, 3), pad=(1, 1), num_filter=384, num_group=1)
    relu4 = mx.symbol.Activation(name='relu4', data=conv4, act_type="relu")
    conv5 = mx.symbol.Convolution(name='conv5', data=relu4, kernel=(3, 3), pad=(1, 1), num_filter=256, num_group=1)
    relu5 = mx.symbol.Activation(name='relu5', data=conv5, act_type="relu")
    pool5 = mx.symbol.Pooling(name='pool5', data=relu5, kernel=(3, 3), stride=(2, 2), pool_type="max")
    flatten = mx.symbol.Flatten(data=pool5)
    fc6 = mx.symbol.FullyConnected(name='fc6', data=flatten, num_hidden=4096)
    relu6 = mx.symbol.Activation(name='relu6', data=fc6, act_type="relu")
    drop6 = mx.symbol.Dropout(name='drop6', data=relu6, p=0.5) if training else relu6
    fc7 = mx.symbol.FullyConnected(name='fc7', data=drop6, num_hidden=4096)
    relu7 = mx.symbol.Activation(name='relu7', data=fc7, act_type="relu")
    drop7 = mx.symbol.Dropout(name='drop7', data=relu7, p=0.5) if training else relu7
    self.__output = self.add_head_nodes(drop7)
def __init__(self, params): specs = ResNet.specs[params['model']] Model.check_parameters( params, { 'name': specs['name'], 'num_classes': 1000, 'phase': 'training', 'dtype': 'float32', 'input_layout': 'NCHW', 'model_layout': 'NCHW', 'nvidia_layers': False, 'workspace': 1024 }) params['input_shape'] = Model.conv_shape(3, (224, 224), params['input_layout']) Model.__init__(self, params) self.params = params self.layers = Layers(params) # Adapted from https://github.com/tornadomeet/ResNet/blob/master/train_resnet.py # Original author Wei Wu # Some optimizations are taken from NVIDIA code from NGC containers. if specs['num_layers'] >= 50: filter_list = [64, 256, 512, 1024, 2048] bottle_neck = True else: filter_list = [64, 64, 128, 256, 512] bottle_neck = False self.__output = self.resnet(units=specs['units'], num_stages=4, filter_list=filter_list, bottle_neck=bottle_neck, workspace=params['workspace'], fuse_bn_add_relu=params['nvidia_layers'], fuse_bn_relu=params['nvidia_layers'])
def __worker(q, conv_arch, num_rnn_layers, rnn_layer_size, bidirectional, rnn_type, brnn_output):
    # Comment out the following two lines if you need MXNET's output
    sys.stdout = open(os.devnull, 'w')
    sys.stderr = open(os.devnull, 'w')
    os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0'
    device = mx.gpu(0)
    m = DeepSpeech2({
        'batch_size': 16,
        'model_opts': {
            'conv_arch': conv_arch,
            'num_rnn_layers': num_rnn_layers,
            'rnn_layer_size': rnn_layer_size,
            'bidirectional': bidirectional,
            'rnn_type': rnn_type,
            'brnn_output': brnn_output
        }
    })
    data_shape = (m.batch_size, ) + m.input_shape
    data = SyntheticDataIterator(m.num_classes, data_shape, max_iter=10, dtype=np.float32,
                                 label_shape=(m.batch_size, m.output_length))
    mod = mx.mod.Module(symbol=m.output, context=device, label_names=['softmax_label'])
    mod.bind(data_shapes=data.provide_data, label_shapes=data.provide_label,
             for_training=True, inputs_need_grad=False)
    mod.init_params(initializer=mx.init.Xavier(magnitude=2.))
    mod.init_optimizer(kvstore='local', optimizer='sgd',
                       optimizer_params=(('learning_rate', 0.01), ))
    batch = next(data)
    mod.forward_backward(batch)
    mod.update()
    mx.nd.waitall()
    q.put(Model.num_parameters(mod))
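# Illustrative sketch, not part of the original source: the q.put(...) call above suggests
# __worker is intended to run in a child process that reports the model's parameter count
# back through a multiprocessing.Queue. The driver below is an assumption about how it is
# invoked (the function name and the process-per-configuration pattern are not confirmed
# by the source).
def _count_params_example(conv_arch, num_rnn_layers, rnn_layer_size,
                          bidirectional, rnn_type, brnn_output):
    import multiprocessing
    q = multiprocessing.Queue()
    p = multiprocessing.Process(target=__worker,
                                args=(q, conv_arch, num_rnn_layers, rnn_layer_size,
                                      bidirectional, rnn_type, brnn_output))
    p.start()
    num_params = q.get()   # blocks until the worker publishes its result
    p.join()
    return num_params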
def __init__(self, params):
    Model.check_parameters(
        params,
        {'name': 'Overfeat', 'input_shape': (3, 231, 231), 'num_classes': 1000,
         'phase': 'training', 'dtype': 'float32'}
    )
    Model.__init__(self, params)
    training = self.phase == 'training'
    data = self.add_data_node()
    # Layer1
    conv1 = mx.symbol.Convolution(name='conv1', data=data, kernel=(11, 11), stride=(4, 4), num_filter=96)
    relu1 = mx.symbol.Activation(name='relu1', data=conv1, act_type='relu')
    pool1 = mx.symbol.Pooling(name='pool1', data=relu1, pool_type="max", kernel=(2, 2), stride=(2, 2))
    # Layer2
    conv2 = mx.symbol.Convolution(name='conv2', data=pool1, kernel=(5, 5), num_filter=256)
    relu2 = mx.symbol.Activation(name='relu2', data=conv2, act_type="relu")
    pool2 = mx.symbol.Pooling(name='pool2', data=relu2, kernel=(2, 2), stride=(2, 2), pool_type="max")
    # Layer3
    conv3 = mx.symbol.Convolution(name='conv3', data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=512)
    relu3 = mx.symbol.Activation(name='relu3', data=conv3, act_type="relu")
    # Layer4
    conv4 = mx.symbol.Convolution(name='conv4', data=relu3, kernel=(3, 3), pad=(1, 1), num_filter=1024)
    relu4 = mx.symbol.Activation(name='relu4', data=conv4, act_type="relu")
    # Layer5
    conv5 = mx.symbol.Convolution(name='conv5', data=relu4, kernel=(3, 3), pad=(1, 1), num_filter=1024)
    relu5 = mx.symbol.Activation(name='relu5', data=conv5, act_type="relu")
    pool5 = mx.symbol.Pooling(name='pool5', data=relu5, kernel=(2, 2), stride=(2, 2), pool_type="max")
    # Layer6
    flatten = mx.symbol.Flatten(data=pool5)
    fc6 = mx.symbol.FullyConnected(name='fc6', data=flatten, num_hidden=3072)
    relu6 = mx.symbol.Activation(name='relu6', data=fc6, act_type="relu")
    drop6 = mx.symbol.Dropout(name='drop6', data=relu6, p=0.5) if training else relu6
    # Layer7
    fc7 = mx.symbol.FullyConnected(name='fc7', data=drop6, num_hidden=4096)
    relu7 = mx.symbol.Activation(name='relu7', data=fc7, act_type="relu")
    drop7 = mx.symbol.Dropout(name='drop7', data=relu7, p=0.5) if training else relu7
    self.__output = self.add_head_nodes(drop7)
def __init__(self, params):
    # Common parameters for all models
    Model.check_parameters(
        params,
        {
            'name': 'DeepSpeech2',
            'input_shape': (1, 200, 161),
            'num_classes': 29 * 29 + 1,     # Alphabet size + BLANK character (which is 0)
            'phase': 'training',
            'dtype': 'float32',
            'model_opts': {}
        })
    # Specific parameters for DeepSpeech2
    Model.check_parameters(
        params['model_opts'],
        {
            'conv_batch_norm': True,        # Use BatchNorm in Conv layers
            'conv_arch': '2-layer-2D-v2',   # Conv layers architecture
            'num_rnn_layers': 3,            # Number of RNN layers
            'rnn_layer_size': 2048,         # Hidden size of each RNN
            'bidirectional': True,          # Use bidirectional RNN
            'rnn_type': 'rnn_relu',         # One of RNN_TYPES
            'rnn_batch_norm': False,        # Use Batch norm in RNNs after i2h matrix. DOES NOT WORK NOW.
            'brnn_share_i2h': False,        # Share i2h weights in RNNs. DOES NOT WORK NOW.
            'brnn_output': 'concat',        # Aggregation method for BRNN outputs. One of BRNN_OUTPUT.
            'rnn_dropout': 0.0,             # Use dropout in RNNs. Value from [0, 1).
            'ctc_loss': 'mxnet_ctc_loss'    # CTC Loss implementation, one of CTC_LOSSES
        })
    Model.__init__(self, params)
    self.__batch_size = params['batch_size']
    self.__output_length = 0                # [output] Length of output sequence
    self.__data_shape = (self.batch_size, ) + self.input_shape
    # For debugging purposes
    self.__debug = logging.getLogger().isEnabledFor(logging.DEBUG) or \
                   os.environ.get('DLBS_DEBUG', '0') == '1'
    if self.model_opts['conv_arch'] not in DeepSpeech2.CONV_ARCHS:
        raise ValueError("Invalid conv arch ('%s'), must be one of '%s'" %
                         (self.model_opts['conv_arch'], str(DeepSpeech2.CONV_ARCHS)))
    if self.model_opts['rnn_type'] not in DeepSpeech2.RNN_TYPES:
        raise ValueError("Invalid RNN type ('%s'), must be one of '%s'" %
                         (self.model_opts['rnn_type'], str(DeepSpeech2.RNN_TYPES)))
    if self.model_opts['brnn_output'] not in DeepSpeech2.BRNN_OUTPUT:
        raise ValueError("Invalid BRNN output function ('%s'), must be one of '%s'" %
                         (self.model_opts['brnn_output'], str(DeepSpeech2.BRNN_OUTPUT)))
    if self.model_opts['ctc_loss'] not in DeepSpeech2.CTC_LOSSES:
        raise ValueError("Invalid ctc loss ('%s'), must be one of '%s'" %
                         (self.model_opts['ctc_loss'], str(DeepSpeech2.CTC_LOSSES)))
    if self.model_opts['rnn_batch_norm'] is True:
        self.model_opts['rnn_batch_norm'] = False
        print("[WARNING] Batch norm is not supported in RNNs.")
    if self.model_opts['brnn_share_i2h'] is True:
        self.model_opts['brnn_share_i2h'] = False
        print("[WARNING] Sharing input2hidden weights in BRNNs is not supported.")
) print("Model options: " + str(self.model_opts)) # This helps debugging shapes logging.debug("Batch size: %d", self.batch_size) logging.debug("Input length: %d", self.input_shape[1]) logging.debug("Num input features: %d", self.input_shape[2]) # Input data v is a spectrogram v = self.add_data_node() # [Batch, 1, DatumLen, DatumFeatures] self.log_shape("Input shape: %s", v) # 1-3 layers of 1D or 2D convolutions v, length = self.add_conv_layers(v) # [Batch, 1, CnnLen, CnnFeatures] # Add RNN layers v, nrnn_features = self.add_rnn_layers( v, length) # [CnnLen, Batch, RnnFeatures] # Compute CTC loss v = mx.sym.Reshape( data=v, shape=(-1, nrnn_features)) # [CnnLen*Batch, RnnFeatures] self.log_shape("FC input shape: %s", v) v = mx.sym.FullyConnected( data=v, num_hidden=self.num_classes) # [CnnLen*Batch, self.num_classes] self.log_shape("FC output shape: %s", v) if self.dtype == 'float16': print("Casting logits to np.float32") v = mx.sym.cast(data=v, dtype=np.float32) if self.phase == 'training': # [CnnLen, Batch, NumClasses(alphabet+1)] v_ctc = mx.sym.Reshape(data=v, shape=(length, self.batch_size, self.num_classes)) labels = mx.sym.Variable(name="softmax_label", shape=(self.batch_size, length), init=mx.init.Zero()) self.log_shape("CTC input shape: %s", v_ctc) if self.model_opts['ctc_loss'] == 'warp_ctc_loss': print("Using Baidu's Warp CTC Loss.") print("[WARNING] WarpCTC was not tested and may not work.") try: v = mx.symbol.WarpCTC(data=v_ctc, label=labels) except AttributeError: print( "[ERROR] WarpCTC symbol is not available. Recompile MXNET with WarpCTC support." ) raise else: print("Using CTCLoss from mx.symbol.contrib.") # data: (sequence_length, batch_size, alphabet_size + 1) # The 0th element of this vector is reserved for the special blank character. # label: (batch_size, label_sequence_length) # Is a tensor of integers between 1 and alphabet_size. # out: (batch_size) # Is a list of CTC loss values, one per example in the batch. ctc_loss = mx.sym.MakeLoss( mx.symbol.contrib.CTCLoss(data=v_ctc, label=labels, name='ctc')) predictions = mx.sym.MakeLoss( mx.sym.SoftmaxActivation(data=v, name='softmax')) v = mx.sym.Group([mx.sym.BlockGrad(predictions), ctc_loss]) else: v = mx.symbol.softmax(data=v, name='softmax') self.log_shape("Output shape: %s", v) self.__output = v self.__output_length = length # We have this many labels per input sequence. self._labels_shape = (self.__output_length, ) # K labels for every batch self._labels_range = ( 1, self.num_classes ) # The class '0' is reserved for BLANK character. self.__ctc_metrics = CtcMetrics(seq_len=self.__output_length) self._eval_metric = mx.metric.CustomMetric( feval=self.__ctc_metrics.accuracy, name='ctc_metric', allow_extra_outputs=True)
def __init__(self, params): """ Naming and topology according to: http://ethereon.github.io/netscope/#/gist/f2e4825a8d4f8a3609cefd7ffadc910a Based on: https://github.com/dmlc/mxnet/blob/master/example/image-classification/symbols/alexnet.py """ Model.check_parameters( params, { 'name': 'GoogleNet', 'input_shape': (3, 224, 224), 'num_classes': 1000, 'phase': 'training', 'dtype': 'float32' }) Model.__init__(self, params) training = self.phase == 'training' data = self.add_data_node() conv1 = ConvFactory(data, 64, kernel=(7, 7), stride=(2, 2), pad=(3, 3), name="conv1/7x7_s2") pool1 = mx.sym.Pooling(conv1, kernel=(3, 3), stride=(2, 2), pool_type="max", name="pool1/3x3_s2") norm1 = mx.symbol.LRN(data=pool1, alpha=0.0001, beta=0.75, knorm=2, nsize=5, name='pool1/norm1') conv2_reduce = ConvFactory(norm1, 64, kernel=(1, 1), stride=(1, 1), name="conv2/3x3_reduce") conv2 = ConvFactory(conv2_reduce, 192, kernel=(3, 3), stride=(1, 1), pad=(1, 1), name="conv2/3x3") norm2 = mx.symbol.LRN(data=conv2, alpha=0.0001, beta=0.75, knorm=2, nsize=5, name='conv2/norm2') pool2 = mx.sym.Pooling(norm2, kernel=(3, 3), stride=(2, 2), pool_type="max", name='pool2/3x3_s2') in3a = InceptionFactory(pool2, 64, 96, 128, 16, 32, "max", 32, name="inception_3a") in3b = InceptionFactory(in3a, 128, 128, 192, 32, 96, "max", 64, name="inception_3b") pool3 = mx.sym.Pooling(in3b, kernel=(3, 3), stride=(2, 2), pool_type="max", name='pool3/3x3_s2') in4a = InceptionFactory(pool3, 192, 96, 208, 16, 48, "max", 64, name="inception_4a") in4b = InceptionFactory(in4a, 160, 112, 224, 24, 64, "max", 64, name="inception_4b") in4c = InceptionFactory(in4b, 128, 128, 256, 24, 64, "max", 64, name="inception_4c") in4d = InceptionFactory(in4c, 112, 144, 288, 32, 64, "max", 64, name="inception_4d") in4e = InceptionFactory(in4d, 256, 160, 320, 32, 128, "max", 128, name="inception_4e") pool4 = mx.sym.Pooling(in4e, kernel=(3, 3), stride=(2, 2), pad=(1, 1), pool_type="max", name='pool4/3x3_s2') in5a = InceptionFactory(pool4, 256, 160, 320, 32, 128, "max", 128, name="inception_5a") in5b = InceptionFactory(in5a, 384, 192, 384, 48, 128, "max", 128, name="inception_5b") pool5 = mx.sym.Pooling(in5b, kernel=(7, 7), stride=(1, 1), pool_type="avg", name='pool5/7x7_s1') flatten5 = mx.sym.Flatten(data=pool5) drop5 = mx.symbol.Dropout( data=flatten5, p=0.5, name='pool5/drop_7x7_s1') if training else flatten5 self.__output = self.add_head_nodes(drop5)
def __init__(self, params): """ Naming and topology according to: http://ethereon.github.io/netscope/#/gist/f2e4825a8d4f8a3609cefd7ffadc910a Based on: https://github.com/dmlc/mxnet/blob/master/example/image-classification/symbols/alexnet.py """ Model.check_parameters( params, { 'name': 'GoogleNet', 'input_shape': (3, 224, 224), 'num_classes': 1000, 'phase': 'training', 'dtype': 'float32' }) Model.__init__(self, params) if self.dtype == 'float16': print("[WARNING] MxNet does not provide half precision kernel for LRN layer. It will be disabled. "\ "Thus, comparison with single precision version or other frameworks will not be totally fair.") training = self.phase == 'training' data = self.add_data_node() conv1 = ConvFactory(data, 64, kernel=(7, 7), stride=(2, 2), pad=(3, 3), name="conv1/7x7_s2") pool1 = mx.sym.Pooling(conv1, kernel=(3, 3), stride=(2, 2), pool_type="max", name="pool1/3x3_s2") norm1 = self.maybe_lrn(pool1, 'pool1/norm1') conv2_reduce = ConvFactory(norm1, 64, kernel=(1, 1), stride=(1, 1), name="conv2/3x3_reduce") conv2 = ConvFactory(conv2_reduce, 192, kernel=(3, 3), stride=(1, 1), pad=(1, 1), name="conv2/3x3") norm2 = self.maybe_lrn(conv2, 'conv2/norm2') pool2 = mx.sym.Pooling(norm2, kernel=(3, 3), stride=(2, 2), pool_type="max", name='pool2/3x3_s2') in3a = InceptionFactory(pool2, 64, 96, 128, 16, 32, "max", 32, name="inception_3a") in3b = InceptionFactory(in3a, 128, 128, 192, 32, 96, "max", 64, name="inception_3b") pool3 = mx.sym.Pooling(in3b, kernel=(3, 3), stride=(2, 2), pool_type="max", name='pool3/3x3_s2') in4a = InceptionFactory(pool3, 192, 96, 208, 16, 48, "max", 64, name="inception_4a") in4b = InceptionFactory(in4a, 160, 112, 224, 24, 64, "max", 64, name="inception_4b") in4c = InceptionFactory(in4b, 128, 128, 256, 24, 64, "max", 64, name="inception_4c") in4d = InceptionFactory(in4c, 112, 144, 288, 32, 64, "max", 64, name="inception_4d") in4e = InceptionFactory(in4d, 256, 160, 320, 32, 128, "max", 128, name="inception_4e") pool4 = mx.sym.Pooling(in4e, kernel=(3, 3), stride=(2, 2), pad=(1, 1), pool_type="max", name='pool4/3x3_s2') in5a = InceptionFactory(pool4, 256, 160, 320, 32, 128, "max", 128, name="inception_5a") in5b = InceptionFactory(in5a, 384, 192, 384, 48, 128, "max", 128, name="inception_5b") pool5 = mx.sym.Pooling(in5b, kernel=(7, 7), stride=(1, 1), pool_type="avg", name='pool5/7x7_s1') flatten5 = mx.sym.Flatten(data=pool5) drop5 = mx.symbol.Dropout( data=flatten5, p=0.5, name='pool5/drop_7x7_s1') if training else flatten5 self.__output = self.add_head_nodes(drop5)
def __init__(self, params):
    Model.check_parameters(
        params,
        {'name': 'AlexNetOWT', 'num_classes': 1000, 'phase': 'training', 'dtype': 'float32',
         'input_layout': 'NCHW', 'model_layout': 'NCHW', 'nvidia_layers': False}
    )
    params['input_shape'] = Model.conv_shape(3, (227, 227), params['input_layout'])
    Model.__init__(self, params)
    layers = Layers(params)
    data = self.add_data_node()
    data = Layers.conv_transform_layout(data, params['input_layout'], params['model_layout'])
    conv1 = layers.Convolution(name='conv1', data=data, kernel=(11, 11), stride=(4, 4), num_filter=64)
    relu1 = layers.Activation(name='relu1', data=conv1, act_type='relu')
    pool1 = layers.Pooling(name='pool1', data=relu1, pool_type="max", kernel=(3, 3), stride=(2, 2))
    conv2 = layers.Convolution(name='conv2', data=pool1, kernel=(5, 5), pad=(2, 2), num_filter=192)
    relu2 = layers.Activation(name='relu2', data=conv2, act_type="relu")
    pool2 = layers.Pooling(name='pool2', data=relu2, kernel=(3, 3), stride=(2, 2), pool_type="max")
    conv3 = layers.Convolution(name='conv3', data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=384)
    relu3 = layers.Activation(name='relu3', data=conv3, act_type="relu")
    conv4 = layers.Convolution(name='conv4', data=relu3, kernel=(3, 3), pad=(1, 1), num_filter=256)
    relu4 = layers.Activation(name='relu4', data=conv4, act_type="relu")
    conv5 = layers.Convolution(name='conv5', data=relu4, kernel=(3, 3), pad=(1, 1), num_filter=256)
    relu5 = layers.Activation(name='relu5', data=conv5, act_type="relu")
    pool5 = layers.Pooling(name='pool5', data=relu5, kernel=(3, 3), stride=(2, 2), pool_type="max")
    flatten = mx.symbol.Flatten(data=pool5)
    fc6 = mx.symbol.FullyConnected(name='fc6', data=flatten, num_hidden=4096)
    relu6 = layers.Activation(name='relu6', data=fc6, act_type="relu")
    drop6 = layers.Dropout(name='drop6', data=relu6, p=0.5)
    fc7 = mx.symbol.FullyConnected(name='fc7', data=drop6, num_hidden=4096)
    relu7 = layers.Activation(name='relu7', data=fc7, act_type="relu")
    drop7 = layers.Dropout(name='drop7', data=relu7, p=0.5)
    self.__output = self.add_head_nodes(drop7)
def __init__(self, params): """ """ Model.check_parameters( params, { 'name': 'InceptionResNetV2', 'input_shape': (3, 299, 299), 'num_classes': 1000, 'phase': 'training', 'dtype': 'float32' }) Model.__init__(self, params) data = self.add_data_node() conv1a_3_3 = ConvFactory(data=data, num_filter=32, kernel=(3, 3), stride=(2, 2)) conv2a_3_3 = ConvFactory(conv1a_3_3, 32, (3, 3)) conv2b_3_3 = ConvFactory(conv2a_3_3, 64, (3, 3), pad=(1, 1)) maxpool3a_3_3 = mx.symbol.Pooling(data=conv2b_3_3, kernel=(3, 3), stride=(2, 2), pool_type='max') conv3b_1_1 = ConvFactory(maxpool3a_3_3, 80, (1, 1)) conv4a_3_3 = ConvFactory(conv3b_1_1, 192, (3, 3)) maxpool5a_3_3 = mx.symbol.Pooling(data=conv4a_3_3, kernel=(3, 3), stride=(2, 2), pool_type='max') tower_conv = ConvFactory(maxpool5a_3_3, 96, (1, 1)) tower_conv1_0 = ConvFactory(maxpool5a_3_3, 48, (1, 1)) tower_conv1_1 = ConvFactory(tower_conv1_0, 64, (5, 5), pad=(2, 2)) tower_conv2_0 = ConvFactory(maxpool5a_3_3, 64, (1, 1)) tower_conv2_1 = ConvFactory(tower_conv2_0, 96, (3, 3), pad=(1, 1)) tower_conv2_2 = ConvFactory(tower_conv2_1, 96, (3, 3), pad=(1, 1)) tower_pool3_0 = mx.symbol.Pooling(data=maxpool5a_3_3, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type='avg') tower_conv3_1 = ConvFactory(tower_pool3_0, 64, (1, 1)) tower_5b_out = mx.symbol.Concat( *[tower_conv, tower_conv1_1, tower_conv2_2, tower_conv3_1]) net = repeat(tower_5b_out, 10, block35, scale=0.17, input_num_channels=320) tower_conv = ConvFactory(net, 384, (3, 3), stride=(2, 2)) tower_conv1_0 = ConvFactory(net, 256, (1, 1)) tower_conv1_1 = ConvFactory(tower_conv1_0, 256, (3, 3), pad=(1, 1)) tower_conv1_2 = ConvFactory(tower_conv1_1, 384, (3, 3), stride=(2, 2)) tower_pool = mx.symbol.Pooling(net, kernel=(3, 3), stride=(2, 2), pool_type='max') net = mx.symbol.Concat(*[tower_conv, tower_conv1_2, tower_pool]) net = repeat(net, 20, block17, scale=0.1, input_num_channels=1088) tower_conv = ConvFactory(net, 256, (1, 1)) tower_conv0_1 = ConvFactory(tower_conv, 384, (3, 3), stride=(2, 2)) tower_conv1 = ConvFactory(net, 256, (1, 1)) tower_conv1_1 = ConvFactory(tower_conv1, 288, (3, 3), stride=(2, 2)) tower_conv2 = ConvFactory(net, 256, (1, 1)) tower_conv2_1 = ConvFactory(tower_conv2, 288, (3, 3), pad=(1, 1)) tower_conv2_2 = ConvFactory(tower_conv2_1, 320, (3, 3), stride=(2, 2)) tower_pool = mx.symbol.Pooling(net, kernel=(3, 3), stride=(2, 2), pool_type='max') net = mx.symbol.Concat( *[tower_conv0_1, tower_conv1_1, tower_conv2_2, tower_pool]) net = repeat(net, 9, block8, scale=0.2, input_num_channels=2080) net = block8(net, with_act=False, input_num_channels=2080) net = ConvFactory(net, 1536, (1, 1)) net = mx.symbol.Pooling(net, kernel=(1, 1), global_pool=True, stride=(2, 2), pool_type='avg') net = mx.symbol.Flatten(net) if self.phase == 'training': net = mx.symbol.Dropout(data=net, p=0.2) self.__output = self.add_head_nodes(net)