def __init__(self, params):
    """Construct a ResNet model of the depth selected by ``params['model']``.

    Validates/normalizes *params* (adding defaults for name, classes, phase,
    dtype, layouts, NVIDIA fused layers and workspace size), fixes the input
    shape to 3x224x224, and builds the network symbol into ``self.__output``.

    :param dict params: experiment parameters; must contain ``'model'`` — a
        key into ``ResNet.specs`` that selects depth/units.
    """
    specs = ResNet.specs[params['model']]
    Model.check_parameters(
        params,
        {'name': specs['name'], 'num_classes': 1000, 'phase': 'training',
         'dtype': 'float32', 'input_layout': 'NCHW', 'model_layout': 'NCHW',
         'nvidia_layers': False, 'workspace': 1024})
    params['input_shape'] = Model.conv_shape(3, (224, 224), params['input_layout'])
    Model.__init__(self, params)
    self.params = params
    self.layers = Layers(params)
    # Adapted from https://github.com/tornadomeet/ResNet/blob/master/train_resnet.py
    # Original author Wei Wu.
    # Some optimizations are taken from NVIDIA code from NGC containers.
    #
    # Depth >= 50 uses bottleneck residual units with the wider filter plan.
    use_bottleneck = specs['num_layers'] >= 50
    if use_bottleneck:
        stage_filters = [64, 256, 512, 1024, 2048]
    else:
        stage_filters = [64, 64, 128, 256, 512]
    # NVIDIA fused layers toggle both BN+ReLU and BN+Add+ReLU fusions.
    fuse = params['nvidia_layers']
    self.__output = self.resnet(units=specs['units'],
                                num_stages=4,
                                filter_list=stage_filters,
                                bottle_neck=use_bottleneck,
                                workspace=params['workspace'],
                                fuse_bn_add_relu=fuse,
                                fuse_bn_relu=fuse)
def __init__(self, params):
    """Construct the AlexNetOWT ("one weird trick" AlexNet variant) model.

    Validates/normalizes *params* (adding defaults for name, classes, phase,
    dtype and layouts), fixes the input shape to 3x227x227, then builds the
    classic five-conv / three-FC AlexNet symbol into ``self.__output``.

    :param dict params: experiment parameters.
    """
    Model.check_parameters(
        params,
        {'name': 'AlexNetOWT', 'num_classes': 1000, 'phase': 'training',
         'dtype': 'float32', 'input_layout': 'NCHW', 'model_layout': 'NCHW',
         'nvidia_layers': False})
    params['input_shape'] = Model.conv_shape(3, (227, 227), params['input_layout'])
    Model.__init__(self, params)
    layers = Layers(params)
    # Input node, converted from the data layout to the model layout.
    net = self.add_data_node()
    net = Layers.conv_transform_layout(net, params['input_layout'],
                                       params['model_layout'])
    # Feature extractor: five convolutional blocks.
    net = layers.Convolution(name='conv1', data=net, kernel=(11, 11),
                             stride=(4, 4), num_filter=64)
    net = layers.Activation(name='relu1', data=net, act_type='relu')
    net = layers.Pooling(name='pool1', data=net, pool_type="max",
                         kernel=(3, 3), stride=(2, 2))
    net = layers.Convolution(name='conv2', data=net, kernel=(5, 5),
                             pad=(2, 2), num_filter=192)
    net = layers.Activation(name='relu2', data=net, act_type="relu")
    net = layers.Pooling(name='pool2', data=net, kernel=(3, 3),
                         stride=(2, 2), pool_type="max")
    net = layers.Convolution(name='conv3', data=net, kernel=(3, 3),
                             pad=(1, 1), num_filter=384)
    net = layers.Activation(name='relu3', data=net, act_type="relu")
    net = layers.Convolution(name='conv4', data=net, kernel=(3, 3),
                             pad=(1, 1), num_filter=256)
    net = layers.Activation(name='relu4', data=net, act_type="relu")
    net = layers.Convolution(name='conv5', data=net, kernel=(3, 3),
                             pad=(1, 1), num_filter=256)
    net = layers.Activation(name='relu5', data=net, act_type="relu")
    net = layers.Pooling(name='pool5', data=net, kernel=(3, 3),
                         stride=(2, 2), pool_type="max")
    # Classifier: two dropout-regularized fully-connected layers, then the
    # task head added by the base class.
    net = mx.symbol.Flatten(data=net)
    net = mx.symbol.FullyConnected(name='fc6', data=net, num_hidden=4096)
    net = layers.Activation(name='relu6', data=net, act_type="relu")
    net = layers.Dropout(name='drop6', data=net, p=0.5)
    net = mx.symbol.FullyConnected(name='fc7', data=net, num_hidden=4096)
    net = layers.Activation(name='relu7', data=net, act_type="relu")
    net = layers.Dropout(name='drop7', data=net, p=0.5)
    self.__output = self.add_head_nodes(net)