def model(inputs, is_training):
  """Creation of the model graph."""
  inputs = conv2d_fixed_padding(
      inputs=inputs, filters=64, kernel_size=7, strides=2,
      is_training=is_training, data_format=data_format)
  inputs = tf.identity(inputs, 'initial_conv')
  inputs = batch_norm_relu(inputs, is_training, data_format=data_format)

  pooled_inputs = tf.layers.max_pooling2d(
      inputs=inputs, pool_size=3, strides=2, padding='SAME',
      data_format=data_format)
  if is_training and FLAGS.mlperf_logging:
    resnet_log_helper.log_max_pool(input_tensor=inputs,
                                   output_tensor=pooled_inputs)
  inputs = tf.identity(pooled_inputs, 'initial_max_pool')

  inputs = block_group(
      inputs=inputs, filters=64, block_fn=block_fn, blocks=layers[0],
      strides=1, is_training=is_training, name='block_group1',
      data_format=data_format)
  inputs = block_group(
      inputs=inputs, filters=128, block_fn=block_fn, blocks=layers[1],
      strides=2, is_training=is_training, name='block_group2',
      data_format=data_format)
  inputs = block_group(
      inputs=inputs, filters=256, block_fn=block_fn, blocks=layers[2],
      strides=2, is_training=is_training, name='block_group3',
      data_format=data_format)
  inputs = block_group(
      inputs=inputs, filters=512, block_fn=block_fn, blocks=layers[3],
      strides=2, is_training=is_training, name='block_group4',
      data_format=data_format)

  # The activation is 7x7, so this is a global average pool.
  # TODO(huangyp): reduce_mean will be faster.
  # Read the spatial dimensions from the data_format-dependent axes.
  if data_format == 'channels_first':
    pool_size = (inputs.shape[2], inputs.shape[3])
  else:
    pool_size = (inputs.shape[1], inputs.shape[2])
  inputs = tf.layers.average_pooling2d(
      inputs=inputs, pool_size=pool_size, strides=1, padding='VALID',
      data_format=data_format)
  inputs = tf.identity(inputs, 'final_avg_pool')
  # Flatten to [batch_size, final_size]: 2048 channels for bottleneck
  # networks, 512 otherwise.
  inputs = tf.reshape(
      inputs, [-1, 2048 if block_fn is bottleneck_block else 512])
  if is_training and FLAGS.mlperf_logging:
    mlperf_log.resnet_print(key=mlperf_log.MODEL_HP_DENSE,
                            value=num_classes)
  inputs = tf.layers.dense(
      inputs=inputs,
      units=num_classes,
      kernel_initializer=tf.random_normal_initializer(stddev=.01))
  inputs = tf.identity(inputs, 'final_dense')
  if is_training and FLAGS.mlperf_logging:
    mlperf_log.resnet_print(
        key=mlperf_log.MODEL_HP_FINAL_SHAPE,
        value=inputs.shape.as_list()[1:])
  return inputs
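# --- Illustrative sketch (not part of the original source): `model` above
# closes over `block_fn`, `layers`, `num_classes`, and `data_format` from an
# enclosing constructor. A minimal sketch of such a factory; the name
# `resnet_v1` and the non-bottleneck block name `residual_block` are
# assumptions, while the depth-to-layers table is the standard one from the
# ResNet paper.
def resnet_v1(resnet_depth, num_classes, data_format='channels_last'):
  model_params = {
      18: {'block_fn': residual_block, 'layers': [2, 2, 2, 2]},
      34: {'block_fn': residual_block, 'layers': [3, 4, 6, 3]},
      50: {'block_fn': bottleneck_block, 'layers': [3, 4, 6, 3]},
      101: {'block_fn': bottleneck_block, 'layers': [3, 4, 23, 3]},
      152: {'block_fn': bottleneck_block, 'layers': [3, 8, 36, 3]},
  }
  params = model_params[resnet_depth]
  block_fn, layers = params['block_fn'], params['layers']

  def model(inputs, is_training):
    ...  # body as defined above, closing over block_fn, layers, num_classes

  return model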
def __call__(self, inputs, training):
  """Add operations to classify a batch of input images.

  Args:
    inputs: A Tensor representing a batch of input images.
    training: A boolean. Set to True to add operations required only when
      training the classifier.

  Returns:
    A tuple of (logits, feat_s): a logits Tensor with shape
    [<batch_size>, self.num_classes], and a list of intermediate feature
    Tensors (populated when self.enable_at is set).
  """
  # Drop batch size from shape logging.
  mlperf_log.resnet_print(key=mlperf_log.MODEL_HP_INITIAL_SHAPE,
                          value=inputs.shape.as_list()[1:])

  with self._model_variable_scope():
    if self.data_format == 'channels_first':
      # Convert the inputs from channels_last (NHWC) to channels_first
      # (NCHW). This provides a large performance boost on GPU. See
      # https://www.tensorflow.org/performance/performance_guide#data_formats
      # (A standalone shape check of this permutation appears at the end of
      # this section.)
      inputs = tf.transpose(inputs, [0, 3, 1, 2])

    if self.resnet_version in (1, 2):
      inputs = conv2d_fixed_padding(
          inputs=inputs, filters=self.num_filters,
          kernel_size=self.kernel_size, strides=self.conv_stride,
          data_format=self.data_format)
    elif self.resnet_version == 14:
      # v1d architecture: the single large stem conv is replaced by a stack
      # of three 3x3 convs.
      inputs = conv2d_fixed_padding(
          inputs=inputs, filters=self.num_filters // 2, kernel_size=3,
          strides=self.conv_stride, data_format=self.data_format)
      inputs = batch_norm(inputs, training, self.data_format)
      inputs = conv2d_fixed_padding(
          inputs=inputs, filters=self.num_filters // 2, kernel_size=3,
          strides=1, data_format=self.data_format)
      inputs = batch_norm(inputs, training, self.data_format)
      inputs = conv2d_fixed_padding(
          inputs=inputs, filters=self.num_filters, kernel_size=3,
          strides=1, data_format=self.data_format)
    elif self.resnet_version == 24:
      # v1-simple architecture
      inputs = conv2d_fixed_padding(
          inputs=inputs, filters=self.num_filters,
          kernel_size=self.kernel_size, strides=self.conv_stride,
          data_format=self.data_format)
    elif self.resnet_version == 34:
      # v1cs architecture
      inputs = conv2d_fixed_padding(
          inputs=inputs, filters=self.num_filters,
          kernel_size=self.kernel_size, strides=self.conv_stride,
          data_format=self.data_format)
    inputs = tf.identity(inputs, 'initial_conv')

    # We do not include batch normalization or activation functions in V2
    # for the initial conv1 because the first ResNet unit will perform these
    # for both the shortcut and non-shortcut paths as part of the first
    # block's projection. Cf. Appendix of [2].
    if self.resnet_version in (1, 14, 24, 34):
      inputs = batch_norm(inputs, training, self.data_format)
      mlperf_log.resnet_print(key=mlperf_log.MODEL_HP_RELU)
      inputs = tf.nn.relu(inputs)

    if self.first_pool_size:
      pooled_inputs = tf.layers.max_pooling2d(
          inputs=inputs, pool_size=self.first_pool_size,
          strides=self.first_pool_stride, padding='SAME',
          data_format=self.data_format)
      resnet_log_helper.log_max_pool(input_tensor=inputs,
                                     output_tensor=pooled_inputs)
      inputs = tf.identity(pooled_inputs, 'initial_max_pool')

    feat_s = []
    if self.resnet_version == 24:  # mlperf open
      import nets.manual_blocks_t as manual_blocks

      # group1
      for i, num_blocks in enumerate(self.block_sizes[0:1]):
        num_filters = self.num_filters * (2**i)
        inputs = block_layer(
            inputs=inputs, filters=num_filters, bottleneck=self.bottleneck,
            block_fn=self.block_fn, blocks=num_blocks,
            strides=self.block_strides[i], training=training,
            name='block_layer{}'.format(i + 1),
            data_format=self.data_format, version=self.resnet_version)
      # group2
      inputs = manual_blocks.block_m(inputs=inputs, filters=128,
                                     training=training, strides=2,
                                     data_format=self.data_format)
      inputs = manual_blocks.block_m0(inputs=inputs, filters=128,
                                      training=training, strides=1,
                                      data_format=self.data_format)
      # group3
      inputs = manual_blocks.block_m1(inputs=inputs, filters=256,
                                      training=training, strides=2,
                                      data_format=self.data_format)
      inputs = manual_blocks.block_m2(inputs=inputs, filters=256,
                                      training=training, strides=1,
                                      data_format=self.data_format)
      if self.enable_at:
        feat_s.append(inputs)
      # group4
      inputs = manual_blocks.block_m3(inputs=inputs, filters=512,
                                      training=training, strides=2,
                                      data_format=self.data_format)
      inputs = manual_blocks.block_m4(inputs=inputs, filters=512,
                                      training=training, strides=1,
                                      data_format=self.data_format)
      if self.enable_at:
        feat_s.append(inputs)
    elif self.resnet_version == 34:  # dawnbench
      import nets.manual_blocks as manual_blocks

      # group1~2
      for i, num_blocks in enumerate(self.block_sizes[0:2]):
        num_filters = self.num_filters * (2**i)
        inputs = block_layer(
            inputs=inputs, filters=num_filters, bottleneck=self.bottleneck,
            block_fn=self.block_fn, blocks=num_blocks,
            strides=self.block_strides[i], training=training,
            name='block_layer{}'.format(i + 1),
            data_format=self.data_format, version=self.resnet_version)
      # group3
      inputs = manual_blocks.block_m1(inputs=inputs, filters=256,
                                      training=training, strides=2,
                                      data_format=self.data_format)
      inputs = manual_blocks.block_m2(inputs=inputs, filters=256,
                                      training=training, strides=1,
                                      data_format=self.data_format)
      if self.enable_at:
        feat_s.append(inputs)
      # group4
      inputs = manual_blocks.block_m3(inputs=inputs, filters=512,
                                      training=training, strides=2,
                                      data_format=self.data_format)
      inputs = manual_blocks.block_m4(inputs=inputs, filters=512,
                                      training=training, strides=1,
                                      data_format=self.data_format)
      if self.enable_at:
        feat_s.append(inputs)
    else:
      # standard v1, v1d, v2
      for i, num_blocks in enumerate(self.block_sizes):
        num_filters = self.num_filters * (2**i)
        inputs = block_layer(
            inputs=inputs, filters=num_filters, bottleneck=self.bottleneck,
            block_fn=self.block_fn, blocks=num_blocks,
            strides=self.block_strides[i], training=training,
            name='block_layer{}'.format(i + 1),
            data_format=self.data_format, version=self.resnet_version)
        if (i > 1) and self.enable_at:
          feat_s.append(inputs)

    # Only apply BN and ReLU for models that do pre-activation in each
    # building/bottleneck block, e.g. ResNet v2.
    if self.pre_activation:
      inputs = batch_norm(inputs, training, self.data_format)
      mlperf_log.resnet_print(key=mlperf_log.MODEL_HP_RELU)
      inputs = tf.nn.relu(inputs)

    # The current top layer has shape
    # `batch_size x pool_size x pool_size x final_size`.
    # ResNet does an Average Pooling layer over pool_size,
    # but that is the same as doing a reduce_mean. We do a reduce_mean
    # here because it performs better than AveragePooling2D.
    axes = [2, 3] if self.data_format == 'channels_first' else [1, 2]
    inputs = tf.reduce_mean(inputs, axes, keepdims=True)
    inputs = tf.identity(inputs, 'final_reduce_mean')

    # Flatten to [batch_size, final_size]; squeezing the (now size-1)
    # spatial axes works for both data formats.
    inputs = tf.squeeze(inputs, axes)

    mlperf_log.resnet_print(key=mlperf_log.MODEL_HP_DENSE,
                            value=self.num_classes)
    inputs = tf.layers.dense(
        inputs=inputs,
        units=self.num_classes,
        kernel_initializer=tf.random_normal_initializer(stddev=.01))
    inputs = tf.identity(inputs, 'final_dense')

    # Drop batch size from shape logging.
    mlperf_log.resnet_print(key=mlperf_log.MODEL_HP_FINAL_SHAPE,
                            value=inputs.shape.as_list()[1:])
    return inputs, feat_s
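# --- Illustrative sketch (not part of the original source): the comment in
# __call__ above states that average pooling over the full spatial window is
# the same as a reduce_mean over the spatial axes. A standalone check under
# TF 2.x eager execution; the 7x7x512 shape is the usual channels_last top
# activation of a non-bottleneck ResNet.
import tensorflow as tf

x = tf.random.normal([2, 7, 7, 512])
via_pool = tf.nn.avg_pool2d(x, ksize=7, strides=1, padding='VALID')
via_mean = tf.reduce_mean(x, axis=[1, 2], keepdims=True)
# Both produce [2, 1, 1, 512]; the difference is float rounding noise.
print(tf.reduce_max(tf.abs(via_pool - via_mean)).numpy())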
def log_max_pool(self, input_tensor, output_tensor):
  """Forwards max-pool logging to the MLPerf helper, for resnet50 only."""
  if self.model == 'resnet50':
    resnet_log_helper.log_max_pool(input_tensor, output_tensor)
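# --- Illustrative sketch (not part of the original source): the
# channels_first branch of __call__ above converts NHWC inputs to NCHW with
# tf.transpose(inputs, [0, 3, 1, 2]). The permutation only reorders axes; a
# standalone shape check (runs eagerly under TF 2.x; under TF 1.x the same
# call adds a graph op instead).
import tensorflow as tf

x = tf.zeros([8, 224, 224, 3])     # NHWC: batch, height, width, channels
y = tf.transpose(x, [0, 3, 1, 2])  # move channels to axis 1
print(y.shape)                     # (8, 3, 224, 224), i.e. NCHW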