def inception_conv_layers(layer_dict, inputs=None, pretrained_dict=None,
                          bn=False, wd=0, init_w=None, is_training=True,
                          trainable=True, conv_stride=2):
    if inputs is None:
        inputs = layer_dict['cur_input']
    layer_dict['cur_input'] = inputs

    arg_scope = tf.contrib.framework.arg_scope
    with arg_scope([L.conv], layer_dict=layer_dict, pretrained_dict=pretrained_dict,
                   bn=bn, nl=tf.nn.relu, init_w=init_w, trainable=trainable,
                   is_training=is_training, wd=wd, add_summary=False):
        conv1 = L.conv(7, 64, inputs=inputs, name='conv1_7x7_s2', stride=conv_stride)
        padding1 = tf.constant([[0, 0], [0, 1], [0, 1], [0, 0]])
        conv1_pad = tf.pad(conv1, padding1, 'CONSTANT')
        pool1, _ = L.max_pool(layer_dict=layer_dict, inputs=conv1_pad, stride=2,
                              filter_size=3, padding='VALID', name='pool1')
        pool1_lrn = tf.nn.local_response_normalization(
            pool1, depth_radius=2, alpha=2e-05, beta=0.75, name='pool1_lrn')

        conv2_reduce = L.conv(1, 64, inputs=pool1_lrn, name='conv2_3x3_reduce')
        conv2 = L.conv(3, 192, inputs=conv2_reduce, name='conv2_3x3')
        padding2 = tf.constant([[0, 0], [0, 1], [0, 1], [0, 0]])
        conv2_pad = tf.pad(conv2, padding2, 'CONSTANT')
        pool2, _ = L.max_pool(layer_dict=layer_dict, inputs=conv2_pad, stride=2,
                              filter_size=3, padding='VALID', name='pool2')
        pool2_lrn = tf.nn.local_response_normalization(
            pool2, depth_radius=2, alpha=2e-05, beta=0.75, name='pool2_lrn')

    layer_dict['cur_input'] = pool2_lrn
    return pool2_lrn
def inception_layers(layer_dict, inputs=None, pretrained_dict=None, bn=False,
                     init_w=None, wd=0, trainable=True, is_training=True):
    if inputs is not None:
        layer_dict['cur_input'] = inputs

    arg_scope = tf.contrib.framework.arg_scope
    with arg_scope([inception_layer], layer_dict=layer_dict, pretrained_dict=pretrained_dict,
                   bn=bn, init_w=init_w, trainable=trainable, is_training=is_training, wd=wd):
        inception_layer(64, 96, 128, 16, 32, 32, name='inception_3a')
        inception_layer(128, 128, 192, 32, 96, 64, name='inception_3b')
        L.max_pool(layer_dict, stride=2, filter_size=3, name='pool3')

        inception_layer(192, 96, 208, 16, 48, 64, name='inception_4a')
        inception_layer(160, 112, 224, 24, 64, 64, name='inception_4b')
        inception_layer(128, 128, 256, 24, 64, 64, name='inception_4c')
        inception_layer(112, 144, 288, 32, 64, 64, name='inception_4d')
        inception_layer(256, 160, 320, 32, 128, 128, name='inception_4e')
        L.max_pool(layer_dict, stride=2, filter_size=3, name='pool4')

        inception_layer(256, 160, 320, 32, 128, 128, name='inception_5a')
        inception_layer(384, 192, 384, 48, 128, 128, name='inception_5b')

    return layer_dict['cur_input']
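# --- Usage sketch (not part of the original file) ---
# A minimal, hedged example of chaining the two builders above into a GoogLeNet-style
# feature extractor. It assumes TensorFlow 1.x (tf.placeholder) and that the `L` layer
# module used above is importable; the 224x224x3 input size is an illustrative assumption.
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
layer_dict = {'cur_input': images}

inception_conv_layers(layer_dict, inputs=images, bn=False, is_training=False)  # stem convolutions
features = inception_layers(layer_dict, bn=False, is_training=False)           # stacked inception blocks
print(features.get_shape().as_list())  # spatial stride of roughly 32 relative to the input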
def new_model_forward(weights, inputs, make_vars=False):
    # Create tf.Variables if required
    if make_vars:
        weights = [tf.Variable(w) if isinstance(w, np.ndarray) else w for w in weights]

    outputs = tf.nn.conv2d(inputs, weights[0], [1, 1, 1, 1], padding='SAME')
    outputs += weights[1]  # equivalent to tf.nn.bias_add(outputs, weights[1])
    outputs = Layers.max_pool(outputs)
    outputs = tf.nn.relu(outputs)

    outputs = tf.nn.conv2d(outputs, weights[2], [1, 1, 1, 1], padding='SAME')
    outputs += weights[3]
    outputs = Layers.max_pool(outputs)
    outputs = tf.nn.relu(outputs)

    outputs = tf.nn.conv2d(outputs, weights[4], [1, 1, 1, 1], padding='SAME')
    outputs += weights[5]
    outputs = Layers.max_pool(outputs)
    outputs = tf.nn.relu(outputs)

    outputs = tf.nn.conv2d(outputs, weights[6], [1, 1, 1, 1], padding='SAME')
    outputs += weights[7]
    outputs = Layers.max_pool(outputs)
    outputs = tf.nn.relu(outputs)

    outputs = tf.nn.conv2d(outputs, weights[8], [1, 1, 1, 1], padding='SAME')
    outputs += weights[9]
    outputs = Layers.global_pool(outputs)

    # Reshape to (batch, num_classes) predictions
    if isinstance(weights[-1], np.ndarray):
        outputs = tf.reshape(outputs, [-1, weights[-1].shape[-1]])
    else:
        outputs = tf.reshape(outputs, [-1, weights[-1].shape.as_list()[-1]])
    return outputs
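# --- Usage sketch (not part of the original file) ---
# A hedged example of calling new_model_forward with numpy weights. The shapes below
# are illustrative assumptions: five conv kernels with matching biases, the last one
# producing `num_classes` channels that Layers.global_pool reduces to logits. The
# `Layers` helper providing max_pool/global_pool is assumed importable as used above.
import numpy as np
import tensorflow as tf

num_classes = 10
shapes = [(3, 3, 3, 16), (16,), (3, 3, 16, 32), (32,), (3, 3, 32, 64), (64,),
          (3, 3, 64, 64), (64,), (3, 3, 64, num_classes), (num_classes,)]
weights = [0.01 * np.random.randn(*s).astype(np.float32) for s in shapes]

images = tf.placeholder(tf.float32, [None, 32, 32, 3])
logits = new_model_forward(weights, images, make_vars=True)  # shape: [batch, num_classes]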
def _build_model(self, **kwargs):
    """
    Build model.
    :param kwargs: dict, extra arguments for building YOLO.
        - image_mean: np.ndarray, mean image for each input channel, shape: (C,).
    :return d: dict, containing outputs on each layer.
    """
    d = dict()
    x_mean = kwargs.pop('image_mean', 0.0)

    # input
    X_input = self.X - x_mean
    is_train = self.is_train

    # conv1 - batch_norm1 - leaky_relu1 - pool1
    with tf.variable_scope('layer1'):
        d['conv1'] = conv_layer(X_input, 3, 1, 32, padding='SAME', use_bias=False, weights_stddev=0.01)
        d['batch_norm1'] = batchNormalization(d['conv1'], is_train)
        d['leaky_relu1'] = tf.nn.leaky_relu(d['batch_norm1'], alpha=0.1)
        d['pool1'] = max_pool(d['leaky_relu1'], 2, 2, padding='SAME')
    # (416, 416, 3) --> (208, 208, 32)
    print('layer1.shape', d['pool1'].get_shape().as_list())

    # conv2 - batch_norm2 - leaky_relu2 - pool2
    with tf.variable_scope('layer2'):
        d['conv2'] = depth_point_layer(d['pool1'], 3, 1, 64, padding='SAME', use_bias=False, weights_stddev=0.01)
        d['batch_norm2'] = batchNormalization(d['conv2'], is_train)
        d['leaky_relu2'] = tf.nn.leaky_relu(d['batch_norm2'], alpha=0.1)
        d['pool2'] = max_pool(d['leaky_relu2'], 2, 2, padding='SAME')
    # (208, 208, 32) --> (104, 104, 64)
    print('layer2.shape', d['pool2'].get_shape().as_list())

    # conv3 - batch_norm3 - leaky_relu3
    with tf.variable_scope('layer3'):
        d['conv3'] = depth_point_layer(d['pool2'], 3, 1, 128, padding='SAME', use_bias=False, weights_stddev=0.01)
        d['batch_norm3'] = batchNormalization(d['conv3'], is_train)
        d['leaky_relu3'] = tf.nn.leaky_relu(d['batch_norm3'], alpha=0.1)
    # (104, 104, 64) --> (104, 104, 128)
    print('layer3.shape', d['leaky_relu3'].get_shape().as_list())

    # conv4 - batch_norm4 - leaky_relu4
    with tf.variable_scope('layer4'):
        d['conv4'] = conv_layer(d['leaky_relu3'], 1, 1, 64, padding='SAME', use_bias=False, weights_stddev=0.01)
        d['batch_norm4'] = batchNormalization(d['conv4'], is_train)
        d['leaky_relu4'] = tf.nn.leaky_relu(d['batch_norm4'], alpha=0.1)
    # (104, 104, 128) --> (104, 104, 64)
    print('layer4.shape', d['leaky_relu4'].get_shape().as_list())

    # conv5 - batch_norm5 - leaky_relu5 - pool5
    with tf.variable_scope('layer5'):
        d['conv5'] = depth_point_layer(d['leaky_relu4'], 3, 1, 128, padding='SAME', use_bias=False, weights_stddev=0.01)
        d['batch_norm5'] = batchNormalization(d['conv5'], is_train)
        d['leaky_relu5'] = tf.nn.leaky_relu(d['batch_norm5'], alpha=0.1)
        d['pool5'] = max_pool(d['leaky_relu5'], 2, 2, padding='SAME')
    # (104, 104, 64) --> (52, 52, 128)
    print('layer5.shape', d['pool5'].get_shape().as_list())

    # conv6 - batch_norm6 - leaky_relu6
    with tf.variable_scope('layer6'):
        d['conv6'] = depth_point_layer(d['pool5'], 3, 1, 256, padding='SAME', use_bias=False, weights_stddev=0.01)
        d['batch_norm6'] = batchNormalization(d['conv6'], is_train)
        d['leaky_relu6'] = tf.nn.leaky_relu(d['batch_norm6'], alpha=0.1)
    # (52, 52, 128) --> (52, 52, 256)
    print('layer6.shape', d['leaky_relu6'].get_shape().as_list())

    # conv7 - batch_norm7 - leaky_relu7
    with tf.variable_scope('layer7'):
        d['conv7'] = conv_layer(d['leaky_relu6'], 1, 1, 128, padding='SAME', weights_stddev=0.01, biases_value=0.0)
        d['batch_norm7'] = batchNormalization(d['conv7'], is_train)
        d['leaky_relu7'] = tf.nn.leaky_relu(d['batch_norm7'], alpha=0.1)
    # (52, 52, 256) --> (52, 52, 128)
    print('layer7.shape', d['leaky_relu7'].get_shape().as_list())

    # conv8 - batch_norm8 - leaky_relu8 - pool8
    with tf.variable_scope('layer8'):
        d['conv8'] = depth_point_layer(d['leaky_relu7'], 3, 1, 256, padding='SAME', use_bias=False, weights_stddev=0.01)
        d['batch_norm8'] = batchNormalization(d['conv8'], is_train)
        d['leaky_relu8'] = tf.nn.leaky_relu(d['batch_norm8'], alpha=0.1)
        d['pool8'] = max_pool(d['leaky_relu8'], 2, 2, padding='SAME')
    # (52, 52, 128) --> (26, 26, 256)
    print('layer8.shape', d['pool8'].get_shape().as_list())

    # conv9 - batch_norm9 - leaky_relu9
    with tf.variable_scope('layer9'):
        d['conv9'] = depth_point_layer(d['pool8'], 3, 1, 512, padding='SAME', use_bias=False, weights_stddev=0.01)
        d['batch_norm9'] = batchNormalization(d['conv9'], is_train)
        d['leaky_relu9'] = tf.nn.leaky_relu(d['batch_norm9'], alpha=0.1)
    # (26, 26, 256) --> (26, 26, 512)
    print('layer9.shape', d['leaky_relu9'].get_shape().as_list())

    # conv10 - batch_norm10 - leaky_relu10
    with tf.variable_scope('layer10'):
        d['conv10'] = conv_layer(d['leaky_relu9'], 1, 1, 256, padding='SAME', use_bias=False, weights_stddev=0.01)
        d['batch_norm10'] = batchNormalization(d['conv10'], is_train)
        d['leaky_relu10'] = tf.nn.leaky_relu(d['batch_norm10'], alpha=0.1)
    # (26, 26, 512) --> (26, 26, 256)
    print('layer10.shape', d['leaky_relu10'].get_shape().as_list())

    # conv11 - batch_norm11 - leaky_relu11
    with tf.variable_scope('layer11'):
        d['conv11'] = depth_point_layer(d['leaky_relu10'], 3, 1, 512, padding='SAME', use_bias=False, weights_stddev=0.01)
        d['batch_norm11'] = batchNormalization(d['conv11'], is_train)
        d['leaky_relu11'] = tf.nn.leaky_relu(d['batch_norm11'], alpha=0.1)
    # (26, 26, 256) --> (26, 26, 512)
    print('layer11.shape', d['leaky_relu11'].get_shape().as_list())

    # conv12 - batch_norm12 - leaky_relu12
    with tf.variable_scope('layer12'):
        d['conv12'] = conv_layer(d['leaky_relu11'], 1, 1, 256, padding='SAME', use_bias=False, weights_stddev=0.01)
        d['batch_norm12'] = batchNormalization(d['conv12'], is_train)
        d['leaky_relu12'] = tf.nn.leaky_relu(d['batch_norm12'], alpha=0.1)
    # (26, 26, 512) --> (26, 26, 256)
    print('layer12.shape', d['leaky_relu12'].get_shape().as_list())

    # conv13 - batch_norm13 - leaky_relu13 - pool13
    with tf.variable_scope('layer13'):
        d['conv13'] = depth_point_layer(d['leaky_relu12'], 3, 1, 512, padding='SAME', use_bias=False, weights_stddev=0.01)
        d['batch_norm13'] = batchNormalization(d['conv13'], is_train)
        d['leaky_relu13'] = tf.nn.leaky_relu(d['batch_norm13'], alpha=0.1)
        d['pool13'] = max_pool(d['leaky_relu13'], 2, 2, padding='SAME')
    # (26, 26, 256) --> (13, 13, 512)
    print('layer13.shape', d['pool13'].get_shape().as_list())

    # conv14 - batch_norm14 - leaky_relu14
    with tf.variable_scope('layer14'):
        d['conv14'] = depth_point_layer(d['pool13'], 3, 1, 1024, padding='SAME', use_bias=False, weights_stddev=0.01)
        d['batch_norm14'] = batchNormalization(d['conv14'], is_train)
        d['leaky_relu14'] = tf.nn.leaky_relu(d['batch_norm14'], alpha=0.1)
    # (13, 13, 512) --> (13, 13, 1024)
    print('layer14.shape', d['leaky_relu14'].get_shape().as_list())

    # conv15 - batch_norm15 - leaky_relu15
    with tf.variable_scope('layer15'):
        d['conv15'] = conv_layer(d['leaky_relu14'], 1, 1, 512, padding='SAME', use_bias=False, weights_stddev=0.01)
        d['batch_norm15'] = batchNormalization(d['conv15'], is_train)
        d['leaky_relu15'] = tf.nn.leaky_relu(d['batch_norm15'], alpha=0.1)
    # (13, 13, 1024) --> (13, 13, 512)
    print('layer15.shape', d['leaky_relu15'].get_shape().as_list())

    # conv16 - batch_norm16 - leaky_relu16
    with tf.variable_scope('layer16'):
        d['conv16'] = depth_point_layer(d['leaky_relu15'], 3, 1, 1024, padding='SAME', use_bias=False, weights_stddev=0.01)
        d['batch_norm16'] = batchNormalization(d['conv16'], is_train)
        d['leaky_relu16'] = tf.nn.leaky_relu(d['batch_norm16'], alpha=0.1)
    # (13, 13, 512) --> (13, 13, 1024)
    print('layer16.shape', d['leaky_relu16'].get_shape().as_list())

    # conv17 - batch_norm17 - leaky_relu17
    with tf.variable_scope('layer17'):
        d['conv17'] = conv_layer(d['leaky_relu16'], 1, 1, 512, padding='SAME', use_bias=False, weights_stddev=0.01)
        d['batch_norm17'] = batchNormalization(d['conv17'], is_train)
        d['leaky_relu17'] = tf.nn.leaky_relu(d['batch_norm17'], alpha=0.1)
    # (13, 13, 1024) --> (13, 13, 512)
    print('layer17.shape', d['leaky_relu17'].get_shape().as_list())

    # conv18 - batch_norm18 - leaky_relu18
    with tf.variable_scope('layer18'):
        d['conv18'] = depth_point_layer(d['leaky_relu17'], 3, 1, 1024, padding='SAME', use_bias=False, weights_stddev=0.01)
        d['batch_norm18'] = batchNormalization(d['conv18'], is_train)
        d['leaky_relu18'] = tf.nn.leaky_relu(d['batch_norm18'], alpha=0.1)
    # (13, 13, 512) --> (13, 13, 1024)
    print('layer18.shape', d['leaky_relu18'].get_shape().as_list())

    # conv19 - batch_norm19 - leaky_relu19
    with tf.variable_scope('layer19'):
        d['conv19'] = depth_point_layer(d['leaky_relu18'], 3, 1, 1024, padding='SAME', use_bias=False, weights_stddev=0.01)
        d['batch_norm19'] = batchNormalization(d['conv19'], is_train)
        d['leaky_relu19'] = tf.nn.leaky_relu(d['batch_norm19'], alpha=0.1)
    # (13, 13, 1024) --> (13, 13, 1024)
    print('layer19.shape', d['leaky_relu19'].get_shape().as_list())

    # conv20 - batch_norm20 - leaky_relu20
    with tf.variable_scope('layer20'):
        d['conv20'] = depth_point_layer(d['leaky_relu19'], 3, 1, 1024, padding='SAME', use_bias=False, weights_stddev=0.01)
        d['batch_norm20'] = batchNormalization(d['conv20'], is_train)
        d['leaky_relu20'] = tf.nn.leaky_relu(d['batch_norm20'], alpha=0.1)
    # (13, 13, 1024) --> (13, 13, 1024)
    print('layer20.shape', d['leaky_relu20'].get_shape().as_list())

    # concatenate layer20 and layer13 using space-to-depth
    with tf.variable_scope('layer21'):
        d['skip_connection'] = conv_layer(d['leaky_relu13'], 1, 1, 64, padding='SAME', use_bias=False, weights_stddev=0.01)
        d['skip_batch'] = batchNormalization(d['skip_connection'], is_train)
        d['skip_leaky_relu'] = tf.nn.leaky_relu(d['skip_batch'], alpha=0.1)
        d['skip_space_to_depth_x2'] = tf.space_to_depth(d['skip_leaky_relu'], block_size=2)
        d['concat21'] = tf.concat([d['skip_space_to_depth_x2'], d['leaky_relu20']], axis=-1)
    # (13, 13, 1024) --> (13, 13, 256+1024)
    print('layer21.shape', d['concat21'].get_shape().as_list())

    # conv22 - batch_norm22 - leaky_relu22
    with tf.variable_scope('layer22'):
        d['conv22'] = depth_point_layer(d['concat21'], 3, 1, 1024, padding='SAME', use_bias=False, weights_stddev=0.01)
        d['batch_norm22'] = batchNormalization(d['conv22'], is_train)
        d['leaky_relu22'] = tf.nn.leaky_relu(d['batch_norm22'], alpha=0.1)
    # (13, 13, 1280) --> (13, 13, 1024)
    print('layer22.shape', d['leaky_relu22'].get_shape().as_list())

    output_channel = self.num_anchors * (5 + self.num_classes)
    d['logit'] = conv_layer(d['leaky_relu22'], 1, 1, output_channel, padding='SAME',
                            use_bias=True, weights_stddev=0.01, biases_value=0.1)
    d['pred'] = tf.reshape(d['logit'], (-1, self.grid_size[0], self.grid_size[1],
                                        self.num_anchors, 5 + self.num_classes))
    # (13, 13, 1024) --> (13, 13, num_anchors * (5 + num_classes))
    print('pred.shape', d['pred'].get_shape().as_list())

    return d
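# --- Decoding sketch (not part of the original file) ---
# d['pred'] above has shape (N, grid_h, grid_w, num_anchors, 5 + num_classes). The
# helper below decodes it under the standard YOLOv2 parameterization (sigmoid offsets
# plus cell index for x/y, exp times anchor priors for w/h); the channel ordering and
# anchors expressed in grid-cell units are assumptions, not taken from this repository.
import tensorflow as tf

def decode_yolo_pred(pred, anchors, grid_size):
    grid_h, grid_w = grid_size
    txty, twth = pred[..., 0:2], pred[..., 2:4]
    conf, cls = pred[..., 4:5], pred[..., 5:]

    # (1, grid_h, grid_w, 1, 2) tensor of cell column/row indices
    cx, cy = tf.meshgrid(tf.range(grid_w, dtype=tf.float32),
                         tf.range(grid_h, dtype=tf.float32))
    cell_xy = tf.reshape(tf.stack([cx, cy], axis=-1), [1, grid_h, grid_w, 1, 2])

    box_xy = (tf.nn.sigmoid(txty) + cell_xy) / tf.constant([grid_w, grid_h], tf.float32)
    box_wh = tf.exp(twth) * tf.reshape(tf.constant(anchors, tf.float32), [1, 1, 1, -1, 2])
    return box_xy, box_wh, tf.nn.sigmoid(conf), tf.nn.softmax(cls)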
def _build_model(self, **kwargs):
    """
    Build model.
    :param kwargs: dict, extra arguments for building AlexNet.
        - image_mean: np.ndarray, mean image for each input channel, shape: (C,).
        - dropout_prob: float, the probability of dropping out each unit in FC layer.
    :return d: dict, containing outputs on each layer.
    """
    d = dict()  # Dictionary to save intermediate values returned from each layer.
    X_mean = kwargs.pop('image_mean', 0.0)
    dropout_prob = kwargs.pop('dropout_prob', 0.0)
    num_classes = int(self.y.get_shape()[-1])

    # The probability of keeping each unit for dropout layers
    keep_prob = tf.cond(self.is_train, lambda: 1. - dropout_prob, lambda: 1.)

    # input
    X_input = self.X - X_mean  # perform mean subtraction

    # First convolution layer: conv1 - relu1 - pool1
    with tf.variable_scope('conv1'):
        # conv_layer(x, side_l, stride, out_depth, padding='SAME', **kwargs)
        d['conv1'] = conv_layer(X_input, 3, 1, 64, padding='SAME', weights_stddev=0.01, biases_value=1.0)
        print('conv1.shape', d['conv1'].get_shape().as_list())
        d['relu1'] = tf.nn.relu(d['conv1'])
        # max_pool(x, side_l, stride, padding='SAME')
        d['pool1'] = max_pool(d['relu1'], 2, 1, padding='SAME')
        d['drop1'] = tf.nn.dropout(d['pool1'], keep_prob)
        print('pool1.shape', d['pool1'].get_shape().as_list())

    # Second convolution layer: conv2 - relu2 - pool2
    with tf.variable_scope('conv2'):
        d['conv2'] = conv_layer(d['pool1'], 3, 1, 128, padding='SAME', weights_stddev=0.01, biases_value=1.0)
        print('conv2.shape', d['conv2'].get_shape().as_list())
        d['relu2'] = tf.nn.relu(d['conv2'])
        d['pool2'] = max_pool(d['relu2'], 2, 1, padding='SAME')
        d['drop2'] = tf.nn.dropout(d['pool2'], keep_prob)
        print('pool2.shape', d['pool2'].get_shape().as_list())

    # Third convolution layer: conv3 - relu3 - pool3
    with tf.variable_scope('conv3'):
        d['conv3'] = conv_layer(d['pool2'], 3, 1, 256, padding='SAME', weights_stddev=0.01, biases_value=1.0)
        print('conv3.shape', d['conv3'].get_shape().as_list())
        d['relu3'] = tf.nn.relu(d['conv3'])
        d['pool3'] = max_pool(d['relu3'], 2, 1, padding='SAME')
        d['drop3'] = tf.nn.dropout(d['pool3'], keep_prob)
        print('pool3.shape', d['pool3'].get_shape().as_list())

    # Flatten feature maps
    f_dim = int(np.prod(d['drop3'].get_shape()[1:]))
    f_emb = tf.reshape(d['drop3'], [-1, f_dim])

    # fc4
    with tf.variable_scope('fc4'):
        d['fc4'] = fc_layer(f_emb, 1024, weights_stddev=0.005, biases_value=0.1)
        d['relu4'] = tf.nn.relu(d['fc4'])
        print('fc4.shape', d['relu4'].get_shape().as_list())

    # fc5
    with tf.variable_scope('fc5'):
        d['fc5'] = fc_layer(d['relu4'], 1024, weights_stddev=0.005, biases_value=0.1)
        d['relu5'] = tf.nn.relu(d['fc5'])
        print('fc5.shape', d['relu5'].get_shape().as_list())

    d['logits'] = fc_layer(d['relu5'], num_classes, weights_stddev=0.01, biases_value=0.0)
    print('logits.shape', d['logits'].get_shape().as_list())

    # softmax
    d['pred'] = tf.nn.softmax(d['logits'])

    return d
def _build_model(self, **kwargs):
    """
    Build model.
    :param kwargs: dict, extra arguments for building YOLO.
        - image_mean: np.ndarray, mean image for each input channel, shape: (C,).
    :return d: dict, containing outputs on each layer.
    """
    d = dict()
    x_mean = kwargs.pop('image_mean', 0.0)
    pretrain = kwargs.pop('pretrain', False)
    frontend = kwargs.pop('frontend', 'resnet_v2_50')

    # input
    X_input = self.X - x_mean
    is_train = self.is_train

    # Feature extractor
    if pretrain:
        frontend_dir = os.path.join('pretrained_models', '{}.ckpt'.format(frontend))
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits, end_points = resnet_v2.resnet_v2_50(self.X, is_training=self.is_train)
        d['init_fn'] = slim.assign_from_checkpoint_fn(
            model_path=frontend_dir, var_list=slim.get_model_variables(frontend))
        convs = [end_points[frontend + '/block{}'.format(x)] for x in [4, 2, 1]]
        d['conv_s32'] = convs[0]
        d['conv_s16'] = convs[1]
    else:
        # Build ConvNet
        # conv1 - batch_norm1 - leaky_relu1 - pool1
        with tf.variable_scope('layer1'):
            d['conv1'] = conv_bn_relu(X_input, 32, (3, 3), is_train)
            d['pool1'] = max_pool(d['conv1'], 2, 2, padding='SAME')
        # (416, 416, 3) --> (208, 208, 32)

        # conv2 - batch_norm2 - leaky_relu2 - pool2
        with tf.variable_scope('layer2'):
            d['conv2'] = conv_bn_relu(d['pool1'], 64, (3, 3), is_train)
            d['pool2'] = max_pool(d['conv2'], 2, 2, padding='SAME')
        # (208, 208, 32) --> (104, 104, 64)

        # conv3 - batch_norm3 - leaky_relu3
        with tf.variable_scope('layer3'):
            d['conv3'] = conv_bn_relu(d['pool2'], 128, (3, 3), is_train)
        # (104, 104, 64) --> (104, 104, 128)

        # conv4 - batch_norm4 - leaky_relu4
        with tf.variable_scope('layer4'):
            d['conv4'] = conv_bn_relu(d['conv3'], 64, (1, 1), is_train)
        # (104, 104, 128) --> (104, 104, 64)

        # conv5 - batch_norm5 - leaky_relu5 - pool5
        with tf.variable_scope('layer5'):
            d['conv5'] = conv_bn_relu(d['conv4'], 128, (3, 3), is_train)
            d['pool5'] = max_pool(d['conv5'], 2, 2, padding='SAME')
        # (104, 104, 64) --> (52, 52, 128)

        # conv6 - batch_norm6 - leaky_relu6
        with tf.variable_scope('layer6'):
            d['conv6'] = conv_bn_relu(d['pool5'], 256, (3, 3), is_train)
        # (52, 52, 128) --> (52, 52, 256)

        # conv7 - batch_norm7 - leaky_relu7
        with tf.variable_scope('layer7'):
            d['conv7'] = conv_bn_relu(d['conv6'], 128, (1, 1), is_train)
        # (52, 52, 256) --> (52, 52, 128)

        # conv8 - batch_norm8 - leaky_relu8 - pool8
        with tf.variable_scope('layer8'):
            d['conv8'] = conv_bn_relu(d['conv7'], 256, (3, 3), is_train)
            d['pool8'] = max_pool(d['conv8'], 2, 2, padding='SAME')
        # (52, 52, 128) --> (26, 26, 256)

        # conv9 - batch_norm9 - leaky_relu9
        with tf.variable_scope('layer9'):
            d['conv9'] = conv_bn_relu(d['pool8'], 512, (3, 3), is_train)
        # (26, 26, 256) --> (26, 26, 512)

        # conv10 - batch_norm10 - leaky_relu10
        with tf.variable_scope('layer10'):
            d['conv10'] = conv_bn_relu(d['conv9'], 256, (1, 1), is_train)
        # (26, 26, 512) --> (26, 26, 256)

        # conv11 - batch_norm11 - leaky_relu11
        with tf.variable_scope('layer11'):
            d['conv11'] = conv_bn_relu(d['conv10'], 512, (3, 3), is_train)
        # (26, 26, 256) --> (26, 26, 512)

        # conv12 - batch_norm12 - leaky_relu12
        with tf.variable_scope('layer12'):
            d['conv12'] = conv_bn_relu(d['conv11'], 256, (1, 1), is_train)
        # (26, 26, 512) --> (26, 26, 256)

        # conv13 - batch_norm13 - leaky_relu13 - pool13
        with tf.variable_scope('layer13'):
            d['conv13'] = conv_bn_relu(d['conv12'], 512, (3, 3), is_train)
            d['pool13'] = max_pool(d['conv13'], 2, 2, padding='SAME')
        # (26, 26, 256) --> (13, 13, 512)

        # conv14 - batch_norm14 - leaky_relu14
        with tf.variable_scope('layer14'):
            d['conv14'] = conv_bn_relu(d['pool13'], 1024, (3, 3), is_train)
        # (13, 13, 512) --> (13, 13, 1024)

        # conv15 - batch_norm15 - leaky_relu15
        with tf.variable_scope('layer15'):
            d['conv15'] = conv_bn_relu(d['conv14'], 512, (1, 1), is_train)
        # (13, 13, 1024) --> (13, 13, 512)

        # conv16 - batch_norm16 - leaky_relu16
        with tf.variable_scope('layer16'):
            d['conv16'] = conv_bn_relu(d['conv15'], 1024, (3, 3), is_train)
        # (13, 13, 512) --> (13, 13, 1024)

        # conv17 - batch_norm17 - leaky_relu17
        with tf.variable_scope('layer17'):
            d['conv17'] = conv_bn_relu(d['conv16'], 512, (1, 1), is_train)
        # (13, 13, 1024) --> (13, 13, 512)

        # conv18 - batch_norm18 - leaky_relu18
        with tf.variable_scope('layer18'):
            d['conv18'] = conv_bn_relu(d['conv17'], 1024, (3, 3), is_train)
        # (13, 13, 512) --> (13, 13, 1024)

        # conv19 - batch_norm19 - leaky_relu19
        with tf.variable_scope('layer19'):
            d['conv19'] = conv_bn_relu(d['conv18'], 1024, (3, 3), is_train)
        # (13, 13, 1024) --> (13, 13, 1024)

        d['conv_s32'] = d['conv19']
        d['conv_s16'] = d['conv13']

    # Detection layers
    # conv20 - batch_norm20 - leaky_relu20
    with tf.variable_scope('layer20'):
        d['conv20'] = conv_bn_relu(d['conv_s32'], 1024, (3, 3), is_train)
    # (13, 13, 1024) --> (13, 13, 1024)

    # concatenate layer20 and layer13 using space-to-depth
    with tf.variable_scope('layer21'):
        d['skip_connection'] = conv_bn_relu(d['conv_s16'], 64, (1, 1), is_train)
        d['skip_space_to_depth_x2'] = tf.space_to_depth(d['skip_connection'], block_size=2)
        d['concat21'] = tf.concat([d['skip_space_to_depth_x2'], d['conv20']], axis=-1)
    # (13, 13, 1024) --> (13, 13, 256+1024)

    # conv22 - batch_norm22 - leaky_relu22
    with tf.variable_scope('layer22'):
        d['conv22'] = conv_bn_relu(d['concat21'], 1024, (3, 3), is_train)
    # (13, 13, 1280) --> (13, 13, 1024)

    output_channel = self.num_anchors * (5 + self.num_classes)
    d['logits'] = conv_layer(d['conv22'], output_channel, (1, 1), (1, 1),
                             padding='SAME', use_bias=True)
    d['pred'] = tf.reshape(d['logits'], (-1, self.grid_size[0], self.grid_size[1],
                                         self.num_anchors, 5 + self.num_classes))
    # (13, 13, 1024) --> (13, 13, num_anchors * (5 + num_classes))
    return d
def _prepare_module(self):
    d = OrderedDict()

    # conv1 - batch_norm1 - leaky_relu1 - pool1
    d['conv1'] = ConvBnAct(3, 32, 3, stride=1, padding=1)
    d['pool1'] = max_pool(2, 2)
    # conv2 - batch_norm2 - leaky_relu2 - pool2
    d['conv2'] = ConvBnAct(32, 64, 3, stride=1, padding=1)
    d['pool2'] = max_pool(2, 2)
    # conv3 - batch_norm3 - leaky_relu3
    d['conv3'] = ConvBnAct(64, 128, 3, stride=1, padding=1)
    # conv4 - batch_norm4 - leaky_relu4
    d['conv4'] = ConvBnAct(128, 64, 1, stride=1, padding=0)
    # conv5 - batch_norm5 - leaky_relu5 - pool5
    d['conv5'] = ConvBnAct(64, 128, 3, stride=1, padding=1)
    d['pool5'] = max_pool(2, 2)
    # conv6 - batch_norm6 - leaky_relu6
    d['conv6'] = ConvBnAct(128, 256, 3, stride=1, padding=1)
    # conv7 - batch_norm7 - leaky_relu7
    d['conv7'] = ConvBnAct(256, 128, 1, stride=1, padding=0)
    # conv8 - batch_norm8 - leaky_relu8 - pool8
    d['conv8'] = ConvBnAct(128, 256, 3, stride=1, padding=1)
    d['pool8'] = max_pool(2, 2)
    # conv9 - batch_norm9 - leaky_relu9
    d['conv9'] = ConvBnAct(256, 512, 3, stride=1, padding=1)
    # conv10 - batch_norm10 - leaky_relu10
    d['conv10'] = ConvBnAct(512, 256, 1, stride=1, padding=0)
    # conv11 - batch_norm11 - leaky_relu11
    d['conv11'] = ConvBnAct(256, 512, 3, stride=1, padding=1)
    # conv12 - batch_norm12 - leaky_relu12
    d['conv12'] = ConvBnAct(512, 256, 1, stride=1, padding=0)
    # conv13 - batch_norm13 - leaky_relu13 - pool13
    d['conv13'] = ConvBnAct(256, 512, 3, stride=1, padding=1)
    d['pool13'] = max_pool(2, 2)
    # conv14 - batch_norm14 - leaky_relu14
    d['conv14'] = ConvBnAct(512, 1024, 3, stride=1, padding=1)
    # conv15 - batch_norm15 - leaky_relu15
    d['conv15'] = ConvBnAct(1024, 512, 1, stride=1, padding=0)
    # conv16 - batch_norm16 - leaky_relu16
    d['conv16'] = ConvBnAct(512, 1024, 3, stride=1, padding=1)
    # conv17 - batch_norm17 - leaky_relu17
    d['conv17'] = ConvBnAct(1024, 512, 1, stride=1, padding=0)
    # conv18 - batch_norm18 - leaky_relu18
    d['conv18'] = ConvBnAct(512, 1024, 3, stride=1, padding=1)
    # conv19 - batch_norm19 - leaky_relu19
    d['conv19'] = ConvBnAct(1024, 1024, 3, stride=1, padding=1)

    # Detection layers
    # conv20 - batch_norm20 - leaky_relu20
    d['conv20'] = ConvBnAct(1024, 1024, 3, stride=1, padding=1)
    # concatenate layer20 and layer13 using space-to-depth
    d['skip_connection'] = nn.Sequential(
        ConvBnAct(512, 64, 1, stride=1, padding=0), SpaceToDepth(2))
    d['conv21'] = ConvBnAct(1024, 1024, 3, stride=1, padding=1)
    # conv22 - batch_norm22 - leaky_relu22
    d['conv22'] = ConvBnAct(1280, 1024, 3, stride=1, padding=1)

    output_channel = self.num_anchors * (5 + self.num_classes)
    d['logits'] = conv2d(1024, output_channel, 1, stride=1, padding=0, bias=True)

    self.module = nn.ModuleList()
    for i in d.values():
        self.module.append(i)
    return d
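# --- Forward-pass sketch (not part of the original file) ---
# One plausible wiring of the modules prepared above, written as a standalone function
# over the returned OrderedDict `d`. The exact point where the skip branch is taken
# (the conv13 output, before pool13) and the position of conv21 are assumptions inferred
# from the channel counts (conv22 expects 1024 + 256 = 1280); max_pool and conv2d are
# assumed to return nn.Module instances, as implied by their use in nn.ModuleList.
import torch

def yolo_forward(d, x):
    for name in ['conv1', 'pool1', 'conv2', 'pool2', 'conv3', 'conv4', 'conv5', 'pool5',
                 'conv6', 'conv7', 'conv8', 'pool8', 'conv9', 'conv10', 'conv11',
                 'conv12', 'conv13']:
        x = d[name](x)
    skip = d['skip_connection'](x)  # 1x1 conv + space-to-depth on the stride-16 map
    for name in ['pool13', 'conv14', 'conv15', 'conv16', 'conv17', 'conv18',
                 'conv19', 'conv20', 'conv21']:
        x = d[name](x)
    x = d['conv22'](torch.cat([skip, x], dim=1))  # (N, 1280, 13, 13) -> (N, 1024, 13, 13)
    return d['logits'](x)                         # (N, num_anchors * (5 + num_classes), 13, 13)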
def _build_model(self, **kwargs):
    """
    Build model.
    :param kwargs: dict, extra arguments for building AlexNet.
        - image_mean: np.ndarray, mean image for each input channel, shape: (C,).
        - dropout_prob: float, the probability of dropping out each unit in FC layer.
    :return d: dict, containing outputs on each layer.
    """
    d = dict()  # Dictionary to save intermediate values returned from each layer.
    X_mean = kwargs.pop('image_mean', 0.0)
    dropout_prob = kwargs.pop('dropout_prob', 0.0)
    num_classes = int(self.y.get_shape()[-1])

    # The probability of keeping each unit for dropout layers
    keep_prob = tf.cond(self.is_train, lambda: 1. - dropout_prob, lambda: 1.)

    # input
    X_input = self.X - X_mean  # perform mean subtraction

    # conv1 - relu1 - pool1
    with tf.variable_scope('conv1'):
        d['conv1'] = conv_layer(X_input, 11, 4, 96, padding='VALID', weights_stddev=0.01, biases_value=0.0)
        print('conv1.shape', d['conv1'].get_shape().as_list())
        d['relu1'] = tf.nn.relu(d['conv1'])  # (227, 227, 3) --> (55, 55, 96)
        d['pool1'] = max_pool(d['relu1'], 3, 2, padding='VALID')  # (55, 55, 96) --> (27, 27, 96)
        print('pool1.shape', d['pool1'].get_shape().as_list())

    # conv2 - relu2 - pool2
    with tf.variable_scope('conv2'):
        d['conv2'] = conv_layer(d['pool1'], 5, 1, 256, padding='SAME', weights_stddev=0.01, biases_value=0.1)
        print('conv2.shape', d['conv2'].get_shape().as_list())
        d['relu2'] = tf.nn.relu(d['conv2'])  # (27, 27, 96) --> (27, 27, 256)
        d['pool2'] = max_pool(d['relu2'], 3, 2, padding='VALID')  # (27, 27, 256) --> (13, 13, 256)
        print('pool2.shape', d['pool2'].get_shape().as_list())

    # conv3 - relu3
    with tf.variable_scope('conv3'):
        d['conv3'] = conv_layer(d['pool2'], 3, 1, 384, padding='SAME', weights_stddev=0.01, biases_value=0.0)
        print('conv3.shape', d['conv3'].get_shape().as_list())
        d['relu3'] = tf.nn.relu(d['conv3'])  # (13, 13, 256) --> (13, 13, 384)

    # conv4 - relu4
    with tf.variable_scope('conv4'):
        d['conv4'] = conv_layer(d['relu3'], 3, 1, 384, padding='SAME', weights_stddev=0.01, biases_value=0.1)
        print('conv4.shape', d['conv4'].get_shape().as_list())
        d['relu4'] = tf.nn.relu(d['conv4'])  # (13, 13, 384) --> (13, 13, 384)

    # conv5 - relu5 - pool5
    with tf.variable_scope('conv5'):
        d['conv5'] = conv_layer(d['relu4'], 3, 1, 256, padding='SAME', weights_stddev=0.01, biases_value=0.1)
        print('conv5.shape', d['conv5'].get_shape().as_list())
        d['relu5'] = tf.nn.relu(d['conv5'])  # (13, 13, 384) --> (13, 13, 256)
        d['pool5'] = max_pool(d['relu5'], 3, 2, padding='VALID')  # (13, 13, 256) --> (6, 6, 256)
        print('pool5.shape', d['pool5'].get_shape().as_list())

    # Flatten feature maps
    f_dim = int(np.prod(d['pool5'].get_shape()[1:]))
    f_emb = tf.reshape(d['pool5'], [-1, f_dim])  # (6, 6, 256) --> (9216)

    # fc6
    with tf.variable_scope('fc6'):
        d['fc6'] = fc_layer(f_emb, 4096, weights_stddev=0.005, biases_value=0.1)
        d['relu6'] = tf.nn.relu(d['fc6'])
        d['drop6'] = tf.nn.dropout(d['relu6'], keep_prob)  # (9216) --> (4096)
        print('drop6.shape', d['drop6'].get_shape().as_list())

    # fc7
    with tf.variable_scope('fc7'):
        d['fc7'] = fc_layer(d['drop6'], 4096, weights_stddev=0.005, biases_value=0.1)
        d['relu7'] = tf.nn.relu(d['fc7'])
        d['drop7'] = tf.nn.dropout(d['relu7'], keep_prob)  # (4096) --> (4096)
        print('drop7.shape', d['drop7'].get_shape().as_list())

    # fc8
    with tf.variable_scope('fc8'):
        d['logits'] = fc_layer(d['relu7'], num_classes, weights_stddev=0.01, biases_value=0.0)  # (4096) --> (num_classes)

    # softmax
    d['pred'] = tf.nn.softmax(d['logits'])

    return d
def inception_layer(conv_11_size, conv_33_reduce_size, conv_33_size,
                    conv_55_reduce_size, conv_55_size, pool_size,
                    layer_dict, inputs=None, bn=False, wd=0, init_w=None,
                    pretrained_dict=None, trainable=True, is_training=True,
                    name='inception'):
    if inputs is None:
        inputs = layer_dict['cur_input']
    layer_dict['cur_input'] = inputs

    arg_scope = tf.contrib.framework.arg_scope
    with arg_scope([L.conv], layer_dict=layer_dict, pretrained_dict=pretrained_dict,
                   bn=bn, nl=tf.nn.relu, init_w=init_w, trainable=trainable,
                   is_training=is_training, wd=wd, add_summary=False):
        # 1x1 branch
        conv_11 = L.conv(filter_size=1, out_dim=conv_11_size, inputs=inputs,
                         name='{}_1x1'.format(name))

        # 1x1 reduce -> 3x3 branch (the 3x3 conv reads layer_dict['cur_input'],
        # which the reduce conv has just set)
        L.conv(filter_size=1, out_dim=conv_33_reduce_size, inputs=inputs,
               name='{}_3x3_reduce'.format(name))
        conv_33 = L.conv(filter_size=3, out_dim=conv_33_size, name='{}_3x3'.format(name))

        # 1x1 reduce -> 5x5 branch
        L.conv(filter_size=1, out_dim=conv_55_reduce_size, inputs=inputs,
               name='{}_5x5_reduce'.format(name))
        conv_55 = L.conv(filter_size=5, out_dim=conv_55_size, name='{}_5x5'.format(name))

        # 3x3 max-pool -> 1x1 projection branch
        L.max_pool(layer_dict=layer_dict, inputs=inputs, stride=1, filter_size=3,
                   padding='SAME', name='{}_pool'.format(name))
        convpool = L.conv(filter_size=1, out_dim=pool_size, name='{}_pool_proj'.format(name))

        # concatenate the four branches along the channel axis
        output = tf.concat([conv_11, conv_33, conv_55, convpool], 3,
                           name='{}_concat'.format(name))

    layer_dict['cur_input'] = output
    layer_dict[name] = output
    return output