def Transition(_X, in_channels):
    """Transition layer: batch norm -> ReLU -> 1x1x1 conv -> 2x2x2 average pool.

    Args:
        _X: input tensor fed to the batch-normalisation layer.
        in_channels: number of filters produced by the 1x1x1 convolution
            (despite the name, this is the layer's *output* depth).

    Returns:
        The tensor produced by the stride-2, 'SAME'-padded average pooling.
    """
    # IS_TRAIN is a module-level flag read at graph-construction time.
    normalized = tf.layers.batch_normalization(_X, training=IS_TRAIN)
    activated = tf.nn.relu(normalized)
    # Bias-free pointwise convolution that sets the channel count.
    projected = slim.conv3d(
        activated,
        in_channels,
        kernel_size=[1, 1, 1],
        stride=1,
        biases_initializer=None)
    return slim.avg_pool3d(projected, [2, 2, 2], stride=2, padding='SAME')
def inference_t3d(frames, feature_size, _dropout, block_config=(6, 12, 24, 16),
                  is_training=True):
    """Build the T3D feature extractor graph.

    Args:
        frames: input video tensor passed to the first 3-D convolution.
            The pooling sizes below assume the standard input shape
            [BATCH, NUM_CLIP=16, HEIGHT=160, WIDTH=160, RGB=3].
        feature_size: number of outputs of the final fully connected layer.
        _dropout: forwarded unchanged to every `build_block` call.
        block_config: number of layers in each block; one `build_block`
            call is made per entry, with a TTL + Transition stage after
            every block except the last.
        is_training: value stored in the module-level `IS_TRAIN` flag and
            passed as `training=` to the batch-normalisation layers.
            Defaults to True, which is what the function previously
            hard-coded.

    Returns:
        The output of `slim.fully_connected`, with `feature_size` units.
        NOTE: no `activation_fn` is passed, so slim's default applies.
    """
    # Helpers in this module (e.g. Transition) read IS_TRAIN while the graph
    # is being built, so it has to be published before any layer is created.
    global IS_TRAIN
    IS_TRAIN = is_training

    with slim.arg_scope([slim.conv3d],
                        weights_initializer=weights_initializer,
                        weights_regularizer=slim.l2_regularizer(1e-7)):
        # Stem: strided conv -> BN -> ReLU -> max pool.
        out = slim.conv3d(frames, START_CHANNEL, [3, 7, 7], stride=[1, 2, 2],
                          padding='SAME', biases_initializer=None)
        out = tf.layers.batch_normalization(out, training=IS_TRAIN)
        out = tf.nn.relu(out)
        out = slim.max_pool3d(out, kernel_size=[3, 3, 3], stride=2,
                              padding='SAME')

        in_channels = START_CHANNEL
        last_block = len(block_config) - 1
        for i, num_layers in enumerate(block_config):
            out = build_block(out, num_layers, in_channels, _dropout)
            in_channels = in_channels + GROWTH_RATE * num_layers
            if i != last_block:
                # Only the first TTL receives an explicit depth tuple.
                if i == 0:
                    out = TTL(out, (1, 3, 6))
                else:
                    out = TTL(out)
                # NOTE(review): 128 * 3 is presumably the channel count
                # produced by TTL -- confirm against its definition.
                in_channels = 128 * 3
                out = Transition(out, in_channels // 2)
                in_channels = in_channels // 2

        out = tf.layers.batch_normalization(out, training=IS_TRAIN)
        out = tf.nn.relu(out)
        # The 5x5 spatial window matches the standard 160x160 input; if the
        # input size changes, this kernel size must be changed with it.
        out = slim.avg_pool3d(out, kernel_size=[1, 5, 5])
        # Flatten everything but the batch axis.  The previous
        # tf.reshape(out, [static_batch, -1]) put None into the target shape
        # whenever the batch dimension was not statically known.
        out = slim.flatten(out)
        out = slim.fully_connected(out, feature_size)
    return out
def model(self, video, mode, only_endpoints=False, final_endpoint=''):
    """Create the model graph.

    Args:
      video: a BxTxHxWxC video tensor
      mode: string; 'train' switches batch norm and dropout to training
        behaviour, any other value is treated as evaluation.
      only_endpoints: Whether to return only the endpoints.
      final_endpoint: Specifies the endpoint to construct the network up to.
        If not specified, the entire network is constructed and returned.
        Only used if only_endpoints is True.

    Returns:
      The logits tensor. When only_endpoints is True, the dict of endpoints
      is returned instead: truncated at final_endpoint if that name is
      reached, otherwise containing every endpoint.
    """
    self.is_training = (mode == 'train')
    training = self.is_training
    layout = self.data_format
    endpoints = {}

    def reached(tensor, name):
        """Record `tensor` as `name`; tell the caller whether to stop here."""
        endpoints[name] = tensor
        return only_endpoints and final_endpoint == name

    with slim.arg_scope([slim.conv2d], padding='SAME'):
        with tf.variable_scope('VidIncRes', 'VidIncRes', [video]):
            with slim.arg_scope([slim.batch_norm, slim.dropout],
                                is_training=training):
                # Stem convolution; the layer type comes from the spec.
                stem_conv = self.get_layer_type(self.spec.convop1)
                x = stem_conv(
                    video,
                    64,
                    [self.spec.time1, 7, 7],
                    strides=[2, 2, 2],
                    scope='Conv2d_1a_7x7',
                    dilation=self.spec.dilation)
                x = batch_norm_relu(x, training, relu=True,
                                    data_format=layout)
                if reached(x, 'Conv2d_1a_7x7'):
                    return endpoints

                x = slim.max_pool3d(
                    x,
                    [self.spec.max_pool1_time, 3, 3],
                    stride=[2, 2, 2],
                    scope='maxpool1',
                    padding='SAME')
                if reached(x, 'maxpool1'):
                    return endpoints

                # Residual stage 2.
                x = self.residual_block(
                    net=x,
                    filters=4 * 64,
                    layers=self.spec.blocks[0].layers,
                    scope='res_block_2',
                    data_format=layout,
                    block=self.spec.blocks[0])
                if reached(x, 'res_block_2'):
                    return endpoints

                # This pool reuses max_pool1_time for its temporal extent.
                x = slim.max_pool3d(
                    x,
                    [self.spec.max_pool1_time, 2, 2],
                    stride=[1, 2, 2],
                    scope='maxpool2',
                    padding='SAME')
                if reached(x, 'maxpool2'):
                    return endpoints

                # Residual stage 3.
                x = self.residual_block(
                    x,
                    4 * 128,
                    self.spec.blocks[1].layers,
                    scope='res_block_3',
                    data_format=layout,
                    block=self.spec.blocks[1])
                if reached(x, 'res_block_3'):
                    return endpoints

                x = slim.max_pool3d(
                    x,
                    [self.spec.max_pool3_time, 2, 2],
                    stride=[1, 2, 2],
                    scope='maxpool3',
                    padding='SAME')
                if reached(x, 'maxpool3'):
                    return endpoints

                # Residual stage 4.
                x = self.residual_block(
                    x,
                    filters=4 * 256,
                    layers=self.spec.blocks[2].layers,
                    scope='res_block_4',
                    data_format=layout,
                    block=self.spec.blocks[2])
                if reached(x, 'res_block_4'):
                    return endpoints

                x = slim.max_pool3d(
                    x,
                    [self.spec.max_pool4_time, 2, 2],
                    stride=[1, 2, 2],
                    scope='maxpool4',
                    padding='SAME')
                if reached(x, 'maxpool4'):
                    return endpoints

                # Residual stage 5.
                x = self.residual_block(
                    x,
                    4 * 512,
                    self.spec.blocks[3].layers,
                    scope='res_block_5',
                    data_format=layout,
                    block=self.spec.blocks[3])
                if reached(x, 'res_block_5'):
                    return endpoints

                # Adds one more endpoint denoting the last cell before logits.
                if reached(x, 'LastCell'):
                    return endpoints

                with tf.variable_scope('Logits'):
                    dims = x.get_shape().as_list()
                    spatial = dims[3]
                    if layout == 'channels_last':
                        temporal = dims[1]
                    else:
                        temporal = dims[2]
                    # Pool over at most two frames and the whole spatial map.
                    window = (min(temporal, 2), spatial, spatial)
                    x = slim.avg_pool3d(
                        inputs=x,
                        kernel_size=window,
                        stride=1,
                        padding='VALID')
                    x = slim.dropout(
                        x,
                        self.dropout_keep_prob,
                        scope='Dropout_0b',
                        is_training=training)
                    x = slim.conv3d(
                        x,
                        self.num_classes,
                        kernel_size=1,
                        stride=1,
                        activation_fn=None,
                        normalizer_fn=None,
                        weights_initializer=initializers.
                        variance_scaling_initializer(
                            factor=2.0, mode='FAN_IN', uniform=False))
                    # spatial-temporal pooling
                    if layout == 'channels_last':
                        pooled_axes = [1, 2, 3]
                    else:
                        pooled_axes = [2, 3, 4]
                    logits = tf.reduce_mean(x, axis=pooled_axes)
                    if reached(logits, 'Logits'):
                        return endpoints

                pred = tf.argmax(slim.softmax(logits), axis=1)
                if reached(pred, 'Predictions'):
                    return endpoints

    if only_endpoints:
        return endpoints
    return logits
def _inception_module_3d(net, scope, branch_0_depth, branch_1_depths,
                         branch_2_depths, branch_3_depth,
                         branch_2_conv_scope='Conv2d_0b_3x3'):
    """Build one four-branch 3-D Inception module and concatenate on axis 4.

    Args:
        net: input tensor shared by all four branches.
        scope: variable-scope name of the module (e.g. 'Mixed_3b').
        branch_0_depth: filters of the single 1x1x1 conv in Branch_0.
        branch_1_depths: (1x1x1 filters, 3x3x3 filters) for Branch_1.
        branch_2_depths: (1x1x1 filters, 3x3x3 filters) for Branch_2.
        branch_3_depth: filters of the 1x1x1 conv after the Branch_3 max pool.
        branch_2_conv_scope: scope name of the 3x3x3 conv in Branch_2; exposed
            because 'Mixed_5b' uses a different name from the other modules.

    Returns:
        The four branch outputs concatenated along axis 4.
    """
    with tf.variable_scope(scope):
        with tf.variable_scope('Branch_0'):
            branch_0 = slim.conv3d(net, branch_0_depth, [1, 1, 1],
                                   scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
            branch_1 = slim.conv3d(net, branch_1_depths[0], [1, 1, 1],
                                   scope='Conv2d_0a_1x1')
            branch_1 = slim.conv3d(branch_1, branch_1_depths[1], [3, 3, 3],
                                   scope='Conv2d_0b_3x3')
        with tf.variable_scope('Branch_2'):
            branch_2 = slim.conv3d(net, branch_2_depths[0], [1, 1, 1],
                                   scope='Conv2d_0a_1x1')
            branch_2 = slim.conv3d(branch_2, branch_2_depths[1], [3, 3, 3],
                                   scope=branch_2_conv_scope)
        with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool3d(net, [3, 3, 3],
                                       scope='MaxPool_0a_3x3')
            branch_3 = slim.conv3d(branch_3, branch_3_depth, [1, 1, 1],
                                   scope='Conv2d_0b_1x1')
        return tf.concat(axis=4, values=[
            branch_0, branch_1, branch_2, branch_3
        ])


def inception_v1_3d(inputs, keep_prob, num_classes, is_training=True):
    """Build a 3-D Inception-v1 classifier and return time-averaged logits.

    Args:
        inputs: input tensor for the first 7x7x7 convolution. Branch outputs
            are concatenated on axis 4, so a rank-5 tensor with channels last
            is expected.
        keep_prob: keep probability handed to `slim.dropout` before the
            final 1x1x1 convolution.
        num_classes: number of filters of the final 1x1x1 convolution.
        is_training: forwarded to `slim.batch_norm` and `slim.dropout`.
            Defaults to True, which is what the function previously
            hard-coded; pass False to build an evaluation graph.

    Returns:
        The logits after squeezing axes 2 and 3 and averaging over axis 1.
    """
    with tf.variable_scope('InceptionV1_3d'):
        with slim.arg_scope(
                [slim.conv3d, slim.fully_connected],
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.001)):
            with slim.arg_scope([slim.conv3d, slim.max_pool3d],
                                stride=1, padding='SAME'):
                with slim.arg_scope([slim.conv3d],
                                    normalizer_fn=slim.batch_norm):
                    with slim.arg_scope([slim.batch_norm, slim.dropout],
                                        is_training=is_training):
                        # Stem.
                        net = slim.conv3d(inputs, 64, [7, 7, 7], stride=2,
                                          scope='Conv2d_1a_7x7')
                        net = slim.max_pool3d(net, [1, 3, 3],
                                              stride=[1, 2, 2],
                                              scope='MaxPool_2a_3x3')
                        net = slim.conv3d(net, 64, [1, 1, 1],
                                          scope='Conv2d_2b_1x1')
                        net = slim.conv3d(net, 192, [3, 3, 3],
                                          scope='Conv2d_2c_3x3')
                        net = slim.max_pool3d(net, [1, 3, 3],
                                              stride=[1, 2, 2],
                                              scope='MaxPool_3a_3x3')

                        # Stage 3.
                        net = _inception_module_3d(
                            net, 'Mixed_3b', 64, (96, 128), (16, 32), 32)
                        net = _inception_module_3d(
                            net, 'Mixed_3c', 128, (128, 192), (32, 96), 64)
                        net = slim.max_pool3d(net, [3, 3, 3], stride=2,
                                              scope='MaxPool_4a_3x3')

                        # Stage 4.
                        net = _inception_module_3d(
                            net, 'Mixed_4b', 192, (96, 208), (16, 48), 64)
                        net = _inception_module_3d(
                            net, 'Mixed_4c', 160, (112, 224), (24, 64), 64)
                        net = _inception_module_3d(
                            net, 'Mixed_4d', 128, (128, 256), (24, 64), 64)
                        net = _inception_module_3d(
                            net, 'Mixed_4e', 112, (144, 288), (32, 64), 64)
                        net = _inception_module_3d(
                            net, 'Mixed_4f', 256, (160, 320), (32, 128), 128)
                        net = slim.max_pool3d(net, [2, 2, 2], stride=2,
                                              scope='MaxPool_5a_2x2x2')

                        # Stage 5.  The 'Conv2d_0a_3x3' scope name in
                        # Mixed_5b is kept exactly as it was so variable
                        # names (and therefore checkpoints) do not change.
                        net = _inception_module_3d(
                            net, 'Mixed_5b', 256, (160, 320), (32, 128), 128,
                            branch_2_conv_scope='Conv2d_0a_3x3')
                        net = _inception_module_3d(
                            net, 'Mixed_5c', 384, (192, 384), (48, 128), 128)

                        with tf.variable_scope('Logits'):
                            net = slim.avg_pool3d(net, [2, 7, 7], stride=1,
                                                  scope='AvgPool_0a_7x7')
                            net = slim.dropout(net, keep_prob,
                                               scope='Dropout_0b')
                            logits = slim.conv3d(net, num_classes, [1, 1, 1],
                                                 activation_fn=None,
                                                 normalizer_fn=None,
                                                 scope='Conv2d_0c_1x1')
                            # Squeezing axes 2 and 3 requires both to have
                            # size 1 after the average pool above.
                            logits = tf.squeeze(logits, [2, 3],
                                                name='SpatialSqueeze')
                            averaged_logits = tf.reduce_mean(logits, axis=1)
    return averaged_logits
def res_3d_net(self, x, bn_func):
    """Build the 3-D residual head and return flattened, dropped-out features.

    Args:
        x: input tensor; it is reshaped to
            (self.batch_size, self.time_step, 28, 28, 96) before use.
        bn_func: normalisation callable. It is used as the `normalizer_fn`
            of the activated convolutions and is also applied directly to
            each residual sum.

    Returns:
        The flattened output of the final average pool after
        `tf.nn.dropout(..., self.dropout)`.
    """
    conv_defaults = {
        'padding': 'SAME',
        'activation_fn': tf.nn.relu,
        'normalizer_fn': bn_func
    }
    volume = tf.reshape(x, (self.batch_size, self.time_step, 28, 28, 96))

    def conv_act(inputs, depth, stride, outer_scope, conv_scope):
        """3x3x3 conv using the arg_scope defaults (normaliser + ReLU)."""
        with tf.variable_scope(outer_scope):
            return slim.conv3d(inputs, depth, kernel_size=[3, 3, 3],
                               stride=stride, scope=conv_scope)

    def conv_plain(inputs, depth, stride, outer_scope, conv_scope):
        """3x3x3 conv with activation and normaliser explicitly disabled."""
        with tf.variable_scope(outer_scope):
            return slim.conv3d(inputs, depth, kernel_size=[3, 3, 3],
                               stride=stride, activation_fn=None,
                               normalizer_fn=None, scope=conv_scope)

    def join(shortcut, residual, outer_scope):
        """Add the two paths; return (raw sum, ReLU(bn_func(sum)))."""
        with tf.variable_scope(outer_scope):
            total = tf.add(shortcut, residual)
            return total, tf.nn.relu(bn_func(total))

    with slim.arg_scope([slim.conv3d], **conv_defaults):
        # Stage 3: identity shortcut from the first convolution.
        res3a_2n = conv_act(volume, 128, [1, 1, 1], 'res3a', 'conv3d1')
        res3b_1 = conv_act(res3a_2n, 128, [1, 1, 1], 'res3b_1', 'conv3d2')
        res3b_2 = conv_plain(res3b_1, 128, [1, 1, 1], 'res3b_2', 'conv3d1')
        _, res3b_relu = join(res3a_2n, res3b_2, 'res3b')

        # Stage 4a: strided projection shortcut.
        res4a_down = conv_plain(res3b_relu, 256, [2, 2, 2],
                                'res4a_down', 'conv3d1')
        res4a1 = conv_act(res3b_relu, 256, [2, 2, 2], 'res4a_1', 'conv3d1')
        res4a2 = conv_plain(res4a1, 256, [1, 1, 1], 'res4a_2', 'conv3d2')
        res4a, res4a_relu = join(res4a_down, res4a2, 'res4a')

        # Stage 4b: the shortcut is the raw (pre-normalisation) sum res4a.
        res4b1 = conv_act(res4a_relu, 256, [1, 1, 1], 'res4b_1', 'conv3d1')
        res4b2 = conv_plain(res4b1, 256, [1, 1, 1], 'res4b_2', 'conv3d2')
        _, res4b_relu = join(res4a, res4b2, 'res4b')

        # Stage 5a: strided projection shortcut.
        res5a_down = conv_plain(res4b_relu, 512, [2, 2, 2],
                                'res5a_down', 'conv3d1')
        res5a1 = conv_act(res4b_relu, 512, [2, 2, 2], 'res5a_1', 'conv3d1')
        res5a2 = conv_plain(res5a1, 512, [1, 1, 1], 'res5a_2', 'conv3d2')
        res5a, res5a_relu = join(res5a_down, res5a2, 'res5a')

        # Stage 5b: the shortcut is the raw (pre-normalisation) sum res5a.
        res5b1 = conv_act(res5a_relu, 512, [1, 1, 1], 'res5b_1', 'conv3d1')
        res5b2 = conv_plain(res5b1, 512, [1, 1, 1], 'res5b_2', 'conv3d2')
        _, res5b_relu = join(res5a, res5b2, 'res5b')

        with tf.variable_scope('global_avg'):
            pooled = slim.avg_pool3d(res5b_relu, kernel_size=[4, 7, 7],
                                     stride=[1, 1, 1])

        with tf.variable_scope('res_logits'):
            flat = slim.flatten(pooled)
            logits = tf.nn.dropout(flat, self.dropout)

    return logits
data_format=data_format, block=self.spec.blocks[3]) if add_and_check_endpoint(net, 'res_block_5'): return endpoints # Adds one more endpoint denoting the last cell before logits. if add_and_check_endpoint(net, 'LastCell'): return endpoints with tf.variable_scope('Logits'): shape = net.get_shape().as_list() s = shape[3] pool_size = (min( shape[1] if data_format == 'channels_last' else shape[2], 2), s, s) <<<<<<< HEAD net = contrib_slim.avg_pool3d( inputs=net, kernel_size=pool_size, stride=1, padding='VALID') net = contrib_slim.dropout( ======= net = slim.avg_pool3d( inputs=net, kernel_size=pool_size, stride=1, padding='VALID') net = slim.dropout( >>>>>>> EvaNet code and models. net, self.dropout_keep_prob, scope='Dropout_0b', is_training=is_training) <<<<<<< HEAD net = contrib_slim.conv3d( ======= net = slim.conv3d( >>>>>>> EvaNet code and models.