def inception_v3_parameters(weight_decay=0.00004, stddev=0.1,
                            batch_norm_decay=0.9997, batch_norm_epsilon=0.001):
  """Yields the scope with the default parameters for inception_v3.

  Args:
    weight_decay: the weight decay for weights variables.
    stddev: standard deviation of the truncated Gaussian weight distribution.
    batch_norm_decay: decay for the moving average of batch_norm moments.
    batch_norm_epsilon: small float added to variance to avoid dividing by zero.

  Yields:
    an arg_scope with the parameters needed for inception_v3.
  """
  # Set weight_decay for weights in Conv and FC layers.
  with scopes.arg_scope([ops.conv2d, ops.fc], weight_decay=weight_decay):
    # Set stddev, activation and parameters for batch_norm.
    with scopes.arg_scope([ops.conv2d],
                          stddev=stddev,
                          activation=tf.nn.relu,
                          batch_norm_params={
                              'decay': batch_norm_decay,
                              'epsilon': batch_norm_epsilon}) as arg_scope:
      yield arg_scope
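A minimal usage sketch, not taken from the original repo: inception_v3_parameters is a generator that yields an arg_scope, so one way to drive it as a context manager is to wrap it with contextlib.contextmanager; build_tower and the argument values below are illustrative assumptions.

import contextlib

# Hedged sketch: wrap the generator above so it can be entered with `with`.
inception_v3_scope = contextlib.contextmanager(inception_v3_parameters)

def build_tower(images, num_classes=1000, is_training=True):
  # Inside the scope, ops.conv2d and ops.fc pick up the defaults set above
  # (weight decay, truncated-normal stddev, ReLU + batch norm for conv2d).
  with inception_v3_scope(weight_decay=0.00004, stddev=0.1):
    return inception_v3(images, num_classes=num_classes,
                        is_training=is_training)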
def inference(images): """Build the CIFAR-10 model. Args: images: Images returned from distorted_inputs() or inputs(). Returns: Logits. """ # We instantiate all variables using tf.get_variable() instead of # tf.Variable() in order to share variables across multiple GPU training runs. # If we only ran this model on a single GPU, we could simplify this function # by replacing all instances of tf.get_variable() with tf.Variable(). # with scopes.arg_scope([ops.conv2d, ops.fc], stddev=0.1, bias=0.1, batch_norm_params={}): # with scopes.arg_scope([ops.conv2d, ops.fc], stddev=0.1, bias=0.1): with scopes.arg_scope([ops.conv2d], kernel_size=[3,3], padding='SAME'): with scopes.arg_scope([ops.max_pool], kernel_size=[3,3], padding='SAME'): net = ops.conv2d(images, num_filters_out=64) net = ops.conv2d(net, num_filters_out=64) net = ops.max_pool(net) net = ops.conv2d(net, num_filters_out=128) net = ops.conv2d(net, num_filters_out=128) net = ops.max_pool(net) net = ops.conv2d(net, num_filters_out=256) net = ops.conv2d(net, num_filters_out=256) net = ops.max_pool(net) net = ops.conv2d(net, num_filters_out=512) net = ops.conv2d(net, num_filters_out=512) net = ops.avg_pool(net, kernel_size=[3,3], padding='SAME') net = ops.flatten(net) # net = ops.fc(net, num_units_out=1024) # net = ops.fc(net, num_units_out=256) net = ops.fc(net, num_units_out=10) return net
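A hedged follow-on sketch (not part of the original file) showing how the CIFAR-10 logits returned by inference might feed a cross-entropy loss; the label handling and helper name below are assumptions.

def loss(images, labels, num_classes=10):
  # Hedged sketch: softmax cross-entropy against one-hot CIFAR-10 labels.
  logits = inference(images)
  one_hot = tf.one_hot(labels, depth=num_classes)
  cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                          labels=one_hot)
  return tf.reduce_mean(cross_entropy, name='cross_entropy_loss')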
def testVariableCollectionsWithArgScopeNested(self): with self.test_session(): with scopes.arg_scope([variables.variable], collections='A'): a = variables.variable('a', []) with scopes.arg_scope([variables.variable], collections='B'): b = variables.variable('b', []) self.assertEquals(a, tf.get_collection('A')[0]) self.assertEquals(b, tf.get_collection('B')[0])
def testVariableCollectionsWithArgScopeNested(self): with self.test_session(): with scopes.arg_scope([variables.variable], collections="A"): a = variables.variable("a", []) with scopes.arg_scope([variables.variable], collections="B"): b = variables.variable("b", []) self.assertEquals(a, tf.get_collection("A")[0]) self.assertEquals(b, tf.get_collection("B")[0])
def testVariableCollectionsWithArgScopeNonNested(self): with self.test_session(): with scopes.arg_scope([variables.variable], collections='A'): a = variables.variable('a', []) with scopes.arg_scope([variables.variable], collections='B'): b = variables.variable('b', []) variables.variable('c', []) self.assertListEqual([a], tf.get_collection('A')) self.assertListEqual([b], tf.get_collection('B'))
def testReuseArgScope(self): func1_kwargs = {'a': 1, 'b': None, 'c': [1]} key_op = (func1.__module__, func1.__name__) current_scope = {key_op: func1_kwargs.copy()} with self.test_session(): with scopes.arg_scope([func1], a=1, b=None, c=[1]) as scope1: pass with scopes.arg_scope(scope1) as scope: self.assertDictEqual(scope, current_scope)
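A short sketch of the scope-reuse pattern exercised by the test above (names below are illustrative): capture an arg_scope once, then re-enter it later by handing the captured scope back to scopes.arg_scope.

# Hedged sketch: capture a set of conv2d defaults...
with scopes.arg_scope([ops.conv2d], stddev=0.01, padding='SAME') as conv_defaults:
  pass

def tower(images):
  # ...and re-enter the captured scope wherever the same defaults are wanted.
  with scopes.arg_scope(conv_defaults):
    net = ops.conv2d(images, 64, [3, 3], scope='conv1')
    return ops.conv2d(net, 64, [3, 3], scope='conv2')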
def lenet(inputs, dropout_keep_prob=1.0, num_classes=10, is_training=True, restore_logits=True, weight_decay=0.0005, seed=1, scope=''): """LeNet in Caffe https://github.com/BVLC/caffe/blob/master/examples/mnist/lenet_train_test.prototxt Args: inputs: a tensor of size [batch_size, height, width, channels]. dropout_keep_prob: dropout keep_prob. num_classes: number of predicted classes. is_training: whether is training or not. restore_logits: whether or not the logits layers should be restored. Useful for fine-tuning a model with different num_classes. scope: Optional scope for name_scope. Returns: a list containing 'logits', 'aux_logits' Tensors. """ # end_points will collect relevant activations for external use, for example # summaries or losses. print ("Warning: batch_norm_params is always None in lenet") end_points = {} with tf.name_scope(scope, 'lenet', [inputs]): with scopes.arg_scope([ops.conv2d, ops.fc, ops.batch_norm, ops.dropout], is_training=is_training): with scopes.arg_scope([ops.conv2d, ops.fc], bias=0.0, batch_norm_params=None, seed=seed): with scopes.arg_scope([ops.conv2d], stride=1, padding='SAME'): with scopes.arg_scope([ops.max_pool], stride=2, padding='SAME'): # 32 x 32 x 3 end_points['conv1'] = ops.conv2d(inputs, 20, [5, 5], stride=1, stddev=0.05, weight_decay=weight_decay, seed=seed+1, scope='conv1') end_points['pool1'] = ops.max_pool(end_points['conv1'], [2, 2], scope='pool1') end_points['conv2'] = ops.conv2d(end_points['pool1'], 50, [5, 5], stride=1, stddev=0.05, weight_decay=weight_decay, seed=seed+2, scope='conv2') end_points['pool2'] = ops.max_pool(end_points['conv2'], [2, 2], scope='pool2') end_points['pool2'] = ops.flatten(end_points['pool2'], scope='flatten') net = ops.fc(end_points['pool2'], 500, stddev=0.048, weight_decay=weight_decay, seed = seed +3, scope='fc3') # Final pooling and prediction with tf.variable_scope('logits'): logits = ops.fc(net, num_classes, activation=None, stddev=0.0767, weight_decay=weight_decay, scope='logits', seed = seed +5, restore=restore_logits) # 10 end_points['logits'] = logits end_points['predictions'] = tf.nn.softmax(logits, name='predictions') # There is no aux_logits for LeNet end_points['aux_logits'] = tf.constant(0) return logits, end_points
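A hedged call-site sketch for the lenet graph above; the placeholder shape and batch size are assumptions (the inline comments in the function suggest 32x32x3 inputs in this fork).

# Hedged sketch: build the LeNet-style graph defined above on a small batch.
images = tf.placeholder(tf.float32, [64, 32, 32, 3])
logits, end_points = lenet(images, num_classes=10, is_training=True,
                           weight_decay=0.0005, seed=1)
predictions = end_points['predictions']  # softmax over the 10 classes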
def testVariableCollectionsWithArgScopeNonNested(self): with self.test_session(): with scopes.arg_scope([variables.variable], collections="A"): a = variables.variable("a", []) with scopes.arg_scope([variables.variable], collections="B"): b = variables.variable("b", []) variables.variable("c", []) self.assertListEqual([a], tf.get_collection("A")) self.assertListEqual([b], tf.get_collection("B"))
def testCurrentArgScopeNested(self): func1_kwargs = {'a': 1, 'b': None, 'c': [1]} func2_kwargs = {'b': 2, 'd': [2]} key = lambda f: (f.__module__, f.__name__) current_scope = {key(func1): func1_kwargs.copy(), key(func2): func2_kwargs.copy()} with self.test_session(): with scopes.arg_scope([func1], a=1, b=None, c=[1]): with scopes.arg_scope([func2], b=2, d=[2]) as scope: self.assertDictEqual(scope, current_scope)
def testVariableRestoreWithArgScopeNested(self): with self.test_session(): with scopes.arg_scope([variables.variable], restore=True): a = variables.variable("a", []) with scopes.arg_scope([variables.variable], trainable=False, collections=["A", "B"]): b = variables.variable("b", []) c = variables.variable("c", []) self.assertListEqual([a, b, c], variables.get_variables_to_restore()) self.assertListEqual([a, c], tf.trainable_variables()) self.assertListEqual([b], tf.get_collection("A")) self.assertListEqual([b], tf.get_collection("B"))
def testVariableRestoreWithArgScopeNested(self): with self.test_session(): with scopes.arg_scope([variables.variable], restore=True): a = variables.variable('a', []) with scopes.arg_scope([variables.variable], trainable=False, collections=['A', 'B']): b = variables.variable('b', []) c = variables.variable('c', []) self.assertListEqual([a, b, c], variables.get_variables_to_restore()) self.assertListEqual([a, c], tf.trainable_variables()) self.assertListEqual([b], tf.get_collection('A')) self.assertListEqual([b], tf.get_collection('B'))
def testNestedArgScope(self): func1_args = (0,) func1_kwargs = {'a': 1, 'b': None, 'c': [1]} with scopes.arg_scope([func1], a=1, b=None, c=[1]): args, kwargs = func1(0) self.assertTupleEqual(args, func1_args) self.assertDictEqual(kwargs, func1_kwargs) func1_kwargs['b'] = 2 with scopes.arg_scope([func1], b=2): args, kwargs = func1(0) self.assertTupleEqual(args, func1_args) self.assertDictEqual(kwargs, func1_kwargs)
def testNestedArgScope(self): func1_args = (0, ) func1_kwargs = {'a': 1, 'b': None, 'c': [1]} with scopes.arg_scope([func1], a=1, b=None, c=[1]): args, kwargs = func1(0) self.assertTupleEqual(args, func1_args) self.assertDictEqual(kwargs, func1_kwargs) func1_kwargs['b'] = 2 with scopes.arg_scope([func1], b=2): args, kwargs = func1(0) self.assertTupleEqual(args, func1_args) self.assertDictEqual(kwargs, func1_kwargs)
def testVariableRestoreWithArgScopeNested(self): with self.test_session(): with scopes.arg_scope([variables.variable], restore=True): a = variables.variable('a', []) with scopes.arg_scope([variables.variable], trainable=False, collections=['A', 'B']): b = variables.variable('b', []) c = variables.variable('c', []) self.assertListEqual([a, b, c], tf.get_collection(variables.VARIABLES_TO_RESTORE)) self.assertListEqual([a, c], tf.trainable_variables()) self.assertListEqual([b], tf.get_collection('A')) self.assertListEqual([b], tf.get_collection('B'))
def testPartiallySharedArgScope(self): func1_args = (0,) func1_kwargs = {'a': 1, 'b': None, 'c': [1]} func2_args = (1,) func2_kwargs = {'a': 1, 'b': None, 'd': [2]} with scopes.arg_scope([func1, func2], a=1, b=None): with scopes.arg_scope([func1], c=[1]), scopes.arg_scope([func2], d=[2]): args, kwargs = func1(0) self.assertTupleEqual(args, func1_args) self.assertDictEqual(kwargs, func1_kwargs) args, kwargs = func2(1) self.assertTupleEqual(args, func2_args) self.assertDictEqual(kwargs, func2_kwargs)
def nin(inputs, num_classes=10, is_training=True, restore_logits=True, scope=''): # end_points will collect relevant activations for external use, for example # summaries or losses. end_points = {} with tf.op_scope([inputs], scope, 'nin'): with scopes.arg_scope([ops.conv2d, ops.fc, ops.batch_norm], is_training=is_training): # conv1 end_points['conv1'] = ops.conv2d(inputs,192,[5,5],scope='conv1') end_points['conv1_1'] = ops.conv2d(end_points['conv1'],160,[1,1],scope='conv1_1') end_points['conv1_2'] = ops.conv2d(end_points['conv1_1'],96,[1,1],scope='conv1_2') end_points['pool1'] = ops.max_pool(end_points['conv1_2'],[3,3],stride=2, padding='SAME',scope='pool1') net = ops.dropout(end_points['pool1'],0.5) # conv2 end_points['conv2'] = ops.conv2d(net,192,[5,5],scope='conv2') end_points['conv2_1'] = ops.conv2d(end_points['conv2'],192,[1,1],scope='conv2_1') end_points['conv2_2'] = ops.conv2d(end_points['conv2_1'],192,[1,1],scope='conv2_2') end_points['pool2'] = ops.max_pool(end_points['conv2_2'],[3,3],stride=2, padding='SAME',scope='pool2') net = ops.dropout(end_points['pool2'],0.5) # conv3 end_points['conv3'] = ops.conv2d(net,192,[3,3],scope='conv3') end_points['conv3_1'] = ops.conv2d(end_points['conv3'],192,[1,1],scope='conv3_1') end_points['conv3_2'] = ops.conv2d(end_points['conv3_1'],10,[1,1],scope='conv3_2') net = ops.avg_pool(end_points['conv3_2'],[8,8],scope='avg_pool') flatten = ops.flatten(net,scope='flatten') #TODO take care this,using num_classes but 10.. end_points['logits'] = ops.fc(flatten,num_classes,activation=None,scope='fc') return end_points['logits'],end_points
def testVariableWithVariableDeviceChooser(self): with tf.Graph().as_default(): device_fn = variables.VariableDeviceChooser( num_parameter_servers=2) with scopes.arg_scope([variables.variable], device=device_fn): a = variables.variable('a', []) b = variables.variable('b', []) c = variables.variable('c', [], device='cpu:12') d = variables.variable('d', []) with tf.device('cpu:99'): e_init = tf.constant(12) e = variables.variable('e', initializer=e_init) # The values below highlight how the VariableDeviceChooser puts initial # values on the same device as the variable job. self.assertDeviceEqual(a.device, '/job:ps/task:0/cpu:0') self.assertDeviceEqual(a.initial_value.device, a.device) self.assertDeviceEqual(b.device, '/job:ps/task:1/cpu:0') self.assertDeviceEqual(b.initial_value.device, b.device) self.assertDeviceEqual(c.device, '/cpu:12') self.assertDeviceEqual(c.initial_value.device, c.device) self.assertDeviceEqual(d.device, '/job:ps/task:0/cpu:0') self.assertDeviceEqual(d.initial_value.device, d.device) self.assertDeviceEqual(e.device, '/job:ps/task:1/cpu:0') self.assertDeviceEqual(e.initial_value.device, '/cpu:99')
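A short sketch of putting VariableDeviceChooser to work outside a test (values illustrative): every variable created by the layers below is placed on a parameter server chosen round-robin.

# Hedged sketch: round-robin model variables across two parameter servers.
device_chooser = variables.VariableDeviceChooser(num_parameter_servers=2)
with scopes.arg_scope([variables.variable], device=device_chooser):
  net = ops.conv2d(images, 64, [3, 3], scope='conv1')
  net = ops.conv2d(net, 64, [3, 3], scope='conv2')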
def testVariableWithDeviceFunction(self): class DevFn(object): def __init__(self): self.counter = -1 def __call__(self, op): self.counter += 1 return "cpu:%d" % self.counter with self.test_session(): with scopes.arg_scope([variables.variable], device=DevFn()): a = variables.variable("a", []) b = variables.variable("b", []) c = variables.variable("c", [], device="cpu:12") d = variables.variable("d", []) with tf.device("cpu:99"): e_init = tf.constant(12) e = variables.variable("e", initializer=e_init) self.assertDeviceEqual(a.device, "cpu:0") self.assertDeviceEqual(a.initial_value.device, "cpu:0") self.assertDeviceEqual(b.device, "cpu:1") self.assertDeviceEqual(b.initial_value.device, "cpu:1") self.assertDeviceEqual(c.device, "cpu:12") self.assertDeviceEqual(c.initial_value.device, "cpu:12") self.assertDeviceEqual(d.device, "cpu:2") self.assertDeviceEqual(d.initial_value.device, "cpu:2") self.assertDeviceEqual(e.device, "cpu:3") self.assertDeviceEqual(e.initial_value.device, "cpu:99")
def net(inputs, is_training=True, scope=''): with tf.op_scope([inputs], scope, 'resnet'): with scopes.arg_scope([ops.conv2d, ops.deconv2d, ops.batch_norm], is_training=is_training): # 256 x 256 x 3 net = ops.conv2d(inputs, 32, [9, 9], stride=1, scope='conv1') # 256 x 256 x 32 net = ops.conv2d(net, 64, [3, 3], stride=2, scope='conv2') # 128 x 128 x 64 net = ops.conv2d(net, 128, [3, 3], stride=2, scope='conv3') # 64 x 64 x 128 added = net for i in xrange(5): x = added net = ops.conv2d(x, 128, [3, 3], stride=1, scope='res' + str(i) + 'c1') net = ops.conv2d(net, 128, [3, 3], activation=None, stride=1, scope='res' + str(i) + 'c2') added = x + net net = added # print net # 64 x 64 x 128 net = ops.deconv2d(net, 64, [3, 3], stride=2, scope='deconv1') net = ops.deconv2d(net, 32, [3, 3], stride=2, scope='deconv2') net = ops.deconv2d(net, 3, [9, 9], stride=1, activation=tf.nn.tanh, scope='deconv3') return (net + 1) * 127.5
def testOverwriteArgScope(self): func1_args = (0, ) func1_kwargs = {'a': 1, 'b': 2, 'c': [1]} with scopes.arg_scope([func1], a=1, b=None, c=[1]): args, kwargs = func1(0, b=2) self.assertTupleEqual(args, func1_args) self.assertDictEqual(kwargs, func1_kwargs)
def testOverwriteArgScope(self): func1_args = (0,) func1_kwargs = {'a': 1, 'b': 2, 'c': [1]} with scopes.arg_scope([func1], a=1, b=None, c=[1]): args, kwargs = func1(0, b=2) self.assertTupleEqual(args, func1_args) self.assertDictEqual(kwargs, func1_kwargs)
def testVariableWithDeviceFunction(self): class DevFn(object): def __init__(self): self.counter = -1 def __call__(self, op): self.counter += 1 return 'cpu:%d' % self.counter with self.test_session(): with scopes.arg_scope([variables.variable], device=DevFn()): a = variables.variable('a', []) b = variables.variable('b', []) c = variables.variable('c', [], device='cpu:12') d = variables.variable('d', []) with tf.device('cpu:99'): e_init = tf.constant(12) e = variables.variable('e', initializer=e_init) self.assertDeviceEqual(a.device, 'cpu:0') self.assertDeviceEqual(a.initial_value.device, 'cpu:0') self.assertDeviceEqual(b.device, 'cpu:1') self.assertDeviceEqual(b.initial_value.device, 'cpu:1') self.assertDeviceEqual(c.device, 'cpu:12') self.assertDeviceEqual(c.initial_value.device, 'cpu:12') self.assertDeviceEqual(d.device, 'cpu:2') self.assertDeviceEqual(d.initial_value.device, 'cpu:2') self.assertDeviceEqual(e.device, 'cpu:3') self.assertDeviceEqual(e.initial_value.device, 'cpu:99')
def testSimpleArgScopeWithTuple(self): func1_args = (0, ) func1_kwargs = {'a': 1, 'b': None, 'c': [1]} with self.test_session(): with scopes.arg_scope((func1, ), a=1, b=None, c=[1]): args, kwargs = func1(0) self.assertTupleEqual(args, func1_args) self.assertDictEqual(kwargs, func1_kwargs)
def testSimpleArgScopeWithTuple(self): func1_args = (0,) func1_kwargs = {'a': 1, 'b': None, 'c': [1]} with self.test_session(): with scopes.arg_scope((func1,), a=1, b=None, c=[1]): args, kwargs = func1(0) self.assertTupleEqual(args, func1_args) self.assertDictEqual(kwargs, func1_kwargs)
def testReuseFCWithBatchNorm(self): height, width = 3, 3 with self.test_session(): images = tf.random_uniform((5, height * width * 3), seed=1) with scopes.arg_scope([ops.fc], batch_norm_params={'decay': 0.9}): net = ops.fc(images, 27, scope='fc1') net = ops.fc(net, 27, scope='fc1', reuse=True) self.assertEqual(len(variables.get_variables()), 4) self.assertEqual(len(variables.get_variables('fc1/BatchNorm')), 3)
def testReuseFCWithBatchNorm(self): height, width = 3, 3 with self.test_session(): images = tf.random_uniform((5, height * width * 3), seed=1) with scopes.arg_scope([ops.fc], batch_norm_params={'decay': 0.9}): net = ops.fc(images, 27, scope='fc1') net = ops.fc(net, 27, scope='fc1', reuse=True) self.assertEquals(len(variables.get_variables()), 4) self.assertEquals(len(variables.get_variables('fc1/BatchNorm')), 3)
def nin_dssm(inputs, num_classes, num_of_exs, is_training=True, restore_logits=True, scope=''): # end_points will collect relevant activations for external use, for example # summaries or losses. end_points = {} with tf.op_scope([inputs], scope, 'nin'): with scopes.arg_scope([ops.conv2d, ops.fc, ops.batch_norm], is_training=is_training): # conv1 end_points['conv1'] = ops.conv2d(inputs,192,[5,5],scope='conv1') end_points['conv1_1'] = ops.conv2d(end_points['conv1'],160,[1,1],scope='conv1_1') end_points['conv1_2'] = ops.conv2d(end_points['conv1_1'],96,[1,1],scope='conv1_2') end_points['pool1'] = ops.max_pool(end_points['conv1_2'],[3,3],stride=2, padding='SAME',scope='pool1') net = ops.dropout(end_points['pool1'],0.5) # conv2 96*16*16 end_points['conv2'] = ops.conv2d(net,192,[5,5],scope='conv2') end_points['conv2_1'] = ops.conv2d(end_points['conv2'],192,[1,1],scope='conv2_1') end_points['conv2_2'] = ops.conv2d(end_points['conv2_1'],192,[1,1],scope='conv2_2') end_points['pool2'] = ops.max_pool(end_points['conv2_2'],[3,3],stride=2, padding='SAME',scope='pool2') net = ops.dropout(end_points['pool2'],0.5) # conv3 192*8*8 end_points['conv3'] = ops.conv2d(net,192,[3,3],scope='conv3') # 192 * 8 * 8 end_points['conv3_1'] = ops.conv2d(end_points['conv3'],192,[1,1],scope='conv3_1') # 192 * 8 * 8 #TODO using which layer feature? #firstly,consider conv3_1, and then consider fusion conv3 & conv3_1 end_points['max_pool'] = ops.max_pool(end_points['conv3_1'],[8,8],scope='max_pool') end_points['avg_pool'] = ops.avg_pool(end_points['conv3_1'],[8,8],scope='avg_pool') end_points['hybrid_pool'] = 0.9*end_points['max_pool'] + 0.1*end_points['avg_pool'] end_points['feature'] = tf.nn.l2_normalize(tf.squeeze(end_points['hybrid_pool']),dim=1) #OUTPUT (batch_size * num_negs_and_pos+1) * 192 ,eg. batch_size*3*192 imgs = tf.split(0,num_of_exs ,end_points['feature']) anchors = imgs[0] positives = imgs[1] rst=[tf.reduce_sum(tf.mul(anchors,positives),1)] for k in xrange(2,num_of_exs): rst.append(tf.reduce_sum(tf.mul(anchors,imgs[k]),1)) #batch*(negs-1) end_points['dssm'] = tf.concat(1,[tf.expand_dims(_,-1) for _ in rst]) end_points['conv3_2'] = ops.conv2d(end_points['conv3_1'],10,[1,1],scope='conv3_2') net = ops.avg_pool(end_points['conv3_2'],[8,8],scope='avg_pool') flatten = ops.flatten(net,scope='flatten') #TODO take care this,using num_classes but 10.. end_points['logits'] = ops.fc(flatten,num_classes,activation=None,scope='fc') return end_points['logits'],end_points['dssm'],end_points
def testConvWithBatchNorm(self): height, width = 3, 3 with self.test_session(): images = tf.random_uniform((5, height, width, 3), seed=1) with scopes.arg_scope([ops.conv2d], batch_norm_params={}): net = ops.conv2d(images, 32, [3, 3], scope='conv1') net = ops.conv2d(net, 32, [3, 3], scope='conv2') self.assertEquals(len(tf.get_collection('moving_vars')), 4) self.assertEquals(len(variables.get_variables('conv1/BatchNorm')), 3) self.assertEquals(len(variables.get_variables('conv2/BatchNorm')), 3)
def testFCWithBatchNorm(self): height, width = 3, 3 with self.test_session(): images = tf.random_uniform((5, height * width * 3), seed=1) with scopes.arg_scope([ops.fc], batch_norm_params={}): net = ops.fc(images, 32, scope='fc1') net = ops.fc(net, 32, scope='fc2') self.assertEquals(len(tf.get_collection('moving_vars')), 4) self.assertEquals(len(variables.get_variables('fc1/BatchNorm')), 3) self.assertEquals(len(variables.get_variables('fc2/BatchNorm')), 3)
def testFCWithBatchNorm(self): height, width = 3, 3 with self.test_session(): images = tf.random_uniform((5, height * width * 3), seed=1) with scopes.arg_scope([ops.fc], batch_norm_params={}): net = ops.fc(images, 27) net = ops.fc(net, 27) self.assertEquals(len(variables.get_variables()), 8) self.assertEquals(len(variables.get_variables('FC/BatchNorm')), 3) self.assertEquals(len(variables.get_variables('FC_1/BatchNorm')), 3)
def testReuseConvWithBatchNorm(self): height, width = 3, 3 with self.test_session(): images = tf.random_uniform((5, height, width, 32), seed=1) with scopes.arg_scope([ops.conv2d], batch_norm_params={'decay': 0.9}): net = ops.conv2d(images, 32, [3, 3], scope='Conv') net = ops.conv2d(net, 32, [3, 3], scope='Conv', reuse=True) self.assertEquals(len(variables.get_variables()), 4) self.assertEquals(len(variables.get_variables('Conv/BatchNorm')), 3) self.assertEquals(len(variables.get_variables('Conv_1/BatchNorm')), 0)
def testSharedArgScopeTuple(self): func1_args = (0,) func1_kwargs = {'a': 1, 'b': None, 'c': [1]} with scopes.arg_scope((func1, func2), a=1, b=None, c=[1]): args, kwargs = func1(0) self.assertTupleEqual(args, func1_args) self.assertDictEqual(kwargs, func1_kwargs) args, kwargs = func2(0) self.assertTupleEqual(args, func1_args) self.assertDictEqual(kwargs, func1_kwargs)
def testSharedArgScopeTuple(self): func1_args = (0, ) func1_kwargs = {'a': 1, 'b': None, 'c': [1]} with scopes.arg_scope((func1, func2), a=1, b=None, c=[1]): args, kwargs = func1(0) self.assertTupleEqual(args, func1_args) self.assertDictEqual(kwargs, func1_kwargs) args, kwargs = func2(0) self.assertTupleEqual(args, func1_args) self.assertDictEqual(kwargs, func1_kwargs)
def testFCWithBatchNorm(self): height, width = 3, 3 with self.test_session(): images = tf.random_uniform((5, height * width * 3), seed=1) with scopes.arg_scope([ops.fc], batch_norm_params={}): net = ops.fc(images, 27) net = ops.fc(net, 27) self.assertEqual(len(variables.get_variables()), 8) self.assertEqual(len(variables.get_variables('FC/BatchNorm')), 3) self.assertEqual(len(variables.get_variables('FC_1/BatchNorm')), 3)
def resnet(inputs, is_train=True, scope=''): batch_norm_params = { 'decay': 0.9997, 'epsilon': 0.001, } with scopes.arg_scope([ops.conv2d, ops.deconv2d], weight_decay=0.0005, stddev=0.1, activation=tf.nn.relu, batch_norm_params=batch_norm_params): return net(inputs, is_train, scope)
def testReplicaDeviceSetter(self): device_fn = tf.train.replica_device_setter(2) with tf.Graph().as_default(): with scopes.arg_scope([variables.global_step], device=device_fn): gs = variables.global_step() gs2 = variables.global_step() self.assertEquals(gs, gs2) self.assertDeviceEqual(gs.device, '/job:ps/task:0') self.assertDeviceEqual(gs.initial_value.device, '/job:ps/task:0') self.assertDeviceEqual(gs2.device, '/job:ps/task:0') self.assertDeviceEqual(gs2.initial_value.device, '/job:ps/task:0')
def testReplicaDeviceSetter(self): device_fn = tf.train.replica_device_setter(2) with tf.Graph().as_default(): with scopes.arg_scope([variables.global_step], device=device_fn): gs = variables.global_step() gs2 = variables.global_step() self.assertEquals(gs, gs2) self.assertDeviceEqual(gs.device, "/job:ps/task:0") self.assertDeviceEqual(gs.initial_value.device, "/job:ps/task:0") self.assertDeviceEqual(gs2.device, "/job:ps/task:0") self.assertDeviceEqual(gs2.initial_value.device, "/job:ps/task:0")
def conv2d(inputs, num_filters_out, kernel_size, stride=1, padding='SAME', activation=tf.nn.relu, stddev=0.01, bias=0.0, weight_decay=0, batch_norm_params=None, is_training=True, trainable=True, restore=True, scope=None, reuse=None): with tf.variable_scope(scope, 'Conv', [inputs], reuse=reuse): kernel_h, kernel_w = _two_element_tuple(kernel_size) stride_h, stride_w = _two_element_tuple(stride) num_filters_in = inputs.get_shape()[-1] weights_shape = [kernel_h, kernel_w, num_filters_in, num_filters_out] weights_initializer = tf.truncated_normal_initializer(stddev=stddev) l2_regularizer = None if weight_decay and weight_decay > 0: l2_regularizer = losses.l2_regularizer(weight_decay) weights = variables.variable('weights', shape=weights_shape, initializer=weights_initializer, regularizer=l2_regularizer, trainable=trainable, restore=restore) conv = tf.nn.conv2d(inputs, weights, [1, stride_h, stride_w, 1], padding=padding) if batch_norm_params is not None: with scopes.arg_scope([batch_norm], is_training=is_training, trainable=trainable, restore=restore): outputs = batch_norm(conv, **batch_norm_params) else: bias_shape = [ num_filters_out, ] bias_initializer = tf.constant_initializer(bias) biases = variables.variable('biases', shape=bias_shape, initializer=bias_initializer, trainable=trainable, restore=restore) outputs = tf.nn.bias_add(conv, biases) if activation: outputs = activation(outputs) return outputs
def testVariableWithVariableDeviceChooser(self): with tf.Graph().as_default(): device_fn = variables.VariableDeviceChooser() with scopes.arg_scope([variables.global_step], device=device_fn): gs = variables.global_step() gs2 = variables.global_step() self.assertEquals(gs, gs2) self.assertDeviceEqual(gs.device, "cpu:0") self.assertDeviceEqual(gs.initial_value.device, gs.device) self.assertDeviceEqual(gs2.device, "cpu:0") self.assertDeviceEqual(gs2.initial_value.device, gs2.device)
def testVariableWithVariableDeviceChooser(self): with tf.Graph().as_default(): device_fn = variables.VariableDeviceChooser() with scopes.arg_scope([variables.global_step], device=device_fn): gs = variables.global_step() gs2 = variables.global_step() self.assertEquals(gs, gs2) self.assertDeviceEqual(gs.device, 'cpu:0') self.assertDeviceEqual(gs.initial_value.device, gs.device) self.assertDeviceEqual(gs2.device, 'cpu:0') self.assertDeviceEqual(gs2.initial_value.device, gs2.device)
def testReuseArgScopeNested(self): func1_kwargs = {'a': 1, 'b': None, 'c': [1]} func2_kwargs = {'b': 2, 'd': [2]} def key(f): return (f.__module__, f.__name__) current_scope1 = {key(func1): func1_kwargs.copy()} current_scope2 = { key(func1): func1_kwargs.copy(), key(func2): func2_kwargs.copy() } with self.test_session(): with scopes.arg_scope([func1], a=1, b=None, c=[1]) as scope1: with scopes.arg_scope([func2], b=2, d=[2]) as scope2: pass with scopes.arg_scope(scope1): self.assertDictEqual(scopes._current_arg_scope(), current_scope1) with scopes.arg_scope(scope2): self.assertDictEqual(scopes._current_arg_scope(), current_scope2)
def inception_v3_parameters(weight_decay=0.00004, stddev=0.1, batch_norm_decay=0.9997, batch_norm_epsilon=0.001): """Yields the scope with the default parameters for inception_v3. Args: weight_decay: the weight decay for weights variables. stddev: standard deviation of the truncated Gaussian weight distribution. batch_norm_decay: decay for the moving average of batch_norm moments. batch_norm_epsilon: small float added to variance to avoid dividing by zero. Yields: an arg_scope with the parameters needed for inception_v3. """ # Set weight_decay for weights in Conv and FC layers. with scopes.arg_scope([ops.conv2d, ops.fc], weight_decay=weight_decay): # Set stddev, activation and parameters for batch_norm. with scopes.arg_scope([ops.conv2d], stddev=stddev, activation=tf.nn.relu, batch_norm_params={ 'decay': batch_norm_decay, 'epsilon': batch_norm_epsilon}) as arg_scope: yield arg_scope
def testDeviceFn(self): class DevFn(object): def __init__(self): self.counter = -1 def __call__(self, op): self.counter += 1 return '/cpu:%d' % self.counter with tf.Graph().as_default(): with scopes.arg_scope([variables.global_step], device=DevFn()): gs = variables.global_step() gs2 = variables.global_step() self.assertDeviceEqual(gs.device, '/cpu:0') self.assertEquals(gs, gs2) self.assertDeviceEqual(gs2.device, '/cpu:0')
def testDeviceFn(self): class DevFn(object): def __init__(self): self.counter = -1 def __call__(self, op): self.counter += 1 return "/cpu:%d" % self.counter with tf.Graph().as_default(): with scopes.arg_scope([variables.global_step], device=DevFn()): gs = variables.global_step() gs2 = variables.global_step() self.assertDeviceEqual(gs.device, "/cpu:0") self.assertEquals(gs, gs2) self.assertDeviceEqual(gs2.device, "/cpu:0")
def fc(inputs, num_units_out, activation=tf.nn.relu, stddev=0.01, bias=0.0, weight_decay=0, batch_norm_params=None, is_training=True, trainable=True, restore=True, scope=None, reuse=None): with tf.variable_scope(scope, 'FC', [inputs], reuse=reuse): num_units_in = inputs.get_shape()[1] weights_shape = [num_units_in, num_units_out] weights_initializer = tf.truncated_normal_initializer(stddev=stddev) l2_regularizer = None if weight_decay and weight_decay > 0: l2_regularizer = losses.l2_regularizer(weight_decay) weights = variables.variable('weights', shape=weights_shape, initializer=weights_initializer, regularizer=l2_regularizer, trainable=trainable, restore=restore) if batch_norm_params is not None: outputs = tf.matmul(inputs, weights) with scopes.arg_scope([batch_norm], is_training=is_training, trainable=trainable, restore=restore): outputs = batch_norm(outputs, **batch_norm_params) else: bias_shape = [ num_units_out, ] bias_initializer = tf.constant_initializer(bias) biases = variables.variable('biases', shape=bias_shape, initializer=bias_initializer, trainable=trainable, restore=restore) outputs = tf.nn.xw_plus_b(inputs, weights, biases) if activation: outputs = activation(outputs) return outputs
def testVariableGPUPlacement(self): with tf.Graph().as_default(): device_fn = variables.VariableDeviceChooser(placement="gpu:0") with scopes.arg_scope([variables.variable], device=device_fn): a = variables.variable("a", []) b = variables.variable("b", []) c = variables.variable("c", [], device="cpu:12") d = variables.variable("d", []) with tf.device("cpu:99"): e_init = tf.constant(12) e = variables.variable("e", initializer=e_init) # The values below highlight how the VariableDeviceChooser puts initial # values on the same device as the variable job. self.assertDeviceEqual(a.device, "/gpu:0") self.assertDeviceEqual(a.initial_value.device, a.device) self.assertDeviceEqual(b.device, "/gpu:0") self.assertDeviceEqual(b.initial_value.device, b.device) self.assertDeviceEqual(c.device, "/cpu:12") self.assertDeviceEqual(c.initial_value.device, c.device) self.assertDeviceEqual(d.device, "/gpu:0") self.assertDeviceEqual(d.initial_value.device, d.device) self.assertDeviceEqual(e.device, "/gpu:0") self.assertDeviceEqual(e.initial_value.device, "/cpu:99")
def testDevice(self): with tf.Graph().as_default(): with scopes.arg_scope([variables.global_step], device='/gpu:0'): gs = variables.global_step() self.assertDeviceEqual(gs.device, '/gpu:0')
def inception_v3(inputs, dropout_keep_prob=0.8, num_classes=1001, is_training=True, restore_logits=True, scope=''): """Latest Inception from http://arxiv.org/abs/1512.00567. "Rethinking the Inception Architecture for Computer Vision" Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens, Zbigniew Wojna Args: inputs: a tensor of size [batch_size, height, width, channels]. dropout_keep_prob: dropout keep_prob. num_classes: number of predicted classes. is_training: whether is training or not. restore_logits: whether or not the logits layers should be restored. Useful for fine-tuning a model with different num_classes. scope: Optional scope for op_scope. Returns: a list containing 'logits', 'aux_logits' Tensors. """ # end_points will collect relevant activations for external use, for example # summaries or losses. end_points = {} with tf.op_scope([inputs], scope, 'baxNet'): with scopes.arg_scope([ops.conv2d, ops.fc, ops.batch_norm, ops.dropout], is_training=is_training): with scopes.arg_scope([ops.conv2d, ops.max_pool, ops.avg_pool], stride=1, padding='VALID'): # 256 x 256 x 3 end_points['conv0'] = ops.conv2d(inputs, 8, [5, 5], stride=1, scope='conv0', padding='SAME') end_points['batch_norm1'] = ops.batch_norm(end_points['conv0'], scope='batch_norm1') # 256 x 256 x 32 end_points['conv1'] = ops.conv2d(end_points['batch_norm1'], 16, [3, 3], scope='conv1', padding='SAME') end_points['batch_norm2'] = ops.batch_norm(end_points['conv1'], scope='batch_norm2') # 128 x 128 x 64 end_points['conv2'] = ops.conv2d(end_points['batch_norm2'], 16, [3, 3], scope='conv2', padding='SAME') end_points['batch_norm3'] = ops.batch_norm(end_points['conv2'], scope='batch_norm3') in_net = end_points['batch_norm3'] print('IN_NET SHAPE') print(in_net.get_shape()) curr_filters = 16 base_layer_num = [32,16,8,4] for i in xrange(1,5): for j in xrange(1,base_layer_num[i-1] + i): with tf.variable_scope('res%d_%d' % (i,j)): if (j < (base_layer_num[i-1] + i - 1)): curr_padding = 'SAME' curr_stride = 1 else: curr_filters = 2*curr_filters curr_padding = 'SAME' curr_stride = 2 conv1_1 = ops.conv2d(in_net, curr_filters, [3, 3], padding=curr_padding, stride=curr_stride, scope='conv1_1') batch_norm1_1 = ops.batch_norm(conv1_1, scope='batch_norm1_1') conv1_2 = ops.conv2d(batch_norm1_1, curr_filters, [3, 3], padding='SAME', scope='conv1_2') if (j < (base_layer_num[i-1] + i - 1)): combined = in_net + conv1_2 else: combined = ops.conv2d(in_net, curr_filters, [1, 1], padding='SAME', stride=2, scope='combined') combined = combined + conv1_2 print('DOWN SAMPLE') print(in_net.get_shape()) print(combined.get_shape()) batch_norm1_2 = ops.batch_norm(combined, scope='batch_norm1_2') in_net = batch_norm1_2 end_points['res%d_%d' %(i,j)] = in_net # for i in xrange(1,int(np.log2(in_net.get_shape()[1])) + 1): # print('SHAPPEEEE') print(in_net.get_shape()) for i in xrange(1,3): with tf.variable_scope('res_final%d' % i): conv1_1 = ops.conv2d(in_net, curr_filters, [3, 3], padding='SAME', stride=2, scope='conv1_1') batch_norm1_1 = ops.batch_norm(conv1_1, scope='batch_norm1_1') conv1_2 = ops.conv2d(batch_norm1_1, curr_filters, [3, 3], padding='SAME', scope='conv1_2') combined = ops.conv2d(in_net, curr_filters, [1, 1], padding='SAME', stride=2, scope='combined') combined = combined + conv1_2 batch_norm1_2 = ops.batch_norm(combined, scope='batch_norm1_2') in_net = batch_norm1_2 end_points['res_final%d' % i] = in_net with tf.variable_scope('logits'): shape = in_net.get_shape() print('FINAL SHAPE') print(shape) if (shape[1] > 1): in_net = 
ops.avg_pool(in_net, shape[1:3], padding='VALID', scope='avg_pool') in_net = ops.flatten(in_net, scope='flatten') logits = ops.fc(in_net, num_classes, activation=None, scope='logits', restore=restore_logits) end_points['logits'] = logits end_points['predictions'] = tf.nn.softmax(logits, name='predictions') return logits, end_points
def batch_norm(inputs,
               decay=0.999,
               scale=False,
               epsilon=0.001,
               moving_vars='moving_vars',
               activation=None,
               is_training=True,
               trainable=True,
               restore=True,
               scope=None,
               reuse=None):
  """Adds a Batch Normalization layer.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels]
            or [batch_size, channels].
    decay: decay for the moving average.
    scale: If True, multiply by gamma. If False, gamma is not used. When the
      next layer is linear (also e.g. ReLU), this can be disabled since the
      scaling can be done by the next layer.
    epsilon: small float added to variance to avoid dividing by zero.
    moving_vars: collection to store the moving_mean and moving_variance.
    activation: activation function.
    is_training: whether or not the model is in training mode.
    trainable: whether or not the variables should be trainable.
    restore: whether or not the variables should be marked for restore.
    scope: Optional scope for variable_op_scope.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.

  Returns:
    a tensor representing the output of the operation.
  """
  inputs_shape = inputs.get_shape()
  with tf.variable_op_scope([inputs], scope, 'BatchNorm', reuse=reuse):
    axis = range(len(inputs_shape) - 1)
    params_shape = inputs_shape[-1:]
    with scopes.arg_scope([variables.variable], restore=restore):
      # Allocate parameters for the beta and gamma of the normalization.
      beta = variables.variable('beta',
                                params_shape,
                                initializer=tf.zeros_initializer,
                                trainable=trainable)
      if scale:
        gamma = variables.variable('gamma',
                                   params_shape,
                                   initializer=tf.ones_initializer,
                                   trainable=trainable)
      else:
        gamma = None
      # Create moving_mean and moving_variance and add them to the
      # moving_vars and GraphKeys.MOVING_AVERAGE_VARIABLES collections.
      with scopes.arg_scope([variables.variable],
                            trainable=False,
                            collections=[
                                moving_vars,
                                tf.GraphKeys.MOVING_AVERAGE_VARIABLES]):
        moving_mean = variables.variable('moving_mean',
                                         params_shape,
                                         initializer=tf.zeros_initializer)
        moving_variance = variables.variable('moving_variance',
                                             params_shape,
                                             initializer=tf.ones_initializer)
    if is_training:
      # Calculate the moments based on the individual batch.
      mean, variance = tf.nn.moments(inputs, axis)
      update_moving_mean = moving_averages.assign_moving_average(
          moving_mean, mean, decay)
      tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
      update_moving_variance = moving_averages.assign_moving_average(
          moving_variance, variance, decay)
      tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)
    else:
      # Just use the moving_mean and moving_variance.
      mean = moving_mean
      variance = moving_variance
    # Normalize the activations.
    outputs = tf.nn.batch_normalization(
        inputs, mean, variance, beta, gamma, epsilon)
    outputs.set_shape(inputs.get_shape())
    if activation:
      outputs = activation(outputs)
    return outputs
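A hedged training-step sketch (the wiring and helper name below are assumptions, not code from this file): batch_norm registers its assign_moving_average ops in UPDATE_OPS_COLLECTION, so those updates have to be run alongside the optimizer step.

def make_train_op(total_loss, learning_rate=0.01):
  # Hedged sketch: group the batch-norm moving-average updates with the
  # gradient step so both run on every iteration.
  grad_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(total_loss)
  batchnorm_updates = tf.get_collection(UPDATE_OPS_COLLECTION)
  return tf.group(grad_step, *batchnorm_updates)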
def fc(inputs, num_units_out, activation=tf.nn.relu, stddev=0.01, bias=0.0, weight_decay=0, batch_norm_params=None, is_training=True, trainable=True, restore=True, scope=None, reuse=None): """Adds a fully connected layer followed by an optional batch_norm layer. FC creates a variable called 'weights', representing the fully connected weight matrix, that is multiplied by the input. If `batch_norm` is None, a second variable called 'biases' is added to the result of the initial vector-matrix multiplication. Args: inputs: a [B x N] tensor where B is the batch size and N is the number of input units in the layer. num_units_out: the number of output units in the layer. activation: activation function. stddev: the standard deviation for the weights. bias: the initial value of the biases. weight_decay: the weight decay. batch_norm_params: parameters for the batch_norm. If is None don't use it. is_training: whether or not the model is in training mode. trainable: whether or not the variables should be trainable or not. restore: whether or not the variables should be marked for restore. scope: Optional scope for variable_op_scope. reuse: whether or not the layer and its variables should be reused. To be able to reuse the layer scope must be given. Returns: the tensor variable representing the result of the series of operations. """ with tf.variable_op_scope([inputs], scope, 'FC', reuse=reuse): num_units_in = inputs.get_shape()[1] weights_shape = [num_units_in, num_units_out] weights_initializer = tf.truncated_normal_initializer(stddev=stddev) l2_regularizer = None if weight_decay and weight_decay > 0: l2_regularizer = losses.l2_regularizer(weight_decay) weights = variables.variable('weights', shape=weights_shape, initializer=weights_initializer, regularizer=l2_regularizer, trainable=trainable, restore=restore) if batch_norm_params is not None: outputs = tf.matmul(inputs, weights) with scopes.arg_scope([batch_norm], is_training=is_training, trainable=trainable, restore=restore): outputs = batch_norm(outputs, **batch_norm_params) else: bias_shape = [num_units_out,] bias_initializer = tf.constant_initializer(bias) biases = variables.variable('biases', shape=bias_shape, initializer=bias_initializer, trainable=trainable, restore=restore) outputs = tf.nn.xw_plus_b(inputs, weights, biases) if activation: outputs = activation(outputs) return outputs
def conv2d(inputs,
           num_filters_out,
           kernel_size,
           stride=1,
           padding='SAME',
           activation=tf.nn.relu,
           stddev=0.01,
           bias=0.0,
           weight_decay=0,
           batch_norm_params=None,
           is_training=True,
           trainable=True,
           restore=True,
           scope=None,
           reuse=None):
  """Adds a 2D convolution followed by an optional batch_norm layer.

  conv2d creates a variable called 'weights', representing the convolutional
  kernel, that is convolved with the input. If `batch_norm_params` is None, a
  second variable called 'biases' is added to the result of the convolution
  operation.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_filters_out: the number of output filters.
    kernel_size: a list of length 2: [kernel_height, kernel_width] of the
      filters. Can be an int if both values are the same.
    stride: a list of length 2: [stride_height, stride_width]. Can be an int if
      both strides are the same. Note that presently both strides must have the
      same value.
    padding: one of 'VALID' or 'SAME'.
    activation: activation function.
    stddev: standard deviation of the truncated Gaussian weight distribution.
    bias: the initial value of the biases.
    weight_decay: the weight decay.
    batch_norm_params: parameters for the batch_norm. If None, batch_norm is
      not used.
    is_training: whether or not the model is in training mode.
    trainable: whether or not the variables should be trainable.
    restore: whether or not the variables should be marked for restore.
    scope: Optional scope for variable_op_scope.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.

  Returns:
    a tensor representing the output of the operation.
  """
  with tf.variable_op_scope([inputs], scope, 'Conv', reuse=reuse):
    kernel_h, kernel_w = _two_element_tuple(kernel_size)
    stride_h, stride_w = _two_element_tuple(stride)
    num_filters_in = inputs.get_shape()[-1]
    weights_shape = [kernel_h, kernel_w, num_filters_in, num_filters_out]
    weights_initializer = tf.truncated_normal_initializer(stddev=stddev)
    l2_regularizer = None
    if weight_decay and weight_decay > 0:
      l2_regularizer = losses.l2_regularizer(weight_decay)
    weights = variables.variable('weights',
                                 shape=weights_shape,
                                 initializer=weights_initializer,
                                 regularizer=l2_regularizer,
                                 trainable=trainable,
                                 restore=restore)
    conv = tf.nn.conv2d(inputs, weights, [1, stride_h, stride_w, 1],
                        padding=padding)
    if batch_norm_params is not None:
      with scopes.arg_scope([batch_norm],
                            is_training=is_training,
                            trainable=trainable,
                            restore=restore):
        outputs = batch_norm(conv, **batch_norm_params)
    else:
      bias_shape = [num_filters_out,]
      bias_initializer = tf.constant_initializer(bias)
      biases = variables.variable('biases',
                                  shape=bias_shape,
                                  initializer=bias_initializer,
                                  trainable=trainable,
                                  restore=restore)
      outputs = tf.nn.bias_add(conv, biases)
    if activation:
      outputs = activation(outputs)
    return outputs
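A small sketch of the two code paths in conv2d above (shapes and decay value are illustrative): without batch_norm_params a 'biases' variable is added after the convolution; with it, the convolution is followed by batch_norm and no biases are created.

# Hedged sketch: plain conv + bias vs. conv + batch norm driven by arg_scope.
images = tf.random_uniform((8, 32, 32, 3), seed=1)
plain = ops.conv2d(images, 16, [3, 3], scope='plain')  # weights + biases
with scopes.arg_scope([ops.conv2d], batch_norm_params={'decay': 0.99}):
  normed = ops.conv2d(images, 16, [3, 3], scope='normed')  # weights + BatchNorm vars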
def inception_v3(inputs, dropout_keep_prob=0.8, num_classes=1000, is_training=True, restore_logits=True, scope=''): """Latest Inception from http://arxiv.org/abs/1512.00567. "Rethinking the Inception Architecture for Computer Vision" Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens, Zbigniew Wojna Args: inputs: a tensor of size [batch_size, height, width, channels]. dropout_keep_prob: dropout keep_prob. num_classes: number of predicted classes. is_training: whether is training or not. restore_logits: whether or not the logits layers should be restored. Useful for fine-tuning a model with different num_classes. scope: Optional scope for name_scope. Returns: a list containing 'logits', 'aux_logits' Tensors. """ # end_points will collect relevant activations for external use, for example # summaries or losses. end_points = {} with tf.name_scope(scope, 'inception_v3', [inputs]): with scopes.arg_scope([ops.conv2d, ops.fc, ops.batch_norm, ops.dropout], is_training=is_training): with scopes.arg_scope([ops.conv2d, ops.max_pool, ops.avg_pool], stride=1, padding='VALID'): # 299 x 299 x 3 end_points['conv0'] = ops.conv2d(inputs, 32, [3, 3], stride=2, scope='conv0') # 149 x 149 x 32 end_points['conv1'] = ops.conv2d(end_points['conv0'], 32, [3, 3], scope='conv1') # 147 x 147 x 32 end_points['conv2'] = ops.conv2d(end_points['conv1'], 64, [3, 3], padding='SAME', scope='conv2') # 147 x 147 x 64 end_points['pool1'] = ops.max_pool(end_points['conv2'], [3, 3], stride=2, scope='pool1') # 73 x 73 x 64 end_points['conv3'] = ops.conv2d(end_points['pool1'], 80, [1, 1], scope='conv3') # 73 x 73 x 80. end_points['conv4'] = ops.conv2d(end_points['conv3'], 192, [3, 3], scope='conv4') # 71 x 71 x 192. end_points['pool2'] = ops.max_pool(end_points['conv4'], [3, 3], stride=2, scope='pool2') # 35 x 35 x 192. net = end_points['pool2'] # Inception blocks with scopes.arg_scope([ops.conv2d, ops.max_pool, ops.avg_pool], stride=1, padding='SAME'): # mixed: 35 x 35 x 256. with tf.variable_scope('mixed_35x35x256a'): with tf.variable_scope('branch1x1'): branch1x1 = ops.conv2d(net, 64, [1, 1]) with tf.variable_scope('branch5x5'): branch5x5 = ops.conv2d(net, 48, [1, 1]) branch5x5 = ops.conv2d(branch5x5, 64, [5, 5]) with tf.variable_scope('branch3x3dbl'): branch3x3dbl = ops.conv2d(net, 64, [1, 1]) branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3]) branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3]) with tf.variable_scope('branch_pool'): branch_pool = ops.avg_pool(net, [3, 3]) branch_pool = ops.conv2d(branch_pool, 32, [1, 1]) net = tf.concat([branch1x1, branch5x5, branch3x3dbl, branch_pool], 3) end_points['mixed_35x35x256a'] = net # mixed_1: 35 x 35 x 288. with tf.variable_scope('mixed_35x35x288a'): with tf.variable_scope('branch1x1'): branch1x1 = ops.conv2d(net, 64, [1, 1]) with tf.variable_scope('branch5x5'): branch5x5 = ops.conv2d(net, 48, [1, 1]) branch5x5 = ops.conv2d(branch5x5, 64, [5, 5]) with tf.variable_scope('branch3x3dbl'): branch3x3dbl = ops.conv2d(net, 64, [1, 1]) branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3]) branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3]) with tf.variable_scope('branch_pool'): branch_pool = ops.avg_pool(net, [3, 3]) branch_pool = ops.conv2d(branch_pool, 64, [1, 1]) net = tf.concat([branch1x1, branch5x5, branch3x3dbl, branch_pool], 3) end_points['mixed_35x35x288a'] = net # mixed_2: 35 x 35 x 288. 
with tf.variable_scope('mixed_35x35x288b'): with tf.variable_scope('branch1x1'): branch1x1 = ops.conv2d(net, 64, [1, 1]) with tf.variable_scope('branch5x5'): branch5x5 = ops.conv2d(net, 48, [1, 1]) branch5x5 = ops.conv2d(branch5x5, 64, [5, 5]) with tf.variable_scope('branch3x3dbl'): branch3x3dbl = ops.conv2d(net, 64, [1, 1]) branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3]) branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3]) with tf.variable_scope('branch_pool'): branch_pool = ops.avg_pool(net, [3, 3]) branch_pool = ops.conv2d(branch_pool, 64, [1, 1]) net = tf.concat([branch1x1, branch5x5, branch3x3dbl, branch_pool], 3) end_points['mixed_35x35x288b'] = net # mixed_3: 17 x 17 x 768. with tf.variable_scope('mixed_17x17x768a'): with tf.variable_scope('branch3x3'): branch3x3 = ops.conv2d(net, 384, [3, 3], stride=2, padding='VALID') with tf.variable_scope('branch3x3dbl'): branch3x3dbl = ops.conv2d(net, 64, [1, 1]) branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3]) branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3], stride=2, padding='VALID') with tf.variable_scope('branch_pool'): branch_pool = ops.max_pool(net, [3, 3], stride=2, padding='VALID') net = tf.concat([branch3x3, branch3x3dbl, branch_pool], 3) end_points['mixed_17x17x768a'] = net # mixed4: 17 x 17 x 768. with tf.variable_scope('mixed_17x17x768b'): with tf.variable_scope('branch1x1'): branch1x1 = ops.conv2d(net, 192, [1, 1]) with tf.variable_scope('branch7x7'): branch7x7 = ops.conv2d(net, 128, [1, 1]) branch7x7 = ops.conv2d(branch7x7, 128, [1, 7]) branch7x7 = ops.conv2d(branch7x7, 192, [7, 1]) with tf.variable_scope('branch7x7dbl'): branch7x7dbl = ops.conv2d(net, 128, [1, 1]) branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [7, 1]) branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [1, 7]) branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [7, 1]) branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7]) with tf.variable_scope('branch_pool'): branch_pool = ops.avg_pool(net, [3, 3]) branch_pool = ops.conv2d(branch_pool, 192, [1, 1]) net = tf.concat([branch1x1, branch7x7, branch7x7dbl, branch_pool], 3) end_points['mixed_17x17x768b'] = net # mixed_5: 17 x 17 x 768. with tf.variable_scope('mixed_17x17x768c'): with tf.variable_scope('branch1x1'): branch1x1 = ops.conv2d(net, 192, [1, 1]) with tf.variable_scope('branch7x7'): branch7x7 = ops.conv2d(net, 160, [1, 1]) branch7x7 = ops.conv2d(branch7x7, 160, [1, 7]) branch7x7 = ops.conv2d(branch7x7, 192, [7, 1]) with tf.variable_scope('branch7x7dbl'): branch7x7dbl = ops.conv2d(net, 160, [1, 1]) branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1]) branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [1, 7]) branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1]) branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7]) with tf.variable_scope('branch_pool'): branch_pool = ops.avg_pool(net, [3, 3]) branch_pool = ops.conv2d(branch_pool, 192, [1, 1]) net = tf.concat([branch1x1, branch7x7, branch7x7dbl, branch_pool], 3) end_points['mixed_17x17x768c'] = net # mixed_6: 17 x 17 x 768. 
with tf.variable_scope('mixed_17x17x768d'): with tf.variable_scope('branch1x1'): branch1x1 = ops.conv2d(net, 192, [1, 1]) with tf.variable_scope('branch7x7'): branch7x7 = ops.conv2d(net, 160, [1, 1]) branch7x7 = ops.conv2d(branch7x7, 160, [1, 7]) branch7x7 = ops.conv2d(branch7x7, 192, [7, 1]) with tf.variable_scope('branch7x7dbl'): branch7x7dbl = ops.conv2d(net, 160, [1, 1]) branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1]) branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [1, 7]) branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1]) branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7]) with tf.variable_scope('branch_pool'): branch_pool = ops.avg_pool(net, [3, 3]) branch_pool = ops.conv2d(branch_pool, 192, [1, 1]) net = tf.concat([branch1x1, branch7x7, branch7x7dbl, branch_pool], 3) end_points['mixed_17x17x768d'] = net # mixed_7: 17 x 17 x 768. with tf.variable_scope('mixed_17x17x768e'): with tf.variable_scope('branch1x1'): branch1x1 = ops.conv2d(net, 192, [1, 1]) with tf.variable_scope('branch7x7'): branch7x7 = ops.conv2d(net, 192, [1, 1]) branch7x7 = ops.conv2d(branch7x7, 192, [1, 7]) branch7x7 = ops.conv2d(branch7x7, 192, [7, 1]) with tf.variable_scope('branch7x7dbl'): branch7x7dbl = ops.conv2d(net, 192, [1, 1]) branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [7, 1]) branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7]) branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [7, 1]) branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7]) with tf.variable_scope('branch_pool'): branch_pool = ops.avg_pool(net, [3, 3]) branch_pool = ops.conv2d(branch_pool, 192, [1, 1]) net = tf.concat([branch1x1, branch7x7, branch7x7dbl, branch_pool], 3) end_points['mixed_17x17x768e'] = net # Auxiliary Head logits aux_logits = tf.identity(end_points['mixed_17x17x768e']) with tf.variable_scope('aux_logits'): aux_logits = ops.avg_pool(aux_logits, [5, 5], stride=3, padding='VALID') aux_logits = ops.conv2d(aux_logits, 128, [1, 1], scope='proj') # Shape of feature map before the final layer. shape = aux_logits.get_shape() aux_logits = ops.conv2d(aux_logits, 768, shape[1:3], stddev=0.01, padding='VALID') aux_logits = ops.flatten(aux_logits) aux_logits = ops.fc(aux_logits, num_classes, activation=None, stddev=0.001, restore=restore_logits) end_points['aux_logits'] = aux_logits # mixed_8: 8 x 8 x 1280. # Note that the scope below is not changed to not void previous # checkpoints. # (TODO) Fix the scope when appropriate. with tf.variable_scope('mixed_17x17x1280a'): with tf.variable_scope('branch3x3'): branch3x3 = ops.conv2d(net, 192, [1, 1]) branch3x3 = ops.conv2d(branch3x3, 320, [3, 3], stride=2, padding='VALID') with tf.variable_scope('branch7x7x3'): branch7x7x3 = ops.conv2d(net, 192, [1, 1]) branch7x7x3 = ops.conv2d(branch7x7x3, 192, [1, 7]) branch7x7x3 = ops.conv2d(branch7x7x3, 192, [7, 1]) branch7x7x3 = ops.conv2d(branch7x7x3, 192, [3, 3], stride=2, padding='VALID') with tf.variable_scope('branch_pool'): branch_pool = ops.max_pool(net, [3, 3], stride=2, padding='VALID') net = tf.concat([branch3x3, branch7x7x3, branch_pool], 3) end_points['mixed_17x17x1280a'] = net # mixed_9: 8 x 8 x 2048. 
with tf.variable_scope('mixed_8x8x2048a'): with tf.variable_scope('branch1x1'): branch1x1 = ops.conv2d(net, 320, [1, 1]) with tf.variable_scope('branch3x3'): branch3x3 = ops.conv2d(net, 384, [1, 1]) branch3x3 = tf.concat([ops.conv2d(branch3x3, 384, [1, 3]), ops.conv2d(branch3x3, 384, [3, 1])], 3) with tf.variable_scope('branch3x3dbl'): branch3x3dbl = ops.conv2d(net, 448, [1, 1]) branch3x3dbl = ops.conv2d(branch3x3dbl, 384, [3, 3]) branch3x3dbl = tf.concat([ops.conv2d(branch3x3dbl, 384, [1, 3]), ops.conv2d(branch3x3dbl, 384, [3, 1])], 3) with tf.variable_scope('branch_pool'): branch_pool = ops.avg_pool(net, [3, 3]) branch_pool = ops.conv2d(branch_pool, 192, [1, 1]) net = tf.concat([branch1x1, branch3x3, branch3x3dbl, branch_pool], 3) end_points['mixed_8x8x2048a'] = net # mixed_10: 8 x 8 x 2048. with tf.variable_scope('mixed_8x8x2048b'): with tf.variable_scope('branch1x1'): branch1x1 = ops.conv2d(net, 320, [1, 1]) with tf.variable_scope('branch3x3'): branch3x3 = ops.conv2d(net, 384, [1, 1]) branch3x3 = tf.concat([ops.conv2d(branch3x3, 384, [1, 3]), ops.conv2d(branch3x3, 384, [3, 1])], 3) with tf.variable_scope('branch3x3dbl'): branch3x3dbl = ops.conv2d(net, 448, [1, 1]) branch3x3dbl = ops.conv2d(branch3x3dbl, 384, [3, 3]) branch3x3dbl = tf.concat([ops.conv2d(branch3x3dbl, 384, [1, 3]), ops.conv2d(branch3x3dbl, 384, [3, 1])], 3) with tf.variable_scope('branch_pool'): branch_pool = ops.avg_pool(net, [3, 3]) branch_pool = ops.conv2d(branch_pool, 192, [1, 1]) net = tf.concat([branch1x1, branch3x3, branch3x3dbl, branch_pool], 3) end_points['mixed_8x8x2048b'] = net # Final pooling and prediction with tf.variable_scope('logits'): shape = net.get_shape() net = ops.avg_pool(net, shape[1:3], padding='VALID', scope='pool') # 1 x 1 x 2048 net = ops.dropout(net, dropout_keep_prob, scope='dropout') net = ops.flatten(net, scope='flatten') # 2048 logits = ops.fc(net, num_classes, activation=None, scope='logits', restore=restore_logits) # 1000 end_points['logits'] = logits end_points['predictions'] = tf.nn.softmax(logits, name='predictions') return logits, end_points
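A hedged loss sketch for the inception_v3 graph above; the helper name, the one-hot labels, and the 0.4 auxiliary-head weight are assumptions rather than code from this file.

def inception_loss(images, one_hot_labels, num_classes=1000):
  # Hedged sketch: combine the main and auxiliary classifiers into one loss.
  logits, end_points = inception_v3(images, num_classes=num_classes,
                                    is_training=True)
  main_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
      logits=logits, labels=one_hot_labels))
  aux_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
      logits=end_points['aux_logits'], labels=one_hot_labels))
  return main_loss + 0.4 * aux_loss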