def i3d_base(inputs, final_endpoint='Mixed_5c', scope='InceptionV1'):
  """Builds the I3D base network by delegating to `s3dg.s3dg_base`.

  Endpoint names follow Inception V1 to facilitate checkpoint conversion from
  an image-trained Inception V1 checkpoint to an I3D checkpoint.

  Args:
    inputs: A 5-D float tensor of size [batch_size, num_frames, height, width,
      channels].
    final_endpoint: Specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
      'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
      'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e',
      'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b', 'Mixed_5c'].
    scope: Optional variable_scope.

  Returns:
    The result of `s3dg.s3dg_base`, passed through unchanged.
    NOTE(review): other call sites unpack that result as a
    `(final_tensor, end_points)` pair, where `end_points` maps endpoint names
    to activations -- confirm against `s3dg_base`.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values.
  """
  # Settings this wrapper pins when calling the S3D-G builder; gating is
  # switched off via gating_startat=None.
  i3d_settings = dict(
      first_temporal_kernel_size=7,
      temporal_conv_startat='Conv2d_2c_3x3',
      gating_startat=None,
      min_depth=16,
      depth_multiplier=1.0,
      data_format='NDHWC',
  )
  return s3dg.s3dg_base(
      inputs, final_endpoint=final_endpoint, scope=scope, **i3d_settings)
def testBuildAndCheckAllEndPointsUptoMixed5c(self):
  """Checks the static shape of every endpoint built up to 'Mixed_5c'.

  Builds `s3dg.s3dg_base` on a [5, 64, 224, 224, 3] random input and verifies
  that the returned end_points dictionary has exactly the expected keys and
  that each endpoint has the expected static shape.
  """
  batch_size = 5
  num_frames = 64
  height, width = 224, 224

  inputs = tf.random.uniform((batch_size, num_frames, height, width, 3))
  _, end_points = s3dg.s3dg_base(inputs, final_endpoint='Mixed_5c')
  endpoints_shapes = {
      'Conv2d_1a_7x7': [5, 32, 112, 112, 64],
      'MaxPool_2a_3x3': [5, 32, 56, 56, 64],
      'Conv2d_2b_1x1': [5, 32, 56, 56, 64],
      'Conv2d_2c_3x3': [5, 32, 56, 56, 192],
      'MaxPool_3a_3x3': [5, 32, 28, 28, 192],
      'Mixed_3b': [5, 32, 28, 28, 256],
      'Mixed_3c': [5, 32, 28, 28, 480],
      'MaxPool_4a_3x3': [5, 16, 14, 14, 480],
      'Mixed_4b': [5, 16, 14, 14, 512],
      'Mixed_4c': [5, 16, 14, 14, 512],
      'Mixed_4d': [5, 16, 14, 14, 512],
      'Mixed_4e': [5, 16, 14, 14, 528],
      'Mixed_4f': [5, 16, 14, 14, 832],
      'MaxPool_5a_2x2': [5, 8, 7, 7, 832],
      'Mixed_5b': [5, 8, 7, 7, 832],
      'Mixed_5c': [5, 8, 7, 7, 1024],
  }
  # Materialize the key views so the comparison receives plain lists on both
  # Python 2 and Python 3.
  self.assertItemsEqual(list(endpoints_shapes.keys()),
                        list(end_points.keys()))
  # dict.items() exists on Python 2 and 3; dict.iteritems() is Python 2 only.
  for endpoint_name, expected_shape in endpoints_shapes.items():
    self.assertIn(endpoint_name, end_points)
    self.assertListEqual(
        end_points[endpoint_name].get_shape().as_list(), expected_shape)
def testBuildAndCheckAllEndPointsUptoMixed5c(self):
  """Verifies endpoint names and static shapes for a build up to 'Mixed_5c'.

  A [5, 64, 224, 224, 3] random input is fed to `s3dg.s3dg_base`; the returned
  end_points dictionary must contain exactly the expected endpoint names, each
  with the expected static shape.
  """
  input_shape = (5, 64, 224, 224, 3)
  clip = tf.random.uniform(input_shape)
  _, end_points = s3dg.s3dg_base(clip, final_endpoint='Mixed_5c')

  # Expected static shape for each endpoint name.
  expected_shapes = {
      'Conv2d_1a_7x7': [5, 32, 112, 112, 64],
      'MaxPool_2a_3x3': [5, 32, 56, 56, 64],
      'Conv2d_2b_1x1': [5, 32, 56, 56, 64],
      'Conv2d_2c_3x3': [5, 32, 56, 56, 192],
      'MaxPool_3a_3x3': [5, 32, 28, 28, 192],
      'Mixed_3b': [5, 32, 28, 28, 256],
      'Mixed_3c': [5, 32, 28, 28, 480],
      'MaxPool_4a_3x3': [5, 16, 14, 14, 480],
      'Mixed_4b': [5, 16, 14, 14, 512],
      'Mixed_4c': [5, 16, 14, 14, 512],
      'Mixed_4d': [5, 16, 14, 14, 512],
      'Mixed_4e': [5, 16, 14, 14, 528],
      'Mixed_4f': [5, 16, 14, 14, 832],
      'MaxPool_5a_2x2': [5, 8, 7, 7, 832],
      'Mixed_5b': [5, 8, 7, 7, 832],
      'Mixed_5c': [5, 8, 7, 7, 1024],
  }

  expected_names = list(expected_shapes.keys())
  built_names = list(end_points.keys())
  self.assertItemsEqual(expected_names, built_names)

  for name, shape in six.iteritems(expected_shapes):
    name_is_present = name in end_points
    self.assertTrue(name_is_present)
    built_shape = end_points[name].get_shape().as_list()
    self.assertListEqual(built_shape, shape)
def i3d_base(inputs, final_endpoint='Mixed_5c', scope='InceptionV1'):
  """Constructs the I3D base architecture via `s3dg.s3dg_base`.

  The Inception V1 endpoint names are reused to facilitate checkpoint
  conversion from an image-trained Inception V1 checkpoint to an I3D
  checkpoint.

  Args:
    inputs: A 5-D float tensor of size [batch_size, num_frames, height, width,
      channels].
    final_endpoint: Specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
      'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
      'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e',
      'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b', 'Mixed_5c'].
    scope: Optional variable_scope.

  Returns:
    Whatever `s3dg.s3dg_base` returns for the settings below.
    NOTE(review): callers elsewhere unpack `s3dg_base`'s result as a
    `(final_tensor, end_points)` pair rather than a bare dictionary --
    confirm against `s3dg_base`.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values.
  """
  network = s3dg.s3dg_base(
      inputs,
      final_endpoint=final_endpoint,
      scope=scope,
      # Temporal settings fixed by this wrapper.
      first_temporal_kernel_size=7,
      temporal_conv_startat='Conv2d_2c_3x3',
      # No gating is requested.
      gating_startat=None,
      # Channel-depth settings.
      min_depth=16,
      depth_multiplier=1.0,
      # Layout string matching the documented input dimension order.
      data_format='NDHWC')
  return network
def testTenFrames(self):
  """Builds s3dg_base on a 10-frame clip and checks the default output.

  The output op name must start with 'InceptionV1/Mixed_5c' and the static
  shape must be [batch_size, 2, 7, 7, 1024].
  """
  batch_size = 5
  input_shape = (batch_size, 10, 224, 224, 3)
  clip = tf.random.uniform(input_shape)

  net, _ = s3dg.s3dg_base(clip)

  op_name = net.op.name
  self.assertTrue(op_name.startswith('InceptionV1/Mixed_5c'))
  static_shape = net.get_shape().as_list()
  self.assertListEqual(static_shape, [batch_size, 2, 7, 7, 1024])
def testHalfSizeImages(self):
  """Builds s3dg_base on 112x112 frames and checks the default output.

  The output op name must start with 'InceptionV1/Mixed_5c' and the static
  shape must be [batch_size, 8, 4, 4, 1024].
  """
  batch_size = 5
  input_shape = (batch_size, 64, 112, 112, 3)
  clip = tf.random.uniform(input_shape)

  net, _ = s3dg.s3dg_base(clip)

  op_name = net.op.name
  self.assertTrue(op_name.startswith('InceptionV1/Mixed_5c'))
  static_shape = net.get_shape().as_list()
  self.assertListEqual(static_shape, [batch_size, 8, 4, 4, 1024])
def testTenFrames(self):
  """Checks the default s3dg_base output for a 10-frame input.

  Feeds a [5, 10, 224, 224, 3] random tensor to `s3dg.s3dg_base` and verifies
  that the returned tensor's op name starts with 'InceptionV1/Mixed_5c' and
  that its static shape is [batch_size, 2, 7, 7, 1024].
  """
  batch_size = 5
  num_frames = 10
  height, width = 224, 224

  # tf.random.uniform is the namespaced name of the legacy tf.random_uniform
  # alias, and is what the other tests in this file already call.
  inputs = tf.random.uniform((batch_size, num_frames, height, width, 3))
  mixed_5c, _ = s3dg.s3dg_base(inputs)
  self.assertTrue(mixed_5c.op.name.startswith('InceptionV1/Mixed_5c'))
  self.assertListEqual(mixed_5c.get_shape().as_list(),
                       [batch_size, 2, 7, 7, 1024])
def testHalfSizeImages(self):
  """Checks the default s3dg_base output for 112x112 input frames.

  Feeds a [5, 64, 112, 112, 3] random tensor to `s3dg.s3dg_base` and verifies
  that the returned tensor's op name starts with 'InceptionV1/Mixed_5c' and
  that its static shape is [batch_size, 8, 4, 4, 1024].
  """
  batch_size = 5
  num_frames = 64
  height, width = 112, 112

  # tf.random.uniform is the namespaced name of the legacy tf.random_uniform
  # alias, and is what the other tests in this file already call.
  inputs = tf.random.uniform((batch_size, num_frames, height, width, 3))
  mixed_5c, _ = s3dg.s3dg_base(inputs)
  self.assertTrue(mixed_5c.op.name.startswith('InceptionV1/Mixed_5c'))
  self.assertListEqual(mixed_5c.get_shape().as_list(),
                       [batch_size, 8, 4, 4, 1024])
def testBuildBaseNetwork(self):
  """Checks the default s3dg_base build: output name, shape and endpoints.

  Feeds a [5, 64, 224, 224, 3] random tensor to `s3dg.s3dg_base` with default
  arguments and verifies that the returned tensor's op name starts with
  'InceptionV1/Mixed_5c', that its static shape is
  [batch_size, 8, 7, 7, 1024], and that the end_points dictionary has exactly
  the expected endpoint names.
  """
  batch_size = 5
  num_frames = 64
  height, width = 224, 224

  # tf.random.uniform is the namespaced name of the legacy tf.random_uniform
  # alias, and is what the other tests in this file already call.
  inputs = tf.random.uniform((batch_size, num_frames, height, width, 3))
  # The returned tensor is the 'Mixed_5c' endpoint (asserted below), so it is
  # named accordingly.
  mixed_5c, end_points = s3dg.s3dg_base(inputs)
  self.assertTrue(mixed_5c.op.name.startswith('InceptionV1/Mixed_5c'))
  self.assertListEqual(mixed_5c.get_shape().as_list(),
                       [batch_size, 8, 7, 7, 1024])
  expected_endpoints = [
      'Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', 'Conv2d_2c_3x3',
      'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c', 'MaxPool_4a_3x3', 'Mixed_4b',
      'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_4f', 'MaxPool_5a_2x2',
      'Mixed_5b', 'Mixed_5c',
  ]
  # Materialize the key view so the comparison receives a plain list on both
  # Python 2 and Python 3.
  self.assertItemsEqual(list(end_points.keys()), expected_endpoints)
def testBuildOnlyUptoFinalEndpointNoGating(self):
  """Checks that each final_endpoint truncates the build (gating disabled).

  For every endpoint name, builds `s3dg.s3dg_base` with `gating_startat=None`
  up to that endpoint and verifies that the returned tensor's op name starts
  with 'InceptionV1/<endpoint>' and that end_points holds exactly the
  endpoints up to and including it.
  """
  batch_size = 5
  num_frames = 64
  height, width = 224, 224
  endpoints = [
      'Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', 'Conv2d_2c_3x3',
      'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c', 'MaxPool_4a_3x3', 'Mixed_4b',
      'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_4f', 'MaxPool_5a_2x2',
      'Mixed_5b', 'Mixed_5c',
  ]
  for index, endpoint in enumerate(endpoints):
    # Each endpoint is built in its own fresh graph; presumably so that op
    # names are not affected by earlier builds -- TODO confirm.
    with tf.Graph().as_default():
      # tf.random.uniform is the namespaced name of the legacy
      # tf.random_uniform alias, and is what the other tests in this file
      # already call.
      inputs = tf.random.uniform((batch_size, num_frames, height, width, 3))
      out_tensor, end_points = s3dg.s3dg_base(
          inputs, final_endpoint=endpoint, gating_startat=None)
      # Kept from the original test so its output stays the same.
      print(endpoint, out_tensor.op.name)
      expected_prefix = 'InceptionV1/' + endpoint
      self.assertTrue(out_tensor.op.name.startswith(expected_prefix))
      # Compare endpoint names explicitly rather than passing the dict.
      self.assertItemsEqual(endpoints[:index + 1], list(end_points.keys()))