def __init__(self, nclass, pretrained_base=True, input_channel=3, partial_bn=True,
             dropout_ratio=0.8, init_std=0.001, feat_dim=2048,
             num_segments=1, num_crop=1, **kwargs):
    super(ActionRecInceptionV3, self).__init__()
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std
    self.num_segments = num_segments
    self.num_crop = num_crop
    self.feat_dim = feat_dim

    pretrained_model = inception_v3(pretrained=pretrained_base, partial_bn=partial_bn, **kwargs)
    inception_features = pretrained_model.features
    if input_channel == 3:
        self.features = inception_features
    else:
        # Rebuild the stem so the first conv accepts `input_channel` channels
        # (e.g. stacked optical flow), then reuse the remaining pretrained layers.
        self.features = nn.HybridSequential(prefix='')
        with pretrained_model.name_scope():
            if 'norm_layer' not in dir():
                norm_layer = nn.BatchNorm
            elif norm_layer is None:
                norm_layer = nn.BatchNorm
            self.features.add(
                _make_basic_conv(in_channels=input_channel, channels=32, kernel_size=3, strides=2,
                                 norm_layer=norm_layer, norm_kwargs=None,
                                 weight_initializer=mx.init.Xavier(magnitude=2)))
        self.features[0].initialize()
        for layer in inception_features[1:]:
            self.features.add(layer)

    # Override the dropout rate of the pretrained backbone's Dropout layers.
    def update_dropout_ratio(block):
        if isinstance(block, nn.basic_layers.Dropout):
            block._rate = self.dropout_ratio
    self.apply(update_dropout_ratio)

    # Classification layer on top of the 2048-d InceptionV3 features.
    self.output = nn.Dense(units=nclass, in_units=self.feat_dim,
                           weight_initializer=init.Normal(sigma=self.init_std))
    self.output.initialize()
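# A minimal usage sketch of the segment-consensus idea behind the num_segments /
# num_crop arguments above: frame-level features from the InceptionV3 backbone are
# averaged over the sampled views before the final Dense layer. The hybrid_forward
# of this class is not shown in this excerpt, so the helper name, the shapes, and
# the reshape/mean below are illustrative assumptions, not the class's real code.
from mxnet import nd

def _segment_consensus_sketch(feat, num_segments, num_crop, feat_dim=2048):
    """Average frame-level features over segments and crops (hypothetical helper)."""
    # feat: (batch * num_segments * num_crop, feat_dim) frame-level features
    feat = feat.reshape((-1, num_segments * num_crop, feat_dim))
    return nd.mean(feat, axis=1)   # (batch, feat_dim) clip-level feature

# Example: a batch of 4 clips, 3 segments each, a single crop.
frame_feat = nd.random.uniform(shape=(4 * 3 * 1, 2048))
clip_feat = _segment_consensus_sketch(frame_feat, num_segments=3, num_crop=1)
print(clip_feat.shape)   # (4, 2048)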
def __init__(self, nclass=1000, pretrained=False, pretrained_base=True,
             num_segments=1, num_crop=1, dropout_ratio=0.5, init_std=0.01,
             partial_bn=False, ctx=None, norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
    super(I3D_InceptionV3, self).__init__(**kwargs)
    self.num_segments = num_segments
    self.num_crop = num_crop
    self.feat_dim = 2048
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std

    with self.name_scope():
        # 3D stem: inflated InceptionV3 convolutions and pooling.
        self.features = nn.HybridSequential(prefix='')
        self.features.add(_make_basic_conv(in_channels=3, channels=32, kernel_size=3, strides=2,
                                           padding=(1, 0, 0), norm_layer=norm_layer, norm_kwargs=norm_kwargs))

        if partial_bn:
            # Freeze BatchNorm statistics for all layers after the first conv.
            if norm_kwargs is not None:
                norm_kwargs['use_global_stats'] = True
            else:
                norm_kwargs = {}
                norm_kwargs['use_global_stats'] = True

        self.features.add(_make_basic_conv(in_channels=32, channels=32, kernel_size=3, padding=(1, 0, 0),
                                           norm_layer=norm_layer, norm_kwargs=norm_kwargs))
        self.features.add(_make_basic_conv(in_channels=32, channels=64, kernel_size=3, padding=1,
                                           norm_layer=norm_layer, norm_kwargs=norm_kwargs))
        self.features.add(nn.MaxPool3D(pool_size=3, strides=(1, 2, 2), padding=(1, 0, 0)))
        self.features.add(_make_basic_conv(in_channels=64, channels=80, kernel_size=1,
                                           norm_layer=norm_layer, norm_kwargs=norm_kwargs))
        self.features.add(_make_basic_conv(in_channels=80, channels=192, kernel_size=3, padding=(1, 0, 0),
                                           norm_layer=norm_layer, norm_kwargs=norm_kwargs))
        self.features.add(nn.MaxPool3D(pool_size=3, strides=(1, 2, 2), padding=(1, 0, 0)))

        # Inflated InceptionV3 A/B/C/D/E blocks, followed by global average pooling.
        self.features.add(_make_A(192, 32, 'A1_', norm_layer, norm_kwargs))
        self.features.add(_make_A(256, 64, 'A2_', norm_layer, norm_kwargs))
        self.features.add(_make_A(288, 64, 'A3_', norm_layer, norm_kwargs))
        self.features.add(_make_B('B_', norm_layer, norm_kwargs))
        self.features.add(_make_C(768, 128, 'C1_', norm_layer, norm_kwargs))
        self.features.add(_make_C(768, 160, 'C2_', norm_layer, norm_kwargs))
        self.features.add(_make_C(768, 160, 'C3_', norm_layer, norm_kwargs))
        self.features.add(_make_C(768, 192, 'C4_', norm_layer, norm_kwargs))
        self.features.add(_make_D('D_', norm_layer, norm_kwargs))
        self.features.add(_make_E(1280, 'E1_', norm_layer, norm_kwargs))
        self.features.add(_make_E(2048, 'E2_', norm_layer, norm_kwargs))
        self.features.add(nn.GlobalAvgPool3D())

        # Classifier head: dropout followed by a fully-connected layer.
        self.head = nn.HybridSequential(prefix='')
        self.head.add(nn.Dropout(rate=self.dropout_ratio))
        self.output = nn.Dense(units=nclass, in_units=self.feat_dim,
                               weight_initializer=init.Normal(sigma=self.init_std))
        self.head.add(self.output)

        self.features.initialize(ctx=ctx)
        self.head.initialize(ctx=ctx)

        if pretrained_base and not pretrained:
            # Inflate ImageNet-pretrained 2D InceptionV3 weights into this 3D network.
            inceptionv3_2d = inception_v3(pretrained=True)
            weights2d = inceptionv3_2d.collect_params()
            weights3d = self.collect_params()
            assert len(weights2d.keys()) == len(weights3d.keys()), 'Number of parameters should be same.'

            # Map 2D parameter names to 3D parameter names by position.
            dict2d = {}
            for key_id, key_name in enumerate(weights2d.keys()):
                dict2d[key_id] = key_name
            dict3d = {}
            for key_id, key_name in enumerate(weights3d.keys()):
                dict3d[key_id] = key_name
            dict_transform = {}
            for key_id, key_name in dict3d.items():
                dict_transform[dict2d[key_id]] = key_name

            cnt = 0
            for key2d, key3d in dict_transform.items():
                if 'conv' in key3d:
                    # Repeat the 2D kernel along the temporal axis and rescale.
                    temporal_dim = weights3d[key3d].shape[2]
                    temporal_2d = nd.expand_dims(weights2d[key2d].data(), axis=2)
                    inflated_2d = nd.broadcast_to(temporal_2d,
                                                  shape=[0, 0, temporal_dim, 0, 0]) / temporal_dim
                    assert inflated_2d.shape == weights3d[key3d].shape, \
                        'the shape of %s and %s does not match.' % (key2d, key3d)
                    weights3d[key3d].set_data(inflated_2d)
                    cnt += 1
                    print('%s is done with shape: ' % (key3d), weights3d[key3d].shape)
                if 'batchnorm' in key3d:
                    # BatchNorm parameters are copied unchanged.
                    assert weights2d[key2d].shape == weights3d[key3d].shape, \
                        'the shape of %s and %s does not match.' % (key2d, key3d)
                    weights3d[key3d].set_data(weights2d[key2d].data())
                    cnt += 1
                    print('%s is done with shape: ' % (key3d), weights3d[key3d].shape)
                if 'dense' in key3d:
                    # The classifier is re-initialized, not ported.
                    cnt += 1
                    print('%s is skipped with shape: ' % (key3d), weights3d[key3d].shape)

            assert cnt == len(weights2d.keys()), \
                'Not all parameters have been ported, check the initialization.'
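# A self-contained sketch of the 2D-to-3D weight inflation performed above when
# pretrained_base is True: each 2D conv kernel (out, in, h, w) is repeated along a
# new temporal axis and divided by the temporal size, so the inflated 3D kernel
# produces the same response on a video of identical frames. The kernel shape and
# temporal size here are made up for illustration.
from mxnet import nd

kernel_2d = nd.random.uniform(shape=(32, 3, 3, 3))           # (out, in, h, w)
temporal_dim = 3
kernel_3d = nd.broadcast_to(nd.expand_dims(kernel_2d, axis=2),
                            shape=(0, 0, temporal_dim, 0, 0)) / temporal_dim
print(kernel_3d.shape)                                        # (32, 3, 3, 3, 3)
# Summing over the temporal axis recovers the 2D kernel (up to float error).
print(nd.abs(nd.sum(kernel_3d, axis=2) - kernel_2d).max())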