def __init__(self, block, layers, channels, classes=1000, thumbnail=False, caption_length=50, **kwargs):
    """Build a 3D ResNetV1 trunk topped with a caption-grid dense head.

    Parameters
    ----------
    block : Block class
        Residual block type, instantiated per stage via ``self._make_layer``.
    layers : list of int
        Number of residual blocks in each stage.
    channels : list of int
        Channel widths: ``channels[0]`` is the stem width and
        ``channels[i + 1]`` the output width of stage ``i``, hence
        ``len(channels) == len(layers) + 1``.
    classes : int, default 1000
        Not used by this head; kept for signature compatibility with the
        classification variants of this constructor.
    thumbnail : bool, default False
        If True use a stride-1 3x3 stem (small inputs); otherwise a
        7x7/stride-2 conv + BN + ReLU + max-pool stem.
    caption_length : int, default 50
        The output Dense layer emits ``caption_length * caption_length``
        units.
    """
    super(ResNetV1, self).__init__(**kwargs)
    # One stage per entry in `layers`; `channels` carries one extra leading
    # entry for the stem width.
    assert len(layers) == len(channels) - 1
    with self.name_scope():
        self.caption_length = caption_length
        self.features = nn.HybridSequential(prefix='')
        if thumbnail:
            # Low-resolution inputs: single 3x3 conv, no downsampling.
            self.features.add(_conv3x3(channels[0], 1, 0))
        else:
            self.features.add(
                nn.Conv3D(channels[0], 7, 2, 3, use_bias=False))
            self.features.add(nn.BatchNorm())
            self.features.add(nn.Activation('relu'))
            self.features.add(nn.MaxPool3D(3, 2, 1))
        for i, num_layer in enumerate(layers):
            # First stage keeps resolution; later stages downsample by 2.
            stride = 1 if i == 0 else 2
            self.features.add(
                self._make_layer(block, num_layer, channels[i + 1], stride,
                                 i + 1, in_channels=channels[i]))
        self.features.add(nn.GlobalAvgPool3D())
        # NOTE(review): `classes` is ignored here — the head is a flat
        # caption grid, presumably reshaped downstream to
        # (caption_length, caption_length). Confirm against the caller.
        self.output = nn.Dense(caption_length * caption_length)
def __init__(self, block, layers, channels, classes=400, **kwargs):
    """R(2+1)D v1 backbone: factorized (2+1)D stem, residual stages, classifier.

    Parameters
    ----------
    block : Block class
        Residual block type, instantiated per stage via ``self._make_layer``.
    layers : list of int
        Number of residual blocks in each stage.
    channels : list of int
        ``channels[0]`` is the stem width, ``channels[i + 1]`` the output
        width of stage ``i`` (``len(channels) == len(layers) + 1``).
    classes : int, default 400
        Number of classification outputs (Kinetics-400 default).
    """
    super(R21DV1, self).__init__(**kwargs)
    assert len(layers) == len(channels) - 1
    with self.name_scope():
        self.features = nn.HybridSequential(prefix='')
        # (2+1)D stem: 3x7x7 conv factorized through 45 mid channels,
        # spatial stride 2, temporal stride 1.
        self.features.add(
            _conv21d(channels[0], [3, 7, 7], strides=[1, 2, 2],
                     padding=[1, 3, 3], mid_channels=45, prefix='init_'))
        # use_global_stats=True freezes the running statistics — presumably
        # to match ported pretrained weights; confirm before training from
        # scratch.
        self.features.add(
            nn.BatchNorm(epsilon=1e-3, momentum=0.9, use_global_stats=True,
                         prefix='init_'))
        # LeakyReLU with slope 0.0 is numerically identical to ReLU.
        self.features.add(nn.LeakyReLU(0.0))
        for i, num_layer in enumerate(layers):
            # First stage keeps resolution; later stages downsample by 2.
            stride = 1 if i == 0 else 2
            self.features.add(
                self._make_layer(block, num_layer, channels[i + 1], stride,
                                 i + 1, in_channels=channels[i]))
        self.avg = nn.GlobalAvgPool3D()
        self.dense = nn.Dense(classes, in_units=channels[-1])
def __init__(self, nclass, block, layers, dropout_ratio=0.5, num_segments=1, num_crop=1, feat_ext=False, use_lateral=False, init_std=0.001, ctx=None, partial_bn=False, norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
    """R(2+1)D network: (1,7,7) spatial stem + 3x1x1 temporal conv, four
    residual stages, global-average-pool classifier.

    Parameters
    ----------
    nclass : int
        Number of classification outputs.
    block : Block class
        Residual block type used by ``self._make_res_layer``.
    layers : list of int
        Blocks per stage, e.g. ``[2, 2, 2, 2]`` for the 18-layer variant.
    dropout_ratio : float, default 0.5
        Dropout rate before the final Dense layer.
    num_segments, num_crop : int
        Test-time segment/crop counts, stored for use elsewhere.
    feat_ext : bool
        Stored flag — presumably enables feature-extraction mode in
        ``hybrid_forward``; not used in this constructor.
    use_lateral : bool
        Stored flag, unused in this constructor.
    init_std : float
        Std-dev of the Normal initializer for the classifier weights.
    partial_bn : bool
        If True, freeze BatchNorm statistics for all layers created AFTER
        the stem (see ordering note below).
    norm_layer, norm_kwargs
        Normalization layer class and its extra keyword arguments.
    """
    super(R2Plus1D, self).__init__()
    self.partial_bn = partial_bn
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std
    self.num_segments = num_segments
    self.num_crop = num_crop
    self.feat_ext = feat_ext
    self.use_lateral = use_lateral
    self.inplanes = 64
    self.feat_dim = 512 * block.expansion
    with self.name_scope():
        # Spatial-only stem: 1x7x7 conv onto 45 mid channels, ...
        self.conv1 = nn.Conv3D(in_channels=3, channels=45,
                               kernel_size=(1, 7, 7), strides=(1, 2, 2),
                               padding=(0, 3, 3), use_bias=False)
        self.bn1 = norm_layer(in_channels=45,
                              **({} if norm_kwargs is None else norm_kwargs))
        self.relu = nn.Activation('relu')
        # ... followed by the temporal 3x1x1 conv that completes the
        # factorized (2+1)D stem.
        self.conv2 = conv3x1x1(in_planes=45, out_planes=64)
        self.bn2 = norm_layer(in_channels=64,
                              **({} if norm_kwargs is None else norm_kwargs))
        if self.partial_bn:
            # Mutated AFTER bn1/bn2 are built, so only the residual stages
            # get frozen statistics — the stem BNs stay trainable
            # (standard "partial BN" behavior).
            if norm_kwargs is not None:
                norm_kwargs['use_global_stats'] = True
            else:
                norm_kwargs = {}
                norm_kwargs['use_global_stats'] = True
        self.layer1 = self._make_res_layer(block=block, planes=64,
                                           blocks=layers[0],
                                           layer_name='layer1_')
        self.layer2 = self._make_res_layer(block=block, planes=128,
                                           blocks=layers[1], stride=2,
                                           layer_name='layer2_')
        self.layer3 = self._make_res_layer(block=block, planes=256,
                                           blocks=layers[2], stride=2,
                                           layer_name='layer3_')
        self.layer4 = self._make_res_layer(block=block, planes=512,
                                           blocks=layers[3], stride=2,
                                           layer_name='layer4_')
        self.avgpool = nn.GlobalAvgPool3D()
        self.dropout = nn.Dropout(rate=self.dropout_ratio)
        self.fc = nn.Dense(in_units=self.feat_dim, units=nclass,
                           weight_initializer=init.Normal(sigma=self.init_std))
def test_pool():
    """Forward-check 1D/2D/3D pooling layers and MaxPool2D ceil_mode shapes."""
    # Table of (layer list, input shape) pairs — one row per dimensionality.
    cases = [
        ([nn.MaxPool1D(), nn.MaxPool1D(3), nn.MaxPool1D(3, 2),
          nn.AvgPool1D(), nn.AvgPool1D(count_include_pad=False),
          nn.GlobalAvgPool1D()],
         (1, 2, 10)),
        ([nn.MaxPool2D(), nn.MaxPool2D((3, 3)), nn.MaxPool2D(3, 2),
          nn.AvgPool2D(), nn.AvgPool2D(count_include_pad=False),
          nn.GlobalAvgPool2D()],
         (1, 2, 10, 10)),
        ([nn.MaxPool3D(), nn.MaxPool3D((3, 3, 3)), nn.MaxPool3D(3, 2),
          nn.AvgPool3D(), nn.AvgPool3D(count_include_pad=False),
          nn.GlobalAvgPool3D()],
         (1, 2, 10, 10, 10)),
    ]
    for pool_layers, in_shape in cases:
        for pool_layer in pool_layers:
            check_layer_forward(pool_layer, in_shape)

    # ceil_mode controls whether the output size rounds down (floor) or up
    # (ceil) when the window does not divide the input evenly.
    x = mx.nd.zeros((2, 2, 10, 10))
    for use_ceil, expected in ((False, (2, 2, 3, 3)), (True, (2, 2, 4, 4))):
        pool_layer = nn.MaxPool2D(3, ceil_mode=use_ceil)
        pool_layer.collect_params().initialize()
        assert pool_layer(x).shape == expected
def __init__(self, nclass, depth, num_stages=4, pretrained_base=True, num_segments=1, spatial_strides=(1, 2, 2, 2), temporal_strides=(1, 1, 1, 1), dilations=(1, 1, 1, 1), out_indices=(0, 1, 2, 3), conv1_kernel_t=5, conv1_stride_t=2, pool1_kernel_t=1, pool1_stride_t=2, inflate_freq=(1, 1, 1, 1), inflate_stride=(1, 1, 1, 1), inflate_style='3x1x1', nonlocal_stages=(-1, ), nonlocal_freq=(0, 1, 1, 0), nonlocal_cfg=None, bn_eval=True, bn_frozen=False, partial_bn=False, frozen_stages=-1, dropout_ratio=0.5, init_std=0.01, norm_layer=BatchNorm, norm_kwargs=None, ctx=None, **kwargs):
    """Inflated-3D ResNetV1 backbone with optional non-local blocks.

    Parameters
    ----------
    nclass : int
        Number of classification outputs.
    depth : int
        ResNet depth; must be a key of ``self.arch_settings``.
    num_stages : int, default 4
        Number of residual stages to build.
    spatial_strides, temporal_strides, dilations : tuples
        Per-stage stride/dilation configuration; lengths must equal
        ``num_stages``.
    out_indices : tuple of int
        Stage indices whose features are exposed; each must be < num_stages.
    conv1_kernel_t, conv1_stride_t : int
        Temporal kernel/stride of the stem convolution.
    pool1_kernel_t, pool1_stride_t : int
        Temporal kernel/stride of the stem max-pool.
    inflate_freq, inflate_style : see ``make_res_layer``
        Which blocks are temporally inflated and how; an int is broadcast
        to all stages.
    nonlocal_stages, nonlocal_freq, nonlocal_cfg
        Where and how non-local blocks are inserted; ``(-1,)`` disables them.
    bn_eval, bn_frozen, partial_bn, frozen_stages
        BatchNorm/stage freezing flags, stored for use outside this
        constructor.
    dropout_ratio, init_std : float
        Classifier dropout rate and weight-init std-dev.
    """
    super(I3D_ResNetV1, self).__init__()
    if depth not in self.arch_settings:
        raise KeyError('invalid depth {} for resnet'.format(depth))
    self.nclass = nclass
    self.depth = depth
    self.num_stages = num_stages
    self.pretrained_base = pretrained_base
    self.num_segments = num_segments
    self.spatial_strides = spatial_strides
    self.temporal_strides = temporal_strides
    self.dilations = dilations
    # All per-stage tuples must describe the same number of stages.
    assert len(spatial_strides) == len(temporal_strides) == len(
        dilations) == num_stages
    self.out_indices = out_indices
    assert max(out_indices) < num_stages
    # Broadcast scalar frequencies to one entry per stage.
    self.inflate_freqs = inflate_freq if not isinstance(
        inflate_freq, int) else (inflate_freq, ) * num_stages
    self.inflate_style = inflate_style
    self.nonlocal_stages = nonlocal_stages
    self.nonlocal_freqs = nonlocal_freq if not isinstance(
        nonlocal_freq, int) else (nonlocal_freq, ) * num_stages
    self.nonlocal_cfg = nonlocal_cfg
    self.bn_eval = bn_eval
    self.bn_frozen = bn_frozen
    self.partial_bn = partial_bn
    self.frozen_stages = frozen_stages
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std
    self.block, stage_blocks = self.arch_settings[depth]
    self.stage_blocks = stage_blocks[:num_stages]
    self.inplanes = 64

    # Stem: temporally-inflated 7x7 conv + BN + ReLU + max-pool.
    self.first_stage = nn.HybridSequential(prefix='')
    self.first_stage.add(
        nn.Conv3D(in_channels=3, channels=64,
                  kernel_size=(conv1_kernel_t, 7, 7),
                  strides=(conv1_stride_t, 2, 2),
                  padding=((conv1_kernel_t - 1) // 2, 3, 3),
                  use_bias=False))
    self.first_stage.add(
        norm_layer(in_channels=64,
                   **({} if norm_kwargs is None else norm_kwargs)))
    self.first_stage.add(nn.Activation('relu'))
    self.first_stage.add(
        nn.MaxPool3D(pool_size=(pool1_kernel_t, 3, 3),
                     strides=(pool1_stride_t, 2, 2),
                     padding=(pool1_kernel_t // 2, 1, 1)))

    # Temporal-only pool applied between stages (by the forward pass).
    self.pool2 = nn.MaxPool3D(pool_size=(2, 1, 1), strides=(2, 1, 1),
                              padding=(0, 0, 0))

    self.res_layers = nn.HybridSequential(prefix='')
    for i, num_blocks in enumerate(self.stage_blocks):
        spatial_stride = spatial_strides[i]
        temporal_stride = temporal_strides[i]
        dilation = dilations[i]
        # Channel width doubles every stage: 64, 128, 256, 512.
        planes = 64 * 2**i
        layer_name = 'layer{}_'.format(i + 1)
        res_layer = make_res_layer(self.block, self.inplanes, planes,
                                   num_blocks,
                                   spatial_stride=spatial_stride,
                                   temporal_stride=temporal_stride,
                                   dilation=dilation,
                                   inflate_freq=self.inflate_freqs[i],
                                   inflate_style=self.inflate_style,
                                   nonlocal_freq=self.nonlocal_freqs[i],
                                   nonlocal_cfg=self.nonlocal_cfg
                                   if i in self.nonlocal_stages else None,
                                   norm_layer=norm_layer,
                                   norm_kwargs=norm_kwargs,
                                   layer_name=layer_name)
        self.inplanes = planes * self.block.expansion
        self.res_layers.add(res_layer)

    self.feat_dim = self.block.expansion * 64 * 2**(
        len(self.stage_blocks) - 1)

    # We use ``GlobalAvgPool3D`` here for simplicity. Otherwise the input size must be fixed.
    # You can also use ``AvgPool3D`` and specify the arguments on your own, e.g.
    # self.st_avg = nn.AvgPool3D(pool_size=(4, 7, 7), strides=1, padding=0)
    # ``AvgPool3D`` is 10% faster, but ``GlobalAvgPool3D`` makes the code cleaner.
    self.st_avg = nn.GlobalAvgPool3D()

    self.head = nn.HybridSequential(prefix='')
    self.head.add(nn.Dropout(rate=self.dropout_ratio))
    self.fc = nn.Dense(in_units=self.feat_dim, units=nclass,
                       weight_initializer=init.Normal(sigma=self.init_std))
    self.head.add(self.fc)
    self.init_weights()
def __init__(self, nclass, block=Bottleneck, layers=None, pretrained=False, pretrained_base=False, num_segments=1, num_crop=1, bn_eval=True, bn_frozen=False, partial_bn=False, frozen_stages=-1, dropout_ratio=0.5, init_std=0.01, alpha=8, beta_inv=8, fusion_conv_channel_ratio=2, fusion_kernel_size=5, width_per_group=64, num_groups=1, slow_temporal_stride=16, fast_temporal_stride=2, slow_frames=4, fast_frames=32, norm_layer=BatchNorm, norm_kwargs=None, ctx=None, **kwargs):
    """SlowFast network: a low-frame-rate Slow pathway and a lightweight
    high-frame-rate Fast pathway joined by lateral time-strided convolutions.

    Parameters
    ----------
    nclass : int
        Number of classification outputs.
    block : Block class, default Bottleneck
        Residual block type for both pathways.
    layers : list of int
        Blocks per stage (res2..res5), e.g. ``[3, 4, 6, 3]``.
    alpha : int, default 8
        Frame-rate ratio between Fast and Slow pathways; used as the
        temporal stride of the lateral convolutions.
    beta_inv : int, default 8
        Inverse channel ratio of the Fast pathway (Fast width =
        Slow width / beta_inv).
    fusion_conv_channel_ratio, fusion_kernel_size : int
        Channel expansion and temporal kernel of the lateral fusion convs.
    width_per_group, num_groups : int
        ResNet width configuration; ``dim_inner = num_groups * width_per_group``.
    slow_temporal_stride, fast_temporal_stride, slow_frames, fast_frames
        Sampling configuration, stored for the data pipeline / forward pass.
    dropout_ratio, init_std : float
        Classifier dropout rate and weight-init std-dev.
    """
    super(SlowFast, self).__init__()
    self.num_segments = num_segments
    self.num_crop = num_crop
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std
    self.alpha = alpha
    self.beta_inv = beta_inv
    self.fusion_conv_channel_ratio = fusion_conv_channel_ratio
    self.fusion_kernel_size = fusion_kernel_size
    self.width_per_group = width_per_group
    self.num_groups = num_groups
    self.dim_inner = self.num_groups * self.width_per_group
    self.out_dim_ratio = self.beta_inv // self.fusion_conv_channel_ratio
    self.slow_temporal_stride = slow_temporal_stride
    self.fast_temporal_stride = fast_temporal_stride
    self.slow_frames = slow_frames
    self.fast_frames = fast_frames

    with self.name_scope():
        # build fast pathway
        # Fast pathway: full temporal resolution, 1/beta_inv channel width;
        # head_conv=3 gives its blocks a temporal receptive field.
        fast = nn.HybridSequential(prefix='fast_')
        with fast.name_scope():
            self.fast_conv1 = nn.Conv3D(in_channels=3,
                                        channels=self.width_per_group // self.beta_inv,
                                        kernel_size=(5, 7, 7),
                                        strides=(1, 2, 2),
                                        padding=(2, 3, 3),
                                        use_bias=False)
            self.fast_bn1 = norm_layer(in_channels=self.width_per_group // self.beta_inv,
                                       **({} if norm_kwargs is None else norm_kwargs))
            self.fast_relu = nn.Activation('relu')
            self.fast_maxpool = nn.MaxPool3D(pool_size=(1, 3, 3),
                                             strides=(1, 2, 2),
                                             padding=(0, 1, 1))
            self.fast_res2 = self._make_layer_fast(inplanes=self.width_per_group // self.beta_inv,
                                                   planes=self.dim_inner // self.beta_inv,
                                                   num_blocks=layers[0],
                                                   head_conv=3,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='fast_res2_')
            self.fast_res3 = self._make_layer_fast(inplanes=self.width_per_group * 4 // self.beta_inv,
                                                   planes=self.dim_inner * 2 // self.beta_inv,
                                                   num_blocks=layers[1],
                                                   strides=2,
                                                   head_conv=3,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='fast_res3_')
            self.fast_res4 = self._make_layer_fast(inplanes=self.width_per_group * 8 // self.beta_inv,
                                                   planes=self.dim_inner * 4 // self.beta_inv,
                                                   num_blocks=layers[2],
                                                   strides=2,
                                                   head_conv=3,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='fast_res4_')
            self.fast_res5 = self._make_layer_fast(inplanes=self.width_per_group * 16 // self.beta_inv,
                                                   planes=self.dim_inner * 8 // self.beta_inv,
                                                   num_blocks=layers[3],
                                                   strides=2,
                                                   head_conv=3,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='fast_res5_')

        # build lateral connections
        # Each lateral conv downsamples Fast features in time (stride=alpha)
        # and widens channels by fusion_conv_channel_ratio before they are
        # concatenated into the Slow pathway.
        self.lateral_p1 = nn.HybridSequential(prefix='lateral_p1_')
        with self.lateral_p1.name_scope():
            self.lateral_p1.add(nn.Conv3D(in_channels=self.width_per_group // self.beta_inv,
                                          channels=self.width_per_group // self.beta_inv * self.fusion_conv_channel_ratio,
                                          kernel_size=(self.fusion_kernel_size, 1, 1),
                                          strides=(self.alpha, 1, 1),
                                          padding=(self.fusion_kernel_size // 2, 0, 0),
                                          use_bias=False))
            self.lateral_p1.add(norm_layer(in_channels=self.width_per_group // self.beta_inv * self.fusion_conv_channel_ratio,
                                           **({} if norm_kwargs is None else norm_kwargs)))
            self.lateral_p1.add(nn.Activation('relu'))

        self.lateral_res2 = nn.HybridSequential(prefix='lateral_res2_')
        with self.lateral_res2.name_scope():
            self.lateral_res2.add(nn.Conv3D(in_channels=self.width_per_group * 4 // self.beta_inv,
                                            channels=self.width_per_group * 4 // self.beta_inv * self.fusion_conv_channel_ratio,
                                            kernel_size=(self.fusion_kernel_size, 1, 1),
                                            strides=(self.alpha, 1, 1),
                                            padding=(self.fusion_kernel_size // 2, 0, 0),
                                            use_bias=False))
            self.lateral_res2.add(norm_layer(in_channels=self.width_per_group * 4 // self.beta_inv * self.fusion_conv_channel_ratio,
                                             **({} if norm_kwargs is None else norm_kwargs)))
            self.lateral_res2.add(nn.Activation('relu'))

        self.lateral_res3 = nn.HybridSequential(prefix='lateral_res3_')
        with self.lateral_res3.name_scope():
            self.lateral_res3.add(nn.Conv3D(in_channels=self.width_per_group * 8 // self.beta_inv,
                                            channels=self.width_per_group * 8 // self.beta_inv * self.fusion_conv_channel_ratio,
                                            kernel_size=(self.fusion_kernel_size, 1, 1),
                                            strides=(self.alpha, 1, 1),
                                            padding=(self.fusion_kernel_size // 2, 0, 0),
                                            use_bias=False))
            self.lateral_res3.add(norm_layer(in_channels=self.width_per_group * 8 // self.beta_inv * self.fusion_conv_channel_ratio,
                                             **({} if norm_kwargs is None else norm_kwargs)))
            self.lateral_res3.add(nn.Activation('relu'))

        self.lateral_res4 = nn.HybridSequential(prefix='lateral_res4_')
        with self.lateral_res4.name_scope():
            self.lateral_res4.add(nn.Conv3D(in_channels=self.width_per_group * 16 // self.beta_inv,
                                            channels=self.width_per_group * 16 // self.beta_inv * self.fusion_conv_channel_ratio,
                                            kernel_size=(self.fusion_kernel_size, 1, 1),
                                            strides=(self.alpha, 1, 1),
                                            padding=(self.fusion_kernel_size // 2, 0, 0),
                                            use_bias=False))
            self.lateral_res4.add(norm_layer(in_channels=self.width_per_group * 16 // self.beta_inv * self.fusion_conv_channel_ratio,
                                             **({} if norm_kwargs is None else norm_kwargs)))
            self.lateral_res4.add(nn.Activation('relu'))

        # build slow pathway
        # Slow pathway: full width; its stage inputs include the fused
        # lateral channels (hence the "+ width // out_dim_ratio" inplanes).
        # head_conv is 1 (spatial-only) for res2/res3 and 3 for res4/res5.
        slow = nn.HybridSequential(prefix='slow_')
        with slow.name_scope():
            self.slow_conv1 = nn.Conv3D(in_channels=3,
                                        channels=self.width_per_group,
                                        kernel_size=(1, 7, 7),
                                        strides=(1, 2, 2),
                                        padding=(0, 3, 3),
                                        use_bias=False)
            self.slow_bn1 = norm_layer(in_channels=self.width_per_group,
                                       **({} if norm_kwargs is None else norm_kwargs))
            self.slow_relu = nn.Activation('relu')
            self.slow_maxpool = nn.MaxPool3D(pool_size=(1, 3, 3),
                                             strides=(1, 2, 2),
                                             padding=(0, 1, 1))
            self.slow_res2 = self._make_layer_slow(inplanes=self.width_per_group + self.width_per_group // self.out_dim_ratio,
                                                   planes=self.dim_inner,
                                                   num_blocks=layers[0],
                                                   head_conv=1,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='slow_res2_')
            self.slow_res3 = self._make_layer_slow(inplanes=self.width_per_group * 4 + self.width_per_group * 4 // self.out_dim_ratio,
                                                   planes=self.dim_inner * 2,
                                                   num_blocks=layers[1],
                                                   strides=2,
                                                   head_conv=1,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='slow_res3_')
            self.slow_res4 = self._make_layer_slow(inplanes=self.width_per_group * 8 + self.width_per_group * 8 // self.out_dim_ratio,
                                                   planes=self.dim_inner * 4,
                                                   num_blocks=layers[2],
                                                   strides=2,
                                                   head_conv=3,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='slow_res4_')
            self.slow_res5 = self._make_layer_slow(inplanes=self.width_per_group * 16 + self.width_per_group * 16 // self.out_dim_ratio,
                                                   planes=self.dim_inner * 8,
                                                   num_blocks=layers[3],
                                                   strides=2,
                                                   head_conv=3,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='slow_res5_')

        # build classifier
        # Features of both pathways are global-pooled and concatenated:
        # Fast contributes width*32/beta_inv channels, Slow width*32.
        self.avg = nn.GlobalAvgPool3D()
        self.dp = nn.Dropout(rate=self.dropout_ratio)
        self.feat_dim = self.width_per_group * 32 // self.beta_inv + self.width_per_group * 32
        self.fc = nn.Dense(in_units=self.feat_dim, units=nclass,
                           weight_initializer=init.Normal(sigma=self.init_std),
                           use_bias=True)

        self.initialize(init.MSRAPrelu(), ctx=ctx)
def __init__(self, nclass=1000, norm_layer=BatchNorm, num_segments=1, norm_kwargs=None, partial_bn=False, pretrained_base=True, dropout_ratio=0.5, init_std=0.01, ctx=None, **kwargs):
    """Inflated-3D GoogLeNet (I3D InceptionV1), optionally bootstrapped from
    2D ImageNet weights by temporal inflation.

    Parameters
    ----------
    nclass : int, default 1000
        Number of classification outputs.
    norm_layer, norm_kwargs
        Normalization layer class and its extra keyword arguments.
    num_segments : int
        Test-time segment count, stored for use elsewhere.
    partial_bn : bool
        If True, freeze BatchNorm statistics for all layers created after
        the stem conv/pool (norm_kwargs is mutated before those layers are
        built).
    pretrained_base : bool
        If True, inflate the 2D ``googlenet`` ImageNet weights into this
        3D network (convs are tiled along time and divided by the temporal
        extent so the initial response matches the 2D net on static input).
    dropout_ratio, init_std : float
        Classifier dropout rate and weight-init std-dev.
    """
    super(I3D_InceptionV1, self).__init__(**kwargs)
    self.num_segments = num_segments
    self.feat_dim = 1024
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std

    with self.name_scope():
        self.features = nn.HybridSequential(prefix='')

        # Stem: inflated 7x7x7 conv, then spatial-only max-pool.
        self.features.add(_make_basic_conv(in_channels=3,
                                           channels=64,
                                           kernel_size=7,
                                           strides=2,
                                           padding=3,
                                           norm_layer=norm_layer,
                                           norm_kwargs=norm_kwargs))
        self.features.add(nn.MaxPool3D(pool_size=(1, 3, 3),
                                       strides=(1, 2, 2),
                                       padding=(0, 1, 1)))

        if partial_bn:
            # Freeze BN stats for everything created from here on;
            # the stem BN above stays trainable.
            if norm_kwargs is not None:
                norm_kwargs['use_global_stats'] = True
            else:
                norm_kwargs = {}
                norm_kwargs['use_global_stats'] = True

        self.features.add(_make_basic_conv(in_channels=64, channels=64,
                                           kernel_size=1,
                                           norm_layer=norm_layer,
                                           norm_kwargs=norm_kwargs))
        self.features.add(_make_basic_conv(in_channels=64, channels=192,
                                           kernel_size=3, padding=(1, 1, 1),
                                           norm_layer=norm_layer,
                                           norm_kwargs=norm_kwargs))
        self.features.add(nn.MaxPool3D(pool_size=(1, 3, 3),
                                       strides=(1, 2, 2),
                                       padding=(0, 1, 1)))

        # Inception ("Mixed") blocks; first argument is input channels.
        self.features.add(_make_Mixed_3a(192, 32, 'Mixed_3a_', norm_layer, norm_kwargs))
        self.features.add(_make_Mixed_3b(256, 64, 'Mixed_3b_', norm_layer, norm_kwargs))
        self.features.add(nn.MaxPool3D(pool_size=3, strides=(2, 2, 2),
                                       padding=(1, 1, 1)))
        self.features.add(_make_Mixed_4a(480, 64, 'Mixed_4a_', norm_layer, norm_kwargs))
        self.features.add(_make_Mixed_4b(512, 64, 'Mixed_4b_', norm_layer, norm_kwargs))
        self.features.add(_make_Mixed_4c(512, 64, 'Mixed_4c_', norm_layer, norm_kwargs))
        self.features.add(_make_Mixed_4d(512, 64, 'Mixed_4d_', norm_layer, norm_kwargs))
        self.features.add(_make_Mixed_4e(528, 128, 'Mixed_4e_', norm_layer, norm_kwargs))
        self.features.add(nn.MaxPool3D(pool_size=2, strides=(2, 2, 2)))
        self.features.add(_make_Mixed_5a(832, 128, 'Mixed_5a_', norm_layer, norm_kwargs))
        self.features.add(_make_Mixed_5b(832, 128, 'Mixed_5b_', norm_layer, norm_kwargs))
        self.features.add(nn.GlobalAvgPool3D())

        self.head = nn.HybridSequential(prefix='')
        self.head.add(nn.Dropout(rate=self.dropout_ratio))
        self.output = nn.Dense(units=nclass, in_units=self.feat_dim,
                               weight_initializer=init.Normal(sigma=self.init_std))
        self.head.add(self.output)

        self.features.initialize(ctx=ctx)
        self.head.initialize(ctx=ctx)

        if pretrained_base:
            # Port 2D GoogLeNet weights by pairing parameters positionally:
            # both networks must enumerate parameters in the same order.
            inceptionv1_2d = googlenet(pretrained=True)
            weights2d = inceptionv1_2d.collect_params()
            weights3d = self.collect_params()
            assert len(weights2d.keys()) == len(weights3d.keys()), 'Number of parameters should be same.'

            dict2d = {}
            for key_id, key_name in enumerate(weights2d.keys()):
                dict2d[key_id] = key_name
            dict3d = {}
            for key_id, key_name in enumerate(weights3d.keys()):
                dict3d[key_id] = key_name
            dict_transform = {}
            for key_id, key_name in dict3d.items():
                dict_transform[dict2d[key_id]] = key_name

            cnt = 0
            for key2d, key3d in dict_transform.items():
                if 'conv' in key3d:
                    # Inflate: tile the 2D kernel along the new temporal
                    # axis and divide by its extent so the summed response
                    # on a static clip equals the 2D activation.
                    temporal_dim = weights3d[key3d].shape[2]
                    temporal_2d = nd.expand_dims(weights2d[key2d].data(), axis=2)
                    inflated_2d = nd.broadcast_to(temporal_2d, shape=[0, 0, temporal_dim, 0, 0]) / temporal_dim
                    assert inflated_2d.shape == weights3d[key3d].shape, 'the shape of %s and %s does not match. ' % (key2d, key3d)
                    weights3d[key3d].set_data(inflated_2d)
                    cnt += 1
                    print('%s is done with shape: ' % (key3d), weights3d[key3d].shape)
                if 'batchnorm' in key3d:
                    # BN parameters are shape-identical; copy directly.
                    assert weights2d[key2d].shape == weights3d[key3d].shape, 'the shape of %s and %s does not match. ' % (key2d, key3d)
                    weights3d[key3d].set_data(weights2d[key2d].data())
                    cnt += 1
                    print('%s is done with shape: ' % (key3d), weights3d[key3d].shape)
                if 'dense' in key3d:
                    # Classifier is not ported (nclass may differ).
                    cnt += 1
                    print('%s is skipped with shape: ' % (key3d), weights3d[key3d].shape)

            assert cnt == len(weights2d.keys()), 'Not all parameters have been ported, check the initialization.'
def __init__(self, nclass=1000, pretrained=False, pretrained_base=True, num_segments=1, num_crop=1, feat_ext=False, dropout_ratio=0.5, init_std=0.01, partial_bn=False, ctx=None, norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
    """Inflated-3D InceptionV3, optionally bootstrapped from 2D ImageNet
    weights by temporal inflation.

    Parameters
    ----------
    nclass : int, default 1000
        Number of classification outputs.
    pretrained : bool
        If True, full pretrained 3D weights are loaded elsewhere, so the
        2D inflation below is skipped.
    pretrained_base : bool
        If True (and not ``pretrained``), inflate 2D ``inception_v3``
        ImageNet weights into this 3D network.
    num_segments, num_crop : int
        Test-time segment/crop counts, stored for use elsewhere.
    feat_ext : bool
        Stored flag — presumably enables feature-extraction mode in the
        forward pass; not used in this constructor.
    dropout_ratio, init_std : float
        Classifier dropout rate and weight-init std-dev.
    partial_bn : bool
        If True, freeze BatchNorm statistics for all layers created after
        the first stem conv.
    norm_layer, norm_kwargs
        Normalization layer class and its extra keyword arguments.
    """
    super(I3D_InceptionV3, self).__init__(**kwargs)
    self.num_segments = num_segments
    self.num_crop = num_crop
    self.feat_dim = 2048
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std
    self.feat_ext = feat_ext

    with self.name_scope():
        self.features = nn.HybridSequential(prefix='')

        # Stem: temporal padding (1, 0, 0) keeps the time axis from
        # shrinking while the spatial dims follow the 2D InceptionV3 stem.
        self.features.add(_make_basic_conv(in_channels=3, channels=32,
                                           kernel_size=3, strides=2,
                                           padding=(1, 0, 0),
                                           norm_layer=norm_layer,
                                           norm_kwargs=norm_kwargs))

        if partial_bn:
            # Freeze BN stats for everything created from here on;
            # the first stem BN above stays trainable.
            if norm_kwargs is not None:
                norm_kwargs['use_global_stats'] = True
            else:
                norm_kwargs = {}
                norm_kwargs['use_global_stats'] = True

        self.features.add(_make_basic_conv(in_channels=32, channels=32,
                                           kernel_size=3, padding=(1, 0, 0),
                                           norm_layer=norm_layer,
                                           norm_kwargs=norm_kwargs))
        self.features.add(_make_basic_conv(in_channels=32, channels=64,
                                           kernel_size=3, padding=1,
                                           norm_layer=norm_layer,
                                           norm_kwargs=norm_kwargs))
        self.features.add(nn.MaxPool3D(pool_size=3, strides=(1, 2, 2),
                                       padding=(1, 0, 0)))
        self.features.add(_make_basic_conv(in_channels=64, channels=80,
                                           kernel_size=1,
                                           norm_layer=norm_layer,
                                           norm_kwargs=norm_kwargs))
        self.features.add(_make_basic_conv(in_channels=80, channels=192,
                                           kernel_size=3, padding=(1, 0, 0),
                                           norm_layer=norm_layer,
                                           norm_kwargs=norm_kwargs))
        self.features.add(nn.MaxPool3D(pool_size=3, strides=(1, 2, 2),
                                       padding=(1, 0, 0)))

        # Inception blocks A/B/C/D/E; first argument is input channels.
        self.features.add(_make_A(192, 32, 'A1_', norm_layer, norm_kwargs))
        self.features.add(_make_A(256, 64, 'A2_', norm_layer, norm_kwargs))
        self.features.add(_make_A(288, 64, 'A3_', norm_layer, norm_kwargs))
        self.features.add(_make_B('B_', norm_layer, norm_kwargs))
        self.features.add(_make_C(768, 128, 'C1_', norm_layer, norm_kwargs))
        self.features.add(_make_C(768, 160, 'C2_', norm_layer, norm_kwargs))
        self.features.add(_make_C(768, 160, 'C3_', norm_layer, norm_kwargs))
        self.features.add(_make_C(768, 192, 'C4_', norm_layer, norm_kwargs))
        self.features.add(_make_D('D_', norm_layer, norm_kwargs))
        self.features.add(_make_E(1280, 'E1_', norm_layer, norm_kwargs))
        self.features.add(_make_E(2048, 'E2_', norm_layer, norm_kwargs))
        self.features.add(nn.GlobalAvgPool3D())

        self.head = nn.HybridSequential(prefix='')
        self.head.add(nn.Dropout(rate=self.dropout_ratio))
        self.output = nn.Dense(units=nclass, in_units=self.feat_dim,
                               weight_initializer=init.Normal(sigma=self.init_std))
        self.head.add(self.output)

        self.features.initialize(ctx=ctx)
        self.head.initialize(ctx=ctx)

        if pretrained_base and not pretrained:
            # Port 2D InceptionV3 weights by pairing parameters
            # positionally: both networks must enumerate parameters in the
            # same order.
            inceptionv3_2d = inception_v3(pretrained=True)
            weights2d = inceptionv3_2d.collect_params()
            weights3d = self.collect_params()
            assert len(weights2d.keys()) == len(weights3d.keys()), 'Number of parameters should be same.'

            dict2d = {}
            for key_id, key_name in enumerate(weights2d.keys()):
                dict2d[key_id] = key_name
            dict3d = {}
            for key_id, key_name in enumerate(weights3d.keys()):
                dict3d[key_id] = key_name
            dict_transform = {}
            for key_id, key_name in dict3d.items():
                dict_transform[dict2d[key_id]] = key_name

            cnt = 0
            for key2d, key3d in dict_transform.items():
                if 'conv' in key3d:
                    # Inflate: tile the 2D kernel along the new temporal
                    # axis and divide by its extent so the summed response
                    # on a static clip equals the 2D activation.
                    temporal_dim = weights3d[key3d].shape[2]
                    temporal_2d = nd.expand_dims(weights2d[key2d].data(), axis=2)
                    inflated_2d = nd.broadcast_to(temporal_2d, shape=[0, 0, temporal_dim, 0, 0]) / temporal_dim
                    assert inflated_2d.shape == weights3d[key3d].shape, 'the shape of %s and %s does not match. ' % (key2d, key3d)
                    weights3d[key3d].set_data(inflated_2d)
                    cnt += 1
                    print('%s is done with shape: ' % (key3d), weights3d[key3d].shape)
                if 'batchnorm' in key3d:
                    # BN parameters are shape-identical; copy directly.
                    assert weights2d[key2d].shape == weights3d[key3d].shape, 'the shape of %s and %s does not match. ' % (key2d, key3d)
                    weights3d[key3d].set_data(weights2d[key2d].data())
                    cnt += 1
                    print('%s is done with shape: ' % (key3d), weights3d[key3d].shape)
                if 'dense' in key3d:
                    # Classifier is not ported (nclass may differ).
                    cnt += 1
                    print('%s is skipped with shape: ' % (key3d), weights3d[key3d].shape)

            assert cnt == len(weights2d.keys()), 'Not all parameters have been ported, check the initialization.'