def __init__(self, channels, stride, downsample=False, in_channels=0, **kwargs):
    super(BottleneckV2, self).__init__(**kwargs)
    self.bn1 = nn.BatchNorm()
    self.conv1 = nn.Conv3D(channels // 4, kernel_size=1, strides=1, use_bias=False)
    self.bn2 = nn.BatchNorm()
    self.conv2 = _conv3x3(channels // 4, stride, channels // 4)
    self.bn3 = nn.BatchNorm()
    self.conv3 = nn.Conv3D(channels, kernel_size=1, strides=1, use_bias=False)
    if downsample:
        self.downsample = nn.Conv3D(channels, 1, stride, use_bias=False,
                                    in_channels=in_channels)
    else:
        self.downsample = None
def __init__(self, inplanes, planes, strides=1, downsample=None, head_conv=1,
             norm_layer=BatchNorm, norm_kwargs=None, layer_name=''):
    super(Bottleneck, self).__init__()
    bottleneck = nn.HybridSequential(prefix=layer_name)
    with bottleneck.name_scope():
        if head_conv == 1:
            self.conv1 = nn.Conv3D(in_channels=inplanes, channels=planes,
                                   kernel_size=1, use_bias=False)
            self.bn1 = norm_layer(in_channels=planes,
                                  **({} if norm_kwargs is None else norm_kwargs))
        elif head_conv == 3:
            self.conv1 = nn.Conv3D(in_channels=inplanes, channels=planes,
                                   kernel_size=(3, 1, 1), padding=(1, 0, 0),
                                   use_bias=False)
            self.bn1 = norm_layer(in_channels=planes,
                                  **({} if norm_kwargs is None else norm_kwargs))
        else:
            raise ValueError("Unsupported head_conv!")
        self.conv2 = nn.Conv3D(in_channels=planes, channels=planes,
                               kernel_size=(1, 3, 3),
                               strides=(1, strides, strides),
                               padding=(0, 1, 1), use_bias=False)
        self.bn2 = norm_layer(in_channels=planes,
                              **({} if norm_kwargs is None else norm_kwargs))
        self.conv3 = nn.Conv3D(in_channels=planes,
                               channels=planes * self.expansion,
                               kernel_size=1, strides=1, use_bias=False)
        self.bn3 = norm_layer(in_channels=planes * self.expansion,
                              gamma_initializer='zeros',
                              **({} if norm_kwargs is None else norm_kwargs))
        self.relu = nn.Activation('relu')
        self.downsample = downsample
def __init__(self, in_channel, **kwargs):
    super(TCL, self).__init__()
    self.branch1 = nn.HybridSequential()
    self.branch1.add(
        nn.Conv3D(in_channels=in_channel, channels=32,
                  kernel_size=(3, 1, 1), strides=(1, 1, 1), padding=(1, 0, 0),
                  weight_initializer=init.Xavier(), bias_initializer='zero'),
        nn.Activation('relu'),
        # nn.BatchNorm(),
        nn.MaxPool3D(pool_size=(2, 1, 1), strides=(2, 1, 1)))
    self.branch2 = nn.HybridSequential()
    self.branch2.add(
        nn.Conv3D(in_channels=in_channel, channels=32,
                  kernel_size=(5, 1, 1), strides=(1, 1, 1), padding=(2, 0, 0),
                  weight_initializer=init.Xavier(), bias_initializer='zero'),
        nn.Activation('relu'),
        # nn.BatchNorm(),
        nn.MaxPool3D(pool_size=(2, 1, 1), strides=(2, 1, 1)))
def __init__(self, in_channel, out_channel, spatial_stride=1,
             temporal_stride=1, **kwargs):
    super(Res21D_Block, self).__init__()
    self.MidChannel1 = int((27 * in_channel * out_channel) /
                           (9 * in_channel + 3 * out_channel))
    self.MidChannel2 = int((27 * out_channel * out_channel) / (12 * out_channel))
    self.conv1_2D = nn.Conv3D(in_channels=in_channel, channels=self.MidChannel1,
                              kernel_size=(1, 3, 3),
                              strides=(1, spatial_stride, spatial_stride),
                              padding=(0, 1, 1),
                              weight_initializer=init.Xavier(),
                              bias_initializer='zero')
    self.bn1_2D = nn.BatchNorm(in_channels=self.MidChannel1)
    self.conv1_1D = nn.Conv3D(in_channels=self.MidChannel1, channels=out_channel,
                              kernel_size=(3, 1, 1),
                              strides=(temporal_stride, 1, 1), padding=(1, 0, 0),
                              weight_initializer=init.Xavier(),
                              bias_initializer='zero')
    self.bn1_1D = nn.BatchNorm(in_channels=out_channel)
    self.conv2_2D = nn.Conv3D(in_channels=out_channel, channels=self.MidChannel2,
                              kernel_size=(1, 3, 3), strides=(1, 1, 1),
                              padding=(0, 1, 1),
                              weight_initializer=init.Xavier(),
                              bias_initializer='zero')
    self.bn2_2D = nn.BatchNorm(in_channels=self.MidChannel2)
    self.conv2_1D = nn.Conv3D(in_channels=self.MidChannel2, channels=out_channel,
                              kernel_size=(3, 1, 1), strides=(1, 1, 1),
                              padding=(1, 0, 0),
                              weight_initializer=init.Xavier(),
                              bias_initializer='zero')
    self.bn2_1D = nn.BatchNorm(in_channels=out_channel)
    self.relu = nn.Activation('relu')
    if in_channel != out_channel or spatial_stride != 1 or temporal_stride != 1:
        self.down_sample = nn.HybridSequential()
        self.down_sample.add(
            nn.Conv3D(in_channels=in_channel, channels=out_channel,
                      kernel_size=(1, 1, 1),
                      strides=(temporal_stride, spatial_stride, spatial_stride),
                      weight_initializer=init.Xavier(), use_bias=False),
            nn.BatchNorm(in_channels=out_channel))
    else:
        self.down_sample = None
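# Why the MidChannel formulas above: R(2+1)D chooses the intermediate channel
# count M so that the factorized (1x3x3 -> 3x1x1) pair costs roughly the same
# number of parameters as a full 3x3x3 convolution, i.e.
#   M = floor(t * d*d * N_in * N_out / (d*d * N_in + t * N_out))  with t = d = 3,
# which reduces to 27 * N_in * N_out / (9 * N_in + 3 * N_out).
# A quick sanity check with hypothetical channel counts (not from the source):
in_c, out_c = 64, 128
mid1 = int((27 * in_c * out_c) / (9 * in_c + 3 * out_c))
mid2 = int((27 * out_c * out_c) / (12 * out_c))
print(mid1, mid2)  # 230 288 -- close to the 3x3x3 parameter budget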
def __init__(self, channels, stride, downsample=False, in_channels=0, **kwargs):
    super(BottleneckV1, self).__init__(**kwargs)
    self.body = nn.HybridSequential(prefix='')
    self.body.add(nn.Conv3D(channels // 4, kernel_size=1, strides=stride))
    self.body.add(nn.BatchNorm())
    self.body.add(nn.Activation('relu'))
    self.body.add(_conv3x3(channels // 4, 1, channels // 4))
    self.body.add(nn.BatchNorm())
    self.body.add(nn.Activation('relu'))
    self.body.add(nn.Conv3D(channels, kernel_size=1, strides=1))
    self.body.add(nn.BatchNorm())
    if downsample:
        self.downsample = nn.HybridSequential(prefix='')
        self.downsample.add(nn.Conv3D(channels, kernel_size=1, strides=stride,
                                      use_bias=False, in_channels=in_channels))
        self.downsample.add(nn.BatchNorm())
    else:
        self.downsample = None
def __init__(self, inplanes, planes, midplanes, stride=1, padding=1,
             norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
    super(Conv2Plus1D, self).__init__()
    with self.name_scope():
        self.conv1 = nn.Conv3D(in_channels=inplanes, channels=midplanes,
                               kernel_size=(1, 3, 3),
                               strides=(1, stride, stride),
                               padding=(0, padding, padding),
                               use_bias=False)
        self.bn1 = norm_layer(in_channels=midplanes,
                              **({} if norm_kwargs is None else norm_kwargs))
        self.relu = nn.Activation('relu')
        self.conv2 = nn.Conv3D(in_channels=midplanes, channels=planes,
                               kernel_size=(3, 1, 1),
                               strides=(stride, 1, 1),
                               padding=(padding, 0, 0),
                               use_bias=False)
def get_R2plus1d(num_class=101, no_bias=0, model_depth=18,
                 final_spatial_kernel=7, final_temporal_kernel=4):
    comp_count = 0
    net = nn.Sequential()
    net.add(
        nn.Conv3D(channels=45, kernel_size=(1, 7, 7),
                  strides=(1, 2, 2), padding=(0, 3, 3)),
        nn.BatchNorm(),
        nn.Activation(activation='relu'),
        nn.Conv3D(channels=64, kernel_size=(3, 1, 1),
                  strides=(1, 1, 1), padding=(1, 0, 0)),
        nn.BatchNorm(),
        nn.Activation(activation='relu'))
    (n1, n2, n3, n4) = BLOCK_CONFIG[model_depth]

    # conv_2x
    for _ in range(n1):
        net.add(R3DBlock(input_filter=64, num_filter=64, comp_index=comp_count))
        comp_count += 1

    # conv_3x
    net.add(R3DBlock(input_filter=64, num_filter=128,
                     comp_index=comp_count, downsampling=True))
    comp_count += 1
    for _ in range(n2 - 1):
        net.add(R3DBlock(input_filter=128, num_filter=128, comp_index=comp_count))
        comp_count += 1

    # conv_4x
    net.add(R3DBlock(128, 256, comp_index=comp_count, downsampling=True))
    comp_count += 1
    for _ in range(n3 - 1):
        net.add(R3DBlock(256, 256, comp_index=comp_count))
        comp_count += 1

    # conv_5x
    net.add(R3DBlock(256, 512, comp_index=comp_count, downsampling=True))
    comp_count += 1
    for _ in range(n4 - 1):
        net.add(R3DBlock(512, 512, comp_index=comp_count))
        comp_count += 1

    # final layers
    net.add(nn.AvgPool3D(pool_size=(final_temporal_kernel,
                                    final_spatial_kernel,
                                    final_spatial_kernel),
                         strides=(1, 1, 1), padding=(0, 0, 0)))
    net.add(nn.Dense(units=num_class))  # ,activation='sigmoid',use_bias=True
    return net
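# A minimal usage sketch (not from the original source). It assumes the usual
# BLOCK_CONFIG mapping from R(2+1)D reference code, e.g. 18 -> (2, 2, 2, 2),
# and a 32-frame 112x112 clip so that conv_5x emits a (512, 4, 7, 7) feature
# map matching the default (4, 7, 7) average pool:
import mxnet as mx

net = get_R2plus1d(num_class=101, model_depth=18)
net.initialize(ctx=mx.cpu())
clip = mx.nd.random.uniform(shape=(2, 3, 32, 112, 112))  # (N, C, T, H, W)
print(net(clip).shape)  # (2, 101)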
def __init__(self, growth_rate, **kwargs):
    super(BottConvBlock, self).__init__(**kwargs)
    bott_channels = 2 * growth_rate
    self.ops = nn.HybridSequential()
    self.ops.add(
        nn.BatchNorm(),
        nn.Activation(activation='relu'),
        nn.Conv3D(channels=bott_channels, kernel_size=1, use_bias=False),
        nn.BatchNorm(),
        nn.Activation(activation='relu'),
        nn.Conv3D(channels=bott_channels, kernel_size=3,
                  strides=1, padding=1, use_bias=False))
def __init__(self, channels_num, **kwargs):
    super(BlockV1, self).__init__(**kwargs)
    self.body = nn.HybridSequential(prefix='')
    self.body.add(
        nn.BatchNorm(epsilon=0.0001),
        nn.Activation('relu'),
        nn.Conv3D(channels=channels_num, kernel_size=(3, 3, 3),
                  strides=(1, 1, 1), padding=(1, 1, 1)),
        nn.BatchNorm(epsilon=0.0001),
        nn.Activation('relu'),
        nn.Conv3D(channels=channels_num, kernel_size=(3, 3, 3),
                  strides=(1, 1, 1), padding=(1, 1, 1)))
def __init__(self, **kwargs):
    super(Refiner_hybrid, self).__init__(**kwargs)
    self.layer1 = nn.HybridSequential()
    self.layer1.add(
        nn.Conv3D(32, kernel_size=4, padding=2),
        nn.BatchNorm(in_channels=32),
        nn.LeakyReLU(0.2),
        nn.MaxPool3D(pool_size=2))
    self.layer2 = nn.HybridSequential()
    self.layer2.add(
        nn.Conv3D(64, kernel_size=4, padding=2),
        nn.BatchNorm(in_channels=64),
        nn.LeakyReLU(0.2),
        nn.MaxPool3D(pool_size=2))
    self.layer3 = nn.HybridSequential()
    self.layer3.add(
        nn.Conv3D(128, kernel_size=4, padding=2),
        nn.BatchNorm(in_channels=128),
        nn.LeakyReLU(0.2),
        nn.MaxPool3D(pool_size=2))
    self.layer4 = nn.HybridSequential()
    self.layer4.add(nn.Dense(2048, activation='relu'))
    self.layer5 = nn.HybridSequential()
    self.layer5.add(nn.Dense(8192, activation='relu'))
    self.layer6 = nn.HybridSequential()
    self.layer6.add(
        nn.Conv3DTranspose(64, kernel_size=4, strides=2, padding=1, use_bias=False),
        nn.BatchNorm(in_channels=64),
        nn.Activation('relu'))
    self.layer7 = nn.HybridSequential()
    self.layer7.add(
        nn.Conv3DTranspose(32, kernel_size=4, strides=2, padding=1, use_bias=False),
        nn.BatchNorm(in_channels=32),
        nn.Activation('relu'))
    self.layer8 = nn.HybridSequential()
    self.layer8.add(
        nn.Conv3DTranspose(1, kernel_size=4, strides=2, padding=1, use_bias=False),
        nn.Activation('sigmoid'))
def __init__(self, input_filter, num_filter, comp_index=-1, downsampling=False,
             spatial_batch_norm=True, only_spatial_downsampling=False,
             use_bias=False):
    super(R3DBlock, self).__init__()
    if comp_index == -1:
        print("error: constructing a residual block without a comp_index")
    if downsampling:
        self.use_striding = [1, 2, 2] if only_spatial_downsampling else [2, 2, 2]
    else:
        self.use_striding = [1, 1, 1]
    self.spatial_temporal_conv1 = get_spatial_temporal_conv(
        input_filter, num_filter, self.use_striding, use_bias=use_bias)
    self.bn1 = nn.BatchNorm()
    self.relu1 = nn.Activation(activation='relu')
    self.spatial_temporal_conv2 = get_spatial_temporal_conv(
        num_filter, num_filter, stride=[1, 1, 1], use_bias=use_bias)
    self.bn2 = nn.BatchNorm()
    self.num_filter = num_filter
    self.input_filter = input_filter
    self.downsampling = downsampling
    if num_filter != input_filter or downsampling:
        self.branch_conv = nn.Conv3D(channels=num_filter,
                                     kernel_size=[1, 1, 1],
                                     strides=self.use_striding,
                                     use_bias=use_bias)
        self.branch_bn = nn.BatchNorm()
def __init__(self, block, layers, channels, classes=1000,
             thumbnail=False, caption_length=50, **kwargs):
    super(ResNetV1, self).__init__(**kwargs)
    assert len(layers) == len(channels) - 1
    with self.name_scope():
        self.caption_length = caption_length
        self.features = nn.HybridSequential(prefix='')
        if thumbnail:
            self.features.add(_conv3x3(channels[0], 1, 0))
        else:
            self.features.add(nn.Conv3D(channels[0], 7, 2, 3, use_bias=False))
            self.features.add(nn.BatchNorm())
            self.features.add(nn.Activation('relu'))
            self.features.add(nn.MaxPool3D(3, 2, 1))
        for i, num_layer in enumerate(layers):
            stride = 1 if i == 0 else 2
            self.features.add(self._make_layer(block, num_layer, channels[i + 1],
                                               stride, i + 1,
                                               in_channels=channels[i]))
        self.features.add(nn.GlobalAvgPool3D())
        # self.features.add(nn.Dense(classes, in_units=in_channels))
        self.output = nn.Dense(caption_length * caption_length)
def _conv3x3(channels, stride, in_channels):
    return nn.Conv3D(channels, kernel_size=3, strides=stride, padding=1,
                     use_bias=False, in_channels=in_channels)
def add(self, layers, kernel, stride, dilation, channel):
    for l in range(layers):
        self.encoder.append(nn.Conv3D(channel, kernel_size=[kernel, 1, 1],
                                      strides=[stride, 1, 1],
                                      padding=[dilation, 0, 0],
                                      dilation=[dilation, 1, 1]))
        conv = nn.Conv3D if 'bottleneck' in self.arch else nn.Conv3DTranspose
        self.decoder.insert(0, conv(channel, kernel_size=[kernel, 1, 1],
                                    strides=[stride, 1, 1],
                                    padding=[dilation, 0, 0],
                                    dilation=[dilation, 1, 1]))
        self.register_child(self.encoder[-1])
        self.register_child(self.decoder[0])
        if self.reconstruct:
            assert 'bottleneck' not in self.arch
            assert 'encoder' in self.arch and 'decoder' in self.arch
            channel = channel if l != layers - 1 else self.feature
            self.reconstructor.append(
                nn.Conv3DTranspose(channel, kernel_size=[kernel, 1, 1],
                                   strides=[stride, 1, 1],
                                   padding=[dilation, 0, 0],
                                   dilation=[dilation, 1, 1]))
            self.register_child(self.reconstructor[-1])
        if self.norm:
            self.enorm.append(self.block['norm'](axis=2))
            self.dnorm.append(self.block['norm'](axis=2))
            self.register_child(self.enorm[-1])
            self.register_child(self.dnorm[-1])
            if self.reconstruct:
                self.rnorm.append(self.block['norm'](axis=2))
                self.register_child(self.rnorm[-1])
def __init__(self, out_channels, kernel_size, strides=(1, 1, 1),
             padding=(0, 0, 0), dilation=(1, 1, 1), groups=1, layout='NCDHW',
             activation=None, use_bias=True, weight_initializer=None,
             bias_initializer='zeros', in_channels=0, **kwargs):
    super(Conv3DRepPad, self).__init__(**kwargs)
    self.t_axis = layout.find('D')
    if isinstance(padding, int):
        padding = (padding, padding, padding)
    self.padding = padding[0]
    self.conv = nn.Conv3D(out_channels, kernel_size, strides=strides,
                          padding=(0, padding[1], padding[2]),
                          dilation=dilation, groups=groups, layout=layout,
                          activation=activation, use_bias=use_bias,
                          weight_initializer=weight_initializer,
                          bias_initializer=bias_initializer,
                          in_channels=in_channels, **kwargs)
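# The class above only defines __init__; the matching forward pass is not
# shown in the source. A minimal sketch, assuming NCDHW layout: replicate-pad
# the temporal axis with F.pad (mode='edge' takes (before, after) pairs for
# all five axes of a 5-D input), then apply the spatially-padded convolution:
def hybrid_forward(self, F, x):
    if self.padding > 0:
        # repeat the first/last frame instead of zero-padding in time, so the
        # temporal kernel never mixes in artificial black frames
        x = F.pad(x, mode='edge',
                  pad_width=(0, 0, 0, 0, self.padding, self.padding, 0, 0, 0, 0))
    return self.conv(x)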
def _make_layer_slow(self, inplanes, planes, num_blocks,
                     num_block_temp_kernel_slow=None, block=Bottleneck,
                     strides=1, head_conv=1, norm_layer=BatchNorm,
                     norm_kwargs=None, layer_name=''):
    """Build each stage within the slow branch."""
    downsample = None
    if strides != 1 or inplanes != planes * block.expansion:
        downsample = nn.HybridSequential(prefix=layer_name + 'downsample_')
        with downsample.name_scope():
            downsample.add(nn.Conv3D(in_channels=inplanes,
                                     channels=planes * block.expansion,
                                     kernel_size=1,
                                     strides=(1, strides, strides),
                                     use_bias=False))
            downsample.add(norm_layer(in_channels=planes * block.expansion,
                                      **({} if norm_kwargs is None else norm_kwargs)))

    layers = nn.HybridSequential(prefix=layer_name)
    cnt = 0
    with layers.name_scope():
        layers.add(block(inplanes=inplanes, planes=planes, strides=strides,
                         downsample=downsample, head_conv=head_conv,
                         layer_name='block%d_' % cnt))
        inplanes = planes * block.expansion
        cnt += 1
        for _ in range(1, num_blocks):
            if num_block_temp_kernel_slow is not None:
                # only the first num_block_temp_kernel_slow blocks keep the
                # temporal head convolution; the rest fall back to a 1x1x1 head
                if cnt < num_block_temp_kernel_slow:
                    layers.add(block(inplanes=inplanes, planes=planes,
                                     head_conv=head_conv,
                                     layer_name='block%d_' % cnt))
                else:
                    layers.add(block(inplanes=inplanes, planes=planes,
                                     head_conv=1,
                                     layer_name='block%d_' % cnt))
            else:
                layers.add(block(inplanes=inplanes, planes=planes,
                                 head_conv=head_conv,
                                 layer_name='block%d_' % cnt))
            cnt += 1
    return layers
def _make_3d_feature(self, config_3d_conv, config_3d_pool, batch_normal):
    featurizer = nn.HybridSequential(prefix='')
    conv_layer, conv_channels = config_3d_conv
    pool_size, pool_stride, pool_padding = config_3d_pool
    assert len(conv_layer) == len(conv_channels) == len(pool_size) \
        == len(pool_stride) == len(pool_padding)
    for i, num in enumerate(conv_layer):
        for _ in range(num):
            featurizer.add(
                nn.Conv3D(channels=conv_channels[i], kernel_size=(3, 3, 3),
                          strides=(1, 1, 1), padding=(1, 1, 1),
                          weight_initializer=init.Xavier(rnd_type='gaussian',
                                                         factor_type='out',
                                                         magnitude=2),
                          bias_initializer='zero'))
            if batch_normal:
                featurizer.add(nn.BatchNorm())
            featurizer.add(nn.Activation('relu'))
        featurizer.add(nn.MaxPool3D(pool_size=pool_size[i],
                                    strides=pool_stride[i],
                                    padding=pool_padding[i]))
    # flatten to (N, 8192)
    featurizer.add(nn.Flatten())
    return featurizer
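# A hypothetical configuration (not from the source) in the classic C3D
# layout: eight 3x3x3 convolutions in five groups, pooling only spatially in
# the first group. With a (N, 3, 16, 112, 112) clip and a (0, 1, 1) pad on the
# last pool, the output is 512 * 1 * 4 * 4 = 8192 features per sample,
# matching the "flatten to (N, 8192)" comment above. Called from __init__:
config_3d_conv = ((1, 1, 2, 2, 2), (64, 128, 256, 512, 512))
config_3d_pool = (((1, 2, 2), (2, 2, 2), (2, 2, 2), (2, 2, 2), (2, 2, 2)),  # pool_size
                  ((1, 2, 2), (2, 2, 2), (2, 2, 2), (2, 2, 2), (2, 2, 2)),  # pool_stride
                  ((0, 0, 0), (0, 0, 0), (0, 0, 0), (0, 0, 0), (0, 1, 1)))  # pool_padding
features = self._make_3d_feature(config_3d_conv, config_3d_pool, batch_normal=True)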
def _make_res_layer(self, block, planes, blocks, stride=1,
                    norm_layer=BatchNorm, norm_kwargs=None, layer_name=''):
    """Build each stage of a ResNet."""
    downsample = None
    if stride != 1 or self.inplanes != planes * block.expansion:
        downsample = nn.HybridSequential(prefix=layer_name + 'downsample_')
        with downsample.name_scope():
            downsample.add(nn.Conv3D(in_channels=self.inplanes,
                                     channels=planes * block.expansion,
                                     kernel_size=1,
                                     strides=(stride, stride, stride),
                                     use_bias=False))
            downsample.add(norm_layer(in_channels=planes * block.expansion,
                                      **({} if norm_kwargs is None else norm_kwargs)))

    layers = nn.HybridSequential(prefix=layer_name)
    with layers.name_scope():
        layers.add(block(inplanes=self.inplanes, planes=planes,
                         stride=stride, downsample=downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.add(block(inplanes=self.inplanes, planes=planes))
    return layers
def __init__(self, in_channel, out_channel, spatial_stride=1,
             temporal_stride=1, downsample=None, **kwargs):
    super(BasicBlock, self).__init__()
    self.conv1 = nn.Conv3D(in_channels=in_channel, channels=out_channel,
                           kernel_size=(3, 3, 3),
                           strides=(temporal_stride, spatial_stride, spatial_stride),
                           padding=(1, 1, 1),
                           weight_initializer=init.Xavier(rnd_type='gaussian',
                                                          factor_type='out',
                                                          magnitude=2),
                           bias_initializer='zero')
    self.conv2 = nn.Conv3D(in_channels=out_channel, channels=out_channel,
                           kernel_size=(3, 3, 3), strides=(1, 1, 1),
                           padding=(1, 1, 1),
                           weight_initializer=init.Xavier(rnd_type='gaussian',
                                                          factor_type='out',
                                                          magnitude=2),
                           bias_initializer='zero')
    self.bn1 = nn.BatchNorm(in_channels=out_channel, epsilon=0.001)
    self.bn2 = nn.BatchNorm(in_channels=out_channel, epsilon=0.001)
    self.relu1 = nn.Activation('relu')
    self.relu2 = nn.Activation('relu')
    if in_channel != out_channel or spatial_stride != 1 or temporal_stride != 1:
        self.down_sample = nn.HybridSequential()
        self.down_sample.add(
            nn.Conv3D(in_channels=in_channel, channels=out_channel,
                      kernel_size=1,
                      strides=(temporal_stride, spatial_stride, spatial_stride),
                      weight_initializer=init.Xavier(rnd_type='gaussian',
                                                     factor_type='out',
                                                     magnitude=2),
                      use_bias=False),
            nn.BatchNorm(in_channels=out_channel, epsilon=0.001))
    else:
        self.down_sample = None
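# The residual connection itself is not shown above. A minimal sketch of a
# matching forward pass (an assumption, not the source's code), following the
# standard post-activation ordering implied by the paired bn/relu members:
def hybrid_forward(self, F, x):
    identity = x if self.down_sample is None else self.down_sample(x)
    out = self.relu1(self.bn1(self.conv1(x)))
    out = self.bn2(self.conv2(out))
    # add the identity (or the strided 1x1x1 projection), then the final ReLU
    return self.relu2(out + identity)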
def __init__(self, **kwargs):
    super(Merger_hybrid, self).__init__(**kwargs)
    self.layer1 = nn.HybridSequential()
    self.layer1.add(nn.Conv3D(16, kernel_size=3, padding=1),
                    nn.BatchNorm(in_channels=16),
                    nn.LeakyReLU(0.2))
    self.layer2 = nn.HybridSequential()
    self.layer2.add(nn.Conv3D(8, kernel_size=3, padding=1),
                    nn.BatchNorm(in_channels=8),
                    nn.LeakyReLU(0.2))
    self.layer3 = nn.HybridSequential()
    self.layer3.add(nn.Conv3D(4, kernel_size=3, padding=1),
                    nn.BatchNorm(in_channels=4),
                    nn.LeakyReLU(0.2))
    self.layer4 = nn.HybridSequential()
    self.layer4.add(nn.Conv3D(2, kernel_size=3, padding=1),
                    nn.BatchNorm(in_channels=2),
                    nn.LeakyReLU(0.2))
    self.layer5 = nn.HybridSequential()
    self.layer5.add(nn.Conv3D(1, kernel_size=3, padding=1),
                    nn.BatchNorm(in_channels=1),
                    nn.LeakyReLU(0.2))
def conv3x3x3(in_planes, out_planes, spatial_stride=1, temporal_stride=1, dilation=1):
    """3x3x3 convolution with padding."""
    # padding=dilation keeps the output the same size as the input, as the
    # docstring promises (the original omitted the padding argument)
    return nn.Conv3D(in_channels=in_planes, channels=out_planes, kernel_size=3,
                     strides=(temporal_stride, spatial_stride, spatial_stride),
                     padding=dilation, dilation=dilation, use_bias=False)
def __init__(self, c2, kernel_size=3, strides=1, padding=1,
             activation='relu', bias=False):
    super(BasicConv, self).__init__()
    # pass the activation argument through instead of hard-coding 'relu'
    self.c1 = nn.Conv3D(c2, kernel_size=kernel_size, strides=strides,
                        padding=padding, activation=activation,
                        weight_initializer=init.Xavier(), use_bias=bias)
    self.bn = nn.BatchNorm()
def __init__(self, out_channels, **kwargs):
    super(InputConvBlock, self).__init__(**kwargs)
    self.ops = nn.HybridSequential()
    self.ops.add(
        nn.Conv3D(channels=out_channels, kernel_size=3, padding=1, use_bias=False),
        nn.BatchNorm(),
        nn.Activation(activation='relu'))
def conv_factory(k=3, channels=8, bn=False):
    """A convenience factory for a Conv3D block with optional BatchNorm."""
    body = nn.HybridSequential()
    if bn:
        body.add(nn.BatchNorm())
    p = (k - 1) // 2  # 'same' padding for odd kernel sizes
    body.add(nn.Conv3D(kernel_size=k, padding=p, strides=1, channels=channels))
    return body
def __init__(self, dr_rate, **kwargs):
    super(LipNet, self).__init__(**kwargs)
    with self.name_scope():
        self.conv1 = nn.Conv3D(32, kernel_size=(3, 5, 5), strides=(1, 2, 2),
                               padding=(1, 2, 2))
        self.bn1 = nn.InstanceNorm(in_channels=32)
        self.dr1 = nn.Dropout(dr_rate, axes=(1, 2))
        self.pool1 = nn.MaxPool3D((1, 2, 2), (1, 2, 2))
        self.conv2 = nn.Conv3D(64, kernel_size=(3, 5, 5), strides=(1, 1, 1),
                               padding=(1, 2, 2))
        self.bn2 = nn.InstanceNorm(in_channels=64)
        self.dr2 = nn.Dropout(dr_rate, axes=(1, 2))
        self.pool2 = nn.MaxPool3D((1, 2, 2), (1, 2, 2))
        self.conv3 = nn.Conv3D(96, kernel_size=(3, 3, 3), strides=(1, 1, 1),
                               padding=(1, 2, 2))
        self.bn3 = nn.InstanceNorm(in_channels=96)
        self.dr3 = nn.Dropout(dr_rate, axes=(1, 2))
        self.pool3 = nn.MaxPool3D((1, 2, 2), (1, 2, 2))
        self.gru1 = rnn.GRU(256, bidirectional=True)
        self.gru2 = rnn.GRU(256, bidirectional=True)
        self.dense = nn.Dense(27 + 1, flatten=False)
def __init__(self, out_channels, **kwargs):
    super(TransitionBlockDown, self).__init__(**kwargs)
    self.ops = nn.HybridSequential()
    self.ops.add(
        nn.BatchNorm(),
        nn.Activation(activation='relu'),
        nn.Conv3D(channels=out_channels, kernel_size=1, strides=1, use_bias=False),
        nn.MaxPool3D(pool_size=2, strides=2))
def _conv21d(out_channels, kernel, padding, strides,
             norm_layer=BatchNorm, norm_kwargs=None):
    """R(2+1)D block from 'A Closer Look at Spatiotemporal Convolutions
    for Action Recognition'."""
    cell = nn.HybridSequential(prefix='R(2+1)D')
    if isinstance(strides, int):
        strides = (strides, strides, strides)
    cell.add(nn.Conv3D(out_channels, kernel_size=(1, kernel, kernel),
                       strides=(1, strides[1], strides[2]),
                       padding=(0, padding, padding),
                       use_bias=False, groups=1))
    cell.add(norm_layer(epsilon=1e-5, momentum=0.9,
                        **({} if norm_kwargs is None else norm_kwargs)))
    cell.add(nn.LeakyReLU(0.1))
    if kernel == 3:
        # use replicate padding in time: zero padding would feed artificial
        # black frames into the (otherwise purely 2D) centre tap
        cell.add(Conv3DRepPad(out_channels, kernel_size=(kernel, 1, 1),
                              strides=(strides[0], 1, 1), padding=(1, 0, 0),
                              use_bias=False, groups=out_channels))
    else:
        cell.add(nn.Conv3D(out_channels, kernel_size=(kernel, 1, 1),
                           strides=(strides[0], 1, 1), padding=(padding, 0, 0),
                           use_bias=False, groups=out_channels))
    # cell.add(nn.LeakyReLU(0.1))  # this breaks the ImageNet-pretrain flow
    return cell
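# Note the temporal convolution above uses groups=out_channels, i.e. it is
# depthwise in time: each channel gets its own k x 1 x 1 filter. A quick
# parameter comparison (hypothetical sizes, not from the source) against a
# full 3x3x3 convolution:
cin, cout, k = 64, 128, 3
full_3d = cin * cout * k * k * k          # 221184 parameters
factored = cin * cout * k * k + cout * k  # spatial conv + depthwise temporal conv
print(full_3d, factored)                  # 221184 74112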
def __init__(self, channels, stride, downsample=False, in_channels=0, **kwargs):
    super(BasicBlockV2, self).__init__(**kwargs)
    self.bn1 = nn.BatchNorm()
    self.conv1 = _conv3x3(channels, stride, in_channels)
    self.bn2 = nn.BatchNorm()
    self.conv2 = _conv3x3(channels, 1, channels)
    if downsample:
        self.downsample = nn.Conv3D(channels, 1, stride, use_bias=False,
                                    in_channels=in_channels)
    else:
        self.downsample = None
def __init__(self, nclass, block, layers, dropout_ratio=0.5, num_segments=1,
             num_crop=1, feat_ext=False, use_lateral=False, init_std=0.001,
             ctx=None, partial_bn=False, norm_layer=BatchNorm,
             norm_kwargs=None, **kwargs):
    super(R2Plus1D, self).__init__()
    self.partial_bn = partial_bn
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std
    self.num_segments = num_segments
    self.num_crop = num_crop
    self.feat_ext = feat_ext
    self.use_lateral = use_lateral
    self.inplanes = 64
    self.feat_dim = 512 * block.expansion

    with self.name_scope():
        self.conv1 = nn.Conv3D(in_channels=3, channels=45,
                               kernel_size=(1, 7, 7), strides=(1, 2, 2),
                               padding=(0, 3, 3), use_bias=False)
        self.bn1 = norm_layer(in_channels=45,
                              **({} if norm_kwargs is None else norm_kwargs))
        self.relu = nn.Activation('relu')
        self.conv2 = conv3x1x1(in_planes=45, out_planes=64)
        self.bn2 = norm_layer(in_channels=64,
                              **({} if norm_kwargs is None else norm_kwargs))

        if self.partial_bn:
            if norm_kwargs is not None:
                norm_kwargs['use_global_stats'] = True
            else:
                norm_kwargs = {'use_global_stats': True}

        self.layer1 = self._make_res_layer(block=block, planes=64,
                                           blocks=layers[0],
                                           layer_name='layer1_')
        self.layer2 = self._make_res_layer(block=block, planes=128,
                                           blocks=layers[1], stride=2,
                                           layer_name='layer2_')
        self.layer3 = self._make_res_layer(block=block, planes=256,
                                           blocks=layers[2], stride=2,
                                           layer_name='layer3_')
        self.layer4 = self._make_res_layer(block=block, planes=512,
                                           blocks=layers[3], stride=2,
                                           layer_name='layer4_')
        self.avgpool = nn.GlobalAvgPool3D()
        self.dropout = nn.Dropout(rate=self.dropout_ratio)
        self.fc = nn.Dense(in_units=self.feat_dim, units=nclass,
                           weight_initializer=init.Normal(sigma=self.init_std))
def get_spatial_temporal_conv(in_filters, out_filter, stride, use_bias=False):
    blk = nn.HybridSequential()
    # R(2+1)D parameter-matching rule for the intermediate channel count
    middle_filters = int((3 * in_filters * out_filter * 3 * 3) /
                         (in_filters * 3 * 3 + 3 * out_filter))
    # print("Number of middle filters: {0}".format(middle_filters))
    blk.add(
        # stride is (t, h, w); the original indexed stride[0], stride[1] here,
        # which silently dropped the width stride for spatial-only downsampling
        nn.Conv3D(channels=middle_filters, kernel_size=(1, 3, 3),
                  strides=(1, stride[1], stride[2]), padding=(0, 1, 1),
                  use_bias=use_bias),
        nn.BatchNorm(),
        nn.Activation(activation='relu'),
        nn.Conv3D(channels=out_filter, kernel_size=(3, 1, 1),
                  strides=(stride[0], 1, 1), padding=(1, 0, 0),
                  use_bias=use_bias))
    return blk
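# A quick shape check of the factorized block (hypothetical sizes, not from
# the source): with stride [2, 2, 2] both time and space are halved, and the
# middle filter count comes out to 230 for 64 -> 128 channels:
import mxnet as mx

blk = get_spatial_temporal_conv(64, 128, stride=[2, 2, 2])
blk.initialize()
x = mx.nd.random.uniform(shape=(1, 64, 8, 56, 56))  # (N, C, T, H, W)
print(blk(x).shape)  # (1, 128, 4, 28, 28)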