Example #1
    def __init__(self,
                 embed_dims,
                 feedforward_channels,
                 act_cfg=dict(type='GELU'),
                 ffn_drop=0.,
                 dropout_layer=None,
                 use_conv=False,
                 init_cfg=None):
        super(MixFFN, self).__init__(init_cfg=init_cfg)

        self.embed_dims = embed_dims
        self.feedforward_channels = feedforward_channels
        self.act_cfg = act_cfg
        activate = build_activation_layer(act_cfg)

        in_channels = embed_dims
        fc1 = Conv2d(
            in_channels=in_channels,
            out_channels=feedforward_channels,
            kernel_size=1,
            stride=1,
            bias=True)
        if use_conv:
            # 3x3 depthwise conv to provide positional encoding information
            dw_conv = Conv2d(
                in_channels=feedforward_channels,
                out_channels=feedforward_channels,
                kernel_size=3,
                stride=1,
                padding=(3 - 1) // 2,
                bias=True,
                groups=feedforward_channels)
        fc2 = Conv2d(
            in_channels=feedforward_channels,
            out_channels=in_channels,
            kernel_size=1,
            stride=1,
            bias=True)
        drop = nn.Dropout(ffn_drop)
        # layer order: fc1 -> (optional depthwise conv) -> activation -> drop -> fc2 -> drop
        layers = [fc1, activate, drop, fc2, drop]
        if use_conv:
            layers.insert(1, dw_conv)
        self.layers = Sequential(*layers)
        self.dropout_layer = build_dropout(
            dropout_layer) if dropout_layer else torch.nn.Identity()
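
For context, a minimal sketch of how a forward pass could apply the layers built above, assuming the module receives a (B, N, C) token sequence plus an hw_shape tuple describing the 2D layout (the signature and the inline reshapes are assumptions, not part of the snippet):

    def forward(self, x, hw_shape, identity=None):
        # x: (B, N, C) tokens; hw_shape: (H, W) with N == H * W (assumed layout)
        B, N, C = x.shape
        H, W = hw_shape
        out = x.transpose(1, 2).reshape(B, C, H, W)  # tokens -> feature map for the Conv2d stack
        out = self.layers(out)
        out = out.flatten(2).transpose(1, 2)         # feature map -> tokens
        if identity is None:
            identity = x
        return identity + self.dropout_layer(out)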
Example #2
    def __init__(self,
                 embed_dims,
                 num_heads,
                 feedforward_channels,
                 drop_rate=0.,
                 attn_drop_rate=0.,
                 drop_path_rate=0.,
                 num_fcs=2,
                 qkv_bias=True,
                 act_cfg=dict(type='GELU'),
                 norm_cfg=dict(type='LN'),
                 sr_ratio=1.,
                 init_cfg=None):
        super(GSAEncoderLayer, self).__init__(init_cfg=init_cfg)

        self.norm1 = build_norm_layer(norm_cfg, embed_dims, postfix=1)[1]
        # global sub-sampled attention: keys/values are spatially reduced by sr_ratio
        self.attn = GlobalSubsampledAttention(embed_dims=embed_dims,
                                              num_heads=num_heads,
                                              attn_drop=attn_drop_rate,
                                              proj_drop=drop_rate,
                                              dropout_layer=dict(
                                                  type='DropPath',
                                                  drop_prob=drop_path_rate),
                                              qkv_bias=qkv_bias,
                                              norm_cfg=norm_cfg,
                                              sr_ratio=sr_ratio)

        self.norm2 = build_norm_layer(norm_cfg, embed_dims, postfix=2)[1]
        self.ffn = FFN(embed_dims=embed_dims,
                       feedforward_channels=feedforward_channels,
                       num_fcs=num_fcs,
                       ffn_drop=drop_rate,
                       dropout_layer=dict(type='DropPath',
                                          drop_prob=drop_path_rate),
                       act_cfg=act_cfg,
                       add_identity=False)

        self.drop_path = build_dropout(
            dict(type='DropPath', drop_prob=drop_path_rate)
        ) if drop_path_rate > 0. else nn.Identity()
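
A minimal sketch of the pre-norm residual forward such a block typically implements, assuming a (B, N, C) token input and an hw_shape tuple that is forwarded to the sub-sampled attention (both names are assumptions, not shown in the snippet):

    def forward(self, x, hw_shape):
        # pre-norm residual branches: global sub-sampled attention, then FFN,
        # each wrapped in drop path (stochastic depth)
        x = x + self.drop_path(self.attn(self.norm1(x), hw_shape))
        x = x + self.drop_path(self.ffn(self.norm2(x)))
        return x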
Example #3
    def __init__(self,
                 embed_dims,
                 num_heads,
                 feedforward_channels,
                 attn_drop_rate=0.,
                 drop_path_rate=0.,
                 num_fcs=2,
                 bias='qv_bias',
                 act_cfg=dict(type='GELU'),
                 norm_cfg=dict(type='LN'),
                 window_size=None,
                 attn_cfg=dict(),
                 ffn_cfg=dict(add_identity=False),
                 init_values=None):
        # the attention needs the window size (for its relative position bias);
        # qk_scale is left at its default
        attn_cfg.update(dict(window_size=window_size, qk_scale=None))

        super(BEiTTransformerEncoderLayer,
              self).__init__(embed_dims=embed_dims,
                             num_heads=num_heads,
                             feedforward_channels=feedforward_channels,
                             attn_drop_rate=attn_drop_rate,
                             # the parent's drop path / dropout are disabled;
                             # this subclass applies its own drop path below
                             drop_path_rate=0.,
                             drop_rate=0.,
                             num_fcs=num_fcs,
                             qkv_bias=bias,
                             act_cfg=act_cfg,
                             norm_cfg=norm_cfg,
                             attn_cfg=attn_cfg,
                             ffn_cfg=ffn_cfg)

        # NOTE: drop path for stochastic depth, we shall see if
        # this is better than dropout here
        dropout_layer = dict(type='DropPath', drop_prob=drop_path_rate)
        self.drop_path = build_dropout(
            dropout_layer) if drop_path_rate > 0. else nn.Identity()
        # LayerScale: learnable per-channel scales for the attention and FFN
        # branches, initialized to init_values
        self.gamma_1 = nn.Parameter(init_values * torch.ones((embed_dims)),
                                    requires_grad=True)
        self.gamma_2 = nn.Parameter(init_values * torch.ones((embed_dims)),
                                    requires_grad=True)
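
A minimal sketch of how the drop path and LayerScale parameters built above would typically be used, assuming the parent class provides self.norm1, self.norm2, self.attn and self.ffn (an assumption based on the super().__init__ call):

    def forward(self, x):
        # gamma_1 / gamma_2 scale the attention and FFN branches before the
        # residual add; drop path randomly skips each branch during training
        x = x + self.drop_path(self.gamma_1 * self.attn(self.norm1(x)))
        x = x + self.drop_path(self.gamma_2 * self.ffn(self.norm2(x)))
        return x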
Example #4
    def __init__(self,
                 embed_dims,
                 num_heads,
                 feedforward_channels,
                 drop_rate=0.,
                 attn_drop_rate=0.,
                 drop_path_rate=0.,
                 num_fcs=2,
                 qkv_bias=True,
                 qk_scale=None,
                 act_cfg=dict(type='GELU'),
                 norm_cfg=dict(type='LN'),
                 window_size=1,
                 init_cfg=None):

        super(LSAEncoderLayer, self).__init__(init_cfg=init_cfg)

        self.norm1 = build_norm_layer(norm_cfg, embed_dims, postfix=1)[1]
        # locally-grouped self-attention: attention is computed within
        # window_size x window_size sub-windows
        self.attn = LocallyGroupedSelfAttention(embed_dims, num_heads,
                                                qkv_bias, qk_scale,
                                                attn_drop_rate, drop_rate,
                                                window_size)

        self.norm2 = build_norm_layer(norm_cfg, embed_dims, postfix=2)[1]
        self.ffn = FFN(embed_dims=embed_dims,
                       feedforward_channels=feedforward_channels,
                       num_fcs=num_fcs,
                       ffn_drop=drop_rate,
                       dropout_layer=dict(type='DropPath',
                                          drop_prob=drop_path_rate),
                       act_cfg=act_cfg,
                       add_identity=False)

        self.drop_path = build_dropout(
            dict(type='DropPath', drop_prob=drop_path_rate)
        ) if drop_path_rate > 0. else nn.Identity()
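
A hypothetical usage sketch; every argument value and the call signature below are illustrative assumptions, meant only to show how window_size bounds the locally-grouped attention while the residual structure mirrors Example #2:

layer = LSAEncoderLayer(
    embed_dims=64,
    num_heads=2,
    feedforward_channels=256,
    drop_path_rate=0.1,
    window_size=7)                 # attention restricted to 7x7 sub-windows
out = layer(tokens, hw_shape)      # tokens: (B, H*W, 64); hw_shape: (H, W)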