Example #1
 def __init__(self,
              in_planes,
              rel_planes,
              out_planes,
              share_planes,
              sa_type=0,
              kernel_size=3,
              stride=1,
              dilation=1):
     super(SAM, self).__init__()
     self.sa_type, self.kernel_size, self.stride = sa_type, kernel_size, stride
     self.conv1 = nn.Conv2d(in_planes, rel_planes, kernel_size=1)
     self.conv2 = nn.Conv2d(in_planes, rel_planes, kernel_size=1)
     self.conv3 = nn.Conv2d(in_planes, out_planes, kernel_size=1)
     if sa_type == 0:
         self.conv_w = nn.Sequential(
             nn.LeakyReLU(0.2),
             nn.Conv2d(rel_planes + 2,
                       rel_planes,
                       kernel_size=1,
                       bias=False),
             nn.Conv2d(rel_planes,
                       out_planes // share_planes,
                       kernel_size=1))
         self.conv_p = nn.Conv2d(2, 2, kernel_size=1)
         self.subtraction = Subtraction(
             kernel_size,
             stride, (dilation * (kernel_size - 1) + 1) // 2,
             dilation,
             pad_mode=1)
         self.subtraction2 = Subtraction2(
             kernel_size,
             stride, (dilation * (kernel_size - 1) + 1) // 2,
             dilation,
             pad_mode=1)
         self.softmax = nn.Softmax(dim=-2)
     else:
         self.conv_w = nn.Sequential(
             nn.LeakyReLU(0.2),
             nn.Conv2d(rel_planes * (pow(kernel_size, 2) + 1),
                       out_planes // share_planes,
                       kernel_size=1,
                       bias=False),
             nn.Conv2d(out_planes // share_planes,
                       pow(kernel_size, 2) * out_planes // share_planes,
                       kernel_size=1))
         self.unfold_i = nn.Unfold(kernel_size=1,
                                   dilation=dilation,
                                   padding=0,
                                   stride=stride)
         self.unfold_j = nn.Unfold(kernel_size=kernel_size,
                                   dilation=dilation,
                                   padding=0,
                                   stride=stride)
         self.pad = nn.ReflectionPad2d(kernel_size // 2)
     self.aggregation = Aggregation(kernel_size,
                                    stride,
                                    (dilation * (kernel_size - 1) + 1) // 2,
                                    dilation,
                                    pad_mode=1)
Example #2
def make_patches(tensor, patch_size=16, scale=4):
    #mask = torch.ones_like(tensor)
    tensor = tensor.unsqueeze(0)
    stride = patch_size // 2
    wo = tensor.size(2)
    ho = tensor.size(3)
    wn = wo + stride - (wo % stride)
    hn = ho + stride - (ho % stride)
    if stride - (wo % stride) > 0 and stride - (wo % stride) < stride:
        #pad = nn.ReplicationPad2d((0,0,0,wn-wo))
        #tensor = pad(tensor)
        tensor = np.pad(tensor, ((0, 0), (0, 0), (0, wn - wo), (0, 0)),
                        mode='edge')
        tensor = torch.from_numpy(tensor)
    else:
        wn = wo
    if stride - (ho % stride) > 0 and stride - (ho % stride) < stride:
        #pad = nn.ReplicationPad2d((0,hn-ho,0,0))
        #tensor = pad(tensor)
        tensor = np.pad(tensor, ((0, 0), (0, 0), (0, 0), (0, hn - ho)),
                        mode='edge')
        tensor = torch.from_numpy(tensor)
    else:
        hn = ho
    mask = torch.ones(
        (tensor.size()[0], tensor.size()[1], tensor.size()[2] * scale,
         tensor.size()[3] * scale))
    # use torch.nn.Unfold
    unfold = nn.Unfold(kernel_size=(patch_size, patch_size), stride=stride)
    unfold2 = nn.Unfold(kernel_size=(patch_size * scale, patch_size * scale),
                        stride=stride * scale)
    # Apply to mask and original image
    mask_p = unfold2(mask)
    patches = unfold(tensor)

    patches = patches.reshape(3, patch_size, patch_size,
                              -1).permute(3, 0, 1, 2)
    if tensor.is_cuda:
        patches_base = torch.zeros(
            (patches.size()[0], patches.size()[1], patches.size()[2] * scale,
             patches.size()[3] * scale),
            device=tensor.get_device())
    else:
        patches_base = torch.zeros(
            (patches.size()[0], patches.size()[1], patches.size()[2] * scale,
             patches.size()[3] * scale))

    tiles = []
    for t in range(patches.size(0)):
        tiles.append(torch.squeeze(patches[[t], :, :, :]))
    return tiles, mask_p, patches_base, (tensor.size(2) * scale,
                                         tensor.size(3) *
                                         scale), ((wn - wo) * scale,
                                                  (hn - ho) * scale)
Example #3
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 padding=0,
                 stride=1,
                 dilation=1,
                 groups=1,
                 mixtures=1,
                 bias=False):

        super().__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.padding = padding
        self.stride = stride
        self.dilation = dilation
        self.groups = groups
        self.mixtures = mixtures

        self.conv1 = nn.Conv2d(in_channels,
                               out_channels,
                               kernel_size=1,
                               groups=groups,
                               bias=bias)
        self.conv2 = nn.Conv2d(in_channels,
                               out_channels,
                               kernel_size=1,
                               groups=groups,
                               bias=bias)
        self.conv3 = nn.Conv2d(in_channels,
                               out_channels * mixtures,
                               kernel_size=1,
                               groups=groups,
                               bias=bias)

        self.row_embeddings = nn.Parameter(
            torch.randn(out_channels, kernel_size))
        self.col_embeddings = nn.Parameter(
            torch.randn(out_channels, kernel_size))
        self.mix_embeddings = nn.Parameter(torch.randn(out_channels, mixtures))

        self.unfold1 = nn.Unfold(kernel_size=1, stride=stride)
        self.unfold2 = nn.Unfold(kernel_size=kernel_size,
                                 padding=padding,
                                 stride=stride,
                                 dilation=dilation)
        self.unfold3 = nn.Unfold(kernel_size=kernel_size,
                                 padding=padding,
                                 stride=stride,
                                 dilation=dilation)
Example #4
    def __init__(self, in_ch, dy_filter_size=9):
        super(Deform_DFN, self).__init__()
        self._filter_size = dy_filter_size
        self.filter = dy_filter_size**2

        self.en1 = single_conv(in_ch, 32)
        self.en2 = single_conv(32, 32, stride=2)
        self.en3 = single_conv(32, 64)
        self.en4 = single_conv(64, 64, stride=2)
        self.en5 = single_conv(64, 64)

        self.mid1 = single_conv(64, 128)
        self.mid2 = single_conv(128, 128)
        self.mid_h1 = single_conv(128, 128)
        self.mid_h2 = single_conv(128, 128)

        self.de1 = single_conv(128, 64)
        self.de2 = single_conv(64, 64)
        self.de_up1 = nn.Upsample(scale_factor=2, mode='nearest')
        self.de3 = single_conv(64, 64)
        self.de_up2 = nn.Upsample(scale_factor=2, mode='nearest')
        self.de4 = single_conv(64, 64)
        self.de5 = single_conv(64, 128, kernel_size=1, padding=0)

        self.dyf = nn.Conv2d(128,
                             self.filter,
                             kernel_size=1,
                             stride=1,
                             padding=0)

        self.de1_2 = single_conv(128, 64)
        self.de2_2 = single_conv(64, 64)
        self.de_up1_2 = nn.Upsample(scale_factor=2, mode='nearest')
        self.de3_2 = single_conv(64, 64)
        self.de_up2_2 = nn.Upsample(scale_factor=2, mode='nearest')
        self.de4_2 = single_conv(64, 64)
        self.de5_2 = single_conv(64, 128, kernel_size=1, padding=0)

        self.dyf_2 = nn.Conv2d(128,
                               2 * self.filter,
                               kernel_size=1,
                               stride=1,
                               padding=0)

        self.unfold = nn.Unfold(kernel_size=self._filter_size,
                                padding=self._filter_size // 2)
        self.deform = DeformConv2D(1, 1, kernel_size=self._filter_size)
        self.unfold_deform = nn.Unfold(kernel_size=(self._filter_size,
                                                    self._filter_size),
                                       padding=self._filter_size // 2,
                                       stride=self._filter_size)
Example #5
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 heads=4,
                 stride=1):
        super(SASAConv2d, self).__init__()

        assert heads > 0, 'SASAConv2d requires a positive number of heads'
        assert type(
            kernel_size) == int, 'SASAConv2d requires integer kernel_size'
        assert out_channels % heads == 0, 'SASAConv2d requires out_channels divisible by the number of heads'

        padding = (kernel_size - 1) // 2
        self.heads = heads
        self.kernel_size = kernel_size
        self.out_channels = out_channels
        self.q_conv = nn.Sequential(
            nn.Conv2d(in_channels,
                      out_channels,
                      kernel_size=1,
                      stride=1,
                      padding=0,
                      bias=False), nn.Unfold(1, 1, 0, stride),
            Rearrange('N (M D) HW -> (N HW M) () D', M=self.heads))
        self.q_conv.apply(init_weights)
        self.k_conv = nn.Sequential(
            nn.Conv2d(in_channels,
                      out_channels,
                      kernel_size=1,
                      stride=1,
                      padding=0,
                      bias=False), nn.Unfold(kernel_size, 1, padding, stride),
            RelativeEmbeddings2d(extent=kernel_size,
                                 embedding_size=out_channels),
            Rearrange('N (M D KK) HW -> (N HW M) D KK',
                      M=self.heads,
                      KK=self.kernel_size**2))
        self.k_conv.apply(init_weights)
        self.v_conv = nn.Sequential(
            nn.Conv2d(in_channels,
                      out_channels,
                      kernel_size=1,
                      stride=1,
                      padding=0,
                      bias=False), nn.Unfold(kernel_size, 1, padding, stride),
            Rearrange('N (M D KK) HW -> (N HW M) KK D',
                      M=self.heads,
                      KK=self.kernel_size**2))
        self.v_conv.apply(init_weights)
Example #6
    def __init__(self, num_frames, patchsize, nh_size, img_size):

        # -- init vars --
        self.num_frames = num_frames
        self._patchsize = patchsize
        self.nh_size = nh_size  # number of patches around center pixel
        self.img_size = img_size

        # -- unfold input patches --
        padding, stride, ipad = 1, 1, self._patchsize // 2
        self.unfold_input = nn.Unfold(self._patchsize, 1, padding, stride)

        # -- create grid to compute indice --
        index_grid = torch.arange(0,
                                  img_size**2).reshape(1, 1, img_size,
                                                       img_size)
        self.index_grid = F.pad(index_grid.type(torch.float),
                                (ipad, ipad, ipad, ipad),
                                mode='reflect')[0, 0].type(torch.long)
        self.index_pad = (self.index_grid.shape[0] - self.img_size) // 2

        # -- indexing bursts --
        self.midx = num_frames // 2 if num_frames != 2 else 1  # middle-frame index
        self.no_mid_idx = np.r_[np.r_[:self.midx],
                                np.r_[self.midx + 1:num_frames]]
        self.no_mid_idx = torch.LongTensor(self.no_mid_idx)
Example #7
 def __init__(self,
              channels,
              output_channels,
              scale_factor,
              up_kernel=5,
              up_group=1,
              encoder_kernel=3,
              encoder_dilation=1,
              compressed_channels=64):
     super(CARAFEPack, self).__init__()
     self.channels = channels
     self.scale_factor = scale_factor
     self.up_kernel = up_kernel
     self.up_group = up_group
     self.encoder_kernel = encoder_kernel
     self.encoder_dilation = encoder_dilation
     self.compressed_channels = compressed_channels
     self.channel_compressor = nn.Conv2d(channels, self.compressed_channels,
                                         1)
     self.content_encoder = nn.Conv2d(
         self.compressed_channels,
         self.up_kernel * self.up_kernel * self.up_group *
         self.scale_factor * self.scale_factor,
         self.encoder_kernel,
         padding=int((self.encoder_kernel - 1) * self.encoder_dilation / 2),
         dilation=self.encoder_dilation,
         groups=1)
     self.upsample = nn.Upsample(scale_factor=self.scale_factor,
                                 mode='nearest')
     self.unfold = nn.Unfold(kernel_size=self.up_kernel,
                             dilation=self.scale_factor,
                             padding=self.up_kernel // 2 *
                             self.scale_factor)
     self.proj = nn.Conv2d(channels, output_channels, 1)
     self.init_weights()
Example #8
def test_complex_1F():
    unfold = nn.Unfold(kernel_size=(2, 3))
    input = torch.randn(2, 5, 3, 4)
    output = unfold(input)
    output_h = unfold(input.hammerblade())
    assert output_h.device == torch.device("hammerblade")
    assert torch.allclose(output, output_h.cpu())
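The shapes in this test follow directly from the nn.Unfold contract: every output column is one flattened C * kH * kW patch, with one column per sliding position. A minimal CPU-only sanity check of that shape arithmetic (illustrative only, not part of the hammerblade test):

import torch
import torch.nn as nn

unfold = nn.Unfold(kernel_size=(2, 3))
x = torch.randn(2, 5, 3, 4)
out = unfold(x)
# Channels per column: 5 * 2 * 3 = 30; sliding positions: (3 - 2 + 1) * (4 - 3 + 1) = 4.
assert out.shape == (2, 30, 4)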
Example #9
def flatten_patches(image, ps=3):
    unfold = nn.Unfold(ps, 1, 0, 1)
    image_pad = F.pad(image, (ps // 2, ps // 2, ps // 2, ps // 2), mode='reflect')
    patches = unfold(image_pad)
    patches = rearrange(patches, 'b (c ps1 ps2) r -> b r (ps1 ps2 c)', ps1=ps, ps2=ps)
    patches = patches.contiguous()
    return patches
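A short usage sketch for flatten_patches (the dummy tensor below is illustrative): with reflection padding of ps // 2 and stride 1, one patch is extracted per pixel, each flattened to ps * ps * c values.

import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange

img = torch.randn(2, 3, 8, 8)            # (b, c, h, w)
patches = flatten_patches(img, ps=3)
# One patch per pixel: 8 * 8 = 64 rows of 3 * 3 * 3 = 27 values each.
assert patches.shape == (2, 64, 27)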
Example #10
    def __init__(self, channels, kernel_size, stride, dilation=1):
        super(Involution, self).__init__()
        self.kernel_size = kernel_size
        self.stride = stride
        self.dilation = dilation
        self.channels = channels
        reduction_ratio = 4
        self.group_channels = 16
        self.groups = self.channels // self.group_channels
        self.conv1 = ConvModule(in_channels=channels,
                                out_channels=channels // reduction_ratio,
                                kernel_size=1,
                                conv_cfg=None,
                                norm_cfg=dict(type='BN'),
                                act_cfg=dict(type='ReLU'))
        self.conv2 = ConvModule(in_channels=channels // reduction_ratio,
                                out_channels=kernel_size**2 * self.groups,
                                kernel_size=1,
                                stride=1,
                                conv_cfg=None,
                                norm_cfg=None,
                                act_cfg=None)
        if stride > 1:
            self.avgpool = nn.AvgPool2d(stride, stride)

        self.unfold = nn.Unfold(kernel_size, dilation,
                                (self.kernel_size + (self.kernel_size - 1) *
                                 (self.dilation - 1) - 1) // 2, stride)
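Example #10 only declares the involution layers; the forward pass is not shown. A sketch of how they are typically combined, closely following the involution reference implementation (the reshape order below is an assumption based on that code):

def involution_forward(self, x):
    # Predict one kernel of size k*k per group at every output location.
    weight = self.conv2(self.conv1(x if self.stride == 1 else self.avgpool(x)))
    b, _, h, w = weight.shape
    weight = weight.view(b, self.groups, self.kernel_size**2, h, w).unsqueeze(2)
    # Gather the k*k neighbourhood of every output location.
    out = self.unfold(x).view(b, self.groups, self.group_channels,
                              self.kernel_size**2, h, w)
    # Kernel-weighted sum over the neighbourhood, then merge groups back into channels.
    out = (weight * out).sum(dim=3).view(b, self.channels, h, w)
    return out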
Example #11
    def __init__(self):
        super(Net, self).__init__()
        # self.lk1 = LongConv(10, 12288)
        # self.lk2 = LongConv(10, 108300)
        # self.lk3 = LongConv(100, 3072)
        # self.lk4 = LongConv(50, 3072)
        # self.lk5 = LongConv(50, 3072)
        # self.lk6 = LongConv(50, 3072)
        #
        # self.lc = Looper(100, 3072)
        # # self.lm = LongMem(1, 3072)
        # self.lm = LongMem(10, 12288, 3, 3)
        # self.lm2 = LongMem(10, 13872, 3, 3)
        # self.lm3 = LongMem(10, 15552, 3, 3)
        #
        # self.conv = nn.Conv2d(3, 3, (3, 3), stride=1)
        # self.conv2 = nn.Conv2d(3, 3, (3, 3), stride=1)
        # self.conv3 = nn.Conv2d(3, 3, (3, 3), stride=1)
        #
        # self.tconv = nn.ConvTranspose2d(3, 3, (10, 10))
        # self.tconv2 = nn.ConvTranspose2d(3, 3, (10, 10))

        # self.m1 = MemA(10, 75*784, 75 * 15376)
        # self.m2 = MemA(10, 75 * 784, 75 * 15376)
        # self.m3 = MemA(10, 75 * 784, 75 * 15376)
        # self.m4 = MemA(10, 32 * 32 * 3, 128 * 128 * 3)
        # self.m5 = MemA(10, 16 * 16 * 3, 128 * 128 * 3)

        self.mb1 = MemB(10, 128 * 128 * 3, 128 * 128 * 3)
        # self.m2 = MemC(10, 32*32*3, 128*128*3)
        self.unfold = nn.Unfold(kernel_size=(5, 5))
        self.fold = nn.Fold(kernel_size=(5, 5),
                            output_size=(128, 128),
                            stride=5)
        self.patches = nn.Parameter(torch.randn((1, 75, 15376)))
Example #12
    def __init__(self, opt):
        super(DAML, self).__init__()

        self.opt = opt
        self.num_fea = 2  # ID + DOC
        self.user_word_embs = nn.Embedding(opt.vocab_size, opt.word_dim)  # vocab_size * 300
        self.item_word_embs = nn.Embedding(opt.vocab_size, opt.word_dim)  # vocab_size * 300

        # share
        self.word_cnn = nn.Conv2d(1, 1, (5, opt.word_dim), padding=(2, 0))
        # document-level cnn
        self.user_doc_cnn = nn.Conv2d(1, opt.filters_num, (opt.kernel_size, opt.word_dim), padding=(1, 0))
        self.item_doc_cnn = nn.Conv2d(1, opt.filters_num, (opt.kernel_size, opt.word_dim), padding=(1, 0))
        # abstract-level cnn
        self.user_abs_cnn = nn.Conv2d(1, opt.filters_num, (opt.kernel_size, opt.filters_num))
        self.item_abs_cnn = nn.Conv2d(1, opt.filters_num, (opt.kernel_size, opt.filters_num))

        self.unfold = nn.Unfold((3, opt.filters_num), padding=(1, 0))

        # fc layer
        self.user_fc = nn.Linear(opt.filters_num, opt.id_emb_size)
        self.item_fc = nn.Linear(opt.filters_num, opt.id_emb_size)

        self.uid_embedding = nn.Embedding(opt.user_num + 2, opt.id_emb_size)
        self.iid_embedding = nn.Embedding(opt.item_num + 2, opt.id_emb_size)

        self.reset_para()
Example #13
    def __init__(self, c, c_mid=64, scale=2, k_up=5, k_enc=3):
        """ The unofficial implementation of the CARAFE module.
        The details are in "https://arxiv.org/abs/1905.02188".
        Args:
            c: The channel number of the input and the output.
            c_mid: The channel number after compression.
            scale: The expected upsample scale.
            k_up: The size of the reassembly kernel.
            k_enc: The kernel size of the encoder.
        Returns:
            X: The upsampled feature map.
        """
        super(CARAFE, self).__init__()
        self.scale = scale

        self.comp = ConvBNReLU(c,
                               c_mid,
                               kernel_size=1,
                               stride=1,
                               padding=0,
                               dilation=1)
        self.enc = ConvBNReLU(c_mid, (scale * k_up)**2,
                              kernel_size=k_enc,
                              stride=1,
                              padding=k_enc // 2,
                              dilation=1,
                              use_relu=False)
        self.pix_shf = nn.PixelShuffle(scale)

        self.upsmp = nn.Upsample(scale_factor=scale, mode='nearest')
        self.unfold = nn.Unfold(kernel_size=k_up,
                                dilation=scale,
                                padding=k_up // 2 * scale)
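Example #13 declares the CARAFE submodules but omits forward; a sketch of how they are usually chained in this unofficial implementation (the einsum formulation is one common way to apply the predicted reassembly kernels):

import torch
import torch.nn.functional as F

def carafe_forward(self, X):
    b, c, h, w = X.size()
    h_, w_ = h * self.scale, w * self.scale

    # Kernel prediction: compress, encode, rearrange to the output resolution,
    # then normalize each k_up * k_up reassembly kernel with a softmax.
    W = self.comp(X)
    W = self.enc(W)
    W = self.pix_shf(W)               # (b, k_up^2, h_, w_)
    W = F.softmax(W, dim=1)

    # Feature reassembly: nearest-neighbour upsample, unfold k_up * k_up
    # neighbourhoods, and take the kernel-weighted sum at each location.
    X = self.upsmp(X)
    X = self.unfold(X)                # (b, c * k_up^2, h_ * w_)
    X = X.view(b, c, -1, h_, w_)      # (b, c, k_up^2, h_, w_)
    return torch.einsum('bkhw,bckhw->bchw', [W, X])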
Example #14
 def __init__(self, kernel_size=5):
     super(kernel_computation, self).__init__()
     self.kernel_size = kernel_size
     self.unfolder = nn.Unfold(kernel_size=kernel_size,
                               dilation=1,
                               padding=kernel_size // 2,
                               stride=1)
Example #15
def sliding_window(images: torch.Tensor, patch_size: Tuple[int, int],
                   stride: Tuple[int, int]) -> torch.Tensor:
    """Creates patches of an image.

    Args:
        images (torch.Tensor): A Torch tensor of a 4D image(s), i.e. (batch, channel, height, width).
        patch_size (Tuple[int, int]): The size of the patches to generate, e.g. 28x28 for EMNIST.
        stride (Tuple[int, int]): The stride of the sliding window.

    Returns:
        torch.Tensor: A tensor with the shape (batch, patches, channels, height, width).

    """
    unfold = nn.Unfold(kernel_size=patch_size, stride=stride)
    # Perform the sliding window; rearrange afterwards to restore the channel dimension.
    c = images.shape[1]
    patches = unfold(images)
    patches = rearrange(
        patches,
        "b (c h w) t -> b t c h w",
        c=c,
        h=patch_size[0],
        w=patch_size[1],
    )
    return patches
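A short usage sketch for sliding_window (the EMNIST-style sizes are illustrative): non-overlapping 14 x 14 patches of a 28 x 28 batch give four patches per image.

import torch

images = torch.randn(8, 1, 28, 28)   # (batch, channel, height, width)
patches = sliding_window(images, patch_size=(14, 14), stride=(14, 14))
# 2 x 2 window positions -> 4 patches of shape 1 x 14 x 14 each.
assert patches.shape == (8, 4, 1, 14, 14)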
Example #16
    def __init__(self,
                 dim,
                 num_heads=1,
                 kernel_size=3,
                 padding=1,
                 stride=1,
                 qkv_bias=False,
                 attn_drop=0.1):
        super().__init__()
        self.dim = dim
        self.num_heads = num_heads
        self.head_dim = dim // num_heads
        self.kernel_size = kernel_size
        self.padding = padding
        self.stride = stride
        self.scale = self.head_dim**(-0.5)

        self.v_pj = nn.Linear(dim, dim, bias=qkv_bias)
        self.attn = nn.Linear(dim, kernel_size**4 * num_heads)

        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(attn_drop)

        self.unfold = nn.Unfold(kernel_size=kernel_size, padding=padding, stride=stride)  # manual convolution via unfold
        self.pool = nn.AvgPool2d(kernel_size=stride,
                                 stride=stride,
                                 ceil_mode=True)
Example #17
    def __init__(self,
                 img_size=224,
                 patch_size=16,
                 in_chans=3,
                 in_dim=48,
                 stride=4):
        super().__init__()
        img_size = to_2tuple(img_size)
        patch_size = to_2tuple(patch_size)
        # grid_size property necessary for resizing positional embedding
        self.grid_size = (img_size[0] // patch_size[0],
                          img_size[1] // patch_size[1])
        num_patches = (self.grid_size[0]) * (self.grid_size[1])
        self.img_size = img_size
        self.num_patches = num_patches
        self.in_dim = in_dim
        new_patch_size = [math.ceil(ps / stride) for ps in patch_size]
        self.new_patch_size = new_patch_size

        self.proj = nn.Conv2d(in_chans,
                              self.in_dim,
                              kernel_size=7,
                              padding=3,
                              stride=stride)
        self.unfold = nn.Unfold(kernel_size=new_patch_size,
                                stride=new_patch_size)
Example #18
    def extract_patches(self,
                        x,
                        kernel_size,
                        stride,
                        dilation,
                        padding='same'):

        if padding == 'same':
            pad_fn = same_padding
        elif padding == 'valid':
            pad_fn = get_pad
        else:
            raise NotImplementedError(
                'Padding mode [{:s}] is not found'.format(padding))

        pad_size = pad_fn(x.shape[2], x.shape[3], [kernel_size, kernel_size],
                          [stride, stride], [dilation, dilation])

        padding_layer = _padding(pad_type='zero', padding=pad_size * 2)

        x = padding_layer(x)
        unfold = nn.Unfold(
            kernel_size=kernel_size,
            stride=stride,
            padding=0,
            dilation=dilation,
        )

        patches = unfold(x)
        return patches
Example #19
    def __init__(self, channels, compressed_channels=64, scale_factor=2, up_kernel=5, encoder_kernel=3):
        """ The unofficial implementation of the CARAFE module.
        The details are in "https://arxiv.org/abs/1905.02188".
        Args:
            channels c: The channel number of the input and the output.
            compressed_channels c_mid: The channel number after compression.
            scale_factor scale: The expected upsample scale.
            up_kernel k_up: The size of the reassembly kernel.
            encoder_kernel k_enc: The kernel size of the encoder.
        Returns:
            X: The upsampled feature map.
        """
        super(CARAFE_3_sa_se, self).__init__()
        self.scale = scale_factor

        self.comp = ConvBNReLU(channels, compressed_channels, kernel_size=1, stride=1,
                               padding=0, dilation=1)
        self.enc = ConvBNReLU(compressed_channels, (scale_factor * up_kernel) ** 2, kernel_size=encoder_kernel,
                              stride=1, padding=encoder_kernel // 2, dilation=1,
                              use_relu=False)
        self.pix_shf = nn.PixelShuffle(scale_factor)

        self.upsmp = nn.Upsample(scale_factor=scale_factor, mode='nearest')
        self.unfold = nn.Unfold(kernel_size=up_kernel, dilation=scale_factor,
                                padding=up_kernel // 2 * scale_factor)

        # modified by zy 20210313
        # c = 100
        # self.fc1 = nn.Conv2d((scale_factor * up_kernel) ** 2, (scale_factor * up_kernel) ** 2 //16, kernel_size=1)
        # self.fc2 = nn.Conv2d((scale_factor * up_kernel) ** 2 //16, (scale_factor * up_kernel) ** 2, kernel_size=1)
        # modified by zy 20210316
        self.sa = SpatialAttention()
        self.se = SE((scale_factor * up_kernel) ** 2, 16)
Example #20
    def __init__(self, cfg):
        super(Encoder, self).__init__()
        self.cnn = CNN()

        ## to use all grids
        self.unfold = nn.Unfold(1)
        self.linear = nn.Linear(512, cfg.vocab_size)
Example #21
    def __init__(self,
                 img_size=224,
                 patch_size=16,
                 in_channels=3,
                 embed_dims_inner=48,
                 stride=4,
                 init_cfg=None):
        super(PixelEmbed, self).__init__(init_cfg=init_cfg)
        img_size = to_2tuple(img_size)
        patch_size = to_2tuple(patch_size)
        # patches_resolution property necessary for resizing
        # positional embedding
        patches_resolution = [
            img_size[0] // patch_size[0], img_size[1] // patch_size[1]
        ]
        num_patches = patches_resolution[0] * patches_resolution[1]

        self.img_size = img_size
        self.num_patches = num_patches
        self.embed_dims_inner = embed_dims_inner

        new_patch_size = [math.ceil(ps / stride) for ps in patch_size]
        self.new_patch_size = new_patch_size

        self.proj = nn.Conv2d(in_channels,
                              self.embed_dims_inner,
                              kernel_size=7,
                              padding=3,
                              stride=stride)
        self.unfold = nn.Unfold(kernel_size=new_patch_size,
                                stride=new_patch_size)
Example #22
    def __init__(self, phase, base, head, num_classes):
        super(S3FD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        '''
        self.priorbox = PriorBox(size,cfg)
        self.priors = Variable(self.priorbox.forward(), volatile=True)
        '''
        # SSD network
        self.conv = ConvBNReLU(1, 4, stride=2)

        self.unfold = nn.Unfold(kernel_size=(8, 8), stride=(4, 4))

        self.rnn_model = RNNPool(8, 8, 16, 16, 4)  #num_init_features)

        self.mob = nn.ModuleList(base)
        # Layer learns to scale the l2 normalized features from conv4_3
        self.L2Norm3_3 = L2Norm(32, 10)
        self.L2Norm4_3 = L2Norm(32, 8)
        self.L2Norm5_3 = L2Norm(96, 5)

        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])

        if self.phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
Example #23
    def __init__(self,
                 channels,
                 kernel_size,
                 stride,
                 group_ch=16,
                 red_ratio=2,
                 **kwargs):
        super().__init__(**kwargs)
        self.in_channels = channels
        self.out_channels = channels
        self.stride = stride
        self.kernel_size = kernel_size
        self.red_ratio = red_ratio
        self.group_ch = group_ch
        self.groups = channels // self.group_ch
        self.dilation = 1
        self.padding = (kernel_size - 1) // 2

        self.out = nn.AvgPool2d(stride,
                                stride) if self.stride > 1 else nn.Identity()
        self.reduce = nn.Conv2d(channels,
                                channels // self.red_ratio,
                                kernel_size=1)
        self.span = nn.Conv2d(channels // self.red_ratio,
                              kernel_size**2 * self.groups,
                              kernel_size=1,
                              stride=1)
        self.unfold = nn.Unfold(kernel_size, self.dilation, self.padding,
                                self.stride)

        # dynamic kernel generation function
        '''
Example #24
    def forward(self, query, key, value, mask):
        """Compute 'Scaled Dot Product Attention'
        :param torch.Tensor query: (batch, time1, size)
        :param torch.Tensor key: (batch, time2, size)
        :param torch.Tensor value: (batch, time2, size)
        :param torch.Tensor mask: (batch, time1)
        :return torch.Tensor: attended and transformed `value` (batch, time1, d_model)
             weighted by the query dot key attention (batch, head, time1, time2)
        """
        n_batch = query.size(0)
        q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k)
        k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k)
        v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k)
        q = q.transpose(1, 2)  # (batch, head, time1, d_k)
        k = k.transpose(1, 2)  # (batch, head, time2, d_k)
        v = v.transpose(1, 2)  # (batch, head, time2, d_k)

        if self.restrict > 0:
            # TODO use stride or padding to make time2 equal to time1
            scale = k.shape[2] // q.shape[2]
            assert q.shape[2] == k.shape[
                2], "restricted attention is not implemented for source attention now"
            unfold = nn.Unfold(kernel_size=(self.restrict, 1),
                               stride=(1, 1),
                               padding=(self.restrict // 2, 0))
            # (batch, self.h * self.d_k * self.restrict, time2)
            k = unfold(
                k.transpose(2, 3).contiguous().view(n_batch, self.h * self.d_k,
                                                    -1, 1))
            # (batch, self.h, time2, self.d_k, self.restrict)
            k = k.view(n_batch, self.h, self.d_k, self.restrict,
                       -1).permute(0, 1, 4, 2, 3)
            # (batch, self.h * self.d_k * self.restrict, time2)
            v = unfold(
                v.transpose(2, 3).contiguous().view(n_batch, self.h * self.d_k,
                                                    -1, 1))
            # (batch, self.h, time2, self.restrict, self.d_k)
            v = v.view(n_batch, self.h, self.d_k, self.restrict,
                       -1).transpose(2, 4)
            # (batch, head, time1, 1, d_k) x (batch, head, time1, d_k, self.restrict) -> (batch, head, time1, 1, self.restrict)
            scores = q.unsqueeze(-2).matmul(k) / math.sqrt(self.d_k)
            if mask is not None:
                mask = mask.unsqueeze(-1).unsqueeze(-1)
                self.attn_ = torch.softmax(
                    scores, dim=-1)  # (batch, head, time1, time2)
                self.attn_ = self.attn_.masked_fill(mask == 0, 0)
        else:
            # (batch, head, time1, d_k) x (batch, head, d_k, time2) -> (batch, head, time1, time2)
            scores = q.matmul(k.transpose(-2, -1)) / math.sqrt(self.d_k)
            if mask is not None:
                mask = mask.unsqueeze(1)
                scores = scores.masked_fill(mask == 0, MIN_VALUE)
            self.attn_ = torch.softmax(scores,
                                       dim=-1)  # (batch, head, time1, time2)
        p_attn = self.dropout(self.attn_)
        x = torch.matmul(p_attn, v)  # (batch, head, time1, d_k)
        x = x.transpose(1, 2).contiguous().view(
            n_batch, -1, self.h * self.d_k)  # (batch, time1, d_model)
        return self.linear_out(x)  # (batch, time1, d_model)
Example #25
    def __init__(self,
                 in_channel,
                 out_channel,
                 kernel_size,
                 stride=1,
                 padding=1,
                 dilation=1):
        super(depth_transform, self).__init__()
        # self.l1 = torch.nn.Linear(kernel_size*kernel_size*3, 64)
        # self.bn1 = nn.BatchNorm1d(64)
        # self.relu1 = nn.ReLU()
        # self.l2 = torch.nn.Linear(64, 9)
        # self.bn2 = nn.BatchNorm1d(9)
        # self.relu2 = nn.ReLU()
        # self.padding = padding
        # self.stride = stride
        # self.dilation = dilation
        # self.in_channel = in_channel
        # self.out_channel = out_channel
        # self.unfold = nn.Unfold(kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation)

        self.unfold = nn.Unfold(kernel_size=kernel_size,
                                stride=stride,
                                padding=padding,
                                dilation=dilation)
        self.l1 = nn.Linear(kernel_size * kernel_size * 3, 64)  # input is (batch, L, C_)
        self.bn1 = nn.BatchNorm1d(64)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(64, 9)
        self.padding = padding
        self.stride = stride
        self.dilation = dilation
        self.in_channel = in_channel
        self.out_channel = out_channel
Example #26
 def __init__(self,
              patch_size,
              num_layers,
              h_dim,
              num_heads,
              num_classes,
              d_ff=2048,
              max_time_steps=None,
              use_clf_token=True,
              dropout=0.0,
              dropout_emb=0.0):
     super(ViT, self).__init__()
     self.proc = nn.Sequential(
         nn.Unfold((patch_size, patch_size),
                   stride=(patch_size, patch_size)),
         Transpose(1, 2),
         nn.Linear(3 * patch_size * patch_size, h_dim),
     )
     self.enc = ViTransformerEncoder(num_layers,
                                     h_dim,
                                     num_heads,
                                     d_ff=d_ff,
                                     max_time_steps=max_time_steps,
                                     use_clf_token=use_clf_token,
                                     dropout=dropout,
                                     dropout_emb=dropout_emb)
     self.mlp = nn.Linear(h_dim, num_classes)
Example #27
    def __init__(self,
                 device,
                 mask,
                 imgsz,
                 kernel_size,
                 stride,
                 output_size,
                 bias=True):
        super(lp_pooling2d, self).__init__()
        self.mask = Parameter(mask, requires_grad=True)
        self.p_norm = Parameter(torch.zeros(output_size).add_(4),
                                requires_grad=True)

        self.eps = 1e-60
        self.sigmoid = nn.Sigmoid()
        self.temperture = 5
        self.pooling_operation = "Non_LSE"

        self.unfold = nn.Unfold(kernel_size=(kernel_size, kernel_size),
                                stride=stride)
        self.fold = nn.Fold(output_size=(imgsz // kernel_size,
                                         imgsz // kernel_size),
                            kernel_size=(1, 1))
        if bias:
            self.bias = Parameter(torch.Tensor(output_size))

        self.ondo_w = "True"
        self.exp_p = "True"
Example #28
 def __init__(self, kernel_size=3, device="cpu"):
     super(GradientLoss, self).__init__()
     self.loss = nn.MSELoss()
     self.kernel_size = kernel_size
     self.pad_size = (self.kernel_size - 1) // 2
     self.unfold = nn.Unfold(self.kernel_size)
     self.device = device
Example #29
    def __init__(
            self,
            in_channels,  # Input channels to convolution
            out_channels,  # Output channels from convolution
            kernel_size=1,  # Filter size
            stride=1,  # Stride
            padding=0,  # Padding
            dilation=1):  # Dilation

        super(PatchMMConvolution, self).__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = _pair(kernel_size)
        self.padding = _pair(padding)
        self.stride = _pair(stride)
        self.dilation = _pair(dilation)

        # Initialize parameters of the layer
        self.unfold = nn.Unfold(self.kernel_size, self.dilation, self.padding,
                                self.stride)
        self.weight = nn.Parameter(
            torch.Tensor(self.out_channels, self.in_channels,
                         self.kernel_size[0], self.kernel_size[1]))
        self.bias = nn.Parameter(torch.Tensor(self.out_channels))

        self.reset_parameters()
Example #30
    def __init__(self,
                 dim,
                 num_heads,
                 kernel_size=3,
                 padding=1,
                 stride=1,
                 qkv_bias=False,
                 qk_scale=None,
                 attn_drop=0.,
                 proj_drop=0.):
        super().__init__()
        head_dim = dim // num_heads
        self.num_heads = num_heads
        self.kernel_size = kernel_size
        self.padding = padding
        self.stride = stride
        self.scale = qk_scale or head_dim**-0.5

        self.v = nn.Linear(dim, dim, bias=qkv_bias)
        self.attn = nn.Linear(dim, kernel_size**4 * num_heads)

        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

        self.unfold = nn.Unfold(kernel_size=kernel_size,
                                padding=padding,
                                stride=stride)
        self.pool = nn.AvgPool2d(kernel_size=stride,
                                 stride=stride,
                                 ceil_mode=True)