def __init__(self, in_planes, rel_planes, out_planes, share_planes,
             sa_type=0, kernel_size=3, stride=1, dilation=1):
    super(SAM, self).__init__()
    self.sa_type, self.kernel_size, self.stride = sa_type, kernel_size, stride
    self.conv1 = nn.Conv2d(in_planes, rel_planes, kernel_size=1)
    self.conv2 = nn.Conv2d(in_planes, rel_planes, kernel_size=1)
    self.conv3 = nn.Conv2d(in_planes, out_planes, kernel_size=1)
    if sa_type == 0:  # pairwise self-attention
        self.conv_w = nn.Sequential(
            nn.LeakyReLU(0.2),
            nn.Conv2d(rel_planes + 2, rel_planes, kernel_size=1, bias=False),
            nn.Conv2d(rel_planes, out_planes // share_planes, kernel_size=1))
        self.conv_p = nn.Conv2d(2, 2, kernel_size=1)
        self.subtraction = Subtraction(
            kernel_size, stride, (dilation * (kernel_size - 1) + 1) // 2,
            dilation, pad_mode=1)
        self.subtraction2 = Subtraction2(
            kernel_size, stride, (dilation * (kernel_size - 1) + 1) // 2,
            dilation, pad_mode=1)
        self.softmax = nn.Softmax(dim=-2)
    else:  # patchwise self-attention
        self.conv_w = nn.Sequential(
            nn.LeakyReLU(0.2),
            nn.Conv2d(rel_planes * (pow(kernel_size, 2) + 1),
                      out_planes // share_planes, kernel_size=1, bias=False),
            nn.Conv2d(out_planes // share_planes,
                      pow(kernel_size, 2) * out_planes // share_planes,
                      kernel_size=1))
        self.unfold_i = nn.Unfold(kernel_size=1, dilation=dilation,
                                  padding=0, stride=stride)
        self.unfold_j = nn.Unfold(kernel_size=kernel_size, dilation=dilation,
                                  padding=0, stride=stride)
        self.pad = nn.ReflectionPad2d(kernel_size // 2)
    self.aggregation = Aggregation(
        kernel_size, stride, (dilation * (kernel_size - 1) + 1) // 2,
        dilation, pad_mode=1)
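# A minimal standalone sketch (not part of the SAM module above) of what the
# patchwise branch's unfold pair produces; the toy shapes below are assumed
# for illustration only.
import torch
import torch.nn as nn

x = torch.randn(1, 4, 8, 8)  # (B, rel_planes, H, W)
kernel_size, dilation, stride = 3, 1, 1
pad = nn.ReflectionPad2d(kernel_size // 2)
unfold_i = nn.Unfold(kernel_size=1, dilation=dilation, padding=0, stride=stride)
unfold_j = nn.Unfold(kernel_size=kernel_size, dilation=dilation, padding=0, stride=stride)

xi = unfold_i(x)       # (1, 4, 64): each column is one centre pixel
xj = unfold_j(pad(x))  # (1, 36, 64): each column is a full 3x3 neighbourhood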
def make_patches(tensor, patch_size=16, scale=4):
    # Expects a 3-channel CPU tensor (C, H, W): the reshape below hard-codes
    # 3 channels, and np.pad requires the data to live on the CPU.
    tensor = tensor.unsqueeze(0)
    stride = patch_size // 2
    wo = tensor.size(2)
    ho = tensor.size(3)
    wn = wo + stride - (wo % stride)
    hn = ho + stride - (ho % stride)
    # Replicate-pad each spatial dim up to a multiple of the stride.
    if 0 < stride - (wo % stride) < stride:
        tensor = np.pad(tensor, ((0, 0), (0, 0), (0, wn - wo), (0, 0)),
                        mode='edge')
        tensor = torch.from_numpy(tensor)
    else:
        wn = wo
    if 0 < stride - (ho % stride) < stride:
        tensor = np.pad(tensor, ((0, 0), (0, 0), (0, 0), (0, hn - ho)),
                        mode='edge')
        tensor = torch.from_numpy(tensor)
    else:
        hn = ho
    mask = torch.ones((tensor.size()[0], tensor.size()[1],
                       tensor.size()[2] * scale, tensor.size()[3] * scale))
    # Use torch.nn.Unfold on the low-res image and its upscaled mask.
    unfold = nn.Unfold(kernel_size=(patch_size, patch_size), stride=stride)
    unfold2 = nn.Unfold(kernel_size=(patch_size * scale, patch_size * scale),
                        stride=stride * scale)
    mask_p = unfold2(mask)
    patches = unfold(tensor)
    patches = patches.reshape(3, patch_size, patch_size, -1).permute(3, 0, 1, 2)
    if tensor.is_cuda:
        patches_base = torch.zeros((patches.size()[0], patches.size()[1],
                                    patches.size()[2] * scale,
                                    patches.size()[3] * scale),
                                   device=tensor.get_device())
    else:
        patches_base = torch.zeros((patches.size()[0], patches.size()[1],
                                    patches.size()[2] * scale,
                                    patches.size()[3] * scale))
    tiles = []
    for t in range(patches.size(0)):
        tiles.append(torch.squeeze(patches[[t], :, :, :]))
    return (tiles, mask_p, patches_base,
            (tensor.size(2) * scale, tensor.size(3) * scale),
            ((wn - wo) * scale, (hn - ho) * scale))
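# Hypothetical usage of make_patches (sizes assumed): tile a 3-channel CPU
# image into overlapping 16x16 patches for x4 upscaling.
img = torch.rand(3, 70, 50)
tiles, mask_p, patches_base, out_size, crop = make_patches(img, patch_size=16, scale=4)
print(len(tiles), tiles[0].shape)  # patch count; each tile is (3, 16, 16)
print(out_size, crop)              # upscaled size and the padding to crop off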
def __init__(self, in_channels, out_channels, kernel_size, padding=0,
             stride=1, dilation=1, groups=1, mixtures=1, bias=False):
    super().__init__()
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.kernel_size = kernel_size
    self.padding = padding
    self.stride = stride
    self.dilation = dilation
    self.groups = groups
    self.mixtures = mixtures
    self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1,
                           groups=groups, bias=bias)
    self.conv2 = nn.Conv2d(in_channels, out_channels, kernel_size=1,
                           groups=groups, bias=bias)
    self.conv3 = nn.Conv2d(in_channels, out_channels * mixtures,
                           kernel_size=1, groups=groups, bias=bias)
    self.row_embeddings = nn.Parameter(torch.randn(out_channels, kernel_size))
    self.col_embeddings = nn.Parameter(torch.randn(out_channels, kernel_size))
    self.mix_embeddings = nn.Parameter(torch.randn(out_channels, mixtures))
    self.unfold1 = nn.Unfold(kernel_size=1, stride=stride)
    self.unfold2 = nn.Unfold(kernel_size=kernel_size, padding=padding,
                             stride=stride, dilation=dilation)
    self.unfold3 = nn.Unfold(kernel_size=kernel_size, padding=padding,
                             stride=stride, dilation=dilation)
def __init__(self, in_ch, dy_filter_size=9):
    super(Deform_DFN, self).__init__()
    self._filter_size = dy_filter_size
    self.filter = dy_filter_size**2
    # encoder
    self.en1 = single_conv(in_ch, 32)
    self.en2 = single_conv(32, 32, stride=2)
    self.en3 = single_conv(32, 64)
    self.en4 = single_conv(64, 64, stride=2)
    self.en5 = single_conv(64, 64)
    # middle blocks
    self.mid1 = single_conv(64, 128)
    self.mid2 = single_conv(128, 128)
    self.mid_h1 = single_conv(128, 128)
    self.mid_h2 = single_conv(128, 128)
    # decoder branch 1: dynamic filter prediction (filter_size**2 taps)
    self.de1 = single_conv(128, 64)
    self.de2 = single_conv(64, 64)
    self.de_up1 = nn.Upsample(scale_factor=2, mode='nearest')
    self.de3 = single_conv(64, 64)
    self.de_up2 = nn.Upsample(scale_factor=2, mode='nearest')
    self.de4 = single_conv(64, 64)
    self.de5 = single_conv(64, 128, kernel_size=1, padding=0)
    self.dyf = nn.Conv2d(128, self.filter, kernel_size=1, stride=1, padding=0)
    # decoder branch 2: predicts 2 values per filter tap (offsets for the
    # deformable convolution below)
    self.de1_2 = single_conv(128, 64)
    self.de2_2 = single_conv(64, 64)
    self.de_up1_2 = nn.Upsample(scale_factor=2, mode='nearest')
    self.de3_2 = single_conv(64, 64)
    self.de_up2_2 = nn.Upsample(scale_factor=2, mode='nearest')
    self.de4_2 = single_conv(64, 64)
    self.de5_2 = single_conv(64, 128, kernel_size=1, padding=0)
    self.dyf_2 = nn.Conv2d(128, 2 * self.filter, kernel_size=1, stride=1,
                           padding=0)
    self.unfold = nn.Unfold(kernel_size=self._filter_size,
                            padding=self._filter_size // 2)
    self.deform = DeformConv2D(1, 1, kernel_size=self._filter_size)
    self.unfold_deform = nn.Unfold(
        kernel_size=(self._filter_size, self._filter_size),
        padding=self._filter_size // 2,
        stride=self._filter_size)
def __init__(self, in_channels, out_channels, kernel_size, heads=4, stride=1):
    super(SASAConv2d, self).__init__()
    assert heads > 0, 'SASAConv2d requires a positive number of heads'
    assert type(kernel_size) == int, 'SASAConv2d requires integer kernel_size'
    assert out_channels % heads == 0, \
        'SASAConv2d requires out_channels divisible by the number of heads'
    padding = (kernel_size - 1) // 2
    self.heads = heads
    self.kernel_size = kernel_size
    self.out_channels = out_channels
    # queries: one vector per output position
    self.q_conv = nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1,
                  padding=0, bias=False),
        nn.Unfold(1, 1, 0, stride),
        Rearrange('N (M D) HW -> (N HW M) () D', M=self.heads))
    self.q_conv.apply(init_weights)
    # keys: a k*k neighbourhood per output position, with relative embeddings
    self.k_conv = nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1,
                  padding=0, bias=False),
        nn.Unfold(kernel_size, 1, padding, stride),
        RelativeEmbeddings2d(extent=kernel_size, embedding_size=out_channels),
        Rearrange('N (M D KK) HW -> (N HW M) D KK', M=self.heads,
                  KK=self.kernel_size**2))
    self.k_conv.apply(init_weights)
    # values: a k*k neighbourhood per output position
    self.v_conv = nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1,
                  padding=0, bias=False),
        nn.Unfold(kernel_size, 1, padding, stride),
        Rearrange('N (M D KK) HW -> (N HW M) KK D', M=self.heads,
                  KK=self.kernel_size**2))
    self.v_conv.apply(init_weights)
def __init__(self, num_frames, patchsize, nh_size, img_size):
    # -- init vars --
    self.num_frames = num_frames
    self._patchsize = patchsize
    self.nh_size = nh_size  # number of patches around center pixel
    self.img_size = img_size
    # -- unfold input patches --
    padding, stride, ipad = 1, 1, self._patchsize // 2
    self.unfold_input = nn.Unfold(self._patchsize, 1, padding, stride)
    # -- create grid to compute indices --
    index_grid = torch.arange(0, img_size**2).reshape(1, 1, img_size, img_size)
    self.index_grid = F.pad(index_grid.type(torch.float),
                            (ipad, ipad, ipad, ipad),
                            mode='reflect')[0, 0].type(torch.long)
    self.index_pad = (self.index_grid.shape[0] - self.img_size) // 2
    # -- indexing bursts: index of the middle frame --
    self.midx = num_frames // 2 if num_frames != 2 else 1
    self.no_mid_idx = np.r_[np.r_[:self.midx], np.r_[self.midx + 1:num_frames]]
    self.no_mid_idx = torch.LongTensor(self.no_mid_idx)
def __init__(self, channels, output_channels, scale_factor, up_kernel=5,
             up_group=1, encoder_kernel=3, encoder_dilation=1,
             compressed_channels=64):
    super(CARAFEPack, self).__init__()
    self.channels = channels
    self.scale_factor = scale_factor
    self.up_kernel = up_kernel
    self.up_group = up_group
    self.encoder_kernel = encoder_kernel
    self.encoder_dilation = encoder_dilation
    self.compressed_channels = compressed_channels
    self.channel_compressor = nn.Conv2d(channels, self.compressed_channels, 1)
    self.content_encoder = nn.Conv2d(
        self.compressed_channels,
        self.up_kernel * self.up_kernel * self.up_group *
        self.scale_factor * self.scale_factor,
        self.encoder_kernel,
        padding=int((self.encoder_kernel - 1) * self.encoder_dilation / 2),
        dilation=self.encoder_dilation,
        groups=1)
    self.upsample = nn.Upsample(scale_factor=self.scale_factor, mode='nearest')
    self.unfold = nn.Unfold(kernel_size=self.up_kernel,
                            dilation=self.scale_factor,
                            padding=self.up_kernel // 2 * self.scale_factor)
    self.proj = nn.Conv2d(channels, output_channels, 1)
    self.init_weights()
def test_complex_1F():
    unfold = nn.Unfold(kernel_size=(2, 3))
    input = torch.randn(2, 5, 3, 4)
    output = unfold(input)
    output_h = unfold(input.hammerblade())
    assert output_h.device == torch.device("hammerblade")
    assert torch.allclose(output, output_h.cpu())
def flatten_patches(image, ps=3):
    unfold = nn.Unfold(ps, 1, 0, 1)  # kernel ps, dilation 1, padding 0, stride 1
    image_pad = F.pad(image, (ps // 2, ps // 2, ps // 2, ps // 2),
                      mode='reflect')
    patches = unfold(image_pad)
    patches = rearrange(patches, 'b (c ps1 ps2) r -> b r (ps1 ps2 c)',
                        ps1=ps, ps2=ps)
    patches = patches.contiguous()
    return patches
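# Hypothetical usage of flatten_patches: one flattened 3x3 neighbourhood per
# pixel, with reflection padding so the spatial size is preserved.
img = torch.randn(2, 3, 32, 32)
patches = flatten_patches(img, ps=3)
print(patches.shape)  # torch.Size([2, 1024, 27]) = (B, H*W, ps*ps*C)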
def __init__(self, channels, kernel_size, stride, dilation=1):
    super(Involution, self).__init__()
    self.kernel_size = kernel_size
    self.stride = stride
    self.dilation = dilation
    self.channels = channels
    reduction_ratio = 4
    self.group_channels = 16
    self.groups = self.channels // self.group_channels
    self.conv1 = ConvModule(in_channels=channels,
                            out_channels=channels // reduction_ratio,
                            kernel_size=1,
                            conv_cfg=None,
                            norm_cfg=dict(type='BN'),
                            act_cfg=dict(type='ReLU'))
    self.conv2 = ConvModule(in_channels=channels // reduction_ratio,
                            out_channels=kernel_size**2 * self.groups,
                            kernel_size=1,
                            stride=1,
                            conv_cfg=None,
                            norm_cfg=None,
                            act_cfg=None)
    if stride > 1:
        self.avgpool = nn.AvgPool2d(stride, stride)
    self.unfold = nn.Unfold(
        kernel_size, dilation,
        (self.kernel_size + (self.kernel_size - 1) * (self.dilation - 1) - 1) // 2,
        stride)
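# A sketch of the forward pass that pairs with the constructor above,
# following the reference involution implementation (hedged: reconstructed,
# not taken from this snippet).
def forward(self, x):
    # predict one kernel_size**2 filter per group and per output position
    weight = self.conv2(self.conv1(x if self.stride == 1 else self.avgpool(x)))
    b, c, h, w = weight.shape
    weight = weight.view(b, self.groups, self.kernel_size**2, h, w).unsqueeze(2)
    # gather the matching input neighbourhoods with unfold
    out = self.unfold(x).view(b, self.groups, self.group_channels,
                              self.kernel_size**2, h, w)
    # weighted sum over the kernel taps
    out = (weight * out).sum(dim=3).view(b, self.channels, h, w)
    return out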
def __init__(self):
    super(Net, self).__init__()
    # self.lk1 = LongConv(10, 12288)
    # self.lk2 = LongConv(10, 108300)
    # self.lk3 = LongConv(100, 3072)
    # self.lk4 = LongConv(50, 3072)
    # self.lk5 = LongConv(50, 3072)
    # self.lk6 = LongConv(50, 3072)
    #
    # self.lc = Looper(100, 3072)
    #
    # self.lm = LongMem(1, 3072)
    # self.lm = LongMem(10, 12288, 3, 3)
    # self.lm2 = LongMem(10, 13872, 3, 3)
    # self.lm3 = LongMem(10, 15552, 3, 3)
    #
    # self.conv = nn.Conv2d(3, 3, (3, 3), stride=1)
    # self.conv2 = nn.Conv2d(3, 3, (3, 3), stride=1)
    # self.conv3 = nn.Conv2d(3, 3, (3, 3), stride=1)
    #
    # self.tconv = nn.ConvTranspose2d(3, 3, (10, 10))
    # self.tconv2 = nn.ConvTranspose2d(3, 3, (10, 10))
    # self.m1 = MemA(10, 75 * 784, 75 * 15376)
    # self.m2 = MemA(10, 75 * 784, 75 * 15376)
    # self.m3 = MemA(10, 75 * 784, 75 * 15376)
    # self.m4 = MemA(10, 32 * 32 * 3, 128 * 128 * 3)
    # self.m5 = MemA(10, 16 * 16 * 3, 128 * 128 * 3)
    self.mb1 = MemB(10, 128 * 128 * 3, 128 * 128 * 3)
    # self.m2 = MemC(10, 32 * 32 * 3, 128 * 128 * 3)
    self.unfold = nn.Unfold(kernel_size=(5, 5))
    self.fold = nn.Fold(kernel_size=(5, 5), output_size=(128, 128), stride=5)
    self.patches = nn.Parameter(torch.randn((1, 75, 15376)))
def __init__(self, opt):
    super(DAML, self).__init__()
    self.opt = opt
    self.num_fea = 2  # ID + DOC

    self.user_word_embs = nn.Embedding(opt.vocab_size, opt.word_dim)  # vocab_size * 300
    self.item_word_embs = nn.Embedding(opt.vocab_size, opt.word_dim)  # vocab_size * 300

    # shared word-level cnn
    self.word_cnn = nn.Conv2d(1, 1, (5, opt.word_dim), padding=(2, 0))
    # document-level cnn
    self.user_doc_cnn = nn.Conv2d(1, opt.filters_num,
                                  (opt.kernel_size, opt.word_dim),
                                  padding=(1, 0))
    self.item_doc_cnn = nn.Conv2d(1, opt.filters_num,
                                  (opt.kernel_size, opt.word_dim),
                                  padding=(1, 0))
    # abstract-level cnn
    self.user_abs_cnn = nn.Conv2d(1, opt.filters_num,
                                  (opt.kernel_size, opt.filters_num))
    self.item_abs_cnn = nn.Conv2d(1, opt.filters_num,
                                  (opt.kernel_size, opt.filters_num))

    self.unfold = nn.Unfold((3, opt.filters_num), padding=(1, 0))

    # fc layer
    self.user_fc = nn.Linear(opt.filters_num, opt.id_emb_size)
    self.item_fc = nn.Linear(opt.filters_num, opt.id_emb_size)
    self.uid_embedding = nn.Embedding(opt.user_num + 2, opt.id_emb_size)
    self.iid_embedding = nn.Embedding(opt.item_num + 2, opt.id_emb_size)

    self.reset_para()
def __init__(self, c, c_mid=64, scale=2, k_up=5, k_enc=3):
    """ The unofficial implementation of the CARAFE module.

    The details are in "https://arxiv.org/abs/1905.02188".

    Args:
        c: The channel number of the input and the output.
        c_mid: The channel number after compression.
        scale: The expected upsample scale.
        k_up: The size of the reassembly kernel.
        k_enc: The kernel size of the encoder.

    Returns:
        X: The upsampled feature map.
    """
    super(CARAFE, self).__init__()
    self.scale = scale
    self.comp = ConvBNReLU(c, c_mid, kernel_size=1, stride=1, padding=0,
                           dilation=1)
    self.enc = ConvBNReLU(c_mid, (scale * k_up)**2, kernel_size=k_enc,
                          stride=1, padding=k_enc // 2, dilation=1,
                          use_relu=False)
    self.pix_shf = nn.PixelShuffle(scale)
    self.upsmp = nn.Upsample(scale_factor=scale, mode='nearest')
    self.unfold = nn.Unfold(kernel_size=k_up, dilation=scale,
                            padding=k_up // 2 * scale)
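# A forward pass consistent with this constructor, following the same
# unofficial CARAFE implementation (hedged sketch; shape comments assume the
# defaults scale=2, k_up=5).
def forward(self, X):
    b, c, h, w = X.size()
    h_, w_ = h * self.scale, w * self.scale

    W = self.comp(X)                              # (b, c_mid, h, w)
    W = self.enc(W)                               # (b, (scale*k_up)^2, h, w)
    W = self.pix_shf(W)                           # (b, k_up^2, h_, w_)
    W = torch.softmax(W, dim=1)                   # normalised reassembly kernels

    X = self.upsmp(X)                             # (b, c, h_, w_)
    X = self.unfold(X)                            # (b, c*k_up^2, h_*w_)
    X = X.view(b, c, -1, h_, w_)                  # (b, c, k_up^2, h_, w_)

    X = torch.einsum('bkhw,bckhw->bchw', [W, X])  # weighted reassembly
    return X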
def __init__(self, kernel_size=5):
    super(kernel_computation, self).__init__()
    self.kernel_size = kernel_size
    self.unfolder = nn.Unfold(kernel_size=kernel_size, dilation=1,
                              padding=kernel_size // 2, stride=1)
def sliding_window(images: torch.Tensor, patch_size: Tuple[int, int],
                   stride: Tuple[int, int]) -> torch.Tensor:
    """Creates patches of an image.

    Args:
        images (torch.Tensor): A Torch tensor of 4D image(s), i.e.
            (batch, channel, height, width).
        patch_size (Tuple[int, int]): The size of the patches to generate,
            e.g. 28x28 for EMNIST.
        stride (Tuple[int, int]): The stride of the sliding window.

    Returns:
        torch.Tensor: A tensor with the shape
            (batch, patches, channel, height, width).
    """
    unfold = nn.Unfold(kernel_size=patch_size, stride=stride)
    # Perform the sliding window; rearrange restores the channel dimension
    # that unfold folds into its column vectors.
    c = images.shape[1]
    patches = unfold(images)
    patches = rearrange(
        patches,
        "b (c h w) t -> b t c h w",
        c=c,
        h=patch_size[0],
        w=patch_size[1],
    )
    return patches
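# Hypothetical usage of sliding_window: four non-overlapping 28x28 patches
# from a 56x56 single-channel batch.
images = torch.rand(8, 1, 56, 56)
patches = sliding_window(images, patch_size=(28, 28), stride=(28, 28))
print(patches.shape)  # torch.Size([8, 4, 1, 28, 28])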
def __init__(self, dim, num_heads=1, kernel_size=3, padding=1, stride=1,
             qkv_bias=False, attn_drop=0.1):
    super().__init__()
    self.dim = dim
    self.num_heads = num_heads
    self.head_dim = dim // num_heads
    self.kernel_size = kernel_size
    self.padding = padding
    self.stride = stride
    self.scale = self.head_dim**(-0.5)

    self.v_pj = nn.Linear(dim, dim, bias=qkv_bias)
    self.attn = nn.Linear(dim, kernel_size**4 * num_heads)
    self.attn_drop = nn.Dropout(attn_drop)
    self.proj = nn.Linear(dim, dim)
    self.proj_drop = nn.Dropout(attn_drop)

    # manual convolution via unfold; keyword arguments matter here, since
    # nn.Unfold's positional order is (kernel_size, dilation, padding, stride)
    self.unfold = nn.Unfold(kernel_size, padding=padding, stride=stride)
    self.pool = nn.AvgPool2d(kernel_size=stride, stride=stride, ceil_mode=True)
def __init__(self, img_size=224, patch_size=16, in_chans=3, in_dim=48,
             stride=4):
    super().__init__()
    img_size = to_2tuple(img_size)
    patch_size = to_2tuple(patch_size)
    # grid_size property necessary for resizing positional embedding
    self.grid_size = (img_size[0] // patch_size[0],
                      img_size[1] // patch_size[1])
    num_patches = self.grid_size[0] * self.grid_size[1]
    self.img_size = img_size
    self.num_patches = num_patches
    self.in_dim = in_dim
    new_patch_size = [math.ceil(ps / stride) for ps in patch_size]
    self.new_patch_size = new_patch_size
    self.proj = nn.Conv2d(in_chans, self.in_dim, kernel_size=7, padding=3,
                          stride=stride)
    self.unfold = nn.Unfold(kernel_size=new_patch_size, stride=new_patch_size)
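# A sketch of the matching forward pass, following the TNT pixel-embedding
# pattern this constructor mirrors (hedged: `pixel_pos` is an assumed
# intra-patch positional embedding, as in timm's implementation).
def forward(self, x, pixel_pos):
    B, C, H, W = x.shape
    x = self.proj(x)    # (B, in_dim, H/stride, W/stride)
    x = self.unfold(x)  # (B, in_dim * p * p, num_patches)
    x = x.transpose(1, 2).reshape(B * self.num_patches, self.in_dim,
                                  self.new_patch_size[0],
                                  self.new_patch_size[1])
    x = x + pixel_pos   # add per-pixel position inside each patch
    x = x.reshape(B * self.num_patches, self.in_dim, -1).transpose(1, 2)
    return x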
def extract_patches(self, x, kernel_size, stride, dilation, padding='same'):
    if padding == 'same':
        pad_fn = same_padding
    elif padding == 'valid':
        pad_fn = get_pad
    else:
        raise NotImplementedError(
            'Padding mode [{:s}] is not found'.format(padding))
    pad_size = pad_fn(x.shape[2], x.shape[3], [kernel_size, kernel_size],
                      [stride, stride], [dilation, dilation])
    padding_layer = _padding(pad_type='zero', padding=pad_size * 2)
    x = padding_layer(x)
    unfold = nn.Unfold(
        kernel_size=kernel_size,
        stride=stride,
        padding=0,
        dilation=dilation,
    )
    patches = unfold(x)
    return patches
def __init__(self, channels, compressed_channels=64, scale_factor=2,
             up_kernel=5, encoder_kernel=3):
    """ The unofficial implementation of the CARAFE module.

    The details are in "https://arxiv.org/abs/1905.02188".

    Args:
        channels (c): The channel number of the input and the output.
        compressed_channels (c_mid): The channel number after compression.
        scale_factor (scale): The expected upsample scale.
        up_kernel (k_up): The size of the reassembly kernel.
        encoder_kernel (k_enc): The kernel size of the encoder.

    Returns:
        X: The upsampled feature map.
    """
    super(CARAFE_3_sa_se, self).__init__()
    self.scale = scale_factor
    self.comp = ConvBNReLU(channels, compressed_channels, kernel_size=1,
                           stride=1, padding=0, dilation=1)
    self.enc = ConvBNReLU(compressed_channels, (scale_factor * up_kernel)**2,
                          kernel_size=encoder_kernel, stride=1,
                          padding=encoder_kernel // 2, dilation=1,
                          use_relu=False)
    self.pix_shf = nn.PixelShuffle(scale_factor)
    self.upsmp = nn.Upsample(scale_factor=scale_factor, mode='nearest')
    self.unfold = nn.Unfold(kernel_size=up_kernel, dilation=scale_factor,
                            padding=up_kernel // 2 * scale_factor)

    # modified by zy 20210313
    # self.fc1 = nn.Conv2d((scale_factor * up_kernel) ** 2,
    #                      (scale_factor * up_kernel) ** 2 // 16, kernel_size=1)
    # self.fc2 = nn.Conv2d((scale_factor * up_kernel) ** 2 // 16,
    #                      (scale_factor * up_kernel) ** 2, kernel_size=1)
    # modified by zy 20210316
    self.sa = SpatialAttention()
    self.se = SE((scale_factor * up_kernel)**2, 16)
def __init__(self, cfg):
    super(Encoder, self).__init__()
    self.cnn = CNN()
    ## to use all grids
    self.unfold = nn.Unfold(1)
    self.linear = nn.Linear(512, cfg.vocab_size)
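# A minimal check (assumed shapes) of why nn.Unfold(1) "uses all grids": with
# a 1x1 kernel it simply flattens the spatial dimensions into token columns.
feat = torch.randn(2, 512, 7, 7)
cols = nn.Unfold(1)(feat)  # (2, 512, 49)
assert torch.equal(cols, feat.flatten(2))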
def __init__(self, img_size=224, patch_size=16, in_channels=3,
             embed_dims_inner=48, stride=4, init_cfg=None):
    super(PixelEmbed, self).__init__(init_cfg=init_cfg)
    img_size = to_2tuple(img_size)
    patch_size = to_2tuple(patch_size)
    # patches_resolution property necessary for resizing
    # positional embedding
    patches_resolution = [
        img_size[0] // patch_size[0], img_size[1] // patch_size[1]
    ]
    num_patches = patches_resolution[0] * patches_resolution[1]
    self.img_size = img_size
    self.num_patches = num_patches
    self.embed_dims_inner = embed_dims_inner

    new_patch_size = [math.ceil(ps / stride) for ps in patch_size]
    self.new_patch_size = new_patch_size

    self.proj = nn.Conv2d(in_channels, self.embed_dims_inner, kernel_size=7,
                          padding=3, stride=stride)
    self.unfold = nn.Unfold(kernel_size=new_patch_size, stride=new_patch_size)
def __init__(self, phase, base, head, num_classes):
    super(S3FD, self).__init__()
    self.phase = phase
    self.num_classes = num_classes
    '''
    self.priorbox = PriorBox(size, cfg)
    self.priors = Variable(self.priorbox.forward(), volatile=True)
    '''
    # SSD network
    self.conv = ConvBNReLU(1, 4, stride=2)
    self.unfold = nn.Unfold(kernel_size=(8, 8), stride=(4, 4))
    self.rnn_model = RNNPool(8, 8, 16, 16, 4)  # num_init_features
    self.mob = nn.ModuleList(base)
    # Layer learns to scale the l2 normalized features from conv4_3
    self.L2Norm3_3 = L2Norm(32, 10)
    self.L2Norm4_3 = L2Norm(32, 8)
    self.L2Norm5_3 = L2Norm(96, 5)
    self.loc = nn.ModuleList(head[0])
    self.conf = nn.ModuleList(head[1])
    if self.phase == 'test':
        self.softmax = nn.Softmax(dim=-1)
def __init__(self, channels, kernel_size, stride, group_ch=16, red_ratio=2,
             **kwargs):
    super().__init__(**kwargs)
    self.in_channels = channels
    self.out_channels = channels
    self.stride = stride
    self.kernel_size = kernel_size
    self.red_ratio = red_ratio
    self.group_ch = group_ch
    self.groups = channels // self.group_ch
    self.dilation = 1
    self.padding = (kernel_size - 1) // 2
    self.out = nn.AvgPool2d(stride, stride) if self.stride > 1 else nn.Identity()
    self.reduce = nn.Conv2d(channels, channels // self.red_ratio,
                            kernel_size=1)
    # dynamic kernel generation function
    self.span = nn.Conv2d(channels // self.red_ratio,
                          kernel_size**2 * self.groups,
                          kernel_size=1, stride=1)
    self.unfold = nn.Unfold(kernel_size, self.dilation, self.padding,
                            self.stride)
def forward(self, query, key, value, mask):
    """Compute 'Scaled Dot Product Attention'.

    :param torch.Tensor query: (batch, time1, size)
    :param torch.Tensor key: (batch, time2, size)
    :param torch.Tensor value: (batch, time2, size)
    :param torch.Tensor mask: (batch, time1)
    :return torch.Tensor: attended and transformed `value`
        (batch, time1, d_model), weighted by the query-dot-key attention
        (batch, head, time1, time2)
    """
    n_batch = query.size(0)
    q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k)
    k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k)
    v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k)
    q = q.transpose(1, 2)  # (batch, head, time1, d_k)
    k = k.transpose(1, 2)  # (batch, head, time2, d_k)
    v = v.transpose(1, 2)  # (batch, head, time2, d_k)

    if self.restrict > 0:
        # TODO use stride or padding to make time2 equal to time1
        scale = k.shape[2] // q.shape[2]
        assert q.shape[2] == k.shape[2], \
            "restricted attention is not implemented for source attention now"
        unfold = nn.Unfold(kernel_size=(self.restrict, 1), stride=(1, 1),
                           padding=(self.restrict // 2, 0))
        # (batch, self.h * self.d_k * self.restrict, time2)
        k = unfold(k.transpose(2, 3).contiguous().view(
            n_batch, self.h * self.d_k, -1, 1))
        # (batch, self.h, time2, self.d_k, self.restrict)
        k = k.view(n_batch, self.h, self.d_k, self.restrict,
                   -1).permute(0, 1, 4, 2, 3)
        # (batch, self.h * self.d_k * self.restrict, time2)
        v = unfold(v.transpose(2, 3).contiguous().view(
            n_batch, self.h * self.d_k, -1, 1))
        # (batch, self.h, time2, self.restrict, self.d_k)
        v = v.view(n_batch, self.h, self.d_k, self.restrict, -1).transpose(2, 4)
        # (batch, head, time1, 1, d_k) x (batch, head, time1, d_k, restrict)
        # -> (batch, head, time1, 1, restrict)
        scores = q.unsqueeze(-2).matmul(k) / math.sqrt(self.d_k)
        self.attn_ = torch.softmax(scores, dim=-1)  # (batch, head, time1, 1, restrict)
        if mask is not None:
            mask = mask.unsqueeze(-1).unsqueeze(-1)
            self.attn_ = self.attn_.masked_fill(mask == 0, 0)
    else:
        # (batch, head, time1, d_k) x (batch, head, d_k, time2)
        # -> (batch, head, time1, time2)
        scores = q.matmul(k.transpose(-2, -1)) / math.sqrt(self.d_k)
        if mask is not None:
            mask = mask.unsqueeze(1)
            scores = scores.masked_fill(mask == 0, MIN_VALUE)
        self.attn_ = torch.softmax(scores, dim=-1)  # (batch, head, time1, time2)

    p_attn = self.dropout(self.attn_)
    x = torch.matmul(p_attn, v)  # (batch, head, time1, d_k)
    x = x.transpose(1, 2).contiguous().view(
        n_batch, -1, self.h * self.d_k)  # (batch, time1, d_model)
    return self.linear_out(x)  # (batch, time1, d_model)
def __init__(self, in_channel, out_channel, kernel_size, stride=1, padding=1,
             dilation=1):
    super(depth_transform, self).__init__()
    self.unfold = nn.Unfold(kernel_size=kernel_size, stride=stride,
                            padding=padding, dilation=dilation)
    self.l1 = nn.Linear(kernel_size * kernel_size * 3, 64)  # input is (batch, L, C)
    self.bn1 = nn.BatchNorm1d(64)
    self.relu = nn.ReLU()
    self.l2 = nn.Linear(64, 9)
    self.padding = padding
    self.stride = stride
    self.dilation = dilation
    self.in_channel = in_channel
    self.out_channel = out_channel
def __init__(self, patch_size, num_layers, h_dim, num_heads, num_classes,
             d_ff=2048, max_time_steps=None, use_clf_token=True, dropout=0.0,
             dropout_emb=0.0):
    super(ViT, self).__init__()
    self.proc = nn.Sequential(
        nn.Unfold((patch_size, patch_size), stride=(patch_size, patch_size)),
        Transpose(1, 2),
        nn.Linear(3 * patch_size * patch_size, h_dim),
    )
    self.enc = ViTransformerEncoder(num_layers, h_dim, num_heads, d_ff=d_ff,
                                    max_time_steps=max_time_steps,
                                    use_clf_token=use_clf_token,
                                    dropout=dropout, dropout_emb=dropout_emb)
    self.mlp = nn.Linear(h_dim, num_classes)
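# A shape check (toy input assumed) of the patching step above: a 224x224 RGB
# image with patch_size=16 yields 196 tokens of raw dimension 3*16*16 = 768,
# which the Linear layer then projects to h_dim.
x = torch.randn(2, 3, 224, 224)
tokens = nn.Unfold((16, 16), stride=(16, 16))(x).transpose(1, 2)
print(tokens.shape)  # torch.Size([2, 196, 768])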
def __init__(self, device, mask, imgsz, kernel_size, stride, output_size,
             bias=True):
    super(lp_pooling2d, self).__init__()
    self.mask = Parameter(mask, requires_grad=True)
    self.p_norm = Parameter(torch.zeros(output_size).add_(4),
                            requires_grad=True)
    self.eps = 1e-60
    self.sigmoid = nn.Sigmoid()
    self.temperature = 5
    self.pooling_operation = "Non_LSE"
    self.unfold = nn.Unfold(kernel_size=(kernel_size, kernel_size),
                            stride=stride)
    self.fold = nn.Fold(output_size=(imgsz // kernel_size,
                                     imgsz // kernel_size),
                        kernel_size=(1, 1))
    if bias:
        self.bias = Parameter(torch.Tensor(output_size))
    self.ondo_w = "True"
    self.exp_p = "True"
def __init__(self, kernel_size=3, device="cpu"):
    super(GradientLoss, self).__init__()
    self.loss = nn.MSELoss()
    self.kernel_size = kernel_size
    self.pad_size = (self.kernel_size - 1) // 2
    self.unfold = nn.Unfold(self.kernel_size)
    self.device = device
def __init__(self,
             in_channels,    # Input channels to convolution
             out_channels,   # Output channels from convolution
             kernel_size=1,  # Filter size
             stride=1,       # Stride
             padding=0,      # Padding
             dilation=1):    # Dilation
    super(PatchMMConvolution, self).__init__()
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.kernel_size = _pair(kernel_size)
    self.padding = _pair(padding)
    self.stride = _pair(stride)
    self.dilation = _pair(dilation)

    # Initialize parameters of the layer
    self.unfold = nn.Unfold(self.kernel_size, self.dilation, self.padding,
                            self.stride)
    self.weight = nn.Parameter(
        torch.Tensor(self.out_channels, self.in_channels,
                     self.kernel_size[0], self.kernel_size[1]))
    self.bias = nn.Parameter(torch.Tensor(self.out_channels))
    self.reset_parameters()
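# A sketch of the matching forward pass (hedged: reconstructed, not from the
# original snippet): im2col via nn.Unfold, then convolution as a single matmul.
def forward(self, x):
    B, _, H, W = x.shape
    # standard output-size formula for unfold/convolution
    out_h = (H + 2 * self.padding[0]
             - self.dilation[0] * (self.kernel_size[0] - 1) - 1) // self.stride[0] + 1
    out_w = (W + 2 * self.padding[1]
             - self.dilation[1] * (self.kernel_size[1] - 1) - 1) // self.stride[1] + 1
    cols = self.unfold(x)                            # (B, C*kh*kw, L)
    w = self.weight.view(self.out_channels, -1)      # (O, C*kh*kw)
    out = w.matmul(cols) + self.bias.view(1, -1, 1)  # (B, O, L)
    return out.view(B, self.out_channels, out_h, out_w)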
def __init__(self, dim, num_heads, kernel_size=3, padding=1, stride=1,
             qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.):
    super().__init__()
    head_dim = dim // num_heads
    self.num_heads = num_heads
    self.kernel_size = kernel_size
    self.padding = padding
    self.stride = stride
    self.scale = qk_scale or head_dim**-0.5

    self.v = nn.Linear(dim, dim, bias=qkv_bias)
    self.attn = nn.Linear(dim, kernel_size**4 * num_heads)
    self.attn_drop = nn.Dropout(attn_drop)
    self.proj = nn.Linear(dim, dim)
    self.proj_drop = nn.Dropout(proj_drop)

    self.unfold = nn.Unfold(kernel_size=kernel_size, padding=padding,
                            stride=stride)
    self.pool = nn.AvgPool2d(kernel_size=stride, stride=stride,
                             ceil_mode=True)
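# A forward pass consistent with this constructor, following the VOLO
# OutlookAttention reference (hedged sketch; expects channels-last input
# (B, H, W, C) and assumes `import math` and `torch.nn.functional as F`).
def forward(self, x):
    B, H, W, C = x.shape

    v = self.v(x).permute(0, 3, 1, 2)  # (B, C, H, W)
    h, w = math.ceil(H / self.stride), math.ceil(W / self.stride)
    # gather k*k value neighbourhoods per output position
    v = self.unfold(v).reshape(B, self.num_heads, C // self.num_heads,
                               self.kernel_size * self.kernel_size,
                               h * w).permute(0, 1, 4, 3, 2)

    # predict a k*k x k*k attention matrix per position directly from features
    attn = self.pool(x.permute(0, 3, 1, 2)).permute(0, 2, 3, 1)
    attn = self.attn(attn).reshape(B, h * w, self.num_heads,
                                   self.kernel_size * self.kernel_size,
                                   self.kernel_size * self.kernel_size)
    attn = attn.permute(0, 2, 1, 3, 4) * self.scale
    attn = self.attn_drop(attn.softmax(dim=-1))

    # aggregate values and fold the overlapping windows back to (H, W)
    x = (attn @ v).permute(0, 1, 4, 3, 2).reshape(
        B, C * self.kernel_size * self.kernel_size, h * w)
    x = F.fold(x, output_size=(H, W), kernel_size=self.kernel_size,
               padding=self.padding, stride=self.stride)
    return self.proj_drop(self.proj(x.permute(0, 2, 3, 1)))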