def __init__(self, img_size=224, patch_size=16, in_chans=3, in_dim=48, stride=4): super().__init__() img_size = to_2tuple(img_size) patch_size = to_2tuple(patch_size) # grid_size property necessary for resizing positional embedding self.grid_size = (img_size[0] // patch_size[0], img_size[1] // patch_size[1]) num_patches = (self.grid_size[0]) * (self.grid_size[1]) self.img_size = img_size self.num_patches = num_patches self.in_dim = in_dim new_patch_size = [math.ceil(ps / stride) for ps in patch_size] self.new_patch_size = new_patch_size self.proj = nn.Conv2d(in_chans, self.in_dim, kernel_size=7, padding=3, stride=stride) self.unfold = nn.Unfold(kernel_size=new_patch_size, stride=new_patch_size)
def __init__(self, patch_size=16, stride=16, padding=0, in_chans=3, embed_dim=768, norm_layer=None): super().__init__() patch_size = to_2tuple(patch_size) stride = to_2tuple(stride) padding = to_2tuple(padding) self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=stride, padding=padding) self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()
def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, norm_layer=None,exist_overlap:bool=False,slide_step=None): super().__init__() img_size = to_2tuple(img_size) patch_size = to_2tuple(patch_size) self.img_size = img_size if exist_overlap: self.num_patches = ((img_size[0] - patch_size[0]) // slide_step + 1) * ((img_size[1] - patch_size[1]) // slide_step + 1) self.proj = nn.Conv2d(in_channels=in_chans, out_channels=embed_dim, kernel_size=patch_size, stride=(slide_step, slide_step)) else: self.patch_size = patch_size self.grid_size = (img_size[0] // patch_size[0], img_size[1] // patch_size[1]) self.num_patches = self.grid_size[0] * self.grid_size[1] self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()