def __init__(
        self,
        num_input_channel: int,
        base_block_name: str,
        num_blocks_in_conv_layer: list,
        *,
        num_classes: Optional[int] = None,
    ) -> None:

        assert (num_input_channel > 0), name_with_msg(
            self, "`num_input_channel` should be specified and greater than 0")
        assert (base_block_name in ResNetConfig.available_base_blocks(
        )), name_with_msg(
            self,
            f"`base_block_name` ({base_block_name}) should be one as listed below: \n{ResNetConfig.available_base_blocks()}"
        )
        assert (len(num_blocks_in_conv_layer) == 4), name_with_msg(
            self,
            "The length of `num_blocks_in_conv_layer` must be equal to 4 for conv_2 to conv_5"
        )

        if num_classes is not None:
            assert (num_classes > 0), name_with_msg(
                self, "`num_classes` should be specified and greater than 0")

        self.num_input_channel = num_input_channel
        self.base_block_name = base_block_name

        # make code more readable
        conv_keys = [f"conv_{idx}" for idx in range(2, 5 + 1)]
        self.num_blocks_in_conv_layer = dict(
            zip(conv_keys, num_blocks_in_conv_layer))

        self.num_classes = num_classes
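
For reference, a minimal standalone sketch of how the block counts are mapped onto the stage keys; the [3, 4, 6, 3] layout below is the familiar ResNet-50 depth configuration and is used only as an example.

num_blocks_in_conv_layer = [3, 4, 6, 3]                  # example: ResNet-50 depths
conv_keys = [f"conv_{idx}" for idx in range(2, 5 + 1)]   # conv_2 .. conv_5
print(dict(zip(conv_keys, num_blocks_in_conv_layer)))
# {'conv_2': 3, 'conv_3': 4, 'conv_4': 6, 'conv_5': 3}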
Example #2
    def __init__(self,
                 image_size,
                 image_channel,
                 patch_size,
                 use_patch_and_flat=True):
        super().__init__()

        assert image_size is not None, name_with_msg(
            self, "Please specify input images' size")
        assert patch_size is not None, name_with_msg(
            self, "Please specify patches' size")

        self.patch_size = patch_size
        self.patch_dim = (patch_size**2) * image_channel
        self.num_patches = (image_size // patch_size)**2

        assert ((self.num_patches**0.5) *
                patch_size == image_size), name_with_msg(
                    self, "Image size must be divisible by the patch size")

        if use_patch_and_flat:
            self.patch_and_flat = Rearrange(
                "b c (h p) (w q) -> b (h w) (p q c)",
                p=self.patch_size,
                q=self.patch_size)
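
A rough standalone check of the patch arithmetic and the Rearrange pattern above, assuming torch and einops are installed; the image and patch sizes are illustrative only.

import torch
from einops import rearrange

image_size, patch_size, image_channel = 224, 16, 3
num_patches = (image_size // patch_size) ** 2        # 196
patch_dim = (patch_size ** 2) * image_channel        # 768

images = torch.randn(2, image_channel, image_size, image_size)
patches = rearrange(images, "b c (h p) (w q) -> b (h w) (p q c)",
                    p=patch_size, q=patch_size)
assert patches.shape == (2, num_patches, patch_dim)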
Example #3
    def __init__(
        self,
        image_size: int,
        image_channel: int,
        patch_size: int,
        num_layers_in_stages: List[int],
        num_channels: List[int],
        expand_scales: List[int],
        kernel_size_on_heads: Dict[int, int],
        heads: Optional[int] = None,
    ) -> None:
        super().__init__(image_size,
                         image_channel,
                         patch_size,
                         use_patch_and_flat=False)

        self.image_channel = image_channel
        self.num_stages = len(num_layers_in_stages)
        self.num_layers_in_stages = num_layers_in_stages
        self.num_channels = num_channels
        self.expand_scales = expand_scales
        self.patch_sizes = [self.patch_size, *((2, ) * (self.num_stages - 1))]

        if heads is not None:
            assert (heads == sum(kernel_size_on_heads.values(
            ))), name_with_msg(
                self,
                f"`heads` ({heads}) should be equal to the sum of the values of `kernel_size_on_heads` ({sum(kernel_size_on_heads.values())})"
            )
        self.heads = heads or sum(kernel_size_on_heads.values())
        self.kernel_size_on_heads = kernel_size_on_heads
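
A small sketch of the bookkeeping above; the stage and head numbers are assumed values, chosen only to make the arithmetic visible.

patch_size = 4
num_layers_in_stages = [2, 2, 2, 2]                  # assumed: 4 stages
kernel_size_on_heads = {3: 2, 5: 3, 7: 3}

num_stages = len(num_layers_in_stages)
patch_sizes = [patch_size, *((2,) * (num_stages - 1))]
print(patch_sizes)                                   # [4, 2, 2, 2] -> overall stride 32
print(sum(kernel_size_on_heads.values()))            # 8 heads when `heads` is None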
Example #4
    def __init__(
        self,
        dim: int,
        kernel_size_on_heads: Dict[int, int],
        heads: Optional[int] = None,
        head_dim: Optional[int] = None,
        use_cls: bool = True,
        use_bias: bool = True,
        conv_relative_postion_encoder: Optional[nn.Module] = None,
        attention_dropout: float = 0.,
        ff_dropout: float = 0.,
    ) -> None:
        super().__init__()

        assert (heads is not None or head_dim is not None), name_with_msg(
            self,
            f"Either `heads` ({heads}) or `head_dim` ({head_dim}) must be specified"
        )

        self.heads = heads if heads is not None else dim // head_dim
        head_dim = head_dim if head_dim is not None else dim // heads

        assert (head_dim * self.heads == dim), name_with_msg(
            self,
            f"Head dimension ({head_dim}) times the number of heads ({self.heads}) must be equal to embedding dimension ({dim})"
        )

        self.relative_position_encoder = ConvolutionalRelativePositionEncoding(
            dim,
            heads,
            head_dim,
            kernel_size_on_heads=kernel_size_on_heads,
            use_cls=use_cls
        ) if conv_relative_postion_encoder is None else conv_relative_postion_encoder

        self.QKV = nn.Linear(dim, 3 * dim, bias=use_bias)
        self.out_linear = nn.Linear(dim, dim)

        self.attention_dropout = nn.Dropout(attention_dropout)
        self.out_dropout = nn.Dropout(ff_dropout)

        self.scale = head_dim**(-0.5)
        self.use_cls = use_cls
    def _get_relative_indices(self, height: int, width: int) -> torch.Tensor:
        height, width = int(height), int(width)
        ticks_y, ticks_x = torch.arange(height), torch.arange(width)
        grid_y, grid_x = torch.meshgrid(ticks_y, ticks_x)
        out = torch.empty(height * width, height * width).fill_(float("nan"))

        for idx_y in range(height):
            for idx_x in range(width):
                rel_indices_y = grid_y - idx_y + height
                rel_indices_x = grid_x - idx_x + width
                flatten_indices = (rel_indices_y * width +
                                   rel_indices_x).flatten()
                out[idx_y * width + idx_x] = flatten_indices

        assert (not out.isnan().any()), name_with_msg(
            self, "`relative_indices` have blank indices")

        assert ((out >= 0).all()), name_with_msg(
            self, "`relative_indices` have negative indices")

        return out.to(torch.long)
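
A standalone worked example of the relative-index construction above for a 2x2 grid; the loop is duplicated here so the snippet runs on its own, and indexing="ij" matches the default behaviour of the meshgrid call above.

import torch

height = width = 2
grid_y, grid_x = torch.meshgrid(torch.arange(height), torch.arange(width),
                                indexing="ij")
out = torch.empty(height * width, height * width)
for idx_y in range(height):
    for idx_x in range(width):
        rel = (grid_y - idx_y + height) * width + (grid_x - idx_x + width)
        out[idx_y * width + idx_x] = rel.flatten()
print(out.long())
# tensor([[6, 7, 8, 9],
#         [5, 6, 7, 8],
#         [4, 5, 6, 7],
#         [3, 4, 5, 6]])
# Entry (i, j) is the shifted, non-negative relative offset of token j with
# respect to token i; token pairs with the same spatial offset share an index.
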
    def __init__(self, image_height, image_width, num_blocks_in_layers,
                 num_channels_in_layers, block_type_in_layers,
                 expand_scale_in_layers):
        super().__init__()

        assert (len(num_blocks_in_layers) == 5), name_with_msg(
            self, "The length of `num_blocks_in_layers` must be 5")

        if isinstance(num_channels_in_layers, list):
            assert (len(num_channels_in_layers) == 5), name_with_msg(
                self, "The length of `num_channels_in_layers` must be 5")
        else:
            begin_channel = int(num_channels_in_layers)
            num_channels_in_layers = [
                int(begin_channel // (2**layer_idx))
                for layer_idx in range(0, 5)
            ]

        # We ignore `S0` here, so the length of the below lists should be 4
        assert (len(block_type_in_layers) == 4), name_with_msg(
            self, "The length of `block_type_in_layers` must be 4")

        if isinstance(expand_scale_in_layers, list):
            assert (len(expand_scale_in_layers) == 4), name_with_msg(
                self, "The length of `expand_scale_in_layers` must be 4")
        else:
            expand_scale = int(expand_scale_in_layers)
            expand_scale_in_layers = [expand_scale for _ in range(4)]

        self.height_in_layers = [
            int(image_height / (2**layer_idx)) for layer_idx in range(1, 6)
        ]
        self.width_in_layers = [
            int(image_width / (2**layer_idx)) for layer_idx in range(1, 6)
        ]
        self.num_blocks_in_layers = num_blocks_in_layers
        self.num_channels_in_layers = num_channels_in_layers
        self.block_type_in_layers = block_type_in_layers
        self.expand_scale_in_layers = expand_scale_in_layers
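
A sketch of how the per-layer resolutions and channel counts are derived; the input size and starting channel count below are assumed for illustration.

image_height = image_width = 224                     # assumed input resolution
begin_channel = 2048                                 # assumed scalar `num_channels_in_layers`

height_in_layers = [int(image_height / (2 ** i)) for i in range(1, 6)]
num_channels_in_layers = [int(begin_channel // (2 ** i)) for i in range(0, 5)]
print(height_in_layers)          # [112, 56, 28, 14, 7]
print(num_channels_in_layers)    # [2048, 1024, 512, 256, 128]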
Example #7
    def __init__(
        self,
        dim: int,
        heads: Optional[int],
        head_dim: Optional[int],
        kernel_size_on_heads: Dict[int, int] = {
            3: 2,
            5: 3,
            7: 3
        },  # From: https://github.com/mlpc-ucsd/CoaT/blob/main/src/models/coat.py#L358
        use_cls: bool = True,
    ) -> None:
        super().__init__()

        head_list = list(kernel_size_on_heads.values())
        if heads is None and head_dim is None:
            if any(h is None or h <= 0 for h in head_list):
                raise ValueError(
                    "Please specify exact number (integers that are greater than 0) of heads for each kernel size when `heads` and `head_dim` are None."
                )

            self.heads = sum(head_list)
        else:
            self.heads = heads or dim // head_dim

        self.head_dim = head_dim or dim // self.heads

        assert (dim // self.heads == self.head_dim), name_with_msg(
            self,
            f"`dim` ({dim}) can't be divided by `heads` ({self.heads}). Please check `heads`, `head_dim`, or `kernel_size_on_heads`."
        )

        self.depth_wise_conv_list = nn.ModuleList([
            nn.Conv2d(
                self.head_dim * num_heads,
                self.head_dim * num_heads,
                kernel_size=kernel_size,
                stride=1,
                padding=kernel_size // 2,
                groups=self.head_dim * num_heads,
            ) for kernel_size, num_heads in kernel_size_on_heads.items()
        ])
        self.split_list = [
            num_heads * self.head_dim
            for num_heads in kernel_size_on_heads.values()
        ]

        self.use_cls = use_cls
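
A quick sketch of the channel split that feeds the depth-wise convolutions above, using the default `kernel_size_on_heads` and an assumed embedding dimension.

kernel_size_on_heads = {3: 2, 5: 3, 7: 3}            # default mapping above
dim = 512                                            # assumed embedding dimension

heads = sum(kernel_size_on_heads.values())           # 8
head_dim = dim // heads                               # 64
split_list = [n * head_dim for n in kernel_size_on_heads.values()]
print(split_list)                                    # [128, 192, 192], sums to 512
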
    def __init__(
        self,
        depth_scale: float,
        width_scale: float,
        resolution: int,
        up_sampling_mode: Optional[
            str] = None,  # Check out: https://pytorch.org/docs/stable/generated/torch.nn.Upsample.html?highlight=up%20sample#torch.nn.Upsample
        return_feature_maps: bool = False,
        num_layers: Optional[List[int]] = None,
        channels: Optional[List[int]] = None,
        kernel_sizes: Optional[List[int]] = None,
        strides: Optional[List[int]] = None,
        expand_scales: Optional[List[Optional[int]]] = None,
        se_scales: Optional[List[Optional[int]]] = None,
        se_scale: Optional[float] = 0.25,
    ) -> None:
        super().__init__()

        # Table 1. from the official paper (all stages)
        self.num_layers = self.scale_and_round_layers(
            num_layers if num_layers is not None else
            [1, 1, 2, 2, 3, 3, 4, 1, 1], depth_scale)
        self.channels = self.scale_and_round_channels(
            channels if channels is not None else
            [32, 16, 24, 40, 80, 112, 192, 320, 1280], width_scale)
        self.kernel_sizes = kernel_sizes if kernel_sizes is not None else [
            3, 3, 3, 5, 3, 5, 5, 3, 1
        ]
        self.strides = strides if strides is not None else [
            1, 2, 1, 2, 2, 2, 1, 2, 1
        ]
        self.expand_scales = expand_scales if expand_scales is not None else [
            None, 1, 6, 6, 6, 6, 6, 6, None
        ]

        assert (se_scales is not None or se_scale is not None), name_with_msg(
            self, "Either `se_scales` or `se_scale` should be specified")

        self.se_scales = se_scales if se_scales is not None else [
            None, *((se_scale, ) * 7), None
        ]

        self.resolution = resolution
        # From: https://github.com/tensorflow/tpu/blob/3679ca6b979349dde6da7156be2528428b000c7c/models/official/efficientnet/preprocessing.py#L88
        # The default for resizing is `bicubic`
        self.up_sampling_mode = up_sampling_mode
        self.return_feature_maps = return_feature_maps
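
The `scale_and_round_*` helpers are not shown above; the sketch below assumes they follow the rounding rules from the official EfficientNet code (ceil for repeats, round-to-a-multiple-of-8 for channels) and should be read only as an approximation of their behaviour.

import math
from typing import List

def scale_layers(num_layers: List[int], depth_scale: float) -> List[int]:
    # Assumed: repeats are scaled and rounded up, as in the official implementation.
    return [int(math.ceil(n * depth_scale)) for n in num_layers]

def scale_channels(channels: List[int], width_scale: float, divisor: int = 8) -> List[int]:
    # Assumed: channels are rounded to the nearest multiple of `divisor`,
    # never dropping below 90% of the scaled value.
    out = []
    for c in channels:
        scaled = c * width_scale
        new_c = max(divisor, int(scaled + divisor / 2) // divisor * divisor)
        if new_c < 0.9 * scaled:
            new_c += divisor
        out.append(new_c)
    return out

print(scale_layers([1, 2, 2, 3, 3, 4, 1], depth_scale=1.2))   # [2, 3, 3, 4, 4, 5, 2]
print(scale_channels([32, 16, 24, 40], width_scale=1.2))      # [40, 24, 32, 48]
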
    def __init__(self, input_channel, channel_in_between,
                 num_res_blocks_in_between, vit_input_size, **kwargs):
        super().__init__()

        assert len(channel_in_between) >= 1, name_with_msg(
            self,
            "Please specify the number of channels for at least 1 layer.")

        channel_in_between = [input_channel] + channel_in_between
        self.layers = nn.ModuleList([
            TransUNetEncoderConvBlock(channel_in_between[idx],
                                      channel_in_between[idx + 1],
                                      num_res_blocks_in_between[idx])
            for idx in range(len(channel_in_between) - 1)
        ])
        self.vit = TransUNetViT(image_size=vit_input_size,
                                image_channel=channel_in_between[-1],
                                **kwargs)
Example #10
    def forward(self, *args: torch.Tensor,
                sizes: Tuple[Tuple[int, int], ...]) -> List[torch.Tensor]:
        num_inputs = len(args)
        assert (num_inputs == len(self.serial_block_list)), name_with_msg(
            self,
            f"The number of inputs ({num_inputs}) should be aligned with the number of feature maps ({len(self.serial_block_list)})"
        )

        # Convolutional position encoding applied per scale
        args = [
            conv_position_encoder(x, H, W)
            for x, (H, W), conv_position_encoder in zip(
                args, sizes, self.conv_position_encoder)
        ]

        # Convolutional attention applied per scale
        args = [norm(x) for x, norm in zip(args, self.norm_0)]
        args = [
            conv_attn_module(x, H, W) for x, (H, W), conv_attn_module in zip(
                args, sizes, self.conv_attn_module)
        ]

        for idx in range(num_inputs):
            args[idx] = torch.stack(
                [self.interpolate(x, size=sizes[idx]) for x in args],
                dim=0).sum(dim=0)

        args = [
            x + path_dropout(x)
            for x, path_dropout in zip(args, self.path_dropout_0)
        ]

        # Feed-forward blocks applied per scale
        args = [norm(x) for x, norm in zip(args, self.norm_1)]
        args = [ff_block(x) for x, ff_block in zip(args, self.ff_block)]
        args = [
            x + path_dropout(x)
            for x, path_dropout in zip(args, self.path_dropout_1)
        ]

        return args
    def __init__(
        self,
        in_channel: int,
        out_channel: Optional[int] = None,
        dimension: int = 2,
    ) -> None:
        super().__init__()

        self.in_channel = in_channel
        self.out_channel = out_channel if out_channel is not None else in_channel

        assert ((0 < dimension) and (dimension < 4)), name_with_msg(
            self, "`dimension` must be larger than 0 and smaller than 4")

        self.dimension = dimension

        if self.dimension == 1:
            self.conv = nn.Conv1d
        elif self.dimension == 2:
            self.conv = nn.Conv2d
        else:
            self.conv = nn.Conv3d
Example #12
    def __init__(
        self,
        image_channel: int,
        patch_size: int,
        dim: int,
    ) -> None:
        super().__init__()

        assert (log2(patch_size) == int(log2(patch_size))), name_with_msg(
            self, f"`patch_size` ({patch_size}) must be a power of 2")

        base_dimension_scale = 1 / (patch_size // 2)
        num_layers = int(log2(patch_size))

        self.proj = nn.Sequential(*[
            nn.Conv2d(int(dim * base_dimension_scale *
                          2**((idx // 2) - 1)) if idx != 0 else image_channel,
                      int(dim * base_dimension_scale * 2**(idx // 2)),
                      kernel_size=3,
                      stride=2,
                      padding=1) if idx % 2 == 0 else nn.GELU()
            for idx in range(num_layers * 2 - 1)
        ])
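
A standalone trace of the convolutional patch projection above for an assumed `patch_size` of 4 and `dim` of 192, showing how each stride-2 convolution doubles the width until `dim` is reached.

from math import log2

import torch
import torch.nn as nn

image_channel, patch_size, dim = 3, 4, 192           # assumed example values

base_dimension_scale = 1 / (patch_size // 2)         # 0.5
num_layers = int(log2(patch_size))                   # 2 stride-2 convolutions

proj = nn.Sequential(*[
    nn.Conv2d(int(dim * base_dimension_scale * 2**((idx // 2) - 1))
              if idx != 0 else image_channel,
              int(dim * base_dimension_scale * 2**(idx // 2)),
              kernel_size=3, stride=2, padding=1) if idx % 2 == 0 else nn.GELU()
    for idx in range(num_layers * 2 - 1)
])
print(proj)                                          # Conv2d(3, 96, /2), GELU, Conv2d(96, 192, /2)
print(proj(torch.randn(1, 3, 224, 224)).shape)       # torch.Size([1, 192, 56, 56])
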
    def __init__(
        self,
        in_channel: int,
        out_channel: Optional[int] = None,
        expand_channel: Optional[int] = None,
        expand_scale: Optional[int] = None,
        kernel_size: int = 3,
        stride: int = 1,
        padding: int = 1,
        norm_layer_name: str = "BatchNorm2d",
        act_fnc_name: str = "SiLU",
        se_scale: Optional[float] = None,
        se_act_fnc_name: str = "SiLU",
        dimension: int = 2,
        path_dropout: float = 0.,
        expansion_head_type: Literal["pixel_depth", "fused"] = "pixel_depth",
        **kwargs  # For example: `eps` and `elementwise_affine` for `nn.LayerNorm`
    ):
        super().__init__(in_channel, out_channel, dimension=dimension)

        assert (
            expand_channel is not None or expand_scale is not None
        ), name_with_msg(
            self,
            "Either `expand_channel` or `expand_scale` should be specified")
        expand_channel = expand_channel if expand_channel is not None else in_channel * expand_scale

        assert (
            isinstance(expansion_head_type, str)
            and expansion_head_type in ["pixel_depth", "fused"]
        ), name_with_msg(
            self,
            f"The specified `expansion_head_type` - {expansion_head_type} ({type(expansion_head_type)}) doesn't exist.\n \
            Please choose from here: ['pixel_depth', 'fused']")

        # Expansion Head
        if expansion_head_type == "pixel_depth":
            pixel_wise_conv_0 = nn.Sequential(
                self.conv(self.in_channel,
                          expand_channel,
                          kernel_size=1,
                          bias=False),
                get_attr_if_exists(nn, norm_layer_name)(expand_channel,
                                                        **kwargs),
                get_attr_if_exists(nn, act_fnc_name)())

            depth_wise_conv = nn.Sequential(
                self.conv(expand_channel,
                          expand_channel,
                          kernel_size,
                          stride=stride,
                          padding=padding,
                          groups=expand_channel,
                          bias=False),
                get_attr_if_exists(nn, norm_layer_name)(expand_channel,
                                                        **kwargs),
                get_attr_if_exists(nn, act_fnc_name)())

            self.expansion_head = nn.Sequential(pixel_wise_conv_0,
                                                depth_wise_conv)
        else:
            self.expansion_head = nn.Sequential(
                nn.Conv2d(self.in_channel,
                          expand_channel,
                          kernel_size,
                          stride=stride,
                          padding=padding,
                          bias=False),
                get_attr_if_exists(nn, norm_layer_name)(expand_channel,
                                                        **kwargs),
                get_attr_if_exists(nn, act_fnc_name)())

        #
        self.se_block = None
        if se_scale is not None:
            bottleneck_channel = int(expand_channel * se_scale)

            self.se_block = SEConvXd(
                expand_channel,
                bottleneck_channel,
                se_act_fnc_name=se_act_fnc_name,
            )

        #
        self.pixel_wise_conv_1 = nn.Sequential(
            self.conv(
                expand_channel,
                self.out_channel,
                kernel_size=1,
                bias=False,
            ),
            get_attr_if_exists(nn, norm_layer_name)(self.out_channel,
                                                    **kwargs))

        # From: https://github.com/tensorflow/tpu/blob/3679ca6b979349dde6da7156be2528428b000c7c/models/official/efficientnet/utils.py#L276
        # It's a batch-wise dropout
        self.path_dropout = PathDropout(path_dropout)
        self.skip = self.in_channel == self.out_channel and stride == 1
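
To close, a brief trace of the channel bookkeeping in this MBConv-style block with assumed numbers: the expansion head widens the channels, the optional SE block bottlenecks them by `se_scale`, and the final point-wise convolution projects back, with the residual path active only when the input and output shapes match.

in_channel, out_channel = 16, 16                     # assumed example values
expand_scale, se_scale, stride = 6, 0.25, 1

expand_channel = in_channel * expand_scale           # 96 after the expansion head
bottleneck_channel = int(expand_channel * se_scale)  # 24 inside the SE block
skip = in_channel == out_channel and stride == 1     # True -> residual + path dropout
print(expand_channel, bottleneck_channel, skip)      # 96 24 True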