Example #1
    def __init__(self,
                 *,
                 dim_text=512,
                 dim_image=512,
                 dim_latent=512,
                 num_text_tokens=10000,
                 num_visual_tokens=512,
                 text_enc_depth=6,
                 visual_enc_depth=6,
                 text_seq_len=256,
                 visual_seq_len=1024,
                 text_heads=8,
                 visual_heads=8,
                 vae=None):
        super().__init__()
        # text branch: token + positional embeddings -> encoder -> shared latent
        self.text_emb = nn.Embedding(num_text_tokens, dim_text)
        self.text_pos_emb = nn.Embedding(text_seq_len, dim_text)
        self.text_transformer = Encoder(dim=dim_text,
                                        depth=text_enc_depth,
                                        heads=text_heads)
        self.to_text_latent = nn.Linear(dim_text, dim_latent, bias=False)

        # visual branch mirrors the text branch over discrete image tokens
        self.visual_emb = nn.Embedding(num_visual_tokens, dim_image)
        self.visual_pos_emb = nn.Embedding(visual_seq_len, dim_image)
        self.visual_transformer = Encoder(dim=dim_image,
                                          depth=visual_enc_depth,
                                          heads=visual_heads)
        self.to_visual_latent = nn.Linear(dim_image, dim_latent, bias=False)

        self.temperature = nn.Parameter(torch.tensor(1.))  # learnable logit scale

        self.vae = vae
        if exists(self.vae):
            # reuse the VAE's codebook as the visual token embedding
            self.visual_emb = vae.codebook
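# A shape walk-through of the text branch above, as a minimal sketch using
# the constructor defaults; the batch size is arbitrary, and the pooling
# note describes the usual CLIP-style head rather than code shown here.
import torch
import torch.nn as nn

text_emb = nn.Embedding(10000, 512)        # num_text_tokens, dim_text
text_pos_emb = nn.Embedding(256, 512)      # text_seq_len, dim_text

tokens = torch.randint(0, 10000, (4, 256))     # (batch, text_seq_len)
x = text_emb(tokens)                           # (4, 256, 512)
x = x + text_pos_emb(torch.arange(256))        # positions broadcast over the batch
# x would then pass through text_transformer and, after pooling, through
# to_text_latent to yield a (4, dim_latent) text embedding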
Example #2
    def __init__(
        self,
        *,
        dim_text = 512,
        dim_image = 512,
        dim_latent = 512,
        num_text_tokens = 10000,
        text_enc_depth = 6,
        text_seq_len = 256,
        text_heads = 8,
        num_visual_tokens = 512,
        visual_enc_depth = 6,
        visual_heads = 8,
        visual_image_size = 256,
        visual_patch_size = 32,
        channels = 3
    ):
        super().__init__()
        self.text_emb = nn.Embedding(num_text_tokens, dim_text)
        self.text_pos_emb = nn.Embedding(text_seq_len, dim_text)
        self.text_transformer = Encoder(dim = dim_text, depth = text_enc_depth, heads = text_heads)
        self.to_text_latent = nn.Linear(dim_text, dim_latent, bias = False)

        assert visual_image_size % visual_patch_size == 0, 'Image dimensions must be divisible by the patch size.'
        num_patches = (visual_image_size // visual_patch_size) ** 2
        patch_dim = channels * visual_patch_size ** 2

        self.visual_patch_size = visual_patch_size
        self.to_visual_embedding = nn.Linear(patch_dim, dim_image)
        self.visual_pos_emb = nn.Embedding(num_patches, dim_image)
        self.visual_transformer = Encoder(dim = dim_image, depth = visual_enc_depth, heads = visual_heads)
        self.to_visual_latent = nn.Linear(dim_image, dim_latent, bias = False)

        self.temperature = nn.Parameter(torch.tensor(1.))
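# The assert and patch_dim arithmetic above imply the standard ViT patchify.
# A sketch of the reshape that produces the (batch, num_patches, patch_dim)
# input expected by to_visual_embedding, using the defaults (256 image,
# 32 patch, 3 channels):
import torch

B, C, H, W, p = 2, 3, 256, 256, 32
img = torch.randn(B, C, H, W)
patches = img.unfold(2, p, p).unfold(3, p, p)    # (B, C, H//p, W//p, p, p)
patches = patches.permute(0, 2, 3, 1, 4, 5).reshape(B, (H // p) * (W // p), C * p * p)
print(patches.shape)                              # torch.Size([2, 64, 3072])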
Example #3
    def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.):
        super(BiDAF, self).__init__()
        self.hidden_size = 2 * hidden_size  # doubled: word and char embeddings are concatenated
        self.drop_prob = drop_prob
        self.emb = layers.Embedding(word_vectors=word_vectors,
                                    char_vectors=char_vectors,
                                    hidden_size=hidden_size,
                                    drop_prob=drop_prob)

        self.enc = Encoder(dim=self.hidden_size,
                           depth=1,
                           heads=3,
                           ff_glu=True,
                           ff_dropout=self.drop_prob,
                           attn_dropout=self.drop_prob,
                           use_scalenorm=True,
                           position_infused_attn=True)

        self.att = layers.TBiDAFAttention(hidden_size=self.hidden_size,
                                          drop_prob=drop_prob)

        self.mod = Encoder(dim=2 * self.hidden_size,
                           depth=3,
                           heads=6,
                           ff_glu=True,
                           ff_dropout=self.drop_prob,
                           attn_dropout=self.drop_prob,
                           use_scalenorm=True,
                           position_infused_attn=True)

        self.out = layers.BiDAFOutput(hidden_size=self.hidden_size,
                                      drop_prob=self.drop_prob)
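# The Encoder used throughout these snippets is x-transformers' attention
# stack, which is shape-preserving over (batch, seq, dim). A quick sanity
# sketch with arbitrary sizes (the kwargs mirror those used above):
import torch
from x_transformers import Encoder

enc = Encoder(dim=200, depth=1, heads=8, ff_glu=True, use_scalenorm=True)
x = torch.randn(2, 40, 200)
print(enc(x).shape)    # torch.Size([2, 40, 200])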
Example #4
    def __init__(self, num_classes=2, shuff_type="shuf", trans_dim=192):
        super(trans_shufflenet, self).__init__()
        self.trans_dim = trans_dim
        self.input_channel = 3
        self.num_output = num_classes
        param = {"class_num": num_classes, "channel_ratio": 1}
        self.upsample = nn.UpsamplingBilinear2d(scale_factor=2)

        # select the ShuffleNet backbone variant
        if shuff_type == "shuf":
            self.shufflenet = torchvision.models.shufflenet_v2_x1_0()
        elif shuff_type == "shuf_se":
            self.shufflenet = ShuffleNetV2SE(param)
        elif shuff_type == "shuf_k5_liteconv":
            self.shufflenet = ShuffleNetV2K5Lite(param)
        elif shuff_type == "shuf_liteconv":
            self.shufflenet = ShuffleNetV2LiteConv(param)
        elif shuff_type == "shuf_k5":
            self.shufflenet = ShuffleNetV2K5(param)
        elif shuff_type == "shuf_csp":
            self.shufflenet = ShuffleNetV2CSP(param)
        elif shuff_type == "shuf_sk":
            self.shufflenet = ShuffleNetV2SK(param)
        else:
            raise ValueError(f"unknown shuff_type: {shuff_type}")
        print(shuff_type)

        self.up1 = UP(self.trans_dim, self.trans_dim // 2)
        # further upsampling stages, currently unused:
        # self.up2 = UP(self.trans_dim // 2, self.trans_dim // 4)
        # self.up3 = UP(self.trans_dim // 16, self.trans_dim // 64)
        # self.up4 = UP(self.trans_dim // 64, self.trans_dim // 256)

        self.Vit = ViT(
            dim=self.trans_dim,
            image_size=256,
            patch_size=32,
            num_classes=2,
            transformer=Encoder(
                dim=self.trans_dim,      # must match the wrapper dim
                depth=12,
                heads=8,
                ff_glu=True,             # feed-forward GLU variant https://arxiv.org/abs/2002.05202
                residual_attn=True       # residual attention https://arxiv.org/abs/2012.11747
            )
        )
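# A rough usage sketch; it assumes the ShuffleNet variants, UP, ViT and
# Encoder referenced above are importable in this module:
import torch

model = trans_shufflenet(num_classes=2, shuff_type="shuf", trans_dim=192)
img = torch.randn(1, 3, 256, 256)    # image_size=256, patch_size=32 -> 64 patches
logits = model.Vit(img)              # (1, 2) logits from the ViT branch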
Example #5
    def __init__(self, hidden_size, drop_prob):
        super(BiDAFOutput, self).__init__()
        self.att_linear_1 = nn.Linear(2 * hidden_size, 1)
        self.mod_linear_1 = nn.Linear(2 * hidden_size, 1)

        # previous RNN-based encoder, kept for reference:
        # self.rnn = RNNEncoder(input_size=2 * hidden_size,
        #                       hidden_size=hidden_size,
        #                       num_layers=1,
        #                       drop_prob=drop_prob)

        self.rnn = Encoder(dim=2 * hidden_size,
                           depth=1,
                           heads=8,
                           ff_glu=True,
                           ff_dropout=drop_prob,
                           attn_dropout=drop_prob,
                           use_scalenorm=True,
                           position_infused_attn=True)

        self.att_linear_2 = nn.Linear(2 * hidden_size, 1)
        self.mod_linear_2 = nn.Linear(2 * hidden_size, 1)

        self.hidden_size = hidden_size
Example #6
    def __init__(self, num_classes=2):
        super(stn_trans_shufflenet, self).__init__()
        self.input_channel = 3
        self.num_output = num_classes
        self.shufflenet = torchvision.models.shufflenet_v2_x1_0(
            pretrained=False, num_classes=num_classes)
        self.Vit = ViT(
            dim=192,
            image_size=args.img_size,
            patch_size=8,
            num_classes=2,
            transformer=Encoder(
                dim=192,             # must match the wrapper dim
                depth=12,
                heads=8,
                ff_glu=True,         # feed-forward GLU variant https://arxiv.org/abs/2002.05202
                residual_attn=True   # residual attention https://arxiv.org/abs/2012.11747
            ))
        # spatial transformer localization network
        self.localization = nn.Sequential(nn.Conv2d(3, 24, kernel_size=7),
                                          nn.MaxPool2d(2, stride=2),
                                          nn.ReLU(True),
                                          nn.Conv2d(24, 36, kernel_size=5),
                                          nn.MaxPool2d(2, stride=2),
                                          nn.ReLU(True))

        # regressor for the 3 * 2 affine matrix
        self.fc_loc = nn.Sequential(nn.Linear(36 * 10 * 10, 32), nn.ReLU(True),
                                    nn.Linear(32, 3 * 2))

        # initialize the weights/bias with the identity transformation
        self.fc_loc[2].weight.data.zero_()
        self.fc_loc[2].bias.data.copy_(
            torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))
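# The localization network and fc_loc regressor above follow the standard
# spatial-transformer recipe, but the snippet stops before the forward pass.
# A minimal sketch of the usual stn step; the method name and the 10x10
# feature-map assumption are hypothetical, not from the source:
import torch.nn.functional as F

def stn(self, x):
    xs = self.localization(x)                  # assumed (B, 36, 10, 10)
    xs = xs.view(xs.size(0), -1)               # flatten to (B, 3600)
    theta = self.fc_loc(xs).view(-1, 2, 3)     # per-sample 2x3 affine matrix
    grid = F.affine_grid(theta, x.size(), align_corners=False)
    return F.grid_sample(x, grid, align_corners=False)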
Example #7
    def __init__(self,
                 model_train='tf_efficientnetv2_b0',
                 num_classes=3,
                 diffaug_activate=False,
                 policy='color,translation',
                 aug=None):
        super().__init__()

        #############################################
        # EfficientNet b0-b7 share one factory; select the variant by name
        if model_train in {f'efficientnet-b{i}' for i in range(8)}:
            self.netD = EfficientNet.from_pretrained(model_train,
                                                     num_classes=num_classes)

        elif model_train == 'mobilenetv3_small':
            from arch.mobilenetv3_arch import MobileNetV3
            self.netD = MobileNetV3(n_class=num_classes,
                                    mode='small',
                                    input_size=256)
        elif model_train == 'mobilenetv3_large':
            from arch.mobilenetv3_arch import MobileNetV3
            self.netD = MobileNetV3(n_class=num_classes,
                                    mode='large',
                                    input_size=256)

        elif model_train == 'resnet50':
            from arch.resnet_arch import resnet50
            self.netD = resnet50(num_classes=num_classes,
                                 pretrain=cfg['pretrain'])
        elif model_train == 'resnet101':
            from arch.resnet_arch import resnet101
            self.netD = resnet101(num_classes=num_classes,
                                  pretrain=cfg['pretrain'])
        elif model_train == 'resnet152':
            from arch.resnet_arch import resnet152
            self.netD = resnet152(num_classes=num_classes,
                                  pretrain=cfg['pretrain'])

        #############################################
        elif model_train == 'ViT':
            from vit_pytorch import ViT
            self.netD = ViT(image_size=256,
                            patch_size=32,
                            num_classes=num_classes,
                            dim=1024,
                            depth=6,
                            heads=16,
                            mlp_dim=2048,
                            dropout=0.1,
                            emb_dropout=0.1)

        elif model_train == 'DeepViT':
            from vit_pytorch.deepvit import DeepViT
            self.netD = DeepViT(image_size=256,
                                patch_size=32,
                                num_classes=num_classes,
                                dim=1024,
                                depth=6,
                                heads=16,
                                mlp_dim=2048,
                                dropout=0.1,
                                emb_dropout=0.1)

        #############################################

        elif model_train.startswith('RepVGG-'):
            # every RepVGG variant exposes a create_RepVGG_<suffix> factory
            import arch.RepVGG_arch as repvgg_arch
            create_fn = getattr(repvgg_arch,
                                'create_' + model_train.replace('-', '_'))
            self.netD = create_fn(deploy=False, num_classes=num_classes)

        #############################################

        elif model_train == 'squeezenet_1_0':
            from arch.squeezenet_arch import SqueezeNet
            self.netD = SqueezeNet(num_classes=num_classes, version='1_0')

        elif model_train == 'squeezenet_1_1':
            from arch.squeezenet_arch import SqueezeNet
            self.netD = SqueezeNet(num_classes=num_classes, version='1_1')
        #############################################
        elif model_train == 'vgg11':
            from arch.vgg_arch import create_vgg11
            self.netD = create_vgg11(num_classes, pretrained=cfg['pretrain'])
        elif model_train == 'vgg13':
            from arch.vgg_arch import create_vgg13
            self.netD = create_vgg13(num_classes, pretrained=cfg['pretrain'])
        elif model_train == 'vgg16':
            from arch.vgg_arch import create_vgg16
            self.netD = create_vgg16(num_classes, pretrained=cfg['pretrain'])
        elif model_train == 'vgg19':
            from arch.vgg_arch import create_vgg19
            self.netD = create_vgg19(num_classes, pretrained=cfg['pretrain'])

        #############################################
        elif model_train == 'SwinTransformer':
            from swin_transformer_pytorch import SwinTransformer

            self.netD = SwinTransformer(hidden_dim=96,
                                        layers=(2, 2, 6, 2),
                                        heads=(3, 6, 12, 24),
                                        channels=3,
                                        num_classes=num_classes,
                                        head_dim=32,
                                        window_size=8,
                                        downscaling_factors=(4, 2, 2, 2),
                                        relative_pos_embedding=True)

        elif model_train == 'effV2':
            if cfg['size'] == "s":
                from arch.efficientnetV2_arch import effnetv2_s
                self.netD = effnetv2_s(num_classes=num_classes)
            elif cfg['size'] == "m":
                from arch.efficientnetV2_arch import effnetv2_m
                self.netD = effnetv2_m(num_classes=num_classes)
            elif cfg['size'] == "l":
                from arch.efficientnetV2_arch import effnetv2_l
                self.netD = effnetv2_l(num_classes=num_classes)
            elif cfg['size'] == "xl":
                from arch.efficientnetV2_arch import effnetv2_xl
                self.netD = effnetv2_xl(num_classes=num_classes)

        elif model_train == 'x_transformers':
            from x_transformers import ViTransformerWrapper, Encoder
            self.netD = ViTransformerWrapper(image_size=cfg['image_size'],
                                             patch_size=cfg['patch_size'],
                                             num_classes=num_classes,
                                             attn_layers=Encoder(
                                                 dim=cfg['dim'],
                                                 depth=cfg['depth'],
                                                 heads=cfg['heads'],
                                             ))

        elif model_train == 'mobilevit':
            if cfg['model_size'] == "xxs":
                from arch.mobilevit_arch import mobilevit_xxs
                self.netD = mobilevit_xxs(num_classes=num_classes)
            elif cfg['model_size'] == "xs":
                from arch.mobilevit_arch import mobilevit_xs
                self.netD = mobilevit_xs(num_classes=num_classes)
            elif cfg['model_size'] == "x":
                from arch.mobilevit_arch import mobilevit_s
                self.netD = mobilevit_s(num_classes=num_classes)

        elif model_train == 'hrt':
            from arch.hrt_arch import HighResolutionTransformer
            self.netD = HighResolutionTransformer(num_classes)

        elif model_train == 'volo':
            # volo_d1 ... volo_d5 share a signature; select by cfg['model_size']
            import arch.volo_arch as volo_arch
            self.netD = getattr(volo_arch, cfg['model_size'])(
                pretrained=cfg['pretrain'], num_classes=num_classes)

        elif model_train == 'pvt_v2':
            # pvt_v2_b0 ... pvt_v2_b5 (and pvt_v2_b2_li) share a signature
            import arch.pvt_v2_arch as pvt_v2_arch
            self.netD = getattr(pvt_v2_arch, cfg['model_size'])(
                pretrained=cfg['pretrain'], num_classes=num_classes)

        elif model_train == 'ConvMLP':
            if cfg['model_size'] == "convmlp_s":
                from arch.ConvMLP_arch import convmlp_s
                self.netD = convmlp_s(pretrained=cfg['pretrain'],
                                      num_classes=num_classes)
            elif cfg['model_size'] == "convmlp_m":
                from arch.ConvMLP_arch import convmlp_m
                self.netD = convmlp_m(pretrained=cfg['pretrain'],
                                      num_classes=num_classes)
            elif cfg['model_size'] == "convmlp_l":
                from arch.ConvMLP_arch import convmlp_l
                self.netD = convmlp_l(pretrained=cfg['pretrain'],
                                      num_classes=num_classes)

        elif model_train == 'FocalTransformer':
            from arch.focal_transformer_arch import FocalTransformer
            self.netD = FocalTransformer(num_classes=num_classes)

        elif model_train == 'mobile_former':
            from arch.mobile_former_arch import MobileFormer, config_52, config_294, config_508
            if cfg['model_size'] == "config_52":
                self.netD = MobileFormer(config_52)
            elif cfg['model_size'] == "config_294":
                self.netD = MobileFormer(config_294)
            elif cfg['model_size'] == "config_508":
                self.netD = MobileFormer(config_508)

        elif model_train == 'poolformer':
            # poolformer_s12 ... poolformer_m48 share a signature
            import arch.poolformer_arch as poolformer_arch
            self.netD = getattr(poolformer_arch, cfg['model_size'])(
                pretrained=True, num_classes=num_classes)

        elif model_train == 'timm':
            import timm
            self.netD = timm.create_model(cfg['model_choise'],
                                          num_classes=num_classes,
                                          pretrained=True)
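# A registry of builder callables is a common alternative to the elif chain
# above. The MODEL_BUILDERS mapping and build_model helper are hypothetical;
# only the two squeezenet entries are shown, mirroring the branches above:
from arch.squeezenet_arch import SqueezeNet

MODEL_BUILDERS = {
    'squeezenet_1_0': lambda n: SqueezeNet(num_classes=n, version='1_0'),
    'squeezenet_1_1': lambda n: SqueezeNet(num_classes=n, version='1_1'),
    # ...register the remaining architectures the same way
}

def build_model(name, num_classes):
    try:
        return MODEL_BUILDERS[name](num_classes)
    except KeyError:
        raise ValueError(f'unknown model_train value: {name!r}')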

        # weights_init(self.netD, 'kaiming')  # only use this if there is no pretrain

        if aug == 'gridmix':
            from GridMixupLoss import GridMixupLoss
            self.criterion = GridMixupLoss(alpha=(0.4, 0.7),
                                           hole_aspect_ratio=1.,
                                           crop_area_ratio=(0.5, 1),
                                           crop_aspect_ratio=(0.5, 2),
                                           n_holes_x=(2, 6))
        elif aug == 'cutmix':
            from cutmix import cutmix
            self.criterion = cutmix(alpha=(0.4, 0.7),
                                    hole_aspect_ratio=1.,
                                    crop_area_ratio=(0.5, 1),
                                    crop_aspect_ratio=(0.5, 2),
                                    n_holes_x=(2, 6))

        self.aug = aug

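        # note: if aug selected a GridMixup/cutmix criterion above, the loss
        # chosen here replaces it whenever cfg['loss'] matches a branch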
        if cfg['loss'] == 'CenterLoss':
            from centerloss import CenterLoss
            self.criterion = CenterLoss(num_classes=num_classes,
                                        feat_dim=2,
                                        use_gpu=True)
        elif cfg['loss'] == 'normal':
            self.criterion = torch.nn.CrossEntropyLoss()

        self.accuracy = []
        self.losses = []
        self.diffaug_activate = diffaug_activate
        self.accuracy_val = []
        self.losses_val = []

        self.policy = policy
        self.iter_check = 0

        if cfg['aug'] == 'MuAugment':
            rand_augment = BatchRandAugment(N_TFMS=3,
                                            MAGN=3,
                                            mean=cfg['means'],
                                            std=cfg['std'])
            self.mu_transform = MuAugment(rand_augment,
                                          N_COMPS=4,
                                          N_SELECTED=2)
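# For completeness: the MuAugment transform built above is typically applied
# inside the training step. The call pattern below follows the MuarAugment
# README as best recalled and should be treated as an assumption, not this
# repo's verified API:
#
#     def training_step(self, batch, batch_idx):
#         if cfg['aug'] == 'MuAugment':
#             self.mu_transform.setup(self)    # model needed to score candidates
#             images, labels = self.mu_transform((batch['input'], batch['target']))
#         ...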