def __init__(self, **kwargs):
    """Build the ViT classifier.

    Expects ``kwargs["contains_unknown"]``: when True the classifier gets a
    third "unknown" class, otherwise it is a binary classifier.  All other
    ViT hyper-parameters are identical between the two variants, so the two
    original copy-pasted branches are collapsed into a single constructor
    call that differs only in ``num_classes``.
    """
    super().__init__()
    self.contains_unknown = kwargs["contains_unknown"]
    self.classifier = ViT(
        image_size=300,
        patch_size=1,
        num_classes=3 if self.contains_unknown else 2,
        dim=128,
        depth=12,
        heads=8,
        channels=1,
        mlp_dim=256,
        dropout=0.1,
        emb_dropout=0.1,
    )
class VisualTrans(nn.Module):
    """Modified ViT operating on 1-D signals shaped as (1, 962) "images".

    Weights are restored from ``file_path`` on construction; a state-dict
    mismatch is reported and ignored rather than raised (best-effort load).
    """

    def __init__(self, file_path):
        super(VisualTrans, self).__init__()
        self.file_path = file_path
        self.model = ViT_modified(
            n_classes=1,
            image_size=(1, 962),  # image size is a tuple of (height, width)
            patch_size=(1, 13),   # patch size is a tuple of (height, width)
            dim=16,
            depth=3,
            heads=16,
            mlp_dim=512,
            dropout=0.1,
            emb_dropout=0.1)
        state_dict = torch.load(self.file_path, map_location='cpu')
        # Removed unused ``new_state_dict = OrderedDict()`` leftover.
        try:
            self.model.load_state_dict(state_dict)
        except RuntimeError as e:
            # Best-effort restore: continue with whatever weights matched.
            print('Ignoring test_dataset_size "' + str(e) + '"')

    def forward(self, inpt):
        """Run the model on ``inpt = (theta, x)``.

        Fixed: the original used in-place ``unsqueeze_``, silently mutating
        the caller's tensors; out-of-place ``unsqueeze`` preserves them.
        """
        theta, x = inpt
        theta = theta.unsqueeze(1).unsqueeze(1)
        x = x.unsqueeze(1).unsqueeze(1)
        # Pad the last dimension by 2 on the right so widths line up for cat.
        x = torch.nn.functional.pad(x, (0, 2))
        inp = torch.cat((theta, x), 3)
        out = self.model(inp)[0]  # another [0] - when the n=2
        return out
def __init__(self, face_recognition_cnn_path=None):
    """2-D encoder: a sliced face-recognition ResNet backbone plus a ViT.

    Args:
        face_recognition_cnn_path: optional checkpoint path.  When given,
            the CNN is wrapped in DataParallel before loading (the saved
            state-dict keys carry the ``module.`` prefix), so the backbone
            is then reached via ``.module``.

    The original checked the path twice (``is not None`` and truthiness)
    and duplicated the slicing logic; the two branches are merged.
    """
    super(Encoder2DViT, self).__init__()
    face_cnn = FaceRecognitionCNN()
    if face_recognition_cnn_path is not None:
        face_cnn = nn.DataParallel(face_cnn)
        state_dict = torch.load(face_recognition_cnn_path, map_location='cpu')
        face_cnn.load_state_dict(state_dict)
        resnet = face_cnn.module.resnet
    else:
        resnet = face_cnn.resnet
    # Keep everything except the last 12 children of the backbone.
    self.encoder2d = nn.Sequential(*list(resnet.children())[:-12])
    del face_cnn  # release the rest of the pretrained model
    self.vit = ViT(image_size=IMG_SIZE, patch_size=PATCH_SIZE, num_classes=5,
                   dim=DIM, depth=DEPTH, heads=HEADS, mlp_dim=MLP_DIM,
                   dropout=0.1, emb_dropout=0.1, channels=CHANNELS)
def __init__(self):
    """Encoder: three conv/pool/LeakyReLU stages, a 3-channel projection
    conv, and two ViT feature extractors at different resolutions."""
    super(Encoder, self).__init__()
    # Stage 1: 3 -> 96 channels, spatial /2.
    self.conv1 = nn.Conv2d(3, 96, kernel_size=3, stride=1, padding=1)
    self.pool1 = nn.MaxPool2d(2)
    self.relu1 = nn.LeakyReLU()
    # Stage 2: 96 -> 128 channels, spatial /2.
    self.conv2 = nn.Conv2d(96, 128, kernel_size=3, stride=1, padding=1)
    self.pool2 = nn.MaxPool2d(2)
    self.relu2 = nn.LeakyReLU()
    # Stage 3: 128 -> 256 channels, spatial /2.
    self.conv3 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
    self.pool3 = nn.MaxPool2d(2)
    self.relu3 = nn.LeakyReLU()
    # Projection back to 3 channels for the ViT input.
    self.conv8 = nn.Conv2d(256, 3, kernel_size=3, stride=1, padding=1)
    # Coarse (128px) and fine (16px) ViT feature extractors.
    self.v1 = ViT(image_size=128, patch_size=32, num_classes=768,
                  dim=1024, depth=6, heads=16, mlp_dim=2048,
                  dropout=0.1, emb_dropout=0.1)
    self.v2 = ViT(image_size=16, patch_size=4, num_classes=64,
                  dim=1024, depth=6, heads=16, mlp_dim=2048,
                  dropout=0.1, emb_dropout=0.1)
def __init__(self):
    """A single-block, single-head ViT over 16x16 inputs followed by a
    linear head mapping its 256-d output to a flattened 3x32x32 image."""
    super(Model, self).__init__()
    self.transformer = ViT(
        image_size=16,
        patch_size=16,
        num_classes=16 ** 2,
        dim=256,
        depth=1,
        heads=1,
        mlp_dim=256,
    )
    self.head = torch.nn.Linear(256, 3 * 32 * 32)
def __init__(self, num_classes):
    """ViT wrapper.

    Fixed: the ``num_classes`` argument was accepted but ignored — the
    classification head was hard-coded to 12 classes.  It is now forwarded
    to the underlying ViT, matching the parameter's obvious intent.
    """
    super(vit, self).__init__()
    self.Vtrans = ViT(image_size=224, patch_size=32, num_classes=num_classes,
                      dim=1024, depth=8, heads=16, mlp_dim=2048,
                      dropout=0.1, emb_dropout=0.1, channels=3)
def test():
    """Smoke-test: a forward pass through a stock ViT yields (1, 1000) logits."""
    v = ViT(
        image_size=256,
        patch_size=32,
        num_classes=1000,
        dim=1024,
        depth=6,
        heads=16,
        mlp_dim=2048,
        dropout=0.1,
        emb_dropout=0.1,
    )
    img = torch.randn(1, 3, 256, 256)
    preds = v(img)
    # Fixed: the assert message is printed on *failure*, so it must describe
    # the error (the original said 'correct logits outputted').
    assert preds.shape == (1, 1000), 'unexpected logits shape: %s' % (tuple(preds.shape),)
def Objective(trial):
    """Optuna objective: sample ViT hyper-parameters, train, return val accuracy.

    Relies on module-level globals: ``device``, ``gamma``, ``epochs``,
    ``train_loader``, ``test_loader``, ``train()``, ``test()``, ``wandb``.
    Removed the dead ``if 0: torch.save(...)`` block from the epoch loop.
    """
    dim = trial.suggest_categorical('dim', [32, 64, 128])
    # patch_size = trial.suggest_int('patch_size', 7, 14, 7)
    patch_size = 7  # fixed: 28x28 MNIST images -> 4x4 grid of 7px patches
    depth = trial.suggest_categorical('depth', [8, 16, 32])
    heads = trial.suggest_categorical('heads', [8, 16, 32])
    mlp_dim = trial.suggest_categorical('mlp_dim', [128, 512, 1024])
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    print('dim:', dim, 'mlp_dim:', mlp_dim, 'depth:', depth, 'heads:', heads)
    model = ViT(
        dim=dim,
        image_size=28,
        patch_size=patch_size,
        num_classes=10,
        depth=depth,      # number of transformer blocks
        heads=heads,      # number of multi-channel attention heads
        mlp_dim=mlp_dim,
        channels=1,       # MNIST is grayscale
    )
    # vanilla cnn baseline: 0.96
    # model = Net()
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)
    scheduler = StepLR(optimizer, step_size=1, gamma=gamma)
    for epoch in range(1, epochs + 1):
        train(model, criterion, device, train_loader, optimizer, epoch)
        val_acc = test(model, device, test_loader)
        scheduler.step()
        trial.report(val_acc, epoch)
        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
        wandb.log({'val_acc': val_acc})
    return val_acc
def __init__(self, image_size, patch_size, num_classes, dim=1024, depth=6,
             heads=16, mlp_dim=2048, dropout=0.1, emb_dropout=0.1):
    """Thin wrapper owning a single ViT built from the given hyper-parameters."""
    super(BaseVit, self).__init__()
    vit_config = dict(
        image_size=image_size,
        patch_size=patch_size,
        num_classes=num_classes,
        dim=dim,
        depth=depth,
        heads=heads,
        mlp_dim=mlp_dim,
        dropout=dropout,
        emb_dropout=emb_dropout,
    )
    self.vit = ViT(**vit_config)
def __init__(self, width: int, height: int, action_dim: int, motion_blur: int = 4):
    """Policy network: a compact ViT over 4-channel 64x64 observations
    emitting ``action_dim`` logits.

    Args:
        width: observation width (stored, not used to size the encoder).
        height: observation height (stored, not used to size the encoder).
        action_dim: number of output logits.
        motion_blur: number of stacked frames (stored for callers).
    """
    super(PolicyModelVIT, self).__init__()
    self.width = width
    self.height = height
    self.motion_blur = motion_blur
    self.image_encoder = ViT(
        image_size=64,
        patch_size=8,
        num_classes=action_dim,
        dim=128,
        depth=2,
        channels=4,
        heads=3,
        mlp_dim=256,
        dropout=0,
        emb_dropout=0,
    )
def __init__(self, file_path):
    """Load a modified ViT for (1, 962) signals from ``file_path``.

    A state-dict mismatch is printed and ignored (best-effort restore).
    Removed the unused ``new_state_dict = OrderedDict()`` leftover.
    """
    super(VisualTrans, self).__init__()
    self.file_path = file_path
    self.model = ViT_modified(
        n_classes=1,
        image_size=(1, 962),  # image size is a tuple of (height, width)
        patch_size=(1, 13),   # patch size is a tuple of (height, width)
        dim=16,
        depth=3,
        heads=16,
        mlp_dim=512,
        dropout=0.1,
        emb_dropout=0.1)
    state_dict = torch.load(self.file_path, map_location='cpu')
    try:
        self.model.load_state_dict(state_dict)
    except RuntimeError as e:
        # Best-effort restore: continue with whatever weights matched.
        print('Ignoring test_dataset_size "' + str(e) + '"')
# dim=768, ## seq_len=49+1, # 7x7 patches + 1 cls-token # seq_len=4+1, # 2x2 patches + 1 cls-token ## depth=1, # depth=12, ## heads=8, # heads=12, # k=256 #) # ViT-B model = ViT( dim=768, depth=12, heads=12, mlp_dim=3072, image_size=128, patch_size=16, num_classes=8, channels=3, ).to(device) ## ViT-L #model = ViT( # dim = 1024, # depth = 24, # heads = 16, # mlp_dim = 4096, # image_size=128, # patch_size=16, # num_classes=8, # channels=3,
def __init__(self, model_train='tf_efficientnetv2_b0', num_classes=3,
             diffaug_activate=False, policy='color,translation', aug=None):
    """Classifier/discriminator factory plus training-time loss/aug setup.

    ``model_train`` selects the backbone architecture; most per-architecture
    sub-options come from the module-level ``cfg`` dict.  The original was
    one huge elif chain with heavy copy-paste; families whose members differ
    only by a suffix (EfficientNet, ResNet, RepVGG, VGG, effV2, volo,
    pvt_v2, ConvMLP, poolformer) are resolved by name lookup instead.
    An unknown ``model_train`` leaves ``self.netD`` unset, as before;
    a misspelled sub-option now fails fast with AttributeError instead of
    silently leaving ``self.netD`` unset.
    """
    super().__init__()
    #############################################
    if model_train.startswith('efficientnet-b'):
        # efficientnet-b0 .. efficientnet-b7 map 1:1 onto pretrained names.
        self.netD = EfficientNet.from_pretrained(model_train, num_classes=num_classes)
    elif model_train == 'mobilenetv3_small':
        from arch.mobilenetv3_arch import MobileNetV3
        self.netD = MobileNetV3(n_class=num_classes, mode='small', input_size=256)
    elif model_train == 'mobilenetv3_large':
        from arch.mobilenetv3_arch import MobileNetV3
        self.netD = MobileNetV3(n_class=num_classes, mode='large', input_size=256)
    elif model_train in ('resnet50', 'resnet101', 'resnet152'):
        import arch.resnet_arch as resnet_arch
        self.netD = getattr(resnet_arch, model_train)(
            num_classes=num_classes, pretrain=cfg['pretrain'])
    #############################################
    elif model_train == 'ViT':
        from vit_pytorch import ViT
        self.netD = ViT(image_size=256, patch_size=32, num_classes=num_classes,
                        dim=1024, depth=6, heads=16, mlp_dim=2048,
                        dropout=0.1, emb_dropout=0.1)
    elif model_train == 'DeepViT':
        from vit_pytorch.deepvit import DeepViT
        self.netD = DeepViT(image_size=256, patch_size=32, num_classes=num_classes,
                            dim=1024, depth=6, heads=16, mlp_dim=2048,
                            dropout=0.1, emb_dropout=0.1)
    #############################################
    elif model_train.startswith('RepVGG-'):
        # e.g. 'RepVGG-B1g2' -> arch.RepVGG_arch.create_RepVGG_B1g2
        import arch.RepVGG_arch as repvgg_arch
        create = getattr(repvgg_arch, 'create_RepVGG_' + model_train.split('-', 1)[1])
        self.netD = create(deploy=False, num_classes=num_classes)
    #############################################
    elif model_train == 'squeezenet_1_0':
        from arch.squeezenet_arch import SqueezeNet
        self.netD = SqueezeNet(num_classes=num_classes, version='1_0')
    elif model_train == 'squeezenet_1_1':
        from arch.squeezenet_arch import SqueezeNet
        self.netD = SqueezeNet(num_classes=num_classes, version='1_1')
    #############################################
    elif model_train in ('vgg11', 'vgg13', 'vgg16', 'vgg19'):
        import arch.vgg_arch as vgg_arch
        self.netD = getattr(vgg_arch, 'create_' + model_train)(
            num_classes, pretrained=cfg['pretrain'])
    #############################################
    elif model_train == 'SwinTransformer':
        from swin_transformer_pytorch import SwinTransformer
        self.netD = SwinTransformer(hidden_dim=96, layers=(2, 2, 6, 2),
                                    heads=(3, 6, 12, 24), channels=3,
                                    num_classes=num_classes, head_dim=32,
                                    window_size=8,
                                    downscaling_factors=(4, 2, 2, 2),
                                    relative_pos_embedding=True)
    elif model_train == 'effV2':
        # cfg['size'] in {'s', 'm', 'l', 'xl'} -> effnetv2_<size>
        import arch.efficientnetV2_arch as effv2_arch
        self.netD = getattr(effv2_arch, 'effnetv2_' + cfg['size'])(num_classes=num_classes)
    elif model_train == 'x_transformers':
        from x_transformers import ViTransformerWrapper, Encoder
        self.netD = ViTransformerWrapper(
            image_size=cfg['image_size'], patch_size=cfg['patch_size'],
            num_classes=num_classes,
            attn_layers=Encoder(dim=cfg['dim'], depth=cfg['depth'], heads=cfg['heads']))
    elif model_train == 'mobilevit':
        # NOTE(review): cfg['model_size'] == "x" selected mobilevit_s in the
        # original code — looks like a typo for "s"; behaviour preserved.
        import arch.mobilevit_arch as mobilevit_arch
        fn_name = {'xxs': 'mobilevit_xxs', 'xs': 'mobilevit_xs', 'x': 'mobilevit_s'}.get(cfg['model_size'])
        if fn_name is not None:
            self.netD = getattr(mobilevit_arch, fn_name)(num_classes=num_classes)
    elif model_train == 'hrt':
        from arch.hrt_arch import HighResolutionTransformer
        self.netD = HighResolutionTransformer(num_classes)
    elif model_train == 'volo':
        # cfg['model_size'] in {'volo_d1' .. 'volo_d5'}
        import arch.volo_arch as volo_arch
        self.netD = getattr(volo_arch, cfg['model_size'])(
            pretrained=cfg['pretrain'], num_classes=num_classes)
    elif model_train == 'pvt_v2':
        # cfg['model_size'] in {'pvt_v2_b0' .. 'pvt_v2_b5', 'pvt_v2_b2_li'}
        import arch.pvt_v2_arch as pvt_v2_arch
        self.netD = getattr(pvt_v2_arch, cfg['model_size'])(
            pretrained=cfg['pretrain'], num_classes=num_classes)
    elif model_train == 'ConvMLP':
        # cfg['model_size'] in {'convmlp_s', 'convmlp_m', 'convmlp_l'}
        import arch.ConvMLP_arch as convmlp_arch
        self.netD = getattr(convmlp_arch, cfg['model_size'])(
            pretrained=cfg['pretrain'], num_classes=num_classes)
    elif model_train == 'FocalTransformer':
        from arch.focal_transformer_arch import FocalTransformer
        self.netD = FocalTransformer(num_classes=num_classes)
    elif model_train == 'mobile_former':
        from arch.mobile_former_arch import MobileFormer, config_52, config_294, config_508
        configs = {'config_52': config_52, 'config_294': config_294, 'config_508': config_508}
        if cfg['model_size'] in configs:
            self.netD = MobileFormer(configs[cfg['model_size']])
    elif model_train == 'poolformer':
        # Original used repeated plain `if`s here; the sizes are mutually
        # exclusive, so a single lookup is equivalent.
        import arch.poolformer_arch as poolformer_arch
        self.netD = getattr(poolformer_arch, cfg['model_size'])(
            pretrained=True, num_classes=num_classes)
    elif model_train == 'timm':
        import timm
        self.netD = timm.create_model(cfg['model_choise'], num_classes=num_classes, pretrained=True)
    # weights_init(self.netD, 'kaiming')  # only use this if there is no pretrain

    # Mix-based augmentation losses (cfg['loss'] below may override criterion).
    if aug == 'gridmix':
        from GridMixupLoss import GridMixupLoss
        self.criterion = GridMixupLoss(alpha=(0.4, 0.7), hole_aspect_ratio=1.,
                                       crop_area_ratio=(0.5, 1),
                                       crop_aspect_ratio=(0.5, 2), n_holes_x=(2, 6))
    elif aug == 'cutmix':
        from cutmix import cutmix
        self.criterion = cutmix(alpha=(0.4, 0.7), hole_aspect_ratio=1.,
                                crop_area_ratio=(0.5, 1),
                                crop_aspect_ratio=(0.5, 2), n_holes_x=(2, 6))
    self.aug = aug
    if cfg['loss'] == 'CenterLoss':
        from centerloss import CenterLoss
        self.criterion = CenterLoss(num_classes=num_classes, feat_dim=2, use_gpu=True)
    elif cfg['loss'] == 'normal':
        self.criterion = torch.nn.CrossEntropyLoss()
    # Bookkeeping for train/val metrics and augmentation settings.
    self.accuracy = []
    self.losses = []
    self.diffaug_activate = diffaug_activate
    self.accuracy_val = []
    self.losses_val = []
    self.policy = policy
    self.iter_check = 0
    if cfg['aug'] == 'MuAugment':
        rand_augment = BatchRandAugment(N_TFMS=3, MAGN=3, mean=cfg['means'], std=cfg['std'])
        self.mu_transform = MuAugment(rand_augment, N_COMPS=4, N_SELECTED=2)
import torch
from vit_pytorch import ViT

# Build a 7-class ViT and push one random image through it.
v = ViT(
    image_size=256,
    patch_size=32,
    num_classes=7,
    dim=1024,
    depth=6,
    heads=16,
    mlp_dim=2048,
    dropout=0.1,
    emb_dropout=0.1,
)

img = torch.randn(1, 3, 256, 256)
preds = v(img)  # logits, shape (1, 7)
print(preds)
def main():
    """MNIST train/eval entry point for a ViT (or CNN baseline) classifier.

    Fixed: ``use_cuda`` was computed twice (before the loaders and again
    before the device selection); it is now computed once up front.
    """
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--image', default='dataset/test/0_000.png',
                        help='image to be classified')
    parser.add_argument('--lr', type=float, default=1e-3, metavar='S',
                        help='learning rate (default: 1e-3)')
    parser.add_argument('--batch-size', type=int, default=128, metavar='N',
                        help='input batch size for training (default: 128)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--layer-num', type=int, default=0, metavar='N',
                        help='which layer to visualize (default: 0)')
    parser.add_argument('--feature-num', type=int, default=0, metavar='N',
                        help='which feature of a layer to visualize (default: 0)')
    parser.add_argument('--train', action='store_true', default=False,
                        help='train the model (default: False)')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='save the current model (default: False)')
    parser.add_argument('--restore-model', default=None,
                        help='restore & eval this model file (default: False)')
    parser.add_argument('--normalize', action='store_true', default=False,
                        help='normalize input dataset (default: False)')
    parser.add_argument('--cnn', action='store_true', default=False,
                        help='use cnn model instead of transformer (default: False)')
    parser.add_argument('--visualize', action='store_true', default=False,
                        help='plot kernel and feature maps (default: False)')
    args = parser.parse_args()

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}

    if args.normalize:
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])
    else:
        transform = transforms.Compose([transforms.ToTensor()])
    x_train = datasets.MNIST(root='./data', train=True, download=True,
                             transform=transform)
    x_test = datasets.MNIST(root='./data', train=False, download=True,
                            transform=transform)
    DataLoader = torch.utils.data.DataLoader
    train_loader = DataLoader(x_train, shuffle=True,
                              batch_size=args.batch_size, **kwargs)
    test_loader = DataLoader(x_test, shuffle=False,
                             batch_size=args.batch_size, **kwargs)

    if args.cnn:
        model = CNNModel().to(device)
    else:
        model = ViT(
            image_size=28,
            patch_size=14,
            num_classes=10,
            dim=128,
            depth=6,
            heads=8,
            mlp_dim=128,
            channels=1,  # grayscale input
        ).to(device)
    if torch.cuda.device_count() > 1:
        print("Available GPUs:", torch.cuda.device_count())
        model = nn.DataParallel(model)
    print("Model:", model)
    print("Device:", device)
    # NOTE(review): --lr is parsed but never passed to the optimizer (Adam
    # default is used) — confirm whether that is intentional.
    optimizer = optim.Adam(model.parameters())
    start_time = datetime.datetime.now()
    best_top1 = 0
    best_top5 = 0
    if args.restore_model is not None:
        model.load_state_dict(torch.load(args.restore_model))
        best_top1, best_top5 = test(args, model, device, test_loader)
        print("Best Top 1: %0.2f%%, Top 5: %0.2f%%" % (best_top1, best_top5))
    if args.train:
        for epoch in range(1, args.epochs + 1):
            top1, top5 = train(args, model, device, train_loader,
                               test_loader, optimizer, epoch)
            if top1 > best_top1:
                print("New best Top 1: %0.2f%%, Top 5: %0.2f%%" % (top1, top5))
                best_top1 = top1
                best_top5 = top5
                if args.save_model:
                    filename = "cnn-mnist.pth" if args.cnn else "transformer-mnist.pth"
                    torch.save(model.state_dict(), filename)
                    print("Saving best model on file: ", filename)
        print("Best Top 1: %0.2f%%, Top 5: %0.2f%% in %d epochs"
              % (best_top1, best_top5, args.epochs))
        elapsed_time = datetime.datetime.now() - start_time
        print("Elapsed time (train): %s" % elapsed_time)
    if args.visualize:
        viz_features(args, model)
import numpy as np
import matplotlib.pyplot as plt

batch_size = 256  # folder num
# image_num = batch_size * 32
num_workers = 8  # False
epoch = 1000

# Timestamped TensorBoard run directory for this launch.
dir_name = t.strftime('~%Y%m%d~%H%M%S', t.localtime(t.time()))
log_train = './log/' + dir_name + '/train'
writer = SummaryWriter(log_train)

# Small-layer ViT variant; original larger values noted inline.
v = ViT(
    image_size=128,  # 256
    patch_size=8,    # 32
    num_classes=2,
    dim=512,         # 1024
    depth=2,
    heads=4,
    mlp_dim=2048,
    channels=1,
    dropout=0.3,
    emb_dropout=0.3,
)

# pytorch_total_params = sum(p.numel() for p in v.parameters())
# print(pytorch_total_params)

##### Hyperparams #####
# bce = nn.BCELoss()
criterion = nn.CrossEntropyLoss()
sigmoid = nn.Sigmoid()
opt = torch.optim.Adam(v.parameters(), lr=3e-4)
# Select a model architecture by CLI name.  Relies on names imported
# elsewhere in this file (resnet, ViT, DistillableViT, DistillWrapper,
# LambdaResNet18, VoT_config, config, nClass, args).
model_name = args.model.lower()
if model_name == "resnet20":
    model = resnet.resnet20()
elif model_name == "resnet32":
    model = resnet.resnet32()
elif model_name == "resnet44":
    model = resnet.resnet44()
elif model_name == "resnet56":
    model = resnet.resnet56()
elif model_name == "resnet110":
    model = resnet.resnet110()
elif model_name == "vit":
    # hidden=256, very BAD!!!
    # lr=0.001 nearly same
    model = ViT(image_size = 32,patch_size = 4,num_classes = nClass,dim = 21,depth = 6,heads = 3,ff_hidden = 128,dropout = 0,emb_dropout = 0.1)
    # model = ImageTransformer(image_size=32, patch_size=4, num_classes=nClass, channels=3,dim=64, depth=6, heads=8, mlp_dim=128)
    # model = ViT(image_size = 256,patch_size = 32,num_classes = 1000,dim = 1024,depth = 6,eads = 16,mlp_dim = 2048,dropout = 0.1,emb_dropout = 0.1)  #24 overfit
elif model_name == "distiller":
    # Knowledge distillation: pretrained ResNet-50 teacher, small ViT student.
    teacher = resnet50(pretrained = True)
    teacher.cuda()
    model = DistillableViT(image_size = 32,patch_size = 4,num_classes = nClass,dim = 64,depth = 6,heads = 8,mlp_dim = 128,dropout = 0.1,emb_dropout = 0.1)
    distiller = DistillWrapper(student = model,teacher = teacher,temperature = 3,alpha = 0.5)
elif model_name == "lamlay":
    # Lambda-layer ResNet; overrides batch size / weight decay for this arch.
    args.batch_size = 128; args.weight_decay=0.0001
    # model = lambda_resnet26()
    model = LambdaResNet18()
    args.log_dir=f"./logs/lamlay/"
elif model_name == "jaggi":
    VoT_config['use_attention'] = config.self_attention
def main():
    """Evaluate a (spherical) ViT checkpoint on a test split and print a
    confusion matrix plus a classification report.

    Fixed: ``confusion_matrix``/``classification_report`` were called as
    ``(P, T)`` — predictions in the ``y_true`` position.  sklearn's
    signature is ``(y_true, y_pred)``, so the calls now pass ``(T, P)``.
    Also fixed: ``--batch`` lacked ``type=int``, so a CLI-supplied value
    arrived as a string.
    """
    parser = argparse.ArgumentParser(description='ViT')
    parser.add_argument('--data_dir', default='data/sph_dogs_vs_cats')
    parser.add_argument('--dataset', default='dvsc')
    parser.add_argument('--resume', default='dvsc-sgd-regularmodel_last.pth')
    parser.add_argument('--set', default='test')
    parser.add_argument('--mode', default='regular')
    parser.add_argument('--batch', type=int, default=8)
    # NOTE(review): a string passed for --cuda is always truthy; consider
    # action='store_true'.
    parser.add_argument('--cuda', default=True)
    args = parser.parse_args()
    os.system('mkdir -p weights')
    dataset = {'smnist': SMNIST, 'dvsc': DVSC}
    if args.dataset == 'smnist':
        image_size = 60
        patch_size = 10
        num_classes = 10
        samp = 6
    elif args.dataset == 'dvsc':
        image_size = 384
        patch_size = 32
        num_classes = 2
        samp = 12
    if args.mode == 'normal':
        model = ViT(
            image_size=image_size,
            patch_size=patch_size,
            num_classes=num_classes,
            dim=512,
            depth=4,
            heads=8,
            mlp_dim=512,
            dropout=0.1,
            emb_dropout=0.1)
    else:
        model = ViT_sphere(
            image_size=image_size,
            patch_size=patch_size,
            num_classes=num_classes,
            dim=512,
            depth=4,
            heads=8,
            mlp_dim=512,
            base_order=1,
            mode=args.mode,  # face, vertex and regular
            samp=samp,
            dropout=0.1,
            emb_dropout=0.1)
    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    print("Trainable parameters", params)
    path = 'weights/'
    model = load_model(model, os.path.join(path, args.resume))
    cuda = args.cuda
    batch = args.batch
    test_data = dataset[args.dataset](args.data_dir, args.set, image_size,
                                      image_size, None)
    test_loader = DataLoader(dataset=test_data, batch_size=batch, shuffle=False)
    if cuda:
        model = model.cuda()
    model.eval()
    P = np.array([])
    T = np.array([])
    # df = pd.read_csv("dvsc.csv")
    for i, data in enumerate(tqdm(test_loader)):
        img, target = data
        if cuda:
            img = img.cuda()
            target = target.cuda()
        preds = model(img)
        probabilities = torch.nn.functional.softmax(preds, dim=1)
        preds = torch.argmax(probabilities, dim=1)
        P = np.concatenate([P, preds.cpu().numpy()])
        T = np.concatenate([T, target.cpu().numpy()])
    # sklearn expects (y_true, y_pred): T holds ground truth, P predictions.
    confusion = confusion_matrix(T, P)
    # df['pred_class'] = P
    # df.to_csv('dvsc_p_regular.csv')
    print('Confusion Matrix\n')
    print(confusion)
    print('\nClassification Report\n')
    print(classification_report(T, P, target_names=test_data.category))
gamma = 0.7
seed = 42
set_random_seeds(seed)

# Linformer attention stack driving the efficient ViT below.
efficient_transformer = Linformer(
    dim=128,
    seq_len=49 + 1,  # 7x7 patches + 1 cls-token
    depth=12,
    heads=8,
    k=64,
)

### change channels=6 for 6 input plans ####
l_model = ViT(
    dim=128,
    image_size=224,
    patch_size=32,
    num_classes=2,
    channels=6,
    transformer=efficient_transformer,
)

### change the class __init__ function to have more plans ###
v_model = ViT(
    image_size=256,
    patch_size=32,
    num_classes=1000,
    dim=1024,
    depth=6,
    heads=16,
    mlp_dim=2048,
    dropout=0.1,
    emb_dropout=0.1,
)

x = torch.randn(1, 6, 224, 224)  # can be any channels
def to_vit(self):
    """Materialise a plain ViT carrying this model's current weights."""
    plain = ViT(*self.args, **self.kwargs)
    plain.load_state_dict(self.state_dict())
    return plain
def main():
    """Export a trained classifier checkpoint to ONNX.

    Fixed: the RepVGG/squeezenet/vgg/Swin branches were copy-pasted from a
    class ``__init__`` and referenced undefined names (``model_train``,
    ``self.netD``, bare ``num_classes``) — selecting any of those
    architectures raised NameError.  All branches now consistently use
    ``args.*`` and the local ``netD``.  The deprecated ``Variable`` wrapper
    and unused mid-function imports were also dropped; architecture families
    that differ only by a name suffix are resolved by lookup.
    """
    # options
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_train', type=str, required=True)
    parser.add_argument('--model_path', type=str, required=True)
    parser.add_argument('--num_classes', type=int, required=True)
    parser.add_argument('--output_path', type=str, required=True)
    args = parser.parse_args()

    if args.model_train.startswith('efficientnet-b'):
        # efficientnet-b0 .. efficientnet-b7 map 1:1 onto pretrained names.
        netD = EfficientNet.from_pretrained(args.model_train,
                                            num_classes=args.num_classes)
    elif args.model_train == 'mobilenetv3_small':
        from arch.mobilenetv3_arch import MobileNetV3
        netD = MobileNetV3(n_class=args.num_classes, mode='small', input_size=256)
    elif args.model_train == 'mobilenetv3_large':
        from arch.mobilenetv3_arch import MobileNetV3
        netD = MobileNetV3(n_class=args.num_classes, mode='large', input_size=256)
    elif args.model_train in ('resnet50', 'resnet101', 'resnet152'):
        import arch.resnet_arch as resnet_arch
        netD = getattr(resnet_arch, args.model_train)(
            num_classes=args.num_classes, pretrain=True)
    #############################################
    elif args.model_train == 'ViT':
        from vit_pytorch import ViT
        netD = ViT(image_size=256, patch_size=32, num_classes=args.num_classes,
                   dim=1024, depth=6, heads=16, mlp_dim=2048,
                   dropout=0.1, emb_dropout=0.1)
    elif args.model_train == 'DeepViT':
        from vit_pytorch.deepvit import DeepViT
        netD = DeepViT(image_size=256, patch_size=32, num_classes=args.num_classes,
                       dim=1024, depth=6, heads=16, mlp_dim=2048,
                       dropout=0.1, emb_dropout=0.1)
    #############################################
    elif args.model_train.startswith('RepVGG-'):
        # e.g. 'RepVGG-B1g2' -> arch.RepVGG_arch.create_RepVGG_B1g2
        import arch.RepVGG_arch as repvgg_arch
        create = getattr(repvgg_arch,
                         'create_RepVGG_' + args.model_train.split('-', 1)[1])
        netD = create(deploy=False, num_classes=args.num_classes)
    #############################################
    elif args.model_train == 'squeezenet_1_0':
        from arch.squeezenet_arch import SqueezeNet
        netD = SqueezeNet(num_classes=args.num_classes, version='1_0')
    elif args.model_train == 'squeezenet_1_1':
        from arch.squeezenet_arch import SqueezeNet
        netD = SqueezeNet(num_classes=args.num_classes, version='1_1')
    #############################################
    elif args.model_train in ('vgg11', 'vgg13', 'vgg16', 'vgg19'):
        import arch.vgg_arch as vgg_arch
        netD = getattr(vgg_arch, 'create_' + args.model_train)(
            args.num_classes, pretrained=True)
    #############################################
    elif args.model_train == 'SwinTransformer':
        from swin_transformer_pytorch import SwinTransformer
        netD = SwinTransformer(
            hidden_dim=96,
            layers=(2, 2, 6, 2),
            heads=(3, 6, 12, 24),
            channels=3,
            num_classes=args.num_classes,
            head_dim=32,
            window_size=8,
            downscaling_factors=(4, 2, 2, 2),
            relative_pos_embedding=True
        )

    # don't set the batch too high, will run out of RAM
    dummy_input = torch.randn(1, 3, 256, 256)
    state_dict = torch.load(args.model_path)
    print("Loaded model from model path into state_dict.")
    netD.load_state_dict(state_dict)
    torch.onnx.export(netD, dummy_input, args.output_path, opset_version=11)
    print("Done.")
root_path = '/home/ubuntu/dataset/dfdc_image/train/dfdc_train_part_10/'
test_path = '/home/ubuntu/dataset/dfdc_image/test/'
batch_size = 10  # folder num
# image_num = batch_size * 32
num_workers = 4
epoch = 100

# Timestamped TensorBoard run directory for this launch.
dir_name = t.strftime('~%Y%m%d~%H%M%S', t.localtime(t.time()))
log_train = './log/' + dir_name + '/train'
writer = SummaryWriter(log_train)

v = ViT(
    image_size=256,
    patch_size=32,
    num_classes=2,
    dim=1024,
    depth=6,
    heads=8,
    mlp_dim=2048,
    dropout=0.3,
    emb_dropout=0.3,
)


def count_parameters(model):
    """Number of trainable parameters in ``model``."""
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


print(count_parameters(v))

criterion = nn.CrossEntropyLoss()
bce = nn.BCELoss()
sigmoid = nn.Sigmoid()
opt = torch.optim.Adam(v.parameters(), lr=3e-4)
# opt = torch.optim.SGD(v.parameters(), lr=3e-4)
v.cuda()
def main():
    """Train a planar or spherical ViT classifier on SMNIST or dogs-vs-cats.

    Parses CLI options, builds the model/dataloaders, runs the train/validate
    loop, checkpoints the best-accuracy, best-loss, and last models, and saves
    learning-curve plots under ``weights/``.
    """
    parser = argparse.ArgumentParser(description='ViT')
    parser.add_argument('--data_dir', default='data/sph_dogs_vs_cats')
    parser.add_argument('--dataset', default='dvsc')
    parser.add_argument('--exp_id', default='sdvsc-adam')
    parser.add_argument('--mode', default='normal')
    # type=int so command-line values arrive as numbers — without it,
    # --batch/--epochs passed on the CLI were strings and broke DataLoader/range.
    parser.add_argument('--batch', type=int, default=128)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--cuda', default=True)
    parser.add_argument('--optim', default='SGD')
    args = parser.parse_args()

    # Portable replacement for os.system('mkdir -p weights').
    os.makedirs('weights', exist_ok=True)

    dataset = {'smnist': SMNIST, 'dvsc': DVSC}
    # Per-dataset geometry: image/patch size, class count, and sphere sampling level.
    if args.dataset == 'smnist':
        image_size, patch_size, num_classes, samp = 60, 10, 10, 6
    elif args.dataset == 'dvsc':
        image_size, patch_size, num_classes, samp = 384, 32, 2, 12

    if args.mode == 'normal':
        model = ViT(image_size=image_size,
                    patch_size=patch_size,
                    num_classes=num_classes,
                    dim=512,
                    depth=4,
                    heads=8,
                    mlp_dim=512,
                    dropout=0.1,
                    emb_dropout=0.1)
    else:
        model = ViT_sphere(image_size=image_size,
                           patch_size=patch_size,
                           num_classes=num_classes,
                           dim=512,
                           depth=4,
                           heads=8,
                           mlp_dim=512,
                           base_order=1,
                           mode=args.mode,  # face, vertex and regular
                           samp=samp,
                           dropout=0.1,
                           emb_dropout=0.1)

    params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("Trainable parameters", params)

    cuda = args.cuda
    epochs = args.epochs
    batch = args.batch
    path = 'weights/'
    train_data = dataset[args.dataset](args.data_dir, 'train', image_size, image_size, None)
    valid_data = dataset[args.dataset](args.data_dir, 'valid', image_size, image_size, None)
    train_loader = DataLoader(dataset=train_data, batch_size=batch, shuffle=True)
    valid_loader = DataLoader(dataset=valid_data, batch_size=batch, shuffle=True)

    if cuda:
        model = model.cuda()

    if args.optim == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
    else:
        optimizer = optim.Adam(model.parameters(), lr=1e-3)  # , momentum=0.9)
    cla_loss = torch.nn.CrossEntropyLoss()

    best_loss = float('inf')
    best_acc = 0.0
    print("Training Start")
    T_L, V_L, V_a = [], [], []
    for epoch_idx in range(epochs):
        print("Epoch", epoch_idx + 1)
        model.train()
        train_losses = []
        for img, target in tqdm(train_loader):
            if cuda:
                img = img.cuda()
                target = target.cuda()
            preds = model(img)
            loss = cla_loss(preds, target)
            train_losses.append(loss.cpu().item())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        T_L.append(np.mean(train_losses))
        print("train loss:", np.mean(train_losses))

        # Validation — uses a FRESH loss list. The original kept appending to
        # the training-loss list, so the reported "val loss" averaged in every
        # training batch of the epoch.
        val_losses = []
        sum_acc = 0
        total = len(valid_data)
        model.eval()
        with torch.no_grad():  # no gradients needed during evaluation
            for img, target in tqdm(valid_loader):
                if cuda:
                    img = img.cuda()
                    target = target.cuda()
                preds = model(img)
                val_losses.append(cla_loss(preds, target).item())
                probabilities = torch.nn.functional.softmax(preds, dim=1)
                pred_labels = torch.argmax(probabilities, dim=1)
                sum_acc += (pred_labels == target).sum()

        v_l = np.mean(val_losses)
        v_a = sum_acc.item() / total * 100
        checkpoint = {
            # Record the epoch this checkpoint came from (the original always
            # wrote the total epoch count, making checkpoints indistinguishable).
            'epoch': epoch_idx + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }
        if v_a > best_acc:
            best_acc = v_a
            torch.save(checkpoint, path + args.exp_id + 'model_acc.pth')
        if v_l < best_loss:
            best_loss = v_l
            torch.save(checkpoint, path + args.exp_id + 'model_loss.pth')
        V_L.append(v_l)
        V_a.append(v_a)
        print("val loss:", v_l)
        print("val acc:", v_a)

    print(T_L)
    # Learning curves: train vs. validation loss, then validation accuracy.
    plt.plot(T_L, label='Total_loss', color='blue')
    plt.plot(V_L, label='Valid_loss', color='red')
    plt.legend(loc="upper left")
    plt.xlabel("num of epochs")
    plt.ylabel("loss")
    plt.savefig(path + args.exp_id + 'Learning_Curves.png')
    plt.clf()
    plt.plot(V_a, label='Valid_acc', color='cyan')
    plt.legend(loc="upper left")
    plt.xlabel("num of epochs")
    plt.ylabel("accuracy")
    plt.savefig(path + args.exp_id + 'Val_acc.png')

    torch.save(
        {
            'epoch': epochs,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }, path + args.exp_id + 'model_last.pth')
print(dic, print_size(dict_[dic])) else: return str(dict_.shape) # print(pretain_tf_model.keys()) input_size = 224 patch_size = 16 num_layers = 12 # print(pretain_tf_model.keys()) # print_size(pretain_tf_model['pre_logits']) v = ViT(image_size=input_size, patch_size=patch_size, num_classes=1000, depth=num_layers, heads=12, mlp_dim=3072, dropout=0.1, emb_dropout=0.1) print("Model's state_dict:") for param_tensor in v.state_dict(): print(param_tensor, "\t", v.state_dict()[param_tensor].size()) ## copy embedding tf_dict = {} embedding_weight_shape = pretain_tf_model['embedding']['kernel'].shape embedding_weight = np.array( jnp.transpose(pretain_tf_model['embedding']['kernel'], (3, 2, 0, 1))) # embedding_weight = pretain_tf_model['embedding']['kernel'].reshape([embedding_weight_shape[3],embedding_weight_shape[2],embedding_weight_shape[1],embedding_weight_shape[0]])
def main():
    """Train a ViT (or optionally a CNN) classifier on MNIST.

    Builds the dataset/loaders from CLI flags, trains for ``--epochs`` epochs
    via the external ``train`` routine, tracks the best top-1/top-5 accuracy,
    and optionally saves the final weights to ``mnist.pth``.
    """
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--lr', type=float, default=0.01, metavar='S',
                        help='learning rate')
    parser.add_argument('--batch-size', type=int, default=128, metavar='N',
                        help='input batch size for training (default: 128)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    parser.add_argument('--normalize', action='store_true', default=False,
                        help='normalize input dataset')
    parser.add_argument('--cnn', action='store_true', default=False,
                        help='use cnn model')
    args = parser.parse_args()

    use_cuda = torch.cuda.is_available()
    # Extra loader workers / pinned memory only pay off when a GPU is present.
    loader_kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}

    # Optional standard MNIST normalization on top of tensor conversion.
    steps = [transforms.ToTensor()]
    if args.normalize:
        steps.append(transforms.Normalize((0.1307, ), (0.3081, )))
    transform = transforms.Compose(steps)

    x_train = datasets.MNIST(root='./data', train=True, download=True,
                             transform=transform)
    x_test = datasets.MNIST(root='./data', train=False, download=True,
                            transform=transform)
    train_loader = torch.utils.data.DataLoader(x_train, shuffle=True,
                                               batch_size=args.batch_size,
                                               **loader_kwargs)
    test_loader = torch.utils.data.DataLoader(x_test, shuffle=False,
                                              batch_size=args.batch_size,
                                              **loader_kwargs)

    device = torch.device("cuda" if use_cuda else "cpu")
    if args.cnn:
        model = CNNModel().to(device)
    else:
        # 28x28 single-channel input split into 14x14 patches (2x2 grid).
        model = ViT(
            image_size=28,
            patch_size=14,
            num_classes=10,
            dim=128,
            depth=6,
            heads=8,
            mlp_dim=128,
            channels=1,
        ).to(device)
    if torch.cuda.device_count() > 1:
        print("Available GPUs:", torch.cuda.device_count())
        model = nn.DataParallel(model)
    print("Model:", model)
    print("Device:", device)

    optimizer = optim.Adam(model.parameters())

    start_time = datetime.datetime.now()
    best_top1, best_top5 = 0, 0
    for epoch in range(1, args.epochs + 1):
        top1, top5 = train(args, model, device, train_loader, test_loader,
                           optimizer, epoch)
        # Top-5 is only recorded alongside a new best top-1.
        if top1 > best_top1:
            print("New best Top 1: %0.2f%%, Top 5: %0.2f%%" % (top1, top5))
            best_top1, best_top5 = top1, top5

    elapsed_time = datetime.datetime.now() - start_time
    print("Elapsed time (train): %s" % elapsed_time)
    print("Best Top 1: %0.2f%%, Top 5: %0.2f%%" % (best_top1, best_top5))

    if args.save_model:
        torch.save(model.state_dict(), "mnist.pth")
if args.version == 2: args.cos = True args.moco_t = 0.2 if args.version == 3: args.cos = True args.symmetric = True print(args) vit = ViT( image_size=32, patch_size=4, num_classes=args.moco_dim, # dim = 256, # depth = 4, # heads = 12, # mlp_dim = 512, dim=256, depth=3, heads=8, mlp_dim=384, dropout=0.1, emb_dropout=0.1) model = MoCo(dim=args.moco_dim, K=args.moco_k, m=args.moco_m, T=args.moco_t, ver=args.version, arch=args.arch, bn_splits=args.bn_splits, symmetric=args.symmetric,