def performer_small_patch25_500(pretrained=False, **kwargs):
    efficient_transformer = Performer(dim=384, depth=12, heads=6, causal=True)
    model = ViT(image_size=500,
                patch_size=25,
                num_classes=2,
                dim=384,
                transformer=efficient_transformer)
    return model
def performer_tiny_patch25_500(pretrained=False, **kwargs):
    efficient_transformer = Performer(dim=512, depth=1, heads=8, causal=True)
    model = ViT(image_size=500,
                patch_size=25,
                num_classes=2,
                dim=512,
                transformer=efficient_transformer)
    # TODO fix pretrained implementation
    # if pretrained:
    #     checkpoint = torch.load(PATH)
    #     model.load_state_dict(checkpoint["model"])
    return model
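# A minimal usage sketch for the factory functions above (an assumption, not part of
# the original file): build the tiny variant and run a dummy 500x500 RGB batch through it.
if __name__ == "__main__":
    import torch

    model = performer_tiny_patch25_500()
    dummy = torch.randn(2, 3, 500, 500)  # two 500x500 RGB images
    logits = model(dummy)
    print(logits.shape)  # expected: torch.Size([2, 2])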
def to_vit(self):
    v = EfficientViT(*self.args, **self.kwargs)
    v.load_state_dict(self.state_dict())
    return v
def main():
    # Training settings
    args = training_args()
    batch_size = args.batch_size
    epochs = args.epochs
    lr = args.lr
    gamma = args.gamma
    seed = 42
    device = args.device

    def seed_everything(seed):
        random.seed(seed)
        os.environ['PYTHONHASHSEED'] = str(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True

    seed_everything(seed)

    # Data path
    competition_name = "dogs-vs-cats-redux-kernels-edition"
    data_dir = os.path.expanduser(f"~/.kaggle/competitions/{competition_name}")
    train_dir = os.path.join(data_dir, "train")
    test_dir = os.path.join(data_dir, "test")

    train_list = glob.glob(os.path.join(train_dir, '*.jpg'))
    test_list = glob.glob(os.path.join(test_dir, '*.jpg'))
    print(f"Train Data: {len(train_list)}")
    print(f"Test Data: {len(test_list)}")

    labels = [path.split('/')[-1].split('.')[0] for path in train_list]

    # Split
    train_list, valid_list = train_test_split(train_list,
                                              test_size=0.2,
                                              stratify=labels,
                                              random_state=seed)
    print(f"Train Data: {len(train_list)}")
    print(f"Validation Data: {len(valid_list)}")
    print(f"Test Data: {len(test_list)}")

    # Image Augmentation
    train_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])

    val_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])

    test_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])

    # Dataloader
    from dataset.cats_dogs import CatsDogsDataset

    train_data = CatsDogsDataset(train_list, transform=train_transforms)
    valid_data = CatsDogsDataset(valid_list, transform=val_transforms)
    test_data = CatsDogsDataset(test_list, transform=test_transforms)

    train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(dataset=valid_data, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True)

    print(f"train samples: {len(train_data)}, train batches: {len(train_loader)}.")
) print(f"val samples: {len(valid_data)},val batches: {len(valid_loader)}.") # Select and prepare model if args.arch == "ViT": from vit_pytorch.efficient import ViT from torch.optim.lr_scheduler import StepLR # Effecient Attention # Linformer efficient_transformer = Linformer( dim=128, seq_len=49 + 1, # 7x7 patches + 1 cls-token depth=12, heads=8, k=64) # Visual Transformer model = ViT( dim=128, image_size=224, patch_size=32, num_classes=2, transformer=efficient_transformer, channels=3, ).to(device) # Training configs for ViT criterion = nn.CrossEntropyLoss() # loss function optimizer = torch.optim.Adam(model.parameters(), lr=lr) # optimizer scheduler = StepLR(optimizer, step_size=1, gamma=gamma) # scheduler TODO 没用上 scheduler = None elif args.arch == "resnet50": from torchvision.models.resnet import resnet50 from torch.optim.lr_scheduler import MultiStepLR model = resnet50().to(device) # Training configs for resnet50 criterion = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=1e-3) # optimizer # ResNet learning schedule scheduler = MultiStepLR(optimizer, milestones=[80, 120, 160], gamma=0.1) prefix = os.path.join("~", "Documents", "DeepLearningData", competition_name) current_time = datetime.now().strftime("%Y%m%d-%H%M%S") subfix = os.path.join(args.arch, current_time) save_dir = os.path.expanduser(os.path.join(prefix, subfix, "ckpts")) log_dir = os.path.expanduser(os.path.join(prefix, subfix, "logs")) os.makedirs(save_dir, exist_ok=True) os.makedirs(log_dir, exist_ok=True) history = train(model, epochs, criterion, optimizer, train_loader, valid_loader, scheduler=scheduler, device=device, verbose=True, save_weights=True, save_dir=save_dir) path = os.path.join(log_dir, "history.pickle") with open(path, "wb") as f: pickle.dump(history, f) print(f"Saved history to: {path}")
def main():
    args, config = parse_args_and_config()
    tb_logger = tensorboardX.SummaryWriter(
        log_dir=os.path.join('vit_logs', args.doc))

    device = config.device
    batch_size = config.train.batch_size
    lr = float(config.optim.lr)
    epochs = config.train.epochs

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
    ])
    cifar_train = datasets.CIFAR10(root="CIFAR10/",
                                   train=True,
                                   download=True,
                                   transform=transform)
    cifar_test = datasets.CIFAR10(root="CIFAR10/",
                                  train=False,
                                  download=True,
                                  transform=transform)
    data_train = DataLoader(dataset=cifar_train,
                            batch_size=config.train.batch_size,
                            shuffle=True)
    data_test = DataLoader(dataset=cifar_test,
                           batch_size=config.train.batch_size // 4,
                           shuffle=False)

    torch.manual_seed(43)
    val_size = 5000
    train_size = len(cifar_train) - val_size
    train_ds, val_ds = random_split(cifar_train, [train_size, val_size])
    print(len(train_ds), len(val_ds))

    efficient_transformer = Performer(dim_head=64,
                                      dim=config.model.p_dim,
                                      depth=config.model.p_depth,
                                      heads=config.model.p_heads,
                                      causal=True)
    model = ViT(dim=config.model.dim,
                image_size=config.model.image_size,
                patch_size=config.model.patch_size,
                num_classes=config.model.num_classes,
                transformer=efficient_transformer)

    train_loader = DataLoader(train_ds,
                              batch_size,
                              shuffle=True,
                              num_workers=4,
                              pin_memory=True)
    val_loader = DataLoader(val_ds,
                            batch_size // 4,
                            num_workers=4,
                            pin_memory=True)
    test_loader = DataLoader(data_test,
                             batch_size // 4,
                             num_workers=4,
                             pin_memory=True)

    opt = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    model.to(device)

    step = 0
    for epoch in range(1, epochs + 1):
        acc = 0
        tot_loss = 0
        train_cnt = 0
        test_cnt = 0

        model.train()
        pbar = tqdm(train_loader)
        acc_tr = 0
        for x, y in pbar:
            x = x.to(device)
            y = y.to(device)
            y_pred = model(x)

            opt.zero_grad()
            loss = criterion(y_pred, y)
            loss.backward()
            opt.step()

            tot_loss += loss.item() * x.shape[0]
            train_cnt += x.shape[0]
            acc_tr = accuracy(y_pred, y)

            if step % config.train.log_iter == 0:
                tb_logger.add_scalar('loss', tot_loss / train_cnt, global_step=step)
                tb_logger.add_scalar('train_accuracy', acc_tr, global_step=step)
            if step % 100 == 0:
                imgs_grid = torchvision.utils.make_grid(x[:8, ...], 3)
                tb_logger.add_image('imgs', imgs_grid, global_step=step)

            pbar.set_description(f"Loss : {tot_loss/train_cnt:.4f}, Acc: {acc_tr}")
            step += 1

        model.eval()
        with torch.no_grad():
            for x, y in val_loader:
                x = x.to(device)
                y = y.to(device)
                y_pred = model(x)
                acc += accuracy(y_pred, y)
                test_cnt += x.shape[0]

        average_loss = tot_loss / train_cnt
        print(
            f'epoch {epoch} : Average loss : {average_loss:.4f}, val_acc : {acc.item()/test_cnt:.4f}'
        )
        logging.info(
            f'epoch {epoch} : average_train_loss : {average_loss:.4f}, val_acc : {acc.item()/test_cnt}'
        )
        tb_logger.add_scalar('average_train_loss', average_loss, global_step=epoch)
        tb_logger.add_scalar('val_acc', acc.item() / test_cnt, global_step=epoch)

    logging.info("Sampling from model: {}".format(args.doc))
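# The CIFAR-10 loop above relies on an accuracy() helper that is not shown. The sketch
# below is an assumption: it returns the number of correct predictions in the batch as
# a tensor, which is consistent with accumulating it and dividing by the sample count.
import torch


def accuracy(y_pred, y):
    # y_pred: (batch, num_classes) logits; y: (batch,) integer class labels
    return (y_pred.argmax(dim=1) == y).sum()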
def train():
    # Model
    efficient_transformer = Linformer(
        dim=128,
        seq_len=400 + 1,  # 20x20 patches + 1 cls token (image_size=320, patch_size=16)
        depth=12,
        heads=8,
        k=64)
    my_model = ViT(
        dim=128,
        image_size=320,
        patch_size=16,
        num_classes=25,
        transformer=efficient_transformer,
        channels=3,
    ).to(device)

    if os.path.exists('transformer/my_model.pt'):
        my_model.load_state_dict(torch.load('transformer/my_model.pt'))
        print('Load my_model.pt')

    batch_size = 32
    num_epoch = 100
    num_classes = 25
    learning_rate = 8e-4

    train_set = MyDataset(is_train=True, num_cat=num_classes)
    validation_set = MyDataset(is_train=False, num_cat=num_classes)
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True)
    validation_loader = torch.utils.data.DataLoader(validation_set,
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    pin_memory=True)

    optimizer = torch.optim.Adam(my_model.parameters(), lr=learning_rate)
    loss_func = torch.nn.CrossEntropyLoss()
    scheduler = ReduceLROnPlateau(optimizer,
                                  'max',
                                  factor=0.5,
                                  patience=5,
                                  threshold=2e-1,
                                  verbose=True,
                                  min_lr=1e-5)

    bestTestAccuracy = 0
    print('Start training')
    train_size = len(train_loader.dataset)
    test_size = len(validation_loader.dataset)

    for epoch in range(num_epoch):
        total = 0
        correct = 0
        my_model.train()
        for i, data in enumerate(train_loader, 0):
            labels = data['label'].to(device)
            img = data['img'].to(device).float()

            prediction = my_model(img)
            loss = loss_func(prediction, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            _, predicted = torch.max(prediction, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            print(
                f'Train | Epoch {epoch}/{num_epoch}, Batch {i}/{int(train_size/batch_size)} '
                f' Loss: {loss.item():.3f} LR: {get_lr(optimizer):.6f}'
                f' Acc: {(100 * correct / total):.3f}')

        total = 0
        correct = 0
        my_model.eval()
        for i, data in enumerate(validation_loader, 0):
            labels = data['label'].to(device)
            img = data['img'].to(device).float()

            prediction = my_model(img)
            _, predicted = torch.max(prediction, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            print(
                f'Test | Epoch {epoch}/{num_epoch}, Batch {i}/{int(test_size/batch_size)} '
                f' Loss: {loss.item():.3f} LR: {get_lr(optimizer):.6f}'
                f' Acc: {(100 * correct / total):.3f} Best-so-far: {100*bestTestAccuracy:.5f}'
            )

        if (correct / total) > bestTestAccuracy:
            bestTestAccuracy = correct / total
            print(f'Update best test: {100*bestTestAccuracy:.5f}')
            torch.save(
                my_model.state_dict(),
                f"transformer/my_model_{str(round(100*bestTestAccuracy, 2)).replace('.', '_')}.pt"
            )
        scheduler.step(bestTestAccuracy)
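# train() above calls a get_lr() helper that is not included in the snippet. A minimal
# sketch, assuming it simply reports the optimizer's current learning rate:
def get_lr(optimizer):
    # Return the learning rate of the first parameter group.
    for param_group in optimizer.param_groups:
        return param_group['lr']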
'''================================================================
Total params: 271,590,402
Trainable params: 271,590,402
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 1038.92
Params size (MB): 1036.04
Estimated Total Size (MB): 2075.53
----------------------------------------------------------------
'''
model = ViT(
    dim=1024,
    image_size=224,
    patch_size=16,
    num_classes=2,
    depth=32,  # number of transformer blocks
    heads=16,  # number of attention heads
    mlp_dim=2048,
    channels=3,
)
model.to(device)

# loss function
criterion = nn.CrossEntropyLoss()
# optimizer
optimizer = optim.Adam(model.parameters(), lr=lr)
# scheduler
scheduler = StepLR(optimizer, step_size=1, gamma=gamma)

for epoch in range(epochs):
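    # The epoch body is not included in the snippet above; the following is a hedged
    # sketch of a typical classification epoch (an assumption, not the author's exact
    # code), reusing the train_loader / criterion / optimizer / scheduler names set up
    # earlier in this file.
    epoch_loss = 0
    epoch_accuracy = 0
    model.train()
    for data, label in train_loader:
        data, label = data.to(device), label.to(device)
        output = model(data)
        loss = criterion(output, label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        acc = (output.argmax(dim=1) == label).float().mean()
        epoch_accuracy += acc.item() / len(train_loader)
        epoch_loss += loss.item() / len(train_loader)

    scheduler.step()
    print(f"Epoch {epoch + 1} - loss: {epoch_loss:.4f}, acc: {epoch_accuracy:.4f}")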
def main(args):
    print(args)
    device = torch.device(args.device)

    # fix seed for reproducibility
    print("Setting random seed")
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.benchmark = True
    cudnn.deterministic = True

    data_directory = args.data_path

    print("Loading data")
    train_dataset = ColonCancerDataset(data_directory, train=True, seed=args.seed)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers,
                              pin_memory=args.pin_mem,
                              drop_last=True)
    val_dataset = ColonCancerDataset(data_directory, train=False, seed=args.seed)
    val_loader = DataLoader(val_dataset,
                            batch_size=int(1.5 * args.batch_size),
                            shuffle=False,
                            num_workers=args.num_workers,
                            pin_memory=args.pin_mem,
                            drop_last=False)

    print(f"Creating model: {args.model}")
    efficient_transformer = Performer(dim=384, depth=12, heads=6, causal=True)
    model = ViT(image_size=500,
                patch_size=25,
                num_classes=2,
                dim=384,
                transformer=efficient_transformer)
    # TODO fix create_model function and files
    # model = create_model(
    #     args.model,
    #     pretrained=False,
    #     num_classes=2,
    #     drop_rate=args.drop,
    #     drop_path_rate=args.drop_path,
    #     drop_block_rate=None,
    # )
    model.to(device)
    model_without_ddp = model

    n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Number of params: {n_parameters}")

    linear_scaled_lr = args.lr * args.batch_size / 512.0
    args.lr = linear_scaled_lr
    optimiser = create_optimizer(args, model_without_ddp)
    loss_scaler = NativeScaler()
    lr_scheduler, _ = create_scheduler(args, optimiser)
    criterion = LabelSmoothingCrossEntropy()

    output_dir = Path(args.output_dir)

    wandb.watch(model, criterion, log='all', log_freq=10)

    print(f"Starting training for {args.epochs} epochs")
    start_time = time.time()
    for epoch in tqdm(range(args.start_epoch, args.epochs + 1)):
        train_loss, train_metrics = train_one_epoch(model, criterion, train_loader,
                                                    optimiser, device)
        lr_scheduler.step(epoch)
        # TODO add in resuming training

        val_loss, val_metrics = evaluate(val_loader, model, device)

        if args.output_dir:
            checkpoint_paths = [output_dir / "checkpoint.pth"]
            for checkpoint_path in checkpoint_paths:
                save(
                    {
                        "model": model_without_ddp.state_dict(),
                        "optimiser": optimiser.state_dict(),
                        "lr_scheduler": lr_scheduler.state_dict(),
                        "epoch": epoch,
                        "scaler": loss_scaler.state_dict(),
                        "args": args,
                    }, checkpoint_path)

        wandb.log({
            "epoch": epoch,
            "train loss": train_loss,
            "val loss": val_loss,
            "train acc": train_metrics["accuracy"],
            "train f1": train_metrics["f1 score"],
            "train prec": train_metrics["precision"],
            "train recall": train_metrics["recall"],
            "val acc": val_metrics["accuracy"],
            "val f1": val_metrics["f1 score"],
            "val prec": val_metrics["precision"],
            "val recall": val_metrics["recall"]
        })

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print("Training time {}".format(total_time_str))
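# main(args) above depends on train_one_epoch() and evaluate() helpers that are not
# shown. The sketch below is an assumption for evaluate() only, illustrating the
# (loss, metrics-dict) contract implied by the wandb.log call; the original project's
# implementation may differ.
import torch
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score


@torch.no_grad()
def evaluate(data_loader, model, device):
    model.eval()
    criterion = torch.nn.CrossEntropyLoss()
    losses, preds, targets = [], [], []
    for images, labels in data_loader:
        images, labels = images.to(device), labels.to(device)
        logits = model(images)
        losses.append(criterion(logits, labels).item())
        preds.extend(logits.argmax(dim=1).cpu().tolist())
        targets.extend(labels.cpu().tolist())
    metrics = {
        "accuracy": accuracy_score(targets, preds),
        "f1 score": f1_score(targets, preds),
        "precision": precision_score(targets, preds),
        "recall": recall_score(targets, preds),
    }
    return sum(losses) / len(losses), metrics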
from linformer import Linformer
from vit_pytorch.efficient import ViT
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR

efficient_transformer = Linformer(
    dim=128,
    seq_len=16 + 1,  # 4x4 patches + 1 cls token (image_size=28, patch_size=7)
    depth=12,
    heads=8,
    k=64)

att_model = ViT(
    dim=128,
    image_size=28,
    patch_size=7,
    num_classes=10,
    transformer=efficient_transformer,
    channels=1,
).to(device)

# loss function
criterion = nn.CrossEntropyLoss()
# optimizer
optimizer = optim.Adam(att_model.parameters(), lr=0.01)
# scheduler
scheduler = StepLR(optimizer, step_size=1, gamma=0.7)

for epoch in range(20):
    epoch_loss = 0
    epoch_accuracy = 0
# Linformer
efficient_transformer = Linformer(
    dim=128,
    seq_len=49 + 1,  # 7x7 patches + 1 cls token
    depth=12,
    heads=8,
    k=64)

# Visual Transformer
model = ViT(
    dim=128,
    image_size=224,
    patch_size=32,
    num_classes=2,
    transformer=efficient_transformer,
    channels=3,
).to(device)

# loss function
criterion = nn.CrossEntropyLoss()
# optimizer
optimizer = optim.Adam(model.parameters(), lr=lr)
# scheduler
scheduler = StepLR(optimizer, step_size=1, gamma=gamma)

for epoch in range(epochs):
    epoch_loss = 0
    epoch_accuracy = 0
    for data, label in tqdm(train_loader):