def performer_small_patch25_500(pretrained=False, **kwargs):
    """Build a ViT-small (dim 384) with a 12-layer Performer backbone
    for 500x500 inputs split into 25x25 patches (2 output classes)."""
    backbone = Performer(dim=384, depth=12, heads=6, causal=True)
    return ViT(image_size=500,
               patch_size=25,
               num_classes=2,
               dim=384,
               transformer=backbone)
def performer_tiny_patch25_500(pretrained=False, **kwargs):
    """Build a tiny ViT variant (dim 512, single Performer layer) for
    500x500 inputs split into 25x25 patches (2 output classes)."""
    backbone = Performer(dim=512, depth=1, heads=8, causal=True)
    vit = ViT(image_size=500,
              patch_size=25,
              num_classes=2,
              dim=512,
              transformer=backbone)
    # TODO fix pretrained implementation — checkpoint loading is disabled:
    # if pretrained:
    #     checkpoint = torch.load(PATH)
    #     vit.load_state_dict(checkpoint["model"])
    return vit
# Example #3
 def to_vit(self):
     """Materialize an EfficientViT from this wrapper's stored constructor
     arguments, copying over the current weights."""
     vit = EfficientViT(*self.args, **self.kwargs)
     vit.load_state_dict(self.state_dict())
     return vit
# Example #4
def main():
    """Train a cats-vs-dogs classifier (ViT or ResNet-50) on the Kaggle data.

    Parses hyper-parameters from the command line, builds a stratified
    train/validation split with deterministic seeding, trains via
    ``train(...)`` and pickles the returned history to the log directory.
    """
    # Training settings
    args = training_args()
    batch_size = args.batch_size
    epochs = args.epochs
    lr = args.lr
    gamma = args.gamma
    seed = 42  # fixed seed: keeps the split and weight init reproducible
    device = args.device

    def seed_everything(seed):
        """Seed every RNG source used here (python, numpy, torch CPU/GPU)."""
        random.seed(seed)
        os.environ['PYTHONHASHSEED'] = str(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True

    seed_everything(seed)

    # Data path
    competition_name = "dogs-vs-cats-redux-kernels-edition"
    data_dir = os.path.expanduser(f"~/.kaggle/competitions/{competition_name}")
    train_dir = os.path.join(data_dir, "train")
    test_dir = os.path.join(data_dir, "test")
    train_list = glob.glob(os.path.join(train_dir, '*.jpg'))
    test_list = glob.glob(os.path.join(test_dir, '*.jpg'))

    print(f"Train Data: {len(train_list)}")
    print(f"Test Data: {len(test_list)}")
    # Label is the filename prefix ("cat"/"dog").  basename() also handles
    # Windows separators; the original split on '/' only.
    labels = [os.path.basename(path).split('.')[0] for path in train_list]

    # Stratified split keeps the class ratio identical in train and val.
    train_list, valid_list = train_test_split(train_list,
                                              test_size=0.2,
                                              stratify=labels,
                                              random_state=seed)

    print(f"Train Data: {len(train_list)}")
    print(f"Validation Data: {len(valid_list)}")
    print(f"Test Data: {len(test_list)}")

    # Image augmentation: random crop/flip for training only.
    train_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])

    # BUGFIX: the original also applied RandomResizedCrop/RandomHorizontalFlip
    # to validation and test data, which makes evaluation metrics noisy and
    # non-reproducible.  Evaluation preprocessing must be deterministic.
    val_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])

    test_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])

    # Dataloader
    from dataset.cats_dogs import CatsDogsDataset
    train_data = CatsDogsDataset(train_list, transform=train_transforms)
    valid_data = CatsDogsDataset(valid_list, transform=val_transforms)
    test_data = CatsDogsDataset(test_list, transform=test_transforms)

    train_loader = DataLoader(dataset=train_data,
                              batch_size=batch_size,
                              shuffle=True)
    # Evaluation loaders need no shuffling; keep iteration order stable.
    valid_loader = DataLoader(dataset=valid_data,
                              batch_size=batch_size,
                              shuffle=False)
    test_loader = DataLoader(dataset=test_data,
                             batch_size=batch_size,
                             shuffle=False)

    print(
        f"train samples: {len(train_data)},train batches: {len(train_loader)}."
    )
    print(f"val samples: {len(valid_data)},val batches: {len(valid_loader)}.")

    # Select and prepare model
    if args.arch == "ViT":
        from vit_pytorch.efficient import ViT
        # Linformer efficient-attention backbone: 224/32 = 7 -> 7x7 patches.
        efficient_transformer = Linformer(
            dim=128,
            seq_len=49 + 1,  # 7x7 patches + 1 cls-token
            depth=12,
            heads=8,
            k=64)
        # Visual Transformer
        model = ViT(
            dim=128,
            image_size=224,
            patch_size=32,
            num_classes=2,
            transformer=efficient_transformer,
            channels=3,
        ).to(device)
        # Training configs for ViT
        criterion = nn.CrossEntropyLoss()  # loss function
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)  # optimizer
        # The original built a StepLR here and immediately discarded it
        # (marked "unused" in the source); train without a scheduler.
        scheduler = None
    elif args.arch == "resnet50":
        from torchvision.models.resnet import resnet50
        from torch.optim.lr_scheduler import MultiStepLR
        model = resnet50().to(device)
        # Training configs for resnet50
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=1e-3)  # optimizer
        # ResNet learning schedule
        scheduler = MultiStepLR(optimizer,
                                milestones=[80, 120, 160],
                                gamma=0.1)
    else:
        # BUGFIX: the original fell through with `model`/`criterion`/
        # `optimizer`/`scheduler` unbound, crashing later with a NameError.
        raise ValueError(f"Unsupported architecture: {args.arch!r}")

    # Checkpoints/logs go under ~/Documents/DeepLearningData/<comp>/<arch>/<ts>/
    prefix = os.path.join("~", "Documents", "DeepLearningData",
                          competition_name)
    current_time = datetime.now().strftime("%Y%m%d-%H%M%S")
    subfix = os.path.join(args.arch, current_time)

    save_dir = os.path.expanduser(os.path.join(prefix, subfix, "ckpts"))
    log_dir = os.path.expanduser(os.path.join(prefix, subfix, "logs"))

    os.makedirs(save_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)

    history = train(model,
                    epochs,
                    criterion,
                    optimizer,
                    train_loader,
                    valid_loader,
                    scheduler=scheduler,
                    device=device,
                    verbose=True,
                    save_weights=True,
                    save_dir=save_dir)

    path = os.path.join(log_dir, "history.pickle")
    with open(path, "wb") as f:
        pickle.dump(history, f)
        print(f"Saved history to: {path}")
# Example #5
def main():
    """Train a Performer-backed ViT on CIFAR-10 and log to TensorBoard.

    Splits the training set 45k/5k (train/val), logs running loss/accuracy
    and sample image grids during training, and evaluates each epoch.
    """
    args, config = parse_args_and_config()
    tb_logger = tensorboardX.SummaryWriter(
        log_dir=os.path.join('vit_logs', args.doc))
    device = config.device
    batch_size = config.train.batch_size
    lr = float(config.optim.lr)
    epochs = config.train.epochs
    # Standard CIFAR-10 per-channel mean/std normalization.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
    ])

    cifar_train = datasets.CIFAR10(root="CIFAR10/",
                                   train=True,
                                   download=True,
                                   transform=transform)

    cifar_test = datasets.CIFAR10(root="CIFAR10/",
                                  train=False,
                                  download=True,
                                  transform=transform)

    torch.manual_seed(43)  # fixed seed so random_split is reproducible
    val_size = 5000
    train_size = len(cifar_train) - val_size

    train_ds, val_ds = random_split(cifar_train, [train_size, val_size])
    print(len(train_ds), len(val_ds))

    efficient_transformer = Performer(dim_head=64,
                                      dim=config.model.p_dim,
                                      depth=config.model.p_depth,
                                      heads=config.model.p_heads,
                                      causal=True)

    model = ViT(dim=config.model.dim,
                image_size=config.model.image_size,
                patch_size=config.model.patch_size,
                num_classes=config.model.num_classes,
                transformer=efficient_transformer)

    train_loader = DataLoader(train_ds,
                              batch_size,
                              shuffle=True,
                              num_workers=4,
                              pin_memory=True)
    val_loader = DataLoader(val_ds,
                            batch_size // 4,
                            num_workers=4,
                            pin_memory=True)
    # BUGFIX: the original built an intermediate DataLoader over cifar_test
    # and then wrapped *that loader* in another DataLoader (and also built an
    # unused loader over the full training set).  Build from the dataset.
    test_loader = DataLoader(cifar_test,
                             batch_size // 4,
                             num_workers=4,
                             pin_memory=True)

    opt = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    model.to(device)

    step = 0
    for epoch in range(1, epochs + 1):
        acc = 0
        tot_loss = 0
        train_cnt = 0
        test_cnt = 0
        model.train()
        pbar = tqdm(train_loader)
        acc_tr = 0
        for x, y in pbar:
            x = x.to(device)
            y = y.to(device)
            y_pred = model(x)
            opt.zero_grad()
            loss = criterion(y_pred, y)
            loss.backward()
            opt.step()
            # Running sums weighted by batch size -> true per-sample average.
            tot_loss += loss.item() * x.shape[0]
            train_cnt += x.shape[0]

            acc_tr = accuracy(y_pred, y)
            if step % config.train.log_iter == 0:
                tb_logger.add_scalar('loss',
                                     tot_loss / train_cnt,
                                     global_step=step)
                tb_logger.add_scalar('train_accuracy',
                                     acc_tr,
                                     global_step=step)
            if step % 100 == 0:
                # Log the first 8 images of the batch as a 3-per-row grid.
                imgs_grid = torchvision.utils.make_grid(x[:8, ...], 3)
                tb_logger.add_image('imgs', imgs_grid, global_step=step)
            pbar.set_description(
                f"Loss : {tot_loss/train_cnt:.4f}, Acc: {acc_tr}")
            step += 1

        model.eval()

        # BUGFIX: validation ran with autograd enabled; no_grad() avoids
        # building graphs (and their memory cost) during evaluation.
        with torch.no_grad():
            for x, y in val_loader:
                x = x.to(device)
                y = y.to(device)
                y_pred = model(x)

                # NOTE(review): `acc` accumulates per-batch accuracy() values
                # but is divided by the *sample* count `test_cnt` below —
                # verify accuracy()'s return semantics; this looks off.
                acc += accuracy(y_pred, y)
                test_cnt += x.shape[0]

        print(
            f'epoch {epoch} : Average loss : {tot_loss/train_cnt:.4f}, test_acc : {acc.item()/test_cnt:.4f}'
        )
        average_loss = tot_loss / train_cnt

        logging.info(
            f'epoch {epoch} : average_val_loss : {average_loss:.4f}, test_acc : {acc.item()/test_cnt}'
        )
        tb_logger.add_scalar('average_val_loss',
                             tot_loss / train_cnt,
                             global_step=epoch)
        tb_logger.add_scalar('val_acc',
                             acc.item() / test_cnt,
                             global_step=epoch)

        logging.info("Sampling from model: {}".format(args.doc))
# Example #6
def train():
    """Train a Linformer-ViT classifier (25 classes, 320x320 RGB input).

    Resumes from transformer/my_model.pt when present, validates every
    epoch, and checkpoints whenever validation accuracy improves.
    """
    # Model
    efficient_transformer = Linformer(
        dim=128,
        # NOTE(review): 320/16 = 20 -> 20x20 = 400 patches, but seq_len is
        # 301 (the original comment claimed "7x7 patches"); confirm against
        # the ViT/Linformer wiring.
        seq_len=300 + 1,
        depth=12,
        heads=8,
        k=64)
    my_model = ViT(
        dim=128,
        image_size=320,
        patch_size=16,
        num_classes=25,
        transformer=efficient_transformer,
        channels=3,
    ).to(device)

    # Resume from an existing checkpoint if one is on disk.
    if os.path.exists('transformer/my_model.pt'):
        my_model.load_state_dict(torch.load('transformer/my_model.pt'))
        print('Load my_model.pt')

    batch_size = 32
    num_epoch = 100
    num_classes = 25
    learning_rate = 8e-4

    train_set = MyDataset(is_train=True, num_cat=num_classes)
    validation_set = MyDataset(is_train=False, num_cat=num_classes)

    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True)
    validation_loader = torch.utils.data.DataLoader(validation_set,
                                                    batch_size=32,
                                                    shuffle=True,
                                                    pin_memory=True)

    optimizer = torch.optim.Adam(my_model.parameters(), lr=learning_rate)
    loss_func = torch.nn.CrossEntropyLoss()
    # Halve the LR when the monitored accuracy plateaus.
    scheduler = ReduceLROnPlateau(optimizer,
                                  'max',
                                  factor=0.5,
                                  patience=5,
                                  threshold=2e-1,
                                  verbose=True,
                                  min_lr=1e-5)
    bestTestAccuracy = 0

    print('Start training')
    train_size = len(train_loader.dataset)
    test_size = len(validation_loader.dataset)
    for epoch in range(num_epoch):
        total = 0
        correct = 0
        my_model.train()
        for i, data in enumerate(train_loader, 0):
            labels = data['label'].to(device)
            img = data['img'].to(device).float()
            prediction = my_model(img)

            loss = loss_func(prediction, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            _, predicted = torch.max(prediction, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            print(
                f'Train | Epoch {epoch}/{num_epoch}, Batch {i}/{int(train_size/batch_size)} '
                f' Loss: {loss.item():.3f} LR: {get_lr(optimizer):.6f}'
                f' Acc: {(100 * correct / total):.3f}')

        total = 0
        correct = 0
        my_model.eval()
        # BUGFIX: run validation without autograd, and compute the current
        # validation loss — the original printed the *last training batch's*
        # loss for every validation batch.
        with torch.no_grad():
            for i, data in enumerate(validation_loader, 0):
                labels = data['label'].to(device)
                img = data['img'].to(device).float()
                prediction = my_model(img)
                loss = loss_func(prediction, labels)

                _, predicted = torch.max(prediction, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                print(
                    f'Test | Epoch {epoch}/{num_epoch}, Batch {i}/{int(test_size/batch_size)} '
                    f' Loss: {loss.item():.3f} LR: {get_lr(optimizer):.6f}'
                    f' Acc: {(100 * correct / total):.3f} Best-so-far: {100*bestTestAccuracy:.5f}'
                )

        if (correct / total) > bestTestAccuracy:
            bestTestAccuracy = correct / total
            print(f'Update best test: {100*bestTestAccuracy:.5f}')
            torch.save(
                my_model.state_dict(),
                f"transformer/my_model_{str(round(100*bestTestAccuracy,2)).replace('.', '_')}.pt"
            )

        # NOTE(review): this steps the scheduler on the running *best*
        # accuracy (monotone non-decreasing), so the LR drops only after the
        # best stalls for `patience` epochs; confirm this is intended rather
        # than stepping on this epoch's accuracy (correct / total).
        scheduler.step(bestTestAccuracy)
# Example #7
'''================================================================
Total params: 271,590,402
Trainable params: 271,590,402
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 1038.92
Params size (MB): 1036.04
Estimated Total Size (MB): 2075.53
---------------------------------------------------------------- '''

# Plain ViT (no efficient-attention backbone); ~271.6M parameters per the
# summary string above.
model = ViT(
    dim=1024,
    image_size=224,
    patch_size=16,  # 224/16 -> 14x14 patch grid
    num_classes=2,
    depth=32,  # number of transformer blocks
    heads=16,  # number of multi-channel attention
    mlp_dim=2048,
    channels=3,  # RGB input
)

model.to(device)  # NOTE(review): `device` must be defined earlier in the file

# loss function
criterion = nn.CrossEntropyLoss()
# optimizer (NOTE(review): `lr` must be defined earlier in the file)
optimizer = optim.Adam(model.parameters(), lr=lr)
# scheduler: decay LR by `gamma` every epoch
scheduler = StepLR(optimizer, step_size=1, gamma=gamma)

for epoch in range(epochs):
def main(args):
    """Train a Performer-ViT on the colon-cancer dataset.

    Builds train/val loaders, trains for the configured number of epochs,
    checkpoints to ``args.output_dir`` every epoch, and streams metrics to
    Weights & Biases.
    """
    print(args)

    device = torch.device(args.device)

    # Fix seed for reproducibility.
    print("Setting random seed")
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    # BUGFIX: benchmark=True lets cuDNN auto-tune its algorithm choice
    # non-deterministically, contradicting deterministic=True below and the
    # seeding above (the original set both flags to True).
    cudnn.benchmark = False
    cudnn.deterministic = True

    data_directory = args.data_path
    print("Loading data")
    train_dataset = ColonCancerDataset(data_directory,
                                       train=True,
                                       seed=args.seed)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers,
                              pin_memory=args.pin_mem,
                              drop_last=True)

    val_dataset = ColonCancerDataset(data_directory,
                                     train=False,
                                     seed=args.seed)
    # Larger batches are fine for evaluation (no gradients are kept).
    val_loader = DataLoader(val_dataset,
                            batch_size=int(1.5 * args.batch_size),
                            shuffle=False,
                            num_workers=args.num_workers,
                            pin_memory=args.pin_mem,
                            drop_last=False)

    print(f"Creating model: {args.model}")
    efficient_transformer = Performer(dim=384, depth=12, heads=6, causal=True)

    # 500x500 images, 25x25 patches -> 20x20 = 400 patch tokens.
    model = ViT(image_size=500,
                patch_size=25,
                num_classes=2,
                dim=384,
                transformer=efficient_transformer)
    # TODO fix create model function and files
    # model = create_model(
    #     args.model,
    #     pretrained=False,
    #     num_classes=2,
    #     drop_rate=args.drop,
    #     drop_path_rate=args.drop_path,
    #     drop_block_rate=None,
    # )

    model.to(device)

    model_without_ddp = model
    n_parameters = sum(p.numel() for p in model.parameters()
                       if p.requires_grad)
    print(f"Number of params: {n_parameters}")

    # Linear LR scaling rule: scale the base LR by batch_size / 512.
    linear_scaled_lr = args.lr * args.batch_size / 512.0
    args.lr = linear_scaled_lr
    optimiser = create_optimizer(args, model_without_ddp)
    loss_scaler = NativeScaler()

    lr_scheduler, _ = create_scheduler(args, optimiser)

    criterion = LabelSmoothingCrossEntropy()

    output_dir = Path(args.output_dir)

    # Track gradients and parameters in wandb every 10 logging steps.
    wandb.watch(model, criterion, log='all', log_freq=10)

    print(f"Starting training for {args.epochs} epochs")
    start_time = time.time()
    # NOTE(review): the +1 makes this run (epochs - start_epoch + 1)
    # iterations; presumably a 1-based epoch convention — confirm.
    for epoch in tqdm(range(args.start_epoch, args.epochs + 1)):
        train_loss, train_metrics = train_one_epoch(model, criterion,
                                                    train_loader, optimiser,
                                                    device)

        lr_scheduler.step(epoch)
        # TODO add in resuming training

        val_loss, val_metrics = evaluate(val_loader, model, device)

        if args.output_dir:
            # Single rolling checkpoint, overwritten each epoch.
            checkpoint_paths = [output_dir / "checkpoint.pth"]
            for checkpoint_path in checkpoint_paths:
                save(
                    {
                        "model": model_without_ddp.state_dict(),
                        "optimiser": optimiser.state_dict(),
                        "lr_scheduler": lr_scheduler.state_dict(),
                        "epoch": epoch,
                        "scaler": loss_scaler.state_dict(),
                        "args": args,
                    }, checkpoint_path)

        wandb.log({
            "epoch": epoch,
            "train loss": train_loss,
            "val loss": val_loss,
            "train acc": train_metrics["accuracy"],
            "train f1": train_metrics["f1 score"],
            "train prec": train_metrics["precision"],
            "train recall": train_metrics["recall"],
            "val acc": val_metrics["accuracy"],
            "val f1": val_metrics["f1 score"],
            "val prec": val_metrics["precision"],
            "val recall": val_metrics["recall"]
        })

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print("Training time {}".format(total_time_str))
# Example #9
from vit_pytorch.efficient import ViT
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR

# Linformer efficient-attention backbone for 28x28 single-channel input.
efficient_transformer = Linformer(
    dim=128,
    seq_len=16 + 1,  # 28/7 = 4 -> 4x4 = 16 patches + 1 cls-token
    depth=12,
    heads=8,
    k=64)  # Linformer projection dimension

att_model = ViT(
    dim=128,
    image_size=28,  # MNIST-sized input
    patch_size=7,
    num_classes=10,
    transformer=efficient_transformer,
    channels=1,  # grayscale
).to(device)  # NOTE(review): `device` must be defined earlier in the file

# loss function
criterion = nn.CrossEntropyLoss()
# optimizer
optimizer = optim.Adam(att_model.parameters(), lr=0.01)
# scheduler: multiply LR by 0.7 every epoch
scheduler = StepLR(optimizer, step_size=1, gamma=0.7)

for epoch in range(20):
    epoch_loss = 0
    epoch_accuracy = 0
# Linformer

# Linformer efficient-attention backbone: 224/32 = 7 -> 7x7 patch grid.
efficient_transformer = Linformer(
    dim=128,  # embedding dim, must match the ViT `dim` below
    seq_len=49 + 1,  # 7x7 patches + 1 cls-token
    depth=12,
    heads=8,
    k=64)  # Linformer projection dimension

# Visual Transformer

model = ViT(
    dim=128,
    image_size=224,
    patch_size=32,
    num_classes=2,
    transformer=efficient_transformer,
    channels=3,  # RGB input
).to(device)  # NOTE(review): `device` must be defined earlier in the file

# loss function
criterion = nn.CrossEntropyLoss()
# optimizer (NOTE(review): `lr` must be defined earlier in the file)
optimizer = optim.Adam(model.parameters(), lr=lr)
# scheduler: decay LR by `gamma` every epoch
scheduler = StepLR(optimizer, step_size=1, gamma=gamma)

for epoch in range(epochs):
    epoch_loss = 0
    epoch_accuracy = 0
    for data, label in tqdm(train_loader):