    batch_size = config.batch_size
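    # This allreduce acts as a barrier: ranks that skipped the download wait
    # here until the downloading rank has finished writing the dataset.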
    if download and isHVD:
        hvd.allreduce(torch.tensor(1), name="barrier")
    # Horovod: use DistributedSampler to partition the training data.
    train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset, num_replicas=num_replicas, rank=rank)
    # Each loader step yields batches_per_allreduce sub-batches' worth of
    # samples, so gradients can be accumulated locally between allreduces.
    train_loader = torch.utils.data.DataLoader(train_dataset,
            batch_size=batch_size * args.batches_per_allreduce,
            sampler=train_sampler, **kwargs)

    # Horovod: use DistributedSampler to partition the test data.
    test_sampler = torch.utils.data.distributed.DistributedSampler(test_dataset, num_replicas=num_replicas, rank=rank)
    test_loader = torch.utils.data.DataLoader(test_dataset, 
            batch_size=batch_size, sampler=test_sampler, **kwargs)

    if args.cuda:
        model.cuda()
    # if verbose:    summary(model, (3, 32, 32))

    criterion = nn.CrossEntropyLoss()
    use_kfac = args.kfac_update_freq > 0

    # if use_kfac:    args.base_lr = 0.003     #0.003 for vit
    lr_scheduler = []
    optimizer = optim.SGD(model.parameters(), lr=args.base_lr, momentum=args.momentum, weight_decay=args.weight_decay)
    if model_name in ("vit", "distiller"):
        optimizer = optim.Adam(model.parameters(), lr=0.003)  # QHAdam performs about the same as Adam, and much better than SGD
        # optimizer = optim.SGD(model.parameters(), lr=0.03, momentum=args.momentum, weight_decay=args.weight_decay)
    elif model_name == "jaggi":
        optimizer, lrs = Jaggi_get_optimizer(train_loader, model.named_parameters(), VoT_config)
        # lr_scheduler.append(lrs)
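
    # --- Added sketch (not in the original fragment) -------------------------
    # A minimal, assumed continuation for a Horovod run: wrap the local
    # optimizer so gradients are averaged across ranks, and start every rank
    # from rank 0's state. `hvd` is assumed to be an initialized horovod.torch.
    if isHVD:
        optimizer = hvd.DistributedOptimizer(
            optimizer,
            named_parameters=model.named_parameters(),
            backward_passes_per_step=args.batches_per_allreduce)
        hvd.broadcast_parameters(model.state_dict(), root_rank=0)
        hvd.broadcast_optimizer_state(optimizer, root_rank=0)
    # --------------------------------------------------------------------------
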
def main():

    parser = argparse.ArgumentParser(description='ViT')
    parser.add_argument('--data_dir', default='data/sph_dogs_vs_cats')
    parser.add_argument('--dataset', default='dvsc')
    parser.add_argument('--exp_id', default='sdvsc-adam')
    parser.add_argument('--mode', default='normal')
    parser.add_argument('--batch', default=128, type=int)
    parser.add_argument('--epochs', default=10, type=int)
    parser.add_argument('--cuda', default=True)  # any CLI string parses as truthy; effectively a constant default
    parser.add_argument('--optim', default='SGD')
    args = parser.parse_args()

    os.makedirs('weights', exist_ok=True)

    dataset = {'smnist': SMNIST, 'dvsc': DVSC}
    if args.dataset == 'smnist':
        image_size = 60
        patch_size = 10
        num_classes = 10
        samp = 6
    elif args.dataset == 'dvsc':
        image_size = 384
        patch_size = 32
        num_classes = 2
        samp = 12
    else:
        raise ValueError('unknown dataset: {}'.format(args.dataset))

    if args.mode == 'normal':
        model = ViT(image_size=image_size,
                    patch_size=patch_size,
                    num_classes=num_classes,
                    dim=512,
                    depth=4,
                    heads=8,
                    mlp_dim=512,
                    dropout=0.1,
                    emb_dropout=0.1)
    else:
        model = ViT_sphere(
            image_size=image_size,
            patch_size=patch_size,
            num_classes=num_classes,
            dim=512,
            depth=4,
            heads=8,
            mlp_dim=512,
            base_order=1,
            mode=args.mode,  # face, vertex and regular
            samp=samp,
            dropout=0.1,
            emb_dropout=0.1)

    params = sum(p.numel() for p in model.parameters() if p.requires_grad)

    print("Trainable parameters", params)

    cuda = args.cuda
    epochs = args.epochs
    batch = args.batch
    path = 'weights/'

    train_data = dataset[args.dataset](args.data_dir, 'train', image_size,
                                       image_size, None)
    valid_data = dataset[args.dataset](args.data_dir, 'valid', image_size,
                                       image_size, None)

    train_loader = DataLoader(dataset=train_data,
                              batch_size=batch,
                              shuffle=True)
    valid_loader = DataLoader(dataset=valid_data,
                              batch_size=batch,
                              shuffle=False)  # no need to shuffle for evaluation

    if cuda:
        model = model.cuda()
    model.train()
    if args.optim == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
    else:
        optimizer = optim.Adam(model.parameters(), lr=1e-3)  #, momentum=0.9)

    cla_loss = torch.nn.CrossEntropyLoss()

    valid_loss = float('inf')  # best-so-far sentinel
    valid_acc = 0

    print("Training Start")
    T_L = []
    V_L = []
    V_a = []
    for epoch in range(epochs):
        print("Epoch", epoch + 1)
        model.train()
        L = []
        for img, target in tqdm(train_loader):
            if cuda:
                img = img.cuda()
                target = target.cuda()
            preds = model(img)
            output = cla_loss(preds, target)
            L.append(output.cpu().item())
            optimizer.zero_grad()
            output.backward()
            optimizer.step()

        T_L.append(np.mean(L))
        print("train loss:", np.mean(L))

        sum_acc = 0
        total = len(valid_data)
        model.eval()
        L = []  # reset: keep validation losses separate from training losses
        with torch.no_grad():
            for img, target in tqdm(valid_loader):
                if cuda:
                    img = img.cuda()
                    target = target.cuda()
                preds = model(img)
                L.append(cla_loss(preds, target).item())
                probabilities = torch.nn.functional.softmax(preds, dim=1)
                preds = torch.argmax(probabilities, dim=1)
                sum_acc += (preds == target).sum()

        v_l = np.mean(L)
        v_a = sum_acc.item() / total * 100

        if v_a > valid_acc:
            valid_acc = v_a
            torch.save(
                {
                    'epoch': epoch,  # save the current epoch, not the total count
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                }, path + args.exp_id + 'model_acc.pth')

        if v_l < valid_loss:
            valid_loss = v_l
            torch.save(
                {
                    'epoch': epoch,  # save the current epoch, not the total count
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                }, path + args.exp_id + 'model_loss.pth')

        V_L.append(v_l)
        V_a.append(v_a)
        print("val loss:", v_l)
        print("val acc:", v_a)

    print(T_L)
    plt.plot(T_L, label='Train_loss', color='blue')
    plt.plot(V_L, label='Valid_loss', color='red')
    plt.legend(loc="upper left")
    plt.xlabel("epoch")
    plt.ylabel("loss")
    plt.savefig(path + args.exp_id + 'Learning_Curves.png')
    plt.clf()
    plt.plot(V_a, label='Valid_acc', color='cyan')
    plt.legend(loc="upper left")
    plt.xlabel("epoch")
    plt.ylabel("accuracy")
    plt.savefig(path + args.exp_id + 'Val_acc.png')

    torch.save(
        {
            'epoch': epochs,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }, path + args.exp_id + 'model_last.pth')
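

# Assumed entry point; the captured snippet omits the usual guard.
if __name__ == '__main__':
    main()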
Example #3
def main():

    parser = argparse.ArgumentParser(description='ViT')
    parser.add_argument('--data_dir', default='data/sph_dogs_vs_cats')
    parser.add_argument('--dataset', default='dvsc')
    parser.add_argument('--resume', default='dvsc-sgd-regularmodel_last.pth')
    parser.add_argument('--set', default='test')
    parser.add_argument('--mode', default='regular')
    parser.add_argument('--batch', default=8, type=int)
    parser.add_argument('--cuda', default=True)  # any CLI string parses as truthy; effectively a constant default
    args = parser.parse_args()


    os.makedirs('weights', exist_ok=True)

    dataset = {'smnist': SMNIST, 'dvsc': DVSC}
    
    if args.dataset == 'smnist':
        image_size  = 60
        patch_size  = 10
        num_classes = 10
        samp = 6
    elif args.dataset == 'dvsc':
        image_size  = 384
        patch_size  = 32
        num_classes = 2
        samp = 12
    else:
        raise ValueError('unknown dataset: {}'.format(args.dataset))


    if args.mode == 'normal':
        model = ViT(
            image_size  = image_size,
            patch_size  = patch_size,
            num_classes = num_classes,
            dim         = 512,
            depth       = 4,
            heads       = 8,
            mlp_dim     = 512,
            dropout     = 0.1,
            emb_dropout = 0.1
        )
    else:
        model = ViT_sphere(
            image_size  = image_size,
            patch_size  = patch_size,
            num_classes = num_classes,
            dim         = 512,
            depth       = 4,
            heads       = 8,
            mlp_dim     = 512,
            base_order = 1,
            mode = args.mode, # face, vertex and regular
            samp = samp,
            dropout     = 0.1,
            emb_dropout = 0.1
        )

    params = sum(p.numel() for p in model.parameters() if p.requires_grad)

    print("Trainable parameters", params)

    path    = 'weights/'
    model = load_model(model, os.path.join(path, args.resume))
    cuda    = args.cuda
    batch   = args.batch

    test_data   = dataset[args.dataset](args.data_dir, args.set, image_size, image_size, None)
    test_loader = DataLoader(dataset=test_data, batch_size=batch, shuffle=False)

    if cuda:
        model = model.cuda()
    model.eval()

    P = np.array([])
    T = np.array([])
    
    #df = pd.read_csv("dvsc.csv")

    with torch.no_grad():
        for img, target in tqdm(test_loader):
            if cuda:
                img    = img.cuda()
                target = target.cuda()
            preds = model(img)
            probabilities = torch.nn.functional.softmax(preds, dim=1)
            preds = torch.argmax(probabilities, dim=1)
            P = np.concatenate([P, preds.cpu().numpy()])
            T = np.concatenate([T, target.cpu().numpy()])

    confusion = confusion_matrix(T, P)  # sklearn expects (y_true, y_pred)

    #df['pred_class'] = P    
    #df.to_csv('dvsc_p_regular.csv')

    print('Confusion Matrix\n')
    print(confusion)
        
    print('\nClassification Report\n')
    print(classification_report(T, P, target_names=test_data.category))  # (y_true, y_pred)
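

# Assumed entry point; the captured snippet omits the usual guard.
if __name__ == '__main__':
    main()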
Example #4
# NOTE: the head of this constructor call was truncated in the source; the
# arguments above `depth` are hypothetical placeholders for the original values.
v = ViT(
    image_size = 256,   # placeholder
    patch_size = 32,    # placeholder
    num_classes = 2,    # placeholder
    dim = 1024,         # placeholder
    depth = 6,
    heads = 8,
    mlp_dim = 2048,
    dropout = 0.3,
    emb_dropout = 0.3
)

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
print(count_parameters(v))
criterion = nn.CrossEntropyLoss()
bce = nn.BCELoss()
sigmoid = nn.Sigmoid()
opt = torch.optim.Adam(v.parameters(), lr=3e-4)
#opt = torch.optim.SGD(v.parameters(), lr=3e-4)
v.cuda()
criterion.cuda()
bce.cuda()
dataloader = torch.utils.data.DataLoader(
    Dataset(root_path), batch_size=batch_size, shuffle=False, num_workers=8  # note: the training data is not shuffled here
)
test_dataloader = torch.utils.data.DataLoader(
    TestDataset(test_path), batch_size=batch_size, shuffle=False, num_workers=8
)

def plot_confusion_matrix(cm, classes, epoch, normalize=False,
                          title='Confusion matrix', cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix very prettily.
    Normalization can be applied by setting `normalize=True`.
    """
    plt.clf()