Example #2
import torch

# ResidualAttentionModel comes from this repository's model module.


def main():
    model = ResidualAttentionModel()
    model = torch.nn.DataParallel(model)

    optimizer = torch.optim.SGD(model.parameters(), 0.001,
                                momentum=0.9,
                                weight_decay=1e-4)
    # Restore the weights and optimizer state saved during training
    # (see save_checkpoint in Example #7) and switch to inference mode.
    checkpoint = torch.load('best_model/model_best.pth.tar')
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    model.eval()
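Because the checkpoint was saved from a DataParallel-wrapped model, its state_dict keys carry a 'module.' prefix, which is why the model is wrapped before load_state_dict above. To load the same file into an unwrapped model, the prefix has to be stripped first; a minimal sketch:

state = {k.replace('module.', '', 1): v
         for k, v in checkpoint['state_dict'].items()}
model.load_state_dict(state)  # plain, unwrapped model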
Example #3
import unittest

from mxnet import nd

# ResidualAttentionModel and ResidualAttentionModel_32input come from this
# repository's Gluon implementation (import path omitted in the original).


class TestModel(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.cifar_data = nd.random.normal(shape=(1, 3, 32, 32))
        self.cifar_att56 = ResidualAttentionModel_32input()
        self.cifar_att56.initialize()

        self.cifar_att92 = ResidualAttentionModel_32input(
            additional_stage=True)
        self.cifar_att92.initialize()

        self.imgnet_data = nd.random.normal(shape=(1, 3, 224, 224))
        self.att56 = ResidualAttentionModel()
        self.att56.initialize()

        self.att92 = ResidualAttentionModel(additional_stage=True)
        self.att92.initialize()

    def test_model(self):
        self.assertEqual((1, 10), self.cifar_att56(self.cifar_data).shape)
        self.assertEqual((1, 10), self.cifar_att92(self.cifar_data).shape)
        self.assertEqual((1, 1000), self.att56(self.imgnet_data).shape)
        self.assertEqual((1, 1000), self.att92(self.imgnet_data).shape)
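The standard unittest entry point runs these shape checks directly:

if __name__ == '__main__':
    unittest.main()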
Example #4
# Truncated above: `transform` and `train_dataset` are defined earlier in the
# original script. A train split matching the test split below would be:
# train_dataset = datasets.CIFAR10(root='./data/', train=True,
#                                  transform=transform, download=True)
test_dataset = datasets.CIFAR10(root='./data/',
                                train=False,
                                transform=transform)

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=35,
                                           shuffle=True,
                                           num_workers=8)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=20,
                                          shuffle=False)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
           'ship', 'truck')
model = ResidualAttentionModel().cuda()
print(model)

lr = 0.001
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
is_train = True
model_file = 'model_92.pkl'

if is_train:
    # Training
    for epoch in range(100):
        for i, (images, labels) in enumerate(train_loader):
            # Variable is a no-op wrapper since PyTorch 0.4; tensors moved
            # to the GPU can be used directly.
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())
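The example is cut off mid-loop; a typical continuation (Example #8 below shows the same loop up to optimizer.zero_grad()) would be:

            # Forward + Backward + Optimize
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()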
Example #5
from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
import numpy as np
import torchvision
from torchvision import transforms, datasets, models
import os
import cv2
import time
# from model.residual_attention_network_pre import ResidualAttentionModel
# based on https://github.com/liudaizong/Residual-Attention-Network
from model.residual_attention_network import ResidualAttentionModel_92_32input_update as ResidualAttentionModel

if __name__ == "__main__":

    model_file = 'model_92_sgd.pkl'

    classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
               'ship', 'truck')
    model = ResidualAttentionModel()
    print(model)

    model.load_state_dict(
        torch.load(model_file, map_location=torch.device('cpu')))
    model.eval()
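With the model in eval mode, a minimal CPU inference sketch follows (the input is a random stand-in; a real script would load and preprocess an image, e.g. with cv2):

    with torch.no_grad():
        x = torch.randn(1, 3, 32, 32)  # CIFAR-10-sized input
        pred = model(x).argmax(dim=1).item()
        print('predicted class:', classes[pred])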
Example #6
            # Tail of a truncated training loop: decay the learning rate when
            # the iteration count reaches one of the lr_period milestones.
            trainer.set_learning_rate(trainer.learning_rate * lr_decay)


if __name__ == '__main__':
    batch_size = 64
    iterations = 530e3
    wd = 1e-4
    lr = 0.1
    lr_period = tuple(iterations * i for i in (0.3, 0.6, 0.9))
    lr_decay = 0.1
    cat_interval = 10e3
    num_workers = 12
    num_gpus = 2
    ctx = [mx.gpu(i) for i in range(num_gpus)]

    net = ResidualAttentionModel()
    net.hybridize(static_alloc=True)
    net.initialize(init=mx.init.MSRAPrelu(), ctx=ctx)

    trainer = gluon.Trainer(net.collect_params(), 'nag', {
        'learning_rate': lr,
        'momentum': 0.9,
        'wd': wd
    })

    train_data = gluon.data.DataLoader(ImageFolderDataset(
        '/system1/Dataset/ImageNet/ILSVRC2012_img_train',
        transform=transformer),
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=num_workers)
    # (call truncated in the original example)
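A sketch of how the schedule parameters drive the set_learning_rate call at the top of this example (hypothetical helper; the original training loop is truncated):

def maybe_decay_lr(trainer, iteration):
    # lr_period marks 30%/60%/90% of the iteration budget; at each milestone
    # the learning rate is multiplied by lr_decay (0.1).
    if iteration in lr_period:
        trainer.set_learning_rate(trainer.learning_rate * lr_decay)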
Example #7
def main_worker(gpu, ngpus_per_node, args):
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        # model = models.__dict__[args.arch]()
        model = ResidualAttentionModel()

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            # Augmentation options left commented out in the original:
            # transforms.RandomResizedCrop(224),
            # transforms.RandomHorizontalFlip(),
            # transforms.RandomCrop((32, 32), padding=4),  # left, top, right, bottom
            # transforms.Scale(224),
            transforms.ToTensor(),
            normalize
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(
            valdir,
            transforms.Compose([
                # transforms.Resize(256),
                # transforms.CenterCrop(224),
                # transforms.RandomCrop((32, 32), padding=4),
                transforms.ToTensor(),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion, args)
        return

    for epoch in range(args.start_epoch, args.epochs):

        # for epoch in range(args.start_epoch, args.start_epoch+1):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)

        # evaluate on validation set
        acc1 = validate(val_loader, model, criterion, args)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                }, is_best)
            # train_acc / val_acc / train_loss / val_loss are presumably
            # tracked elsewhere in the full script (not shown in this excerpt).
            print([
                "train_acc", train_acc, "val_acc", val_acc, "train_loss",
                train_loss, "val_loss", val_loss
            ])
            np.save('full_training_in_progress.npy',
                    np.array([train_acc, val_acc, train_loss, val_loss]))
    # matplotlib.pyplot (plt) and plot_result are imported/defined elsewhere
    # in the full script.
    fig, (ax1, ax2) = plt.subplots(1, 2)
    plot_result(range(1), train_acc, val_acc, 'acc', 'Accuracy', ax=ax1)
    plot_result(range(1), train_loss, val_loss, 'loss', 'Loss', ax=ax2)
    fig.tight_layout()
    # plt.show()
    plt.savefig('result.png')
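save_checkpoint above follows the convention of the PyTorch ImageNet reference script; a typical definition (assuming shutil is imported) looks like:

def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    # Always persist the latest state; keep a copy of the best model so far.
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')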
Example #8
# Reconstructed opening of a call truncated in the original (it mirrors
# Example #4; train_dataset and transform are defined further up the script):
test_dataset = datasets.CIFAR10(root='./data/',
                                train=False,
                                transform=transform)

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=20, 
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=20, 
                                          shuffle=False)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

model = ResidualAttentionModel().cuda()
print(model)

lr = 0.001
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# Training 
for epoch in range(100):
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images.cuda())
        labels = Variable(labels.cuda())
        
        # Forward + Backward + Optimize
        optimizer.zero_grad()
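Example #8 is cut off at optimizer.zero_grad(); the forward/backward/step calls follow as in the continuation sketched after Example #4. Once training finishes, a matching evaluation pass over test_loader would look like:

model.eval()
correct, total = 0, 0
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images.cuda())
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted.cpu() == labels).sum().item()
print('Test accuracy: %.2f%%' % (100.0 * correct / total))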