Example #1
def main_worker(gpu, ngpus_per_node, args):
    global best_acc1, best_loc1, best_epoch, \
        loc1_at_best_acc1, acc1_at_best_loc1, \
        gtknown_at_best_acc1, gtknown_at_best_loc1
    global writer

    args.gpu = gpu
    log_folder = os.path.join('train_log', args.name, ts)
    args.save_dir = log_folder

    if args.gpu == 0:
        writer = SummaryWriter(logdir=log_folder)

    os.makedirs(log_folder, exist_ok=True)

    with open('{}/args.json'.format(log_folder), 'w') as fp:
        json.dump(args.__dict__, fp)

    Logger(os.path.join(log_folder, 'log.log'))  # presumably redirects stdout to the log file

    print('args: ', args)

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))
    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    if args.dataset == 'CUB':
        num_classes = 200
    elif args.dataset == 'tiny_imagenet':
        num_classes = 200
    elif args.dataset == 'ILSVRC':
        num_classes = 1000
    else:
        raise Exception("Not preferred dataset.")

    if args.arch == 'vgg16':
        model = vgg.vgg16(pretrained=True, num_classes=num_classes)
    elif args.arch == 'vgg16_GAP':
        model = vgg.vgg16_GAP(pretrained=True, num_classes=num_classes)
    elif args.arch == 'vgg16_ADL':
        model = vgg.vgg16_ADL(pretrained=True,
                              num_classes=num_classes,
                              ADL_position=args.ADL_position,
                              drop_rate=args.ADL_rate,
                              drop_thr=args.ADL_thr)
    elif args.arch == 'resnet50_ADL':
        model = resnet.resnet50(pretrained=True,
                                num_classes=num_classes,
                                ADL_position=args.ADL_position,
                                drop_rate=args.ADL_rate,
                                drop_thr=args.ADL_thr)
    elif args.arch == 'resnet50':
        model = resnet.resnet50(pretrained=True, num_classes=num_classes)

    elif args.arch == 'resnet34_ADL':
        model = resnet.resnet34(pretrained=True,
                                num_classes=num_classes,
                                ADL_position=args.ADL_position,
                                drop_rate=args.ADL_rate,
                                drop_thr=args.ADL_thr)

    elif args.arch == 'se_resnet50_ADL':
        model = resnet.resnet50_se(pretrained=True,
                                   num_classes=num_classes,
                                   ADL_position=args.ADL_position,
                                   drop_rate=args.ADL_rate,
                                   drop_thr=args.ADL_thr)

    else:
        raise ValueError("Unknown architecture: {}".format(args.arch))

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(args.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu], find_unused_parameters=True)
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    param_features = []
    param_classifiers = []

    if args.arch.startswith('vgg'):
        for name, parameter in model.named_parameters():
            if 'features.' in name:
                param_features.append(parameter)
            else:
                param_classifiers.append(parameter)

    elif args.arch.startswith('resnet') or args.arch.startswith('se'):
        for name, parameter in model.named_parameters():
            if 'layer4.' in name or 'fc.' in name:
                param_classifiers.append(parameter)
            else:
                param_features.append(parameter)
    else:
        raise Exception("Fail to recognize the architecture")

    # The backbone ("features") and the classifier head get separate learning
    # rates; the head's rate is scaled by args.lr_ratio.
    optimizer = torch.optim.SGD(
        [{'params': param_features, 'lr': args.lr},
         {'params': param_classifiers, 'lr': args.lr * args.lr_ratio}],
        momentum=args.momentum,
        weight_decay=args.weight_decay,
        nesterov=args.nest)

    # optionally resume from a checkpoint
    if args.resume:
        model, optimizer = load_model(model, optimizer, args)

    # for param_group in optimizer.param_groups:
    #     param_group['lr'] = args.lr

    cudnn.benchmark = True

    # Build train/val loaders for the selected dataset (e.g. CUB-200-2011).
    train_loader, val_loader, train_sampler = data_loader(args)

    if args.cam_curve:
        cam_curve(val_loader, model, criterion, writer, args)
        return

    if args.evaluate:
        evaluate(val_loader, model, criterion, args)
        return

    if args.gpu == 0:
        print("Batch Size per Tower: %d" % (args.batch_size))
        print(model)

    for epoch in range(args.start_epoch, args.epochs):
        if args.gpu == 0:
            print("===========================================================")
            print("Start Epoch %d ..." % (epoch + 1))

        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)
        val_acc1 = 0
        val_loss = 0
        val_gtloc = 0
        val_loc = 0
        # train for one epoch
        train_acc, train_loss, progress_train = \
            train(train_loader, model, criterion, optimizer, epoch, args)

        if args.gpu == 0:
            progress_train.display(epoch + 1)

        # evaluate on validation set
        if args.task == 'cls':
            val_acc1, val_loss = validate(val_loader, model, criterion, epoch,
                                          args)

        # evaluate localization on validation set
        elif args.task == 'wsol':
            val_acc1, val_acc5, val_loss, val_gtloc, val_loc = \
                evaluate_loc(val_loader, model, criterion, epoch, args)

        # tensorboard
        if args.gpu == 0:
            writer.add_scalar(args.name + '/train_acc', train_acc, epoch)
            writer.add_scalar(args.name + '/train_loss', train_loss, epoch)
            writer.add_scalar(args.name + '/val_cls_acc', val_acc1, epoch)
            writer.add_scalar(args.name + '/val_loss', val_loss, epoch)
            writer.add_scalar(args.name + '/val_gt_loc', val_gtloc, epoch)
            writer.add_scalar(args.name + '/val_loc1', val_loc, epoch)

        # remember best acc@1 and save checkpoint
        is_best = val_acc1 > best_acc1
        best_acc1 = max(val_acc1, best_acc1)
        if is_best:
            best_epoch = epoch + 1
            loc1_at_best_acc1 = val_loc
            gtknown_at_best_acc1 = val_gtloc

        if args.task == 'wsol':
            # Track the best localization accuracy separately (not used for model selection).
            is_best_loc = val_loc > best_loc1
            best_loc1 = max(val_loc, best_loc1)
            if is_best_loc:
                best_epoch = epoch + 1
                acc1_at_best_loc1 = val_acc1
                gtknown_at_best_loc1 = val_gtloc

        if args.gpu == 0:
            print("\nCurrent Best Epoch: %d" % (best_epoch))
            print("Top-1 GT-Known Localization Acc: %.3f \
                   \nTop-1 Localization Acc: %.3f\
                   \nTop-1 Classification Acc: %.3f"                                                     % \
                  (gtknown_at_best_acc1, loc1_at_best_acc1, best_acc1))
            print("\nEpoch %d finished." % (epoch + 1))

        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            saving_dir = log_folder
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                }, is_best, saving_dir)

    if args.gpu == 0:
        save_train(best_acc1, loc1_at_best_acc1, gtknown_at_best_acc1,
                   best_loc1, acc1_at_best_loc1, gtknown_at_best_loc1, args)

        print("===========================================================")
        print("Start Evaluation on Best Checkpoint ...")

    args.resume = os.path.join(log_folder, 'model_best.pth.tar')
    model, _ = load_model(model, optimizer, args)
    evaluate(val_loader, model, criterion, args)
    cam_curve(val_loader, model, criterion, writer, args)
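
For context, a minimal launcher sketch for main_worker above, following the
standard torch.multiprocessing pattern this code is built on (the actual
main() is not shown in the snippet, so treat the details as assumptions):

def main(args):
    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # One worker process per GPU; the global world size scales to match.
        args.world_size = ngpus_per_node * args.world_size
        torch.multiprocessing.spawn(main_worker,
                                    nprocs=ngpus_per_node,
                                    args=(ngpus_per_node, args))
    else:
        main_worker(args.gpu, ngpus_per_node, args)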
Example #2
# NOTE: the opening of this snippet is truncated; train_data/train_loader are
# reconstructed by analogy with the test_loader call below (an assumption).
train_data = DATA(data_root=(opt.data_dir + 'train/'))
train_loader = DataLoader(train_data,
                          num_workers=8,
                          batch_size=opt.batch_size,
                          shuffle=True,
                          drop_last=True,
                          pin_memory=True)
training_batch_generator = get_training_batch(train_loader)

test_data = DATA(data_root=(opt.data_dir + 'validation/'))
test_loader = DataLoader(test_data,
                         num_workers=8,
                         batch_size=opt.batch_size,
                         shuffle=True,
                         drop_last=True,
                         pin_memory=True)
testing_batch_generator = get_training_batch(test_loader)

print("Initializing Networks")
model_vgg = vgg16(pretrained=True, progress=True)
optimizer_vgg = optim.Adam(model_vgg.parameters(), lr=opt.lr)
model_vgg.cuda()
cse_loss = nn.CrossEntropyLoss().cuda()


def train(batch, label):
    model_vgg.train()
    y = model_vgg(batch)
    loss = cse_loss(y, label)
    optimizer_vgg.zero_grad()
    loss.backward()
    optimizer_vgg.step()
    return [loss.item()]
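
get_training_batch is used above but not defined in this snippet. A common
implementation (an assumption, not necessarily the author's) is an infinite
generator that cycles the loader:

def get_training_batch(loader):
    # Loop over the DataLoader forever so callers can pull batches on demand.
    while True:
        for batch in loader:
            yield batch

A driving loop might then look like this (opt.niter is hypothetical):

for it in range(opt.niter):
    batch, label = next(training_batch_generator)
    train_loss = train(batch.cuda(), label.cuda())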

Example #3
print("Initializing Data Loader")
data = DATA(data_root=(opt.data_dir + 'test/'))
loader = DataLoader(data,
                    num_workers=8,
                    batch_size=opt.batch_size,
                    shuffle=False,
                    drop_last=False,
                    pin_memory=True)

print("Initializing Networks")
# model_xcp = xception(2)
# checkpoint = torch.load(opt.modeldir)
# model_xcp.load_state_dict(checkpoint['module'])
# model_xcp.eval().cuda()

model_vgg = vgg16()
checkpoint = torch.load(opt.modeldir)
model_vgg.load_state_dict(checkpoint['module'])
model_vgg.eval().cuda()

softmax = nn.Softmax(dim=1)


def test(image):
    with torch.no_grad():
        z = model_vgg(image)
        pred = torch.max(z, dim=1)[1]
        z = softmax(z)
    return pred, z
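
A hedged usage sketch for test() above; the (images, labels) batch structure
yielded by DATA is an assumption:

preds, scores = [], []
for images, labels in loader:
    pred, z = test(images.cuda())
    preds.append(pred.cpu())
    scores.append(z.cpu())
preds, scores = torch.cat(preds), torch.cat(scores)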

Example #4
        def forward(self, x):
            x = F.relu(F.max_pool2d(self.conv1(x), 2))
            x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
            x = x.view(-1, 500)
            x = F.relu(self.fc1(x))
            x = F.dropout(x, training=self.training)
            x = self.fc2(x)
            return x, F.log_softmax(x, dim=1)


    model = Net()

elif args.network == 'Alexnet':
    model = alexnet.AlexNet(num_classes=100)
elif args.network == 'Vgg':
    model = vgg.vgg16()
    print(model)
elif args.network == 'Resnet':
    model = resnet.ResNet50(num_classes=100)
elif args.network == 'Densenet':
    model = densenet.densenet_cifar(num_classes=100)
    # print(model)

if args.cuda:
    model.cuda(args.gpu)
optimizer = optim.SGD(model.parameters(),
                      lr=args.lr,
                      momentum=args.momentum,
                      weight_decay=args.weight_decay)
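
A minimal training-step sketch for the model and optimizer above (the
criterion, the data pipeline, and an existing `import torch.nn as nn` are
assumptions; the original loop is not part of this snippet). Note that the
custom Net returns a (logits, log_probs) tuple, so its second output would go
through F.nll_loss instead:

criterion = nn.CrossEntropyLoss()

def train_step(inputs, targets):
    model.train()
    optimizer.zero_grad()
    outputs = model(inputs)  # plain logits for AlexNet / VGG / ResNet / DenseNet
    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()
    return loss.item()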
Example #5
vocab = Vocab(captions_dict, threshold)
vocab_size = vocab.id  # Vocab's running id counter doubles as the vocabulary size

# Initialize the embedding matrix randomly, then overwrite rows for words that
# appear in the pretrained vectors (each row of `data` is a word plus floats).
embeddings = np.random.uniform(-1, 1, [vocab_size, embedding_dim])
for k in data:
    if k[0] in vocab.word2id:
        embeddings[vocab.word2id[k[0]]] = list(map(float, k[1:]))

weights = embeddings
with open('vocab.pkl', 'wb') as f:
    pickle.dump(vocab, f)
    print('vocabulary dumped to vocab.pkl')

# Build models
encoder = vgg.vgg16()
decoder = RNN(embedding_dim=embedding_dim,
              hidden_dim=hidden_dim,
              vocab_size=vocab_size,
              num_layers=1,
              weights=weights)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
params = list(encoder.parameters()) + list(decoder.parameters())
optimizer = torch.optim.Adam(params, lr=learning_rate)

# Train models
num_epochs = 100
save_iter = 10
for epoch in range(num_epochs):
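    # The loop body is truncated in the original. What follows is a hedged
    # sketch of a typical encoder-decoder captioning step; train_loader and
    # the exact decoder call signature are assumptions, not the author's code.
    for images, captions in train_loader:
        features = encoder(images)
        outputs = decoder(features, captions)
        loss = criterion(outputs.view(-1, vocab_size), captions.view(-1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if (epoch + 1) % save_iter == 0:
        torch.save(encoder.state_dict(), 'encoder-%d.pkl' % (epoch + 1))
        torch.save(decoder.state_dict(), 'decoder-%d.pkl' % (epoch + 1))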