def main_worker(gpu, ngpus_per_node, args):
    global best_acc1, best_loc1, best_epoch, \
        loc1_at_best_acc1, acc1_at_best_loc1, \
        gtknown_at_best_acc1, gtknown_at_best_loc1
    global writer

    args.gpu = gpu
    log_folder = os.path.join('train_log', args.name, ts)
    args.save_dir = log_folder

    # only the rank-0 process writes tensorboard logs
    if args.gpu == 0:
        writer = SummaryWriter(logdir=log_folder)

    if not os.path.isdir(log_folder):
        os.makedirs(log_folder, exist_ok=True)

    with open('{}/args.json'.format(log_folder), 'w') as fp:
        json.dump(args.__dict__, fp)

    Logger(os.path.join(log_folder, 'log.log'))

    print('args: ', args)

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    if args.dataset == 'CUB':
        num_classes = 200
    elif args.dataset == 'tiny_imagenet':
        num_classes = 200
    elif args.dataset == 'ILSVRC':
        num_classes = 1000
    else:
        raise Exception("Unsupported dataset: {}".format(args.dataset))

    if args.arch == 'vgg16':
        model = vgg.vgg16(pretrained=True, num_classes=num_classes)
    elif args.arch == 'vgg16_GAP':
        model = vgg.vgg16_GAP(pretrained=True, num_classes=num_classes)
    elif args.arch == 'vgg16_ADL':
        model = vgg.vgg16_ADL(pretrained=True, num_classes=num_classes,
                              ADL_position=args.ADL_position,
                              drop_rate=args.ADL_rate,
                              drop_thr=args.ADL_thr)
    elif args.arch == 'resnet50_ADL':
        model = resnet.resnet50(pretrained=True, num_classes=num_classes,
                                ADL_position=args.ADL_position,
                                drop_rate=args.ADL_rate,
                                drop_thr=args.ADL_thr)
    elif args.arch == 'resnet50':
        model = resnet.resnet50(pretrained=True, num_classes=num_classes)
    elif args.arch == 'resnet34_ADL':
        model = resnet.resnet34(pretrained=True, num_classes=num_classes,
                                ADL_position=args.ADL_position,
                                drop_rate=args.ADL_rate,
                                drop_thr=args.ADL_thr)
    elif args.arch == 'se_resnet50_ADL':
        model = resnet.resnet50_se(pretrained=True, num_classes=num_classes,
                                   ADL_position=args.ADL_position,
                                   drop_rate=args.ADL_rate,
                                   drop_thr=args.ADL_thr)
    else:
        raise Exception("Fail to recognize the architecture")

    if args.distributed:
        # For multiprocessing distributed, the DistributedDataParallel
        # constructor should always set the single device scope; otherwise
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(args.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu], find_unused_parameters=True)
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to
            # all available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all
        # available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    # split parameters so the backbone and the classifier head can be
    # trained with different learning rates (head: lr * args.lr_ratio)
    param_features = []
    param_classifiers = []
    if args.arch.startswith('vgg'):
        for name, parameter in model.named_parameters():
            if 'features.' in name:
                param_features.append(parameter)
            else:
                param_classifiers.append(parameter)
    elif args.arch.startswith('resnet') or args.arch.startswith('se'):
        for name, parameter in model.named_parameters():
            if 'layer4.' in name or 'fc.' in name:
                param_classifiers.append(parameter)
            else:
                param_features.append(parameter)
    else:
        raise Exception("Fail to recognize the architecture")

    optimizer = torch.optim.SGD(
        [{'params': param_features, 'lr': args.lr},
         {'params': param_classifiers, 'lr': args.lr * args.lr_ratio}],
        momentum=args.momentum,
        weight_decay=args.weight_decay,
        nesterov=args.nest)

    # optionally resume from a checkpoint
    if args.resume:
        model, optimizer = load_model(model, optimizer, args)

    cudnn.benchmark = True

    # build data loaders for the selected dataset
    train_loader, val_loader, train_sampler = data_loader(args)

    if args.cam_curve:
        cam_curve(val_loader, model, criterion, writer, args)
        return

    if args.evaluate:
        evaluate(val_loader, model, criterion, args)
        return

    if args.gpu == 0:
        print("Batch Size per Tower: %d" % args.batch_size)
        print(model)

    for epoch in range(args.start_epoch, args.epochs):
        if args.gpu == 0:
            print("===========================================================")
            print("Start Epoch %d ..." % (epoch + 1))
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        val_acc1 = 0
        val_loss = 0
        val_gtloc = 0
        val_loc = 0

        # train for one epoch
        train_acc, train_loss, progress_train = \
            train(train_loader, model, criterion, optimizer, epoch, args)
        if args.gpu == 0:
            progress_train.display(epoch + 1)

        # evaluate classification on the validation set
        if args.task == 'cls':
            val_acc1, val_loss = validate(val_loader, model, criterion,
                                          epoch, args)
        # evaluate localization on the validation set
        elif args.task == 'wsol':
            val_acc1, val_acc5, val_loss, val_gtloc, val_loc = \
                evaluate_loc(val_loader, model, criterion, epoch, args)

        # tensorboard
        if args.gpu == 0:
            writer.add_scalar(args.name + '/train_acc', train_acc, epoch)
            writer.add_scalar(args.name + '/train_loss', train_loss, epoch)
            writer.add_scalar(args.name + '/val_cls_acc', val_acc1, epoch)
            writer.add_scalar(args.name + '/val_loss', val_loss, epoch)
            writer.add_scalar(args.name + '/val_gt_loc', val_gtloc, epoch)
            writer.add_scalar(args.name + '/val_loc1', val_loc, epoch)

        # remember best acc@1 and save checkpoint
        is_best = val_acc1 > best_acc1
        best_acc1 = max(val_acc1, best_acc1)
        if is_best:
            best_epoch = epoch + 1
            loc1_at_best_acc1 = val_loc
            gtknown_at_best_acc1 = val_gtloc

        if args.task == 'wsol':
            # also track the best localization accuracy; not used for
            # checkpoint selection
            is_best_loc = val_loc > best_loc1
            best_loc1 = max(val_loc, best_loc1)
            if is_best_loc:
                best_epoch = epoch + 1
                acc1_at_best_loc1 = val_acc1
                gtknown_at_best_loc1 = val_gtloc

        if args.gpu == 0:
            print("\nCurrent Best Epoch: %d" % best_epoch)
            print("Top-1 GT-Known Localization Acc: %.3f"
                  "\nTop-1 Localization Acc: %.3f"
                  "\nTop-1 Classification Acc: %.3f" %
                  (gtknown_at_best_acc1, loc1_at_best_acc1, best_acc1))
            print("\nEpoch %d finished." % (epoch + 1))

        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                }, is_best, log_folder)

    if args.gpu == 0:
        save_train(best_acc1, loc1_at_best_acc1, gtknown_at_best_acc1,
                   best_loc1, acc1_at_best_loc1, gtknown_at_best_loc1, args)

        print("===========================================================")
        print("Start Evaluation on Best Checkpoint ...")

        args.resume = os.path.join(log_folder, 'model_best.pth.tar')
        model, _ = load_model(model, optimizer, args)
        evaluate(val_loader, model, criterion, args)
        cam_curve(val_loader, model, criterion, writer, args)
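# A minimal sketch of how main_worker() is typically launched, following the
# standard PyTorch ImageNet-example pattern this script mirrors; the actual
# entry point of this repo may differ, and `parser` is assumed to be the
# argparse parser defined elsewhere in the file.
def main():
    args = parser.parse_args()
    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # one process per GPU; the total world size scales accordingly
        args.world_size = ngpus_per_node * args.world_size
        torch.multiprocessing.spawn(main_worker, nprocs=ngpus_per_node,
                                    args=(ngpus_per_node, args))
    else:
        main_worker(args.gpu, ngpus_per_node, args)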
                          shuffle=True, drop_last=True, pin_memory=True)
training_batch_generator = get_training_batch(train_loader)

test_data = DATA(data_root=(opt.data_dir + 'validation/'))
test_loader = DataLoader(test_data,
                         num_workers=8,
                         batch_size=opt.batch_size,
                         shuffle=True,
                         drop_last=True,
                         pin_memory=True)
testing_batch_generator = get_training_batch(test_loader)

print("Initializing Networks")
model_vgg = vgg16(pretrained=True, progress=True)
optimizer_vgg = optim.Adam(model_vgg.parameters(), lr=opt.lr)
model_vgg.cuda()
cse_loss = nn.CrossEntropyLoss().cuda()


def train(batch, label):
    model_vgg.train()
    y = model_vgg(batch)
    loss = cse_loss(y, label)
    optimizer_vgg.zero_grad()
    loss.backward()
    optimizer_vgg.step()
    return [loss.item()]
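# A minimal sketch of a driver loop one might pair with train() above. The
# iteration counts are hypothetical placeholders, and it assumes
# training_batch_generator yields (batch, label) pairs; .cuda() is a no-op
# if the generator already returns CUDA tensors.
num_epochs, steps_per_epoch = 10, 1000  # hypothetical settings
for epoch in range(num_epochs):
    epoch_loss = 0.0
    for _ in range(steps_per_epoch):
        batch, label = next(training_batch_generator)
        batch, label = batch.cuda(), label.cuda()
        epoch_loss += train(batch, label)[0]
    print('epoch %d: mean train loss %.4f'
          % (epoch, epoch_loss / steps_per_epoch))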
print("Initializing Data Loader") data = DATA(data_root=(opt.data_dir + 'test/')) loader = DataLoader(data, num_workers=8, batch_size=opt.batch_size, shuffle=False, drop_last=False, pin_memory=True) print("Initializing Networks") # model_xcp = xception(2) # checkpoint = torch.load(opt.modeldir) # model_xcp.load_state_dict(checkpoint['module']) # model_xcp.eval().cuda() model_vgg = vgg16() checkpoint = torch.load(opt.modeldir) model_vgg.load_state_dict(checkpoint['module']) model_vgg.eval().cuda() softmax = nn.Softmax(dim=1) def test(image): with torch.no_grad(): z = model_vgg(image) pred = torch.max(z, dim=1)[1] z = softmax(z) return pred, z
    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 500)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        # return both the raw logits and their log-softmax
        return x, F.log_softmax(x, dim=1)

    model = Net()
elif args.network == 'Alexnet':
    model = alexnet.AlexNet(num_classes=100)
elif args.network == 'Vgg':
    model = vgg.vgg16()
    print(model)
elif args.network == 'Resnet':
    model = resnet.ResNet50(num_classes=100)
elif args.network == 'Densenet':
    model = densenet.densenet_cifar(num_classes=100)

if args.cuda:
    model.cuda(args.gpu)

optimizer = optim.SGD(model.parameters(),
                      lr=args.lr,
                      momentum=args.momentum,
                      weight_decay=args.weight_decay)
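# A minimal sketch of one training step for the model and optimizer built
# above; a hypothetical illustration, not this repo's train(). It assumes
# the selected network returns plain logits (the custom Net above returns a
# (logits, log_probs) tuple instead, whose loss would use F.nll_loss on the
# second element).
def train_step(data, target):
    model.train()
    if args.cuda:
        data, target = data.cuda(args.gpu), target.cuda(args.gpu)
    optimizer.zero_grad()
    output = model(data)
    loss = F.cross_entropy(output, target)
    loss.backward()
    optimizer.step()
    return loss.item()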
vocab = Vocab(captions_dict, threshold)
vocab_size = vocab.id

# initialize the embedding matrix randomly, then overwrite rows for which a
# pretrained vector is available
embeddings = np.random.uniform(-1, 1, [vocab_size, embedding_dim])
for k in data:
    if k[0] in vocab.word2id:
        embeddings[vocab.word2id[k[0]]] = list(map(float, k[1:]))
weights = embeddings

with open('vocab.pkl', 'wb') as f:
    pickle.dump(vocab, f)
    print('dictionary dumped')

# Build the models
encoder = vgg.vgg16()
decoder = RNN(embedding_dim=embedding_dim,
              hidden_dim=hidden_dim,
              vocab_size=vocab_size,
              num_layers=1,
              weights=weights)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
params = list(encoder.parameters()) + list(decoder.parameters())
optimizer = torch.optim.Adam(params, lr=learning_rate)

# Train the models
num_epochs = 100
save_iter = 10
for epoch in range(num_epochs):