def main():
    args.device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Data
    print('==> Preparing data..')
    trainloader, testloader, args.ndata = get_data_loader()

    print('==> Building model..')
    net, lemniscate, optimizer, criterion, scheduler, best_acc, start_epoch = build_model()

    if args.test_only:
        kNN(net, lemniscate, trainloader, testloader, 200, args.nce_t, 1)
        sys.exit(0)

    for epoch in range(start_epoch, args.epochs):
        scheduler.step()
        train(net, optimizer, trainloader, criterion, lemniscate, epoch)
        acc = kNN(net, lemniscate, trainloader, testloader, 200, args.nce_t, 0)

        if acc > best_acc:
            print('Saving..')
            state = {
                'net': net.state_dict(),
                'lemniscate': lemniscate,
                'acc': acc,
                'epoch': epoch,
                'optimizer': optimizer.state_dict(),
            }
            os.makedirs(args.model_dir, exist_ok=True)
            torch.save(state, os.path.join(args.model_dir, 'ckpt.cifar.pth.tar'))
            best_acc = acc

        print('best accuracy: {:.2f}'.format(best_acc * 100))

    acc = kNN(net, lemniscate, trainloader, testloader, 200, args.nce_t, 1)
    print('last accuracy: {:.2f}'.format(acc * 100))
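
# The kNN() monitor called throughout these scripts is defined elsewhere. Below
# is a minimal sketch of the lemniscate-style weighted-kNN evaluation it
# performs, written against an explicit memory bank and label tensor. The
# function name, argument layout, and the exponential weighting are assumptions
# inferred from how the callers use it, not any repo's exact implementation.
import torch

def knn_monitor_sketch(features, memory, train_labels, k=200, temperature=0.07,
                       num_classes=10):
    """Classify `features` by a temperature-weighted vote over the k nearest
    memory-bank entries. `features` and `memory` are assumed L2-normalized."""
    sim = features @ memory.t()                # cosine similarities (B, N)
    weights, indices = sim.topk(k, dim=1)      # k nearest neighbours per sample
    weights = (weights / temperature).exp()    # exponential (soft) weighting
    votes = torch.zeros(features.size(0), num_classes, device=features.device)
    votes.scatter_add_(1, train_labels[indices], weights)  # accumulate votes
    return votes.argmax(dim=1)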
assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
checkpoint = torch.load('./checkpoint/' + args.resume)
net.load_state_dict(checkpoint['net'])
lemniscate = checkpoint['lemniscate']
best_acc = checkpoint['acc']
start_epoch = checkpoint['epoch']

# define loss function
criterion = NCECriterion()

net.to(device)
lemniscate.to(device)
criterion.to(device)

if args.test_only:
    acc = kNN(0, net, lemniscate, trainloader, testloader, 200, args.nce_t, 1)
    sys.exit(0)

optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9,
                      weight_decay=5e-4)

def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 20 epochs after epoch 20"""
    lr = args.lr
    if epoch >= 20:
        lr = args.lr * (0.1 ** ((epoch - 20) // 20))
    print(lr)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
def main():
    global args, best_prec1
    args = parser.parse_args()

    # init seed
    my_whole_seed = 222
    random.seed(my_whole_seed)
    np.random.seed(my_whole_seed)
    torch.manual_seed(my_whole_seed)
    torch.cuda.manual_seed_all(my_whole_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    os.environ['PYTHONHASHSEED'] = str(my_whole_seed)

    for kk_time in range(args.seedstart, args.seedstart + 1):
        args.seed = kk_time
        args.result = args.result + str(args.seed)

        # create model
        model = models.__dict__[args.arch](low_dim=args.low_dim,
                                           multitask=args.multitask,
                                           showfeature=args.showfeature,
                                           domain=args.domain,
                                           args=args)
        model = torch.nn.DataParallel(model).cuda()
        print('Number of learnable params',
              get_learnable_para(model) / 1000000., " M")

        # Data loading code
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        aug = transforms.Compose([
            transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
            transforms.RandomGrayscale(p=0.2),
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize
        ])
        # aug = transforms.Compose([transforms.RandomResizedCrop(224, scale=(0.08, 1.), ratio=(3 / 4, 4 / 3)),
        #                           transforms.RandomHorizontalFlip(p=0.5),
        #                           get_color_distortion(s=1),
        #                           transforms.Lambda(lambda x: gaussian_blur(x)),
        #                           transforms.ToTensor(),
        #                           normalize])
        aug_test = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            normalize
        ])

        # load dataset
        # import datasets.fundus_amd_syn_crossvalidation as medicaldata
        import datasets.fundus_amd_syn_crossvalidation_ind as medicaldata

        # deterministically seed each dataloader worker (worker_init_fn must be
        # a callable; passing random.seed(...) directly would pass None)
        seed_worker = lambda worker_id: random.seed(my_whole_seed + worker_id)

        train_dataset = medicaldata.traindataset(root=args.data, transform=aug,
                                                 train=True, args=args)
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=False,
            pin_memory=True,
            num_workers=4,
            drop_last=True if args.multiaug else False,
            worker_init_fn=seed_worker)

        valid_dataset = medicaldata.traindataset(root=args.data,
                                                 transform=aug_test,
                                                 train=False, args=args)
        val_loader = torch.utils.data.DataLoader(
            valid_dataset,
            batch_size=args.batch_size,
            shuffle=False,
            pin_memory=True,
            num_workers=4,
            worker_init_fn=seed_worker)

        # define lemniscate and loss function (criterion)
        ndata = train_dataset.__len__()
        lemniscate = LinearAverage(args.low_dim, ndata, args.nce_t,
                                   args.nce_m).cuda()

        if args.multitaskposrot:
            cls_criterion = nn.CrossEntropyLoss().cuda()
        else:
            cls_criterion = None

        if args.multitaskposrot:
            print("running multi task with miccai")
            criterion = BatchCriterion(1, 0.1, args.batch_size, args).cuda()
        elif args.synthesis:
            print("running synthesis")
            criterion = BatchCriterionFour(1, 0.1, args.batch_size, args).cuda()
        elif args.multiaug:
            print("running cvpr")
            criterion = BatchCriterion(1, 0.1, args.batch_size, args).cuda()
        else:
            criterion = nn.CrossEntropyLoss().cuda()

        optimizer = torch.optim.Adam(model.parameters(), args.lr,
                                     weight_decay=args.weight_decay)

        # optionally resume from a checkpoint
        if args.resume:
            if os.path.isfile(args.resume):
                print("=> loading checkpoint '{}'".format(args.resume))
                checkpoint = torch.load(args.resume)
                args.start_epoch = checkpoint['epoch']
                model.load_state_dict(checkpoint['state_dict'])
                lemniscate = checkpoint['lemniscate']
                optimizer.load_state_dict(checkpoint['optimizer'])
                print("=> loaded checkpoint '{}' (epoch {})".format(
                    args.resume, checkpoint['epoch']))
            else:
                print("=> no checkpoint found at '{}'".format(args.resume))

        if args.evaluate:
            knn_num = 100
            auc, acc, precision, recall, f1score = kNN(args, model, lemniscate,
                                                       train_loader, val_loader,
                                                       knn_num, args.nce_t, 2)
            f = open("savemodels/result.txt", "a+")
            f.write("auc: %.4f\n" % (auc))
            f.write("acc: %.4f\n" % (acc))
            f.write("pre: %.4f\n" % (precision))
            f.write("recall: %.4f\n" % (recall))
            f.write("f1score: %.4f\n" % (f1score))
            f.close()
            return

        # mkdir result folder and tensorboard
        os.makedirs(args.result, exist_ok=True)
        writer = SummaryWriter("runs/" + str(args.result.split("/")[-1]))
        writer.add_text('Text', str(args))

        # copy code
        import shutil, glob
        source = glob.glob("*.py")
        source += glob.glob("*/*.py")
        os.makedirs(args.result + "/code_file", exist_ok=True)
        for file in source:
            name = file.split("/")[0]
            if name == file:
                shutil.copy(file, args.result + "/code_file/")
            else:
                os.makedirs(args.result + "/code_file/" + name, exist_ok=True)
                shutil.copy(file, args.result + "/code_file/" + name)

        for epoch in range(args.start_epoch, args.epochs):
            lr = adjust_learning_rate(optimizer, epoch, args, [1000, 2000])
            writer.add_scalar("lr", lr, epoch)

            # train for one epoch
            loss = train(train_loader, model, lemniscate, criterion,
                         cls_criterion, optimizer, epoch, writer)
            writer.add_scalar("train_loss", loss, epoch)

            # save checkpoint
            if epoch % 200 == 0 or (epoch in [1600, 1800, 2000]):
                auc, acc, precision, recall, f1score = kNN(
                    args, model, lemniscate, train_loader, val_loader, 100,
                    args.nce_t, 2)

                # save to txt
                writer.add_scalar("test_auc", auc, epoch)
                writer.add_scalar("test_acc", acc, epoch)
                writer.add_scalar("test_precision", precision, epoch)
                writer.add_scalar("test_recall", recall, epoch)
                writer.add_scalar("test_f1score", f1score, epoch)

                f = open(args.result + "/result.txt", "a+")
                f.write("epoch " + str(epoch) + "\n")
                f.write("auc: %.4f\n" % (auc))
                f.write("acc: %.4f\n" % (acc))
                f.write("pre: %.4f\n" % (precision))
                f.write("recall: %.4f\n" % (recall))
                f.write("f1score: %.4f\n" % (f1score))
                f.close()

                save_checkpoint(
                    {
                        'epoch': epoch,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'lemniscate': lemniscate,
                        'optimizer': optimizer.state_dict(),
                    },
                    filename=args.result + "/fold" + str(args.seedstart) +
                    "-epoch-" + str(epoch) + ".pth.tar")
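
# adjust_learning_rate(optimizer, epoch, args, [1000, 2000]) is called above
# but not defined in this file. A minimal sketch of a milestone-based decay
# matching that call signature; the decay factor of 0.1 is an assumption:
def adjust_learning_rate_sketch(optimizer, epoch, args, milestones, gamma=0.1):
    """Decay args.lr by `gamma` at each milestone epoch and apply it."""
    lr = args.lr * (gamma ** sum(epoch >= m for m in milestones))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr  # callers log this value to tensorboard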
def main():
    global args, best_prec1
    args = parser.parse_args()

    args.distributed = args.world_size > 1
    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch](low_dim=args.low_dim)

    if not args.distributed:
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()
    else:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolderInstance(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
            transforms.RandomGrayscale(p=0.2),
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=True, sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolderInstance(valdir, transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define lemniscate and loss function (criterion)
    ndata = train_dataset.__len__()
    if args.nce_k > 0:
        lemniscate = NCEAverage(args.low_dim, ndata, args.nce_k,
                                args.nce_t, args.nce_m).cuda()
        criterion = NCECriterion(ndata).cuda()
    else:
        lemniscate = LinearAverage(args.low_dim, ndata,
                                   args.nce_t, args.nce_m).cuda()
        criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            lemniscate = checkpoint['lemniscate']
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    if args.evaluate:
        kNN(0, model, lemniscate, train_loader, val_loader, 200, args.nce_t)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, lemniscate, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = NN(epoch, model, lemniscate, train_loader, val_loader)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': model.state_dict(),
            'lemniscate': lemniscate,
            'best_prec1': best_prec1,
            'optimizer': optimizer.state_dict(),
        }, is_best)

    # evaluate KNN after last epoch
    kNN(0, model, lemniscate, train_loader, val_loader, 200, args.nce_t)
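
# LinearAverage / NCEAverage maintain the per-instance memory bank that the
# NN/kNN monitors read. A minimal sketch of the momentum update they apply
# after each forward pass; the function name and the exact blend are
# assumptions based on the non-parametric instance-discrimination formulation:
import torch
import torch.nn.functional as F

@torch.no_grad()
def update_memory_sketch(memory, features, indexes, momentum=0.5):
    """Blend new features into their memory slots and keep them unit-norm."""
    old = memory[indexes]                              # (B, low_dim)
    new = old * momentum + features * (1. - momentum)  # momentum blend
    memory[indexes] = F.normalize(new, dim=1)          # re-normalize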
def main():
    global args, best_prec1
    args = parser.parse_args()

    args.distributed = args.world_size > 1
    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    # create model
    # if args.pretrained:
    #     print("=> using pre-trained model '{}'".format(args.arch))
    #     model = models.__dict__[args.arch](pretrained=True, finetune=args.finetune, low_dim=args.low_dim)
    # else:
    #     print("=> creating model '{}'".format(args.arch))
    #     model = models.__dict__[args.arch](low_dim=args.low_dim)

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # train_dataset = datasets.CombinedMaskDataset(
    #     other_data_path='/home/saschaho/Simcenter/found_label_imgs',
    #     csv_root_folder='/home/saschaho/Simcenter/Floor_Elevation_Data/Streetview_Irma/Streetview_Irma/images',
    #     data_csv='/home/saschaho/Simcenter/Building_Information_Prediction/all_bims_train.csv',
    #     transform=transforms.Compose([
    #         transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
    #         transforms.RandomGrayscale(p=0.2),
    #         transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
    #         transforms.RandomHorizontalFlip(),
    #         transforms.ToTensor(),
    #         normalize,
    #     ]), attribute='first_floor_elevation_ft', mask_images=True)
    # val_dataset = datasets.CombinedMaskDataset(
    #     csv_root_folder='/home/saschaho/Simcenter/Floor_Elevation_Data/Streetview_Irma/Streetview_Irma/images',
    #     data_csv='/home/saschaho/Simcenter/Building_Information_Prediction/all_bims_val.csv',
    #     transform=transforms.Compose([
    #         transforms.Resize(256),
    #         transforms.CenterCrop(224),
    #         transforms.ToTensor(),
    #         normalize,
    #     ]), attribute='first_floor_elevation_ft', mask_images=True)

    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(224, scale=(0.3, 1.)),
        transforms.RandomGrayscale(p=0.5),
        transforms.ColorJitter(0.5, 0.5, 0.5, 0.5),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ToTensor(),
        normalize
    ])
    val_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize
    ])

    train_dataset = First_Floor_Binary(args.attribute_name,
                                       args.train_data,
                                       args.image_folder,
                                       transform=train_transform,
                                       regression=args.regression,
                                       mask_buildings=args.mask_buildings,
                                       softmask=args.softmask)
    val_dataset = First_Floor_Binary(args.attribute_name,
                                     args.val_data,
                                     args.image_folder,
                                     transform=val_transform,
                                     regression=args.regression,
                                     mask_buildings=args.mask_buildings,
                                     softmask=args.softmask)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    model = ResidualAttentionModel_92_Small(args.low_dim, dropout=False)
    model = torch.nn.DataParallel(model).cuda()

    print('Train dataset instances: {}'.format(len(train_loader.dataset)))
    print('Val dataset instances: {}'.format(len(val_loader.dataset)))

    # define lemniscate and loss function (criterion)
    ndata = train_dataset.__len__()
    if args.nce_k > 0:
        lemniscate = NCEAverage(args.low_dim, ndata, args.nce_k,
                                args.nce_t, args.nce_m).cuda()
        criterion = NCECriterion(ndata).cuda()
    else:
        lemniscate = LinearAverage(args.low_dim, ndata,
                                   args.nce_t, args.nce_m).cuda()
        criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    # optimizer = RAdam(model.parameters())

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']

            keyname = [keyname for keyname in model.state_dict().keys()
                       if 'fc.weight' in keyname][0]
            lat_vec_len_model = model.state_dict()[keyname].shape[0]
            lat_vec_len_checkpoint = checkpoint['state_dict'][keyname].shape[0]

            low_dim_differ = False
            if lat_vec_len_model != lat_vec_len_checkpoint:
                low_dim_differ = True
                print('Warning: Latent vector sizes do not match. Assuming finetuning')
                print('Lemniscate will be trained from scratch with new optimizer.')
                del checkpoint['state_dict'][keyname]
                del checkpoint['state_dict'][keyname.replace('weight', 'bias')]

            missing_keys, unexpected_keys = model.load_state_dict(
                checkpoint['state_dict'], strict=False)
            if len(missing_keys) or len(unexpected_keys):
                print('Warning: Missing or unexpected keys found.')
                print('Missing: {}'.format(missing_keys))
                print('Unexpected: {}'.format(unexpected_keys))

            if not low_dim_differ:
                # The memory bank will be trained from scratch if
                # the low dim is different. Maybe later repopulated
                lemniscate = checkpoint['lemniscate']
                optimizer.load_state_dict(checkpoint['optimizer'])

            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    if args.evaluate:
        kNN(0, model, lemniscate, train_loader, val_loader, 200, args.nce_t)
        return

    for epoch in range(args.start_epoch, args.epochs):
        # if args.distributed:
        #     train_sampler.set_epoch(epoch)
        # adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, lemniscate, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = NN(epoch, model, lemniscate, train_loader, val_loader)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'lemniscate': lemniscate,
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best, args.name)

    # evaluate KNN after last epoch
    kNN(0, model, lemniscate, train_loader, val_loader, 200, args.nce_t)
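
# save_checkpoint(state, is_best, ...) is used by several of these scripts but
# defined elsewhere. A minimal sketch of the conventional helper; keeping a
# 'model_best' copy on is_best follows the standard PyTorch ImageNet example
# and is an assumption about this repo's exact behaviour:
import shutil
import torch

def save_checkpoint_sketch(state, is_best, filename='checkpoint.pth.tar'):
    """Persist the latest state and keep a copy of the best one so far."""
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')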
def main():
    global args, best_prec1, best_prec1_past, best_prec1_future
    args = parser.parse_args()

    args.distributed = args.world_size > 1
    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch](low_dim=args.low_dim)

    if not args.distributed:
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.to(get_device(args.gpu))
        else:
            model = torch.nn.DataParallel(model).to(get_device(args.gpu))
    else:
        model.to(get_device(args.gpu))
        model = torch.nn.parallel.DistributedDataParallel(model)

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = Dataset(traindir, n_frames)
    val_dataset = Dataset(valdir, n_frames)

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,  # (train_sampler is None)
        num_workers=args.workers)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers)

    # define lemniscate and loss function (criterion)
    ndata = train_dataset.__len__()
    if args.nce_k > 0:
        lemniscate = NCEAverage(args.gpu, args.low_dim, ndata, args.nce_k,
                                args.nce_t, args.nce_m).to(get_device(args.gpu))
        criterion = NCECriterion(ndata).to(get_device(args.gpu))
    else:
        lemniscate = LinearAverage(args.low_dim, ndata, args.nce_t,
                                   args.nce_m).to(get_device(args.gpu))
        criterion = nn.CrossEntropyLoss().to(get_device(args.gpu))

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            lemniscate = checkpoint['lemniscate']
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    if args.evaluate:
        kNN(0, model, lemniscate, train_loader, val_loader, 200, args.nce_t)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, lemniscate, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1, prec1_past, prec1_future = NN(epoch, model, lemniscate,
                                             train_loader, val_loader)
        add_epoch_score('epoch_scores.txt', epoch, prec1)
        add_epoch_score('epoch_scores_past.txt', epoch, prec1_past)
        add_epoch_score('epoch_scores_future.txt', epoch, prec1_future)

        # Sascha: This is a bug because it seems prec1 or best_prec1 is a
        # vector at some point with more than one entry
        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'lemniscate': lemniscate,
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best, epoch)

        is_best_past = prec1_past > best_prec1_past
        best_prec1_past = max(prec1_past, best_prec1_past)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'lemniscate': lemniscate,
                'best_prec1_past': best_prec1_past,
                'optimizer': optimizer.state_dict(),
            }, is_best_past, epoch, best_mod='_past')

        is_best_future = prec1_future > best_prec1_future
        best_prec1_future = max(prec1_future, best_prec1_future)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'lemniscate': lemniscate,
                'best_prec1_future': best_prec1_future,
                'optimizer': optimizer.state_dict(),
            }, is_best_future, epoch, best_mod='_future')

    # evaluate KNN after last epoch
    kNN(0, model, lemniscate, train_loader, val_loader, 200, args.nce_t)
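
# add_epoch_score() appends per-epoch scores to a text file but is not defined
# here. A minimal sketch under that assumption (the line format is hypothetical):
def add_epoch_score_sketch(path, epoch, score):
    """Append one 'epoch score' line so runs can be resumed and re-plotted."""
    with open(path, 'a') as f:
        f.write('{} {:.4f}\n'.format(epoch, float(score)))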
def main(args):
    args.best_acc = 0
    best_acc5 = 0

    # Data
    print('==> Preparing data..')
    train_loader, test_loader, ndata = get_dataloader(args,
                                                      add_erasing=args.erasing,
                                                      aug_plus=args.aug_plus)
    logger.info(f"length of training dataset: {ndata}")

    # Model
    model, model_ema = build_model(args)
    contrast = MemoryMoCo(128, args.nce_k, args.nce_t, thresh=0).cuda()
    criterion = NCESoftmaxLoss().cuda()
    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=args.batch_size * dist.get_world_size() / 128 * args.base_learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)
    scheduler = get_scheduler(optimizer, len(train_loader), args)

    if args.amp_opt_level != "O0":
        if amp is None:
            logger.warning(f"apex is not installed but amp_opt_level is set to {args.amp_opt_level}, ignoring.\n"
                           "you should install apex from https://github.com/NVIDIA/apex#quick-start first")
            args.amp_opt_level = "O0"
        else:
            model, optimizer = amp.initialize(model, optimizer,
                                              opt_level=args.amp_opt_level)
            model_ema = amp.initialize(model_ema, opt_level=args.amp_opt_level)

    model = DistributedDataParallel(model, device_ids=[args.local_rank],
                                    broadcast_buffers=False)

    # optionally resume from a checkpoint
    if args.resume:
        assert os.path.isfile(args.resume)
        load_checkpoint(args, model, model_ema, contrast, optimizer, scheduler)

    # tensorboard
    if dist.get_rank() == 0:
        summary_writer = SummaryWriter(log_dir=args.save_dir)
    else:
        summary_writer = None

    # routine
    for epoch in range(args.start_epoch, args.epochs + 1):
        if args.lr_scheduler == 'cosine':
            train_loader.sampler.set_epoch(epoch)

        tic = time.time()
        loss, prob = train_moco(epoch, train_loader, model, model_ema,
                                contrast, criterion, optimizer, scheduler, args)
        logger.info('epoch {}, total time {:.2f}'.format(epoch,
                                                         time.time() - tic))

        if summary_writer is not None:
            # tensorboard logger
            summary_writer.add_scalar('ins_loss', loss, epoch)
            summary_writer.add_scalar('ins_prob', prob, epoch)
            summary_writer.add_scalar('learning_rate',
                                      optimizer.param_groups[0]['lr'], epoch)

        # labeledTrainloader is expected to be defined elsewhere for stl10-full
        if args.dataset == 'stl10-full':
            acc, acc5 = kNN(epoch, model, contrast, labeledTrainloader,
                            test_loader, 200, args.nce_t, True)
        else:
            acc, acc5 = kNN(epoch, model, contrast, train_loader,
                            test_loader, 200, args.nce_t, True)
        if acc >= args.best_acc:
            args.best_acc = acc
            best_acc5 = acc5

        logger.info('KNN top-1 precision: {:.4f} {:.4f}, best is: {:.4f} {:.4f}'.format(
            acc * 100., acc5 * 100., args.best_acc * 100., best_acc5 * 100))
        logger.info(str(args))

        if dist.get_rank() == 0:
            # save model
            save_checkpoint(args, epoch, model, model_ema, contrast,
                            optimizer, scheduler, args.best_acc)

    if args.dataset == 'stl10-full':
        acc1, acc5 = kNN(epoch, model, contrast, labeledTrainloader,
                         test_loader, 200, args.nce_t, True)
    else:
        acc1, acc5 = kNN(epoch, model, contrast, train_loader,
                         test_loader, 200, args.nce_t, True)
    logger.info('KNN top-1 and top-5 precision with recomputed memory bank: {:.4f} {:.4f}'.format(
        acc1 * 100., acc5 * 100))
    logger.info('Best KNN top-1 and top-5 precision: {:.4f} {:.4f}'.format(
        args.best_acc * 100., best_acc5 * 100))
    logger.info(str(args))
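
# model_ema above is the momentum (key) encoder of the MoCo setup; its weights
# are never updated by the optimizer. A minimal sketch of the exponential
# moving-average step train_moco applies; the helper name and the momentum
# value 0.999 are assumptions, not this repo's exact code:
import torch

@torch.no_grad()
def moment_update_sketch(model, model_ema, m=0.999):
    """model_ema = m * model_ema + (1 - m) * model, parameter by parameter."""
    for p, p_ema in zip(model.parameters(), model_ema.parameters()):
        p_ema.data.mul_(m).add_(p.data, alpha=1. - m)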
start_epoch = checkpoint['epoch']

# define loss function
if hasattr(lemniscate, 'K'):
    criterion = NCECriterion(ndata)
else:
    criterion = nn.CrossEntropyLoss()
criterion_cld = nn.CrossEntropyLoss()
criterion_cld.to(device)

lemniscate.to(device)
criterion.to(device)

if args.test_only:
    acc = kNN(0, net, lemniscate, trainloader, testloader, 200, args.nce_t, 1)
    sys.exit(0)

# Training
def train(epoch):
    print('\nEpoch: %d' % epoch)
    torch.set_num_threads(1)
    if args.lr_scheduler == 'cosine':
        trainloader.sampler.set_epoch(epoch)
    train_loss = AverageMeter()
    data_time = AverageMeter()
    batch_time = AverageMeter()
    train_CLD_loss = AverageMeter()
    train_CLD_acc = AverageMeter()
net = models.__dict__['ResNet50'](low_dim=args.low_dim)

# define lemniscate
lemniscate = LinearAverage(args.low_dim, ndata, args.temperature,
                           args.memory_momentum)

# define loss function
criterion = NCACrossEntropy(torch.LongTensor(trainloader.dataset.targets))

if use_cuda:
    net.cuda()
    net = torch.nn.DataParallel(net,
                                device_ids=range(torch.cuda.device_count()))
    lemniscate.cuda()
    criterion.cuda()
    cudnn.benchmark = True

if args.test_only:
    acc = kNN(0, net, lemniscate, trainloader, testloader, 30, args.temperature)
    sys.exit(0)

optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9,
                      weight_decay=5e-4, nesterov=True)

def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 50 epochs"""
    lr = args.lr * (0.1 ** (epoch // 50))
    print(lr)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

# Training
def train(epoch):
    print('\nEpoch: %d' % epoch)
    adjust_learning_rate(optimizer, epoch)
def main(args):
    # Data
    print('==> Preparing data..')
    _size = 32
    transform_train = transforms.Compose([
        transforms.Resize(size=_size),
        transforms.RandomResizedCrop(size=_size, scale=(0.2, 1.)),
        transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
        transforms.RandomGrayscale(p=0.2),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])
    transform_test = transforms.Compose([
        transforms.Resize(size=_size),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    trainset = datasets.CIFAR10Instance(root='./data', train=True,
                                        download=True,
                                        transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=args.batch_size,
                                              shuffle=True, num_workers=4)
    testset = datasets.CIFAR10Instance(root='./data', train=False,
                                       download=True,
                                       transform=transform_test)
    testloader = torch.utils.data.DataLoader(testset, batch_size=100,
                                             shuffle=False, num_workers=4)
    classes = ('plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck')
    ndata = trainset.__len__()

    print('==> Building model..')
    net = models.__dict__['ResNet18'](low_dim=args.low_dim)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    if device == 'cuda':
        net = torch.nn.DataParallel(net,
                                    device_ids=range(torch.cuda.device_count()))
        cudnn.benchmark = True

    criterion = ICRcriterion()
    # define loss function: inner product loss within each mini-batch
    uel_criterion = BatchCriterion(args.batch_m, args.batch_t,
                                   args.batch_size, ndata)

    net.to(device)
    criterion.to(device)
    uel_criterion.to(device)

    best_acc = 0  # best test accuracy
    start_epoch = 0  # start from epoch 0 or last checkpoint epoch

    if args.test_only or len(args.resume) > 0:
        # Load checkpoint.
        model_path = 'checkpoint/' + args.resume
        print('==> Resuming from checkpoint..')
        assert os.path.isdir(args.model_dir), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(model_path)
        net.load_state_dict(checkpoint['net'])
        best_acc = checkpoint['acc']
        start_epoch = checkpoint['epoch']

    # define lemniscate
    if args.test_only and len(args.resume) > 0:
        trainFeatures, feature_index = compute_feature(trainloader, net,
                                                       len(trainset), args)
        lemniscate = LinearAverage(torch.tensor(trainFeatures), args.low_dim,
                                   ndata, args.nce_t, args.nce_m)
    else:
        lemniscate = LinearAverage(torch.tensor([]), args.low_dim, ndata,
                                   args.nce_t, args.nce_m)
    lemniscate.to(device)

    # define optimizer
    optimizer = torch.optim.SGD(net.parameters(), lr=args.lr, momentum=0.9,
                                weight_decay=5e-4)
    # optimizer2 = torch.optim.SGD(net2.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)

    # test acc
    if args.test_only:
        acc = kNN(0, net, trainloader, testloader, 200, args.batch_t, ndata,
                  low_dim=args.low_dim)
        exit(0)

    if len(args.resume) > 0:
        start_epoch = start_epoch + 1
    else:
        best_acc = 0  # best test accuracy
        start_epoch = 0  # start from epoch 0 or last checkpoint epoch

    icr2 = ICRDiscovery(ndata)

    # init_cluster_num = 20000
    for round in range(5):
        for epoch in range(start_epoch, 200):
            #### get Features
            # trainFeatures are trainloader features; shuffle=True, so
            # feature_index matches the data
            trainFeatures, feature_index = compute_feature(trainloader, net,
                                                           len(trainset), args)

            if round == 0:
                y = -1 * math.log10(ndata) / 200 * epoch + math.log10(ndata)
                cluster_num = int(math.pow(10, y))
                if cluster_num <= args.nmb_cluster:
                    cluster_num = args.nmb_cluster
                print('cluster number: ' + str(cluster_num))

                ### clustering algorithm to use (faiss cluster)
                deepcluster = clustering.__dict__[args.clustering](int(cluster_num))

                #### Features to clustering
                clustering_loss = deepcluster.cluster(trainFeatures,
                                                      feature_index,
                                                      verbose=args.verbose)
                L = np.array(deepcluster.images_lists)
                image_dict = deepcluster.images_dict

                print('create ICR ...')
                # icr = ICRDiscovery(ndata)
                # if args.test_only and len(args.resume) > 0:
                #     icr = cluster_assign(icr, L, trainFeatures, feature_index, trainset,
                #                          cluster_ratio + epoch*((1-cluster_ratio)/250))
                icrtime = time.time()
                # icr = cluster_assign(epoch, L, trainFeatures, feature_index, 1, 1)
                if epoch < args.warm_epoch:
                    icr = cluster_assign(epoch, L, trainFeatures, feature_index,
                                         args.cluster_ratio, 1)
                else:
                    icr = PreScore(epoch, L, image_dict, trainFeatures,
                                   feature_index, trainset, args.high_ratio,
                                   args.cluster_ratio, args.alpha, args.beta)
                print('calculate ICR time is: {}'.format(time.time() - icrtime))
                writer.add_scalar('icr_time', (time.time() - icrtime),
                                  epoch + round * 200)
            else:
                cluster_num = args.nmb_cluster
                print('cluster number: ' + str(cluster_num))

                ### clustering algorithm to use (faiss cluster)
                deepcluster = clustering.__dict__[args.clustering](int(cluster_num))

                #### Features to clustering
                clustering_loss = deepcluster.cluster(trainFeatures,
                                                      feature_index,
                                                      verbose=args.verbose)
                L = np.array(deepcluster.images_lists)
                image_dict = deepcluster.images_dict

                print('create ICR ...')
                # icr = ICRDiscovery(ndata)
                # if args.test_only and len(args.resume) > 0:
                #     icr = cluster_assign(icr, L, trainFeatures, feature_index, trainset,
                #                          cluster_ratio + epoch*((1-cluster_ratio)/250))
                icrtime = time.time()
                # icr = cluster_assign(epoch, L, trainFeatures, feature_index, 1, 1)
                icr = PreScore(epoch, L, image_dict, trainFeatures,
                               feature_index, trainset, args.high_ratio,
                               args.cluster_ratio, args.alpha, args.beta)
                print('calculate ICR time is: {}'.format(time.time() - icrtime))
                writer.add_scalar('icr_time', (time.time() - icrtime),
                                  epoch + round * 200)
                # else:
                #     icr = cluster_assign(icr, L, trainFeatures, feature_index, trainset, 0.2 + epoch*0.004)
            # print(icr.neighbours)

            icr2 = train(epoch, net, optimizer, lemniscate, criterion,
                         uel_criterion, trainloader, icr, icr2,
                         args.stage_update, args.lr, device, round)

            print('----------Evaluation---------')
            start = time.time()
            acc = kNN(0, net, trainloader, testloader, 200, args.batch_t,
                      ndata, low_dim=args.low_dim)
            print("Evaluation Time: '{}'s".format(time.time() - start))
            writer.add_scalar('nn_acc', acc, epoch + round * 200)

            if acc > best_acc:
                print('Saving..')
                state = {
                    'net': net.state_dict(),
                    'acc': acc,
                    'epoch': epoch,
                }
                if not os.path.isdir(args.model_dir):
                    os.mkdir(args.model_dir)
                torch.save(state,
                           './checkpoint/ckpt_best_round_{}.t7'.format(round))
                best_acc = acc

            state = {
                'net': net.state_dict(),
                'acc': acc,
                'epoch': epoch,
            }
            torch.save(state, './checkpoint/ckpt_last_round_{}.t7'.format(round))

            print('[Round]: {} [Epoch]: {} \t accuracy: {}% \t (best acc: {}%)'.format(
                round, epoch, acc, best_acc))
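
# compute_feature() above gathers features for the whole training set together
# with the shuffled sample indices, but is defined elsewhere. A minimal sketch
# under that assumption; the (image, target, index) loader layout and the L2
# normalization are inferred from the callers, not this repo's exact code:
import numpy as np
import torch
import torch.nn.functional as F

@torch.no_grad()
def compute_feature_sketch(loader, net, n, args):
    """Run the encoder over `loader`; return (features[n, low_dim], indices[n])."""
    net.eval()
    features = np.zeros((n, args.low_dim), dtype=np.float32)
    indices = np.zeros(n, dtype=np.int64)
    ptr = 0
    for inputs, _, idx in loader:  # instance datasets yield (image, target, index)
        out = F.normalize(net(inputs.cuda()), dim=1)
        bs = out.size(0)
        features[ptr:ptr + bs] = out.cpu().numpy()
        indices[ptr:ptr + bs] = idx.numpy()
        ptr += bs
    net.train()
    return features, indices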
def main():
    global args, best_prec1
    args = parser.parse_args()

    # Initialize distributed processing
    args.distributed = args.world_size > 1
    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True,
                                           low_dim=args.low_dim)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch](low_dim=args.low_dim)

    if not args.distributed:
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()
    else:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],  # ImageNet stats
        std=[0.229, 0.224, 0.225])
    # normalize = transforms.Normalize(mean=[0.234, 0.191, 0.159],  # xView stats
    #                                  std=[0.173, 0.143, 0.127])

    print("Creating datasets")
    cj = args.color_jit
    train_dataset = datasets.ImageFolderInstance(
        traindir,
        transforms.Compose([
            transforms.Resize((224, 224)),
            # transforms.Grayscale(3),
            # transforms.ColorJitter(cj, cj, cj, cj),
            # transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.RandomRotation(45),
            transforms.ToTensor(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    elif args.balanced_sampling:
        print("Using balanced sampling")
        # Here's where we compute the weights for WeightedRandomSampler
        class_counts = {v: 0 for v in train_dataset.class_to_idx.values()}
        for path, ndx in train_dataset.samples:
            class_counts[ndx] += 1
        total = float(np.sum([v for v in class_counts.values()]))
        class_probs = [
            class_counts[ndx] / total for ndx in range(len(class_counts))
        ]
        # make a list of class probabilities corresponding to the entries in
        # train_dataset.samples
        reciprocal_weights = [
            class_probs[idx] for i, (_, idx) in enumerate(train_dataset.samples)
        ]
        # weights are the reciprocal of the above
        weights = (1 / torch.Tensor(reciprocal_weights))
        train_sampler = torch.utils.data.sampler.WeightedRandomSampler(
            weights, len(train_dataset), replacement=True)
    else:
        # if args.red_data < 1, training runs on a subsample of the total data;
        # otherwise it uses the full data set.
        data_size = len(train_dataset)
        sub_index = np.random.randint(0, data_size,
                                      round(args.red_data * data_size))
        sub_index.sort()
        train_sampler = torch.utils.data.sampler.SubsetRandomSampler(sub_index)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)

    print("Training on", len(train_dataset.imgs),
          "images. Training batch size:", args.batch_size)
    if len(train_dataset.imgs) % args.batch_size != 0:
        print("Warning: batch size doesn't divide the # of training images so ",
              len(train_dataset.imgs) % args.batch_size,
              "images will be skipped per epoch.")
        print("If you don't want to skip images, use a batch size in:",
              get_factors(len(train_dataset.imgs)))

    val_dataset = datasets.ImageFolderInstance(
        valdir,
        transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            normalize,
        ]))
    val_bs = [factor for factor in get_factors(len(val_dataset))
              if factor < 500][-1]
    val_bs = 100
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=val_bs,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)
    print("Validating on", len(val_dataset), "images. Validation batch size:",
          val_bs)

    # define lemniscate and loss function (criterion)
    ndata = train_dataset.__len__()
    if args.nce_k > 0:
        lemniscate = NCEAverage(args.low_dim, ndata, args.nce_k,
                                args.nce_t, args.nce_m)
        criterion = NCECriterion(ndata).cuda()
    else:
        lemniscate = LinearAverage(args.low_dim, ndata,
                                   args.nce_t, args.nce_m).cuda()
        criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer = FP16_Optimizer(optimizer,
                                       static_loss_scale=args.static_loss,
                                       verbose=False)
            optimizer.load_state_dict(checkpoint['optimizer'])
            args.start_epoch = checkpoint['epoch']
            # best_prec1 = checkpoint['best_prec1']
            lemniscate = checkpoint['lemniscate']
            if args.select_load:
                pred = checkpoint['prediction']
            print("=> loaded checkpoint '{}' (epoch {}, best_prec1 )".format(
                args.resume, checkpoint['epoch']))  # , checkpoint['best_prec1'])
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    # optionally fine-tune a model trained on a different dataset
    elif args.fine_tune:
        print("=> loading checkpoint '{}'".format(args.fine_tune))
        checkpoint = torch.load(args.fine_tune)
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        optimizer = FP16_Optimizer(optimizer,
                                   static_loss_scale=args.static_loss,
                                   verbose=False)
        print("=> loaded checkpoint '{}' (epoch {})".format(
            args.fine_tune, checkpoint['epoch']))
    else:
        optimizer = FP16_Optimizer(optimizer,
                                   static_loss_scale=args.static_loss,
                                   verbose=False)

    # Optionally recompute memory. If fine-tuning, then we must recompute memory
    if args.recompute_memory or args.fine_tune:
        # Aaron - Experiments show that iterating over torch.utils.data.DataLoader
        # will skip the last few unless the batch size evenly divides the size of
        # the data set.
        # This shouldn't be the case according to the documentation; there's
        # even a flag for drop_last, but it's not working.
        # compute a good batch size for re-computing memory
        memory_bs = [factor for factor in get_factors(len(train_loader.dataset))
                     if factor < 500][-1]
        print("Recomputing memory using", train_dataset.root,
              "with a batch size of", memory_bs)
        transform_bak = train_loader.dataset.transform
        train_loader.dataset.transform = val_loader.dataset.transform
        temploader = torch.utils.data.DataLoader(
            train_loader.dataset,
            batch_size=memory_bs,
            shuffle=False,
            num_workers=train_loader.num_workers,
            pin_memory=True)
        lemniscate.memory = torch.zeros(len(train_loader.dataset),
                                        args.low_dim).cuda()
        model.eval()
        with torch.no_grad():
            for batch_idx, (inputs, targets, indexes) in enumerate(
                    tqdm.tqdm(temploader)):
                batchSize = inputs.size(0)
                features = model(inputs)
                lemniscate.memory[batch_idx * batchSize:
                                  batch_idx * batchSize + batchSize, :] = features.data
        train_loader.dataset.transform = transform_bak
        model.train()

    cudnn.benchmark = True

    if args.evaluate:
        kNN(model, lemniscate, train_loader, val_loader, args.K, args.nce_t)
        return

    begin_train_time = datetime.datetime.now()

    # my_knn(model, lemniscate, train_loader, val_loader, args.K, args.nce_t, train_dataset, val_dataset)
    if args.tsne:
        labels = idx_to_name(train_dataset, args.graph_labels)
        tsne(lemniscate, args.tsne, labels)

    if args.pca:
        labels = idx_to_name(train_dataset, args.graph_labels)
        pca(lemniscate, labels)

    if args.view_knn:
        my_knn(model, lemniscate, train_loader, val_loader, args.K, args.nce_t,
               train_dataset, val_dataset)

    if args.kmeans:
        kmeans, yi = kmean(lemniscate, args.kmeans, 500, args.K, train_dataset)
        D, I = kmeans.index.search(lemniscate.memory.data.cpu().numpy(), 1)
        cent_group = {}
        data_cent = {}
        for n, i in enumerate(I):
            if i[0] not in cent_group.keys():
                cent_group[i[0]] = []
            cent_group[i[0]].append(n)
            data_cent[n] = i[0]
        train_sampler = torch.utils.data.sampler.SubsetRandomSampler(
            cent_group[0])
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=(train_sampler is None),
            num_workers=args.workers,
            pin_memory=True,
            sampler=train_sampler)
        # lemniscate = NCEAverage(args.low_dim, ndata, args.nce_k, args.nce_t, args.nce_m)
        # criterion = NCECriterion(ndata).cuda()
        # lemniscate = NCEAverage(args.low_dim, ndata, args.nce_k, args.nce_t, args.nce_m)

    if args.tsne_grid:
        tsne_grid(val_loader, model)

    if args.h_cluster:
        for size in range(2, 3):
            # size = 20
            kmeans, topk = kmean(lemniscate, size, 500, 10, train_dataset)
            respred = torch.tensor([]).cuda()
            lab, idx = [[] for i in range(2)]
            num = 0
            '''
            for p, index, label in pred:
                respred = torch.cat((respred, p))
                if num == 0:
                    lab = label
                else:
                    lab += label
                idx.append(index)
                num += 1
            '''
            h_cluster(lemniscate, train_dataset, kmeans, topk, size)  # , respred, lab, idx
            # axis_explore(lemniscate, train_dataset)
    # kmeans_opt(lemniscate, 5)

    if args.select:
        if not args.select_load:
            pred = []
            if args.select_size:
                size = int(args.select_size * ndata)
            else:
                size = round(ndata / 100.0)
            sub_sample = np.random.randint(0, ndata, size=size)
            train_sampler = torch.utils.data.sampler.SubsetRandomSampler(
                sub_sample)
            train_loader = torch.utils.data.DataLoader(
                train_dataset,
                batch_size=args.batch_size,
                shuffle=(train_sampler is None),
                num_workers=args.workers,
                pin_memory=True,
                sampler=train_sampler)
            pred = div_train(train_loader, model, 0, pred)

        pred_features = []
        pred_labels = []
        pred_idx = []
        for inst in pred:
            feat, idx, lab = list(inst)
            pred_features.append(feat)
            pred_labels.append(lab)
            pred_idx.append(idx.data.cpu())

        if args.select_save:
            save_checkpoint(
                {
                    'epoch': args.start_epoch,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'prediction': pred,
                    'lemniscate': lemniscate,
                    'optimizer': optimizer.state_dict(),
                }, 'select.pth.tar')

        min_idx = selection(pred_features, pred_idx, train_dataset,
                            args.select_num, args.select_thresh)
        train_sampler = torch.utils.data.sampler.SubsetRandomSampler(min_idx)
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=(train_sampler is None),
            num_workers=args.workers,
            pin_memory=True,
            sampler=train_sampler)
        lemniscate = NCEAverage(args.low_dim, ndata, 20, args.nce_t, args.nce_m)
        optimizer = torch.optim.SGD(model.parameters(), 0.1, momentum=0.1,
                                    weight_decay=0.00001)
        optimizer = FP16_Optimizer(optimizer,
                                   static_loss_scale=args.static_loss,
                                   verbose=False)
        for epoch in range(50):
            if args.distributed:
                train_sampler.set_epoch(epoch)
            adjust_learning_rate(optimizer, epoch)
            if epoch % 1 == 0:
                save_checkpoint({
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'lemniscate': lemniscate,
                    'optimizer': optimizer.state_dict(),
                })
            train(train_loader, model, lemniscate, criterion, optimizer, epoch)

        train_sampler = torch.utils.data.sampler.SubsetRandomSampler(sub_index)
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=(train_sampler is None),
            num_workers=args.workers,
            pin_memory=True,
            sampler=train_sampler)
        lemniscate = NCEAverage(args.low_dim, ndata, args.nce_k,
                                args.nce_t, args.nce_m)
        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        optimizer = FP16_Optimizer(optimizer,
                                   static_loss_scale=args.static_loss,
                                   verbose=False)

    if args.kmeans_opt:
        kmeans_opt(lemniscate, 500)

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch)

        if epoch % 1 == 0:
            # evaluate on validation set
            # prec1 = NN(epoch, model, lemniscate, train_loader, train_loader)  # was evaluating on train
            # prec1 = kNN(model, lemniscate, train_loader, val_loader, args.K, args.nce_t)
            # prec1 really should be renamed to prec5 as kNN now returns the
            # top5 score, but that wouldn't be backwards compatible as earlier
            # models were saved with "best_prec1"
            # remember best prec@1 and save checkpoint
            # is_best = prec1 > best_prec1
            # best_prec1 = max(prec1, best_prec1)
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'lemniscate': lemniscate,
                # 'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            })  # , is_best)

        # train for one epoch
        train(train_loader, model, lemniscate, criterion, optimizer, epoch)
        # kmeans, cent = kmeans()
        # group_train(train_loader, model, lemniscate, criterion, optimizer, epoch, kmeans, cent)

    # print elapsed time
    end_train_time = datetime.datetime.now()
    d = end_train_time - begin_train_time
    print("Trained for %d epochs. Elapsed time: %s days, %.2dh: %.2dm: %.2ds" %
          (len(range(args.start_epoch, args.epochs)), d.days,
           d.seconds // 3600, (d.seconds // 60) % 60, d.seconds % 60))
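
# get_factors() is used above to pick batch sizes that evenly divide the data
# set, but is defined elsewhere. A minimal sketch under that assumption:
def get_factors_sketch(n):
    """Return all divisors of n in ascending order."""
    return [d for d in range(1, n + 1) if n % d == 0]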
def main():
    global args, best_prec1
    args = parser.parse_args()

    my_whole_seed = 111
    random.seed(my_whole_seed)
    np.random.seed(my_whole_seed)
    torch.manual_seed(my_whole_seed)
    torch.cuda.manual_seed_all(my_whole_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    os.environ['PYTHONHASHSEED'] = str(my_whole_seed)

    for kk_time in range(args.seedstart, args.seedend):
        args.seed = kk_time
        args.result = args.result + str(args.seed)

        # create model
        model = models.__dict__[args.arch](low_dim=args.low_dim,
                                           multitask=args.multitask,
                                           showfeature=args.showfeature,
                                           args=args)
        # from models.Gresnet import ResNet18
        # model = ResNet18(low_dim=args.low_dim, multitask=args.multitask)
        model = torch.nn.DataParallel(model).cuda()

        # Data loading code
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        aug = transforms.Compose([
            transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
            transforms.RandomGrayscale(p=0.2),
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize
        ])
        # aug = transforms.Compose([transforms.RandomResizedCrop(224, scale=(0.08, 1.), ratio=(3 / 4, 4 / 3)),
        #                           transforms.RandomHorizontalFlip(p=0.5),
        #                           get_color_distortion(s=1),
        #                           transforms.Lambda(lambda x: gaussian_blur(x)),
        #                           transforms.ToTensor(),
        #                           normalize])
        # aug = transforms.Compose([transforms.RandomRotation(60),
        #                           transforms.RandomResizedCrop(224, scale=(0.6, 1.)),
        #                           transforms.RandomGrayscale(p=0.2),
        #                           transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
        #                           transforms.RandomHorizontalFlip(),
        #                           transforms.ToTensor(),
        #                           normalize])
        aug_test = transforms.Compose([
            transforms.Resize(224),
            transforms.ToTensor(),
            normalize
        ])

        # dataset
        import datasets.fundus_kaggle_dr as medicaldata

        # deterministically seed each dataloader worker (worker_init_fn must be
        # a callable; passing random.seed(...) directly would pass None)
        seed_worker = lambda worker_id: random.seed(my_whole_seed + worker_id)

        train_dataset = medicaldata.traindataset(root=args.data, transform=aug,
                                                 train=True, args=args)
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=True,
            pin_memory=True,
            num_workers=8,
            drop_last=True if args.multiaug else False,
            worker_init_fn=seed_worker)

        valid_dataset = medicaldata.traindataset(root=args.data,
                                                 transform=aug_test,
                                                 train=False,
                                                 test_type="amd",
                                                 args=args)
        val_loader = torch.utils.data.DataLoader(
            valid_dataset, batch_size=args.batch_size, shuffle=False,
            pin_memory=True, num_workers=8, worker_init_fn=seed_worker)

        valid_dataset_gon = medicaldata.traindataset(root=args.data,
                                                     transform=aug_test,
                                                     train=False,
                                                     test_type="gon",
                                                     args=args)
        val_loader_gon = torch.utils.data.DataLoader(
            valid_dataset_gon, batch_size=args.batch_size, shuffle=False,
            pin_memory=True, num_workers=8, worker_init_fn=seed_worker)

        valid_dataset_pm = medicaldata.traindataset(root=args.data,
                                                    transform=aug_test,
                                                    train=False,
                                                    test_type="pm",
                                                    args=args)
        val_loader_pm = torch.utils.data.DataLoader(
            valid_dataset_pm, batch_size=args.batch_size, shuffle=False,
            pin_memory=True, num_workers=8, worker_init_fn=seed_worker)

        # define lemniscate and loss function (criterion)
        ndata = train_dataset.__len__()
        lemniscate = LinearAverage(args.low_dim, ndata, args.nce_t,
                                   args.nce_m).cuda()
        local_lemniscate = None

        if args.multitaskposrot:
            print("running multi task with positive")
            criterion = BatchCriterionRot(1, 0.1, args.batch_size, args).cuda()
        elif args.domain:
            print("running domain with four types--unify")
            from lib.BatchAverageFour import BatchCriterionFour
            # criterion = BatchCriterionTriple(1, 0.1, args.batch_size, args).cuda()
            criterion = BatchCriterionFour(1, 0.1, args.batch_size, args).cuda()
        elif args.multiaug:
            print("running multi task")
            criterion = BatchCriterion(1, 0.1, args.batch_size, args).cuda()
        else:
            criterion = nn.CrossEntropyLoss().cuda()

        if args.multitask:
            cls_criterion = nn.CrossEntropyLoss().cuda()
        else:
            cls_criterion = None

        optimizer = torch.optim.Adam(model.parameters(), args.lr,
                                     weight_decay=args.weight_decay)

        # optionally resume from a checkpoint
        if args.resume:
            if os.path.isfile(args.resume):
                print("=> loading checkpoint '{}'".format(args.resume))
                checkpoint = torch.load(args.resume)
                args.start_epoch = checkpoint['epoch']
                model.load_state_dict(checkpoint['state_dict'])
                lemniscate = checkpoint['lemniscate']
                optimizer.load_state_dict(checkpoint['optimizer'])
                print("=> loaded checkpoint '{}' (epoch {})".format(
                    args.resume, checkpoint['epoch']))
            else:
                print("=> no checkpoint found at '{}'".format(args.resume))

        if args.evaluate:
            knn_num = 100
            auc, acc, precision, recall, f1score = kNN(args, model, lemniscate,
                                                       train_loader, val_loader,
                                                       knn_num, args.nce_t, 2)
            return

        # mkdir result folder and tensorboard
        os.makedirs(args.result, exist_ok=True)
        writer = SummaryWriter("runs/" + str(args.result.split("/")[-1]))
        writer.add_text('Text', str(args))

        # copy code
        import shutil, glob
        source = glob.glob("*.py")
        source += glob.glob("*/*.py")
        os.makedirs(args.result + "/code_file", exist_ok=True)
        for file in source:
            name = file.split("/")[0]
            if name == file:
                shutil.copy(file, args.result + "/code_file/")
            else:
                os.makedirs(args.result + "/code_file/" + name, exist_ok=True)
                shutil.copy(file, args.result + "/code_file/" + name)

        for epoch in range(args.start_epoch, args.epochs):
            lr = adjust_learning_rate(optimizer, epoch, args, [100, 200])
            writer.add_scalar("lr", lr, epoch)

            # train for one epoch
            loss = train(train_loader, model, lemniscate, local_lemniscate,
                         criterion, cls_criterion, optimizer, epoch, writer)
            writer.add_scalar("train_loss", loss, epoch)

            # gap_int = 10
            # if (epoch) % gap_int == 0:
            #     knn_num = 100
            #     auc, acc, precision, recall, f1score = kNN(args, model, lemniscate, train_loader, val_loader, knn_num, args.nce_t, 2)
            #     writer.add_scalar("test_auc", auc, epoch)
            #     writer.add_scalar("test_acc", acc, epoch)
            #     writer.add_scalar("test_precision", precision, epoch)
            #     writer.add_scalar("test_recall", recall, epoch)
            #     writer.add_scalar("test_f1score", f1score, epoch)
            #
            #     auc, acc, precision, recall, f1score = kNN(args, model, lemniscate, train_loader, val_loader_gon, knn_num, args.nce_t, 2)
            #     writer.add_scalar("gon/test_auc", auc, epoch)
            #     writer.add_scalar("gon/test_acc", acc, epoch)
            #     writer.add_scalar("gon/test_precision", precision, epoch)
            #     writer.add_scalar("gon/test_recall", recall, epoch)
            #     writer.add_scalar("gon/test_f1score", f1score, epoch)
            #
            #     auc, acc, precision, recall, f1score = kNN(args, model, lemniscate, train_loader, val_loader_pm, knn_num, args.nce_t, 2)
            #     writer.add_scalar("pm/test_auc", auc, epoch)
            #     writer.add_scalar("pm/test_acc", acc, epoch)
            #     writer.add_scalar("pm/test_precision", precision, epoch)
            #     writer.add_scalar("pm/test_recall", recall, epoch)
            #     writer.add_scalar("pm/test_f1score", f1score, epoch)

            # save checkpoint
            save_checkpoint(
                {
                    'epoch': epoch,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'lemniscate': lemniscate,
                    'optimizer': optimizer.state_dict(),
                },
                filename=args.result + "/fold" + str(args.seedstart) +
                "-epoch-" + str(epoch) + ".pth.tar")
# define loss function
if hasattr(lemniscate, 'K'):
    criterion = NCECriterion(ndata)
else:
    criterion = nn.CrossEntropyLoss()

if use_cuda:
    net.cuda()
    net = torch.nn.DataParallel(net,
                                device_ids=range(torch.cuda.device_count()))
    lemniscate.cuda()
    criterion.cuda()
    cudnn.benchmark = True

if args.test_only:
    acc = kNN(0, net, lemniscate, trainloader, testloader, 200, args.nce_t, 1)
    sys.exit(0)

optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9,
                      weight_decay=5e-4)

def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 40 epochs after epoch 80"""
    lr = args.lr
    if epoch >= 80:
        lr = args.lr * (0.1 ** ((epoch - 80) // 40))
    print(lr)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

# Training
def train(epoch):
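
# The train() bodies are elided in several fragments above. A minimal sketch of
# the per-epoch loop these scripts share; the loader item layout, variable
# names, and the lemniscate call signature are assumptions based on the
# instance-discrimination pipeline, not any one repo's exact code:
def train_epoch_sketch(epoch, net, trainloader, lemniscate, criterion, optimizer):
    """One epoch of non-parametric instance-discrimination training."""
    net.train()
    for inputs, _, indexes in trainloader:       # (image, target, index) datasets
        inputs, indexes = inputs.cuda(), indexes.cuda()
        features = net(inputs)                   # low_dim embedding
        outputs = lemniscate(features, indexes)  # similarities vs. memory bank
        loss = criterion(outputs, indexes)       # each instance is its own class
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()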