def main():
    args = parse_args()

    # Device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Dataset
    train_loader, test_loader, classes = mnist_loader.load_dataset(
        args.dataset_dir, img_show=True)

    # Model
    model = Net().to(device)
    print(model)

    # Loss: NLLLoss on log-softmax outputs (CrossEntropyLoss = log_softmax + NLLLoss),
    # plus center loss on the 2-D embeddings of the 10 MNIST classes.
    nllloss = nn.NLLLoss().to(device)
    loss_weight = 1
    centerloss = CenterLoss(10, 2).to(device)

    # Optimizers: one for the network, one for the class centers.
    dnn_optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9,
                              weight_decay=0.0005)
    scheduler = lr_scheduler.StepLR(dnn_optimizer, 20, gamma=0.8)
    center_optimizer = optim.SGD(centerloss.parameters(), lr=0.5)

    print('Start training...')
    for epoch in range(1, 101):
        # Train and test the model.
        train_acc, train_loss, feat, labels = train(
            device, train_loader, model, nllloss, loss_weight, centerloss,
            dnn_optimizer, center_optimizer)
        test_acc, test_loss = test(device, test_loader, model, nllloss,
                                   loss_weight, centerloss)
        stdout_temp = ('Epoch: {:>3}, train acc: {:<8}, train loss: {:<8}, '
                       'test acc: {:<8}, test loss: {:<8}')
        print(stdout_temp.format(epoch, train_acc, train_loss, test_acc, test_loss))

        # Step the LR schedule after this epoch's optimizer updates.
        scheduler.step()

        # Visualize the 2-D features of each class.
        vis_img_path = args.vis_img_path_temp.format(str(epoch).zfill(3))
        visualize(feat.data.cpu().numpy(), labels.data.cpu().numpy(), epoch,
                  vis_img_path)

        # Save the trained model.
        model_path = args.model_path_temp.format(str(epoch).zfill(3))
        torch.save(model.state_dict(), model_path)
def main():
    args = parse_args()

    # Device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Dataset
    train_loader, test_loader, classes = mnist_loader.load_dataset(
        args.dataset_dir, img_show=False)

    # Model
    model = Net().to(device)
    model.load_state_dict(torch.load(args.model_path))
    model = model.eval()
    print(model)

    # Loss: NLLLoss (CrossEntropyLoss = log_softmax + NLLLoss) plus center loss.
    nllloss = nn.NLLLoss().to(device)
    loss_weight = 1
    centerloss = CenterLoss(10, 2).to(device)

    # Test the trained model.
    print('Testing the trained model...')
    test_acc, test_loss = test(device, test_loader, model, nllloss, loss_weight,
                               centerloss)
    stdout_temp = 'test acc: {:<8}, test loss: {:<8}'
    print(stdout_temp.format(test_acc, test_loss))
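# The snippets above and below construct CenterLoss(num_classes, feat_dim) but its
# definition never appears in this section. Below is a minimal sketch of such a
# module, assuming the common formulation (Wen et al., ECCV 2016: "A Discriminative
# Feature Learning Approach for Deep Face Recognition"): a learnable
# (num_classes, feat_dim) matrix of class centers, penalizing the squared distance
# between each feature and the center of its label. The class name, the `centers`
# attribute, and the constructor signature mirror the calls in this section; the
# actual repositories' implementations may differ in detail.
import torch
import torch.nn as nn

class CenterLoss(nn.Module):
    def __init__(self, num_classes=10, feat_dim=2, use_gpu=False):
        super().__init__()
        # One learnable center per class; typically updated by its own optimizer,
        # as in the scripts above.
        centers = torch.randn(num_classes, feat_dim)
        if use_gpu:
            centers = centers.cuda()
        self.centers = nn.Parameter(centers)

    def forward(self, features, labels):
        # Gather the center assigned to each sample's label...
        batch_centers = self.centers[labels]
        # ...and average the squared Euclidean distance to it.
        return ((features - batch_centers) ** 2).sum(dim=1).mean() / 2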
def main():
    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    use_gpu = torch.cuda.is_available()

    # Seed everything for reproducibility.
    np.random.seed(args.seed)
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    torch.backends.cudnn.deterministic = True
    cudnn.benchmark = True  # note: benchmark mode can still introduce nondeterminism

    print("Initializing train dataset {}".format(args.train_dataset))
    train_dataset = data_manager.init_dataset(name=args.train_dataset)
    print("Initializing test dataset {}".format(args.test_dataset))
    test_dataset = data_manager.init_dataset(name=args.test_dataset)

    transform_train = T.Compose([
        T.Resize([args.height, args.width]),
        T.RandomHorizontalFlip(),
        T.Pad(10),
        T.RandomCrop([args.height, args.width]),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        T.RandomErasing(probability=0.5, mean=[0.485, 0.456, 0.406]),
    ])
    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    pin_memory = True if use_gpu else False

    # Snippet sampling strategies: random_snip, first_snip, constrain_random, evenly.
    trainloader = DataLoader(
        VideoDataset(train_dataset.train, seq_len=args.seq_len,
                     sample='constrain_random', transform=transform_train),
        sampler=RandomIdentitySampler(train_dataset.train,
                                      num_instances=args.num_instances),
        batch_size=args.train_batch, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=True,
    )
    queryloader = DataLoader(
        VideoDataset(test_dataset.query, seq_len=args.seq_len, sample='evenly',
                     transform=transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )
    galleryloader = DataLoader(
        VideoDataset(test_dataset.gallery, seq_len=args.seq_len, sample='evenly',
                     transform=transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=train_dataset.num_train_pids,
                              loss={'xent', 'htri'})
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))

    print("load model {0} from {1}".format(args.arch, args.load_model))
    if args.load_model != '':
        # Load only the weights whose names match the current model.
        pretrained_model = torch.load(args.load_model)
        model_dict = model.state_dict()
        pretrained_dict = {
            k: v for k, v in pretrained_model['state_dict'].items()
            if k in model_dict
        }
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)
        start_epoch = pretrained_model['epoch'] + 1
        best_rank1 = pretrained_model['rank1']
    else:
        start_epoch = args.start_epoch
        best_rank1 = -np.inf

    criterion = dict()
    criterion['triplet'] = WeightedRegularizedTriplet()
    criterion['xent'] = CrossEntropyLabelSmooth(
        num_classes=train_dataset.num_train_pids)
    criterion['center'] = CenterLoss(num_classes=train_dataset.num_train_pids,
                                     feat_dim=512, use_gpu=True)
    print(criterion)

    # Separate optimizers for the model and the class centers.
    optimizer = dict()
    optimizer['model'] = model.get_optimizer(args)
    optimizer['center'] = torch.optim.SGD(criterion['center'].parameters(), lr=0.5)
    scheduler = lr_scheduler.MultiStepLR(optimizer['model'],
                                         milestones=args.stepsize, gamma=args.gamma)
    print(model)
    model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        distmat = test(model, queryloader, galleryloader, args.pool, use_gpu,
                       return_distmat=True)
        return

    start_time = time.time()
    train_time = 0
    best_epoch = args.start_epoch
    print("==> Start training")
    for epoch in range(start_epoch, args.max_epoch):
        scheduler.step()
        print('Epoch', epoch, 'lr', scheduler.get_lr()[0])
        start_train_time = time.time()
        train(epoch, model, criterion, optimizer, trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)

        if ((epoch + 1) > args.start_eval and args.eval_step > 0
                and (epoch + 1) % args.eval_step == 0) \
                or (epoch + 1) == args.max_epoch:
            print("==> Test")
            rank1 = test(model, queryloader, galleryloader, args.pool, use_gpu)
            is_best = rank1 > best_rank1
            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'rank1': rank1,
                    'epoch': epoch,
                }, is_best,
                osp.join(args.save_dir,
                         'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(
        best_rank1, best_epoch))
    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print("Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}."
          .format(elapsed, train_time))
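# CrossEntropyLabelSmooth appears in several scripts here without its definition.
# A minimal sketch of the common formulation (label smoothing, Szegedy et al.):
# mix the one-hot target with a uniform distribution before taking the
# cross-entropy. The epsilon=0.1 default and the exact reduction are assumptions
# based on typical re-ID baselines, not necessarily the code of these projects.
import torch
import torch.nn as nn

class CrossEntropyLabelSmooth(nn.Module):
    def __init__(self, num_classes, epsilon=0.1, use_gpu=True):
        super().__init__()
        self.num_classes = num_classes
        self.epsilon = epsilon
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, inputs, targets):
        # inputs: (batch, num_classes) logits; targets: (batch,) class indices.
        log_probs = self.logsoftmax(inputs)
        # One-hot targets, smoothed toward the uniform distribution.
        targets = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1)
        targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes
        return (-targets * log_probs).mean(0).sum()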
    # (continuation of an earlier branch selecting the auxiliary loss)
    aux_loss = PerpetualOrthogonalProjectionLoss(num_classes=100,
                                                 feat_dim=embedding_dim[args.net],
                                                 no_norm=False,
                                                 use_attention=False)
    params = list(net.parameters()) + list(aux_loss.parameters())
else:
    aux_loss = OrthogonalProjectionLoss(no_norm=False, use_attention=False)

if args.hnc:
    hnc_loss = cam_loss_kd_topk()
else:
    hnc_loss = None

if args.cl:
    # Adding the center-loss parameters lets a single optimizer update both
    # the network weights and the class centers.
    center_loss = CenterLoss(num_classes=100, feat_dim=2048, use_gpu=True)
    params = list(net.parameters()) + list(center_loss.parameters())

optimizer = optim.SGD(params=params, lr=args.lr, momentum=0.9, weight_decay=5e-4)
train_scheduler = optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=settings.MILESTONES, gamma=0.2)  # learning rate decay
iter_per_epoch = len(training_loader)
warmup_scheduler = WarmUpLR(optimizer, iter_per_epoch * args.warm)

if args.resume:
    if args.pth is not None:
def main():
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False

    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_dataset(
        root=args.root,
        name=args.dataset,
        split_id=args.split_id,
        cuhk03_labeled=args.cuhk03_labeled,
        cuhk03_classic_split=args.cuhk03_classic_split,
    )

    transform_train = T.Compose([
        T.Random2DTranslation(args.height, args.width),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    pin_memory = True if use_gpu else False

    trainloader = DataLoader(
        ImageDataset(dataset.train, transform=transform_train),
        batch_size=args.train_batch, shuffle=True, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=True,
    )
    queryloader = DataLoader(
        ImageDataset(dataset.query, transform=transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )
    galleryloader = DataLoader(
        ImageDataset(dataset.gallery, transform=transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dataset.num_train_pids, loss={'cent'})
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))

    # Two criteria, two optimizers: Adam for the network, SGD for the centers.
    criterion_xent = CrossEntropyLabelSmooth(
        num_classes=dataset.num_train_pids, use_gpu=use_gpu)
    criterion_cent = CenterLoss(num_classes=dataset.num_train_pids,
                                feat_dim=model.feat_dim, use_gpu=use_gpu)
    optimizer_model = torch.optim.Adam(model.parameters(), lr=args.lr,
                                       weight_decay=args.weight_decay)
    optimizer_cent = torch.optim.SGD(criterion_cent.parameters(), lr=args.lr_cent)
    if args.stepsize > 0:
        scheduler = lr_scheduler.StepLR(optimizer_model,
                                        step_size=args.stepsize, gamma=args.gamma)

    start_epoch = args.start_epoch
    if args.resume:
        print("Loading checkpoint from '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        test(model, queryloader, galleryloader, use_gpu)
        return

    start_time = time.time()
    train_time = 0
    best_rank1 = -np.inf
    best_epoch = 0
    print("==> Start training")
    for epoch in range(start_epoch, args.max_epoch):
        start_train_time = time.time()
        train(epoch, model, criterion_xent, criterion_cent, optimizer_model,
              optimizer_cent, trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)

        if args.stepsize > 0:
            scheduler.step()

        if (args.eval_step > 0 and (epoch + 1) % args.eval_step == 0) \
                or (epoch + 1) == args.max_epoch:
            print("==> Test")
            rank1 = test(model, queryloader, galleryloader, use_gpu)
            is_best = rank1 > best_rank1
            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'rank1': rank1,
                    'epoch': epoch,
                }, is_best,
                osp.join(args.save_dir,
                         'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(
        best_rank1, best_epoch))
    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print("Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}."
          .format(elapsed, train_time))
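# The train() function called above is not shown in this section. Below is a
# minimal sketch of one training step under the usual two-optimizer scheme
# (cf. KaiyangZhou's pytorch-center-loss): the total loss is xent + alpha * center,
# and the center gradients are rescaled by 1/alpha so the centers' effective
# learning rate does not depend on the loss weight. The `alpha` argument and the
# assumption that the model returns (logits, features) are illustrative, not the
# exact code of the repository above.
def train_step(model, criterion_xent, criterion_cent, optimizer_model,
               optimizer_cent, data, pids, alpha=1.0):
    outputs, features = model(data)
    loss = criterion_xent(outputs, pids) + alpha * criterion_cent(features, pids)

    optimizer_model.zero_grad()
    optimizer_cent.zero_grad()
    loss.backward()
    optimizer_model.step()

    # Undo the alpha weighting on the center gradients before stepping
    # their optimizer.
    for param in criterion_cent.parameters():
        param.grad.data *= (1.0 / alpha)
    optimizer_cent.step()
    return loss.item()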
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', type=int, default=256)
    parser.add_argument('--loss', default='softmax_loss',
                        choices=['softmax_loss', 'center_loss', 'sphere_face_loss',
                                 'cos_face_loss', 'arc_face_loss'])
    parser.add_argument('--viz', default='vizs')
    parser.add_argument('--epochs', type=int, default=30)
    parser.add_argument('--lr', type=float, default=0.001)
    args = parser.parse_args()

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 8, 'pin_memory': True} if use_cuda else {}

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])
    train_loader = DataLoader(
        datasets.MNIST('../data', train=True, download=True, transform=transform),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    test_loader = DataLoader(
        datasets.MNIST('../data', train=False, transform=transform),
        batch_size=512, shuffle=True, **kwargs)

    model = Net().to(device)

    # Select the metric-learning head; center loss additionally needs its own
    # optimizer for the class centers.
    if args.loss == 'center_loss':
        criterion = CenterLoss().to(device)
        center_optimizer = optim.SGD([criterion.centers], lr=args.lr, momentum=0.9)
    elif args.loss == 'sphere_face_loss':
        criterion = SphereFaceLoss().to(device)
    elif args.loss == 'cos_face_loss':
        criterion = CosFaceLoss(s=7, m=0.2).to(device)
    elif args.loss == 'softmax_loss':
        criterion = SoftmaxLoss().to(device)
    elif args.loss == 'arc_face_loss':
        criterion = ArcFaceLoss().to(device)

    # One optimizer covers both the backbone and the loss's classifier weights.
    optimizer = optim.SGD([{'params': model.parameters()},
                           {'params': criterion.fc.parameters()}],
                          lr=args.lr, momentum=0.9)

    for epoch in range(1, args.epochs + 1):
        model.train()
        embeddings = []
        labels = []
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            if args.loss == 'center_loss':
                center_optimizer.zero_grad()
            embedding = model(data)
            loss = criterion(embedding, target)
            loss.backward()
            optimizer.step()
            if args.loss == 'center_loss':
                center_optimizer.step()
            # Detach stored embeddings so the autograd graph is not kept alive
            # for the whole epoch.
            embeddings.append(embedding.detach())
            labels.append(target)
            if batch_idx % 100 == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.item()))
        embeddings = torch.cat(embeddings, 0).cpu().numpy()
        labels = torch.cat(labels, 0).cpu().numpy()
        acc = val(model, criterion, device, test_loader)
        visualize(args.viz, args.loss, embeddings, labels, epoch, acc)

    print('Creating gif...')
    create_gif('./%s/gifs/%s.gif' % (args.viz, args.loss),
               './%s/%s' % (args.viz, args.loss), 0.2)
    print('Done')
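# Each loss module above exposes a linear classifier head as `criterion.fc`
# (hence the two parameter groups in the SGD call). Below is a minimal sketch of
# the softmax variant, assuming 2-D embeddings and 10 MNIST classes; the real
# SoftmaxLoss/CenterLoss classes in that script may differ in detail.
import torch
import torch.nn as nn
import torch.nn.functional as F

class SoftmaxLoss(nn.Module):
    def __init__(self, feat_dim=2, num_classes=10):
        super().__init__()
        # Classifier over the embedding space; its weights are trained by the
        # main optimizer alongside the backbone.
        self.fc = nn.Linear(feat_dim, num_classes)

    def forward(self, embedding, target):
        logits = self.fc(embedding)
        return F.cross_entropy(logits, target)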
def train_epoch(train_loader, model, loss_fn, optimizer, k, n_K, n_classes,
                rm_zero, global_loss=False, train_embeddeds=None,
                train_targets=None, gamma=0.01, center_sigma=-1.0,
                method='kTriplet', use_cross_entropy=False):
    model.train()
    losses_triplet = []
    losses_inner_class = []
    losses_center = []
    losses_cross_entropy = []
    losses = []
    center_loss = CenterLoss(n_classes, mean_center=True).cuda()
    cross_entropy = nn.CrossEntropyLoss().cuda()

    if global_loss:
        in_mat, out_mat = get_global_distance_map(train_embeddeds, train_targets,
                                                  k, n_K)
    data_id = 0
    for batch_idx, (data, target) in enumerate(
            tqdm(train_loader, ncols=70, desc="Train")):
        data = data.cuda()
        target = target.cuda()

        # Note: for Triplet these are just the embeddings; for Clas they are
        # the prediction outputs.
        outputs = model(data)
        embeddeds = outputs

        if method == 'kTriplet':
            anchor, positive, negative = generate_k_triplet(embeddeds, target,
                                                            K=k, B=n_K)
        elif method == 'batchHardTriplet':
            anchor, positive, negative = generate_batch_hard_triplet(embeddeds,
                                                                     target)
        elif method == 'batchAllTriplet' or method == 'batchSemiHardTriplet':
            anchor, positive, negative = generate_all_triplet(embeddeds, target)
        else:
            raise ValueError

        triplet_loss = loss_fn(anchor, positive, negative)
        if method == 'batchSemiHardTriplet':
            # Keep only the semi-hard triplets (0 < loss <= margin).
            semi = torch.nonzero((triplet_loss <= loss_fn.margin)
                                 & (triplet_loss > 0))
            triplet_loss = triplet_loss.index_select(dim=0, index=semi.squeeze())

        if rm_zero:
            # Average over the non-zero (active) triplets only.
            non_zero = torch.nonzero(triplet_loss.cpu().data).size(0)
            if non_zero == 0:
                loss_triplet = triplet_loss.mean()
            else:
                loss_triplet = (triplet_loss / non_zero).sum()
        else:
            loss_triplet = triplet_loss.mean()

        if gamma > 0:
            # Pull anchor-positive pairs together: log1p of the squared distance.
            loss_inner_class = torch.log1p((anchor - positive).pow(2).sum(1)).mean()
            loss = loss_triplet + gamma * loss_inner_class
        elif gamma == 0:
            # Computed for monitoring only; not added to the objective.
            loss_inner_class = np.mean(
                np.sum(np.power(
                    np.log1p(anchor.cpu().detach().numpy()
                             - positive.cpu().detach().numpy()), 2),
                       axis=1))
            loss_inner_class = torch.tensor(loss_inner_class)
            loss = loss_triplet
        else:
            loss_inner_class = torch.tensor(0)
            loss = loss_triplet

        if center_sigma > 0:
            closs = center_sigma * center_loss(embeddeds, target)
            loss += closs
            losses_center.append(closs.item())

        if global_loss:
            g_pos, g_neg = get_global_data(
                train_loader.dataset, in_mat, out_mat,
                np.array(range(data_id, data_id + train_loader.batch_size)))
            data_id += train_loader.batch_size
            g_pos = g_pos.cuda()
            g_neg = g_neg.cuda()
            out_p = model(g_pos)
            out_n = model(g_neg)
            anchor, positive, negative = makeup_global_triplet(
                embeddeds, out_p, out_n, k, n_K)
            # Use a separate name so the global_loss flag is not overwritten.
            loss_global = loss_fn(anchor, positive, negative)
            loss += loss_global

        # TODO: add cross entropy
        if use_cross_entropy:
            # ce_loss = cross_entropy(, target)
            # loss = loss + ce_loss
            print('Not implemented!')
            sys.exit(-1)
        else:
            ce_loss = torch.tensor(0)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        losses_triplet.append(loss_triplet.item())
        losses_inner_class.append(loss_inner_class.item())
        losses_cross_entropy.append(ce_loss.item())
        losses.append(loss.item())

    average_triplet_loss = sum(losses_triplet) / len(train_loader)
    average_inner_class = sum(losses_inner_class) / len(train_loader)
    average_center_loss = sum(losses_center) / len(train_loader)
    average_cross_entropy = sum(losses_cross_entropy) / len(train_loader)
    total_loss = sum(losses) / len(train_loader)
    return (total_loss, average_triplet_loss, average_inner_class,
            average_center_loss, average_cross_entropy)
def main():
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False

    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_img_dataset(
        root=args.root,
        name=args.dataset,
        split_id=args.split_id,
        cuhk03_labeled=args.cuhk03_labeled,
        cuhk03_classic_split=args.cuhk03_classic_split,
    )

    transform_train = T.Compose([
        T.Resize((args.height, args.width)),
        T.RandomHorizontalFlip(p=0.5),
        T.Pad(10),
        T.RandomCrop([args.height, args.width]),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        torchvision.transforms.RandomErasing(p=0.5, scale=(0.02, 0.4),
                                             ratio=(0.3, 3.33),
                                             value=(0.4914, 0.4822, 0.4465)),
        # T.RandomErasing(probability=0.5, sh=0.4, mean=(0.4914, 0.4822, 0.4465)),
    ])
    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    pin_memory = True if use_gpu else False

    trainloader = DataLoader(
        ImageDataset(dataset.train, transform=transform_train),
        sampler=RandomIdentitySampler2(dataset.train,
                                       batch_size=args.train_batch,
                                       num_instances=args.num_instances),
        batch_size=args.train_batch, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=True,
    )
    queryloader = DataLoader(
        ImageDataset(dataset.query, transform=transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )
    galleryloader = DataLoader(
        ImageDataset(dataset.gallery, transform=transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dataset.num_train_pids,
                              loss={'xent', 'htri', 'cent'})
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion_xent = CrossEntropyLabelSmooth(
        num_classes=dataset.num_train_pids, use_gpu=use_gpu)
    criterion_htri = TripletLoss(margin=args.margin)
    criterion_cent = CenterLoss(num_classes=dataset.num_train_pids,
                                feat_dim=model.feat_dim, use_gpu=use_gpu)
    optimizer_model = init_optim(args.optim, model.parameters(), args.lr,
                                 args.weight_decay)
    optimizer_cent = torch.optim.SGD(criterion_cent.parameters(), lr=args.lr_cent)

    # Only optimizer_model follows a learning-rate schedule.
    # if args.stepsize > 0:
    #     scheduler = lr_scheduler.StepLR(optimizer_model,
    #                                     step_size=args.stepsize, gamma=args.gamma)
    # ------ Modify the lr schedule here; see the args.xxx definitions for the
    # hyperparameter details. ------
    current_schedule = init_lr_schedule(schedule=args.schedule,
                                        warm_up_epoch=args.warm_up_epoch,
                                        half_cos_period=args.half_cos_period,
                                        lr_milestone=args.lr_milestone,
                                        gamma=args.gamma,
                                        stepsize=args.stepsize)
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer_model,
                                                  lr_lambda=current_schedule)

    start_epoch = args.start_epoch
    if args.resume:
        print("Loading checkpoint from '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        test(model, queryloader, galleryloader, use_gpu)
        return

    start_time = time.time()
    train_time = 0
    best_rank1 = -np.inf
    best_epoch = 0
    print("==> Start training")
    for epoch in range(start_epoch, args.max_epoch):
        start_train_time = time.time()
        train(epoch, model, criterion_xent, criterion_htri, criterion_cent,
              optimizer_model, optimizer_cent, trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)

        if args.schedule:
            scheduler.step()

        if ((epoch + 1) > args.start_eval and args.eval_step > 0
                and (epoch + 1) % args.eval_step == 0) \
                or (epoch + 1) == args.max_epoch:
            print("==> Test")
            rank1 = test(model, queryloader, galleryloader, use_gpu)
            is_best = rank1 > best_rank1
            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'rank1': rank1,
                    'epoch': epoch,
                }, is_best,
                osp.join(args.save_dir,
                         'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(
        best_rank1, best_epoch))
    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print("Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}."
          .format(elapsed, train_time))
fc = torch.nn.Linear(2, 2).cuda()
layer = Membership_norm(2, 4,
                        init_c=-5 * torch.ones((2, 4), dtype=torch.float),
                        init_lamda=4 * torch.ones((2, 4), dtype=torch.float)).cuda()

# loss_focal = torch.nn.MSELoss()
loss_focal = FocalLoss()
loss_center = CenterLoss()

# Separate learning rates for the linear layer and the membership parameters.
para = [
    {"params": fc.parameters(), "lr": 1e-3},
    {"params": layer.c, "lr": 1e-3},
    {"params": layer.lamda, "lr": 1e-3},
]
# optim = torch.optim.SGD(para)
optim = torch.optim.Adam(para)

for i in range(100000):
    # x_in_tensor: input batch, defined elsewhere in the script.
    h = fc(x_in_tensor).unsqueeze(2)
    y = layer(h)
class Trainer(BaseTrainer):
    def __init__(self, config):
        super(Trainer, self).__init__(config)
        self.datamanager = DataManger(config["data"])

        # model
        self.model = Baseline(
            num_classes=self.datamanager.datasource.get_num_classes("train")
        )

        # summary model
        summary(
            self.model,
            input_size=(3, 256, 128),
            batch_size=config["data"]["batch_size"],
            device="cpu",
        )

        # losses
        cfg_losses = config["losses"]
        self.criterion = Softmax_Triplet_loss(
            num_class=self.datamanager.datasource.get_num_classes("train"),
            margin=cfg_losses["margin"],
            epsilon=cfg_losses["epsilon"],
            use_gpu=self.use_gpu,
        )
        self.center_loss = CenterLoss(
            num_classes=self.datamanager.datasource.get_num_classes("train"),
            feature_dim=2048,
            use_gpu=self.use_gpu,
        )

        # optimizers: Adam for the model, SGD for the class centers
        cfg_optimizer = config["optimizer"]
        self.optimizer = torch.optim.Adam(
            self.model.parameters(),
            lr=cfg_optimizer["lr"],
            weight_decay=cfg_optimizer["weight_decay"],
        )
        self.optimizer_centerloss = torch.optim.SGD(
            self.center_loss.parameters(), lr=0.5
        )

        # learning rate scheduler
        cfg_lr_scheduler = config["lr_scheduler"]
        self.lr_scheduler = WarmupMultiStepLR(
            self.optimizer,
            milestones=cfg_lr_scheduler["steps"],
            gamma=cfg_lr_scheduler["gamma"],
            warmup_factor=cfg_lr_scheduler["factor"],
            warmup_iters=cfg_lr_scheduler["iters"],
            warmup_method=cfg_lr_scheduler["method"],
        )

        # track metrics
        self.train_metrics = MetricTracker("loss", "accuracy")
        self.valid_metrics = MetricTracker("loss", "accuracy")

        # best accuracy so far, used by _save_checkpoint
        self.best_accuracy = None

        # send model to device
        self.model.to(self.device)
        self.scaler = GradScaler()

        # resume model from last checkpoint
        if config["resume"] != "":
            self._resume_checkpoint(config["resume"])

    def train(self):
        for epoch in range(self.start_epoch, self.epochs + 1):
            result = self._train_epoch(epoch)

            if self.lr_scheduler is not None:
                self.lr_scheduler.step()

            result = self._valid_epoch(epoch)

            # add scalars to tensorboard
            self.writer.add_scalars(
                "Loss",
                {
                    "Train": self.train_metrics.avg("loss"),
                    "Val": self.valid_metrics.avg("loss"),
                },
                global_step=epoch,
            )
            self.writer.add_scalars(
                "Accuracy",
                {
                    "Train": self.train_metrics.avg("accuracy"),
                    "Val": self.valid_metrics.avg("accuracy"),
                },
                global_step=epoch,
            )

            # log results to console
            log = {"epoch": epoch}
            log.update(result)
            for key, value in log.items():
                self.logger.info("    {:15s}: {}".format(str(key), value))

            # save model
            if (
                self.best_accuracy is None
                or self.best_accuracy < self.valid_metrics.avg("accuracy")
            ):
                self.best_accuracy = self.valid_metrics.avg("accuracy")
                self._save_checkpoint(epoch, save_best=True)
            else:
                self._save_checkpoint(epoch, save_best=False)

            # save logs
            self._save_logs(epoch)

    def _train_epoch(self, epoch):
        """Training step"""
        self.model.train()
        self.train_metrics.reset()
        with tqdm(total=len(self.datamanager.get_dataloader("train"))) as epoch_pbar:
            epoch_pbar.set_description(f"Epoch {epoch}")
            for batch_idx, (data, labels, _) in enumerate(
                self.datamanager.get_dataloader("train")
            ):
                # push data to device
                data, labels = data.to(self.device), labels.to(self.device)

                # zero gradients
                self.optimizer.zero_grad()
                self.optimizer_centerloss.zero_grad()

                with autocast():
                    # forward batch
                    score, feat = self.model(data)

                    # calculate loss and accuracy
                    loss = (
                        self.criterion(score, feat, labels)
                        + self.center_loss(feat, labels)
                        * self.config["losses"]["beta"]
                    )
                    _, preds = torch.max(score.data, dim=1)

                # backward with AMP scaling
                self.scaler.scale(loss).backward()

                # undo the beta weighting on the center-loss gradients so the
                # centers' effective learning rate is independent of beta
                for param in self.center_loss.parameters():
                    param.grad.data *= 1.0 / self.config["losses"]["beta"]

                # optimize; scaler.step unscales each optimizer's gradients
                # before stepping it
                self.scaler.step(self.optimizer)
                self.scaler.step(self.optimizer_centerloss)
                self.scaler.update()

                # update loss and accuracy in MetricTracker
                self.train_metrics.update("loss", loss.item())
                self.train_metrics.update(
                    "accuracy",
                    torch.sum(preds == labels.data).double().item() / data.size(0),
                )

                # update progress bar
                epoch_pbar.set_postfix(
                    {
                        "train_loss": self.train_metrics.avg("loss"),
                        "train_acc": self.train_metrics.avg("accuracy"),
                    }
                )
                epoch_pbar.update(1)
        return self.train_metrics.result()

    def _valid_epoch(self, epoch):
        """Validation step"""
        self.model.eval()
        self.valid_metrics.reset()
        with torch.no_grad():
            with tqdm(total=len(self.datamanager.get_dataloader("val"))) as epoch_pbar:
                epoch_pbar.set_description(f"Epoch {epoch}")
                for batch_idx, (data, labels, _) in enumerate(
                    self.datamanager.get_dataloader("val")
                ):
                    # push data to device
                    data, labels = data.to(self.device), labels.to(self.device)

                    with autocast():
                        # forward batch
                        score, feat = self.model(data)

                        # calculate loss and accuracy
                        loss = (
                            self.criterion(score, feat, labels)
                            + self.center_loss(feat, labels)
                            * self.config["losses"]["beta"]
                        )
                        _, preds = torch.max(score.data, dim=1)

                    # update loss and accuracy in MetricTracker
                    self.valid_metrics.update("loss", loss.item())
                    self.valid_metrics.update(
                        "accuracy",
                        torch.sum(preds == labels.data).double().item()
                        / data.size(0),
                    )

                    # update progress bar
                    epoch_pbar.set_postfix(
                        {
                            "val_loss": self.valid_metrics.avg("loss"),
                            "val_acc": self.valid_metrics.avg("accuracy"),
                        }
                    )
                    epoch_pbar.update(1)
        return self.valid_metrics.result()

    def _save_checkpoint(self, epoch, save_best=True):
        """Save model to file"""
        state = {
            "epoch": epoch,
            "state_dict": self.model.state_dict(),
            "center_loss": self.center_loss.state_dict(),
            "optimizer": self.optimizer.state_dict(),
            "optimizer_centerloss": self.optimizer_centerloss.state_dict(),
            "lr_scheduler": self.lr_scheduler.state_dict(),
            "best_accuracy": self.best_accuracy,
        }
        filename = os.path.join(self.checkpoint_dir, "model_last.pth")
        self.logger.info("Saving last model: model_last.pth ...")
        torch.save(state, filename)
        if save_best:
            filename = os.path.join(self.checkpoint_dir, "model_best.pth")
            self.logger.info("Saving current best: model_best.pth ...")
            torch.save(state, filename)

    def _resume_checkpoint(self, resume_path):
        """Load model from checkpoint"""
        if not os.path.exists(resume_path):
            raise FileNotFoundError("Resume path does not exist!")
        self.logger.info("Loading checkpoint: {} ...".format(resume_path))
        checkpoint = torch.load(resume_path, map_location=self.map_location)
        self.start_epoch = checkpoint["epoch"] + 1
        self.model.load_state_dict(checkpoint["state_dict"])
        self.center_loss.load_state_dict(checkpoint["center_loss"])
        self.optimizer.load_state_dict(checkpoint["optimizer"])
        self.optimizer_centerloss.load_state_dict(checkpoint["optimizer_centerloss"])
        self.lr_scheduler.load_state_dict(checkpoint["lr_scheduler"])
        self.best_accuracy = checkpoint["best_accuracy"]
        self.logger.info(
            "Checkpoint loaded. Resume training from epoch {}".format(self.start_epoch)
        )

    def _save_logs(self, epoch):
        """Copy logs (e.g. from Google Colab) to Google Drive"""
        if os.path.isdir(self.logs_dir_saved):
            shutil.rmtree(self.logs_dir_saved)
        destination = shutil.copytree(self.logs_dir, self.logs_dir_saved)
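# Why the 1/beta rescaling in _train_epoch works: gradients are linear in the
# loss weight, so scaling the weighted center-loss gradients by 1/beta recovers
# the gradients of the unweighted center loss. A self-contained check with a toy
# parameter standing in for the centers (beta and the quadratic term are just
# illustrative values):
import torch

beta = 0.003
w = torch.ones(3, requires_grad=True)
center_term = (w ** 2).sum()           # stand-in for the center loss
(beta * center_term).backward()        # weighted, as in the total loss above
w.grad *= 1.0 / beta                   # undo the weighting
assert torch.allclose(w.grad, 2 * w.detach())  # matches the unweighted gradient
print(w.grad)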
class Trainer(object):
    # cuda = torch.cuda.is_available()
    # torch.backends.cudnn.benchmark = True

    def __init__(self, model, optimizer, loss_f, save_dir=None, save_freq=1):
        self.model = model
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        if torch.cuda.device_count() > 1:
            print("Let's use", torch.cuda.device_count(), "GPUs!")
            # dim = 0: [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
            self.model = torch.nn.DataParallel(
                self.model, device_ids=range(torch.cuda.device_count()))
        self.model.to(device)
        # self.model.load_state_dict(
        #     torch.load("checkpoints/exp1/model_370.pkl")['weight'])
        self.optimizer = optimizer
        self.loss_1 = loss_f().cuda()
        self.loss_2 = CenterLoss().cuda()
        self.optimizer_center = torch.optim.Adam(params=self.loss_2.parameters())
        self.save_dir = save_dir
        self.save_freq = save_freq
        self.writer = SummaryWriter()

    def _iteration(self, data_loader, ep, is_train=True):
        loop_loss = []
        outputlabel = []
        targetlabel = []
        for img1, target in tqdm(data_loader):
            img1, target = img1.cuda(), target.cuda()
            target = target.squeeze_()
            out1, out2 = self.model(img1)
            loss_1 = self.loss_1(out2, target)   # classification loss on logits
            loss_2 = self.loss_2(out1, target)   # center loss on features
            print(">>>loss:", loss_1.data.item())
            loop_loss.append(loss_1.data.item() / len(data_loader))
            if is_train:
                self.optimizer.zero_grad()
                self.optimizer_center.zero_grad()
                # Retain the graph so the center loss can backprop through the
                # shared forward pass as well.
                loss_1.backward(retain_graph=True)
                loss_2.backward()
                self.optimizer_center.step()
                self.optimizer.step()
            output = F.softmax(out2, dim=1)
            output = output.cpu().data.numpy()
            output = np.argmax(output, axis=1)
            target = target.cpu().data.numpy()
            target = np.reshape(target, [-1]).astype(np.int8)
            output = np.reshape(output, [-1]).astype(np.int8)
            outputlabel.append(output)
            targetlabel.append(target)

        if is_train:
            self.writer.add_scalar('train/loss_epoch', sum(loop_loss), ep)
            targetlabel = np.reshape(np.array(targetlabel), [-1]).astype(int)
            outputlabel = np.reshape(np.array(outputlabel), [-1]).astype(int)
            accuracy = accuracy_score(targetlabel, outputlabel)
            self.writer.add_scalar('train/accuracy', accuracy, ep)
        else:
            targetlabel = np.reshape(np.array(targetlabel), [-1]).astype(int)
            outputlabel = np.reshape(np.array(outputlabel), [-1]).astype(int)
            accuracy = accuracy_score(targetlabel, outputlabel)
            print(accuracy)
            matrixs = confusion_matrix(targetlabel, outputlabel)
            np.save('matrixs/matrixs_' + str(ep) + '.npy', matrixs)
            self.writer.add_scalar('test/accuracy', accuracy, ep)
            self.writer.add_scalar('test/loss_epoch', sum(loop_loss), ep)

        mode = "train" if is_train else "test"
        print(">>>[{mode}] loss: {loss}".format(mode=mode, loss=sum(loop_loss)))
        return loop_loss

    def train(self, data_loader, ep):
        self.model.train()
        with torch.enable_grad():
            loss = self._iteration(data_loader, ep)

    def test(self, data_loader, ep):
        self.model.eval()
        with torch.no_grad():
            loss = self._iteration(data_loader, ep, is_train=False)

    def loop(self, epochs, train_data, test_data, scheduler=None):
        for ep in range(1, epochs + 1):
            if scheduler is not None:
                scheduler.step()
            print("epochs: {}".format(ep))
            self.train(train_data, ep)
            if ep % self.save_freq == 0:
                self.save(ep)
            self.test(test_data, ep)

    def save(self, epoch, **kwargs):
        model_out_path = self.save_dir
        state = {"epoch": epoch, "weight": self.model.state_dict()}
        if not os.path.exists(model_out_path):
            os.makedirs(model_out_path)
        torch.save(state, model_out_path + '/model_{epoch}.pkl'.format(epoch=epoch))