from datetime import datetime

import numpy as np
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import StepLR
from torch.utils.tensorboard import SummaryWriter  # or: from tensorboardX import SummaryWriter

# Repo-local helpers (the resnet/MobileNet builders, ArcMarginModel, FocalLoss,
# ArcFaceDataset, lfw_test, full_log, get_logger, save_checkpoint, train,
# device, ...) are assumed to be imported from the surrounding project.


def train_net(args):
    torch.manual_seed(7)
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_acc = 0
    writer = SummaryWriter()
    epochs_since_improvement = 0

    # Initialize / load checkpoint
    if checkpoint is None:
        if args.network == 'r18':
            model = resnet18(args)
        elif args.network == 'r34':
            model = resnet34(args)
        elif args.network == 'r50':
            model = resnet50(args)
        elif args.network == 'r101':
            model = resnet101(args)
        elif args.network == 'r152':
            model = resnet152(args)
        elif args.network == 'mobile':
            model = MobileNet(1.0)
        elif args.network == 'mr18':
            print("mr18")
            model = myResnet18()
        else:
            model = resnet_face18(args.use_se)
        model = nn.DataParallel(model)
        metric_fc = ArcMarginModel(args)
        metric_fc = nn.DataParallel(metric_fc)

        if args.optimizer == 'sgd':
            optimizer = torch.optim.SGD([{'params': model.parameters()},
                                         {'params': metric_fc.parameters()}],
                                        lr=args.lr, momentum=args.mom,
                                        weight_decay=args.weight_decay)
        else:
            optimizer = torch.optim.Adam([{'params': model.parameters()},
                                          {'params': metric_fc.parameters()}],
                                         lr=args.lr,
                                         weight_decay=args.weight_decay)
    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        metric_fc = checkpoint['metric_fc']
        optimizer = checkpoint['optimizer']

    logger = get_logger()

    # Move to GPU, if available
    model = model.to(device)
    metric_fc = metric_fc.to(device)

    # Loss function
    if args.focal_loss:
        criterion = FocalLoss(gamma=args.gamma).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    train_dataset = ArcFaceDataset('train')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True)

    scheduler = StepLR(optimizer, step_size=args.lr_step, gamma=0.1)

    # Epochs
    for epoch in range(start_epoch, args.end_epoch):
        if args.full_log:
            lfw_acc, threshold = lfw_test(model)
            writer.add_scalar('LFW_Accuracy', lfw_acc, epoch)
            full_log(epoch)

        start = datetime.now()

        # One epoch's training
        train_loss, train_top5_accs = train(train_loader=train_loader,
                                            model=model,
                                            metric_fc=metric_fc,
                                            criterion=criterion,
                                            optimizer=optimizer,
                                            epoch=epoch,
                                            logger=logger,
                                            writer=writer)
        writer.add_scalar('Train_Loss', train_loss, epoch)
        writer.add_scalar('Train_Top5_Accuracy', train_top5_accs, epoch)

        # Step the LR scheduler after the epoch's optimizer updates
        # (the ordering PyTorch >= 1.1 expects)
        scheduler.step()

        end = datetime.now()
        delta = end - start
        print('{} seconds'.format(delta.seconds))

        # One epoch's validation
        lfw_acc, threshold = lfw_test(model)
        writer.add_scalar('LFW_Accuracy', lfw_acc, epoch)

        # Check if there was an improvement
        is_best = lfw_acc > best_acc
        best_acc = max(lfw_acc, best_acc)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement,))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, epochs_since_improvement, model, metric_fc,
                        optimizer, best_acc, is_best)
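# The FocalLoss used above is defined elsewhere in the repo; the following is
# a minimal sketch of one common formulation, assuming only the call-site
# signature FocalLoss(gamma=...) applied to raw logits. It is an illustration,
# not necessarily the project's implementation.
import torch
import torch.nn as nn
import torch.nn.functional as F


class FocalLoss(nn.Module):
    """Cross-entropy scaled by (1 - p_t)^gamma to down-weight easy examples."""

    def __init__(self, gamma=2.0):
        super(FocalLoss, self).__init__()
        self.gamma = gamma

    def forward(self, logits, targets):
        log_p = F.log_softmax(logits, dim=-1)
        ce = F.nll_loss(log_p, targets, reduction='none')  # per-sample -log p_t
        p_t = torch.exp(-ce)                               # recover p_t
        return ((1.0 - p_t) ** self.gamma * ce).mean()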
import argparse

import torch
import torch.nn as nn
import torch.optim as optim

# get_loaders, train and test are repo-local helpers assumed to be imported
# from the surrounding project.


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--mode", type=str, default="train")
    parser.add_argument("--model", type=str, default="mobilenet_v2")
    parser.add_argument("--dataset", type=str, default="cifar10")
    parser.add_argument("--dataroot", type=str, default="/tmp/data")
    parser.add_argument("--batch_size", type=int, default=128)
    parser.add_argument("--n_epochs", type=int, default=100)
    parser.add_argument("--lr", type=float, default=1e-3)
    parser.add_argument("--n_gpus", type=int, default=1)
    parser.add_argument("--checkpoint", type=str, default="/tmp/chkpt.pth.tar")
    parser.add_argument("--save_every", type=int, default=10)
    parser.add_argument("--pretrained", type=str, default=None)
    args = parser.parse_args()
    print(args)

    if torch.cuda.is_available():
        print("cuda is available, use cuda")
        device = torch.device("cuda")
    else:
        print("cuda is not available, use cpu")
        device = torch.device("cpu")

    print("download dataset: {}".format(args.dataset))
    train_loader, test_loader, n_classes = get_loaders(
        dataset=args.dataset, root=args.dataroot, batch_size=args.batch_size)

    print("build model: {}".format(args.model))
    if args.model == "mobilenet":
        from models import MobileNet
        model = MobileNet(n_classes=n_classes)
    elif args.model == "mobilenet_v2":
        from models import MobileNet_v2
        model = MobileNet_v2(n_classes=n_classes)
    elif args.model == "shufflenet":
        from models import ShuffleNet
        model = ShuffleNet(n_classes=n_classes)
    elif args.model == "shufflenet_v2":
        from models import ShuffleNet_v2
        model = ShuffleNet_v2(n_classes=n_classes)
    elif args.model == "squeezenet":
        from models import SqueezeNet
        model = SqueezeNet(n_classes=n_classes)
    else:
        raise NotImplementedError

    model = model.to(device)
    if args.pretrained:
        # load the weights from the path given by --pretrained
        model.load_state_dict(torch.load(args.pretrained))

    if args.n_gpus > 1:
        model = nn.DataParallel(model, device_ids=list(range(args.n_gpus)))

    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    criterion = nn.CrossEntropyLoss()

    if args.mode == "train":
        for epoch in range(args.n_epochs):
            train(epoch, model, optimizer, criterion, train_loader, device)
            if (epoch + 1) % args.save_every == 0:
                print("saving model...")
                torch.save(model.state_dict(), args.checkpoint)
    elif args.mode == "test":
        test(model, criterion, test_loader, device)
    else:
        raise NotImplementedError
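# Minimal sketches of the train/test helpers that main() calls. The signatures
# are inferred from the call sites above; the bodies are assumptions and serve
# as illustrations, not the project's actual implementations.
import torch


def train(epoch, model, optimizer, criterion, train_loader, device):
    model.train()
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print("epoch {} [{}/{}] loss {:.4f}".format(
                epoch, batch_idx, len(train_loader), loss.item()))


def test(model, criterion, test_loader, device):
    model.eval()
    total_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            total_loss += criterion(outputs, targets).item() * targets.size(0)
            correct += (outputs.argmax(dim=1) == targets).sum().item()
            total += targets.size(0)
    print("test loss {:.4f}, accuracy {:.2%}".format(
        total_loss / total, correct / total))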
elif backbone_network == 'WideResNet':
    from models import WideResNet
    model = WideResNet(n_layers=n_layers,
                       widening_factor=opt.widening_factor,
                       dataset=opt.dataset,
                       attention=opt.attention_module,
                       group_size=opt.group_size)

model = nn.DataParallel(model).to(device)
criterion = nn.CrossEntropyLoss()

if dataset_name in ['CIFAR10', 'CIFAR100']:
    optim = torch.optim.SGD(model.parameters(), lr=opt.lr,
                            momentum=opt.momentum,
                            weight_decay=opt.weight_decay)
    milestones = [150, 225]
elif dataset_name == 'ImageNet':
    optim = torch.optim.SGD(model.parameters(), lr=opt.lr,
                            momentum=opt.momentum,
                            weight_decay=opt.weight_decay)
    milestones = [30, 60]
elif dataset_name == 'SVHN':
    optim = torch.optim.SGD(model.parameters(), lr=opt.lr,
                            momentum=opt.momentum,
                            weight_decay=opt.weight_decay)
def train_net(args):
    torch.manual_seed(7)
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_acc = 0
    writer = SummaryWriter()
    epochs_since_improvement = 0

    # Initialize / load checkpoint
    if checkpoint is None:
        if args.network == 'r18':
            model = resnet18(args)
        elif args.network == 'r34':
            model = resnet34(args)
        elif args.network == 'r50':
            model = resnet50(args)
        elif args.network == 'r101':
            model = resnet101(args)
        elif args.network == 'r152':
            model = resnet152(args)
        elif args.network == 'mobile':
            model = MobileNet(1.0)
        else:
            model = resnet_face18(args.use_se)
        model = nn.DataParallel(model)
        metric_fc = ArcMarginModel(args)
        metric_fc = nn.DataParallel(metric_fc)

        if args.optimizer == 'sgd':
            # optimizer = torch.optim.SGD([{'params': model.parameters()},
            #                              {'params': metric_fc.parameters()}],
            #                             lr=args.lr, momentum=args.mom,
            #                             weight_decay=args.weight_decay)
            optimizer = InsightFaceOptimizer(
                torch.optim.SGD([{'params': model.parameters()},
                                 {'params': metric_fc.parameters()}],
                                lr=args.lr, momentum=args.mom,
                                weight_decay=args.weight_decay))
        else:
            optimizer = torch.optim.Adam([{'params': model.parameters()},
                                          {'params': metric_fc.parameters()}],
                                         lr=args.lr,
                                         weight_decay=args.weight_decay)
    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        metric_fc = checkpoint['metric_fc']
        optimizer = checkpoint['optimizer']

    logger = get_logger()

    # Move to GPU, if available
    model = model.to(device)
    metric_fc = metric_fc.to(device)

    # Loss function
    if args.focal_loss:
        criterion = FocalLoss(gamma=args.gamma).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    train_dataset = ArcFaceDataset('train')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=8)

    # Epochs
    for epoch in range(start_epoch, args.end_epoch):
        # One epoch's training
        train_loss, train_top1_accs = train(train_loader=train_loader,
                                            model=model,
                                            metric_fc=metric_fc,
                                            criterion=criterion,
                                            optimizer=optimizer,
                                            epoch=epoch,
                                            logger=logger)
        print('\nCurrent effective learning rate: {}\n'.format(optimizer.lr))
        print('Step num: {}\n'.format(optimizer.step_num))

        writer.add_scalar('model/train_loss', train_loss, epoch)
        writer.add_scalar('model/train_accuracy', train_top1_accs, epoch)
        writer.add_scalar('model/learning_rate', optimizer.lr, epoch)

        # One epoch's validation
        megaface_acc = megaface_test(model)
        writer.add_scalar('model/megaface_accuracy', megaface_acc, epoch)

        # Check if there was an improvement
        is_best = megaface_acc > best_acc
        best_acc = max(megaface_acc, best_acc)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement,))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, epochs_since_improvement, model, metric_fc,
                        optimizer, best_acc, is_best)
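# The InsightFaceOptimizer wrapper above is defined elsewhere in the repo; the
# call sites only rely on .lr, .step_num and (inside train()) the usual
# .zero_grad()/.step(). This sketch of a step-counting lr-decay wrapper is an
# assumption -- the milestone values and the decay rule are illustrative only.
class InsightFaceOptimizer(object):
    """Wrap an optimizer, decaying its lr by `gamma` at fixed step counts."""

    def __init__(self, optimizer, milestones=(100000, 160000), gamma=0.1):
        self.optimizer = optimizer
        self.milestones = set(milestones)  # hypothetical decay points
        self.gamma = gamma
        self.step_num = 0
        self.lr = optimizer.param_groups[0]['lr']

    def zero_grad(self):
        self.optimizer.zero_grad()

    def step(self):
        self.step_num += 1
        if self.step_num in self.milestones:
            # scale the learning rate down in every parameter group
            self.lr *= self.gamma
            for group in self.optimizer.param_groups:
                group['lr'] = self.lr
        self.optimizer.step()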
def train_net(args):
    torch.manual_seed(7)  # torch random seed, used by torch.randn
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_acc = 0
    writer = SummaryWriter()  # TensorBoard
    epochs_since_improvement = 0

    # Initialize / load checkpoint
    if checkpoint is None:
        if args.network == 'r18':
            model = resnet18(args)
        elif args.network == 'r34':
            model = resnet34(args)
        elif args.network == 'r50':
            model = resnet50(args)
        elif args.network == 'r101':
            model = resnet101(args)
        elif args.network == 'r152':
            model = resnet152(args)
        elif args.network == 'mobile':
            model = MobileNet(1.0)
        else:
            model = resnet_face18(args.use_se)
        model = nn.DataParallel(model)
        metric_fc = ArcMarginModel(args)
        metric_fc = nn.DataParallel(metric_fc)

        if args.optimizer == 'sgd':
            # optimizer = torch.optim.SGD([{'params': model.parameters()},
            #                              {'params': metric_fc.parameters()}],
            #                             lr=args.lr, momentum=args.mom,
            #                             weight_decay=args.weight_decay)
            optimizer = InsightFaceOptimizer(
                torch.optim.SGD([{'params': model.parameters()},
                                 {'params': metric_fc.parameters()}],
                                lr=args.lr, momentum=args.mom,
                                weight_decay=args.weight_decay))
        else:
            optimizer = torch.optim.Adam([{'params': model.parameters()},
                                          {'params': metric_fc.parameters()}],
                                         lr=args.lr,
                                         weight_decay=args.weight_decay)
    else:
        # the saved objects still have to be restored by hand here
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        metric_fc = checkpoint['metric_fc']
        optimizer = checkpoint['optimizer']

    logger = get_logger()

    # Move to GPU, if available
    model = model.to(device)
    metric_fc = metric_fc.to(device)

    # Loss function
    if args.focal_loss:
        criterion = FocalLoss(gamma=args.gamma).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    train_dataset = Dataset(root=args.train_path, phase='train',
                            input_shape=(3, 112, 112))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=8)

    # Epochs
    for epoch in range(start_epoch, args.end_epoch):
        # One epoch's training
        # factoring the per-epoch training into its own function keeps this
        # loop very concise -- a pattern worth learning from
        train_loss, train_top1_accs = train(train_loader=train_loader,
                                            model=model,
                                            metric_fc=metric_fc,
                                            criterion=criterion,
                                            optimizer=optimizer,
                                            epoch=epoch,
                                            logger=logger)
        print('\nCurrent effective learning rate: {}\n'.format(optimizer.lr))
        print('Step num: {}\n'.format(optimizer.step_num))

        writer.add_scalar('model/train_loss', train_loss, epoch)
        writer.add_scalar('model/train_accuracy', train_top1_accs, epoch)
        writer.add_scalar('model/learning_rate', optimizer.lr, epoch)

        # Save checkpoint
        if epoch % 10 == 0:
            save_checkpoint(epoch, epochs_since_improvement, model, metric_fc,
                            optimizer, best_acc)
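# save_checkpoint as called above is assumed to serialize the whole model,
# metric head and optimizer objects, so the resume branch (checkpoint['model'],
# checkpoint['optimizer'], ...) can restore them directly. A sketch of that
# assumed contract; the keys match the resume branch, the file names are
# hypothetical.
import torch


def save_checkpoint(epoch, epochs_since_improvement, model, metric_fc,
                    optimizer, acc, is_best=False):
    state = {'epoch': epoch,
             'epochs_since_improvement': epochs_since_improvement,
             'acc': acc,
             'model': model,
             'metric_fc': metric_fc,
             'optimizer': optimizer}
    torch.save(state, 'checkpoint.tar')
    if is_best:
        # keep a separate copy of the best-performing checkpoint
        torch.save(state, 'BEST_checkpoint.tar')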