def get_model():
    """Build the backbone named by ``config.model`` and re-initialise its
    final fully-connected layer.

    Returns:
        torch.nn.Module: the selected torchvision-style model with a fresh
        ``fc`` layer of the same (in_features, out_features) shape.

    Raises:
        ValueError: if ``config.model`` names an unsupported architecture.
    """
    # Map each config name to its real constructor. The original if/elif
    # chain silently built plain resnets for the resnext*/wide_resnet*
    # names (copy-paste bug); each name now gets its own architecture.
    constructors = {
        'resnet18': models.resnet18,
        'resnet34': models.resnet34,
        'resnet50': models.resnet50,
        'resnet101': models.resnet101,
        'resnet152': models.resnet152,
        'resnext50_32x4d': models.resnext50_32x4d,
        'resnext101_32x8d': models.resnext101_32x8d,
        'wide_resnet50_2': models.wide_resnet50_2,
        'wide_resnet101_2': models.wide_resnet101_2,
    }
    try:
        constructor = constructors[config.model]
    except KeyError:
        # Original mixed %-style with str.format and printed a literal '%s'.
        raise ValueError('{} not supported'.format(config.model))
    model = constructor(pretrained=config.pretrained)

    # Re-initialize fc layer: same shape, fresh (default-initialised) weights.
    in_features, out_features = model.fc.in_features, model.fc.out_features
    model.fc = torch.nn.Linear(in_features, out_features)
    return model
def create_model(name, num_classes):
    """Create a classifier backbone and replace its head for ``num_classes``.

    Known names: resnet34, resnet152, densenet121, vgg11_bn, vgg19_bn,
    alexnet; any other name falls back to the project-local ``Net``.
    ImageNet-pretrained weights are loaded for every branch except vgg11_bn.

    Args:
        name: architecture identifier (see above).
        num_classes: output dimension of the new classification head.

    Returns:
        the model with a freshly Xavier-initialised head.
    """
    def _reset_head(linear):
        # Use the in-place ``_`` initialisers; the underscore-less aliases
        # the original called are deprecated and removed in modern PyTorch.
        nn.init.xavier_uniform_(linear.weight)
        nn.init.constant_(linear.bias, 0)

    if name == 'resnet34':
        model = models.resnet34(True)
        model.fc = nn.Linear(model.fc.in_features, num_classes)
        _reset_head(model.fc)
    elif name == 'resnet152':
        model = models.resnet152(True)
        model.fc = nn.Linear(model.fc.in_features, num_classes)
        _reset_head(model.fc)
    elif name == 'densenet121':
        model = models.densenet121(True)
        model.classifier = nn.Linear(model.classifier.in_features, num_classes)
        _reset_head(model.classifier)
    elif name == 'vgg11_bn':
        # Pass num_classes by keyword: the second positional parameter of
        # torchvision's vgg11_bn is NOT num_classes, so the original
        # positional call was broken/fragile.
        model = models.vgg11_bn(False, num_classes=num_classes)
    elif name == 'vgg19_bn':
        model = models.vgg19_bn(True)
        old_head = model.classifier._modules['6']
        model.classifier._modules['6'] = nn.Linear(old_head.in_features,
                                                   num_classes)
        _reset_head(model.classifier._modules['6'])
    elif name == 'alexnet':
        model = models.alexnet(True)
        old_head = model.classifier._modules['6']
        model.classifier._modules['6'] = nn.Linear(old_head.in_features,
                                                   num_classes)
        _reset_head(model.classifier._modules['6'])
    else:
        model = Net(num_classes)
    return model
def __init__(self, nb_classes, encoder=None, decoder=None, latent_size=500): super(SoftVAE, self).__init__() # Encoder if encoder is None: encoder = resnet34() self.encoder = encoder # Latent variable intermediate_size = 512 * encoder.expansion * 7 * 7 self.fc1 = nn.Linear(intermediate_size, latent_size) self.fc2 = nn.Linear(intermediate_size, latent_size) # Classification self.fc3 = nn.Linear(intermediate_size, nb_classes) # Decoder self.fc4 = nn.Linear(latent_size + nb_classes, intermediate_size) if decoder is None: decoder = decoder31() self.decoder = decoder if encoder.expansion != decoder.expansion: raise ValueError('Encoder expansion, {} != Decoder expansion, {}' ''.format(encoder.expansion, decoder.expansion))
def model_config(net_type, num_classes, OOD_num_classes):
    """Instantiate a pretrained backbone with a main and an OOD head.

    Args:
        net_type: one of resnet50/resnet34/vgg19/vgg16/vgg19_bn/vgg16_bn.
        num_classes: size of the in-distribution head (forwarded as num_c).
        OOD_num_classes: size of the OOD head (forwarded as num_cc).

    Returns:
        the constructed model.

    Raises:
        ValueError: for an unknown ``net_type``. (The original had no
        else-branch and crashed with UnboundLocalError on ``return model``.)
    """
    builders = {
        "resnet50": models.resnet50,
        "resnet34": models.resnet34,
        "vgg19": models.vgg19,
        "vgg16": models.vgg16,
        "vgg19_bn": models.vgg19_bn,
        "vgg16_bn": models.vgg16_bn,
    }
    if net_type not in builders:
        raise ValueError('{} is not a supported net_type'.format(net_type))
    return builders[net_type](num_c=num_classes, num_cc=OOD_num_classes,
                              pretrained=True)
def __init__(self, num_classes=2):
    """Image classifier: pretrained resnet34 backbone plus a small MLP head.

    Args:
        num_classes: output dimension of the final layer (default 2).
    """
    super(new_model, self).__init__()
    # ImageNet-pretrained backbone; its fc is replaced so the backbone
    # now emits 256-d features (512 is resnet34's fc.in_features).
    self.pre_model = models.resnet34(pretrained=True)
    self.pre_model.fc = nn.Linear(512, 256)
    self.relu1 = nn.ReLU(inplace=True)
    self.new_fc1 = nn.Linear(256, 128)
    self.relu2 = nn.ReLU(inplace=True)
    self.new_fc2 = nn.Linear(128, num_classes)
    # NOTE(review): these layers are presumably chained in forward(),
    # which is not visible in this chunk — confirm there.
def create_model(name, num_classes):
    """Create a classifier backbone and replace its head for ``num_classes``.

    Known names: resnet34, resnet50, resnet152, seresnet50, seresnet152,
    dpn131, densenet121, vgg11_bn, vgg19_bn, alexnet; any other name falls
    back to the project-local ``Net``. Pretrained weights are loaded where
    the original did so.

    Args:
        name: architecture identifier (see above).
        num_classes: output dimension of the new classification head.

    Returns:
        the model with its head replaced (Xavier-initialised where the
        branch re-initialises explicitly).
    """
    def _reset_head(linear):
        # In-place ``_`` initialisers: the underscore-less aliases used
        # originally are deprecated and removed in modern PyTorch.
        nn.init.xavier_uniform_(linear.weight)
        nn.init.constant_(linear.bias, 0)

    def _resnet_head(model):
        # Shared head-swap for the torchvision resnet variants.
        model.fc = nn.Linear(model.fc.in_features, num_classes)
        _reset_head(model.fc)

    if name == 'resnet34':
        model = models.resnet34(True)
        _resnet_head(model)
    elif name == 'resnet50':
        model = models.resnet50(True)
        _resnet_head(model)
    elif name == 'resnet152':
        model = models.resnet152(True)
        _resnet_head(model)
    elif name == 'seresnet50':
        model = models.se_resnet50()
        model.last_linear = nn.Linear(model.last_linear.in_features,
                                      num_classes, bias=True)
    elif name == 'seresnet152':
        model = models.se_resnet152()
        model.last_linear = nn.Linear(model.last_linear.in_features,
                                      num_classes, bias=True)
    elif name == 'dpn131':
        # DPN uses a 1x1 conv classifier over 2688 feature channels.
        model = models.dpn131()
        model.classifier = nn.Conv2d(2688, num_classes, kernel_size=1,
                                     bias=True)
    elif name == 'densenet121':
        model = models.densenet121(True)
        model.classifier = nn.Linear(model.classifier.in_features,
                                     num_classes)
        _reset_head(model.classifier)
    elif name == 'vgg11_bn':
        # Pass num_classes by keyword: the second positional parameter of
        # torchvision's vgg11_bn is NOT num_classes, so the original
        # positional call was broken/fragile.
        model = models.vgg11_bn(False, num_classes=num_classes)
    elif name == 'vgg19_bn':
        model = models.vgg19_bn(True)
        old_head = model.classifier._modules['6']
        model.classifier._modules['6'] = nn.Linear(old_head.in_features,
                                                   num_classes)
        _reset_head(model.classifier._modules['6'])
    elif name == 'alexnet':
        model = models.alexnet(True)
        old_head = model.classifier._modules['6']
        model.classifier._modules['6'] = nn.Linear(old_head.in_features,
                                                   num_classes)
        _reset_head(model.classifier._modules['6'])
    else:
        model = Net(num_classes)
    return model
def demo_basic(rank, world_size, max_epochs=5, verbose=False):
    """Per-process DDP training of resnet34 on MNIST.

    Args:
        rank: this process's rank in [0, world_size).
        world_size: total number of DDP processes.
        max_epochs: number of passes over the training data.
        verbose: when True, every rank prints progress (default: rank 0 only).
    """
    # map rank [0, 1, 2] => ['cuda:1', 'cuda:2', 'cuda:3']
    # (GPU 0 is deliberately skipped — presumably reserved; confirm.)
    gpu_rank = rank + 1
    # create model and move it to GPU with id rank
    if torch.cuda.is_available():
        device = torch.device('cuda:{}'.format(gpu_rank))
    else:
        device = torch.device('cpu')
    model = resnet34().to(device)
    if torch.cuda.is_available():
        ddp_model = DDP(model, device_ids=[gpu_rank])
    else:
        ddp_model = DDP(model)
    # Rank-sharded MNIST loader (project helper).
    data = get_mnist('~/data', rank, world_size)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(ddp_model.parameters(), lr=0.005)
    t = time.time()
    for epoch in range(1, max_epochs + 1):
        loss_list = []
        total_count = 0
        acc_count = 0
        loader = data
        if rank == 0 or verbose:
            # Progress bar only where we actually print.
            loader = tqdm(data, total=len(data))
        for image, label in loader:
            image = image.to(device)
            label = label.to(device)
            optimizer.zero_grad()
            outputs = ddp_model(image)
            loss = criterion(outputs, label)
            loss.backward()
            optimizer.step()
            # Running accuracy over this epoch.
            total_count += outputs.shape[0]
            correct = torch.argmax(outputs, dim=1).eq(label)
            acc_count += correct.sum().item()
            loss_list.append(loss.item())
        if rank == 0 or verbose:
            print('epoch', epoch, 'acc', acc_count / total_count,
                  'loss', '{:.03}'.format(sum(loss_list) / len(loss_list)))
    # output: total and per-epoch wall-clock time
    if rank == 0 or verbose:
        t = time.time() - t
        print('Cost Time:', t, 'avg time', t / max_epochs)
def val(args):
    """Evaluate a resnet34 ECG classifier on the validation split at each
    decision threshold, printing loss/F1/accuracy/recall/precision."""
    model = models.resnet34()
    if args.ckpt:
        # Restore weights from the checkpoint's saved state_dict.
        state = torch.load(args.ckpt, map_location='cpu')
        model.load_state_dict(state['state_dict'])
    model = model.to(device)

    val_dataset = ECGDataset(data_path=config.train_data, train=False)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=config.batch_size,
                                num_workers=4)
    criterion = nn.BCEWithLogitsLoss()

    thresholds = [0.5]
    for threshold in thresholds:
        metrics = val_epoch(model, criterion, val_dataloader, threshold)
        val_loss, val_f1, val_acc, val_recall, val_precision = metrics
        print('threshold %.2f val_loss:%0.6e val_f1:%.8f\n , val_acc: %.8f\n val_recall: %.8f\n val_precision : %.8f\n' % (threshold, val_loss, val_f1,val_acc,val_recall,val_precision))
def main(args):
    """Train resnet34 with a cyclical LR under SWA, checkpointing the
    best-lwlrap weights to <experiment>/last.pth."""
    np.random.seed(432)
    torch.random.manual_seed(432)
    try:
        os.makedirs(args.outpath)
    except OSError:
        pass  # output directory already exists
    experiment_path = utils.get_new_model_path(args.outpath)
    print(experiment_path)
    train_writer = SummaryWriter(os.path.join(experiment_path, 'train_logs'))
    val_writer = SummaryWriter(os.path.join(experiment_path, 'val_logs'))
    # cyclical_lr(5, 1e-5, 2e-3): epoch -> learning rate (project helper).
    scheduler = cyclical_lr(5, 1e-5, 2e-3)
    trainer = train.Trainer(train_writer, val_writer, scheduler=scheduler)
    train_transform = data.build_preprocessing()
    eval_transform = data.build_preprocessing()
    trainds, evalds = data.build_dataset(args.datadir, None)
    trainds.transform = train_transform
    evalds.transform = eval_transform
    model = models.resnet34()
    base_opt = torch.optim.Adam(model.parameters())
    # Stochastic Weight Averaging wrapper: starts averaging at step 30,
    # then every 10 steps.
    opt = SWA(base_opt, swa_start=30, swa_freq=10)
    trainloader = DataLoader(trainds, batch_size=args.batch_size,
                             shuffle=True, num_workers=8, pin_memory=True)
    evalloader = DataLoader(evalds, batch_size=args.batch_size,
                            shuffle=False, num_workers=16, pin_memory=True)
    export_path = os.path.join(experiment_path, 'last.pth')
    best_lwlrap = 0
    for epoch in range(args.epochs):
        print('Epoch {} - lr {:.6f}'.format(epoch, scheduler(epoch)))
        trainer.train_epoch(model, opt, trainloader, scheduler(epoch))
        metrics = trainer.eval_epoch(model, evalloader)
        print('Epoch: {} - lwlrap: {:.4f}'.format(epoch, metrics['lwlrap']))
        # save best model
        if metrics['lwlrap'] > best_lwlrap:
            best_lwlrap = metrics['lwlrap']
            torch.save(model.state_dict(), export_path)
    print('Best metrics {:.4f}'.format(best_lwlrap))
    # NOTE(review): swapping in the SWA-averaged weights happens after the
    # last checkpoint was written, so last.pth never contains the averaged
    # weights — confirm this is intended.
    opt.swap_swa_sgd()
def main(args):
    """Train resnet34 with Adam, saving a full training-state checkpoint
    (model, optimizer, metrics, step) to <experiment>/last.pth each epoch."""
    np.random.seed(432)
    torch.random.manual_seed(432)
    try:
        os.makedirs(args.outpath)
    except OSError:
        pass  # output directory already exists
    experiment_path = utils.get_new_model_path(args.outpath)
    train_writer = SummaryWriter(os.path.join(experiment_path, 'train_logs'))
    val_writer = SummaryWriter(os.path.join(experiment_path, 'val_logs'))
    trainer = train.Trainer(train_writer, val_writer)  # todo: add config
    train_transform = data.build_preprocessing()
    eval_transform = data.build_preprocessing()
    trainds, evalds = data.build_dataset(args.datadir, None)
    trainds.transform = train_transform
    evalds.transform = eval_transform
    model = models.resnet34()
    opt = torch.optim.Adam(model.parameters())
    trainloader = DataLoader(trainds, batch_size=args.batch_size,
                             shuffle=True, num_workers=8, pin_memory=True)
    evalloader = DataLoader(evalds, batch_size=args.batch_size,
                            shuffle=False, num_workers=16, pin_memory=True)
    for epoch in range(args.epochs):
        # Fixed learning rate 3e-4 passed straight to the trainer.
        trainer.train_epoch(model, opt, trainloader, 3e-4)
        metrics = trainer.eval_epoch(model, evalloader)
        # Full resume-able state; overwritten every epoch (keeps last only).
        state = dict(
            epoch=epoch,
            model_state_dict=model.state_dict(),
            optimizer_state_dict=opt.state_dict(),
            loss=metrics['loss'],
            lwlrap=metrics['lwlrap'],
            global_step=trainer.global_step,
        )
        export_path = os.path.join(experiment_path, 'last.pth')
        torch.save(state, export_path)
def build_model(model_name, num_classes, pretrain):
    """Build the segmentation/classification backbone named ``model_name``.

    Args:
        model_name: one of resnet18/34/50/101/152, resnet50se,
            resnet50dilated, resnet50dcse.
        num_classes: number of output classes, forwarded to the builder.
        pretrain: pretrained-weights flag, forwarded to the builder.

    Returns:
        the constructed network.

    Raises:
        ValueError: for an unknown ``model_name``. (The original printed
        'wait a minute' and then crashed with UnboundLocalError on
        ``return net``.)
    """
    builders = {
        'resnet50': resnet50,
        'resnet18': resnet18,
        'resnet34': resnet34,
        'resnet101': resnet101,
        'resnet152': resnet152,
        'resnet50se': resnet50se,
        'resnet50dilated': resnet50_dilated,
        'resnet50dcse': resnet50_dcse,
    }
    if model_name not in builders:
        raise ValueError('unknown model_name: {}'.format(model_name))
    return builders[model_name](num_classes=num_classes, pretrain=pretrain)
def load_model(model_name):
    """Instantiate the network selected by ``model_name``.

    The two plain-CNN variants get their custom weight initialisation
    (mean 0, std 0.02); the resnet variants use their own defaults.

    Raises:
        RuntimeError: for an unrecognised model name.
    """
    if model_name == 'CNN_raw':
        net = models.ConvNet()
        net.weight_init(0, 0.02)
        return net
    if model_name == 'CNN_CBAM':
        net = models.ConvNet_CBAM()
        net.weight_init(0, 0.02)
        return net
    if model_name == 'ResNet18_raw':
        return models.resnet18()
    if model_name == 'ResNet18_CBAM':
        return models.resnet18_CBAM()
    if model_name == 'ResNet34_raw':
        return models.resnet34()
    if model_name == 'ResNet34_CBAM':
        return models.resnet34_CBAM()
    if model_name == 'ResNet50_raw':
        return models.resnet50()
    if model_name == 'ResNet50_CBAM':
        # Only this variant fixes num_classes explicitly (as in original).
        return models.resnet50_CBAM(num_classes=200)
    raise RuntimeError('Unknown model type!')
def train(args, dataloader_train, device, dataset_validation=None):
    """Train the speaker-embedding generator + AM-Softmax classifier,
    checkpointing and (optionally) scoring EER on validation trials.

    Args:
        args: namespace with model_dir, emb_size, checkpoint(s),
            learning rates, scheduler settings, intervals, multi_gpu, etc.
        dataloader_train: yields (feats, spk, utt) batches.
        device: torch device to train on.
        dataset_validation: optional trials container for EER scoring.
    """
    # Tensorflow logger
    writer = SummaryWriter(comment='_{}'.format(args.model_dir.name))
    num_classes = dataloader_train.dataset.num_classes
    # loguru
    logger.info("num_classes: " + str(num_classes))
    # Generator and classifier definition
    generator = resnet34(args)
    classifier = NeuralNetAMSM(args.emb_size, num_classes)
    generator.train()
    classifier.train()
    generator = generator.to(device)
    classifier = classifier.to(device)
    # Load the trained model if we continue from a checkpoint
    start_iteration = 0
    if args.checkpoint > 0:
        # Resume from an explicit iteration number.
        start_iteration = args.checkpoint
        for model, modelstr in [(generator, 'g'), (classifier, 'c')]:
            model.load_state_dict(torch.load(args.checkpoints_dir / f'{modelstr}_{args.checkpoint}.pt'))
    elif args.checkpoint == -1:
        # Resume from the newest checkpoint found on disk
        # (filenames look like g_<iter>.pt / c_<iter>.pt).
        start_iteration = max([int(filename.stem[2:]) for filename in args.checkpoints_dir.iterdir()])
        for model, modelstr in [(generator, 'g'), (classifier, 'c')]:
            model.load_state_dict(torch.load(args.checkpoints_dir / f'{modelstr}_{start_iteration}.pt'))
    # Optimizer definition: separate LRs for generator and classifier.
    optimizer = torch.optim.SGD([{'params': generator.parameters(), 'lr': args.generator_lr},
                                 {'params': classifier.parameters(), 'lr': args.classifier_lr}],
                                momentum=args.momentum)
    criterion = nn.CrossEntropyLoss()
    # multi GPU support :
    if args.multi_gpu:
        dpp_generator = nn.DataParallel(generator).to(device)
    if dataset_validation is not None:
        # best eer of all iterations, per verification pair
        best_eer = {v.name: {'eer': 100, 'ite': -1} for v in dataset_validation.trials}
    start = time.process_time()
    for iterations in range(start_iteration, args.num_iterations + 1):
        # The current iteration is specified in the scheduler
        # Reduce the learning rate by the given factor (args.scheduler_lambda)
        if iterations in args.scheduler_steps:
            for params in optimizer.param_groups:
                params['lr'] *= args.scheduler_lambda
            print(optimizer)
        avg_loss = 0
        for feats, spk, utt in dataloader_train:
            # Add a channel dimension: (B, T, F) -> (B, 1, T, F)
            # — presumably; confirm feats layout against the dataloader.
            feats = feats.unsqueeze(1).to(device)
            spk = torch.LongTensor(spk).to(device)
            # Creating embeddings
            if args.multi_gpu:
                embeds = dpp_generator(feats)
            else:
                embeds = generator(feats)
            # Classify embeddings (AM-Softmax head needs the labels).
            preds = classifier(embeds, spk)
            # Calc the loss
            loss = criterion(preds, spk)
            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            avg_loss += loss.item()
        avg_loss /= len(dataloader_train)
        # Write the loss in tensorflow
        writer.add_scalar('Loss', avg_loss, iterations)
        # loguru logging :
        if iterations % args.log_interval == 0:
            msg = "{}: {}: [{}/{}] \t C-Loss:{:.4f}, lr: {}, bs: {}".format(args.model_dir,
                                                                            time.ctime(),
                                                                            iterations,
                                                                            args.num_iterations,
                                                                            avg_loss,
                                                                            get_lr(optimizer),
                                                                            args.batch_size)
            logger.info(msg)
        # Saving checkpoint
        if iterations % args.checkpoint_interval == 0:
            for model, modelstr in [(generator, 'g'), (classifier, 'c')]:
                # Move to CPU in eval mode for a portable state_dict,
                # then restore to the training device/mode.
                model.eval().cpu()
                cp_model_path = args.checkpoints_dir / f"{modelstr}_{iterations}.pt"
                torch.save(model.state_dict(), cp_model_path)
                model.to(device).train()
            # Testing the saved model
            if dataset_validation is not None:
                logger.info('Model Evaluation')
                test_res = score_utt_utt(generator, dataset_validation, device)
                for veri_pair, res in test_res.items():
                    eer = res['eer']
                    logger.info(f'EER on {veri_pair}: {eer}')
                    writer.add_scalar(f'{veri_pair}_EER', eer, iterations)
                    # Track the best EER per verification pair.
                    if eer < best_eer[veri_pair]["eer"]:
                        best_eer[veri_pair]["eer"] = eer
                        best_eer[veri_pair]["ite"] = iterations
                msg = ""
                for veri, vals in best_eer.items():
                    msg += f"\nBest score for {veri} is at iteration {vals['ite']} : {vals['eer']} eer"
                logger.success(msg)
            logger.info(f"Saved checkpoint at iteration {iterations}")
    # Final model saving
    for model, modelstr in [(generator, 'g'), (classifier, 'c')]:
        model.eval().cpu()
        cp_filename = "final_{}_{}.pt".format(modelstr, iterations)
        cp_model_path = args.model_dir / cp_filename
        torch.save(model.state_dict(), cp_model_path)
    logger.success(f'Training complete in {time.process_time()-start} seconds')
def train_net(args):
    """ArcFace training loop: build (or resume) the backbone and ArcMargin
    head, train per epoch, validate on LFW, and checkpoint on improvement.

    Args:
        args: namespace with checkpoint path, network name, optimizer and
            LR settings, focal-loss options, batch size and end_epoch.
    """
    torch.manual_seed(7)
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_acc = float('-inf')
    writer = SummaryWriter()
    epochs_since_improvement = 0
    # Initialize / load checkpoint
    if checkpoint is None:
        if args.network == 'r18':
            model = resnet18(args)
        elif args.network == 'r34':
            model = resnet34(args)
        elif args.network == 'r50':
            model = resnet50(args)
        elif args.network == 'r101':
            model = resnet101(args)
        elif args.network == 'r152':
            model = resnet152(args)
        elif args.network == 'mobile':
            model = MobileNetV2()
        else:
            raise TypeError('network {} is not supported.'.format(
                args.network))
        # print(model)
        model = nn.DataParallel(model)
        metric_fc = ArcMarginModel(args)
        metric_fc = nn.DataParallel(metric_fc)
        if args.optimizer == 'sgd':
            optimizer = torch.optim.SGD([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }], lr=args.lr, momentum=args.mom,
                weight_decay=args.weight_decay)
        else:
            optimizer = torch.optim.Adam([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }], lr=args.lr, weight_decay=args.weight_decay)
    else:
        # Resume full training state from the checkpoint file.
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        metric_fc = checkpoint['metric_fc']
        optimizer = checkpoint['optimizer']
    logger = get_logger()
    # Move to GPU, if available
    model = model.to(device)
    metric_fc = metric_fc.to(device)
    # Loss function
    if args.focal_loss:
        criterion = FocalLoss(gamma=args.gamma).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)
    # Custom dataloaders
    train_dataset = ArcFaceDataset('train')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=4)
    # Decay LR by 10x every lr_step epochs.
    scheduler = StepLR(optimizer, step_size=args.lr_step, gamma=0.1)
    # Epochs
    for epoch in range(start_epoch, args.end_epoch):
        # One epoch's training
        train_loss, train_acc = train(train_loader=train_loader,
                                      model=model,
                                      metric_fc=metric_fc,
                                      criterion=criterion,
                                      optimizer=optimizer,
                                      epoch=epoch,
                                      logger=logger)
        writer.add_scalar('model/train_loss', train_loss, epoch)
        writer.add_scalar('model/train_acc', train_acc, epoch)
        # One epoch's validation
        lfw_acc, threshold = lfw_test(model)
        writer.add_scalar('model/valid_acc', lfw_acc, epoch)
        writer.add_scalar('model/valid_thres', threshold, epoch)
        # Check if there was an improvement
        is_best = lfw_acc > best_acc
        best_acc = max(lfw_acc, best_acc)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0
        # Save checkpoint
        save_checkpoint(epoch, epochs_since_improvement, model, metric_fc,
                        optimizer, best_acc, is_best)
        scheduler.step(epoch)
def main():
    """Train a dog-breed classifier (BCE-with-logits over one-hot labels),
    logging per-class accuracy to TensorBoard and saving the last
    checkpoint every epoch."""
    start_epoch = 0
    save_model = "./save_model"
    tensorboard_dir = "./tensorboard/OOD"
    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Hyper-parameters
    eps = 1e-8
    ### data config
    train_dataset = load_data.Dog_dataloader(image_dir = image_dir,
                                             num_class = args.num_classes,
                                             mode = "train")
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=2)
    test_dataset = load_data.Dog_dataloader(image_dir = image_dir,
                                            num_class = args.num_classes,
                                            mode = "test")
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=8,
                                              shuffle=False,
                                              num_workers=2)
    ##### model, optimizer config
    if args.net_type == "resnet50":
        model = models.resnet50(num_c=args.num_classes, pretrained=True)
    elif args.net_type == "resnet34":
        model = models.resnet34(num_c=args.num_classes, pretrained=True)
    # optimizer = optim.Adam(model.parameters(), lr=args.init_lr, weight_decay=1e-5)
    optimizer = optim.SGD(model.parameters(), lr=args.init_lr,
                          momentum=0.9, nesterov=True)
    # Cosine-annealed LR over the whole run (epochs * steps-per-epoch).
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                           args.num_epochs * len(train_loader), )
    if args.resume == True:
        print("load checkpoint_last")
        checkpoint = torch.load(os.path.join(save_model, "checkpoint_last.pth.tar"))
        ##### load model
        model.load_state_dict(checkpoint["model"])
        start_epoch = checkpoint["epoch"]
        # NOTE(review): resume switches from SGD to Adam and does not
        # restore the scheduler — confirm this is intended.
        optimizer = optim.Adam(model.parameters(), lr = checkpoint["init_lr"])
    #### loss config
    criterion = nn.BCEWithLogitsLoss()
    #### create folder
    Path(os.path.join(save_model, env, args.net_type)).mkdir(exist_ok=True, parents=True)
    if args.board_clear == True:
        # Wipe old TensorBoard runs.
        files = glob.glob(tensorboard_dir+"/*")
        for f in files:
            shutil.rmtree(f)
    # Pick the first unused numeric run directory.
    i = 0
    while True:
        if Path(os.path.join(tensorboard_dir, str(i))).exists() == True:
            i += 1
        else:
            Path(os.path.join(tensorboard_dir, str(i))).mkdir(exist_ok=True, parents=True)
            break
    summary = SummaryWriter(os.path.join(tensorboard_dir, str(i)))
    # Start training
    j=0
    best_score=0
    score = 0
    for epoch in range(start_epoch, args.num_epochs):
        # Per-class hit counters.
        # NOTE(review): writing through locals() inside a function is not
        # guaranteed to create/update real variables in CPython — this
        # counter pattern is fragile; confirm it behaves as intended.
        for i in range(args.num_classes):
            locals()["train_label{}".format(i)] = 0
            locals()["test_label{}".format(i)] = 0
        total_loss = 0
        train_acc = 0
        test_acc = 0
        stime = time.time()
        for i, train_data in enumerate(train_loader):
            #### initialized
            org_image = train_data['input'].to(device)
            gt = train_data['label'].type(torch.FloatTensor).to(device)
            model = model.to(device).train()
            optimizer.zero_grad()
            #### forward path
            output = model(org_image)
            #### calc loss
            class_loss = criterion(output, gt)
            #### calc accuracy (argmax over one-hot labels vs sigmoid scores)
            train_acc += sum(torch.argmax(torch.sigmoid(output), dim=1)
                             == torch.argmax(gt, dim=1)).cpu().detach().item()
            gt_label = torch.argmax(gt, dim=1).cpu().detach().tolist()
            output_label = torch.argmax(torch.sigmoid(output), dim=1).cpu().detach().tolist()
            for idx, label in enumerate(gt_label):
                if label == output_label[idx]:
                    locals()["train_label{}".format(label)] += 1
            # detect_anomaly adds NaN/inf checks to the backward pass
            # (slow; debugging aid).
            with autograd.detect_anomaly():
                class_loss.backward()
                optimizer.step()
                scheduler.step()
            total_loss += class_loss.item()
        with torch.no_grad():
            for i, test_data in enumerate(test_loader):
                org_image = test_data['input'].to(device)
                gt = test_data['label'].type(torch.FloatTensor).to(device)
                model = model.to(device).eval()
                #### forward path
                output = model(org_image)
                gt_label = torch.argmax(gt, dim=1).cpu().detach().tolist()
                output_label = torch.argmax(torch.sigmoid(output), dim=1).cpu().detach().tolist()
                for idx, label in enumerate(gt_label):
                    if label == output_label[idx]:
                        locals()["test_label{}".format(label)] += 1
                test_acc += sum(torch.argmax(torch.sigmoid(output), dim=1)
                                == torch.argmax(gt, dim=1)).cpu().detach().item()
        print('Epoch [{}/{}], Step {}, loss = {:.4f}, exe time: {:.2f}, lr: {:.4f}*e-4'
              .format(epoch, args.num_epochs, i+1,
                      total_loss/len(train_loader),
                      time.time() - stime,
                      scheduler.get_last_lr()[0] * 10 ** 4))
    # NOTE(review): train_data/test_data below are the last loop batches
    # (dicts), yet .num_image / .len_list are accessed on them — these
    # attributes look like they belong on the datasets; verify.
        print("train accuracy total : {:.4f}".format(train_acc/train_data.num_image))
        for num in range(args.num_classes):
            print("label{} : {:.4f}"
                  .format(num, locals()["train_label{}".format(num)]/train_data.len_list[num])
                  , end=" ")
        print()
        print("test accuracy total : {:.4f}".format(test_acc/test_data.num_image))
        for num in range(args.num_classes):
            print("label{} : {:.4f}"
                  .format(num, locals()["test_label{}".format(num)]/test_data.len_list[num])
                  , end=" ")
        print("\n")
        summary.add_scalar('loss/loss', total_loss/len(train_loader), epoch)
        summary.add_scalar('acc/train_acc', train_acc/train_data.num_image, epoch)
        summary.add_scalar('acc/test_acc', test_acc/test_data.num_image, epoch)
        summary.add_scalar("learning_rate/lr", scheduler.get_last_lr()[0], epoch)
        time.sleep(0.001)
        torch.save({
            'model': model.state_dict(),
            'epoch': epoch,
            'init_lr' : scheduler.get_last_lr()[0]
        }, os.path.join(save_model, env,args.net_type, 'checkpoint_last.pth.tar'))
def train_net(args):
    """Joint age+gender training loop: build the backbone, train and
    validate each epoch, checkpoint when validation loss improves.

    Args:
        args: namespace with network name, LR/momentum/weight decay,
            focal-loss options, lr_step and age_weight.
    """
    torch.manual_seed(7)
    np.random.seed(7)
    best_loss = 100000
    # NOTE(review): duplicate seeding below is redundant.
    torch.manual_seed(7)
    np.random.seed(7)
    # NOTE(review): checkpoint is hard-coded to None, so the resume branch
    # below is currently unreachable — confirm whether args should feed it.
    checkpoint = None
    start_epoch = 0
    writer = SummaryWriter()
    epochs_since_improvement = 0
    # Initialize / load checkpoint
    if checkpoint is None:
        if args.network == 'r100':
            model = resnet101(args)
        elif args.network == 'r50':
            model = resnet50(args)
        elif args.network == 'r34':
            model = resnet34(args)
        elif args.network == 'r18':
            model = resnet18(args)
        else:  # 'face'
            model = resnet50(args)
        # Only parameters with requires_grad are optimised.
        optimizer = torch.optim.SGD(params=filter(lambda p: p.requires_grad,
                                                  model.parameters()),
                                    lr=args.lr,
                                    momentum=args.mom,
                                    weight_decay=args.weight_decay)
    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
    # Move to GPU, if available
    model = model.to(device)
    # Loss function: separate criteria for age and gender, combined
    # downstream using args.age_weight.
    if args.focal_loss:
        age_criterion = FocalLoss(gamma=args.gamma).to(device)
        gender_criterion = FocalLoss(gamma=args.gamma).to(device)
    else:
        age_criterion = nn.CrossEntropyLoss().to(device)
        gender_criterion = nn.CrossEntropyLoss().to(device)
    criterion_info = (age_criterion, gender_criterion, args.age_weight)
    # Custom dataloaders
    train_dataset = AgeGenDataset('train')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=workers,
                                               pin_memory=True)
    val_dataset = AgeGenDataset('valid')
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=workers,
                                             pin_memory=True)
    scheduler = StepLR(optimizer, step_size=args.lr_step, gamma=0.1)
    # Epochs
    for epoch in range(start_epoch, epochs):
        scheduler.step()
        # One epoch's training
        train_loss, train_gen_accs, train_age_mae = train(train_loader=train_loader,
                                                          model=model,
                                                          criterion_info=criterion_info,
                                                          optimizer=optimizer,
                                                          epoch=epoch)
        writer.add_scalar('Train Loss', train_loss, epoch)
        writer.add_scalar('Train Gender Accuracy', train_gen_accs, epoch)
        writer.add_scalar('Train Age MAE', train_age_mae, epoch)
        # One epoch's validation
        valid_loss, valid_gen_accs, valid_age_mae = validate(val_loader=val_loader,
                                                             model=model,
                                                             criterion_info=criterion_info)
        writer.add_scalar('Valid Loss', valid_loss, epoch)
        writer.add_scalar('Valid Gender Accuracy', valid_gen_accs, epoch)
        writer.add_scalar('Valid Age MAE', valid_age_mae, epoch)
        # Check if there was an improvement
        is_best = valid_loss < best_loss
        best_loss = min(valid_loss, best_loss)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement,))
        else:
            epochs_since_improvement = 0
        # Save checkpoint
        save_checkpoint(epoch, epochs_since_improvement, model, optimizer,
                        best_loss, is_best)
# Transfer-learning setup: dataloaders, frozen resnet34 backbone, new 2-class head.
train_dataloader = DataLoader(dataset=train_data, batch_size=args.batch_size,
                              shuffle=True, num_workers=num_workers)
val_dataloader = DataLoader(dataset=val_data, batch_size=args.batch_size,
                            shuffle=False, num_workers=num_workers)
test_dataloader = DataLoader(dataset=test_data, batch_size=args.batch_size,
                             shuffle=False, num_workers=num_workers)
#### model
model = models.resnet34(pretrained=True)
###### freeze all backbone parameters (original comment: "parameter modification")
for param in model.parameters():
    param.requires_grad = False
# Replace the classifier head (512 is resnet34's fc.in_features);
# only these new layers remain trainable.
model.fc = nn.Sequential(nn.Linear(512, 256),
                         nn.ReLU(inplace=True),
                         nn.Linear(256, 2))
# for param in model.parameters():
#     print(param.requires_grad)
#### loss and optim — optimiser only sees the new head's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=args.learning_rate)
def train_net(args):
    """ArcFace training loop (face-recognition variant): build or resume
    the backbone + ArcMargin head, train per epoch, validate on LFW, and
    checkpoint on accuracy improvement.

    Args:
        args: namespace with checkpoint, network, optimizer/LR settings,
            focal-loss options, use_se, full_log, batch size, end_epoch.
    """
    torch.manual_seed(7)
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_acc = 0
    writer = SummaryWriter()
    epochs_since_improvement = 0
    # Initialize / load checkpoint
    if checkpoint is None:
        if args.network == 'r18':
            model = resnet18(args)
        elif args.network == 'r34':
            model = resnet34(args)
        elif args.network == 'r50':
            model = resnet50(args)
        elif args.network == 'r101':
            model = resnet101(args)
        elif args.network == 'r152':
            model = resnet152(args)
        elif args.network == 'mobile':
            model = MobileNet(1.0)
        elif args.network == 'mr18':
            print("mr18")
            model = myResnet18()
        else:
            # Default: SE-capable face resnet18 variant.
            model = resnet_face18(args.use_se)
        model = nn.DataParallel(model)
        metric_fc = ArcMarginModel(args)
        metric_fc = nn.DataParallel(metric_fc)
        if args.optimizer == 'sgd':
            optimizer = torch.optim.SGD([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }], lr=args.lr, momentum=args.mom,
                weight_decay=args.weight_decay)
        else:
            optimizer = torch.optim.Adam([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }], lr=args.lr, weight_decay=args.weight_decay)
    else:
        # Resume full training state from the checkpoint file.
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        metric_fc = checkpoint['metric_fc']
        optimizer = checkpoint['optimizer']
    logger = get_logger()
    # Move to GPU, if available
    model = model.to(device)
    metric_fc = metric_fc.to(device)
    # Loss function
    if args.focal_loss:
        criterion = FocalLoss(gamma=args.gamma).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)
    # Custom dataloaders
    train_dataset = ArcFaceDataset('train')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True)
    # Decay LR by 10x every lr_step epochs.
    scheduler = StepLR(optimizer, step_size=args.lr_step, gamma=0.1)
    # Epochs
    for epoch in range(start_epoch, args.end_epoch):
        scheduler.step()
        if args.full_log:
            # Extra pre-epoch LFW evaluation + full logging.
            lfw_acc, threshold = lfw_test(model)
            writer.add_scalar('LFW_Accuracy', lfw_acc, epoch)
            full_log(epoch)
        start = datetime.now()
        # One epoch's training
        train_loss, train_top5_accs = train(train_loader=train_loader,
                                            model=model,
                                            metric_fc=metric_fc,
                                            criterion=criterion,
                                            optimizer=optimizer,
                                            epoch=epoch,
                                            logger=logger,
                                            writer=writer)
        writer.add_scalar('Train_Loss', train_loss, epoch)
        writer.add_scalar('Train_Top5_Accuracy', train_top5_accs, epoch)
        end = datetime.now()
        delta = end - start
        print('{} seconds'.format(delta.seconds))
        # One epoch's validation
        lfw_acc, threshold = lfw_test(model)
        writer.add_scalar('LFW Accuracy', lfw_acc, epoch)
        # Check if there was an improvement
        is_best = lfw_acc > best_acc
        best_acc = max(lfw_acc, best_acc)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0
        # Save checkpoint
        save_checkpoint(epoch, epochs_since_improvement, model, metric_fc,
                        optimizer, best_acc, is_best)
x *= 255 if K.image_data_format() == 'channels_first': x = x.transpose((1, 2, 0)) x = np.clip(x, 0, 255).astype('uint8') return x # build the VGG16 network with ImageNet weights # model = vgg16.VGG16(weights='imagenet', include_top=False) # this is the placeholder for the input images input_img = keras.Input(shape=(None, None, 3)) # model = vgg16.VGG16(weights='imagenet', include_top=False) # input_img = model.input model = models.resnet34(input_img, classes=10) print('Model loaded.') model.summary() # get the symbolic outputs of each "key" layer (we gave them unique names). layer_dict = dict([(layer.name, layer) for layer in model.layers[1:]]) x_train, y_train, x_test, y_test = load_cifar10_data(img_width, img_height) img_ = x_train[7, :, :, :] layer_output = layer_dict[layer_name].output # Specify the layer to want to visualize
def main(args):
    """Train a PSENet-style text detector on CTW1500 (TensorFlow backend).

    Builds the checkpoint directory name from the arch / batch size / epoch
    count, selects the backbone, then trains for ``args.n_epoch`` epochs,
    saving weights and appending metrics to a Logger each epoch.
    """
    if args.checkpoint == '':
        args.checkpoint = "checkpoints/ctw1500_%s_bs_%d_ep_%d" % (
            args.arch, args.batch_size, args.n_epoch)
        if args.pretrain:
            if 'synth' in args.pretrain:
                args.checkpoint += "_pretrain_synth"
            else:
                args.checkpoint += "_pretrain_ic17"

    print('checkpoint path: %s' % args.checkpoint)
    print('init lr: %.8f' % args.lr)
    print('schedule: ', args.schedule)
    sys.stdout.flush()

    if not os.path.isdir(args.checkpoint):
        os.makedirs(args.checkpoint)

    kernel_num = 7      # number of shrunk text kernels predicted per image
    min_scale = 0.4     # smallest kernel shrink ratio
    start_epoch = 0

    data_loader = CTW1500Loader(is_transform=True,
                                img_size=args.img_size,
                                kernel_num=kernel_num,
                                min_scale=min_scale)

    if args.arch == "resnet50":
        model = models.resnet50(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True, num_classes=kernel_num)
    # resnet18 and resnet34 have no pretrained weights implemented
    elif args.arch == "resnet18":
        model = models.resnet18(pretrained=False, num_classes=kernel_num)
    elif args.arch == "resnet34":
        model = models.resnet34(pretrained=False, num_classes=kernel_num)
    elif args.arch == "mobilenetv2":
        # BUG FIX: this branch previously built models.resnet152, silently
        # training the wrong backbone when "mobilenetv2" was requested.
        # TODO confirm models exposes a mobilenetv2 factory like the v3 ones.
        model = models.mobilenetv2(pretrained=False, num_classes=kernel_num)
    elif args.arch == "mobilenetv3large":
        model = models.mobilenetv3_large(pretrained=False, num_classes=kernel_num)
    elif args.arch == "mobilenetv3small":
        model = models.mobilenetv3_small(pretrained=False, num_classes=kernel_num)
    else:
        # Fail fast instead of hitting UnboundLocalError on `model` below.
        raise ValueError('unsupported arch: %s' % args.arch)

    optimizer = tf.keras.optimizers.SGD(learning_rate=args.lr,
                                        momentum=0.99,
                                        decay=5e-4)

    title = 'CTW1500'
    if args.pretrain:
        print('Using pretrained model.')
        assert os.path.isfile(
            args.pretrain), 'Error: no checkpoint directory found!'
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(
            ['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])
    elif args.resume:
        print('Resuming from checkpoint.')
        model.load_weights(args.resume)
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                        title=title, resume=True)
    else:
        print('Training from scratch.')
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(
            ['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])

    for epoch in range(start_epoch, args.n_epoch):
        # LR schedule is applied by swapping in a freshly configured optimizer.
        optimizer = get_new_optimizer(args, optimizer, epoch)
        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.n_epoch,
               optimizer.get_config()['learning_rate']))

        # Rebuilt each epoch so the generator restarts from the beginning.
        train_loader = ctw_train_loader(data_loader, batch_size=args.batch_size)

        train_loss, train_te_acc, train_ke_acc, train_te_iou, train_ke_iou = train(
            train_loader, model, dice_loss, optimizer, epoch)

        model.save_weights('%s%s' % (args.checkpoint, '/model_tf/weights'))

        logger.append([
            optimizer.get_config()['learning_rate'], train_loss,
            train_te_acc, train_te_iou
        ])
    logger.close()
def main(args):
    """LR-range sweep followed by fixed-LR training with cosine annealing.

    Runs a fast.ai-style LR finder, logs the loss-vs-lr curve to TensorBoard,
    then trains for ``args.epochs`` epochs saving the last checkpoint every
    epoch and the best one (by lwlrap) at the end.
    """
    np.random.seed(432)
    torch.random.manual_seed(432)

    try:
        os.makedirs(args.outpath)
    except OSError:
        # Directory already exists.
        pass
    experiment_path = utils.get_new_model_path(args.outpath)
    print(experiment_path)

    train_writer = SummaryWriter(os.path.join(experiment_path, 'train_logs'))
    val_writer = SummaryWriter(os.path.join(experiment_path, 'val_logs'))
    trainer = train.Trainer(train_writer, val_writer)

    # todo: add config
    train_transform = data.build_preprocessing()
    eval_transform = data.build_preprocessing()

    trainds, evalds = data.build_dataset(args.datadir, None)
    trainds.transform = train_transform
    evalds.transform = eval_transform

    model = models.resnet34()
    # Tiny initial LR: the range test below sweeps upward from here.
    opt = torch.optim.Adam(model.parameters(), lr=1e-8)

    trainloader = DataLoader(trainds, batch_size=args.batch_size,
                             shuffle=True, num_workers=8, pin_memory=True)
    evalloader = DataLoader(evalds, batch_size=args.batch_size,
                            shuffle=False, num_workers=16, pin_memory=True)

    # find lr fast ai
    criterion = torch.nn.BCEWithLogitsLoss()
    lr_finder = LRFinder(model, opt, criterion, device="cuda")
    lr_finder.range_test(trainloader, end_lr=100, num_iter=100, step_mode="exp")

    # Plot loss-vs-lr, trimming the noisy start/end of the sweep.
    skip_start = 6
    skip_end = 3
    lrs = lr_finder.history["lr"]
    losses = lr_finder.history["loss"]
    grad_norm = lr_finder.history["grad_norm"]

    lrs = lrs[skip_start:-skip_end]
    losses = losses[skip_start:-skip_end]

    fig = plt.figure(figsize=(12, 9))
    plt.plot(lrs, losses)
    plt.xscale("log")
    plt.xlabel("Learning rate")
    plt.ylabel("Loss")
    train_writer.add_figure('loss_vs_lr', fig)

    # Restore the model/optimizer state mutated by the range test.
    lr_finder.reset()

    fixed_lr = 3e-4
    opt = torch.optim.Adam(model.parameters(), lr=fixed_lr)

    scheduler = CosineAnnealingLR(opt, args.epochs)

    state_list = []
    for epoch in range(args.epochs):
        trainer.train_epoch(model, opt, trainloader, fixed_lr, scheduler)
        metrics = trainer.eval_epoch(model, evalloader)

        state = dict(
            epoch=epoch,
            model_state_dict=model.state_dict(),
            optimizer_state_dict=opt.state_dict(),
            loss=metrics['loss'],
            lwlrap=metrics['lwlrap'],
            global_step=trainer.global_step,
        )
        # Deep-copy: state_dict() tensors would otherwise keep mutating.
        state_list.append(copy.deepcopy(state))

        export_path = os.path.join(experiment_path, 'last.pth')
        torch.save(state, export_path)

    # Save the best checkpoint (highest lwlrap).
    # FIX: iterate over what was actually collected instead of assuming
    # args.epochs entries, and use max() with a key instead of a manual
    # index-tracking loop.
    if state_list:
        best_state = max(state_list, key=lambda s: s['lwlrap'])
        best_export_path = os.path.join(experiment_path, 'best.pth')
        torch.save(best_state, best_export_path)
def train_net(args):
    """Train a face-embedding network with an ArcFace margin head.

    Validates on MegaFace after every epoch, writes metrics to TensorBoard,
    and checkpoints (whole objects, not state dicts) after each epoch.
    """
    # Fixed seeds for reproducibility.
    torch.manual_seed(7)
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_acc = 0
    writer = SummaryWriter()
    epochs_since_improvement = 0

    # Initialize / load checkpoint
    if checkpoint is None:
        # Backbone selection; falls back to the SE-ResNet face model.
        if args.network == 'r18':
            model = resnet18(args)
        elif args.network == 'r34':
            model = resnet34(args)
        elif args.network == 'r50':
            model = resnet50(args)
        elif args.network == 'r101':
            model = resnet101(args)
        elif args.network == 'r152':
            model = resnet152(args)
        elif args.network == 'mobile':
            model = MobileNet(1.0)
        else:
            model = resnet_face18(args.use_se)
        model = nn.DataParallel(model)
        metric_fc = ArcMarginModel(args)
        metric_fc = nn.DataParallel(metric_fc)

        if args.optimizer == 'sgd':
            # optimizer = torch.optim.SGD([{'params': model.parameters()}, {'params': metric_fc.parameters()}],
            #                             lr=args.lr, momentum=args.mom, weight_decay=args.weight_decay)
            # InsightFaceOptimizer wraps SGD and manages the LR schedule
            # itself (exposes .lr and .step_num, used in the loop below).
            optimizer = InsightFaceOptimizer(
                torch.optim.SGD([{
                    'params': model.parameters()
                }, {
                    'params': metric_fc.parameters()
                }],
                                lr=args.lr,
                                momentum=args.mom,
                                weight_decay=args.weight_decay))
        else:
            optimizer = torch.optim.Adam([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                         lr=args.lr,
                                         weight_decay=args.weight_decay)
    else:
        # Resume: the checkpoint stores whole objects, not state dicts.
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        metric_fc = checkpoint['metric_fc']
        optimizer = checkpoint['optimizer']

    logger = get_logger()

    # Move to GPU, if available
    model = model.to(device)
    metric_fc = metric_fc.to(device)

    # Loss function
    if args.focal_loss:
        criterion = FocalLoss(gamma=args.gamma).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    train_dataset = ArcFaceDataset('train')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=8)

    # Epochs
    for epoch in range(start_epoch, args.end_epoch):
        # One epoch's training
        train_loss, train_top1_accs = train(train_loader=train_loader,
                                            model=model,
                                            metric_fc=metric_fc,
                                            criterion=criterion,
                                            optimizer=optimizer,
                                            epoch=epoch,
                                            logger=logger)
        print('\nCurrent effective learning rate: {}\n'.format(optimizer.lr))
        print('Step num: {}\n'.format(optimizer.step_num))

        writer.add_scalar('model/train_loss', train_loss, epoch)
        writer.add_scalar('model/train_accuracy', train_top1_accs, epoch)
        writer.add_scalar('model/learning_rate', optimizer.lr, epoch)

        # One epoch's validation
        megaface_acc = megaface_test(model)
        writer.add_scalar('model/megaface_accuracy', megaface_acc, epoch)

        # Check if there was an improvement
        is_best = megaface_acc > best_acc
        best_acc = max(megaface_acc, best_acc)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, epochs_since_improvement, model, metric_fc,
                        optimizer, best_acc, is_best)
import models import torch num_classes = 18 inputs = torch.rand([1, 3, 224, 224]) test = models.resnet34(num_classes=num_classes, pretrained='imagenet') assert test(inputs).size()[1] == num_classes print('ok') test = models.resnet50(num_classes=num_classes, pretrained='imagenet') assert test(inputs).size()[1] == num_classes print('ok') test = models.resnet101(num_classes=num_classes, pretrained='imagenet') assert test(inputs).size()[1] == num_classes print('ok') test = models.resnet152(num_classes=num_classes, pretrained='imagenet') assert test(inputs).size()[1] == num_classes print('ok') test = models.alexnet(num_classes=num_classes, pretrained='imagenet') assert test(inputs).size()[1] == num_classes print('ok') test = models.densenet121(num_classes=num_classes, pretrained='imagenet') assert test(inputs).size()[1] == num_classes print('ok') test = models.densenet169(num_classes=num_classes, pretrained='imagenet') assert test(inputs).size()[1] == num_classes print('ok') test = models.densenet201(num_classes=num_classes, pretrained='imagenet') assert test(inputs).size()[1] == num_classes print('ok') test = models.densenet201(num_classes=num_classes, pretrained='imagenet') assert test(inputs).size()[1] == num_classes print('ok')
def main():
    """Train an OOD-aware classifier with optional triplet (metric) and
    membership losses; evaluates OOD detection with ODIN every 10 epochs.

    Reads configuration from the module-level ``args`` / ``image_dir`` /
    ``env`` globals, logs to TensorBoard, and saves a checkpoint each epoch.
    """
    start_epoch = 0
    if args.metric:
        save_model = "./save_model_" + args.dataset + "_metric"
        tensorboard_dir = "./tensorboard/OOD_" + args.dataset
    else:
        save_model = "./save_model_" + args.dataset
        tensorboard_dir = "./tensorboard/OOD_" + args.dataset

    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Hyper-parameters
    eps = 1e-8

    ### data config
    train_dataset = load_data.Dog_metric_dataloader(image_dir=image_dir,
                                                    num_class=args.num_classes,
                                                    mode="train",
                                                    soft_label=args.soft_label)
    MySampler = customSampler(train_dataset, args.batch_size, args.num_instances)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_sampler=MySampler,
                                               num_workers=2)

    test_dataset = load_data.Dog_dataloader(image_dir=image_dir,
                                            num_class=args.num_classes,
                                            mode="test")
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=8,
                                              shuffle=False, num_workers=2)
    out_test_dataset = load_data.Dog_dataloader(image_dir=image_dir,
                                                num_class=args.num_classes,
                                                mode="OOD")
    out_test_loader = torch.utils.data.DataLoader(out_test_dataset, batch_size=8,
                                                  shuffle=True, num_workers=2)

    ##### model, optimizer config
    if args.net_type == "resnet50":
        model = models.resnet50(num_c=args.num_classes, pretrained=True)
    elif args.net_type == "resnet34":
        model = models.resnet34(num_c=args.num_classes, pretrained=True)

    optimizer = optim.SGD(model.parameters(), lr=args.init_lr, momentum=0.9,
                          nesterov=args.nesterov)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.num_epochs * len(train_loader) // 50,
        eta_min=args.init_lr / 10)

    if args.resume == True:
        print("load checkpoint_last")
        checkpoint = torch.load(os.path.join(save_model, "checkpoint_last.pth.tar"))
        ##### load model
        model.load_state_dict(checkpoint["model"])
        start_epoch = checkpoint["epoch"]
        # Resumed optimizer restarts from the checkpointed LR (momentum /
        # nesterov settings are not restored here).
        optimizer = optim.SGD(model.parameters(), lr=checkpoint["init_lr"])

    #### loss config
    criterion = nn.BCEWithLogitsLoss()
    triplet = torch.nn.TripletMarginLoss(margin=0.5, p=2)

    #### create folder
    Path(os.path.join(save_model, env, args.net_type)).mkdir(exist_ok=True,
                                                             parents=True)

    if args.board_clear == True:
        files = glob.glob(tensorboard_dir + "/*")
        for f in files:
            shutil.rmtree(f)
    # First unused numeric run directory under tensorboard_dir.
    i = 0
    while True:
        if Path(os.path.join(tensorboard_dir, str(i))).exists() == True:
            i += 1
        else:
            Path(os.path.join(tensorboard_dir, str(i))).mkdir(exist_ok=True,
                                                              parents=True)
            break
    summary = SummaryWriter(os.path.join(tensorboard_dir, str(i)))

    # Start training
    j = 0
    best_score = 0
    score = 0
    triplet_loss = torch.tensor(0)
    membership_loss = torch.tensor(0)
    for epoch in range(start_epoch, args.num_epochs):
        # BUG FIX: per-class hit counters were previously kept as dynamically
        # named variables written through locals()["train_label{}".format(i)].
        # Writing to locals() inside a function is undefined behavior in
        # CPython (and stops working under PEP 667 / Python 3.13); plain
        # lists indexed by class id replace that hack with identical output.
        train_label_hits = [0] * args.num_classes
        test_label_hits = [0] * args.num_classes
        total_loss = 0
        triplet_running_loss = 0
        membership_running_loss = 0
        class_running_loss = 0
        train_acc = 0
        test_acc = 0
        stime = time.time()

        for i, train_data in enumerate(train_loader):
            #### initialized
            # Small Gaussian noise augments the input.
            org_image = train_data['input'] + 0.01 * torch.randn_like(train_data['input'])
            org_image = org_image.to(device)
            model = model.to(device).train()
            gt = train_data['label'].type(torch.FloatTensor).to(device)
            optimizer.zero_grad()

            #### forward path
            output, output_list = model.feature_list(org_image)

            if args.metric:
                # Random triplet mining over the batch: for each anchor pick
                # one random negative (different class) and one random
                # positive (same class, not the anchor itself).
                target_layer = output_list[-1]
                negative_list = []
                for batch_idx in range(args.batch_size):
                    gt_arg = gt.argmax(dim=1)
                    negative = (gt_arg != gt_arg[batch_idx])
                    if batch_idx == 0:
                        negative_tensor = target_layer[np.random.choice(
                            np.where(negative.cpu().numpy() == True)[0], 1)[0]]
                        positive_tensor = target_layer[np.random.choice(np.delete(
                            np.where(~negative.cpu().numpy() == True)[0],
                            np.where(np.where(~negative.cpu().numpy() == True)[0] == batch_idx)), 1)[0]]
                        negative_tensor = torch.unsqueeze(negative_tensor, dim=0)
                        positive_tensor = torch.unsqueeze(positive_tensor, dim=0)
                    else:
                        tmp_negative_tensor = target_layer[np.random.choice(
                            np.where(negative.cpu().numpy() == True)[0], 1)[0]]
                        negative_tensor = torch.cat(
                            (negative_tensor,
                             torch.unsqueeze(tmp_negative_tensor, dim=0)), dim=0)
                        tmp_positive_tensor = target_layer[np.random.choice(np.delete(
                            np.where(~negative.cpu().numpy() == True)[0],
                            np.where(np.where(~negative.cpu().numpy() == True)[0] == batch_idx)), 1)[0]]
                        positive_tensor = torch.cat(
                            (positive_tensor,
                             torch.unsqueeze(tmp_positive_tensor, dim=0)), dim=0)
                triplet_loss = 0.5 * triplet(target_layer, positive_tensor, negative_tensor)

            if args.membership:
                # Membership loss: push the true-class sigmoid toward 1
                # (R_wrong) and the other classes' sigmoids toward 0 (R_correct).
                R_wrong = 0
                R_correct = 0
                gt_idx = torch.argmax(gt, dim=1)
                for batch_idx, which in enumerate(gt_idx):
                    for idx in range(args.num_classes):
                        output_sigmoid = torch.sigmoid(output)
                        if which == idx:
                            R_wrong += (1 - output_sigmoid[batch_idx][idx]) ** 2
                        else:
                            R_correct += output_sigmoid[batch_idx][idx] / (args.num_classes - 1)
                membership_loss = (R_wrong + R_correct) / args.batch_size

            #### calc loss
            class_loss = criterion(output, gt)

            #### calc accuracy
            train_acc += sum(torch.argmax(torch.sigmoid(output), dim=1)
                             == torch.argmax(gt, dim=1)).cpu().detach().item()
            gt_label = torch.argmax(gt, dim=1).cpu().detach().tolist()
            output_label = torch.argmax(torch.sigmoid(output), dim=1).cpu().detach().tolist()
            for idx, label in enumerate(gt_label):
                if label == output_label[idx]:
                    train_label_hits[label] += 1

            total_backward_loss = class_loss + triplet_loss + membership_loss
            total_backward_loss.backward()
            optimizer.step()
            scheduler.step()

            class_running_loss += class_loss.item()
            triplet_running_loss += triplet_loss.item()
            membership_running_loss += membership_loss.item()
            total_loss += total_backward_loss.item()

        with torch.no_grad():
            for i, test_data in enumerate(test_loader):
                org_image = test_data['input'].to(device)
                model = model.to(device).eval()
                gt = test_data['label'].type(torch.FloatTensor).to(device)

                #### forward path
                output = model(org_image)
                gt_label = torch.argmax(gt, dim=1).cpu().detach().tolist()
                output_label = torch.argmax(torch.sigmoid(output), dim=1).cpu().detach().tolist()
                for idx, label in enumerate(gt_label):
                    if label == output_label[idx]:
                        test_label_hits[label] += 1
                test_acc += sum(torch.argmax(torch.sigmoid(output), dim=1)
                                == torch.argmax(gt, dim=1)).cpu().detach().item()

        print('Epoch [{}/{}], Step {}, class_loss = {:.4f}, membership_loss = {:.4f}, total_loss = {:.4f}, exe time: {:.2f}, lr: {:.4f}*e-4'
              .format(epoch, args.num_epochs, i + 1,
                      class_running_loss / len(train_loader),
                      membership_running_loss / len(train_loader),
                      total_loss / len(train_loader),
                      time.time() - stime,
                      scheduler.get_last_lr()[0] * 10 ** 4))
        print("train accuracy total : {:.4f}".format(
            train_acc / (len(MySampler) * args.batch_size)))
        for num in range(args.num_classes):
            print("label{} : {:.4f}".format(
                num, train_label_hits[num] / train_dataset.len_list[num]), end=" ")
        print()
        print("test accuracy total : {:.4f}".format(
            test_acc / test_dataset.num_image))
        for num in range(args.num_classes):
            print("label{} : {:.4f}".format(
                num, test_label_hits[num] / test_dataset.len_list[num]), end=" ")
        print("\n")

        if epoch % 10 == 9:
            best_TNR, best_AUROC = test_ODIN(model, test_loader,
                                             out_test_loader, args.net_type, args)
            summary.add_scalar('AD_acc/AUROC', best_AUROC, epoch)
            summary.add_scalar('AD_acc/TNR', best_TNR, epoch)

        summary.add_scalar('loss/loss', total_loss / len(train_loader), epoch)
        summary.add_scalar('loss/membership_loss',
                           membership_running_loss / len(train_loader), epoch)
        summary.add_scalar('acc/train_acc',
                           train_acc / train_dataset.num_image, epoch)
        summary.add_scalar('acc/test_acc',
                           test_acc / test_dataset.num_image, epoch)
        summary.add_scalar("learning_rate/lr", scheduler.get_last_lr()[0], epoch)
        time.sleep(0.001)

        torch.save({
            'model': model.state_dict(),
            'epoch': epoch,
            'init_lr': scheduler.get_last_lr()[0]
        }, os.path.join(save_model, env, args.net_type, 'checkpoint_last.pth.tar'))
        scheduler.step()
self.conv2_drop = nn.Dropout2d() self.fc1 = nn.Linear(180, 50) self.fc2 = nn.Linear(50, 8) def forward(self, x): x = F.relu(F.max_pool2d(self.conv1(x), 2)) x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) x = x.view(-1, 180) x = F.relu(self.fc1(x)) x = F.dropout(x, training=self.training) x = self.fc2(x) return F.log_softmax(x, dim=1) if args.model == 'resnet34': model = models.resnet34(True) model.fc = nn.Linear(2048, FashionAI.AttrKey[args.attribute]) else: model = Net() save_folder = os.path.join(os.path.expanduser('.'), 'save', args.attribute, args.model) if os.path.exists(os.path.join(save_folder, args.model + '_checkpoint.pth')): start_epoch = torch.load( os.path.join(save_folder, args.model + '_checkpoint.pth')) model.load_state_dict( torch.load( os.path.join(save_folder, args.model + '_' + str(start_epoch) + '.pth'))) else:
def main():
    """Fine-tune a two-head discrepancy model for OOD detection.

    Each epoch: first evaluates AUROC (L1 distance between the two heads'
    softmax outputs separates in-distribution from OOD), then trains on
    paired in-distribution / OOD batches with classification + discrepancy
    losses. Reads ``args`` / ``image_dir`` / ``env`` globals.
    """
    start_epoch = 0
    save_model = "./save_model_dis/fine"
    pretrained_model_dir = "./save_model_dis/pre_training"
    tensorboard_dir = "./tensorboard/OOD_dis/fine/" + args.dataset

    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Hyper-parameters
    eps = 1e-8

    ### data config
    train_dataset = load_data.Dog_dataloader(image_dir=image_dir,
                                             num_class=args.num_classes,
                                             mode="train")
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True, num_workers=2)
    test_dataset = load_data.Dog_dataloader(image_dir=image_dir,
                                            num_class=args.num_classes,
                                            mode="test")
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.num_classes,
                                              shuffle=True, num_workers=2)
    out_train_dataset = load_data.Dog_dataloader(image_dir=image_dir,
                                                 num_class=args.num_classes,
                                                 mode="OOD_val")
    out_train_loader = torch.utils.data.DataLoader(out_train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True, num_workers=2)
    out_test_dataset = load_data.Dog_dataloader(image_dir=image_dir,
                                                num_class=args.num_classes,
                                                mode="OOD")
    out_test_loader = torch.utils.data.DataLoader(out_test_dataset,
                                                  batch_size=args.batch_size,
                                                  shuffle=False, num_workers=2)

    ##### model, optimizer config
    if args.net_type == "resnet50":
        model = models.resnet50(num_c=args.num_classes, pretrained=True)
    elif args.net_type == "resnet34":
        model = models.resnet34(num_c=args.num_classes, pretrained=True)

    if args.load == True:
        print("loading model")
        checkpoint = torch.load(os.path.join(pretrained_model_dir,
                                             args.pretrained_model,
                                             "checkpoint_last_pre.pth.tar"))
        ##### load model
        model.load_state_dict(checkpoint["model"])

    optimizer = optim.SGD(model.parameters(), lr=args.init_lr, momentum=0.9,
                          nesterov=args.nesterov)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.num_epochs * len(train_loader))

    #### loss config
    criterion = nn.BCEWithLogitsLoss()

    #### create folder
    Path(os.path.join(save_model, env, args.net_type)).mkdir(exist_ok=True,
                                                             parents=True)

    if args.board_clear == True:
        files = glob.glob(tensorboard_dir + "/*")
        for f in files:
            shutil.rmtree(f)
    # First unused numeric run directory under tensorboard_dir.
    i = 0
    while True:
        if Path(os.path.join(tensorboard_dir, str(i))).exists() == True:
            i += 1
        else:
            Path(os.path.join(tensorboard_dir, str(i))).mkdir(exist_ok=True,
                                                              parents=True)
            break
    summary = SummaryWriter(os.path.join(tensorboard_dir, str(i)))

    # Start training
    j = 0
    best_score = 0
    score = 0
    for epoch in range(start_epoch, args.num_epochs):
        total_class_loss = 0
        total_dis_loss = 0
        train_acc = 0
        test_acc = 0
        stime = time.time()

        model.eval().to(device)
        with torch.no_grad():
            # In-distribution samples get label 0; OOD samples label 1.
            for i, test_data in enumerate(test_loader):
                org_image = test_data['input'].to(device)
                gt = test_data['label'].type(torch.FloatTensor).to(device)

                out1, out2 = model.dis_forward(org_image)
                score_1 = nn.functional.softmax(out1, dim=1)
                score_2 = nn.functional.softmax(out2, dim=1)
                dist = torch.sum(torch.abs(score_1 - score_2),
                                 dim=1).reshape((org_image.shape[0], ))
                if i == 0:
                    dists = dist
                    labels = torch.zeros((org_image.shape[0], ))
                else:
                    dists = torch.cat((dists, dist), dim=0)
                    labels = torch.cat((labels, torch.zeros((org_image.shape[0]))), dim=0)

                test_acc += sum(torch.argmax(torch.sigmoid(out1), dim=1)
                                == torch.argmax(gt, dim=1)).cpu().detach().item()
                test_acc += sum(torch.argmax(torch.sigmoid(out2), dim=1)
                                == torch.argmax(gt, dim=1)).cpu().detach().item()

            for i, out_org_data in enumerate(out_test_loader):
                out_org_image = out_org_data['input'].to(device)
                out1, out2 = model.dis_forward(out_org_image)
                score_1 = nn.functional.softmax(out1, dim=1)
                score_2 = nn.functional.softmax(out2, dim=1)
                # BUG FIX: previously reshaped to (B, -1), a 2-D tensor —
                # torch.cat with the 1-D `dists` above requires matching
                # dimensionality, so this raised at runtime.
                dist = torch.sum(torch.abs(score_1 - score_2),
                                 dim=1).reshape((out_org_image.shape[0], ))
                dists = torch.cat((dists, dist), dim=0)
                labels = torch.cat((labels, torch.ones((out_org_image.shape[0]))), dim=0)

            roc = evaluate(labels.cpu(), dists.cpu(), metric='roc')
            print('Epoch{} AUROC: {:.3f}, test accuracy : {:.4f}'.format(
                epoch, roc, test_acc / test_dataset.num_image / 2))

        # zip() stops at the shorter of the two loaders.
        for i, (org_data, out_org_data) in enumerate(zip(train_loader, out_train_loader)):
            #### initialized
            org_image = org_data['input'].to(device)
            out_org_image = out_org_data['input'].to(device)
            model = model.to(device).train()
            gt = org_data['label'].type(torch.FloatTensor).to(device)
            optimizer.zero_grad()

            #### forward path
            out1, out2 = model.dis_forward(org_image)

            #### calc accuracy
            train_acc += sum(torch.argmax(out1, dim=1)
                             == torch.argmax(gt, dim=1)).cpu().detach().item()
            train_acc += sum(torch.argmax(out2, dim=1)
                             == torch.argmax(gt, dim=1)).cpu().detach().item()

            #### calc loss
            class1_loss = criterion(out1, gt)
            class2_loss = criterion(out2, gt)

            out1, out2 = model.dis_forward(out_org_image)
            dis_loss = DiscrepancyLoss(out1, out2, args.m)

            loss = class1_loss + class2_loss + dis_loss
            total_class_loss += class1_loss.item() + class2_loss.item()
            total_dis_loss += dis_loss.item()

            loss.backward()
            optimizer.step()
            scheduler.step()

        print('Epoch [{}/{}], Step {}, class_loss = {:.4f}, dis_loss = {:.4f}, exe time: {:.2f}, lr: {:.4f}*e-4'
              .format(epoch, args.num_epochs, i + 1,
                      total_class_loss / len(out_train_loader),
                      # BUG FIX: used to print the *last batch's* dis_loss
                      # averaged over the loader length instead of the
                      # accumulated epoch total.
                      total_dis_loss / len(out_train_loader),
                      time.time() - stime,
                      scheduler.get_last_lr()[0] * 10 ** 4))

        summary.add_scalar('loss/class_loss',
                           total_class_loss / len(train_loader), epoch)
        summary.add_scalar('loss/dis_loss',
                           total_dis_loss / len(train_loader), epoch)
        summary.add_scalar('acc/roc', roc, epoch)
        summary.add_scalar("learning_rate/lr", scheduler.get_last_lr()[0], epoch)
        time.sleep(0.001)

        torch.save({
            'model': model.state_dict(),
            'epoch': epoch,
            'init_lr': scheduler.get_last_lr()[0]
        }, os.path.join(save_model, env, args.net_type, 'checkpoint_last_fine.pth.tar'))
def train_net(args):
    """Train an ArcFace model; validate on MegaFace every 5th epoch.

    On 2 consecutive stagnant epochs, reloads the best checkpoint and halves
    the learning rate; stops after 10 epochs without improvement.
    """
    # Fixed seeds for reproducibility.
    torch.manual_seed(7)
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_acc = float('-inf')
    writer = SummaryWriter()
    epochs_since_improvement = 0

    # Initialize / load checkpoint
    if checkpoint is None:
        if args.network == 'r18':
            model = resnet18(args)
        elif args.network == 'r34':
            model = resnet34(args)
        elif args.network == 'r50':
            model = resnet50(args)
        elif args.network == 'r101':
            model = resnet101(args)
        elif args.network == 'r152':
            model = resnet152(args)
        elif args.network == 'mobile':
            from mobilenet_v2 import MobileNetV2
            model = MobileNetV2()
        else:
            raise TypeError('network {} is not supported.'.format(
                args.network))
        metric_fc = ArcMarginModel(args)

        if args.optimizer == 'sgd':
            optimizer = torch.optim.SGD([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                        lr=args.lr,
                                        momentum=args.mom,
                                        weight_decay=args.weight_decay)
        else:
            optimizer = torch.optim.Adam([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                         lr=args.lr,
                                         weight_decay=args.weight_decay)
    else:
        # Resume: the checkpoint stores whole objects, not state dicts.
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        metric_fc = checkpoint['metric_fc']
        optimizer = checkpoint['optimizer']

    # Unlike other variants, DataParallel wraps both fresh and resumed models.
    model = nn.DataParallel(model)
    metric_fc = nn.DataParallel(metric_fc)

    # Move to GPU, if available
    model = model.to(device)
    metric_fc = metric_fc.to(device)

    # Loss function
    if args.focal_loss:
        criterion = FocalLoss(gamma=args.gamma).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    train_dataset = ArcFaceDataset('train')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=num_workers)

    # Epochs
    for epoch in range(start_epoch, args.end_epoch):
        # Decay learning rate if there is no improvement for 2 consecutive epochs, and terminate training after 10
        if epochs_since_improvement == 10:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 2 == 0:
            # Roll back to the best checkpoint before halving the LR.
            checkpoint = 'BEST_checkpoint.tar'
            checkpoint = torch.load(checkpoint)
            model = checkpoint['model']
            metric_fc = checkpoint['metric_fc']
            optimizer = checkpoint['optimizer']
            adjust_learning_rate(optimizer, 0.5)

        # One epoch's training
        train_loss, train_top1_accs = train(train_loader=train_loader,
                                            model=model,
                                            metric_fc=metric_fc,
                                            criterion=criterion,
                                            optimizer=optimizer,
                                            epoch=epoch)
        lr = optimizer.param_groups[0]['lr']
        print('\nCurrent effective learning rate: {}\n'.format(lr))
        # print('Step num: {}\n'.format(optimizer.step_num))

        writer.add_scalar('model/train_loss', train_loss, epoch)
        writer.add_scalar('model/train_accuracy', train_top1_accs, epoch)
        writer.add_scalar('model/learning_rate', lr, epoch)

        # Validation (and checkpointing) only every 5th epoch.
        if epoch % 5 == 0:
            # One epoch's validation
            megaface_acc = megaface_test(model)
            writer.add_scalar('model/megaface_accuracy', megaface_acc, epoch)

            # Check if there was an improvement
            is_best = megaface_acc > best_acc
            best_acc = max(megaface_acc, best_acc)
            if not is_best:
                epochs_since_improvement += 1
                print("\nEpochs since last improvement: %d\n" %
                      (epochs_since_improvement, ))
            else:
                epochs_since_improvement = 0

            # Save checkpoint
            save_checkpoint(epoch, epochs_since_improvement, model, metric_fc,
                            optimizer, best_acc, is_best)
def train_net(args):
    """Train an ArcFace model with MultiStepLR decay.

    Uses a batched dataset/collate pipeline and evaluates each epoch on the
    dataset chosen by ``args.eval_ds`` (LFW or Megaface); checkpoints after
    every epoch. Relies on module-level ``logger``, ``device``,
    ``num_workers`` and ``img_batch_size``.
    """
    # Fixed seeds for reproducibility.
    torch.manual_seed(7)
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_acc = float('-inf')
    writer = SummaryWriter()
    epochs_since_improvement = 0

    # Initialize / load checkpoint
    if checkpoint is None:
        if args.network == 'r18':
            model = resnet18(args)
        elif args.network == 'r34':
            model = resnet34(args)
        elif args.network == 'r50':
            model = resnet50(args)
        elif args.network == 'r101':
            model = resnet101(args)
        elif args.network == 'r152':
            model = resnet152(args)
        else:
            raise TypeError('network {} is not supported.'.format(
                args.network))

        if args.pretrained:
            # Warm-start the backbone from a previously exported state dict.
            model.load_state_dict(torch.load('insight-face-v3.pt'))

        model = nn.DataParallel(model)
        metric_fc = ArcMarginModel(args)
        metric_fc = nn.DataParallel(metric_fc)

        if args.optimizer == 'sgd':
            optimizer = torch.optim.SGD([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                        lr=args.lr,
                                        momentum=args.mom,
                                        nesterov=True,
                                        weight_decay=args.weight_decay)
        else:
            optimizer = torch.optim.Adam([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                         lr=args.lr,
                                         weight_decay=args.weight_decay)
    else:
        # Resume: the checkpoint stores whole objects, not state dicts.
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        metric_fc = checkpoint['metric_fc']
        optimizer = checkpoint['optimizer']

    # Move to GPU, if available
    model = model.to(device)
    metric_fc = metric_fc.to(device)

    # Loss function
    if args.focal_loss:
        criterion = FocalLoss(gamma=args.gamma)
    else:
        criterion = nn.CrossEntropyLoss()

    # Custom dataloaders
    # train_dataset = ArcFaceDataset('train')
    # train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True,
    #                                            num_workers=num_workers)
    # Samples arrive pre-grouped in chunks of img_batch_size, so the loader
    # batch size is divided accordingly and re-flattened by the collate fn.
    train_dataset = ArcFaceDatasetBatched('train', img_batch_size)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size //
                                               img_batch_size,
                                               shuffle=True,
                                               num_workers=num_workers,
                                               collate_fn=batched_collate_fn)

    scheduler = MultiStepLR(optimizer, milestones=[8, 16, 24, 32], gamma=0.1)

    # Epochs
    for epoch in range(start_epoch, args.end_epoch):
        lr = optimizer.param_groups[0]['lr']
        logger.info('\nCurrent effective learning rate: {}\n'.format(lr))
        # print('Step num: {}\n'.format(optimizer.step_num))
        writer.add_scalar('model/learning_rate', lr, epoch)

        # One epoch's training
        train_loss, train_top1_accs = train(train_loader=train_loader,
                                            model=model,
                                            metric_fc=metric_fc,
                                            criterion=criterion,
                                            optimizer=optimizer,
                                            epoch=epoch)

        writer.add_scalar('model/train_loss', train_loss, epoch)
        writer.add_scalar('model/train_accuracy', train_top1_accs, epoch)

        # NOTE(review): passing epoch to scheduler.step() is deprecated in
        # modern PyTorch; kept as-is to preserve resume semantics.
        scheduler.step(epoch)

        # One epoch's validation on the configured evaluation dataset.
        if args.eval_ds == "LFW":
            from lfw_eval import lfw_test
            accuracy, threshold = lfw_test(model)
        elif args.eval_ds == "Megaface":
            from megaface_eval import megaface_test
            accuracy = megaface_test(model)
        else:
            accuracy = -1
        writer.add_scalar('model/evaluation_accuracy', accuracy, epoch)

        # Check if there was an improvement
        is_best = accuracy > best_acc
        best_acc = max(accuracy, best_acc)
        if not is_best:
            epochs_since_improvement += 1
            logger.info("\nEpochs since last improvement: %d\n" %
                        (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, epochs_since_improvement, model, metric_fc,
                        optimizer, best_acc, is_best, scheduler)
def main():
    """Pre-training driver for an out-of-distribution (OOD) detector.

    Trains a two-headed classifier (``model.pendis_forward`` returns two
    logit tensors) with BCE-with-logits loss, optionally adding a
    membership loss and a transfer loss on auxiliary OOD data.  Each epoch
    it scores in-distribution test data and OOD data by the L1 distance
    between the two heads' softmax outputs and reports AUROC, then saves a
    checkpoint.

    Relies on module-level names not visible in this block: ``args``,
    ``image_dir``, ``OOD_dir``, ``env``, ``load_data``, ``models``,
    ``Membership_loss`` and ``evaluate``.
    """
    start_epoch = 0
    pretrained_model = os.path.join("./pre_trained", args.dataset,
                                    args.net_type + ".pth.tar")
    save_model = "./save_model_dis/pre_training"
    tensorboard_dir = "./tensorboard/OOD_dis/pre_training" + args.dataset

    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Hyper-parameters
    # NOTE(review): `eps` is never used in this function.
    eps = 1e-8

    ### data config
    train_dataset = load_data.Dog_metric_dataloader(image_dir=image_dir,
                                                    num_class=args.num_classes,
                                                    mode="train",
                                                    soft_label=args.soft_label)
    if args.custom_sampler:
        # Batch composition controlled by the custom sampler (batch size and
        # instances-per-class), so the DataLoader takes a batch_sampler.
        MySampler = load_data.customSampler(train_dataset, args.batch_size,
                                            args.num_instances)
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_sampler=MySampler,
                                                   num_workers=2)
    else:
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=2)

    test_dataset = load_data.Dog_dataloader(image_dir=image_dir,
                                            num_class=args.num_classes,
                                            mode="test")
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=8,
                                              shuffle=False,
                                              num_workers=2)
    # In-distribution classes served in "OOD" mode: used as the positive
    # (OOD) side of the AUROC evaluation below.
    out_test_dataset = load_data.Dog_dataloader(image_dir=image_dir,
                                                num_class=args.num_classes,
                                                mode="OOD")
    out_test_loader = torch.utils.data.DataLoader(out_test_dataset,
                                                  batch_size=args.batch_size,
                                                  shuffle=False,
                                                  num_workers=2)

    if args.transfer:
        ### perfectly OOD data
        OOD_dataset = load_data.Dog_dataloader(image_dir=OOD_dir,
                                               num_class=args.OOD_num_classes,
                                               mode="OOD")
        OOD_loader = torch.utils.data.DataLoader(OOD_dataset,
                                                 batch_size=args.batch_size,
                                                 shuffle=True,
                                                 num_workers=2)

    ##### model, optimizer config
    # NOTE(review): no `else` branch — an unrecognized net_type raises
    # NameError on `model` further down rather than a clear error here.
    if args.net_type == "resnet50":
        model = models.resnet50(num_c=args.num_classes, pretrained=True)
    elif args.net_type == "resnet34":
        model = models.resnet34(num_c=args.num_classes, pretrained=True)
    elif args.net_type == "vgg19":
        model = models.vgg19(num_c=args.num_classes, pretrained=True)
    elif args.net_type == "vgg16":
        model = models.vgg16(num_c=args.num_classes, pretrained=True)
    elif args.net_type == "vgg19_bn":
        model = models.vgg19_bn(num_c=args.num_classes, pretrained=True)
    elif args.net_type == "vgg16_bn":
        model = models.vgg16_bn(num_c=args.num_classes, pretrained=True)

    if args.transfer:
        # Extra head classifying over in-distribution + OOD classes jointly.
        # NOTE(review): 2048 matches a ResNet-50 feature width; presumably
        # wrong for the VGG/resnet34 choices — verify against gen_forward.
        extra_fc = nn.Linear(2048, args.num_classes + args.OOD_num_classes)

    if args.load == True:
        print("loading model")
        checkpoint = torch.load(pretrained_model)
        ##### load model
        model.load_state_dict(checkpoint["model"])

    # NOTE(review): this is true division, so `batch_num` is a float in the
    # custom-sampler branch; it is later used as an epoch-step count for
    # CosineAnnealingLR and as a divisor for running averages. Also,
    # len(train_loader) already counts batches when a batch_sampler is used,
    # so dividing by batch_size again looks wrong — confirm intent.
    batch_num = len(
        train_loader) / args.batch_size if args.custom_sampler else len(
            train_loader)
    optimizer = optim.SGD(model.parameters(),
                          lr=args.init_lr,
                          momentum=0.9,
                          nesterov=args.nesterov)
    # Cosine annealing over the total number of optimizer steps
    # (epochs * batches per epoch); scheduler.step() is called per batch.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.num_epochs * batch_num)

    #### loss config
    # Multi-label style BCE over logits; targets are (soft) one-hot labels.
    criterion = nn.BCEWithLogitsLoss()

    #### create folder
    Path(os.path.join(save_model, env, args.net_type)).mkdir(exist_ok=True,
                                                             parents=True)

    if args.board_clear == True:
        # Wipe all previous TensorBoard run directories.
        files = glob.glob(tensorboard_dir + "/*")
        for f in files:
            shutil.rmtree(f)
    # Pick the first unused numeric run-directory name.
    i = 0
    while True:
        if Path(os.path.join(tensorboard_dir, str(i))).exists() == True:
            i += 1
        else:
            Path(os.path.join(tensorboard_dir,
                              str(i))).mkdir(exist_ok=True, parents=True)
            break
    summary = SummaryWriter(os.path.join(tensorboard_dir, str(i)))

    # Start training
    # NOTE(review): `j`, `best_score` and `score` are never used below.
    j = 0
    best_score = 0
    score = 0
    # Zero tensors so total_loss is well-formed when the membership/transfer
    # options are disabled.
    membership_loss = torch.tensor(0)
    transfer_loss = torch.tensor(0)
    for epoch in range(start_epoch, args.num_epochs):
        running_loss = 0
        running_membership_loss = 0
        running_transfer_loss = 0
        running_class_loss = 0
        train_acc = 0
        test_acc = 0
        stime = time.time()
        # for i, (train_data, OOD_data) in enumerate(zip(train_loader, OOD_loader)):
        # NOTE(review): loop index `i` shadows the run-directory index above
        # and is reused by the two evaluation loops; the epoch summary print
        # below reads the last loop's `i + 1` as "Step".
        for i, train_data in enumerate(train_loader):
            #### initialized
            # Small Gaussian noise added to inputs (presumably for
            # regularization — confirm).
            org_image = train_data['input'] + 0.01 * torch.randn_like(
                train_data['input'])
            org_image = org_image.to(device)
            gt = train_data['label'].type(torch.FloatTensor).to(device)
            model = model.to(device).train()
            optimizer.zero_grad()

            #### forward path
            # Two discrepancy heads over the shared backbone.
            out1, out2 = model.pendis_forward(org_image)

            if args.membership:
                membership_loss = (
                    Membership_loss(out2, gt, args.num_classes) +
                    Membership_loss(out1, gt, args.num_classes))
                running_membership_loss += membership_loss.item()

            if args.transfer:
                # NOTE(review): BUG — `OOD_data` is undefined here; it was
                # only bound by the commented-out zip() loop above, so
                # args.transfer=True raises NameError on this path.
                extra_fc = extra_fc.to(device).train()
                OOD_image = (
                    OOD_data['input'] +
                    0.01 * torch.randn_like(OOD_data['input'])).to(device)
                # OOD targets: zeros for the in-distribution slots, the OOD
                # one-hot label appended after them.
                OOD_gt = torch.cat(
                    (torch.zeros(args.batch_size, args.num_classes),
                     OOD_data['label'].type(torch.FloatTensor)),
                    dim=1).to(device)
                #### forward path
                _, feature = model.gen_forward(OOD_image)
                OOD_output = extra_fc(feature)
                transfer_loss = criterion(OOD_output, OOD_gt)
                running_transfer_loss += transfer_loss.item()

            #### calc loss
            class1_loss = criterion(out1, gt)
            class2_loss = criterion(out2, gt)
            class_loss = (class1_loss + class2_loss)

            # 0.3 weights the membership term (magic constant).
            total_loss = class_loss + membership_loss * 0.3 + transfer_loss

            #### calc accuracy
            # Both heads are scored, hence the "/ 2" in reported accuracy.
            train_acc += sum(
                torch.argmax(out1, dim=1) == torch.argmax(
                    gt, dim=1)).cpu().detach().item()
            train_acc += sum(
                torch.argmax(out2, dim=1) == torch.argmax(
                    gt, dim=1)).cpu().detach().item()

            total_loss.backward()
            optimizer.step()
            scheduler.step()  # per-batch LR update (cosine annealing)
            running_class_loss += class_loss.item()
            running_loss += total_loss.item()

        with torch.no_grad():
            # Pass 1: in-distribution test data, label 0 (not OOD).
            for i, test_data in enumerate(test_loader):
                org_image = test_data['input'].to(device)
                model = model.to(device).eval()
                gt = test_data['label'].type(torch.FloatTensor).to(device)

                #### forward path
                out1, out2 = model.pendis_forward(org_image)
                score_1 = nn.functional.softmax(out1, dim=1)
                score_2 = nn.functional.softmax(out2, dim=1)
                # OOD score = L1 discrepancy between the two heads' softmax
                # distributions.
                dist = torch.sum(torch.abs(score_1 - score_2), dim=1).reshape(
                    (org_image.shape[0], -1))
                if i == 0:
                    dists = dist
                    labels = torch.zeros((org_image.shape[0], ))
                else:
                    dists = torch.cat((dists, dist), dim=0)
                    labels = torch.cat(
                        (labels, torch.zeros((org_image.shape[0]))), dim=0)
                test_acc += sum(
                    torch.argmax(torch.sigmoid(out1), dim=1) == torch.argmax(
                        gt, dim=1)).cpu().detach().item()
                test_acc += sum(
                    torch.argmax(torch.sigmoid(out2), dim=1) == torch.argmax(
                        gt, dim=1)).cpu().detach().item()

            # Pass 2: OOD data, label 1.
            for i, out_org_data in enumerate(out_test_loader):
                out_org_image = out_org_data['input'].to(device)

                out1, out2 = model.pendis_forward(out_org_image)
                score_1 = nn.functional.softmax(out1, dim=1)
                score_2 = nn.functional.softmax(out2, dim=1)
                dist = torch.sum(torch.abs(score_1 - score_2), dim=1).reshape(
                    (out_org_image.shape[0], -1))
                dists = torch.cat((dists, dist), dim=0)
                labels = torch.cat((labels, torch.ones(
                    (out_org_image.shape[0]))), dim=0)

        # AUROC of the discrepancy score separating OOD (1) from in-dist (0).
        roc = evaluate(labels.cpu(), dists.cpu(), metric='roc')

        print('Epoch{} AUROC: {:.3f}, test accuracy : {:.4f}'.format(
            epoch, roc, test_acc / test_dataset.num_image / 2))
        print(
            'Epoch [{}/{}], Step {}, total_loss = {:.4f}, class = {:.4f}, membership = {:.4f}, transfer = {:.4f}, exe time: {:.2f}, lr: {:.4f}*e-4'
            .format(epoch, args.num_epochs, i + 1, running_loss / batch_num,
                    running_class_loss / batch_num,
                    running_membership_loss / batch_num,
                    running_transfer_loss / batch_num,
                    time.time() - stime,
                    scheduler.get_last_lr()[0] * 10**4))
        print('exe time: {:.2f}, lr: {:.4f}*e-4'.format(
            time.time() - stime,
            scheduler.get_last_lr()[0] * 10**4))
        print("train accuracy total : {:.4f}".format(
            train_acc / train_dataset.num_image / 2))
        print("test accuracy total : {:.4f}".format(
            test_acc / test_dataset.num_image / 2))

        summary.add_scalar('loss/total_loss', running_loss / batch_num, epoch)
        summary.add_scalar('loss/class_loss', running_class_loss / batch_num,
                           epoch)
        summary.add_scalar('loss/membership_loss',
                           running_membership_loss / batch_num, epoch)
        summary.add_scalar('acc/train_acc',
                           train_acc / train_dataset.num_image / 2, epoch)
        summary.add_scalar('acc/test_acc',
                           test_acc / test_dataset.num_image / 2, epoch)
        summary.add_scalar("learning_rate/lr",
                           scheduler.get_last_lr()[0], epoch)
        time.sleep(0.001)
        # Overwrites the same file every epoch — only the last checkpoint is
        # kept (note: saves state_dict only, unlike the resume format above).
        torch.save(
            {
                'model': model.state_dict(),
                'epoch': epoch,
                'init_lr': scheduler.get_last_lr()[0]
            },
            os.path.join(save_model, env, args.net_type,
                         'checkpoint_last_pre.pth.tar'))
def train_net(args):
    """Training driver for an ArcFace model using InsightFaceOptimizer.

    Variant of the pipeline above: adds 'mobile' (MobileNet) and a
    resnet_face18 fallback as backbones, wraps SGD in an
    InsightFaceOptimizer (which exposes ``.lr`` / ``.step_num``), and saves
    a checkpoint every 10 epochs without any evaluation step.

    Relies on module-level names not visible in this block: ``device``,
    ``get_logger``, ``train``, ``save_checkpoint``, ``Dataset``, the
    backbone constructors and ``ArcMarginModel``.
    """
    torch.manual_seed(7)  # torch RNG seed; affects torch.randn
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    # NOTE(review): best_acc is never updated (no evaluation happens here),
    # so save_checkpoint always receives 0.
    best_acc = 0
    writer = SummaryWriter()  # TensorBoard
    epochs_since_improvement = 0

    # Initialize / load checkpoint
    if checkpoint is None:
        if args.network == 'r18':
            model = resnet18(args)
        elif args.network == 'r34':
            model = resnet34(args)
        elif args.network == 'r50':
            model = resnet50(args)
        elif args.network == 'r101':
            model = resnet101(args)
        elif args.network == 'r152':
            model = resnet152(args)
        elif args.network == 'mobile':
            model = MobileNet(1.0)  # width multiplier 1.0
        else:
            # Default backbone when no known name matches.
            model = resnet_face18(args.use_se)
        model = nn.DataParallel(model)
        metric_fc = ArcMarginModel(args)
        metric_fc = nn.DataParallel(metric_fc)

        if args.optimizer == 'sgd':
            # optimizer = torch.optim.SGD([{'params': model.parameters()}, {'params': metric_fc.parameters()}],
            #                             lr=args.lr, momentum=args.mom, weight_decay=args.weight_decay)
            # SGD wrapped in InsightFaceOptimizer, which manages the LR
            # schedule itself (see optimizer.lr / optimizer.step_num below).
            optimizer = InsightFaceOptimizer(
                torch.optim.SGD([{
                    'params': model.parameters()
                }, {
                    'params': metric_fc.parameters()
                }],
                                lr=args.lr,
                                momentum=args.mom,
                                weight_decay=args.weight_decay))
        else:
            optimizer = torch.optim.Adam([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                         lr=args.lr,
                                         weight_decay=args.weight_decay)
    else:
        # Resume from a full-object checkpoint.
        # (translated) this part still needs to be loaded in manually
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        metric_fc = checkpoint['metric_fc']
        optimizer = checkpoint['optimizer']

    logger = get_logger()

    # Move to GPU, if available
    model = model.to(device)
    metric_fc = metric_fc.to(device)

    # Loss function
    if args.focal_loss:
        criterion = FocalLoss(gamma=args.gamma).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    train_dataset = Dataset(root=args.train_path,
                            phase='train',
                            input_shape=(3, 112, 112))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=8)

    # Epochs
    for epoch in range(start_epoch, args.end_epoch):
        # One epoch's training
        # (translated) delegating to a train() function keeps this loop
        # concise — a pattern worth learning from
        train_loss, train_top1_accs = train(train_loader=train_loader,
                                            model=model,
                                            metric_fc=metric_fc,
                                            criterion=criterion,
                                            optimizer=optimizer,
                                            epoch=epoch,
                                            logger=logger)
        # InsightFaceOptimizer exposes its current LR and step count.
        print('\nCurrent effective learning rate: {}\n'.format(optimizer.lr))
        print('Step num: {}\n'.format(optimizer.step_num))
        writer.add_scalar('model/train_loss', train_loss, epoch)
        writer.add_scalar('model/train_accuracy', train_top1_accs, epoch)
        writer.add_scalar('model/learning_rate', optimizer.lr, epoch)

        # Save checkpoint
        # NOTE(review): only every 10th epoch is checkpointed; up to 9
        # epochs of progress can be lost on interruption.
        if epoch % 10 == 0:
            save_checkpoint(epoch, epochs_since_improvement, model, metric_fc,
                            optimizer, best_acc)