def _setup_dataloaders(root_dir, return_dataset=False):
    """Build the train/val dataloaders (and optionally the raw datasets)."""
    # Both splits run the same deterministic preprocessing pipeline.
    shared_transforms = [
        aug.NormalizeBboxes(cfg.grid_size),
        aug.Bboxes2Matrices(cfg.grid_size, cfg.num_classes),
        aug.Resize(cfg.target_size),
        aug.Normalize(cfg.mean, cfg.std, 1. / 255),
        aug.ToTensor(),
    ]

    ds_train = VOCDataset(root_dir, image_set="train")
    ds_val = VOCDataset(root_dir, image_set="val")

    dl_train = get_dataloader(ds_train, shared_transforms, cfg.batch_size,
                              num_workers=4)
    dl_val = get_dataloader(ds_val, shared_transforms, cfg.batch_size)

    if return_dataset:
        return dl_train, dl_val, ds_train, ds_val
    return dl_train, dl_val
def __init__(self, config):
    """Build the trainer: dataloaders, model, optimizer and output paths.

    Weight restoring is best-effort: if the snapshot cannot be read, the
    model keeps its random initialization.
    """
    self.cuda = int(config['cuda'])
    #torch.cuda.empty_cache()
    self.train_dataloader = get_dataloader(config, scope='train')
    self.val_dataloader = get_dataloader(config, scope='val')
    self.model = get_model(config)
    try:
        model_weights = 'experiment/' + config['dir'] + '/' + config[
            'weights']
        self.model.load_state_dict(torch.load(model_weights)['model'])
        print('Weights loaded')  # fixed "Weigths" typo
    except Exception:
        # narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # are no longer swallowed; loading stays best-effort
        print('Weights randomized')
    self.optimizer = get_optimizer(config, self.model)
    self.total_epochs = config['epochs']
    self.batches_per_epoch = config['batches_per_epoch']
    self.val_batches_per_epoch = config['val_batches_per_epoch']
    # checkpoint / logging destinations inside the experiment directory
    self.final_weights_file = 'experiment/' + config[
        'dir'] + '/weights_last.pth'
    self.best_weights_file = 'experiment/' + config[
        'dir'] + '/weights_best.pth'
    self.log_file = 'experiment/' + config['dir'] + '/logs.csv'
    # per-output loss criteria and their blending weights
    self.loss_dict = {
        'sample_name': config['sample_name'],
        'output_name': config['output_name'],
        'loss': [get_criterion(x) for x in config['loss_criterion']],
        'weight': config['loss_weight']
    }
    self.train_fe = bool(config['train_feature_extractor'])
def __init__(self, opt):
    """Set up workspace paths, dataloaders and visualizer for MUSIC21 training.

    :param opt: options object providing .root and the tag-json paths
    """
    self.opt = opt
    dataset = 'MUSIC21'
    self.workspace = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), '..')
    self.job_name = dataset + '_gpu8_estimate_mask_'
    self.model_dir = self.job_name + 'model'
    self.sample_dir = os.path.join(self.workspace, self.job_name)
    self.parameter_dir = self.sample_dir + '/params'
    # exist_ok avoids the check-then-create race of the previous
    # exists()/makedirs pair
    os.makedirs(self.parameter_dir, exist_ok=True)

    # whether to start training from an existing snapshot
    self.load = False
    self.iter_to_load = 62000

    # Write parameters setting file (the directory is guaranteed to exist
    # now, so the old exists() guard was redundant)
    utils.save_parameters(self)

    '''MUSIC21'''
    self.trainloader, self.valloader, self.n_training_samples = ds.get_dataloader(
        root=opt.root, tag_dir=opt.train_tag_json_path, is_training=True)
    self.testloader, self.n_test_samples = ds.get_dataloader(
        root=opt.root, tag_dir=opt.test_tag_json_path, is_training=False)

    # visualization
    self.visualizer = Visualizer(opt)
def main():
    """Train pqnet with periodic validation, visualization and checkpointing."""
    config = get_config('pqnet')('train')

    agent = get_agent(config)

    # optionally resume from a stored checkpoint
    if config.cont:
        agent.load_ckpt(config.ckpt)

    train_loader = get_dataloader('train', config)
    val_loader = cycle(get_dataloader('val', config))

    clock = agent.clock
    for epoch in range(clock.epoch, config.nr_epochs):
        progress = tqdm(train_loader)
        for step, batch in enumerate(progress):
            outputs, losses = agent.train_func(batch)

            if config.vis and clock.step % config.vis_frequency == 0:
                agent.visualize_batch(batch, 'train', outputs=outputs)

            progress.set_description("EPOCH[{}][{}]".format(epoch, step))
            progress.set_postfix(
                OrderedDict({k: v.item() for k, v in losses.items()}))

            # periodically run a single validation batch
            if clock.step % config.val_frequency == 0:
                batch = next(val_loader)
                outputs, losses = agent.val_func(batch)
                if config.vis and clock.step % config.vis_frequency == 0:
                    agent.visualize_batch(batch, 'validation', outputs=outputs)

            clock.tick()

        # per-epoch schedule updates
        agent.update_learning_rate()
        if config.module == 'seq2seq':
            agent.update_teacher_forcing_ratio()
        clock.tock()

        if clock.epoch % config.save_frequency == 0:
            agent.save_ckpt()
        agent.save_ckpt('latest')
def main(args, output_dir, hparams=None):
    """Build model + dataloaders, then dispatch to train/evaluate/infer.

    :param args: parsed arguments; must provide .mode and .main_process
    :param output_dir: directory where results are written
    :param hparams: optional hyper-parameter dict forwarded to get_model
    """
    torch.autograd.set_detect_anomaly(True)
    if args.main_process:
        logging.info("Args {}".format(
            json.dumps(vars(args), indent=2, sort_keys=True)))
        logging.info("Hparams {}".format(
            json.dumps(hparams, indent=2, sort_keys=True)))
    model = get_model(args, hparams)

    # build only the loaders the selected mode needs (substring match keeps
    # the original semantics, e.g. 'train_eval' also builds the val loader)
    train_dataloader = get_dataloader(args, mode="train") if 'train' in args.mode else None
    validation_dataloader = get_dataloader(args, mode="val") if 'val' in args.mode else None
    test_dataloader = get_dataloader(args, mode="test") if 'test' in args.mode else None

    if args.main_process:
        logging.info("Creating Experiment Instance...")
    ex = Experiment(model=model,
                    train_dataloader=train_dataloader,
                    validation_dataloader=validation_dataloader,
                    test_dataloader=test_dataloader,
                    output_dir=output_dir,
                    device=None,
                    args=args)
    # NOTE: the previous `except RuntimeError/IOError/ValueError as e: raise e`
    # clauses were no-ops that only mangled tracebacks; those exceptions now
    # propagate naturally.
    try:
        if args.mode == 'train_eval':
            if args.main_process:
                logging.info("Start training...")
            ex.train_eval()
        elif args.mode == 'eval':
            if args.main_process:
                logging.info("Start evalating...")
            ex.evaluate()
        elif args.mode == 'test':
            if args.main_process:
                logging.info("Start inferring...")
            ex.infer()
        else:
            raise NotImplementedError("Not implemented")
    except KeyboardInterrupt:
        if args.main_process:
            logging.info("Exit by keyboard interrupt ")
    logging.info(f"Done {output_dir}")
def main():
    """Evaluate a checkpointed agent on the test split and print the accuracy."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--continue',
                        dest='cont',
                        action='store_true',
                        help="continue training from checkpoint")
    parser.add_argument('--ckpt',
                        type=str,
                        default='latest',
                        required=False,
                        help="desired checkpoint to restore")
    parser.add_argument('-g',
                        '--gpu_ids',
                        type=int,
                        default=0,
                        required=False,
                        help="specify gpu ids")
    parser.add_argument('--vis',
                        action='store_true',
                        default=False,
                        help="visualize output in training")
    args = parser.parse_args()

    config = get_config(args)
    print(config)

    tr_agent = get_agent(config)
    print(tr_agent.net)

    # load from checkpoint if provided
    if args.cont:
        tr_agent.load_ckpt(args.ckpt)

    val_loader = get_dataloader(
        PHASE_TESTING,
        batch_size=config.batch_size,
        num_workers=2,
        dataset_json="/home/huydd/other_done/result_json/result.json")
    # (a second, cycled copy of this loader used to be built here but was
    # never consumed — removed as dead code)

    epoch_acc = tr_agent.evaluate(val_loader)
    print(epoch_acc)
def main():
    """Run training with scheduled validation, visualization and checkpoints."""
    config = get_config('train')

    agent = get_agent(config)

    # resume from a stored checkpoint when requested
    if config.cont:
        agent.load_ckpt(config.ckpt)

    train_loader = get_dataloader('train', config)
    val_loader = cycle(get_dataloader('validation', config))

    clock = agent.clock
    for epoch in range(clock.epoch, config.nr_epochs):
        progress = tqdm(train_loader)
        for step, batch in enumerate(progress):
            agent.train_func(batch)

            if config.vis and clock.step % config.vis_frequency == 0:
                agent.visualize_batch(batch, "train")

            progress.set_description("EPOCH[{}][{}]".format(epoch, step))
            losses = agent.collect_loss()
            progress.set_postfix(
                OrderedDict({k: v.item() for k, v in losses.items()}))

            # periodically run a single validation batch
            if clock.step % config.val_frequency == 0:
                batch = next(val_loader)
                agent.val_func(batch)
                if config.vis and clock.step % config.vis_frequency == 0:
                    agent.visualize_batch(batch, "validation")

            clock.tick()

        agent.update_learning_rate()
        clock.tock()

        if clock.epoch % config.save_frequency == 0:
            agent.save_ckpt()
        agent.save_ckpt('latest')
def main():
    """Load the trained ResNet-18 checkpoint and produce a test report."""
    model = resnet18()

    # pick GPU when available, otherwise fall back to CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    test_loader = get_dataloader(params['test_file'], params['img_size'],
                                 params['batch_size'], params['data_mean'],
                                 params['data_std'])

    # loss function used by the report
    criterion = nn.CrossEntropyLoss()

    # restore weights from the first .pth file in the weights folder
    weights_file = glob.glob(params['weights_path'] + '/*.pth')[0]
    checkpoint = torch.load(weights_file)
    model.load_state_dict(checkpoint['model_state_dict'])
    print('Model Loaded!\nAccuracy: {:.4}\nLoss: {:.4}\nSensitivity: {:.4}\nSpecificity: {:.4}'
          .format(checkpoint['accuracy'], checkpoint['loss'],
                  checkpoint['sensitivity'], checkpoint['specificity']))

    # make sure the report destination exists
    pathlib.Path(params['report_path']).mkdir(parents=True, exist_ok=True)

    # run the test set and write the report
    test_report(model, test_loader, criterion, device, params['report_path'])
def encode(config):
    """Encode every sample of each split into the shape latent space, one .npy per sample."""
    pqnet = PQNET(config)

    # output destination: results/enc-ckpt-<ckpt>/<phase>/<id>.npy
    out_root = os.path.join(config.exp_dir,
                            "results/enc-ckpt-{}".format(config.ckpt))
    ensure_dir(out_root)

    for phase in ['train', 'val', 'test']:
        loader = get_dataloader(phase, config, is_shuffle=False)
        phase_dir = os.path.join(out_root, phase)
        ensure_dir(phase_dir)
        for sample in tqdm(loader):
            # sample id is the file stem of the sample's path
            sample_id = sample['path'][0].split('/')[-1].split('.')[0]
            with torch.no_grad():
                latent = pqnet.encode(sample).detach().cpu().numpy()
            np.save(os.path.join(phase_dir, "{}.npy".format(sample_id)), latent)
def reconstruct(config):
    """Reconstruct every test shape with PQNET and write the outputs to disk."""
    pqnet = PQNET(config)
    loader = get_dataloader('test', config)

    # output destination encodes checkpoint, format and by-part flag
    out_dir = os.path.join(
        config.exp_dir,
        "results/rec-ckpt-{}-{}-p{}".format(config.ckpt, config.format,
                                            int(config.by_part)))
    ensure_dir(out_dir)

    for sample in tqdm(loader):
        # sample id is the file stem of the sample's path
        sample_id = sample['path'][0].split('/')[-1].split('.')[0]
        with torch.no_grad():
            pqnet.reconstruct(sample)
            shape = pqnet.generate_shape(format=config.format,
                                         by_part=config.by_part)
        save_output(shape, sample_id, out_dir, format=config.format)
def train(epochs, net, optim):
    """Train `net` for `epochs` epochs, checkpointing every 10 batches.

    The dataloader is now built once and re-iterated each epoch (re-iterating
    a DataLoader reshuffles it) instead of being re-constructed per epoch.
    """
    net.train()
    loader = get_dataloader(train=True, batch_size=config.batch_size,
                            shuffle=True)
    for epoch in range(1, epochs + 1):  # replaces `epoch += 1` inside the loop
        for batch_idx, (data, label) in enumerate(loader):
            data = data.to(config.device)
            label = label.to(config.device)
            # net output is already on config.device; the old extra .to() was
            # redundant
            output = net(data)
            optim.zero_grad()
            loss = F.nll_loss(output, label)
            loss.backward()
            optim.step()
            if batch_idx % 10 == 0:
                print('epoch: {}, batch: {}, loss: {}'.format(
                    epoch, batch_idx, loss.item()
                ))
                torch.save(net.state_dict(), './model/model.pkl')
                torch.save(optim.state_dict(), './model/optimizer.pkl')
def main():
    """Load the trained Net, evaluate it on ./data/data.npy and plot gt vs prediction."""
    args = parse_args()
    # fixed evaluation settings (override parsed defaults)
    args.model_path = "./models/best.pth"
    args.batch_size = 256
    args.data_path = "./data/data.npy"

    checkpoint = torch.load(args.model_path)
    # network topology is stored alongside the weights
    args.input_dim = checkpoint["input_dim"]
    args.hidden_nums = checkpoint["hidden_nums"]
    model = Net(args.input_dim, args.hidden_nums, 1)
    model.load_state_dict(checkpoint['state_dict'])

    data = np.load(args.data_path)
    data_loader = get_dataloader(data,
                                 train=False,
                                 batch_size=args.batch_size,
                                 input_dim=args.input_dim)
    mae, mse, gt_all, output_all = evaluate(model, data_loader)

    # x axis covers the evaluated index range
    data_range = data_loader.dataset.data_range
    x = list(range(data_range[0], data_range[1]))  # idiomatic: no comprehension needed
    plt.figure()
    plt.title("MG-eval")
    plt.plot(x, gt_all, color='coral', label="gt")
    plt.plot(x, output_all, color='green', label="predict")
    plt.xlabel("time", fontsize=13)
    plt.ylabel("value", fontsize=13)
    plt.legend(loc='best')  # place the legend at the best position
    plt.show()
def __init__(self, version):
    """Set up GAN training state: networks, data, fixed noise and bookkeeping."""
    self.device = torch.device(
        'cuda:0' if torch.cuda.is_available() else 'cpu')
    self.lr = 5e-5
    self.version = version
    self.beta = .5

    # adversarial pair, both placed on the selected device
    self.D_net = Descriminator(self.lr, self.beta).to(self.device)
    self.G_net = Generator(self.lr, self.beta).to(self.device)

    self.batch_size = 64
    self.dataloader = get_dataloader(batch_size=self.batch_size)

    # NOTE(review): unlike fixed_z below, z_constant is never moved to
    # self.device — confirm this is intentional.
    self.z_constant = torch.randn(64, 100, 1, 1)
    self.gen_image_counter = 0

    self.loss = nn.BCELoss()
    self.losses = []
    self.fixed_gen_images = []
    self.fixed_z = torch.randn(self.batch_size, 100, 1, 1).to(self.device)

    self.create_directories()
    self.load_models()
def evaluate(net):
    """Run the test split through `net`, printing per-batch and overall NLL loss / accuracy."""
    net.eval()
    losses = []
    accuracies = []
    loader = get_dataloader(train=False, batch_size=config.batch_size,
                            shuffle=True)
    for batch_idx, (data, label) in enumerate(loader):
        data = data.to(config.device)
        label = label.to(config.device)
        with torch.no_grad():
            output = net(data)
            batch_loss = F.nll_loss(output, label)
            losses.append(batch_loss.cpu().item())
            # output is [batch, n_classes] of per-class scores after softmax;
            # max(dim=-1) returns (values, indices) over the class dimension
            # and [-1] picks the indices, i.e. the predicted classes.
            predictions = output.max(dim=-1)[-1]
            batch_acc = predictions.eq(label).float().mean()
            accuracies.append(batch_acc.cpu().item())
        if batch_idx % 10 == 0:
            print('batch: {}, loss: {}, acc: {}'.format(
                batch_idx, batch_loss.item(), batch_acc.item()))
    print('total loss: {}, total acc: {}'.format(np.mean(losses),
                                                 np.mean(accuracies)))
def main(data_path, model_path, idtable_path, step, split):
    """Score the VCC2018 valid/test split with a trained MBNet checkpoint.

    Raises:
        ValueError: if `split` is neither 'Valid' nor 'Test' (previously this
        fell through and crashed later with an unrelated NameError on
        `dataset`).
    """
    if split == 'Valid':
        dataset = get_dataset(data_path,
                              "valid_data.csv",
                              vcc18=True,
                              valid=True,
                              idtable=idtable_path)
    elif split == 'Test':
        dataset = get_dataset(data_path,
                              "testing_data.csv",
                              vcc18=True,
                              valid=True,
                              idtable=idtable_path)
    else:
        raise ValueError("split must be 'Valid' or 'Test', got {!r}".format(split))
    dataloader = get_dataloader(dataset,
                                batch_size=20,
                                num_workers=1,
                                shuffle=False)
    model = MBNet(num_judges=5000).to(device)
    model.load_state_dict(torch.load(model_path))
    lamb = 4
    valid(model, dataloader, step, split, lamb)
def train(config):
    """Train the model on train_set, periodically evaluating on dev_set and
    checkpointing (best model separately). Returns the trained model."""
    # train_path:train-context.json
    args = config.args
    train_set = get_dataset(config.train_path, config.w2i_vocabs, config,
                            is_train=True)
    dev_set = get_dataset(config.dev_path, config.w2i_vocabs, config,
                          is_train=False)
    # X:img,torch.stack;
    train_batch = get_dataloader(train_set, args.batch_size, is_train=True)
    model = Model(n_emb=args.n_emb, n_hidden=args.n_hidden,
                  vocab_size=args.vocab_size, dropout=args.dropout,
                  d_ff=args.d_ff, n_head=args.n_head, n_block=args.n_block)
    # optionally warm-start from a previous checkpoint
    if args.restore != '':
        model_dict = torch.load(args.restore)
        model.load_state_dict(model_dict)
    model.to(device)
    # only optimize parameters that require gradients
    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()), lr=args.lr)
    best_score = -1000000
    for i in range(args.epoch):
        model.train()
        report_loss, start_time, n_samples = 0, time.time(), 0
        count, total = 0, len(train_set) // args.batch_size + 1
        for batch in train_batch:
            Y, T = batch
            Y = Y.to(device)
            T = T.to(device)
            optimizer.zero_grad()
            # the model returns the loss directly
            loss = model(Y, T)
            loss.backward()
            optimizer.step()
            report_loss += loss.item()
            #break
            n_samples += len(Y.data)
            count += 1
            # every `report` batches (and at epoch end): log, evaluate and
            # checkpoint (best checkpoint kept separately); counters reset
            if count % args.report == 0 or count == total:
                print('%d/%d, epoch: %d, report_loss: %.3f, time: %.2f' %
                      (count, total, i + 1, report_loss / n_samples,
                       time.time() - start_time))
                # NOTE: `eval` here is a project function that shadows the
                # builtin eval(); it switches the model to eval mode, hence
                # the model.train() right after
                score = eval(model, dev_set, args.batch_size)
                model.train()
                if score > best_score:
                    best_score = score
                    save_model(os.path.join(args.dir, 'best_checkpoint.pt'),
                               model)
                else:
                    save_model(os.path.join(args.dir, 'checkpoint.pt'), model)
                report_loss, start_time, n_samples = 0, time.time(), 0
    return model
def train(epoch):
    """One training epoch: forward, NLL loss, backprop, periodic checkpointing."""
    loader = get_dataloader(True)
    for step, (input, target) in enumerate(loader):
        optimizer.zero_grad()  # clear accumulated gradients
        # move tensors to the configured device (GPU when available)
        input = input.to(lib.device)
        target = target.to(lib.device)
        output = model(input)
        loss = F.nll_loss(output, target)
        print(epoch, step, loss.item())
        loss.backward()   # back-propagate
        optimizer.step()  # apply the gradient update
        if step % 100 == 0:
            # PyTorch offers two save styles: the whole network object, or
            # just its parameters via state_dict(); only parameters are
            # saved here.
            torch.save(model.state_dict(), "./model/model.pkl")
            torch.save(optimizer.state_dict(), "./model/optimizer.pkl")
def __dataloader(self, split):
    """Build the dataloader for `split`.

    The train split gets shuffling, drop_last and optional weighted
    sampling; other splits are deterministic.
    """
    transforms = utils.get_transformations(self.hparams, split)
    is_train = split == "train"
    # `and` instead of bitwise `&`: short-circuits and behaves correctly for
    # any truthy hparams value, not just bools
    weighted_sampling = is_train and self.hparams.weighted_sampling
    dataset_args = {
        'transforms': transforms,
        'normalize': self.hparams.normalize
    }
    dataloader_args = {
        'batch_size': self.hparams.batch_size,
        'num_workers': self.hparams.num_workers,
        'pin_memory': True,
        'shuffle': is_train,
        'drop_last': is_train
    }
    dataloader = get_dataloader(
        split=split,
        dataset_args=dataset_args,
        dataloader_args=dataloader_args,
        weighted_sampling=weighted_sampling
    )
    return dataloader
def eval():
    """Evaluate the Seq2Seq checkpoint on the test split: mean NLL loss plus
    exact-match accuracy on the inverse-transformed sequences.

    NOTE: this module-level name shadows the builtin eval().
    """
    model = Seq2Seq().to(config.device)
    model.load_state_dict(torch.load("./models/model.pkl"))
    loss_list = []
    acc_list = []
    data_loader = get_dataloader(train=False)  # fetch the test set
    with torch.no_grad():
        for idx, (input, target, input_len,
                  target_len) in enumerate(data_loader):
            input = input.to(config.device)
            # target = target #[batch_size,max_len]
            input_len = input_len.to(config.device)
            # decoder_predict:[batch_size,max_len]
            decoder_outputs, decoder_predict = model.evaluate(
                input, input_len)  # [batch_Size,max_len,vocab_size]
            # padded positions are excluded from the loss
            loss = F.nll_loss(decoder_outputs.view(-1, len(config.ns)),
                              target.to(config.device).view(-1),
                              ignore_index=config.ns.PAD)
            loss_list.append(loss.item())
            # inverse-transform target and decoder_predict back to sequences
            target_inverse_tranformed = [
                config.ns.inverse_transform(i) for i in target.numpy()
            ]
            predict_inverse_tranformed = [
                config.ns.inverse_transform(i) for i in decoder_predict
            ]
            # per-sample exact match: 1 if the whole decoded sequence equals
            # the target, else 0
            cur_eq = [
                1 if target_inverse_tranformed[i] ==
                predict_inverse_tranformed[i] else 0
                for i in range(len(target_inverse_tranformed))
            ]
            acc_list.extend(cur_eq)
            # print(np.mean(cur_eq))
    print("mean acc:{} mean loss:{:.6f}".format(np.mean(acc_list),
                                                np.mean(loss_list)))
def model_eval(model_path, datapath, savepath):
    """Evaluate the colorization model: compute MSE loss and PSNR over the
    validation set and write the recolored images to `savepath`.

    Args:
        model_path: path to the saved state_dict.
        datapath: dataset root passed to get_dataloader.
        savepath: directory where output JPEGs are written.
    """
    batch_size, num_workers = 16, 1
    dl_val = get_dataloader(datapath, batch_size, num_workers)
    # print("CUDA Available: ",torch.cuda.is_available())
    # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = FullNetwork().double()  #.to(device)
    model.load_state_dict(torch.load(model_path))
    model.eval()
    criterion = nn.MSELoss()
    losses = []
    psnr_val = []
    idx = 0  # global output-image counter across all batches
    for _, item in enumerate(dl_val):
        # item: (reference BGR image, 256x256 Lab tensor, 64x64 Lab tensor)
        ref_bgr, lab256, lab64 = item[0].numpy(), item[1], item[2]
        # L channel at 64x64 is the network input; a/b at 256x256 the target
        x = lab64[:, 0, :, :].view(-1, 1, 64, 64)  #.to(device)
        y256 = lab256[:, 1:, :, :].view(-1, 2, 256, 256)  #.to(device)
        with torch.no_grad():
            outputs = model(x)
            loss = criterion(outputs, y256)
            losses.append(loss.item())
            for i in range(len(outputs)):
                # reassemble Lab: ground-truth L + predicted a/b, then to BGR
                l, a, b = np.transpose(
                    lab256[i, 0, :, :].numpy()), np.transpose(
                        outputs[i, 0, :, :].numpy()), np.transpose(
                            outputs[i, 1, :, :].numpy())
                bgr = cv2.cvtColor(
                    np.dstack((l, a, b)).astype(np.float32),
                    cv2.COLOR_Lab2BGR)
                # PSNR against the original reference image (both uint8)
                psnr_val.append(
                    psnr(ref_bgr[i], np.array(bgr * 255, dtype=np.uint8)))
                cv2.imwrite('{}/{}.jpg'.format(savepath, idx), bgr * 255)
                idx += 1
    avg_loss = np.mean(np.array(losses))
    print('val loss: {:.5f}'.format(avg_loss))
    print('avg_psnr: {:.5f}'.format(np.mean(psnr_val)))
def train(self):
    """Train CycleGAN: alternate generator and discriminator updates over the
    paired A/B loaders, saving the networks after each epoch."""
    a_loader, b_loader = get_dataloader(self.args)
    for epoch in range(self.args.train_epochs):
        print(f'Training epoch {epoch}')
        for i, (a_real, b_real) in tqdm(enumerate(zip(a_loader, b_loader))):
            # forward pass
            self.a_real = a_real[0].to(self.device)
            self.b_real = b_real[0].to(self.device)
            self.forward()

            # ---- generator update ----
            self.g_optim.zero_grad()
            # BUG FIX: these discriminator forwards previously ran under
            # torch.no_grad(), which detached gan_loss_A/B from the
            # generators, so the adversarial terms produced no generator
            # gradients at all. They must run with grad enabled (the
            # discriminators' own parameters are untouched by g_optim).
            dnet_A_pred = self.dnet_A(self.b_fake)
            dnet_B_pred = self.dnet_B(self.a_fake)
            gan_loss_A = self.gan_loss_func(
                dnet_A_pred,
                torch.ones_like(dnet_A_pred, device=self.device))
            gan_loss_B = self.gan_loss_func(
                dnet_B_pred,
                torch.ones_like(dnet_B_pred, device=self.device))
            cycle_loss_A = self.cycle_loss_func(
                self.a_real, self.a_rec) * self.args.lambda_A
            cycle_loss_B = self.cycle_loss_func(
                self.b_real, self.b_rec) * self.args.lambda_B
            total_loss = gan_loss_A + gan_loss_B + cycle_loss_A + cycle_loss_B
            total_loss.backward()
            self.g_optim.step()

            # ---- discriminator update ----
            self.d_optim.zero_grad()
            iter_list = [(self.a_real, self.a_fake, self.dnet_B,
                          self.image_buffer_A),
                         (self.b_real, self.b_fake, self.dnet_A,
                          self.image_buffer_B)]
            for real, fake, dnet, image_buffer in iter_list:
                fake = fake.detach()  # no generator gradients from D update
                fake = image_buffer.get_image(fake)  # replay buffer
                real_pred = dnet(real)
                fake_pred = dnet(fake)
                # NOTE(review): discriminator losses use cycle_loss_func
                # here, not gan_loss_func — confirm this is intentional.
                real_loss = self.cycle_loss_func(
                    real_pred,
                    torch.ones_like(real_pred, device=self.device))
                fake_loss = self.cycle_loss_func(
                    fake_pred,
                    torch.zeros_like(fake_pred, device=self.device))
                combined_d_loss = (real_loss + fake_loss) / 2
                combined_d_loss.backward()
                self.d_optim.step()
        # persist networks at the end of each epoch
        self.save_networks()
def train(epoch):
    """Run one epoch of NLL-loss training with a tqdm progress bar."""
    loader = get_dataloader(train=True)
    progress = tqdm(loader, total=len(loader))
    for step, (input, target) in enumerate(progress):
        optimizer.zero_grad()
        loss = F.nll_loss(model(input), target)
        loss.backward()
        optimizer.step()
        progress.set_description("epoch:{} idx:{} loss:{:.6f}".format(
            epoch, step, loss.item()))
def main():
    """Main flow: build segmentation model, loaders, loss, optimizer and train."""
    # fix all RNG seeds for reproducibility
    seed_everything(params['seed'])

    # Model
    model = create_model(model=params['model'], encoder=params['encoder'],
                         encoder_weights=params['encoder_weights'])

    # Running architecture (GPU or CPU)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print('Using GPU?: ', torch.cuda.is_available())

    # encoder-specific input preprocessing (normalization) from smp
    proc_fn = smp.encoders.get_preprocessing_fn(params['encoder'],
                                                params['encoder_weights'])
    train_loader = get_aug_dataloader(train_file=params['train_file'],
                                      img_size=params['img_size'],
                                      batch_size=params['batch_size'],
                                      proc_fn=proc_fn)
    val_loader = get_dataloader(data_file=params['val_file'],
                                img_size=params['img_size'],
                                batch_size=params['batch_size'],
                                proc_fn=proc_fn)

    # Creates the criterion (loss function)
    criterion = smp.utils.losses.DiceLoss()

    # Choose the optimizer; fail fast on unsupported values instead of
    # crashing later with an unbound-variable NameError
    if params['optimizer'] == 'ADAM':
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=params['lear_rate'])
    elif params['optimizer'] == 'SGD':
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=params['lear_rate'], momentum=0.9)
    else:
        raise ValueError("Unsupported optimizer: {!r}".format(
            params['optimizer']))

    # Create folder for weights
    pathlib.Path(params['weights_path']).mkdir(parents=True, exist_ok=True)

    # Metrics
    metrics = [
        smp.utils.metrics.IoU(threshold=0.5),
        smp.utils.metrics.Fscore(threshold=0.5),
        smp.utils.metrics.Accuracy(threshold=0.5),
        smp.utils.metrics.Recall(threshold=0.5),
        smp.utils.metrics.Precision(threshold=0.5),
    ]

    # Training and Validation for the model
    train_validate(model=model, train_loader=train_loader,
                   val_loader=val_loader, optimizer=optimizer,
                   criterion=criterion, metrics=metrics, device=device,
                   epochs=params['epochs'],
                   save_criteria=params['save_criteria'],
                   weights_path=params['weights_path'],
                   save_name=params['save_name'])
def main(config):
    """Build per-dataset dataloaders and launch StarGAN training or testing."""
    # cudnn autotuner speeds up fixed-size workloads
    cudnn.benchmark = True

    # make sure every output directory exists
    for directory in (config.log_dir, config.model_save_dir,
                      config.sample_dir, config.result_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # build only the loaders the selected dataset needs
    celeba_loader = None
    rafd_loader = None
    if config.dataset in ['CelebA', 'Both']:
        celeba_loader = get_dataloader(config.celeba_image_dir,
                                       config.attr_path,
                                       config.selected_attrs,
                                       config.celeba_crop_size,
                                       config.image_size, config.batch_size,
                                       'CelebA', config.mode,
                                       config.num_workers)
    if config.dataset in ['RaFD', 'Both']:
        rafd_loader = get_dataloader(config.rafd_image_dir, None, None,
                                     config.rafd_crop_size, config.image_size,
                                     config.batch_size, 'RaFD', config.mode,
                                     config.num_workers)

    # Solver for training and testing StarGAN.
    stargan = StarGAN(celeba_loader, rafd_loader, config)

    single_dataset = config.dataset in ['CelebA', 'RaFD']
    if config.mode == 'train':
        if single_dataset:
            stargan.train()
        elif config.dataset == 'Both':
            stargan.train_multi()
    elif config.mode == 'test':
        if single_dataset:
            stargan.test()
        elif config.dataset == 'Both':
            stargan.test_multi()
def model_train(stop_loss_criteria, datapath, model_savepath):
    """Train FullNetwork until the 3-epoch moving-average loss drops to
    `stop_loss_criteria` (or 1000 epochs elapse), then save the weights.

    Args:
        stop_loss_criteria: mean-loss threshold that stops training early.
        datapath: dataset root handed to get_dataloader.
        model_savepath: destination file for the final state_dict.
    """
    batch_size, num_workers = 160, 8
    dl_train = get_dataloader(datapath, batch_size, num_workers)
    print("CUDA Available: ", torch.cuda.is_available())
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = FullNetwork().double().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    # lower the LR by 25% after 2 epochs without train-loss improvement
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           'min',
                                                           patience=2,
                                                           verbose=True,
                                                           factor=0.75)
    criterion = nn.MSELoss()
    num_epochs = 1000
    losses = []  # rolling window of the last 3 epoch losses
    for epoch in range(num_epochs):
        running_loss = []
        batch_idx = 1
        print('train batch ', end=' ')
        for i, item in enumerate(dl_train):
            # item: (ref image, 256x256 Lab, 64x64 Lab); L@64 is the input,
            # a/b@256 the regression target
            lab256, lab64 = item[1], item[2]
            x = lab64[:, 0, :, :].view(-1, 1, 64, 64).to(device)
            y256 = lab256[:, 1:, :, :].view(-1, 2, 256, 256).to(device)
            optimizer.zero_grad()
            outputs = model(x)
            loss = criterion(outputs, y256)
            running_loss.append(loss.item())
            loss.backward()
            optimizer.step()
            if i % 100 == 99:
                # progress marker every 100 batches
                print('{}'.format(batch_idx), end=' ')
                batch_idx += 1
        train_loss = np.mean(np.array(running_loss))
        print('\n[{}/{}] train loss: {:.5f}'.format(epoch + 1, num_epochs,
                                                    train_loss))
        scheduler.step(train_loss)
        # maintain a 3-element window of recent losses for the stop test
        if len(losses) < 3:
            losses.append(train_loss)
        else:
            del losses[0]
            losses.append(train_loss)
        if np.mean(np.array(losses)) <= stop_loss_criteria:
            break
    torch.save(model.state_dict(), model_savepath)
    # model_train(70, '../../coco/train/', 'model.pth')
def eval():
    """Evaluate the global `model` on the test split, printing mean loss and accuracy.

    NOTE: this module-level name shadows the builtin eval().
    """
    loss_list = []
    acc_list = []
    data_loader = get_dataloader(train=False, batch_size=test_batch_size)
    for idx, (input, target) in tqdm(enumerate(data_loader),
                                     total=len(data_loader),
                                     ascii=True):
        with torch.no_grad():
            output = model(input)
            cur_loss = criterion(output, target)
            loss_list.append(cur_loss.numpy())
            # predicted class = argmax over the last dim
            pred = output.max(dim=-1)[-1]
            # BUG FIX: was `pred.eq(target.float().mean())` — that compared
            # every prediction against the *mean label* instead of computing
            # the fraction of correct predictions.
            cur_acc = pred.eq(target).float().mean()
            acc_list.append(cur_acc.numpy())
    print('total loss,acc', np.mean(loss_list), np.mean(acc_list))
def main():
    """Train FaceNet: build model/optimizer/scheduler, optionally restore a
    checkpoint, then run train/valid for the configured number of epochs."""
    model = FaceNetModel(embedding_size=cfg.embedding_size,
                         num_classes=cfg.num_classes).to(device)
    if cfg.use_warmup:
        # warm up from a small start LR before the regular schedule
        optimizer = optim.Adam(model.parameters(), lr=cfg.start_learning_rate)
        # scheduler = lr_scheduler.StepLR(optimizer, step_size = 50, gamma = 0.1)
        scheduler = WarmAndReduce_LR(
            optimizer,
            cfg.base_learning_rate,
            cfg.num_epochs,
            use_warmup=cfg.use_warmup,
            start_learning_rate=cfg.start_learning_rate,
            warmup_epoch=cfg.warmup_epoch)
    else:
        optimizer = optim.Adam(model.parameters(), lr=cfg.base_learning_rate)
        # scheduler = lr_scheduler.StepLR(optimizer, step_size = 50, gamma = 0.1)
        scheduler = WarmAndReduce_LR(optimizer,
                                     cfg.base_learning_rate,
                                     cfg.num_epochs,
                                     use_warmup=cfg.use_warmup)
    if cfg.start_epoch != 0:
        # resume from the checkpoint written for the previous epoch
        checkpoint = torch.load(
            './log/checkpoint_epoch{}.pth'.format(cfg.start_epoch - 1),
            map_location='cuda:0')
        print("Load weights from {}".format(
            './log/checkpoint_epoch{}.pth'.format(cfg.start_epoch - 1)))
        if cfg.del_classifier:
            # keep only checkpoint entries whose keys exist in the current
            # model (drops e.g. a stale classifier head)
            model_dict = model.state_dict()
            checkpoint['state_dict'] = {
                k: v
                for k, v in checkpoint['state_dict'].items() if k in model_dict
            }
            model_dict.update(checkpoint['state_dict'])
            model.load_state_dict(model_dict)
        else:
            model.load_state_dict(checkpoint['state_dict'])
    for epoch in range(cfg.start_epoch, cfg.num_epochs + cfg.start_epoch):
        # scheduler.step()
        print(80 * '=')
        print('Epoch [{}/{}] Learning Rate:{:8f}'.format(
            epoch, cfg.num_epochs + cfg.start_epoch - 1,
            scheduler.get_lr()[0]))
        # NOTE(review): dataloaders are rebuilt every epoch — confirm this is
        # intentional (e.g. to re-sample triplets) rather than an oversight.
        data_loaders, data_size = get_dataloader(data_path, cfg.batch_size,
                                                 cfg.num_workers,
                                                 cfg.image_size)
        train_valid(model, optimizer, scheduler, epoch, data_loaders,
                    data_size)
        print(80 * '=')
def test(self):
    """Translate both test sets with the trained generators and save images.

    BUG FIX: the checkpointed generators were previously loaded into local
    variables (`gnet_A`, `gnet_B`) that were never used — inference silently
    ran on whatever networks `self` currently held. The loaded weights are
    now actually installed on `self` before inference.
    """
    # load trained generators and use them for inference
    self.gnet_A = torch.load(self.saved_net_path / 'gnet_A.bin')
    self.gnet_B = torch.load(self.saved_net_path / 'gnet_B.bin')
    a_loader, b_loader = get_dataloader(self.args)
    for i, (a_real, b_real) in tqdm(enumerate(zip(a_loader, b_loader))):
        with torch.no_grad():
            fake_B = self.gnet_A(a_real[0].to(self.device)).cpu().data
            fake_A = self.gnet_B(b_real[0].to(self.device)).cpu().data
        # Save image files, numbered from 0001
        save_image(fake_A, self.output_A_dir / ('%04d.png' % (i + 1)))
        save_image(fake_B, self.output_B_dir / ('%04d.png' % (i + 1)))
def predict(
    task: str = Task.Main,
    model_type: str = Config.VanillaEfficientNet,
    load_state_dict: str = None,
    transform_type: str = Aug.BaseTransform,
    data_root: str = Config.Valid,
    save_path: str = Config.Inference,
):
    """Run inference over the given train data and export a CSV with two
    columns: the true label (y_true) and the predicted label (y_pred)."""
    # -- load phase
    if load_state_dict is None:
        load_state_dict = LOAD_STATE_DICT
    model = load_model(model_type, task, load_state_dict)
    model.cuda()
    model.eval()

    dataloader = get_dataloader(
        task=task,
        phase="test",
        data_root=data_root,
        transform_type=transform_type,
        batch_size=1,
        shuffle=False,
        drop_last=False,
    )

    # -- inference phase
    preds, trues = [], []
    with torch.no_grad():
        for imgs, labels in tqdm(dataloader, desc="Inference"):
            imgs = imgs.cuda()
            logits = model(imgs)
            _, top1 = torch.max(logits, 1)
            preds.append(top1.item())
            trues.append(labels.item())

    # -- export phase
    if save_path:
        model_name = os.path.basename(load_state_dict)
        if model_name not in os.listdir(save_path):
            os.mkdir(os.path.join(save_path, model_name))
        result = pd.DataFrame(dict(y_pred=preds, y_true=trues))
        result.to_csv(os.path.join(save_path, model_name, "prediction.csv"),
                      index=False)
def test():
    """Restore the recognizer from its checkpoint and decode one sample."""
    rec = models.R().to(device)
    checkpoint = torch.load(f"{config.OUT_DIR}/rec_checkpoint.pt")
    rec.load_state_dict(checkpoint["model"])
    _, loader = get_dataloader()
    rec.eval()
    ximgs, xlabels, _ = next(iter(loader))
    ximgs = ximgs.to(device)
    # run a single 128x512 single-channel image through the recognizer
    inf_out = rec(ximgs[1, :, :, :].reshape((1, 1, 128, 512)))
    # BUG FIX: the message previously read "Network Output: f{...}" — a stray
    # literal 'f' left over from a misplaced f-string prefix.
    print(
        f"Network Output: {decode(torch.argmax(inf_out, dim=2).cpu().numpy(), 0)}"
    )
    print(f"Ground Truth: {xlabels[1]}")