def resume(self, checkpoint_dir, hyperparameters):
    """Restore generators, discriminators, segmentor and optimizers from the
    newest checkpoints in ``checkpoint_dir`` and rebuild the LR schedulers.

    Returns the iteration count parsed from the generator checkpoint name.
    """
    # Generators: a single file holds both domains under keys 'a' and 'b'.
    gen_ckpt = get_model_list(checkpoint_dir, 'gen')
    weights = torch.load(gen_ckpt)
    self.gen_a.load_state_dict(weights['a'])
    self.gen_b.load_state_dict(weights['b'])
    # The iteration number is encoded as the 8 digits right before the extension.
    iterations = int(gen_ckpt[-11:-3])

    # Discriminators use the same two-key layout.
    dis_ckpt = get_model_list(checkpoint_dir, 'dis')
    weights = torch.load(dis_ckpt)
    self.dis_a.load_state_dict(weights['a'])
    self.dis_b.load_state_dict(weights['b'])

    # The segmentor checkpoint is a bare state dict.
    seg_ckpt = get_model_list(checkpoint_dir, 'seg')
    self.seg.load_state_dict(torch.load(seg_ckpt))

    # Optimizer states: gen/dis share one file, the segmentor has its own.
    opt_state = torch.load(os.path.join(checkpoint_dir, 'opt.pt'))
    self.dis_opt.load_state_dict(opt_state['dis'])
    self.gen_opt.load_state_dict(opt_state['gen'])
    seg_opt_state = torch.load(os.path.join(checkpoint_dir, 'opt_seg.pt'))
    self.seg_opt.load_state_dict(seg_opt_state)

    # Re-create the schedulers so they resume at the restored iteration.
    policy = hyperparameters['lr_policy']
    self.dis_scheduler = get_scheduler(self.dis_opt, policy, hyperparameters, iterations)
    self.gen_scheduler = get_scheduler(self.gen_opt, policy, hyperparameters, iterations)
    self.seg_scheduler = get_scheduler(self.seg_opt, 'constant', None, iterations)

    print('Resume from iteration %d' % iterations)
    return iterations
def init(self):
    """Build image pools, loss criteria, optimizers and LR schedulers."""
    args = self.args
    if not os.path.exists(args.saved_dir):
        os.makedirs(args.saved_dir)

    # Buffers of previously generated images, used when updating D.
    self.fake_A_pool = ImagePool(args.pool_size)
    self.fake_B_pool = ImagePool(args.pool_size)

    # Criteria: L1 for cycle/identity terms, adversarial loss for GAN, plus CAM.
    self.crit_cycle = torch.nn.L1Loss()
    self.crit_idt = torch.nn.L1Loss()
    self.crit_gan = GANLoss(args.gan_mode).cuda()
    self.cam_loss = CAMLoss()

    # One Adam optimizer per side, each covering both translation directions.
    gen_params = itertools.chain(self.model.G_A.parameters(),
                                 self.model.G_B.parameters())
    dis_params = itertools.chain(self.model.D_A.parameters(),
                                 self.model.D_B.parameters())
    self.optim_G = torch.optim.Adam(gen_params, lr=args.lr,
                                    betas=(args.beta1, 0.999))
    self.optim_D = torch.optim.Adam(dis_params, lr=args.lr,
                                    betas=(args.beta1, 0.999))  # beta1 default: 0.5
    self.optimizers = [self.optim_G, self.optim_D]
    self.schedulers = [get_scheduler(optim, self.args)
                       for optim in self.optimizers]
def setup(self, opt):
    """Create the LR scheduler (train phase), optionally load saved weights,
    and print the networks.

    Args:
        opt: option namespace; reads ``phase``, ``resume``, ``load_iter``,
            ``epoch`` and ``verbose``.
    """
    if opt.phase == 'train':
        # NOTE(review): a single scheduler is assigned to the plural name
        # `self.schedulers` — verify whether callers expect a list here.
        self.schedulers = utils.get_scheduler(self.optimizer, opt)
    if opt.phase in ['sample', 'interpolate'] or opt.resume:
        # Prefer an explicit iteration suffix when given, else the epoch label.
        load_suffix = 'iter_%d' % opt.load_iter if opt.load_iter > 0 else opt.epoch
        self.load_denoise_model(load_suffix)
    self.print_networks(opt.verbose)
def get_poseaug_model(args, dataset):
    """
    return PoseAug augmentor and discriminator and corresponding optimizer and scheduler
    """
    print("==> Creating model...")
    device = torch.device("cuda")
    num_joints = dataset.skeleton().num_joints()

    def _report_params(net):
        # Log the parameter count in millions, same format as before.
        print("==> Total parameters: {:.2f}M".format(
            sum(p.numel() for p in net.parameters()) / 1000000.0))

    # Pose generator (the augmentor), producing 3D joint offsets.
    model_G = PoseGenerator(args, num_joints * 3).to(device)
    model_G.apply(init_weights)
    _report_params(model_G)

    # Discriminator over 3D poses.
    model_d3d = Pos3dDiscriminator(num_joints).to(device)
    model_d3d.apply(init_weights)
    _report_params(model_d3d)

    # Discriminator over 2D poses.
    model_d2d = Pos2dDiscriminator(num_joints).to(device)
    model_d2d.apply(init_weights)
    _report_params(model_d2d)

    # One Adam optimizer per network; G and the two Ds use separate LRs.
    g_optimizer = torch.optim.Adam(model_G.parameters(), lr=args.lr_g)
    d3d_optimizer = torch.optim.Adam(model_d3d.parameters(), lr=args.lr_d)
    d2d_optimizer = torch.optim.Adam(model_d2d.parameters(), lr=args.lr_d)

    def _make_scheduler(optim):
        # Lambda-decay schedule spanning the whole run, no warm-fix epochs.
        return get_scheduler(optim, policy='lambda', nepoch_fix=0, nepoch=args.epochs)

    return {
        'model_G': model_G,
        'model_d3d': model_d3d,
        'model_d2d': model_d2d,
        'optimizer_G': g_optimizer,
        'optimizer_d3d': d3d_optimizer,
        'optimizer_d2d': d2d_optimizer,
        'scheduler_G': _make_scheduler(g_optimizer),
        'scheduler_d3d': _make_scheduler(d3d_optimizer),
        'scheduler_d2d': _make_scheduler(d2d_optimizer),
    }
def __init__(self, hyperparameters, opts):
    """MUNIT trainer with an auxiliary segmentor.

    Builds the two domain auto-encoders and discriminators, a two-class
    segmentor, the optimizers and LR schedulers, then applies weight
    initialization.

    Args:
        hyperparameters: dict of settings; keys read here: 'lr',
            'input_dim_a', 'input_dim_b', 'gen', 'dis', 'seg',
            'display_size', 'beta1', 'beta2', 'weight_decay',
            'lr_policy', 'init'.
        opts: command-line options, stored for later use.
    """
    super(MUNIT_Trainer, self).__init__()
    lr = hyperparameters['lr']
    self.opts = opts
    # Initiate the networks
    self.gen_a = AdaINGen(hyperparameters['input_dim_a'], hyperparameters['gen'])  # auto-encoder for domain a
    self.gen_b = AdaINGen(hyperparameters['input_dim_b'], hyperparameters['gen'])  # auto-encoder for domain b
    self.dis_a = MsImageDis(hyperparameters['input_dim_a'], hyperparameters['dis'])  # discriminator for domain a
    self.dis_b = MsImageDis(hyperparameters['input_dim_b'], hyperparameters['dis'])  # discriminator for domain b
    # Two-class segmentor operating on domain-b-dimensional inputs.
    self.seg = segmentor(num_classes=2, channels=hyperparameters['input_dim_b'], hyperpars=hyperparameters['seg'])
    self.instancenorm = nn.InstanceNorm2d(512, affine=False)
    self.style_dim = hyperparameters['gen']['style_dim']
    # fix the noise used in sampling
    display_size = int(hyperparameters['display_size'])
    self.s_a = torch.randn(display_size, self.style_dim, 1, 1).cuda()
    self.s_b = torch.randn(display_size, self.style_dim, 1, 1).cuda()
    # Setup the optimizers
    beta1 = hyperparameters['beta1']
    beta2 = hyperparameters['beta2']
    dis_params = list(self.dis_a.parameters()) + list(self.dis_b.parameters())
    gen_params = list(self.gen_a.parameters()) + list(self.gen_b.parameters())
    # Only parameters with requires_grad are handed to the optimizers.
    self.dis_opt = torch.optim.Adam([p for p in dis_params if p.requires_grad],
                                    lr=lr, betas=(beta1, beta2),
                                    weight_decay=hyperparameters['weight_decay'])
    self.gen_opt = torch.optim.Adam([p for p in gen_params if p.requires_grad],
                                    lr=lr, betas=(beta1, beta2),
                                    weight_decay=hyperparameters['weight_decay'])
    # Segmentor uses plain SGD with fixed settings, independent of `lr`.
    self.seg_opt = torch.optim.SGD(self.seg.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
    self.dis_scheduler = get_scheduler(self.dis_opt, hyperparameters['lr_policy'], hyperparameters=hyperparameters)
    self.gen_scheduler = get_scheduler(self.gen_opt, hyperparameters['lr_policy'], hyperparameters=hyperparameters)
    self.seg_scheduler = get_scheduler(self.seg_opt, 'constant', hyperparameters=None)
    # Network weight initialization (after optimizer creation, preserving
    # the original ordering; Adam state is empty at this point).
    self.apply(weights_init(hyperparameters['init']))
    self.dis_a.apply(weights_init('gaussian'))
    self.dis_b.apply(weights_init('gaussian'))
    self.criterion_seg = DiceLoss(ignore_index=hyperparameters['seg']['ignore_index'])
def train(**kwargs):
    """Train a classification model with optional distiller pruning.

    Sets up TensorBoard writers, the loss/meters, the model and optimizer,
    optionally resumes from a checkpoint and loads a compression schedule,
    then runs the epoch loop with validation, best-model checkpointing and
    LR scheduling.
    """
    opt._parse(kwargs)
    train_writer = None
    value_writer = None
    if opt.vis:
        train_writer = SummaryWriter(
            log_dir='./runs/train_' + datetime.now().strftime('%y%m%d-%H-%M-%S'))
        value_writer = SummaryWriter(
            log_dir='./runs/val_' + datetime.now().strftime('%y%m%d-%H-%M-%S'))
    previous_loss = 1e10  # loss of the previous epoch
    best_precision = 0  # best accuracy so far
    start_epoch = 0
    lr = opt.lr
    perf_scores_history = []  # performance-score history
    # step1: criterion and optimizer
    # Common loss choices:
    # 1. Hinge loss: mainly for support vector machines (SVM);
    # 2. Cross-entropy (softmax) loss: logistic regression / softmax classifiers;
    # 3. Square loss: ordinary least squares (OLS);
    # 4. Exponential loss: mainly for AdaBoost ensembles;
    # 5. Others (0-1 loss, absolute loss).
    criterion = t.nn.CrossEntropyLoss().to(opt.device)  # loss function
    # step2: meters
    train_losses = AverageMeter()  # loss meter
    train_top1 = AverageMeter()  # top-1 meter
    train_top5 = AverageMeter()  # top-5 meter
    pylogger = PythonLogger(msglogger)
    # step3: configure model
    model = getattr(models, opt.model)()  # build the network
    compression_scheduler = distiller.CompressionScheduler(model)
    optimizer = model.get_optimizer(lr, opt.weight_decay)  # optimizer
    if opt.load_model_path:
        # # load all tensors onto the CPU
        # t.load(opt.load_model_path, map_location=lambda storage, loc: storage)
        # t.load(opt.load_model_path, map_location='cpu')
        # # load all tensors onto GPU 1
        # t.load(opt.load_model_path, map_location=lambda storage, loc: storage.cuda(1))
        # # move tensors from GPU 1 to GPU 0
        # t.load(opt.load_model_path, map_location={'cuda:1': 'cuda:0'})
        checkpoint = t.load(opt.load_model_path)
        start_epoch = checkpoint["epoch"]
        # compression_scheduler.load_state_dict(checkpoint['compression_scheduler'], False)
        best_precision = checkpoint["best_precision"]
        model.load_state_dict(checkpoint["state_dict"])
        optimizer = checkpoint['optimizer']
    model.to(opt.device)  # move the model to the device
    if opt.compress:
        compression_scheduler = distiller.file_config(
            model, optimizer, opt.compress, compression_scheduler)  # load the pruning schedule
        model.to(opt.device)
    # learning-rate scheduler
    lr_scheduler = get_scheduler(optimizer, opt)
    # step4: data_image
    train_data = DatasetFromFilename(opt.data_root, flag='train')  # training set
    val_data = DatasetFromFilename(opt.data_root, flag='test')  # validation set
    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)  # training loader
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=True,
                                num_workers=opt.num_workers)  # validation loader
    # train
    for epoch in range(start_epoch, opt.max_epoch):
        model.train()
        if opt.pruning:
            compression_scheduler.on_epoch_begin(epoch)  # start-of-epoch pruning hook
        train_losses.reset()  # reset meter
        train_top1.reset()  # reset meter
        # print('training set size', len(train_dataloader))
        total_samples = len(train_dataloader.sampler)
        steps_per_epoch = math.ceil(total_samples / opt.batch_size)
        train_progressor = ProgressBar(mode="Train ",
                                       epoch=epoch,
                                       total_epoch=opt.max_epoch,
                                       model_name=opt.model,
                                       lr=lr,
                                       total=len(train_dataloader))
        lr = lr_scheduler.get_lr()[0]
        for ii, (data, labels, img_path, tag) in enumerate(train_dataloader):
            if not check_date(img_path, tag, msglogger):
                return
            if opt.pruning:
                compression_scheduler.on_minibatch_begin(
                    epoch, ii, steps_per_epoch, optimizer)  # start-of-batch pruning hook
            train_progressor.current = ii + 1  # current progress within the epoch
            # train model
            input = data.to(opt.device)
            target = labels.to(opt.device)
            if train_writer:
                grid = make_grid(
                    (input.data.cpu() * 0.225 + 0.45).clamp(min=0, max=1))
                train_writer.add_image('train_images', grid, ii * (epoch + 1))  # training images
            score = model(input)  # forward pass
            # compute the loss
            loss = criterion(score, target)
            if opt.pruning:
                # Before running the backward phase, we allow the scheduler to modify the loss
                # (e.g. add regularization loss)
                agg_loss = compression_scheduler.before_backward_pass(
                    epoch, ii, steps_per_epoch, loss,
                    optimizer=optimizer, return_loss_components=True)  # pruning-adjusted loss
                loss = agg_loss.overall_loss
                # NOTE(review): the loss is also recorded again unconditionally
                # below — possible double count when pruning is enabled; confirm.
                train_losses.update(loss.item(), input.size(0))
            # loss = criterion(score[0], target)  # loss for Inception3 networks
            optimizer.zero_grad()  # zero the parameter gradients
            loss.backward()  # backpropagate
            optimizer.step()  # update parameters
            if opt.pruning:
                compression_scheduler.on_minibatch_end(epoch, ii, steps_per_epoch,
                                                       optimizer)  # end-of-batch pruning hook
            precision1_train, precision5_train = accuracy(
                score, target, topk=(1, 5))  # top-1 and top-5 accuracy
            # writer.add_graph(model, input)
            # precision1_train, precision2_train = accuracy(score[0], target, topk=(1, 2))  # Inception3 networks
            train_losses.update(loss.item(), input.size(0))
            train_top1.update(precision1_train[0].item(), input.size(0))
            train_top5.update(precision5_train[0].item(), input.size(0))
            train_progressor.current_loss = train_losses.avg
            train_progressor.current_top1 = train_top1.avg
            train_progressor.current_top5 = train_top5.avg
            train_progressor()  # print progress
            if ii % opt.print_freq == 0:
                if train_writer:
                    train_writer.add_scalar('loss', train_losses.avg,
                                            ii * (epoch + 1))  # training loss
                    train_writer.add_text(
                        'top1', 'train accuracy top1 %s' % train_top1.avg,
                        ii * (epoch + 1))  # top-1 accuracy as text
                    train_writer.add_scalars(
                        'accuracy', {
                            'top1': train_top1.avg,
                            'top5': train_top5.avg,
                            'loss': train_losses.avg
                        }, ii * (epoch + 1))
        # train_progressor.done()  # save training results to txt
        # validate and visualize
        if opt.pruning:
            distiller.log_weights_sparsity(model, epoch, loggers=[pylogger])  # log pruning results
            compression_scheduler.on_epoch_end(epoch, optimizer)  # end-of-epoch pruning hook
        val_loss, val_top1, val_top5 = val(model, criterion, val_dataloader,
                                           epoch, value_writer, lr)  # validate the model
        sparsity = distiller.model_sparsity(model)
        perf_scores_history.append(
            distiller.MutableNamedTuple(
                {
                    'sparsity': sparsity,
                    'top1': val_top1,
                    'top5': val_top5,
                    'epoch': epoch + 1,
                    'lr': lr,
                    'loss': val_loss
                }, ))
        # Keep the performance-score history sorted from best to worst:
        # sparsity is the primary key, then top1, top5 and epoch.
        perf_scores_history.sort(key=operator.attrgetter(
            'sparsity', 'top1', 'top5', 'epoch'),
            reverse=True)
        for score in perf_scores_history[:1]:
            msglogger.info(
                '==> Best [Top1: %.3f Top5: %.3f Sparsity: %.2f on epoch: %d Lr: %f Loss: %f]',
                score.top1, score.top5, score.sparsity, score.epoch, lr,
                score.loss)
        best_precision = max(perf_scores_history[0].top1,
                             best_precision)  # best top-1 accuracy
        is_best = epoch + 1 == perf_scores_history[
            0].epoch  # current epoch is the best epoch
        if is_best:
            model.save({
                "epoch": epoch + 1,
                "model_name": opt.model,
                "state_dict": model.state_dict(),
                "best_precision": best_precision,
                "optimizer": optimizer,
                "valid_loss": [val_loss, val_top1, val_top5],
                'compression_scheduler': compression_scheduler.state_dict(),
            })  # save the model
        # update learning rate
        lr_scheduler.step(epoch)  # advance the LR schedule
        # If the training loss grew, lower the learning rate:
        # if train_losses.val > previous_loss:
        #     lr = lr * opt.lr_decay
        #     # when loss exceeds the previous loss, decay the LR
        #     for param_group in optimizer.param_groups:
        #         param_group['lr'] = lr
        #
        # previous_loss = train_losses.val
        t.cuda.empty_cache()  # release unused cached GPU memory
def main(args):
    """End-to-end PoseAug training.

    Builds PoseNet and the PoseAug GAN (generator + 2D/3D discriminators),
    then each epoch: refreshes the train loaders, trains the GAN, and after
    the warmup fine-tunes PoseNet on fake and real 2D/3D pairs; evaluates on
    H36M and 3DHP and checkpoints the best model for each metric.
    """
    print('==> Using settings {}'.format(args))
    device = torch.device("cuda")
    print('==> Loading dataset...')
    data_dict = data_preparation(args)
    print("==> Creating PoseNet model...")
    model_pos = model_pos_preparation(args, data_dict['dataset'], device)
    model_pos_eval = model_pos_preparation(args, data_dict['dataset'], device)  # used for evaluation only
    # prepare optimizer for posenet
    posenet_optimizer = torch.optim.Adam(model_pos.parameters(), lr=args.lr_p)
    posenet_lr_scheduler = get_scheduler(posenet_optimizer, policy='lambda',
                                         nepoch_fix=0, nepoch=args.epochs)
    print("==> Creating PoseAug model...")
    poseaug_dict = get_poseaug_model(args, data_dict['dataset'])
    # loss function
    criterion = nn.MSELoss(reduction='mean').to(device)
    # GAN trick: data buffer for fake data
    fake_3d_sample = Sample_from_Pool()
    fake_2d_sample = Sample_from_Pool()
    # Timestamped checkpoint directory, one per run.
    args.checkpoint = path.join(
        args.checkpoint, args.posenet_name, args.keypoints,
        datetime.datetime.now().isoformat() + '_' + args.note)
    os.makedirs(args.checkpoint, exist_ok=True)
    print('==> Making checkpoint dir: {}'.format(args.checkpoint))
    logger = Logger(os.path.join(args.checkpoint, 'log.txt'), args)
    logger.record_args(str(model_pos))
    logger.set_names([
        'epoch', 'lr', 'error_h36m_p1', 'error_h36m_p2', 'error_3dhp_p1',
        'error_3dhp_p2'
    ])
    # Init monitor for net work training
    summary = Summary(args.checkpoint)
    writer = summary.create_summary()
    # start training
    start_epoch = 0
    dhpp1_best = None
    s911p1_best = None
    for _ in range(start_epoch, args.epochs):
        if summary.epoch == 0:
            # evaluate the pre-train model for epoch 0.
            h36m_p1, h36m_p2, dhp_p1, dhp_p2 = evaluate_posenet(
                args, data_dict, model_pos, model_pos_eval, device, summary,
                writer, tag='_fake')
            h36m_p1, h36m_p2, dhp_p1, dhp_p2 = evaluate_posenet(
                args, data_dict, model_pos, model_pos_eval, device, summary,
                writer, tag='_real')
            summary.summary_epoch_update()
        # update train loader
        dataloader_update(args=args, data_dict=data_dict, device=device)
        # Train for one epoch
        train_gan(args, poseaug_dict, data_dict, model_pos, criterion,
                  fake_3d_sample, fake_2d_sample, summary, writer)
        if summary.epoch > args.warmup:
            # After warmup, fine-tune PoseNet first on augmented (fake) pairs,
            # then on detected (real) pairs, evaluating after each pass.
            train_posenet(model_pos, data_dict['train_fake2d3d_loader'],
                          posenet_optimizer, criterion, device)
            h36m_p1, h36m_p2, dhp_p1, dhp_p2 = evaluate_posenet(
                args, data_dict, model_pos, model_pos_eval, device, summary,
                writer, tag='_fake')
            train_posenet(model_pos, data_dict['train_det2d3d_loader'],
                          posenet_optimizer, criterion, device)
            h36m_p1, h36m_p2, dhp_p1, dhp_p2 = evaluate_posenet(
                args, data_dict, model_pos, model_pos_eval, device, summary,
                writer, tag='_real')
        # Update learning rates
        poseaug_dict['scheduler_G'].step()
        poseaug_dict['scheduler_d3d'].step()
        poseaug_dict['scheduler_d2d'].step()
        posenet_lr_scheduler.step()
        lr_now = posenet_optimizer.param_groups[0]['lr']
        print('\nEpoch: %d | LR: %.8f' % (summary.epoch, lr_now))
        # Update log file
        logger.append(
            [summary.epoch, lr_now, h36m_p1, h36m_p2, dhp_p1, dhp_p2])
        # Update checkpoint (lower error is better for both metrics)
        if dhpp1_best is None or dhpp1_best > dhp_p1:
            dhpp1_best = dhp_p1
            logger.record_args(
                "==> Saving checkpoint at epoch '{}', with dhp_p1 {}".format(
                    summary.epoch, dhpp1_best))
            save_ckpt(
                {
                    'epoch': summary.epoch,
                    'model_pos': model_pos.state_dict()
                },
                args.checkpoint,
                suffix='best_dhp_p1')
        if s911p1_best is None or s911p1_best > h36m_p1:
            s911p1_best = h36m_p1
            logger.record_args(
                "==> Saving checkpoint at epoch '{}', with s911p1 {}".format(
                    summary.epoch, s911p1_best))
            save_ckpt(
                {
                    'epoch': summary.epoch,
                    'model_pos': model_pos.state_dict()
                },
                args.checkpoint,
                suffix='best_h36m_p1')
        summary.summary_epoch_update()
    writer.close()
    logger.close()
def train(self):
    """Run the full training loop for the class-based trainer.

    Per epoch: pruning hooks, forward/backward over the train loader, metric
    tracking and progress display, validation, best-model checkpointing, and
    LR scheduling.

    Fixes over the previous revision:
      * two references to the module-level ``opt`` now use ``self.opt``;
      * ``get_lr()`` returns a list of per-group LRs, so ``[0]`` is taken
        (the value is logged with ``%f`` and stored in the score history);
      * the LR scheduler is now actually stepped each epoch — previously it
        was read but never advanced, so the LR stayed constant;
      * ``is_best`` compares ``epoch + 1`` against the recorded best epoch,
        matching the ``'epoch': epoch + 1`` stored in the history.
    """
    lr = self.opt.lr
    perf_scores_history = []  # performance-score history
    pylogger = PythonLogger(msglogger)
    self.train_load_model()
    self.load_compress()
    self.create_write()
    # BUG FIX: was `get_scheduler(self.optimizer, opt)` — module-level `opt`.
    lr_scheduler = get_scheduler(self.optimizer, self.opt)
    for epoch in range(self.start_epoch, self.opt.max_epoch):
        self.model.train()
        self.load_data()
        if self.opt.pruning:
            self.compression_scheduler.on_epoch_begin(epoch)  # start-of-epoch pruning hook
        self.train_losses.reset()  # reset meters
        self.train_top1.reset()
        total_samples = len(self.train_dataloader.sampler)
        steps_per_epoch = math.ceil(total_samples / self.opt.batch_size)
        train_progressor = ProgressBar(mode="Train ",
                                       epoch=epoch,
                                       total_epoch=self.opt.max_epoch,
                                       model_name=self.opt.model,
                                       total=len(self.train_dataloader))
        # BUG FIX: get_lr() returns a list; use the first parameter group's LR.
        lr = lr_scheduler.get_lr()[0]
        for ii, (data, labels, img_path) in enumerate(self.train_dataloader):
            if self.opt.pruning:
                self.compression_scheduler.on_minibatch_begin(
                    epoch, ii, steps_per_epoch, self.optimizer)  # start-of-batch pruning hook
            train_progressor.current = ii + 1  # progress within the epoch
            input = data.to(self.opt.device)
            target = labels.to(self.opt.device)
            score = self.model(input)  # forward pass
            loss = self.criterion(score, target)  # compute the loss
            if self.opt.pruning:
                # Before running the backward phase, allow the scheduler to
                # modify the loss (e.g. add a regularization term).
                agg_loss = self.compression_scheduler.before_backward_pass(
                    epoch, ii, steps_per_epoch, loss,
                    optimizer=self.optimizer, return_loss_components=True)
                loss = agg_loss.overall_loss
                self.train_losses.update(loss.item(), input.size(0))
            self.optimizer.zero_grad()  # zero the parameter gradients
            loss.backward()  # backpropagate
            self.optimizer.step()  # update parameters
            if self.opt.pruning:  # BUG FIX: was the module-level `opt`
                self.compression_scheduler.on_minibatch_end(
                    epoch, ii, steps_per_epoch, self.optimizer)  # end-of-batch pruning hook
            precision1_train, precision5_train = accuracy(
                score, target, topk=(1, 5))  # top-1 and top-5 accuracy
            self.train_losses.update(loss.item(), input.size(0))
            self.train_top1.update(precision1_train[0].item(), input.size(0))
            self.train_top5.update(precision5_train[0].item(), input.size(0))
            train_progressor.current_loss = self.train_losses.avg
            train_progressor.current_top1 = self.train_top1.avg
            train_progressor.current_top5 = self.train_top5.avg
            train_progressor()  # print progress
            if (ii + 1) % self.opt.print_freq == 0:
                self.visualization_train(input, ii, epoch)
        if self.opt.pruning:
            distiller.log_weights_sparsity(self.model, epoch,
                                           loggers=[pylogger])  # log pruning results
            self.compression_scheduler.on_epoch_end(
                epoch, self.optimizer)  # end-of-epoch pruning hook
        val_loss, val_top1, val_top5 = val(self.model, self.criterion,
                                           self.val_dataloader, epoch,
                                           self.value_writer)  # validate the model
        sparsity = distiller.model_sparsity(self.model)
        perf_scores_history.append(
            distiller.MutableNamedTuple(
                {
                    'sparsity': sparsity,
                    'top1': val_top1,
                    'top5': val_top5,
                    'epoch': epoch + 1,
                    'lr': lr,
                    'loss': val_loss
                }, ))
        # Keep the history sorted best-to-worst: sparsity is the primary key,
        # then top1, top5 and epoch.
        perf_scores_history.sort(key=operator.attrgetter(
            'sparsity', 'top1', 'top5', 'epoch'),
            reverse=True)
        for score in perf_scores_history[:1]:
            msglogger.info(
                '==> Best [Top1: %.3f Top5: %.3f Sparsity: %.2f on epoch: %d Lr: %f Loss: %f]',
                score.top1, score.top5, score.sparsity, score.epoch, lr,
                score.loss)
        # BUG FIX: history stores epoch + 1, so compare against epoch + 1.
        is_best = epoch + 1 == perf_scores_history[0].epoch
        self.best_precision = max(perf_scores_history[0].top1,
                                  self.best_precision)  # best top-1 accuracy
        if is_best:
            self.train_save_model(epoch, val_loss, val_top1, val_top5)
        # update learning rate
        # BUG FIX: the scheduler was read but never stepped before.
        lr_scheduler.step(epoch)
        lr = lr_scheduler.get_lr()[0]
        t.cuda.empty_cache()  # release unused cached GPU memory
model = get_model(cfg, [l1_cls_num, l2_cls_num], device, logger) if cfg.TRAIN_STAGE == 2: last_stage_weight_path = os.path.join(model_dir, 'best_model_stage1.pth') load_weight(model, last_stage_weight_path) model.module.freeze_backbone() model.module.freeze_classifer(0) elif cfg.TRAIN_STAGE == 1: last_stage_weight_path = os.path.join(args.pretrained_path) load_weight(model, last_stage_weight_path) model.module.freeze_backbone() model.module.freeze_classifer(1) # load_pretrained_weight(model, args.pretrained_path) combiner = Combiner(cfg, device) optimizer = get_optimizer(cfg, model) scheduler = get_scheduler(cfg, optimizer) # ----- END MODEL BUILDER ----- trainLoader = DataLoader( train_set, batch_size=cfg.TRAIN.BATCH_SIZE, shuffle=cfg.TRAIN.SHUFFLE, num_workers=cfg.TRAIN.NUM_WORKERS, pin_memory=cfg.PIN_MEMORY, drop_last=True ) validLoader = DataLoader( valid_set, batch_size=cfg.TEST.BATCH_SIZE, shuffle=False,
def __init__(self, opt):
    """Denoising diffusion model (DDPM-style).

    Precomputes the beta/alpha noise schedules and the posterior
    coefficients as device tensors, builds the U-Net denoiser, and in the
    train phase additionally sets up the optimizer, loss, visualizer and
    LR scheduler.
    """
    super(DDPModel, self).__init__()
    self.gpu_ids = opt.gpu_ids
    self.device = torch.device(
        'cuda:{}'.format(opt.gpu_ids[0])) if opt.gpu_ids else torch.device(
            'cpu')  # get device name: CPU or GPU
    self.save_dir = os.path.join(
        opt.checkpoints_dir, opt.name)  # save all the checkpoints to save_dir
    if not os.path.exists(self.save_dir):
        os.makedirs(self.save_dir)
        print('Directory created: %s' % self.save_dir)
    # define schedule
    if opt.beta_schedule == 'cosine':
        """ Cosine Schedule
        @inproceedings{
        anonymous2021improved,
        title={Improved Denoising Diffusion Probabilistic Models},
        author={Anonymous},
        booktitle={Submitted to International Conference on Learning Representations},
        year={2021},
        url={https://openreview.net/forum?id=-NEXDKk8gZ},
        note={under review}
        }
        """
        s = 0.008
        # NOTE(review): this produces num_timesteps - 1 sample points over
        # [0, num_timesteps - 1] — confirm the off-by-one is intended.
        x = np.linspace(0, opt.num_timesteps - 1, opt.num_timesteps - 1)
        alphas_cumprod = np.cos(
            ((x / opt.num_timesteps) + s) / (1 + s) * np.pi * 0.5)**2
        alphas_cumprod = alphas_cumprod / alphas_cumprod[0]
        # Cap the cumulative product so the derived betas never reach 0.
        alphas_cumprod[alphas_cumprod > 0.999999] = 0.999999
        alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1])
        betas = np.clip(1 - (alphas_cumprod / alphas_cumprod_prev),
                        a_min=0,
                        a_max=0.999)
        alphas = 1. - betas
    else:
        # Linear or other schedule provided by get_beta_schedule.
        betas = self.get_beta_schedule(opt.beta_schedule, opt.beta_start,
                                       opt.beta_end, opt.num_timesteps)
        alphas = 1. - betas
        alphas_cumprod = np.cumprod(alphas, axis=0)
        alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1])
    assert alphas_cumprod_prev.shape == betas.shape
    assert isinstance(
        betas, np.ndarray) and (betas >= 0).all() and (betas <= 1).all()
    timesteps, = betas.shape
    self.num_timesteps = int(timesteps)
    self.betas = torch.tensor(betas)
    self.alphas_cumprod = torch.tensor(alphas_cumprod)
    self.alphas_cumprod_prev = torch.tensor(alphas_cumprod_prev)
    # calculations for diffusion q(x_t | x_{t-1}) and others
    self.sqrt_alphas_cumprod = torch.tensor(np.sqrt(alphas_cumprod),
                                            dtype=torch.float32,
                                            device=self.device)
    self.sqrt_one_minus_alphas_cumprod = torch.tensor(
        np.sqrt(1. - alphas_cumprod),
        dtype=torch.float32,
        device=self.device)
    self.log_one_minus_alphas_cumprod = torch.tensor(
        np.log(1. - alphas_cumprod), dtype=torch.float32, device=self.device)
    self.sqrt_recip_alphas_cumprod = torch.tensor(np.sqrt(1. / alphas_cumprod),
                                                  dtype=torch.float32,
                                                  device=self.device)
    self.sqrt_recipm1_alphas_cumprod = torch.tensor(
        np.sqrt(1. / alphas_cumprod - 1),
        dtype=torch.float32,
        device=self.device)
    # calculations for posterior q(x_{t-1} | x_t, x_0)
    posterior_variance = betas * (1. - alphas_cumprod_prev) / (
        1. - alphas_cumprod)
    # above: equal to 1. / (1. / (1. - alpha_cumprod_tm1) + alpha_t / beta_t)
    self.posterior_variance = torch.tensor(posterior_variance,
                                           dtype=torch.float32,
                                           device=self.device)
    # below: log calculation clipped because the posterior variance is 0 at the beginning of the diffusion chain
    self.posterior_log_variance_clipped = torch.tensor(np.log(
        np.maximum(posterior_variance, 1e-20)),
        dtype=torch.float32,
        device=self.device)
    self.posterior_mean_coef1 = torch.tensor(
        betas * np.sqrt(alphas_cumprod_prev) / (1. - alphas_cumprod),
        dtype=torch.float32,
        device=self.device)
    self.posterior_mean_coef2 = torch.tensor(
        (1. - alphas_cumprod_prev) * np.sqrt(alphas) / (1. - alphas_cumprod),
        dtype=torch.float32,
        device=self.device)
    # setup denoise model
    model = []
    if opt.block_size != 1:
        model += [utils.SpaceToDepth(opt.block_size)]
    model += [
        unet.Unet(opt.input_nc,
                  opt.input_nc,
                  num_middles=1,
                  ngf=opt.ngf,
                  norm=opt.norm,
                  activation=opt.activation,
                  use_dropout=opt.dropout,
                  use_attention=opt.attention,
                  device=self.device)
    ]
    if opt.block_size != 1:
        # NOTE(review): SpaceToDepth is appended again after the U-Net; an
        # inverse transform (DepthToSpace) would be expected here — confirm.
        model += [utils.SpaceToDepth(opt.block_size)]
    self.denoise_model = utils.init_net(nn.Sequential(*model), opt.init_type,
                                        opt.init_gain, opt.gpu_ids)
    if opt.phase == 'train':
        # setup optimizer, visualizer, and learning rate scheduler
        self.optimizer = torch.optim.Adam(self.denoise_model.parameters(),
                                          lr=opt.lr,
                                          betas=(opt.beta1, 0.999))
        if 'mse' in opt.loss_type:
            self.loss_criteria = nn.MSELoss()
        elif 'l1' in opt.loss_type:
            self.loss_criteria = nn.L1Loss()
        else:
            raise NotImplementedError(opt.loss_type)
        # set prediction function
        if 'noisepred' in opt.loss_type:
            self.pred_fn = DDPModel._noisepred
        else:
            raise NotImplementedError(opt.loss_type)
        self.loss_type = opt.loss_type
        self.visualizer = visualizer.Visualizer(opt)
        self.scheduler = utils.get_scheduler(self.optimizer, opt)
        self.lr_policy = opt.lr_policy
    else:
        # Inference phases: fix the model in eval mode and remember the
        # sampling image size.
        self.image_size = (opt.batch_size, opt.input_nc, opt.load_size,
                           opt.load_size)
        self.denoise_model.train(False)
        # set prediction function
        if 'noisepred' in opt.loss_type:
            self.pred_fn = self.predict_start_from_noise
        else:
            raise NotImplementedError(opt.loss_type)
        if opt.phase == 'interpolate':
            self.mix_rate = opt.mix_rate