def __init__(self, device=None, jit=True):
    """Build the data loader, networks, losses and optimizers.

    Args:
        device: Target device name. The networks are moved to the GPU only
            when this is exactly ``'cuda'``; any other value leaves them
            where they were constructed.
        jit: Stored as ``self.jit``; it is not otherwise read in this
            method (presumably consumed by ``_maybe_trace`` - confirm).
    """
    self.device = device
    self.jit = jit
    self.opt = Namespace(
        n_blocks1=7,
        n_blocks2=3,
        batch_size=1,
        resolution=512,
        name='Real_fixed',
    )
    use_cuda = self.device == 'cuda'

    # The CSV shipped with the model is a template containing
    # ``{scriptdir}`` placeholders. Resolve both the template and the
    # rendered copy against the script directory so construction does not
    # depend on the caller's current working directory.
    scriptdir = os.path.dirname(os.path.realpath(__file__))
    template_csv = os.path.join(scriptdir, "Video_data_train.csv")
    csv_file = os.path.join(scriptdir, "Video_data_train_processed.csv")
    with open(template_csv, "r") as src, open(csv_file, "w") as dst:
        dst.write(src.read().format(scriptdir=scriptdir))

    data_config_train = {
        'reso': (self.opt.resolution, self.opt.resolution)
    }
    traindata = VideoData(csv_file=csv_file,
                          data_config=data_config_train,
                          transform=None)
    self.train_loader = torch.utils.data.DataLoader(
        traindata,
        batch_size=self.opt.batch_size,
        shuffle=True,
        num_workers=self.opt.batch_size,
        collate_fn=_collate_filter_none)

    # netB is kept in eval mode with every parameter frozen.
    netB = ResnetConditionHR(input_nc=(3, 3, 1, 4),
                             output_nc=4,
                             n_blocks1=self.opt.n_blocks1,
                             n_blocks2=self.opt.n_blocks2)
    if use_cuda:
        netB.cuda()
    netB.eval()
    for param in netB.parameters():  # freeze netB
        param.requires_grad = False
    self.netB = netB

    # netG: same architecture as netB, freshly initialised and trainable.
    netG = ResnetConditionHR(input_nc=(3, 3, 1, 4),
                             output_nc=4,
                             n_blocks1=self.opt.n_blocks1,
                             n_blocks2=self.opt.n_blocks2)
    netG.apply(conv_init)
    self.netG = netG
    if use_cuda:
        self.netG.cuda()
        # TODO(asuhan): is this needed?
        torch.backends.cudnn.benchmark = True

    # netD: discriminator, wrapped in DataParallel.
    netD = MultiscaleDiscriminator(input_nc=3,
                                   num_D=1,
                                   norm_layer=nn.InstanceNorm2d,
                                   ndf=64)
    netD.apply(conv_init)
    netD = nn.DataParallel(netD)
    self.netD = netD
    if use_cuda:
        self.netD.cuda()

    self.l1_loss = alpha_loss()
    self.c_loss = compose_loss()
    self.g_loss = alpha_gradient_loss()
    self.GAN_loss = GANloss()

    self.optimizerG = optim.Adam(netG.parameters(), lr=1e-4)
    self.optimizerD = optim.Adam(netD.parameters(), lr=1e-5)

    # TensorBoard events and model files both go to the script directory.
    self.log_writer = SummaryWriter(scriptdir)
    self.model_dir = scriptdir

    self._maybe_trace()
batch_size=args.batch_size, shuffle=True, num_workers=args.batch_size, collate_fn=collate_filter_none) print('\n[Phase 2] : Initialization') netB = ResnetConditionHR(input_nc=(3, 3, 1, 4), output_nc=4, n_blocks1=args.n_blocks1, n_blocks2=args.n_blocks2) netB = nn.DataParallel(netB) netB.load_state_dict(torch.load(args.init_model)) netB.cuda() netB.eval() for param in netB.parameters(): # freeze netD param.requires_grad = False netG = ResnetConditionHR(input_nc=(3, 3, 1, 4), output_nc=4, n_blocks1=args.n_blocks1, n_blocks2=args.n_blocks2) netG.apply(conv_init) netG = nn.DataParallel(netG) netG.cuda() torch.backends.cudnn.benchmark = True netD = MultiscaleDiscriminator(input_nc=3, num_D=1, norm_layer=nn.InstanceNorm2d, ndf=64)
batch_size=args.batch_size, shuffle=True, num_workers=args.batch_size, collate_fn=collate_filter_none) print('\n[Phase 2] : Initialization') netB = ResnetConditionHR(input_nc=(3, 3, 1, 4), output_nc=4, n_blocks1=args.n_blocks1_t, n_blocks2=args.n_blocks2_t) netB = nn.DataParallel(netB) netB.load_state_dict(torch.load(args.init_model)) netB.cuda() netB.eval() for param in netB.parameters(): # freeze netB param.requires_grad = False netG = ResnetConditionHR_mo(input_nc=(3, 3, 1, 4), output_nc=4, n_blocks1=args.n_blocks1_s, n_blocks2=args.n_blocks2_s) netG.apply(conv_init) netG = nn.DataParallel(netG) netG.cuda() torch.backends.cudnn.benchmark = True netD = MultiscaleDiscriminator(input_nc=3, num_D=1, norm_layer=nn.InstanceNorm2d, ndf=64)
output_nc=4, n_blocks1=7, n_blocks2=3, norm_layer=nn.BatchNorm2d) net.apply(conv_init) net = nn.DataParallel(net) # net.load_state_dict(torch.load(model_dir + 'net_epoch_X')) #uncomment this if you are initializing your model net.cuda() torch.backends.cudnn.benchmark = True # Loss l1_loss = alpha_loss() c_loss = compose_loss() g_loss = alpha_gradient_loss() optimizer = optim.Adam(net.parameters(), lr=1e-4) # optimizer.load_state_dict(torch.load(model_dir + 'optim_epoch_X')) #uncomment this if you are initializing your model log_writer = SummaryWriter(tb_dir) print('Starting Training') step = 50 # steps to visualize training images in tensorboard KK = len(train_loader) for epoch in range(0, args.epoch): net.train() netL, alL, fgL, fg_cL, al_fg_cL, elapse_run, elapse = 0, 0, 0, 0, 0, 0, 0
def main():
    """Train the matting generator on real video data with a GAN objective.

    Parses command-line options, builds the ``VideoData`` loader, loads the
    frozen network ``netB`` from ``--init_model`` to provide
    pseudo-supervision, and trains ``netG`` against the discriminator
    ``netD``. Scalars and images are logged to ``tb_summary/<name>``;
    network and optimizer state is saved to ``models/<name>`` on every
    even-indexed epoch, at which point the pseudo-supervision weight is
    halved.
    """
    print(f'Is CUDA available: {torch.cuda.is_available()}')

    # Parse arguments.
    parser = argparse.ArgumentParser(
        description='Training Background Matting on Adobe Dataset')
    parser.add_argument('-n', '--name', type=str,
                        help='Name of tensorboard and model saving folders')
    parser.add_argument('-bs', '--batch_size', type=int, help='Batch Size')
    parser.add_argument('-res', '--reso', type=int,
                        help='Input image resolution')
    parser.add_argument('-init_model', '--init_model', type=str,
                        help='Initial model file')
    parser.add_argument('-w', '--workers', type=int, default=None,
                        help='Number of worker to load data')
    parser.add_argument('-ep', '--epochs', type=int, default=15,
                        help='Maximum Epoch')
    parser.add_argument(
        '-n_blocks1', '--n_blocks1', type=int, default=7,
        help='Number of residual blocks after Context Switching')
    parser.add_argument('-n_blocks2', '--n_blocks2', type=int, default=3,
                        help='Number of residual blocks for Fg and alpha each')
    args = parser.parse_args()
    if args.workers is None:
        args.workers = args.batch_size

    # Directories. ``exist_ok`` avoids the check-then-create race when
    # several runs start at the same time.
    tb_dir = f'tb_summary/{args.name}'
    model_dir = f'models/{args.name}'
    os.makedirs(model_dir, exist_ok=True)
    os.makedirs(tb_dir, exist_ok=True)

    # Input list
    data_config_train = {
        'reso': (args.reso, args.reso)
    }  # if trimap is true, rcnn is used

    # DATA LOADING
    print('\n[Phase 1] : Data Preparation')

    # Original Data
    traindata = VideoData(
        csv_file='Video_data_train.csv',
        data_config=data_config_train,
        transform=None
    )  # Write a dataloader function that can read the database provided by .csv file

    train_loader = DataLoader(traindata,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.workers,
                              collate_fn=collate_filter_none)

    print('\n[Phase 2] : Initialization')

    # netB: loaded from --init_model, kept in eval mode and frozen.
    netB = ResnetConditionHR(input_nc=(3, 3, 1, 4),
                             output_nc=4,
                             n_blocks1=args.n_blocks1,
                             n_blocks2=args.n_blocks2)
    netB = nn.DataParallel(netB)
    netB.load_state_dict(torch.load(args.init_model))
    netB.cuda()
    netB.eval()
    for param in netB.parameters():  # freeze netB
        param.requires_grad = False

    # netG: the generator being trained.
    netG = ResnetConditionHR(input_nc=(3, 3, 1, 4),
                             output_nc=4,
                             n_blocks1=args.n_blocks1,
                             n_blocks2=args.n_blocks2)
    netG.apply(conv_init)
    netG = nn.DataParallel(netG)
    netG.cuda()
    torch.backends.cudnn.benchmark = True

    # netD: the discriminator.
    netD = MultiscaleDiscriminator(input_nc=3,
                                   num_D=1,
                                   norm_layer=nn.InstanceNorm2d,
                                   ndf=64)
    netD.apply(conv_init)
    netD = nn.DataParallel(netD)
    netD.cuda()

    # Loss
    l1_loss = alpha_loss()
    c_loss = compose_loss()
    g_loss = alpha_gradient_loss()
    GAN_loss = GANloss()

    optimizerG = Adam(netG.parameters(), lr=1e-4)
    optimizerD = Adam(netD.parameters(), lr=1e-5)

    log_writer = SummaryWriter(tb_dir)

    print('Starting Training')
    step = 50  # print and log images every `step` iterations

    KK = len(train_loader)

    wt = 1  # weight on the pseudo-supervised terms; halved on even epochs
    for epoch in range(0, args.epochs):

        netG.train()
        netD.train()

        lG, lD, GenL, DisL_r, DisL_f, alL, fgL, compL, elapse_run, elapse = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

        t0 = get_time()

        for i, data in enumerate(train_loader):
            # Initiating
            bg = data['bg'].cuda()
            image = data['image'].cuda()
            seg = data['seg'].cuda()
            multi_fr = data['multi_fr'].cuda()
            seg_gt = data['seg-gt'].cuda()
            back_rnd = data['back-rnd'].cuda()

            mask0 = torch.ones(seg.shape).cuda()

            tr0 = get_time()

            # pseudo-supervision
            alpha_pred_sup, fg_pred_sup = netB(image, bg, seg, multi_fr)
            # `.float()` converts on the tensor's own device; the previous
            # `.type(torch.FloatTensor).cuda()` copied through host memory.
            mask = (alpha_pred_sup > -0.98).float()
            mask1 = (seg_gt > 0.95).float()

            # Train Generator
            alpha_pred, fg_pred = netG(image, bg, seg, multi_fr)

            # pseudo-supervised losses
            al_loss = l1_loss(
                alpha_pred_sup, alpha_pred,
                mask0) + 0.5 * g_loss(alpha_pred_sup, alpha_pred, mask0)
            fg_loss = l1_loss(fg_pred_sup, fg_pred, mask)

            # compose into same background
            comp_loss = c_loss(image, alpha_pred, fg_pred, bg, mask1)

            # randomly permute the background
            perm = torch.LongTensor(np.random.permutation(bg.shape[0]))
            bg_sh = bg[perm, :, :, :]

            al_mask = (alpha_pred > 0.95).float()

            # Choose the target background for composition
            # back_rnd: contains separate set of background videos captured
            # bg_sh: contains randomly permuted captured background from the same minibatch
            if np.random.random_sample() > 0.5:
                bg_sh = back_rnd

            image_sh = compose_image_withshift(
                alpha_pred, image * al_mask + fg_pred * (1 - al_mask), bg_sh,
                seg)

            fake_response = netD(image_sh)

            loss_ganG = GAN_loss(fake_response, label_type=True)

            lossG = loss_ganG + wt * (0.05 * comp_loss + 0.05 * al_loss +
                                      0.05 * fg_loss)

            optimizerG.zero_grad()
            lossG.backward()
            optimizerG.step()

            # Train Discriminator
            # image_sh is built from the generator outputs, and
            # lossG.backward() has already run on that graph. Detaching
            # makes the discriminator backward pass stop at the image; only
            # netD parameters are stepped by optimizerD, so its update is
            # unchanged.
            fake_response = netD(image_sh.detach())
            real_response = netD(image)

            loss_ganD_fake = GAN_loss(fake_response, label_type=False)
            loss_ganD_real = GAN_loss(real_response, label_type=True)

            lossD = (loss_ganD_real + loss_ganD_fake) * 0.5

            # Update discriminator for every 5 generator update
            if i % 5 == 0:
                optimizerD.zero_grad()
                lossD.backward()
                optimizerD.step()

            lG += lossG.data
            lD += lossD.data
            GenL += loss_ganG.data
            DisL_r += loss_ganD_real.data
            DisL_f += loss_ganD_fake.data

            alL += al_loss.data
            fgL += fg_loss.data
            compL += comp_loss.data

            global_step = epoch * KK + i + 1
            log_writer.add_scalar('Generator Loss', lossG.data, global_step)
            log_writer.add_scalar('Discriminator Loss', lossD.data,
                                  global_step)
            log_writer.add_scalar('Generator Loss: Fake', loss_ganG.data,
                                  global_step)
            log_writer.add_scalar('Discriminator Loss: Real',
                                  loss_ganD_real.data, global_step)
            log_writer.add_scalar('Discriminator Loss: Fake',
                                  loss_ganD_fake.data, global_step)

            log_writer.add_scalar('Generator Loss: Alpha', al_loss.data,
                                  global_step)
            log_writer.add_scalar('Generator Loss: Fg', fg_loss.data,
                                  global_step)
            log_writer.add_scalar('Generator Loss: Comp', comp_loss.data,
                                  global_step)

            t1 = get_time()

            elapse += t1 - t0
            elapse_run += t1 - tr0
            t0 = t1

            if i % step == (step - 1):
                print(f'[{epoch + 1}, {i + 1:5d}] '
                      f'Gen-loss: {lG / step:.4f} '
                      f'Disc-loss: {lD / step:.4f} '
                      f'Alpha-loss: {alL / step:.4f} '
                      f'Fg-loss: {fgL / step:.4f} '
                      f'Comp-loss: {compL / step:.4f} '
                      f'Time-all: {elapse / step:.4f} '
                      f'Time-fwbw: {elapse_run / step:.4f}')
                lG, lD, GenL, DisL_r, DisL_f, alL, fgL, compL, elapse_run, elapse = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

                write_tb_log(image, 'image', log_writer, i)
                write_tb_log(seg, 'seg', log_writer, i)
                write_tb_log(alpha_pred_sup, 'alpha-sup', log_writer, i)
                write_tb_log(alpha_pred, 'alpha_pred', log_writer, i)
                write_tb_log(fg_pred_sup * mask, 'fg-pred-sup', log_writer, i)
                write_tb_log(fg_pred * mask, 'fg_pred', log_writer, i)

                # composition: alpha is rescaled with (alpha + 1) / 2 before
                # blending foreground and background
                alpha_pred = (alpha_pred + 1) / 2
                comp = fg_pred * alpha_pred + (1 - alpha_pred) * bg
                write_tb_log(comp, 'composite-same', log_writer, i)
                write_tb_log(image_sh, 'composite-diff', log_writer, i)

                del comp

            # Drop per-iteration tensors before the next batch is loaded.
            del bg, image, seg, multi_fr, seg_gt, back_rnd
            del mask0, alpha_pred_sup, fg_pred_sup, mask, mask1
            del alpha_pred, fg_pred, al_loss, fg_loss, comp_loss
            del bg_sh, image_sh, fake_response, real_response
            del lossG, lossD, loss_ganD_real, loss_ganD_fake, loss_ganG

        if epoch % 2 == 0:
            ep = epoch + 1
            torch.save(netG.state_dict(), f'{model_dir}/netG_epoch_{ep}.pth')
            torch.save(optimizerG.state_dict(),
                       f'{model_dir}/optimG_epoch_{ep}.pth')
            torch.save(netD.state_dict(), f'{model_dir}/netD_epoch_{ep}.pth')
            torch.save(optimizerD.state_dict(),
                       f'{model_dir}/optimD_epoch_{ep}.pth')

            # Change weight every 2 epoch to put more stress on discriminator weight and less on pseudo-supervision
            wt = wt / 2
def main():
    """Train the supervised matting network on the Adobe dataset.

    Parses command-line options, builds the ``AdobeDataAffineHR`` loader,
    optionally resumes from the highest-numbered ``net_epoch_<N>.pth``
    checkpoint in ``models/<name>`` (``--continue``), and trains the network
    with alpha, foreground, composition and alpha-gradient losses. Network
    and optimizer state is saved after every epoch; scalars and images are
    logged to ``tb_summary/<name>``.
    """
    print(f'Is CUDA available: {torch.cuda.is_available()}')

    # Parse arguments.
    parser = argparse.ArgumentParser(
        description='Training Background Matting on Adobe Dataset')
    parser.add_argument('-n', '--name', type=str,
                        help='Name of tensorboard and model saving folders')
    parser.add_argument('-bs', '--batch_size', type=int, help='Batch Size')
    parser.add_argument('-res', '--reso', type=int,
                        help='Input image resolution')
    parser.add_argument(
        '-cont', '--continue', action='store_true',
        help=
        'Indicates to run the continue training using the latest saved model')
    parser.add_argument('-w', '--workers', type=int, default=None,
                        help='Number of worker to load data')
    parser.add_argument('-ep', '--epochs', type=int, default=60,
                        help='Maximum Epoch')
    parser.add_argument(
        '-n_blocks1', '--n_blocks1', type=int, default=7,
        help='Number of residual blocks after Context Switching')
    parser.add_argument('-n_blocks2', '--n_blocks2', type=int, default=3,
                        help='Number of residual blocks for Fg and alpha each')
    args = parser.parse_args()
    if args.workers is None:
        args.workers = args.batch_size
    # `continue` is a keyword, so the flag cannot be read as an attribute.
    continue_training = getattr(args, 'continue')

    # Directories. ``exist_ok`` avoids the check-then-create race when
    # several runs start at the same time.
    tb_dir = f'tb_summary/{args.name}'
    model_dir = f'models/{args.name}'
    os.makedirs(model_dir, exist_ok=True)
    os.makedirs(tb_dir, exist_ok=True)

    # Input list
    data_config_train = {
        'reso': [args.reso, args.reso],
        'trimapK': [5, 5],
        'noise': True
    }  # choice for data loading parameters

    # DATA LOADING
    print('\n[Phase 1] : Data Preparation')

    # Original Data
    traindata = AdobeDataAffineHR(
        csv_file='Data_adobe/Adobe_train_data.csv',
        data_config=data_config_train,
        transform=None
    )  # Write a dataloader function that can read the database provided by .csv file

    train_loader = DataLoader(traindata,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.workers,
                              collate_fn=collate_filter_none)

    print('\n[Phase 2] : Initialization')

    # Find latest saved model
    model_path, optim_path = '', ''
    start_epoch = 0
    if continue_training:
        prefix, suffix = 'net_epoch_', '.pth'
        latest = ''
        for name in os.listdir(model_dir):
            if not (name.startswith(prefix) and name.endswith(suffix)):
                continue
            try:
                ep = int(name[len(prefix):-len(suffix)])
            except ValueError:
                # Not an epoch-numbered checkpoint (e.g. a hand-renamed
                # file); ignore it instead of aborting the resume.
                continue
            if ep > start_epoch:
                start_epoch = ep
                latest = name
        if latest:
            model_path = f'{model_dir}/{latest}'
            optim_path = f'{model_dir}/optim_epoch_{start_epoch}.pth'
        else:
            # Nothing to resume from: fall back to training from scratch.
            continue_training = False

    # Honour --n_blocks1 / --n_blocks2; their defaults (7 and 3) match the
    # values that were previously hard-coded here.
    net = ResnetConditionHR(input_nc=(3, 3, 1, 4),
                            output_nc=4,
                            n_blocks1=args.n_blocks1,
                            n_blocks2=args.n_blocks2,
                            norm_layer=nn.BatchNorm2d)
    net.apply(conv_init)
    net = nn.DataParallel(net)
    if continue_training:
        net.load_state_dict(torch.load(model_path))
    net.cuda()
    torch.backends.cudnn.benchmark = True

    # Loss
    l1_loss = alpha_loss()
    c_loss = compose_loss()
    g_loss = alpha_gradient_loss()

    optimizer = Adam(net.parameters(), lr=1e-4)
    if continue_training:
        optimizer.load_state_dict(torch.load(optim_path))

    log_writer = SummaryWriter(tb_dir)

    print('Starting Training')
    step = 50  # steps to visualize training images in tensorboard

    KK = len(train_loader)

    # Checkpoints are named with epoch + 1, so the number parsed from the
    # latest checkpoint is the 0-based index of the next epoch to run.
    for epoch in range(start_epoch, args.epochs):

        net.train()

        netL, alL, fgL, fg_cL, al_fg_cL, elapse_run, elapse = 0, 0, 0, 0, 0, 0, 0

        t0 = get_time()
        for i, data in enumerate(train_loader):
            # Initiating
            fg = data['fg'].cuda()
            bg = data['bg'].cuda()
            alpha = data['alpha'].cuda()
            image = data['image'].cuda()
            bg_tr = data['bg_tr'].cuda()
            seg = data['seg'].cuda()
            multi_fr = data['multi_fr'].cuda()

            # `.float()` converts on the tensor's own device; the previous
            # `.type(torch.FloatTensor).cuda()` copied through host memory.
            mask = (alpha > -0.99).float()
            mask0 = torch.ones(alpha.shape).cuda()

            tr0 = get_time()

            alpha_pred, fg_pred = net(image, bg_tr, seg, multi_fr)

            # Put needed loss here
            al_loss = l1_loss(alpha, alpha_pred, mask0)
            fg_loss = l1_loss(fg, fg_pred, mask)

            # Where predicted alpha exceeds 0.95, use the input image as
            # the foreground instead of the prediction.
            al_mask = (alpha_pred > 0.95).float()
            fg_pred_c = image * al_mask + fg_pred * (1 - al_mask)

            fg_c_loss = c_loss(image, alpha_pred, fg_pred_c, bg, mask0)

            al_fg_c_loss = g_loss(alpha, alpha_pred, mask0)

            loss = al_loss + 2 * fg_loss + fg_c_loss + al_fg_c_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            netL += loss.data
            alL += al_loss.data
            fgL += fg_loss.data
            fg_cL += fg_c_loss.data
            al_fg_cL += al_fg_c_loss.data

            global_step = epoch * KK + i + 1
            log_writer.add_scalar('training_loss', loss.data, global_step)
            log_writer.add_scalar('alpha_loss', al_loss.data, global_step)
            log_writer.add_scalar('fg_loss', fg_loss.data, global_step)
            log_writer.add_scalar('comp_loss', fg_c_loss.data, global_step)
            log_writer.add_scalar('alpha_gradient_loss', al_fg_c_loss.data,
                                  global_step)

            t1 = get_time()

            elapse += t1 - t0
            elapse_run += t1 - tr0

            t0 = t1

            if i % step == (step - 1):
                print(f'[{epoch + 1}, {i + 1:5d}] '
                      f'Total-loss: {netL / step:.4f} '
                      f'Alpha-loss: {alL / step:.4f} '
                      f'Fg-loss: {fgL / step:.4f} '
                      f'Comp-loss: {fg_cL / step:.4f} '
                      f'Alpha-gradient-loss: {al_fg_cL / step:.4f} '
                      f'Time-all: {elapse / step:.4f} '
                      f'Time-fwbw: {elapse_run / step:.4f}')
                netL, alL, fgL, fg_cL, al_fg_cL, elapse_run, elapse = 0, 0, 0, 0, 0, 0, 0

                write_tb_log(image, 'image', log_writer, i)
                write_tb_log(seg, 'seg', log_writer, i)
                write_tb_log(alpha, 'alpha', log_writer, i)
                write_tb_log(alpha_pred, 'alpha_pred', log_writer, i)
                write_tb_log(fg * mask, 'fg', log_writer, i)
                write_tb_log(fg_pred * mask, 'fg_pred', log_writer, i)
                write_tb_log(multi_fr[0:4, 0, ...].unsqueeze(1), 'multi_fr',
                             log_writer, i)

                # composition: alpha is rescaled with (alpha + 1) / 2 before
                # blending foreground and background
                alpha_pred = (alpha_pred + 1) / 2
                comp = fg_pred * alpha_pred + (1 - alpha_pred) * bg
                write_tb_log(comp, 'composite', log_writer, i)
                del comp

            # Drop per-iteration tensors before the next batch is loaded.
            del fg, bg, alpha, image, alpha_pred, fg_pred, bg_tr, seg, multi_fr

        # Saving
        torch.save(net.state_dict(), f'{model_dir}/net_epoch_{epoch + 1}.pth')
        torch.save(optimizer.state_dict(),
                   f'{model_dir}/optim_epoch_{epoch + 1}.pth')
def __init__(self, device=None, jit=True):
    """Build the data loader, networks, losses and optimizers.

    Args:
        device: Target device name. The networks are moved to the GPU only
            when this is exactly ``'cuda'``; any other value leaves them
            where they were constructed.
        jit: Stored as ``self.jit``; it is not otherwise read in this
            method (presumably consumed by ``_maybe_trace`` - confirm).
    """
    self.device = device
    self.jit = jit
    self.opt = Namespace(
        n_blocks1=7,
        n_blocks2=3,
        batch_size=1,
        resolution=512,
        name='Real_fixed',
    )
    use_cuda = self.device == 'cuda'

    data_config_train = {
        'reso': (self.opt.resolution, self.opt.resolution)
    }
    traindata = VideoData(csv_file='Video_data_train.csv',
                          data_config=data_config_train,
                          transform=None)
    self.train_loader = torch.utils.data.DataLoader(
        traindata,
        batch_size=self.opt.batch_size,
        shuffle=True,
        num_workers=self.opt.batch_size,
        collate_fn=_collate_filter_none)

    # netB is kept in eval mode with every parameter frozen.
    netB = ResnetConditionHR(input_nc=(3, 3, 1, 4),
                             output_nc=4,
                             n_blocks1=self.opt.n_blocks1,
                             n_blocks2=self.opt.n_blocks2)
    if use_cuda:
        netB.cuda()
    netB.eval()
    for param in netB.parameters():  # freeze netB
        param.requires_grad = False
    self.netB = netB

    # netG: same architecture as netB, freshly initialised and trainable.
    netG = ResnetConditionHR(input_nc=(3, 3, 1, 4),
                             output_nc=4,
                             n_blocks1=self.opt.n_blocks1,
                             n_blocks2=self.opt.n_blocks2)
    netG.apply(conv_init)
    self.netG = netG
    if use_cuda:
        self.netG.cuda()
        # TODO(asuhan): is this needed?
        torch.backends.cudnn.benchmark = True

    # netD: discriminator, wrapped in DataParallel.
    netD = MultiscaleDiscriminator(input_nc=3,
                                   num_D=1,
                                   norm_layer=nn.InstanceNorm2d,
                                   ndf=64)
    netD.apply(conv_init)
    netD = nn.DataParallel(netD)
    self.netD = netD
    if use_cuda:
        self.netD.cuda()

    self.l1_loss = alpha_loss()
    self.c_loss = compose_loss()
    self.g_loss = alpha_gradient_loss()
    self.GAN_loss = GANloss()

    self.optimizerG = optim.Adam(netG.parameters(), lr=1e-4)
    self.optimizerD = optim.Adam(netD.parameters(), lr=1e-5)

    # NOTE(review): both output locations are absolute, CI-specific paths;
    # consider deriving them from the script location - confirm with owner.
    # ``exist_ok`` replaces the exists()-then-makedirs() pair, which could
    # raise if another process created the directory in between.
    tb_dir = '/home/circleci/project/benchmark/models/Background-Matting/TB_Summary/' + self.opt.name
    os.makedirs(tb_dir, exist_ok=True)
    self.log_writer = SummaryWriter(tb_dir)

    self.model_dir = '/home/circleci/project/benchmark/models/Background-Matting/Models/' + self.opt.name
    os.makedirs(self.model_dir, exist_ok=True)

    self._maybe_trace()