def load_model():
    """Load the pretrained T-Net and M-Net from their latest checkpoints.

    Returns:
        (t_model, m_model): both networks with weights restored, switched to
        eval mode, and moved to the module-level ``device``.

    Raises:
        AssertionError: if either checkpoint file is missing.
    """
    # NOTE: 'ckpt_lastest' (sic) matches the on-disk file names produced by the
    # training script — do not "fix" the spelling here.
    t_path = os.path.join('./ckpt', 'pre_train_t_net', 'model/ckpt_lastest.pth')
    m_path = os.path.join('./ckpt', 'pre_train_m_net', 'model/ckpt_lastest.pth')
    # Bug fix: the original asserted only t_path yet formatted m_path into the
    # message; validate each checkpoint with its own path.
    assert os.path.isfile(t_path), 'Wrong model path: {}'.format(t_path)
    assert os.path.isfile(m_path), 'Wrong model path: {}'.format(m_path)
    print('Loading model ...')
    t_model = network.net_T()
    m_model = network.net_M()
    # map_location lets a checkpoint saved on GPU be restored on a CPU-only
    # host. `device` is assumed to be a module-level torch.device — confirm.
    t_model.load_state_dict(torch.load(t_path, map_location=device)['state_dict'])
    m_model.load_state_dict(torch.load(m_path, map_location=device)['state_dict'])
    t_model.eval()
    m_model.eval()
    t_model.to(device)
    m_model.to(device)
    return t_model, m_model
def main():
    """Train one of the three phases (t-net / m-net / end-to-end).

    Reads all configuration from `get_args()`; writes checkpoints and logs
    through `Train_Log`.
    """
    # Parse arguments
    print("=============> Loading args")
    args = get_args()

    # Select CPU / GPU
    print("============> Environment init")
    if args.without_gpu:
        print("use CPU !")
        device = torch.device('cpu')
    else:
        if torch.cuda.is_available():
            device = torch.device('cuda')
        else:
            # Bug fix: original only printed ("No GPU is is available !") and
            # left `device` unassigned, causing a NameError below. Fall back
            # to CPU instead.
            print("No GPU is available !")
            device = torch.device('cpu')

    # Build the network for the requested phase
    print("============> Building model ...")
    if args.train_phase == 'pre_train_t_net':
        model = network.net_T()
    elif args.train_phase == 'pre_train_m_net':
        model = network.net_M()
        model.apply(weight_init)
    elif args.train_phase == 'end_to_end':
        model = network.net_F()
        if args.pretrain:
            model = Train_Log.load_pretrain(model)
    else:
        raise ValueError('Wrong train phase request!')
    model.to(device)

    # Bug fix: debug overrides must be applied BEFORE the DataLoader is built;
    # the original set args.train_batch = 1 after the loader was constructed,
    # so the debug shape-asserts compared against the wrong batch size.
    if args.debug:
        args.save_epoch = 1
        args.train_batch = 1  # default debug: 1
        args.nEpochs = 1
        args.print_iter = 1

    # Build datasets
    print("============> Loading datasets ...")
    train_data = dataset.human_matting_data(args)
    trainloader = DataLoader(train_data,
                             batch_size=args.train_batch,
                             drop_last=True,
                             shuffle=True,
                             num_workers=args.nThreads,
                             pin_memory=True)

    # How often (in iterations) to save the "latest" checkpoint.
    save_latest_freq = int(len(train_data) // args.train_batch * 0.55)
    if args.debug:
        save_latest_freq = 10

    # Set optimizer
    print("============> Set optimizer ...")
    lr = args.lr
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                           lr=lr, betas=(0.9, 0.999), weight_decay=0.0005)

    # Train
    print("============> Start Train ! ...")
    start_epoch = 1
    trainlog = Train_Log(args)
    if args.continue_train:
        start_epoch, model = trainlog.load_model(model)

    model.train()
    for epoch in range(start_epoch, args.nEpochs + 1):
        loss_ = 0
        L_alpha_ = 0
        L_composition_ = 0
        L_cross_ = 0
        if args.lrdecayType != 'keep':
            lr = set_lr(args, epoch, optimizer)
        t0 = time.time()
        for i, sample_batched in enumerate(trainloader):
            optimizer.zero_grad()

            # ---- phase 1: pretrain trimap network ----
            if args.train_phase == 'pre_train_t_net':
                img, trimap_gt = sample_batched['image'], sample_batched['trimap']
                img, trimap_gt = img.to(device), trimap_gt.to(device)
                trimap_pre = model(img)
                if args.debug:  # debug only: sanity-check tensor shapes
                    assert tuple(trimap_pre.shape) == (args.train_batch, 3,
                                                       args.patch_size, args.patch_size)
                    assert tuple(trimap_gt.shape) == (args.train_batch, 1,
                                                      args.patch_size, args.patch_size)
                loss = loss_f_T(trimap_pre, trimap_gt)
                loss_ += loss.item()
                if i != 0 and i % args.print_iter == 0:
                    save_img(args, (img, trimap_pre, trimap_gt), epoch, i)
                    print("[epoch:{} iter:{}] \tloss: {:.5f}".format(epoch, i, loss))
                if i != 0 and i % save_latest_freq == 0:
                    print("average loss: {:.5f}\nsaving model ....".format(loss_ / (i + 1)))
                    trainlog.save_model(model, epoch)

            # ---- phase 2: pretrain matting network ----
            elif args.train_phase == 'pre_train_m_net':
                img, trimap_gt, alpha_gt, bg, fg = (sample_batched['image'],
                                                    sample_batched['trimap'],
                                                    sample_batched['alpha'],
                                                    sample_batched['bg'],
                                                    sample_batched['fg'])
                img, trimap_gt, alpha_gt, bg, fg = (img.to(device), trimap_gt.to(device),
                                                    alpha_gt.to(device), bg.to(device),
                                                    fg.to(device))
                alpha_pre = model(img, trimap_gt)
                if args.debug:
                    assert tuple(alpha_pre.shape) == (args.train_batch, 1,
                                                      args.patch_size, args.patch_size)
                    # Dump a trimap/fg/bg strip for visual inspection.
                    img_dir = os.path.join(args.saveDir, args.train_phase, 'save_img')
                    img_fg_bg = np.concatenate((torch2numpy(trimap_gt[0]),
                                                torch2numpy(fg[0]),
                                                torch2numpy(bg[0])), axis=-1)
                    img_fg_bg = np.transpose(img_fg_bg, (1, 2, 0))
                    cv2.imwrite(img_dir + '/fgbg_{}_{}.png'.format(str(epoch), str(i)),
                                img_fg_bg)
                loss, L_alpha, L_composition = loss_f_M(img, alpha_pre, alpha_gt,
                                                        bg, fg, trimap_gt)
                loss_ += loss.item()
                L_alpha_ += L_alpha.item()
                L_composition_ += L_composition.item()
                if i != 0 and i % args.print_iter == 0:
                    save_img(args, (img, alpha_pre, alpha_gt), epoch, i)
                    print("[epoch:{} iter:{}] loss: {:.5f} loss_a: {:.5f} loss_c: {:.5f}"
                          .format(epoch, i, loss, L_alpha, L_composition))
                if i != 0 and i % save_latest_freq == 0:
                    print("average loss: {:.5f}\nsaving model ....".format(loss_ / (i + 1)))
                    trainlog.save_model(model, epoch)

            # ---- phase 3: end-to-end training ----
            elif args.train_phase == 'end_to_end':
                img, trimap_gt, alpha_gt, bg, fg = (sample_batched['image'],
                                                    sample_batched['trimap'],
                                                    sample_batched['alpha'],
                                                    sample_batched['bg'],
                                                    sample_batched['fg'])
                img, trimap_gt, alpha_gt, bg, fg = (img.to(device), trimap_gt.to(device),
                                                    alpha_gt.to(device), bg.to(device),
                                                    fg.to(device))
                trimap_pre, alpha_pre = model(img)
                loss, L_alpha, L_composition, L_cross = loss_function(
                    img, trimap_pre, trimap_gt, alpha_pre, alpha_gt, bg)
                loss_ += loss.item()
                L_alpha_ += L_alpha.item()
                L_composition_ += L_composition.item()
                L_cross_ += L_cross.item()

            # Backprop is shared by all three phases.
            loss.backward()
            optimizer.step()

        # Shuffle data after each epoch to recreate the dataset pairs.
        print('epoch end, shuffle datasets again ...')
        train_data.shuffle_data()

        t1 = time.time()
        # Per-epoch summary line (averages over the i+1 iterations seen).
        if args.train_phase == 'pre_train_t_net':
            loss_ = loss_ / (i + 1)
            log = "[{} / {}] \tloss: {:.5f}\ttime: {:.0f}".format(
                epoch, args.nEpochs, loss_, t1 - t0)
        elif args.train_phase == 'pre_train_m_net':
            loss_ = loss_ / (i + 1)
            L_alpha_ = L_alpha_ / (i + 1)
            L_composition_ = L_composition_ / (i + 1)
            log = "[{} / {}] loss: {:.5f} loss_a: {:.5f} loss_c: {:.5f} time: {:.0f}"\
                .format(epoch, args.nEpochs, loss_, L_alpha_, L_composition_, t1 - t0)
        elif args.train_phase == 'end_to_end':
            loss_ = loss_ / (i + 1)
            L_alpha_ = L_alpha_ / (i + 1)
            L_composition_ = L_composition_ / (i + 1)
            L_cross_ = L_cross_ / (i + 1)
            log = "[{} / {}] loss: {:.5f} loss_a: {:.5f} loss_c: {:.5f} loss_t: {:.5f} time: {:.0f}"\
                .format(epoch, args.nEpochs, loss_, L_alpha_, L_composition_,
                        L_cross_, t1 - t0)
        print(log)
        trainlog.save_log(log)
        trainlog.save_model(model, epoch)
        if epoch % args.save_epoch == 0:
            trainlog.save_model(model, epoch, save_as=True)
def main():
    """Train with a train/val split and TensorBoard logging.

    NOTE(review): this `def main()` shadows an earlier `main` in the same
    file — confirm which entry point is intended and delete the other.
    """
    args = get_args()

    # Select CPU / GPU
    if args.without_gpu:
        print("use CPU !")
        device = torch.device('cpu')
    else:
        if torch.cuda.is_available():
            device = torch.device('cuda')
        else:
            # Bug fix: original printed ("No GPU is is available !") and left
            # `device` unassigned (NameError later). Fall back to CPU.
            print("No GPU is available !")
            device = torch.device('cpu')

    print("============> Building model ...")
    trainlog = Train_Log(args)
    if args.train_phase == 'pre_train_t_net':
        model = network.net_T()
    elif args.train_phase == 'pre_train_m_net':
        model = network.net_M()
        model.apply(weight_init)
    elif args.train_phase == 'end_to_end':
        model = network.net_F()
        if args.pretrain:
            model = trainlog.load_pretrain(model)
    else:
        raise ValueError('Wrong train phase request!')

    train_data = dataset.human_matting_data(args, split='train')
    val_data = dataset.human_matting_data(args, split='val')
    model.to(device)
    print(args)

    print("============> Loading datasets ...")
    trainloader = DataLoader(train_data,
                             batch_size=args.train_batch,
                             drop_last=True,
                             shuffle=True,
                             num_workers=args.train_nThreads,
                             pin_memory=True)
    valloader = DataLoader(val_data,
                           batch_size=args.val_batch,
                           drop_last=True,
                           shuffle=True,
                           num_workers=args.val_nThreads,
                           pin_memory=True)

    print("============> Set optimizer ...")
    lr = args.lr
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                           lr=lr, betas=(0.9, 0.999), weight_decay=0.0005)

    print("============> Start Train ! ...")
    start_epoch = 1
    if args.continue_train:
        start_epoch, model, optimizer = trainlog.load_model(model, optimizer)

    for epoch in range(start_epoch, args.nEpochs + 1):
        # Bug fix: switch back to train mode every epoch, because the
        # validation pass below now puts the model into eval mode.
        model.train()

        train_loss_ = 0
        train_L_alpha_ = 0
        train_L_composition_ = 0
        train_L_cross_ = 0
        train_SAD_ = 0
        train_MSE_ = 0
        train_Gradient_ = 0
        train_Connectivity_ = 0
        val_loss_ = 0
        val_L_alpha_ = 0
        val_L_composition_ = 0
        val_L_cross_ = 0
        val_SAD_ = 0
        val_MSE_ = 0
        val_Gradient_ = 0
        val_Connectivity_ = 0

        if args.lrdecayType != 'keep':
            lr = set_lr(args, epoch, optimizer)
        t0 = time.time()

        # -------------------- training pass --------------------
        for i, sample_batched in enumerate(trainloader):
            optimizer.zero_grad()
            if args.train_phase == 'pre_train_t_net':
                img, trimap_gt = sample_batched['image'], sample_batched['trimap']
                img, trimap_gt = img.to(device), trimap_gt.to(device)
                trimap_pre = model(img)
                if args.debug:  # debug only: sanity-check tensor shapes
                    assert tuple(trimap_pre.shape) == (args.train_batch, 3,
                                                       args.patch_size, args.patch_size)
                    assert tuple(trimap_gt.shape) == (args.train_batch, 1,
                                                      args.patch_size, args.patch_size)
                train_loss = loss_f_T(trimap_pre, trimap_gt)
                train_loss_ += train_loss.item()
            elif args.train_phase == 'pre_train_m_net':
                img, trimap_gt, alpha_gt, bg, fg = (sample_batched['image'],
                                                    sample_batched['trimap'],
                                                    sample_batched['alpha'],
                                                    sample_batched['bg'],
                                                    sample_batched['fg'])
                img, trimap_gt, alpha_gt, bg, fg = (img.to(device), trimap_gt.to(device),
                                                    alpha_gt.to(device), bg.to(device),
                                                    fg.to(device))
                alpha_pre = model(img, trimap_gt)
                train_loss, train_L_alpha, train_L_composition = loss_f_M(
                    img, alpha_pre, alpha_gt, bg, fg, trimap_gt)
                train_loss_ += train_loss.item()
                train_L_alpha_ += train_L_alpha.item()
                train_L_composition_ += train_L_composition.item()
                # Matting quality metrics are computed on numpy copies.
                alpha_pre = alpha_pre[:, 0, :, :].cpu().detach().numpy()
                alpha_gt = alpha_gt[:, 0, :, :].cpu().detach().numpy()
                SAD, MSE, Gradient, Connectivity = matting_measure(alpha_pre, alpha_gt)
                train_SAD_ += SAD
                train_MSE_ += MSE
                train_Gradient_ += Gradient
                train_Connectivity_ += Connectivity
            elif args.train_phase == 'end_to_end':
                img, trimap_gt, alpha_gt, bg, fg = (sample_batched['image'],
                                                    sample_batched['trimap'],
                                                    sample_batched['alpha'],
                                                    sample_batched['bg'],
                                                    sample_batched['fg'])
                img, trimap_gt, alpha_gt, bg, fg = (img.to(device), trimap_gt.to(device),
                                                    alpha_gt.to(device), bg.to(device),
                                                    fg.to(device))
                # net_F recomposites internally, so it returns new img/gt tensors.
                trimap_pre, alpha_pre, img, alpha_gt, bg, fg = model({
                    'img': img,
                    'alpha_g': alpha_gt,
                    'back': bg,
                    'front': fg
                })
                train_loss, train_L_alpha, train_L_composition, train_L_cross = \
                    loss_function(img, trimap_pre, trimap_gt, alpha_pre,
                                  alpha_gt, bg, fg)
                train_loss_ += train_loss.item()
                train_L_alpha_ += train_L_alpha.item()
                train_L_composition_ += train_L_composition.item()
                train_L_cross_ += train_L_cross.item()
                alpha_pre = alpha_pre[:, 0, :, :].cpu().detach().numpy()
                alpha_gt = alpha_gt[:, 0, :, :].cpu().detach().numpy()
                SAD, MSE, Gradient, Connectivity = matting_measure(alpha_pre, alpha_gt)
                train_SAD_ += SAD
                train_MSE_ += MSE
                train_Gradient_ += Gradient
                train_Connectivity_ += Connectivity

            train_loss.backward()
            optimizer.step()

        # -------------------- validation pass --------------------
        # Bug fix: the original validated in train mode with autograd enabled,
        # which skews BN/dropout metrics and wastes memory building graphs.
        model.eval()
        with torch.no_grad():
            for j, sample_batched in enumerate(valloader):
                if args.train_phase == 'pre_train_t_net':
                    img, trimap_gt = sample_batched['image'], sample_batched['trimap']
                    img, trimap_gt = img.to(device), trimap_gt.to(device)
                    trimap_pre = model(img)
                    if args.debug:
                        # Bug fix: this loader batches by val_batch, not
                        # train_batch, as the original asserts assumed.
                        assert tuple(trimap_pre.shape) == (args.val_batch, 3,
                                                           args.patch_size, args.patch_size)
                        assert tuple(trimap_gt.shape) == (args.val_batch, 1,
                                                          args.patch_size, args.patch_size)
                    val_loss = loss_f_T(trimap_pre, trimap_gt)
                    val_loss_ += val_loss.item()
                elif args.train_phase == 'pre_train_m_net':
                    img, trimap_gt, alpha_gt, bg, fg = (sample_batched['image'],
                                                        sample_batched['trimap'],
                                                        sample_batched['alpha'],
                                                        sample_batched['bg'],
                                                        sample_batched['fg'])
                    img, trimap_gt, alpha_gt, bg, fg = (img.to(device), trimap_gt.to(device),
                                                        alpha_gt.to(device), bg.to(device),
                                                        fg.to(device))
                    alpha_pre = model(img, trimap_gt)
                    val_loss, val_L_alpha, val_L_composition = loss_f_M(
                        img, alpha_pre, alpha_gt, bg, fg, trimap_gt)
                    val_loss_ += val_loss.item()
                    val_L_alpha_ += val_L_alpha.item()
                    val_L_composition_ += val_L_composition.item()
                    alpha_pre = alpha_pre[:, 0, :, :].cpu().detach().numpy()
                    alpha_gt = alpha_gt[:, 0, :, :].cpu().detach().numpy()
                    SAD, MSE, Gradient, Connectivity = matting_measure(alpha_pre, alpha_gt)
                    val_SAD_ += SAD
                    val_MSE_ += MSE
                    val_Gradient_ += Gradient
                    val_Connectivity_ += Connectivity
                elif args.train_phase == 'end_to_end':
                    img, trimap_gt, alpha_gt, bg, fg = (sample_batched['image'],
                                                        sample_batched['trimap'],
                                                        sample_batched['alpha'],
                                                        sample_batched['bg'],
                                                        sample_batched['fg'])
                    img, trimap_gt, alpha_gt, bg, fg = (img.to(device), trimap_gt.to(device),
                                                        alpha_gt.to(device), bg.to(device),
                                                        fg.to(device))
                    trimap_pre, alpha_pre, img, alpha_gt, bg, fg = model({
                        'img': img,
                        'alpha_g': alpha_gt,
                        'back': bg,
                        'front': fg
                    })
                    val_loss, val_L_alpha, val_L_composition, val_L_cross = \
                        loss_function(img, trimap_pre, trimap_gt, alpha_pre,
                                      alpha_gt, bg, fg)
                    val_loss_ += val_loss.item()
                    val_L_alpha_ += val_L_alpha.item()
                    val_L_composition_ += val_L_composition.item()
                    val_L_cross_ += val_L_cross.item()
                    alpha_pre = alpha_pre[:, 0, :, :].cpu().detach().numpy()
                    alpha_gt = alpha_gt[:, 0, :, :].cpu().detach().numpy()
                    SAD, MSE, Gradient, Connectivity = matting_measure(alpha_pre, alpha_gt)
                    val_SAD_ += SAD
                    val_MSE_ += MSE
                    val_Gradient_ += Gradient
                    val_Connectivity_ += Connectivity

        # shuffle data after each epoch to recreate the dataset
        print('epoch end, shuffle datasets again ...')
        #trainloader.dataset.shuffle_data()

        t1 = time.time()
        # -------------------- per-epoch averaging + logging --------------------
        if args.train_phase == 'pre_train_t_net':
            train_loss_ = train_loss_ / (i + 1)
            val_loss_ = val_loss_ / (j + 1)
            trainlog.writer.add_scalar('train_loss', train_loss_, epoch)
            trainlog.writer.add_scalar('val_loss', val_loss_, epoch)
            log = "[{} / {}]\ttime: {:.0f}\ttrain_loss: {:.5f}\tval_loss: {:.5f}" \
                .format(epoch, args.nEpochs, t1 - t0, train_loss_, val_loss_)
        elif args.train_phase == 'pre_train_m_net':
            train_loss_ = train_loss_ / (i + 1)
            train_L_alpha_ = train_L_alpha_ / (i + 1)
            train_L_composition_ = train_L_composition_ / (i + 1)
            train_SAD_ = train_SAD_ / (i + 1)
            train_MSE_ = train_MSE_ / (i + 1)
            train_Gradient_ = train_Gradient_ / (i + 1)
            train_Connectivity_ = train_Connectivity_ / (i + 1)
            val_loss_ = val_loss_ / (j + 1)
            val_L_alpha_ = val_L_alpha_ / (j + 1)
            val_L_composition_ = val_L_composition_ / (j + 1)
            val_SAD_ = val_SAD_ / (j + 1)
            val_MSE_ = val_MSE_ / (j + 1)
            val_Gradient_ = val_Gradient_ / (j + 1)
            val_Connectivity_ = val_Connectivity_ / (j + 1)
            trainlog.writer.add_scalar('train_loss', train_loss_, epoch)
            trainlog.writer.add_scalar('train_loss_a', train_L_alpha_, epoch)
            trainlog.writer.add_scalar('train_loss_c', train_L_composition_, epoch)
            trainlog.writer.add_scalar('train_SAD', train_SAD_, epoch)
            trainlog.writer.add_scalar('train_MSE', train_MSE_, epoch)
            trainlog.writer.add_scalar('train_Gradient', train_Gradient_, epoch)
            trainlog.writer.add_scalar('train_Connectivity', train_Connectivity_, epoch)
            trainlog.writer.add_scalar('val_loss', val_loss_, epoch)
            trainlog.writer.add_scalar('val_loss_a', val_L_alpha_, epoch)
            trainlog.writer.add_scalar('val_loss_c', val_L_composition_, epoch)
            trainlog.writer.add_scalar('val_SAD', val_SAD_, epoch)
            trainlog.writer.add_scalar('val_MSE', val_MSE_, epoch)
            trainlog.writer.add_scalar('val_Gradient', val_Gradient_, epoch)
            trainlog.writer.add_scalar('val_Connectivity', val_Connectivity_, epoch)
            log = "[{} / {}]\ttime: {:.0f}\ttrain_loss: {:.5f}\ttrain_loss_a: {:.5f}\ttrain_loss_c: {:.5f}\n \
                train_SAD: {:.5f}\ttrain_MSE: {:.5f}\ttrain_Gradient: {:.5f}\ttrain_Connectivity: {:.5f}\n \
                val_loss: {:.5f}\tval_loss_a: {:.5f}\tval_loss_c: {:.5f}\n \
                val_SAD: {:.5f}\tval_MSE: {:.5f}\tval_Gradient: {:.5f}\tval_Connectivity: {:.5f}" \
                .format(epoch, args.nEpochs, t1 - t0,
                        train_loss_, train_L_alpha_, train_L_composition_,
                        train_SAD_, train_MSE_, train_Gradient_, train_Connectivity_,
                        val_loss_, val_L_alpha_, val_L_composition_,
                        val_SAD_, val_MSE_, val_Gradient_, val_Connectivity_)
        elif args.train_phase == 'end_to_end':
            train_loss_ = train_loss_ / (i + 1)
            train_L_alpha_ = train_L_alpha_ / (i + 1)
            train_L_composition_ = train_L_composition_ / (i + 1)
            train_L_cross_ = train_L_cross_ / (i + 1)
            train_SAD_ = train_SAD_ / (i + 1)
            train_MSE_ = train_MSE_ / (i + 1)
            train_Gradient_ = train_Gradient_ / (i + 1)
            train_Connectivity_ = train_Connectivity_ / (i + 1)
            val_loss_ = val_loss_ / (j + 1)
            val_L_alpha_ = val_L_alpha_ / (j + 1)
            val_L_composition_ = val_L_composition_ / (j + 1)
            val_L_cross_ = val_L_cross_ / (j + 1)
            val_SAD_ = val_SAD_ / (j + 1)
            val_MSE_ = val_MSE_ / (j + 1)
            val_Gradient_ = val_Gradient_ / (j + 1)
            val_Connectivity_ = val_Connectivity_ / (j + 1)
            trainlog.writer.add_scalar('train_loss', train_loss_, epoch)
            trainlog.writer.add_scalar('train_loss_a', train_L_alpha_, epoch)
            trainlog.writer.add_scalar('train_loss_c', train_L_composition_, epoch)
            trainlog.writer.add_scalar('train_loss_t', train_L_cross_, epoch)
            trainlog.writer.add_scalar('train_SAD', train_SAD_, epoch)
            trainlog.writer.add_scalar('train_MSE', train_MSE_, epoch)
            trainlog.writer.add_scalar('train_Gradient', train_Gradient_, epoch)
            trainlog.writer.add_scalar('train_Connectivity', train_Connectivity_, epoch)
            trainlog.writer.add_scalar('val_loss', val_loss_, epoch)
            trainlog.writer.add_scalar('val_loss_a', val_L_alpha_, epoch)
            trainlog.writer.add_scalar('val_loss_c', val_L_composition_, epoch)
            trainlog.writer.add_scalar('val_loss_t', val_L_cross_, epoch)
            trainlog.writer.add_scalar('val_SAD', val_SAD_, epoch)
            trainlog.writer.add_scalar('val_MSE', val_MSE_, epoch)
            trainlog.writer.add_scalar('val_Gradient', val_Gradient_, epoch)
            trainlog.writer.add_scalar('val_Connectivity', val_Connectivity_, epoch)
            log = "[{} / {}]\ttime: {:.0f}\ttrain_loss: {:.5f}\ttrain_loss_a: {:.5f}\ttrain_loss_c: {:.5f}\ttrain_loss_t: {:.5f}\n \
                train_SAD: {:.5f}\ttrain_MSE: {:.5f}\ttrain_Gradient: {:.5f}\ttrain_Connectivity: {:.5f}\n \
                val_loss: {:.5f}\tval_loss_a: {:.5f}\tval_loss_c: {:.5f}\tval_loss_t: {:.5f}\n \
                val_SAD: {:.5f}\tval_MSE: {:.5f}\tval_Gradient: {:.5f}\tval_Connectivity: {:.5f}" \
                .format(epoch, args.nEpochs, t1 - t0,
                        train_loss_, train_L_alpha_, train_L_composition_, train_L_cross_,
                        train_SAD_, train_MSE_, train_Gradient_, train_Connectivity_,
                        val_loss_, val_L_alpha_, val_L_composition_, val_L_cross_,
                        val_SAD_, val_MSE_, val_Gradient_, val_Connectivity_)

        print(log)
        trainlog.save_log(log)
        trainlog.save_model(model, optimizer, epoch, val_loss_)