'weight_decay': 5e-4, 'momentum': 0.95, 'snapshot': '', 'pretrain': os.path.join(ckpt_path, 'VideoSaliency_2019-12-24 22:05:11', '50000.pth'), # 'pretrain': '', 'imgs_file': 'Pre-train/pretrain_all_seq_DUT_TR_DAFB2_DAVSOD2.txt', # 'imgs_file': 'video_saliency/train_all_DAFB2_DAVSOD_5f.txt', 'train_loader': 'both' # 'train_loader': 'video_sequence' } imgs_file = os.path.join(datasets_root, args['imgs_file']) # imgs_file = os.path.join(datasets_root, 'video_saliency/train_all_DAFB3_seq_5f.txt') joint_transform = joint_transforms.Compose([ joint_transforms.ImageResize(520), joint_transforms.RandomCrop(473), joint_transforms.RandomHorizontallyFlip(), joint_transforms.RandomRotate(10) ]) # joint_seq_transform = joint_transforms.Compose([ # joint_transforms.ImageResize(520), # joint_transforms.RandomCrop(473) # ]) input_size = (473, 473) img_transform = transforms.Compose([ transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
'imgs_file2': 'Pre-train/pretrain_all_seq_DUT_TR_DAFB2.txt', # 'imgs_file': 'video_saliency/train_all_DAFB2_DAVSOD_5f.txt', # 'train_loader': 'video_image' 'train_loader': 'flow_image3', # 'train_loader': 'video_sequence' 'image_size': 430, 'crop_size': 380, 'self_distill': 0.1, 'teacher_distill': 0.6 } imgs_file = os.path.join(datasets_root, args['imgs_file']) # imgs_file = os.path.join(datasets_root, 'video_saliency/train_all_DAFB3_seq_5f.txt') joint_transform = joint_transforms.Compose([ joint_transforms.ImageResize(args['image_size']), joint_transforms.RandomCrop(args['crop_size']), # joint_transforms.ColorJitter(hue=[-0.1, 0.1], saturation=0.05), joint_transforms.RandomHorizontallyFlip(), joint_transforms.RandomRotate(10) ]) # joint_transform = joint_transforms.Compose([ # joint_transforms.ImageResize(290), # joint_transforms.RandomCrop(256), # joint_transforms.RandomHorizontallyFlip(), # joint_transforms.RandomRotate(10) # ]) # joint_seq_transform = joint_transforms.Compose([ # joint_transforms.ImageResize(520),
def train_online(net, seq_name='breakdance'):
    """Fine-tune `net` online on the first frames of one DAVIS sequence.

    Runs a short SGD loop (100 iterations) at a tiny learning rate and
    returns the adapted network. The five network outputs are supervised
    against progressively narrowed label slices (assumes batch size 5).
    """
    online_args = {
        'iter_num': 100,
        'train_batch_size': 5,
        'lr': 1e-8,
        'lr_decay': 0.95,
        'weight_decay': 5e-4,
        'momentum': 0.95,
    }

    joint_transform = joint_transforms.Compose([
        joint_transforms.ImageResize(473),
        # joint_transforms.RandomCrop(473),
        # joint_transforms.RandomHorizontallyFlip(),
        # joint_transforms.RandomRotate(10)
    ])
    target_transform = transforms.ToTensor()

    train_set = VideoFirstImageFolder(to_test['davis'], gt_root, seq_name,
                                      online_args['train_batch_size'],
                                      joint_transform, img_transform,
                                      target_transform)
    online_train_loader = DataLoader(train_set,
                                     batch_size=online_args['train_batch_size'],
                                     num_workers=1,
                                     shuffle=False)

    # Common SGD recipe: biases train at twice the base lr with no weight
    # decay; all other parameters use the base lr plus weight decay.
    bias_params = [param for name, param in net.named_parameters()
                   if name[-4:] == 'bias']
    other_params = [param for name, param in net.named_parameters()
                    if name[-4:] != 'bias']
    optimizer = optim.SGD(
        [{'params': bias_params, 'lr': 2 * online_args['lr']},
         {'params': other_params, 'lr': online_args['lr'],
          'weight_decay': online_args['weight_decay']}],
        momentum=online_args['momentum'])

    criterion = nn.BCEWithLogitsLoss().cuda()
    net.train().cuda()
    fix_parameters(net.named_parameters())

    for curr_iter in range(0, online_args['iter_num']):
        total_loss_record = AvgMeter()
        loss0_record, loss1_record = AvgMeter(), AvgMeter()
        loss2_record, loss3_record, loss4_record = AvgMeter(), AvgMeter(), AvgMeter()

        for i, data in enumerate(online_train_loader):
            # Polynomial decay of both lr groups over the online iterations.
            decay = (1 - float(curr_iter) / online_args['iter_num']) ** online_args['lr_decay']
            optimizer.param_groups[0]['lr'] = 2 * online_args['lr'] * decay
            optimizer.param_groups[1]['lr'] = online_args['lr'] * decay

            inputs, labels = data
            batch_size = inputs.size(0)
            inputs = Variable(inputs).cuda()
            labels = Variable(labels).cuda()

            optimizer.zero_grad()
            outputs0, outputs1, outputs2, outputs3, outputs4 = net(inputs)
            # Each deeper output predicts one fewer frame, so the label
            # batch is narrowed accordingly (requires batch size 5).
            loss0 = criterion(outputs0, labels)
            loss1 = criterion(outputs1, labels.narrow(0, 1, 4))
            loss2 = criterion(outputs2, labels.narrow(0, 2, 3))
            loss3 = criterion(outputs3, labels.narrow(0, 3, 2))
            loss4 = criterion(outputs4, labels.narrow(0, 4, 1))
            total_loss = loss0 + loss1 + loss2 + loss3 + loss4
            total_loss.backward()
            optimizer.step()

            total_loss_record.update(total_loss.data, batch_size)
            loss0_record.update(loss0.data, batch_size)
            loss1_record.update(loss1.data, batch_size)
            loss2_record.update(loss2.data, batch_size)
            loss3_record.update(loss3.data, batch_size)
            loss4_record.update(loss4.data, batch_size)

            log = '[iter %d], [total loss %.5f], [loss0 %.5f], [loss1 %.5f], [loss2 %.5f], [loss3 %.5f], ' \
                  '[loss4 %.5f], [lr %.13f]' % \
                  (curr_iter, total_loss_record.avg, loss0_record.avg, loss1_record.avg, loss2_record.avg,
                   loss3_record.avg, loss4_record.avg, optimizer.param_groups[1]['lr'])
            print(log)
    return net
def train_online(net, seq_name='breakdance'):
    """Fine-tune `net` online on one DAVIS sequence and snapshot the result.

    Dispatches to the model-specific training step selected by
    `args['model']`, runs a short low-lr SGD loop, saves the adapted
    weights next to the base snapshot, and returns the network.
    """
    online_args = {
        'iter_num': 100,
        'train_batch_size': 1,
        'lr': 1e-10,
        'lr_decay': 0.95,
        'weight_decay': 5e-4,
        'momentum': 0.95,
    }

    joint_transform = joint_transforms.Compose([
        joint_transforms.ImageResize(380),
        # joint_transforms.RandomCrop(473),
        # joint_transforms.RandomHorizontallyFlip(),
        # joint_transforms.RandomRotate(10)
    ])
    target_transform = transforms.ToTensor()

    # train_set = VideoFSImageFolder(to_test['davis'], seq_name, use_first=True, joint_transform=joint_transform, transform=img_transform)
    train_set = VideoFirstImageFolder(to_test['davis'], gt_root, seq_name,
                                      joint_transform=joint_transform,
                                      transform=img_transform,
                                      target_transform=target_transform)
    online_train_loader = DataLoader(train_set,
                                     batch_size=online_args['train_batch_size'],
                                     num_workers=1,
                                     shuffle=False)

    # criterion = nn.MSELoss().cuda()
    criterion = nn.BCEWithLogitsLoss().cuda()
    erosion = Erosion2d(1, 1, 5, soft_max=False).cuda()

    net.train()
    net.cuda()
    # fix_parameters(net.named_parameters())

    # Biases: doubled lr, no decay; everything else: base lr + weight decay.
    bias_params = [param for name, param in net.named_parameters()
                   if name[-4:] == 'bias']
    other_params = [param for name, param in net.named_parameters()
                    if name[-4:] != 'bias']
    optimizer = optim.SGD(
        [{'params': bias_params, 'lr': 2 * online_args['lr']},
         {'params': other_params, 'lr': online_args['lr'],
          'weight_decay': online_args['weight_decay']}],
        momentum=online_args['momentum'])

    # Model-specific single-step trainers share one calling convention:
    # (net, inputs, criterion, erosion, labels) -> (total, l0, l1, l2).
    step_fns = {
        'BASNet': train_BASNet,
        'R3Net': train_R3Net,
        'DSSNet': train_DSSNet,
        'CPD': train_CPD,
        'RAS': train_RAS,
        'PoolNet': train_PoolNet,
        'F3Net': train_F3Net,
        'R2Net': train_R2Net,
    }

    for curr_iter in range(0, online_args['iter_num']):
        total_loss_record = AvgMeter()
        loss0_record, loss1_record = AvgMeter(), AvgMeter()
        loss2_record = AvgMeter()

        for i, data in enumerate(online_train_loader):
            # Polynomial decay of both lr groups over the online iterations.
            decay = (1 - float(curr_iter) / online_args['iter_num']) ** online_args['lr_decay']
            optimizer.param_groups[0]['lr'] = 2 * online_args['lr'] * decay
            optimizer.param_groups[1]['lr'] = online_args['lr'] * decay

            inputs, labels = data
            batch_size = inputs.size(0)
            inputs = Variable(inputs).cuda()
            labels = Variable(labels).cuda()

            optimizer.zero_grad()
            total_loss, loss0, loss1, loss2 = step_fns[args['model']](
                net, inputs, criterion, erosion, labels)
            total_loss.backward()
            optimizer.step()

            total_loss_record.update(total_loss.data, batch_size)
            loss0_record.update(loss0.data, batch_size)
            loss1_record.update(loss1.data, batch_size)
            loss2_record.update(loss2.data, batch_size)
            # loss3_record.update(loss3.data, batch_size)
            # loss4_record.update(loss4.data, batch_size)

            log = '[iter %d], [total loss %.5f], [loss0 %.8f], [loss1 %.8f], [loss2 %.8f], [lr %.13f]' % \
                  (curr_iter, total_loss_record.avg, loss0_record.avg, loss1_record.avg, loss2_record.avg,
                   optimizer.param_groups[1]['lr'])
            print(log)

    print('taking snapshot ...')
    torch.save(
        net.state_dict(),
        os.path.join(ckpt_path, exp_name,
                     str(args['snapshot']) + '_' + seq_name + '_online.pth'))
    # torch.save(optimizer.state_dict(),
    #            os.path.join(ckpt_path, exp_name, '%d_optim.pth' % curr_iter))
    return net
img, gt = flip(img, gt, flip_p) img, gt = rotate(img, gt, degree_random) return img, gt def __len__(self): return len(self.imgs) if __name__ == '__main__': from torchvision import transforms import joint_transforms from torch.utils.data import DataLoader from config import msra10k_path, video_seq_path, video_seq_gt_path, video_train_path import numpy as np joint_transform = joint_transforms.Compose([ joint_transforms.ImageResize(250), joint_transforms.RandomCrop(200), # joint_transforms.ColorJitter(hue=[-0.1, 0.1], saturation=0.05), joint_transforms.RandomHorizontallyFlip(), joint_transforms.RandomRotate(10) ]) joint_seq_transform = joint_transforms.Compose([ joint_transforms.ImageResize(250), joint_transforms.RandomCrop(200) ]) img_transform = transforms.Compose([ # transforms.ColorJitter(hue=[-0.1, 0.1]), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])