def main(args):
    device = "cuda"
    args.distributed = dist.get_world_size() > 1

    transforms = video_transforms.Compose([
        RandomSelectFrames(16),
        video_transforms.Resize(args.size),
        video_transforms.CenterCrop(args.size),
        volume_transforms.ClipToTensor(),
        tensor_transforms.Normalize(0.5, 0.5),
    ])

    with open('/home/shirakawa/movie/code/iVideoGAN/over16frame_list_training.txt', 'rb') as f:
        train_file_list = pickle.load(f)
    print(len(train_file_list))

    dataset = MITDataset(train_file_list, transform=transforms)
    sampler = dist.data_sampler(dataset, shuffle=True, distributed=args.distributed)
    # loader = DataLoader(
    #     dataset, batch_size=128 // args.n_gpu, sampler=sampler, num_workers=2
    # )
    loader = DataLoader(dataset, batch_size=32 // args.n_gpu, sampler=sampler, num_workers=2)

    model = VQVAE().to(device)

    if args.distributed:
        model = nn.parallel.DistributedDataParallel(
            model,
            device_ids=[dist.get_local_rank()],
            output_device=dist.get_local_rank(),
        )

    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    scheduler = None
    if args.sched == "cycle":
        scheduler = CycleScheduler(
            optimizer,
            args.lr,
            n_iter=len(loader) * args.epoch,
            momentum=None,
            warmup_proportion=0.05,
        )

    for i in range(args.epoch):
        train(i, loader, model, optimizer, scheduler, device)

        if dist.is_primary():
            torch.save(model.state_dict(), f"checkpoint_vid_v2/vqvae_{str(i + 1).zfill(3)}.pt")
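
# A minimal, self-contained sketch (not from the original repo) of what the
# Compose pipeline above produces. It assumes the torchvideotransforms-style
# modules imported by this file; RandomSelectFrames is project-specific, so a
# fixed 16-frame dummy clip stands in for its output.
import numpy as np
from PIL import Image

def demo_clip_pipeline(size=64):
    # 16 dummy RGB frames as a list of PIL images (a format ClipToTensor accepts).
    frames = [Image.fromarray(np.random.randint(0, 255, (72, 72, 3), dtype=np.uint8))
              for _ in range(16)]
    pipeline = video_transforms.Compose([
        video_transforms.Resize((size, size)),
        video_transforms.CenterCrop((size, size)),
        volume_transforms.ClipToTensor(),        # -> float tensor (C, T, H, W) in [0, 1]
        tensor_transforms.Normalize(0.5, 0.5),   # -> roughly [-1, 1]
    ])
    clip = pipeline(frames)
    print(clip.shape)  # expected: torch.Size([3, 16, size, size])
    return clip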
def __init__(self, args):
    # Initialization
    self.args = args
    self.isize = args.isize
    self.nfr = self.args.nfr
    self.plist = {'train': args.tr_plist, 'test': args.ts_plist}

    # Set transforms: training enlarges frames by 10% before random
    # rotation/crop/flip augmentation; testing only resizes.
    train_transforms = video_transforms.Compose([
        video_transforms.Resize(
            (int(self.isize * 1.1), int(self.isize * 1.1))),
        video_transforms.RandomRotation(10),
        video_transforms.RandomCrop((self.args.isize, self.args.isize)),
        video_transforms.RandomHorizontalFlip(),
        # video_transforms.ColorJitter(),
        video_transforms.Resize((self.isize, self.isize)),
        volume_transforms.ClipToTensor()
    ])
    test_transforms = video_transforms.Compose([
        video_transforms.Resize((self.isize, self.isize)),
        volume_transforms.ClipToTensor()
    ])
    self.transforms = {'train': train_transforms, 'test': test_transforms}
def __init__(self, isize, nfr, path_li, transforms=None):
    self.isize = isize
    self.nfr = nfr
    self.paths = path_li
    self.transforms = transforms
    self.mask_transforms = video_transforms.Compose([
        video_transforms.Resize((self.isize, self.isize)),
        volume_transforms.ClipToTensor(channel_nb=1)
    ])

    # Set index: per-video path lists, frame counts, and how many
    # nfr-frame clips each video yields.
    self.data_path_li, self.real_path_li, self.mask_path_li = self.path_reader(
        self.paths)                                     # video path lists
    nframe_li = self.count_frame(self.mask_path_li)     # frames per video
    div_nfr_li = [i // self.nfr for i in nframe_li]     # clips per video

    # Turn per-video clip counts into a running (cumulative) total so a
    # flat clip index can be mapped back to its source video.
    self.total_div_nfr = div_nfr_li
    for i in range(len(div_nfr_li)):
        if i != 0:
            self.total_div_nfr[i] += self.total_div_nfr[i - 1]
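
# A hedged sketch (not in the original class; names are illustrative) of how
# the cumulative list built above can resolve a flat dataset index to
# (video index, clip index within that video), e.g. inside __getitem__.
import bisect

def locate_clip(total_div_nfr, index):
    # First video whose cumulative clip count exceeds `index`.
    vid = bisect.bisect_right(total_div_nfr, index)
    prev_total = total_div_nfr[vid - 1] if vid > 0 else 0
    return vid, index - prev_total

# e.g. with clips-per-video [3, 2, 4] -> cumulative [3, 5, 9]:
# locate_clip([3, 5, 9], 4) == (1, 1)  (second clip of the second video)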
def __init__(self, root, dataset, is_train, is_transform=True, img_size=112):
    self.root = root
    self.dataset = dataset
    self.is_train = is_train
    self.is_transform = is_transform
    self.nframes = 0
    self.video_transform_one = video_transforms.Compose([
        video_transforms.RandomRotation(10),
        video_transforms.RandomCrop((112, 112)),
        video_transforms.RandomHorizontalFlip(),
        volume_transforms.ClipToTensor(),
        video_transforms.Normalize((0.4339, 0.4046, 0.3776),
                                   (0.151987, 0.14855, 0.1569))
    ])

    # Load the video list for the selected dataset split.
    if self.is_train:
        if self.dataset == 'MITS':
            vid_list = open('./MITS_split/MITS.lst')
            self.nframes = 90
        elif self.dataset == 'KITS':
            vid_list = open('./KITS_split/KITS.lst')
            self.nframes = 160
        else:
            print('no such dataset')
            return
        vid_list = list(vid_list)
        self.data_list = []
        for line in vid_list:
            line = line.strip('\r\n')
            # print(line)
            self.data_list.append(line)
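
# A small sketch (illustrative, not from the original repo) of what the
# per-channel Normalize above does to the (C, T, H, W) tensor produced by
# ClipToTensor: subtract each channel's mean and divide by its std.
import torch

def normalize_clip(clip, mean, std):
    mean = torch.tensor(mean).view(-1, 1, 1, 1)  # broadcast over (T, H, W)
    std = torch.tensor(std).view(-1, 1, 1, 1)
    return (clip - mean) / std

# clip = torch.rand(3, 16, 112, 112)
# normalize_clip(clip, (0.4339, 0.4046, 0.3776), (0.151987, 0.14855, 0.1569))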
def video_to_flow(video):
    # Video tensor in (-1, 1) to ndarray in (0, 255)
    norm_video = [normalize(v) for v in video.permute(2, 0, 1, 3, 4)]  # (D, B, C, W, H)
    norm_video = torch.stack(norm_video).permute(1, 0, 3, 4, 2)  # (B, D, W, H, C)
    nd_video = norm_video.cpu().numpy()
    transform = video_transforms.Compose([volume_transforms.ClipToTensor()])

    # Calc dense optical flow (Farneback) between consecutive frames and
    # render it as HSV: hue encodes direction, value encodes magnitude.
    flow_videos = []
    for v in nd_video:
        flow_imgs = []
        for i, img in enumerate(v):
            next_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
            # next_img = img
            if i == 0:
                hsv_mask = np.zeros_like(img)
                hsv_mask[:, :, 1] = 255  # full saturation
            else:
                flow = cv2.calcOpticalFlowFarneback(prv_img, next_img, None,
                                                    0.5, 3, 15, 3, 5, 1.2, 0)
                mag, ang = cv2.cartToPolar(flow[:, :, 0], flow[:, :, 1],
                                           angleInDegrees=True)
                hsv_mask[:, :, 0] = ang / 2  # OpenCV hue range is [0, 180)
                hsv_mask[:, :, 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
                rgb = cv2.cvtColor(hsv_mask, cv2.COLOR_HSV2RGB)
                # cv2.imwrite("/mnt/fs2/2018/ohshiro/opt_flow_.png", rgb)
                rgb = Image.fromarray(np.uint8(rgb))
                flow_imgs.append(rgb)
            prv_img = next_img
        # Duplicate the last flow frame so the clip keeps D frames
        # (D input frames yield only D - 1 flow images).
        flow_imgs.append(rgb)
        flow_imgs = transform(flow_imgs)
        flow_videos.append(flow_imgs)
    # ClipToTensor rescales to [0, 1]; map back to (-1, 1).
    return torch.stack(flow_videos) * 2 - 1
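
# A minimal, standalone sketch (an assumption, not code from the original
# repo) of the Farneback-flow-to-HSV visualization used above, run on two
# synthetic grayscale frames.
import cv2
import numpy as np

def flow_to_rgb_demo():
    prev = np.zeros((64, 64), dtype=np.uint8)
    nxt = np.zeros((64, 64), dtype=np.uint8)
    prev[20:30, 20:30] = 255
    nxt[20:30, 24:34] = 255  # square shifted 4 px to the right

    flow = cv2.calcOpticalFlowFarneback(prev, nxt, None,
                                        0.5, 3, 15, 3, 5, 1.2, 0)  # (H, W, 2)
    mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1], angleInDegrees=True)

    hsv = np.zeros((64, 64, 3), dtype=np.uint8)
    hsv[..., 0] = (ang / 2).astype(np.uint8)  # hue encodes flow direction
    hsv[..., 1] = 255                         # full saturation
    hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)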
def run_training(opt):
    # Index of sequence item to leave out for validation
    leave_out_idx = opt.leave_out
    scale_size = (256, 342)
    crop_size = (224, 224)
    if opt.use_heatmaps:
        channel_nb = opt.heatmap_nb
    elif opt.use_flow:
        channel_nb = 2
    else:
        channel_nb = 3

    # Initialize transforms
    if not opt.use_heatmaps:
        base_transform_list = [
            video_transforms.Scale(crop_size),
            volume_transforms.ClipToTensor(channel_nb=channel_nb)
        ]
        video_transform_list = [
            video_transforms.Scale(scale_size),
            video_transforms.RandomCrop(crop_size),
            volume_transforms.ClipToTensor(channel_nb=channel_nb)
        ]
    else:
        base_transform_list = [volume_transforms.ToTensor()]
        video_transform_list = [
            tensor_transforms.SpatialRandomCrop(crop_size),
            volume_transforms.ToTensor()
        ]
    base_transform = video_transforms.Compose(base_transform_list)
    video_transform = video_transforms.Compose(video_transform_list)

    # Initialize dataset
    if opt.dataset == 'gteagazeplus':
        all_subjects = [
            'Ahmad', 'Alireza', 'Carlos', 'Rahul', 'Yin', 'Shaghayegh'
        ]
        train_seqs, valid_seqs = evaluation.leave_one_out(
            all_subjects, leave_out_idx)
        dataset = GTEAGazePlus(
            root_folder='data/GTEAGazePlusdata2',  # TODO remove
            flow_type=opt.flow_type,
            full_res=True,
            heatmaps=opt.use_heatmaps,
            heatmap_size=scale_size,
            original_labels=True,
            rescale_flows=opt.rescale_flows,
            seqs=train_seqs,
            use_flow=opt.use_flow)
        val_dataset = GTEAGazePlus(
            root_folder='data/GTEAGazePlusdata2',  # TODO remove
            flow_type=opt.flow_type,
            full_res=True,
            heatmaps=opt.use_heatmaps,
            heatmap_size=scale_size,
            original_labels=True,
            rescale_flows=opt.rescale_flows,
            seqs=valid_seqs,
            use_flow=opt.use_flow)
    elif opt.dataset == 'smthg':
        dataset = smthg.Smthg(flow_type=opt.flow_type,
                              rescale_flows=opt.rescale_flows,
                              use_flow=opt.use_flow,
                              split='train')
        val_dataset = smthg.Smthg(flow_type=opt.flow_type,
                                  rescale_flows=opt.rescale_flows,
                                  split='valid',
                                  use_flow=opt.use_flow)
    elif opt.dataset == 'smthgv2':
        dataset = smthgv2.SmthgV2(use_flow=opt.use_flow,
                                  split='train',
                                  rescale_flows=opt.rescale_flows)
        val_dataset = smthgv2.SmthgV2(split='valid',
                                      use_flow=opt.use_flow,
                                      rescale_flows=opt.rescale_flows)
    else:
        raise ValueError('the opt.dataset name provided {0} is not handled '
                         'by this script'.format(opt.dataset))

    action_dataset = ActionDataset(dataset,
                                   base_transform=base_transform,
                                   clip_size=opt.clip_size,
                                   transform=video_transform)
    val_action_dataset = ActionDataset(val_dataset,
                                       base_transform=base_transform,
                                       clip_size=opt.clip_size,
                                       transform=video_transform)

    # Initialize sampler
    if opt.weighted_training:
        # Note: WeightedRandomSampler draws dataset indices from
        # range(len(weights)), so it expects one weight per *sample*; if
        # class_counts is per-class, each sample should receive the weight
        # of its class (see the sketch after this function).
        weights = [1 / k for k in dataset.class_counts]
        sampler = torch.utils.data.sampler.WeightedRandomSampler(
            weights, len(action_dataset))
    else:
        sampler = torch.utils.data.sampler.RandomSampler(action_dataset)

    # Initialize dataloaders
    dataloader = torch.utils.data.DataLoader(action_dataset,
                                             sampler=sampler,
                                             batch_size=opt.batch_size,
                                             num_workers=opt.threads)
    val_dataloader = torch.utils.data.DataLoader(val_action_dataset,
                                                 shuffle=False,
                                                 batch_size=opt.batch_size,
                                                 num_workers=opt.threads)

    # Initialize the neural network
    if opt.network == 'c3d':
        c3dnet = c3d.C3D()
        if opt.pretrained:
            c3dnet.load_state_dict(torch.load('data/c3d.pickle'))
        model = c3d_adapt.C3DAdapt(opt, c3dnet, action_dataset.class_nb,
                                   in_channels=channel_nb)
    elif opt.network == 'i3d':
        if opt.use_flow:
            i3dnet = i3d.I3D(class_nb=400, modality='flow', dropout_rate=0.5)
            if opt.pretrained:
                i3dnet.load_state_dict(torch.load('data/i3d_flow.pth'))
            model = i3d_adapt.I3DAdapt(opt, i3dnet, action_dataset.class_nb)
        else:
            # Load RGB weights and then adapt the network
            i3dnet = i3d.I3D(class_nb=400, modality='rgb', dropout_rate=0.5)
            if opt.pretrained:
                i3dnet.load_state_dict(torch.load('data/i3d_rgb.pth'))
            model = i3d_adapt.I3DAdapt(opt, i3dnet, action_dataset.class_nb,
                                       in_channels=channel_nb)
    elif opt.network == 'i3dense':
        densenet = torchvision.models.densenet121(pretrained=True)
        i3densenet = i3dense.I3DenseNet(copy.deepcopy(densenet),
                                        opt.clip_size,
                                        inflate_block_convs=True)
        model = i3dense_adapt.I3DenseAdapt(opt, i3densenet,
                                           action_dataset.class_nb,
                                           channel_nb=channel_nb)
    elif opt.network == 'i3res':
        resnet = torchvision.models.resnet50(pretrained=True)
        i3resnet = i3res.I3ResNet(resnet, frame_nb=opt.clip_size)
        model = i3res_adapt.I3ResAdapt(opt, i3resnet,
                                       class_nb=action_dataset.class_nb,
                                       channel_nb=channel_nb)
    else:
        raise ValueError(
            'network should be in [i3res|i3dense|i3d|c3d] but got {}'.format(
                opt.network))

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.net.parameters(),
                                lr=opt.lr,
                                momentum=opt.momentum)
    model.set_criterion(criterion)
    model.set_optimizer(optimizer)

    if opt.plateau_scheduler and opt.continue_training:
        raise ValueError('Plateau scheduler and continue training '
                         'are incompatible for now')
    if opt.plateau_scheduler:
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode='min',
            factor=opt.plateau_factor,
            patience=opt.plateau_patience,
            threshold=opt.plateau_thresh,
            threshold_mode='rel')
        model.set_lr_scheduler(scheduler)

    # Use multiple GPUs
    if opt.gpu_parallel:
        available_gpus = torch.cuda.device_count()
        device_ids = list(range(opt.gpu_nb))
        print('Using {} out of {} available GPUs'.format(
            len(device_ids), available_gpus))
        model.net = torch.nn.DataParallel(model.net, device_ids=device_ids)

    # Load existing weights; opt.continue_epoch is the epoch to load
    if opt.continue_training:
        if opt.continue_epoch == 0:
            model.net.eval()
            model.load(latest=True)
        else:
            model.load(epoch=opt.continue_epoch)
        # New learning rate for SGD (TODO: add momentum update)
        model.update_optimizer(lr=opt.lr, momentum=opt.momentum)

    train.train_net(dataloader,
                    model,
                    opt,
                    valid_dataloader=val_dataloader,
                    visualize=opt.visualize,
                    test_aggreg=opt.test_aggreg)
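
# A hedged sketch (illustrative names, not from the original repo) of building
# per-sample weights for WeightedRandomSampler from per-class counts, since
# the sampler draws dataset indices and therefore needs one weight per sample.
import torch

def make_balanced_sampler(sample_labels, class_counts):
    class_weights = [1.0 / c for c in class_counts]
    sample_weights = [class_weights[lbl] for lbl in sample_labels]
    return torch.utils.data.WeightedRandomSampler(sample_weights,
                                                  num_samples=len(sample_weights),
                                                  replacement=True)

# e.g. labels [0, 0, 0, 1] with counts [3, 1] -> class 1 gets oversampled.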
def test():
    step = 0
    # Data load
    transforms = video_transforms.Compose([
        video_transforms.Resize((args.isize, args.isize)),
        volume_transforms.ClipToTensor()
    ])
    dataset = MdfDataLoader(args.isize, args.nfr, args.test_data_path, transforms)
    dataloader = torch.utils.data.DataLoader(dataset=dataset,
                                             batch_size=args.batchsize,
                                             drop_last=True,
                                             shuffle=False,
                                             num_workers=8)
    model_list = [line.rstrip() for line in open(args.test_model_list_path)]

    with torch.no_grad():
        for m_i, m in enumerate(model_list):
            print("\n {}".format(m))
            model, name = load_model(m)
            save_root = os.path.join(SAVEROOT, "images", name)
            if not os.path.exists(save_root):
                os.makedirs(save_root)
            print(save_root)

            gts = []
            predicts = []
            pbar = tqdm(dataloader, leave=True, ncols=100, total=len(dataloader))
            for i, data in enumerate(pbar):
                input, real, gt, lb = (d.to('cuda') for d in data)
                predict = model(input)
                t_pre = threshold(predict)
                m_pre = morphology_proc(t_pre)
                gts.append(gt.permute(0, 2, 3, 4, 1).cpu().numpy())
                predicts.append(predict.permute(0, 2, 3, 4, 1).cpu().numpy())
                # -- SAVE IMAGE --
                """
                grid = torch.cat([normalize(input), normalize(real), gray2rgb(gt),
                                  gray2rgb(predict), gray2rgb(t_pre), gray2rgb(m_pre)],
                                 dim=3)
                for image in grid.permute(0, 2, 1, 3, 4):
                    save_image(image, os.path.join(save_root, "%06d.png" % (step)),
                               nrow=args.nfr)
                    step += 1
                pbar.set_description("[TEST %d/%d]" % (m_i + 1, len(model_list)))
                """

            gts = np.asarray(np.stack(gts), dtype=np.int32).flatten()
            predicts = np.asarray(np.stack(predicts)).flatten()
            if args.metric == 'roc':
                score = roc(gts, predicts, name)
            elif args.metric == 'pr':
                score = pr(gts, predicts, name)
            f1 = evaluate(gts, predicts, metric='f1_score')
            print("%s / %s == %f" % (m, args.metric, score))
            print("%s / f1 == %f" % (m, f1))

    plt.savefig(os.path.join(SAVEROOT, "%s_curve.png" % (args.metric)))
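
# A hedged sketch of what the roc/pr helpers called above might look like
# (assumption: they are project-specific; this version uses sklearn and plots
# onto the current matplotlib figure so curves from several models overlay
# before the final plt.savefig).
import matplotlib.pyplot as plt
from sklearn import metrics

def roc(gts, predicts, name):
    fpr, tpr, _ = metrics.roc_curve(gts, predicts)
    auc = metrics.auc(fpr, tpr)
    plt.plot(fpr, tpr, label="%s (AUC=%.3f)" % (name, auc))
    plt.xlabel("False positive rate")
    plt.ylabel("True positive rate")
    plt.legend()
    return auc

def pr(gts, predicts, name):
    precision, recall, _ = metrics.precision_recall_curve(gts, predicts)
    ap = metrics.average_precision_score(gts, predicts)
    plt.plot(recall, precision, label="%s (AP=%.3f)" % (name, ap))
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.legend()
    return ap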
def run_testing(opt):
    scale_size = (256, 342)
    crop_size = (224, 224)
    if opt.use_heatmaps:
        channel_nb = opt.heatmap_nb
    elif opt.use_flow:
        channel_nb = 2
    else:
        channel_nb = 3

    # Initialize transforms
    if not opt.use_heatmaps:
        base_transform_list = [
            video_transforms.Scale(crop_size),
            volume_transforms.ClipToTensor(channel_nb=channel_nb)
        ]
        video_transform_list = [
            video_transforms.Scale(scale_size),
            video_transforms.RandomCrop(crop_size),
            volume_transforms.ClipToTensor(channel_nb=channel_nb)
        ]
    else:
        base_transform_list = [volume_transforms.ToTensor()]
        video_transform_list = [
            tensor_transforms.SpatialRandomCrop(crop_size),
            volume_transforms.ToTensor()
        ]
    base_transform = video_transforms.Compose(base_transform_list)
    video_transform = video_transforms.Compose(video_transform_list)

    if opt.dataset == 'smthg':
        dataset = smthg.Smthg(flow_type=opt.flow_type,
                              rescale_flows=opt.rescale_flows,
                              split=opt.split,
                              use_flow=opt.use_flow)
    elif opt.dataset == 'gteagazeplus':
        all_subjects = [
            'Ahmad', 'Alireza', 'Carlos', 'Rahul', 'Yin', 'Shaghayegh'
        ]
        train_seqs, valid_seqs = evaluation.leave_one_out(
            all_subjects, opt.leave_out)
        dataset = GTEAGazePlus(flow_type=opt.flow_type,
                               heatmaps=opt.use_heatmaps,
                               heatmap_size=crop_size,
                               rescale_flows=opt.rescale_flows,
                               seqs=valid_seqs,
                               use_flow=opt.use_flow)

    action_dataset = ActionDataset(dataset,
                                   base_transform=base_transform,
                                   clip_size=opt.clip_size,
                                   transform=video_transform,
                                   test=True)

    assert opt.batch_size == 1, 'During testing batch size should be 1 but got {}'.format(
        opt.batch_size)

    val_dataloader = torch.utils.data.DataLoader(action_dataset,
                                                 shuffle=False,
                                                 batch_size=opt.batch_size,
                                                 num_workers=opt.threads)

    # Initialize the neural network
    if opt.network == 'c3d':
        c3dnet = c3d.C3D()
        model = c3d_adapt.C3DAdapt(opt, c3dnet, action_dataset.class_nb,
                                   in_channels=channel_nb)
    elif opt.network == 'i3d':
        if opt.use_flow:
            i3dnet = i3d.I3D(class_nb=400, modality='flow', dropout_rate=0.5)
            model = i3d_adapt.I3DAdapt(opt, i3dnet, action_dataset.class_nb)
        else:
            i3dnet = i3d.I3D(class_nb=400, modality='rgb', dropout_rate=0.5)
            model = i3d_adapt.I3DAdapt(opt, i3dnet, action_dataset.class_nb,
                                       in_channels=channel_nb)
    elif opt.network == 'i3dense':
        densenet = torchvision.models.densenet121(pretrained=True)
        i3densenet = i3dense.I3DenseNet(copy.deepcopy(densenet),
                                        opt.clip_size,
                                        inflate_block_convs=True)
        model = i3dense_adapt.I3DenseAdapt(opt, i3densenet,
                                           action_dataset.class_nb,
                                           channel_nb=channel_nb)
    elif opt.network == 'i3res':
        resnet = torchvision.models.resnet50(pretrained=True)
        i3resnet = i3res.I3ResNet(resnet, frame_nb=opt.clip_size)
        model = i3res_adapt.I3ResAdapt(opt, i3resnet,
                                       class_nb=action_dataset.class_nb,
                                       channel_nb=channel_nb)
    else:
        raise ValueError(
            'network should be in [i3res|i3dense|i3d|c3d] but got {}'.format(
                opt.network))

    optimizer = torch.optim.SGD(model.net.parameters(), lr=1)
    model.set_optimizer(optimizer)

    # Use multiple GPUs
    if opt.gpu_parallel:
        available_gpus = torch.cuda.device_count()
        device_ids = list(range(opt.gpu_nb))
        print('Using {} out of {} available GPUs'.format(
            len(device_ids), available_gpus))
        model.net = torch.nn.DataParallel(model.net, device_ids=device_ids)

    # Load existing weights
    model.net.eval()
    if opt.use_gpu:
        model.net.cuda()
    model.load(load_path=opt.checkpoint_path)

    accuracy = test.test(val_dataloader, model, opt=opt, save_predictions=True)
    print('Computed accuracy: {}'.format(accuracy))
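
# A hedged, standalone sketch (not the project's model.load) of a common
# pitfall when mixing DataParallel with checkpointing, as in the training and
# testing scripts above: weights saved from a wrapped model carry a "module."
# key prefix that must be stripped (or added) to match the target network.
import torch

def load_flexible(net, checkpoint_path):
    state = torch.load(checkpoint_path, map_location='cpu')
    wrapped = isinstance(net, torch.nn.DataParallel)
    fixed = {}
    for key, val in state.items():
        if key.startswith('module.') and not wrapped:
            fixed[key[len('module.'):]] = val   # saved wrapped, loading bare
        elif not key.startswith('module.') and wrapped:
            fixed['module.' + key] = val        # saved bare, loading wrapped
        else:
            fixed[key] = val
    net.load_state_dict(fixed)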