Example #1
def main(args):
    device = "cuda"

    args.distributed = dist.get_world_size() > 1

    transforms = video_transforms.Compose([
        RandomSelectFrames(16),
        video_transforms.Resize(args.size),
        video_transforms.CenterCrop(args.size),
        volume_transforms.ClipToTensor(),
        tensor_transforms.Normalize(0.5, 0.5)
    ])

    with open(
            '/home/shirakawa/movie/code/iVideoGAN/over16frame_list_training.txt',
            'rb') as f:
        train_file_list = pickle.load(f)
    print(len(train_file_list))

    dataset = MITDataset(train_file_list, transform=transforms)
    sampler = dist.data_sampler(dataset,
                                shuffle=True,
                                distributed=args.distributed)
    #loader = DataLoader(
    #    dataset, batch_size=128 // args.n_gpu, sampler=sampler, num_workers=2
    #)
    loader = DataLoader(dataset,
                        batch_size=32 // args.n_gpu,
                        sampler=sampler,
                        num_workers=2)

    model = VQVAE().to(device)

    if args.distributed:
        model = nn.parallel.DistributedDataParallel(
            model,
            device_ids=[dist.get_local_rank()],
            output_device=dist.get_local_rank(),
        )

    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    scheduler = None
    if args.sched == "cycle":
        scheduler = CycleScheduler(
            optimizer,
            args.lr,
            n_iter=len(loader) * args.epoch,
            momentum=None,
            warmup_proportion=0.05,
        )

    for i in range(args.epoch):
        train(i, loader, model, optimizer, scheduler, device)

        if dist.is_primary():
            torch.save(model.state_dict(),
                       f"checkpoint_vid_v2/vqvae_{str(i + 1).zfill(3)}.pt")
Example #2
    def __init__(self, args):
        # Initialization
        self.args = args
        self.isize = args.isize
        self.nfr = self.args.nfr
        self.plist = {'train': args.tr_plist, 'test': args.ts_plist}

        # set transforms
        train_transforms = video_transforms.Compose([
            video_transforms.Resize(
                (int(self.isize * 1.1), int(self.isize * 1.1))),
            video_transforms.RandomRotation(10),
            video_transforms.RandomCrop((self.args.isize, self.args.isize)),
            video_transforms.RandomHorizontalFlip(),
            #video_transforms.ColorJitter(),
            video_transforms.Resize((self.isize, self.isize)),
            volume_transforms.ClipToTensor()
        ])
        test_transforms = video_transforms.Compose([
            video_transforms.Resize((self.isize, self.isize)),
            volume_transforms.ClipToTensor()
        ])
        self.transforms = {'train': train_transforms, 'test': test_transforms}
Example #3
    def __init__(self, isize, nfr, path_li, transforms=None):
        # set self
        self.isize = isize
        self.nfr = nfr
        self.paths = path_li
        self.transforms = transforms
        self.mask_transforms = video_transforms.Compose([
            video_transforms.Resize((self.isize, self.isize)),
            volume_transforms.ClipToTensor(channel_nb=1)
        ])

        # Set index
        self.data_path_li, self.real_path_li, self.mask_path_li = self.path_reader(
            self.paths)  # video path lists
        nframe_li = self.count_frame(self.mask_path_li)  # frames per video
        div_nfr_li = [n // self.nfr for n in nframe_li]  # nfr-sized segments per video
        # Running sum: total_div_nfr[i] is the total segment count over
        # videos 0..i, used to map a flat dataset index to (video, segment).
        self.total_div_nfr = div_nfr_li
        for i in range(1, len(div_nfr_li)):
            self.total_div_nfr[i] += self.total_div_nfr[i - 1]
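The running sum makes a flat dataset index resolvable back to a concrete (video, segment) pair. A minimal sketch of that lookup (the class's __getitem__ is not shown in this snippet, so the name locate is illustrative):

import bisect

def locate(total_div_nfr, index):
    """Map a flat dataset index to (video_idx, segment_idx) using the
    cumulative segment counts built in __init__ above."""
    video_idx = bisect.bisect_right(total_div_nfr, index)
    prev = total_div_nfr[video_idx - 1] if video_idx > 0 else 0
    return video_idx, index - prev

# e.g. three videos holding [2, 5, 4] segments give total_div_nfr == [2, 7, 11]
assert locate([2, 7, 11], 0) == (0, 0)
assert locate([2, 7, 11], 2) == (1, 0)
assert locate([2, 7, 11], 10) == (2, 3)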
Example #4
    def __init__(self,
                 root,
                 dataset,
                 is_train,
                 is_transform=True,
                 img_size=112):
        self.root = root
        self.dataset = dataset
        self.is_train = is_train
        self.is_transform = is_transform
        self.nframes = 0
        self.video_transform_one = video_transforms.Compose([
            video_transforms.RandomRotation(10),
            video_transforms.RandomCrop((112, 112)),
            video_transforms.RandomHorizontalFlip(),
            volume_transforms.ClipToTensor(),
            video_transforms.Normalize((0.4339, 0.4046, 0.3776),
                                       (0.151987, 0.14855, 0.1569))
        ])

        #########################################################################
        # load the image feature and image name
        #########################################################################
        if self.is_train:
            if self.dataset == 'MITS':
                vid_list = open('./MITS_split/MITS.lst')
                self.nframes = 90
            elif self.dataset == 'KITS':
                vid_list = open('./KITS_split/KITS.lst')
                self.nframes = 160
            else:
                raise ValueError('no such dataset: {}'.format(self.dataset))
            self.data_list = [line.strip('\r\n') for line in vid_list]
            vid_list.close()
Example #5
def video_to_flow(video):
    # video tensor(-1, 1) to ndarray(0, 255)
    norm_video = [normalize(v)
                  for v in video.permute(2, 0, 1, 3, 4)]  # (D, B, C, H, W)
    norm_video = torch.stack(norm_video).permute(1, 0, 3, 4,
                                                 2)  # (B, D, H, W, C)
    nd_video = norm_video.cpu().numpy()

    transform = video_transforms.Compose([volume_transforms.ClipToTensor()])
    # calc optical flow per video (Farneback), rendered as HSV -> RGB images
    flow_videos = []
    for v in nd_video:
        flow_imgs = []
        for i, img in enumerate(v):
            next_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
            if i == 0:
                # First frame has no predecessor: just prepare the HSV canvas
                # with saturation fixed at 255.
                hsv_mask = np.zeros_like(img)
                hsv_mask[:, :, 1] = 255
            else:
                flow = cv2.calcOpticalFlowFarneback(prv_img, next_img, None,
                                                    0.5, 3, 15, 3, 5, 1.2, 0)
                mag, ang = cv2.cartToPolar(flow[:, :, 0],
                                           flow[:, :, 1],
                                           angleInDegrees=True)
                # Hue encodes direction (OpenCV hue spans 0-179, hence ang / 2),
                # value encodes the normalized flow magnitude.
                hsv_mask[:, :, 0] = ang / 2
                hsv_mask[:, :, 2] = cv2.normalize(mag, None, 0, 255,
                                                  cv2.NORM_MINMAX)

                rgb = cv2.cvtColor(hsv_mask, cv2.COLOR_HSV2RGB)
                rgb = Image.fromarray(np.uint8(rgb))
                flow_imgs.append(rgb)
            prv_img = next_img
        # Duplicate the last flow image so the clip keeps D frames (the first
        # frame produced no flow).
        flow_imgs.append(rgb)
        flow_imgs = transform(flow_imgs)
        flow_videos.append(flow_imgs)

    return torch.stack(flow_videos) * 2 - 1
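A usage sketch, under the assumptions implied by the permutes above: video is a (B, C, D, H, W) tensor in [-1, 1], normalize is the project's own helper (not shown) mapping it to a uint8-compatible [0, 255] range, and the output is a flow clip with the same layout:

import torch

video = torch.rand(2, 3, 8, 64, 64) * 2 - 1  # (B, C, D, H, W) in [-1, 1]
flow = video_to_flow(video)
print(flow.shape)  # expected: torch.Size([2, 3, 8, 64, 64]), values in [-1, 1]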
Example #6
def run_training(opt):
    # Index of sequence item to leave out for validation
    leave_out_idx = opt.leave_out

    scale_size = (256, 342)
    crop_size = (224, 224)
    if opt.use_heatmaps:
        channel_nb = opt.heatmap_nb
    elif opt.use_flow:
        channel_nb = 2
    else:
        channel_nb = 3

    # Initialize transforms
    if not opt.use_heatmaps:
        base_transform_list = [
            video_transforms.Scale(crop_size),
            volume_transforms.ClipToTensor(channel_nb=channel_nb)
        ]
        video_transform_list = [
            video_transforms.Scale(scale_size),
            video_transforms.RandomCrop(crop_size),
            volume_transforms.ClipToTensor(channel_nb=channel_nb)
        ]
    else:
        base_transform_list = [volume_transforms.ToTensor()]
        video_transform_list = [
            tensor_transforms.SpatialRandomCrop(crop_size),
            volume_transforms.ToTensor()
        ]
    base_transform = video_transforms.Compose(base_transform_list)
    video_transform = video_transforms.Compose(video_transform_list)

    # Initialize dataset
    if opt.dataset == 'gteagazeplus':
        all_subjects = [
            'Ahmad', 'Alireza', 'Carlos', 'Rahul', 'Yin', 'Shaghayegh'
        ]
        train_seqs, valid_seqs = evaluation.leave_one_out(
            all_subjects, leave_out_idx)
        dataset = GTEAGazePlus(
            root_folder='data/GTEAGazePlusdata2',  # TODO remove
            flow_type=opt.flow_type,
            full_res=True,
            heatmaps=opt.use_heatmaps,
            heatmap_size=scale_size,
            original_labels=True,
            rescale_flows=opt.rescale_flows,
            seqs=train_seqs,
            use_flow=opt.use_flow)
        val_dataset = GTEAGazePlus(
            root_folder='data/GTEAGazePlusdata2',  # TODO remove
            flow_type=opt.flow_type,
            full_res=True,
            heatmaps=opt.use_heatmaps,
            heatmap_size=scale_size,
            original_labels=True,
            rescale_flows=opt.rescale_flows,
            seqs=valid_seqs,
            use_flow=opt.use_flow)
    elif opt.dataset == 'smthg':
        dataset = smthg.Smthg(flow_type=opt.flow_type,
                              rescale_flows=opt.rescale_flows,
                              use_flow=opt.use_flow,
                              split='train')

        val_dataset = smthg.Smthg(flow_type=opt.flow_type,
                                  rescale_flows=opt.rescale_flows,
                                  split='valid',
                                  use_flow=opt.use_flow)
    elif opt.dataset == 'smthgv2':
        dataset = smthgv2.SmthgV2(use_flow=opt.use_flow,
                                  split='train',
                                  rescale_flows=opt.rescale_flows)

        val_dataset = smthgv2.SmthgV2(split='valid',
                                      use_flow=opt.use_flow,
                                      rescale_flows=opt.rescale_flows)
    else:
        raise ValueError('the opt.dataset name provided {0} is not handled '
                         'by this script'.format(opt.dataset))
    action_dataset = ActionDataset(dataset,
                                   base_transform=base_transform,
                                   clip_size=opt.clip_size,
                                   transform=video_transform)
    val_action_dataset = ActionDataset(val_dataset,
                                       base_transform=base_transform,
                                       clip_size=opt.clip_size,
                                       transform=video_transform)

    # Initialize sampler
    if opt.weighted_training:
        # Inverse-frequency weights, one per class. Note that
        # WeightedRandomSampler draws indices from range(len(weights)), i.e.
        # it expects one weight per *sample*; these class weights would
        # normally be expanded via each clip's label (see the sketch after
        # this example).
        weights = [1 / k for k in dataset.class_counts]
        sampler = torch.utils.data.sampler.WeightedRandomSampler(
            weights, len(action_dataset))
    else:
        sampler = torch.utils.data.sampler.RandomSampler(action_dataset)

    # Initialize dataloaders
    dataloader = torch.utils.data.DataLoader(action_dataset,
                                             sampler=sampler,
                                             batch_size=opt.batch_size,
                                             num_workers=opt.threads)

    val_dataloader = torch.utils.data.DataLoader(val_action_dataset,
                                                 shuffle=False,
                                                 batch_size=opt.batch_size,
                                                 num_workers=opt.threads)

    # Initialize C3D neural network
    if opt.network == 'c3d':
        c3dnet = c3d.C3D()
        if opt.pretrained:
            c3dnet.load_state_dict(torch.load('data/c3d.pickle'))
        model = c3d_adapt.C3DAdapt(opt,
                                   c3dnet,
                                   action_dataset.class_nb,
                                   in_channels=channel_nb)
    elif opt.network == 'i3d':
        if opt.use_flow:
            i3dnet = i3d.I3D(class_nb=400, modality='flow', dropout_rate=0.5)
            if opt.pretrained:
                i3dnet.load_state_dict(torch.load('data/i3d_flow.pth'))
            model = i3d_adapt.I3DAdapt(opt, i3dnet, action_dataset.class_nb)
        else:
            # Loads RGB weights and then adapts network
            i3dnet = i3d.I3D(class_nb=400, modality='rgb', dropout_rate=0.5)
            if opt.pretrained:
                i3dnet.load_state_dict(torch.load('data/i3d_rgb.pth'))
            model = i3d_adapt.I3DAdapt(opt,
                                       i3dnet,
                                       action_dataset.class_nb,
                                       in_channels=channel_nb)
    elif opt.network == 'i3dense':
        densenet = torchvision.models.densenet121(pretrained=True)
        i3densenet = i3dense.I3DenseNet(copy.deepcopy(densenet),
                                        opt.clip_size,
                                        inflate_block_convs=True)
        model = i3dense_adapt.I3DenseAdapt(opt,
                                           i3densenet,
                                           action_dataset.class_nb,
                                           channel_nb=channel_nb)
    elif opt.network == 'i3res':
        resnet = torchvision.models.resnet50(pretrained=True)
        i3resnet = i3res.I3ResNet(resnet, frame_nb=opt.clip_size)
        model = i3res_adapt.I3ResAdapt(opt,
                                       i3resnet,
                                       class_nb=action_dataset.class_nb,
                                       channel_nb=channel_nb)
    else:
        raise ValueError(
            'network should be in [i3res|i3dense|i3d|c3d] but got {}'.format(
                opt.network))

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.net.parameters(),
                                lr=opt.lr,
                                momentum=opt.momentum)

    model.set_criterion(criterion)
    model.set_optimizer(optimizer)
    if opt.plateau_scheduler and opt.continue_training:
        raise ValueError('Plateau scheduler and continue training '
                         'are incompatible for now')
    if opt.plateau_scheduler:
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode='min',
            factor=opt.plateau_factor,
            patience=opt.plateau_patience,
            threshold=opt.plateau_thresh,
            threshold_mode='rel')
        model.set_lr_scheduler(scheduler)

    # Use multiple GPUS
    if opt.gpu_parallel:
        available_gpus = torch.cuda.device_count()
        device_ids = list(range(opt.gpu_nb))
        print('Using {} out of {} available GPUs'.format(
            len(device_ids), available_gpus))
        model.net = torch.nn.DataParallel(model.net, device_ids=device_ids)

    # Load existing weights, opt.continue_training is epoch to load
    if opt.continue_training:
        if opt.continue_epoch == 0:
            model.net.eval()
            model.load(latest=True)
        else:
            model.load(epoch=opt.continue_epoch)

        # New learning rate for SGD TODO add momentum update
        model.update_optimizer(lr=opt.lr, momentum=opt.momentum)

    train.train_net(dataloader,
                    model,
                    opt,
                    valid_dataloader=val_dataloader,
                    visualize=opt.visualize,
                    test_aggreg=opt.test_aggreg)
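On the weighted-training branch above: WeightedRandomSampler samples indices from range(len(weights)), so it expects one weight per sample rather than one per class. A minimal sketch of the usual expansion, with a hypothetical per-clip label list (the snippet does not expose one):

import torch

# Hypothetical data: labels[i] is the class index of sample i.
labels = [0, 0, 1, 2, 2, 2]
class_counts = [2, 1, 3]
class_weights = [1.0 / c for c in class_counts]
sample_weights = [class_weights[y] for y in labels]

# One weight per sample; the sampler draws len(labels) indices per epoch.
sampler = torch.utils.data.WeightedRandomSampler(sample_weights,
                                                 num_samples=len(labels))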
Example #7
def test():
    step = 0

    # Data load
    transforms = video_transforms.Compose([
        video_transforms.Resize((args.isize, args.isize)),
        volume_transforms.ClipToTensor()
    ])
    dataset = MdfDataLoader(args.isize, args.nfr, args.test_data_path,
                            transforms)
    dataloader = torch.utils.data.DataLoader(dataset=dataset,
                                             batch_size=args.batchsize,
                                             drop_last=True,
                                             shuffle=False,
                                             num_workers=8)

    model_list = [line.rstrip() for line in open(args.test_model_list_path)]

    with torch.no_grad():
        for m_i, m in enumerate(model_list):
            print("\n {}".format(m))
            model, name = load_model(m)
            
            save_root = os.path.join(SAVEROOT, "images", name)
            os.makedirs(save_root, exist_ok=True)
            print(save_root)
 
            gts = []
            predicts = []
            pbar = tqdm(dataloader, leave=True, ncols=100, total=len(dataloader))

            for i, data in enumerate(pbar):

                input, real, gt, lb = (d.to('cuda') for d in data)
                predict = model(input)
                t_pre = threshold(predict)
                m_pre = morphology_proc(t_pre)

                gts.append(gt.permute(0,2,3,4,1).cpu().numpy())
                predicts.append(predict.permute(0,2,3,4,1).cpu().numpy())
                
                # -- SAVE IMAGE --
                """
                grid = torch.cat([normalize(input), normalize(real), gray2rgb(gt), gray2rgb(predict), gray2rgb(t_pre), gray2rgb(m_pre)], dim=3)
                for image in grid.permute(0,2,1,3,4):
                    save_image(image, os.path.join(save_root, "%06d.png" % (step)), nrow=args.nfr)
                    step += 1
                pbar.set_description("[TEST %d/%d]" % (m_i+1, len(model_list)))
                """
           
            gts = np.asarray(np.stack(gts), dtype=np.int32).flatten()
            predicts = np.asarray(np.stack(predicts)).flatten()
            if args.metric == 'roc':
                score = roc(gts, predicts, name)
            elif args.metric == 'pr':
                score = pr(gts, predicts, name)
            f1 = evaluate(gts, predicts, metric='f1_score')
            print("%s / %s == %f" % (m, args.metric, score))
            print("%s / f1 == %f" % (m, f1))
        plt.savefig(os.path.join(SAVEROOT, "%s_curve.png" % (args.metric)))
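The helpers threshold, morphology_proc, roc and pr above are project-specific and not shown. A minimal sketch of the scoring step, assuming roc and pr compute ROC-AUC and PR-AUC over the flattened per-pixel labels and scores (scikit-learn used as a stand-in):

import numpy as np
from sklearn.metrics import roc_auc_score, average_precision_score

gts = np.array([0, 0, 1, 1], dtype=np.int32)  # flattened ground-truth masks
predicts = np.array([0.1, 0.4, 0.35, 0.8])    # flattened predicted scores

print(roc_auc_score(gts, predicts))            # cf. the 'roc' branch
print(average_precision_score(gts, predicts))  # cf. the 'pr' branch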
Example #8
File: c3d_test.py  Project: towei/hand-cnns
def run_testing(opt):
    scale_size = (256, 342)
    crop_size = (224, 224)
    if opt.use_heatmaps:
        channel_nb = opt.heatmap_nb
    elif opt.use_flow:
        channel_nb = 2
    else:
        channel_nb = 3

    # Initialize transforms
    if not opt.use_heatmaps:
        base_transform_list = [
            video_transforms.Scale(crop_size),
            volume_transforms.ClipToTensor(channel_nb=channel_nb)
        ]
        video_transform_list = [
            video_transforms.Scale(scale_size),
            video_transforms.RandomCrop(crop_size),
            volume_transforms.ClipToTensor(channel_nb=channel_nb)
        ]
    else:
        base_transform_list = [volume_transforms.ToTensor()]
        video_transform_list = [
            tensor_transforms.SpatialRandomCrop(crop_size),
            volume_transforms.ToTensor()
        ]
    base_transform = video_transforms.Compose(base_transform_list)
    video_transform = video_transforms.Compose(video_transform_list)

    if opt.dataset == 'smthg':
        dataset = smthg.Smthg(flow_type=opt.flow_type,
                              rescale_flows=opt.rescale_flows,
                              split=opt.split,
                              use_flow=opt.use_flow)
    elif opt.dataset == 'gteagazeplus':
        all_subjects = [
            'Ahmad', 'Alireza', 'Carlos', 'Rahul', 'Yin', 'Shaghayegh'
        ]
        train_seqs, valid_seqs = evaluation.leave_one_out(
            all_subjects, opt.leave_out)
        dataset = GTEAGazePlus(flow_type=opt.flow_type,
                               heatmaps=opt.use_heatmaps,
                               heatmap_size=crop_size,
                               rescale_flows=opt.rescale_flows,
                               seqs=valid_seqs,
                               use_flow=opt.use_flow)
    else:
        # Mirror run_training: fail loudly instead of leaving `dataset` unset.
        raise ValueError('the opt.dataset name provided {0} is not handled '
                         'by this script'.format(opt.dataset))
    action_dataset = ActionDataset(dataset,
                                   base_transform=base_transform,
                                   clip_size=opt.clip_size,
                                   transform=video_transform,
                                   test=True)
    assert opt.batch_size == 1, 'During testing batch size should be 1 but got {}'.format(
        opt.batch_size)
    val_dataloader = torch.utils.data.DataLoader(action_dataset,
                                                 shuffle=False,
                                                 batch_size=opt.batch_size,
                                                 num_workers=opt.threads)

    # Initialize C3D neural network
    if opt.network == 'c3d':
        c3dnet = c3d.C3D()
        model = c3d_adapt.C3DAdapt(opt,
                                   c3dnet,
                                   action_dataset.class_nb,
                                   in_channels=channel_nb)

    elif opt.network == 'i3d':
        if opt.use_flow:
            i3dnet = i3d.I3D(class_nb=400, modality='flow', dropout_rate=0.5)
            model = i3d_adapt.I3DAdapt(opt, i3dnet, action_dataset.class_nb)
        else:
            i3dnet = i3d.I3D(class_nb=400, modality='rgb', dropout_rate=0.5)
            model = i3d_adapt.I3DAdapt(opt,
                                       i3dnet,
                                       action_dataset.class_nb,
                                       in_channels=channel_nb)
    elif opt.network == 'i3dense':
        densenet = torchvision.models.densenet121(pretrained=True)
        i3densenet = i3dense.I3DenseNet(copy.deepcopy(densenet),
                                        opt.clip_size,
                                        inflate_block_convs=True)
        model = i3dense_adapt.I3DenseAdapt(opt,
                                           i3densenet,
                                           action_dataset.class_nb,
                                           channel_nb=channel_nb)
    elif opt.network == 'i3res':
        resnet = torchvision.models.resnet50(pretrained=True)
        i3resnet = i3res.I3ResNet(resnet, frame_nb=opt.clip_size)
        model = i3res_adapt.I3ResAdapt(opt,
                                       i3resnet,
                                       class_nb=action_dataset.class_nb,
                                       channel_nb=channel_nb)
    else:
        raise ValueError(
            'network should be in [i3res|i3dense|i3d|c3d] but got {}'.format(
                opt.network))

    optimizer = torch.optim.SGD(model.net.parameters(), lr=1)

    model.set_optimizer(optimizer)

    # Use multiple GPUS
    if opt.gpu_parallel:
        available_gpus = torch.cuda.device_count()
        device_ids = list(range(opt.gpu_nb))
        print('Using {} out of {} available GPUs'.format(
            len(device_ids), available_gpus))
        model.net = torch.nn.DataParallel(model.net, device_ids=device_ids)

    # Load existing weights
    model.net.eval()
    if opt.use_gpu:
        model.net.cuda()
    model.load(load_path=opt.checkpoint_path)

    accuracy = test.test(val_dataloader, model, opt=opt, save_predictions=True)
    print('Computed accuracy: {}'.format(accuracy))