# Exemplo n.º 1 ("Example #1" — scrape artifact kept as a comment so the file parses)
# 0
    def __init__(self, dataset_folder, spt_path,  boxes_file, vid2idx, mode='train',get_loader=get_default_video_loader,
                 sample_size=112,  classes_idx=None):
        """Index the dataset split and build the clip loader plus the
        per-frame preprocessing pipeline.

        NOTE(review): assumes `spt_path` points at the split files and
        `boxes_file` at the per-video box annotations — confirm with callers.
        """
        # Plain attribute copies of the configuration arguments.
        self.dataset_folder = dataset_folder
        self.boxes_file = boxes_file
        self.vid2idx = vid2idx
        self.mode = mode
        self.sample_size = sample_size
        self.classes_idx = classes_idx

        # Scan the split once; also records dataset-wide maxima.
        self.data, self.max_frames, self.max_actions = make_dataset_names( dataset_folder, spt_path, boxes_file, mode)
        self.loader = get_loader()

        # Per-channel means measured on JHMDB .png frames; std is left at
        # [1, 1, 1] so Normalize only centres the data.
        # NOTE(review): channel order (RGB vs BGR) is not visible here.
        # mean = [112.07945832, 112.87372333, 106.90993363]  # ucf-101 24 classes
        channel_mean = [103.29825354, 104.63845484,  90.79830328]  # jhmdb from .png
        self.spatial_transform = Compose([Scale(sample_size),  # [Resize(sample_size),
                                          ToTensor(),
                                          Normalize(channel_mean, [1, 1, 1])])
    # --- path / label setup (fragment: `root_path`, `dataset_cfg`,
    # `dataset_frames`, `sample_size` and `sample_duration` are all
    # defined outside this view) ---
    split_txt_path = os.path.abspath(
        os.path.join(root_path, dataset_cfg.dataset.split_txt_path))

    ### get videos id
    # Class name -> integer label, and video name -> index lookup tables.
    actions = dataset_cfg.dataset.classes
    cls2idx = {actions[i]: i for i in range(0, len(actions))}
    vid2idx, vid_names = get_vid_dict(dataset_frames)

    # # get mean
    # mean = [112.07945832, 112.87372333, 106.90993363]  # ucf-101 24 classes
    mean = [0.5, 0.5, 0.5]
    std = [0.5, 0.5, 0.5]

    # Per-frame preprocessing: resize to sample_size, convert to tensor,
    # then normalise with the fixed 0.5 mean/std above.
    spatial_transform = Compose([
        Scale(sample_size),  # [Resize(sample_size),
        ToTensor(),
        Normalize(mean, std)
    ])
    # Loop-pads clips shorter than sample_duration frames.
    temporal_transform = LoopPadding(sample_duration)

    n_classes = len(actions)

    #######################################################
    #          Part 1-1 - train TPN - without reg         #
    #######################################################

    print(' -----------------------------------------------------')
    print('|          Part 1-1 - train TPN - without reg         |')
    print(' -----------------------------------------------------')

    ## Define Dataloaders
    # NOTE(review): `classes` is not defined in this view (the earlier
    # fragment builds cls2idx from `actions`) — confirm where it comes from.
    cls2idx = { classes[i] : i for i in range(0, len(classes)) }


    # Hard-coded JHMDB data locations (machine-specific paths).
    dataset_folder = '/gpu-data2/sgal/JHMDB-act-detector-frames'
    # NOTE(review): `splt_txt_path` looks like a typo for `split_txt_path`;
    # it is used consistently below, so left unchanged here.
    splt_txt_path =  '/gpu-data2/sgal/splits'
    boxes_file = '/gpu-data2/sgal/poses.json'
    # Spatial side of each frame and clip length in frames.
    sample_size = 112
    sample_duration = 16 #len(images)

    # Single-sample batches, loading in the main process.
    batch_size = 1
    n_threads = 0
    
    # Per-channel mean measured on JHMDB .png frames; std stays [1, 1, 1]
    # so Normalize only centres the data.
    mean = [103.29825354, 104.63845484,  90.79830328] # jhmdb from .png

    spatial_transform = Compose([Scale(sample_size), # [Resize(sample_size),
                                 # CenterCrop(sample_size),
                                 ToTensor(),
                                 Normalize(mean, [1, 1, 1])])
    temporal_transform = LoopPadding(sample_duration)

    # Build the training dataset and its loader (shuffle off, batch of 1 —
    # presumably a debugging configuration; confirm before real training).
    data = Video(dataset_folder, frames_dur=sample_duration, spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform, json_file = boxes_file,
                 split_txt_path=splt_txt_path, mode='train', classes_idx=cls2idx)
    data_loader = torch.utils.data.DataLoader(data, batch_size=batch_size,
                                              shuffle=False, num_workers=n_threads, pin_memory=True)
    # clips, (h,w), gt_tubes, gt_bboxes, n_actions, n_frames = next(data_loader.__iter__())
    # for i in data:
    #     clips, (h,w), gt_tubes, gt_bboxes, n_actions, n_frames = i
    #     # print('gt_bboxes.shape :',gt_bboxes)
    #     # print('gt_bboxes.shape :',gt_bboxes.shape)
    #     # print('gt_tubes :',gt_tubes)
    #     # print('clips.shape :',clips.shape)
    # --- second configuration fragment: re-declares the clip geometry
    # (shadows the assignments made above) ---
    sample_size = 112
    sample_duration = 16  # len(images)
    batch_size = 1
    n_threads = 0

    # # get mean
    # mean =  [103.75581543 104.79421473  91.16894564] # jhmdb
    mean = [103.29825354, 104.63845484, 90.79830328]  # jhmdb from .png

    # generate model
    # NOTE(review): `last_fc` and this `mean` are assigned but not used
    # anywhere in this view.
    last_fc = False

    scale_size = [sample_size, sample_size]
    # Resize + tensor conversion only — no Normalize here, unlike the
    # pipeline in the fragment above.
    spatial_transform = Compose([
        Scale(sample_size),  # [Resize(sample_size),
        ToTensor()
    ])
    temporal_transform = LoopPadding(sample_duration)

    # Tensor conversion only, no resize.
    spatial_transform2 = Compose([  # [Resize(sample_size),
        ToTensor()
    ])

    ## UCF code
    # UCF-101 data locations (machine-specific); annotations come from a
    # pickle file. NOTE(review): unpickling is only safe on trusted data.
    dataset_folder = '/gpu-data/sgal/UCF-101-frames'
    boxes_file = './pyannot.pkl'
    actions = [
        'Basketball', 'BasketballDunk', 'Biking', 'CliffDiving',
        'CricketBowling', 'Diving', 'Fencing', 'FloorGymnastics', 'GolfSwing',
        'HorseRiding', 'IceDancing', 'LongJump', 'PoleVault', 'RopeClimbing',
        'SalsaSpin', 'SkateBoarding', 'Skiing', 'Skijet', 'SoccerJuggling',