Example #1
    def __getitem__(self, idx):
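        # Load a clip of (interval + 1) consecutive frames, stepping backward or forward from start_frame.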
        file_name, start_frame, back_address, label_path = self.file_list[idx]

        imgs = []
        idxes = []
        for i in range(self.interval + 1):
            if self.reverse:
                frame_num = start_frame - i + 2 * self.interval  # backward
            else:
                frame_num = start_frame + i  # forward

            img = default_loader(
                os.path.join(file_name + "{:06d}".format(frame_num) +
                             back_address))
            img = self.img_transform(img)
            imgs.append(img)
            idxes.append(frame_num)

        if (self.mode != 'test') and (self.mode != 'video'):
            label = default_loader(
                os.path.join(self.data_path + "gtFine", self.mode,
                             label_path.split("_")[0], label_path))
            #label = self.label_transform(label)
            #print(label.shape)
            label = self.encode_segmap(torch.tensor(np.array(label)[:, :, 0]))
            if self.bi_direction:
                return imgs, label, idxes
            return imgs, label
        else:
            return imgs, label_path
Example #2
    def __getitem__(self, idx):
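        # Same clip loading as Example #1, but the image is normalized by hand with hard-coded mean/std values and converted to CHW.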
        file_name, start_frame, back_address, label_path = self.file_list[idx]

        imgs = []
        idxes = []
        for i in range(self.interval + 1):
            if self.reverse:
                frame_num = start_frame - i + 2 * self.interval  # backward
            else:
                frame_num = start_frame + i  # forward
            img = default_loader(
                os.path.join(
                    self.data_path + self.train_prefix + file_name.split("_")[0],
                    file_name + "{:05d}".format(frame_num) + back_address))

            img = np.array(img).astype(np.float32)
            img = img / 255.0
            img = img - np.array([0.41189489566336, 0.4251328133025, 0.4326707089857])
            img = img / np.array([0.27413549931506, 0.28506257482912, 0.28284674400252])
            img = np.ascontiguousarray(img, dtype=np.float32).transpose(2, 0, 1)
            img = torch.tensor(img)
            imgs.append(img)
            idxes.append(frame_num)

        if self.mode != 'test':
            label = default_loader(
                os.path.join(self.data_path + self.eval_prefix, label_path))
            label = self.encode_segmap(torch.tensor(np.array(label)[:, :, 0]))
            if self.bi_direction:
                return imgs, label, idxes
            return imgs, label
        else:
            return imgs, label_path
Example #3
    def __getitem__(self, idx):
        image = self.img_transform(default_loader(self.dataset[idx][0]))
        heatmap = self.heatmap_transform(default_loader(self.dataset[idx][1]))

        sample = (image, heatmap)

        return sample
Example #4
File: celeb.py  Project: ada-shen/icCNN
 def __getitem__(self, idx):
     fn = self.imgs[idx]
     lbls = self.lbls[idx]
     if self.is_train:
         imgs = default_loader(self.dst_path + '/train/' + fn)
     else:
         imgs = default_loader(self.dst_path + '/test/' + fn)
     imgs = self.transform(imgs)
     lbls = torch.Tensor(lbls)
     return [imgs, lbls]
Example #5
    def classify(self, image: ImageType, text: str, image_tensor=None, zero_image=False, zero_text=False):
        """Classifies a given image and text in it into Hateful/Non-Hateful.
        Image can be a url or a local path or you can directly pass a PIL.Image.Image
        object. Text needs to be a sentence containing all text in the image.

        Args:
            image (ImageType): Image to be classified
            text (str): Text in the image
            image_tensor: optional preprocessed image tensor; when given,
                the raw score tensor is returned instead of a dict
            zero_image: zero out the image features when classifying
            zero_text: zero out the text features when classifying

        Returns:
            {"label": 0, "confidence": 0.56}
        """
        sample = Sample()

        if image_tensor is not None:
            sample.image = image_tensor
        else:
            if isinstance(image, str):
                if image.startswith("http"):
                    temp_file = tempfile.NamedTemporaryFile()
                    download(image, *os.path.split(temp_file.name), disable_tqdm=True)
                    image = tv_helpers.default_loader(temp_file.name)
                    temp_file.close()
                else:
                    image = tv_helpers.default_loader(image)

            image = self.processor_dict["image_processor"](image)
            sample.image = image

        text = self.processor_dict["text_processor"]({"text": text})
        sample.text = text["text"]
        if "input_ids" in text:
            sample.update(text)

        sample_list = SampleList([sample])
        device = next(self.model.parameters()).device
        sample_list = sample_list.to(device)
        output = self.model(sample_list, zero_image=zero_image, zero_text=zero_text)
        scores = nn.functional.softmax(output["scores"], dim=1)

        if image_tensor is not None:
            return scores

        confidence, label = torch.max(scores, dim=1)

        return {"label": label.item(), "confidence": confidence.item()}
Example #6
    def __getitem__(self, index):
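        # Sample a positive pair (same label) or a negative pair (different label) with equal probability.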
        siamese_target = np.random.randint(0, 2)
        img1, label1 = self.data[index], self.labels[index].item()
        # flag1, softlabel1 = self.flag[index], self.soft_label[index]
        if siamese_target == 1:
            siamese_index = index
            while siamese_index == index:
                siamese_index = np.random.choice(self.label_to_indices[label1])
        else:
            siamese_label = np.random.choice(
                list(self.labels_set - set([label1])))
            siamese_index = np.random.choice(
                self.label_to_indices[siamese_label])
        img2, label2 = self.data[siamese_index], self.labels[
            siamese_index].item()
        # flag2, softlabel2 = self.flag[siamese_index], self.soft_label[siamese_index]
        img1 = default_loader(img1)
        img2 = default_loader(img2)
        if self.transform is not None:
            img1 = self.transform(img1)
            img2 = self.transform(img2)
        # return (img1, img2), siamese_target, (int(label1), int(label2)), (flag1, flag2), (softlabel1, softlabel2)

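        # Compare the first and last four characters of the two class names to
        # derive five pairwise verification labels (1 = match, 0 = mismatch).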
        if self.class_name[index][:4] == self.class_name[siamese_index][:4] \
                and self.class_name[index][-4:] == self.class_name[siamese_index][-4:]:
            vf_labels11_12 = 1
            vf_labels11_21 = 1
            vf_labels22_12 = 1
            vf_labels22_21 = 1
            vf_labels12_21 = 1
        elif self.class_name[index][:4] == self.class_name[siamese_index][:4] \
                and self.class_name[index][-4:] != self.class_name[siamese_index][-4:]:
            vf_labels11_12 = 0
            vf_labels11_21 = 1
            vf_labels22_12 = 1
            vf_labels22_21 = 0
            vf_labels12_21 = 0
        elif self.class_name[index][:4] != self.class_name[siamese_index][:4] \
                and self.class_name[index][-4:] == self.class_name[siamese_index][-4:]:
            vf_labels11_12 = 1
            vf_labels11_21 = 0
            vf_labels22_12 = 0
            vf_labels22_21 = 1
            vf_labels12_21 = 0
        else:
            vf_labels11_12 = 0
            vf_labels11_21 = 0
            vf_labels22_12 = 0
            vf_labels22_21 = 0
            vf_labels12_21 = 0

        return (img1, img2), \
               (siamese_target, vf_labels11_12, vf_labels11_21, vf_labels22_12, vf_labels22_21, vf_labels12_21), \
               (int(label1), int(label2))
Example #7
    def __init__(self):
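        # Build Market1501 train/test/query loaders; 'vis' and 'compare' modes additionally preload query/comparison images.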
        train_transform = transforms.Compose([
            transforms.Resize((384, 128), interpolation=3),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
            RandomErasing(probability=0.5, mean=[0.0, 0.0, 0.0])
        ])

        test_transform = transforms.Compose([
            transforms.Resize((384, 128), interpolation=3),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

        self.trainset = Market1501(train_transform, 'train', opt.data_path)
        self.testset = Market1501(test_transform, 'test', opt.data_path)
        self.queryset = Market1501(test_transform, 'query', opt.data_path)

        self.train_loader = dataloader.DataLoader(
            self.trainset,
            sampler=RandomSampler(self.trainset,
                                  batch_id=opt.batchid,
                                  batch_image=opt.batchimage),
            batch_size=opt.batchid * opt.batchimage,
            num_workers=8,
            pin_memory=True)
        self.test_loader = dataloader.DataLoader(self.testset,
                                                 batch_size=opt.batchtest,
                                                 num_workers=8,
                                                 pin_memory=True)
        self.query_loader = dataloader.DataLoader(self.queryset,
                                                  batch_size=opt.batchtest,
                                                  num_workers=8,
                                                  pin_memory=True)

        if opt.mode == 'vis':
            self.query_image = test_transform(
                default_loader(
                    os.path.join(self.queryset.data_path, opt.query_image)))

        if opt.mode == 'compare':
            self.compare_img_a = test_transform(
                default_loader(opt.compare_img_a))
            self.compare_img_b = test_transform(
                default_loader(opt.compare_img_b))

            self.query_image = test_transform(
                default_loader("cache/query.jpg"))
Example #8
    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.rgb_img_seq[idx])
        rgb_path = os.path.join(self.rgb_dir, self.rgb_img_seq[idx])
        img_Image = default_loader(img_path)
        rgb_Image = default_loader(rgb_path)
        if self.transform:
            img_Image = self.transform(img_Image)
            rgb_Image = self.transform(rgb_Image)

        return rgb_Image, img_Image, int(
            self.rgb_img_seq[idx].split('_')[-1][:-4]), str(
                self.rgb_img_seq[idx][:-4])
Example #9
    def __getitem__(self, idx):
        if self.preloaded:
            img1 = self.images[self.pairs[idx][0]]
            img2 = self.images[self.pairs[idx][1]]
        else:
            img1 = default_loader(self.pairs[idx][0])
            img2 = default_loader(self.pairs[idx][1])

        if self.transform:
            img1 = self.transform(img1)
            img2 = self.transform(img2)

        return [img1, img2], self.issame[idx]
Example #10
    def __getitem__(self, index):
        img_name = self.img_names[index]
        mask_name = self.mask_names[index]
        img = default_loader(os.path.join(self.dir_images, img_name))
        mask = default_loader(os.path.join(self.dir_masks, mask_name))

        assert img.size == mask.size

        if not self.transform:
            return img, mask

        img, mask = self.apply_transform(img, mask)
        return img, mask
Example #11
 def get_raw_image(self, index, bbox=False):
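     # Load a single frame image; optionally also return crops of each object bbox,
     # rescaled from the original video resolution to the loaded image size.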
     vid_id, act_id, frame_id = self.frames[index]
     participant_id = vid_id.split('_')[0]
     img_path = os.path.join(self.img_root, participant_id, vid_id,
                             f'frame_{frame_id:010d}.jpg')
     # To use high resolution images, the videos need to be downloaded first
     img = default_loader(
         img_path)  # this loads a smaller version of the image
     if bbox:
         img_bboxes = []
         objects = self.objects[vid_id][frame_id]
         orig_w, orig_h = self.video_info[vid_id]['res']
         img_w, img_h = img.size
         for obj in objects:
             for t, l, h, w in obj['bbox']:
                 h_scale = img_h / orig_h
                 w_scale = img_w / orig_w
                 t *= h_scale
                 h *= h_scale
                 l *= w_scale
                 w *= w_scale
                 if h < 10 or w < 10:
                     continue  # skip boxes that are too thin or too narrow
                 bbox = [int(l), int(t), int(l + w), int(t + h)]
                 img_bboxes.append(img.crop(bbox))
         return img, img_bboxes
     return img
Example #12
 def __getitem__(self, idx):
     img = default_loader(self.samples[idx])
     label = self.img_label[idx]
     # The index_channel is used to shuffle channels of the original image
     index_channel = [[0, 1, 2], [0, 2, 1], [1, 0, 2], [1, 2, 0], [2, 0, 1],
                      [2, 1, 0]]
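     # index1 is the identity order [0, 1, 2] with probability ~0.9; index2 is always a different random order.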
     p_index1 = 0.9
     if np.random.random() < p_index1:
         index1 = 0
     else:
         index1 = np.random.randint(self.domain_num)
     index2 = np.random.randint(self.domain_num)
     while index2 == index1:
         index2 = np.random.randint(self.domain_num)
     img_3channel = img.split()
     img1 = Image.merge('RGB', (img_3channel[index_channel[index1][0]],
                                img_3channel[index_channel[index1][1]],
                                img_3channel[index_channel[index1][2]]))
     img2 = Image.merge('RGB', (img_3channel[index_channel[index2][0]],
                                img_3channel[index_channel[index2][1]],
                                img_3channel[index_channel[index2][2]]))
     if self.transform is not None:
         img1 = self.transform(img1)
     if self.transform is not None:
         img2 = self.transform(img2)
     label1 = self.class_num * index1 + label
     label2 = self.class_num * index2 + label
     # The below operation can produce data with more diversity
     if np.random.randint(2) == 0:
         return img1, img2, label1, label2
     else:
         return img2, img1, label2, label1
Example #13
def img_loader(args):
    test_transform = transforms.Compose([
        Resize((args.height, args.width)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    dataloaderX = defaultdict(list)

    # get path
    data_path = args.datadir
    imgs_path = []
    for root, _, files in os.walk(data_path):
        for fname in files:
            if os.path.splitext(fname)[-1] in ('.jpg', '.png'):
                imgs_path.append(os.path.join(root, fname))

    # img process
    imgs = []
    for i in imgs_path:
        img = default_loader(i)
        imgs.append(test_transform(img).unsqueeze_(0))

    # dataloaderX
    for num, path in enumerate(imgs_path):
        # each entry is: {class1: [img, label], class2: [img, label]}
        dataloaderX[path.split('/')[-2]].append(
            [imgs[num], torch.tensor([int(path.split('/')[-1].split('_')[0])])])
    print('[INFO] Total {} pairs of img...'.format(len(dataloaderX)))

    return dataloaderX
Example #14
    def __getimgs_bylabel__(self, label, img_num):
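        # Collect img_num images for the given label, sampling with replacement when the class has fewer than img_num images.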
        if len(self.label_to_indices[label]) >= img_num:
            index = np.random.choice(self.label_to_indices[label],
                                     size=img_num,
                                     replace=False)
        else:
            index1 = np.random.choice(self.label_to_indices[label],
                                      size=len(self.label_to_indices[label]),
                                      replace=False)
            index2 = np.random.choice(self.label_to_indices[label],
                                      size=img_num -
                                      len(self.label_to_indices[label]),
                                      replace=True)
            index = np.concatenate((index1, index2))
        for i in range(img_num):
            img_temp = (self.data[index[i]])
            label_temp = (self.labels[index[i]])
            if not isinstance(label_temp, (tuple, list)):
                label_temp = (label_temp, )
            label_temp = torch.LongTensor(label_temp)
            img_temp = default_loader(img_temp)
            if self.transform is not None:
                img_temp = self.transform(img_temp)
                img_temp = img_temp.unsqueeze(0)
            if i == 0:
                img = img_temp
                label = label_temp
            else:
                img = torch.cat((img, img_temp), 0)
                label = torch.cat((label, label_temp), 0)

        return img, label
Example #15
    def __getitem__(self, index: Tuple[Tuple[int, int], int]):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (sample, target) where target is class_index of the target class.
        """
        (img_idx, align_idx), audio_idx = index

        path, target = self._samples[img_idx]
        vid = default_loader(f"{self.root}/{path}")

        if self.should_align_faces:
            relative_bb = self.relative_bbs[align_idx]
            vid = self.align_face(vid, relative_bb)

        if self.transform is not None:
            vid = self.transform(vid)

        if self.target_transform is not None:
            target = self.target_transform(target)

        if self.should_sample_audio:
            if self.audio_mode == AudioMode.FAKE_NOISE_DIFFERENT_VIDEO or (
                self.audio_mode == AudioMode.MANIPULATION_METHOD_DIFFERENT_VIDEO
                and target
                == 4  # MANIPULATION_METHOD_DIFFERENT_VIDEO means we select different audio for manipulated videos
            ):
                aud_path, _ = self._samples[img_idx]
            else:
                aud_path, _ = self._samples[audio_idx]
            aud = self.audio_file_list(aud_path, stacked=True)
            aud: np.ndarray

            # this adds gaussian noise to audio input if it's supposed to be fake input
            if (
                self.audio_mode == AudioMode.FAKE_NOISE_DIFFERENT_VIDEO
                and audio_idx != img_idx
            ) or (self.audio_mode == AudioMode.FAKE_NOISE and target != 4):
                aud += np.random.normal(0, 1, aud.shape).astype(aud.dtype)

            sample = vid, aud

            # we have to do this because noisynets use the audio label for classification
            if self.audio_mode == AudioMode.MANIPULATION_METHOD_DIFFERENT_VIDEO:
                if target == 4:
                    audio_idx = img_idx
                else:
                    # this is necessary for the case of wanting the exact audio,
                    # but using audio targets for training rather than class targets
                    audio_idx = -1

            # this indicates if the audio and the images are in sync
            target = (target, int(audio_idx == img_idx))
        else:
            sample = vid

        return sample, target
Example #16
    def __getitem__(self, i):
        fp = os.path.join(self.data_path, self.file_list[i])
        img = default_loader(fp)
        img = self.transform(img)
        target = 0  # unsupervised data

        return img, target
Example #17
def encode_proc(model_path, model_config_path, img_root_path,
                img_key_path_list, img_size, device, output_path):
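    # Encode every listed image with a trained VQVAE and write one "key<TAB>comma-separated-token-ids" line per image.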
    model_config_json = open(model_config_path).read()
    print("ModelConfig:", model_config_json, file=sys.stderr, flush=True)
    model_config = VqvaeConfig.from_json(model_config_json)
    model = VQVAE(model_config).to(device)
    if device.type == "cuda":
        torch.cuda.set_device(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    transforms = build_transform(img_size)

    output_fp = open(output_path, "w")
    linecnt = 0
    for f in img_key_path_list:
        for line in open(f):
            linecnt += 1
            if linecnt % 100000 == 0:
                print("{} {} done".format(f, linecnt),
                      file=sys.stderr,
                      flush=True)
            img_key = line.strip()
            img_path = get_key_path(img_root_path, line.strip())
            try:
                img = default_loader(img_path)
            except Exception:
                continue  # skip images that fail to load
            img = transforms(img)[None].to(device)
            id_t = model(img)[2].detach().cpu().flatten(1)
            print("{}\t{}".format(img_key, ",".join(
                (str(x) for x in id_t[0].tolist()))),
                  file=output_fp,
                  flush=True)
    output_fp.close()
Example #18
 def __getitem__(self, index):
     path = self.paths[index]
     image = default_loader(path)
     if self.transform is not None:
         image = self.transform(image)
     # Add a bogus label to be compatible with standard image datasets.
     return image, torch.tensor([0.])
Example #19
    def __getitem__(self, index):
        img, img_name, label = default_loader(
            self.imgs[index]), self.imgs[index], self.labels[index]
        if self.transform is not None:
            img = self.transform(img)

        return img, label, img_name
Example #20
    def loader(self, path, to_gray=False):
        img = default_loader(path)

        if to_gray:
            img = F.to_grayscale(img)

        return img
Example #21
 def __iter__(self):
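     # Shard the list of key files across DataLoader workers, shuffle this worker's shard,
     # and yield transformed images (optionally paired with their keys).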
     worker_info = torch.utils.data.get_worker_info()
     if worker_info is not None:
         num_workers = worker_info.num_workers
         worker_id = worker_info.id
     else:
         num_workers = 1
         worker_id = 0
     pic_size = int(math.ceil(len(self.img_keys_file_list) / num_workers))
     file_list = self.img_keys_file_list[pic_size *
                                         worker_id:pic_size * worker_id +
                                         pic_size]
     self.rand.shuffle(file_list)
     for f in file_list:
         for line in open(f):
             img_key = line.strip()
             img_path = get_key_path(self.img_root_path, img_key)
             try:
                 img = default_loader(img_path)
             except Exception:
                 continue  # skip unreadable images
             if self.with_key:
                 yield img_key, self.transform(img)
             else:
                 yield self.transform(img)
Example #22
    def __getitem__(self, item):
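        # Adversarial samples are stored as .npz arrays, clean samples as image files; each kind gets its own transform.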
        path = self.paths[item]
        label = self.labels[item]

        if label:  # adversarial
            image = torch.from_numpy(np.load(path)['img'])
            if self.adv_transform:
                image = self.adv_transform(image)
        else:
            image = default_loader(path)
            if self.orig_transform:
                image = self.orig_transform(image)

        # path, image, label
        ret = [None, image, None]

        if self.return_paths:
            ret[0] = path

        if self.return_label:
            ret[2] = label

        ret = [r for r in ret if r is not None]
        ret = ret[0] if len(ret) == 1 else ret

        return ret
Example #23
    def compare(self, query_image_path, input_image_path):
        # self.model.eval()

        # Extract feature
        print('extract features, this may take some time')

        # query_image = self.test_transform(default_loader(query_image_path))
        input_image = self.test_transform(default_loader(input_image_path))

        # feature_a = extract_feature(self.model, tqdm([(torch.unsqueeze(query_image, 0), 1)]))
        feature_b = extract_feature(
            self.model, tqdm([(torch.unsqueeze(input_image, 0), 1)]))

        # sort images
        # feature_a = feature_a.view(-1, 1)
        feature_b = feature_b.view(-1, 1)

        # print(feature_b)

        # print(self.feature_a.size())

        score = torch.mm(self.feature_a, feature_b)
        score = score.squeeze(1).cpu()
        score = score.numpy()

        return score
Example #24
def infer(model, image_list, isFlip=True):
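    # Extract one 2048-d feature per image, summing features of the original and
    # horizontally flipped inputs when isFlip is set, then L2-normalize.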
    device = torch.device('cpu' if args.cpu else 'cuda')
    test_transform = transforms.Compose([
        transforms.Resize((args.height, args.width), interpolation=3),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    inputlist = [
        test_transform(default_loader(imgpath)) for imgpath in image_list
    ]
    inputs = torch.stack(inputlist, dim=0)
    ff = torch.FloatTensor(inputs.size(0), 2048).zero_()
    num = 2 if isFlip else 1
    for i in range(num):
        if i == 1:
            inputs = fliphor(inputs)
        input_img = inputs.to(device)
        outputs = model(input_img)
        f = outputs[0].data.cpu()
        ff = ff + f
    fnorm = torch.norm(ff, p=2, dim=1, keepdim=True)
    ff = ff.div(fnorm.expand_as(ff))

    return ff
Example #25
def spectrogram_loader(filepath: str):
    if has_file_allowed_extension(filepath, IMG_EXTENSIONS):
        img = default_loader(filepath)
        data = np.array(img)
    else:
        data = np.load(filepath)
    return data
Example #26
def get_res_feature(frame_dir, feature_dir):
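    # Run every frame of each video through ResNet and stack the pool5 features into one HDF5 file per video directory.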

    # build resnet class
    resnet = ResNet(224)

    for sub_frame_dir in sorted(frame_dir.glob('*/')):
        video_feature = np.array([])
        for frame_filename in sorted(sub_frame_dir.glob(
                '*.jpg')):  # for each frame image in dir_path
            frame = default_loader(str(frame_filename))
            print(frame_filename)
            # extract ResNet feature
            res_conv5, res_pool5 = resnet(frame)
            # gpu variable -> cpu variable -> tensor -> numpy array -> 1D array
            frame_feature = res_pool5.cpu().data.numpy().flatten()
            if video_feature.size == 0:
                video_feature = np.hstack((video_feature, frame_feature))
            else:
                video_feature = np.vstack((video_feature, frame_feature))
            print(video_feature.shape)

        if not os.path.exists(str(feature_dir)):
            os.makedirs(str(feature_dir))
        h5_filename = str(feature_dir) + '/' + (
            str(sub_frame_dir).split('/'))[-1] + '.h5'
        h5file = h5py.File(h5_filename, 'w')
        h5file.create_dataset('pool5', data=video_feature)
        h5file.close()
Example #27
    def __getitem__(self, idx):

        temp = self.samples[idx]  # folder_files
        # print(temp)
        if self.img_flag[idx] == 1:
            foldername = 'gen_0000'
            filename = temp[9:]
        else:
            foldername = temp[:4]
            filename = temp[5:]
        img = default_loader(self.image_dir + '/' + foldername + '/' +
                             filename)
        if self.train_val == 'train_new':
            result = {
                'img': data_transforms['train'](img),
                'label': self.img_label[idx],
                'flag': self.img_flag[idx]
            }  # flag=0 for true data, flag=1 for generated data
        else:
            result = {
                'img': data_transforms['val'](img),
                'label': self.img_label[idx],
                'flag': self.img_flag[idx]
            }
        return result
Example #28
 def iterate(self):
     for file in [
             f for f in listdir(self.image_dir)
             if isfile(join(self.image_dir, f))
     ]:
         yield file, self.img_transform(
             default_loader(join(self.image_dir, file)))
Example #29
 def __getitem__(self, index):
     image_dir = glob.glob(self.files_A[index] + '*.*')
     car_side = np.array(random.randint(0, 4))
     car_id = find_car_id(self.files_A[index])
     return self.transform(
         blank_extention(default_loader(
             image_dir[car_side]))), car_id, car_side
Example #30
    def __getitem__(self, index):
        if self.preprocessed:
            video_data = self.video_features[self.video_list[index]]
            video_data = torch.Tensor(video_data).cuda()
            print(video_data.size())
            return video_data, self.video_list[index]
            # imgs = []
            # for image_name in video_group.keys():
            #     image_data = video_group[image_name]
            #     imgs.append(torch.Tensor(image_data))
            # return torch.stack(imgs), self.video_list[index]
            # image_path = self.video_list[index]
            # print("image", image_path)
            # with h5py.File(image_path, 'r') as f:
            #     if self.with_name:
            #         return torch.Tensor(np.array(f['pool5'])), image_path.name[:-5]
            #     else:
            #         return torch.Tensor(np.array(f['pool5']))

        else:
            images = []
            print("here")
            count = 0
            for img_path in Path(self.video_list[index]).glob('*.jpg'):
                img = default_loader(img_path)
                img_tensor = self.transform(img)
                images.append(img_tensor)
                count += 1
                if count == 256:
                    break
            print(images[0].size())
            return torch.stack(images), img_path.parent.name[4:]
Example #31
def read_image_for_pytorch(image_file_name):
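    # Read an image with default_loader and return it as a C x H x W numpy array.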
    img = default_loader(image_file_name)

    # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
    if img.mode == 'YCbCr':
        nchannel = 3
    else:
        nchannel = len(img.mode)

    # convert to numpy array
    img = np.array(img.getdata()).reshape(img.size[1], img.size[0], nchannel)

    # permute dimensions
    img = np.transpose(img, (2, 0, 1)).copy()
    return img