def __getitem__(self, idx):
    """Assemble the ``Sample`` stored at position ``idx``.

    The returned sample carries, in insertion order: ``id`` (the entry of
    ``self.ids_list`` passed through ``object_to_byte_tensor``), ``video``,
    ``audio``, every field produced by ``self.text_processor`` for the
    entry's text, and ``targets`` (a multi-hot vector with one slot per
    entry of ``self.class_to_idx``).

    Args:
        idx: Index used for ``self.video_clips.get_clip`` and for the
            parallel ``text_list`` / ``ids_list`` / ``labels`` lookups.

    Returns:
        The populated ``Sample``.
    """
    # No clips available yet: trigger loading first.
    # NOTE(review): presumably load_df() populates self.video_clips -- confirm.
    if len(self.video_clips) == 0:
        self.load_df()

    # The third element returned by get_clip is not used here.
    video, audio, _info = self.video_clips.get_clip(idx)
    raw_text = self.text_list[idx]
    sample_id = self.ids_list[idx]

    # Multi-hot target: 1 at the index of every class attached to this item.
    class_indices = [self.class_to_idx[name] for name in self.labels[idx]]
    targets = torch.zeros(len(self.class_to_idx))
    targets[class_indices] = 1

    # Processors are optional; a missing one leaves the raw data untouched.
    video = video if self.video_processor is None else self.video_processor(video)
    audio = audio if self.audio_processor is None else self.audio_processor(audio)

    item = Sample()
    item.id = object_to_byte_tensor(sample_id)
    item.video = video
    item.audio = audio
    # The text processor returns a mapping whose entries are merged in.
    item.update(self.text_processor({"text": raw_text}))
    item.targets = targets
    return item
def create_sample(video_transfomred, audio_transfomred, text_tensor, video_label):
    """Wrap already-processed inputs into a one-element ``SampleList``.

    ``self`` is not a parameter of this function; it is resolved from the
    surrounding scope and must provide ``class_to_idx`` and ``device``.
    NOTE(review): this looks like a closure defined inside a method --
    confirm against the enclosing code.

    Args:
        video_transfomred: Value stored as the sample's ``video`` field.
        audio_transfomred: Value stored as the sample's ``audio`` field.
        text_tensor: Mapping merged into the sample via ``Sample.update``.
        video_label: Iterable of class names, each looked up in
            ``self.class_to_idx``.

    Returns:
        ``SampleList`` containing the single sample, after ``.to(self.device)``.
    """
    # Multi-hot target: 1 at the index of every class named in video_label.
    class_indices = [self.class_to_idx[name] for name in video_label]
    targets = torch.zeros(len(self.class_to_idx))
    targets[class_indices] = 1

    sample = Sample()
    sample.video = video_transfomred
    sample.audio = audio_transfomred
    sample.update(text_tensor)
    sample.targets = targets
    # Fixed metadata tagging this sample.
    sample.dataset_type = 'test'
    sample.dataset_name = 'charades'

    batch = SampleList([sample])
    return batch.to(self.device)