Пример #1
0
    def load_class_samples(self, d):
        """Return all preprocessed samples of one class, caching the result.

        The class name is read from ``d['class']``:

        * ``'_silence_'`` builds ``self.silence_num_samples`` all-zero clips
          of length ``self.desired_samples``.
        * ``'_unknown_'`` reads wav paths from a list file named after
          ``self.class_file`` inside ``<data_dir>/../_unknown_``.
        * any other name globs ``*.wav`` inside ``<data_dir>/<class>``.

        Args:
            d: mapping with a ``'class'`` entry.

        Returns:
            dict with the class name under ``'class'`` and, under ``'data'``,
            the ``'data'`` entry of the single batch holding every sample.

        Raises:
            Exception: if no sample files are found for a non-silence class.
        """
        class_name = d['class']
        if class_name not in self.data_cache:
            if class_name == '_silence_':
                samples = torch.zeros(self.silence_num_samples, 1,
                                      self.desired_samples)
                # True is passed where the other classes pass
                # self.use_background.
                sample_ds = TransformDataset(
                    ListDataset(samples),
                    compose([
                        partial(convert_dict, 'data'),
                        partial(self.mix_background, True, 'data'),
                        partial(self.extract_features, 'data')
                    ]))

            else:
                if class_name == '_unknown_':
                    unknown_dir = os.path.join(self.data_dir, '..',
                                               '_unknown_')
                    split = os.path.basename(self.class_file)
                    # sample_source is what the error message below reports;
                    # it must be set on both branches.
                    sample_source = os.path.join(unknown_dir, split)
                    with open(sample_source, 'r') as rf:
                        samples = [
                            os.path.join(unknown_dir, wav_file.strip('\n'))
                            for wav_file in rf
                        ]
                else:
                    sample_source = os.path.join(self.data_dir, class_name)
                    samples = glob.glob(os.path.join(sample_source, '*.wav'))

                if len(samples) == 0:
                    raise Exception(
                        "No Samples found for GoogleSpeechCommand {} at {}".
                        format(class_name, sample_source))

                sample_ds = TransformDataset(
                    ListDataset(samples),
                    compose([
                        partial(convert_dict, 'file_name'),
                        partial(self.load_audio, 'file_name', 'data'),
                        partial(self.adjust_volume, 'data'),
                        partial(self.shift_and_pad, 'data'),
                        partial(self.mix_background, self.use_background,
                                'data'),
                        partial(self.extract_features, 'data')
                    ]))

            # One batch as large as the dataset collates every sample at once.
            loader = torch.utils.data.DataLoader(sample_ds,
                                                 batch_size=len(sample_ds),
                                                 shuffle=False)

            for sample in loader:
                self.data_cache[class_name] = sample['data']
                break  # only need one sample because batch size equal to dataset length

        return {'class': class_name, 'data': self.data_cache[class_name]}
Пример #2
0
def create_cityscapes_datasource_train(opt):
    """Build the shuffled, batched Cityscapes training data source.

    Args:
        opt: options mapping. This function reads 'features', 'frame_ss',
            'n_input_frames', 'n_target_frames', 'save', 'nIt', 'batchsize',
            'n_channels_per_input' and 'n_channels_per_target'.

    Returns:
        Tuple ``(dataset, required_fpn_features, loaded_model)``: the batched
        dataset, the result of ``intersect(opt['features'], FPNfeatures)``,
        and the ``model`` attribute of the underlying Cityscapes dataset.
    """
    fpn_feature_names = intersect(opt['features'], FPNfeatures)
    source = CityscapesDatasetAndFeatures(
        split='train',
        frame_ss=opt['frame_ss'],
        nSeq=opt['n_input_frames'] + opt['n_target_frames'],
        features=opt['features'],
        savedir=opt['save'],
        size=opt['nIt'] * opt['batchsize'])
    model = source.model

    def form_input_and_target_features(sample):
        """Split each FPN feature of ``sample`` into input and target parts.

        The first ``n_input_frames`` entries along dimension 0 become the
        input features and the rest the target features; each part is then
        viewed as (frames * channels, size[2], size[3]).
        """
        for name in fpn_feature_names:
            frames = sample[name]
            dims = frames.size()
            n_in, ch_in = opt['n_input_frames'], opt['n_channels_per_input']
            n_out, ch_out = opt['n_target_frames'], opt['n_channels_per_target']
            height, width = dims[2], dims[3]

            past = frames[0:n_in, :, :, :]
            sample[u'input_features_' + name] = past.view(
                (n_in * ch_in, height, width))

            future = frames[n_in:, :, :, :]
            sample[u'target_features_' + name] = future.view(
                (n_out * ch_out, height, width))

        return sample

    # shuffle -> per-sample input/target split -> batching
    per_sample = TransformDataset(
        dataset=ShuffleDataset(dataset=source),
        transforms=form_input_and_target_features)
    dataset = BatchDataset(dataset=per_sample, batchsize=opt['batchsize'])

    return dataset, fpn_feature_names, model
Пример #3
0
def load_class_images(d):
    """Return all images of one Omniglot class as a single cached batch.

    ``d['class']`` must have the form ``<alphabet>/<character>/<rot>``. The
    text of ``<rot>`` after its first three characters is parsed as the
    rotation passed to ``rotate_image`` (presumably names look like
    ``rot090`` - confirm against the split files).

    Args:
        d: mapping with a ``'class'`` entry.

    Returns:
        dict with the class name under ``'class'`` and the cached batch of
        that class under ``'data'``.

    Raises:
        ValueError: if the class directory contains no ``*.png`` files, or
            if the class name does not split into exactly three parts.
    """
    class_name = d['class']
    if class_name not in OMNIGLOT_CACHE:
        alphabet, character, rot = class_name.split('/')
        image_dir = os.path.join(OMNIGLOT_DATA_DIR, 'data', alphabet,
                                 character)

        class_images = sorted(glob.glob(os.path.join(image_dir, '*.png')))
        if not class_images:
            # Without this check the DataLoader below is built with
            # batch_size=0 and fails with an unrelated-looking error.
            raise ValueError(
                "No images found for omniglot class {} at {}. Did you run download_omniglot.sh first?"
                .format(class_name, image_dir))

        image_ds = TransformDataset(
            ListDataset(class_images),
            compose([
                partial(convert_dict, 'file_name'),
                partial(load_image_path, 'file_name', 'data'),
                partial(rotate_image, 'data', float(rot[3:])),
                partial(scale_image, 'data', 28, 28),
                partial(convert_tensor, 'data')
            ]))

        # One batch as large as the dataset collates every image at once.
        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)
        for sample in loader:
            OMNIGLOT_CACHE[class_name] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    return {'class': class_name, 'data': OMNIGLOT_CACHE[class_name]}
Пример #4
0
def load_class_images(d):
    """Return all images of one miniImagenet class as a single cached batch.

    Images are the ``*.jpg`` files under ``<IMAGENET_DATA_DIR>/data/<class>``,
    processed by convert_dict -> load_image_path -> scale_image(84, 84) ->
    convert_tensor. The resulting batch is memoised in ``IMAGENET_CACHE``.

    Raises:
        Exception: if the class directory contains no ``*.jpg`` files.
    """
    key = d['class']
    if key in IMAGENET_CACHE:
        return {'class': key, 'data': IMAGENET_CACHE[key]}

    image_dir = os.path.join(IMAGENET_DATA_DIR, 'data', key)
    jpg_paths = glob.glob(os.path.join(image_dir, '*.jpg'))
    jpg_paths.sort()
    if not jpg_paths:
        raise Exception(
            "No images found for miniImagenet class {} at {}.".format(
                key, image_dir))

    pipeline = compose([
        partial(convert_dict, 'file_name'),
        partial(load_image_path, 'file_name', 'data'),
        partial(scale_image, 'data', 84, 84),
        partial(convert_tensor, 'data'),
    ])
    image_ds = TransformDataset(ListDataset(jpg_paths), pipeline)

    # A batch as large as the dataset means the first batch is everything.
    whole_class = torch.utils.data.DataLoader(image_ds,
                                              batch_size=len(image_ds),
                                              shuffle=False)
    IMAGENET_CACHE[key] = next(iter(whole_class))['data']

    return {'class': key, 'data': IMAGENET_CACHE[key]}
Пример #5
0
def load_class_images(d):
    """Return all images of one Omniglot class as a single cached batch.

    ``d['class']`` is split on '/' into alphabet, character and rotation;
    the rotation text after its first three characters is parsed as a float
    and handed to ``rotate_image``.

    Raises:
        Exception: if the class directory contains no ``*.png`` files.
    """
    key = d['class']
    if key in OMNIGLOT_CACHE:
        return {'class': key, 'data': OMNIGLOT_CACHE[key]}

    # Resolve the directory that holds this character's images.
    alphabet, character, rot = key.split('/')
    image_dir = os.path.join(OMNIGLOT_DATA_DIR, 'data', alphabet, character)

    # Collect every PNG under that directory, in sorted order.
    png_paths = glob.glob(os.path.join(image_dir, '*.png'))
    png_paths.sort()
    if not png_paths:
        raise Exception("No images found for omniglot class {} at {}. Did you run download_omniglot.sh first?".format(key, image_dir))

    # Per-image processing: wrap the path in a dict, load the image, rotate
    # it, scale it (28, 28 passed to scale_image) and convert it to a tensor.
    pipeline = compose([
        partial(convert_dict, 'file_name'),
        partial(load_image_path, 'file_name', 'data'),
        partial(rotate_image, 'data', float(rot[3:])),
        partial(scale_image, 'data', 28, 28),
        partial(convert_tensor, 'data'),
    ])
    image_ds = TransformDataset(ListDataset(png_paths), pipeline)

    # Put the whole class into one batch and store its image data in the cache.
    whole_class = torch.utils.data.DataLoader(image_ds, batch_size=len(image_ds), shuffle=False)
    OMNIGLOT_CACHE[key] = next(iter(whole_class))['data']

    # Class name together with the cached data of that class.
    return {'class': key, 'data': OMNIGLOT_CACHE[key]}
Пример #6
0
def load_class_images(d):
    """Return all images of one CIFAR100 class as a single cached batch.

    Images are the ``*.jpg`` files under ``<CIFAR100_DATA_DIR>/data/<class>``,
    processed by convert_dict -> load_image_path -> scale_image(32, 32) ->
    convert_tensor. The resulting batch is memoised in ``CIFAR100_CACHE``.

    Raises:
        Exception: if the class directory contains no ``*.jpg`` files.
    """
    key = d['class']
    if key in CIFAR100_CACHE:
        return {'class': key, 'data': CIFAR100_CACHE[key]}

    image_dir = os.path.join(CIFAR100_DATA_DIR, 'data', key)
    jpg_paths = glob.glob(os.path.join(image_dir, '*.jpg'))
    jpg_paths.sort()
    if not jpg_paths:
        raise Exception(
            "No images found for CIFAR100 class {} at {}.".format(
                key, image_dir))

    # Normalisation is intentionally not part of the pipeline; the disabled
    # call used mean (0.50400572, 0.48892908, 0.44281732) and
    # std (0.26477088, 0.25454896, 0.27408391).
    steps = [
        partial(convert_dict, 'file_name'),
        partial(load_image_path, 'file_name', 'data'),
        partial(scale_image, 'data', 32, 32),
        partial(convert_tensor, 'data'),
    ]
    image_ds = TransformDataset(ListDataset(jpg_paths), compose(steps))

    # A batch as large as the dataset means the first batch is everything.
    whole_class = torch.utils.data.DataLoader(image_ds,
                                              batch_size=len(image_ds),
                                              shuffle=False)
    CIFAR100_CACHE[key] = next(iter(whole_class))['data']

    return {'class': key, 'data': CIFAR100_CACHE[key]}
Пример #7
0
    def load(self, config, splits):
        """Create one episodic DataLoader per requested split.

        For the 'val' and 'test' splits the ``test_way`` / ``test_shot`` /
        ``test_query`` settings of ``config.data`` are used when they are
        non-zero, otherwise the training values ``way`` / ``shot`` /
        ``query`` apply. The number of episodes comes from ``test_episodes``
        for those splits and from ``train_episodes`` otherwise.

        Args:
            config: configuration object exposing the ``data`` settings above.
            splits: iterable of split names.

        Returns:
            dict mapping each split name to its DataLoader.
        """
        split_dir = os.path.join(self.split_dir, config.data.split)

        loaders = {}
        for split in splits:
            is_eval = split in ['val', 'test']

            if is_eval and config.data.test_way != 0:
                n_way = config.data.test_way
            else:
                n_way = config.data.way

            if is_eval and config.data.test_shot != 0:
                n_support = config.data.test_shot
            else:
                n_support = config.data.shot

            if is_eval and config.data.test_query != 0:
                n_query = config.data.test_query
            else:
                n_query = config.data.query

            if is_eval:
                n_episodes = config.data.test_episodes
            else:
                n_episodes = config.data.train_episodes

            class_names = self.read_class_names(split_dir=split_dir,
                                                split=split)
            episode_pipeline = TransformCompose([
                self.load_class_images,
                TransformExtractEpisode(n_support=n_support, n_query=n_query),
            ])
            dataset = TransformDataset(ListDataset(class_names),
                                       episode_pipeline)

            if not config.data.sequential:
                sampler = EpisodicBatchSampler(len(dataset), n_way, n_episodes)
            else:
                sampler = SequentialBatchSampler(len(dataset))

            loaders[split] = torch.utils.data.DataLoader(
                dataset, batch_sampler=sampler, num_workers=0)
        return loaders
Пример #8
0
def load_class_images(d):
    """Return all images of one miniImagenet class as a single cached batch.

    ``d['class']`` must be a two-field comma-separated entry; only the second
    field (the class name) is used, and every file in
    ``<MINIIMAGENET_DATA_DIR>/images`` whose name starts with it is loaded.
    The cache is keyed by the full ``d['class']`` entry.

    Args:
        d: mapping with a ``'class'`` entry.

    Returns:
        dict with the unchanged entry under ``'class'`` and the cached batch
        under ``'data'``.

    Raises:
        Exception: if no file matches the class name prefix.
        ValueError: if the entry does not split into exactly two fields.
    """
    if d['class'] not in MINIIMAGENET_CACHE:
        # The first field is not needed here; unpacking still enforces the
        # two-field format.
        _, classname = d['class'].split(',')
        image_dir = os.path.join(MINIIMAGENET_DATA_DIR, 'images')
        # Get all images with same class
        class_images = sorted(
            glob.glob(os.path.join(image_dir, '{}*'.format(classname))))
        if len(class_images) == 0:
            raise Exception(
                "No images found for miniImagenet class {} at {}.".format(
                    d['class'], image_dir))

        image_ds = TransformDataset(
            ListDataset(class_images),
            compose([
                partial(convert_dict, 'file_name'),
                partial(load_image_path, 'file_name', 'data'),
                partial(scale_image, 'data', 84, 84),
                partial(convert_tensor, 'data')
            ]))

        # One batch as large as the dataset collates every image at once.
        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)

        for sample in loader:
            MINIIMAGENET_CACHE[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': MINIIMAGENET_CACHE[d['class']]}
Пример #9
0
def load_class_images(d):
    """Return all images of one Omniglot class as a single cached batch.

    The class name ``d['class']`` is split on '/' into alphabet, character
    and rotation. The rotation text after its first three characters is
    parsed as a float and passed to ``rotate_image``. Results are memoised
    in ``OMNIGLOT_CACHE``.

    Raises:
        Exception: if the character directory contains no ``*.png`` files.
    """
    name = d['class']
    if name not in OMNIGLOT_CACHE:
        alphabet, character, rot = name.split('/')
        image_dir = os.path.join(OMNIGLOT_DATA_DIR, 'data', alphabet,
                                 character)

        pattern = os.path.join(image_dir, '*.png')
        class_images = sorted(glob.glob(pattern))
        if not class_images:
            raise Exception(
                "No images found for omniglot class {} at {}. Did you run download_omniglot.sh first?"
                .format(name, image_dir))

        angle = float(rot[3:])
        per_image = compose([
            partial(convert_dict, 'file_name'),
            partial(load_image_path, 'file_name', 'data'),
            partial(rotate_image, 'data', angle),
            partial(scale_image, 'data', 28, 28),
            partial(convert_tensor, 'data'),
        ])
        image_ds = TransformDataset(ListDataset(class_images), per_image)

        # With batch_size == dataset length the first batch is the whole class.
        first_batch = next(iter(torch.utils.data.DataLoader(
            image_ds, batch_size=len(image_ds), shuffle=False)))
        OMNIGLOT_CACHE[name] = first_batch['data']

    return {'class': name, 'data': OMNIGLOT_CACHE[name]}
Пример #10
0
def load(opt, splits):
    """Build one episodic DataLoader per split from pickled image caches.

    For the 'val' and 'test' splits the ``data.test_*`` options override the
    training options when they are non-zero. The image data and class index
    of each split are read from the pickle at ``get_cache_path(split)``.

    Args:
        opt: options mapping with 'data.way', 'data.shot', 'data.query',
            'data.test_way', 'data.test_shot', 'data.test_query',
            'data.test_episodes', 'data.train_episodes', 'data.cuda' and
            'data.sequential'.
        splits: iterable of split names.

    Returns:
        dict mapping each split name to its DataLoader.

    Raises:
        OSError: if the cache file of a split cannot be opened. A missing
            cache now fails here instead of leaving the image data undefined
            or silently reusing the previous split's data.
    """
    ret = {}
    for split in splits:
        if split in ['val', 'test'] and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if split in ['val', 'test'] and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']

        if split in ['val', 'test'] and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']

        if split in ['val', 'test']:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        cache_path = get_cache_path(split)
        # NOTE(security): pickle can execute arbitrary code while loading;
        # only point this at cache files from a trusted source.
        with open(cache_path, "rb") as f:
            try:
                data = pkl.load(f, encoding='bytes')
            except TypeError:
                # Loaders that do not accept `encoding`: rewind and retry
                # with the plain call.
                f.seek(0)
                data = pkl.load(f)

        # The keys are bytes or str depending on how the cache was written
        # and decoded, so pick whichever form is present instead of reading
        # the file a second time.
        if b'image_data' in data:
            img_data = data[b'image_data']
            class_dict = data[b'class_dict']
        else:
            img_data = data['image_data']
            class_dict = data['class_dict']

        transforms = [
            partial(convert_dict, 'class'),
            partial(load_class_images, img_data, class_dict),
            partial(extract_episode, n_support, n_query)
        ]
        if opt['data.cuda']:
            transforms.append(CudaTransform())
        class_names = list(class_dict)
        transforms = compose(transforms)
        ds = TransformDataset(ListDataset(class_names), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret
def load(opt, splits):
    """Build one episodic DataLoader per split from miniImagenet CSV files.

    Each ``<split>.csv`` under ``<MINIIMAGENET_DATA_DIR>/splits/<data.split>``
    is read after skipping its first line; every remaining line must hold
    exactly two comma-separated fields, ``image,class_name``.

    For the 'val' and 'test' splits the ``data.test_*`` options override the
    training options when they are non-zero.

    Returns:
        dict mapping each split name to its DataLoader.
    """
    split_dir = os.path.join(MINIIMAGENET_DATA_DIR, 'splits',
                             opt['data.split'])

    loaders = {}
    for split in splits:
        is_eval = split in ['val', 'test']

        n_way = (opt['data.test_way']
                 if is_eval and opt['data.test_way'] != 0
                 else opt['data.way'])
        n_support = (opt['data.test_shot']
                     if is_eval and opt['data.test_shot'] != 0
                     else opt['data.shot'])
        n_query = (opt['data.test_query']
                   if is_eval and opt['data.test_query'] != 0
                   else opt['data.query'])
        n_episodes = (opt['data.test_episodes'] if is_eval
                      else opt['data.train_episodes'])

        # Group image names by class name.
        class_index = defaultdict(list)
        csv_path = os.path.join(split_dir, "{:s}.csv".format(split))
        with open(csv_path, 'r') as f:
            f.readline()  # first line is skipped (presumably a header row)
            for row in f:
                image, class_name = row.split(',')
                class_index[class_name.rstrip('\n')].append(image)
        class_names = list(class_index.keys())

        steps = [
            partial(convert_dict, 'class'),
            partial(load_class_images, class_index),
            partial(extract_episode, n_support, n_query),
        ]
        if opt['data.cuda']:
            steps.append(CudaTransform())
        pipeline = compose(steps)

        ds = TransformDataset(ListDataset(class_names), pipeline)

        sampler = (SequentialBatchSampler(len(ds))
                   if opt['data.sequential']
                   else EpisodicBatchSampler(len(ds), n_way, n_episodes))

        # use num_workers=0, otherwise may receive duplicate episodes
        loaders[split] = torch.utils.data.DataLoader(ds,
                                                     batch_sampler=sampler,
                                                     num_workers=0)

    return loaders
Пример #12
0
def load(opt, splits):
    """Build one episodic DataLoader per Omniglot split.

    Class names are read line by line from ``<split>.txt`` under
    ``<OMNIGLOT_DATA_DIR>/splits/<data.split>``. For the 'val' and 'test'
    splits the ``data.test_*`` options override the training options when
    they are non-zero.

    Returns:
        dict mapping each split name to its DataLoader.
    """
    split_dir = os.path.join(OMNIGLOT_DATA_DIR, 'splits', opt['data.split'])

    loaders = {}
    for split in splits:
        is_eval = split in ['val', 'test']

        # Number of classes per episode.
        n_way = opt['data.test_way'] if is_eval and opt['data.test_way'] != 0 else opt['data.way']
        # Number of support examples per class.
        n_support = opt['data.test_shot'] if is_eval and opt['data.test_shot'] != 0 else opt['data.shot']
        # Number of query examples per class.
        n_query = opt['data.test_query'] if is_eval and opt['data.test_query'] != 0 else opt['data.query']
        # Number of episodes.
        n_episodes = opt['data.test_episodes'] if is_eval else opt['data.train_episodes']

        # Three steps: wrap the class name in a dict under 'class', load the
        # data of that class, then extract the support/query episode.
        steps = [
            partial(convert_dict, 'class'),
            load_class_images,
            partial(extract_episode, n_support, n_query),
        ]
        if opt['data.cuda']:
            steps.append(CudaTransform())
        pipeline = compose(steps)

        # Read every class name belonging to this split.
        with open(os.path.join(split_dir, "{:s}.txt".format(split)), 'r') as f:
            class_names = [line.rstrip('\n') for line in f.readlines()]

        # One dataset item per class, transformed into support/query data.
        ds = TransformDataset(ListDataset(class_names), pipeline)

        if not opt['data.sequential']:
            # Episodic sampling: built from the class count, n_way and n_episodes.
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)
        else:
            sampler = SequentialBatchSampler(len(ds))

        # Wrap the dataset so that each batch is one episode.
        # use num_workers=0, otherwise may receive duplicate episodes
        loaders[split] = torch.utils.data.DataLoader(ds, batch_sampler=sampler, num_workers=0)

    return loaders
Пример #13
0
def load(opt, splits):
    """Build one episodic DataLoader per Omniglot split and print their types.

    Class names are read line by line from ``<split>.txt`` under
    ``<OMNIGLOT_DATA_DIR>/splits/<data.split>``. For the 'val' and 'test'
    splits the ``data.test_*`` options override the training options when
    they are non-zero. Before returning, the type of the result dict and of
    every loader is printed.

    Returns:
        dict mapping each split name to its DataLoader.
    """
    split_dir = os.path.join(OMNIGLOT_DATA_DIR, 'splits', opt['data.split'])

    def _pick(split, test_key, default_key):
        """Return the test option for eval splits unless it is 0."""
        if split in ['val', 'test'] and opt[test_key] != 0:
            return opt[test_key]
        return opt[default_key]

    loaders = {}
    for split in splits:
        n_way = _pick(split, 'data.test_way', 'data.way')
        n_support = _pick(split, 'data.test_shot', 'data.shot')
        n_query = _pick(split, 'data.test_query', 'data.query')

        if split in ['val', 'test']:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        steps = [
            partial(convert_dict, 'class'),
            load_class_images,
            partial(extract_episode, n_support, n_query),
        ]
        if opt['data.cuda']:
            steps.append(CudaTransform())
        pipeline = compose(steps)

        names_path = os.path.join(split_dir, "{:s}.txt".format(split))
        with open(names_path, 'r') as f:
            class_names = [line.rstrip('\n') for line in f.readlines()]

        ds = TransformDataset(ListDataset(class_names), pipeline)

        sampler = (SequentialBatchSampler(len(ds))
                   if opt['data.sequential']
                   else EpisodicBatchSampler(len(ds), n_way, n_episodes))

        # use num_workers=0, otherwise may receive duplicate episodes
        loaders[split] = torch.utils.data.DataLoader(ds,
                                                     batch_sampler=sampler,
                                                     num_workers=0)

    # Report what was built: the container type, then each split's loader type.
    print("Ret:", type(loaders))
    for split_name, split_loader in loaders.items():
        print(split_name, type(split_loader))
    return loaders
Пример #14
0
 def get_data(self):
     """
     Build the dataset view used for the current epoch.

     Indices whose entry in ``self.delays`` is <= 1 are used this epoch; the
     indices with a delay > 1 are stored in ``self.skip_data_index``. The
     number of used indices is stored in ``self.current_data_size``, and
     ``self.train_loss`` / ``self.train_acc`` are reset to zero arrays with
     one slot per element of ``self.dataset``.

     :return: TransformDataset yielding ``(self.dataset[i], i)`` for every
         used index ``i``
     """
     active_index = (self.delays <= 1).nonzero()[0]
     self.skip_data_index = (self.delays > 1).nonzero()[0]
     self.current_data_size = len(active_index)

     n_total = len(self.dataset)
     self.train_loss = np.zeros(n_total)
     self.train_acc = np.zeros(n_total)

     def attach_index(i):
         # Pair each sample with its position in the full dataset.
         return self.dataset[i], i

     return TransformDataset(active_index, attach_index)
Пример #15
0
def load(opt, splits):
    """Build one episodic DataLoader per split from the module-level dataset.

    For every split, ``dataset[split]['class']`` supplies the ids that become
    dataset items and ``dataset[split]['data']`` is handed to
    ``extract_episode`` together with 'data.min_len', 'data.max_len' and the
    support/query counts. For the 'val' and 'test' splits the ``data.test_*``
    options override the training options when they are non-zero.

    Returns:
        dict mapping each split name to its DataLoader.
    """
    def _pick(split, test_key, default_key):
        """Return the test option for eval splits unless it is 0."""
        if split in ['val', 'test'] and opt[test_key] != 0:
            return opt[test_key]
        return opt[default_key]

    loaders = {}
    for split in splits:
        n_way = _pick(split, 'data.test_way', 'data.way')
        n_support = _pick(split, 'data.test_shot', 'data.shot')
        n_query = _pick(split, 'data.test_query', 'data.query')
        n_episodes = (opt['data.test_episodes']
                      if split in ['val', 'test']
                      else opt['data.train_episodes'])

        speaker_ids = dataset[split]['class']
        data_split = dataset[split]['data']

        steps = [
            partial(convert_dict, 'class'),
            partial(extract_episode, 'class', data_split, opt['data.min_len'],
                    opt['data.max_len'], n_support, n_query),
            partial(convert_tensor,
                    ['xq_padded', 'xs_padded', 'xq_len', 'xs_len']),
        ]
        if opt['data.cuda']:
            steps.append(CudaTransform())
        pipeline = compose(steps)

        ds = TransformDataset(ListDataset(speaker_ids), pipeline)

        if not opt['data.sequential']:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)
        else:
            sampler = SequentialBatchSampler(len(ds))

        loaders[split] = torch.utils.data.DataLoader(ds,
                                                     batch_sampler=sampler,
                                                     num_workers=0)

    return loaders
Пример #16
0
    def __iter__(self):
        """Yield transformed batches tagged with the episode configuration.

        On the first iteration (``self.dataset is None``) the dataset of
        ``self.split`` is loaded from disk and the transform pipeline is
        built. Every call then reshuffles through ``self.shuffle_dataset()``,
        prints the number of batches, and yields each batch after setting its
        'n_way', 'n_support' and 'n_query' entries from the attributes of the
        same name.
        """
        if self.dataset is None:
            self.dataset = self.load_dataset(from_disk=True)[self.split]
            steps = [
                partial(batch_from_index, self.dataset['data']),
                partial(convert_tensor, 'data'),
            ]
            if self.if_cuda:
                steps.append(CudaTransform())
            self.transforms = compose(steps)

        shuffled = self.shuffle_dataset()
        batches = TransformDataset(ListDataset(shuffled), self.transforms)

        print(f"\nSize of batches: {len(batches)}")
        for batch in batches:
            # Attributes are read per batch, in the same order every time.
            for field in ('n_way', 'n_support', 'n_query'):
                batch[field] = getattr(self, field)
            yield batch
def loader(opt):
    """Build one episodic DataLoader per split selected by ``opt.state``.

    When ``opt.state`` is 'train' the splits come from
    ``opt.train_split_mode``; otherwise only 'test' is built. The 'val' and
    'test' splits use the ``opt.test_*`` episode settings, all others the
    ``opt.train_*`` ones. Class names are read line by line from
    ``<split>.txt`` under ``<opt.split_dir>/<opt.split_name>``.

    Returns:
        dict mapping each split name to its DataLoader.
    """
    split_dir = os.path.join(opt.split_dir, opt.split_name)
    splits = opt.train_split_mode if opt.state == 'train' else ['test']

    loaders = {}
    for split in splits:
        if split in ['val', 'test']:
            n_way, n_support, n_query, n_episodes = (
                opt.test_way, opt.test_shot, opt.test_query,
                opt.test_episodes)
        else:
            n_way, n_support, n_query, n_episodes = (
                opt.train_way, opt.train_shot, opt.train_query,
                opt.train_episodes)

        steps = [
            partial(convert_dict, 'class'),
            partial(load_class_images, opt.dataset_dir),
            partial(extract_episode, n_support, n_query),
        ]
        if opt.cuda:
            steps.append(CudaTransform())
        pipeline = compose(steps)

        names_path = os.path.join(split_dir, "{:s}.txt".format(split))
        with open(names_path, 'r') as f:
            class_names = [line.rstrip('\n') for line in f.readlines()]
        ds = TransformDataset(ListDataset(class_names), pipeline)

        sampler = (SequentialBatchSampler(len(ds)) if opt.sequential
                   else EpisodicBatchSampler(len(ds), n_way, n_episodes))

        # use num_workers=0, otherwise may receive duplicate episodes
        loaders[split] = torch.utils.data.DataLoader(ds, batch_sampler=sampler, num_workers=0)

    return loaders
Пример #18
0
def load_class_images(d):
    """Return all images of one miniImagenet class, optionally rotated.

    ``d['class']`` is either a plain label or ``<label>/rot<degrees>``. The
    unrotated batch is cached per label in ``MINIIMAGENET_CACHE``; rotation
    is applied on every call and is not cached.

    Args:
        d: mapping with a ``'class'`` entry.

    Returns:
        dict with the unchanged ``d['class']`` under ``'class'`` and the
        (possibly rotated) batch under ``'data'``.

    Raises:
        Exception: if the label directory contains no ``*.jpg`` files.
        ValueError: if the text after '/rot' is not an integer.
    """
    label, rot = d['class'], -1  # -1 means "no rotation requested"

    # Only an explicit '/rot<degrees>' suffix marks a rotated variant. A
    # label that merely contains the letters "rot" must not be split.
    if '/rot' in d['class']:
        label, _, rot_text = d['class'].rpartition('/rot')
        rot = int(rot_text)

    if label not in MINIIMAGENET_CACHE:
        image_dir = os.path.join(MINIIMAGENET_DATA_DIR, 'data', label)

        class_images = sorted(glob.glob(os.path.join(image_dir, '*.jpg')))
        if len(class_images) == 0:
            raise Exception(
                "No images found for miniimagenet class {} at {}.".format(
                    label, image_dir))

        # Normalisation is intentionally not part of the pipeline; the
        # disabled call used mean (0.47234195, 0.45386744, 0.41036746) and
        # std (0.28678342, 0.27806091, 0.29304931).
        image_ds = TransformDataset(
            ListDataset(class_images),
            compose([
                partial(convert_dict, 'file_name'),
                partial(load_image_path, 'file_name', 'data'),
                partial(scale_image, 'data', 84, 84),
                partial(convert_tensor, 'data')
            ]))

        # One batch as large as the dataset collates every image at once.
        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)

        for sample in loader:
            MINIIMAGENET_CACHE[label] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    samples = MINIIMAGENET_CACHE[label]

    # Rotates images if needed
    if rot != -1:
        nRot = rot // 90
        # torch.rot90 returns a new tensor on the input's own device, so no
        # GPU round trip is needed and CPU-only machines work too.
        samples = torch.rot90(samples, nRot, dims=[2, 3])

    return {'class': d['class'], 'data': samples}
Пример #19
0
def load_class_images(dataset, index_set, d):
    """Return all images of one class as a single cached batch.

    The image indices of the class are taken from ``index_set[d['class']]``
    and each is processed by convert_dict -> load_image(dataset, ...) ->
    scale_image(84, 84) -> convert_tensor. The resulting batch is memoised
    in ``MINI_IMGNET_CACHE`` under the class name.

    Returns:
        dict with the class name under ``'class'`` and the cached batch
        under ``'data'``.
    """
    key = d['class']
    if key in MINI_IMGNET_CACHE:
        return {'class': key, 'data': MINI_IMGNET_CACHE[key]}

    per_image = compose([
        partial(convert_dict, 'img_idx'),
        partial(load_image, dataset, 'img_idx', 'data'),
        partial(scale_image, 'data', 84, 84),
        partial(convert_tensor, 'data'),
    ])
    image_ds = TransformDataset(ListDataset(index_set[key]), per_image)

    # A batch as large as the dataset means the first batch is everything.
    whole_class = torch.utils.data.DataLoader(image_ds,
                                              batch_size=len(image_ds),
                                              shuffle=False)
    MINI_IMGNET_CACHE[key] = next(iter(whole_class))['data']

    return {'class': key, 'data': MINI_IMGNET_CACHE[key]}
Пример #20
0
def _setup_class_omniglot(split, d, cache, init_entry, crop_transforms,
                          target_size, root_dir, augm_opt):
    """Load one Omniglot class and append its batch to ``cache.data``.

    ``d['class']`` is split on '/' into alphabet, character and rotation.
    Rotation and cropping are applied only when enabled in ``augm_opt``
    ('rotation', 'crop', 'max_crop_shrink'); otherwise ``utils.nop`` takes
    their place in the pipeline. When rotation is disabled a warning is
    printed. ``split`` is not used by this function.

    Args:
        init_entry: when truthy, the cache entry of this class is reset to
            an empty list before the batch is appended.

    Returns:
        None; the result is stored in ``cache.data[d['class']]``.
    """
    class_name = d['class']
    alphabet, character, rot = class_name.split('/')
    image_dir = os.path.join(root_dir, 'omniglot', 'data', alphabet, character)

    if not augm_opt['rotation']:
        rotation_f = partial(utils.nop)
        print(
            'WARNING - rotation augmentation is the default protocol for Omniglot'
        )
    else:
        rotation_f = partial(utils.rotate_image, 'data', float(rot[3:]))

    crop_f = (partial(utils.crop, 'data', crop_transforms,
                      augm_opt['max_crop_shrink'])
              if augm_opt['crop'] else partial(utils.nop))

    png_paths = sorted(glob.glob(os.path.join(image_dir, '*.png')))
    per_image = compose([
        partial(base.convert_dict, 'file_name'),
        partial(utils.load_image_path, 'file_name', 'data'),
        rotation_f,
        crop_f,
        partial(utils.scale_image, 'data', target_size, target_size),
        partial(utils.convert_tensor, 'data'),
    ])
    image_ds = TransformDataset(ListDataset(png_paths), per_image)

    whole_class = torch.utils.data.DataLoader(image_ds,
                                              batch_size=len(image_ds),
                                              shuffle=False)

    # Only the first batch is consumed: it already holds the whole class.
    for batch in whole_class:
        if init_entry:
            cache.data[class_name] = []
        cache.data[class_name].append(batch['data'])
        break
Пример #21
0
def load_class_audio(split, d):
    """Return every audio item of one class collated into a single batch.

    The items are looked up in the module-level ``dataset`` as
    ``dataset[split][d['class']]`` and processed by convert_dict followed by
    ``extract_audio_mfcc``. Nothing is cached.

    Returns:
        dict with the class name under ``'class'`` and the whole collated
        batch (not only its 'data' entry) under ``'data'``.

    Raises:
        Exception: if the class has no audio items.
    """
    clips = dataset[split][d['class']]
    if not len(clips):
        raise Exception(f"No audio found for speaker {d['class']}")

    per_clip = compose([
        partial(convert_dict, 'file_name'),
        partial(extract_audio_mfcc, d['class'], 'file_name', 'data'),
    ])
    audio_ds = TransformDataset(ListDataset(clips), per_clip)

    # A batch as large as the dataset means the first batch is everything.
    whole_class = torch.utils.data.DataLoader(audio_ds,
                                              batch_size=len(audio_ds),
                                              shuffle=False)
    data = next(iter(whole_class))

    return {'class': d['class'], 'data': data}
Пример #22
0
def load_class_nlp(corpus, d):
    """Return the tensorised corpus entries of one domain, cached per domain.

    Args:
        corpus: Iterable of entries exposing a ``domain`` attribute.
        d: Dict carrying the domain name under ``'class'``.

    Returns:
        Dict with the domain name under ``'class'`` and the cached ``'data'``
        batch for that domain under ``'data'``.
    """
    domain = d['class']

    if domain not in NLP_CACHE:
        domain_entries = [entry for entry in corpus if entry.domain == domain]

        entries = ListDataset(domain_entries)
        pipeline = compose([
            partial(convert_corpus, 'data'),
            partial(lookup, vocab, 'data'),
            partial(pad_text, 'data', 28),
            partial(convert_tensor, 'data'),
        ])
        text_ds = TransformDataset(entries, pipeline)

        # A single batch as large as the dataset: the first batch is the
        # complete domain.
        loader = torch.utils.data.DataLoader(text_ds,
                                             batch_size=len(text_ds),
                                             shuffle=False)
        NLP_CACHE[domain] = next(iter(loader))['data']

    return {'class': domain, 'data': NLP_CACHE[domain]}
Пример #23
0
    def load_class_images(self, class_name):
        """Load, rotate and scale every image of one class, caching the batch.

        Args:
            class_name: String of the form ``"<alphabet>/<character>/<rot>"``.
                The rotation component carries a three-character prefix
                followed by the angle, which is parsed with ``float``.

        Returns:
            The cached batch for ``class_name``: the single sample yielded by
            a loader whose batch size equals the number of images.

        Raises:
            AssertionError: If no ``*.png`` file exists for the class.
            ValueError: If ``class_name`` does not split into exactly three
                ``/``-separated parts, or the angle is not a number.
        """
        if class_name in self.cache:
            return self.cache[class_name]

        alphabet, character, rot = class_name.split('/')
        image_dir = os.path.join(self.data_dir, 'data', alphabet, character)
        class_images = sorted(glob.glob(os.path.join(image_dir, '*.png')))
        if not class_images:
            # Raised explicitly instead of via ``assert`` so the check is not
            # stripped under ``python -O``; the exception type is kept.
            raise AssertionError(
                "No images found for class {} in {}".format(
                    class_name, image_dir))

        image_ds = TransformDataset(
            ListDataset(class_images),
            TransformCompose([
                TransformLoadImage(),
                TransformRotateImage(rot=float(rot[3:])),
                TransformScaleImage(height=28, width=28),
                TransformConvertTensor()
            ]))
        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)
        for sample in loader:
            self.cache[class_name] = sample
            break  # only need one sample because batch size equal to dataset length
        return self.cache[class_name]
Пример #24
0
def _setup_class_miniimagenet(split, d, cache, init_entry, crop_transforms,
                              target_size, root_dir, augm_opt):
    """Load all ``*.jpg`` images of one miniimagenet class into ``cache``.

    Args:
        split: Not used by this function.
        d: Dict carrying the class name under ``'class'``.
        cache: Object whose ``data`` mapping stores a list of batches per
            class name.
        init_entry: When truthy, the class entry in ``cache.data`` is reset to
            an empty list before the new batch is appended.
        crop_transforms: Forwarded to ``utils.crop`` when cropping is enabled.
        target_size: Height and width passed to ``utils.scale_image``.
        root_dir: Directory containing ``miniimagenet/data/<class>``.
        augm_opt: Mapping read for ``'rotation'``, ``'crop'`` and, when
            cropping is enabled, ``'max_crop_shrink'``.

    Raises:
        ValueError: If rotation augmentation is requested.
    """
    class_name = d['class']
    image_dir = os.path.join(root_dir, 'miniimagenet', 'data', class_name)

    if augm_opt['rotation']:
        raise ValueError(
            'Augmentation with rotation not implemented for miniimagenet')

    # Cropping and rescaling are switched on together; otherwise both steps
    # are replaced by ``utils.nop``.
    use_crop = augm_opt['crop']
    if use_crop:
        crop_f = partial(utils.crop, 'data', crop_transforms,
                         augm_opt['max_crop_shrink'])
        scale_f = partial(utils.scale_image, 'data', target_size, target_size)
    else:
        crop_f, scale_f = partial(utils.nop), partial(utils.nop)

    image_paths = sorted(glob.glob(os.path.join(image_dir, '*.jpg')))
    path_ds = ListDataset(image_paths)
    # NOTE: utils.normalize_mini_image is not part of this pipeline.
    pipeline = compose([
        partial(base.convert_dict, 'file_name'),
        partial(utils.load_image_path, 'file_name', 'data'),
        crop_f,
        scale_f,
        partial(utils.to_tensor, 'data'),
    ])
    image_ds = TransformDataset(path_ds, pipeline)

    # One batch as large as the dataset, so the first batch is the whole class.
    loader = torch.utils.data.DataLoader(image_ds,
                                         batch_size=len(image_ds),
                                         shuffle=False)
    batch = next(iter(loader))

    if init_entry:
        cache.data[class_name] = []
    cache.data[class_name].append(batch['data'])
def load_class_images(class_index, d):
    """Return all images of one class as a single tensor batch, cached.

    Args:
        class_index: Mapping from class id to that class's image file names.
        d: Dict carrying the class id under ``'class'``.

    Returns:
        Dict with the class id under ``'class'`` and the cached ``'data'``
        batch (images scaled to 84x84 by the pipeline) under ``'data'``.

    Raises:
        Exception: If ``class_index`` lists no images for the class.
    """
    class_id = d['class']

    if class_id not in MINIIMAGENET_CACHE:
        image_dir = os.path.join(MINIIMAGENET_DATA_DIR, 'data', class_id)
        class_images = [
            os.path.join(image_dir, 'images', file_name)
            for file_name in class_index[class_id]
        ]

        if not class_images:
            raise Exception("No images found for class %s." % class_id)

        # Run the extraction once as soon as any expected file is missing.
        if not all(os.path.exists(path) for path in class_images):
            extract_images(class_index[class_id], image_dir)

        path_ds = ListDataset(class_images)
        pipeline = compose([
            partial(convert_dict, 'file_name'),
            partial(load_image_path, 'file_name', 'data'),
            partial(scale_image, 'data', 84, 84),
            partial(convert_tensor, 'data'),
        ])
        image_ds = TransformDataset(path_ds, pipeline)

        # Batch size equals the dataset length: the first batch is the class.
        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)
        MINIIMAGENET_CACHE[class_id] = next(iter(loader))['data']

    return {'class': class_id, 'data': MINIIMAGENET_CACHE[class_id]}
Пример #26
0
def get_train_valid_loader(dataset,
                           batch_size,
                           train_transform=None,
                           valid_transform=None,
                           valid_size=None,
                           shuffle=True,
                           verbose=False,
                           num_workers=1,
                           pin_memory=False):
    """
    Utility function for loading and returning train and valid
    iterators over any pytorch dataset.

    The dataset is partitioned into a 'Train' part of
    ``len(dataset) - valid_size`` samples and a 'Valid' part of
    ``valid_size`` samples.

    Params
    ------
    - dataset: full dataset which contains training and validation data
    - batch_size: indexable pair; ``batch_size[0]`` is used for the training
      loader and ``batch_size[1]`` for the validation loader.
    - train_transform/valid_transform: callable applied to each sample of
      the respective partition. default: identity.
    - valid_size: should be an integer in the range [1, len(dataset)].
      A falsy value (``None`` or ``0``) selects 10% of the dataset.
    - shuffle: whether the training loader reshuffles its samples. The
      validation loader is never shuffled.
    - verbose: accepted for interface compatibility; currently unused.
    - num_workers: number of subprocesses to use when loading the dataset.
    - pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
      True if using GPU.

    Returns
    -------
    - train_loader: training set iterator.
    - valid_loader: validation set iterator.

    Raises
    ------
    - TypeError: if ``valid_size`` is not an integer in
      [1, len(dataset)]. This also happens when the 10% default rounds
      down to zero.
    """
    error_msg = "[!] valid_size should be an integer in the range [1, %d]." % (
        len(dataset))
    if not valid_size:
        valid_size = int(0.1 * len(dataset))
    if not isinstance(valid_size,
                      int) or valid_size < 1 or valid_size > len(dataset):
        # TypeError (rather than ValueError) is kept for existing callers.
        raise TypeError(error_msg)

    def identity(item):
        return item

    train_transform = train_transform or identity
    valid_transform = valid_transform or identity

    # generate train/val datasets
    partitions = {'Train': len(dataset) - valid_size, 'Valid': valid_size}

    train_dataset = TransformDataset(
        SplitDataset(dataset, partitions, initial_partition='Train'),
        train_transform)

    valid_dataset = TransformDataset(
        SplitDataset(dataset, partitions, initial_partition='Valid'),
        valid_transform)

    # Honour the caller's ``shuffle`` flag instead of always shuffling.
    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size[0],
                              shuffle=shuffle,
                              num_workers=num_workers,
                              pin_memory=pin_memory)

    valid_loader = DataLoader(valid_dataset,
                              batch_size=batch_size[1],
                              shuffle=False,
                              num_workers=num_workers,
                              pin_memory=pin_memory)

    return (train_loader, valid_loader)
Пример #27
0
def load_data(opt, splits):
    """Build one episodic ``DataLoader`` per requested split.

    Also stores ``opt['data.root_dir']`` in the module-level ``root_dir``.

    Args:
        opt: Mapping of options; the ``data.*`` keys read below configure
            paths, episode shape and CUDA transfer, and the ``augm`` group is
            extracted with ``filter_opt``.
        splits: Iterable of split names. ``'val1'`` and ``'val5'`` both read
            ``val.txt`` and use a fixed shot of 1 and 5 respectively;
            ``'train'``/``'trainval'`` use ``data.shot``; any other split
            uses ``data.test_shot``.

    Returns:
        Dict mapping each split name to its ``DataLoader``.
    """
    global root_dir
    root_dir = opt['data.root_dir']
    augm_opt = filter_opt(opt, 'augm')
    dataset = opt['data.dataset']
    split_dir = os.path.join(opt['data.root_dir'], opt['data.dataset'],
                             'splits', opt['data.split'])

    eval_splits = ('val1', 'val5', 'test')
    ret = {}
    # A single cache instance is shared by every split's transform chain.
    cache = Cache()

    for split in splits:
        is_eval = split in eval_splits
        n_way = opt['data.test_way'] if is_eval else opt['data.way']

        # Only the fixed shot differs between splits; everything else passed
        # to SetupEpisode is the same.
        batch_size = opt['data.batch_size']
        shot_max = opt['data.shot_max']
        if split in ('train', 'trainval'):
            fixed_shot = opt['data.shot']
        elif split == 'val1':
            fixed_shot = 1
        elif split == 'val5':
            fixed_shot = 5
        else:
            fixed_shot = opt['data.test_shot']
        SE = SetupEpisode(batch_size=batch_size,
                          shot_max=shot_max,
                          fixed_shot=fixed_shot,
                          way_min=opt['data.way_min'],
                          fixed_way=n_way)

        if is_eval:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        steps = [
            partial(convert_dict, 'class'),
            partial(load_class_images, split, dataset, cache, augm_opt),
            partial(extract_episode, SE, augm_opt),
        ]
        if opt['data.cuda']:
            steps.append(CudaTransform())
        pipeline = compose(steps)

        if split in ('val1', 'val5'):
            split_file = 'val.txt'
        else:
            split_file = "{:s}.txt".format(split)
        with open(os.path.join(split_dir, split_file), 'r') as f:
            class_names = [line.rstrip('\n') for line in f.readlines()]
        ds = TransformDataset(ListDataset(class_names), pipeline)

        sampler = EpisodicBatchSampler(SE, len(ds), n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret
Пример #28
0
def load_kws(opt, splits):
    """Build one episodic (or sequential) ``DataLoader`` per split.

    Class names are the text before the first ``_`` of each file name in the
    data directory, kept in order of first appearance in the sorted listing.
    When ``splits[0] == 'test'`` only the ``'test'`` class list is prepared
    (from ``KWS_DATA_DIR_TEST``); otherwise ``'train'`` and ``'val'`` are
    prepared from ``KWS_DATA_DIR``.

    Args:
        opt: Mapping of options. Reads ``data.way``, ``data.shot``,
            ``data.query``, ``data.train_episodes``, ``data.test_episodes``,
            ``data.cuda``, ``data.sequential`` and, for ``'val'``/``'test'``
            splits, ``data.test_way``, ``data.test_shot`` and
            ``data.test_query`` (a value of 0 falls back to the training
            setting).
        splits: Non-empty sequence of split names.

    Returns:
        Dict mapping each split name to its ``DataLoader``.

    Raises:
        KeyError: If a requested split was not prepared by the branch
            selected through ``splits[0]``.
    """
    dataset_self = {}
    if splits[0] == 'test':
        data_dir = KWS_DATA_DIR_TEST
        file_names = sorted(os.listdir(KWS_DATA_DIR_TEST))
        # dict.fromkeys de-duplicates while keeping first-seen order.
        dataset_self['test'] = list(
            dict.fromkeys(name.split('_')[0] for name in file_names))
    else:
        data_dir = KWS_DATA_DIR
        file_names = sorted(os.listdir(KWS_DATA_DIR))
        # NOTE(review): this list repeats labels. It is kept verbatim because
        # its length is what the validation sampler receives as len(ds).
        val_class_names = [
            'label01', 'label13', 'label03', 'label13', 'label03', 'label13',
            'label03', 'label03'
        ]
        held_out = set(val_class_names)
        dataset_self['train'] = [
            label
            for label in dict.fromkeys(
                name.split('_')[0] for name in file_names)
            if label not in held_out
        ]
        dataset_self['val'] = val_class_names

    ret = {}
    for split in splits:
        if split in ['val', 'test'] and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if split in ['val', 'test'] and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']

        if split in ['val', 'test'] and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']

        if split in ['val', 'test']:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        transforms = [
            partial(convert_dict, 'class'),
            partial(load_class_features, data_dir),
            partial(extract_episode, n_support, n_query)
        ]
        if opt['data.cuda']:
            transforms.append(CudaTransform())

        transforms = compose(transforms)
        ds = TransformDataset(ListDataset(dataset_self[split]), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret
Пример #29
0
def load(opt, splits):
    """Build one episodic (or sequential) ``DataLoader`` per split.

    Class names come from the second comma-separated column of
    ``<split>.csv``; rows whose second column is ``label`` are skipped.
    With ``data.augmented`` enabled, each name is expanded into four
    entries suffixed ``/rot000``, ``/rot090``, ``/rot180`` and ``/rot270``.

    Args:
        opt: Mapping of options. Reads ``data.split``, ``data.way``,
            ``data.shot``, ``data.query``, ``data.train_episodes``,
            ``data.test_episodes``, ``data.cuda``, ``data.augmented``,
            ``data.sequential`` and, for ``'val'``/``'test'`` splits,
            ``data.test_way``, ``data.test_shot`` and ``data.test_query``
            (a value of 0 falls back to the training setting).
        splits: Iterable of split names.

    Returns:
        Dict mapping each split name to its ``DataLoader``.
    """
    split_dir = os.path.join(MINIIMAGENET_DATA_DIR, 'splits',
                             opt['data.split'])
    rotation_suffixes = ('/rot000', '/rot090', '/rot180', '/rot270')

    def pick(is_eval, eval_key, train_key):
        # Evaluation splits use their own setting unless it is set to 0.
        if is_eval and opt[eval_key] != 0:
            return opt[eval_key]
        return opt[train_key]

    ret = {}
    for split in splits:
        is_eval = split in ['val', 'test']

        n_way = pick(is_eval, 'data.test_way', 'data.way')
        n_support = pick(is_eval, 'data.test_shot', 'data.shot')
        n_query = pick(is_eval, 'data.test_query', 'data.query')
        n_episodes = (opt['data.test_episodes']
                      if is_eval else opt['data.train_episodes'])

        steps = [
            partial(convert_dict, 'class'),
            load_class_images,
            partial(extract_episode, n_support, n_query),
        ]
        if opt['data.cuda']:
            steps.append(CudaTransform())
        pipeline = compose(steps)

        class_names = []
        csv_path = os.path.join(split_dir, "{:s}.csv".format(split))
        with open(csv_path, 'r') as f:
            for row in f.readlines():
                name = row.split(',')[1].rstrip('\n')

                # Skip the header row.
                if name == 'label':
                    continue

                if not opt['data.augmented']:
                    class_names.append(name)
                    continue
                class_names.extend(
                    [name + suffix for suffix in rotation_suffixes])
        ds = TransformDataset(ListDataset(class_names), pipeline)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret