示例#1
0
 def __init__(self, data_dir, class_file, n_support, n_query, cuda, args):
     """Store the settings read from ``args`` and build the transform pipeline.

     Args:
         data_dir: kept on the instance as ``self.data_dir``.
         class_file: kept on the instance as ``self.class_file``.
         n_support: kept on the instance as ``self.n_support``.
         n_query: kept on the instance as ``self.n_query``.
         cuda: when truthy, a ``CudaTransform`` is appended to the pipeline.
         args: mapping that must contain every key indexed below
             (``'sample_rate'``, ``'clip_duration'``, ``'window_size'``, ...);
             a missing key raises ``KeyError``.
     """
     # Signal / feature settings copied straight from ``args``.
     self.sample_rate = args['sample_rate']
     self.clip_duration_ms = args['clip_duration']
     self.window_size_ms = args['window_size']
     self.window_stride_ms = args['window_stride']
     self.feature_bin_count = args['num_features']
     self.foreground_volume = args['foreground_volume']
     self.time_shift_ms = args['time_shift']
     # Background settings copied from ``args``.
     self.use_background = args['include_background']
     self.background_volume = args['bg_volume']
     self.background_frequency = args['bg_frequency']
     # Clip duration (ms) scaled by the sample rate and divided by 1000,
     # truncated to an int.
     # NOTE(review): assumes sample_rate is in samples per second - confirm.
     self.desired_samples = int(self.sample_rate * self.clip_duration_ms /
                                1000)
     self.silence = args['include_silence']
     self.silence_num_samples = args['num_silence']
     self.unknown = args['include_unknown']
     self.data_cache = {}
     self.data_dir = data_dir
     self.class_file = class_file
     self.n_support = n_support
     self.n_query = n_query
     # NOTE(review): these helpers are defined elsewhere and presumably read
     # the attributes assigned above, so keep them after those assignments.
     self.background_data = self.load_background_data()
     self.mfcc = self.build_mfcc_extractor()
     # Steps applied in order to each item of the class-name dataset.
     self.transforms = [
         partial(convert_dict, 'class'), self.load_class_samples,
         self.extract_episode
     ]
     if cuda:
         self.transforms.append(CudaTransform())
     self.class_names = self.read()
     # The list on ``self.transforms`` is left as-is; only the composed
     # callable is handed to the parent initializer.
     transforms = compose(self.transforms)
     super().__init__(ListDataset(self.class_names), transforms)
示例#2
0
def load(opt, splits):
    """Build one episodic ``DataLoader`` per split from its pickle cache.

    Args:
        opt: option mapping. Reads ``data.way``, ``data.shot``, ``data.query``
            and their ``data.test_*`` counterparts, ``data.train_episodes``,
            ``data.test_episodes``, ``data.cuda`` and ``data.sequential``.
        splits: iterable of split names. ``'val'`` and ``'test'`` use the
            ``data.test_*`` options whenever those are non-zero.

    Returns:
        dict mapping each split name to a ``torch.utils.data.DataLoader``.

    Raises:
        FileNotFoundError: if the cache file of a split does not exist.
    """
    ret = {}
    for split in splits:
        is_eval = split in ['val', 'test']

        if is_eval and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if is_eval and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']

        if is_eval and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']

        if is_eval:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        cache_path = get_cache_path(split)
        if not os.path.exists(cache_path):
            # Fail loudly: otherwise the code below would hit a NameError on
            # the first split or silently reuse the previous split's data.
            raise FileNotFoundError(
                "cache file for split '{}' not found: {}".format(
                    split, cache_path))

        # NOTE: unpickling can execute arbitrary code; only load cache files
        # that come from a trusted source.
        with open(cache_path, "rb") as f:
            try:
                # Caches written with byte-string keys.
                data = pkl.load(f, encoding='bytes')
                img_data = data[b'image_data']
                class_dict = data[b'class_dict']
            except (KeyError, TypeError):
                # Caches written with text keys. The first load consumed the
                # stream, so rewind before reading it again.
                f.seek(0)
                data = pkl.load(f)
                img_data = data['image_data']
                class_dict = data['class_dict']

        transforms = [
            partial(convert_dict, 'class'),
            partial(load_class_images, img_data, class_dict),
            partial(extract_episode, n_support, n_query)
        ]
        if opt['data.cuda']:
            transforms.append(CudaTransform())
        class_names = list(class_dict)
        transforms = compose(transforms)
        ds = TransformDataset(ListDataset(class_names), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret
def load(opt, splits):
    """Create an episodic ``DataLoader`` for each split listed in a csv file.

    Args:
        opt: option mapping. Reads ``data.split``, ``data.way``,
            ``data.shot``, ``data.query`` and their ``data.test_*``
            counterparts, the episode counts, ``data.cuda`` and
            ``data.sequential``.
        splits: iterable of split names; ``<split>.csv`` is read for each.

    Returns:
        dict mapping each split name to a ``torch.utils.data.DataLoader``.
    """
    split_dir = os.path.join(MINIIMAGENET_DATA_DIR, 'splits',
                             opt['data.split'])

    loaders = {}
    for split in splits:
        held_out = split in ('val', 'test')

        # Non-zero test-time options take precedence on val/test.
        n_way = (opt['data.test_way']
                 if held_out and opt['data.test_way'] != 0
                 else opt['data.way'])
        n_support = (opt['data.test_shot']
                     if held_out and opt['data.test_shot'] != 0
                     else opt['data.shot'])
        n_query = (opt['data.test_query']
                   if held_out and opt['data.test_query'] != 0
                   else opt['data.query'])
        n_episodes = (opt['data.test_episodes'] if held_out
                      else opt['data.train_episodes'])

        # Group the first csv column by the class named in the second column.
        class_index = defaultdict(list)
        csv_path = os.path.join(split_dir, "{:s}.csv".format(split))
        with open(csv_path, 'r') as handle:
            handle.readline()  # discard the first line of the file
            rows = handle.readlines()
            for row in rows:
                image, label = row.split(',')
                class_index[label.rstrip('\n')].append(image)
        class_names = list(class_index.keys())

        steps = [
            partial(convert_dict, 'class'),
            partial(load_class_images, class_index),
            partial(extract_episode, n_support, n_query),
        ]
        if opt['data.cuda']:
            steps.append(CudaTransform())
        pipeline = compose(steps)

        ds = TransformDataset(ListDataset(class_names), pipeline)

        sampler = (SequentialBatchSampler(len(ds))
                   if opt['data.sequential']
                   else EpisodicBatchSampler(len(ds), n_way, n_episodes))

        # use num_workers=0, otherwise may receive duplicate episodes
        loaders[split] = torch.utils.data.DataLoader(
            ds, batch_sampler=sampler, num_workers=0)

    return loaders
示例#4
0
def load(opt, splits):
    """Create an episodic ``DataLoader`` for every requested Omniglot split.

    Args:
        opt: option mapping. Reads ``data.split``, ``data.way``,
            ``data.shot``, ``data.query`` and their ``data.test_*``
            counterparts, the episode counts, ``data.cuda`` and
            ``data.sequential``.
        splits: iterable of split names; ``<split>.txt`` supplies one class
            name per line.

    Returns:
        dict mapping each split name to a ``torch.utils.data.DataLoader``.
    """
    split_dir = os.path.join(OMNIGLOT_DATA_DIR, 'splits', opt['data.split'])

    def pick(evaluating, test_key, train_key):
        # On val/test a non-zero test-time option overrides the training one.
        if evaluating and opt[test_key] != 0:
            return opt[test_key]
        return opt[train_key]

    loaders = {}
    for split in splits:
        evaluating = split in ['val', 'test']

        # Number of classes, support items and query items per episode.
        n_way = pick(evaluating, 'data.test_way', 'data.way')
        n_support = pick(evaluating, 'data.test_shot', 'data.shot')
        n_query = pick(evaluating, 'data.test_query', 'data.query')
        # Number of episodes.
        n_episodes = opt['data.test_episodes' if evaluating
                         else 'data.train_episodes']

        # Three steps applied to each class name: wrap it under the 'class'
        # key, load that class's images, then split into support and query.
        steps = [
            partial(convert_dict, 'class'),
            load_class_images,
            partial(extract_episode, n_support, n_query),
        ]
        if opt['data.cuda']:
            steps.append(CudaTransform())
        pipeline = compose(steps)

        # Read every class name listed for this split.
        with open(os.path.join(split_dir, "{:s}.txt".format(split)), 'r') as f:
            class_names = [line.rstrip('\n') for line in f.readlines()]

        # One dataset item per class; the pipeline turns it into an episode.
        ds = TransformDataset(ListDataset(class_names), pipeline)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            # Built from the class count, n_way and the episode count.
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        loaders[split] = torch.utils.data.DataLoader(
            ds, batch_sampler=sampler, num_workers=0)

    return loaders
示例#5
0
def load(opt, splits):
    """Create an episodic ``DataLoader`` per Omniglot split and print a summary.

    Args:
        opt: option mapping. Reads ``data.split``, ``data.way``,
            ``data.shot``, ``data.query`` and their ``data.test_*``
            counterparts, the episode counts, ``data.cuda`` and
            ``data.sequential``.
        splits: iterable of split names; ``<split>.txt`` supplies one class
            name per line.

    Returns:
        dict mapping each split name to a ``torch.utils.data.DataLoader``.
        The type of the dict and of every loader is printed before returning.
    """
    split_dir = os.path.join(OMNIGLOT_DATA_DIR, 'splits', opt['data.split'])

    # (test-time key, training key) pairs, in the order way / shot / query.
    key_pairs = (
        ('data.test_way', 'data.way'),
        ('data.test_shot', 'data.shot'),
        ('data.test_query', 'data.query'),
    )

    loaders = {}
    for split in splits:
        is_eval = split in ['val', 'test']

        chosen = []
        for test_key, base_key in key_pairs:
            # A non-zero test-time value wins on val/test.
            if is_eval and opt[test_key] != 0:
                chosen.append(opt[test_key])
            else:
                chosen.append(opt[base_key])
        n_way, n_support, n_query = chosen

        n_episodes = (opt['data.test_episodes'] if is_eval
                      else opt['data.train_episodes'])

        steps = [
            partial(convert_dict, 'class'),
            load_class_images,
            partial(extract_episode, n_support, n_query),
        ]
        if opt['data.cuda']:
            steps.append(CudaTransform())
        pipeline = compose(steps)

        names_path = os.path.join(split_dir, "{:s}.txt".format(split))
        with open(names_path, 'r') as f:
            class_names = [line.rstrip('\n') for line in f.readlines()]

        ds = TransformDataset(ListDataset(class_names), pipeline)

        sampler = (SequentialBatchSampler(len(ds))
                   if opt['data.sequential']
                   else EpisodicBatchSampler(len(ds), n_way, n_episodes))

        # use num_workers=0, otherwise may receive duplicate episodes
        loaders[split] = torch.utils.data.DataLoader(
            ds, batch_sampler=sampler, num_workers=0)

    # Debug summary of what was built.
    print("Ret:", type(loaders))
    for split_name, loader in loaders.items():
        print(split_name, type(loader))
    return loaders
示例#6
0
def load(opt, splits):
    """Create an episodic ``DataLoader`` per split from the global ``dataset``.

    Args:
        opt: option mapping. Reads ``data.way``, ``data.shot``, ``data.query``
            and their ``data.test_*`` counterparts, the episode counts,
            ``data.min_len``, ``data.max_len``, ``data.cuda`` and
            ``data.sequential``.
        splits: iterable of split names, each a key of the module-level
            ``dataset`` mapping.

    Returns:
        dict mapping each split name to a ``torch.utils.data.DataLoader``.
    """
    loaders = {}
    for split in splits:
        held_out = split in ['val', 'test']

        # Choose which option key to read; non-zero test values win on
        # val/test.
        way_key = ('data.test_way'
                   if held_out and opt['data.test_way'] != 0 else 'data.way')
        n_way = opt[way_key]

        shot_key = ('data.test_shot'
                    if held_out and opt['data.test_shot'] != 0
                    else 'data.shot')
        n_support = opt[shot_key]

        query_key = ('data.test_query'
                     if held_out and opt['data.test_query'] != 0
                     else 'data.query')
        n_query = opt[query_key]

        n_episodes = opt['data.test_episodes' if held_out
                         else 'data.train_episodes']

        split_entry = dataset[split]
        speaker_ids = split_entry['class']
        data_split = dataset[split]['data']

        steps = [
            partial(convert_dict, 'class'),
            partial(extract_episode, 'class', data_split, opt['data.min_len'],
                    opt['data.max_len'], n_support, n_query),
            partial(convert_tensor,
                    ['xq_padded', 'xs_padded', 'xq_len', 'xs_len']),
        ]
        if opt['data.cuda']:
            steps.append(CudaTransform())
        pipeline = compose(steps)

        ds = TransformDataset(ListDataset(speaker_ids), pipeline)

        sampler = (SequentialBatchSampler(len(ds))
                   if opt['data.sequential']
                   else EpisodicBatchSampler(len(ds), n_way, n_episodes))

        loaders[split] = torch.utils.data.DataLoader(
            ds, batch_sampler=sampler, num_workers=0)

    return loaders
示例#7
0
    def __iter__(self):
        """Yield transformed batches tagged with the episode sizes.

        On first use the split is loaded from disk and the transform pipeline
        is built and cached on the instance. Every call reshuffles through
        ``self.shuffle_dataset()`` and prints the number of batches.

        Yields:
            Each transformed batch, with ``'n_way'``, ``'n_support'`` and
            ``'n_query'`` set from the instance attributes of the same name.
        """
        needs_setup = self.dataset is None
        if needs_setup:
            self.dataset = self.load_dataset(from_disk=True)[self.split]
            steps = [
                partial(batch_from_index, self.dataset['data']),
                partial(convert_tensor, 'data'),
            ]
            if self.if_cuda:
                steps.append(CudaTransform())
            self.transforms = compose(steps)

        shuffled = self.shuffle_dataset()
        episodes = TransformDataset(ListDataset(shuffled), self.transforms)

        print(f"\nSize of batches: {len(episodes)}")
        for episode in episodes:
            # Attach the episode sizes in a fixed order.
            for field in ('n_way', 'n_support', 'n_query'):
                episode[field] = getattr(self, field)
            yield episode
示例#8
0
def _first_seen_prefixes(file_names, excluded=()):
    """Return the unique ``name.split('_')[0]`` prefixes in first-seen order.

    Prefixes listed in ``excluded`` are left out.
    """
    seen = set(excluded)
    prefixes = []
    for file_name in file_names:
        prefix = file_name.split('_')[0]
        if prefix not in seen:
            seen.add(prefix)
            prefixes.append(prefix)
    return prefixes


def load_kws(opt, splits):
    """Build one episodic ``DataLoader`` per requested keyword-spotting split.

    When the first entry of ``splits`` is ``'test'`` only a ``'test'`` class
    list is prepared (from ``KWS_DATA_DIR_TEST``); otherwise ``'train'`` and
    ``'val'`` class lists are prepared from ``KWS_DATA_DIR``. Class names are
    the part of each file name before the first underscore.

    Args:
        opt: option mapping. Reads ``data.way``, ``data.shot``, ``data.query``
            and their ``data.test_*`` counterparts, ``data.train_episodes``,
            ``data.test_episodes``, ``data.cuda`` and ``data.sequential``.
        splits: sequence of split names.

    Returns:
        dict mapping each split name to a ``torch.utils.data.DataLoader``;
        an empty dict when ``splits`` is empty.

    Raises:
        KeyError: if a requested split was not prepared (for example
            ``'val'`` when ``splits[0] == 'test'``).
    """
    if len(splits) == 0:
        # Nothing requested: avoid indexing splits[0] and touching the disk.
        return {}

    dataset_self = {}
    if splits[0] == 'test':
        data_dir = KWS_DATA_DIR_TEST
        files = sorted(os.listdir(KWS_DATA_DIR_TEST))
        dataset_self['test'] = _first_seen_prefixes(files)
    else:
        data_dir = KWS_DATA_DIR
        files = sorted(os.listdir(KWS_DATA_DIR))
        # NOTE(review): this list contains repeated labels, so the 'val'
        # dataset has 8 entries but only 3 distinct classes. Kept unchanged
        # because it determines len(ds) - confirm whether it is intentional.
        val_class_names = [
            'label01', 'label13', 'label03', 'label13', 'label03', 'label13',
            'label03', 'label03'
        ]
        # Training classes are every prefix not reserved for validation.
        dataset_self['train'] = _first_seen_prefixes(
            files, excluded=val_class_names)
        dataset_self['val'] = val_class_names

    ret = {}
    for split in splits:
        is_eval = split in ['val', 'test']

        if is_eval and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if is_eval and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']

        if is_eval and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']

        if is_eval:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        transforms = [
            partial(convert_dict, 'class'),
            partial(load_class_features, data_dir),
            partial(extract_episode, n_support, n_query)
        ]
        if opt['data.cuda']:
            transforms.append(CudaTransform())

        transforms = compose(transforms)
        ds = TransformDataset(ListDataset(dataset_self[split]), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret
示例#9
0
def load(opt, splits):
    """Build one episodic ``DataLoader`` per split from ``<split>.csv``.

    The class name is taken from the second comma-separated column of every
    row. Each class is kept once, in first-seen order, so an episode cannot
    draw the same class twice when the csv has several rows per class. With
    ``data.augmented`` set, every class is expanded into four entries with
    the suffixes ``/rot000``, ``/rot090``, ``/rot180`` and ``/rot270``.

    Args:
        opt: option mapping. Reads ``data.split``, ``data.way``,
            ``data.shot``, ``data.query`` and their ``data.test_*``
            counterparts, the episode counts, ``data.augmented``,
            ``data.cuda`` and ``data.sequential``.
        splits: iterable of split names.

    Returns:
        dict mapping each split name to a ``torch.utils.data.DataLoader``.

    Raises:
        IndexError: if a non-blank csv row has no second column.
    """
    split_dir = os.path.join(MINIIMAGENET_DATA_DIR, 'splits',
                             opt['data.split'])

    ret = {}
    for split in splits:
        is_eval = split in ['val', 'test']

        if is_eval and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if is_eval and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']

        if is_eval and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']

        if is_eval:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        transforms = [
            partial(convert_dict, 'class'), load_class_images,
            partial(extract_episode, n_support, n_query)
        ]
        if opt['data.cuda']:
            transforms.append(CudaTransform())

        transforms = compose(transforms)

        class_names = []
        seen = set()
        with open(os.path.join(split_dir, "{:s}.csv".format(split)), 'r') as f:
            for line in f:
                if not line.strip():
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue

                # Strip '\r' as well so CRLF files match the header check.
                name = line.split(',')[1].rstrip('\r\n')

                # Skip the header row and classes that were already added.
                if name == 'label' or name in seen:
                    continue
                seen.add(name)

                if opt['data.augmented']:
                    class_names.extend([
                        name + '/rot000', name + '/rot090', name + '/rot180',
                        name + '/rot270'
                    ])
                else:
                    class_names.append(name)
        ds = TransformDataset(ListDataset(class_names), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret