def _load_dataset(path, impl, dict): if impl == 'raw': raise NotImplementedError("No such {} dataset implementation.".format(impl)) elif impl == 'mmap': dataset = indexed_dataset.MMapIndexedDataset(path=path) else: raise NotImplementedError("No such {} dataset implementation.".format(impl)) return dataset
def _load_dataset(path, impl, dict): if impl == 'mmap': # mmap dataset has been numberized, no need for dict src_dataset = indexed_dataset.MMapIndexedDataset(path=path) else: raise NotImplementedError( "No such {} dataset implementation.".format(impl)) return src_dataset
def _load_dataset(paths, impl, dict=None): if impl == 'raw': raise NotImplementedError(impl) elif impl == 'mmap': # mmap dataset has been numberized, no need for dict dataset = [indexed_dataset.MMapIndexedDataset(path=path) for path in paths] else: raise NotImplementedError("No such {} dataset implementation.".format(impl)) return dataset
def load_lang_dataset_denoising(path, impl, dict):
    """Load the dataset at ``path`` with the backend selected by ``impl``.

    Args:
        path: Forwarded as the ``path`` keyword of the chosen dataset class.
        impl: ``'raw'`` selects ``IndexedRawTextDataset``; ``'mmap'`` selects
            ``indexed_dataset.MMapIndexedDataset``.
        dict: Passed as ``dictionary`` to ``IndexedRawTextDataset``; not
            used by the ``'mmap'`` backend.

    Returns:
        The dataset object built by the selected backend.

    Raises:
        NotImplementedError: If ``impl`` is neither ``'raw'`` nor ``'mmap'``.
    """
    if impl == 'raw':
        return IndexedRawTextDataset(path=path, dictionary=dict)
    if impl == 'mmap':
        # mmap dataset has been numberized, no need for dict
        return indexed_dataset.MMapIndexedDataset(path=path)
    raise NotImplementedError(
        "No such {} dataset implementation.".format(impl))
def _load_dataset(paths, impl, dict=None, sample_portion=None): if impl == 'raw': raise NotImplementedError(impl) elif impl == 'mmap': # mmap dataset has been numberized, no need for dict prev_paths, cur_path = paths[:-1], paths[-1] dataset = [indexed_dataset.MMapIndexedDataset(path=cur_path)] if sample_portion is not None and len(prev_paths) > 0: sample_size_per_task = int( len(dataset[0]) * sample_portion // len(prev_paths)) for p_path in prev_paths: p_dataset = indexed_dataset.MMapIndexedDataset(path=p_path) dataset.append( SliceDataset(p_dataset, end=sample_size_per_task)) else: for p_path in prev_paths: p_dataset = indexed_dataset.MMapIndexedDataset(path=p_path) dataset.append(p_dataset) else: raise NotImplementedError( "No such {} dataset implementation.".format(impl)) return dataset
def _load_dataset(path, impl, dict): if impl == 'raw': raise NotImplementedError("No such {} dataset implementation.".format(impl)) elif impl == 'mmap': if str.endswith(path, 'bin_ast'): from dgl.data.utils import load_graphs src_dataset, _ = load_graphs(f'{path}.mmap') src_dataset = GraphDataset(src_dataset) else: src_dataset = indexed_dataset.MMapIndexedDataset(path=path) else: raise NotImplementedError("No such {} dataset implementation.".format(impl)) return src_dataset
def load_mmap_dataset(dataset):
    """Build an ``indexed_dataset.MMapIndexedDataset`` from ``dataset``.

    Args:
        dataset: Forwarded unchanged as the sole positional argument of
            ``MMapIndexedDataset``.

    Returns:
        The newly constructed ``MMapIndexedDataset``.
    """
    mmap_dataset = indexed_dataset.MMapIndexedDataset(dataset)
    return mmap_dataset