Exemplo n.º 1
0
def _multi_instances_parallel_dataloader_worker():
    """Run a train loader and a val loader concurrently, for both divide modes."""
    dataset = init_dataset()

    for flag in (True, False):
        # Two parallel loaders over the same dataset: training batches of 4
        # and validation batches of 10, both with 2 workers and preloading.
        train_loader = DataLoader(
            dataset,
            sampler=RandomSampler(dataset, batch_size=4, drop_last=False),
            num_workers=2,
            divide=flag,
            preload=True,
        )
        val_loader = DataLoader(
            dataset,
            sampler=RandomSampler(dataset, batch_size=10, drop_last=False),
            num_workers=2,
            divide=flag,
            preload=True,
        )
        for step, (data, label) in enumerate(train_loader):
            assert data._tuple_shape == (4, 1, 32, 32)
            assert label._tuple_shape == (4,)
            # Every fifth training step, sweep the whole validation loader
            # to exercise two live loader instances at once.
            if step % 5 == 0:
                for val_data, val_label in val_loader:
                    assert val_data._tuple_shape == (10, 1, 32, 32)
                    assert val_label._tuple_shape == (10,)
Exemplo n.º 2
0
def test_dataloader_init():
    """DataLoader constructor validation, defaults, and __len__ semantics."""
    dataset = init_dataset()

    # Each invalid keyword combination must be rejected with ValueError.
    invalid_kwargs = (
        dict(num_workers=2, divide=True),
        dict(num_workers=-1),
        dict(timeout=-1),
        dict(num_workers=0, divide=True),
    )
    for kwargs in invalid_kwargs:
        with pytest.raises(ValueError):
            DataLoader(dataset, **kwargs)

    # Defaults: sequential sampling, identity transform, standard collator.
    loader = DataLoader(dataset)
    assert isinstance(loader.sampler, SequentialSampler)
    assert isinstance(loader.transform, PseudoTransform)
    assert isinstance(loader.collator, Collator)

    # __len__ follows drop_last: keep the trailing partial batch (17) or not (16).
    keep_last = DataLoader(
        dataset, sampler=RandomSampler(dataset, batch_size=6, drop_last=False)
    )
    assert len(keep_last) == 17
    drop_last = DataLoader(
        dataset, sampler=RandomSampler(dataset, batch_size=6, drop_last=True)
    )
    assert len(drop_last) == 16
Exemplo n.º 3
0
def test_dataloader_parallel():
    """Parallel DataLoader yields correctly shaped batches in both divide modes."""
    # set max shared memory to 100M
    os.environ["MGE_PLASMA_MEMORY"] = "100000000"

    dataset = init_dataset()
    # Same configuration, first with divide=False then divide=True.
    for divide_mode in (False, True):
        loader = DataLoader(
            dataset,
            sampler=RandomSampler(dataset, batch_size=4, drop_last=False),
            num_workers=2,
            divide=divide_mode,
            preload=True,
        )
        for data, label in loader:
            assert data._tuple_shape == (4, 1, 32, 32)
            assert label._tuple_shape == (4,)
Exemplo n.º 4
0
def test_RandomSampler():
    """A RandomSampler visits every index exactly once, in shuffled order."""
    base = list(range(20))
    sampler = RandomSampler(ArrayDataset(copy.deepcopy(base)))
    # The shuffled order differs from the identity order...
    assert [batch[0] for batch in sampler] != base
    # ...yet a second pass still yields a permutation of all indices.
    assert sorted(batch[0] for batch in sampler) == base
Exemplo n.º 5
0
def test_random_sampler_seed():
    """Samplers sharing a seed shuffle identically; differing seeds diverge."""
    base = list(range(20))

    def make_sampler(seed):
        # Each sampler gets its own copy of the index data.
        return RandomSampler(ArrayDataset(copy.deepcopy(base)), seed=seed)

    def draw(sampler):
        # One full pass over the sampler, unwrapping single-index batches.
        return [batch[0] for batch in sampler]

    seeded_a = make_sampler(0)
    seeded_b = make_sampler(0)
    other = make_sampler(1)

    # Every epoch is shuffled away from the identity order...
    for s in (seeded_a, seeded_b, other):
        assert draw(s) != base
    # ...while remaining a permutation of the original indices.
    for s in (seeded_a, seeded_b, other):
        assert sorted(draw(s)) == base

    # Identical seeds reproduce the same order; a different seed does not.
    assert draw(seeded_a) == draw(seeded_b)
    assert draw(seeded_a) != draw(other)
Exemplo n.º 6
0
def test_dataloader_serial():
    """Single-process DataLoader yields batches of the expected shapes."""
    dataset = init_dataset()
    sampler = RandomSampler(dataset, batch_size=4, drop_last=False)
    loader = DataLoader(dataset, sampler=sampler)
    for batch_data, batch_label in loader:
        assert batch_data.shape == (4, 1, 32, 32)
        assert batch_label.shape == (4,)
Exemplo n.º 7
0
def test_dataloader_parallel_worker_exception():
    """A crash inside a worker process must surface as RuntimeError('worker ... died')."""
    dataset = init_dataset()

    class FakeErrorTransform(Transform):
        def __init__(self):
            pass

        def apply(self, input):
            # Intentional bug: `x` is undefined, so the worker process raises
            # NameError and dies while producing its first batch. Do NOT "fix"
            # this line — the crash is the point of the test.
            y = x + 1
            return input

    dataloader = DataLoader(
        dataset,
        sampler=RandomSampler(dataset, batch_size=4, drop_last=False),
        transform=FakeErrorTransform(),
        num_workers=2,
    )
    # Fetching the first batch must report the dead worker to the caller.
    with pytest.raises(RuntimeError, match=r"worker.*died"):
        data_iter = iter(dataloader)
        batch_data = next(data_iter)
Exemplo n.º 8
0
 def get_dataloader(self, examples, batch_size, is_random=False):
     """Build a DataLoader over *examples* and return (loader, num_features).

     When is_random is true, batches are shuffled via RandomSampler;
     otherwise they are drawn in order. Both samplers drop the trailing
     partial batch (drop_last=True).
     """
     features = convert_examples_to_features(
         examples, self.label_list, self.args.max_seq_length, self.tokenizer
     )
     # to_inputs yields (input_ids, input_mask, segment_ids, label_ids).
     inputs = self.to_inputs(features)
     dataset = ArrayDataset(*inputs)
     sampler_cls = RandomSampler if is_random else SequentialSampler
     sampler = sampler_cls(dataset=dataset, batch_size=batch_size, drop_last=True)
     dataloader = DataLoader(dataset=dataset, sampler=sampler,)
     return dataloader, len(features)
Exemplo n.º 9
0
def test_dataloader_parallel_timeout():
    """A worker slower than the loader timeout raises a RuntimeError mentioning 'timeout'."""
    dataset = init_dataset()

    class TimeoutTransform(Transform):
        def __init__(self):
            pass

        def apply(self, input):
            # Sleep far longer than the loader's 2-second timeout below.
            time.sleep(10)
            return input

    loader = DataLoader(
        dataset,
        sampler=RandomSampler(dataset, batch_size=4, drop_last=False),
        transform=TimeoutTransform(),
        num_workers=2,
        timeout=2,
    )
    with pytest.raises(RuntimeError, match=r".*timeout.*"):
        batch = next(iter(loader))
Exemplo n.º 10
0
def fetch_dataloader(params):
    """Build train/val/test DataLoaders according to *params*.

    Returns a dict with keys "train", "val", "test". The "train" entry is
    always populated; "val"/"test" are populated only when the split name
    appears in params.eval_type, and are None otherwise.

    Raises:
        ValueError: if params.dataset_type is not a recognized dataset.
    """
    input_transform = fetch_input_transform()
    spatial_transform = fetch_spatial_transform(params)

    benchmark_path_gof_clean = "dataset/GOF_Clean.npy"
    benchmark_path_gof_final = "dataset/GOF_Final.npy"

    if params.dataset_type == "GOF":
        train_ds = BaseDataset(input_transform, spatial_transform)
        val_ds = TestDataset(benchmark_path_gof_clean, input_transform)
        test_ds = ConcatDataset(
            [TestDataset(benchmark_path_gof_clean, input_transform),
             TestDataset(benchmark_path_gof_final, input_transform)])
    else:
        # Fail fast with a clear message; previously an unknown dataset_type
        # fell through and crashed below with a NameError on train_ds.
        raise ValueError(
            "Unknown dataset_type in params, should be 'GOF'"
        )

    dataloaders = {}
    # add default train data loader
    train_sampler = RandomSampler(train_ds, batch_size=params.train_batch_size, drop_last=True)
    train_dl = DataLoader(train_ds, train_sampler, num_workers=params.num_workers)
    dataloaders["train"] = train_dl

    # choose the val and/or test data loader for evaluation
    eval_datasets = {"val": val_ds, "test": test_ds}
    for split in ["val", "test"]:
        if split in params.eval_type:
            ds = eval_datasets[split]
            eval_sampler = SequentialSampler(ds, batch_size=params.eval_batch_size)
            dataloaders[split] = DataLoader(ds, eval_sampler, num_workers=params.num_workers)
        else:
            dataloaders[split] = None

    return dataloaders
Exemplo n.º 11
0
from megengine.data.dataset import MNIST

from megengine.data import DataLoader
from megengine.data.transform import ToMode, Pad, Normalize, Compose
from megengine.data.sampler import RandomSampler, SequentialSampler

# If running in the MegStudio environment, set MNIST_DATA_PATH to
# /home/megstudio/dataset/MNIST/
MNIST_DATA_PATH = "./datasets/MNIST/"

# Load the training and test datasets. If the data is not present locally,
# set download=True to fetch it.
train_dataset = MNIST(root=MNIST_DATA_PATH, train=True, download=False)
test_dataset = MNIST(root=MNIST_DATA_PATH, train=False, download=False)

batch_size = 64
# Create the samplers: shuffled batches for training, in-order for testing.
train_sampler = RandomSampler(train_dataset, batch_size=batch_size)
test_sampler = SequentialSampler(test_dataset, batch_size=batch_size)

# Preprocessing pipeline: normalize, pad each border by 2 pixels,
# then convert images to CHW layout.
transform = Compose([
    Normalize(mean=0.1307 * 255, std=0.3081 * 255),
    Pad(2),
    ToMode('CHW'),
])

# Create the DataLoaders.
train_dataloader = DataLoader(train_dataset, train_sampler, transform)
test_dataloader = DataLoader(test_dataset, test_sampler, transform)

for X, y in train_dataloader:
    print("Shape of X: ", X.shape)  # [N, C, H, W]