Example No. 1
def get_loader(dataset, cfg, mode='train'):
    assert mode in ('train', 'eval')
    if mode == 'train':
        sampler = RandomSampler(dataset,
                                batch_size=cfg.data.samples_per_gpu,
                                drop_last=True,
                                seed=0)
        loader = DataLoader(dataset,
                            sampler,
                            num_workers=cfg.data.workers_per_gpu)
    else:
        samples_per_gpu = cfg.data.get('eval_samples_per_gpu',
                                       cfg.data.samples_per_gpu)
        workers_per_gpu = cfg.data.get('eval_workers_per_gpu',
                                       cfg.data.workers_per_gpu)
        if cfg.evaluation.multi_process:
            sampler = SequentialSampler(dataset,
                                        batch_size=samples_per_gpu,
                                        drop_last=False)
        else:
            sampler = SequentialSampler(dataset,
                                        batch_size=samples_per_gpu,
                                        drop_last=False,
                                        world_size=1,
                                        rank=0)
        loader = DataLoader(dataset, sampler, num_workers=workers_per_gpu)
    return loader
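For context, a usage sketch under stated assumptions: the helper expects the megengine.data samplers and loader to be in scope, plus an EasyDict-style cfg whose nodes support both attribute access and .get() (the .get() calls above imply a dict-backed config). All names and values below are illustrative, not from the original repo.

import numpy as np
from easydict import EasyDict
from megengine.data import DataLoader, RandomSampler, SequentialSampler
from megengine.data.dataset import ArrayDataset

# Illustrative config; samples_per_gpu/workers_per_gpu values are made up.
cfg = EasyDict(dict(
    data=dict(samples_per_gpu=2, workers_per_gpu=0),
    evaluation=dict(multi_process=False),
))
dataset = ArrayDataset(np.random.rand(10, 3).astype("float32"))
train_loader = get_loader(dataset, cfg, mode='train')
eval_loader = get_loader(dataset, cfg, mode='eval')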
Example No. 2
def build_dataloader(rank, world_size, data_root, ann_file):
    val_dataset = COCOJoints(data_root,
                             ann_file,
                             image_set="val2017",
                             order=("image", "boxes", "info"))
    val_sampler = SequentialSampler(val_dataset,
                                    1,
                                    world_size=world_size,
                                    rank=rank)
    val_dataloader = DataLoader(
        val_dataset,
        sampler=val_sampler,
        num_workers=4,
        transform=T.Compose(
            transforms=[
                T.Normalize(mean=cfg.img_mean, std=cfg.img_std),
                ExtendBoxes(
                    cfg.test_x_ext,
                    cfg.test_y_ext,
                    cfg.input_shape[1] / cfg.input_shape[0],
                    random_extend_prob=0,
                ),
                RandomBoxAffine(
                    degrees=(0, 0),
                    scale=(1, 1),
                    output_shape=cfg.input_shape,
                    rotate_prob=0,
                    scale_prob=0,
                ),
                T.ToMode(),
            ],
            order=("image", "boxes", "info"),
        ),
    )
    return val_dataloader
Example No. 3
def get_loader(dataset, cfg):
    samples_per_gpu = cfg.data.test_samples_per_gpu
    workers_per_gpu = cfg.data.test_workers_per_gpu
    sampler = SequentialSampler(dataset,
                                batch_size=samples_per_gpu,
                                drop_last=False)
    loader = DataLoader(dataset, sampler, num_workers=workers_per_gpu)
    return loader
Example No. 4
def build_dataloader(rank, world_size, data_dir):
    val_dataset = COCODataset(
        os.path.join(data_dir, "val2017"),
        os.path.join(data_dir, "annotations/instances_val2017.json"),
        order=["image", "info"],
    )
    val_sampler = SequentialSampler(val_dataset, 1, world_size=world_size, rank=rank)
    val_dataloader = DataLoader(val_dataset, sampler=val_sampler, num_workers=2)
    return val_dataloader
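The world_size/rank pair here (and in Example No. 2) shards the evaluation set so each process sees a disjoint slice. A minimal sketch of that effect on a toy dataset, relying only on the documented behavior that iterating a megengine sampler yields one list of indices per batch:

import numpy as np
from megengine.data import SequentialSampler
from megengine.data.dataset import ArrayDataset

# Each of the two ranks receives a disjoint share of the 8 samples;
# drop_last defaults to False, so no evaluation sample is skipped.
ds = ArrayDataset(np.arange(8, dtype="int32"))
for rank in range(2):
    sampler = SequentialSampler(ds, batch_size=1, world_size=2, rank=rank)
    print("rank", rank, [idx for batch in sampler for idx in batch])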
Example No. 5
def build_dataset(root=Path('/home/zqh/data/omniglot-py'),
                  nway=5,
                  kshot=1,
                  kquery=1,
                  batch_size=32):

    train_ds = OmniglotDataset(root, nway, kshot, kquery, mode='train')
    train_smp = SequentialSampler(train_ds,
                                  drop_last=True,
                                  batch_size=batch_size)
    train_loader = DataLoader(train_ds, sampler=train_smp, num_workers=4)

    val_ds = OmniglotDataset(root, nway, kshot, kquery, mode='val')
    val_smp = SequentialSampler(val_ds,
                                drop_last=True,
                                batch_size=batch_size)
    val_loader = DataLoader(val_ds, sampler=val_smp, num_workers=4)

    return train_loader, val_loader
Example No. 6
def build_dataloader(rank, world_size, data_dir, cfg):
    val_dataset = data_mapper[cfg.test_dataset["name"]](
        os.path.join(data_dir, cfg.test_dataset["name"],
                     cfg.test_dataset["root"]),
        os.path.join(data_dir, cfg.test_dataset["name"],
                     cfg.test_dataset["ann_file"]),
        order=["image", "info"],
    )
    val_sampler = SequentialSampler(val_dataset,
                                    1,
                                    world_size=world_size,
                                    rank=rank)
    val_dataloader = DataLoader(val_dataset,
                                sampler=val_sampler,
                                num_workers=2)
    return val_dataloader
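data_mapper is a registry defined elsewhere in the repo, mapping the cfg.test_dataset["name"] string to a Dataset class. A plausible sketch of its shape, assuming the COCO and Objects365 classes shipped in megengine.data.dataset:

from megengine.data.dataset import COCO, Objects365

# Hypothetical reconstruction of the registry the snippet relies on.
data_mapper = dict(
    coco=COCO,
    objects365=Objects365,
)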
Example No. 7
from megengine.data.transform import RandomResizedCrop, Normalize, ToMode, Pad, Compose
import megengine.optimizer as optim

mge.set_log_file('log.txt')
logger = mge.get_logger(__name__)

#logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")

# dataset
root_dir = '/data/.cache/dataset/MNIST'
mnist_train_dataset = MNIST(root=root_dir, train=True, download=False)

mnist_test_dataset = MNIST(root=root_dir, train=False, download=False)

random_sampler = RandomSampler(dataset=mnist_train_dataset, batch_size=256)
sequential_sampler = SequentialSampler(dataset=mnist_test_dataset,
                                       batch_size=256)

mnist_train_dataloader = DataLoader(
    dataset=mnist_train_dataset,
    sampler=random_sampler,
    transform=Compose([
        RandomResizedCrop(output_size=28),
        # mean and std are the mean and standard deviation of the MNIST data; pixel values range over 0~255
        #Normalize(mean=0.1307*255, std=0.3081*255),
        #Pad(2),
        # 'CHW' converts images from (height, width, channel) layout to (channel, height, width)
        #ToMode('CHW'),
    ]))
mnist_test_dataloader = DataLoader(
    dataset=mnist_test_dataset,
    sampler=sequential_sampler,
)
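A quick smoke test, added for illustration, that both loaders yield batches:

# Without ToMode('CHW')/Normalize, the images arrive as uint8 arrays in
# (N, H, W, C) layout.
for imgs, labels in mnist_train_dataloader:
    print("train batch:", imgs.shape, labels.shape)
    break
for imgs, labels in mnist_test_dataloader:
    print("test batch:", imgs.shape, labels.shape)
    break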
Example No. 8
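This snippet assumes the Boston housing split already exists as numpy arrays. A stand-in setup with the right shapes (synthetic values, purely illustrative) makes it self-contained:

import numpy as np

# Hypothetical stand-ins for the real 404/102-row Boston split (13 features).
train_dataset = np.random.rand(404, 13).astype("float32")
train_label = np.random.rand(404, 1).astype("float32")
test_dataset = np.random.rand(102, 13).astype("float32")
test_label = np.random.rand(102, 1).astype("float32")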
print(len(train_dataset), len(train_label))
print(len(test_dataset), len(test_label))

from typing import Tuple
from megengine.data.dataset import Dataset

class BostonTrainDataset(Dataset):
    def __init__(self):
        self.data = train_dataset
        self.label = train_label

    def __getitem__(self, index: int) -> Tuple:
        return self.data[index], self.label[index]

    def __len__(self) -> int:
        return len(self.data)

boston_train_dataset = BostonTrainDataset()
print(len(boston_train_dataset))

from megengine.data import DataLoader
from megengine.data import SequentialSampler

sequential_sampler = SequentialSampler(dataset=boston_train_dataset, batch_size=100)
train_dataloader = DataLoader(dataset=boston_train_dataset, sampler=sequential_sampler)

for batch_data, batch_label in train_dataloader:
    print(batch_data.shape, batch_label.shape, len(train_dataloader))
    break
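For comparison, megengine.data.dataset also provides ArrayDataset, which implements exactly this index-and-zip pattern in one line:

from megengine.data.dataset import ArrayDataset

# Equivalent to BostonTrainDataset above: __getitem__ returns
# (data[index], label[index]) and __len__ returns len(data).
boston_train_dataset = ArrayDataset(train_dataset, train_label)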
Example No. 9
# Static graph mode is much faster than dynamic graph mode
trace.enabled = True

if __name__ == '__main__':
    np.random.seed(39)
    train_dataset = XORDataset(30000)
    test_dataset = XORDataset(10000)

    # train_dataset is passed to both the sampler (which draws indices)
    # and the DataLoader (which fetches the actual data)
    train_sampler = RandomSampler(dataset=train_dataset,
                                  batch_size=32,
                                  drop_last=True)
    train_loader = DataLoader(train_dataset, sampler=train_sampler)

    test_sampler = SequentialSampler(dataset=test_dataset,
                                     batch_size=32,
                                     drop_last=False)
    test_loader = DataLoader(test_dataset, sampler=test_sampler)

    # draw_dataset(train_dataset)
    model = build_model()

    optimizer = optim.SGD(
        model.parameters(),
        lr=0.01,
    )

    data = mge.tensor()
    label = mge.tensor(dtype='int32')

    total_epochs = 10
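The listing breaks off here. The empty placeholder tensors and the trace.enabled line indicate the legacy MegEngine 0.x static-graph workflow; as a hedged sketch only, the equivalent loop in the current 1.x API (GradManager) would look roughly like this, assuming build_model() returns a classifier whose logits fit cross_entropy:

import megengine.functional as F
from megengine.autodiff import GradManager

gm = GradManager().attach(model.parameters())
for epoch in range(total_epochs):
    for batch_data, batch_label in train_loader:
        with gm:
            logits = model(mge.tensor(batch_data))
            loss = F.nn.cross_entropy(logits, mge.tensor(batch_label, dtype="int32"))
            gm.backward(loss)          # accumulate gradients
        optimizer.step().clear_grad()  # apply the update, then reset grads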