def make_loader(
        file,
        batch_size,
        device,
        world_size,
        rank,
        nworkers,  # higher values use more CPU cores to decode/augment images in parallel
        size=96,
        image_rng=None,
        image_params=None,
        gpu_augmentation=False,
        nsamples=10000):
    """Build a ``TorchTurboDataLoader`` over the YFCC100m dataset for one rank.

    The reader cycles endlessly over the msgpack-packed samples; the loader's
    ``length`` is set so every one of the ``world_size`` processes performs an
    equal number of iterations per epoch.
    Note: ``file`` and ``nsamples`` are accepted for signature compatibility
    but are not used by this variant.
    """
    # YFCC100m contains 94502424 images in total.
    source = YFCC100mReader()
    # Per-rank iteration count: the dataset is split evenly across
    # `world_size` processes, consumed `batch_size` samples at a time.
    iterations = int(ceil(len(source) / world_size / batch_size))

    endless = Cycler(source)

    # crumpets workers expect msgpack-packed dictionaries (as produced by
    # the datadings library); input and target share the same 9-frame layout.
    frame_spec = ((9 * 3, size, size), np.uint8, IMAGENET_MEAN)
    processor = FCNWorker(
        frame_spec,
        frame_spec,
        image_params=image_params,
        target_image_params=None,
        image_rng=image_rng,
    )

    return TorchTurboDataLoader(
        endless.rawiter(),
        batch_size,
        processor,
        nworkers,
        gpu_augmentation=gpu_augmentation,
        length=iterations * batch_size,
        device=device,  # the device bound to the current process
    )
# --- Example no. 2 ---
def make_loader(file,
                batch_size,
                device,
                world_size,
                rank,
                nworkers,
                size=96,
                image_rng=None,
                image_params=None,
                gpu_augmentation=False,
                nsamples=1000000):
    """Build a ``TorchTurboDataLoader`` over the YFCC100m dataset for one rank.

    Each rank seeks to a disjoint slice of the dataset and then cycles over
    it; the loader's ``length`` is capped at ``nsamples``.
    Note: ``file`` is accepted for signature compatibility but is unused here.
    """
    # YFCC100m contains 94502424 images in total.
    # Basically an iterator which cycles over the samples of the msgpack data.
    reader = YFCC100mReader()
    # Number of samples each rank owns (equal share of the dataset).
    iters = int(floor(len(reader) / world_size))

    # Seek to this rank's slice so the dataloader only sees samples
    # starting at its own offset.
    reader.seek(iters * rank)
    print('\n start_iteration for device {} is {}'.format(rank, iters * rank))

    cycler = Cycler(reader)

    # crumpets workers expect msgpack-packed dictionaries (as produced by
    # the datadings library).
    # BUGFIX: use the `size` parameter instead of hard-coding 96, so callers
    # can actually change the crop size (default 96 preserves old behavior).
    worker = FCNWorker(
        ((9 * 3, size, size), np.uint8, IMAGENET_MEAN),
        ((9 * 3, size, size), np.uint8, IMAGENET_MEAN),
        image_params=image_params,
        target_image_params=None,
        image_rng=image_rng,
    )

    return TorchTurboDataLoader(
        cycler.rawiter(),
        batch_size,
        worker,
        nworkers,
        gpu_augmentation=gpu_augmentation,
        length=nsamples,
        device=device,  # the device bound to the current process
    )