def load_dataset(subset: Subset) -> batches.BatchGenerator:
    """Build a shuffled batch generator over the given pets subset.

    Pixel values are rescaled from [0, 255] to [-1, 1] and images are
    converted from HWC to CHW layout as float32 before batching.
    """
    pets = PetsDataset('../data/cifar-10-batches-py', subset)
    preprocess = ops.chain([
        ops.hwc2chw(),
        ops.add(-127.5),
        ops.mul(1 / 127.5),
        ops.type_cast(np.float32),
    ])
    return batches.BatchGenerator(pets, 128, True, preprocess)
def load_dataset(subset: Subset, augment=False) -> batches.BatchGenerator:
    """Build a shuffled batch generator with optional data augmentation.

    When ``augment`` is True, random horizontal flips and random crops
    are applied first. Pixels are then scaled to [0, 1], normalized with
    CIFAR-10 per-channel statistics, and converted to CHW float32.
    """
    pets = PetsDataset('../data/cifar-10-batches-py', subset)

    pipeline = []
    if augment:
        pipeline.append(ops.hflip())
        pipeline.append(ops.rcrop(32, 12, 'constant'))

    pipeline.append(ops.mul(1 / 255))
    pipeline.append(ops.type_cast(np.float32))
    # ImageNet statistics (alternative, kept for reference):
    #   mean=np.array([0.485, 0.456, 0.406]),
    #   std=np.array([0.229, 0.224, 0.225])
    # CIFAR-10 statistics:
    pipeline.append(ops.normalize(
        mean=np.array([0.41477802, 0.45935813, 0.49693552]),
        std=np.array([0.25241926, 0.24699265, 0.25279155])))
    pipeline.append(ops.hwc2chw())

    return batches.BatchGenerator(pets, 128, True, ops.chain(pipeline))
import numpy as np

from dlvc.dataset import Subset
from dlvc.datasets.pets import PetsDataset
from dlvc import ops, batches

# Compute the per-channel mean and std of the training set; the printed
# values are the ones pasted into ops.normalize(...) in load_dataset.
dataset = PetsDataset('../data/cifar-10-batches-py', Subset.TRAINING)
op = ops.chain([ops.mul(1 / 255), ops.type_cast(np.float32)])

# Fix: use len(dataset) as the batch size instead of the hard-coded 7959,
# so the script stays correct if the dataset size ever changes.
batch_generator = batches.BatchGenerator(dataset, len(dataset), True, op)

training_images = [batch.data for batch in batch_generator]
# Fix: concatenate along the sample axis. The previous
# reshape(training_images.shape[1:]) silently dropped the leading batch
# axis and was only correct when the data fit in exactly one batch.
training_images = np.concatenate(training_images, axis=0).astype(np.float32)

# Per-channel statistics, reducing over (N, H, W).
# NOTE(review): assumes batch.data is in HWC layout here — confirm.
train_mean = np.mean(training_images, axis=(0, 1, 2))
train_std = np.std(training_images, axis=(0, 1, 2))
print(train_mean, train_std)
def load_dataset(subset: Subset) -> batches.BatchGenerator:
    """Build a shuffled single-batch generator of flattened float32 images.

    The batch size equals the dataset length, so one batch covers the
    whole subset; each image is vectorized to a flat float32 array.
    """
    pets = PetsDataset('../data/cifar-10-batches-py', subset)
    flatten = ops.chain([ops.vectorize(), ops.type_cast(np.float32)])
    return batches.BatchGenerator(pets, len(pets), True, flatten)
# Sanity checks for the dataset and BatchGenerator; expects `dataset`,
# `np`, `ops`, and `batches` to already be in scope.

# Inspect one sample's index and label.
test_sample = dataset[1]
print(f'Index of test sample: {test_sample.idx}')
print(f'Label of test sample: {test_sample.label}')

# Optional visual check of the raw image (requires cv2).
#cv2.imshow('Sample Image', test_sample.data)
#cv2.waitKey(0)
#cv2.destroyAllWindows()

# Flatten each image to a float32 vector before batching.
op = ops.chain([
    ops.vectorize(),
    ops.type_cast(np.float32)
])

#The number of training batches is 1 if the batch size is set to the number of samples in the dataset
generator = batches.BatchGenerator(dataset, len(dataset), True, op)
print(len(generator))

#The number of training batches is 16 if the batch size is set to 500
generator = batches.BatchGenerator(dataset, 500, True, op)
print(len(generator))

#The data and label shapes are (500, 3072) and (500,), respectively, unless for the last batch
#The data type is always np.float32 and the label type is integral (one of the np.int and np.uint variants)
#generator = batches.BatchGenerator(dataset, 20, False, op)

# Drain the generator once; uncomment the prints below to inspect each batch.
for i in iter(generator):
    pass
#    print(i.idx.shape)
#    print(i.data)
#    print(i.data.dtype)
#    print(i.label.shape)