def camvid_get_datasets_s80(data, load_train=True, load_test=True, num_classes=33):
    """
    Load the CamVid dataset in 3x80x80 size.

    The dataset originally includes 33 keywords. A dataset is formed with 4 or 34
    classes, which include 3 or 33 of the original keywords; the rest of the
    dataset is used to form the last class, i.e. the class of the others.

    The dataset is split into training+validation and test sets. A 90:10
    training+validation:test split is used by default.
    """
    (data_dir, args) = data

    # Map each supported class count to the keyword subset it selects;
    # None means keep all 33 original keywords.
    keyword_subsets = {3: ['Building', 'Sky', 'Tree'], 33: None}
    if num_classes not in keyword_subsets:
        raise ValueError(f'Unsupported num_classes {num_classes}')
    classes = keyword_subsets[num_classes]

    root = os.path.join(data_dir, 'CamVid')

    train_dataset = None
    if load_train:
        train_transform = transforms.Compose([transforms.ToTensor(),
                                              ai8x.normalize(args=args)])
        train_dataset = CamVidDataset(root_dir=root, d_type='train',
                                      im_size=[80, 80], im_overlap=[20, 20],
                                      classes=classes, download=True,
                                      transform=train_transform)

    test_dataset = None
    if load_test:
        test_transform = transforms.Compose([transforms.ToTensor(),
                                             ai8x.normalize(args=args)])
        test_dataset = CamVidDataset(root_dir=root, d_type='test',
                                     im_size=[80, 80], im_overlap=[20, 20],
                                     classes=classes, download=True,
                                     transform=test_transform)
        # Optionally shrink the test set to a single image for quick runs
        if args.truncate_testset:
            test_dataset.img_list = test_dataset.img_list[:1]

    return train_dataset, test_dataset
def cifar10_get_datasets(data, load_train=True, load_test=True):
    """
    Load the CIFAR10 dataset.

    The original training dataset is split into training and validation sets
    (code is inspired by
    https://gist.github.com/kevinzakka/d33bf8d6c7f06a9d8c76d97a7879f5cb).
    By default we use a 90:10 (45K:5K) training:validation split.

    The output of torchvision datasets are PIL Image images of range [0, 1].
    We transform them to Tensors of normalized range [-128/128, +127/128]
    https://github.com/pytorch/tutorials/blob/master/beginner_source/blitz/cifar10_tutorial.py

    Data augmentation: 4 pixels are padded on each side, and a 32x32 crop is
    randomly sampled from the padded image or its horizontal flip. This is
    similar to [1] and some other work that use CIFAR10.
    [1] C.-Y. Lee, S. Xie, P. Gallagher, Z. Zhang, and Z. Tu. Deeply Supervised
    Nets. arXiv:1409.5185, 2014
    """
    data_dir, args = data
    root = os.path.join(data_dir, 'CIFAR10')

    train_dataset = None
    if load_train:
        augment_and_normalize = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            ai8x.normalize(args=args),
        ])
        train_dataset = torchvision.datasets.CIFAR10(root=root, train=True,
                                                     download=True,
                                                     transform=augment_and_normalize)

    test_dataset = None
    if load_test:
        normalize_only = transforms.Compose([transforms.ToTensor(),
                                             ai8x.normalize(args=args)])
        test_dataset = torchvision.datasets.CIFAR10(root=root, train=False,
                                                    download=True,
                                                    transform=normalize_only)
        # Optionally shrink the test set to a single sample for quick runs
        if args.truncate_testset:
            test_dataset.data = test_dataset.data[:1]

    return train_dataset, test_dataset
def AISegment_get_datasets(data, load_train=True, load_test=True, im_size=(80, 80),
                           fold_ratio=1, use_memory=True):
    """
    Load the AISegment dataset.

    The dataset includes matting files of each image: alpha pixel value 0 for
    background and RGB value for portrait. As the AISegment dataset does not have
    explicit test/train separation, the dataset is split into training + validation
    and test sets using given ratio (90:10 by default).

    Images will have `im_size` resolution and the data loader will use memory or
    disk as set by `use_memory`.
    """
    (data_dir, args) = data

    if load_train:
        train_transform = transforms.Compose(
            [transforms.ToTensor(), ai8x.normalize(args=args)])

        train_dataset = AISegment(root_dir=data_dir, d_type='train',
                                  transform=train_transform, im_size=im_size,
                                  fold_ratio=fold_ratio, use_memory=use_memory)
        # Use len() and an f-string instead of calling __len__() directly
        # with %-formatting; the printed text is unchanged.
        print(f'Train dataset length: {len(train_dataset)}\n')
    else:
        train_dataset = None

    if load_test:
        test_transform = transforms.Compose(
            [transforms.ToTensor(), ai8x.normalize(args=args)])

        test_dataset = AISegment(root_dir=data_dir, d_type='test',
                                 transform=test_transform, im_size=im_size,
                                 fold_ratio=fold_ratio, use_memory=use_memory)
        print(f'Test dataset length: {len(test_dataset)}\n')
    else:
        test_dataset = None

    return train_dataset, test_dataset
def face_points_get_datasets(data, load_train=True, load_test=True):
    """
    Load the facial keypoints dataset.

    Images are expected under `<data_dir>/face_points/train` and
    `<data_dir>/face_points/test`, with `train_points.csv` / `test_points.csv`
    in the respective directories providing the keypoint annotations.
    """
    (data_dir, args) = data

    # Derive dataset locations from data_dir instead of the original
    # hard-coded, machine-specific absolute paths.
    base_dir = os.path.join(data_dir, 'face_points')
    train_data_dir = os.path.join(base_dir, 'train')
    test_data_dir = os.path.join(base_dir, 'test')
    train_csv_file = os.path.join(train_data_dir, 'train_points.csv')
    test_csv_file = os.path.join(test_data_dir, 'test_points.csv')

    transform = transforms.Compose([
        ai8x.normalize(args=args)
    ])

    if load_train:
        train_dataset = FacePointsDataset(csv_file=train_csv_file,
                                          root_dir=train_data_dir,
                                          transform=transform)
    else:
        train_dataset = None

    if load_test:
        test_dataset = FacePointsDataset(csv_file=test_csv_file,
                                         root_dir=test_data_dir,
                                         transform=transform)
        if args.truncate_testset:
            test_dataset.data = test_dataset.data[:1]
    else:
        test_dataset = None

    return train_dataset, test_dataset
def bb_get_datasets(data, load_train=True, load_test=True):
    """
    Load the face bounding-box dataset.

    Images are expected under `<data_dir>/face_box_dataset/train` and
    `<data_dir>/face_box_dataset/test`, each containing a `bb.csv` file with the
    bounding-box annotations. Images are resized to 80x80.
    """
    (data_dir, args) = data

    # Derive dataset locations from data_dir instead of the original
    # hard-coded, machine-specific absolute paths.
    base_dir = os.path.join(data_dir, 'face_box_dataset')
    train_data_dir = os.path.join(base_dir, 'train')
    test_data_dir = os.path.join(base_dir, 'test')
    train_csv_file = os.path.join(train_data_dir, 'bb.csv')
    test_csv_file = os.path.join(test_data_dir, 'bb.csv')

    transform = transforms.Compose([
        transforms.Resize((80, 80)),
        ai8x.normalize(args=args)
    ])

    if load_train:
        train_dataset = BBDataset(csv_file=train_csv_file,
                                  root_dir=train_data_dir,
                                  transform=transform)
    else:
        train_dataset = None

    if load_test:
        test_dataset = BBDataset(csv_file=test_csv_file,
                                 root_dir=test_data_dir,
                                 transform=transform)
        if args.truncate_testset:
            test_dataset.data = test_dataset.data[:1]
    else:
        test_dataset = None

    return train_dataset, test_dataset
def speechcom_get_datasets(data, load_train=True, load_test=True, num_classes=6):
    """
    Load the SpeechCom v0.02 dataset
    (https://storage.cloud.google.com/download.tensorflow.org/data/speech_commands_v0.02.tar.gz).

    The dataset originally includes 30 keywords. A dataset is formed with 7
    classes, which includes 6 of the original keywords ('up', 'down', 'left',
    'right', 'stop', 'go'); the rest of the dataset is used to form the last
    class, i.e. the class of the others.

    The dataset is split into training, validation and test sets with an
    80:10:10 split by default. Data is augmented 5x by random stretch, shift and
    added noise, with the stretch coefficient, shift amount and noise variance
    drawn from [0.8, 1.3], [-0.1, 0.1] and [0, 1] respectively.
    """
    (data_dir, args) = data

    # Supported keyword subsets, keyed by class count.
    keyword_sets = {
        6: ['up', 'down', 'left', 'right', 'stop', 'go'],
        20: ['up', 'down', 'left', 'right', 'stop', 'go', 'yes', 'no', 'on',
             'off', 'one', 'two', 'three', 'four', 'five', 'six', 'seven',
             'eight', 'nine', 'zero'],
    }
    if num_classes not in keyword_sets:
        raise ValueError(f'Unsupported num_classes {num_classes}')
    classes = keyword_sets[num_classes]

    transform = transforms.Compose(
        [transforms.ToTensor(), ai8x.normalize(args=args)])

    train_dataset = None
    if load_train:
        train_dataset = SpeechCom(root=data_dir, classes=classes, d_type='train',
                                  n_augment=4, transform=transform, download=True)

    test_dataset = None
    if load_test:
        # NOTE(review): the 'val' split is used as the test set here — confirm
        # this is intended.
        test_dataset = SpeechCom(root=data_dir, classes=classes, d_type='val',
                                 n_augment=4, transform=transform, download=True)
        if args.truncate_testset:
            test_dataset.data = test_dataset.data[:1]

    return train_dataset, test_dataset
def KWS_get_datasets(data, load_train=True, load_test=True, num_classes=6):
    """
    Load the folded 1D version of SpeechCom dataset

    The dataset is loaded from the archive file, so the file is required for this
    version. The dataset originally includes 30 keywords. A dataset is formed
    with 7 or 21 classes, which includes 6 or 20 of the original keywords; the
    rest of the dataset is used to form the last class, i.e. the class of the
    others.

    The dataset is split into training+validation and test sets. A 90:10
    training+validation:test split is used by default. Data is augmented 3x by
    random stretch/shift and added noise, with the stretch coefficient, shift
    amount and noise variance drawn from [0.8, 1.3], [-0.1, 0.1] and [0, 1]
    respectively.
    """
    (data_dir, args) = data

    transform = transforms.Compose([ai8x.normalize(args=args)])

    if num_classes in (6, 20):
        # Pick the module-level `datasets` entry whose output list has
        # num_classes keywords plus the trailing 'others' class.
        # (Original wrapped this in an enumerate() whose index was discarded.)
        classes = next(e for e in datasets
                       if len(e['output']) - 1 == num_classes)['output'][:-1]
    else:
        raise ValueError(f'Unsupported num_classes {num_classes}')

    augmentation = {'aug_num': 2}
    quantization_scheme = {'compand': False, 'mu': 10}

    if load_train:
        train_dataset = KWS(root=data_dir, classes=classes, d_type='train',
                            transform=transform, t_type='keyword',
                            quantization_scheme=quantization_scheme,
                            augmentation=augmentation, download=True)
    else:
        train_dataset = None

    if load_test:
        test_dataset = KWS(root=data_dir, classes=classes, d_type='test',
                           transform=transform, t_type='keyword',
                           quantization_scheme=quantization_scheme,
                           augmentation=augmentation, download=True)
        if args.truncate_testset:
            test_dataset.data = test_dataset.data[:1]
    else:
        test_dataset = None

    return train_dataset, test_dataset
def KWS_get_datasets(data, load_train=True, load_test=True, num_classes=6):
    """
    Load the folded 1D version of SpeechCom dataset

    The dataset is loaded from the archive file, so the file is required for this
    version. The dataset originally includes 30 keywords. A dataset is formed
    with 7 or 21 classes, which includes 6 or 20 of the original keywords; the
    rest of the dataset is used to form the last class, i.e. the class of the
    others.

    The dataset is split into training, validation and test sets with an
    80:10:10 split by default. Data is augmented 3x by random stretch, shift and
    added noise, with the stretch coefficient, shift amount and noise variance
    drawn from [0.8, 1.3], [-0.1, 0.1] and [0, 1] respectively.

    NOTE(review): another `KWS_get_datasets` is defined earlier in this file;
    at import time the later definition shadows the earlier one — confirm which
    variant is intended to be in effect.
    """
    (data_dir, args) = data

    transform = transforms.Compose([ai8x.normalize(args=args)])

    # Supported keyword subsets, keyed by class count.
    keyword_sets = {
        6: ['up', 'down', 'left', 'right', 'stop', 'go'],
        20: ['up', 'down', 'left', 'right', 'stop', 'go', 'yes', 'no', 'on',
             'off', 'one', 'two', 'three', 'four', 'five', 'six', 'seven',
             'eight', 'nine', 'zero'],
    }
    if num_classes not in keyword_sets:
        raise ValueError(f'Unsupported num_classes {num_classes}')
    classes = keyword_sets[num_classes]

    train_dataset = None
    if load_train:
        train_dataset = KWS(root=data_dir, classes=classes, d_type='train',
                            transform=transform, t_type='keyword', download=True)

    test_dataset = None
    if load_test:
        # The 'val' split serves as the test set here.
        test_dataset = KWS(root=data_dir, classes=classes, d_type='val',
                           transform=transform, t_type='keyword', download=True)
        if args.truncate_testset:
            test_dataset.data = test_dataset.data[:1]

    return train_dataset, test_dataset
def face_classifier_get_datasets_80x80(data, load_train=True, load_test=True):
    """
    Load the face classifier dataset as 1x80x80 grayscale images.

    Data is read via torchvision `ImageFolder` from
    `<data_dir>/face_classifier_dataset/train` and `.../test`. Training data is
    augmented with random horizontal flips and small random affine transforms.
    """
    (data_dir, args) = data

    # Derive dataset locations from data_dir instead of the original
    # hard-coded, machine-specific absolute paths.
    base_dir = os.path.join(data_dir, 'face_classifier_dataset')
    training_data_path = os.path.join(base_dir, 'train')
    test_data_path = os.path.join(base_dir, 'test')

    if load_train:
        # Use the single `transforms` alias throughout (the original mixed
        # `transforms.` and `torchvision.transforms.`); removed commented-out
        # RandomCrop dead code.
        train_transform = transforms.Compose([
            transforms.Resize((80, 80)),
            transforms.Grayscale(num_output_channels=1),
            transforms.ToTensor(),
            transforms.RandomHorizontalFlip(),
            transforms.RandomAffine(degrees=5, scale=(0.75, 1.25),
                                    translate=(0.25, 0.25)),
            ai8x.normalize(args=args)
        ])

        train_dataset = torchvision.datasets.ImageFolder(
            root=training_data_path, transform=train_transform)
        print(train_dataset.classes)
    else:
        train_dataset = None

    if load_test:
        test_transform = transforms.Compose([
            transforms.Resize((80, 80)),
            transforms.Grayscale(num_output_channels=1),
            transforms.ToTensor(),
            ai8x.normalize(args=args)
        ])

        test_dataset = torchvision.datasets.ImageFolder(
            root=test_data_path, transform=test_transform)
    else:
        test_dataset = None

    return train_dataset, test_dataset
def faceid_get_datasets(data, load_train=True, load_test=True):
    """
    Load the FaceID dataset.

    The dataset is loaded from archive files, which must be present. Two
    different datasets are combined: VGGFace2 supplies the training set and
    YouTubeFaces supplies the test set, so that losses on YTFaces can be
    tracked for benchmarking the proof-of-concept models. All images are
    3-channel 160x120 face crops.
    """
    (data_dir, args) = data

    # These are hard coded for now, need to come from above in future.
    train_resample_subj = 1
    train_resample_img_per_subj = 6
    test_resample_subj = 1
    test_resample_img_per_subj = 2

    transform = transforms.Compose([ai8x.normalize(args=args)])

    train_dataset = None
    if load_train:
        train_dataset = VGGFace2Dataset(
            root_dir=os.path.join(data_dir, 'VGGFace-2'), d_type='train',
            transform=transform, resample_subj=train_resample_subj,
            resample_img_per_subj=train_resample_img_per_subj)

    test_dataset = None
    if load_test:
        test_dataset = YouTubeFacesDataset(
            root_dir=os.path.join(data_dir, 'YouTubeFaces'), d_type='test',
            transform=transform, resample_subj=test_resample_subj,
            resample_img_per_subj=test_resample_img_per_subj)
        if args.truncate_testset:
            test_dataset.data = test_dataset.data[:1]

    return train_dataset, test_dataset
def MSnoise_get_datasets(data, load_train=True, load_test=True):
    """
    Load the folded 1D version of the MS Scalable Noisy Speech dataset (MS-SNSD).

    The dataset is loaded from the archive file, so the file is required for
    this version. The dataset originally includes 26 different noise types;
    15 of them are chosen for classification and the others are labeled as the
    unknown class.
    """
    (data_dir, args) = data

    # The 15 noise types kept as explicit classes.
    classes = ['AirConditioner', 'AirportAnnouncements', 'Babble', 'Bus',
               'CafeTeria', 'Car', 'CopyMachine', 'Metro', 'Office',
               'Restaurant', 'ShuttingDoor', 'Traffic', 'Typing',
               'VacuumCleaner', 'Washing']

    transform = transforms.Compose([ai8x.normalize(args=args)])

    train_dataset = None
    if load_train:
        train_dataset = MSnoise(root=data_dir, classes=classes, d_type='train',
                                remove_unknowns=True, transform=transform,
                                quantize=True, download=True)

    test_dataset = None
    if load_test:
        test_dataset = MSnoise(root=data_dir, classes=classes, d_type='test',
                               remove_unknowns=True, transform=transform,
                               quantize=True, download=True)
        if args.truncate_testset:
            test_dataset.data = test_dataset.data[:1]

    return train_dataset, test_dataset
def afsk_get_datasets(data, load_train=True, load_test=True):
    """
    Load AFSK dataset.
    """
    (data_dir, args) = data

    transform = transforms.Compose([ai8x.normalize(args=args)])

    train_dataset = (AFSK(root=data_dir, train=True, transform=transform)
                     if load_train else None)
    test_dataset = (AFSK(root=data_dir, train=False, transform=transform)
                    if load_test else None)

    return train_dataset, test_dataset
def catsdogs_get_datasets(data, load_train=True, load_test=True):
    """
    Load Cats & Dogs dataset

    On first use, the raw Kaggle 'dogs-vs-cats' download (expected in the
    current working directory) is split into
    `<data_dir>/cats_vs_dogs/{train,test}/{cats,dogs}` subdirectories with a
    random 80:20 train:test split; the raw download directory is removed
    afterwards. Subsequent calls reuse the prepared directory tree.
    """
    (data_dir, args) = data

    # First look for an already-prepared dataset under data_dir.
    path = data_dir
    dataset_path = os.path.join(path, "cats_vs_dogs")
    is_dir = os.path.isdir(dataset_path)
    if not is_dir:
        # Fall back to the raw Kaggle download in the current working directory.
        path = os.getcwd()
        dataset_path = os.path.join(path, "dogs-vs-cats")
        is_dir = os.path.isdir(dataset_path)
        if not is_dir:
            # Neither the prepared nor the raw dataset exists: print download
            # instructions and abort.
            print("******************************************")
            print("Please follow instructions below:")
            print(
                "Download the dataset in the current working directory by visiting this link"
                "\'https://www.kaggle.com/c/dogs-vs-cats/data\'")
            print("and click the \'Download all\' button")
            print("If you do not have a Kaggle account, sign-up first.")
            print(
                "Unzip \'dogs-vs-cats.zip\' and you will see train.zip, test1.zip and .csv "
                " file. Unzip the train.zip file and re-run the script")
            print("******************************************")
            sys.exit("Dataset not found..")
        else:
            # check if train set exists
            path = os.getcwd()
            dataset_path = os.path.join(path, "dogs-vs-cats", "train")
            is_dir = os.path.isdir(dataset_path)
            if not is_dir:
                sys.exit(
                    "Unzip \'train.zip\' file from dogs-vs-cats directory")

            # create directories
            # NOTE(review): makedirs/listdir/random are bare names, presumably
            # imported at file top from os and random — confirm.
            dataset_home = os.path.join(data_dir, "cats_vs_dogs")
            newdir = os.path.join(dataset_home, "train", "dogs")
            makedirs(newdir, exist_ok=True)
            newdir = os.path.join(dataset_home, "train", "cats")
            makedirs(newdir, exist_ok=True)
            newdir = os.path.join(dataset_home, "test", "dogs")
            makedirs(newdir, exist_ok=True)
            newdir = os.path.join(dataset_home, "test", "cats")
            makedirs(newdir, exist_ok=True)

            # define ratio of pictures to use for test set
            test_ratio = 0.2

            # copy training dataset images into subdirectories
            src_directory = os.path.join(path, "dogs-vs-cats", "train")
            for file in listdir(src_directory):
                src = os.path.join(src_directory, file)
                # Each file lands in train/ or, with probability test_ratio,
                # in test/; the split is random and unseeded here.
                dst_dir = os.path.join(dataset_home, "train")
                if random() < test_ratio:
                    dst_dir = os.path.join(dataset_home, "test")
                # Kaggle filenames start with "cat" or "dog", which determines
                # the class subdirectory.
                if file.startswith("cat"):
                    dst = os.path.join(dst_dir, "cats", file)
                    shutil.copyfile(src, dst)
                elif file.startswith("dog"):
                    dst = os.path.join(dst_dir, "dogs", file)
                    shutil.copyfile(src, dst)
            # Remove the raw download after the split is prepared.
            shutil.rmtree("dogs-vs-cats")

    training_data_path = os.path.join(data_dir, "cats_vs_dogs")
    training_data_path = os.path.join(training_data_path, "train")
    test_data_path = os.path.join(data_dir, "cats_vs_dogs")
    test_data_path = os.path.join(test_data_path, "test")

    # Loading and normalizing train dataset
    if load_train:
        train_transform = transforms.Compose([
            transforms.Resize((64, 64)),
            transforms.RandomAffine(degrees=20, translate=(0.1, 0.1), shear=5),
            transforms.ToTensor(),
            ai8x.normalize(args=args)
        ])

        train_dataset = torchvision.datasets.ImageFolder(
            root=training_data_path, transform=train_transform)
    else:
        train_dataset = None

    # Loading and normalizing test dataset
    if load_test:
        test_transform = transforms.Compose([
            transforms.Resize((64, 64)),
            transforms.ToTensor(),
            ai8x.normalize(args=args)
        ])

        test_dataset = torchvision.datasets.ImageFolder(
            root=test_data_path, transform=test_transform)

        if args.truncate_testset:
            test_dataset.data = test_dataset.data[:1]
    else:
        test_dataset = None

    return train_dataset, test_dataset
def imagenet_get_datasets(data, load_train=True, load_test=True, input_size=224,
                          folder=False):
    """
    Load the ImageNet 2012 Classification dataset.

    Training images get a random resized crop to `input_size` and a random
    horizontal flip, then normalization with the standard ImageNet channel
    statistics followed by ai8x normalization. Test images are resized, center
    cropped to `input_size` and normalized the same way.

    When `folder` is True, data is read from plain `ImageFolder` directories
    (`train`/`val` under data_dir) instead of the torchvision ImageNet layout.
    """
    (data_dir, args) = data

    # Standard ImageNet per-channel statistics.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    train_dataset = None
    if load_train:
        train_transform = transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
            ai8x.normalize(args=args),
        ])

        if folder:
            train_dataset = torchvision.datasets.ImageFolder(
                os.path.join(data_dir, 'train'),
                transform=train_transform,
            )
        else:
            train_dataset = torchvision.datasets.ImageNet(
                data_dir,
                split='train',
                transform=train_transform,
            )

    test_dataset = None
    if load_test:
        test_transform = transforms.Compose([
            transforms.Resize(int(input_size / 0.875)),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
            ai8x.normalize(args=args),
        ])

        if folder:
            test_dataset = torchvision.datasets.ImageFolder(
                os.path.join(data_dir, 'val'),
                transform=test_transform,
            )
        else:
            test_dataset = torchvision.datasets.ImageNet(
                data_dir,
                split='val',
                transform=test_transform,
            )

        if args.truncate_testset:
            test_dataset.data = test_dataset.data[:1]

    return train_dataset, test_dataset