def create_sinusoid_task_distribution(min_amplitude=0.1,
                                      max_amplitude=5.0,
                                      min_phase=0.0,
                                      max_phase=np.pi,
                                      min_x=-5.0,
                                      max_x=5.0,
                                      num_training_samples=10,
                                      num_test_samples=100,
                                      num_test_tasks=100,
                                      meta_batch_size=5):
    """
    Returns TaskDistributions over sinusoid regression tasks: on each reset, a task draws a new
    sinusoid (amplitude and phase sampled uniformly from the given ranges) together with fresh
    training and test points in [min_x, max_x]. The same task list is shared by the train,
    validation, and test distributions, since tasks are generated on the fly rather than drawn
    from disjoint pools.
    """
    tasks_list = [
        SinusoidTask(min_amplitude=min_amplitude,
                     max_amplitude=max_amplitude,
                     min_phase=min_phase,
                     max_phase=max_phase,
                     min_x=min_x,
                     max_x=max_x,
                     num_training_samples=num_training_samples,
                     num_test_samples=num_test_samples)
    ]

    metatrain_task_distribution = TaskDistribution(tasks=tasks_list,
                                                   task_probabilities=[1.0],
                                                   batch_size=meta_batch_size,
                                                   sample_with_replacement=True)
    metaval_task_distribution = TaskDistribution(tasks=tasks_list,
                                                 task_probabilities=[1.0],
                                                 batch_size=meta_batch_size,
                                                 sample_with_replacement=True)
    metatest_task_distribution = TaskDistribution(tasks=tasks_list,
                                                  task_probabilities=[1.0],
                                                  batch_size=meta_batch_size,
                                                  sample_with_replacement=True)

    return metatrain_task_distribution, metaval_task_distribution, metatest_task_distribution
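
# Example usage (a minimal sketch; `sample_batch()' is assumed to be the TaskDistribution
# method that resets and returns a meta-batch of tasks):
#
#   metatrain, metaval, metatest = create_sinusoid_task_distribution(
#       num_training_samples=10,   # K=10 support points per regression task
#       num_test_samples=100,
#       meta_batch_size=4)
#   task_batch = metatrain.sample_batch()   # 4 freshly reset SinusoidTask objects
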
def create_omniglot_allcharacters_task_distribution(path_to_pkl,
                                                    num_training_samples_per_class=10,
                                                    num_test_samples_per_class=-1,
                                                    num_training_classes=20,
                                                    meta_batch_size=5):
    """
    Returns a TaskDistribution that, on each reset, samples a different set of Omniglot characters.

    Arguments:
    path_to_pkl : string
        Path to the pkl-wrapped Omniglot dataset. This can be generated from the standard
        dataset using the supplied make_omniglot_dataset.py script.
    num_training_samples_per_class : int
        If -1, sample from the whole dataset. If >=1, the dataset will re-sample
        `num_training_samples_per_class' for each class at each reset, and sample minibatches
        exclusively from them, until the next reset. This is useful for, e.g., k-shot
        classification.
    num_test_samples_per_class : int
        Same as `num_training_samples_per_class'. Used to generate test sets for tasks
        on reset().
    num_training_classes : int
        If -1, use all the classes in `y'. If >=1, the dataset will re-sample
        `num_training_classes' at each reset, and sample minibatches exclusively from them,
        until the next reset.
    meta_batch_size : int
        Default number of tasks sampled per meta-batch.

    Returns:
    metatrain_task_distribution : TaskDistribution
        TaskDistribution object for use during training.
    metaval_task_distribution : TaskDistribution
        TaskDistribution object for use during model validation (currently identical to the
        meta-test distribution; see the TODO below).
    metatest_task_distribution : TaskDistribution
        TaskDistribution object for use during testing.
    """
    with open(path_to_pkl, 'rb') as f:
        d = pickle.load(f)

    trainX_ = d['trainX']
    trainY_ = d['trainY']
    testX_ = d['testX']
    testY_ = d['testY']

    trainX_.extend(testX_)
    trainY_.extend(testY_)

    global charomniglot_trainX
    global charomniglot_trainY
    global charomniglot_testX
    global charomniglot_testY

    # Re-split the alphabets: the first `cutoff' alphabets are used for meta-training,
    # the remaining ones for meta-testing.
    cutoff = 36
    charomniglot_trainX = trainX_[:cutoff]
    charomniglot_trainY = trainY_[:cutoff]
    charomniglot_testX = trainX_[cutoff:]
    charomniglot_testY = trainY_[cutoff:]

    # Create a single large dataset with all characters, separately for train and test,
    # and offset the targets so that labels are unique across alphabets.
    trX = []
    trY = []
    teX = []
    teY = []

    cur_label_start = 0
    for alphabet_i in range(len(charomniglot_trainY)):
        charomniglot_trainY[alphabet_i] += cur_label_start
        trX.extend(charomniglot_trainX[alphabet_i])
        trY.extend(charomniglot_trainY[alphabet_i])
        cur_label_start += len(set(charomniglot_trainY[alphabet_i]))

    cur_label_start = 0
    for alphabet_i in range(len(charomniglot_testY)):
        charomniglot_testY[alphabet_i] += cur_label_start
        teX.extend(charomniglot_testX[alphabet_i])
        teY.extend(charomniglot_testY[alphabet_i])
        cur_label_start += len(set(charomniglot_testY[alphabet_i]))

    trX = np.asarray(trX, dtype=np.float32) / 255.0
    trY = np.asarray(trY, dtype=np.float32)
    teX = np.asarray(teX, dtype=np.float32) / 255.0
    teY = np.asarray(teY, dtype=np.float32)

    charomniglot_trainX = trX
    charomniglot_trainY = trY
    charomniglot_testX = teX
    charomniglot_testY = teY

    metatrain_tasks_list = [
        ClassificationTask(charomniglot_trainX,
                           charomniglot_trainY,
                           num_training_samples_per_class,
                           num_test_samples_per_class,
                           num_training_classes,
                           split_train_test=-1)  # defaults to num_train / (num_train+num_test)
    ]
    metatest_tasks_list = [
        ClassificationTask(charomniglot_testX,
                           charomniglot_testY,
                           num_training_samples_per_class,
                           num_test_samples_per_class,
                           num_training_classes,
                           split_train_test=-1)
    ]

    metatrain_task_distribution = TaskDistribution(tasks=metatrain_tasks_list,
                                                   task_probabilities=[1.0],
                                                   batch_size=meta_batch_size,
                                                   sample_with_replacement=True)
    metatest_task_distribution = TaskDistribution(tasks=metatest_tasks_list,
                                                  task_probabilities=[1.0],
                                                  batch_size=meta_batch_size,
                                                  sample_with_replacement=True)

    # TODO: split into proper validation and test distributions!
    return metatrain_task_distribution, metatest_task_distribution, metatest_task_distribution
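
# Example usage (a minimal sketch; 'datasets/omniglot.pkl' is a hypothetical path to the
# file produced by make_omniglot_dataset.py). A 20-way, 1-shot setup; note that, until the
# TODO above is resolved, the validation and test distributions are the same object:
#
#   metatrain, metaval, metatest = create_omniglot_allcharacters_task_distribution(
#       'datasets/omniglot.pkl',
#       num_training_samples_per_class=1,   # 1-shot
#       num_training_classes=20,            # 20-way
#       meta_batch_size=5)
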
def create_cifar100_task_distribution(num_training_samples_per_class=-1,
                                      num_test_samples_per_class=-1,
                                      num_training_classes=10,
                                      meta_train_test_split=0.7,
                                      meta_batch_size=5):
    """
    Returns a TaskDistribution that, on each reset, samples a different set of CIFAR-100 classes.

    Note that the first time this function is called on a new system, it will download the
    CIFAR-100 dataset, which may take some time (usually less than 5 minutes).

    Arguments:
    num_training_samples_per_class : int
        If -1, sample from the whole dataset. If >=1, the dataset will re-sample
        `num_training_samples_per_class' for each class at each reset, and sample minibatches
        exclusively from them, until the next reset. This is useful for, e.g., k-shot
        classification.
    num_test_samples_per_class : int
        Same as `num_training_samples_per_class'. Used to generate test sets for tasks
        on reset().
    num_training_classes : int
        If -1, use all the classes in `y'. If >=1, the dataset will re-sample
        `num_training_classes' at each reset, and sample minibatches exclusively from them,
        until the next reset.
    meta_train_test_split : float
        Proportion of classes to use for the meta-training set. E.g., split=0.7 means that
        int(0.7*100)=70 classes will be used for meta-training, while the remaining 30 classes
        will be used for meta-testing.
    meta_batch_size : int
        Default number of tasks sampled per meta-batch.

    Returns:
    metatrain_task_distribution : TaskDistribution
        TaskDistribution object for use during training.
    metaval_task_distribution : TaskDistribution
        TaskDistribution object for use during model validation (currently identical to the
        meta-test distribution; see the TODO below).
    metatest_task_distribution : TaskDistribution
        TaskDistribution object for use during testing.
    """
    global cifar100_trainX
    global cifar100_trainY
    global cifar100_testX
    global cifar100_testY

    ((train_data, train_labels),
     (eval_data, eval_labels)) = tf.keras.datasets.cifar100.load_data(label_mode='fine')

    all_x = np.concatenate((train_data, eval_data), axis=0)
    all_y = np.concatenate((train_labels, eval_labels), axis=0)

    # The first `split_class' classes are reserved for meta-training, the rest for meta-testing.
    split_class = int(meta_train_test_split * 100)
    meta_train_classes = list(range(split_class))
    meta_test_classes = list(range(split_class, 100))

    meta_train_indices = []
    for c in meta_train_classes:
        c_indices = np.where(all_y == c)[0]
        meta_train_indices.extend(c_indices)

    meta_test_indices = []
    for c in meta_test_classes:
        c_indices = np.where(all_y == c)[0]
        meta_test_indices.extend(c_indices)

    # TODO: subtract the mean of the training images (over axis=0) from both trainX and testX.

    # (Disabled: optionally resize all images to 224x224.)
    """
    from copy import copy
    import cv2
    old_x = copy(all_x)
    all_x = np.ones([old_x.shape[0], 224, 224, 3], dtype=np.float32)
    for i in range(old_x.shape[0]):
        all_x[i, :, :, :] = cv2.resize(old_x[i, :, :, :], (224, 224))
    """

    cifar100_trainX = all_x[meta_train_indices, :].astype(np.float32) / 255.0
    cifar100_trainY = np.squeeze(all_y[meta_train_indices]).astype(np.int64)
    cifar100_testX = all_x[meta_test_indices, :].astype(np.float32) / 255.0
    cifar100_testY = np.squeeze(all_y[meta_test_indices]).astype(np.int64)

    metatrain_tasks_list = [
        ClassificationTask(cifar100_trainX,
                           cifar100_trainY,
                           num_training_samples_per_class,
                           num_test_samples_per_class,
                           num_training_classes,
                           split_train_test=-1)  # defaults to num_train / (num_train+num_test)
    ]
    metatest_tasks_list = [
        ClassificationTask(cifar100_testX,
                           cifar100_testY,
                           num_training_samples_per_class,
                           num_test_samples_per_class,
                           num_training_classes,
                           split_train_test=-1)
    ]

    metatrain_task_distribution = TaskDistribution(tasks=metatrain_tasks_list,
                                                   task_probabilities=[1.0],
                                                   batch_size=meta_batch_size,
                                                   sample_with_replacement=True)
    metatest_task_distribution = TaskDistribution(tasks=metatest_tasks_list,
                                                  task_probabilities=[1.0],
                                                  batch_size=meta_batch_size,
                                                  sample_with_replacement=True)

    # TODO: split into proper validation and test distributions!
    return metatrain_task_distribution, metatest_task_distribution, metatest_task_distribution
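
# Example usage (a minimal sketch): with meta_train_test_split=0.8, classes 0-79 are
# reserved for meta-training and classes 80-99 for meta-testing; each reset then draws
# `num_training_classes' classes from the corresponding pool:
#
#   metatrain, _, metatest = create_cifar100_task_distribution(
#       num_training_samples_per_class=5,   # 5-shot
#       num_training_classes=10,            # 10-way
#       meta_train_test_split=0.8,
#       meta_batch_size=5)
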
def create_core50_from_npz_task_distribution(path_to_dataset,
                                             batch_size=32,
                                             num_training_samples_per_class=10,
                                             num_test_samples_per_class=-1,
                                             num_training_classes=20,
                                             meta_batch_size=5):
    """
    Returns a TaskDistribution that, on each reset, samples a different set of CORe50 objects.
    Data is loaded from an npz-wrapped copy of the dataset; object labels and background
    (session) labels are recovered from the file paths stored alongside the images.
    """
    imgs, paths = load_npz_file(path_to_dataset)

    global core50_images
    core50_images = imgs

    def get_session_objects(session_num, path_file):
        # Collect the indices and object labels of all images recorded in session
        # `session_num'. Paths have the form 's<session>/o<object>/...'.
        session_indexes = []
        session_labels = []
        for index, path in enumerate(path_file):
            splitted_path = path.split('/')
            if splitted_path[0] == 's' + str(session_num):
                for i in range(1, 51):
                    if splitted_path[1] == 'o' + str(i):
                        session_indexes.append(index)
                        session_labels.append(i)
        return session_indexes, session_labels

    def dataset_from_npz(session_nums, path_file):
        # Object index numbers in the npz file.
        X_indexes = []
        # Object labels.
        y = []
        # Background (session) labels.
        b = []
        for session_num in session_nums:
            session_indexes, session_labels = get_session_objects(session_num, path_file)
            X_indexes.extend(session_indexes)
            y.extend(session_labels)
            for i in range(len(session_indexes)):
                b.append(session_num)
        X_indexes = np.asarray(X_indexes, dtype=np.int32)
        y = np.asarray(y, dtype=np.int32)
        b = np.asarray(b, dtype=np.int32)
        return X_indexes, y, b

    # Pre-define the background sessions to use.
    all_sessions = list(range(1, 12))

    X_indexes, y, b = dataset_from_npz(session_nums=all_sessions, path_file=paths)

    # Split the indices: the first 40 objects form the training set, the last 10 the test set.
    train_indexes = np.where(y <= 40)[0]
    test_indexes = np.where(y > 40)[0]

    # Split the dataset.
    trainX = X_indexes[train_indexes]
    trainY = y[train_indexes]
    trainB = b[train_indexes]
    testX = X_indexes[test_indexes]
    testY = y[test_indexes]
    testB = b[test_indexes]

    # Create ClassificationTask objects.
    metatrain_tasks_list = [
        ClassificationTaskCORe50(trainX,
                                 trainY,
                                 num_training_samples_per_class,
                                 num_test_samples_per_class,
                                 num_training_classes,
                                 split_train_test=-1,  # defaults to num_train / (num_train+num_test)
                                 input_parse_fn=process_npz_img,
                                 background_labels=trainB)
    ]
    metatest_tasks_list = [
        ClassificationTaskCORe50(testX,
                                 testY,
                                 num_training_samples_per_class,
                                 num_test_samples_per_class,
                                 num_training_classes,
                                 split_train_test=-1,
                                 input_parse_fn=process_npz_img,
                                 background_labels=testB)
    ]

    # Create TaskDistribution objects that wrap the ClassificationTask objects
    # to produce meta-batches of tasks.
    metatrain_task_distribution = TaskDistribution(tasks=metatrain_tasks_list,
                                                   task_probabilities=[1.0],
                                                   batch_size=meta_batch_size,
                                                   sample_with_replacement=True)
    metatest_task_distribution = TaskDistribution(tasks=metatest_tasks_list,
                                                  task_probabilities=[1.0],
                                                  batch_size=meta_batch_size,
                                                  sample_with_replacement=True)

    # TODO: split into proper validation and test distributions!
    return metatrain_task_distribution, metatest_task_distribution, metatest_task_distribution
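
# Example usage (a minimal sketch; 'datasets/core50.npz' is a hypothetical path to an
# npz-wrapped copy of CORe50). Meta-training tasks draw from objects 1-40 and meta-test
# tasks from objects 41-50, with session numbers exposed as background labels:
#
#   metatrain, _, metatest = create_core50_from_npz_task_distribution(
#       'datasets/core50.npz',
#       num_training_samples_per_class=10,
#       num_training_classes=20,
#       meta_batch_size=5)
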
def create_miniimagenet_task_distribution(path_to_pkl,
                                          num_training_samples_per_class=10,
                                          num_test_samples_per_class=15,
                                          num_training_classes=20,
                                          meta_batch_size=5):
    """
    Returns a TaskDistribution that, on each reset, samples a different set of Mini-ImageNet
    classes.

    *** Data is loaded from a special pickle file. ***

    Arguments:
    path_to_pkl : string
        Path to the pkl-wrapped Mini-ImageNet dataset. This can be generated from the standard
        dataset using the supplied make_miniimagenet_dataset.py script.
    num_training_samples_per_class : int
        If -1, sample from the whole dataset. If >=1, the dataset will re-sample
        `num_training_samples_per_class' for each class at each reset, and sample minibatches
        exclusively from them, until the next reset. This is useful for, e.g., k-shot
        classification.
    num_test_samples_per_class : int
        Same as `num_training_samples_per_class'. Used to generate test sets for tasks
        on reset().
    num_training_classes : int
        If -1, use all the classes in `y'. If >=1, the dataset will re-sample
        `num_training_classes' at each reset, and sample minibatches exclusively from them,
        until the next reset.
    meta_batch_size : int
        Default number of tasks sampled per meta-batch.

    Returns:
    metatrain_task_distribution : TaskDistribution
        TaskDistribution object for use during training.
    metaval_task_distribution : TaskDistribution
        TaskDistribution object for use during model validation.
    metatest_task_distribution : TaskDistribution
        TaskDistribution object for use during testing.
    """
    global miniimagenet_trainX
    global miniimagenet_trainY
    global miniimagenet_valX
    global miniimagenet_valY
    global miniimagenet_testX
    global miniimagenet_testY

    with open(path_to_pkl, 'rb') as f:
        d = pickle.load(f)

    miniimagenet_trainX, miniimagenet_trainY = d['train']
    miniimagenet_valX, miniimagenet_valY = d['val']
    miniimagenet_testX, miniimagenet_testY = d['test']

    # Debug: print the label sets of the three splits.
    print(set(miniimagenet_trainY))
    print(set(miniimagenet_valY))
    print(set(miniimagenet_testY))

    # (Unused leftover from a CUB-200 variant of this loader.)
    """
    num_train = 100
    num_val = 50
    num_test = 50

    classes = list(set(cub200_Y))
    train_classes = classes[:num_train]
    val_classes = classes[num_train:(num_train+num_val)]
    test_classes = classes[(num_train+num_val):]

    train_indices = []
    val_indices = []
    test_indices = []
    for i in range(len(cub200_Y)):
        if cub200_Y[i] in train_classes:
            train_indices.append(i)
        elif cub200_Y[i] in val_classes:
            val_indices.append(i)
        elif cub200_Y[i] in test_classes:
            test_indices.append(i)

    cub200_trainX = cub200_X[train_indices]
    cub200_trainY = cub200_Y[train_indices]
    cub200_valX = cub200_X[val_indices]
    cub200_valY = cub200_Y[val_indices]
    cub200_testX = cub200_X[test_indices]
    cub200_testY = cub200_Y[test_indices]
    """

    miniimagenet_trainX = miniimagenet_trainX.astype(np.float32) / 255.0
    miniimagenet_valX = miniimagenet_valX.astype(np.float32) / 255.0
    miniimagenet_testX = miniimagenet_testX.astype(np.float32) / 255.0
    del d

    train_tasks_list = [
        ClassificationTask(miniimagenet_trainX,
                           miniimagenet_trainY,
                           num_training_samples_per_class,
                           num_test_samples_per_class,
                           num_training_classes,
                           split_train_test=0.5)
    ]

    # TODO / NOTE / HACK: validation and test tasks use a fixed number of test-set samples
    # instead of the supplied one. This is because in MAML/FOMAML the task-level test set is
    # used to compute the meta-gradient, and only a small number of samples is used there
    # (in the spirit of few-shot learning, where only a few samples are available). Here,
    # however, we wish to use more test samples to better estimate the accuracy of the model
    # on the validation and test tasks.
    num_test_samples_per_class = 50

    validation_tasks_list = [
        ClassificationTask(miniimagenet_valX,
                           miniimagenet_valY,
                           num_training_samples_per_class,
                           num_test_samples_per_class,
                           num_training_classes,
                           split_train_test=0.5)
    ]
    test_tasks_list = [
        ClassificationTask(miniimagenet_testX,
                           miniimagenet_testY,
                           num_training_samples_per_class,
                           num_test_samples_per_class,
                           num_training_classes,
                           split_train_test=0.5)
    ]

    metatrain_task_distribution = TaskDistribution(tasks=train_tasks_list,
                                                   task_probabilities=[1.0],
                                                   batch_size=meta_batch_size,
                                                   sample_with_replacement=True,
                                                   use_classes_only_once=True)
    metaval_task_distribution = TaskDistribution(tasks=validation_tasks_list,
                                                 task_probabilities=[1.0],
                                                 batch_size=meta_batch_size,
                                                 sample_with_replacement=True,
                                                 use_classes_only_once=True)
    metatest_task_distribution = TaskDistribution(tasks=test_tasks_list,
                                                  task_probabilities=[1.0],
                                                  batch_size=meta_batch_size,
                                                  sample_with_replacement=True,
                                                  use_classes_only_once=True)

    return metatrain_task_distribution, metaval_task_distribution, metatest_task_distribution
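
# Example usage (a minimal sketch; 'datasets/miniimagenet.pkl' is a hypothetical path to
# the file produced by make_miniimagenet_dataset.py). A standard 5-way, 1-shot setup; the
# 15 query samples per class are what MAML-style algorithms use for the meta-gradient,
# while validation/test tasks override this with 50 samples (see the HACK note above):
#
#   metatrain, metaval, metatest = create_miniimagenet_task_distribution(
#       'datasets/miniimagenet.pkl',
#       num_training_samples_per_class=1,   # 1-shot
#       num_test_samples_per_class=15,      # query samples per class
#       num_training_classes=5,             # 5-way
#       meta_batch_size=4)
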
def create_miniimagenet_from_files_task_distribution(path_to_dataset,
                                                     num_training_samples_per_class=10,
                                                     num_test_samples_per_class=15,
                                                     num_training_classes=20,
                                                     meta_batch_size=5):
    """
    Returns a TaskDistribution that, on each reset, samples a different set of Mini-ImageNet
    classes.

    *** Data is loaded from individual images. ***

    Arguments:
    path_to_dataset : string
        Path to the dataset folder (this must have 3 subfolders: 'train', 'val', and 'test').
    num_training_samples_per_class : int
        If -1, sample from the whole dataset. If >=1, the dataset will re-sample
        `num_training_samples_per_class' for each class at each reset, and sample minibatches
        exclusively from them, until the next reset. This is useful for, e.g., k-shot
        classification.
    num_test_samples_per_class : int
        Same as `num_training_samples_per_class'. Used to generate test sets for tasks
        on reset().
    num_training_classes : int
        If -1, use all the classes in `y'. If >=1, the dataset will re-sample
        `num_training_classes' at each reset, and sample minibatches exclusively from them,
        until the next reset.
    meta_batch_size : int
        Default number of tasks sampled per meta-batch.

    Returns:
    metatrain_task_distribution : TaskDistribution
        TaskDistribution object for use during training.
    metaval_task_distribution : TaskDistribution
        TaskDistribution object for use during model validation.
    metatest_task_distribution : TaskDistribution
        TaskDistribution object for use during testing.
    """
    def load_metadataset(path):
        # Pre-load the filenames of all samples, with one integer label per class folder.
        allX = []
        allY = []
        classes = os.listdir(path)
        for index, c in enumerate(classes):
            X = []
            Y = []
            instances = os.listdir(os.path.join(path, c))
            for s in instances:
                sample = os.path.join(path, c, s)
                if os.path.splitext(s)[1].lower() == '.png':
                    # (Disabled eager-loading code; images are loaded lazily via
                    # `input_parse_fn' instead.)
                    """
                    image = cv2.imread(os.path.join(folder, alphabet, char, s),
                                       cv2.IMREAD_GRAYSCALE)
                    if resize > 0:
                        image = cv2.resize(image, (resize, resize))
                    """
                    X.append(sample)
                    Y.append(index)
            allX.extend(X)
            allY.extend(Y)
        return allX, np.asarray(allY, dtype=np.int64)

    metatrain_filenames, metatrain_labels = load_metadataset(
        os.path.join(path_to_dataset, 'train'))
    metaval_filenames, metaval_labels = load_metadataset(
        os.path.join(path_to_dataset, 'val'))
    metatest_filenames, metatest_labels = load_metadataset(
        os.path.join(path_to_dataset, 'test'))

    # Create ClassificationTask objects.
    metatrain_tasks_list = [
        ClassificationTaskFromFiles(metatrain_filenames,
                                    metatrain_labels,
                                    num_training_samples_per_class,
                                    num_test_samples_per_class,
                                    num_training_classes,
                                    split_train_test=0.5,
                                    input_parse_fn=load_and_process_fn)
    ]
    metaval_tasks_list = [
        ClassificationTaskFromFiles(metaval_filenames,
                                    metaval_labels,
                                    num_training_samples_per_class,
                                    num_test_samples_per_class,
                                    num_training_classes,
                                    split_train_test=0.5,
                                    input_parse_fn=load_and_process_fn)
    ]
    metatest_tasks_list = [
        ClassificationTaskFromFiles(metatest_filenames,
                                    metatest_labels,
                                    num_training_samples_per_class,
                                    num_test_samples_per_class,
                                    num_training_classes,
                                    split_train_test=0.5,
                                    input_parse_fn=load_and_process_fn)
    ]

    # Create TaskDistribution objects that wrap the ClassificationTask objects
    # to produce meta-batches of tasks.
    metatrain_task_distribution = TaskDistribution(tasks=metatrain_tasks_list,
                                                   task_probabilities=[1.0],
                                                   batch_size=meta_batch_size,
                                                   sample_with_replacement=True)
    metaval_task_distribution = TaskDistribution(tasks=metaval_tasks_list,
                                                 task_probabilities=[1.0],
                                                 batch_size=meta_batch_size,
                                                 sample_with_replacement=True)
    metatest_task_distribution = TaskDistribution(tasks=metatest_tasks_list,
                                                  task_probabilities=[1.0],
                                                  batch_size=meta_batch_size,
                                                  sample_with_replacement=True)

    return metatrain_task_distribution, metaval_task_distribution, metatest_task_distribution
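
# Example usage (a minimal sketch; 'datasets/miniimagenet/' is a hypothetical path, and the
# class folder names below are illustrative). The loader expects one folder of .png images
# per class inside each split:
#
#   datasets/miniimagenet/
#       train/<class_folder>/*.png
#       val/<class_folder>/*.png
#       test/<class_folder>/*.png
#
#   metatrain, metaval, metatest = create_miniimagenet_from_files_task_distribution(
#       'datasets/miniimagenet',
#       num_training_samples_per_class=5,   # 5-shot
#       num_training_classes=5,             # 5-way
#       meta_batch_size=4)
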
def create_omniglot_from_files_task_distribution(path_to_dataset,
                                                 batch_size=32,
                                                 num_training_samples_per_class=10,
                                                 num_test_samples_per_class=-1,
                                                 num_training_classes=20,
                                                 meta_batch_size=5):
    """
    Returns a TaskDistribution that, on each reset, samples a different set of Omniglot
    characters.

    Arguments:
    path_to_dataset : string
        Path to the Omniglot dataset. The folder must contain the two standard subfolders
        'images_background' (training classes) and 'images_evaluation' (test classes). Each of
        these subfolders should contain one subfolder per Omniglot alphabet, and each alphabet
        folder must contain one folder per character, holding all the repetitions of that
        character.
    batch_size : int
        Default size of the minibatches generated by the tasks, if minibatches are sampled
        without specifying a batch size.
    num_training_samples_per_class : int
        If -1, sample from the whole dataset. If >=1, the dataset will re-sample
        `num_training_samples_per_class' for each class at each reset, and sample minibatches
        exclusively from them, until the next reset. This is useful for, e.g., k-shot
        classification.
    num_test_samples_per_class : int
        Same as `num_training_samples_per_class'. Used to generate test sets for tasks
        on reset().
    num_training_classes : int
        If -1, use all the classes in `y'. If >=1, the dataset will re-sample
        `num_training_classes' at each reset, and sample minibatches exclusively from them,
        until the next reset.
    meta_batch_size : int
        Default size of the meta-batches generated by the distribution, if meta-batches are
        sampled without specifying a size.

    Returns:
    metatrain_task_distribution : TaskDistribution
        TaskDistribution object for use during training.
    metaval_task_distribution : TaskDistribution
        TaskDistribution object for use during model validation (currently identical to the
        meta-test distribution; see the TODO below).
    metatest_task_distribution : TaskDistribution
        TaskDistribution object for use during testing.
    """
    # Pre-load all the filenames and their corresponding labels (within each alphabet dataset).
    def load_metadataset(path):
        allX = []
        allY = []
        alphabets_folders = os.listdir(path)
        for alphabet_index, alphabet in enumerate(alphabets_folders):
            X = []
            Y = []
            characters_folders = os.listdir(os.path.join(path, alphabet))
            for char_id, char in enumerate(characters_folders):
                samples = os.listdir(os.path.join(path, alphabet, char))
                for s in samples:
                    if os.path.splitext(s)[1] == '.png':
                        # (Disabled eager-loading code; images are loaded lazily via
                        # `input_parse_fn' instead.)
                        """
                        image = cv2.imread(os.path.join(folder, alphabet, char, s),
                                           cv2.IMREAD_GRAYSCALE)
                        if resize > 0:
                            image = cv2.resize(image, (resize, resize))
                        """
                        X.append(os.path.join(path, alphabet, char, s))
                        Y.append(char_id)
            allX.append(X)
            allY.append(np.asarray(Y))
        return allX, allY

    # metatrain_filenames[dataset_index][sample_index]
    # (sample_index runs over all classes and their repetitions).
    metatrain_filenames, metatrain_labels = load_metadataset(
        os.path.join(path_to_dataset, 'images_background'))
    metatest_filenames, metatest_labels = load_metadataset(
        os.path.join(path_to_dataset, 'images_evaluation'))

    # TODO: possibly merge the lists and re-split them in different proportions?
    # (e.g., the 36-14 alphabet split used by the pkl-based loader, instead of the
    # standard Omniglot 30-20 split.)

    # Create a single large dataset with all sub-datasets' classes, separately for train
    # and test, and offset the targets so that labels are unique across alphabets.
    trX = []
    trY = []
    teX = []
    teY = []

    cur_label_start = 0
    for alphabet_i in range(len(metatrain_labels)):
        metatrain_labels[alphabet_i] += cur_label_start
        trX.extend(metatrain_filenames[alphabet_i])
        trY.extend(metatrain_labels[alphabet_i])
        cur_label_start += len(set(metatrain_labels[alphabet_i]))

    cur_label_start = 0
    for alphabet_i in range(len(metatest_labels)):
        metatest_labels[alphabet_i] += cur_label_start
        teX.extend(metatest_filenames[alphabet_i])
        teY.extend(metatest_labels[alphabet_i])
        cur_label_start += len(set(metatest_labels[alphabet_i]))

    trY = np.asarray(trY, dtype=np.int64)
    teY = np.asarray(teY, dtype=np.int64)

    # Create ClassificationTask objects.
    metatrain_tasks_list = [
        ClassificationTaskFromFiles(trX,
                                    trY,
                                    num_training_samples_per_class,
                                    num_test_samples_per_class,
                                    num_training_classes,
                                    split_train_test=-1,  # defaults to num_train / (num_train+num_test)
                                    input_parse_fn=load_and_process_fn)
    ]
    metatest_tasks_list = [
        ClassificationTaskFromFiles(teX,
                                    teY,
                                    num_training_samples_per_class,
                                    num_test_samples_per_class,
                                    num_training_classes,
                                    split_train_test=-1,
                                    input_parse_fn=load_and_process_fn)
    ]

    # Create TaskDistribution objects that wrap the ClassificationTask objects
    # to produce meta-batches of tasks.
    metatrain_task_distribution = TaskDistribution(tasks=metatrain_tasks_list,
                                                   task_probabilities=[1.0],
                                                   batch_size=meta_batch_size,
                                                   sample_with_replacement=True,
                                                   use_classes_only_once=True)
    metatest_task_distribution = TaskDistribution(tasks=metatest_tasks_list,
                                                  task_probabilities=[1.0],
                                                  batch_size=meta_batch_size,
                                                  sample_with_replacement=True,
                                                  use_classes_only_once=True)

    # TODO: split into proper validation and test distributions!
    return metatrain_task_distribution, metatest_task_distribution, metatest_task_distribution
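
# Example usage (a minimal sketch; 'datasets/omniglot/' is a hypothetical path to the
# standard dataset, containing 'images_background/' and 'images_evaluation/').
# `use_classes_only_once=True' presumably prevents the same character from appearing in
# more than one task of a meta-batch:
#
#   metatrain, _, metatest = create_omniglot_from_files_task_distribution(
#       'datasets/omniglot',
#       num_training_samples_per_class=1,   # 1-shot
#       num_training_classes=5,             # 5-way
#       meta_batch_size=8)
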