Example #1
def create_sinusoid_task_distribution(min_amplitude=0.1,
                                      max_amplitude=5.0,
                                      min_phase=0.0,
                                      max_phase=np.pi,
                                      min_x=-5.0,
                                      max_x=5.0,
                                      num_training_samples=10,
                                      num_test_samples=100,
                                      num_test_tasks=100,
                                      meta_batch_size=5):
    tasks_list = [
        SinusoidTask(min_amplitude=min_amplitude,
                     max_amplitude=max_amplitude,
                     min_phase=min_phase,
                     max_phase=max_phase,
                     min_x=min_x,
                     max_x=max_x,
                     num_training_samples=num_training_samples,
                     num_test_samples=num_test_samples)
    ]

    metatrain_task_distribution = TaskDistribution(
        tasks=tasks_list,
        task_probabilities=[1.0],
        batch_size=meta_batch_size,
        sample_with_replacement=True)

    metaval_task_distribution = TaskDistribution(tasks=tasks_list,
                                                 task_probabilities=[1.0],
                                                 batch_size=meta_batch_size,
                                                 sample_with_replacement=True)

    metatest_task_distribution = TaskDistribution(tasks=tasks_list,
                                                  task_probabilities=[1.0],
                                                  batch_size=meta_batch_size,
                                                  sample_with_replacement=True)

    return metatrain_task_distribution, metaval_task_distribution, metatest_task_distribution
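
# --- Usage sketch (not part of the library) ---
# A minimal illustration of how the factory above might be driven. The
# `sample_batch()` and `reset()` method names below are assumptions about the
# TaskDistribution / SinusoidTask API and are not taken from this example.
metatrain_dist, metaval_dist, metatest_dist = create_sinusoid_task_distribution(
    num_training_samples=10,   # support samples drawn per task
    num_test_samples=100,      # query samples drawn per task
    meta_batch_size=5)         # tasks per meta-batch

meta_batch = metatrain_dist.sample_batch()   # assumed API: returns `meta_batch_size` tasks
for task in meta_batch:
    task.reset()                             # assumed API: re-draws this task's amplitude and phase
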
Example #2
def create_omniglot_allcharacters_task_distribution(path_to_pkl,
                                                    num_training_samples_per_class=10,
                                                    num_test_samples_per_class=-1,
                                                    num_training_classes=20,
                                                    meta_batch_size=5):
    """
    Returns a TaskDistribution that, on each reset, samples a different set of Omniglot characters.

    Arguments:
    path_to_pkl: string
        Path to the pkl wrapped Omniglot dataset. This can be generated from the standard dataset using the supplied
        make_omniglot_dataset.py script.
    num_training_samples_per_class : int
        If -1, sample from the whole dataset. If >=1, the dataset will re-sample num_training_samples_per_class
        for each class at each reset, and sample minibatches exclusively from them, until the next reset.
        This is useful for, e.g., k-shot classification.
    num_test_samples_per_class : int
        Same as `num_training_samples_per_class'. Used to generate test sets for tasks on reset().
    num_training_classes : int
        If -1, use all the classes in `y'. If >=1, the dataset will re-sample `num_training_classes' at
        each reset, and sample minibatches exclusively from them, until the next reset.
    meta_batch_size : int
        Default number of tasks in each meta-batch.

    Returns:
    metatrain_task_distribution : TaskDistribution
        TaskDistribution object for use during training
    metaval_task_distribution : TaskDistribution
        TaskDistribution object for use during model validation
    metatest_task_distribution : TaskDistribution
        TaskDistribution object for use during testing
    """

    with open(path_to_pkl, 'rb') as f:
        d = pickle.load(f)
        trainX_ = d['trainX']
        trainY_ = d['trainY']
        testX_ = d['testX']
        testY_ = d['testY']
    # Merge the original train/test splits; the combined alphabets are re-split below.
    trainX_.extend(testX_)
    trainY_.extend(testY_)

    global charomniglot_trainX
    global charomniglot_trainY
    global charomniglot_testX
    global charomniglot_testY

    # Use the first 36 alphabets for meta-training and the remaining ones for meta-testing.
    cutoff = 36
    charomniglot_trainX = trainX_[:cutoff]
    charomniglot_trainY = trainY_[:cutoff]
    charomniglot_testX = trainX_[cutoff:]
    charomniglot_testY = trainY_[cutoff:]

    # Build one flat dataset for meta-training and one for meta-testing, offsetting each alphabet's
    # character labels so that labels are unique across alphabets.
    trX = []
    trY = []
    teX = []
    teY = []

    cur_label_start = 0
    for alphabet_i in range(len(charomniglot_trainY)):
        charomniglot_trainY[alphabet_i] += cur_label_start
        trX.extend(charomniglot_trainX[alphabet_i])
        trY.extend(charomniglot_trainY[alphabet_i])
        cur_label_start += len(set(charomniglot_trainY[alphabet_i]))

    cur_label_start = 0
    for alphabet_i in range(len(charomniglot_testY)):
        charomniglot_testY[alphabet_i] += cur_label_start
        teX.extend(charomniglot_testX[alphabet_i])
        teY.extend(charomniglot_testY[alphabet_i])
        cur_label_start += len(set(charomniglot_testY[alphabet_i]))

    trX = np.asarray(trX, dtype=np.float32) / 255.0
    trY = np.asarray(trY, dtype=np.float32)
    teX = np.asarray(teX, dtype=np.float32) / 255.0
    teY = np.asarray(teY, dtype=np.float32)

    charomniglot_trainX = trX
    charomniglot_testX = teX
    charomniglot_trainY = trY
    charomniglot_testY = teY

    metatrain_tasks_list = [ClassificationTask(charomniglot_trainX,
                                               charomniglot_trainY,
                                               num_training_samples_per_class,
                                               num_test_samples_per_class,
                                               num_training_classes,
                                               split_train_test=-1)] # defaults to num_train / (num_train+num_test)
    metatest_tasks_list = [ClassificationTask(charomniglot_testX,
                                              charomniglot_testY,
                                              num_training_samples_per_class,
                                              num_test_samples_per_class,
                                              num_training_classes,
                                              split_train_test=-1)]

    metatrain_task_distribution = TaskDistribution(tasks=metatrain_tasks_list,
                                                   task_probabilities=[1.0],
                                                   batch_size=meta_batch_size,
                                                   sample_with_replacement=True)

    metatest_task_distribution = TaskDistribution(tasks=metatest_tasks_list,
                                                  task_probabilities=[1.0],
                                                  batch_size=meta_batch_size,
                                                  sample_with_replacement=True)

    # TODO: split into validation and test!
    return metatrain_task_distribution, metatest_task_distribution, metatest_task_distribution
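
# --- Usage sketch (not part of the library) ---
# Parameterizing the factory above for 20-way, 10-shot Omniglot episodes.
# The pickle path is a placeholder for the file produced by make_omniglot_dataset.py.
metatrain_dist, metaval_dist, metatest_dist = \
    create_omniglot_allcharacters_task_distribution(
        path_to_pkl='omniglot.pkl',          # placeholder path
        num_training_samples_per_class=10,   # 10-shot support set
        num_test_samples_per_class=-1,       # use all remaining samples for each task's test set
        num_training_classes=20,             # 20-way classification
        meta_batch_size=5)
# Note: until the TODO above is resolved, the validation and test distributions
# returned by this factory are the same object.
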
Example #3
def create_cifar100_task_distribution(num_training_samples_per_class=-1,
                                      num_test_samples_per_class=-1,
                                      num_training_classes=10,
                                      meta_train_test_split=0.7,
                                      meta_batch_size=5):
    """
    Returns a TaskDistribution that, on each reset, samples a different set of CIFAR-100 classes.

    Note that the first time this function is called on a new system, it will download the CIFAR-100 dataset, which
    may take some time (usually less than 5 minutes).

    Arguments:
    num_training_samples_per_class : int
        If -1, sample from the whole dataset. If >=1, the dataset will re-sample num_training_samples_per_class
        for each class at each reset, and sample minibatches exclusively from them, until the next reset.
        This is useful for, e.g., k-shot classification.
    num_test_samples_per_class : int
        Same as `num_training_samples_per_class'. Used to generate test sets for tasks on reset().
    num_training_classes : int
        If -1, use all the classes in `y'. If >=1, the dataset will re-sample `num_training_classes' at
        each reset, and sample minibatches exclusively from them, until the next reset.
    meta_train_test_split : float
        Proportion of classes to use for the meta-training set. E.g., split=0.7 means int(0.7*100)=70 classes will
        be used for meta-training, while 100-70=30 classes will be used for meta-testing.
    meta_batch_size : int
        Default number of tasks in each meta-batch.

    Returns:
    metatrain_task_distribution : TaskDistribution
        TaskDistribution object for use during training
    metaval_task_distribution : TaskDistribution
        TaskDistribution object for use during model validation
    metatest_task_distribution : TaskDistribution
        TaskDistribution object for use during testing
    """

    global cifar100_trainX
    global cifar100_trainY
    global cifar100_testX
    global cifar100_testY

    ((train_data, train_labels),
     (eval_data,
      eval_labels)) = tf.keras.datasets.cifar100.load_data(label_mode='fine')

    all_x = np.concatenate((train_data, eval_data), axis=0)
    all_y = np.concatenate((train_labels, eval_labels), axis=0)

    split_class = int(meta_train_test_split * 100)

    meta_train_classes = list(range(split_class))
    meta_test_classes = list(range(split_class, 100))

    meta_train_indices = []
    for c in meta_train_classes:
        c_indices = np.where(all_y == c)[0]
        meta_train_indices.extend(c_indices)

    meta_test_indices = []
    for c in meta_test_classes:
        c_indices = np.where(all_y == c)[0]
        meta_test_indices.extend(c_indices)

    # TODO: subtract mean of train images (over axis=0) from both trainX and testX
    """
    from copy import copy
    import cv2
    old_x = copy(all_x)
    all_x = np.ones([old_x.shape[0], 224, 224, 3], dtype=np.float32)
    for i in range(old_x.shape[0]):
        all_x[i,:,:,:] = cv2.resize(old_x[i,:,:,:], (224, 224))
    """

    cifar100_trainX = all_x[meta_train_indices, :].astype(np.float32) / 255.0
    cifar100_trainY = np.squeeze(all_y[meta_train_indices]).astype(np.int64)
    cifar100_testX = all_x[meta_test_indices, :].astype(np.float32) / 255.0
    cifar100_testY = np.squeeze(all_y[meta_test_indices]).astype(np.int64)

    metatrain_tasks_list = [
        ClassificationTask(cifar100_trainX,
                           cifar100_trainY,
                           num_training_samples_per_class,
                           num_test_samples_per_class,
                           num_training_classes,
                           split_train_test=-1)
    ]  # defaults to num_train / (num_train+num_test)
    metatest_tasks_list = [
        ClassificationTask(cifar100_testX,
                           cifar100_testY,
                           num_training_samples_per_class,
                           num_test_samples_per_class,
                           num_training_classes,
                           split_train_test=-1)
    ]

    metatrain_task_distribution = TaskDistribution(
        tasks=metatrain_tasks_list,
        task_probabilities=[1.0],
        batch_size=meta_batch_size,
        sample_with_replacement=True)

    metatest_task_distribution = TaskDistribution(tasks=metatest_tasks_list,
                                                  task_probabilities=[1.0],
                                                  batch_size=meta_batch_size,
                                                  sample_with_replacement=True)

    # TODO: split into validation and test!
    return metatrain_task_distribution, metatest_task_distribution, metatest_task_distribution
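
# --- Usage sketch (not part of the library) ---
# With meta_train_test_split=0.7 the factory above assigns classes 0..69 to the meta-training
# pool and classes 70..99 to the meta-test pool, via split_class = int(meta_train_test_split * 100).
metatrain_dist, metaval_dist, metatest_dist = create_cifar100_task_distribution(
    num_training_samples_per_class=5,   # 5-shot episodes
    num_test_samples_per_class=15,      # 15 query samples per class
    num_training_classes=10,            # 10-way episodes
    meta_train_test_split=0.7,
    meta_batch_size=5)
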
def create_core50_from_npz_task_distribution(path_to_dataset,
                                             batch_size=32,
                                             num_training_samples_per_class=10,
                                             num_test_samples_per_class=-1,
                                             num_training_classes=20,
                                             meta_batch_size=5):

    imgs, paths = load_npz_file(path_to_dataset)

    global core50_images
    core50_images = imgs

    def get_session_objects(session_num, path_file):
        session_indexes = []
        session_labels = []
        for index, path in enumerate(path_file):
            splitted_path = path.split('/')
            if splitted_path[0] == 's' + str(session_num):
                for i in range(1, 51):
                    if splitted_path[1] == 'o' + str(i):
                        session_indexes.append(index)
                        session_labels.append(i)
        return session_indexes, session_labels

    def dataset_from_npz(session_nums, path_file):
        # Object index numbers in npz file.
        X_indexes = []

        # Object labels.
        y = []

        # Background (session) labels.
        b = []

        for session_num in session_nums:
            session_indexes, session_labels = get_session_objects(
                session_num, path_file)
            X_indexes.extend(session_indexes)
            y.extend(session_labels)
            for i in range(len(session_indexes)):
                b.append(session_num)

        X_indexes = np.asarray(X_indexes, dtype=np.int32)
        y = np.asarray(y, dtype=np.int32)
        b = np.asarray(b, dtype=np.int32)

        return X_indexes, y, b

    # Pre-define the background sessions to use (CORe50 sessions s1..s11).
    all_sessions = list(range(1, 12))

    X_indexes, y, b = dataset_from_npz(session_nums=all_sessions,
                                       path_file=paths)

    # Split indexes: first 40 objects train set & last 10 objects for test set.
    train_indexes = np.where(y <= 40)[0]
    test_indexes = np.where(y > 40)[0]

    # Split the dataset.
    trainX = X_indexes[train_indexes]
    trainY = y[train_indexes]
    trainB = b[train_indexes]

    testX = X_indexes[test_indexes]
    testY = y[test_indexes]
    testB = b[test_indexes]

    # Create ClassificationTask objects
    metatrain_tasks_list = [
        ClassificationTaskCORe50(trainX,
                                 trainY,
                                 num_training_samples_per_class,
                                 num_test_samples_per_class,
                                 num_training_classes,
                                 split_train_test=-1,  # defaults to num_train / (num_train+num_test)
                                 input_parse_fn=process_npz_img,
                                 background_labels=trainB)
    ]
    metatest_tasks_list = [
        ClassificationTaskCORe50(testX,
                                 testY,
                                 num_training_samples_per_class,
                                 num_test_samples_per_class,
                                 num_training_classes,
                                 split_train_test=-1,
                                 input_parse_fn=process_npz_img,
                                 background_labels=testB)
    ]

    # Create TaskDistribution objects that wrap the ClassificationTask objects to produce meta-batches of tasks
    metatrain_task_distribution = TaskDistribution(
        tasks=metatrain_tasks_list,
        task_probabilities=[1.0],
        batch_size=meta_batch_size,
        sample_with_replacement=True)

    metatest_task_distribution = TaskDistribution(tasks=metatest_tasks_list,
                                                  task_probabilities=[1.0],
                                                  batch_size=meta_batch_size,
                                                  sample_with_replacement=True)

    # TODO: split into validation and test!
    return metatrain_task_distribution, metatest_task_distribution, metatest_task_distribution
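
# --- Usage sketch (not part of the library) ---
# The factory above splits CORe50 by object id: objects 1-40 become meta-training classes and
# objects 41-50 meta-test classes. The .npz path below is a placeholder for the file read by
# load_npz_file().
metatrain_dist, metaval_dist, metatest_dist = create_core50_from_npz_task_distribution(
    path_to_dataset='core50.npz',        # placeholder path
    num_training_samples_per_class=10,
    num_test_samples_per_class=-1,
    num_training_classes=20,
    meta_batch_size=5)
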
Example #5
def create_miniimagenet_task_distribution(path_to_pkl,
                                          num_training_samples_per_class=10,
                                          num_test_samples_per_class=15,
                                          num_training_classes=20,
                                          meta_batch_size=5):
    """
    Returns a TaskDistribution that, on each reset, samples a different set of Mini-ImageNet classes.

    *** Data is loaded from a special pickle file. ***

    Arguments:
    path_to_pkl: string
        Path to the pkl wrapped Mini-ImageNet dataset. This can be generated from the standard dataset using the
        supplied make_miniimagenet_dataset.py script.
    num_training_samples_per_class : int
        If -1, sample from the whole dataset. If >=1, the dataset will re-sample num_training_samples_per_class
        for each class at each reset, and sample minibatches exclusively from them, until the next reset.
        This is useful for, e.g., k-shot classification.
    num_test_samples_per_class : int
        Same as `num_training_samples_per_class'. Used to generate test sets for tasks on reset().
    num_training_classes : int
        If -1, use all the classes in `y'. If >=1, the dataset will re-sample `num_training_classes' at
        each reset, and sample minibatches exclusively from them, until the next reset.
    meta_batch_size : int
        Default number of tasks in each meta-batch.

    Returns:
    metatrain_task_distribution : TaskDistribution
        TaskDistribution object for use during training
    metaval_task_distribution : TaskDistribution
        TaskDistribution object for use during model validation
    metatest_task_distribution : TaskDistribution
        TaskDistribution object for use during testing
    """

    global miniimagenet_trainX
    global miniimagenet_trainY

    global miniimagenet_valX
    global miniimagenet_valY

    global miniimagenet_testX
    global miniimagenet_testY

    with open(path_to_pkl, 'rb') as f:
        d = pickle.load(f)
        miniimagenet_trainX, miniimagenet_trainY = d['train']
        miniimagenet_valX, miniimagenet_valY = d['val']
        miniimagenet_testX, miniimagenet_testY = d['test']

    # Print the label sets of the three splits as a quick sanity check.
    print(set(miniimagenet_trainY))
    print(set(miniimagenet_valY))
    print(set(miniimagenet_testY))
    """
    num_train = 100
    num_val = 50
    num_test = 50

    classes = list(set(cub200_Y))
    train_classes = classes[:num_train]
    val_classes = classes[num_train:(num_train+num_val)]
    test_classes = classes[(num_train+num_val):]

    train_indices = []
    val_indices = []
    test_indices = []

    for i in range(len(cub200_Y)):
        if cub200_Y[i] in train_classes:
            train_indices.append(i)
        elif cub200_Y[i] in val_classes:
            val_indices.append(i)
        elif cub200_Y[i] in test_classes:
            test_indices.append(i)

    cub200_trainX = cub200_X[train_indices]
    cub200_trainY = cub200_Y[train_indices]

    cub200_valX = cub200_X[val_indices]
    cub200_valY = cub200_Y[val_indices]

    cub200_testX = cub200_X[test_indices]
    cub200_testY = cub200_Y[test_indices]
    """

    miniimagenet_trainX = miniimagenet_trainX.astype(np.float32) / 255.0
    miniimagenet_valX = miniimagenet_valX.astype(np.float32) / 255.0
    miniimagenet_testX = miniimagenet_testX.astype(np.float32) / 255.0

    del d

    train_tasks_list = [
        ClassificationTask(miniimagenet_trainX,
                           miniimagenet_trainY,
                           num_training_samples_per_class,
                           num_test_samples_per_class,
                           num_training_classes,
                           split_train_test=0.5)
    ]

    # TODO/NOTE/HACK -- the validation and test tasks use a fixed number of test-set samples instead of the value
    # supplied by the caller. In MAML/FOMAML the test set is used to compute the meta-gradient, and only a small
    # number of samples is used (in the spirit of few-shot learning, where only a few samples are available);
    # here, however, we use a few more test samples to better estimate the model's accuracy on the validation
    # and test tasks.
    num_test_samples_per_class = 50
    validation_tasks_list = [
        ClassificationTask(miniimagenet_valX,
                           miniimagenet_valY,
                           num_training_samples_per_class,
                           num_test_samples_per_class,
                           num_training_classes,
                           split_train_test=0.5)
    ]

    test_tasks_list = [
        ClassificationTask(miniimagenet_testX,
                           miniimagenet_testY,
                           num_training_samples_per_class,
                           num_test_samples_per_class,
                           num_training_classes,
                           split_train_test=0.5)
    ]

    metatrain_task_distribution = TaskDistribution(
        tasks=train_tasks_list,
        task_probabilities=[1.0],
        batch_size=meta_batch_size,
        sample_with_replacement=True,
        use_classes_only_once=True)

    metaval_task_distribution = TaskDistribution(tasks=validation_tasks_list,
                                                 task_probabilities=[1.0],
                                                 batch_size=meta_batch_size,
                                                 sample_with_replacement=True,
                                                 use_classes_only_once=True)

    metatest_task_distribution = TaskDistribution(tasks=test_tasks_list,
                                                  task_probabilities=[1.0],
                                                  batch_size=meta_batch_size,
                                                  sample_with_replacement=True,
                                                  use_classes_only_once=True)

    return metatrain_task_distribution, metaval_task_distribution, metatest_task_distribution
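
# --- Usage sketch (not part of the library) ---
# 5-way, 1-shot Mini-ImageNet episodes. Note that the factory above overrides
# num_test_samples_per_class to 50 for the validation and test distributions, so the value
# passed here only affects meta-training tasks.
metatrain_dist, metaval_dist, metatest_dist = create_miniimagenet_task_distribution(
    path_to_pkl='miniimagenet.pkl',      # placeholder path
    num_training_samples_per_class=1,    # 1-shot support set
    num_test_samples_per_class=15,       # query samples per class (meta-training tasks only)
    num_training_classes=5,              # 5-way classification
    meta_batch_size=4)
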
Example #6
def create_miniimagenet_from_files_task_distribution(
        path_to_dataset,
        num_training_samples_per_class=10,
        num_test_samples_per_class=15,
        num_training_classes=20,
        meta_batch_size=5):
    """
    Returns a TaskDistribution that, on each reset, samples a different set of Mini-ImageNet classes.

    *** Data is loaded from individual images. ***

    Arguments:
    path_to_dataset: string
        Path to the dataset folder (this must have 3 subfolders, 'test', 'train', and 'val').
    num_training_samples_per_class : int
        If -1, sample from the whole dataset. If >=1, the dataset will re-sample num_training_samples_per_class
        for each class at each reset, and sample minibatches exclusively from them, until the next reset.
        This is useful for, e.g., k-shot classification.
    num_test_samples_per_class : int
        Same as `num_training_samples_per_class'. Used to generate test sets for tasks on reset().
    num_training_classes : int
        If -1, use all the classes in `y'. If >=1, the dataset will re-sample `num_training_classes' at
        each reset, and sample minibatches exclusively from them, until the next reset.
    meta_batch_size : int
        Default number of tasks in each meta-batch.

    Returns:
    metatrain_task_distribution : TaskDistribution
        TaskDistribution object for use during training
    metaval_task_distribution : TaskDistribution
        TaskDistribution object for use during model validation
    metatest_task_distribution : TaskDistribution
        TaskDistribution object for use during testing
    """
    def load_metadataset(path):
        allX = []
        allY = []

        classes = os.listdir(path)
        for index, c in enumerate(classes):
            X = []
            Y = []

            instances = os.listdir(os.path.join(path, c))
            for s in instances:
                sample = os.path.join(path, c, s)
                if os.path.splitext(s)[1].lower() == '.png':
                    """
                    image = cv2.imread(os.path.join(folder, alphabet, char, s), cv2.IMREAD_GRAYSCALE)
                    if resize > 0:
                        image = cv2.resize(image, (resize, resize))
                    """
                    X.append(sample)
                    Y.append(index)

            allX.extend(X)
            allY.extend(Y)

        return allX, np.asarray(allY, dtype=np.int64)

    metatrain_filenames, metatrain_labels = load_metadataset(
        os.path.join(path_to_dataset, 'train'))
    metaval_filenames, metaval_labels = load_metadataset(
        os.path.join(path_to_dataset, 'val'))
    metatest_filenames, metatest_labels = load_metadataset(
        os.path.join(path_to_dataset, 'test'))

    # Create ClassificationTask objects
    metatrain_tasks_list = [
        ClassificationTaskFromFiles(metatrain_filenames,
                                    metatrain_labels,
                                    num_training_samples_per_class,
                                    num_test_samples_per_class,
                                    num_training_classes,
                                    split_train_test=0.5,
                                    input_parse_fn=load_and_process_fn)
    ]
    metaval_tasks_list = [
        ClassificationTaskFromFiles(metaval_filenames,
                                    metaval_labels,
                                    num_training_samples_per_class,
                                    num_test_samples_per_class,
                                    num_training_classes,
                                    split_train_test=0.5,
                                    input_parse_fn=load_and_process_fn)
    ]
    metatest_tasks_list = [
        ClassificationTaskFromFiles(metatest_filenames,
                                    metatest_labels,
                                    num_training_samples_per_class,
                                    num_test_samples_per_class,
                                    num_training_classes,
                                    split_train_test=0.5,
                                    input_parse_fn=load_and_process_fn)
    ]

    # Create TaskDistribution objects that wrap the ClassificationTask objects to produce meta-batches of tasks
    metatrain_task_distribution = TaskDistribution(
        tasks=metatrain_tasks_list,
        task_probabilities=[1.0],
        batch_size=meta_batch_size,
        sample_with_replacement=True)
    metaval_task_distribution = TaskDistribution(tasks=metaval_tasks_list,
                                                 task_probabilities=[1.0],
                                                 batch_size=meta_batch_size,
                                                 sample_with_replacement=True)
    metatest_task_distribution = TaskDistribution(tasks=metatest_tasks_list,
                                                  task_probabilities=[1.0],
                                                  batch_size=meta_batch_size,
                                                  sample_with_replacement=True)

    return metatrain_task_distribution, metaval_task_distribution, metatest_task_distribution
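
# --- Usage sketch (not part of the library) ---
# The file-based factory above expects one subfolder per class inside 'train', 'val' and 'test',
# with .png images inside each class folder, e.g. <path_to_dataset>/train/<class_name>/<image>.png.
# The dataset path below is a placeholder.
metatrain_dist, metaval_dist, metatest_dist = \
    create_miniimagenet_from_files_task_distribution(
        path_to_dataset='miniimagenet/',   # placeholder path
        num_training_samples_per_class=5,  # 5-shot support set
        num_test_samples_per_class=15,
        num_training_classes=5,            # 5-way classification
        meta_batch_size=4)
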
def create_omniglot_from_files_task_distribution(
        path_to_dataset,
        batch_size=32,
        num_training_samples_per_class=10,
        num_test_samples_per_class=-1,
        num_training_classes=20,
        meta_batch_size=5):
    """
    Returns a TaskDistribution that, on each reset, samples a different set of Omniglot characters.

    Arguments:
    path_to_dataset : string
        Path to the Omniglot dataset. The folder must contain the two standard subfolders 'images_background'
        (training classes) and 'images_evaluation' (test classes). Each of these subfolders should contain a number
        of subfolders, one for each Omniglot alphabet, and each alphabet folder must contain one folder per
        character, holding all the repetitions of that character.
    batch_size : int
        Default size of minibatches generated by the tasks, if minibatches are sampled from them without specifying
        a batch size.
    num_training_samples_per_class : int
        If -1, sample from the whole dataset. If >=1, the dataset will re-sample num_training_samples_per_class
        for each class at each reset, and sample minibatches exclusively from them, until the next reset.
        This is useful for, e.g., k-shot classification.
    num_test_samples_per_class : int
        Same as `num_training_samples_per_class'. Used to generate test sets for tasks on reset().
    num_training_classes : int
        If -1, use all the classes in `y'. If >=1, the dataset will re-sample `num_training_classes' at
        each reset, and sample minibatches exclusively from them, until the next reset.
    meta_batch_size : int
        Default number of tasks in each meta-batch, used when meta-batches are sampled without explicitly
        specifying a meta-batch size.

    Returns:
    metatrain_task_distribution : TaskDistribution
        TaskDistribution object for use during training
    metaval_task_distribution : TaskDistribution
        TaskDistribution object for use during model validation
    metatest_task_distribution : TaskDistribution
        TaskDistribution object for use during testing
    """

    # Pre-load all the filenames and their corresponding label (within each alphabet dataset).
    def load_metadataset(path):
        allX = []
        allY = []

        alphabets_folders = os.listdir(path)
        for alphabet_index, alphabet in enumerate(alphabets_folders):
            X = []
            Y = []

            characters_folders = os.listdir(os.path.join(path, alphabet))
            for char_id, char in enumerate(characters_folders):
                samples = os.listdir(os.path.join(path, alphabet, char))
                for s in samples:
                    if os.path.splitext(s)[1] == '.png':
                        """
                        image = cv2.imread(os.path.join(folder, alphabet, char, s), cv2.IMREAD_GRAYSCALE)
                        if resize > 0:
                            image = cv2.resize(image, (resize, resize))
                        """
                        X.append(os.path.join(path, alphabet, char, s))
                        Y.append(char_id)

            allX.append(X)
            allY.append(np.asarray(Y))

        return allX, allY

    # metatrain_filenames[dataset_index][sample_index] (sample_index includes all classes and their repetitions)
    metatrain_filenames, metatrain_labels = load_metadataset(
        os.path.join(path_to_dataset, 'images_background'))
    metatest_filenames, metatest_labels = load_metadataset(
        os.path.join(path_to_dataset, 'images_evaluation'))

    # TODO: Possibly: merge lists, and re-split in different proportions? (e.g., current Omniglot 36-14 instead of 30-20)

    # Build one flat dataset for meta-training and one for meta-testing, offsetting each alphabet's
    # character labels so that labels are unique across alphabets.
    trX = []
    trY = []
    teX = []
    teY = []

    cur_label_start = 0
    for alphabet_i in range(len(metatrain_labels)):
        metatrain_labels[alphabet_i] += cur_label_start
        trX.extend(metatrain_filenames[alphabet_i])
        trY.extend(metatrain_labels[alphabet_i])
        cur_label_start += len(set(metatrain_labels[alphabet_i]))

    cur_label_start = 0
    for alphabet_i in range(len(metatest_labels)):
        metatest_labels[alphabet_i] += cur_label_start
        teX.extend(metatest_filenames[alphabet_i])
        teY.extend(metatest_labels[alphabet_i])
        cur_label_start += len(set(metatest_labels[alphabet_i]))

    trY = np.asarray(trY, dtype=np.int64)
    teY = np.asarray(teY, dtype=np.int64)

    # Create ClassificationTask objects
    metatrain_tasks_list = [
        ClassificationTaskFromFiles(trX,
                                    trY,
                                    num_training_samples_per_class,
                                    num_test_samples_per_class,
                                    num_training_classes,
                                    split_train_test=-1,
                                    input_parse_fn=load_and_process_fn)
    ]  # defaults to num_train / (num_train+num_test)
    metatest_tasks_list = [
        ClassificationTaskFromFiles(teX,
                                    teY,
                                    num_training_samples_per_class,
                                    num_test_samples_per_class,
                                    num_training_classes,
                                    split_train_test=-1,
                                    input_parse_fn=load_and_process_fn)
    ]

    # Create TaskDistribution objects that wrap the ClassificationTask objects to produce meta-batches of tasks
    metatrain_task_distribution = TaskDistribution(
        tasks=metatrain_tasks_list,
        task_probabilities=[1.0],
        batch_size=meta_batch_size,
        sample_with_replacement=True,
        use_classes_only_once=True)

    metatest_task_distribution = TaskDistribution(tasks=metatest_tasks_list,
                                                  task_probabilities=[1.0],
                                                  batch_size=meta_batch_size,
                                                  sample_with_replacement=True,
                                                  use_classes_only_once=True)

    # TODO: split into validation and test!
    return metatrain_task_distribution, metatest_task_distribution, metatest_task_distribution
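
# --- Usage sketch (not part of the library) ---
# 20-way, 1-shot Omniglot episodes from the standard folder layout: characters under
# 'images_background' form the meta-training pool and characters under 'images_evaluation'
# the meta-test pool (also returned as the validation distribution until the TODO above is resolved).
metatrain_dist, metaval_dist, metatest_dist = \
    create_omniglot_from_files_task_distribution(
        path_to_dataset='omniglot/',       # placeholder: contains images_background/ and images_evaluation/
        num_training_samples_per_class=1,  # 1-shot support set
        num_test_samples_per_class=-1,
        num_training_classes=20,           # 20-way classification
        meta_batch_size=5)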