def random_regression_datasets(n_samples, features=100, outs=1, informative=.1, partition_proportions=(.5, .3), rnd=None, **mk_rgr_kwargs):
    """Build a synthetic regression problem and split it into datasets.

    :param n_samples: total number of generated examples.
    :param features: number of input features.
    :param outs: number of regression targets; a single target is reshaped
        to a column vector.
    :param informative: fraction of `features` that carry signal
        (forwarded to ``make_regression`` as ``n_informative``).
    :param partition_proportions: proportions handed to ``redivide_data``
        for the train/validation(/test) split.
    :param rnd: seed or random state understood by ``em.get_rand_state``.
    :param mk_rgr_kwargs: extra keyword arguments for ``make_regression``.
    :return: an ``em.Datasets`` container built from the split data.
    """
    state = em.get_rand_state(rnd)
    n_informative = int(features * informative)
    inputs, labels, coefficients = make_regression(
        n_samples, features, n_informative, outs,
        random_state=state, coef=True, **mk_rgr_kwargs)
    if outs == 1:
        # Keep labels two-dimensional: (n_samples, 1) column vector.
        labels = np.reshape(labels, (n_samples, 1))
    print('range of Y', np.min(labels), np.max(labels))
    description = utils.merge_dicts(
        {'informative': informative, 'random_seed': rnd, 'w': coefficients},
        mk_rgr_kwargs)
    # 'w' (the true coefficient array) is excluded from the generated name.
    dataset = em.Dataset(inputs, labels,
                         name=em.utils.name_from_dict(description, 'w'),
                         info=description)
    datasets = em.Datasets.from_list(
        redivide_data([dataset], partition_proportions))
    train_x = datasets.train.data
    print('conditioning of X^T X', np.linalg.cond(train_x.T @ train_x))
    return datasets
def all_data(self, partition_proportions=None, seed=None):
    """Return the full (loaded) image collection as an ``em.Datasets``.

    Lazily triggers image loading on first use and blocks, polling every
    5 seconds, until ``check_loaded_images(600)`` reports completion
    (600 being the expected per-class image count).

    :param partition_proportions: optional proportions used to re-split the
        single full dataset via ``redivide_data`` (with shuffling).
    :param seed: optional seed for numpy's global RNG so the shuffle inside
        ``redivide_data`` is reproducible.
    :return: an ``em.Datasets`` container.
    """
    if not self._loaded_images:
        self.load_all_images()
        # Loading is asynchronous: wait until enough images have arrived.
        while not self.check_loaded_images(600):
            time.sleep(5)
    data, targets = [], []
    for k, c in enumerate(sorted(self._loaded_images)):
        class_images = self._loaded_images[c].values()
        data += list(class_images)
        # Fix: label count now follows the actual number of images loaded
        # for this class instead of a hard-coded 600, keeping `data` and
        # `targets` aligned even if a class has a different count.
        targets += [k] * len(class_images)
    if self.info['one_hot_enc']:
        targets = em.to_one_hot_enc(targets,
                                    dimension=len(self._loaded_images))
    _dts = [
        em.Dataset(
            data=np.stack(data),
            target=np.array(targets),
            name='MiniImagenet_full')
    ]
    # Fix: `is not None` so that seed=0 is honored (a plain truthiness
    # test silently ignored it).
    if seed is not None:
        np.random.seed(seed)
    if partition_proportions:
        _dts = redivide_data(
            _dts, partition_proportions=partition_proportions, shuffle=True)
    return em.Datasets.from_list(_dts)
def random_classification_datasets(n_samples, features=100, classes=2, informative=.1, partition_proportions=(.5, .3), rnd=None, one_hot=True, **mk_cls_kwargs):
    """Build a synthetic classification problem and split it into datasets.

    :param n_samples: total number of generated examples.
    :param features: number of input features.
    :param classes: number of target classes.
    :param informative: fraction of `features` that carry signal
        (forwarded to ``make_classification`` as ``n_informative``).
    :param partition_proportions: proportions handed to ``redivide_data``
        for the train/validation(/test) split.
    :param rnd: seed or random state understood by ``em.get_rand_state``.
    :param one_hot: if True, one-hot encode the labels.
    :param mk_cls_kwargs: extra keyword arguments for ``make_classification``;
        an explicit ``n_informative`` here overrides the derived value.
    :return: an ``em.Datasets`` container built from the split data.
    """
    rnd_state = em.get_rand_state(rnd)
    # Fix: `informative` was recorded in `info` but never forwarded, so
    # make_classification silently used its default n_informative=2.
    # Derive it from `features` (mirroring random_regression_datasets);
    # pop() keeps a caller-supplied n_informative authoritative.
    n_informative = mk_cls_kwargs.pop('n_informative',
                                      int(features * informative))
    X, Y = make_classification(n_samples, features,
                               n_informative=n_informative,
                               n_classes=classes,
                               random_state=rnd_state, **mk_cls_kwargs)
    if one_hot:
        Y = utils.to_one_hot_enc(Y)
    print('range of Y', np.min(Y), np.max(Y))
    info = utils.merge_dicts({
        'informative': informative,
        'random_seed': rnd
    }, mk_cls_kwargs)
    name = em.utils.name_from_dict(info, 'w')
    dt = em.Dataset(X, Y, name=name, info=info)
    datasets = em.Datasets.from_list(redivide_data([dt], partition_proportions))
    print('conditioning of X^T X',
          np.linalg.cond(datasets.train.data.T @ datasets.train.data))
    return datasets
def opt(data_root_folder=None, one_hot=True, partitions=None, shuffle=False, seed=None):
    """Load the "opt" digits dataset via ``Digit.read_opt`` and wrap it.

    ``data_root_folder`` and ``one_hot`` are accepted for interface
    compatibility but are not used by the current loading path.

    :param partitions: optional proportions for re-splitting the three
        canonical splits through ``redivide_data``.
    :param shuffle: whether ``redivide_data`` shuffles before splitting.
    :param seed: seed forwarded to ``redivide_data``.
    :return: an ``em.Datasets`` container with train/validation/test.
    """
    datasets = Digit.read_opt()
    splits = [
        em.Dataset(part.images, part.labels, name="opt")
        for part in (datasets.train, datasets.validation, datasets.test)
    ]
    if partitions:
        splits = redivide_data(splits, partition_proportions=partitions,
                               shuffle=shuffle, seed=seed)
    return em.Datasets.from_list(splits)
def mnist(folder=None, one_hot=True, partitions=None, filters=None, maps=None, shuffle=False):
    """Load MNIST and wrap its splits as ``em.Dataset`` objects.

    :param folder: data directory; defaults to ``MNIST_DIR`` when falsy.
    :param one_hot: whether labels are one-hot encoded by the reader.
    :param partitions: optional proportions for re-splitting via
        ``redivide_data``.
    :param filters: filters forwarded to ``redivide_data``.
    :param maps: maps forwarded to ``redivide_data``.
    :param shuffle: whether ``redivide_data`` shuffles before splitting.
    :return: an ``em.Datasets`` container with three slots
        (missing splits after re-partitioning are padded with None).
    """
    directory = folder or MNIST_DIR
    raw = read_data_sets(directory, one_hot=one_hot)
    splits = [
        em.Dataset(part.images, part.labels, name='MNIST')
        for part in (raw.train, raw.validation, raw.test)
    ]
    if partitions:
        splits = redivide_data(splits, partition_proportions=partitions,
                               filters=filters, maps=maps, shuffle=shuffle)
        # Keep the container three slots wide regardless of how many
        # partitions were produced.
        splits += [None] * (3 - len(splits))
    return em.Datasets.from_list(splits)