def random_classification_datasets(n_samples, features=100, classes=2, informative=0.1, partition_proportions=(0.5, 0.3), rnd=None, one_hot=True, **mk_cls_kwargs):
    """Create a synthetic classification problem and wrap it as partitioned datasets.

    Draws a random classification task with sklearn's ``make_classification``,
    optionally one-hot encodes the labels, splits the result according to
    ``partition_proportions`` and prints two quick diagnostics (label range and
    the conditioning of X^T X on the training split).

    :param n_samples: total number of examples to generate.
    :param features: number of input features.
    :param classes: number of target classes.
    :param informative: recorded in the dataset ``info`` dict only.
        NOTE(review): it is NOT forwarded to ``make_classification`` — confirm
        whether ``n_informative`` should be derived from it.
    :param partition_proportions: fractions used by ``redivide_data`` to split
        the single dataset into train/validation(/test) parts.
    :param rnd: seed or random state accepted by ``dl.get_rand_state``.
    :param one_hot: if True, labels are one-hot encoded.
    :param mk_cls_kwargs: extra keyword arguments passed to ``make_classification``.
    :return: a ``dl.Datasets`` container with the partitioned data.
    """
    state = dl.get_rand_state(rnd)
    inputs, labels = make_classification(
        n_samples, features, n_classes=classes, random_state=state, **mk_cls_kwargs
    )
    labels = to_one_hot_enc(labels) if one_hot else labels
    print("range of Y", np.min(labels), np.max(labels))

    # Keep the generation settings in the dataset metadata; note that the raw
    # `rnd` argument (not the derived state) is what gets recorded.
    metadata = merge_dicts({"informative": informative, "random_seed": rnd}, mk_cls_kwargs)
    full_set = dl.Dataset(
        inputs,
        labels,
        name=dl.em_utils.name_from_dict(metadata, "w"),
        info=metadata,
    )
    datasets = dl.Datasets.from_list(redivide_data([full_set], partition_proportions))

    # Diagnostic: a large condition number of X^T X signals an ill-posed
    # least-squares / optimization problem on the training split.
    train_x = datasets.train.data
    print(
        "conditioning of X^T X",
        np.linalg.cond(train_x.T @ train_x),
    )
    return datasets
def generate_datasets(self, rand=None, num_classes=None, num_examples=None, wait_for_n_min=None):
    """Sample an episode of per-class examples and package it as datasets.

    Picks ``num_classes`` classes at random, draws ``num_examples`` images
    (one count per requested split when ``num_examples`` is a tuple/list),
    loads each image either from the in-memory cache (``self._loaded_images``)
    or from disk, and returns a ``dl.Datasets`` built from the resulting
    ``dl.Dataset`` objects.

    :param rand: seed or random state accepted by ``dl.get_rand_state``.
    :param num_classes: number of classes to sample; falls back to
        ``self.kwargs["num_classes"]`` when falsy.
    :param num_examples: example count, or a sequence of counts (one dataset
        per entry); falls back to ``self.kwargs["num_examples"]`` when falsy.
    :param wait_for_n_min: if truthy, block until at least this many images
        per class have been loaded (polling every 5 seconds).
    :return: ``dl.Datasets`` with one entry per element of ``num_examples``.
    """
    rand = dl.get_rand_state(rand)
    if wait_for_n_min:
        import time
        # Busy-wait (5 s polls) until the background loader has enough images.
        while not self.check_loaded_images(wait_for_n_min):
            time.sleep(5)
    if not num_examples:
        num_examples = self.kwargs["num_examples"]
    if not num_classes:
        num_classes = self.kwargs["num_classes"]
    # Prefer the in-memory image cache; otherwise fall back to the class
    # listing in self.info (images will then be read from disk below).
    clss = self._loaded_images if self._loaded_images else self.info[
        "classes"]
    # Sample the episode's classes without replacement and map each class
    # name to a dense integer label 0..num_classes-1.
    random_classes = rand.choice(list(clss.keys()), size=(num_classes, ), replace=False)
    rand_class_dict = {rnd: k for k, rnd in enumerate(random_classes)}
    _dts = []
    for ns in dl.as_tuple_or_list(num_examples):
        # Balanced (with wrap-around) assignment of ns draws over the classes.
        classes = balanced_choice_wr(random_classes, ns, rand)
        all_images = {cls: list(clss[cls]) for cls in classes}
        data, targets, sample_info = [], [], []
        for c in classes:
            # Re-shuffle the remaining pool and take its head, then remove it
            # so the same image is not drawn twice within this dataset.
            rand.shuffle(all_images[c])
            img_name = all_images[c][0]
            all_images[c].remove(img_name)
            sample_info.append({"name": img_name, "label": c})
            if self._loaded_images:
                data.append(clss[c][img_name])
            else:
                from imageio import imread
                # Read from disk, resize to (resize, resize) and scale to [0, 1].
                data.append(
                    np.array(
                        Image.fromarray(
                            imread(
                                join(self.info["base_folder"],
                                     join(c, img_name)))).resize(
                                         size=(self.info["resize"],
                                               self.info["resize"]))) / 255.0)
            targets.append(rand_class_dict[c])
        if self.info["one_hot_enc"]:
            targets = to_one_hot_enc(targets, dimension=num_classes)
        _dts.append(
            dl.Dataset(
                data=np.array(np.stack(data)),
                target=targets,
                sample_info=sample_info,
                info={"all_classes": random_classes},
            ))
    return dl.Datasets.from_list(_dts)