Example #1
# `dl`, `merge_dicts`, `to_one_hot_enc` and `redivide_data` come from the
# surrounding project and are assumed to be imported there; numpy and
# scikit-learn are standard dependencies of this snippet.
import numpy as np
from sklearn.datasets import make_classification


def random_classification_datasets(n_samples,
                                   features=100,
                                   classes=2,
                                   informative=0.1,
                                   partition_proportions=(0.5, 0.3),
                                   rnd=None,
                                   one_hot=True,
                                   **mk_cls_kwargs):
    rnd_state = dl.get_rand_state(rnd)
    X, Y = make_classification(n_samples,
                               features,
                               n_classes=classes,
                               random_state=rnd_state,
                               **mk_cls_kwargs)
    if one_hot:
        Y = to_one_hot_enc(Y)

    print("range of Y", np.min(Y), np.max(Y))
    # Note: `informative` is only recorded in the metadata here; to affect the
    # generated problem, pass n_informative through **mk_cls_kwargs.
    info = merge_dicts({
        "informative": informative,
        "random_seed": rnd
    }, mk_cls_kwargs)
    name = dl.em_utils.name_from_dict(info, "w")
    dt = dl.Dataset(X, Y, name=name, info=info)
    # Split the single dataset into partitions (e.g. train/validation/test)
    # according to partition_proportions.
    datasets = dl.Datasets.from_list(redivide_data([dt],
                                                   partition_proportions))
    print(
        "conditioning of X^T X",
        np.linalg.cond(datasets.train.data.T @ datasets.train.data),
    )
    return datasets
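A minimal usage sketch for the function above. The call relies on the defaults of make_classification (two classes, so its default n_informative suffices); partition attribute names beyond datasets.train.data, which the print statement above already uses, are assumptions about the dl.Datasets/dl.Dataset API.

# Hedged usage sketch: assumes random_classification_datasets and its helpers
# (dl, redivide_data, ...) are in scope as in the module above.
datasets = random_classification_datasets(1000,
                                          features=50,
                                          partition_proportions=(0.6, 0.2))
# Roughly 60% of the samples should end up in the training split.
print(datasets.train.data.shape)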
Example #2
    def generate_datasets(self,
                          rand=None,
                          num_classes=None,
                          num_examples=None,
                          wait_for_n_min=None):

        # `dl`, `balanced_choice_wr`, `to_one_hot_enc`, `np` (numpy), `Image`
        # (PIL) and `join` (os.path) are assumed to come from module-level
        # imports in the full source file.
        rand = dl.get_rand_state(rand)

        # Optionally block until at least `wait_for_n_min` images are
        # available before sampling.
        if wait_for_n_min:
            import time

            while not self.check_loaded_images(wait_for_n_min):
                time.sleep(5)

        if not num_examples:
            num_examples = self.kwargs["num_examples"]
        if not num_classes:
            num_classes = self.kwargs["num_classes"]

        # Use preloaded images when available, otherwise fall back to the
        # class index stored in the dataset info.
        clss = (self._loaded_images if self._loaded_images
                else self.info["classes"])

        # Sample `num_classes` distinct classes and map each one to an
        # integer label.
        random_classes = rand.choice(list(clss.keys()),
                                     size=(num_classes, ),
                                     replace=False)
        rand_class_dict = {cls: k for k, cls in enumerate(random_classes)}

        _dts = []
        for ns in dl.as_tuple_or_list(num_examples):
            # Draw `ns` class labels, balancing the counts across the sampled
            # classes.
            classes = balanced_choice_wr(random_classes, ns, rand)

            all_images = {cls: list(clss[cls]) for cls in classes}
            data, targets, sample_info = [], [], []
            for c in classes:
                # Take one image per draw, without replacement within a class.
                rand.shuffle(all_images[c])
                img_name = all_images[c][0]
                all_images[c].remove(img_name)
                sample_info.append({"name": img_name, "label": c})

                if self._loaded_images:
                    # Image arrays are already in memory.
                    data.append(clss[c][img_name])
                else:
                    from imageio import imread

                    # Load from disk, resize, and rescale pixels to [0, 1].
                    img = imread(join(self.info["base_folder"],
                                      join(c, img_name)))
                    img = Image.fromarray(img).resize(
                        size=(self.info["resize"], self.info["resize"]))
                    data.append(np.array(img) / 255.0)
                targets.append(rand_class_dict[c])

            if self.info["one_hot_enc"]:
                targets = to_one_hot_enc(targets, dimension=num_classes)

            _dts.append(
                dl.Dataset(
                    data=np.array(np.stack(data)),
                    target=targets,
                    sample_info=sample_info,
                    info={"all_classes": random_classes},
                ))
        return dl.Datasets.from_list(_dts)
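A hedged usage sketch for generate_datasets. Here `loader` is a hypothetical instance of the (unnamed) class this method belongs to, configured with the base_folder, resize and one_hot_enc entries in its info dict that the method reads; the partition attribute used in the print mirrors the first example and is an assumption.

# Hypothetical usage: `loader` stands for an instance of the class defining
# generate_datasets (the class itself is not shown in this excerpt).
episode = loader.generate_datasets(num_classes=5, num_examples=(25, 25))
# num_examples=(25, 25) yields two dl.Dataset objects over the same 5 sampled
# classes, each holding 25 images, wrapped in a dl.Datasets collection.
print(episode.train.data.shape)  # e.g. (25, resize, resize, channels)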