Exemplo n.º 1
0
    def update(self, X, y=None, **fit_params):
        """Fit one additional dataset and re-optimise the aligned embeddings.

        A new ``UMAP`` mapper is fitted on ``X``, appended to
        ``self.mappers_``, and the layout of all models is optimised again
        with ``optimize_layout_aligned_euclidean``; the result replaces
        ``self.embeddings_``.

        Parameters
        ----------
        X : array-like
            The new dataset. It is validated with ``check_array``.
        y : ignored
            Accepted for signature compatibility; not used by this method.
        **fit_params
            Must contain ``"relations"``, the relations between the new
            data and the existing data. May contain ``"window_size"``
            (defaults to ``self.alignment_window_size``). The whole mapping
            is also handed to ``set_aligned_params``.

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If ``"relations"`` is missing from ``fit_params``.
        """
        if "relations" not in fit_params:
            raise ValueError(
                "Aligned UMAP requires relations between data to be "
                "specified")

        new_dict_relations = fit_params["relations"]
        X = check_array(X)

        self.__dict__ = set_aligned_params(fit_params, self.__dict__,
                                           self.n_models_)

        # Zero-based index of the model being added. ``fit`` indexes the
        # per-model parameters with ``range(self.n_models_)``, so the new
        # model's slot is the model count *before* it is incremented.
        model_index = self.n_models_

        new_mapper = UMAP(
            n_neighbors=get_nth_item_or_val(self.n_neighbors, model_index),
            min_dist=get_nth_item_or_val(self.min_dist, model_index),
            n_epochs=get_nth_item_or_val(self.n_epochs, model_index),
            repulsion_strength=get_nth_item_or_val(self.repulsion_strength,
                                                   model_index),
            learning_rate=get_nth_item_or_val(self.learning_rate,
                                              model_index),
            spread=get_nth_item_or_val(self.spread, model_index),
            negative_sample_rate=get_nth_item_or_val(self.negative_sample_rate,
                                                     model_index),
            local_connectivity=get_nth_item_or_val(self.local_connectivity,
                                                   model_index),
            set_op_mix_ratio=get_nth_item_or_val(self.set_op_mix_ratio,
                                                 model_index),
            unique=get_nth_item_or_val(self.unique, model_index),
            # Same dimensionality as the mappers built in ``fit``.
            n_components=self.n_components,
        ).fit(X)

        # Only count the model once it has been fitted successfully.
        self.n_models_ += 1
        self.mappers_.append(new_mapper)

        # TODO: We can likely make this more efficient and not recompute each time
        # Build a new list rather than using ``+=``: ``fit`` accepts a tuple
        # of relations (``tuple += list`` raises TypeError) and stores the
        # caller's own object, which should not be mutated in place.
        # NOTE(review): ``fit`` stores the relations as given, while the
        # inverted dict is stored here -- confirm the expected orientation.
        self.dict_relations_ = list(self.dict_relations_) + [
            invert_dict(new_dict_relations)
        ]

        if self.n_epochs is None:
            n_epochs = 200
        else:
            n_epochs = self.n_epochs

        indptr_list = numba.typed.List.empty_list(numba.types.int32[::1])
        indices_list = numba.typed.List.empty_list(numba.types.int32[::1])
        heads = numba.typed.List.empty_list(numba.types.int32[::1])
        tails = numba.typed.List.empty_list(numba.types.int32[::1])
        epochs_per_samples = numba.typed.List.empty_list(
            numba.types.float64[::1])

        last_index = len(self.mappers_) - 1
        for i, mapper in enumerate(self.mappers_):
            # Convert to COO once per mapper instead of once per attribute.
            coo_graph = mapper.graph_.tocoo()
            indptr_list.append(mapper.graph_.indptr)
            indices_list.append(mapper.graph_.indices)
            heads.append(coo_graph.row)
            tails.append(coo_graph.col)
            if i == last_index:
                # Only the newly added model gets a real sampling schedule.
                epochs_per_samples.append(
                    make_epochs_per_sample(coo_graph.data, n_epochs))
            else:
                # Existing models get a constant ``n_epochs + 1`` --
                # presumably so their own edges are never sampled during
                # this optimisation; confirm against the optimiser.
                epochs_per_samples.append(
                    np.full(mapper.embedding_.shape[0],
                            n_epochs + 1,
                            dtype=np.float64))

        # Use the same window-size resolution as ``fit``.
        window_size = fit_params.get("window_size", self.alignment_window_size)
        new_relations = expand_relations(self.dict_relations_, window_size)
        new_regularisation_weights = build_neighborhood_similarities(
            indptr_list,
            indices_list,
            new_relations,
        )

        new_embedding = init_from_existing(self.embeddings_[-1],
                                           new_mapper.graph_,
                                           new_dict_relations)

        random_state = check_random_state(self.random_state)
        rng_state = random_state.randint(INT32_MIN, INT32_MAX,
                                         3).astype(np.int64)

        self.embeddings_.append(new_embedding)

        self.embeddings_ = optimize_layout_aligned_euclidean(
            self.embeddings_,
            self.embeddings_,
            heads,
            tails,
            n_epochs,
            epochs_per_samples,
            new_regularisation_weights,
            new_relations,
            rng_state,
            lambda_=self.alignment_regularisation,
        )
Exemplo n.º 2
0
    def fit(self, X, y=None, **fit_params):
        """Fit one UMAP model per dataset and optimise them jointly.

        Parameters
        ----------
        X : list, tuple or numpy.ndarray
            The datasets; ``X[n]`` is fitted by the ``n``-th mapper.
        y : ignored
            Accepted for signature compatibility; not used by this method.
        **fit_params
            Must contain ``"relations"``: a list or tuple with exactly
            ``len(X) - 1`` entries. May contain ``"window_size"``
            (defaults to ``self.alignment_window_size``).

        Returns
        -------
        self
            The estimator, with ``mappers_``, ``n_models_``,
            ``dict_relations_`` and ``embeddings_`` set.

        Raises
        ------
        ValueError
            If ``"relations"`` is missing, or if ``self.n_components`` is a
            list, tuple or array rather than a single value.
        AssertionError
            If ``X`` or the relations have an unsupported type, or their
            lengths do not match.
        """
        if "relations" not in fit_params:
            raise ValueError(
                "Aligned UMAP requires relations between data to be "
                "specified")

        self.dict_relations_ = fit_params["relations"]
        # Kept as assertions so the raised exception type is unchanged.
        assert isinstance(self.dict_relations_, (list, tuple)), (
            "relations must be a list or tuple")
        assert isinstance(X, (list, tuple, np.ndarray)), (
            "X must be a list, tuple or numpy array of datasets")
        assert (len(X) - 1) == (len(self.dict_relations_)), (
            "expected exactly len(X) - 1 relations")

        # We need n_components to be constant or this won't work
        if isinstance(self.n_components, (list, tuple, np.ndarray)):
            raise ValueError(
                "n_components must be a single integer, and cannot vary")

        self.n_models_ = len(X)

        self.mappers_ = [
            UMAP(
                n_neighbors=get_nth_item_or_val(self.n_neighbors, n),
                min_dist=get_nth_item_or_val(self.min_dist, n),
                n_epochs=get_nth_item_or_val(self.n_epochs, n),
                repulsion_strength=get_nth_item_or_val(self.repulsion_strength,
                                                       n),
                learning_rate=get_nth_item_or_val(self.learning_rate, n),
                spread=get_nth_item_or_val(self.spread, n),
                negative_sample_rate=get_nth_item_or_val(
                    self.negative_sample_rate, n),
                local_connectivity=get_nth_item_or_val(self.local_connectivity,
                                                       n),
                set_op_mix_ratio=get_nth_item_or_val(self.set_op_mix_ratio, n),
                unique=get_nth_item_or_val(self.unique, n),
                n_components=self.n_components,
            ).fit(X[n]) for n in range(self.n_models_)
        ]

        if self.n_epochs is None:
            n_epochs = 200
        else:
            n_epochs = self.n_epochs

        window_size = fit_params.get("window_size", self.alignment_window_size)
        relations = expand_relations(self.dict_relations_, window_size)

        indptr_list = numba.typed.List.empty_list(numba.types.int32[::1])
        indices_list = numba.typed.List.empty_list(numba.types.int32[::1])
        heads = numba.typed.List.empty_list(numba.types.int32[::1])
        tails = numba.typed.List.empty_list(numba.types.int32[::1])
        epochs_per_samples = numba.typed.List.empty_list(
            numba.types.float64[::1])

        for mapper in self.mappers_:
            # Convert to COO once per mapper instead of once per attribute.
            coo_graph = mapper.graph_.tocoo()
            indptr_list.append(mapper.graph_.indptr)
            indices_list.append(mapper.graph_.indices)
            heads.append(coo_graph.row)
            tails.append(coo_graph.col)
            epochs_per_samples.append(
                make_epochs_per_sample(coo_graph.data, n_epochs))

        regularisation_weights = build_neighborhood_similarities(
            indptr_list,
            indices_list,
            relations,
        )

        embeddings = numba.typed.List.empty_list(numba.types.float32[:, ::1])
        for i, mapper in enumerate(self.mappers_):
            # NOTE(review): the global ``np.random`` module is passed here,
            # not a generator derived from ``self.random_state`` -- confirm
            # whether the initialisation is meant to be seedable.
            init = spectral_layout(
                mapper._raw_data,
                mapper.graph_,
                self.n_components,
                np.random,
            )
            # Rescale so the largest absolute coordinate is 10.
            expansion = 10.0 / np.abs(init).max()
            embedding = (init * expansion).astype(
                np.float32,
                order="C",
            )
            if i == 0:
                # The first model is the reference; nothing to align it to.
                embeddings.append(embedding)
                continue

            # Entries >= 0 are treated as anchor indices; their positions
            # in ``anchor_data`` give the matching indices on the other
            # side of the relation.
            anchor_data = relations[i][window_size - 1]
            left_anchors = anchor_data[anchor_data >= 0]
            right_anchors = np.where(anchor_data >= 0)[0]
            embeddings.append(
                procrustes_align(
                    embeddings[-1],
                    embedding,
                    np.vstack([left_anchors, right_anchors]),
                ))

        random_state = check_random_state(self.random_state)
        rng_state = random_state.randint(INT32_MIN, INT32_MAX,
                                         3).astype(np.int64)

        self.embeddings_ = optimize_layout_aligned_euclidean(
            embeddings,
            embeddings,
            heads,
            tails,
            n_epochs,
            epochs_per_samples,
            regularisation_weights,
            relations,
            rng_state,
            lambda_=self.alignment_regularisation,
        )

        return self