Example #1
# imports assumed by the snippets in this example (standard datafold layout)
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_swiss_roll

import datafold.dynfold as dfold
import datafold.pcfold as pfold
from datafold.utils.plot import plot_pairwise_eigenvector


def bonus():
    """
    Plot the first non-trivial eigenvector against the others using the datafold package.
    """
    nr_samples = 5000
    # reduce number of points for plotting
    nr_samples_plot = 1000
    idx_plot = np.random.permutation(nr_samples)[0:nr_samples_plot]

    # generate point cloud
    X, X_color = make_swiss_roll(nr_samples, noise=0.0, random_state=None)

    X_pcm = pfold.PCManifold(X)
    X_pcm.optimize_parameters(result_scaling=0.5)
    print(f'epsilon={X_pcm.kernel.epsilon}, cut-off={X_pcm.cut_off}')

    dmap = dfold.DiffusionMaps(kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon), n_eigenpairs=9,
                               dist_kwargs=dict(cut_off=X_pcm.cut_off))
    dmap = dmap.fit(X_pcm)
    evecs, evals = dmap.eigenvectors_, dmap.eigenvalues_
    print(evecs.shape)
    print(evals.shape)

    plot_pairwise_eigenvector(eigenvectors=dmap.eigenvectors_[idx_plot, :], n=1,
                              fig_params=dict(figsize=[15, 15]),
                              scatter_params=dict(cmap=plt.cm.Spectral, c=X_color[idx_plot]))
    plt.show()
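
bonus() computes the eigenvalues but only prints their shapes; a quick decay plot helps judge how many eigenpairs carry structure. A minimal sketch reusing the imports above (the sample count is arbitrary):

def plot_eigenvalue_decay(nr_samples=1000):
    # sketch: visualize the diffusion-maps eigenvalue decay on a swiss roll
    X, _ = make_swiss_roll(nr_samples, noise=0.0)
    X_pcm = pfold.PCManifold(X)
    X_pcm.optimize_parameters(result_scaling=0.5)
    dmap = dfold.DiffusionMaps(
        kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
        n_eigenpairs=9,
        dist_kwargs=dict(cut_off=X_pcm.cut_off),
    ).fit(X_pcm)
    plt.plot(dmap.eigenvalues_, "o-")
    plt.xlabel("eigenpair index")
    plt.ylabel("eigenvalue")
    plt.show()
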
def bonus_task(n=1000):
    """
    Load the swiss-roll dataset and run the datafold diffusion-maps analysis.
    """
    X = swissroll_dataset(n)
    X_color = swissroll_color(X)

    idx_plot = np.random.permutation(n)  # shuffle all n points for plotting
    # Optimize kernel parameters
    X_pcm = pfold.PCManifold(X)
    X_pcm.optimize_parameters()
    print(f"epsilon={X_pcm.kernel.epsilon}, cut-off={X_pcm.cut_off}")

    dmap = dfold.DiffusionMaps(
        kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
        n_eigenpairs=9,
        dist_kwargs=dict(cut_off=X_pcm.cut_off),
    )

    dmap = dmap.fit(X_pcm)
    evecs, evals = dmap.eigenvectors_, dmap.eigenvalues_

    plot_pairwise_eigenvector(
        eigenvectors=dmap.eigenvectors_[idx_plot, :],
        n=1,
        fig_params=dict(figsize=[15, 15]),
        scatter_params=dict(cmap=plt.cm.Spectral, c=X_color[idx_plot]),
    )
    plt.show()
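
bonus_task() depends on two helpers that are not shown here (swissroll_dataset, swissroll_color). Hypothetical stand-ins built on sklearn's make_swiss_roll so the function can be exercised; the original project may define them differently:

def swissroll_dataset(n):
    # stand-in: sample n points from the swiss-roll manifold
    X, _ = make_swiss_roll(n, noise=0.0)
    return X

def swissroll_color(X):
    # stand-in: color points by one ambient coordinate as a rough proxy
    return X[:, 0]

bonus_task(n=1000)
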
Example #3
    def forward(self, x):
        x = x.view(16, -1)
        # diffusion-maps embedding of the batch; detach since datafold
        # operates on plain numpy arrays (no gradients flow through it)
        X_pcm = pfold.PCManifold(x.detach().cpu().numpy())
        X_pcm.optimize_parameters(result_scaling=2)
        dmap = DiffusionMaps(
            kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
            n_eigenpairs=6,
            dist_kwargs=dict(cut_off=X_pcm.cut_off))

        dmap = dmap.fit(X_pcm)
        dmap = dmap.set_coords([1, 2])
        X_pcm = dmap.transform(X_pcm)
        # the embedding above is computed but never used: the original input
        # is flattened and fed to the fully connected layer instead
        x = x.view(-1, 3 * 32 * 32)
        x = self.fc(x)
        return x
    def forward(self, x):
        x = x.view(500, -1)
        # embed the batch with diffusion maps; detach since datafold operates
        # on plain numpy arrays (no gradients flow through the embedding)
        X_pcm = pfold.PCManifold(x.detach().cpu().numpy())
        X_pcm.optimize_parameters(result_scaling=2)
        dmap = DiffusionMaps(
            kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
            n_eigenpairs=10,
            dist_kwargs=dict(cut_off=X_pcm.cut_off))

        dmap = dmap.fit(X_pcm)
        # keep three selected diffusion coordinates as the new features
        dmap = dmap.set_coords([1, 2, 3])
        X_pcm = dmap.transform(X_pcm)
        X_pcm = torch.from_numpy(X_pcm).float()

        x = X_pcm.view(-1, 3)
        x = self.fc(x)
        return x
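
Both forward() variants are methods cut out of a larger torch model; self.fc and the surrounding imports are not shown. A minimal sketch of a hypothetical enclosing module for the second variant (the class name and layer size are assumptions):

import torch
import torch.nn as nn

import datafold.pcfold as pfold
from datafold.dynfold import DiffusionMaps


class DMapNet(nn.Module):  # hypothetical name for the enclosing model
    def __init__(self, n_classes=10):
        super().__init__()
        # matches the (N, 3) features produced by forward() below
        self.fc = nn.Linear(3, n_classes)

    def forward(self, x):
        # assumes a batch of exactly 500 flattened samples
        x = x.view(500, -1)
        # no gradients flow through the datafold embedding
        X_pcm = pfold.PCManifold(x.detach().cpu().numpy())
        X_pcm.optimize_parameters(result_scaling=2)
        dmap = DiffusionMaps(
            kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
            n_eigenpairs=10,
            dist_kwargs=dict(cut_off=X_pcm.cut_off),
        ).fit(X_pcm)
        dmap = dmap.set_coords([1, 2, 3])
        emb = torch.from_numpy(dmap.transform(X_pcm)).float()
        return self.fc(emb.view(-1, 3))
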
# imports assumed by the module-level functions below
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import NearestNDInterpolator, RBFInterpolator
from sklearn import manifold
from sklearn.datasets import make_swiss_roll
from sklearn.decomposition import PCA
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import (ConstantKernel, ExpSineSquared,
                                              Matern, WhiteKernel)

import datafold.dynfold as dfold
import datafold.pcfold as pfold
from datafold.dynfold import GeometricHarmonicsInterpolator as GHI
from datafold.dynfold import LaplacianPyramidsInterpolator as LPI
from datafold.dynfold import LocalRegressionSelection
from datafold.utils.plot import plot_pairwise_eigenvector


def datafold_swiss_roll(n_samples):
    """Run the diffusion-maps analysis on a swiss roll and save the figure."""
    X, color = make_swiss_roll(n_samples)

    X_pcm = pfold.PCManifold(X)
    X_pcm.optimize_parameters(result_scaling=0.5)

    print(f'epsilon={X_pcm.kernel.epsilon}, cut-off={X_pcm.cut_off}')

    dmap = dfold.DiffusionMaps(kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon), n_eigenpairs=9,
                               dist_kwargs=dict(cut_off=X_pcm.cut_off))
    dmap = dmap.fit(X_pcm)
    evecs, evals = dmap.eigenvectors_, dmap.eigenvalues_

    plot_pairwise_eigenvector(eigenvectors=dmap.eigenvectors_, n=1,
                              fig_params=dict(figsize=[15, 15]),
                              scatter_params=dict(cmap=plt.cm.Spectral, c=color))

    plt.savefig(f'T3_datafold_lib_{n_samples}.png')
    plt.show()
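
A short usage example (the output filename pattern comes from the function itself):

for n in (1000, 5000):
    datafold_swiss_roll(n)  # saves T3_datafold_lib_{n}.png and shows the figure
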
def Lift(method, X_trainingSet, X_testSet, eig_trainingSet, eig_Simulation,
         **kwargs):
    """
    Lift low-dimensional (embedded) data back to the high-dimensional space.

    :param method: available methods are
        'GH'  : Geometric Harmonics
        'LP'  : Laplacian Pyramids
        'KR'  : Kriging (GPR)
        'SI'  : simple nearest-neighbor interpolation
        'RBF' : radial basis function interpolation
    :param X_trainingSet: high-dimensional space data (X), training set
    :param X_testSet: high-dimensional space data (X), test set
    :param eig_trainingSet: low-dimensional (embedded) space parsimonious
        eigenvectors (Y), training set
    :param eig_Simulation: low-dimensional (embedded) space parsimonious
        eigenvectors (Y), predicted by a specific forecasting methodology
        (e.g. VAR(3))
    :param lift_optParams_knn: (kwarg) number of neighbors used for the lifting
    :return: [extrapolatedPsi_to_X, residual]
        extrapolatedPsi_to_X : lifted data
        residual : fit score ('GH', 'LP', 'KR') or element-wise residuals
            against X_testSet ('SI', 'RBF')
    """

    if "lift_optParams_knn" in kwargs:
        lift_optParams_knn = kwargs["lift_optParams_knn"]
    else:
        lift_optParams_knn = 50  #previously default was 25 (the one in datafold module as well)

    if "GH_epsilon" in kwargs:
        GH_epsilon = kwargs["GH_epsilon"]
    else:
        GH_epsilon = "opt"

    if "GH_cut_off" in kwargs:
        GH_cut_off = kwargs["GH_cut_off"]
    else:
        GH_cut_off = "opt"

    if method == 'GH':
        pcm = pfold.PCManifold(eig_trainingSet)
        pcm.optimize_parameters(random_state=0, k=lift_optParams_knn)
        if GH_epsilon == "opt":
            GH_epsilon = pcm.kernel.epsilon
        if GH_cut_off == "opt":
            GH_cut_off = pcm.cut_off
        #opt_n_eigenpairs = eig_trainingSet.shape[0]-1
        opt_n_eigenpairs = eig_trainingSet.shape[1]  # Official (Paper)
        gh_interpolant_psi_to_X = GHI(pfold.GaussianKernel(epsilon=GH_epsilon),
                                      n_eigenpairs=opt_n_eigenpairs,
                                      dist_kwargs=dict(cut_off=GH_cut_off))
        print("fit ... ")
        gh_interpolant_psi_to_X.fit(eig_trainingSet, X_trainingSet)
        residual = gh_interpolant_psi_to_X.score(eig_trainingSet,
                                                 X_trainingSet)
        print("predict ... ")
        extrapolatedPsi_to_X = gh_interpolant_psi_to_X.predict(eig_Simulation)
        print("extrapolatedPsi_to_X.shape = ", extrapolatedPsi_to_X.shape)
        # print("opt_epsilon = ", opt_epsilon)
        # print("opt_cutoff = ", opt_cutoff)

        "Optimize Parameters using BayesianCV"
        """
        n_iters = 5
        np.random.seed(random_state)

        train_indices, test_indices = train_test_split(
            np.random.permutation(X_trainingSet.shape[0]), train_size=2 / 3, test_size=1 / 3
        )

        class GHIGauss(GHI):
            def __init__(self, epsilon=1, n_eigenpairs=2, cut_off=np.inf):
                self.epsilon = epsilon
                self.n_eigenpairs = n_eigenpairs
                self.cut_off = cut_off

                super(GHIGauss, self).__init__(
                    kernel=pfold.GaussianKernel(self.epsilon),
                    n_eigenpairs=self.n_eigenpairs,
                    is_stochastic=False,
                    dist_kwargs=dict(cut_off=self.cut_off),
                )

        opt = BayesSearchCV(
            GHIGauss(),
            {
                "epsilon": Real(
                    pcm.kernel.epsilon / 2, pcm.kernel.epsilon * 2, prior="log-uniform"
                ),
                "cut_off": Real(pcm.cut_off / 2, pcm.cut_off * 2, prior="uniform"),
                "n_eigenpairs": Integer(10, 1000, prior="uniform"),
            },
            n_iter=n_iters,
            random_state=0,
            scoring=lambda estimator, x, y: estimator.score(
                x, y, multioutput="uniform_average"
            ),  # is to be maximized
            cv=[[train_indices, test_indices]],
            refit=False,  # we cannot refit to the entire dataset because this would alter the optimal kernel scale
        )

        # run the Bayesian optimization
        opt.fit(eig_trainingSet, X_trainingSet)

        # get best model and results from parameter search

        # refit best parameter set on training set (not entire dataset - the parameters are optimized for the training set!)
        optimal_GHI = GHIGauss(**opt.best_params_).fit(
            eig_trainingSet[train_indices, :], X_trainingSet[train_indices, :]
        )

        print(
            f"Previous epsilon: {pcm.kernel.epsilon}, cut-off: {pcm.cut_off}, #eigenpairs: {opt_n_eigenpairs}"
        )
        print(
            f"Optimal epsilon: {optimal_GHI.epsilon}, cut-off: {optimal_GHI.cut_off}, #eigenpairs: {optimal_GHI.n_eigenpairs}"
        )
        extrapolatedPsi_to_X = optimal_GHI.predict(eig_Simulation)
        """
    elif method == 'LP':
        lpyr_interpolant_psi_to_X = LPI(auto_adaptive=True)
        lpyr_interpolant_psi_to_X.fit(eig_trainingSet, X_trainingSet)
        residual = lpyr_interpolant_psi_to_X.score(eig_trainingSet,
                                                   X_trainingSet)
        extrapolatedPsi_to_X = lpyr_interpolant_psi_to_X.predict(
            eig_Simulation)
    elif method == 'KR':
        # Official (29/8/2021)
        mainKernel_Kriging_GP = (1 * ConstantKernel() + 1 * ExpSineSquared()
                                 + 1 * Matern() + 1 * WhiteKernel())
        gpr_model = GaussianProcessRegressor(kernel=mainKernel_Kriging_GP,
                                             normalize_y=True)
        gpr_model_fit = gpr_model.fit(eig_trainingSet, X_trainingSet)
        residual = gpr_model_fit.score(eig_trainingSet, X_trainingSet)
        extrapolatedPsi_to_X = gpr_model_fit.predict(eig_Simulation)
    elif method == 'SI':  # simple nearest-neighbor interpolation
        knn_interpolator = NearestNDInterpolator(eig_trainingSet,
                                                 X_trainingSet)
        extrapolatedPsi_to_X = knn_interpolator(eig_Simulation)
        residual = extrapolatedPsi_to_X - X_testSet
    elif method == "RBF":
        print("lift_optParams_knn = ", lift_optParams_knn)
        extrapolatedPsi_to_X = RBFInterpolator(eig_trainingSet,
                                               X_trainingSet,
                                               kernel="linear",
                                               degree=1,
                                               neighbors=lift_optParams_knn,
                                               epsilon=1)(eig_Simulation)
        residual = extrapolatedPsi_to_X - X_testSet

    return [extrapolatedPsi_to_X, residual]
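
A hedged usage sketch for Lift with synthetic shapes, using the 'SI' path since it needs no kernel tuning (the data below is illustrative, not from the original project):

rng = np.random.default_rng(0)
X_train = rng.normal(size=(200, 3))   # high-dimensional training data
X_test = rng.normal(size=(50, 3))     # high-dimensional test data
Y_train = X_train[:, :1]              # stand-in for trained DM coordinates
Y_pred = X_test[:, :1]                # stand-in for forecast coordinates

lifted, residual = Lift('SI', X_train, X_test, Y_train, Y_pred)
print(lifted.shape, residual.shape)   # (50, 3) (50, 3)
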
def Embed(method, X_train_local, target_intrinsic_dim, **kwargs):
    """
    Embed the input data using a specific embedding method.

    :param method: 'DM', 'LLE' or 'PCA' (a 'DM' method string containing
        'ComputeParsimonious' also runs parsimonious-coordinate selection)
    :param X_train_local: data X to embed
    :param target_intrinsic_dim: number of parsimonious coordinates
    :param kwargs: LLE_neighbors, dm_optParams_knn, dm_epsilon and cut_off;
        the latter two take either a specific value or "opt" (the default)
        to optimize them internally
    :return: [target_mapping, parsimoniousEigs, X_pcm.kernel.epsilon, eigValsOut]
        target_mapping --> the mapped data
        parsimoniousEigs (str) --> the indices of the parsimonious coordinates
        X_pcm.kernel.epsilon --> optimized epsilon value
        eigValsOut --> corresponding eigenvalues
    """
    if "LLE_neighbors" in kwargs:
        LLE_neighbors = kwargs["LLE_neighbors"]
    else:
        LLE_neighbors = 50

    if "dm_optParams_knn" in kwargs:
        dm_optParams_knn = kwargs["dm_optParams_knn"]
    else:
        dm_optParams_knn = 50  #previously default was 25 (the one in datafold module as well)

    if "dm_epsilon" in kwargs:
        dm_epsilon = kwargs["dm_epsilon"]
    else:
        dm_epsilon = "opt"

    if "cut_off" in kwargs:
        cut_off = kwargs["cut_off"]
    else:
        cut_off = "opt"

    if "DM" in method:
        X_pcm = pfold.PCManifold(X_train_local)
        X_pcm.optimize_parameters(random_state=0, k=dm_optParams_knn)
        if dm_epsilon == "opt":
            dm_epsilon = X_pcm.kernel.epsilon
        if cut_off == "opt":
            cut_off = X_pcm.cut_off  # cut_off lives on the manifold, not the kernel

        if "ComputeParsimonious" in method:
            if target_intrinsic_dim >= 10:
                n_eigenpairsIn = target_intrinsic_dim + 1
            else:
                n_eigenpairsIn = 10
        else:
            n_eigenpairsIn = target_intrinsic_dim

        dmap_local = dfold.DiffusionMaps(
            kernel=pfold.GaussianKernel(epsilon=dm_epsilon),
            n_eigenpairs=n_eigenpairsIn,
            dist_kwargs=dict(cut_off=cut_off))
        dmap_local = dmap_local.fit(X_pcm)
        # evecs_raw, evals_raw = dmap.eigenvectors_, dmap.eigenvalues_

        if "ComputeParsimonious" in method:
            if X_train_local.shape[0] < 500:
                n_subsampleIn = X_train_local.shape[0] - 1
            else:
                n_subsampleIn = 500

            selection = LocalRegressionSelection(
                intrinsic_dim=target_intrinsic_dim,
                n_subsample=n_subsampleIn,
                strategy="dim").fit(dmap_local.eigenvectors_)

            # print("selection.evec_indices_ = ", selection.evec_indices_)
            parsimoniousEigs = ",".join(
                [str(x) for x in selection.evec_indices_])

            target_mapping = selection.transform(dmap_local.eigenvectors_)
            # print("target_mapping.shape = ", target_mapping.shape)
            eigValsOut = dmap_local.eigenvalues_[selection.evec_indices_]
        else:
            parsimoniousEigs = "first"
            target_mapping = dmap_local.eigenvectors_
            eigValsOut = dmap_local.eigenvalues_

        out = [
            target_mapping, parsimoniousEigs, X_pcm.kernel.epsilon, eigValsOut
        ]
    elif "LLE" in method:

        lle = manifold.LocallyLinearEmbedding(
            n_neighbors=LLE_neighbors,
            n_components=target_intrinsic_dim,
            method="standard",
            n_jobs=-1)
        target_mapping = lle.fit_transform(X_train_local)

        out = [target_mapping, "none", 1, []]
    elif "PCA" in method:
        pca = PCA(n_components=target_intrinsic_dim)
        evecs = pca.fit_transform(X_train_local.T)
        evals = pca.singular_values_
        explainedVarianceRatio = pca.explained_variance_ratio_
        target_mapping = pca.components_.T

        out = [target_mapping, ",".join(evecs), explainedVarianceRatio, evals]

    return out
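
A hedged usage sketch for Embed on the swiss roll (the method string and dimensions are chosen for illustration, assuming the datafold version this code targets):

X_sr, _ = make_swiss_roll(800, noise=0.0)
mapping, eig_idx, eps_opt, evals_out = Embed("DM_ComputeParsimonious", X_sr, 2)
print(mapping.shape)                                # (800, 2)
print("parsimonious eigenvector indices:", eig_idx)
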
Example #8
# NOTE: the start of this snippet was cut off; the setup below is reconstructed
# from context (an S-shaped point cloud with `data`, `data_color`, `idx_plot`).
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_s_curve

import datafold.dynfold as dfold
import datafold.pcfold as pfold
from datafold.utils.plot import plot_pairwise_eigenvector

nr_samples = 5000        # reconstructed value, not recoverable from the source
nr_samples_plot = 1000
idx_plot = np.random.permutation(nr_samples)[0:nr_samples_plot]

# generate point cloud
data, data_color = make_s_curve(nr_samples,
                                noise=0)

# plot
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection="3d")
ax.scatter(data[idx_plot, 0], data[idx_plot, 1], data[idx_plot, 2],
           c=data_color[idx_plot],
           cmap=plt.cm.Spectral)
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.set_zlabel("z")
ax.set_title("point cloud on S-shaped manifold")
ax.view_init(10, 70)
plt.show()

data_pcm = pfold.PCManifold(data)
data_pcm.optimize_parameters(result_scaling=0.5)

# compute the diffusion-maps embedding
dmap = dfold.DiffusionMaps(kernel=pfold.GaussianKernel(epsilon=data_pcm.kernel.epsilon),
                           n_eigenpairs=9,
                           dist_kwargs=dict(cut_off=data_pcm.cut_off))
dmap = dmap.fit(data_pcm)
evecs, evals = dmap.eigenvectors_, dmap.eigenvalues_

# plot the eigenvectors pair-wise against the first non-trivial one
plot_pairwise_eigenvector(eigenvectors=dmap.eigenvectors_[idx_plot, :],
                          n=1,
                          fig_params=dict(figsize=[15, 15]),
                          scatter_params=dict(cmap=plt.cm.Spectral,
                                              c=data_color[idx_plot]))
plt.show()
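
As a follow-on, the redundant eigenvector pairs visible in the plot can be filtered automatically with LocalRegressionSelection, the same class Embed uses above; a brief sketch continuing the script:

from datafold.dynfold import LocalRegressionSelection

# select 2 parsimonious coordinates out of the 9 computed eigenpairs
selection = LocalRegressionSelection(intrinsic_dim=2, n_subsample=500,
                                     strategy="dim").fit(dmap.eigenvectors_)
print("selected eigenvector indices:", selection.evec_indices_)
target_mapping = selection.transform(dmap.eigenvectors_)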