Example #1
    def _get_Kmatrices(self, X, y):
        K = self._get_kernel_matrix(X, X)
        N = len(X)
        Kw = np.zeros((N, N))
        classLabels = np.unique(y)
        for label in classLabels:
            classIdx = np.argwhere(y == label).T[0]
            Nl = len(classIdx)
            xL = X[classIdx]
            Kl = self._get_kernel_matrix(X, xL)
            Kmul = np.sum(Kl, axis=1) / Nl  # mean kernel value per sample (vector)
            Kmul = np.outer(Kmul, np.ones(Nl))  # tiled into an N x Nl matrix
            Klbar = Kl - Kmul  # columns centered on the class mean
            Kw = Kw + np.inner(Klbar, Klbar)  # accumulates Klbar @ Klbar.T

        #centering
        KwCenterer = preprocessing.KernelCenterer()
        KwCenterer.fit(Kw)
        Kw = KwCenterer.transform(Kw)
        KCenterer = preprocessing.KernelCenterer()
        KCenterer.fit(K)
        Kbar = KCenterer.transform(K)
        Kbar2 = np.inner(Kbar, Kbar.T)  # equals Kbar @ Kbar (Kbar is symmetric)
        Kb = Kbar2 - Kw  # between-class part: total minus within-class

        return (K, Kbar, Kbar2, Kw, Kb)
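The method above assumes a _get_kernel_matrix helper on the same class; a minimal sketch of such a helper, using sklearn's pairwise_kernels with an RBF kernel as an illustrative assumption:

    def _get_kernel_matrix(self, X1, X2):
        # hypothetical helper; the original class may use a different kernel
        from sklearn.metrics.pairwise import pairwise_kernels
        return pairwise_kernels(X1, X2, metric="rbf")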
Example #2
 def KernelCenterer(self):
     '''
     1.4 Center a kernel matrix.
     Given the kernel matrix of a kernel K, whose feature map phi computes
     dot products in feature space, the KernelCenterer class transforms the
     matrix so that it holds the inner products phi would compute after the
     feature-space mean has been removed.
     '''
     transformer = preprocessing.KernelCenterer().fit(self.data)
     return transformer.transform(self.data)
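For context, KernelCenterer operates on a square Gram matrix rather than on raw features. A minimal sketch (the synthetic X and the RBF kernel are illustrative assumptions) that checks the result against the explicit double-centering formula K - 1K - K1 + 1K1:

import numpy as np
from sklearn import preprocessing
from sklearn.metrics.pairwise import pairwise_kernels

X = np.random.RandomState(0).randn(10, 3)  # synthetic features
K = pairwise_kernels(X, metric="rbf")      # square 10 x 10 Gram matrix

K_centered = preprocessing.KernelCenterer().fit_transform(K)

# explicit double centering, with ones = 11^T / n
n = K.shape[0]
ones = np.ones((n, n)) / n
K_manual = K - ones @ K - K @ ones + ones @ K @ ones
assert np.allclose(K_centered, K_manual)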
Example #3
def initialize_scalers_map(X):

    scalers = dict()
    #scalers['NoScaler'] = None
    scalers['Normalizer'] = preprocessing.Normalizer().fit(X)
    scalers['MaxAbsScaler'] = preprocessing.MaxAbsScaler().fit(X)
    scalers['MinMaxScaler'] = preprocessing.MinMaxScaler().fit(X)
    # NOTE: KernelCenterer expects a square kernel (Gram) matrix, not raw
    # features; recent scikit-learn versions raise an error on non-square X
    scalers['KernelCenterer'] = preprocessing.KernelCenterer().fit(X)
    scalers['StandardScaler'] = preprocessing.StandardScaler().fit(X)

    return scalers
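If the map must include KernelCenterer, one workaround is to fit it on a precomputed Gram matrix instead of X; a sketch of the replacement lines inside initialize_scalers_map, with the RBF kernel as an illustrative assumption:

    from sklearn.metrics.pairwise import pairwise_kernels

    K = pairwise_kernels(X, metric="rbf")  # square Gram matrix built from X
    scalers['KernelCenterer'] = preprocessing.KernelCenterer().fit(K)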
Example #4
    def transform_kernel_centerer_arr(self, dt: PandasDataFrame,
                                      method_args: Any, name: str):
        """Center a kernel matrix
        :param dt: the dataframe of features.
        :param method_args: other input arguments
        (a placeholder; no arguments are currently available).
        :param name: the name of the feature to be transformed.
        """
        if name in method_args and "scale" in method_args[name]:
            # reuse the transformer cached by a previous call
            scale = method_args[name]["scale"]
            dt[name] = scale.transform(dt[name])
        else:
            # fit a new centerer and cache it for later calls
            scale = preprocessing.KernelCenterer()
            method_args[name] = {"scale": scale}
            dt[name] = scale.fit_transform(dt[name])
Example #5
def test_isomap_reconstruction_error(n_neighbors, radius):
    # Same setup as in test_isomap_simple_grid, with an added dimension
    n_pts = 25
    X = create_sample_data(n_pts=n_pts, add_noise=True)

    # compute input kernel
    if n_neighbors is not None:
        G = neighbors.kneighbors_graph(X, n_neighbors,
                                       mode="distance").toarray()
    else:
        G = neighbors.radius_neighbors_graph(X, radius,
                                             mode="distance").toarray()
    centerer = preprocessing.KernelCenterer()
    K = centerer.fit_transform(-0.5 * G**2)

    for eigen_solver in eigen_solvers:
        for path_method in path_methods:
            clf = manifold.Isomap(
                n_neighbors=n_neighbors,
                radius=radius,
                n_components=2,
                eigen_solver=eigen_solver,
                path_method=path_method,
            )
            clf.fit(X)

            # compute output kernel
            if n_neighbors is not None:
                G_iso = neighbors.kneighbors_graph(clf.embedding_,
                                                   n_neighbors,
                                                   mode="distance")
            else:
                G_iso = neighbors.radius_neighbors_graph(clf.embedding_,
                                                         radius,
                                                         mode="distance")
            G_iso = G_iso.toarray()
            K_iso = centerer.fit_transform(-0.5 * G_iso**2)

            # make sure error agrees
            reconstruction_error = np.linalg.norm(K - K_iso) / n_pts
            assert_almost_equal(reconstruction_error,
                                clf.reconstruction_error())
Example #6
def test_isomap_reconstruction_error():
    # Same setup as in test_isomap_simple_grid, with an added dimension
    N_per_side = 5
    Npts = N_per_side**2
    n_neighbors = Npts - 1

    # grid of equidistant points in 2D, n_components = n_dim
    X = np.array(list(product(range(N_per_side), repeat=2)))

    # add noise in a third dimension
    rng = np.random.RandomState(0)
    noise = 0.1 * rng.randn(Npts, 1)
    X = np.concatenate((X, noise), 1)

    # compute input kernel
    G = neighbors.kneighbors_graph(X, n_neighbors, mode="distance").toarray()

    centerer = preprocessing.KernelCenterer()
    K = centerer.fit_transform(-0.5 * G**2)

    for eigen_solver in eigen_solvers:
        for path_method in path_methods:
            clf = manifold.Isomap(
                n_neighbors=n_neighbors,
                n_components=2,
                eigen_solver=eigen_solver,
                path_method=path_method,
            )
            clf.fit(X)

            # compute output kernel
            G_iso = neighbors.kneighbors_graph(clf.embedding_,
                                               n_neighbors,
                                               mode="distance").toarray()

            K_iso = centerer.fit_transform(-0.5 * G_iso**2)

            # make sure error agrees
            reconstruction_error = np.linalg.norm(K - K_iso) / Npts
            assert_almost_equal(reconstruction_error,
                                clf.reconstruction_error())
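Both tests rely on the classical-MDS identity behind the -0.5 * G**2 trick: double-centering the squared Euclidean distance matrix yields the Gram matrix of the centered coordinates. A minimal standalone check (the synthetic points are an assumption):

import numpy as np
from sklearn import preprocessing
from sklearn.metrics.pairwise import euclidean_distances

Y = np.random.RandomState(0).randn(6, 2)
D = euclidean_distances(Y)

K = preprocessing.KernelCenterer().fit_transform(-0.5 * D**2)
Yc = Y - Y.mean(axis=0)           # centered coordinates
assert np.allclose(K, Yc @ Yc.T)  # Gram matrix of the centered points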
Example #7
def KernelCenterer(train_df, test_df, HP):
    train_x = train_df.iloc[:, :-1]
    train_y = train_df.iloc[:, -1:]
    test_x = test_df.iloc[:, :-1]
    test_y = test_df.iloc[:, -1:]

    transformer = preprocessing.KernelCenterer()
    train_x_copy = train_x.copy()
    # see the KernelCenterer caveat in Example #3: these are raw feature
    # frames, not square Gram matrices
    train_x_transformed = transformer.fit_transform(train_x_copy)
    test_x_copy = test_x.copy()
    test_x_transformed = transformer.transform(test_x_copy)  # TODO check here

    train_column_name = list(train_x_copy.columns)
    test_column_name = list(test_x_copy.columns)

    train_x_transformed_df = pd.DataFrame(train_x_transformed)
    train_x_transformed_df.columns = train_column_name
    train_df_transformed = train_x_transformed_df.assign(label=train_y.values)

    test_x_transformed_df = pd.DataFrame(test_x_transformed)
    test_x_transformed_df.columns = test_column_name
    test_df_transformed = test_x_transformed_df.assign(label=test_y.values)

    return train_df_transformed, test_df_transformed
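For reference, the train/test pattern KernelCenterer is actually designed for fits on the training Gram matrix and transforms the rectangular test-versus-train kernel; a sketch, with the RBF kernel as an illustrative assumption:

from sklearn.metrics.pairwise import pairwise_kernels

K_train = pairwise_kernels(train_x, train_x, metric="rbf")  # n_train x n_train
K_test = pairwise_kernels(test_x, train_x, metric="rbf")    # n_test x n_train

transformer = preprocessing.KernelCenterer().fit(K_train)
K_train_centered = transformer.transform(K_train)
K_test_centered = transformer.transform(K_test)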
Example #8
print("====== Q2 results ======")
print("Best Score: ", best_score)
print("Best C: ", best_C)
print("Best Gamma: ", best_gamma)

# Q3
best_score = 0.0
preprocessors = []
X_trains = []
X_tests = []
from sklearn import preprocessing
preprocessors.append(preprocessing.Normalizer())
preprocessors.append(preprocessing.MaxAbsScaler())
preprocessors.append(preprocessing.MinMaxScaler())
preprocessors.append(preprocessing.KernelCenterer())
preprocessors.append(preprocessing.StandardScaler())
for i in range(0, 5):
    # Preprocessing fit and transform
    preprocessors[i].fit(X_train)
    X_trains.append(preprocessors[i].transform(X_train))
    X_tests.append(preprocessors[i].transform(X_test))
    # SVC fit and score
    for C in np.arange(0.05, 2, 0.05):
        for gamma in np.arange(0.001, 0.1, 0.001):
            svc = SVC(kernel='rbf', C=C, gamma=gamma)
            svc.fit(X_trains[i], y_train)
            score = svc.score(X_tests[i], y_test)
            if (best_score < score):
                best_score = score
                best_C = C
                best_gamma = gamma  # also track gamma, mirroring Q2's report
Example #9
def GetSplits(stockdata, OneyearStatus):
    T = preprocessing.KernelCenterer().fit_transform(stockdata)
    x_train, x_test, y_train, y_test = train_test_split(
        T, OneyearStatus, test_size=0.2, random_state=7)
    return x_train, x_test, y_train, y_test
Example #10
def train_model(X_train, y_train, X_test, y_test, lmd):
    """
    Train qboost model

    :param X_train: train input
    :param y_train: train label
    :param X_test: test input
    :param y_test: test label
    :param lmd: lambda controlling the regularization term
    :return:
    """
    NUM_READS = 3000
    NUM_WEAK_CLASSIFIERS = 35
    # lmd = 0.5
    TREE_DEPTH = 3

    # define sampler
    dwave_sampler = DWaveSampler(solver={'qpu': True})
    # sa_sampler = micro.dimod.SimulatedAnnealingSampler()
    emb_sampler = EmbeddingComposite(dwave_sampler)

    N_train = len(X_train)
    N_test = len(X_test)

    print("\n======================================")
    print("Train#: %d, Test: %d" % (N_train, N_test))
    print('Num weak classifiers:', NUM_WEAK_CLASSIFIERS)
    print('Tree depth:', TREE_DEPTH)

    # input: dataset X and labels y (in {+1, -1})

    # Preprocessing data
    imputer = SimpleImputer()
    # scaler = preprocessing.MinMaxScaler()
    scaler = preprocessing.StandardScaler()
    normalizer = preprocessing.Normalizer()
    centerer = preprocessing.KernelCenterer()

    # X = imputer.fit_transform(X)
    X_train = scaler.fit_transform(X_train)
    X_train = normalizer.fit_transform(X_train)
    X_train = centerer.fit_transform(X_train)

    # X_test = imputer.fit_transform(X_test)
    # NOTE: refitting on the test set leaks test statistics; applying the
    # train-fitted objects' transform(X_test) would be the usual choice
    X_test = scaler.fit_transform(X_test)
    X_test = normalizer.fit_transform(X_test)
    X_test = centerer.fit_transform(X_test)

    ## Adaboost
    print('\nAdaboost')

    clf = AdaBoostClassifier(n_estimators=NUM_WEAK_CLASSIFIERS)

    # scores = cross_val_score(clf, X, y, cv=5, scoring='accuracy')
    print('fitting...')
    clf.fit(X_train, y_train)

    hypotheses_ada = clf.estimators_
    # clf.estimator_weights_ = np.random.uniform(0,1,size=NUM_WEAK_CLASSIFIERS)
    print('testing...')
    y_train_pred = clf.predict(X_train)
    y_test_pred = clf.predict(X_test)

    print('accu (train): %5.2f' % (metric(y_train, y_train_pred)))
    print('accu (test): %5.2f' % (metric(y_test, y_test_pred)))

    # Ensemble of decision trees
    print('\nDecision tree')

    clf2 = WeakClassifiers(n_estimators=NUM_WEAK_CLASSIFIERS,
                           max_depth=TREE_DEPTH)
    clf2.fit(X_train, y_train)

    y_train_pred2 = clf2.predict(X_train)
    y_test_pred2 = clf2.predict(X_test)
    print(clf2.estimator_weights)

    print('accu (train): %5.2f' % (metric(y_train, y_train_pred2)))
    print('accu (test): %5.2f' % (metric(y_test, y_test_pred2)))

    # QBoost
    print('\nQBoost')

    DW_PARAMS = {
        'num_reads': NUM_READS,
        'auto_scale': True,
        # "answer_mode": "histogram",
        'num_spin_reversal_transforms': 10,
        # 'annealing_time': 10,
        'postprocess': 'optimization',
    }

    clf3 = QBoostClassifier(n_estimators=NUM_WEAK_CLASSIFIERS,
                            max_depth=TREE_DEPTH)
    clf3.fit(X_train, y_train, emb_sampler, lmd=lmd, **DW_PARAMS)

    y_train_dw = clf3.predict(X_train)
    y_test_dw = clf3.predict(X_test)

    print(clf3.estimator_weights)

    print('accu (train): %5.2f' % (metric(y_train, y_train_dw)))
    print('accu (test): %5.2f' % (metric(y_test, y_test_dw)))

    # QBoostPlus
    print('\nQBoostPlus')
    clf4 = QboostPlus([clf, clf2, clf3])
    clf4.fit(X_train, y_train, emb_sampler, lmd=lmd, **DW_PARAMS)
    y_train4 = clf4.predict(X_train)
    y_test4 = clf4.predict(X_test)
    print(clf4.estimator_weights)

    print('accu (train): %5.2f' % (metric(y_train, y_train4)))
    print('accu (test): %5.2f' % (metric(y_test, y_test4)))

    print("=============================================")
    print("Method \t Adaboost \t DecisionTree \t Qboost \t QboostIt")
    print("Train\t %5.2f \t\t %5.2f \t\t\t %5.2f \t\t %5.2f" %
          (metric(y_train, y_train_pred), metric(y_train, y_train_pred2),
           metric(y_train, y_train_dw), metric(y_train, y_train4)))
    print("Test\t %5.2f \t\t %5.2f \t\t\t %5.2f \t\t %5.2f" %
          (metric(y_test, y_test_pred), metric(y_test, y_test_pred2),
           metric(y_test, y_test_dw), metric(y_test, y_test4)))
    print("=============================================")

    # plt.subplot(211)
    # plt.bar(range(len(y_test)), y_test)
    # plt.subplot(212)
    # plt.bar(range(len(y_test)), y_test_dw)
    # plt.show()

    return
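The snippet assumes a metric helper for scoring; a minimal stand-in (plain accuracy is an assumption about what qboost's helper computes):

from sklearn.metrics import accuracy_score

def metric(y_true, y_pred):
    # hypothetical stand-in for the scoring helper used above
    return accuracy_score(y_true, y_pred)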
Example #11
 def _get_Kmatrices(self, X):
     K = self._get_kernel_matrix(X, X)
     KCenterer = preprocessing.KernelCenterer()
     KCenterer.fit(K)
     Kbar = KCenterer.transform(K)
     return (K, Kbar)
Example #12
def Process(df):
    df.dropna(axis=0, how='any', inplace=True)
    # df.as_matrix() was removed in pandas 1.0; to_numpy() replaces it
    T = preprocessing.KernelCenterer().fit_transform(df.to_numpy())
    return T
Example #13
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=7)
X_test = pd.DataFrame(X_test)

## Data is scaled using standardisation;
## I prefer standardisation over normalisation

from sklearn import preprocessing
stand = preprocessing.StandardScaler()
maxabs = preprocessing.MaxAbsScaler()
minmax = preprocessing.MinMaxScaler()
kernel = preprocessing.KernelCenterer()
normalise = preprocessing.Normalizer()
preprocess = [stand, maxabs, minmax, kernel, normalise]
preprocess_string = ['stand', 'maxabs', 'minmax', 'kernel', 'normalise']

from sklearn import manifold
from sklearn.decomposition import PCA
pca = PCA(n_components=4)
isomap = manifold.Isomap(n_components=4, n_neighbors=7)
dimension = [pca, isomap]
dimension_string = ['pca', 'isomap']

from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
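The lists above are presumably consumed by a model-selection loop further down; a minimal sketch of such a loop (the classifier choice and plain accuracy scoring are assumptions), reusing the names defined above:

for prep, prep_name in zip(preprocess, preprocess_string):
    if prep_name == 'kernel':
        continue  # KernelCenterer needs a square Gram matrix, not raw features
    X_train_t = prep.fit_transform(X_train)
    X_test_t = prep.transform(X_test)
    for dim, dim_name in zip(dimension, dimension_string):
        Z_train = dim.fit_transform(X_train_t)
        Z_test = dim.transform(X_test_t)
        clf = RandomForestClassifier(random_state=7).fit(Z_train, y_train)
        print(prep_name, dim_name, clf.score(Z_test, y_test))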
Example #14
# which is the range between the 1st quartile and the 3rd quartile.
robust_scaler = preprocessing.RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
                                           with_scaling=True)
print(robust_scaler)

train_x_robust_scaler = robust_scaler.fit_transform(train_x)
test_x_robust_scaler = robust_scaler.transform(test_x)

nb_robust_scaler = naive_bayes(train_x_robust_scaler, train_y)
nb_robust_scaler_predictions = nb_robust_scaler.predict(test_x_robust_scaler)
result_statistics(nb_robust_scaler_predictions)

print("")
print("---------- Default Naive Bayes with Kernel Centerer----------")

kernel_center = preprocessing.KernelCenterer()
print(kernel_center)

# see the KernelCenterer caveat in Example #3: train_x and test_x here are
# raw feature matrices, not square Gram matrices
train_x_kernel_center = kernel_center.fit_transform(train_x)
test_x_kernel_center = kernel_center.transform(test_x)

nb_kernel_center = naive_bayes(train_x_kernel_center, train_y)
nb_kernel_center_predictions = nb_kernel_center.predict(test_x_kernel_center)
result_statistics(nb_kernel_center_predictions)

print("")
print("---------- Default Naive Bayes with Quantile Transformation----------")

quantile_transformer = preprocessing.QuantileTransformer(copy=True, n_quantiles=1000, output_distribution='normal',
                                                         random_state=0)
print(quantile_transformer)