def _get_Kmatrices(self, X, y):
    """Build the kernel matrices used for kernel discriminant analysis.

    Returns the tuple ``(K, Kbar, Kbar2, Kw, Kb)``: the raw kernel matrix,
    its centered form, the square of the centered matrix, the within-class
    scatter in kernel space, and the between-class scatter ``Kbar2 - Kw``.
    """
    K = self._get_kernel_matrix(X, X)
    n_samples = len(X)
    Kw = np.zeros((n_samples, n_samples))
    # Accumulate the within-class scatter one class at a time.
    for label in np.unique(y):
        member_idx = np.argwhere(y == label).T[0]
        n_members = len(member_idx)
        Kl = self._get_kernel_matrix(X, X[member_idx])
        # Remove the per-row class mean from every column of Kl.
        class_mean = np.sum(Kl, axis=1) / n_members            # vector
        Klbar = Kl - np.outer(class_mean, np.ones(n_members))  # matrix
        Kw = Kw + Klbar @ Klbar.T
    # Center the within-class scatter in feature space.
    Kw = preprocessing.KernelCenterer().fit(Kw).transform(Kw)
    # Center K itself, then form its square.
    Kbar = preprocessing.KernelCenterer().fit(K).transform(K)
    Kbar2 = Kbar @ Kbar
    Kb = Kbar2 - Kw
    return (K, Kbar, Kbar2, Kw, Kb)
def KernelCenterer(self):
    """Center the kernel matrix stored in ``self.data``.

    Given a kernel matrix K whose entries are feature-space dot products
    under some feature map phi, sklearn's ``KernelCenterer`` transforms K
    so that it corresponds to the inner products of the phi-mapped points
    after the feature-space mean has been removed.
    """
    centerer = preprocessing.KernelCenterer().fit(self.data)
    return centerer.transform(self.data)
def initialize_scalers_map(X):
    """Return a mapping from scaler name to a scaler fitted on ``X``."""
    # NOTE(review): KernelCenterer is designed for kernel (Gram) matrices,
    # not raw feature matrices — confirm X is actually a kernel here.
    scaler_classes = (
        ('Normalizer', preprocessing.Normalizer),
        ('MaxAbsScaler', preprocessing.MaxAbsScaler),
        ('MinMaxScaler', preprocessing.MinMaxScaler),
        ('KernelCenterer', preprocessing.KernelCenterer),
        ('StandardScaler', preprocessing.StandardScaler),
    )
    return {name: cls().fit(X) for name, cls in scaler_classes}
def transform_kernel_centerer_arr(self, dt: PandasDataFrame, method_args: Any, name: str):
    """Center a kernel matrix stored in a dataframe column, in place.

    Reuses a previously created ``KernelCenterer`` cached in
    ``method_args[name]["scale"]`` when one exists; otherwise creates one
    and caches it there.

    :param dt: the dataframe of features.
    :param method_args: per-feature arguments; may cache a fitted scaler.
    :param name: the name of the feature to be transformed.
    """
    # Bug fix: the old guard indexed method_args[name] before checking that
    # the key exists, and tested membership against the wrong container
    # ("name in method_args[name]" rather than "scale" in it).
    if name in method_args and "scale" in method_args[name]:
        scale = method_args[name]["scale"]
    else:
        scale = preprocessing.KernelCenterer()
        method_args[name] = {"scale": scale}
    # Bug fix: the old code centered the data twice (fit_transform followed
    # by a second transform of the already-centered result).
    dt[name] = scale.fit_transform(dt[name])
def test_isomap_reconstruction_error(n_neighbors, radius):
    """Isomap's reported reconstruction error must agree with one
    recomputed from the input and output distance kernels."""
    # Same setup as in test_isomap_simple_grid, with an added dimension.
    n_pts = 25
    X = create_sample_data(n_pts=n_pts, add_noise=True)

    # Input kernel: centered -0.5 * D**2 over the neighborhood distances.
    if n_neighbors is None:
        G = neighbors.radius_neighbors_graph(X, radius, mode="distance").toarray()
    else:
        G = neighbors.kneighbors_graph(X, n_neighbors, mode="distance").toarray()
    centerer = preprocessing.KernelCenterer()
    K = centerer.fit_transform(-0.5 * G**2)

    for eigen_solver in eigen_solvers:
        for path_method in path_methods:
            model = manifold.Isomap(
                n_neighbors=n_neighbors,
                radius=radius,
                n_components=2,
                eigen_solver=eigen_solver,
                path_method=path_method,
            )
            model.fit(X)

            # Output kernel, built from the embedding the same way.
            if n_neighbors is None:
                G_emb = neighbors.radius_neighbors_graph(
                    model.embedding_, radius, mode="distance"
                )
            else:
                G_emb = neighbors.kneighbors_graph(
                    model.embedding_, n_neighbors, mode="distance"
                )
            K_emb = centerer.fit_transform(-0.5 * G_emb.toarray()**2)

            # The error reported by the estimator must match ours.
            recomputed = np.linalg.norm(K - K_emb) / n_pts
            assert_almost_equal(recomputed, model.reconstruction_error())
def test_isomap_reconstruction_error():
    """Reported reconstruction error matches one recomputed from the
    kernels, on a noisy 5x5 grid (same setup as test_isomap_simple_grid,
    with an added dimension)."""
    side = 5
    n_pts = side**2
    n_neighbors = n_pts - 1

    # Grid of equidistant points in 2D (n_components == n_dim), plus a
    # small random third dimension.
    rng = np.random.RandomState(0)
    X = np.array(list(product(range(side), repeat=2)))
    X = np.concatenate((X, 0.1 * rng.randn(n_pts, 1)), 1)

    # Input kernel: centered -0.5 * D**2 over the neighborhood distances.
    centerer = preprocessing.KernelCenterer()
    D = neighbors.kneighbors_graph(X, n_neighbors, mode="distance").toarray()
    K = centerer.fit_transform(-0.5 * D**2)

    for eigen_solver in eigen_solvers:
        for path_method in path_methods:
            model = manifold.Isomap(
                n_neighbors=n_neighbors,
                n_components=2,
                eigen_solver=eigen_solver,
                path_method=path_method,
            )
            model.fit(X)

            # Output kernel from the learned embedding, built the same way.
            D_emb = neighbors.kneighbors_graph(
                model.embedding_, n_neighbors, mode="distance"
            ).toarray()
            K_emb = centerer.fit_transform(-0.5 * D_emb**2)

            # The error reported by the estimator must match ours.
            recomputed = np.linalg.norm(K - K_emb) / n_pts
            assert_almost_equal(recomputed, model.reconstruction_error())
def KernelCenterer(train_df, test_df, HP):
    """Apply sklearn's ``KernelCenterer`` to the feature columns of the
    train and test dataframes (the last column is treated as the label).

    The centerer is fitted on the training features and reused to
    transform the test features.  ``HP`` is accepted for interface
    compatibility with the other preprocessing helpers and is unused.
    """
    transformer = preprocessing.KernelCenterer()

    # Split off the label column (assumed to be the last one).
    train_x, train_y = train_df.iloc[:, :-1], train_df.iloc[:, -1:]
    test_x, test_y = test_df.iloc[:, :-1], test_df.iloc[:, -1:]

    train_x_copy = train_x.copy()
    test_x_copy = test_x.copy()
    train_arr = transformer.fit_transform(train_x_copy)
    # TODO check here
    test_arr = transformer.transform(test_x_copy)

    # Rebuild dataframes with the original column names and re-attach
    # the labels under the fixed name "label".
    train_df_transformed = pd.DataFrame(train_arr, columns=list(train_x_copy.columns))
    train_df_transformed = train_df_transformed.assign(label=train_y.values)
    test_df_transformed = pd.DataFrame(test_arr, columns=list(test_x_copy.columns))
    test_df_transformed = test_df_transformed.assign(label=test_y.values)
    return train_df_transformed, test_df_transformed
# Report the best hyper-parameters found for Q2.
print("====== Q2 results ======")
print("Best Score: ", best_score)
print("Best C: ", best_C)
print("Best Gamma: ", best_gamma)

# Q3
# Grid-search an RBF SVC over C and gamma, once per preprocessing scheme.
best_score = 0.0
preprocessors = []
X_trains = []
X_tests = []
from sklearn import preprocessing
# NOTE(review): KernelCenterer is meant for kernel (Gram) matrices, not raw
# feature matrices — confirm it belongs in this list.
preprocessors.append(preprocessing.Normalizer())
preprocessors.append(preprocessing.MaxAbsScaler())
preprocessors.append(preprocessing.MinMaxScaler())
preprocessors.append(preprocessing.KernelCenterer())
preprocessors.append(preprocessing.StandardScaler())
for i in range(0, 5):
    # Preprocessing fit and transform: fit on the training split only,
    # then transform both splits with the fitted preprocessor.
    preprocessors[i].fit(X_train)
    X_trains.append(preprocessors[i].transform(X_train))
    X_tests.append(preprocessors[i].transform(X_test))
    # SVC fit and score over the (C, gamma) grid.
    for C in np.arange(0.05, 2, 0.05):
        for gamma in np.arange(0.001, 0.1, 0.001):
            svc = SVC(kernel='rbf', C=C, gamma=gamma)
            svc.fit(X_trains[i], y_train)
            score = svc.score(X_tests[i], y_test)
            if (best_score < score):
                best_score = score
                # NOTE(review): only best_C is recorded here; best_gamma is
                # never updated in this branch — possibly truncated source or
                # a bug. Confirm against the full file.
                best_C = C
def GetSplits(stockdata, OneyearStatus):
    """Kernel-center ``stockdata`` and return an 80/20 train/test split.

    The random_state is fixed so the split is reproducible across runs.
    """
    centered = preprocessing.KernelCenterer().fit_transform(stockdata)
    x_train, x_test, y_train, y_test = train_test_split(
        centered, OneyearStatus, test_size=0.2, random_state=7
    )
    return x_train, x_test, y_train, y_test
def train_model(X_train, y_train, X_test, y_test, lmd):
    """
    Train qboost model

    Fits four classifiers on the preprocessed data — AdaBoost, an ensemble
    of decision trees, QBoost (via a D-Wave sampler), and QBoostPlus — and
    prints train/test accuracy for each.

    :param X_train: train input
    :param y_train: train label
    :param X_test: test input
    :param y_test: test label
    :param lmd: lmbda to control regularization term
    :return: None (all results are printed)
    """
    NUM_READS = 3000
    NUM_WEAK_CLASSIFIERS = 35
    # lmd = 0.5
    TREE_DEPTH = 3

    # define sampler
    dwave_sampler = DWaveSampler(solver={'qpu': True})
    # sa_sampler = micro.dimod.SimulatedAnnealingSampler()
    emb_sampler = EmbeddingComposite(dwave_sampler)

    N_train = len(X_train)
    N_test = len(X_test)
    print("\n======================================")
    print("Train#: %d, Test: %d" % (N_train, N_test))
    print('Num weak classifiers:', NUM_WEAK_CLASSIFIERS)
    print('Tree depth:', TREE_DEPTH)

    # input: dataset X and labels y (in {+1, -1}

    # Preprocessing data
    # NOTE(review): imputer is created but never applied (its use is
    # commented out below) — confirm whether imputation was intended.
    imputer = SimpleImputer()
    # scaler = preprocessing.MinMaxScaler()
    scaler = preprocessing.StandardScaler()
    normalizer = preprocessing.Normalizer()
    # NOTE(review): KernelCenterer is designed for kernel (Gram) matrices,
    # not raw feature matrices — confirm this step is intentional.
    centerer = preprocessing.KernelCenterer()

    # X = imputer.fit_transform(X)
    X_train = scaler.fit_transform(X_train)
    X_train = normalizer.fit_transform(X_train)
    X_train = centerer.fit_transform(X_train)

    # X_test = imputer.fit_transform(X_test)
    # NOTE(review): the test split is RE-fitted here (fit_transform) rather
    # than transformed with the statistics learned on the training split —
    # likely unintended leakage/inconsistency; confirm transform() was meant.
    X_test = scaler.fit_transform(X_test)
    X_test = normalizer.fit_transform(X_test)
    X_test = centerer.fit_transform(X_test)

    ## Adaboost
    print('\nAdaboost')
    clf = AdaBoostClassifier(n_estimators=NUM_WEAK_CLASSIFIERS)
    # scores = cross_val_score(clf, X, y, cv=5, scoring='accuracy')
    print('fitting...')
    clf.fit(X_train, y_train)
    # Keep the fitted weak learners; reused implicitly via clf in QboostPlus.
    hypotheses_ada = clf.estimators_
    # clf.estimator_weights_ = np.random.uniform(0,1,size=NUM_WEAK_CLASSIFIERS)
    print('testing...')
    y_train_pred = clf.predict(X_train)
    y_test_pred = clf.predict(X_test)
    print('accu (train): %5.2f' % (metric(y_train, y_train_pred)))
    print('accu (test): %5.2f' % (metric(y_test, y_test_pred)))

    # Ensembles of Decision Tree
    print('\nDecision tree')
    clf2 = WeakClassifiers(n_estimators=NUM_WEAK_CLASSIFIERS, max_depth=TREE_DEPTH)
    clf2.fit(X_train, y_train)
    y_train_pred2 = clf2.predict(X_train)
    y_test_pred2 = clf2.predict(X_test)
    print(clf2.estimator_weights)
    print('accu (train): %5.2f' % (metric(y_train, y_train_pred2)))
    print('accu (test): %5.2f' % (metric(y_test, y_test_pred2)))

    # QBoost: weak-classifier selection solved on the D-Wave sampler.
    print('\nQBoost')
    DW_PARAMS = {
        'num_reads': NUM_READS,
        'auto_scale': True,
        # "answer_mode": "histogram",
        'num_spin_reversal_transforms': 10,
        # 'annealing_time': 10,
        'postprocess': 'optimization',
    }
    clf3 = QBoostClassifier(n_estimators=NUM_WEAK_CLASSIFIERS, max_depth=TREE_DEPTH)
    clf3.fit(X_train, y_train, emb_sampler, lmd=lmd, **DW_PARAMS)
    y_train_dw = clf3.predict(X_train)
    y_test_dw = clf3.predict(X_test)
    print(clf3.estimator_weights)
    print('accu (train): %5.2f' % (metric(y_train, y_train_dw)))
    print('accu (test): %5.2f' % (metric(y_test, y_test_dw)))

    # QBoostPlus: re-weights the three classifiers above as an ensemble.
    print('\nQBoostPlus')
    clf4 = QboostPlus([clf, clf2, clf3])
    clf4.fit(X_train, y_train, emb_sampler, lmd=lmd, **DW_PARAMS)
    y_train4 = clf4.predict(X_train)
    y_test4 = clf4.predict(X_test)
    print(clf4.estimator_weights)
    print('accu (train): %5.2f' % (metric(y_train, y_train4)))
    print('accu (test): %5.2f' % (metric(y_test, y_test4)))

    # Summary table of train/test accuracy for all four methods.
    print("=============================================")
    print("Method \t Adaboost \t DecisionTree \t Qboost \t QboostIt")
    print("Train\t %5.2f \t\t %5.2f \t\t\t %5.2f \t\t %5.2f" % (metric(y_train, y_train_pred),
                                                               metric(y_train, y_train_pred2),
                                                               metric(y_train, y_train_dw),
                                                               metric(y_train, y_train4)))
    print("Test\t %5.2f \t\t %5.2f \t\t\t %5.2f \t\t %5.2f" % (metric(y_test, y_test_pred),
                                                              metric(y_test, y_test_pred2),
                                                              metric(y_test, y_test_dw),
                                                              metric(y_test, y_test4)))
    print("=============================================")

    # plt.subplot(211)
    # plt.bar(range(len(y_test)), y_test)
    # plt.subplot(212)
    # plt.bar(range(len(y_test)), y_test_dw)
    # plt.show()
    return
def _get_Kmatrices(self, X):
    """Compute the kernel matrix of ``X`` and its centered version.

    :param X: sample matrix, used for both arguments of the kernel.
    :return: tuple ``(K, Kbar)`` — the raw and the centered kernel matrix.
    """
    # Bug fix: _get_kernel_matrix is called through self elsewhere in this
    # file (see the two-argument _get_Kmatrices overload), so the bare name
    # here would raise NameError unless a module-level copy exists.
    K = self._get_kernel_matrix(X, X)
    KCenterer = preprocessing.KernelCenterer()
    KCenterer.fit(K)
    Kbar = KCenterer.transform(K)
    return (K, Kbar)
def Process(df):
    """Drop rows containing any NaN, then kernel-center the values.

    :param df: input pandas DataFrame (mutated in place by dropna).
    :return: the kernel-centered numpy array.
    """
    df.dropna(axis=0, how='any', inplace=True)
    # Bug fix: DataFrame.as_matrix() was deprecated in pandas 0.23 and
    # removed in pandas 1.0; to_numpy() is the supported replacement.
    T = preprocessing.KernelCenterer().fit_transform(df.to_numpy())
    return T
# Hold out 30% of the data for testing, with a fixed seed for reproducibility.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=7)
X_test = pd.DataFrame(X_test)

## Data is normalized using Standardisation
## I prefer standardisation over normalisation
from sklearn import preprocessing
# Candidate preprocessors, paired positionally with preprocess_string below.
# NOTE(review): KernelCenterer expects a kernel (Gram) matrix, not raw
# features — confirm it belongs in this candidate list.
stand = preprocessing.StandardScaler()
maxabs = preprocessing.MaxAbsScaler()
minmax = preprocessing.MinMaxScaler()
kernel = preprocessing.KernelCenterer()
normalise = preprocessing.Normalizer()
preprocess = [stand, maxabs, minmax, kernel, normalise]
preprocess_string = ['stand', 'maxabs', 'minmax', 'kernel', 'normalise']

# Candidate dimensionality reducers, also paired with a name list.
from sklearn import manifold
from sklearn.decomposition import PCA
pca = PCA(n_components=4)
isomap = manifold.Isomap(n_components=4, n_neighbors=7)
dimension = [pca, isomap]
dimension_string = ['pca', 'isomap']

# Ensemble classifiers evaluated later in the file.
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
# which is the range between the 1st quartile and the 3rd quartile. robust_scaler = preprocessing.RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True, with_scaling=True) print(robust_scaler) train_x_robust_scaler = robust_scaler.fit_transform(train_x) test_x_robust_scaler = robust_scaler.transform(test_x) nb_robust_scaler = naive_bayes(train_x_robust_scaler, train_y) nb_robust_scaler_predictions = nb_robust_scaler.predict(test_x_robust_scaler) result_statistics(nb_robust_scaler_predictions) print("") print("---------- Default Naive Bayes with Kernel Centerer----------") kernel_center = preprocessing.KernelCenterer() print(kernel_center) train_x_kernel_center = kernel_center.fit_transform(train_x) test_x_kernel_center = kernel_center.transform(test_x) nb_kernel_center = naive_bayes(train_x_kernel_center, train_y) nb_kernel_center_predictions = nb_kernel_center.predict(test_x_kernel_center) result_statistics(nb_kernel_center_predictions) print("") print("---------- Default Naive Bayes with Quantile Transformation----------") quantile_transformer = preprocessing.QuantileTransformer(copy=True, n_quantiles=1000, output_distribution='normal', random_state=0) print(quantile_transformer)