def test_many_classifiers(X, y, classifiers, Kfold=5):
    ErrorClassifiers = np.zeros(len(classifiers))
    rkf = RepeatedKFold(n_splits=Kfold, n_repeats=1)
    for train_index, test_index in rkf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        for i_clas in range(len(classifiers)):
            classifiers[i_clas].fit(X_train, y_train)
            ErrorClassifiers[i_clas] += 1 - classifiers[i_clas].score(X_test, y_test)
    ErrorClassifiers /= rkf.get_n_splits(X)
    return ErrorClassifiers
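# Usage sketch for test_many_classifiers above (not part of the original snippet): it assumes
# numpy arrays for X/y and any list of scikit-learn classifiers; the dataset and classifier
# choices below are illustrative only.
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import RepeatedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

X_demo, y_demo = load_breast_cancer(return_X_y=True)
classifiers_demo = [KNeighborsClassifier(), DecisionTreeClassifier()]
# Returns one mean misclassification rate (1 - accuracy) per classifier,
# averaged over the 5 folds of a single repetition.
print(test_many_classifiers(X_demo, y_demo, classifiers_demo, Kfold=5))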
def mlpKFold(X, y, k, act, solve, alph, it):
    acc = 0
    rkf = RepeatedKFold(n_splits=k)
    for train_index, test_index in rkf.split(X):
        X_train = []
        X_test = []
        y_train = []
        y_test = []
        for i in train_index:
            X_train.append(X[i])
            y_train.append(y[i])
        for i in test_index:
            X_test.append(X[i])
            y_test.append(y[i])
        mlp = MLPClassifier(activation=activationGlobal[act],
                            solver=solverGlobal[solve],
                            alpha=alphaGlobal[alph],
                            max_iter=max_iterationsGlobal[it])
        mlp = mlp.fit(X_train, y_train)
        acc += mlp.score(X_test, y_test)
    acc /= rkf.get_n_splits()
    return acc
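# mlpKFold above indexes into module-level lists (activationGlobal, solverGlobal, alphaGlobal,
# max_iterationsGlobal) that are not shown in the snippet; the definitions below are assumed
# values chosen only to make the call runnable, not the original project's configuration.
from sklearn.datasets import load_iris
from sklearn.model_selection import RepeatedKFold
from sklearn.neural_network import MLPClassifier

activationGlobal = ['relu', 'tanh', 'logistic']
solverGlobal = ['adam', 'sgd', 'lbfgs']
alphaGlobal = [1e-4, 1e-3, 1e-2]
max_iterationsGlobal = [200, 500, 1000]

X_demo, y_demo = load_iris(return_X_y=True)
# Mean accuracy over k=3 folds times the RepeatedKFold default of 10 repeats (30 fits).
print(mlpKFold(X_demo, y_demo, k=3, act=0, solve=0, alph=0, it=0))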
import pandas
import matplotlib.pyplot as plot
from sklearn import metrics
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import RepeatedKFold

dataset = pandas.read_csv('salaryData.csv')
x = dataset['YearsExperience'].values
y = dataset['Salary'].values
X = x.reshape(len(x), 1)
Y = y.reshape(len(y), 1)

kf = RepeatedKFold(n_splits=2, n_repeats=1, random_state=200)
kf.get_n_splits(X)
# After this loop, xTrain/xTest and yTrain/yTest hold the last fold produced by the split.
for train_index, test_index in kf.split(X):
    xTrain, xTest = X[train_index], X[test_index]
    yTrain, yTest = Y[train_index], Y[test_index]

regressor = DecisionTreeRegressor()
regressor.fit(xTrain, yTrain)
regr = AdaBoostRegressor()
# Fit the AdaBoost model on the training fold only (the original snippet fit it on the full
# data, which leaks the test fold into training).
regr.fit(xTrain, yTrain.ravel())
yPrediction = regressor.predict(xTest)
yPred = regr.predict(xTest)
# The original snippet ends mid-statement here; the column names below are assumed for illustration.
df = pandas.DataFrame({
    'Actual': yTest.ravel(),
    'DecisionTree': yPrediction,
    'AdaBoost': yPred,
})
def Xgboost_RepeatedKFold(self):
    i = 0
    self.d_test = self.CreateDMatrix(DF=self.Test_df, is_test=True)
    self.Train_df.reset_index(inplace=True, drop=True)
    kf = RepeatedKFold(n_splits=self.nbr_fold,
                       random_state=self.random_state,
                       n_repeats=self.nbr_RepeatedKFold)
    kf.get_n_splits(self.Train_df)
    self.Pred_train = np.zeros((len(self.Train_df)))
    self.Pred_test = np.zeros((len(self.Test_df)))
    List_validation_fold = []
    List_Train_fold = []
    self.logs = []
    for (train_index, val_index) in kf.split(self.Train_df):
        i += 1
        self.logs.append("#" * 50 + "fold:" + str(i) + "#" * 50)
        List_train_run = []
        List_validation_run = []
        Train_fold, Val_fold = self.Train_df.loc[train_index, :], self.Train_df.loc[val_index, :]
        for run in range(self.nbr_run):
            clear_output()
            if self.nbr_run > 0:
                self.params["seed"] = random.randint(1, 10000)
            self.print_log(self.logs)
            self.xgboost = self.fit(Train_fold, Val_fold)
            train_metrics, val_metrics, Val_pred, Test_pred = self.eval_xgboost()
            List_train_run.append(train_metrics)
            List_validation_run.append(val_metrics)
            self.logs.append("run " + str(run) + " train metrics :" + str(train_metrics) +
                             " val metrics : " + str(val_metrics))
            self.Pred_train[val_index] += Val_pred
            self.Pred_test += Test_pred
            clear_output()
        List_validation_fold.append(np.mean(List_validation_run))
        List_Train_fold.append(np.mean(List_train_run))
        self.logs.append("\n" + "fold-" + str(i) + " train metrics :" + str(np.mean(List_train_run)) +
                         " val metrics : " + str(np.mean(List_validation_run)))
        clear_output()
        self.print_log(self.logs)
    Val_metrics = np.mean(List_validation_fold)
    Train_metrics = np.mean(List_Train_fold)
    self.Pred_test /= (self.nbr_fold * self.nbr_run * self.nbr_RepeatedKFold)
    self.Pred_train /= (self.nbr_run * self.nbr_RepeatedKFold)
    print("End Training with train metrics :" + str(Train_metrics) +
          " val metrics : " + str(Val_metrics))
    return self.get_output(self.Pred_test, self.Pred_train)
def train_and_test(classifier, csv_train_path, csv_test_path, selected_feature_names=None,
                   swap_traintest=False, cv_fold_and_repeat=None, balance=False, standardize=True,
                   shuffle=False, categorical_feature_mapping=None, one_hot=False, prob_scores=False):
    """
    Function to train and test a new model on the specified train and test sets.

    :param classifier: sklearn object of the model (not yet fitted) to be used
    :param csv_train_path: .csv path of train data
    :param csv_test_path: .csv path of test data
    :param selected_feature_names: List of names of the selected features
    :param swap_traintest: If True, swap the specified train and test sets (i.e. use csv_test_path as train set)
    :param cv_fold_and_repeat: Set this to a tuple (k, n) to perform cross validation (k=k-fold CV, n=number of repetitions)
    :param balance: Set to True to balance the data (#samples same for all classes)
    :param standardize: Set to True to standardize data, or provide a path to a .pickle file containing a stored standardizer
    :param shuffle: Set to True to activate shuffling for cross validation
    :param categorical_feature_mapping: Dictionary to map categorical features to numerical values (see doc of function ml_helpers.load_dataset())
    :param one_hot: If categorical features are present, set this parameter to True to enable one hot encoding
    :param prob_scores: Set to True to include score predictions (e.g. probabilities) in the reported performance metrics --> necessary for precision-recall curves
    :return: the computed performance metrics (a list of per-fold metrics when cross-validating, a single dict otherwise)
    """
    if cv_fold_and_repeat is not None:
        if csv_test_path is not None:
            X, Y = ml_helpers.load_dataset_seperate(csv_train_path, csv_test_path,
                                                    selected_feature_names=selected_feature_names,
                                                    balance=balance, standardize=standardize, merge=True,
                                                    categorical_feature_mapping=categorical_feature_mapping,
                                                    one_hot=one_hot)
        else:
            X, Y, _, _ = ml_helpers.load_dataset(csv_train_path, 1.0,
                                                 selected_feature_names=selected_feature_names,
                                                 balance=balance, standardize=standardize,
                                                 categorical_feature_mapping=categorical_feature_mapping,
                                                 one_hot=one_hot)
        kf = RepeatedKFold(n_splits=cv_fold_and_repeat[0], n_repeats=cv_fold_and_repeat[1])
        # kf = KFold(n_splits=cv_fold_and_repeat[0], shuffle=shuffle)
        kf.get_n_splits(X)
        metrics = []
        print('Cross Validation ...\n')
        for train_index, test_index in kf.split(X):
            X_train, X_test = X[train_index], X[test_index]
            Y_train, Y_test = Y[train_index], Y[test_index]
            # clone() yields a new estimator with the same parameters that has not been fit on any data.
            clf = clone(classifier)
            clf.fit(X_train, Y_train)
            Y_predicted = clf.predict(X_test)
            metrics.append(performance_metrics(Y_test, Y_predicted, report=True))
        return metrics
        # scores = cross_val_score(clf, X_train, Y_train, cv=6)
    else:
        X_train, Y_train, X_test, Y_test = ml_helpers.load_dataset_seperate(csv_train_path, csv_test_path,
                                                                            selected_feature_names=selected_feature_names,
                                                                            balance=balance, standardize=standardize,
                                                                            swap_traintest=swap_traintest,
                                                                            categorical_feature_mapping=categorical_feature_mapping,
                                                                            one_hot=one_hot)
        print('Training the model ...')
        start = time.time()
        classifier.fit(X_train, Y_train)
        end = time.time()
        train_time = end - start
        print('... Training took {}s'.format(train_time))

        print('Performing predictions on testset ...')
        start = time.time()
        Y_predicted = classifier.predict(X_test)
        end = time.time()
        inference_time = end - start
        print('... Inference took {}s'.format(inference_time))

        if prob_scores:
            Y_scores = classifier.predict_proba(X_test)
            metrics = performance_metrics(Y_test, Y_predicted, Y_scores, report=True)
        else:
            metrics = performance_metrics(Y_test, Y_predicted, report=True)
        metrics['train_time'] = train_time
        metrics['inference_time'] = inference_time
        metrics['nr_train_samples'] = len(Y_train)
        metrics['nr_test_samples'] = len(Y_test)
        return metrics
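# Example call for train_and_test above (illustrative, not from the original project): the csv
# paths are placeholders, and the helper modules it relies on (ml_helpers, performance_metrics)
# are assumed to exist in the surrounding code base.
from sklearn.ensemble import RandomForestClassifier

# 5-fold cross validation repeated 3 times on the merged train+test data;
# returns one performance_metrics() dict per fold (15 in total).
cv_metrics = train_and_test(RandomForestClassifier(n_estimators=100),
                            csv_train_path='train.csv',
                            csv_test_path='test.csv',
                            cv_fold_and_repeat=(5, 3),
                            balance=True,
                            standardize=True)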
def fit(self, X, y, labels=None, dist=None, importance_weights=None, cv_indices=None, dist_savename=None):
    t = time.time()
    if y.ndim < 2:
        y = y.reshape(-1, 1)
    if self.n_components is not None:
        if self.verbose > 0:
            elapsed = time.time() - t
            print('PCA [%dmin %dsec]' % (int(elapsed / 60), int(elapsed % 60)))
            sys.stdout.flush()
        self.pca = PCA(n_components=self.n_components, svd_solver='arpack')
        y_ = self.pca.fit_transform(y)
        if self.verbose > 0:
            print('Lost %.1f%% information ' % (self.pca.noise_variance_) +
                  '[%dmin %dsec]' % (int(elapsed / 60), int(elapsed % 60)))
            elapsed = time.time() - t
    else:
        y_ = y
    if labels is not None:
        raise RuntimeError('Not implemented.')
    if cv_indices is None:
        cv_indices = np.arange(X.shape[0])
    if self.cv_type is None:
        kfold = RepeatedKFold(n_splits=self.cv_nfolds, n_repeats=self.cv_shuffles)
        cv_folds = kfold.split(X[cv_indices])
        n_cv_folds = kfold.get_n_splits()
    elif self.cv_type == 'iter':
        cv_folds = self.cv_groups
        n_cv_folds = len(self.cv_groups)
    elif self.cv_type == 'group':
        groups = self.cv_groups
        if self.cv_nfolds is None:
            self.cv_nfolds = len(np.unique(groups))
        kfold = GroupKFold(n_splits=self.cv_nfolds)
        cv_folds = kfold.split(X[cv_indices], y[cv_indices], groups)
        n_cv_folds = kfold.get_n_splits()
    else:
        raise Exception('Cross-validation type not supported')
    add_train_inds = np.setdiff1d(np.arange(X.shape[0]), cv_indices)
    cv_folds = list(cv_folds)
    cv_folds = [(np.concatenate((train_fold, add_train_inds)), test_fold)
                for train_fold, test_fold in cv_folds]
    if self.verbose > 0:
        elapsed = time.time() - t
        print('Computing distance matrix [%dmin %dsec]' % (int(elapsed / 60), int(elapsed % 60)))
        sys.stdout.flush()
    if dist is None:
        dist = euclidean_distances(X, None, squared=self.squared_dist)
        if dist_savename is not None:
            if self.verbose > 0:
                print('Saving distance matrix to file:', dist_savename)
            np.save(dist_savename, dist)
    if importance_weights is None:
        self.krr_param_grid['lambda'] = [0]
        importance_weights = np.ones((X.shape[0], ))
    importance_weights = importance_weights**(0.5)
    errors = []
    if 'v' in self.krr_param_grid:
        for fold_i, (train_i, test_i) in enumerate(cv_folds):
            fold_errors = np.empty((len(self.krr_param_grid['v']),
                                    len(self.krr_param_grid['gamma']), 1,
                                    len(self.krr_param_grid['alpha']), y_.shape[1]))
            if self.verbose > 0:
                elapsed = time.time() - t
                print('CV %d of %d [%dmin %dsec]' %
                      (fold_i + 1, n_cv_folds, int(elapsed / 60), int(elapsed % 60)))
                sys.stdout.flush()
            for v_i, v in enumerate(self.krr_param_grid['v']):
                for gamma_i, gamma in enumerate(self.krr_param_grid['gamma']):
                    for lamb_i, lamb in enumerate(self.krr_param_grid['lambda']):
                        iw = importance_weights**lamb
                        iw = iw[:, None]
                        K_train = self.kernel.apply_to_dist(dist[np.ix_(train_i, train_i)], gamma=gamma)
                        K_train *= np.outer(iw[train_i], iw[train_i])
                        K_test = self.kernel.apply_to_dist(dist[np.ix_(test_i, train_i)], gamma=gamma)
                        if self.verbose > 0:
                            sys.stdout.write('.')
                            sys.stdout.flush()
                        for alpha_i, alpha in enumerate(self.krr_param_grid['alpha']):
                            if self.verbose > 0:
                                sys.stdout.write(',')
                                sys.stdout.flush()
                            for y_i in np.arange(y_.shape[1]):
                                K_train_ = K_train.copy()
                                alpha_add = get_alpha_add(self.n_basis, self.n_grid, self.delta, v)
                                K_train_.flat[::K_train_.shape[0] + 1] += alpha * alpha_add[y_i]
                                try:
                                    L_ = cholesky(K_train_, lower=True)
                                    x = solve_triangular(L_, y_[train_i, y_i], lower=True)
                                    dual_coef_ = solve_triangular(L_.T, x)
                                    pred_mean = np.dot(K_test, dual_coef_)
                                    if self.mae:
                                        e = np.mean(np.abs(pred_mean - y_[test_i, y_i]), 0)
                                    else:
                                        e = np.mean((pred_mean - y_[test_i, y_i])**2, 0)
                                except np.linalg.LinAlgError:
                                    e = np.inf
                                fold_errors[v_i, gamma_i, 0, alpha_i, y_i] = e
            if self.verbose > 0:
                sys.stdout.write('\n')
                sys.stdout.flush()
            errors.append(fold_errors)
        errors = np.array(errors)
        errors = np.mean(errors, 0)  # average over folds
    else:
        for fold_i, (train_i, test_i) in enumerate(cv_folds):
            fold_errors = np.empty((len(self.krr_param_grid['gamma']),
                                    len(self.krr_param_grid['lambda']),
                                    len(self.krr_param_grid['alpha']), y_.shape[1]))
            if self.verbose > 0:
                elapsed = time.time() - t
                print('CV %d of %d [%dmin %dsec]' %
                      (fold_i + 1, n_cv_folds, int(elapsed / 60), int(elapsed % 60)))
                sys.stdout.flush()
            for gamma_i, gamma in enumerate(self.krr_param_grid['gamma']):
                if self.verbose > 0:
                    sys.stdout.write('.')
                    sys.stdout.flush()
                for lamb_i, lamb in enumerate(self.krr_param_grid['lambda']):
                    iw = importance_weights**lamb
                    iw = iw[:, None]
                    K_train = self.kernel.apply_to_dist(dist[np.ix_(train_i, train_i)], gamma=gamma)
                    K_train *= np.outer(iw[train_i], iw[train_i])
                    K_test = self.kernel.apply_to_dist(dist[np.ix_(test_i, train_i)], gamma=gamma)
                    for alpha_i, alpha in enumerate(self.krr_param_grid['alpha']):
                        if self.verbose > 0:
                            sys.stdout.write(',')
                            sys.stdout.flush()
                        K_train_ = K_train.copy()
                        K_train_.flat[::K_train_.shape[0] + 1] += alpha
                        try:
                            L_ = cholesky(K_train_, lower=True)
                            x = solve_triangular(L_, iw[train_i] * y_[train_i], lower=True)
                            dual_coef_ = iw[train_i] * solve_triangular(L_.T, x)
                            pred_mean = np.dot(K_test, dual_coef_)
                            if self.mae:
                                e = np.mean(np.abs(pred_mean - y_[test_i]) *
                                            importance_weights[test_i, None]**2, 0)
                            else:
                                e = np.mean(((pred_mean - y_[test_i])**2) *
                                            importance_weights[test_i, None]**2, 0)
                        except np.linalg.LinAlgError:
                            e = np.inf
                        fold_errors[gamma_i, lamb_i, alpha_i] = e
            if self.verbose > 0:
                sys.stdout.write('\n')
                sys.stdout.flush()
            errors.append(fold_errors)
        errors = np.array(errors)
        errors = np.mean(errors, 0)  # average over folds
    self.dual_coefs_ = np.empty((y_.shape[1], X.shape[0]))
    self.alphas_ = np.empty(y_.shape[1])
    self.lambdas_ = np.empty(y_.shape[1])
    self.gammas_ = np.empty(y_.shape[1])
    if self.verbose > 0:
        elapsed = time.time() - t
        print('Refit [%dmin %dsec]' % (int(elapsed / 60), int(elapsed % 60)))
        sys.stdout.flush()
    print_count = 0
    if not self.single_combo:
        for i in range(y_.shape[1]):
            min_params = np.argsort(errors[:, :, :, i], axis=None)
            # lin_alg_errors = 0
            # errors has shape (gamma, lambda, alpha, outputs), so unravel over the first three axes.
            gamma_i, lamb_i, alpha_i = np.unravel_index(min_params[0], errors.shape[:3])
            gamma = self.krr_param_grid['gamma'][gamma_i]
            lamb = self.krr_param_grid['lambda'][lamb_i]
            alpha = self.krr_param_grid['alpha'][alpha_i]
            self.alphas_[i] = alpha
            self.gammas_[i] = gamma
            self.lambdas_[i] = lamb
            if (gamma_i in (0, len(self.krr_param_grid['gamma']) - 1)
                    or lamb_i in (0, len(self.krr_param_grid['lambda']) - 1)
                    or alpha_i in (0, len(self.krr_param_grid['alpha']) - 1)):
                if print_count <= 200:
                    fmtstr = '%d: gamma=%g\talpha=%g\tlambda=%g\terror=%g\tmean=%g'
                    print(fmtstr % (i, gamma, alpha, lamb,
                                    errors[gamma_i, lamb_i, alpha_i, i],
                                    errors[gamma_i, lamb_i, alpha_i, i] / np.mean(np.abs(y_[:, i]))))
                    print_count += 1
    else:
        errors = np.mean(errors, -1)  # average over outputs
        if self.verbose > 1:
            print('CV errors:')
            print(errors)
            print('Alpha params:')
            print(self.krr_param_grid['alpha'])
            print('Gamma params:')
            print(self.krr_param_grid['gamma'])
            print('Lambda params:')
            print(self.krr_param_grid['lambda'])
        if self.verbose > 0:
            print('Min error: ', np.min(errors))
        # print np.log(errors)
        # plt.imshow(np.log(errors))
        # plt.xticks(range(10), map('{:.1e}'.format, list(self.krr_param_grid['alpha'])))
        # plt.yticks(range(10), map('{:.1e}'.format, list(self.krr_param_grid['gamma'])))
        # plt.xlabel('alpha')
        # plt.ylabel('gamma')
        # plt.colorbar()
        # plt.show()
        min_params = np.argsort(errors, axis=None)
        if 'v' in self.krr_param_grid:
            v_i, gamma_i, lamb_i, alpha_i = np.unravel_index(min_params[0], errors.shape)
        else:
            gamma_i, lamb_i, alpha_i = np.unravel_index(min_params[0], errors.shape)
        if 'v' in self.krr_param_grid:
            v = self.krr_param_grid['v'][v_i]
            print('v=', v)
        gamma = self.krr_param_grid['gamma'][gamma_i]
        alpha = self.krr_param_grid['alpha'][alpha_i]
        lamb = self.krr_param_grid['lambda'][lamb_i]
        if 'v' in self.krr_param_grid:
            if v == self.krr_param_grid['v'][0]:
                print('v at lower edge.')
            if v == self.krr_param_grid['v'][-1]:
                print('v at upper edge.')
        if len(self.krr_param_grid['gamma']) > 1:
            if gamma == self.krr_param_grid['gamma'][0]:
                print('Gamma at lower edge.')
            if gamma == self.krr_param_grid['gamma'][-1]:
                print('Gamma at upper edge.')
        if len(self.krr_param_grid['alpha']) > 1:
            if alpha == self.krr_param_grid['alpha'][0]:
                print('Alpha at lower edge.')
            if alpha == self.krr_param_grid['alpha'][-1]:
                print('Alpha at upper edge.')
        if len(self.krr_param_grid['lambda']) > 1:
            if lamb == self.krr_param_grid['lambda'][0]:
                print('Lambda at lower edge.')
            if lamb == self.krr_param_grid['lambda'][-1]:
                print('Lambda at upper edge.')
        self.alphas_[:] = alpha
        self.gammas_[:] = gamma
        self.lambdas_[:] = lamb
        if 'v' in self.krr_param_grid:
            alpha_add = get_alpha_add(self.n_basis, self.n_grid, self.delta, v)
            self.alphas_ *= alpha_add
    combos = list(zip(self.alphas_, self.gammas_, self.lambdas_))
    n_unique_combos = len(set(combos))
    self.L_fit_ = [None] * n_unique_combos
    for i, (alpha, gamma, lamb) in enumerate(set(combos)):
        if self.verbose > 0:
            elapsed = time.time() - t
            print('Parameter combinations ' + '%d of %d [%dmin %dsec]' %
                  (i + 1, n_unique_combos, int(elapsed / 60), int(elapsed % 60)))
            sys.stdout.flush()
        y_list = [
            i for i in range(y_.shape[1])
            if self.alphas_[i] == alpha and self.gammas_[i] == gamma and self.lambdas_[i] == lamb
        ]
        iw = importance_weights**lamb
        iw = iw[:, None]
        K = self.kernel.apply_to_dist(dist, gamma=gamma)
        K *= np.outer(iw, iw)
        # np.exp(K, K)
        while True:
            K.flat[::K.shape[0] + 1] += alpha - (alpha / 10)
            try:
                if self.verbose > 0:
                    print('trying cholesky decomposition, alpha', alpha)
                L_ = cholesky(K, lower=True)
                self.L_fit_[i] = L_
                x = solve_triangular(L_, iw * y_[:, y_list], lower=True)
                # x = solve_triangular(L_, y_[:, y_list], lower=True)
                dual_coef_ = solve_triangular(L_.T, x)
                self.dual_coefs_[y_list] = iw.T * dual_coef_.T.copy()
                break
            except np.linalg.LinAlgError:
                if self.verbose > 0:
                    print('LinalgError, increasing alpha')
                alpha *= 10
                self.alphas_[0] = alpha
    if self.copy_X:
        self.X_fit_ = X.copy()
        self.y_fit_ = y.copy()
    else:
        self.X_fit_ = X
        self.y_fit_ = y
    self.errors = errors
    if self.verbose > 0:
        elapsed = time.time() - t
        print('Done [%dmin %dsec]' % (int(elapsed / 60), int(elapsed % 60)))
        sys.stdout.flush()
def test_get_n_splits_for_repeated_kfold():
    n_splits = 3
    n_repeats = 4
    rkf = RepeatedKFold(n_splits=n_splits, n_repeats=n_repeats)
    expected_n_splits = n_splits * n_repeats
    assert_equal(expected_n_splits, rkf.get_n_splits())
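# A quick standalone check of the behaviour asserted above: RepeatedKFold reports
# n_splits * n_repeats total splits, and split() yields exactly that many folds.
import numpy as np
from sklearn.model_selection import RepeatedKFold

rkf_demo = RepeatedKFold(n_splits=3, n_repeats=4, random_state=0)
folds = list(rkf_demo.split(np.arange(12).reshape(-1, 1)))
assert rkf_demo.get_n_splits() == 12
assert len(folds) == 12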
del df
XToScale = pd.DataFrame(scaler.fit_transform(XToScale), columns=colunasFeatures)
X = XToScale
X['destination_port'] = X['destination_port'].astype('category')
Y = Y.astype('category')
del XToScale

rkf = RepeatedKFold(n_splits=numberOfFolds, n_repeats=numberOfRepeats, random_state=2652124)
rkf.get_n_splits(X, Y)
print(rkf)

acc = np.zeros((1, numberOfFolds * numberOfRepeats))
f1 = np.zeros((1, numberOfFolds * numberOfRepeats))
pr = np.zeros((1, numberOfFolds * numberOfRepeats))
rc = np.zeros((1, numberOfFolds * numberOfRepeats))
tim = np.zeros((1, numberOfFolds * numberOfRepeats))
rod = 0
models = []
classReports = []
for train_index, test_index in rkf.split(X, Y):
    X_train, X_test = X.iloc[train_index, :], X.iloc[test_index, :]
    y_train, y_test = Y[train_index], Y[test_index]
    print("training classes", set(y_train))
    print("test classes", set(y_test))
    excelNome = 'Nearest-Centroid-Classifier.xlsx'
elif classif == '5':
    clf = RandomForestClassifier()
    excelNome = 'Random-Forest-Classifier.xlsx'
elif classif == '6':
    clf = svm.SVC()
    excelNome = 'Support-Vector-Machines-Classifier.xlsx'
else:
    print('Error!')

excel = pd.ExcelWriter(excelNome, engine='xlsxwriter')

# Defining the split
rkf = RepeatedKFold(n_splits=10, n_repeats=10, random_state=2652124)
# Returns the number of splitting iterations in the cross-validator
rkf.get_n_splits(X, y)
#print(rkf)
#print(X.columns)

# Separating training and test variables
for train_index, test_index in rkf.split(X, y):
    print(train_index, test_index)
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Classification using the selected classifier (Nearest-Centroid in the original comment)
    inicio = time.time()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    fim = time.time()
    tempo = fim - inicio
    Tem.append(tempo)
def compare_datasets2(fig_fn=None):
    fig, axes = plt.subplots(3, 2)
    if fig_fn is not None:
        fig.set_size_inches(12, 8)

    # for k, type in enumerate(["linear", "quadratic", "open_circle"]):
    for k, type in enumerate(["quadratic"]):
        print("Type: %s" % type)
        X, target, d_X, d_target = create_artificial_dataset2(type=type, n=150)
        keys = list(d_X.keys())

        visualize_dataset2(X[:, 0], X[:, 1], target, axes[k, 0], title="Dataset: %s" % type)

        target_pred = np.zeros(len(X))
        mean_score = 0.0
        param_grid = {"C": [1]}
        cv = RepeatedKFold(n_splits=10, n_repeats=1, random_state=1)
        for k_cv, (train_set, test_set) in enumerate(cv.split(keys)):
            print("Fold %d / %d" % (k_cv + 1, cv.get_n_splits()))

            keys_train = [keys[idx] for idx in train_set]
            keys_test = [keys[idx] for idx in test_set]

            d_X_train, d_target_train = OrderedDict(), OrderedDict()
            for key in keys_train:
                d_X_train[key] = d_X[key]
                d_target_train[key] = d_target[key]
            # d_X_train = {key: value for key, value in d_X.items() if key in keys_train}
            # d_target_train = {key: value for key, value in d_target.items() if key in keys_train}

            if type == "linear":
                ranksvm_kernel = KernelRankSVC(verbose=False, kernel="linear", feature_type="difference",
                                               slack_type="on_pairs", step_size_algorithm="diminishing_2",
                                               convergence_criteria="alpha_change_norm")
            elif type == "quadratic":
                ranksvm_kernel = KernelRankSVC(verbose=False, kernel="poly", feature_type="difference",
                                               slack_type="on_pairs")
                param_grid["degree"] = [2]
            elif type == "open_circle":
                ranksvm_kernel = KernelRankSVC(verbose=False, kernel="rbf", feature_type="difference",
                                               slack_type="on_pairs")
                param_grid["gamma"] = [3]
            else:
                raise ValueError("Invalid test data type: %s" % type)

            cv_inner = GroupKFold(n_splits=3)
            best_params, param_scores, n_pairs_train, best_estimator, _, _ = find_hparan_ranksvm(
                ranksvm_kernel, d_X_train, d_target_train, cv=cv_inner, param_grid=param_grid,
                pair_params={"allow_overlap": True, "d_upper": 4, "d_lower": 0, "ireverse": True},
                n_jobs=1)
            print(best_params)

            X_test = np.array([d_X[key] for key in keys_test])
            target_test = np.array([d_target[key] for key in keys_test])
            pairs_test = get_pairs_single_system(target_test, d_lower=0, d_upper=np.inf)

            target_pred[test_set] += best_estimator.map_values(X_test)
            score = best_estimator.score(X_test, pairs_test)
            print(score)
            mean_score += score

        target_pred /= cv.get_n_splits()
        mean_score /= cv.get_n_splits()
        print(mean_score)

        visualize_ranksvm([d_target[key] for key in keys], target_pred, axes[k, 1])

    if fig_fn is not None:
        plt.tight_layout()
        plt.savefig(fig_fn)
    else:
        plt.show()