def _run_multiclass(self, multiclass, xval_params, expected_best):
    """Estimate parameters of a multiclass SVM via xval and check them.

    Runs 3-fold cross-validation on `self.tr`, verifies that each binary
    classifier received the expected best parameters, then refits and
    checks the parameters again after training.
    """
    # Deterministic 3-fold splitter so the expected best params are stable
    splitter = CDataSplitter.create(
        'kfold', num_folds=3, random_state=50000)

    # Set the best parameters inside the classifier
    best_params = multiclass.estimate_parameters(
        self.tr, xval_params, splitter, 'accuracy',
        perf_evaluator='xval-multiclass', n_jobs=1)

    self.logger.info(
        "Multiclass SVM has now the following parameters: {:}".format(
            multiclass.get_params()))

    # Each one-vs-all binary classifier must carry its own best values
    for idx, binary_clf in enumerate(multiclass._binary_classifiers):
        self.assertEqual(binary_clf.C, expected_best['C'][idx])
        self.assertEqual(
            binary_clf.kernel.gamma, expected_best['kernel.gamma'][idx])

    # Final test: fit using best parameters
    multiclass.fit(self.tr)

    # After fitting, every estimated parameter must still be applied
    for idx, binary_clf in enumerate(multiclass._binary_classifiers):
        for param_name in best_params:
            self.assertEqual(binary_clf.get_params()[param_name],
                             best_params[param_name][idx])
def test_nan_metric_value(self):
    """Xval must tolerate partial-NaN metrics and reject all-NaN ones."""
    # Changing default parameters to be sure are not used
    self.svm.set_params({'C': 25, 'kernel.gamma': 1e-1})
    grid = {'C': [1, 10, 100], 'kernel.gamma': [1, 50]}

    # DO XVAL FOR CHOOSE BEST PARAMETERS
    splitter = CDataSplitter.create(
        'kfold', num_folds=5, random_state=50000)

    self.logger.info("Testing metric with some nan")

    # Metric returning NaN for every fold but one: the single real value
    # must be picked as the best score
    evaluator = CPerfEvaluatorXVal(splitter, CMetricFirstNan())
    evaluator.verbose = 1

    best_params, best_score = evaluator.evaluate_params(
        self.svm, self.training_dataset, grid, pick='last')
    self.logger.info("best score : {:}".format(best_score))

    # The xval should select the only one actual value (others are nan)
    self.assertEqual(best_score, 1.)

    self.logger.info("Testing metric with all nan")

    # This test case involves an all-nan slice
    self.logger.filterwarnings(
        action="ignore",
        message="All-NaN slice encountered",
        category=RuntimeWarning)

    # A metric that is NaN everywhere leaves nothing to choose from
    evaluator = CPerfEvaluatorXVal(splitter, CMetricAllNan())
    evaluator.verbose = 1

    with self.assertRaises(ValueError):
        evaluator.evaluate_params(
            self.svm, self.training_dataset, grid, pick='last')
def test_parameters_setting(self):
    """Compare estimate_parameters with a manual grid-search over folds."""
    # Changing default parameters to be sure are not used
    self.svm.set_params({'C': 25, 'kernel.gamma': 1e-1, 'n_jobs': 2})
    grid = {'C': [1, 10, 100], 'kernel.gamma': [1, 50]}

    # DO XVAL FOR CHOOSE BEST PARAMETERS
    splitter = CDataSplitter.create(
        'kfold', num_folds=5, random_state=50000)

    # Set the best parameters inside the classifier
    self.svm.estimate_parameters(
        self.training_dataset, grid, splitter, 'accuracy')

    self.logger.info("SVM has now the following parameters: {:}".format(
        self.svm.get_params()))

    self.assertEqual(self.svm.get_params()['C'], 1)
    self.assertEqual(self.svm.get_params()['kernel.gamma'], 50)

    # Now we compare the parameters chosen before with a new evaluator
    evaluator = CPerfEvaluatorXVal(splitter, CMetric.create('accuracy'))
    evaluator.verbose = 1

    best_params, best_score = evaluator.evaluate_params(
        self.svm, self.training_dataset, grid)

    for param_name in grid:
        self.logger.info("Best '{:}' is: {:}".format(
            param_name, best_params[param_name]))
        self.assertEqual(
            best_params[param_name], self.svm.get_params()[param_name])

    self.svm.verbose = 0

    # Manual grid search: score every (C, gamma) combination on each fold
    combinations = [[1, 1], [1, 50], [10, 1],
                    [10, 50], [100, 1], [100, 50]]
    comb_scores = CArray.zeros(len(combinations))

    n_folds = len(splitter.tr_idx)
    for comb_idx, (c_val, gamma_val) in enumerate(combinations):
        fold_scores = []
        for fold in range(n_folds):
            self.svm.set("C", c_val)
            self.svm.kernel.gamma = gamma_val

            tr_fold = self.training_dataset[splitter.tr_idx[fold], :]
            ts_fold = self.training_dataset[splitter.ts_idx[fold], :]

            self.svm.fit(tr_fold.X, tr_fold.Y)
            predicted = self.svm.predict(ts_fold.X)

            fold_scores.append(skm.accuracy_score(
                ts_fold.Y.get_data(), predicted.get_data()))

        comb_scores[comb_idx] = np.mean(fold_scores)
        self.logger.info("this fold mean: {:}".format(
            comb_scores[comb_idx]))

    max_combination_score = comb_scores.max()
    better_param_comb = combinations[comb_scores.argmax()]
    self.logger.info("max combination score founded here: {:}".format(
        max_combination_score))
    self.logger.info(
        "max comb score founded during xval {:}".format(best_score))

    # The evaluator must reproduce the manually-computed best score
    self.assertEqual(max_combination_score, best_score)

    # set parameters found by xval and check if are the same chosen here
    self.logger.info("the parameters selected by own xval are:")
    self.svm.set_params(best_params)
    self.logger.info("C: {:}".format(self.svm.C))
    self.logger.info("kernel.gamma: {:}".format(self.svm.kernel.gamma))

    # check c
    self.assertEqual(better_param_comb[0], self.svm.C)
    # check gamma
    self.assertEqual(better_param_comb[1], self.svm.kernel.gamma)
def __init__(self, splitter, metric):
    """Build the evaluator from a splitter and a metric specification.

    Both arguments are passed through their respective `.create` factory,
    so either an instance or a creation string identifier is accepted.
    """
    self.metric = CMetric.create(metric)
    self.splitter = CDataSplitter.create(splitter)