def modelselection_grid_search_kernel(num_subsets, num_vectors, dim_vectors):
    """Tune a LibSVM classifier on random toy data via grid search.

    A random matrix of shape ``(dim_vectors, num_vectors)`` is loaded into a
    ``RealFeatures`` object and paired with alternating +1/-1 binary labels.
    The parameter tree returned by ``create_param_tree()`` (defined outside
    this block) is searched with ``GridSearchModelSelection``, scoring each
    candidate by accuracy over one stratified cross-validation run.  The
    best combination is applied to the classifier, which is then
    re-evaluated with 10 runs.

    All Shogun names, ``Math`` and ``random`` are expected to be available
    at module level; ``random`` presumably is ``numpy.random`` (it must
    provide ``rand`` and ``seed``) -- confirm against the file's imports.

    Args:
        num_subsets: number of subsets passed to
            ``StratifiedCrossValidationSplitting``.
        num_vectors: number of vectors (and labels) to generate.
        dim_vectors: dimensionality of each generated vector.

    Returns:
        Tuple ``(classifier, result, mean)``: the tuned classifier, the
        object returned by the final ``evaluate()`` call, and the ``mean``
        attribute of that object after ``obtain_from_generic``.
    """
    # Fixed seeds so repeated calls are reproducible.
    Math.init_random(1)
    random.seed(1)

    # Random (meaningless) data wrapped as Shogun features.
    data = random.rand(dim_vectors, num_vectors)
    feats = RealFeatures()
    feats.set_feature_matrix(data)

    # Two classes: even indices get +1, odd indices get -1.
    labels = BinaryLabels(num_vectors)
    for idx in range(num_vectors):
        sign = -1 if idx % 2 else 1
        labels.set_label(idx, sign)

    svm = LibSVM()

    # Cross-validation used as the scoring procedure during model selection:
    # stratified splitting, accuracy as the criterion.
    xval = CrossValidation(
        svm,
        feats,
        labels,
        StratifiedCrossValidationSplitting(labels, num_subsets),
        ContingencyTableEvaluation(ACCURACY),
    )
    xval.set_num_runs(1)

    # Grid search over the externally defined parameter tree; False turns
    # off printing of each parameter combination.
    search = GridSearchModelSelection(xval, create_param_tree())
    best = search.select_model(False)
    best.apply_to_machine(svm)

    # More runs for the final evaluation to tighten the confidence interval.
    xval.set_num_runs(10)
    xval.set_conf_int_alpha(0.01)
    outcome = xval.evaluate()
    mean_score = CrossValidationResult.obtain_from_generic(outcome).mean

    return svm, outcome, mean_score
def modelselection_grid_search_kernel(num_subsets, num_vectors, dim_vectors):
    """Select LibSVM parameters by grid search on synthetic two-class data.

    Steps performed:

    1. Seed ``Math`` and ``random`` with 1.
    2. Generate a ``dim_vectors`` x ``num_vectors`` random matrix and load
       it into ``RealFeatures``; label vector ``i`` with +1 when ``i`` is
       even and -1 otherwise.
    3. Run ``GridSearchModelSelection`` over ``create_param_tree()`` (a
       helper defined outside this block), scoring by accuracy with one
       stratified cross-validation run.
    4. Apply the best parameter combination to the classifier and evaluate
       it again with 10 cross-validation runs.

    The Shogun classes, ``Math`` and ``random`` are taken from module scope.

    Args:
        num_subsets: subset count for the stratified splitting strategy.
        num_vectors: how many vectors/labels to create.
        dim_vectors: number of dimensions per vector.

    Returns:
        ``(classifier, result, mean)`` where ``result`` is what the final
        ``evaluate()`` returned and ``mean`` is read from it after casting
        with ``CrossValidationResult.obtain_from_generic``.
    """
    seed = 1
    Math.init_random(seed)
    random.seed(seed)

    # Toy inputs: random feature matrix plus alternating binary labels.
    features = RealFeatures()
    features.set_feature_matrix(random.rand(dim_vectors, num_vectors))

    labels = BinaryLabels(num_vectors)
    for position in range(num_vectors):
        if position % 2 == 0:
            labels.set_label(position, 1)
        else:
            labels.set_label(position, -1)

    machine = LibSVM()
    splitter = StratifiedCrossValidationSplitting(labels, num_subsets)
    criterion = ContingencyTableEvaluation(ACCURACY)

    # A single run is enough while comparing parameter combinations.
    validator = CrossValidation(machine, features, labels, splitter, criterion)
    validator.set_num_runs(1)

    tree = create_param_tree()
    selector = GridSearchModelSelection(validator, tree)
    # Argument False: do not print the parameter combinations being tried.
    winner = selector.select_model(False)
    winner.apply_to_machine(machine)

    # Final evaluation: 10 runs, confidence-interval alpha of 0.01.
    validator.set_num_runs(10)
    validator.set_conf_int_alpha(0.01)
    generic_result = validator.evaluate()
    typed_result = CrossValidationResult.obtain_from_generic(generic_result)

    return machine, generic_result, typed_result.mean
def evaluation_cross_validation_regression(train_fname=traindat, label_fname=label_traindat,
                                           width=0.8, tau=1e-6):
    """Cross-validate kernel ridge regression with a Gaussian kernel.

    Features and regression labels are read from CSV files, a
    ``KernelRidgeRegression`` predictor is built on a ``GaussianKernel``,
    and the predictor is evaluated with 5-fold cross-validation (10 runs)
    using mean squared error.

    Fix over the previous version: ``width`` used to be accepted but
    ignored because the kernel was default-constructed.  The kernel is now
    created with the requested width, using the same
    ``GaussianKernel(features, features, width)`` form found in the LibSVR
    example of this file.

    Args:
        train_fname: path of the CSV file holding the training features.
        label_fname: path of the CSV file holding the regression labels.
        width: width handed to the Gaussian kernel.
        tau: first argument of ``KernelRidgeRegression`` (regularization).

    Returns:
        None.  The evaluation is executed but, as before, its result is
        not returned to the caller.
    """
    from modshogun import CrossValidation, CrossValidationResult
    from modshogun import MeanSquaredError, CrossValidationSplitting
    from modshogun import RegressionLabels, RealFeatures
    from modshogun import GaussianKernel, KernelRidgeRegression, CSVFile

    # Training data.
    features = RealFeatures(CSVFile(train_fname))
    labels = RegressionLabels(CSVFile(label_fname))

    # Kernel and predictor; the kernel now honours the ``width`` argument.
    kernel = GaussianKernel(features, features, width)
    predictor = KernelRidgeRegression(tau, kernel, labels)

    # Plain (non-stratified) 5-fold splitting, scored by mean squared error.
    splitting_strategy = CrossValidationSplitting(labels, 5)
    evaluation_criterion = MeanSquaredError()

    cross_validation = CrossValidation(predictor, features, labels,
                                       splitting_strategy, evaluation_criterion)

    # (optional) repeat cross-validation 10 times.
    cross_validation.set_num_runs(10)

    # (optional) request 95% confidence intervals for the results.
    cross_validation.set_conf_int_alpha(0.05)

    # (optional) lock the data so the kernel matrix can be precomputed;
    # this speeds things up but may not work for every machine.
    predictor.data_lock(labels, features)

    # Run the evaluation; the result is intentionally not returned so the
    # function keeps returning None as it did before.
    cross_validation.evaluate()
def evaluation_cross_validation_classification(traindat=traindat, label_traindat=label_traindat):
    """Cross-validate a LibLinear classifier with stratified 5-fold splits.

    The inputs are wrapped in ``RealFeatures`` / ``BinaryLabels`` and a
    ``LibLinear(L2R_L2LOSS_SVC)`` classifier is scored by accuracy over 10
    cross-validation runs with autolock switched off.

    Args:
        traindat: training data handed to ``RealFeatures``.
        label_traindat: labels handed to ``BinaryLabels``.

    Returns:
        None.  The evaluation is executed, but its result is not returned.
    """
    from modshogun import (
        CrossValidation, CrossValidationResult,
        ContingencyTableEvaluation, ACCURACY,
        StratifiedCrossValidationSplitting,
        BinaryLabels,
        RealFeatures,
        LibLinear, L2R_L2LOSS_SVC,
    )

    # Data and classifier.
    feats = RealFeatures(traindat)
    targets = BinaryLabels(label_traindat)
    machine = LibLinear(L2R_L2LOSS_SVC)

    # Stratified splitting is preferred for classification; the plain
    # "CrossValidationSplitting" would also be available.
    folds = StratifiedCrossValidationSplitting(targets, 5)
    accuracy = ContingencyTableEvaluation(ACCURACY)

    xval = CrossValidation(machine, feats, targets, folds, accuracy)
    xval.set_autolock(False)

    # (optional) 10 repetitions and 95% confidence intervals; neither is
    # strictly needed for this toy example.
    xval.set_num_runs(10)
    xval.set_conf_int_alpha(0.05)

    xval.evaluate()
def evaluation_cross_validation_classification(traindat=traindat, label_traindat=label_traindat):
    """Evaluate ``LibLinear`` by repeated stratified cross-validation.

    Builds features and binary labels from the arguments, creates an
    ``L2R_L2LOSS_SVC`` LibLinear classifier and evaluates its accuracy with
    5 stratified subsets, repeated 10 times.  Autolock is disabled on the
    cross-validation object before it is run.

    Args:
        traindat: data passed to the ``RealFeatures`` constructor.
        label_traindat: data passed to the ``BinaryLabels`` constructor.

    Returns:
        None; the outcome of ``evaluate()`` is discarded.
    """
    from modshogun import CrossValidation, CrossValidationResult
    from modshogun import ContingencyTableEvaluation, ACCURACY
    from modshogun import StratifiedCrossValidationSplitting
    from modshogun import BinaryLabels
    from modshogun import RealFeatures
    from modshogun import LibLinear, L2R_L2LOSS_SVC

    num_folds = 5
    num_runs = 10
    conf_int_alpha = 0.05  # i.e. 95% confidence intervals

    features = RealFeatures(traindat)
    labels = BinaryLabels(label_traindat)
    classifier = LibLinear(L2R_L2LOSS_SVC)

    # Stratified folds suit classification; the standard
    # "CrossValidationSplitting" could be used instead.
    cross_validation = CrossValidation(
        classifier,
        features,
        labels,
        StratifiedCrossValidationSplitting(labels, num_folds),
        ContingencyTableEvaluation(ACCURACY),
    )
    cross_validation.set_autolock(False)

    # Both settings are optional for this toy example.
    cross_validation.set_num_runs(num_runs)
    cross_validation.set_conf_int_alpha(conf_int_alpha)

    # Perform the cross-validation.
    cross_validation.evaluate()
def evaluation_cross_validation_regression(train_fname=traindat, label_fname=label_traindat,
                                           width=0.8, tau=1e-6):
    """Run 5-fold cross-validation of Gaussian-kernel ridge regression.

    Training features and labels are loaded from CSV files.  A
    ``KernelRidgeRegression`` machine with a ``GaussianKernel`` is scored
    by mean squared error over 10 repetitions of plain 5-fold
    cross-validation, after locking the data on the predictor.

    Bug fixed here: the ``width`` argument was never used, since the
    kernel was created with ``GaussianKernel()``.  It is now passed to the
    kernel constructor together with the training features, matching the
    ``GaussianKernel(features, features, width)`` call used by the LibSVR
    example in this file.

    Args:
        train_fname: CSV file with the training feature matrix.
        label_fname: CSV file with the regression labels.
        width: Gaussian kernel width.
        tau: value passed as first argument to ``KernelRidgeRegression``.

    Returns:
        None.  The cross-validation result is not returned (unchanged
        behaviour).
    """
    from modshogun import CrossValidation, CrossValidationResult
    from modshogun import MeanSquaredError, CrossValidationSplitting
    from modshogun import RegressionLabels, RealFeatures
    from modshogun import GaussianKernel, KernelRidgeRegression, CSVFile

    # Training data.
    features = RealFeatures(CSVFile(train_fname))
    labels = RegressionLabels(CSVFile(label_fname))

    # Kernel (using the caller-supplied width) and predictor.
    kernel = GaussianKernel(features, features, width)
    predictor = KernelRidgeRegression(tau, kernel, labels)

    # Standard 5-fold splitting is used here; for classification a
    # stratified splitting would be the better choice.
    splitting_strategy = CrossValidationSplitting(labels, 5)

    # Evaluation method.
    evaluation_criterion = MeanSquaredError()

    # Cross-validation instance.
    cross_validation = CrossValidation(predictor, features, labels,
                                       splitting_strategy, evaluation_criterion)

    # (optional) repeat cross-validation 10 times.
    cross_validation.set_num_runs(10)

    # (optional) request 95% confidence intervals (not actually needed for
    # this toy example).
    cross_validation.set_conf_int_alpha(0.05)

    # (optional) tell the machine to precompute the kernel matrix; speeds
    # things up but may not work.
    predictor.data_lock(labels, features)

    # Perform the cross-validation; the result is deliberately discarded
    # so the function's None return value is preserved.
    cross_validation.evaluate()
def modelselection_grid_search_krr_modular(fm_train=traindat, fm_test=testdat,
                                           label_train=label_traindat,
                                           width=2.1, C=1, epsilon=1e-5,
                                           tube_epsilon=1e-2):
    """Grid-search model selection for kernel ridge regression.

    A default-constructed ``KernelRidgeRegression`` is tuned with
    ``GridSearchModelSelection`` over the parameter tree produced by
    ``create_param_tree()`` (defined outside this block).  Candidates are
    scored by mean squared error using 5-fold cross-validation with 2
    runs.  The best combination is applied to the predictor, which is then
    evaluated once more.

    Fixes over the previous version:

    * training features are built from the ``fm_train`` argument instead
      of the module-level ``traindat``, so caller-supplied data is used;
    * labels are built once from ``label_train``; the earlier, immediately
      overwritten construction from the global ``label_traindat`` is gone;
    * the unused test-feature object (built from the global ``testdat``)
      is no longer created.

    Args:
        fm_train: training feature matrix.
        fm_test: unused; kept for interface compatibility.
        label_train: regression labels for the training data.
        width: unused by this function; kept for interface compatibility.
        C: unused by this function; kept for interface compatibility.
        epsilon: unused by this function; kept for interface compatibility.
        tube_epsilon: unused by this function; kept for interface
            compatibility.

    Returns:
        None.  The final evaluation result is not returned.
    """
    from modshogun import CrossValidation, CrossValidationResult
    from modshogun import MeanSquaredError
    from modshogun import CrossValidationSplitting
    from modshogun import RegressionLabels
    from modshogun import RealFeatures
    from modshogun import KernelRidgeRegression
    from modshogun import GridSearchModelSelection
    from modshogun import ModelSelectionParameters

    # Training data and labels, taken from the arguments.
    features_train = RealFeatures(fm_train)
    labels = RegressionLabels(label_train)

    # Predictor is default-constructed; the parameters to tune come from
    # the grid search below.
    predictor = KernelRidgeRegression()

    # Plain 5-fold splitting, scored by mean squared error.
    splitting_strategy = CrossValidationSplitting(labels, 5)
    evaluation_criterion = MeanSquaredError()

    cross_validation = CrossValidation(predictor, features_train, labels,
                                       splitting_strategy, evaluation_criterion)

    # (optional) repeat cross-validation; at least two runs are needed for
    # confidence intervals, more runs give better estimates.
    cross_validation.set_num_runs(2)

    # (optional) request 95% confidence intervals for the results.
    cross_validation.set_conf_int_alpha(0.05)

    # Parameter tree describing the values to search over.
    param_tree_root = create_param_tree()

    model_selection = GridSearchModelSelection(cross_validation, param_tree_root)

    # False: do not print each parameter combination while searching.
    print_state = False
    best_parameters = model_selection.select_model(print_state)

    # Apply the best parameters and evaluate once more; the result is not
    # returned, matching the previous behaviour.
    best_parameters.apply_to_machine(predictor)
    cross_validation.evaluate()
def modelselection_grid_search_libsvr_modular(fm_train=traindat, fm_test=testdat,
                                              label_train=label_traindat,
                                              width=2.1, C=1, epsilon=1e-5,
                                              tube_epsilon=1e-2):
    """Grid-search the ``C1``/``C2`` parameters of a LibSVR predictor.

    A ``LibSVR`` machine with a Gaussian kernel is tuned by
    ``GridSearchModelSelection``.  Candidates are scored by mean squared
    error using 5-fold cross-validation with 2 runs.  The data is locked
    on the predictor before the search, and the best parameter combination
    is applied and evaluated once more at the end.

    Fixes over the previous version:

    * training features are built from the ``fm_train`` argument instead
      of the module-level ``traindat``;
    * labels are built once from ``label_train``; the redundant first
      construction from the global ``label_traindat`` is removed.

    Args:
        fm_train: training feature matrix.
        fm_test: unused; kept for interface compatibility.
        label_train: regression labels for the training data.
        width: width of the Gaussian kernel.
        C: first argument of the ``LibSVR`` constructor.
        epsilon: value passed to ``predictor.set_epsilon``.
        tube_epsilon: second argument of the ``LibSVR`` constructor.

    Returns:
        None.  The final evaluation result is not returned.
    """
    from modshogun import CrossValidation, CrossValidationResult
    from modshogun import MeanSquaredError
    from modshogun import CrossValidationSplitting
    from modshogun import RegressionLabels
    from modshogun import RealFeatures
    from modshogun import GaussianKernel
    from modshogun import LibSVR
    from modshogun import GridSearchModelSelection
    from modshogun import ModelSelectionParameters, R_EXP
    from modshogun import ParameterCombination

    # Training data and labels, taken from the arguments.
    features_train = RealFeatures(fm_train)
    labels = RegressionLabels(label_train)

    # Kernel on the training features.
    kernel = GaussianKernel(features_train, features_train, width)

    # Predictor.
    predictor = LibSVR(C, tube_epsilon, kernel, labels)
    predictor.set_epsilon(epsilon)

    # Plain 5-fold splitting, scored by mean squared error.
    splitting_strategy = CrossValidationSplitting(labels, 5)
    evaluation_criterion = MeanSquaredError()

    cross_validation = CrossValidation(predictor, features_train, labels,
                                       splitting_strategy, evaluation_criterion)

    # (optional) repeat cross-validation; at least two runs are needed for
    # confidence intervals, more runs give better estimates.
    cross_validation.set_num_runs(2)

    # (optional) request 95% confidence intervals for the results.
    cross_validation.set_conf_int_alpha(0.05)

    # Parameter tree: C1 and C2 each get values built from the range
    # -1.0 .. 0.0 with the R_EXP range type.
    param_tree_root = ModelSelectionParameters()
    for param_name in ("C1", "C2"):
        node = ModelSelectionParameters(param_name)
        param_tree_root.append_child(node)
        node.build_values(-1.0, 0.0, R_EXP)

    model_selection = GridSearchModelSelection(cross_validation, param_tree_root)

    # False: do not print each parameter combination while searching.
    print_state = False

    # Lock the data first: model selection does not change the kernel
    # matrix, so this avoids recomputing it in every iteration of the
    # search (use with care).
    predictor.data_lock(labels, features_train)
    best_parameters = model_selection.select_model(print_state)

    # Apply the best parameters and evaluate once more; the result is not
    # returned, matching the previous behaviour.
    best_parameters.apply_to_machine(predictor)
    cross_validation.evaluate()
def modelselection_grid_search_krr_modular(fm_train=traindat, fm_test=testdat,
                                           label_train=label_traindat,
                                           width=2.1, C=1, epsilon=1e-5,
                                           tube_epsilon=1e-2):
    """Tune ``KernelRidgeRegression`` by grid search with cross-validation.

    The search space is the parameter tree returned by
    ``create_param_tree()``, a helper defined outside this block.  Each
    candidate is scored by mean squared error over 5-fold cross-validation
    repeated twice.  Afterwards the best combination is applied to the
    predictor and one more evaluation is run.

    Corrections compared with the previous version:

    * ``fm_train`` is now actually used to build the training features
      (the module-level ``traindat`` was used before, silently ignoring
      the argument);
    * the labels are constructed only once, from ``label_train``;
    * the never-used test features object is no longer constructed.

    Args:
        fm_train: training feature matrix.
        fm_test: not used; retained so existing callers keep working.
        label_train: regression labels matching ``fm_train``.
        width: not used here; retained for interface compatibility.
        C: not used here; retained for interface compatibility.
        epsilon: not used here; retained for interface compatibility.
        tube_epsilon: not used here; retained for interface compatibility.

    Returns:
        None.  The result of the last evaluation is not returned.
    """
    from modshogun import CrossValidation, CrossValidationResult
    from modshogun import MeanSquaredError
    from modshogun import CrossValidationSplitting
    from modshogun import RegressionLabels
    from modshogun import RealFeatures
    from modshogun import KernelRidgeRegression
    from modshogun import GridSearchModelSelection
    from modshogun import ModelSelectionParameters

    # Training data and labels come from the function arguments.
    features_train = RealFeatures(fm_train)
    labels = RegressionLabels(label_train)

    # Default-constructed predictor; the grid search supplies the
    # parameter values to try.
    predictor = KernelRidgeRegression()

    # Cross-validation: plain 5-fold splitting, mean squared error.
    cross_validation = CrossValidation(
        predictor,
        features_train,
        labels,
        CrossValidationSplitting(labels, 5),
        MeanSquaredError(),
    )

    # (optional) two runs -- the minimum for confidence intervals; use
    # more for better estimates.
    cross_validation.set_num_runs(2)

    # (optional) 95% confidence intervals for the results.
    cross_validation.set_conf_int_alpha(0.05)

    # Model selection over the externally defined parameter tree.
    model_selection = GridSearchModelSelection(cross_validation,
                                               create_param_tree())

    # False: parameter combinations are not printed during the search.
    best_parameters = model_selection.select_model(False)

    # Apply the winner and evaluate again; as before, nothing is returned.
    best_parameters.apply_to_machine(predictor)
    cross_validation.evaluate()