示例#1
0
def init_svm(task_type, kernel, labels):
    """Create an (untrained) SVM machine matching ``task_type``.

    Args:
        task_type: One of 'Binary Classification',
            'Multi Class Classification' or 'Regression'.
        kernel: Kernel object passed through to the machine's constructor.
        labels: Labels object passed through to the machine's constructor.

    Returns:
        A ``LibSVM``, ``LibSVMMultiClass`` or ``LibSVR`` instance built with
        C=1. The regression machine additionally gets epsilon=1e-5 and a
        tube epsilon of 1e-2.

    Raises:
        ValueError: If ``task_type`` is not one of the supported names.
    """
    C = 1

    if task_type == 'Binary Classification':
        return LibSVM(C, kernel, labels)

    if task_type == 'Multi Class Classification':
        return LibSVMMultiClass(C, kernel, labels)

    if task_type == 'Regression':
        epsilon = 1e-5
        tube_epsilon = 1e-2
        svm = LibSVR(C, epsilon, kernel, labels)
        svm.set_tube_epsilon(tube_epsilon)
        return svm

    # Fail explicitly: falling through to a ``return svm`` here would die
    # with an UnboundLocalError that hides the real cause.
    raise ValueError('%s unknown!' % (task_type,))
def regression_libsvr_modular (fm_train=traindat,fm_test=testdat,label_train=label_traindat,\
           width=2.1,C=1,epsilon=1e-5,tube_epsilon=1e-2):
    """Train a LibSVR with a Gaussian kernel and return its outputs.

    Args:
        fm_train: Training data wrapped in ``RealFeatures``.
        fm_test: Test data wrapped in ``RealFeatures``.
        label_train: Training targets wrapped in ``Labels``.
        width: Width handed to ``GaussianKernel``.
        C: First argument of the ``LibSVR`` constructor.
        epsilon: Value passed to ``set_epsilon``.
        tube_epsilon: Second argument of the ``LibSVR`` constructor.

    Returns:
        Tuple ``(out1, out2, kernel)``: the labels from ``apply()`` called
        without arguments after the kernel was re-initialised on
        (train, test), the labels from ``apply(feats_test)``, and the
        kernel object itself.
    """
    from shogun.Features import Labels, RealFeatures
    from shogun.Kernel import GaussianKernel
    from shogun.Regression import LibSVR

    train_features = RealFeatures(fm_train)
    test_features = RealFeatures(fm_test)
    gaussian = GaussianKernel(train_features, train_features, width)

    # Build and fit the regression machine.
    machine = LibSVR(C, tube_epsilon, gaussian, Labels(label_train))
    machine.set_epsilon(epsilon)
    machine.train()

    # Point the kernel at (train, test) before producing outputs.
    gaussian.init(train_features, test_features)
    implicit_output = machine.apply().get_labels()
    explicit_output = machine.apply(test_features).get_labels()

    return implicit_output, explicit_output, gaussian
def regression_libsvr_modular (fm_train=traindat,fm_test=testdat,label_train=label_traindat,\
				       width=2.1,C=1,epsilon=1e-5,tube_epsilon=1e-2):
    """Fit a Gaussian-kernel LibSVR and produce its predictions.

    The training matrix, test matrix and training targets are wrapped in
    Shogun feature/label objects, a ``LibSVR`` is trained on them, and the
    kernel is then re-initialised on (train, test) before ``apply`` is
    called.

    Returns:
        ``(out1, out2, kernel)`` where ``out1`` comes from ``apply()``
        without arguments, ``out2`` from ``apply(feats_test)``, and
        ``kernel`` is the ``GaussianKernel`` that was used.
    """
    from shogun.Features import Labels, RealFeatures
    from shogun.Kernel import GaussianKernel
    from shogun.Regression import LibSVR

    # Wrap raw inputs.
    feats_tr = RealFeatures(fm_train)
    feats_te = RealFeatures(fm_test)

    rbf = GaussianKernel(feats_tr, feats_tr, width)
    targets = Labels(label_train)

    # Configure and train the regressor.
    regressor = LibSVR(C, tube_epsilon, rbf, targets)
    regressor.set_epsilon(epsilon)
    regressor.train()

    # Outputs are taken after the kernel has been set up on (train, test).
    rbf.init(feats_tr, feats_te)
    result_no_args = regressor.apply()
    result_on_test = regressor.apply(feats_te)

    return result_no_args.get_labels(), result_on_test.get_labels(), rbf
示例#4
0
    def __init__(self, traininput,testinput,traintarget,width=0.5,C=1,epsilon=0.1,tube_epsilon=0.1):
        """Train a Gaussian-kernel LibSVR and keep it on the instance.

        Stores the training features, the kernel and the trained machine
        as ``self.feats_train``, ``self.kernel`` and ``self.svr``, and
        prints the wall-clock time spent building the kernel and training.
        """
        # Convert the raw inputs to float64 containers first.
        train_matrix = matrix(traininput, dtype=float64)
        test_matrix = matrix(testinput, dtype=float64)
        target_vector = array(traintarget, dtype=float64)

        self.feats_train = RealFeatures(train_matrix)
        # Built from the test input but not used further in this method.
        feats_test = RealFeatures(test_matrix)

        # Timed section: kernel construction, label wrapping and training.
        started = time.time()
        # Alternative kernel kept for reference:
        # PolyKernel(self.feats_train, self.feats_train, 2, False)
        self.kernel = GaussianKernel(self.feats_train, self.feats_train, width)
        train_labels = Labels(target_vector)

        self.svr = LibSVR(C, epsilon, self.kernel, train_labels)
        self.svr.set_tube_epsilon(tube_epsilon)
        self.svr.train()
        finished = time.time()

        print('SVR train time')
        print(finished - started)
def libsvr ():
    """Run the LibSVR demo on the module-level train/test data.

    Reads ``fm_train``, ``fm_test`` and ``label_train`` from module scope,
    trains a Gaussian-kernel ``LibSVR`` and evaluates it via ``classify``.
    The outputs are computed but not returned.
    """
    print('LibSVR')
    from shogun.Features import Labels, RealFeatures
    from shogun.Kernel import GaussianKernel
    from shogun.Regression import LibSVR

    train_features = RealFeatures(fm_train)
    test_features = RealFeatures(fm_test)

    # Kernel on the training data with a fixed width.
    kernel_width = 2.1
    gaussian = GaussianKernel(train_features, train_features, kernel_width)

    # Fixed hyper-parameters of the demo.
    cost = 1
    eps = 1e-5
    tube_eps = 1e-2
    targets = Labels(label_train)

    machine = LibSVR(cost, eps, gaussian, targets)
    machine.set_tube_epsilon(tube_eps)
    machine.train()

    # Re-initialise the kernel on (train, test) before evaluating.
    gaussian.init(train_features, test_features)
    output_no_args = machine.classify().get_labels()
    output_on_test = machine.classify(test_features).get_labels()
def modelselection_grid_search_libsvr_modular(fm_train=traindat,fm_test=testdat,label_train=label_traindat,\
				       width=2.1,C=1,epsilon=1e-5,tube_epsilon=1e-2):
    """Grid-search C1/C2 of a Gaussian-kernel LibSVR via cross-validation.

    Args:
        fm_train: Training data, wrapped in ``RealFeatures``.
        fm_test: Not used by this function.
        label_train: Training targets, wrapped in ``Labels``.
        width: Width handed to ``GaussianKernel``.
        C: First argument of the ``LibSVR`` constructor.
        epsilon: Value passed to ``set_epsilon``.
        tube_epsilon: Second argument of the ``LibSVR`` constructor.

    Returns:
        None. The best parameter combination is applied to the predictor
        and a final cross-validation is evaluated, but the result is not
        returned.
    """
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import MeanSquaredError
    from shogun.Evaluation import CrossValidationSplitting
    from shogun.Features import Labels
    from shogun.Features import RealFeatures
    from shogun.Kernel import GaussianKernel
    from shogun.Regression import LibSVR
    from shogun.ModelSelection import GridSearchModelSelection
    from shogun.ModelSelection import ModelSelectionParameters, R_EXP
    from shogun.ModelSelection import ParameterCombination

    # Training data: built from the arguments so that callers passing their
    # own data are honoured (module-level globals are only the defaults).
    features_train = RealFeatures(fm_train)
    labels = Labels(label_train)

    # kernel
    kernel = GaussianKernel(features_train, features_train, width)

    # To list all kernel parameters available for model selection, call
    # kernel.print_modsel_params()

    # predictor
    predictor = LibSVR(C, tube_epsilon, kernel, labels)
    predictor.set_epsilon(epsilon)

    # splitting strategy for 5-fold cross-validation
    splitting_strategy = CrossValidationSplitting(labels, 5)

    # evaluation method
    evaluation_criterium = MeanSquaredError()

    # cross-validation instance
    cross_validation = CrossValidation(predictor, features_train, labels,
                                       splitting_strategy,
                                       evaluation_criterium)

    # (optional) repeat x-val 10 times
    cross_validation.set_num_runs(10)

    # (optional) request 95% confidence intervals for results (not actually
    # needed for this toy example)
    cross_validation.set_conf_int_alpha(0.05)

    # To list all predictor parameters available for model selection, call
    # predictor.print_modsel_params()

    # build parameter tree to select C1 and C2
    param_tree_root = ModelSelectionParameters()
    c1 = ModelSelectionParameters("C1")
    param_tree_root.append_child(c1)
    c1.build_values(-2.0, 2.0, R_EXP)

    c2 = ModelSelectionParameters("C2")
    param_tree_root.append_child(c2)
    c2.build_values(-2.0, 2.0, R_EXP)

    # model selection instance
    model_selection = GridSearchModelSelection(param_tree_root,
                                               cross_validation)

    # Do not print the current parameter combination while searching.
    print_state = False
    # lock data before since model selection will not change the kernel matrix
    # (use with care) This avoids that the kernel matrix is recomputed in every
    # iteration of the model search
    predictor.data_lock(labels, features_train)
    best_parameters = model_selection.select_model(print_state)

    # apply the best parameters and evaluate once more
    best_parameters.apply_to_machine(predictor)
    result = cross_validation.evaluate()
示例#7
0
def modelselection_grid_search_libsvr_modular (fm_train=traindat,fm_test=testdat,label_train=label_traindat,\
           width=2.1,C=1,epsilon=1e-5,tube_epsilon=1e-2):
    """Grid-search C1/C2 of a Gaussian-kernel LibSVR via cross-validation.

    Args:
        fm_train: Training data, wrapped in ``RealFeatures``.
        fm_test: Not used by this function.
        label_train: Training targets, wrapped in ``RegressionLabels``.
        width: Width handed to ``GaussianKernel``.
        C: First argument of the ``LibSVR`` constructor.
        epsilon: Value passed to ``set_epsilon``.
        tube_epsilon: Second argument of the ``LibSVR`` constructor.

    Returns:
        None. The best parameter combination is applied to the predictor
        and a final cross-validation is evaluated, but the result is not
        returned.
    """
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import MeanSquaredError
    from shogun.Evaluation import CrossValidationSplitting
    from shogun.Features import RegressionLabels
    from shogun.Features import RealFeatures
    from shogun.Kernel import GaussianKernel
    from shogun.Regression import LibSVR
    from shogun.ModelSelection import GridSearchModelSelection
    from shogun.ModelSelection import ModelSelectionParameters, R_EXP
    from shogun.ModelSelection import ParameterCombination

    # Training data: taken from the arguments rather than the module-level
    # globals, so that caller-supplied data is actually used.
    features_train = RealFeatures(fm_train)
    labels = RegressionLabels(label_train)

    # kernel
    kernel = GaussianKernel(features_train, features_train, width)

    # To list all kernel parameters available for model selection, call
    # kernel.print_modsel_params()

    # predictor
    predictor = LibSVR(C, tube_epsilon, kernel, labels)
    predictor.set_epsilon(epsilon)

    # splitting strategy for 5-fold cross-validation
    splitting_strategy = CrossValidationSplitting(labels, 5)

    # evaluation method
    evaluation_criterium = MeanSquaredError()

    # cross-validation instance
    cross_validation = CrossValidation(predictor, features_train, labels,
                                       splitting_strategy,
                                       evaluation_criterium)

    # (optional) repeat x-val 10 times
    cross_validation.set_num_runs(10)

    # (optional) request 95% confidence intervals for results (not actually
    # needed for this toy example)
    cross_validation.set_conf_int_alpha(0.05)

    # To list all predictor parameters available for model selection, call
    # predictor.print_modsel_params()

    # build parameter tree to select C1 and C2
    param_tree_root = ModelSelectionParameters()
    c1 = ModelSelectionParameters("C1")
    param_tree_root.append_child(c1)
    c1.build_values(-2.0, 2.0, R_EXP)

    c2 = ModelSelectionParameters("C2")
    param_tree_root.append_child(c2)
    c2.build_values(-2.0, 2.0, R_EXP)

    # model selection instance
    model_selection = GridSearchModelSelection(param_tree_root,
                                               cross_validation)

    # Do not print the current parameter combination while searching.
    print_state = False
    # lock data before since model selection will not change the kernel matrix
    # (use with care) This avoids that the kernel matrix is recomputed in every
    # iteration of the model search
    predictor.data_lock(labels, features_train)
    best_parameters = model_selection.select_model(print_state)

    # apply the best parameters and evaluate once more
    best_parameters.apply_to_machine(predictor)
    result = cross_validation.evaluate()
def regression_libsvr_modular (svm_c=1, svr_param=0.1, n=100,n_test=100, \
		x_range=6,x_range_test=10,noise_var=0.5,width=1, seed=1):
    """Fit epsilon-SVR and nu-SVR models to a noisy one-dimensional sine wave.

    ``random``, ``array`` and ``sin`` are taken from module scope
    (presumably numpy's -- confirm against the file's imports).

    Args:
        svm_c: First argument of both ``LibSVR`` constructors.
        svr_param: Second argument of both ``LibSVR`` constructors.
        n: Currently overridden inside the function (see NOTE below).
        n_test: Number of evenly spaced test inputs.
        x_range: Training inputs are drawn as ``random.rand(1, n) * x_range``.
        x_range_test: Test inputs cover ``[0, x_range_test)``.
        noise_var: Factor applied to the ``random.randn`` noise added to
            the training targets.
        width: Width handed to ``GaussianKernel``.
        seed: Seed passed to ``random.seed`` for reproducible results.

    Returns:
        Tuple ``(out1_epsilon, out2_epsilon, out1_nu, out2_nu, kernel)``.
        For each model, ``out1_*`` holds the labels from ``apply()`` called
        without arguments after the kernel was re-initialised on
        (train, test), and ``out2_*`` the labels from ``apply(feats_test)``.
    """
    from shogun.Features import RegressionLabels, RealFeatures
    from shogun.Kernel import GaussianKernel
    from shogun.Regression import LibSVR, LIBSVR_NU_SVR, LIBSVR_EPSILON_SVR

    # reproducible results
    random.seed(seed)

    # NOTE(review): the original hard-codes the number of training points
    # to 15 regardless of the ``n`` argument. It is kept so that default
    # output does not change; confirm whether ``n`` should be honoured.
    n = 15

    # easy regression data: one dimensional noisy sine wave
    X = random.rand(1, n) * x_range
    X_test = array([[float(i) / n_test * x_range_test for i in range(n_test)]])
    Y = sin(X) + random.randn(n) * noise_var

    # shogun representation
    labels = RegressionLabels(Y[0])
    feats_train = RealFeatures(X)
    feats_test = RealFeatures(X_test)

    kernel = GaussianKernel(feats_train, feats_train, width)

    # two svr models: epsilon and nu
    svr_epsilon = LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_EPSILON_SVR)
    svr_epsilon.train()
    svr_nu = LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_NU_SVR)
    svr_nu.train()

    # predictions: each model reports its own outputs
    kernel.init(feats_train, feats_test)
    out1_epsilon = svr_epsilon.apply().get_labels()
    out2_epsilon = svr_epsilon.apply(feats_test).get_labels()
    out1_nu = svr_nu.apply().get_labels()
    out2_nu = svr_nu.apply(feats_test).get_labels()

    return out1_epsilon, out2_epsilon, out1_nu, out2_nu, kernel
示例#9
0
class SVR:
    """Gaussian-kernel LibSVR regressor plus a few error metrics.

    The constructor trains the machine; ``svr_req`` queries it; the
    ``calc_*`` methods are stateless helpers comparing two sequences of
    target values.
    """

    # Trained LibSVR machine (set by __init__).
    svr = None
    # Kernel used by the machine (set by __init__).
    kernel = None
    # Training features, needed to re-initialise the kernel for queries.
    feats_train = None

    def __init__(self, traininput,testinput,traintarget,width=0.5,C=1,epsilon=0.1,tube_epsilon=0.1):
        """Train a LibSVR on the given data and print the training time.

        Args:
            traininput: Training inputs, converted to a float64 matrix.
            testinput: Test inputs, converted to a float64 matrix and
                wrapped in features, but not used further here.
            traintarget: Training targets, converted to a float64 array.
            width: Width handed to ``GaussianKernel``.
            C: First argument of the ``LibSVR`` constructor.
            epsilon: Second argument of the ``LibSVR`` constructor.
            tube_epsilon: Value passed to ``set_tube_epsilon``.
        """
        train = matrix(traininput, dtype=float64)
        test = matrix(testinput, dtype=float64)
        label_train = array(traintarget, dtype=float64)

        self.feats_train = RealFeatures(train)
        # Built from the test input but not used afterwards in this method.
        feats_test = RealFeatures(test)

        # Timed section: kernel construction, label wrapping and training.
        trainstart = time.time()
        self.kernel = GaussianKernel(self.feats_train, self.feats_train, width)
        labels = Labels(label_train)

        self.svr = LibSVR(C, epsilon, self.kernel, labels)
        self.svr.set_tube_epsilon(tube_epsilon)
        self.svr.train()
        trainend = time.time()

        print('SVR train time')
        print(trainend - trainstart)

    def svr_req(self,inputs):
        """Return the machine's outputs for ``inputs`` and print the query time.

        Args:
            inputs: Query inputs, converted to a float64 matrix.

        Returns:
            The labels obtained from ``self.svr.classify`` on the inputs.
        """
        feat_inputs = RealFeatures(matrix(inputs, dtype=float64))

        teststart = time.time()
        # The kernel must be set up on (train, query) before classifying.
        self.kernel.init(self.feats_train, feat_inputs)
        out = self.svr.classify(feat_inputs).get_labels()
        testend = time.time()

        print('SVR query response ')
        print(testend - teststart)

        return out

    def calc_sme(self, testtarget, realtarget):
        """Return the mean of the squared differences of the two sequences.

        The sum runs over ``len(testtarget)`` elements and is divided by
        that length.
        """
        result = 0.0
        for i in range(len(testtarget)):
            result += pow((realtarget[i] - testtarget[i]), 2)
        result /= len(testtarget)

        return result

    def calc_mape(self, testtarget, realtarget):
        """Return the mean absolute percentage error.

        Each term is ``abs(testtarget[i] - realtarget[i]) / testtarget[i]``;
        the terms are summed and divided by ``len(testtarget)``.
        """
        result = 0.0

        for i in range(len(testtarget)):
            # Accumulate every term; assigning here would keep only the
            # last element's error.
            result += abs(testtarget[i] - realtarget[i]) / testtarget[i]

        return result / len(testtarget)

    def calc_rsqr(self, testtarget, realtarget):
        """Return the coefficient of determination, 1 - SS_res / SS_tot.

        ``SS_res`` is the sum of squared differences between ``realtarget``
        and ``testtarget``; ``SS_tot`` is the sum of squared deviations of
        ``realtarget`` from its mean. A constant ``realtarget`` makes
        ``SS_tot`` zero, so the division is undefined in that case.
        """
        result_up = 0.0
        result_down = 0.0
        # float() keeps the mean exact for integer inputs on Python 2.
        avg = sum(realtarget) / float(len(realtarget))

        for i in range(len(testtarget)):
            result_up += pow((realtarget[i] - testtarget[i]), 2)
            # Squared deviation: unsquared deviations from the mean cancel
            # out to (about) zero.
            result_down += pow((realtarget[i] - avg), 2)

        return 1 - (result_up / result_down)

    def calc_pred(self, testtarget, realtarget, x):
        """Return the fraction of elements passing the threshold test below.

        An element counts when
        ``testtarget[i] / realtarget[i] - 1 < realtarget[i] * (1 - x)``.
        The count is divided by ``len(realtarget)``.
        """
        # NOTE(review): this condition does not look like the usual
        # "prediction within x of the real value" criterion; it is left
        # unchanged -- confirm the intended formula.
        countx = 0.0

        for i in range(len(testtarget)):
            if ((testtarget[i] / realtarget[i]) - 1 < (realtarget[i] * (1 - x))):
                countx += 1
        return countx / len(realtarget)