Example #1
0
File: svr.py  Project: nehagup/benchmarks
    def RunSVRShogun(q):
      """
      Train Shogun's LibSVR on the loaded dataset and report training time.

      @param q: multiprocessing queue used to hand the elapsed time (or -1
          on failure) back to the parent process.
      @return: elapsed training time in seconds, or -1 on failure.
      """
      totalTimer = Timer()
      # Load input dataset.
      Log.Info("Loading dataset", self.verbose)
      # Use the last row of the training set as the responses.
      X, y = SplitTrainData(self.dataset)

      # Parse the parameters from the options string. Raw strings avoid
      # invalid-escape warnings, and the optional fractional part lets
      # integer-valued options (e.g. "-c 2") be recognized as well.
      c = re.search(r"-c (\d+(?:\.\d+)?)", options)
      e = re.search(r"-e (\d+(?:\.\d+)?)", options)
      g = re.search(r"-g (\d+(?:\.\d+)?)", options)

      self.C = 1.0 if not c else float(c.group(1))
      self.epsilon = 1.0 if not e else float(e.group(1))
      g = 10.0 if not g else float(g.group(1))
      # Shogun's GaussianKernel is parameterized by width = 1 / gamma.
      self.width = np.true_divide(1, g)

      data = RealFeatures(X.T)
      labels_train = RegressionLabels(y)
      self.kernel = GaussianKernel(data, data, self.width)

      try:
        with totalTimer:
          # Perform SVR.
          model = LibSVR(self.C, self.epsilon, self.kernel, labels_train)
          model.train()
      except Exception:
        # Do not bind the exception: the original `as e` silently shadowed
        # the regex match above. Signal failure to the parent process.
        q.put(-1)
        return -1

      time = totalTimer.ElapsedTime()
      q.put(time)
      return time
Example #2
0
    def RunSVRShogun():
      """
      Train Shogun's LibSVR on the loaded dataset and return elapsed time.

      Consumes the recognized keys ("c", "epsilon", "gamma") from the
      ``options`` dictionary, falling back to defaults, and fails if any
      unrecognized option remains. Returns -1 when training raises.
      """
      totalTimer = Timer()
      # Load input dataset.
      Log.Info("Loading dataset", self.verbose)
      # Use the last row of the training set as the responses.
      X, y = SplitTrainData(self.dataset)

      # Pull the recognized parameters out of the options dictionary.
      self.C = float(options.pop("c")) if "c" in options else 1.0
      self.epsilon = float(options.pop("epsilon")) if "epsilon" in options else 1.0
      if "gamma" in options:
        # Kernel width is the reciprocal of gamma.
        self.width = np.true_divide(1, float(options.pop("gamma")))
      else:
        self.width = 0.1

      # Anything still left in the dictionary is an option we do not know.
      if options:
        Log.Fatal("Unknown parameters: " + str(options))
        raise Exception("unknown parameters")

      data = RealFeatures(X.T)
      labels_train = RegressionLabels(y)
      self.kernel = GaussianKernel(data, data, self.width)

      try:
        with totalTimer:
          # Perform SVR.
          model = LibSVR(self.C, self.epsilon, self.kernel, labels_train)
          model.train()
      except Exception:
        return -1

      return totalTimer.ElapsedTime()
Example #3
0
    def RunSVRShogun(q):
      """
      Train Shogun's LibSVR on the loaded dataset and report training time.

      @param q: multiprocessing queue used to hand the elapsed time (or -1
          on failure) back to the parent process.
      @return: elapsed training time in seconds, or -1 on failure.
      """
      totalTimer = Timer()
      # Load input dataset.
      Log.Info("Loading dataset", self.verbose)
      # Use the last row of the training set as the responses.
      X, y = SplitTrainData(self.dataset)

      # Parse the parameters from the options string. Raw strings avoid
      # invalid-escape warnings, and the optional fractional part lets
      # integer-valued options (e.g. "-c 2") be recognized as well.
      c = re.search(r"-c (\d+(?:\.\d+)?)", options)
      e = re.search(r"-e (\d+(?:\.\d+)?)", options)
      g = re.search(r"-g (\d+(?:\.\d+)?)", options)

      self.C = 1.0 if not c else float(c.group(1))
      self.epsilon = 1.0 if not e else float(e.group(1))
      g = 10.0 if not g else float(g.group(1))
      # Shogun's GaussianKernel is parameterized by width = 1 / gamma.
      self.width = np.true_divide(1, g)

      data = RealFeatures(X.T)
      labels_train = RegressionLabels(y)
      self.kernel = GaussianKernel(data, data, self.width)

      try:
        with totalTimer:
          # Perform SVR.
          model = LibSVR(self.C, self.epsilon, self.kernel, labels_train)
          model.train()
      except Exception:
        # Do not bind the exception: the original `as e` silently shadowed
        # the regex match above. Signal failure to the parent process.
        q.put(-1)
        return -1

      time = totalTimer.ElapsedTime()
      q.put(time)
      return time
def runShogunSVRWDKernel(train_xt, train_lt, test_xt, svm_c=1, svr_param=0.1):
    """
    Train an epsilon-SVR on string data with a combined weighted-degree
    position + spectrum kernel, serialize the trained model, and return
    (train_predictions, test_predictions, kernel).

    NOTE(review): SIZE, NUMSHIFTS, FNEPSILON and construct_features are
    module-level names defined elsewhere in the file -- confirm before reuse.
    Python 2 code (print statements).
    """

    ##################################################
    # Set up string features for the training and test data.
    feats_train = construct_features(train_xt)
    feats_test = construct_features(test_xt)

    # Weighted-degree position string kernel of degree 5 with a constant
    # shift of NUMSHIFTS at every position; assumes all training strings
    # share the length of the first one -- TODO confirm.
    max_len = len(train_xt[0])
    kernel_wdk = WeightedDegreePositionStringKernel(SIZE, 5)
    shifts_vector = np.ones(max_len, dtype=np.int32) * NUMSHIFTS
    kernel_wdk.set_shifts(shifts_vector)

    ########
    # Spectrum kernel using unsigned word counts.
    use_sign = False
    kernel_spec_1 = WeightedCommWordStringKernel(SIZE, use_sign)
    #kernel_spec_2 = WeightedCommWordStringKernel(SIZE, use_sign)

    ########
    # Combine the WD-position and spectrum kernels into one kernel.
    kernel = CombinedKernel()
    kernel.append_kernel(kernel_wdk)
    kernel.append_kernel(kernel_spec_1)
    #kernel.append_kernel(kernel_spec_2)

    # Regression targets for the training strings.
    labels = RegressionLabels(train_lt)

    # Train the epsilon-SVR (the nu-SVR variant is left commented out).
    svr_epsilon = LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_EPSILON_SVR)
    print "Ready to train!"
    svr_epsilon.train(feats_train)
    #svr_nu=LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_NU_SVR)
    #svr_nu.train(feats_train)

    # Predict on the training data, then re-initialize the kernel on the
    # train/test pair before predicting on the test data.
    print "Making predictions!"
    kernel.init(feats_train, feats_test)
    out1_epsilon = svr_epsilon.apply().get_labels()
    out2_epsilon = svr_epsilon.apply(feats_test).get_labels()
    #out1_nu=svr_epsilon.apply().get_labels()
    #out2_nu=svr_epsilon.apply(feats_test).get_labels()

    ##################################################
    # Serialize the trained model to the FNEPSILON file.
    fEpsilon = open(FNEPSILON, 'w+')
    #fNu = open(FNNU, 'w+')
    svr_epsilon.save(fEpsilon)
    #svr_nu.save(fNu)
    fEpsilon.close()
    #fNu.close()

    ##################################################

    #return out1_epsilon,out2_epsilon,out1_nu,out2_nu ,kernel
    return out1_epsilon, out2_epsilon, kernel
def runShogunSVRWDKernel(train_xt, train_lt, test_xt, svm_c=1, svr_param=0.1):
	"""
	serialize svr with string kernels
	"""

    ##################################################
    # set up svr
	feats_train = construct_features(train_xt)
	feats_test = construct_features(test_xt)

	max_len = len(train_xt[0])
	kernel_wdk = WeightedDegreePositionStringKernel(SIZE, 5)
	shifts_vector = np.ones(max_len, dtype=np.int32)*NUMSHIFTS
	kernel_wdk.set_shifts(shifts_vector)

    ########
    # set up spectrum
	use_sign = False
	kernel_spec_1 = WeightedCommWordStringKernel(SIZE, use_sign)
	#kernel_spec_2 = WeightedCommWordStringKernel(SIZE, use_sign)

    ########
    # combined kernel
	kernel = CombinedKernel()
	kernel.append_kernel(kernel_wdk)
	kernel.append_kernel(kernel_spec_1)
	#kernel.append_kernel(kernel_spec_2)

    # init kernel
	labels = RegressionLabels(train_lt)
	
	# two svr models: epsilon and nu
	svr_epsilon=LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_EPSILON_SVR)
	print "Ready to train!"
	svr_epsilon.train(feats_train)
	#svr_nu=LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_NU_SVR)
	#svr_nu.train(feats_train)

	# predictions
	print "Making predictions!"
	kernel.init(feats_train, feats_test)
	out1_epsilon=svr_epsilon.apply().get_labels()
	out2_epsilon=svr_epsilon.apply(feats_test).get_labels()
	#out1_nu=svr_epsilon.apply().get_labels()
	#out2_nu=svr_epsilon.apply(feats_test).get_labels()

    ##################################################
    # serialize to file
	fEpsilon = open(FNEPSILON, 'w+')
	#fNu = open(FNNU, 'w+')
	svr_epsilon.save(fEpsilon)
	#svr_nu.save(fNu)
	fEpsilon.close()
	#fNu.close()

    ##################################################
	
	#return out1_epsilon,out2_epsilon,out1_nu,out2_nu ,kernel
	return out1_epsilon,out2_epsilon,kernel
def regression_libsvr_modular(svm_c=1, svr_param=0.1, n=100, n_test=100,
                              x_range=6, x_range_test=10, noise_var=0.5,
                              width=1, seed=1):
    """
    Train epsilon-SVR and nu-SVR on a noisy one-dimensional sine wave and
    return their predictions.

    Returns (train_eps, test_eps, train_nu, test_nu, kernel).
    BUG FIX: the nu outputs previously came from the epsilon model; they
    are now produced by the nu model.
    """
    from modshogun import RegressionLabels, RealFeatures
    from modshogun import GaussianKernel
    from modshogun import LibSVR, LIBSVR_NU_SVR, LIBSVR_EPSILON_SVR

    # Reproducible results.
    random.seed(seed)

    # Easy regression data: one-dimensional noisy sine wave.
    # NOTE(review): these assignments clobber the like-named parameters;
    # kept to preserve the original behavior for existing callers.
    n = 15
    n_test = 100
    x_range_test = 10
    noise_var = 0.5
    X = random.rand(1, n) * x_range

    X_test = array([[float(i) / n_test * x_range_test for i in range(n_test)]])
    Y = sin(X) + random.randn(n) * noise_var

    # Shogun representation of the data.
    labels = RegressionLabels(Y[0])
    feats_train = RealFeatures(X)
    feats_test = RealFeatures(X_test)

    kernel = GaussianKernel(feats_train, feats_train, width)

    # Two SVR models: epsilon-SVR and nu-SVR on the same kernel/labels.
    svr_epsilon = LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_EPSILON_SVR)
    svr_epsilon.train()
    svr_nu = LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_NU_SVR)
    svr_nu.train()

    # Predictions: re-initialize the kernel on the train/test pair, then
    # query each model on both feature sets.
    kernel.init(feats_train, feats_test)
    out1_epsilon = svr_epsilon.apply().get_labels()
    out2_epsilon = svr_epsilon.apply(feats_test).get_labels()
    out1_nu = svr_nu.apply().get_labels()
    out2_nu = svr_nu.apply(feats_test).get_labels()

    return out1_epsilon, out2_epsilon, out1_nu, out2_nu, kernel
def runShogunSVRSpectrumKernel(train_xt, train_lt, test_xt, svm_c=1):
	"""
	Train an epsilon-SVR with a spectrum (CommWordString) kernel on DNA
	strings and return (train_predictions, test_predictions, kernel).

	NOTE(review): K, GAP and SVRPARAM are module-level constants defined
	elsewhere in the file -- confirm their values before reuse. Python 2
	code (print statements).
	"""

	# Build word features from the raw DNA strings and sort them, as
	# required by CommWordStringKernel for fast lookup.
	charfeat_train = StringCharFeatures(train_xt, DNA)
	feats_train = StringWordFeatures(DNA)
	feats_train.obtain_from_char(charfeat_train, K-1, K, GAP, False)
	preproc=SortWordString()
	preproc.init(feats_train)
	feats_train.add_preprocessor(preproc)
	feats_train.apply_preprocessor()

	# The test features reuse the preprocessor fitted on the training set.
	charfeat_test = StringCharFeatures(test_xt, DNA)
	feats_test=StringWordFeatures(DNA)
	feats_test.obtain_from_char(charfeat_test, K-1, K, GAP, False)
	feats_test.add_preprocessor(preproc)
	feats_test.apply_preprocessor()

	kernel=CommWordStringKernel(feats_train, feats_train, False)
	kernel.io.set_loglevel(MSG_DEBUG)

	# Regression targets for the training strings.
	labels = RegressionLabels(train_lt)

	# Train the epsilon-SVR on the spectrum kernel.
	print "Ready to train!"
	svr_epsilon=LibSVR(svm_c, SVRPARAM, kernel, labels, LIBSVR_EPSILON_SVR)
	svr_epsilon.io.set_loglevel(MSG_DEBUG)
	svr_epsilon.train()

	# Predict on the training features, then re-initialize the kernel on
	# the train/test pair before predicting on the test features.
	print "Making predictions!"
	out1_epsilon=svr_epsilon.apply(feats_train).get_labels()
	kernel.init(feats_train, feats_test)
	out2_epsilon=svr_epsilon.apply(feats_test).get_labels()

	return out1_epsilon,out2_epsilon,kernel
Example #8
0
def runShogunSVRSpectrumKernel(train_xt, train_lt, test_xt, svm_c=1):
    """
	serialize svr with spectrum kernels
	"""

    ##################################################
    # set up svr
    charfeat_train = StringCharFeatures(train_xt, DNA)
    feats_train = StringWordFeatures(DNA)
    feats_train.obtain_from_char(charfeat_train, K - 1, K, GAP, False)
    preproc = SortWordString()
    preproc.init(feats_train)
    feats_train.add_preprocessor(preproc)
    feats_train.apply_preprocessor()

    charfeat_test = StringCharFeatures(test_xt, DNA)
    feats_test = StringWordFeatures(DNA)
    feats_test.obtain_from_char(charfeat_test, K - 1, K, GAP, False)
    feats_test.add_preprocessor(preproc)
    feats_test.apply_preprocessor()

    kernel = CommWordStringKernel(feats_train, feats_train, False)
    kernel.io.set_loglevel(MSG_DEBUG)

    # init kernel
    labels = RegressionLabels(train_lt)

    # two svr models: epsilon and nu
    print "Ready to train!"
    svr_epsilon = LibSVR(svm_c, SVRPARAM, kernel, labels, LIBSVR_EPSILON_SVR)
    svr_epsilon.io.set_loglevel(MSG_DEBUG)
    svr_epsilon.train()

    # predictions
    print "Making predictions!"
    out1_epsilon = svr_epsilon.apply(feats_train).get_labels()
    kernel.init(feats_train, feats_test)
    out2_epsilon = svr_epsilon.apply(feats_test).get_labels()

    return out1_epsilon, out2_epsilon, kernel
def runShogunSVRWDKernel(train_xt, train_lt, test_xt, svm_c=1):
	"""
	serialize svr with string kernels
	"""

    ##################################################
    # set up svr
	feats_train = StringCharFeatures(train_xt, PROTEIN)
	feats_test = StringCharFeatures(test_xt, PROTEIN)

	kernel = WeightedDegreePositionStringKernel(feats_train, feats_train, DEGREE)
	kernel.io.set_loglevel(MSG_DEBUG)
	kernel.set_shifts(NUMSHIFTS*ones(len(train_xt[0]), dtype=int32))
	kernel.set_position_weights(ones(len(train_xt[0]), dtype=float64))

    # init kernel
	labels = RegressionLabels(train_lt)
	
	# two svr models: epsilon and nu
	print "Ready to train!"
	svr_epsilon=LibSVR(svm_c, SVRPARAM, kernel, labels, LIBSVR_EPSILON_SVR)
	svr_epsilon.io.set_loglevel(MSG_DEBUG)
	svr_epsilon.train()
	#svr_nu=LibSVR(svm_c, SVRPARAM, kernel, labels, LIBSVR_NU_SVR)
	#svr_nu.train()

	# predictions
	print "Making predictions!"
	out1_epsilon=svr_epsilon.apply(feats_train).get_labels()
	kernel.init(feats_train, feats_test)
	out2_epsilon=svr_epsilon.apply(feats_test).get_labels()
	#out1_nu=svr_epsilon.apply(feats_train).get_labels()
	#out2_nu=svr_epsilon.apply(feats_test).get_labels()
	
	#return out1_epsilon,out2_epsilon,out1_nu,out2_nu ,kernel
	return out1_epsilon,out2_epsilon,kernel
def modelselection_grid_search_libsvr_modular (fm_train=traindat,fm_test=testdat,label_train=label_traindat,\
           width=2.1,C=1,epsilon=1e-5,tube_epsilon=1e-2):
    """
    Grid-search model selection for LibSVR with 5-fold cross-validation.

    fm_train/fm_test -- training/test feature matrices (fm_test is kept
        for interface compatibility; it is not used by the search)
    label_train      -- training targets
    width            -- Gaussian kernel width
    C, epsilon, tube_epsilon -- LibSVR parameters

    Returns the CrossValidationResult obtained with the best parameters.
    Fixes: the training features now come from the fm_train parameter
    (previously the module-level traindat was used, silently ignoring the
    argument); the dead duplicate labels construction was removed; the
    final evaluation result is now returned instead of being discarded.
    """
    from modshogun import CrossValidation, CrossValidationResult
    from modshogun import MeanSquaredError
    from modshogun import CrossValidationSplitting
    from modshogun import RegressionLabels
    from modshogun import RealFeatures
    from modshogun import GaussianKernel
    from modshogun import LibSVR
    from modshogun import GridSearchModelSelection
    from modshogun import ModelSelectionParameters, R_EXP
    from modshogun import ParameterCombination

    # Training data: honor the parameters instead of the module globals.
    features_train = RealFeatures(fm_train)
    labels = RegressionLabels(label_train)

    # Gaussian kernel on the training features.
    kernel = GaussianKernel(features_train, features_train, width)

    # Predictor whose parameters will be selected.
    predictor = LibSVR(C, tube_epsilon, kernel, labels)
    predictor.set_epsilon(epsilon)

    # Splitting strategy for 5-fold cross-validation. (For classification
    # StratifiedCrossValidationSplitting would be preferable.)
    splitting_strategy = CrossValidationSplitting(labels, 5)

    # Evaluation criterion: mean squared error.
    evaluation_criterium = MeanSquaredError()

    # Cross-validation instance tying predictor, data and criterion together.
    cross_validation = CrossValidation(predictor, features_train, labels,
                                       splitting_strategy,
                                       evaluation_criterium)

    # Repeat x-val twice (set larger for better estimates / confidence
    # intervals).
    cross_validation.set_num_runs(2)

    # Request 95% confidence intervals for the results.
    cross_validation.set_conf_int_alpha(0.05)

    # Build the parameter tree to select C1 and C2 over an exponential range.
    param_tree_root = ModelSelectionParameters()
    c1 = ModelSelectionParameters("C1")
    param_tree_root.append_child(c1)
    c1.build_values(-1.0, 0.0, R_EXP)

    c2 = ModelSelectionParameters("C2")
    param_tree_root.append_child(c2)
    c2.build_values(-1.0, 0.0, R_EXP)

    # Grid-search model selection over the parameter tree.
    model_selection = GridSearchModelSelection(cross_validation,
                                               param_tree_root)

    # Do not print each parameter combination while searching.
    print_state = False
    # Lock data: model selection does not change the kernel matrix, so this
    # avoids recomputing it in every iteration of the search.
    predictor.data_lock(labels, features_train)
    best_parameters = model_selection.select_model(print_state)

    # Apply the best parameters and evaluate once more.
    best_parameters.apply_to_machine(predictor)
    result = cross_validation.evaluate()
    return result
def regression_libsvr_modular(svm_c=1, svr_param=0.1, n=100, n_test=100,
                              x_range=6, x_range_test=10, noise_var=0.5,
                              width=1, seed=1):
    """
    Train epsilon-SVR and nu-SVR on a noisy one-dimensional sine wave and
    return their predictions.

    Returns (train_eps, test_eps, train_nu, test_nu, kernel).
    BUG FIX: the nu outputs previously came from the epsilon model; they
    are now produced by the nu model.
    """
    from modshogun import RegressionLabels, RealFeatures
    from modshogun import GaussianKernel
    from modshogun import LibSVR, LIBSVR_NU_SVR, LIBSVR_EPSILON_SVR

    # Reproducible results.
    random.seed(seed)

    # Easy regression data: one-dimensional noisy sine wave.
    # NOTE(review): these assignments clobber the like-named parameters;
    # kept to preserve the original behavior for existing callers.
    n = 15
    n_test = 100
    x_range_test = 10
    noise_var = 0.5
    X = random.rand(1, n) * x_range

    X_test = array([[float(i) / n_test * x_range_test for i in range(n_test)]])
    Y = sin(X) + random.randn(n) * noise_var

    # Shogun representation of the data.
    labels = RegressionLabels(Y[0])
    feats_train = RealFeatures(X)
    feats_test = RealFeatures(X_test)

    kernel = GaussianKernel(feats_train, feats_train, width)

    # Two SVR models: epsilon-SVR and nu-SVR on the same kernel/labels.
    svr_epsilon = LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_EPSILON_SVR)
    svr_epsilon.train()
    svr_nu = LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_NU_SVR)
    svr_nu.train()

    # Predictions: re-initialize the kernel on the train/test pair, then
    # query each model on both feature sets.
    kernel.init(feats_train, feats_test)
    out1_epsilon = svr_epsilon.apply().get_labels()
    out2_epsilon = svr_epsilon.apply(feats_test).get_labels()
    out1_nu = svr_nu.apply().get_labels()
    out2_nu = svr_nu.apply(feats_test).get_labels()

    return out1_epsilon, out2_epsilon, out1_nu, out2_nu, kernel