def evaluation_meansquarederror_modular(ground_truth, predicted):
    """Return the mean squared error between ground-truth and predicted values."""
    from modshogun import RegressionLabels
    from modshogun import MeanSquaredError

    # Wrap the raw vectors in Shogun label containers.
    truth_labels = RegressionLabels(ground_truth)
    prediction_labels = RegressionLabels(predicted)

    # evaluate(predicted, ground_truth) -> scalar MSE
    return MeanSquaredError().evaluate(prediction_labels, truth_labels)
def regression_cartree_modular(num_train=500, num_test=50, x_range=15, noise_var=0.2, ft=feattypes):
    """Train a CART regression tree on noisy y=x data and predict on a test grid.

    Parameters control the number of train/test points, the input range, the
    noise variance, and `ft` is the Shogun feature-type vector.  Returns
    (tree, test predictions) or None when Shogun/numpy are unavailable.

    Fix: `array` was used below but never imported (only `random` was); it is
    now imported alongside `random`.
    """
    try:
        from modshogun import RealFeatures, RegressionLabels, CSVFile, CARTree, PT_REGRESSION
        # BUG FIX: `array` is needed for the test grid but was not imported.
        from numpy import random, array
    except ImportError:
        print("Could not import Shogun and/or numpy modules")
        return

    random.seed(1)

    # form training dataset : y=x with noise
    X_train = random.rand(1, num_train) * x_range
    Y_train = X_train + random.randn(num_train) * noise_var

    # form test dataset: evenly spaced points across the input range
    X_test = array([[float(i) / num_test * x_range for i in range(num_test)]])

    # wrap features and labels into Shogun objects
    feats_train = RealFeatures(X_train)
    feats_test = RealFeatures(X_test)
    train_labels = RegressionLabels(Y_train[0])

    # CART Tree formation (trailing args are tree params — TODO confirm semantics)
    c = CARTree(ft, PT_REGRESSION, 5, True)
    c.set_labels(train_labels)
    c.train(feats_train)

    # Classify test data
    output = c.apply_regression(feats_test).get_labels()

    return c, output
def RunLARSShogun(q):
    """Benchmark Shogun LARS; put the elapsed time (or -1 on failure) on queue q."""
    totalTimer = Timer()

    try:
        # Load input dataset: features from the first file, responses from the second.
        Log.Info("Loading dataset", self.verbose)
        raw_features = np.genfromtxt(self.dataset[0], delimiter=',')
        raw_responses = np.genfromtxt(self.dataset[1], delimiter=',')
        inputFeat = RealFeatures(raw_features.T)
        responsesFeat = RegressionLabels(raw_responses)

        # Optional L1-norm bound; any leftover option is an error.
        lambda1 = None
        if "lambda1" in options:
            lambda1 = float(options.pop("lambda1"))
        if options:
            Log.Fatal("Unknown parameters: " + str(options))
            raise Exception("unknown parameters")

        with totalTimer:
            # Perform LARS (non-lasso variant); touch the last solution-path entry.
            model = LeastAngleRegression(False)
            if lambda1:
                model.set_max_l1_norm(lambda1)
            model.set_labels(responsesFeat)
            model.train(inputFeat)
            model.get_w_for_var(model.get_path_size() - 1)
    except Exception as e:
        q.put(-1)
        return -1

    time = totalTimer.ElapsedTime()
    q.put(time)
    return time
def RunLinearRidgeRegressionShogun(q):
    """Benchmark Shogun linear ridge regression; report elapsed time via queue q.

    Puts the elapsed time on q and returns it, or -1 on failure.
    """
    totalTimer = Timer()

    # Load input dataset.  A second file, when present, is the test set; the
    # responses come from the last row of the training set.
    Log.Info("Loading dataset", self.verbose)
    if len(self.dataset) >= 2:
        testSet = np.genfromtxt(self.dataset[1], delimiter=',')
    X, y = SplitTrainData(self.dataset)

    # Ridge parameter from "-t <int>" in the options string; default 1.0.
    found = re.search("-t (\d+)", options)
    tau = int(found.group(1)) if found else 1.0

    try:
        with totalTimer:
            # Perform linear ridge regression.
            model = LRR(tau, RealFeatures(X.T), RegressionLabels(y))
            model.train()
            if len(self.dataset) >= 2:
                model.apply_regression(RealFeatures(testSet.T))
    except Exception as e:
        q.put(-1)
        return -1

    elapsed = totalTimer.ElapsedTime()
    q.put(elapsed)
    return elapsed
def RunLARSShogun(q): totalTimer = Timer() # Load input dataset. try: Log.Info("Loading dataset", self.verbose) inputData = np.genfromtxt(self.dataset[0], delimiter=',') responsesData = np.genfromtxt(self.dataset[1], delimiter=',') inputFeat = RealFeatures(inputData.T) responsesFeat = RegressionLabels(responsesData) # Get all the parameters. lambda1 = re.search("-l (\d+)", options) lambda1 = 0.0 if not lambda1 else int(lambda1.group(1)) with totalTimer: # Perform LARS. model = LeastAngleRegression(False) model.set_max_l1_norm(lambda1) model.set_labels(responsesFeat) model.train(inputFeat) model.get_w(model.get_path_size() - 1) except Exception as e: q.put(-1) return -1 time = totalTimer.ElapsedTime() q.put(time) return time
def RunLASSOShogun(q):
    """Benchmark Shogun LASSO (LeastAngleRegression); report time on queue q."""
    totalTimer = Timer()

    # Load input dataset.  A second file, when present, is the test set; the
    # responses are taken from the last row of the training set.
    try:
        Log.Info("Loading dataset", self.verbose)
        if len(self.dataset) == 2:
            testSet = np.genfromtxt(self.dataset[1], delimiter=',')

        # L1-norm bound from "-l <int>" in the options string; default 0.0.
        found = re.search("-l (\d+)", options)
        lambda1 = int(found.group(1)) if found else 0.0

        X, y = SplitTrainData(self.dataset)

        with totalTimer:
            model = LeastAngleRegression(lasso=True)
            model.set_max_l1_norm(lambda1)
            model.set_labels(RegressionLabels(y))
            model.train(RealFeatures(X.T))
    except Exception as e:
        print(e)
        q.put(-1)
        return -1

    elapsed = totalTimer.ElapsedTime()
    q.put(elapsed)
    return elapsed
def RunSVRShogun(q):
    """Benchmark Shogun SVR (LibSVR, Gaussian kernel); report time on queue q."""
    totalTimer = Timer()

    # Load input dataset; responses are the last row of the training set.
    Log.Info("Loading dataset", self.verbose)
    X, y = SplitTrainData(self.dataset)

    # Parse parameters from the options string (defaults: C=1.0, eps=1.0, g=10.0).
    c_opt = re.search("-c (\d+\.\d+)", options)
    e_opt = re.search("-e (\d+\.\d+)", options)
    g_opt = re.search("-g (\d+\.\d+)", options)
    self.C = float(c_opt.group(1)) if c_opt else 1.0
    self.epsilon = float(e_opt.group(1)) if e_opt else 1.0
    g = float(g_opt.group(1)) if g_opt else 10.0
    # Kernel width is the inverse of gamma.
    self.width = np.true_divide(1, g)

    data = RealFeatures(X.T)
    labels_train = RegressionLabels(y)
    self.kernel = GaussianKernel(data, data, self.width)

    try:
        with totalTimer:
            # Perform SVR.
            model = LibSVR(self.C, self.epsilon, self.kernel, labels_train)
            model.train()
    except Exception as e:
        q.put(-1)
        return -1

    elapsed = totalTimer.ElapsedTime()
    q.put(elapsed)
    return elapsed
def RunLinearRegressionShogun(q):
    """Benchmark Shogun least-squares linear regression; report time on queue q.

    Puts the elapsed time on q and returns it, or -1 on failure.

    Fix: test-set predictions were made through an undefined name
    `classifier` (NameError at runtime); they now use the trained `model`.
    """
    totalTimer = Timer()

    try:
        Log.Info("Loading dataset", self.verbose)
        # A second dataset file, when present, is the test set.
        if len(self.dataset) == 2:
            testSet = np.genfromtxt(self.dataset[1], delimiter=',')

        # Use the last row of the training set as the responses.
        X, y = SplitTrainData(self.dataset)

        with totalTimer:
            # Perform linear regression.
            model = LeastSquaresRegression(RealFeatures(X.T), RegressionLabels(y))
            model.train()
            b = model.get_w()

            if len(self.dataset) == 2:
                # BUG FIX: was `classifier.apply(...)`, an undefined name.
                pred = model.apply(RealFeatures(testSet.T))
                self.predictions = pred.get_labels()
    except Exception as e:
        q.put(-1)
        return -1

    time = totalTimer.ElapsedTime()
    q.put(time)
    return time
def transfer_multitask_leastsquares_regression(fm_train=traindat, fm_test=testdat, label_train=label_traindat):
    """Multitask least-squares regression over two equal contiguous tasks.

    Returns the task-0 predictions, or exits if the solver is unavailable.

    Fix: the features were built from the module-level `traindat` regardless
    of the `fm_train` argument; the parameter is now honoured (behaviour is
    unchanged for the default arguments).
    """
    from modshogun import RegressionLabels, RealFeatures, Task, TaskGroup
    try:
        from modshogun import MultitaskLeastSquaresRegression
    except ImportError:
        print("MultitaskLeastSquaresRegression not available")
        exit(0)

    features = RealFeatures(fm_train)
    labels = RegressionLabels(label_train)

    # Split the vectors into two contiguous tasks of (nearly) equal size.
    n_vectors = features.get_num_vectors()
    half = n_vectors // 2
    task_group = TaskGroup()
    task_group.append_task(Task(0, half))
    task_group.append_task(Task(half, n_vectors))

    mtlsr = MultitaskLeastSquaresRegression(0.1, features, labels, task_group)
    mtlsr.set_regularization(1)  # use regularization ratio
    mtlsr.set_tolerance(1e-2)    # use 1e-2 tolerance
    mtlsr.train()
    mtlsr.set_current_task(0)
    out = mtlsr.apply_regression().get_labels()
    return out
def RunSVRShogun():
    """Benchmark Shogun SVR; return the elapsed time, or -1 on failure."""
    totalTimer = Timer()

    Log.Info("Loading dataset", self.verbose)
    # Responses are the last row of the training set.
    X, y = SplitTrainData(self.dataset)

    # Parameters from the options dict, with defaults C=1.0, epsilon=1.0, width=0.1.
    self.C = float(options.pop("c", 1.0))
    self.epsilon = float(options.pop("epsilon", 1.0))
    if "gamma" in options:
        # Kernel width is the inverse of gamma.
        self.width = np.true_divide(1, float(options.pop("gamma")))
    else:
        self.width = 0.1
    if options:
        Log.Fatal("Unknown parameters: " + str(options))
        raise Exception("unknown parameters")

    data = RealFeatures(X.T)
    labels_train = RegressionLabels(y)
    self.kernel = GaussianKernel(data, data, self.width)

    try:
        with totalTimer:
            # Perform SVR.
            model = LibSVR(self.C, self.epsilon, self.kernel, labels_train)
            model.train()
    except Exception as e:
        return -1

    return totalTimer.ElapsedTime()
def RunLASSOShogun():
    """Benchmark Shogun LASSO; return the elapsed time, or -1 on failure."""
    totalTimer = Timer()

    try:
        Log.Info("Loading dataset", self.verbose)
        # A second dataset file, when present, is the test set.
        if len(self.dataset) >= 2:
            testSet = np.genfromtxt(self.dataset[1], delimiter=',')

        # Optional L1-norm bound; any leftover option is an error.
        lambda1 = float(options.pop("lambda1")) if "lambda1" in options else None
        if options:
            Log.Fatal("Unknown parameters: " + str(options))
            raise Exception("unknown parameters")

        # Responses are the last row of the training set.
        X, y = SplitTrainData(self.dataset)

        with totalTimer:
            model = LeastAngleRegression(lasso=True)
            if lambda1:
                model.set_max_l1_norm(lambda1)
            model.set_labels(RegressionLabels(y))
            model.train(RealFeatures(X.T))
    except Exception as e:
        return -1

    return totalTimer.ElapsedTime()
def regression_svrlight_modular(fm_train=traindat, fm_test=testdat, label_train=label_traindat,
                                width=1.2, C=1, epsilon=1e-5, tube_epsilon=1e-2, num_threads=3):
    """Train SVRLight with a Gaussian kernel and predict on the test features.

    Returns (test predictions, kernel), or None when SVRLight is unavailable.
    """
    from modshogun import RegressionLabels, RealFeatures
    from modshogun import GaussianKernel
    try:
        from modshogun import SVRLight
    except ImportError:
        print('No support for SVRLight available.')
        return

    train_feats = RealFeatures(fm_train)
    test_feats = RealFeatures(fm_test)
    kernel = GaussianKernel(train_feats, train_feats, width)

    svr = SVRLight(C, epsilon, kernel, RegressionLabels(label_train))
    svr.set_tube_epsilon(tube_epsilon)
    svr.parallel.set_num_threads(num_threads)
    svr.train()

    # Re-initialise the kernel against the test features before predicting.
    kernel.init(train_feats, test_feats)
    predictions = svr.apply().get_labels()

    return predictions, kernel
def RunMetrics(self, options):
    """Run the ridge-regression benchmark and compute runtime + Simple MSE.

    The MSE part requires three datasets (train, test, true test labels).
    """
    Log.Info("Perform Linear Ridge Regression.", self.verbose)

    results = self.LinearRidgeRegressionShogun(options)
    if results < 0:
        return results
    metrics = {'Runtime': results}

    if len(self.dataset) >= 3:
        X, y = SplitTrainData(self.dataset)
        found = re.search("-t (\d+)", options)
        tau = int(found.group(1)) if found else 1.0

        model = LRR(tau, RealFeatures(X.T), RegressionLabels(y))
        model.train()

        testData = LoadDataset(self.dataset[1])
        truelabels = LoadDataset(self.dataset[2])
        predictedlabels = model.apply_regression(RealFeatures(testData.T)).get_labels()

        metrics['Simple MSE'] = Metrics.SimpleMeanSquaredError(truelabels, predictedlabels)
        return metrics
    else:
        Log.Fatal("This method requires three datasets!")
def runShogunSVRWDKernel(train_xt, train_lt, test_xt, svm_c=1, svr_param=0.1):
    """
    serialize svr with string kernels

    Trains an epsilon-SVR on a combined (WD-shift + spectrum) string kernel,
    predicts on both train and test features, serializes the model to
    FNEPSILON and returns (train predictions, test predictions, kernel).

    Fix: Python-2-only `print` statements converted to `print()` calls, for
    consistency with the rest of the file (same output either way).
    """

    ##################################################
    # set up svr
    feats_train = construct_features(train_xt)
    feats_test = construct_features(test_xt)

    max_len = len(train_xt[0])
    kernel_wdk = WeightedDegreePositionStringKernel(SIZE, 5)
    shifts_vector = np.ones(max_len, dtype=np.int32) * NUMSHIFTS
    kernel_wdk.set_shifts(shifts_vector)

    ########
    # set up spectrum
    use_sign = False
    kernel_spec_1 = WeightedCommWordStringKernel(SIZE, use_sign)
    #kernel_spec_2 = WeightedCommWordStringKernel(SIZE, use_sign)

    ########
    # combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(kernel_wdk)
    kernel.append_kernel(kernel_spec_1)
    #kernel.append_kernel(kernel_spec_2)

    # init kernel
    labels = RegressionLabels(train_lt)

    # two svr models: epsilon and nu
    svr_epsilon = LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_EPSILON_SVR)
    print("Ready to train!")
    svr_epsilon.train(feats_train)
    #svr_nu=LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_NU_SVR)
    #svr_nu.train(feats_train)

    # predictions
    print("Making predictions!")
    kernel.init(feats_train, feats_test)
    out1_epsilon = svr_epsilon.apply().get_labels()
    out2_epsilon = svr_epsilon.apply(feats_test).get_labels()
    #out1_nu=svr_epsilon.apply().get_labels()
    #out2_nu=svr_epsilon.apply(feats_test).get_labels()

    ##################################################
    # serialize to file
    fEpsilon = open(FNEPSILON, 'w+')
    #fNu = open(FNNU, 'w+')
    svr_epsilon.save(fEpsilon)
    #svr_nu.save(fNu)
    fEpsilon.close()
    #fNu.close()

    ##################################################
    #return out1_epsilon,out2_epsilon,out1_nu,out2_nu ,kernel
    return out1_epsilon, out2_epsilon, kernel
def labels_io_modular():
    """Load regression labels from a space-delimited data file and return them."""
    from modshogun import RegressionLabels, CSVFile

    # Reader for the space-separated label file.
    reader = CSVFile("../data/label_train_regression.dat", "r")
    reader.set_delimiter(" ")

    lab = RegressionLabels()
    lab.load(reader)
    return lab
def _evaluate(indata):
    """Regression regression-test driver: build the kernel and regressor named
    in `indata`, train, and check bias/alphas/support-vectors/classification
    against the expected values stored in `indata`.

    Fix: the support-vector accumulation iterated over the undefined name
    `inregression` (NameError whenever 'support_vectors' was present); it now
    uses `regression`.
    """
    prefix = 'kernel_'
    feats = util.get_features(indata, prefix)
    kargs = util.get_args(indata, prefix)
    # Resolve the kernel class by name, e.g. 'Gaussian' -> GaussianKernel.
    fun = eval(indata[prefix + 'name'] + 'Kernel')
    kernel = fun(feats['train'], feats['train'], *kargs)

    prefix = 'regression_'
    kernel.parallel.set_num_threads(indata[prefix + 'num_threads'])

    try:
        name = indata[prefix + 'name']
        # Historical data files use the all-caps spelling.
        if (name == 'KERNELRIDGEREGRESSION'):
            name = 'KernelRidgeRegression'
        rfun = eval(name)
    except NameError as e:
        print("%s is disabled/unavailable!" % indata[prefix + 'name'])
        return False

    labels = RegressionLabels(double(indata[prefix + 'labels']))

    # SVM-style regressors take (C, epsilon, ...); kernel machines take (tau, ...).
    if indata[prefix + 'type'] == 'svm':
        regression = rfun(indata[prefix + 'C'], indata[prefix + 'epsilon'], kernel, labels)
    elif indata[prefix + 'type'] == 'kernelmachine':
        regression = rfun(indata[prefix + 'tau'], kernel, labels)
    else:
        return False

    regression.parallel.set_num_threads(indata[prefix + 'num_threads'])
    if prefix + 'tube_epsilon' in indata:
        regression.set_tube_epsilon(indata[prefix + 'tube_epsilon'])

    regression.train()

    # Accumulate absolute deviations from the expected reference values.
    alphas = 0
    bias = 0
    sv = 0
    if prefix + 'bias' in indata:
        bias = abs(regression.get_bias() - indata[prefix + 'bias'])
    if prefix + 'alphas' in indata:
        for item in regression.get_alphas().tolist():
            alphas += item
        alphas = abs(alphas - indata[prefix + 'alphas'])
    if prefix + 'support_vectors' in indata:
        # BUG FIX: was `inregression.get_support_vectors()` (undefined name).
        for item in regression.get_support_vectors().tolist():
            sv += item
        sv = abs(sv - indata[prefix + 'support_vectors'])

    kernel.init(feats['train'], feats['test'])
    classified = max(
        abs(regression.apply().get_labels() - indata[prefix + 'classified']))

    return util.check_accuracy(indata[prefix + 'accuracy'],
                               alphas=alphas,
                               bias=bias,
                               support_vectors=sv,
                               classified=classified)
def regression_gaussian_process_modular (n=100, n_test=100, \
        x_range=6, x_range_test=10, noise_var=0.5, width=1, seed=1):
    # Exact GP regression demo on a 1-D noisy sine wave: builds an
    # ExactInferenceMethod (Gaussian likelihood, zero mean, Gaussian kernel),
    # trains a GaussianProcessRegression and returns inference internals plus
    # the predictive mean/variance over a test grid.
    from modshogun import RealFeatures, RegressionLabels, GaussianKernel, Math
    try:
        from modshogun import GaussianLikelihood, ZeroMean, \
            ExactInferenceMethod, GaussianProcessRegression
    except ImportError:
        print("Eigen3 needed for Gaussian Processes")
        return

    # reproducable results
    random.seed(seed)
    Math.init_random(17)

    # easy regression data: one dimensional noisy sine wave
    X = random.rand(1, n) * x_range
    X_test = array([[float(i) / n_test * x_range_test for i in range(n_test)]])
    Y_test = sin(X_test)
    Y = sin(X) + random.randn(n) * noise_var

    # shogun representation
    labels = RegressionLabels(Y[0])
    feats_train = RealFeatures(X)
    feats_test = RealFeatures(X_test)

    # GP specification; kernel parameter is derived from `width` as 2*width^2
    shogun_width = width * width * 2
    kernel = GaussianKernel(10, shogun_width)
    zmean = ZeroMean()
    lik = GaussianLikelihood()
    lik.set_sigma(noise_var)
    inf = ExactInferenceMethod(kernel, feats_train, zmean, labels, lik)

    # train GP
    gp = GaussianProcessRegression(inf)
    gp.train()

    # some things we can do
    alpha = inf.get_alpha()
    diagonal = inf.get_diagonal_vector()
    cholesky = inf.get_cholesky()

    # get mean and variance vectors
    mean = gp.get_mean_vector(feats_test)
    variance = gp.get_variance_vector(feats_test)

    # plot results
    #plot(X[0],Y[0],'x') # training observations
    #plot(X_test[0],Y_test[0],'-') # ground truth of test
    #plot(X_test[0],mean, '-') # mean predictions of test
    #fill_between(X_test[0],mean-1.96*sqrt(variance),mean+1.96*sqrt(variance),color='grey')
    #legend(["training", "ground truth", "mean predictions"])
    #show()

    # NOTE(review): `mean` and `variance` appear to be vectors (one entry per
    # test point); builtin round() on a multi-element array would raise —
    # confirm the intended rounding behaviour here.
    return alpha, diagonal, round(variance, 12), round(mean, 12), cholesky
def regression_least_squares_modular(fm_train=traindat, fm_test=testdat, label_train=label_traindat, tau=1e-6):
    """Train Shogun least-squares regression; return (test predictions, model).

    `tau` is accepted for interface compatibility but unused by this solver.

    Fix: the model previously trained on the module-level `traindat`
    regardless of the `fm_train` argument; the parameter is now honoured
    (behaviour is unchanged for the default arguments).  The unused
    GaussianKernel import was dropped.
    """
    from modshogun import RegressionLabels, RealFeatures
    from modshogun import LeastSquaresRegression

    ls = LeastSquaresRegression(RealFeatures(fm_train), RegressionLabels(label_train))
    ls.train()

    out = ls.apply(RealFeatures(fm_test)).get_labels()
    return out, ls
def regression_linear_ridge_modular(fm_train=traindat, fm_test=testdat, label_train=label_traindat, tau=1e-6):
    """Train Shogun linear ridge regression with ridge parameter `tau`;
    return (test predictions, model).

    Fix: the model previously trained on the module-level `traindat`
    regardless of the `fm_train` argument; the parameter is now honoured
    (behaviour is unchanged for the default arguments).
    """
    from modshogun import RegressionLabels, RealFeatures
    from modshogun import LinearRidgeRegression

    rr = LinearRidgeRegression(tau, RealFeatures(fm_train), RegressionLabels(label_train))
    rr.train()

    out = rr.apply(RealFeatures(fm_test)).get_labels()
    return out, rr
def get_labels(raw=False, type='binary'):
    """Build a label vector: NUM_EXAMPLES of -1 followed by NUM_EXAMPLES of +1.

    With raw=True the plain array is returned; otherwise it is wrapped in
    BinaryLabels or RegressionLabels depending on `type` (None for any other
    value).
    """
    data = concatenate(
        array(
            (-ones(NUM_EXAMPLES, dtype=double), ones(NUM_EXAMPLES, dtype=double))))

    if raw:
        return data
    if type == 'binary':
        return BinaryLabels(data)
    if type == 'regression':
        return RegressionLabels(data)
    return None
def mkl_regression_modular(n=100, n_test=100,
                           x_range=6, x_range_test=10, noise_var=0.5, width=1, seed=1):
    """MKL regression (SVRLight constraint generator) on a 1-D noisy sine wave.

    Returns (test predictions, trained MKLRegression, combined kernel).
    """
    from modshogun import RegressionLabels, RealFeatures
    from modshogun import GaussianKernel, PolyKernel, CombinedKernel
    from modshogun import MKLRegression, SVRLight

    # reproducible results
    random.seed(seed)

    # NOTE: the example intentionally overrides several arguments below.
    n = 15
    n_test = 100
    x_range_test = 10
    noise_var = 0.5

    # one-dimensional noisy sine wave
    X = random.rand(1, n) * x_range
    X_test = array([[float(i) / n_test * x_range_test for i in range(n_test)]])
    Y_test = sin(X_test)
    Y = sin(X) + random.randn(n) * noise_var

    # shogun representation
    labels = RegressionLabels(Y[0])
    feats_train = RealFeatures(X)
    feats_test = RealFeatures(X_test)

    # combined kernel: two Gaussian widths plus a polynomial kernel
    kernel = CombinedKernel()
    for sub_kernel in (GaussianKernel(10, 2), GaussianKernel(10, 3), PolyKernel(10, 2)):
        kernel.append_kernel(sub_kernel)
    kernel.init(feats_train, feats_train)

    # constraint generator and MKLRegression
    svr_mkl = MKLRegression(SVRLight())
    svr_mkl.set_kernel(kernel)
    svr_mkl.set_labels(labels)
    svr_mkl.train()

    # predictions on the test grid
    kernel.init(feats_train, feats_test)
    out = svr_mkl.apply().get_labels()

    return out, svr_mkl, kernel
def regression_kernel_ridge_modular(n=100, n_test=100,
                                    x_range=6, x_range_test=10, noise_var=0.5,
                                    width=1, tau=1e-6, seed=1):
    """Kernel ridge regression on a 1-D noisy sine wave.

    Returns (test predictions, kernel, trained KernelRidgeRegression).
    """
    from modshogun import RegressionLabels, RealFeatures
    from modshogun import GaussianKernel
    from modshogun import KernelRidgeRegression

    # reproducible results
    random.seed(seed)

    # NOTE: the example intentionally overrides several arguments below.
    n = 15
    n_test = 100
    x_range_test = 10
    noise_var = 0.5

    # one-dimensional noisy sine wave
    X = random.rand(1, n) * x_range
    X_test = array([[float(i) / n_test * x_range_test for i in range(n_test)]])
    Y_test = sin(X_test)
    Y = sin(X) + random.randn(n) * noise_var

    # shogun representation
    labels = RegressionLabels(Y[0])
    feats_train = RealFeatures(X)
    feats_test = RealFeatures(X_test)

    kernel = GaussianKernel(feats_train, feats_train, width)
    krr = KernelRidgeRegression(tau, kernel, labels)
    krr.train(feats_train)

    # predict on the test grid
    kernel.init(feats_train, feats_test)
    out = krr.apply().get_labels()

    return out, kernel, krr
def regression_libsvr_modular(svm_c=1, svr_param=0.1, n=100, n_test=100,
                              x_range=6, x_range_test=10, noise_var=0.5, width=1, seed=1):
    """Train epsilon-SVR and nu-SVR (LibSVR) on a 1-D noisy sine wave.

    Returns (epsilon train preds, epsilon test preds, nu train preds,
    nu test preds, kernel).

    Fix: the nu-SVR predictions were produced by the epsilon-SVR model
    (copy-paste bug); they now come from `svr_nu`.
    """
    from modshogun import RegressionLabels, RealFeatures
    from modshogun import GaussianKernel
    from modshogun import LibSVR, LIBSVR_NU_SVR, LIBSVR_EPSILON_SVR

    # reproducable results
    random.seed(seed)

    # NOTE: the example intentionally overrides several arguments below.
    n = 15
    n_test = 100
    x_range_test = 10
    noise_var = 0.5

    # easy regression data: one dimensional noisy sine wave
    X = random.rand(1, n) * x_range
    X_test = array([[float(i) / n_test * x_range_test for i in range(n_test)]])
    Y_test = sin(X_test)
    Y = sin(X) + random.randn(n) * noise_var

    # shogun representation
    labels = RegressionLabels(Y[0])
    feats_train = RealFeatures(X)
    feats_test = RealFeatures(X_test)

    kernel = GaussianKernel(feats_train, feats_train, width)

    # two svr models: epsilon and nu
    svr_epsilon = LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_EPSILON_SVR)
    svr_epsilon.train()
    svr_nu = LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_NU_SVR)
    svr_nu.train()

    # predictions
    kernel.init(feats_train, feats_test)
    out1_epsilon = svr_epsilon.apply().get_labels()
    out2_epsilon = svr_epsilon.apply(feats_test).get_labels()
    # BUG FIX: these previously used svr_epsilon.
    out1_nu = svr_nu.apply().get_labels()
    out2_nu = svr_nu.apply(feats_test).get_labels()

    return out1_epsilon, out2_epsilon, out1_nu, out2_nu, kernel
def evaluation_cross_validation_regression(train_fname=traindat, label_fname=label_traindat, width=0.8, tau=1e-6):
    """5-fold cross-validation of kernel ridge regression under mean squared error."""
    from modshogun import CrossValidation, CrossValidationResult
    from modshogun import MeanSquaredError, CrossValidationSplitting
    from modshogun import RegressionLabels, RealFeatures
    from modshogun import GaussianKernel, KernelRidgeRegression, CSVFile

    # training data
    features = RealFeatures(CSVFile(train_fname))
    labels = RegressionLabels(CSVFile(label_fname))

    # predictor: kernel ridge regression over a Gaussian kernel
    kernel = GaussianKernel()
    predictor = KernelRidgeRegression(tau, kernel, labels)

    # plain 5-fold split (stratified splitting is preferable for classification,
    # but standard x-val is fine for regression)
    splitting_strategy = CrossValidationSplitting(labels, 5)

    # cross-validation instance, scored by mean squared error
    cross_validation = CrossValidation(predictor, features, labels,
                                       splitting_strategy, MeanSquaredError())
    cross_validation.set_num_runs(10)          # repeat x-val 10 times
    cross_validation.set_conf_int_alpha(0.05)  # request 95% confidence intervals
    # tell the machine to precompute the kernel matrix (speed-up; may not work)
    predictor.data_lock(labels, features)

    # perform cross-validation
    result = cross_validation.evaluate()
def transfer_multitask_group_regression(fm_train=traindat, fm_test=testdat, label_train=label_traindat):
    """Multitask least-squares regression over two equal contiguous tasks;
    returns the task-0 predictions.

    Fixes: (1) `/` yields a float under Python 3, so the Task boundaries now
    use floor division; (2) the features were built from the module-level
    `traindat` regardless of the `fm_train` argument — the parameter is now
    honoured (unchanged behaviour for the defaults).
    """
    from modshogun import RegressionLabels, RealFeatures, Task, TaskGroup, MultitaskLSRegression

    features = RealFeatures(fm_train)
    labels = RegressionLabels(label_train)

    n_vectors = features.get_num_vectors()
    # BUG FIX: integer (floor) division — Task boundaries must be ints.
    task_one = Task(0, n_vectors // 2)
    task_two = Task(n_vectors // 2, n_vectors)
    task_group = TaskGroup()
    task_group.add_task(task_one)
    task_group.add_task(task_two)

    mtlsr = MultitaskLSRegression(0.1, features, labels, task_group)
    mtlsr.train()
    mtlsr.set_current_task(0)
    out = mtlsr.apply_regression().get_labels()
    return out
def runShogunSVRSpectrumKernel(train_xt, train_lt, test_xt, svm_c=1):
    """
    serialize svr with spectrum kernels

    Trains an epsilon-SVR over a CommWordString (spectrum) kernel on DNA
    strings and returns (train predictions, test predictions, kernel).

    Fix: Python-2-only `print` statements converted to `print()` calls, for
    consistency with the rest of the file (same output either way).
    """

    ##################################################
    # set up svr: k-mer word features with sorted-word preprocessing
    charfeat_train = StringCharFeatures(train_xt, DNA)
    feats_train = StringWordFeatures(DNA)
    feats_train.obtain_from_char(charfeat_train, K - 1, K, GAP, False)
    preproc = SortWordString()
    preproc.init(feats_train)
    feats_train.add_preprocessor(preproc)
    feats_train.apply_preprocessor()

    charfeat_test = StringCharFeatures(test_xt, DNA)
    feats_test = StringWordFeatures(DNA)
    feats_test.obtain_from_char(charfeat_test, K - 1, K, GAP, False)
    feats_test.add_preprocessor(preproc)
    feats_test.apply_preprocessor()

    kernel = CommWordStringKernel(feats_train, feats_train, False)
    kernel.io.set_loglevel(MSG_DEBUG)

    # init kernel
    labels = RegressionLabels(train_lt)

    # train epsilon-SVR
    print("Ready to train!")
    svr_epsilon = LibSVR(svm_c, SVRPARAM, kernel, labels, LIBSVR_EPSILON_SVR)
    svr_epsilon.io.set_loglevel(MSG_DEBUG)
    svr_epsilon.train()

    # predictions
    print("Making predictions!")
    out1_epsilon = svr_epsilon.apply(feats_train).get_labels()
    kernel.init(feats_train, feats_test)
    out2_epsilon = svr_epsilon.apply(feats_test).get_labels()

    return out1_epsilon, out2_epsilon, kernel
def stochasticgbmachine_modular(train=traindat, train_labels=label_traindat, ft=feat_types):
    """Train a StochasticGBMachine (gradient boosting over depth-1 CART trees,
    squared loss) on 90% of the data and apply it to the remaining 10%.

    Returns (machine, predictions) or None when Shogun is unavailable.

    Fix: the 90% split point was a float (`n * 0.9`), which is invalid as a
    slice index under Python 3 / modern NumPy; it is now truncated to int.
    """
    try:
        from modshogun import RealFeatures, RegressionLabels, CSVFile, CARTree, StochasticGBMachine, SquaredLoss
    except ImportError:
        print("Could not import Shogun modules")
        return

    # wrap features and labels into Shogun objects
    feats = RealFeatures(CSVFile(train))
    labels = RegressionLabels(CSVFile(train_labels))

    # divide into training (90%) and test dataset (10%)
    p = np.random.permutation(labels.get_num_labels())
    # BUG FIX: slice indices must be ints.
    num = int(labels.get_num_labels() * 0.9)

    cart = CARTree()
    cart.set_feature_types(ft)
    cart.set_max_depth(1)
    loss = SquaredLoss()
    s = StochasticGBMachine(cart, loss, 500, 0.01, 0.6)

    # train on the first 90% of the permutation
    feats.add_subset(np.int32(p[0:num]))
    labels.add_subset(np.int32(p[0:num]))
    s.set_labels(labels)
    s.train(feats)
    feats.remove_subset()
    labels.remove_subset()

    # apply to the held-out 10%
    feats.add_subset(np.int32(p[num:len(p)]))
    labels.add_subset(np.int32(p[num:len(p)]))
    output = s.apply_regression(feats)
    feats.remove_subset()
    labels.remove_subset()

    return s, output
def RunLinearRidgeRegressionShogun(q):
    """Benchmark Shogun linear ridge regression (options-dict variant);
    put the elapsed time (or -1 on failure) on queue q."""
    totalTimer = Timer()

    # Load input dataset.  A second file, when present, is the test set; the
    # responses come from the last row of the training set.
    Log.Info("Loading dataset", self.verbose)
    if len(self.dataset) >= 2:
        testSet = np.genfromtxt(self.dataset[1], delimiter=',')
    X, y = SplitTrainData(self.dataset)

    # 'alpha' (the ridge parameter) is mandatory; anything left over is an error.
    if "alpha" not in options:
        Log.Fatal("Required parameter 'alpha' not specified!")
        raise Exception("missing parameter")
    tau = float(options.pop("alpha"))
    if options:
        Log.Fatal("Unknown parameters: " + str(options))
        raise Exception("unknown parameters")

    try:
        with totalTimer:
            # Perform linear ridge regression.
            model = LRR(tau, RealFeatures(X.T), RegressionLabels(y))
            model.train()
            if len(self.dataset) >= 2:
                model.apply_regression(RealFeatures(testSet.T))
    except Exception as e:
        q.put(-1)
        return -1

    elapsed = totalTimer.ElapsedTime()
    q.put(elapsed)
    return elapsed
def RunMetrics(self, options):
    """Compute the Simple MSE of linear ridge regression on the test set.

    Requires three datasets (train, test, true test labels); returns a
    metrics dict, or None after logging a fatal error.
    """
    # Guard clause: all three datasets are mandatory.
    if len(self.dataset) < 3:
        Log.Fatal("This method requires three datasets!")
        return None

    X, y = SplitTrainData(self.dataset)

    # Ridge parameter from "-t <int>" in the options string; default 1.0.
    found = re.search("-t (\d+)", options)
    tau = int(found.group(1)) if found else 1.0

    model = LRR(tau, RealFeatures(X.T), RegressionLabels(y))
    model.train()

    testData = LoadDataset(self.dataset[1])
    truelabels = LoadDataset(self.dataset[2])
    predictedlabels = model.apply_regression(RealFeatures(
        testData.T)).get_labels()

    return {'Simple MSE': Metrics.SimpleMeanSquaredError(truelabels, predictedlabels)}
def RunMetrics(self, options):
    """Run the ridge-regression benchmark, then compute runtime + Simple MSE.

    Requires three datasets (train, test, true test labels) for the MSE part;
    'alpha' is a mandatory option.
    """
    Log.Info("Perform Linear Ridge Regression.", self.verbose)

    runtime = self.LinearRidgeRegressionShogun(options)
    if runtime < 0:
        return runtime
    metrics = {'Runtime': runtime}

    # Guard clause: the MSE computation needs all three datasets.
    if len(self.dataset) < 3:
        Log.Fatal("This method requires three datasets!")
        return None

    X, y = SplitTrainData(self.dataset)

    # 'alpha' (the ridge parameter) is mandatory; anything left over is an error.
    if "alpha" not in options:
        Log.Fatal("Required parameter 'alpha' not specified!")
        raise Exception("missing parameter")
    tau = float(options.pop("alpha"))
    if options:
        Log.Fatal("Unknown parameters: " + str(options))
        raise Exception("unknown parameters")

    model = LRR(tau, RealFeatures(X.T), RegressionLabels(y))
    model.train()

    testData = LoadDataset(self.dataset[1])
    truelabels = LoadDataset(self.dataset[2])
    predictedlabels = model.apply_regression(RealFeatures(
        testData.T)).get_labels()

    metrics['Simple MSE'] = Metrics.SimpleMeanSquaredError(truelabels, predictedlabels)
    return metrics