def main(readcsv=read_csv, method='defaultDense'):
    """Train a multi-class SVM on the bundled dense CSV data and predict.

    Parameters
    ----------
    readcsv : callable
        Invoked as ``readcsv(path, columns)``. The object it returns for the
        prediction features must expose ``.shape``.
    method : str
        Not used by this function; the SVM trainer is always created with
        ``method='thunder'``.

    Returns
    -------
    tuple
        ``(pred_result, pred_labels)``: the result object returned by the
        prediction algorithm and the label column read from the test file.
    """
    nFeatures = 20
    nClasses = 5

    # Read training data: nFeatures feature columns followed by one label column.
    train_file = 'data/batch/svm_multi_class_train_dense.csv'
    train_data = readcsv(train_file, range(nFeatures))
    train_labels = readcsv(train_file, range(nFeatures, nFeatures + 1))

    # Create and configure the training algorithm object.
    algorithm = d4p.multi_class_classifier_training(
        nClasses=nClasses,
        training=d4p.svm_training(method='thunder'),
        prediction=d4p.svm_prediction())

    # Pass data to training; the training result provides the model.
    train_result = algorithm.compute(train_data, train_labels)
    assert train_result.model.NumberOfFeatures == nFeatures
    assert isinstance(train_result.model.TwoClassClassifierModel(0),
                      d4p.svm_model)

    # Prediction stage: read the test data (same column layout).
    pred_file = 'data/batch/svm_multi_class_test_dense.csv'
    pred_data = readcsv(pred_file, range(nFeatures))
    pred_labels = readcsv(pred_file, range(nFeatures, nFeatures + 1))

    # Create an algorithm object to predict multi-class SVM values.
    algorithm = d4p.multi_class_classifier_prediction(
        nClasses,
        training=d4p.svm_training(method='thunder'),
        prediction=d4p.svm_prediction())

    # Pass data to prediction; the prediction result provides the labels.
    pred_result = algorithm.compute(pred_data, train_result.model)

    # One prediction per *test* observation. The row count must come from
    # pred_data, not train_data, or the check breaks as soon as the two
    # files differ in size.
    assert pred_result.prediction.shape == (pred_data.shape[0], 1)

    return (pred_result, pred_labels)
def _daal4py_predict(self, X):
    """Predict with the stored daal4py model and return a flat result array.

    Reads ``self.classes_``, ``self.kernel``, ``self._gamma`` and
    ``self.daal_model_``; in the multi-class case also ``self.max_iter``
    and ``self.tol``.

    With exactly two classes the raw prediction values are overwritten in
    place by the outcome of ``value > 0`` (stored in the array's own dtype);
    otherwise the flattened prediction is returned as produced by the
    multi-class predictor.
    """
    fptype = getFPType(X)
    n_classes = len(self.classes_)
    is_binary = n_classes == 2

    kernel = _daal4py_kf(self.kernel, fptype, gamma=self._gamma)
    two_class_predictor = daal4py.svm_prediction(
        fptype=fptype,
        method='defaultDense',
        kernel=kernel,
    )

    if is_binary:
        predictor = two_class_predictor
    else:
        # A non-positive max_iter is replaced by a very large iteration cap.
        iteration_cap = self.max_iter if self.max_iter > 0 else 2**30
        predictor = daal4py.multi_class_classifier_prediction(
            nClasses=n_classes,
            fptype=fptype,
            maxIterations=int(iteration_cap),
            accuracyThreshold=float(self.tol),
            pmethod="voteBased",
            tmethod='oneAgainstOne',
            prediction=two_class_predictor,
        )

    labels = predictor.compute(X, self.daal_model_).prediction.ravel()

    if is_binary:
        # Convert from Intel(R) DAAL format back to original classes
        np.greater(labels, 0, out=labels)

    return labels
def compute(train_indep_data, train_dep_data, test_indep_data,
            method='defaultDense'):
    """Fit a linear-kernel SVM and score the test data.

    Parameters
    ----------
    train_indep_data, train_dep_data
        Training inputs and targets, passed straight to the trainer.
    test_indep_data
        Inputs to run prediction on.
    method : str
        Forwarded to ``d4p.svm_training``.

    Returns
    -------
    tuple
        ``(predict_labels, decision_result)``: labels are ``1`` where the
        predicted value is ``>= 0`` and ``-1`` elsewhere; the second item
        is the raw prediction output.
    """
    # The same linear kernel object is shared by training and prediction.
    linear_kernel = d4p.kernel_function_linear(
        fptype='float',
        method='defaultDense',
        k=1.0,
        b=0.0,
    )

    trainer = d4p.svm_training(
        fptype='float',
        method=method,
        kernel=linear_kernel,
        C=1.0,
        accuracyThreshold=1e-3,
        tau=1e-8,
        cacheSize=600000000,
    )
    fitted = trainer.compute(train_indep_data, train_dep_data)

    predictor = d4p.svm_prediction(fptype='float', kernel=linear_kernel)
    scores = predictor.compute(test_indep_data, fitted.model).prediction

    # Threshold the raw prediction values at zero to obtain +1 / -1 labels.
    signs = np.where(scores >= 0, 1, -1)
    return signs, scores
def main(readcsv=read_csv, method='defaultDense'):
    """Train a two-class SVM with a linear kernel and predict on test data.

    Parameters
    ----------
    readcsv : callable
        Invoked as ``readcsv(path, columns)``. The object returned for the
        test features must expose ``.shape``.
    method : str
        Not used by this function.

    Returns
    -------
    tuple
        ``(predict_result, plabels)``: the prediction result object and the
        label column read from the test file.
    """
    train_path = "./data/batch/svm_two_class_train_dense.csv"
    test_path = "./data/batch/svm_two_class_test_dense.csv"

    # Columns 0..19 hold the features, column 20 holds the label.
    feature_cols = range(20)
    label_cols = range(20, 21)

    # Keep the kernel in a local so the object stays alive while the
    # training and prediction algorithms reference it.
    linear_kernel = d4p.kernel_function_linear()
    trainer = d4p.svm_training(
        doShrinking=True,
        kernel=linear_kernel,
        cacheSize=600000000,
    )

    # Load the training set and fit the model.
    features = readcsv(train_path, feature_cols)
    targets = readcsv(train_path, label_cols)
    fitted = trainer.compute(features, targets)

    # Prediction reuses the same kernel object.
    predictor = d4p.svm_prediction(kernel=linear_kernel)

    # Load the test set (same number of features) and predict.
    test_features = readcsv(test_path, feature_cols)
    test_targets = readcsv(test_path, label_cols)
    outcome = predictor.compute(test_features, fitted.model)

    # Expect one prediction per test observation.
    assert outcome.prediction.shape == (test_features.shape[0], 1)

    return (outcome, test_targets)
def test_predict(X, training_result, params):
    """Run SVM prediction on ``X`` using the model in ``training_result``.

    ``params`` supplies ``n_classes`` and, for more than two classes,
    ``maxiter`` and ``tol``. With two classes the flattened prediction is
    returned as a boolean ``value > 0`` array; otherwise the flattened
    prediction itself is returned.
    """
    fptype = getFPType(X)
    linear_kernel = kernel_function_linear(fptype=fptype)
    binary_predictor = svm_prediction(
        fptype=fptype,
        method='defaultDense',
        kernel=linear_kernel,
    )

    is_binary = params.n_classes == 2
    if is_binary:
        predictor = binary_predictor
    else:
        # Wrap the two-class predictor in a one-against-one voting scheme.
        predictor = multi_class_classifier_prediction(
            nClasses=params.n_classes,
            fptype=fptype,
            maxIterations=params.maxiter,
            accuracyThreshold=params.tol,
            pmethod='voteBased',
            tmethod='oneAgainstOne',
            prediction=binary_predictor,
        )

    flat = predictor.compute(X, training_result.model).prediction.ravel()
    return np.greater(flat, 0) if is_binary else flat
def bench(meta_info, X_train, y_train, fit_samples, fit_repetitions,
          predict_samples, predict_repetitions, classes, cache_size,
          accuracy_threshold=1e-16, max_iterations=2000):
    """Time linear-kernel SVM fitting and prediction and print one CSV row.

    Parameters
    ----------
    meta_info
        Value printed as the first field of the output row.
    X_train, y_train
        Training inputs and labels. Prediction is also run on ``X_train``.
        ``y_train`` must support boolean-mask assignment, ``.copy()``,
        ``.take`` and ``.ravel``.
    fit_samples, fit_repetitions
        Number of timed fit samples, and fits per sample.
    predict_samples, predict_repetitions
        Number of timed prediction samples, and predictions per sample.
    classes : int
        Number of classes; ``2`` selects the plain two-class SVM, anything
        else the one-against-one multi-class wrapper.
    cache_size
        Forwarded to ``svm_training`` as ``cacheSize``.
    accuracy_threshold, max_iterations
        Forwarded to the training and prediction algorithms.

    The printed row is
    ``meta_info,fit_time,predict_time,accuracy_percent,n_support_vectors,n_classes``
    where the times are the minimum sample time divided by the repetitions.
    """
    kf = kernel_function_linear(fptype='double')

    # Relabel on a private copy so the caller's array is left untouched.
    y_train = y_train.copy()
    if classes == 2:
        # The two-class path uses -1 / +1 labels.
        y_train[y_train == 0] = -1
    else:
        # The multi-class path uses non-negative labels.
        y_train[y_train == -1] = 0

    fit_times = []
    for _ in range(fit_samples):
        start = time()
        for __ in range(fit_repetitions):
            svm_train = svm_training(
                fptype='double',
                C=0.01,
                maxIterations=max_iterations,
                tau=1e-12,
                cacheSize=cache_size,
                accuracyThreshold=accuracy_threshold,
                doShrinking=True,
                kernel=kf)

            if classes == 2:
                clf = svm_train
            else:
                clf = multi_class_classifier_training(
                    nClasses=classes,
                    fptype='double',
                    accuracyThreshold=accuracy_threshold,
                    method='oneAgainstOne',
                    maxIterations=max_iterations,
                    training=svm_train)

            training_result = clf.compute(X_train, y_train)

            # NOTE(review): the support-vector bookkeeping is kept inside
            # the timed repetition loop; confirm this matches the intended
            # measurement.
            support = construct_dual_coefs(training_result.model, classes,
                                           X_train, y_train)
            indices = y_train.take(support, axis=0)
            if classes == 2:
                class_labels = (-1, 1)
            else:
                # Labels were remapped above so that -1 became 0; count
                # class 0 rather than the no-longer-present -1.
                class_labels = range(classes)
            n_support_ = np.array(
                [np.sum(indices == c) for c in class_labels],
                dtype=np.int32)
        stop = time()
        fit_times.append(stop - start)

    predict_times = []
    for _ in range(predict_samples):
        # Algorithm construction is deliberately outside the timed region.
        svm_predict = svm_prediction(fptype='double',
                                     method='defaultDense',
                                     kernel=kf)
        if classes == 2:
            prdct = svm_predict
        else:
            prdct = multi_class_classifier_prediction(
                nClasses=classes,
                fptype='double',
                maxIterations=max_iterations,
                accuracyThreshold=accuracy_threshold,
                pmethod='voteBased',
                tmethod='oneAgainstOne',
                prediction=svm_predict)

        start = time()
        for __ in range(predict_repetitions):
            res = prdct.compute(X_train, training_result.model)
        stop = time()
        predict_times.append(stop - start)

    if classes == 2:
        # Compare as booleans: positive value <=> positive class.
        y_predict = np.greater(res.prediction.ravel(), 0)
        y_train = np.greater(y_train, 0)
    else:
        y_predict = res.prediction.ravel()

    print("{meta_info},{fit_t:0.6g},{pred_t:0.6g},{acc:0.3f},{sv_len},{cl}"
          .format(meta_info=meta_info,
                  fit_t=min(fit_times) / fit_repetitions,
                  pred_t=min(predict_times) / predict_repetitions,
                  acc=100 * accuracy_score(y_train.ravel(), y_predict),
                  sv_len=support.shape[0],
                  cl=n_support_.shape[0]))