Example #1
def main(readcsv=read_csv, method='defaultDense'):
    nFeatures = 20
    nClasses = 5

    # read training data from file with nFeatures features per observation and 1 class label
    train_file = 'data/batch/svm_multi_class_train_dense.csv'
    train_data = readcsv(train_file, range(nFeatures))
    train_labels = readcsv(train_file, range(nFeatures, nFeatures + 1))

    # Create and configure algorithm object
    algorithm = d4p.multi_class_classifier_training(nClasses=nClasses,
                                                    training=d4p.svm_training(method='thunder'),
                                                    prediction=d4p.svm_prediction())
    
    # Pass data to training. Training result provides model
    train_result = algorithm.compute(train_data, train_labels)
    assert train_result.model.NumberOfFeatures == nFeatures
    assert isinstance(train_result.model.TwoClassClassifierModel(0), d4p.svm_model)

    # Now the prediction stage
    # Read data
    pred_file = 'data/batch/svm_multi_class_test_dense.csv'
    pred_data = readcsv(pred_file, range(nFeatures))
    pred_labels = readcsv(pred_file, range(nFeatures, nFeatures + 1))
    
    # Create an algorithm object to predict multi-class SVM values
    algorithm = d4p.multi_class_classifier_prediction(nClasses,
                                                      training=d4p.svm_training(method='thunder'),
                                                      prediction=d4p.svm_prediction())
    # Pass data to prediction. Prediction result provides prediction
    pred_result = algorithm.compute(pred_data, train_result.model)
    assert pred_result.prediction.shape == (pred_data.shape[0], 1)
    
    return (pred_result, pred_labels)
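The snippet above assumes an import daal4py as d4p at module level and a read_csv helper that takes a file name and a column range. A minimal sketch of such a helper (my assumption; the pandas-based implementation is illustrative, not the original one):

import numpy as np
import pandas as pd

def read_csv(f, c, t=np.float64):
    # load only the requested columns as a dense float array
    return pd.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t).values

With that in place, pred_result, pred_labels = main() returns the multi-class predictions and the reference labels read from the test file.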
Example #2
def compute(train_indep_data,
            train_dep_data,
            test_indep_data,
            method='defaultDense'):
    # Configure a SVM object to use linear kernel
    kernel_function = d4p.kernel_function_linear(fptype='float',
                                                 method='defaultDense',
                                                 k=1.0,
                                                 b=0.0)
    train_algo = d4p.svm_training(fptype='float',
                                  method=method,
                                  kernel=kernel_function,
                                  C=1.0,
                                  accuracyThreshold=1e-3,
                                  tau=1e-8,
                                  cacheSize=600000000)

    train_result = train_algo.compute(train_indep_data, train_dep_data)

    # Create an algorithm object and call compute
    predict_algo = d4p.svm_prediction(fptype='float', kernel=kernel_function)
    predict_result = predict_algo.compute(test_indep_data, train_result.model)
    decision_result = predict_result.prediction
    predict_labels = np.where(decision_result >= 0, 1, -1)
    return predict_labels, decision_result
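A hedged usage sketch for the compute helper above, with synthetic NumPy arrays. The data shapes are illustrative; daal4py expects 2D arrays, and the np.where decision step implies two-class labels encoded as +1/-1:

import numpy as np

X_train = np.random.rand(200, 10)
y_train = np.where(np.random.rand(200, 1) > 0.5, 1.0, -1.0)
X_test = np.random.rand(50, 10)

labels, decisions = compute(X_train, y_train, X_test)
print(labels[:5].ravel(), decisions[:5].ravel())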
Example #3
def main(readcsv=read_csv, method='defaultDense'):
    # input data file
    infile = "./data/batch/svm_two_class_train_dense.csv"
    testfile = "./data/batch/svm_two_class_test_dense.csv"

    # Configure an SVM object to use a linear kernel (and adjust the cache size)
    # The kernel object must stay alive while train_algo is created and used
    kern = d4p.kernel_function_linear()
    train_algo = d4p.svm_training(doShrinking=True,
                                  kernel=kern,
                                  cacheSize=600000000)

    # Read training data: 20 features per observation and 1 class label
    data = readcsv(infile, range(20))
    labels = readcsv(infile, range(20, 21))
    train_result = train_algo.compute(data, labels)

    # Now let's do some prediction
    predict_algo = d4p.svm_prediction(kernel=kern)
    # read test data (with same #features)
    pdata = readcsv(testfile, range(20))
    plabels = readcsv(testfile, range(20, 21))
    # now predict using the model from the training above
    predict_result = predict_algo.compute(pdata, train_result.model)

    # Prediction result provides prediction
    assert predict_result.prediction.shape == (pdata.shape[0], 1)

    return (predict_result, plabels)
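One way to sanity-check what main returns (a sketch, assuming numpy is imported, the read_csv helper is defined, and the data files from the daal4py examples are available; it converts the decision values to +1/-1 labels as in Example #2):

predict_result, plabels = main()
predicted = np.where(predict_result.prediction.ravel() >= 0, 1, -1)
accuracy = np.mean(predicted == plabels.ravel())
print("two-class SVM accuracy:", accuracy)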
Example #4
def _daal4py_svm(fptype,
                 C,
                 accuracyThreshold,
                 tau,
                 maxIterations,
                 cacheSize,
                 doShrinking,
                 kernel,
                 nClasses=2):
    svm_train = daal4py.svm_training(method='thunder',
                                     fptype=fptype,
                                     C=C,
                                     accuracyThreshold=accuracyThreshold,
                                     tau=tau,
                                     maxIterations=maxIterations,
                                     cacheSize=cacheSize,
                                     doShrinking=doShrinking,
                                     kernel=kernel)
    if nClasses == 2:
        algo = svm_train
    else:
        algo = daal4py.multi_class_classifier_training(
            nClasses=nClasses,
            fptype=fptype,
            method='oneAgainstOne',
            training=svm_train,
        )

    return algo
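A hedged construction example for _daal4py_svm. The parameter values and the linear kernel object are illustrative only, chosen to mirror values used elsewhere in this listing:

import daal4py

kernel = daal4py.kernel_function_linear(fptype='double')
binary_algo = _daal4py_svm('double', C=1.0, accuracyThreshold=1e-3, tau=1e-12,
                           maxIterations=100000, cacheSize=64 * 1024 * 1024,
                           doShrinking=True, kernel=kernel)
# with nClasses > 2 the same call returns a one-against-one multi-class trainer
multi_algo = _daal4py_svm('double', C=1.0, accuracyThreshold=1e-3, tau=1e-12,
                          maxIterations=100000, cacheSize=64 * 1024 * 1024,
                          doShrinking=True, kernel=kernel, nClasses=3)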
Example #5
def _daal4py_svm_compatibility(fptype,
                               C,
                               accuracyThreshold,
                               tau,
                               maxIterations,
                               cacheSize,
                               doShrinking,
                               kernel,
                               nClasses=2):
    svm_method = 'thunder' if daal_check_version(
        ((2020, 'P', 2), (2021, 'B', 108))) else 'boser'
    svm_train = daal4py.svm_training(method=svm_method,
                                     fptype=fptype,
                                     C=C,
                                     accuracyThreshold=accuracyThreshold,
                                     tau=tau,
                                     maxIterations=maxIterations,
                                     cacheSize=cacheSize,
                                     doShrinking=doShrinking,
                                     kernel=kernel)
    if nClasses == 2:
        algo = svm_train
    else:
        algo = daal4py.multi_class_classifier_training(
            nClasses=nClasses,
            fptype=fptype,
            method='oneAgainstOne',
            training=svm_train,
        )

    return algo
Example #6
def test_fit(X, y, params):

    fptype = getFPType(X)
    kf = kernel_function_linear(fptype=fptype)

    if params.n_classes == 2:
        y[y == 0] = -1
    else:
        y[y == -1] = 0

    svm_train = svm_training(fptype=fptype,
                             C=params.C,
                             maxIterations=params.maxiter,
                             tau=params.tau,
                             cacheSize=params.cache_size_bytes,
                             accuracyThreshold=params.tol,
                             doShrinking=params.shrinking,
                             kernel=kf)

    if params.n_classes == 2:
        clf = svm_train
    else:
        clf = multi_class_classifier_training(fptype=fptype,
                                              nClasses=params.n_classes,
                                              accuracyThreshold=params.tol,
                                              method='oneAgainstOne',
                                              maxIterations=params.maxiter,
                                              training=svm_train)

    training_result = clf.compute(X, y)

    support = construct_dual_coefs(training_result.model, params.n_classes, X,
                                   y)
    indices = y.take(support, axis=0)
    if params.n_classes == 2:
        n_support_ = np.array([np.sum(indices == -1), np.sum(indices == 1)],
                              dtype=np.int32)
    else:
        n_support_ = np.array(
            [np.sum(indices == c) for c in [-1] + list(range(1, params.n_classes))],
            dtype=np.int32)

    return training_result, support, indices, n_support_
Example #7
def _daal4py_fit(self, X, y_inp, kernel):

    if self.C <= 0:
        raise ValueError("C <= 0")

    y = make2d(y_inp)
    num_classes = len(self.classes_)

    if num_classes == 2:
        # Intel(R) DAAL requires binary classes to be 1 and -1. sklearn normalizes
        # the classes to 0 and 1, so we temporarily replace the 0s with -1s.
        y = y.copy()
        y[y == 0] = -1

    X_fptype = getFPType(X)

    kf = _daal4py_kf(kernel, X_fptype, gamma=self._gamma)

    svm_train = daal4py.svm_training(
        fptype=X_fptype,
        C=float(self.C),
        accuracyThreshold=float(self.tol),
        tau=1e-12,
        maxIterations=int(self.max_iter if self.max_iter > 0 else 2**30),
        cacheSize=int(self.cache_size * 1024 * 1024),
        doShrinking=bool(self.shrinking),
        # shrinkingStep=,
        kernel=kf)

    if num_classes == 2:
        algo = svm_train
    else:
        algo = daal4py.multi_class_classifier_training(
            nClasses=num_classes,
            fptype=X_fptype,
            accuracyThreshold=float(self.tol),
            method='oneAgainstOne',
            maxIterations=int(self.max_iter if self.max_iter > 0 else 2**30),
            training=svm_train,
            #prediction=svm_predict
        )

    res = algo.compute(X, y)
    model = res.model

    self.daal_model_ = model

    if num_classes == 2:
        # binary
        two_class_sv_ind_ = model.SupportIndices
        two_class_sv_ind_ = two_class_sv_ind_.ravel()

        # support indexes need permutation to arrange them into the same layout as that of Scikit-Learn
        tmp = np.empty(two_class_sv_ind_.shape,
                       dtype=np.dtype([('label', y.dtype),
                                       ('ind', two_class_sv_ind_.dtype)]))
        tmp['label'][:] = y[two_class_sv_ind_].ravel()
        tmp['ind'][:] = two_class_sv_ind_
        perm = np.argsort(tmp, order=['label', 'ind'])
        del tmp

        self.support_ = two_class_sv_ind_[perm]
        self.support_vectors_ = X[self.support_]

        self.dual_coef_ = model.ClassificationCoefficients.T
        self.dual_coef_ = self.dual_coef_[:, perm]
        self.intercept_ = np.array([model.Bias])

    else:
        # multi-class
        intercepts = []
        coefs = []
        num_models = model.NumberOfTwoClassClassifierModels
        sv_ind_by_clf = []
        label_indexes = []

        model_id = 0
        for i1 in range(num_classes):
            label_indexes.append(np.where(y == i1)[0])
            for i2 in range(i1):
                svm_model = model.TwoClassClassifierModel(model_id)

                # Indices correspond to input features with label i1 followed by input features with label i2
                two_class_sv_ind_ = svm_model.SupportIndices
                # Map these indexes to indexes of the training data
                sv_ind = np.take(
                    np.hstack((label_indexes[i1], label_indexes[i2])),
                    two_class_sv_ind_.ravel())
                sv_ind_by_clf.append(sv_ind)

                # svs_ = getArrayFromNumericTable(svm_model.getSupportVectors())
                # assert np.array_equal(svs_, X[sv_ind])

                intercepts.append(-svm_model.Bias)
                coefs.append(-svm_model.ClassificationCoefficients)
                model_id += 1

        # permute solutions to lexicographic ordering
        to_lex_perm = map_to_lexicographic(num_classes)
        sv_ind_by_clf = permute_list(sv_ind_by_clf, to_lex_perm)
        sv_coef_by_clf = permute_list(coefs, to_lex_perm)
        intercepts = permute_list(intercepts, to_lex_perm)

        self.dual_coef_, self.support_ = extract_dual_coef(
            num_classes,  # number of classes
            sv_ind_by_clf,  # support vector indexes by two-class classifiers
            sv_coef_by_clf,  # classification coefficients by two-class classifiers
            y.squeeze().astype(np.intp, copy=False)  # integer labels
        )
        self.support_vectors_ = X[self.support_]
        self.intercept_ = np.array(intercepts)

    indices = y.take(self.support_, axis=0)
    if num_classes == 2:
        self.n_support_ = np.array(
            [np.sum(indices == -1),
             np.sum(indices == 1)], dtype=np.int32)
    else:
        self.n_support_ = np.array(
            [np.sum(indices == i) for i, c in enumerate(self.classes_)],
            dtype=np.int32)

    self.probA_ = np.empty(0)
    self.probB_ = np.empty(0)

    return
Example #8
def bench(meta_info,
          X_train,
          y_train,
          fit_samples,
          fit_repetitions,
          predict_samples,
          predict_repetitions,
          classes,
          cache_size,
          accuracy_threshold=1e-16,
          max_iterations=2000):

    kf = kernel_function_linear(fptype='double')

    if classes == 2:
        y_train[y_train == 0] = -1
    else:
        y_train[y_train == -1] = 0

    fit_times = []
    for it in range(fit_samples):
        start = time()
        for __ in range(fit_repetitions):
            svm_train = svm_training(fptype='double',
                                     C=0.01,
                                     maxIterations=max_iterations,
                                     tau=1e-12,
                                     cacheSize=cache_size,
                                     accuracyThreshold=accuracy_threshold,
                                     doShrinking=True,
                                     kernel=kf)

            if classes == 2:
                clf = svm_train
            else:
                clf = multi_class_classifier_training(
                    nClasses=classes,
                    fptype='double',
                    accuracyThreshold=accuracy_threshold,
                    method='oneAgainstOne',
                    maxIterations=max_iterations,
                    training=svm_train)

            training_result = clf.compute(X_train, y_train)

            support = construct_dual_coefs(training_result.model, classes,
                                           X_train, y_train)
            indices = y_train.take(support, axis=0)
            if classes == 2:
                n_support_ = np.array([np.sum(indices == -1), np.sum(indices == 1)],
                                      dtype=np.int32)
            else:
                n_support_ = np.array(
                    [np.sum(indices == c) for c in [-1] + list(range(1, classes))],
                    dtype=np.int32)
        stop = time()
        fit_times.append(stop - start)

    predict_times = []
    for it in range(predict_samples):
        svm_predict = svm_prediction(fptype='double',
                                     method='defaultDense',
                                     kernel=kf)
        if classes == 2:
            prdct = svm_predict
        else:
            prdct = multi_class_classifier_prediction(
                nClasses=classes,
                fptype='double',
                maxIterations=max_iterations,
                accuracyThreshold=accuracy_threshold,
                pmethod='voteBased',
                tmethod='oneAgainstOne',
                prediction=svm_predict)

        start = time()
        for __ in range(predict_repetitions):
            res = prdct.compute(X_train, training_result.model)
        stop = time()
        predict_times.append(stop - start)

    if classes == 2:
        y_predict = np.greater(res.prediction.ravel(), 0)
        y_train = np.greater(y_train, 0)
    else:
        y_predict = res.prediction.ravel()

    print("{meta_info},{fit_t:0.6g},{pred_t:0.6g},{acc:0.3f},{sv_len},{cl}".
          format(meta_info=meta_info,
                 fit_t=min(fit_times) / fit_repetitions,
                 pred_t=min(predict_times) / predict_repetitions,
                 acc=100 * accuracy_score(y_train.ravel(), y_predict),
                 sv_len=support.shape[0],
                 cl=n_support_.shape[0]))
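A hedged call sketch for bench with synthetic two-class data. The sample counts, cache size, and meta_info string are illustrative, and the function additionally relies on time, accuracy_score, kernel_function_linear, and construct_dual_coefs being importable in its module:

import numpy as np

X = np.random.rand(1000, 20)
y = np.random.randint(0, 2, size=(1000, 1)).astype(np.float64)
bench("synthetic-linear-1000x20", X, y,
      fit_samples=3, fit_repetitions=1,
      predict_samples=3, predict_repetitions=1,
      classes=2, cache_size=64 * 1024 * 1024)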