Exemplo n.º 1
0
def main(readcsv=read_csv, method='defaultDense'):
    """Train and test a 5-class logistic regression model on CSV data."""
    nClasses = 5
    nFeatures = 6

    # training table layout: 6 feature columns followed by one label column
    trainfile = "./data/batch/logreg_train.csv"
    x_train = readcsv(trainfile, range(nFeatures))
    y_train = readcsv(trainfile, range(nFeatures, nFeatures + 1))

    # L1/L2-regularized training with an intercept term
    trainer = d4p.logistic_regression_training(nClasses=nClasses,
                                               penaltyL1=0.1,
                                               penaltyL2=0.1,
                                               interceptFlag=True)
    train_result = trainer.compute(x_train, y_train)

    # test table: feature columns only
    testfile = "./data/batch/logreg_test.csv"
    x_test = readcsv(testfile, range(nFeatures))

    # the prediction interface changed in DAAL 2020: the option keyword moved
    # from 'resultsToCompute' to 'resultsToEvaluate' with renamed values
    from daal4py import __daal_link_version__ as dv
    daal_version = tuple(map(int, (dv[0:4], dv[4:8])))
    if daal_version < (2020, 0):
        predictor = d4p.logistic_regression_prediction(
            nClasses=nClasses,
            resultsToCompute=
            "computeClassesLabels|computeClassesProbabilities|computeClassesLogProbabilities"
        )
    else:
        predictor = d4p.logistic_regression_prediction(
            nClasses=nClasses,
            resultsToEvaluate=
            "computeClassLabels|computeClassProbabilities|computeClassLogProbabilities"
        )
    predict_result = predictor.compute(x_test, train_result.model)

    # probability tables must be (observations x classes)
    assert predict_result.probabilities.shape == (x_test.shape[0], nClasses)
    assert predict_result.logProbabilities.shape == (x_test.shape[0], nClasses)

    # accuracy check against ground truth: error rate below 2.5%
    predict_labels = np.loadtxt(testfile,
                                usecols=range(nFeatures, nFeatures + 1),
                                delimiter=',',
                                ndmin=2)
    mislabeled = np.count_nonzero(predict_result.prediction - predict_labels)
    assert mislabeled / predict_labels.shape[0] < 0.025

    return (train_result, predict_result, predict_labels)
Exemplo n.º 2
0
def main(readcsv=read_csv, method='defaultDense'):
    """Binary logistic regression: train on one CSV, score another."""
    nClasses = 2
    nFeatures = 20

    # training CSV layout: 20 feature columns, then a single label column
    trainfile = "./data/batch/binary_cls_train.csv"
    x = readcsv(trainfile, range(nFeatures))
    y = readcsv(trainfile, range(nFeatures, nFeatures + 1))

    # fit a binary model with an intercept term
    training = d4p.logistic_regression_training(nClasses=nClasses,
                                                interceptFlag=True)
    train_result = training.compute(x, y)

    # test CSV uses the same layout; its labels are returned to the caller
    testfile = "./data/batch/binary_cls_test.csv"
    x_test = readcsv(testfile, range(nFeatures))
    predict_labels = readcsv(testfile, range(nFeatures, nFeatures + 1))

    # default prediction result: class labels only
    prediction = d4p.logistic_regression_prediction(nClasses=nClasses)
    predict_result = prediction.compute(x_test, train_result.model)

    # expect one predicted label per test row
    assert predict_result.prediction.shape == (x_test.shape[0], y.shape[1])

    return (train_result, predict_result, predict_labels)
Exemplo n.º 3
0
def main():
    """SPMD binary logistic regression: each process trains on its own chunk."""
    nClasses = 2
    nFeatures = 20

    # split the training table evenly across processes and keep our share
    trainfile = "./data/batch/binary_cls_train.csv"
    my_chunk = d4p.my_procid()
    train_data = np.split(read_csv(trainfile, range(nFeatures)),
                          d4p.num_procs())[my_chunk]
    train_labels = np.split(read_csv(trainfile, range(nFeatures, nFeatures + 1)),
                            d4p.num_procs())[my_chunk]

    # distributed training combines partial results across processes
    train_alg = d4p.logistic_regression_training(nClasses=nClasses,
                                                 interceptFlag=True,
                                                 distributed=True)
    train_result = train_alg.compute(train_data, train_labels)

    # prediction is replicated: every process scores the full test table
    testfile = "./data/batch/binary_cls_test.csv"
    predict_data = read_csv(testfile, range(nFeatures))
    predict_labels = read_csv(testfile, range(nFeatures, nFeatures + 1))

    predict_alg = d4p.logistic_regression_prediction(nClasses=nClasses)
    predict_result = predict_alg.compute(predict_data, train_result.model)

    # expect one predicted label per test observation
    assert predict_result.prediction.shape == (predict_data.shape[0],
                                               train_labels.shape[1])

    return (train_result, predict_result, predict_labels)
Exemplo n.º 4
0
def run_inference(num_observations: int = 1000):
    """Time daal4py logistic-regression prediction over *num_observations* rows.

    Runs NUM_LOOPS timed iterations, each constructing a fresh prediction
    algorithm and scoring the test frame, then prints and returns the
    aggregate statistics produced by common.calculate_stats.
    """
    # Load data
    test_df = common.get_test_data_df(X=common.X_dfc, size=num_observations)
    num_rows = len(test_df)
    ######################
    print("_______________________________________")
    print("Total Number of Rows", num_rows)
    run_times = []
    inference_times = []
    for _ in range(NUM_LOOPS):

        start_time = timer()
        # algorithm construction is inside the timed region, so its setup
        # cost is included in every measurement
        predict_algo = d4p.logistic_regression_prediction(
            nClasses=2,
            resultsToEvaluate=
            "computeClassLabels|computeClassProbabilities|computeClassLogProbabilities"
        )
        # NOTE(review): train_result is not defined in this function — it is
        # presumably a module-level global from a prior training step (the
        # commented-out call below used a MODEL global instead); verify.
        predict_result = predict_algo.compute(test_df, train_result.model)
        #predictor.compute(data, MODEL)
        end_time = timer()

        total_time = end_time - start_time
        # NOTE(review): 10e3 equals 10,000, not 1,000 — if this is meant as a
        # seconds-to-milliseconds conversion it is off by 10x; confirm intent.
        run_times.append(total_time * 10e3)

        # per-row latency; same caveat applies: 10e6 is 10,000,000
        inference_time = total_time * (10e6) / num_rows
        inference_times.append(inference_time)
    return_elem = common.calculate_stats(inference_times)
    print(num_observations, ", ", return_elem)
    return return_elem
Exemplo n.º 5
0
def main(readcsv=read_csv, method='defaultDense'):
    """Five-class logistic regression using the legacy result-option names."""
    nClasses = 5
    nFeatures = 6

    # training table: 6 feature columns plus one label column
    trainfile = "./data/batch/logreg_train.csv"
    features = readcsv(trainfile, range(nFeatures))
    labels = readcsv(trainfile, range(nFeatures, nFeatures + 1))

    # L1/L2-regularized training with an intercept
    trainer = d4p.logistic_regression_training(nClasses=nClasses,
                                               penaltyL1=0.1,
                                               penaltyL2=0.1,
                                               interceptFlag=True)
    train_result = trainer.compute(features, labels)

    # test features only
    testfile = "./data/batch/logreg_test.csv"
    predict_data = readcsv(testfile, range(nFeatures))

    # request labels, probabilities and log-probabilities (pre-2020 spelling)
    predictor = d4p.logistic_regression_prediction(
        nClasses=nClasses,
        resultsToCompute="computeClassesLabels|computeClassesProbabilities|computeClassesLogProbabilities")
    predict_result = predictor.compute(predict_data, train_result.model)

    # probability tables are (observations x classes)
    assert predict_result.probabilities.shape == (predict_data.shape[0], nClasses)
    assert predict_result.logProbabilities.shape == (predict_data.shape[0], nClasses)

    # ground-truth comparison: misclassification rate must stay below 2.5%
    predict_labels = np.loadtxt(testfile,
                                usecols=range(nFeatures, nFeatures + 1),
                                delimiter=',', ndmin=2)
    errors = np.count_nonzero(predict_result.prediction - predict_labels)
    assert errors / predict_labels.shape[0] < 0.025

    return (train_result, predict_result, predict_labels)
Exemplo n.º 6
0
 def prdct_impl(n, d, model):
     """Score an n-by-d constant matrix (every entry -21.5) with *model*."""
     samples = np.ones((n, d), dtype=np.double) - 22.5
     # binary prediction requesting labels plus (log-)probabilities
     predictor = d4p.logistic_regression_prediction(
         2,
         resultsToCompute="computeClassesLabels|computeClassesProbabilities|computeClassesLogProbabilities"
     )
     return predictor.compute(samples, model)
def compute(train_data, train_labels, predict_data, nClasses):
    """Train an intercept-enabled logistic regression model and score data.

    Returns a (prediction_result, training_result) pair.
    """
    # training step
    training = d4p.logistic_regression_training(nClasses=nClasses,
                                                interceptFlag=True)
    trained = training.compute(train_data, train_labels)
    # prediction step (default result: class labels)
    prediction = d4p.logistic_regression_prediction(nClasses=nClasses)
    return prediction.compute(predict_data, trained.model), trained
Exemplo n.º 8
0
 def predict(self, predict_data, model):
     """Score *predict_data* with *model* using instance-configured options.

     Caches prediction and probabilities on the instance; returns self.
     """
     # dtype, class count and result options come from attributes set elsewhere
     algo = d4p.logistic_regression_prediction(fptype=self.dtype,
                                               nClasses=self.nClasses,
                                               resultsToCompute=self.resultsToCompute)
     result = algo.compute(predict_data, model)
     self.prediction = result.prediction
     self.probabilities = result.probabilities
     return self
Exemplo n.º 9
0
def compute(train_data, train_labels, predict_data, nClasses):
    """Train a regularized logistic regression model and score predict_data.

    Returns a (prediction_result, training_result) pair.
    """
    # L1/L2-penalized training with an intercept term
    trainer = d4p.logistic_regression_training(nClasses=nClasses,
                                               penaltyL1=0.1,
                                               penaltyL2=0.1,
                                               interceptFlag=True)
    trained = trainer.compute(train_data, train_labels)
    # request labels, probabilities and log-probabilities in one pass
    predictor = d4p.logistic_regression_prediction(
        nClasses=nClasses,
        resultsToEvaluate="computeClassLabels|computeClassProbabilities|computeClassLogProbabilities")
    return predictor.compute(predict_data, trained.model), trained
Exemplo n.º 10
0
    def test_breast_cancer_without_intercept(self):
        """daal4py predictions from transplanted sklearn betas match sklearn."""
        X, y = load_breast_cancer(return_X_y=True)
        n_classes = 2
        # reference model fitted without an intercept
        clf = LogisticRegression(fit_intercept=False, max_iter=10000,
                                 random_state=0).fit(X, y)
        # copy the fitted coefficients into a daal4py model
        builder = d4p.logistic_regression_model_builder(n_classes=n_classes,
                                                        n_features=X.shape[1])
        builder.set_beta(clf.coef_, clf.intercept_)

        alg_pred = d4p.logistic_regression_prediction(nClasses=n_classes)

        # both implementations must label every sample identically
        pred_daal = alg_pred.compute(X, builder.model).prediction.flatten()
        pred_sklearn = clf.predict(X)
        self.assertTrue(np.allclose(pred_daal, pred_sklearn))
Exemplo n.º 11
0
def main():
    """Build a daal4py model from sklearn-fitted iris coefficients and verify."""
    X, y = load_iris(return_X_y=True)
    n_classes = 3

    # reference model fitted by scikit-learn
    clf = LogisticRegression(fit_intercept=True, max_iter=1000,
                             random_state=0).fit(X, y)

    # transplant the fitted betas into a daal4py model via the model builder
    builder = d4p.logistic_regression_model_builder(n_classes=n_classes,
                                                    n_features=X.shape[1])
    builder.set_beta(clf.coef_, clf.intercept_)

    # daal4py prediction restricted to class labels only
    predict_alg = d4p.logistic_regression_prediction(nClasses=n_classes,
                                                     resultsToEvaluate="computeClassLabels")
    predict_result_daal = predict_alg.compute(X, builder.model)
    predict_result_sklearn = clf.predict(X)
    # the two implementations must agree on every sample
    assert np.allclose(predict_result_daal.prediction.flatten(),
                       predict_result_sklearn)
    return (builder, predict_result_daal)
Exemplo n.º 12
0
def main():
    """Five-class logistic regression with full result-shape verification."""
    nClasses = 5
    nFeatures = 6

    # training table: 6 features followed by one label column
    trainfile = "./data/batch/logreg_train.csv"
    x_train = read_csv(trainfile, range(nFeatures))
    y_train = read_csv(trainfile, range(nFeatures, nFeatures + 1))

    # regularized training with an intercept term
    trainer = d4p.logistic_regression_training(nClasses=nClasses,
                                               penaltyL1=0.1,
                                               penaltyL2=0.1,
                                               interceptFlag=True)
    train_result = trainer.compute(x_train, y_train)

    # test table has the same layout; labels are handed back to the caller
    testfile = "./data/batch/logreg_test.csv"
    predict_data = read_csv(testfile, range(nFeatures))
    predict_labels = read_csv(testfile, range(nFeatures, nFeatures + 1))

    # request all three result tables (pre-2020 option spelling)
    predictor = d4p.logistic_regression_prediction(
        nClasses=nClasses,
        resultsToCompute=
        "computeClassesLabels|computeClassesProbabilities|computeClassesLogProbabilities"
    )
    predict_result = predictor.compute(predict_data, train_result.model)

    # shape checks: labels are (rows x 1), probabilities (rows x classes)
    assert predict_result.prediction.shape == (predict_data.shape[0],
                                               y_train.shape[1])
    assert predict_result.probabilities.shape == (predict_data.shape[0],
                                                  nClasses)
    assert predict_result.logProbabilities.shape == (predict_data.shape[0],
                                                     nClasses)

    return (train_result, predict_result, predict_labels)
Exemplo n.º 13
0
def daal4py_predict(self, X, resultsToEvaluate):
    """Predict labels, probabilities or log-probabilities for X.

    Dispatches to a daal4py implementation when the fitted estimator and
    the input qualify; otherwise falls back to the stock scikit-learn
    predict / predict_proba / predict_log_proba.
    """
    check_is_fitted(self)
    X = check_array(X, accept_sparse='csr', dtype=[np.float64, np.float32])
    try:
        fptype = getFPType(X)
    except ValueError:
        # unsupported dtype: forces the sklearn fallback below
        fptype = None

    # daal4py path requires: multinomial/binary (or plain label prediction),
    # dense X and coefficients, and a resolvable floating-point type
    daal_ready = self.multi_class in ["multinomial", "warn"] or \
        self.classes_.size == 2 or resultsToEvaluate == 'computeClassLabels'
    daal_ready = daal_ready and not sparse.issparse(X) and \
        not sparse.issparse(self.coef_) and fptype is not None
    if daal_ready:
        logging.info("sklearn.linear_model.LogisticRegression."
                     "predict: " + get_patch_message("daal"))
        n_features = self.coef_.shape[1]
        if X.shape[1] != n_features:
            raise ValueError(
                f'X has {X.shape[1]} features, '
                f'but LogisticRegression is expecting {n_features} features as input'
            )
        # rebuild a daal4py model from the fitted sklearn coefficients
        # NOTE(review): arguments are positional (X.shape[1], len(classes_));
        # confirm this matches the builder's expected parameter order
        builder = d4p.logistic_regression_model_builder(
            X.shape[1], len(self.classes_))
        builder.set_beta(self.coef_, self.intercept_)
        predict = d4p.logistic_regression_prediction(
            nClasses=len(self.classes_),
            fptype=fptype,
            method='defaultDense',
            resultsToEvaluate=resultsToEvaluate)
        res = predict.compute(X, builder.model)
        if resultsToEvaluate == 'computeClassLabels':
            res = res.prediction
            # daal4py returns class indices 0..n-1; map back to the original
            # class values when they differ in value or dtype
            if not np.array_equal(self.classes_, np.arange(0, len(self.classes_))) or \
                    self.classes_.dtype != X.dtype:
                res = self.classes_.take(np.asarray(res, dtype=np.intp))
        elif resultsToEvaluate == 'computeClassProbabilities':
            res = res.probabilities
        elif resultsToEvaluate == 'computeClassLogProbabilities':
            res = res.logProbabilities
        else:
            raise ValueError(
                'resultsToEvaluate must be in [computeClassLabels, \
                computeClassProbabilities, computeClassLogProbabilities]')
        # match sklearn's 1-D output shape for single-column results
        if res.shape[1] == 1:
            res = np.ravel(res)
        return res

    # fallback path: delegate to the original scikit-learn implementations
    if resultsToEvaluate == 'computeClassLabels':
        logging.info("sklearn.linear_model.LogisticRegression."
                     "predict: " + get_patch_message("sklearn"))
        return LogisticRegression_original.predict(self, X)
    if resultsToEvaluate == 'computeClassProbabilities':
        logging.info("sklearn.linear_model.LogisticRegression."
                     "predict_proba: " + get_patch_message("sklearn"))
        return LogisticRegression_original.predict_proba(self, X)
    if resultsToEvaluate == 'computeClassLogProbabilities':
        logging.info("sklearn.linear_model.LogisticRegression."
                     "predict_log_proba: " + get_patch_message("sklearn"))
        return LogisticRegression_original.predict_log_proba(self, X)
    raise ValueError('resultsToEvaluate must be in [computeClassLabels, \
        computeClassProbabilities, computeClassLogProbabilities]')
Exemplo n.º 14
0
def daal4py_predict(self, X, resultsToEvaluate):
    """Predict labels, probabilities or log-probabilities for X.

    Newer variant using PatchingConditionsChain to decide between the
    daal4py implementation and the stock scikit-learn fallback.
    """
    check_is_fitted(self)
    if sklearn_check_version('1.0'):
        self._check_feature_names(X, reset=False)
    X = check_array(X, accept_sparse='csr', dtype=[np.float64, np.float32])
    try:
        fptype = getFPType(X)
    except ValueError:
        # unsupported dtype: recorded as a failed patching condition below
        fptype = None

    # map the requested result to the sklearn method name used in logging;
    # invalid values are rejected up-front, so later branches are exhaustive
    if resultsToEvaluate == 'computeClassLabels':
        _function_name = 'predict'
    elif resultsToEvaluate == 'computeClassProbabilities':
        _function_name = 'predict_proba'
    elif resultsToEvaluate == 'computeClassLogProbabilities':
        _function_name = 'predict_log_proba'
    else:
        raise ValueError('resultsToEvaluate must be in [computeClassLabels, \
            computeClassProbabilities, computeClassLogProbabilities]')

    # any-of conditions: multinomial mode, binary problem, or label-only result
    _patching_status = PatchingConditionsChain(
        f"sklearn.linear_model.LogisticRegression.{_function_name}")
    _patching_status.and_conditions(
        [(self.multi_class in ["multinomial", "warn"],
          f"{self.multi_class} multiclass option is not supported. "
          "Only 'multinomial' or 'warn' options are supported."),
         (self.classes_.size == 2, "Number of classes != 2."),
         (resultsToEvaluate == 'computeClassLabels',
          "resultsToEvaluate != 'computeClassLabels'.")],
        conditions_merging=any)
    # all-of conditions: dense input/coefficients and a resolved fptype
    _dal_ready = _patching_status.and_conditions([
        (not sparse.issparse(X),
         "X is sparse. Sparse input is not supported."),
        (not sparse.issparse(self.coef_),
         "self.coef_ is sparse. Sparse coefficients are not supported."),
        (fptype is not None, "Unable to get dtype.")
    ])

    _patching_status.write_log()
    if _dal_ready:
        n_features = self.coef_.shape[1]
        if X.shape[1] != n_features:
            raise ValueError(
                f'X has {X.shape[1]} features, '
                f'but LogisticRegression is expecting {n_features} features as input'
            )
        # rebuild a daal4py model from the fitted sklearn coefficients
        # NOTE(review): arguments are positional (X.shape[1], len(classes_));
        # confirm this matches the builder's expected parameter order
        builder = d4p.logistic_regression_model_builder(
            X.shape[1], len(self.classes_))
        builder.set_beta(self.coef_, self.intercept_)
        predict = d4p.logistic_regression_prediction(
            nClasses=len(self.classes_),
            fptype=fptype,
            method='defaultDense',
            resultsToEvaluate=resultsToEvaluate)
        res = predict.compute(X, builder.model)
        if resultsToEvaluate == 'computeClassLabels':
            res = res.prediction
            # daal4py returns class indices 0..n-1; map back to the original
            # class values when they differ in value or dtype
            if not np.array_equal(self.classes_, np.arange(0, len(self.classes_))) or \
                    self.classes_.dtype != X.dtype:
                res = self.classes_.take(np.asarray(res, dtype=np.intp))
        elif resultsToEvaluate == 'computeClassProbabilities':
            res = res.probabilities
        elif resultsToEvaluate == 'computeClassLogProbabilities':
            res = res.logProbabilities
        else:
            # unreachable: resultsToEvaluate was validated at function entry
            raise ValueError(
                'resultsToEvaluate must be in [computeClassLabels, \
                computeClassProbabilities, computeClassLogProbabilities]')
        # match sklearn's 1-D output shape for single-column results
        if res.shape[1] == 1:
            res = np.ravel(res)
        return res

    # fallback path: delegate to the original scikit-learn implementations
    if resultsToEvaluate == 'computeClassLabels':
        return LogisticRegression_original.predict(self, X)
    if resultsToEvaluate == 'computeClassProbabilities':
        return LogisticRegression_original.predict_proba(self, X)
    if resultsToEvaluate == 'computeClassLogProbabilities':
        return LogisticRegression_original.predict_log_proba(self, X)