Пример #1
0
def main(readcsv=read_csv, method='defaultDense'):
    """Binary logistic regression: train on 20-feature CSV data, then predict.

    Returns a (training_result, prediction_result, test_labels) triple.
    """
    n_classes, n_features = 2, 20

    # Training file layout: 20 feature columns followed by one label column.
    train_file = "./data/batch/binary_cls_train.csv"
    x_train = readcsv(train_file, range(n_features))
    y_train = readcsv(train_file, range(n_features, n_features + 1))

    # Fit a model with an intercept term.
    trainer = d4p.logistic_regression_training(nClasses=n_classes,
                                               interceptFlag=True)
    trained = trainer.compute(x_train, y_train)

    # Test file has the same layout; its labels are returned for the caller.
    test_file = "./data/batch/binary_cls_test.csv"
    x_test = readcsv(test_file, range(n_features))
    y_test = readcsv(test_file, range(n_features, n_features + 1))

    # Predict class labels for the test observations.
    predictor = d4p.logistic_regression_prediction(nClasses=n_classes)
    predicted = predictor.compute(x_test, trained.model)

    # Sanity check: one predicted label per test observation.
    assert predicted.prediction.shape == (x_test.shape[0], y_train.shape[1])

    return (trained, predicted, y_test)
Пример #2
0
def main(readcsv=read_csv, method='defaultDense'):
    """Five-class logistic regression with L1/L2 penalties on 6-feature data.

    Returns a (training_result, prediction_result, test_labels) triple.
    """
    n_classes, n_features = 5, 6

    # Training file: 6 feature columns followed by a single class-label column.
    train_file = "./data/batch/logreg_train.csv"
    x_train = readcsv(train_file, range(n_features))
    y_train = readcsv(train_file, range(n_features, n_features + 1))

    # Regularized training with an intercept term.
    trainer = d4p.logistic_regression_training(nClasses=n_classes,
                                               penaltyL1=0.1,
                                               penaltyL2=0.1,
                                               interceptFlag=True)
    trained = trainer.compute(x_train, y_train)

    # Test file: the same 6 feature columns.
    test_file = "./data/batch/logreg_test.csv"
    x_test = readcsv(test_file, range(n_features))

    # Request labels plus probabilities and log-probabilities in one pass.
    predictor = d4p.logistic_regression_prediction(
        nClasses=n_classes,
        resultsToCompute="computeClassesLabels|computeClassesProbabilities|computeClassesLogProbabilities")
    predicted = predictor.compute(x_test, trained.model)

    # Probabilities: one row per observation, one column per class.
    assert predicted.probabilities.shape == (x_test.shape[0], n_classes)
    assert predicted.logProbabilities.shape == (x_test.shape[0], n_classes)

    # Reload the ground-truth labels to check the misclassification rate.
    y_test = np.loadtxt(test_file, usecols=range(n_features, n_features + 1),
                        delimiter=',', ndmin=2)
    # Error rate must stay below 2.5%.
    assert np.count_nonzero(predicted.prediction - y_test) / y_test.shape[0] < 0.025

    return (trained, predicted, y_test)
Пример #3
0
def main():
    """Distributed binary logistic regression: each process trains on its chunk.

    Returns a (training_result, prediction_result, test_labels) triple.
    """
    n_classes, n_features = 2, 20

    # Split the training file evenly across processes; this process keeps
    # only the chunk matching its rank.
    train_file = "./data/batch/binary_cls_train.csv"
    rank, nprocs = d4p.my_procid(), d4p.num_procs()
    x_train = np.split(read_csv(train_file, range(n_features)), nprocs)[rank]
    y_train = np.split(read_csv(train_file, range(n_features, n_features + 1)), nprocs)[rank]

    # Distributed training with an intercept term.
    trainer = d4p.logistic_regression_training(nClasses=n_classes,
                                               interceptFlag=True,
                                               distributed=True)
    trained = trainer.compute(x_train, y_train)

    # Prediction operates on the full test set, identically on every process.
    test_file = "./data/batch/binary_cls_test.csv"
    x_test = read_csv(test_file, range(n_features))
    y_test = read_csv(test_file, range(n_features, n_features + 1))

    predictor = d4p.logistic_regression_prediction(nClasses=n_classes)
    predicted = predictor.compute(x_test, trained.model)

    # One predicted label per test observation.
    assert predicted.prediction.shape == (x_test.shape[0], y_train.shape[1])

    return (trained, predicted, y_test)
def compute(train_data, train_labels, predict_data, nClasses):
    """Train a logistic regression model and predict labels for predict_data.

    Returns a (prediction_result, training_result) pair.
    """
    # Train with an intercept term.
    trainer = d4p.logistic_regression_training(nClasses=nClasses, interceptFlag=True)
    trained = trainer.compute(train_data, train_labels)
    # Predict on the supplied data with the freshly trained model.
    predictor = d4p.logistic_regression_prediction(nClasses=nClasses)
    return predictor.compute(predict_data, trained.model), trained
def run_inference(num_observations: int = 1000):
    """Benchmark daal4py logistic regression training for num_observations rows.

    Times NUM_LOOPS training runs on a num_observations-sized slice of the
    shared dataset from `common`, then returns aggregate per-row timing
    statistics from common.calculate_stats.

    (Fixes: previous docstring incorrectly said "xgboost"; an unused
    `run_times` accumulator has been removed — it was never read.)
    """
    # Load matching feature/label slices of the shared training data.
    train_x_df = common.get_test_data_df(X=common.X_dfc, size=num_observations)
    train_y_df = common.get_test_data_df(X=common.y_dfc, size=num_observations)
    num_rows = len(train_x_df)
    ######################
    print("_______________________________________")
    print("Total Number of Rows", num_rows)
    inference_times = []
    for _ in range(NUM_LOOPS):
        start_time = timer()
        MODEL = d4p.logistic_regression_training(nClasses=2)
        train_result = MODEL.compute(train_x_df, train_y_df)
        end_time = timer()

        total_time = end_time - start_time
        # NOTE(review): 10e6 equals 1e7, not 1e6 — if this is meant to be
        # microseconds per row, the factor should be 1e6; confirm intent
        # before changing, as downstream stats would shift by 10x.
        inference_time = total_time * (10e6) / num_rows
        inference_times.append(inference_time)
    return_elem = common.calculate_stats(inference_times)
    print(num_observations, ", ", return_elem)
    return return_elem
Пример #6
0
 def train_impl(n, d):
     X = np.ones((n,d), dtype=np.double)+.5
     Y = np.ones((n,1), dtype=np.double)
     algo = d4p.logistic_regression_training(2,
                                             penaltyL1=0.1,
                                             penaltyL2=0.1,
                                             interceptFlag=True)
     return algo.compute(X, Y)
Пример #7
0
def compute(train_data, train_labels, predict_data, nClasses):
    """Train a penalized logistic regression model and predict on predict_data.

    Returns a (prediction_result, training_result) pair; the prediction
    result carries labels, probabilities and log-probabilities.
    """
    # L1/L2-regularized training with an intercept term.
    trainer = d4p.logistic_regression_training(nClasses=nClasses,
                                               penaltyL1=0.1,
                                               penaltyL2=0.1,
                                               interceptFlag=True)
    trained = trainer.compute(train_data, train_labels)
    # Request every available prediction output.
    predictor = d4p.logistic_regression_prediction(
        nClasses=nClasses,
        resultsToEvaluate="computeClassLabels|computeClassProbabilities|computeClassLogProbabilities")
    return predictor.compute(predict_data, trained.model), trained
Пример #8
0
def main(readcsv=read_csv, method='defaultDense'):
    """Five-class penalized logistic regression, adapting to the DAAL version.

    DAAL releases before 2020.0 spell the prediction-result option
    'resultsToCompute' with 'Classes*' names; newer releases use
    'resultsToEvaluate' with 'Class*' names.

    Returns a (training_result, prediction_result, test_labels) triple.
    """
    n_classes, n_features = 5, 6

    # Training file: 6 feature columns, then one class-label column.
    train_file = "./data/batch/logreg_train.csv"
    x_train = readcsv(train_file, range(n_features))
    y_train = readcsv(train_file, range(n_features, n_features + 1))

    # Regularized training with an intercept term.
    trainer = d4p.logistic_regression_training(nClasses=n_classes,
                                               penaltyL1=0.1,
                                               penaltyL2=0.1,
                                               interceptFlag=True)
    trained = trainer.compute(x_train, y_train)

    # Test file: the same 6 feature columns.
    test_file = "./data/batch/logreg_test.csv"
    x_test = readcsv(test_file, range(n_features))

    # Derive a (major, minor) tuple from the linked DAAL version string.
    from daal4py import __daal_link_version__ as dv
    daal_version = tuple(map(int, (dv[0:4], dv[4:8])))
    if daal_version < (2020, 0):
        # Legacy interface.
        predictor = d4p.logistic_regression_prediction(
            nClasses=n_classes,
            resultsToCompute=
            "computeClassesLabels|computeClassesProbabilities|computeClassesLogProbabilities"
        )
    else:
        # Current interface.
        predictor = d4p.logistic_regression_prediction(
            nClasses=n_classes,
            resultsToEvaluate=
            "computeClassLabels|computeClassProbabilities|computeClassLogProbabilities"
        )
    predicted = predictor.compute(x_test, trained.model)

    # Shape checks: one row per observation, one column per class.
    assert predicted.probabilities.shape == (x_test.shape[0], n_classes)
    assert predicted.logProbabilities.shape == (x_test.shape[0], n_classes)

    # Reload ground-truth labels and require < 2.5% misclassification.
    y_test = np.loadtxt(test_file,
                        usecols=range(n_features, n_features + 1),
                        delimiter=',',
                        ndmin=2)
    assert np.count_nonzero(predicted.prediction - y_test) / y_test.shape[0] < 0.025

    return (trained, predicted, y_test)
Пример #9
0
    def train(self, train_data, train_labels):
        """Fit a logistic regression model with a configurable optimization solver.

        The solver (sgd, lbfgs or adagrad) is built from self.optSolverParam;
        the fitted result is stored on self.trainingResult.

        Args:
            train_data: feature table accepted by daal4py's compute().
            train_labels: label table accepted by daal4py's compute().

        Returns:
            self, so calls can be chained.
        """
        # Use the numpy dtype matching the configured precision string.
        dtype = (np.float64 if self.dtype == "double" else np.float32)
        optSolver = None
        # Build the solver selected by 'solverName'. If no branch matches,
        # optSolver stays None and is passed through as-is (behavior then
        # depends on how daal4py handles optimizationSolver=None — verify).
        if self.optSolverParam['solverName'] == 'sgd':
            lrs = np.array([[self.optSolverParam['solverLearningRate']]], dtype=dtype)
            batchSize_ = int(self.optSolverParam['solverBatchSize'])
            method = self.optSolverParam["solverMethod"]
            # For the defaultDense SGD method the configured batch size is
            # overridden to 1 (one observation per iteration).
            if method == "defaultDense":
                batchSize_ = 1
            optSolver = d4p.optimization_solver_sgd(function = None, learningRateSequence = lrs,
                                                    method = method,
                                                    accuracyThreshold = dtype(self.optSolverParam['solverAccuracyThreshold']),
                                                    nIterations = int(self.optSolverParam['solverMaxIterations']),
                                                    batchSize = batchSize_
                                                    )
        if self.optSolverParam['solverName'] == 'lbfgs':
            # L-BFGS uses a step-length sequence plus correction-pair settings.
            sls = np.array([[self.optSolverParam['solverStepLength']]], dtype=dtype)
            optSolver = d4p.optimization_solver_lbfgs(function = None,
                                                      stepLengthSequence=sls,
                                                      accuracyThreshold = dtype(self.optSolverParam['solverAccuracyThreshold']),
                                                      nIterations = int(self.optSolverParam['solverMaxIterations']),
                                                      batchSize = int(self.optSolverParam['solverBatchSize']),
                                                      correctionPairBatchSize = int(self.optSolverParam['solverCorrectionPairBatchSize']),
                                                      L = int(self.optSolverParam['solverL'])
                                                      )
        if self.optSolverParam['solverName'] == 'adagrad':
            # AdaGrad takes a scalar learning rate (as a 1x1 table).
            lr = np.array([[self.optSolverParam['solverLearningRate']]], dtype=dtype)
            optSolver = d4p.optimization_solver_adagrad(function = None,
                                                        learningRate=lr,
                                                        accuracyThreshold = dtype(self.optSolverParam['solverAccuracyThreshold']),
                                                        nIterations = int(self.optSolverParam['solverMaxIterations']),
                                                        batchSize = int(self.optSolverParam['solverBatchSize'])
                                                        )

        # Train with the instance's regularization/intercept settings and the
        # solver constructed above.
        train_alg = d4p.logistic_regression_training(nClasses      = self.nClasses,
                                                     penaltyL1     = self.penaltyL1,
                                                     penaltyL2     = self.penaltyL2,
                                                     interceptFlag = self.interceptFlag,
                                                     fptype        = self.dtype,
                                                     optimizationSolver = optSolver
                                                     )
        self.trainingResult = train_alg.compute(train_data, train_labels)

        return self
Пример #10
0
def main():
    """Five-class penalized logistic regression returning full prediction info.

    Returns a (training_result, prediction_result, test_labels) triple.
    """
    n_classes, n_features = 5, 6

    # Training file: 6 feature columns followed by one class-label column.
    train_file = "./data/batch/logreg_train.csv"
    x_train = read_csv(train_file, range(n_features))
    y_train = read_csv(train_file, range(n_features, n_features + 1))

    # Regularized training with an intercept term.
    trainer = d4p.logistic_regression_training(nClasses=n_classes,
                                               penaltyL1=0.1,
                                               penaltyL2=0.1,
                                               interceptFlag=True)
    trained = trainer.compute(x_train, y_train)

    # Test file has the same layout; its labels are returned for the caller.
    test_file = "./data/batch/logreg_test.csv"
    x_test = read_csv(test_file, range(n_features))
    y_test = read_csv(test_file, range(n_features, n_features + 1))

    # Ask for labels, probabilities and log-probabilities in one pass.
    predictor = d4p.logistic_regression_prediction(
        nClasses=n_classes,
        resultsToCompute=
        "computeClassesLabels|computeClassesProbabilities|computeClassesLogProbabilities"
    )
    predicted = predictor.compute(x_test, trained.model)

    # Shape checks on every requested result table.
    assert predicted.prediction.shape == (x_test.shape[0], y_train.shape[1])
    assert predicted.probabilities.shape == (x_test.shape[0], n_classes)
    assert predicted.logProbabilities.shape == (x_test.shape[0], n_classes)

    return (trained, predicted, y_test)
Пример #11
0
from timeit import default_timer as timer
from sklearn.metrics import mean_squared_error
import daal4py as d4p
import numpy as np
import pandas as pd

import common

# Number of timed repetitions performed by the benchmark loop below.
NUM_LOOPS = 100
# Initialize the daal4py runtime before any algorithm objects are created.
d4p.daalinit()

print("Computing for Logistic Regression With Daal")
# Train one model at import time on the full shared dataset from 'common';
# the result is reused by the inference benchmark defined below.
MODEL = d4p.logistic_regression_training(nClasses=2)
train_result = MODEL.compute(common.X_dfc, common.y_dfc)


def run_inference(num_observations: int = 1000):
    """Run xgboost for specified number of observations"""
    # Load data
    test_df = common.get_test_data_df(X=common.X_dfc, size=num_observations)
    num_rows = len(test_df)
    ######################
    print("_______________________________________")
    print("Total Number of Rows", num_rows)
    run_times = []
    inference_times = []
    for _ in range(NUM_LOOPS):

        start_time = timer()
        predict_algo = d4p.logistic_regression_prediction(
            nClasses=2,