Example #1
def main(readcsv=read_csv, method='defaultDense'):
    nFeatures = 3
    nClasses = 5
    maxIterations = 200
    minObservationsInLeafNode = 8
    # input data file
    infile = "./data/batch/df_classification_train.csv"
    testfile = "./data/batch/df_classification_test.csv"

    # Configure a training object (5 classes)
    # previous versions of daal4py have a different interface (no varImportance support)
    from daal4py import __daal_link_version__ as dv
    daal_version = tuple(map(int, (dv[0:4], dv[4:8])))
    if daal_version < (2020, 0):
        train_algo = d4p.gbt_classification_training(
            nClasses=nClasses,
            maxIterations=maxIterations,
            minObservationsInLeafNode=minObservationsInLeafNode,
            featuresPerNode=nFeatures)
    else:
        train_algo = d4p.gbt_classification_training(
            nClasses=nClasses,
            maxIterations=maxIterations,
            minObservationsInLeafNode=minObservationsInLeafNode,
            featuresPerNode=nFeatures,
            varImportance='weight|totalCover|cover|totalGain|gain')

    # Read data. Let's use 3 features per observation
    data = readcsv(infile, range(3), t=np.float32)
    labels = readcsv(infile, range(3, 4), t=np.float32)
    train_result = train_algo.compute(data, labels)

    # Now let's do some prediction
    # previous versions of daal4py have a different interface (no resultsToEvaluate support)
    if daal_version < (2020, 0):
        predict_algo = d4p.gbt_classification_prediction(nClasses=nClasses)
    else:
        predict_algo = d4p.gbt_classification_prediction(
            nClasses=nClasses,
            resultsToEvaluate="computeClassLabels|computeClassProbabilities")
    # read test data (with same #features)
    pdata = readcsv(testfile, range(3), t=np.float32)
    # now predict using the model from the training above
    predict_result = predict_algo.compute(pdata, train_result.model)

    # Verify the predicted labels against the ground truth (misclassification rate below 2.2%)
    plabels = readcsv(testfile, range(3, 4), t=np.float32)
    assert np.count_nonzero(predict_result.prediction -
                            plabels) / pdata.shape[0] < 0.022

    return (train_result, predict_result, plabels)
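
A minimal driver for Example #1 might look like the sketch below. The probabilities field corresponds to the computeClassProbabilities flag requested for 2020+ versions, and the variableImportanceByWeight result field is an assumption based on the 'weight' option passed to varImportance.

if __name__ == "__main__":
    (train_result, predict_result, plabels) = main()
    # Predicted class labels, one row per test observation
    print("Predicted labels (first 10 rows):\n", predict_result.prediction[0:10])
    print("Ground truth (first 10 rows):\n", plabels[0:10])
    # Per-class probabilities are available when computeClassProbabilities was requested
    print("Probabilities shape:", predict_result.probabilities.shape)
    # Assumed result field name for the 'weight' importance requested during training
    print("Variable importance (weight):\n", train_result.variableImportanceByWeight)
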
Example #2
def main():
    nFeatures = 3
    nClasses = 5
    maxIterations = 40
    minObservationsInLeafNode = 8
    # input data file
    infile = "./data/batch/df_classification_train.csv"
    testfile = "./data/batch/df_classification_test.csv"

    # Configure a training object (5 classes)
    train_algo = d4p.gbt_classification_training(
        nClasses=nClasses,
        maxIterations=maxIterations,
        minObservationsInLeafNode=minObservationsInLeafNode,
        featuresPerNode=nFeatures)

    # Read data. Let's use 3 features per observation
    data = read_csv(infile, range(3))
    labels = read_csv(infile, range(3, 4))
    train_result = train_algo.compute(data, labels)

    # Now let's do some prediction
    predict_algo = d4p.gbt_classification_prediction(nClasses=nClasses)
    # read test data (with same #features)
    pdata = read_csv(testfile, range(3))
    plabels = read_csv(testfile, range(3, 4))
    # now predict using the model from the training above
    predict_result = predict_algo.compute(pdata, train_result.model)

    # The prediction result provides one predicted label per test observation
    assert (predict_result.prediction.shape == (pdata.shape[0], 1))

    return (train_result, predict_result, plabels)
Example #3
def main(readcsv=read_csv, method='defaultDense'):
    nFeatures = 3
    nClasses = 5
    maxIterations = 200
    minObservationsInLeafNode = 8
    # input data file
    infile = "./data/batch/df_classification_train.csv"
    testfile = "./data/batch/df_classification_test.csv"

    # Configure a training object (5 classes)
    train_algo = d4p.gbt_classification_training(
        nClasses=nClasses,
        maxIterations=maxIterations,
        minObservationsInLeafNode=minObservationsInLeafNode,
        featuresPerNode=nFeatures)

    # Read data. Let's use 3 features per observation
    data = readcsv(infile, range(3), t=np.float32)
    labels = readcsv(infile, range(3, 4), t=np.float32)
    train_result = train_algo.compute(data, labels)

    # Now let's do some prediction
    predict_algo = d4p.gbt_classification_prediction(nClasses=nClasses)
    # read test data (with same #features)
    pdata = readcsv(testfile, range(3), t=np.float32)
    # now predict using the model from the training above
    predict_result = predict_algo.compute(pdata, train_result.model)

    # Verify the predicted labels against the ground truth (misclassification rate below 2.2%)
    plabels = readcsv(testfile, range(3, 4), t=np.float32)
    assert np.count_nonzero(predict_result.prediction -
                            plabels) / pdata.shape[0] < 0.022

    return (train_result, predict_result, plabels)
Example #4
def main(readcsv=pd_read_csv, method='defaultDense'):
    # Path to data
    train_file = "./data/batch/df_classification_train.csv"
    test_file = "./data/batch/df_classification_test.csv"

    # Data reading
    X_train = readcsv(train_file, range(3), t=np.float32)
    y_train = readcsv(train_file, range(3, 4), t=np.float32)
    X_test = readcsv(test_file, range(3), t=np.float32)
    y_test = readcsv(test_file, range(3, 4), t=np.float32)

    # Datasets creation
    lgb_train = lgb.Dataset(X_train,
                            np.array(y_train).reshape(X_train.shape[0]),
                            free_raw_data=False)

    # training parameters setting
    params = {
        'max_bin': 256,
        'scale_pos_weight': 2,
        'lambda_l2': 1,
        'alpha': 0.9,
        'max_depth': 8,
        'num_leaves': 2**8,
        'verbose': -1,
        'objective': 'multiclass',
        'learning_rate': 0.3,
        'num_class': 5,
    }

    # Training
    lgb_model = lgb.train(params,
                          lgb_train,
                          valid_sets=lgb_train,
                          verbose_eval=False)

    # LightGBM prediction
    lgb_prediction = np.argmax(lgb_model.predict(X_test), axis=1)
    lgb_errors_count = np.count_nonzero(lgb_prediction - np.ravel(y_test))

    # Conversion to daal4py
    daal_model = d4p.get_gbt_model_from_lightgbm(lgb_model)

    # daal4py prediction
    daal_predict_algo = d4p.gbt_classification_prediction(
        nClasses=params["num_class"],
        resultsToEvaluate="computeClassLabels",
        fptype='float')
    daal_prediction = daal_predict_algo.compute(X_test, daal_model)
    daal_errors_count = np.count_nonzero(daal_prediction.prediction - y_test)
    assert np.absolute(lgb_errors_count - daal_errors_count) == 0

    return (lgb_prediction, lgb_errors_count,
            np.ravel(daal_prediction.prediction), daal_errors_count,
            np.ravel(y_test))
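
If per-class probabilities are also needed from the converted model, the same prediction class can evaluate both result types at once. The helper below is a hypothetical sketch (the function name is not part of the original example); it only reuses daal4py calls already shown above.

import daal4py as d4p

def predict_with_probabilities(daal_model, X, n_classes):
    # Hypothetical helper: request class labels and per-class probabilities in one pass
    algo = d4p.gbt_classification_prediction(
        nClasses=n_classes,
        resultsToEvaluate="computeClassLabels|computeClassProbabilities",
        fptype='float')
    result = algo.compute(X, daal_model)
    # result.prediction: shape (n_rows, 1); result.probabilities: shape (n_rows, n_classes)
    return result.prediction, result.probabilities
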
Example #5
def main(readcsv=pd_read_csv, method='defaultDense'):
    # Path to data
    train_file = "./data/batch/df_classification_train.csv"
    test_file = "./data/batch/df_classification_test.csv"

    # Data reading
    X_train = readcsv(train_file, range(3), t=np.float32)
    y_train = readcsv(train_file, range(3, 4), t=np.float32)
    X_test = readcsv(test_file, range(3), t=np.float32)
    y_test = readcsv(test_file, range(3, 4), t=np.float32)

    # Datasets creation
    xgb_train = xgb.DMatrix(X_train, label=np.array(y_train))
    xgb_test = xgb.DMatrix(X_test, label=np.array(y_test))

    # training parameters setting
    params = {
        'max_bin': 256,
        'scale_pos_weight': 2,
        'lambda_l2': 1,
        'alpha': 0.9,
        'max_depth': 8,
        'num_leaves': 2**8,
        'verbosity': 0,
        'objective': 'multi:softmax',
        'learning_rate': 0.3,
        'num_class': 5,
    }

    # Training
    xgb_model = xgb.train(params, xgb_train, num_boost_round=100)

    # XGBoost prediction
    xgb_prediction = xgb_model.predict(xgb_test)
    xgb_errors_count = np.count_nonzero(xgb_prediction - np.ravel(y_test))

    # Conversion to daal4py
    daal_model = d4p.get_gbt_model_from_xgboost(xgb_model)

    # daal4py prediction
    daal_predict_algo = d4p.gbt_classification_prediction(
        nClasses=params["num_class"],
        resultsToEvaluate="computeClassLabels",
        fptype='float')
    daal_prediction = daal_predict_algo.compute(X_test, daal_model)
    daal_errors_count = np.count_nonzero(daal_prediction.prediction - y_test)
    assert np.absolute(xgb_errors_count - daal_errors_count) == 0

    return (xgb_prediction, xgb_errors_count,
            np.ravel(daal_prediction.prediction), daal_errors_count,
            np.ravel(y_test))
Example #6
def main(readcsv=pd_read_csv, method='defaultDense'):
    # Path to data
    train_file = "./data/batch/df_classification_train.csv"
    test_file = "./data/batch/df_classification_test.csv"

    # Data reading
    X_train = readcsv(train_file, range(3), t=np.float32)
    y_train = readcsv(train_file, range(3, 4), t=np.float32)
    X_test = readcsv(test_file, range(3), t=np.float32)
    y_test = readcsv(test_file, range(3, 4), t=np.float32)

    # Datasets creation
    cb_train = cb.Pool(X_train, label=np.array(y_train))
    cb_test = cb.Pool(X_test, label=np.array(y_test))

    # training parameters setting
    params = {
        'reg_lambda': 1,
        'max_depth': 8,
        'num_leaves': 2**8,
        'verbose': 0,
        'objective': 'MultiClass',
        'learning_rate': 0.3,
        'n_estimators': 100,
        'classes_count': 5,
    }

    # Training
    cb_model = cb.CatBoost(params)
    cb_model.fit(cb_train)

    # CatBoost prediction
    cb_prediction = cb_model.predict(cb_test, prediction_type='Class').T[0]
    cb_errors_count = np.count_nonzero(cb_prediction - np.ravel(y_test))

    # Conversion to daal4py
    daal_model = d4p.get_gbt_model_from_catboost(cb_model)

    # daal4py prediction
    daal_predict_algo = d4p.gbt_classification_prediction(
        nClasses=params['classes_count'],
        resultsToEvaluate="computeClassLabels",
        fptype='float')
    daal_prediction = daal_predict_algo.compute(X_test, daal_model)
    daal_errors_count = np.count_nonzero(daal_prediction.prediction - y_test)
    assert np.absolute(cb_errors_count - daal_errors_count) == 0

    return (cb_prediction, cb_errors_count,
            np.ravel(daal_prediction.prediction), daal_errors_count,
            np.ravel(y_test))
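
The three conversion examples above (LightGBM, XGBoost, CatBoost) return the same 5-tuple, so a single assumed entry point can run any of them and report both error counts:

if __name__ == "__main__":
    (prediction, errors_count,
     daal_prediction, daal_errors_count, y_test) = main()
    print("Booster errors on test data: ", errors_count)
    print("daal4py errors on test data: ", daal_errors_count)
    # The assert inside main() already guarantees both error counts are equal
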
Example #7
    def _predict(self, X, resultsToEvaluate):
        # Check that fit has been called
        check_is_fitted(self, ['n_features_', 'n_classes_'])

        # Input validation
        X = check_array(X, dtype=[np.single, np.double])
        if X.shape[1] != self.n_features_:
            raise ValueError('Shape of input is different from what was seen in `fit`')

        # Trivial case
        if self.n_classes_ == 1:
            return np.full(X.shape[0], self.classes_[0])

        if not hasattr(self, 'daal_model_'):
            raise ValueError(("The class {} instance does not have 'daal_model_' attribute set. "
                              "Call 'fit' with appropriate arguments before using this method.").format(type(self).__name__))

        # Define type of data
        fptype = getFPType(X)

        # Prediction
        if daal_version < (2020, 1):
            predict_algo = d4p.gbt_classification_prediction(fptype=fptype,
                                                             nClasses=self.n_classes_)
        else:
            predict_algo = d4p.gbt_classification_prediction(fptype=fptype,
                                                             nClasses=self.n_classes_,
                                                             resultsToEvaluate=resultsToEvaluate)
        predict_result = predict_algo.compute(X, self.daal_model_)

        if resultsToEvaluate == "computeClassLabels":
            # Decode labels
            le = preprocessing.LabelEncoder()
            le.classes_ = self.classes_
            return le.inverse_transform(predict_result.prediction.ravel().astype(np.int64, copy=False))
        else:
            return predict_result.probabilities
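
The _predict helper above is usually exposed through thin public wrappers. The sketch below assumes the enclosing estimator follows the standard scikit-learn convention; these wrapper definitions are not part of the original excerpt.

    def predict(self, X):
        # Assumed wrapper: return decoded class labels
        return self._predict(X, "computeClassLabels")

    def predict_proba(self, X):
        # Assumed wrapper: return per-class probabilities, shape (n_samples, n_classes)
        return self._predict(X, "computeClassProbabilities")
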
Example #8
train_metric = None
if not X_train.equals(X_test):
    y_train_pred = model_lgbm.predict(X_train)
    train_metric = metric_func(y_train, y_train_pred)

t_lgbm_pred, y_test_pred = bench.measure_function_time(model_lgbm.predict,
                                                       X_test,
                                                       params=params)
test_metric_lgbm = metric_func(y_test, y_test_pred)

t_trans, model_daal = bench.measure_function_time(
    daal4py.get_gbt_model_from_lightgbm, model_lgbm, params=params)

if hasattr(params, 'n_classes'):
    predict_algo = daal4py.gbt_classification_prediction(
        nClasses=params.n_classes,
        resultsToEvaluate='computeClassLabels',
        fptype='float')
    t_daal_pred, daal_pred = bench.measure_function_time(predict_algo.compute,
                                                         X_test,
                                                         model_daal,
                                                         params=params)
    test_metric_daal = metric_func(y_test, daal_pred.prediction)
else:
    predict_algo = daal4py.gbt_regression_prediction()
    t_daal_pred, daal_pred = bench.measure_function_time(predict_algo.compute,
                                                         X_test,
                                                         model_daal,
                                                         params=params)
    test_metric_daal = metric_func(y_test, daal_pred.prediction)

utils.print_output(