def main():
    """Train a 5-class decision forest on the batch CSV data and predict.

    The first three CSV columns are read as features and the fourth as the
    label, for both the training and the test file.

    Returns:
        A tuple ``(train_result, predict_result, plabels)`` holding the
        training result, the prediction result and the labels read from
        the test file.
    """
    # Locations of the training and test data sets
    train_csv = "./data/batch/df_classification_train.csv"
    test_csv = "./data/batch/df_classification_test.csv"

    # Column layout shared by both files: 3 feature columns, then the label
    feature_columns = range(3)
    label_columns = range(3, 4)

    # Set up the training algorithm (5 classes)
    trainer = d4p.decision_forest_classification_training(
        5,
        nTrees=10,
        minObservationsInLeafNode=8,
        featuresPerNode=3,
        engine=d4p.engines_mt19937(seed=777),
        varImportance='MDI',
        bootstrap=True,
        resultsToCompute='computeOutOfBagError')

    # Load the training set and fit the forest
    features = read_csv(train_csv, feature_columns, t=np.float32)
    targets = read_csv(train_csv, label_columns, t=np.float32)
    train_result = trainer.compute(features, targets)
    # Training result provides (depending on parameters) model,
    # outOfBagError, outOfBagErrorPerObservation and/or variableImportance

    # Set up the prediction algorithm (same number of classes)
    predictor = d4p.decision_forest_classification_prediction(5)
    # Load the test set (same number of features as the training set)
    pdata = read_csv(test_csv, feature_columns, t=np.float32)
    plabels = read_csv(test_csv, label_columns, t=np.float32)
    # Predict with the model obtained from training
    predict_result = predictor.compute(pdata, train_result.model)

    # The prediction result holds one predicted value per test observation
    expected_shape = (pdata.shape[0], 1)
    assert predict_result.prediction.shape == expected_shape

    return train_result, predict_result, plabels
def compute(train_data, train_labels, predict_data, method='defaultDense'):
    """Train a 5-class decision forest and predict with the resulting model.

    Args:
        train_data: Observations used for training.
        train_labels: Labels belonging to ``train_data``.
        predict_data: Observations to run the prediction on.
        method: Computation method forwarded to the training algorithm.

    Returns:
        A tuple ``(train_result, predict_result)``.
    """
    # Training stage (5 classes). The training result provides (depending
    # on parameters) model, outOfBagError, outOfBagErrorPerObservation
    # and/or variableImportance.
    trainer = d4p.decision_forest_classification_training(
        5,
        fptype='float',
        nTrees=10,
        minObservationsInLeafNode=8,
        featuresPerNode=3,
        engine=d4p.engines_mt19937(seed=777),
        varImportance='MDI',
        bootstrap=True,
        resultsToCompute='computeOutOfBagError',
        method=method,
    )
    train_result = trainer.compute(train_data, train_labels)

    # Prediction stage: request both class labels and class probabilities
    # and apply the model obtained from training.
    predictor = d4p.decision_forest_classification_prediction(
        nClasses=5,
        fptype='float',
        resultsToEvaluate="computeClassLabels|computeClassProbabilities",
        votingMethod="unweighted",
    )
    predict_result = predictor.compute(predict_data, train_result.model)

    return train_result, predict_result
Exemplo n.º 3
0
    def _daal_predict(self, X):
        """Predict class labels for ``X`` with the fitted daal4py model.

        Args:
            X: Input samples; passed through ``self._validate_X_predict``
                before prediction.

        Returns:
            The entries of ``self.classes_`` selected by the predicted
            class indices, one per sample.
        """
        X = self._validate_X_predict(X)

        algorithm_kwargs = {
            'nClasses': int(self.n_classes_),
            'fptype': 'float',
        }
        # resultsToEvaluate is only passed for daal versions >= (2020, 1);
        # presumably older versions do not accept it -- TODO confirm.
        if not daal_version < (2020, 1):
            algorithm_kwargs['resultsToEvaluate'] = "computeClassLabels"

        dfc_algorithm = daal4py.decision_forest_classification_prediction(
            **algorithm_kwargs)
        dfc_predictionResult = dfc_algorithm.compute(X, self.daal_model_)

        pred = dfc_predictionResult.prediction

        # np.int was deprecated in NumPy 1.20 and removed in 1.24; use an
        # explicit fixed-width integer type for the class indices instead.
        class_indices = pred.ravel().astype(np.int64, casting='unsafe')
        return np.take(self.classes_, class_indices)
Exemplo n.º 4
0
def df_clsf_predict(X, training_result, n_classes, verbose=False):
    """Predict with a trained decision-forest classification model.

    Args:
        X: Observations to predict on.
        training_result: Training result whose ``model`` attribute is used.
        n_classes: Number of classes passed to the prediction algorithm.
        verbose: Accepted but not used by this function.

    Returns:
        The ``prediction`` attribute of the prediction result.
    """
    predictor = decision_forest_classification_prediction(
        nClasses=n_classes,
        # we give float here specifically to match sklearn
        fptype='float',
    )
    return predictor.compute(X, training_result.model).prediction
Exemplo n.º 5
0
    def daal_predict(self, X):
        """Predict class labels for ``X`` with the fitted daal4py model.

        Args:
            X: Input samples; passed through ``self._validate_X_predict``
                before prediction.

        Returns:
            The entries of ``self.classes_`` selected by the predicted
            class indices, one per sample.
        """
        X = self._validate_X_predict(X)

        dfc_algorithm = daal4py.decision_forest_classification_prediction(
            nClasses=int(self.n_classes_), fptype='float')
        dfc_predictionResult = dfc_algorithm.compute(X, self.daal_model_)

        pred = dfc_predictionResult.prediction

        # np.int was deprecated in NumPy 1.20 and removed in 1.24; use an
        # explicit fixed-width integer type for the class indices instead.
        class_indices = pred.ravel().astype(np.int64, casting='unsafe')
        return np.take(self.classes_, class_indices)
Exemplo n.º 6
0
def _daal_predict_proba(self, X):
    """Return the class probabilities daal4py computes for ``X``.

    The floating-point type handed to the algorithm is derived from ``X``
    via ``getFPType``; the model is taken from ``self.daal_model_``.
    """
    fptype = getFPType(X)
    predictor = daal4py.decision_forest_classification_prediction(
        nClasses=int(self.n_classes_),
        fptype=fptype,
        resultsToEvaluate="computeClassProbabilities",
    )
    result = predictor.compute(X, self.daal_model_)
    return result.probabilities
Exemplo n.º 7
0
def _daal_predict_proba(self, X):
    """Return the class probabilities daal4py computes for ``X``.

    ``X`` is passed through ``self._validate_X_predict`` first when
    ``daal_check_version((2021, 'P', 200))`` is false. The floating-point
    type handed to the algorithm is derived from ``X`` via ``getFPType``.
    """
    version_ok = daal_check_version((2021, 'P', 200))
    if not version_ok:
        X = self._validate_X_predict(X)

    fptype = getFPType(X)
    predictor = daal4py.decision_forest_classification_prediction(
        nClasses=int(self.n_classes_),
        fptype=fptype,
        resultsToEvaluate="computeClassProbabilities",
    )
    result = predictor.compute(X, self.daal_model_)
    return result.probabilities
Exemplo n.º 8
0
def _daal_predict_classifier(self, X):
    """Predict class labels for ``X`` with the fitted daal4py model.

    Returns:
        The entries of ``self.classes_`` selected by the predicted class
        indices, one per sample.

    Raises:
        ValueError: If the number of columns in ``X`` differs from
            ``self.n_features_in_``.
    """
    fptype = getFPType(X)
    predictor = daal4py.decision_forest_classification_prediction(
        nClasses=int(self.n_classes_),
        fptype=fptype,
        resultsToEvaluate="computeClassLabels",
    )

    # Reject inputs whose feature count does not match the fitted model
    if X.shape[1] != self.n_features_in_:
        message = (f'X has {X.shape[1]} features, '
                   f'but RandomForestClassifier is expecting '
                   f'{self.n_features_in_} features as input')
        raise ValueError(message)

    result = predictor.compute(X, self.daal_model_)

    # Map the predicted class indices back onto the original class labels
    class_indices = result.prediction.ravel().astype(np.int64,
                                                     casting='unsafe')
    return np.take(self.classes_, class_indices)
Exemplo n.º 9
0
    def predict(cls, input):
        """For the input, do the predictions and return them.

        The prediction algorithm is configured from the JSON file at
        ``param_path`` (keys ``nClasses``, ``fptype``, ``method`` and
        ``distributed``) and applied to the model returned by
        ``cls.get_model()``.

        Args:
            input (a pandas dataframe): The data on which to do the
                predictions. There will be one prediction per row in the
                dataframe.

        Returns:
            The result of the prediction algorithm's ``compute`` call.
        """
        # Keep the parameter file open only while it is being parsed.
        with open(param_path, "r") as pf:
            params = json.load(pf)

        predict_algo = decision_forest_classification_prediction(
            int(params["nClasses"]),
            fptype=params["fptype"],
            method=params["method"],
            # The flag is compared against the string "True"; any other
            # value disables distributed mode.
            distributed=params["distributed"] == "True")
        clf = cls.get_model()
        return predict_algo.compute(input, clf)