Example #1
# imports added for context; sk-dist package layout (skdist.distribute.multiclass) assumed
import numpy as np
from sklearn.linear_model import LogisticRegression
from skdist.distribute.multiclass import DistOneVsRestClassifier


def test_ovr():
    X = np.array([[0, 0, 1, 1], [1, 1, 0, 0], [-1, -1, -1, -1]] * 100)
    y = np.array([0, 1, 2] * 100)
    ovr = DistOneVsRestClassifier(LogisticRegression(solver="liblinear"))
    ovr.fit(X, y)
    preds = ovr.predict(X[:3])
    assert np.allclose(preds, np.array([0, 1, 2]))
Example #2
# imports added for context; sk-dist package layout assumed as in Example #1
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from skdist.distribute.multiclass import DistOneVsRestClassifier


def test_multiclass(spark_session):
    sc = spark_session.sparkContext

    # variables
    solver = "liblinear"
    test_size = 0.2

    # load sample data (multiclass target: the 10 digit classes)
    data = load_digits()
    X = data["data"]
    y = data["target"]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=10
    )

    ### distributed one vs rest
    model = DistOneVsRestClassifier(LogisticRegression(solver=solver), sc)
    # distributed fitting with spark
    model.fit(X_train, y_train)
    # predictions on the driver
    preds = model.predict(X_test)

    assert preds.shape == y_test.shape
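
The test above relies on a spark_session pytest fixture. Below is a minimal sketch of such a fixture (an illustrative assumption, not part of the original example; plugins such as pytest-spark provide an equivalent fixture out of the box), placed in conftest.py:

import pytest
from pyspark.sql import SparkSession


@pytest.fixture(scope="session")
def spark_session():
    # local Spark session for tests; stopped once the test session ends
    spark = (
        SparkSession.builder.master("local[2]")
        .appName("skdist-multiclass-tests")
        .getOrCreate()
    )
    yield spark
    spark.stop()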
Example #3
# note: sc, solver, test_size and scoring_average are defined earlier in the
# full example script this snippet is taken from, alongside the imports
# load sample data (multiclass target: the 10 digit classes)
data = load_digits()
X = data["data"]
y = data["target"]
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=test_size,
                                                    random_state=10)

### distributed one vs rest
model = DistOneVsRestClassifier(LogisticRegression(solver=solver), sc)
# distributed fitting with spark
model.fit(X_train, y_train)
# predictions on the driver
preds = model.predict(X_test)
probs = model.predict_proba(X_test)

# results
print("-- One Vs Rest --")
print("Weighted F1: {0}".format(
    f1_score(y_test, preds, average=scoring_average)))
print("Precision: {0}".format(
    precision_score(y_test, preds, average=scoring_average)))
print("Recall: {0}".format(recall_score(y_test, preds,
                                        average=scoring_average)))
print(pickle.loads(pickle.dumps(model)))
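
# serving-side sketch (illustrative addition, not part of the original script):
# the pickle round-trip above shows the fitted meta-estimator can be serialized,
# so a restored copy can score new rows on the driver with no Spark involvement
restored_model = pickle.loads(pickle.dumps(model))
print(restored_model.predict(X_test[:5]))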

### distributed one vs one
model = DistOneVsOneClassifier(LogisticRegression(solver=solver), sc)
# distributed fitting with spark