Exemplo n.º 1
0
def test_online_batch_consistent():
    """Check that mini-batch and one-by-one learning give the same MacroF1.

    Two identically configured pipelines (standard scaling followed by a
    one-vs-rest logistic regression) are trained progressively on the first
    31 rows of the ImageSegments dataset: one through the ``*_many`` API fed
    with single-row chunks, the other through the ``*_one`` API. Each row is
    predicted before it is learned, and the two resulting metrics must match
    exactly.
    """

    def make_pipeline():
        # Both learners must share the exact same architecture.
        return (
            preprocessing.StandardScaler() |
            multiclass.OneVsRestClassifier(
                linear_model.LogisticRegression()
            )
        )

    # --- Mini-batch learner, fed one-row DataFrame chunks ---

    batch = make_pipeline()

    dataset = datasets.ImageSegments()

    batch_metric = metrics.MacroF1()

    chunks = pd.read_csv(dataset.path, chunksize=1)
    for step, chunk in enumerate(chunks):
        labels = chunk.pop('category')
        # Predict first, then learn, so the metric is progressive.
        predictions = batch.predict_many(chunk)
        batch.fit_many(chunk, labels)

        for truth, guess in zip(labels, predictions):
            if guess is None:
                continue
            batch_metric.update(truth, guess)

        # Stop once the chunk at index 30 has been processed.
        if step == 30:
            break

    # --- Online learner, fed one observation at a time ---

    online = make_pipeline()

    online_metric = metrics.MacroF1()

    features = pd.read_csv(dataset.path)
    targets = features.pop('category')

    for step, (x, y) in enumerate(stream.iter_pandas(features, targets)):
        guess = online.predict_one(x)
        online.fit_one(x, y)

        if guess is not None:
            online_metric.update(y, guess)

        # Same cut-off as the mini-batch loop.
        if step == 30:
            break

    assert online_metric.get() == batch_metric.get()
 def build_oracle(self) -> compose.Pipeline:
     """Build the oracle pipeline.

     Returns:
         A two-step ``compose.Pipeline``: a ``'scale'`` step holding a
         ``StandardScaler`` and a ``'learn'`` step holding a one-vs-rest
         classifier built on a logistic regression.
     """
     scaler = preprocessing.StandardScaler()
     learner = multiclass.OneVsRestClassifier(
         binary_classifier=linear_model.LogisticRegression())
     steps = (('scale', scaler), ('learn', learner))
     return compose.Pipeline(*steps)
Exemplo n.º 3
0
            else:
                inst = obj()

            yield inst


@pytest.mark.parametrize('estimator, check', [
    # One test case per (estimator, check) pair; each case receives its own
    # deep copy of the estimator.
    pytest.param(
        copy.deepcopy(estimator),
        check,
        id=f'{estimator}:{check.__name__}',
    )
    for estimator in [
        # Estimators discovered automatically ...
        *get_all_estimators(),
        # ... plus hand-picked estimators and pipelines.
        feature_extraction.TFIDF(),
        linear_model.LogisticRegression(),
        preprocessing.StandardScaler() | linear_model.LinearRegression(),
        preprocessing.StandardScaler() | linear_model.PAClassifier(),
        (
            preprocessing.StandardScaler()
            | multiclass.OneVsRestClassifier(linear_model.LogisticRegression())
        ),
        (
            preprocessing.StandardScaler()
            | multiclass.OneVsRestClassifier(linear_model.PAClassifier())
        ),
        naive_bayes.GaussianNB(),
        preprocessing.StandardScaler(),
        cluster.KMeans(n_clusters=5, seed=42),
        preprocessing.MinMaxScaler(),
        preprocessing.MinMaxScaler() + preprocessing.StandardScaler(),
        preprocessing.PolynomialExtender(),
        feature_selection.VarianceThreshold(),
        feature_selection.SelectKBest(similarity=stats.PearsonCorrelation()),
    ]
    for check in utils.estimator_checks.yield_checks(estimator)
])
def test_check_estimator(estimator, check):
    """Apply one estimator check to one estimator instance."""
    check(estimator)
Exemplo n.º 4
0
            elif issubclass(obj, multiclass.OneVsRestClassifier):
                inst = obj(binary_classifier=linear_model.LogisticRegression())

            else:
                inst = obj()

            yield inst


@pytest.mark.parametrize('estimator', [
    # Each test case receives its own deep copy of the estimator.
    pytest.param(copy.deepcopy(candidate), id=str(candidate))
    for candidate in [
        # Estimators discovered automatically ...
        *get_all_estimators(),
        # ... plus hand-picked estimators and pipelines.
        feature_extraction.TFIDF(),
        linear_model.LogisticRegression(),
        preprocessing.StandardScaler() | linear_model.LinearRegression(),
        preprocessing.StandardScaler() | linear_model.PAClassifier(),
        (
            preprocessing.StandardScaler()
            | multiclass.OneVsRestClassifier(linear_model.LogisticRegression())
        ),
        (
            preprocessing.StandardScaler()
            | multiclass.OneVsRestClassifier(linear_model.PAClassifier())
        ),
        naive_bayes.GaussianNB(),
        preprocessing.StandardScaler(),
        cluster.KMeans(n_clusters=5, seed=42),
        preprocessing.MinMaxScaler(),
        preprocessing.MinMaxScaler() + preprocessing.StandardScaler(),
        preprocessing.PolynomialExtender(),
        feature_selection.VarianceThreshold(),
        feature_selection.SelectKBest(similarity=stats.PearsonCorrelation()),
    ]
])
def test_check_estimator(estimator):
    """Run the full estimator check suite on one estimator instance."""
    utils.estimator_checks.check_estimator(estimator)
Exemplo n.º 5
0
    optimizer = optim.Adam(lr, beta_1, beta_2, eps)
elif (opt == "FTRLProximal"):
    optimizer = optim.FTRLProximal(alpha, beta, l1, l2)
elif (opt == "Momentum"):
    optimizer = optim.Momentum(lr, rho)
elif (opt == "RMSProp"):
    optimizer = optim.RMSProp(lr, rho, eps)
elif (opt == "VanillaSGD"):
    optimizer = optim.VanillaSGD(lr)
elif (opt == "NesterovMomentum"):
    optimizer = optim.NesterovMomentum(lr, rho)
else:
    optimizer = None

# Build the model: a logistic regression configured with the optimizer chosen
# above and the `l2` value, wrapped in a one-vs-rest multiclass classifier.
# NOTE(review): `optimizer` is None when `opt` matched none of the names in
# the chain above; confirm LogisticRegression accepts None here.
log_reg = linear_model.LogisticRegression(optimizer, l2=l2)
OVRClassifier = multiclass.OneVsRestClassifier(log_reg)

# Filled by the request loop below with the "Predict" and "Truth" entries.
output = {}

while True:

    #wait request
    data = input()

    Xi = json.loads(data)
    y = float(Xi.pop(target))

    output["Predict"] = OVRClassifier.predict_one(Xi)
    output["Truth"] = y

    model = OVRClassifier.fit_one(Xi, y)