Example #1
def test_bagging():
    X, y = load_iris(return_X_y=True)
    X_ = X.tolist()
    for y_ in [y, (y == 0).astype(int), (y == 2).astype(int)]:
        for n_estimators in [1, 10]:
            for max_depth in [5, 10, None]:
                for max_features in [0.75, 1.0]:
                    dt = DecisionTreeClassifier(max_depth=max_depth,
                                                random_state=5)
                    clf = BaggingClassifier(
                        dt,
                        bootstrap=False,
                        n_estimators=n_estimators,
                        random_state=5,
                        max_features=max_features,
                    )
                    clf.fit(X, y_)
                    clf_ = convert_estimator(clf)

                    for method in METHODS:
                        with warnings.catch_warnings():
                            warnings.simplefilter("ignore")
                            scores = getattr(clf, method)(X)
                        scores_ = getattr(clf_, method)(X_)
                        assert np.allclose(scores.shape, shape(scores_))
                        assert np.allclose(scores, scores_, equal_nan=True)
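These snippets share fixtures defined elsewhere in the test suite: METHODS, shape, tosparse, and a module-level X. A minimal sketch of plausible definitions (the names come from the snippets; the bodies here are assumptions, not the suite's actual code):

import numpy as np
from scipy import sparse

# Estimator methods compared between the fitted sklearn object and its
# converted counterpart (assumed list; some tests guard with hasattr).
METHODS = ["predict", "predict_proba", "predict_log_proba"]

def shape(X):
    # Shape of a nested-list matrix, analogous to numpy's ndarray.shape.
    if isinstance(X[0], (list, tuple)):
        return (len(X), len(X[0]))
    return (len(X),)

def tosparse(X):
    # Convert a dense array or nested list to SciPy CSR format.
    return sparse.csr_matrix(np.asarray(X))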
Example #2
def test_dict_vectorizer():
    dv = DictVectorizer()
    dv.fit(X)
    dv_ = convert_estimator(dv)
    dv_t = dv.transform(X)
    dv_t_ = dv_.transform(X)
    assert np.allclose(dv_t.toarray(), dv_t_.todense())
Example #3
def test_max_abs_scaler():
    tform = MaxAbsScaler()
    tform.fit(X)
    tform_ = convert_estimator(tform)
    X_t = tform.transform(X)
    X_t_ = tform_.transform(X)
    assert np.allclose(X_t, X_t_)
Example #4
def test_dict_vectorizer_dense():
    dv = DictVectorizer(sparse=False)
    dv.fit(X)
    dv_ = convert_estimator(dv)
    dv_t = dv.transform(X)
    dv_t_ = dv_.transform(X)
    assert np.allclose(dv_t, dv_t_)
Example #5
def test_normalizer():
    for norm in ["l1", "l2", "max"]:
        tform = Normalizer(norm=norm)
        tform.fit(X)
        tform_ = convert_estimator(tform)
        X_t = tform.transform(X)
        X_t_ = tform_.transform(X)
        assert np.allclose(X_t, X_t_)
Example #6
def test_max_abs_scaler_sparse():
    X_sparse = tosparse(X)
    tform = MaxAbsScaler()
    tform.fit(X)
    tform_ = convert_estimator(tform)
    X_t = tform.transform(X)
    X_t_ = tform_.transform(X_sparse)
    assert np.allclose(X_t, X_t_.todense())
Example #7
def test_min_max_scaler():
    for feature_range in [(0, 1), (1, 2), (-1, 1)]:
        tform = MinMaxScaler(feature_range=feature_range)
        tform.fit(X)
        tform_ = convert_estimator(tform)
        X_t = tform.transform(X)
        X_t_ = tform_.transform(X)
        assert np.allclose(X_t, X_t_)
Example #8
def test_tfidf_vectorizer():
    for norm in ["l1", "l2", None]:
        vec = TfidfVectorizer(norm=norm)
        vec.fit(X)
        vec_ = convert_estimator(vec)
        assert np.allclose(
            vec.transform(X).toarray(),
            vec_.transform(X).todense())
Example #9
def test_onehotencoder():
    X0 = [["Male", 1], ["Female", 3], ["Female", 2]]
    X1 = [["Male", 1], ["Female", 27], ["Bananas", 2]]
    for X in [X0, X1]:
        ohe = OneHotEncoder(handle_unknown="ignore")
        ohe.fit(X)
        ohe_ = convert_estimator(ohe)
        assert np.allclose(ohe.transform(X).toarray(), ohe_.transform(X).todense())
Example #10
def test_ordinalencoder():
    X0 = [["Male", 1], ["Female", 3], ["Female", 2]]
    X1 = [["Male", 1], ["Female", 27], ["Bananas", 2]]
    for X in [X0, X1]:
        enc = OrdinalEncoder()
        enc.fit(X)
        enc_ = convert_estimator(enc)
        assert np.allclose(enc.transform(X), enc_.transform(X))
Example #11
def convert_to_pure_predict():
    classifier_path = "friend_rating_classifier.pkl"
    classifier = db_functions.load_pickle(classifier_path)
    clf_pure_predict = convert_estimator(classifier)
    db_functions.save_pickle(clf_pure_predict,
                             "friend_rating_classifier_pure_predict.pkl")
Example #12
def test_standard_scaler_sparse():
    X_sparse = tosparse(X)
    for with_std in [True, False]:
        tform = StandardScaler(with_mean=False, with_std=with_std)
        tform.fit(X)
        tform_ = convert_estimator(tform)
        X_t = tform.transform(X)
        X_t_ = tform_.transform(X_sparse)
        assert np.allclose(X_t, X_t_.todense())
Example #13
def test_normalizer_sparse():
    X_sparse = tosparse(X)
    for norm in ["l1", "l2", "max"]:
        tform = Normalizer(norm=norm)
        tform.fit(X)
        tform_ = convert_estimator(tform)
        X_t = tform.transform(X)
        X_t_ = tform_.transform(X_sparse)
        assert np.allclose(X_t, X_t_.todense())
Example #14
def test_standard_scaler():
    for with_mean in [True, False]:
        for with_std in [True, False]:
            tform = StandardScaler(with_mean=with_mean, with_std=with_std)
            tform.fit(X)
            tform_ = convert_estimator(tform)
            X_t = tform.transform(X)
            X_t_ = tform_.transform(X)
            assert np.allclose(X_t, X_t_)
Example #15
def test_feature_union_sparse():
    X, y = load_iris(return_X_y=True)
    X_ = tosparse(X.tolist())

    union = FeatureUnion([("ss", StandardScaler(with_mean=False)),
                          ("mms", MaxAbsScaler())])
    union.fit(X, y)
    union_ = convert_estimator(union)
    assert np.allclose(union.transform(X), union_.transform(X_).todense())
Example #16
def test_hashing_vectorizer():
    for norm in ["l1", "l2", None]:
        vec = HashingVectorizer(n_features=2**8, norm=norm)
        vec.fit(X)
        vec_ = convert_estimator(vec)
        X_t = vec.transform(X)
        X_t_ = vec_.transform(X)
        assert np.allclose(X_t.toarray(), X_t_.todense())
Example #17
def test_feature_union():
    X, y = load_iris(return_X_y=True)
    X_ = X.tolist()

    union = FeatureUnion([
        ("imp_mean", SimpleImputer(strategy="mean")),
        ("imp_median", SimpleImputer(strategy="median")),
    ])
    union.fit(X, y)
    union_ = convert_estimator(union)
    assert np.allclose(union.transform(X), union_.transform(X_))
Example #18
def test_pipeline():
    X, y = load_iris(return_X_y=True)
    X_ = X.tolist()

    lr = LogisticRegression(solver="lbfgs", multi_class="auto", max_iter=1000)
    pipe = Pipeline(steps=[
        ("imp", SimpleImputer()),
        ("lr", lr),
    ])
    pipe.fit(X, y)
    pipe_ = convert_estimator(pipe)
    assert np.allclose(pipe.predict_proba(X), pipe_.predict_proba(X_))
Example #19
def test_dummy():
    X, y = load_iris(return_X_y=True)
    X_ = X.tolist()
    for y_ in [y, (y == 0).astype(int), (y == 2).astype(int)]:
        clf = DummyClassifier(strategy="prior")
        clf.fit(X, y_)
        clf_ = convert_estimator(clf)

        for method in METHODS:
            scores = getattr(clf, method)(X)
            scores_ = getattr(clf_, method)(X_)
            assert np.allclose(scores.shape, shape(scores_))
            assert np.allclose(scores, scores_, equal_nan=True)
Example #20
    def load(cls, vectorizer_folder, realtime=False):
        """
        Load a saved object.

        Parameters
        ----------
        vectorizer_folder: str
            Folder to load the model from
        realtime: bool
            If True, the vectorizers are converted for realtime inference

        """
        with open(pathlib.Path(vectorizer_folder, "vectorizer_query.pkl"),
                  "rb") as pfile:
            model_query = pickle.load(pfile)
        with open(pathlib.Path(vectorizer_folder, "vectorizer_prefix.pkl"),
                  "rb") as pfile:
            model_prefix = pickle.load(pfile)
        with open(pathlib.Path(vectorizer_folder, "delim.json"), "r") as jfile:
            delim = json.load(jfile)["delim"]
        try:
            with open(pathlib.Path(vectorizer_folder, "max_prefix_len.json"),
                      "r") as jfile:
                max_prefix_len = json.load(jfile)["max_prefix_len"]
        except Exception:
            LOGGER.warning(
                "max_prefix_len.json file not found. Max Prefix Len set to null"
            )
            max_prefix_len = None
        if realtime:
            # Convert to faster, predict-only pure-Python versions.
            model_query = convert_estimator(model_query)
            model_prefix = convert_estimator(model_prefix)
        return cls(
            model_query=model_query,
            model_prefix=model_prefix,
            delim=delim,
            max_prefix_len=max_prefix_len,
        )
Example #21
def test_complement():
    X, y = load_iris(return_X_y=True)
    X_ = X.tolist()
    for y_ in [y, (y == 0).astype(int), (y == 2).astype(int)]:
        clf = ComplementNB()
        clf.fit(X, y_)
        clf_ = convert_estimator(clf)

        for method in METHODS:
            scores = getattr(clf, method)(X)
            scores_ = getattr(clf_, method)(X_)
            assert np.allclose(scores.shape, shape(scores_))
            assert np.allclose(scores, scores_, equal_nan=True)
Example #22
def test_ridge():
    X, y = load_iris(return_X_y=True)
    X_ = X.tolist()
    for y_ in [y, (y == 0).astype(int), (y == 2).astype(int)]:
        for fit_intercept in [True, False]:
            clf = RidgeClassifier(fit_intercept=fit_intercept)
            clf.fit(X, y_)
            clf_ = convert_estimator(clf)

            for method in METHODS:
                scores = getattr(clf, method)(X)
                scores_ = getattr(clf_, method)(X_)
                assert np.allclose(scores.shape, shape(scores_))
                assert np.allclose(scores, scores_)
Example #23
def convert_classifier(path, origin: str):
    # convert to pure python estimator
    dir_path = os.path.dirname(path)
    filename = os.path.basename(path)
    filename, _ = os.path.splitext(filename)

    print("Loading classifier...")
    if origin.lower() == 'simba':
        clf = load_classifier_SIMBA(path)
        clf_pure_predict = convert_estimator(clf)
        with open(dir_path + "/" + filename + "_pure.sav", "wb") as f:
            pickle.dump(clf_pure_predict, f)

    elif origin.lower() == 'bsoid':
        clf_pack = load_classifier_BSOID(path)
        # bsoid exported classifier has format [a, b, c, clf, d, e]
        clf_pure_predict = convert_estimator(clf_pack[3])
        clf_pack[3] = clf_pure_predict
        with open(dir_path + "/" + filename + "_pure.sav", "wb") as f:
            joblib.dump(clf_pack, f)
    else:
        raise ValueError(f'{origin} is not a valid classifier origin.')

    print(f"Converted Classifier {filename}")
Example #24
def test_extra_tree_clf():
    X, y = load_iris(return_X_y=True)
    X_ = X.tolist()
    for y_ in [y, (y == 0).astype(int), (y == 2).astype(int)]:
        for max_depth in [5, 10, None]:
            clf = ExtraTreeClassifier(max_depth=max_depth, random_state=5)
            clf.fit(X, y_)
            clf_ = convert_estimator(clf)

            for method in METHODS:
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    scores = getattr(clf, method)(X)
                scores_ = getattr(clf_, method)(X_)
                assert np.allclose(scores.shape, shape(scores_))
                assert np.allclose(scores, scores_, equal_nan=True)
Example #25
def test_extra_tree_reg():
    X, y = load_iris(return_X_y=True)
    X_ = X.tolist()
    for y_ in [(y == 0).astype(int), (y == 2).astype(int)]:
        for max_depth in [5, 10, None]:
            clf = ExtraTreeRegressor(max_depth=max_depth, random_state=5)
            clf.fit(X, y_)
            clf_ = convert_estimator(clf)

            for method in ["predict"]:
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    scores = getattr(clf, method)(X)
                scores_ = getattr(clf_, method)(X_)
                assert np.allclose(scores.shape, shape(scores_))
                assert np.allclose(scores, scores_, equal_nan=True)
Example #26
def test_decision_tree_clf():
    X, y = load_iris(return_X_y=True)
    X_ = X.tolist()
    X_sparse = tosparse(X_)
    for y_ in [y, (y == 0).astype(int), (y == 2).astype(int)]:
        for max_depth in [5, 10, None]:
            clf = DecisionTreeClassifier(max_depth=max_depth, random_state=5)
            clf.fit(X, y_)
            clf_ = convert_estimator(clf)

            for method in METHODS:
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    scores = getattr(clf, method)(X)
                scores_ = getattr(clf_, method)(X_)
                scores_sparse = getattr(clf_, method)(X_sparse)
                assert np.allclose(scores, scores_, equal_nan=True)
                assert np.allclose(scores, scores_sparse, equal_nan=True)
Example #27
def test_missing_indicator():
    X, y = load_iris(return_X_y=True)
    for missing_values in [np.nan, X[0][0], X[-1][1]]:
        # Reload a fresh copy each iteration; the nan branch mutates X in place.
        X, y = load_iris(return_X_y=True)
        if np.isnan(missing_values):
            X.ravel()[np.random.choice(X.size, 20, replace=False)] = np.nan
        X_ = X.tolist()
        for features in ["missing-only", "all"]:
            imp = MissingIndicator(
                features=features, missing_values=missing_values, error_on_new=False
            )
            imp.fit(X)
            imp_ = convert_estimator(imp)

            X_t = getattr(imp, "transform")(X)
            X_t_ = getattr(imp_, "transform")(X_)
            assert np.allclose(X_t.shape, shape(X_t_))
            assert np.allclose(X_t, X_t_)
Example #28
def test_xgboost():
    X, y = load_iris(return_X_y=True)
    X_ = X.tolist()
    for y_ in [y, (y == 0).astype(int), (y == 2).astype(int)]:
        for n_estimators in [2, 10]:
            for max_depth in [3, 10]:
                clf = XGBClassifier(
                    booster="gbtree",
                    random_state=5,
                    n_estimators=n_estimators,
                    max_depth=max_depth,
                )
                clf.fit(X, y_)
                clf_ = convert_estimator(clf)
                for method in METHODS:
                    scores = getattr(clf, method)(X)
                    scores_ = getattr(clf_, method)(X_)
                    assert np.allclose(scores, scores_, equal_nan=True)
Example #29
def test_sgd():
    X, y = load_iris(return_X_y=True)
    X_ = X.tolist()
    for y_ in [y, (y == 0).astype(int), (y == 2).astype(int)]:
        for loss in LOSSES:
            for fit_intercept in [True, False]:
                clf = SGDClassifier(fit_intercept=fit_intercept,
                                    max_iter=MAX_ITER,
                                    tol=TOL,
                                    loss=loss)
                clf.fit(X, y_)
                clf_ = convert_estimator(clf)

                for method in METHODS:
                    if hasattr(clf, method) and hasattr(clf_, method):
                        with warnings.catch_warnings():
                            warnings.simplefilter("ignore")
                            scores = getattr(clf, method)(X)
                        scores_ = getattr(clf_, method)(X_)
                        assert np.allclose(scores.shape, shape(scores_))
                        assert np.allclose(scores, scores_, equal_nan=True)
Example #30
def test_logistic():
    X, y = load_iris(return_X_y=True)
    X_ = X.tolist()
    X_sparse = tosparse(X_)
    for y_ in [y, (y == 0).astype(int), (y == 2).astype(int)]:
        for multi_class in ["ovr", "multinomial"]:
            for fit_intercept in [True, False]:
                clf = LogisticRegression(
                    solver=SOLVER,
                    multi_class=multi_class,
                    fit_intercept=fit_intercept,
                    max_iter=MAX_ITER,
                )
                clf.fit(X, y_)
                clf_ = convert_estimator(clf)

                for method in METHODS:
                    scores = getattr(clf, method)(X)
                    scores_ = getattr(clf_, method)(X_)
                    scores_sparse = getattr(clf_, method)(X_sparse)
                    assert np.allclose(scores, scores_)
                    assert np.allclose(scores, scores_sparse, equal_nan=True)
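Every example above follows the same round trip: fit a scikit-learn estimator, convert it with convert_estimator, and check that the converted object reproduces the original's outputs on plain lists or SciPy sparse input. A condensed, standalone version of that pattern (assuming the pure-predict package, whose converter lives in pure_sklearn.map):

import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from pure_sklearn.map import convert_estimator

X, y = load_iris(return_X_y=True)
clf = LogisticRegression(max_iter=1000).fit(X, y)

# The converted estimator is pure Python: it predicts from nested lists
# with no numpy/scipy dependency at inference time.
clf_ = convert_estimator(clf)
assert np.allclose(clf.predict_proba(X), clf_.predict_proba(X.tolist()))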