Пример #1
0
def test_foreshadow_serialization_adults_small_classification_override():
    """Refit a Foreshadow classifier after overriding two column intents.

    Loads ``adult_small.csv``, uses the columns ``"age"`` through
    ``"workclass"`` as features and ``"class"`` as the label, and fits a
    ``Foreshadow`` wrapping ``LogisticRegression``. The intents of ``"age"``
    and ``"workclass"`` are then overridden to ``IntentType.CATEGORICAL``,
    the model is refit, and ``get_intent`` is checked for both columns.

    The scores before and after the override are only printed, never
    asserted. No serialization call appears in this body despite the name.
    """
    from foreshadow.foreshadow import Foreshadow
    import pandas as pd
    import numpy as np
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression
    from foreshadow.intents import IntentType

    # Seed NumPy's global RNG up front; the split below is given no
    # random_state of its own, so this is presumably what makes it repeatable.
    np.random.seed(1337)

    frame = pd.read_csv(get_file_path("data", "adult_small.csv"))
    features = frame.loc[:, "age":"workclass"]
    labels = frame.loc[:, "class"]

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2
    )

    model = Foreshadow(
        estimator=LogisticRegression(),
        problem_type=ProblemType.CLASSIFICATION,
    )
    model.fit(X_train, y_train)
    baseline_score = model.score(X_test, y_test)

    # Apply both overrides first, then refit once, then verify each column.
    overridden_columns = ("age", "workclass")
    for column in overridden_columns:
        model.override_intent(column, IntentType.CATEGORICAL)
    model.fit(X_train, y_train)

    for column in overridden_columns:
        assert model.get_intent(column) == IntentType.CATEGORICAL

    override_score = model.score(X_test, y_test)
    print(baseline_score, override_score)
Пример #2
0
def test_foreshadow_titanic(tmpdir):
    """Fit Foreshadow with a TPOT-backed AutoEstimator on the Titanic data.

    Reads ``titanic-train.csv``, takes the columns ``"Pclass"`` through
    ``"Embarked"`` minus ``"SibSp"``, ``"Parch"`` and ``"Cabin"`` as features
    and ``"Survived"`` as the label, then holds out 20% of the rows with a
    fixed ``random_state``. The ``"Name"`` column's intent is overridden to
    ``IntentType.TEXT`` before fitting. The held-out score is printed, not
    asserted.

    Args:
        tmpdir: Accepted in the signature but not used in this body.
    """
    import pandas as pd
    from foreshadow.estimators import AutoEstimator

    titanic = pd.read_csv(get_file_path("data", "titanic-train.csv"))
    labels = titanic.loc[:, "Survived"]
    features = titanic.loc[:, "Pclass":"Embarked"].drop(
        columns=["SibSp", "Parch", "Cabin"]
    )

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )

    # Options forwarded to the automl backend through estimator_kwargs.
    tpot_options = {"max_time_mins": 1, "random_state": 42}
    auto_estimator = AutoEstimator(
        problem_type=ProblemType.CLASSIFICATION,
        auto="tpot",
        estimator_kwargs=tpot_options,
    )

    model = Foreshadow(
        estimator=auto_estimator,
        problem_type=ProblemType.CLASSIFICATION,
    )

    # The override is registered before the first fit.
    model.override_intent(column_name="Name", intent=IntentType.TEXT)
    model.fit(X_train, y_train)

    print(model.score(X_test, y_test))