def test_foreshadow_serialization_adults_small_classification_override():
    """Fit on the small adult dataset, override two intents, then refit.

    A ``Foreshadow`` wrapping ``LogisticRegression`` is fitted and scored
    once, then the intents of the ``age`` and ``workclass`` columns are
    overridden to ``IntentType.CATEGORICAL`` and the model is fitted again.
    The test asserts that both overrides are still reported by
    ``get_intent`` after the second fit. The two scores are only printed;
    no assertion is made about them.

    ``get_file_path`` and ``ProblemType`` are taken from module scope.
    """
    from foreshadow.foreshadow import Foreshadow
    import pandas as pd
    import numpy as np
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression

    # Seed numpy's global RNG: train_test_split below gets no random_state.
    np.random.seed(1337)

    frame = pd.read_csv(get_file_path("data", "adult_small.csv"))
    features = frame.loc[:, "age":"workclass"]
    target = frame.loc[:, "class"]

    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2
    )

    model = Foreshadow(
        estimator=LogisticRegression(),
        problem_type=ProblemType.CLASSIFICATION,
    )
    model.fit(X_train, y_train)
    baseline_score = model.score(X_test, y_test)

    from foreshadow.intents import IntentType

    overridden_columns = ("age", "workclass")
    for column in overridden_columns:
        model.override_intent(column, IntentType.CATEGORICAL)

    # Refit so the overrides are applied, then check that they were kept.
    model.fit(X_train, y_train)
    for column in overridden_columns:
        assert model.get_intent(column) == IntentType.CATEGORICAL

    overridden_score = model.score(X_test, y_test)
    print(baseline_score, overridden_score)
def test_foreshadow_titanic(tmpdir):
    """Fit a TPOT-backed ``Foreshadow`` on the Titanic training data.

    Loads ``titanic-train.csv``, keeps the ``Pclass``..``Embarked`` columns
    minus ``SibSp``, ``Parch`` and ``Cabin``, and uses ``Survived`` as the
    target. The ``Name`` column's intent is overridden to
    ``IntentType.TEXT`` before fitting. The held-out score is printed; the
    test makes no assertions and passes as long as nothing raises.

    ``Foreshadow``, ``IntentType``, ``ProblemType``, ``train_test_split``
    and ``get_file_path`` are expected to be available from module scope.

    Args:
        tmpdir: pytest fixture; not used by the body of this test.
    """
    import pandas as pd

    titanic = pd.read_csv(get_file_path("data", "titanic-train.csv"))
    features = titanic.loc[:, "Pclass":"Embarked"]
    labels = titanic.loc[:, "Survived"]
    features = features.drop(columns=["SibSp", "Parch", "Cabin"])

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )

    from foreshadow.estimators import AutoEstimator

    # Forwarded to the underlying estimator; presumably caps the TPOT
    # search at one minute and fixes its seed -- confirm against TPOT docs.
    tpot_kwargs = {"max_time_mins": 1, "random_state": 42}
    auto_estimator = AutoEstimator(
        problem_type=ProblemType.CLASSIFICATION,
        auto="tpot",
        estimator_kwargs=tpot_kwargs,
    )

    model = Foreshadow(
        estimator=auto_estimator,
        problem_type=ProblemType.CLASSIFICATION,
    )
    model.override_intent(column_name="Name", intent=IntentType.TEXT)
    model.fit(X_train, y_train)

    print(model.score(X_test, y_test))