Пример #1
0
def test_nans():
    """Check how ``AutoFeatRegressor`` reacts to NaNs in its input.

    The test injects one NaN into each of the rows 998 and 999 and then
    expects that:

    * ``fit_transform`` on the full data raises ``ValueError``,
    * ``fit_transform`` on the first 900 rows succeeds,
    * ``predict`` on the remaining rows (which hold the NaNs) raises
      ``ValueError``,
    * ``transform`` on the full data succeeds and keeps the NaNs in the
      original feature columns, with at least 2 NaNs in each affected row.

    Raises:
        AssertionError: if any of the expectations above is violated.
    """
    # nans are ok in transform but not fit or predict (due to sklearn model)
    columns = ["x 1.1", 2, "x/3"]
    X, target = get_random_data()
    # NOTE(review): assumes get_random_data() returns at least 1000 rows
    # without NaNs, so rows 0-899 stay clean -- confirm against the helper.
    X[998, 0] = np.nan
    X[999, 1] = np.nan
    afreg = AutoFeatRegressor(verbose=1, feateng_steps=3)

    # Fitting on data that contains NaNs must be rejected.
    try:
        _ = afreg.fit_transform(pd.DataFrame(X, columns=columns),
                                pd.DataFrame(target))
    except ValueError:
        pass
    else:
        raise AssertionError("fit with NaNs should throw an error")

    # Fit on the NaN-free head of the data so the model is usable below.
    _ = afreg.fit_transform(pd.DataFrame(X[:900], columns=columns),
                            pd.DataFrame(target[:900]))

    # Rows 998 and 999 are part of X[900:], so predict sees the NaNs.
    try:
        _ = afreg.predict(pd.DataFrame(X[900:], columns=columns))
    except ValueError:
        pass
    else:
        raise AssertionError("predict with NaNs should throw an error")

    # transform alone must tolerate NaNs and pass them through.
    df = afreg.transform(pd.DataFrame(X, columns=columns))
    assert pd.isna(df.iloc[998, 0]) and pd.isna(df.iloc[999, 1]), \
        "Original features should be NaNs"
    assert int(pd.isna(df.iloc[998]).sum()) >= 2, \
        "There should be at least 2 NaNs in row 998"
    # The message previously claimed "at least 3" while the check was >= 2.
    assert int(pd.isna(df.iloc[999]).sum()) >= 2, \
        "There should be at least 2 NaNs in row 999"
Пример #2
0
def main():
    """Load ``dataset.csv`` from a GitHub repository and print its summary.

    The repository is taken from the ``GITHUB_REPOSITORY`` environment
    variable (presumably provided by GitHub Actions -- verify for other
    environments); the file is read from the ``master`` branch via
    ``raw.githubusercontent.com``.

    Raises:
        KeyError: if ``GITHUB_REPOSITORY`` is not set in the environment.
    """
    # Get the dataset from the users GitHub repository
    repository = os.environ["GITHUB_REPOSITORY"]
    url_parts = ["https://raw.githubusercontent.com/", repository, "/master/dataset.csv"]
    frame = pd.read_csv("".join(url_parts))
    # Blank line to separate the statistics from any earlier output.
    print()
    print(frame.describe())

# Experiment 1: fit on the clean targets with 0 to 4 feature engineering
# steps and plot predictions against the targets for each setting.
for steps in range(5):
    # Re-seed on every iteration so each setting starts from the same state.
    np.random.seed(55)
    print("### AutoFeat with %i feateng_steps" % steps)
    afreg = AutoFeatRegressor(verbose=1, feateng_steps=steps)
    df = afreg.fit_transform(df_org, target)
    r2 = afreg.score(df_org, target)
    print("## Final R^2: %.4f" % r2)
    plt.figure()
    predictions = afreg.predict(df_org)
    plt.scatter(predictions, target, s=2)
    n_new_features = len(afreg.new_feat_cols_)
    plt.title("%i FE steps (R^2: %.4f; %i new features)" % (steps, r2, n_new_features))

# Experiment 2: fit on noisy, then very noisy, targets with 3 steps and
# evaluate each model against the real targets.
for training_target in (target_noisy, target_very_noisy):
    afreg = AutoFeatRegressor(verbose=1, feateng_steps=3)
    # train on noisy data
    df = afreg.fit_transform(df_org, training_target)
    # test on real targets
    # NOTE(review): scoring/predicting uses the transformed frame `df` here,
    # whereas the loop above uses `df_org` -- kept as in the original.
    print("Final R^2: %.4f" % afreg.score(df, target))
    plt.figure()
    predictions = afreg.predict(df)
    plt.scatter(predictions, target, s=2)