示例#1
0
def test_factorize():
    """Check year/month extraction from column 'b' and fit/transform parity.

    Uses the module-level ``df`` fixture, which is defined outside this
    snippet (presumably columns 'a' and a datetime 'b' -- confirm there).
    """
    factorizer = DateFactorizer(cols=['b'], features=("year", "month"))
    fitted_output = factorizer.fit_transform(df)

    # One new column per requested feature; no plain 'b' column is expected
    expected_columns = ['a', 'b_year', 'b_month']
    assert fitted_output.columns.tolist() == expected_columns

    # transform() on its own must reproduce what fit_transform() returned
    assert fitted_output.equals(factorizer.transform(df))
示例#2
0
def test_factorize_attribute_error():
    """Fitting with ``features="yr"`` is expected to raise AttributeError."""
    # features is passed as a single bare string instead of a tuple of names
    bad_features = "yr"
    factorizer = DateFactorizer(cols=['b'], features=bad_features)
    assert_raises(AttributeError, factorizer.fit, df)
示例#3
0
def test_factorize_preserve_original():
    """With ``drop_original=False`` column 'b' is expected to stay in place."""
    factorizer = DateFactorizer(
        cols=['b'],
        features=("year", "month"),
        drop_original=False,
    )
    result = factorizer.fit_transform(df)

    # The source column comes first, followed by its extracted features
    expected_columns = ['a', 'b', 'b_year', 'b_month']
    assert result.columns.tolist() == expected_columns
示例#4
0
def test_non_date_factorize():
    """Fitting on cols ["a", "b"] is expected to raise ValueError."""
    # Per the original note, the failure is because a column is not a
    # date time (presumably "a" -- confirm against the df fixture)
    factorizer = DateFactorizer(cols=["a", "b"])
    assert_raises(ValueError, factorizer.fit, df)
示例#5
0
def test_date_factorizer_asdf():
    """Run the shared ``assert_transformer_asdf`` check on DateFactorizer.

    NOTE(review): the helper is defined elsewhere; presumably it verifies
    the ``as_df`` output handling of the transformer -- confirm.
    """
    factorizer = DateFactorizer(cols=['b'])
    assert_transformer_asdf(factorizer, df)
示例#6
0
def test_date_factorizer_persistable():
    """Run the shared ``assert_persistable`` check on DateFactorizer.

    NOTE(review): the helper is defined elsewhere; presumably it
    round-trips the estimator through the file named by ``location``
    -- confirm.
    """
    factorizer = DateFactorizer(cols=['b'])
    assert_persistable(factorizer, location="loc.pkl", X=df)
示例#7
0
from datetime import datetime as dt

# #############################################################################
# create data
# Four timestamped transactions plus a fifth row whose timestamp is missing.
# Every timestamp string shares the same "%m-%d-%Y %H:%M:%S" layout.
data = [
    [txn_id,
     None if stamp is None else dt.strptime(stamp, "%m-%d-%Y %H:%M:%S")]
    for txn_id, stamp in (
        (1, "06-01-2018 12:00:05"),
        (2, "06-02-2018 13:19:12"),
        (3, "06-03-2018 06:04:17"),
        (4, "06-04-2018 03:56:32"),
        (5, None),
    )
]

df = pd.DataFrame.from_records(data, columns=["transaction_id", "time"])

# A date field can yield many features. With no ``features`` argument the
# factorizer falls back to its defaults (year, month, day and hour).
print("Default features:")
print(DateFactorizer(cols=['time']).fit_transform(df))

# The finer-grained feature set shared by the two examples below
all_features = ("year", "month", "day", "hour", "minute", "second")

# Ask for more than the defaults by listing the features explicitly
print("\n+Minutes, +Seconds:")
print(DateFactorizer(cols=['time'], features=all_features).fit_transform(df))

# Same request, but keep the original (pre-transform) time column as well
print("\nSame as above, but retain old time column:")
print(
    DateFactorizer(cols=['time'],
                   drop_original=False,
                   features=all_features).fit_transform(df))