Exemplo n.º 1
0
def test_serialization():
    """Round-trip Timedelta objects through their dictionary form.

    Every Timedelta must serialize to the expected argument dictionary via
    ``get_arguments`` and be rebuilt as an equal object by
    ``Timedelta.from_dictionary``.
    """
    # Each case pairs a Timedelta with the dictionary it should serialize to.
    cases = [
        (
            Timedelta(1, unit="w"),
            {"value": 1, "unit": "w", "entity_id": None, "inclusive": False},
        ),
        (
            Timedelta(3, unit="d", inclusive=True),
            {"value": 3, "unit": "d", "entity_id": None, "inclusive": True},
        ),
        (
            Timedelta(5, unit="o", entity="log"),
            {"value": 5, "unit": "o", "entity_id": "log", "inclusive": False},
        ),
    ]

    # Serialization direction: object -> dictionary.
    for delta, arguments in cases:
        assert arguments == delta.get_arguments()

    # Deserialization direction: dictionary -> object.
    for delta, arguments in cases:
        assert delta == Timedelta.from_dictionary(arguments)
Exemplo n.º 2
0
def test_deltas_week(es):
    """Check that adding one week equals adding seven days to a timestamp."""
    related_log = es.related_instances("customers", "log", 0)
    timestamps = related_log["datetime"].sort_values().tolist()
    one_week = Timedelta(1, "w")
    seven_days = Timedelta(7, "d")

    # Both deltas are applied to the earliest log timestamp.
    shifted_by_days = timestamps[0] + seven_days
    shifted_by_week = timestamps[0] + one_week
    assert shifted_by_days == shifted_by_week
Exemplo n.º 3
0
def test_accepts_relative_training_window(datetime_es):
    """Check which transactions dfs keeps for relative training windows.

    The training window is supplied both as a Timedelta and as a plain
    string; both forms must select the same rows.
    """

    def build_matrix(**options):
        # Only the feature matrix is inspected; the feature list is discarded.
        matrix, _ = dfs(entityset=datetime_es,
                        target_entity="transactions",
                        **options)
        return matrix

    unrestricted = build_matrix()
    with_cutoff = build_matrix(cutoff_time=pd.Timestamp("2012-4-1 04:00"))
    window_as_timedelta = build_matrix(
        cutoff_time=pd.Timestamp("2012-4-1 04:00"),
        training_window=Timedelta("3 months"),
    )
    window_as_string = build_matrix(
        cutoff_time=pd.Timestamp("2012-4-1 04:00"),
        training_window="3 months",
    )
    # Leap-year case: the cutoff falls on February 29th.
    leap_year = build_matrix(
        cutoff_time=pd.Timestamp("2012-2-29 04:00"),
        training_window=Timedelta("1 year"),
    )

    assert (unrestricted.index == [1, 2, 3, 4, 5]).all()
    assert (with_cutoff.index == [1, 2, 3, 4]).all()
    assert (window_as_timedelta.index == [2, 3, 4]).all()
    assert (window_as_string.index == [2, 3, 4]).all()
    assert (leap_year.index == [1, 2]).all()
Exemplo n.º 4
0
def test_delta_with_observations(es):
    """Check an observation-based Timedelta and its negation.

    Neither is absolute, and negation flips the sign of the value.
    """
    forward = Timedelta(4, "observations", "log")
    assert not forward.is_absolute()
    assert forward.value == 4

    backward = -forward
    assert not backward.is_absolute()
    assert backward.value == -4
Exemplo n.º 5
0
def test_serialization():
    """Round-trip single-unit and multi-unit Timedeltas through dictionaries."""
    # Single-unit cases: (Timedelta, expected serialized arguments).
    cases = [
        (Timedelta(1, unit="w"), {"value": 1, "unit": "w"}),
        (Timedelta(3, unit="d"), {"value": 3, "unit": "d"}),
        (Timedelta(5, unit="o"), {"value": 5, "unit": "o"}),
    ]

    for delta, arguments in cases:
        assert arguments == delta.get_arguments()

    for delta, arguments in cases:
        assert delta == Timedelta.from_dictionary(arguments)

    # A Timedelta built from several temporal parameters is checked on its
    # own: the serialized order is not deterministic, so only membership of
    # each (value, unit) pair is asserted.
    multi_delta = Timedelta({"years": 4, "months": 3, "days": 2})

    # Serialize
    units = multi_delta.get_arguments()["unit"]
    values = multi_delta.get_arguments()["value"]
    serialized_pairs = list(zip(values, units))

    for pair in [(4, "Y"), (3, "mo"), (2, "d")]:
        assert pair in serialized_pairs

    # Deserialize
    rebuilt = Timedelta.from_dictionary(
        {"value": [4, 3, 2], "unit": ["Y", "mo", "d"]}
    )
    assert multi_delta == rebuilt
Exemplo n.º 6
0
def test_deltas_week(es):
    """Check that adding one week equals adding seven days to a timestamp."""
    customer_id = 0

    # Keep only the sessions that belong to the chosen customer.
    sessions = to_pandas(es["sessions"])
    customer_sessions = sessions[sessions["customer_id"] == customer_id]

    # Keep only the log rows that belong to those sessions.
    logs = to_pandas(es["log"])
    in_customer_sessions = logs["session_id"].isin(customer_sessions["id"])
    customer_logs = logs[in_customer_sessions]

    timestamps = customer_logs["datetime"].sort_values().tolist()
    one_week = Timedelta(1, "w")
    seven_days = Timedelta(7, "d")

    assert timestamps[0] + seven_days == timestamps[0] + one_week
Exemplo n.º 7
0
def test_deltas_week(es):
    """Verify that Timedelta(1, "w") and Timedelta(7, "d") shift a timestamp equally."""
    customer_id = 0

    all_sessions = to_pandas(es["sessions"])
    owned_sessions = all_sessions[all_sessions["customer_id"] == customer_id]

    all_logs = to_pandas(es["log"])
    owned_logs = all_logs[all_logs["session_id"].isin(owned_sessions["id"])]

    # Sorted datetimes of the customer's log rows.
    sorted_times = owned_logs["datetime"].sort_values().tolist()
    week_delta = Timedelta(1, "w")
    day_delta = Timedelta(7, "d")

    plus_days = sorted_times[0] + day_delta
    plus_week = sorted_times[0] + week_delta
    assert plus_days == plus_week
Exemplo n.º 8
0
def test_deltas_week(es):
    """Check that adding one week equals adding seven days to a timestamp."""

    def materialize(frame):
        # Dask dataframes are computed before they are filtered.
        return frame.compute() if isinstance(frame, dd.DataFrame) else frame

    customer_id = 0
    sessions = materialize(es["sessions"].df)
    customer_sessions = sessions[sessions["customer_id"] == customer_id]

    logs = materialize(es["log"].df)
    customer_logs = logs[logs["session_id"].isin(customer_sessions["id"])]

    timestamps = customer_logs["datetime"].sort_values().tolist()
    one_week = Timedelta(1, "w")
    seven_days = Timedelta(7, "d")

    assert timestamps[0] + seven_days == timestamps[0] + one_week
Exemplo n.º 9
0
def test_accepts_relative_training_window(datetime_es):
    """Check which transactions dfs keeps for relative training windows.

    Covers a training window given as a Timedelta and as a string, plus a
    leap-year cutoff with ``include_cutoff_time`` switched on and off.
    """
    # TODO: Update to use Dask dataframes when issue #882 is closed

    def build_matrix(**options):
        # Only the feature matrix is inspected; the feature list is discarded.
        matrix, _ = dfs(
            entityset=datetime_es,
            target_dataframe_name="transactions",
            **options,
        )
        return matrix

    unrestricted = build_matrix()
    with_cutoff = build_matrix(cutoff_time=pd.Timestamp("2012-4-1 04:00"))
    window_as_timedelta = build_matrix(
        cutoff_time=pd.Timestamp("2012-4-1 04:00"),
        training_window=Timedelta("3 months"),
    )
    window_as_string = build_matrix(
        cutoff_time=pd.Timestamp("2012-4-1 04:00"),
        training_window="3 months",
    )

    assert (unrestricted.index == [1, 2, 3, 4, 5]).all()
    assert (with_cutoff.index == [1, 2, 3, 4]).all()
    assert (window_as_timedelta.index == [2, 3, 4]).all()
    assert (window_as_string.index == [2, 3, 4]).all()

    # Leap-year case: the cutoff falls on February 29th.
    leap_including_cutoff = build_matrix(
        cutoff_time=pd.Timestamp("2012-2-29 04:00"),
        training_window=Timedelta("1 year"),
        include_cutoff_time=True,
    )
    assert (leap_including_cutoff.index == [2]).all()

    leap_excluding_cutoff = build_matrix(
        cutoff_time=pd.Timestamp("2012-2-29 04:00"),
        training_window=Timedelta("1 year"),
        include_cutoff_time=False,
    )
    assert (leap_excluding_cutoff.index == [1, 2]).all()
Exemplo n.º 10
0
def test_delta_with_time_unit_matches_pandas(es):
    """Check that an hour-based Timedelta behaves like pandas.Timedelta.

    Adding or subtracting the delta (or its negation) must give the same
    timestamp as the equivalent ``pd.Timedelta`` arithmetic.
    """

    def materialize(frame):
        # Dask dataframes are computed before they are filtered.
        return frame.compute() if isinstance(frame, dd.DataFrame) else frame

    customer_id = 0
    sessions = materialize(es["sessions"].df)
    customer_sessions = sessions[sessions["customer_id"] == customer_id]

    logs = materialize(es["log"].df)
    customer_logs = logs[logs["session_id"].isin(customer_sessions["id"])]
    timestamps = customer_logs["datetime"].sort_values().tolist()

    # A four-hour delta and its negation.
    value = 4
    unit = "h"
    delta = Timedelta(value, unit)
    negated = -delta

    # Adding the delta matches adding the pandas equivalent ...
    first = timestamps[0]
    assert first + delta == first + pd.Timedelta(value, unit)
    # ... and so does subtracting the negated delta.
    assert first - negated == first + pd.Timedelta(value, unit)

    # Subtracting the delta matches subtracting the pandas equivalent ...
    fifth = timestamps[4]
    assert fifth - delta == fifth - pd.Timedelta(value, unit)
    # ... and so does adding the negated delta.
    assert fifth + negated == fifth - pd.Timedelta(value, unit)
Exemplo n.º 11
0
def test_feature_takes_timedelta_string(es):
    """Check that a string ``use_previous`` is exposed as an equal Timedelta."""
    base_feature = Feature(es["log"].ww["id"])
    count_feature = Feature(
        base_feature,
        parent_dataframe_name="customers",
        use_previous="1 day",
        primitive=Count,
    )

    expected_window = Timedelta(1, "d")
    assert count_feature.use_previous == expected_window
Exemplo n.º 12
0
def test_delta_with_observations(es):
    """Check observation-based Timedeltas and that they reject datetime math.

    The delta and its negation are not absolute, report signed values for
    the ``'o'`` unit, and raise an "Invalid unit" error when added to or
    subtracted from a timestamp.
    """
    forward = Timedelta(4, "observations")
    assert not forward.is_absolute()
    assert forward.get_value("o") == 4

    backward = -forward
    assert not backward.is_absolute()
    assert backward.get_value("o") == -4

    timestamp = pd.to_datetime("2019-05-01")
    expected_message = "Invalid unit"

    # Addition to a timestamp must fail ...
    with pytest.raises(Exception, match=expected_message):
        timestamp + forward

    # ... and so must subtraction.
    with pytest.raises(Exception, match=expected_message):
        timestamp - forward
Exemplo n.º 13
0
def test_serialization():
    """Round-trip single-unit and multi-unit Timedeltas through dictionaries."""
    # (value, unit) specifications shared by the objects and their dictionaries.
    specs = [(1, "w"), (3, "d"), (5, "o")]
    deltas = [Timedelta(value, unit=unit) for value, unit in specs]
    serialized = [{"value": value, "unit": unit} for value, unit in specs]

    for delta, arguments in zip(deltas, serialized):
        assert arguments == delta.get_arguments()

    for delta, arguments in zip(deltas, serialized):
        assert delta == Timedelta.from_dictionary(arguments)

    # A Timedelta with several temporal parameters is tested separately:
    # its serialized order is not deterministic, so each (value, unit) pair
    # is checked by membership only.
    multi_delta = Timedelta({"years": 4, "months": 3, "days": 2})

    # Serialize
    units = multi_delta.get_arguments()["unit"]
    values = multi_delta.get_arguments()["value"]
    pairs = list(zip(values, units))

    assert (4, "Y") in pairs
    assert (3, "mo") in pairs
    assert (2, "d") in pairs

    # Deserialize
    multi_arguments = {"value": [4, 3, 2], "unit": ["Y", "mo", "d"]}
    assert multi_delta == Timedelta.from_dictionary(multi_arguments)
Exemplo n.º 14
0
def test_delta_with_observations(es):
    """Check observation-based Timedeltas bound to customer 0's log rows.

    Adding or subtracting N observations moves between the sorted log
    timestamps, zero observations is a no-op, and asking for more
    observations than exist raises NotEnoughData.
    """

    def bound_delta(count):
        # Build the observation delta, then bind it to customer 0.
        return Timedelta(count, "observations", "log")(
            "customers", instance_id=0, entityset=es
        )

    related_log = es.related_instances("customers", "log", 0)
    timestamps = related_log["datetime"].sort_values().tolist()

    # Four observations forward and backward.
    four = bound_delta(4)
    minus_four = -four

    # The first timestamp plus four observations is the fifth ...
    assert timestamps[0] + four == timestamps[4]
    # ... also when expressed with the negated delta.
    assert timestamps[0] - minus_four == timestamps[4]

    # The fifth timestamp minus four observations is the first ...
    assert timestamps[4] - four == timestamps[0]
    # ... also when expressed with the negated delta.
    assert timestamps[4] + minus_four == timestamps[0]

    # Zero observations leaves the timestamp unchanged in every direction.
    zero = bound_delta(0)
    minus_zero = -zero
    assert timestamps[0] + zero == timestamps[0]
    assert timestamps[0] - zero == timestamps[0]
    assert timestamps[0] + minus_zero == timestamps[0]
    assert timestamps[0] - minus_zero == timestamps[0]

    # Requesting more observations than are available must raise.
    too_many = bound_delta(99999)
    with pytest.raises(NotEnoughData):
        timestamps[0] + too_many
    with pytest.raises(NotEnoughData):
        timestamps[0] - too_many
Exemplo n.º 15
0
def test_serialization():
    """Round-trip Timedelta objects through their dictionary form."""
    # (value, unit) specifications shared by the objects and their dictionaries.
    specs = [(1, "w"), (3, "d"), (5, "o")]
    deltas = [Timedelta(value, unit=unit) for value, unit in specs]
    serialized = [{"value": value, "unit": unit} for value, unit in specs]

    # Object -> dictionary.
    for delta, arguments in zip(deltas, serialized):
        assert arguments == delta.get_arguments()

    # Dictionary -> object.
    for delta, arguments in zip(deltas, serialized):
        assert delta == Timedelta.from_dictionary(arguments)
Exemplo n.º 16
0
def test_delta_with_time_unit_matches_pandas(es):
    """Check that an hour-based Timedelta behaves like pandas.Timedelta."""
    related_log = es.related_instances("customers", "log", 0)
    timestamps = related_log["datetime"].sort_values().tolist()

    # A four-hour delta and its negation.
    value = 4
    unit = "h"
    delta = Timedelta(value, unit)
    negated = -delta

    # Adding the delta matches adding the pandas equivalent ...
    first = timestamps[0]
    assert first + delta == first + pd.Timedelta(value, unit)
    # ... and so does subtracting the negated delta.
    assert first - negated == first + pd.Timedelta(value, unit)

    # Subtracting the delta matches subtracting the pandas equivalent ...
    fifth = timestamps[4]
    assert fifth - delta == fifth - pd.Timedelta(value, unit)
    # ... and so does adding the negated delta.
    assert fifth + negated == fifth - pd.Timedelta(value, unit)
Exemplo n.º 17
0
def test_delta_with_time_unit_matches_pandas(es):
    """Check that an hour-based Timedelta behaves like pandas.Timedelta."""
    customer_id = 0

    # Restrict the log to rows from the chosen customer's sessions.
    sessions = to_pandas(es["sessions"])
    customer_sessions = sessions[sessions["customer_id"] == customer_id]
    logs = to_pandas(es["log"])
    customer_logs = logs[logs["session_id"].isin(customer_sessions["id"])]
    timestamps = customer_logs["datetime"].sort_values().tolist()

    # A four-hour delta and its negation.
    value = 4
    unit = "h"
    delta = Timedelta(value, unit)
    negated = -delta

    # Adding the delta matches adding the pandas equivalent ...
    first = timestamps[0]
    assert first + delta == first + pd.Timedelta(value, unit)
    # ... and so does subtracting the negated delta.
    assert first - negated == first + pd.Timedelta(value, unit)

    # Subtracting the delta matches subtracting the pandas equivalent ...
    fifth = timestamps[4]
    assert fifth - delta == fifth - pd.Timedelta(value, unit)
    # ... and so does adding the negated delta.
    assert fifth + negated == fifth - pd.Timedelta(value, unit)
Exemplo n.º 18
0
def test_week_to_days():
    """Check that a week-based Timedelta equals the same span given in days."""
    in_weeks = Timedelta("1001 weeks")
    in_days = Timedelta(1001 * 7, "days")
    assert in_weeks == in_days
Exemplo n.º 19
0
def test_string_timedelta_args():
    """Check that "<value> <unit>" strings parse like explicit arguments."""
    # (string form, expected value, expected unit)
    cases = [
        ("1 second", 1, "second"),
        ("1 seconds", 1, "second"),
        ("10 days", 10, "days"),
        ("100 days", 100, "days"),
        ("1001 days", 1001, "days"),
        ("1001 weeks", 1001, "weeks"),
    ]
    for text, value, unit in cases:
        assert Timedelta(text) == Timedelta(value, unit)
def test_feature_takes_timedelta_string(es):
    """Check that a string ``use_previous`` is exposed as an equal Timedelta."""
    log_id = es["log"]["id"]
    customers = es["customers"]
    count_feature = Count(log_id, customers, use_previous="1 day")

    expected_window = Timedelta(1, "d")
    assert count_feature.use_previous == expected_window
def test_requires_entities_if_observations():
    """Check that an observation-based Timedelta without an entity raises."""
    unit = "observations"
    with pytest.raises(Exception):
        Timedelta(4, unit)
Exemplo n.º 22
0
def test_singular():
    """Check that make_singular maps both "Month" and "Months" to "Month"."""
    for word in ("Month", "Months"):
        assert Timedelta.make_singular(word) == "Month"
Exemplo n.º 23
0
def test_timedelta_equality():
    """Check Timedelta equality against an equal Timedelta and a plain int."""
    # Two separately constructed but identical deltas compare equal.
    left = Timedelta(10, "d")
    right = Timedelta(10, "d")
    assert left == right

    # A Timedelta is not equal to an unrelated integer.
    ten_days = Timedelta(10, "d")
    assert ten_days != 1
Exemplo n.º 24
0
def test_feature_takes_timedelta_string(es):
    """Check that a string ``use_previous`` is exposed as an equal Timedelta."""
    log_id = es["log"]["id"]
    customers = es["customers"]
    count_feature = ft.Feature(
        log_id,
        parent_entity=customers,
        use_previous="1 day",
        primitive=Count,
    )

    expected_window = Timedelta(1, "d")
    assert count_feature.use_previous == expected_window
Exemplo n.º 25
0
def test_feature_takes_timedelta_string(es):
    """Check that a string ``use_previous`` is exposed as an equal Timedelta."""
    base_feature = Feature(es["log"].ww["id"])
    count_feature = Feature(
        base_feature,
        parent_dataframe_name="customers",
        use_previous="1 day",
        primitive=Count,
    )

    expected_window = Timedelta(1, "d")
    assert count_feature.use_previous == expected_window
Exemplo n.º 26
0
def test_requires_entities_if_observations():
    """Check the error raised for an observation-based Timedelta with no entity."""
    expected_message = "Must define entity to use o as unit"
    unit = "observations"
    with pytest.raises(Exception, match=expected_message):
        Timedelta(4, unit)