def test_look_back_time_equal_modeling_start(self):
    # Scenario: counting the 5 day max training history back from the
    # 2010-01-06 split lands exactly on the label start time (2010-01-01),
    # which is also the first expected training as-of time.
    #
    # TODO: rework this test since the test label window of 3 months
    # cannot be satisfied by the 10 day difference between modeling
    # start and end times, so it's not a very realistic case
    # NOTE(review): the configuration below uses a '1 day' test label
    # timespan, so the "3 months" mentioned in the TODO looks outdated --
    # confirm before reworking.
    feature_start = datetime.datetime(1990, 1, 1, 0, 0)
    label_start = datetime.datetime(2010, 1, 1, 0, 0)
    # Features and labels share the same end of the modeling window.
    modeling_end = datetime.datetime(2010, 1, 11, 0, 0)
    split_time = datetime.datetime(2010, 1, 6, 0, 0)

    chopper = Timechop(
        feature_start_time=feature_start,
        feature_end_time=modeling_end,
        label_start_time=label_start,
        label_end_time=modeling_end,
        model_update_frequency='5 days',
        training_as_of_date_frequencies=['1 days'],
        test_as_of_date_frequencies=['3 days'],
        max_training_histories=['5 days'],
        test_durations=['5 days'],
        test_label_timespans=['1 day'],
        training_label_timespans=['1 day']
    )
    result = chopper.generate_matrix_definitions(
        train_test_split_time=split_time,
        training_as_of_date_frequency='1 days',
        max_training_history='5 days',
        test_duration='5 days',
        test_label_timespan='1 day',
        training_label_timespan='1 day'
    )

    # Daily training as-of times: 2010-01-01 through 2010-01-05.
    train_as_of_times = [
        datetime.datetime(2010, 1, day, 0, 0) for day in range(1, 6)
    ]
    expected_train_matrix = {
        'first_as_of_time': train_as_of_times[0],
        'last_as_of_time': train_as_of_times[-1],
        'matrix_info_end_time': split_time,
        'as_of_times': train_as_of_times,
        'training_label_timespan': '1 day',
        'training_as_of_date_frequency': '1 days',
        'max_training_history': '5 days'
    }

    # Test as-of times every 3 days starting at the split: 01-06 and 01-09.
    test_as_of_times = [
        datetime.datetime(2010, 1, 6, 0, 0),
        datetime.datetime(2010, 1, 9, 0, 0)
    ]
    expected_test_matrix = {
        'first_as_of_time': test_as_of_times[0],
        'last_as_of_time': test_as_of_times[-1],
        'matrix_info_end_time': datetime.datetime(2010, 1, 10, 0, 0),
        'as_of_times': test_as_of_times,
        'test_label_timespan': '1 day',
        'test_as_of_date_frequency': '3 days',
        'test_duration': '5 days'
    }

    expected_result = {
        'feature_start_time': feature_start,
        'label_start_time': label_start,
        'feature_end_time': modeling_end,
        'label_end_time': modeling_end,
        'train_matrix': expected_train_matrix,
        'test_matrices': [expected_test_matrix]
    }
    assert result == expected_result
def test_look_back_time_before_modeling_start(self):
    # Scenario: a 10 day max training history counted back from the
    # 2010-01-06 split would reach before the label start time
    # (2010-01-01); the expected training as-of times still begin at
    # 2010-01-01. Two test as-of-date frequencies are configured, so two
    # test matrices are expected.
    feature_start = datetime.datetime(1990, 1, 1, 0, 0)
    label_start = datetime.datetime(2010, 1, 1, 0, 0)
    # Features and labels share the same end of the modeling window.
    modeling_end = datetime.datetime(2010, 1, 11, 0, 0)
    split_time = datetime.datetime(2010, 1, 6, 0, 0)

    # Daily training as-of times: 2010-01-01 through 2010-01-05.
    train_days = (1, 2, 3, 4, 5)
    train_as_of_times = [
        datetime.datetime(2010, 1, day, 0, 0) for day in train_days
    ]
    expected_train_matrix = {
        "first_as_of_time": train_as_of_times[0],
        "last_as_of_time": train_as_of_times[-1],
        "matrix_info_end_time": split_time,
        "as_of_times": train_as_of_times,
        "training_label_timespan": "1 day",
        "training_as_of_date_frequency": "1 days",
        "max_training_history": "10 days",
    }

    # "3 days" frequency: as-of times on 01-06 and 01-09.
    expected_test_every_3_days = {
        "first_as_of_time": datetime.datetime(2010, 1, 6, 0, 0),
        "last_as_of_time": datetime.datetime(2010, 1, 9, 0, 0),
        "matrix_info_end_time": datetime.datetime(2010, 1, 10, 0, 0),
        "as_of_times": [
            datetime.datetime(2010, 1, 6, 0, 0),
            datetime.datetime(2010, 1, 9, 0, 0),
        ],
        "test_label_timespan": "1 day",
        "test_as_of_date_frequency": "3 days",
        "test_duration": "5 days",
    }
    # "6 days" frequency: only the split time itself is expected.
    expected_test_every_6_days = {
        "first_as_of_time": datetime.datetime(2010, 1, 6, 0, 0),
        "last_as_of_time": datetime.datetime(2010, 1, 6, 0, 0),
        "matrix_info_end_time": datetime.datetime(2010, 1, 7, 0, 0),
        "as_of_times": [datetime.datetime(2010, 1, 6, 0, 0)],
        "test_label_timespan": "1 day",
        "test_as_of_date_frequency": "6 days",
        "test_duration": "5 days",
    }

    expected_result = {
        "feature_start_time": feature_start,
        "label_start_time": label_start,
        "feature_end_time": modeling_end,
        "label_end_time": modeling_end,
        "train_matrix": expected_train_matrix,
        "test_matrices": [
            expected_test_every_3_days,
            expected_test_every_6_days,
        ],
    }

    chopper = Timechop(
        feature_start_time=feature_start,
        feature_end_time=modeling_end,
        label_start_time=label_start,
        label_end_time=modeling_end,
        model_update_frequency="5 days",
        training_as_of_date_frequencies=["1 days"],
        test_as_of_date_frequencies=["3 days", "6 days"],
        max_training_histories=["10 days"],
        test_durations=["5 days"],
        test_label_timespans=["1 day"],
        training_label_timespans=["1 day"],
    )
    result = chopper.generate_matrix_definitions(
        train_test_split_time=split_time,
        training_as_of_date_frequency="1 days",
        max_training_history="10 days",
        test_duration="5 days",
        test_label_timespan="1 day",
        training_label_timespan="1 day",
    )
    assert result == expected_result
def test_look_back_time_equal_modeling_start(self):
    # Scenario: the 5 day max training history, measured back from the
    # 2010-01-06 split, ends exactly at the label start time (2010-01-01),
    # which is the first expected training as-of time.
    #
    # TODO: rework this test since the test label window of 3 months
    # cannot be satisfied by the 10 day difference between modeling
    # start and end times, so it's not a very realistic case
    # NOTE(review): the test label timespan configured here is "1 day",
    # not 3 months -- the TODO wording may be stale; confirm.

    def jan_2010(day):
        # Midnight on the given day of January 2010.
        return datetime.datetime(2010, 1, day, 0, 0)

    feature_start = datetime.datetime(1990, 1, 1, 0, 0)

    chopper = Timechop(
        feature_start_time=feature_start,
        feature_end_time=jan_2010(11),
        label_start_time=jan_2010(1),
        label_end_time=jan_2010(11),
        model_update_frequency="5 days",
        training_as_of_date_frequencies=["1 days"],
        test_as_of_date_frequencies=["3 days"],
        max_training_histories=["5 days"],
        test_durations=["5 days"],
        test_label_timespans=["1 day"],
        training_label_timespans=["1 day"],
    )
    result = chopper.generate_matrix_definitions(
        train_test_split_time=jan_2010(6),
        training_as_of_date_frequency="1 days",
        max_training_history="5 days",
        test_duration="5 days",
        test_label_timespan="1 day",
        training_label_timespan="1 day",
    )

    expected_result = {
        "feature_start_time": feature_start,
        "label_start_time": jan_2010(1),
        "feature_end_time": jan_2010(11),
        "label_end_time": jan_2010(11),
        "train_matrix": {
            "first_as_of_time": jan_2010(1),
            "last_as_of_time": jan_2010(5),
            "matrix_info_end_time": jan_2010(6),
            # One as-of time per day, 01-01 through 01-05.
            "as_of_times": list(map(jan_2010, range(1, 6))),
            "training_label_timespan": "1 day",
            "training_as_of_date_frequency": "1 days",
            "max_training_history": "5 days",
        },
        "test_matrices": [
            {
                "first_as_of_time": jan_2010(6),
                "last_as_of_time": jan_2010(9),
                "matrix_info_end_time": jan_2010(10),
                # Every 3 days starting from the split time.
                "as_of_times": [jan_2010(6), jan_2010(9)],
                "test_label_timespan": "1 day",
                "test_as_of_date_frequency": "3 days",
                "test_duration": "5 days",
            }
        ],
    }
    assert result == expected_result
def test_look_back_time_before_modeling_start(self):
    # Scenario: max training history is 10 days, but the label start time
    # (2010-01-01) is only 5 days before the 2010-01-06 split; the expected
    # training as-of times start at 2010-01-01. Test matrices are expected
    # for both the '3 days' and '6 days' test as-of-date frequencies.
    one_day = datetime.timedelta(days=1)

    feature_start = datetime.datetime(1990, 1, 1, 0, 0)
    label_start = datetime.datetime(2010, 1, 1, 0, 0)
    split_time = datetime.datetime(2010, 1, 6, 0, 0)
    # 2010-01-11: common end time for both features and labels.
    modeling_end = split_time + 5 * one_day

    chopper = Timechop(
        feature_start_time=feature_start,
        feature_end_time=modeling_end,
        label_start_time=label_start,
        label_end_time=modeling_end,
        model_update_frequency='5 days',
        training_as_of_date_frequencies=['1 days'],
        test_as_of_date_frequencies=['3 days', '6 days'],
        max_training_histories=['10 days'],
        test_durations=['5 days'],
        test_label_timespans=['1 day'],
        training_label_timespans=['1 day']
    )
    result = chopper.generate_matrix_definitions(
        train_test_split_time=split_time,
        training_as_of_date_frequency='1 days',
        max_training_history='10 days',
        test_duration='5 days',
        test_label_timespan='1 day',
        training_label_timespan='1 day'
    )

    # Five daily training as-of times: 2010-01-01 .. 2010-01-05.
    train_as_of_times = [label_start + offset * one_day for offset in range(5)]
    # 2010-01-09, the second as-of time at the '3 days' test frequency.
    second_test_time = split_time + 3 * one_day

    expected_test_matrices = []
    # '3 days' frequency: 01-06 and 01-09, info ends on 01-10.
    expected_test_matrices.append({
        'first_as_of_time': split_time,
        'last_as_of_time': second_test_time,
        'matrix_info_end_time': second_test_time + one_day,
        'as_of_times': [split_time, second_test_time],
        'test_label_timespan': '1 day',
        'test_as_of_date_frequency': '3 days',
        'test_duration': '5 days'
    })
    # '6 days' frequency: only 01-06, info ends on 01-07.
    expected_test_matrices.append({
        'first_as_of_time': split_time,
        'last_as_of_time': split_time,
        'matrix_info_end_time': split_time + one_day,
        'as_of_times': [split_time],
        'test_label_timespan': '1 day',
        'test_as_of_date_frequency': '6 days',
        'test_duration': '5 days'
    })

    expected_result = {
        'feature_start_time': feature_start,
        'label_start_time': label_start,
        'feature_end_time': modeling_end,
        'label_end_time': modeling_end,
        'train_matrix': {
            'first_as_of_time': train_as_of_times[0],
            'last_as_of_time': train_as_of_times[-1],
            'matrix_info_end_time': split_time,
            'as_of_times': train_as_of_times,
            'training_label_timespan': '1 day',
            'training_as_of_date_frequency': '1 days',
            'max_training_history': '10 days'
        },
        'test_matrices': expected_test_matrices
    }
    assert result == expected_result