示例#1
0
    def test_valid_input(self):
        """calculate_train_test_split_times returns the expected quarterly
        split times when all Timechop inputs are valid."""
        expected_result = [
            datetime.datetime(2015, 3, 1, 0, 0),
            datetime.datetime(2015, 6, 1, 0, 0),
            datetime.datetime(2015, 9, 1, 0, 0),
            datetime.datetime(2015, 12, 1, 0, 0),
            datetime.datetime(2016, 3, 1, 0, 0),
            datetime.datetime(2016, 6, 1, 0, 0)
        ]
        chopper = Timechop(
            feature_start_time=datetime.datetime(2010, 1, 1, 0, 0),
            feature_end_time=datetime.datetime(2017, 1, 1, 0, 0),
            label_start_time=datetime.datetime(2015, 1, 1, 0, 0),
            label_end_time=datetime.datetime(2017, 1, 1, 0, 0),
            model_update_frequency='3 months',
            training_as_of_date_frequencies=['1 day'],
            test_as_of_date_frequencies=['1 day'],
            max_training_histories=['1 year'],
            test_durations=['6 months'],
            test_label_timespans=['1 months'],
            training_label_timespans=['3 days'])

        # NOTE(review): this is the happy-path case — the split times are
        # expected to be returned and asserted below, not raised.  The old
        # "this should throw an exception" comment was copy-pasted from
        # test_labels_after_features and did not apply here.
        result = chopper.calculate_train_test_split_times(
            training_label_timespan=convert_str_to_relativedelta('3 days'),
            test_duration='6 months',
            test_label_timespan=convert_str_to_relativedelta('1 month'))

        assert result == expected_result
示例#2
0
def test_calculate_as_of_times_one_day_freq():
    """calculate_as_of_times at a one-day frequency yields every day from
    the start limit through the end limit, inclusive."""
    window_start = datetime.datetime(2011, 1, 1, 0, 0)
    window_end = datetime.datetime(2011, 1, 11, 0, 0)
    # Eleven consecutive days: 2011-01-01 through 2011-01-11.
    expected_result = [
        window_start + datetime.timedelta(days=offset) for offset in range(11)
    ]
    chopper = Timechop(
        feature_start_time=datetime.datetime(1990, 1, 1, 0, 0),
        feature_end_time=datetime.datetime(2012, 1, 1, 0, 0),
        label_start_time=datetime.datetime(2010, 1, 1, 0, 0),
        label_end_time=datetime.datetime(2012, 1, 1, 0, 0),
        model_update_frequency='1 year',
        training_as_of_date_frequencies=['1 days'],
        test_as_of_date_frequencies=['7 days'],
        max_training_histories=['10 days', '1 year'],
        test_durations=['1 month'],
        test_label_timespans=['1 day'],
        training_label_timespans=['3 months'])
    result = chopper.calculate_as_of_times(
        as_of_start_limit=window_start,
        as_of_end_limit=window_end,
        data_frequency=convert_str_to_relativedelta('1 days'))
    assert result == expected_result
示例#3
0
def matrices():
    """Endpoint: run Timechop over a JSON config from the request body.

    Returns JSON with the chopped split definitions under ``data`` and an
    empty ``error`` string, or an empty ``data`` list plus the error
    message if anything goes wrong.
    """
    timechop_config = snakify_keys(request.get_json())
    # Timechop needs real datetimes; the JSON payload carries ISO dates.
    for datetime_key in ('feature_start_time', 'feature_end_time',
                         'label_start_time', 'label_end_time'):
        timechop_config[datetime_key] = datetime.strptime(
            timechop_config[datetime_key], '%Y-%m-%d')
    try:
        return jsonify(data=Timechop(**timechop_config).chop_time(), error='')
    except Exception as e:
        return jsonify(data=[], error=str(e))
示例#4
0
    def test_no_valid_label_dates(self):
        """calculate_train_test_split_times raises ValueError when the
        label span is longer than the labeling time, leaving no valid
        label dates."""
        timechop = Timechop(
            feature_start_time=datetime.datetime(2010, 1, 1, 0, 0),
            feature_end_time=datetime.datetime(2016, 1, 1, 0, 0),
            label_start_time=datetime.datetime(2015, 1, 1, 0, 0),
            label_end_time=datetime.datetime(2015, 2, 1, 0, 0),
            model_update_frequency='3 months',
            training_as_of_date_frequencies=['1 day'],
            test_as_of_date_frequencies=['1 day'],
            max_training_histories=['1 year'],
            test_durations=['6 months'],
            test_label_timespans=['1 months'],
            training_label_timespans=['3 days'])

        # The labeling time is only one month (2015-01-01 to 2015-02-01),
        # which cannot accommodate the requested label span, so the split
        # calculation must fail.
        with self.assertRaises(ValueError):
            timechop.calculate_train_test_split_times(
                training_label_timespan=convert_str_to_relativedelta('3 days'),
                test_duration='6 months',
                test_label_timespan=convert_str_to_relativedelta('1 month'))
示例#5
0
    def test_labels_after_features(self):
        """calculate_train_test_split_times raises ValueError when the last
        possible label date falls after the end of feature time."""
        chopper = Timechop(
            feature_start_time=datetime.datetime(2010, 1, 1, 0, 0),
            feature_end_time=datetime.datetime(2016, 1, 1, 0, 0),
            label_start_time=datetime.datetime(2015, 1, 1, 0, 0),
            label_end_time=datetime.datetime(2017, 1, 1, 0, 0),
            model_update_frequency='3 months',
            training_as_of_date_frequencies=['1 day'],
            test_as_of_date_frequencies=['1 day'],
            max_training_histories=['1 year'],
            test_durations=['6 months'],
            test_label_timespans=['1 months'],
            training_label_timespans=['3 days'])

        # Labels run through 2017 while features end at 2016, so the last
        # possible label date is after the end of feature time and the
        # split calculation must fail.  (The previous `result =` binding
        # was dead: assertRaises swallows the call before assignment.)
        with self.assertRaises(ValueError):
            chopper.calculate_train_test_split_times(
                training_label_timespan=convert_str_to_relativedelta('3 days'),
                test_duration='6 months',
                test_label_timespan=convert_str_to_relativedelta('1 month'))
示例#6
0
 def test_bad_feature_start_time(self):
     """Timechop raises ValueError at construction time when
     feature_start_time is later than feature_end_time."""
     with self.assertRaises(ValueError):
         # feature_start_time (2011-01-01) postdates feature_end_time
         # (2010-01-16); the constructor itself must fail, so binding the
         # instance to a name (`chopper =`) was dead code and is dropped.
         Timechop(
             feature_start_time=datetime.datetime(2011, 1, 1, 0, 0),
             feature_end_time=datetime.datetime(2010, 1, 16, 0, 0),
             label_start_time=datetime.datetime(2010, 1, 3, 0, 0),
             label_end_time=datetime.datetime(2010, 1, 16, 0, 0),
             model_update_frequency='5 days',
             training_as_of_date_frequencies=['1 days'],
             test_as_of_date_frequencies=['1 days'],
             max_training_histories=['5 days'],
             test_durations=['5 days'],
             test_label_timespans=['1 day'],
             training_label_timespans=['1 day'])
示例#7
0
def basic_integration_test(state_filters, feature_group_create_rules,
                           feature_group_mix_rules, expected_num_matrices):
    """End-to-end smoke test of the matrix-building pipeline.

    Spins up a throwaway Postgres, populates source data, chops time,
    generates the sparse state table, labels, and feature tables, plans and
    builds matrices, then asserts that the expected number of matrix (.csv)
    and metadata (.yaml) files landed on disk.

    Args:
        state_filters: state filter expressions handed to the Planner.
        feature_group_create_rules: config for FeatureGroupCreator subsets.
        feature_group_mix_rules: rules for FeatureGroupMixer.generate.
        expected_num_matrices: how many matrix files should be created.
    """
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        Base.metadata.create_all(db_engine)
        populate_source_data(db_engine)

        with TemporaryDirectory() as temp_dir:
            # NOTE(review): these Timechop kwargs (beginning_of_time,
            # update_window, ...) differ from the feature_*/label_* names
            # used elsewhere in this file — confirm which Timechop API
            # version this targets.
            chopper = Timechop(
                beginning_of_time=datetime(2010, 1, 1),
                modeling_start_time=datetime(2011, 1, 1),
                modeling_end_time=datetime(2014, 1, 1),
                update_window='1y',
                train_label_windows=['6months'],
                test_label_windows=['6months'],
                train_example_frequency='1day',
                test_example_frequency='3months',
                train_durations=['1months'],
                test_durations=['1months'],
            )

            state_table_generator = StateTableGenerator(db_engine=db_engine,
                                                        experiment_hash='abcd')

            label_generator = BinaryLabelGenerator(db_engine=db_engine,
                                                   events_table='events')

            feature_generator = FeatureGenerator(
                db_engine=db_engine,
                features_schema_name='features',
                replace=True,
            )

            feature_dictionary_creator = FeatureDictionaryCreator(
                db_engine=db_engine, features_schema_name='features')

            feature_group_creator = FeatureGroupCreator(
                feature_group_create_rules)

            feature_group_mixer = FeatureGroupMixer(feature_group_mix_rules)

            planner = Planner(engine=db_engine,
                              beginning_of_time=datetime(2010, 1, 1),
                              label_names=['outcome'],
                              label_types=['binary'],
                              db_config={
                                  'features_schema_name':
                                  'features',
                                  'labels_schema_name':
                                  'public',
                                  'labels_table_name':
                                  'labels',
                                  'sparse_state_table_name':
                                  'tmp_sparse_states_abcd',
                              },
                              matrix_directory=os.path.join(
                                  temp_dir, 'matrices'),
                              states=state_filters,
                              user_metadata={},
                              replace=True)

            # chop time
            split_definitions = chopper.chop_time()

            # generate as_of_times for feature/label/state generation
            all_as_of_times = []
            for split in split_definitions:
                all_as_of_times.extend(split['train_matrix']['as_of_times'])
                for test_matrix in split['test_matrices']:
                    all_as_of_times.extend(test_matrix['as_of_times'])
            all_as_of_times = list(set(all_as_of_times))

            # generate sparse state table
            state_table_generator.generate_sparse_table(
                dense_state_table='states', as_of_dates=all_as_of_times)

            # create labels table
            label_generator.generate_all_labels(labels_table='labels',
                                                as_of_dates=all_as_of_times,
                                                label_windows=['6months'])

            # create feature table tasks
            # we would use FeatureGenerator#create_all_tables but want to use
            # the tasks dict directly to create a feature dict
            feature_table_tasks = feature_generator.generate_all_table_tasks(
                feature_aggregation_config=[{
                    'prefix':
                    'cat',
                    'from_obj':
                    'cat_complaints',
                    'knowledge_date_column':
                    'as_of_date',
                    'aggregates': [{
                        'quantity': 'cat_sightings',
                        'metrics': ['count', 'avg'],
                    }],
                    'intervals': ['1y'],
                    'groups': ['entity_id']
                }, {
                    'prefix':
                    'dog',
                    'from_obj':
                    'dog_complaints',
                    'knowledge_date_column':
                    'as_of_date',
                    'aggregates': [{
                        'quantity': 'dog_sightings',
                        'metrics': ['count', 'avg'],
                    }],
                    'intervals': ['1y'],
                    'groups': ['entity_id']
                }],
                feature_dates=all_as_of_times,
            )

            # create feature tables
            feature_generator.process_table_tasks(feature_table_tasks)

            # build feature dictionaries from feature tables and
            # subsetting config
            master_feature_dict = feature_dictionary_creator\
                .feature_dictionary(feature_table_tasks.keys())

            feature_dicts = feature_group_mixer.generate(
                feature_group_creator.subsets(master_feature_dict))

            # figure out what matrices need to be built
            _, matrix_build_tasks =\
                planner.generate_plans(
                    split_definitions,
                    feature_dicts
                )

            # go and build the matrices
            planner.build_all_matrices(matrix_build_tasks)

            # super basic assertion: did matrices we expect get created?
            matrix_directory = os.path.join(temp_dir, 'matrices')
            matrices = [
                path for path in os.listdir(matrix_directory) if '.csv' in path
            ]
            metadatas = [
                path for path in os.listdir(matrix_directory)
                if '.yaml' in path
            ]
            assert len(matrices) == expected_num_matrices
            assert len(metadatas) == expected_num_matrices
示例#8
0
 def test_unevenly_divisible_update_window(self):
     """chop_time handles a model update frequency that does not evenly
     divide the label period: the earliest split is truncated at
     label_start_time (its train matrix has only two as_of_times) rather
     than reaching before the start of labeling time."""
     # Two splits expected: a truncated initial split, then one full
     # 5-day split.
     expected_result = [{
         'feature_start_time':
         datetime.datetime(1990, 1, 1, 0, 0),
         'label_start_time':
         datetime.datetime(2010, 1, 3, 0, 0),
         'feature_end_time':
         datetime.datetime(2010, 1, 16, 0, 0),
         'label_end_time':
         datetime.datetime(2010, 1, 16, 0, 0),
         'train_matrix': {
             'first_as_of_time':
             datetime.datetime(2010, 1, 3, 0, 0),
             'last_as_of_time':
             datetime.datetime(2010, 1, 4, 0, 0),
             'matrix_info_end_time':
             datetime.datetime(2010, 1, 5, 0, 0),
             'as_of_times': [
                 datetime.datetime(2010, 1, 3, 0, 0),
                 datetime.datetime(2010, 1, 4, 0, 0)
             ],
             'training_label_timespan':
             '1 day',
             'training_as_of_date_frequency':
             '1 days',
             'max_training_history':
             '5 days'
         },
         'test_matrices': [{
             'first_as_of_time':
             datetime.datetime(2010, 1, 5, 0, 0),
             'last_as_of_time':
             datetime.datetime(2010, 1, 9, 0, 0),
             'matrix_info_end_time':
             datetime.datetime(2010, 1, 10, 0, 0),
             'as_of_times': [
                 datetime.datetime(2010, 1, 5, 0, 0),
                 datetime.datetime(2010, 1, 6, 0, 0),
                 datetime.datetime(2010, 1, 7, 0, 0),
                 datetime.datetime(2010, 1, 8, 0, 0),
                 datetime.datetime(2010, 1, 9, 0, 0)
             ],
             'test_label_timespan':
             '1 day',
             'test_as_of_date_frequency':
             '1 days',
             'test_duration':
             '5 days'
         }]
     }, {
         'feature_start_time':
         datetime.datetime(1990, 1, 1, 0, 0),
         'label_start_time':
         datetime.datetime(2010, 1, 3, 0, 0),
         'feature_end_time':
         datetime.datetime(2010, 1, 16, 0, 0),
         'label_end_time':
         datetime.datetime(2010, 1, 16, 0, 0),
         'train_matrix': {
             'first_as_of_time':
             datetime.datetime(2010, 1, 4, 0, 0),
             'last_as_of_time':
             datetime.datetime(2010, 1, 9, 0, 0),
             'matrix_info_end_time':
             datetime.datetime(2010, 1, 10, 0, 0),
             'as_of_times': [
                 datetime.datetime(2010, 1, 4, 0, 0),
                 datetime.datetime(2010, 1, 5, 0, 0),
                 datetime.datetime(2010, 1, 6, 0, 0),
                 datetime.datetime(2010, 1, 7, 0, 0),
                 datetime.datetime(2010, 1, 8, 0, 0),
                 datetime.datetime(2010, 1, 9, 0, 0)
             ],
             'training_label_timespan':
             '1 day',
             'training_as_of_date_frequency':
             '1 days',
             'max_training_history':
             '5 days'
         },
         'test_matrices': [{
             'first_as_of_time':
             datetime.datetime(2010, 1, 10, 0, 0),
             'last_as_of_time':
             datetime.datetime(2010, 1, 14, 0, 0),
             'matrix_info_end_time':
             datetime.datetime(2010, 1, 15, 0, 0),
             'as_of_times': [
                 datetime.datetime(2010, 1, 10, 0, 0),
                 datetime.datetime(2010, 1, 11, 0, 0),
                 datetime.datetime(2010, 1, 12, 0, 0),
                 datetime.datetime(2010, 1, 13, 0, 0),
                 datetime.datetime(2010, 1, 14, 0, 0)
             ],
             'test_label_timespan':
             '1 day',
             'test_as_of_date_frequency':
             '1 days',
             'test_duration':
             '5 days'
         }]
     }]
     chopper = Timechop(
         feature_start_time=datetime.datetime(1990, 1, 1, 0, 0),
         feature_end_time=datetime.datetime(2010, 1, 16, 0, 0),
         label_start_time=datetime.datetime(2010, 1, 3, 0, 0),
         label_end_time=datetime.datetime(2010, 1, 16, 0, 0),
         model_update_frequency='5 days',
         training_as_of_date_frequencies=['1 days'],
         test_as_of_date_frequencies=['1 days'],
         max_training_histories=['5 days'],
         test_durations=['5 days'],
         test_label_timespans=['1 day'],
         training_label_timespans=['1 day'])
     result = chopper.chop_time()
     assert (result == expected_result)
示例#9
0
 def test_look_back_time_before_modeling_start(self):
     """generate_matrix_definitions clips the training as_of_times at
     label_start_time when max_training_history ('10 days') reaches back
     before the start of labeling time ('2010-01-01')."""
     # Two test matrices are expected, one per entry in
     # test_as_of_date_frequencies ('3 days' and '6 days').
     expected_result = {
         'feature_start_time':
         datetime.datetime(1990, 1, 1, 0, 0),
         'label_start_time':
         datetime.datetime(2010, 1, 1, 0, 0),
         'feature_end_time':
         datetime.datetime(2010, 1, 11, 0, 0),
         'label_end_time':
         datetime.datetime(2010, 1, 11, 0, 0),
         'train_matrix': {
             'first_as_of_time':
             datetime.datetime(2010, 1, 1, 0, 0),
             'last_as_of_time':
             datetime.datetime(2010, 1, 5, 0, 0),
             'matrix_info_end_time':
             datetime.datetime(2010, 1, 6, 0, 0),
             'as_of_times': [
                 datetime.datetime(2010, 1, 1, 0, 0),
                 datetime.datetime(2010, 1, 2, 0, 0),
                 datetime.datetime(2010, 1, 3, 0, 0),
                 datetime.datetime(2010, 1, 4, 0, 0),
                 datetime.datetime(2010, 1, 5, 0, 0)
             ],
             'training_label_timespan':
             '1 day',
             'training_as_of_date_frequency':
             '1 days',
             'max_training_history':
             '10 days'
         },
         'test_matrices': [{
             'first_as_of_time':
             datetime.datetime(2010, 1, 6, 0, 0),
             'last_as_of_time':
             datetime.datetime(2010, 1, 9, 0, 0),
             'matrix_info_end_time':
             datetime.datetime(2010, 1, 10, 0, 0),
             'as_of_times': [
                 datetime.datetime(2010, 1, 6, 0, 0),
                 datetime.datetime(2010, 1, 9, 0, 0)
             ],
             'test_label_timespan':
             '1 day',
             'test_as_of_date_frequency':
             '3 days',
             'test_duration':
             '5 days'
         }, {
             'first_as_of_time':
             datetime.datetime(2010, 1, 6, 0, 0),
             'last_as_of_time':
             datetime.datetime(2010, 1, 6, 0, 0),
             'matrix_info_end_time':
             datetime.datetime(2010, 1, 7, 0, 0),
             'as_of_times': [
                 datetime.datetime(2010, 1, 6, 0, 0),
             ],
             'test_label_timespan':
             '1 day',
             'test_as_of_date_frequency':
             '6 days',
             'test_duration':
             '5 days'
         }]
     }
     chopper = Timechop(
         feature_start_time=datetime.datetime(1990, 1, 1, 0, 0),
         feature_end_time=datetime.datetime(2010, 1, 11, 0, 0),
         label_start_time=datetime.datetime(2010, 1, 1, 0, 0),
         label_end_time=datetime.datetime(2010, 1, 11, 0, 0),
         model_update_frequency='5 days',
         training_as_of_date_frequencies=['1 days'],
         test_as_of_date_frequencies=['3 days', '6 days'],
         max_training_histories=['10 days'],
         test_durations=['5 days'],
         test_label_timespans=['1 day'],
         training_label_timespans=['1 day'])
     result = chopper.generate_matrix_definitions(
         train_test_split_time=datetime.datetime(2010, 1, 6, 0, 0),
         training_as_of_date_frequency='1 days',
         max_training_history='10 days',
         test_duration='5 days',
         test_label_timespan='1 day',
         training_label_timespan='1 day')
     assert result == expected_result
示例#10
0
 def test_look_back_time_equal_modeling_start(self):
     """generate_matrix_definitions when the look-back window ('5 days')
     exactly spans from label_start_time to the train/test split time:
     the train matrix covers all five days up to the split."""
     # TODO: rework this test since the test label window of 3 months
     # cannot be satisfied by the 10 day difference between modeling
     # start and end times, so it's not a very realistic case
     expected_result = {
         'feature_start_time':
         datetime.datetime(1990, 1, 1, 0, 0),
         'label_start_time':
         datetime.datetime(2010, 1, 1, 0, 0),
         'feature_end_time':
         datetime.datetime(2010, 1, 11, 0, 0),
         'label_end_time':
         datetime.datetime(2010, 1, 11, 0, 0),
         'train_matrix': {
             'first_as_of_time':
             datetime.datetime(2010, 1, 1, 0, 0),
             'last_as_of_time':
             datetime.datetime(2010, 1, 5, 0, 0),
             'matrix_info_end_time':
             datetime.datetime(2010, 1, 6, 0, 0),
             'as_of_times': [
                 datetime.datetime(2010, 1, 1, 0, 0),
                 datetime.datetime(2010, 1, 2, 0, 0),
                 datetime.datetime(2010, 1, 3, 0, 0),
                 datetime.datetime(2010, 1, 4, 0, 0),
                 datetime.datetime(2010, 1, 5, 0, 0)
             ],
             'training_label_timespan':
             '1 day',
             'training_as_of_date_frequency':
             '1 days',
             'max_training_history':
             '5 days'
         },
         'test_matrices': [{
             'first_as_of_time':
             datetime.datetime(2010, 1, 6, 0, 0),
             'last_as_of_time':
             datetime.datetime(2010, 1, 9, 0, 0),
             'matrix_info_end_time':
             datetime.datetime(2010, 1, 10, 0, 0),
             'as_of_times': [
                 datetime.datetime(2010, 1, 6, 0, 0),
                 datetime.datetime(2010, 1, 9, 0, 0)
             ],
             'test_label_timespan':
             '1 day',
             'test_as_of_date_frequency':
             '3 days',
             'test_duration':
             '5 days'
         }]
     }
     chopper = Timechop(
         feature_start_time=datetime.datetime(1990, 1, 1, 0, 0),
         feature_end_time=datetime.datetime(2010, 1, 11, 0, 0),
         label_start_time=datetime.datetime(2010, 1, 1, 0, 0),
         label_end_time=datetime.datetime(2010, 1, 11, 0, 0),
         model_update_frequency='5 days',
         training_as_of_date_frequencies=['1 days'],
         test_as_of_date_frequencies=['3 days'],
         max_training_histories=['5 days'],
         test_durations=['5 days'],
         test_label_timespans=['1 day'],
         training_label_timespans=['1 day'])
     result = chopper.generate_matrix_definitions(
         train_test_split_time=datetime.datetime(2010, 1, 6, 0, 0),
         training_as_of_date_frequency='1 days',
         max_training_history='5 days',
         test_duration='5 days',
         test_label_timespan='1 day',
         training_label_timespan='1 day')
     assert result == expected_result
示例#11
0
def basic_integration_test(
    state_filters,
    feature_group_create_rules,
    feature_group_mix_rules,
    expected_num_matrices
):
    """End-to-end smoke test of the matrix-building pipeline.

    Spins up a throwaway Postgres, populates source data, chops time,
    generates the sparse state table, labels, and feature tables, plans and
    builds matrices, then asserts that the expected number of matrix (.csv)
    and metadata (.yaml) files landed on disk.

    Args:
        state_filters: state filter expressions handed to the Planner.
        feature_group_create_rules: config for FeatureGroupCreator subsets.
        feature_group_mix_rules: rules for FeatureGroupMixer.generate.
        expected_num_matrices: how many matrix files should be created.
    """
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        Base.metadata.create_all(db_engine)
        populate_source_data(db_engine)

        with TemporaryDirectory() as temp_dir:
            # NOTE(review): these Timechop kwargs (beginning_of_time,
            # update_window, ...) differ from the feature_*/label_* names
            # used elsewhere in this file — confirm which Timechop API
            # version this targets.
            chopper = Timechop(
                beginning_of_time=datetime(2010, 1, 1),
                modeling_start_time=datetime(2011, 1, 1),
                modeling_end_time=datetime(2014, 1, 1),
                update_window='1y',
                train_label_windows=['6months'],
                test_label_windows=['6months'],
                train_example_frequency='1day',
                test_example_frequency='3months',
                train_durations=['1months'],
                test_durations=['1months'],
            )

            state_table_generator = StateTableGenerator(
                db_engine=db_engine,
                experiment_hash='abcd'
            )

            label_generator = BinaryLabelGenerator(
                db_engine=db_engine,
                events_table='events'
            )

            feature_generator = FeatureGenerator(
                db_engine=db_engine,
                features_schema_name='features',
                replace=True,
            )

            feature_dictionary_creator = FeatureDictionaryCreator(
                db_engine=db_engine,
                features_schema_name='features'
            )

            feature_group_creator = FeatureGroupCreator(feature_group_create_rules)

            feature_group_mixer = FeatureGroupMixer(feature_group_mix_rules)

            planner = Planner(
                engine=db_engine,
                beginning_of_time=datetime(2010, 1, 1),
                label_names=['outcome'],
                label_types=['binary'],
                db_config={
                    'features_schema_name': 'features',
                    'labels_schema_name': 'public',
                    'labels_table_name': 'labels',
                    'sparse_state_table_name': 'tmp_sparse_states_abcd',
                },
                matrix_directory=os.path.join(temp_dir, 'matrices'),
                states=state_filters,
                user_metadata={},
                replace=True
            )

            # chop time
            split_definitions = chopper.chop_time()

            # generate as_of_times for feature/label/state generation
            all_as_of_times = []
            for split in split_definitions:
                all_as_of_times.extend(split['train_matrix']['as_of_times'])
                for test_matrix in split['test_matrices']:
                    all_as_of_times.extend(test_matrix['as_of_times'])
            all_as_of_times = list(set(all_as_of_times))

            # generate sparse state table
            state_table_generator.generate_sparse_table(
                dense_state_table='states',
                as_of_dates=all_as_of_times
            )

            # create labels table
            label_generator.generate_all_labels(
                labels_table='labels',
                as_of_dates=all_as_of_times,
                label_windows=['6months']
            )

            # create feature table tasks
            # we would use FeatureGenerator#create_all_tables but want to use
            # the tasks dict directly to create a feature dict
            feature_table_tasks = feature_generator.generate_all_table_tasks(
                feature_aggregation_config=[{
                    'prefix': 'cat',
                    'from_obj': 'cat_complaints',
                    'knowledge_date_column': 'as_of_date',
                    'aggregates': [{
                        'quantity': 'cat_sightings',
                        'metrics': ['count', 'avg'],
                    }],
                    'intervals': ['1y'],
                    'groups': ['entity_id']
                }, {
                    'prefix': 'dog',
                    'from_obj': 'dog_complaints',
                    'knowledge_date_column': 'as_of_date',
                    'aggregates': [{
                        'quantity': 'dog_sightings',
                        'metrics': ['count', 'avg'],
                    }],
                    'intervals': ['1y'],
                    'groups': ['entity_id']
                }],
                feature_dates=all_as_of_times,
            )

            # create feature tables
            feature_generator.process_table_tasks(feature_table_tasks)

            # build feature dictionaries from feature tables and
            # subsetting config
            master_feature_dict = feature_dictionary_creator\
                .feature_dictionary(feature_table_tasks.keys())

            feature_dicts = feature_group_mixer.generate(
                feature_group_creator.subsets(master_feature_dict)
            )

            # figure out what matrices need to be built
            _, matrix_build_tasks =\
                planner.generate_plans(
                    split_definitions,
                    feature_dicts
                )

            # go and build the matrices
            planner.build_all_matrices(matrix_build_tasks)

            # super basic assertion: did matrices we expect get created?
            matrix_directory = os.path.join(temp_dir, 'matrices')
            matrices = [path for path in os.listdir(matrix_directory) if '.csv' in path]
            metadatas = [path for path in os.listdir(matrix_directory) if '.yaml' in path]
            assert len(matrices) == expected_num_matrices
            assert len(metadatas) == expected_num_matrices