Exemplo n.º 1
0
def test_Planner():
    """Plan two temporal splits against two feature groups and check the output.

    Verifies that ``Planner.generate_plans`` adds string uuids to every
    returned matrix definition and returns eight build tasks, evenly divided
    by matrix type and by feature group, all tagged with the configured cohort.
    """

    def january_2010(day):
        # Midnight on the given day of January 2010.
        return datetime.datetime(2010, 1, day, 0, 0)

    def five_day_matrix(first_day):
        # Matrix definition with five consecutive daily as-of times.
        return {
            "first_as_of_time": january_2010(first_day),
            "matrix_info_end_time": january_2010(first_day + 5),
            "as_of_times": [
                january_2010(day) for day in range(first_day, first_day + 5)
            ],
        }

    def split(train_first_day, test_first_day):
        # One train matrix paired with a single test matrix.
        return {
            "feature_start_time": datetime.datetime(1990, 1, 1, 0, 0),
            "modeling_start_time": january_2010(1),
            "modeling_end_time": january_2010(16),
            "train_matrix": five_day_matrix(train_first_day),
            "test_matrices": [five_day_matrix(test_first_day)],
        }

    splits = [split(1, 6), split(6, 11)]

    first_group = FeatureGroup(
        name="first_features",
        features_by_table={
            "features0": ["f1", "f2"],
            "features1": ["f1", "f2"],
        },
    )
    second_group = FeatureGroup(
        name="second_features",
        features_by_table={
            "features2": ["f3", "f4"],
            "features3": ["f5", "f6"],
        },
    )

    planner = Planner(
        feature_start_time=january_2010(1),
        label_names=["booking"],
        label_types=["binary"],
        cohort_names=["prior_bookings"],
        user_metadata={},
    )
    updated_definitions, tasks_by_uuid = planner.generate_plans(
        splits, [first_group, second_group])

    # The uuid values themselves are irrelevant; only their type and the
    # number of distinct train uuids are checked.
    train_uuids = []
    for definition in updated_definitions:
        train_uuid = definition["train_uuid"]
        assert isinstance(train_uuid, str)
        train_uuids.append(train_uuid)
        assert all(
            isinstance(test_uuid, str)
            for test_uuid in definition["test_uuids"])
    assert len(set(train_uuids)) == 4

    # The keys are uuids, so only the task payloads are inspected.
    tasks = tasks_by_uuid.values()
    # 2 splits * 2 matrices per split * 2 feature dicts
    assert len(tasks) == 8

    def count(matches):
        # Number of build tasks satisfying the given predicate.
        return sum(1 for task in tasks if matches(task))

    assert count(lambda task: task["matrix_type"] == "train") == 4
    assert count(lambda task: task["matrix_type"] == "test") == 4
    assert count(lambda task: task["feature_dictionary"] == first_group) == 4
    assert count(lambda task: task["feature_dictionary"] == second_group) == 4
    assert count(
        lambda task: task["matrix_metadata"]["feature_groups"]
        == ["first_features"]) == 4
    assert count(
        lambda task: task["matrix_metadata"]["feature_groups"]
        == ["second_features"]) == 4
    assert count(
        lambda task: task["matrix_metadata"]["cohort_name"]
        == "prior_bookings") == 8
Exemplo n.º 2
0
def test_Planner():
    """Exercise ``Planner.generate_plans`` on two splits and two feature groups.

    The test checks that each returned matrix definition carries string uuids,
    that four distinct train uuids are produced, and that the eight build
    tasks split evenly across matrix types and feature groups while all
    carrying the 'prior_bookings' cohort name.
    """
    modeling_start = datetime.datetime(2010, 1, 1, 0, 0)
    one_day = datetime.timedelta(days=1)

    def build_matrix(offset_days):
        # Five daily as-of times beginning offset_days after modeling_start.
        first = modeling_start + offset_days * one_day
        return {
            'first_as_of_time': first,
            'matrix_info_end_time': first + 5 * one_day,
            'as_of_times': [first + step * one_day for step in range(5)],
        }

    matrix_sets = []
    for train_offset, test_offset in ((0, 5), (5, 10)):
        matrix_sets.append({
            'feature_start_time': datetime.datetime(1990, 1, 1, 0, 0),
            'modeling_start_time': modeling_start,
            'modeling_end_time': modeling_start + 15 * one_day,
            'train_matrix': build_matrix(train_offset),
            'test_matrices': [build_matrix(test_offset)],
        })

    group_one = FeatureGroup(
        name='first_features',
        features_by_table={
            'features0': ['f1', 'f2'],
            'features1': ['f1', 'f2'],
        },
    )
    group_two = FeatureGroup(
        name='second_features',
        features_by_table={
            'features2': ['f3', 'f4'],
            'features3': ['f5', 'f6'],
        },
    )
    groups = [group_one, group_two]

    planner = Planner(
        feature_start_time=modeling_start,
        label_names=['booking'],
        label_types=['binary'],
        cohort_name='prior_bookings',
        states=['state_one AND state_two'],
        user_metadata={},
    )
    planned_definitions, planned_tasks = planner.generate_plans(
        matrix_sets, groups)

    # uuids only need to exist and be strings; their values do not matter
    seen_train_uuids = []
    for planned in planned_definitions:
        assert isinstance(planned['train_uuid'], str)
        seen_train_uuids.append(planned['train_uuid'])
        for uuid in planned['test_uuids']:
            assert isinstance(uuid, str)
    assert len(set(seen_train_uuids)) == 4

    # the dict keys are uuids, so look only at the task payloads
    tasks = planned_tasks.values()
    # 2 splits * 2 matrices per split * 2 feature dicts
    assert len(tasks) == 8

    for matrix_type in ('train', 'test'):
        matching = [task for task in tasks
                    if task['matrix_type'] == matrix_type]
        assert len(matching) == 4

    for group in groups:
        matching = [task for task in tasks
                    if task['feature_dictionary'] == group]
        assert len(matching) == 4

    for group_name in ('first_features', 'second_features'):
        matching = [
            task for task in tasks
            if task['matrix_metadata']['feature_groups'] == [group_name]
        ]
        assert len(matching) == 4

    in_cohort = [
        task for task in tasks
        if task['matrix_metadata']['cohort_name'] == 'prior_bookings'
    ]
    assert len(in_cohort) == 8
Exemplo n.º 3
0
def test_generate_plans():
    """Check the definitions and build tasks returned by ``generate_plans``.

    Two temporal splits (one train and one test matrix each) are planned
    against two feature dictionaries. The test asserts that uuids were added
    to the returned definitions, that eight build tasks come back evenly
    divided by matrix type and feature dictionary, and that every task
    carries the empty matrix directory the planner was configured with.
    """

    def jan_2010(day):
        # Midnight on the given day of January 2010.
        return datetime.datetime(2010, 1, day, 0, 0)

    def matrix_definition(first_day):
        # Matrix definition with five consecutive daily as-of times.
        return {
            'first_as_of_time': jan_2010(first_day),
            'matrix_info_end_time': jan_2010(first_day + 5),
            'as_of_times': [
                jan_2010(day) for day in range(first_day, first_day + 5)
            ],
        }

    def matrix_set(train_first_day, test_first_day):
        # One train matrix paired with a single test matrix.
        return {
            'feature_start_time': datetime.datetime(1990, 1, 1, 0, 0),
            'modeling_start_time': jan_2010(1),
            'modeling_end_time': jan_2010(16),
            'train_matrix': matrix_definition(train_first_day),
            'test_matrices': [matrix_definition(test_first_day)],
        }

    matrix_set_definitions = [matrix_set(1, 6), matrix_set(6, 11)]
    feature_dict_one = {'features0': ['f1', 'f2'], 'features1': ['f1', 'f2']}
    feature_dict_two = {'features2': ['f3', 'f4'], 'features3': ['f5', 'f6']}
    feature_dicts = [feature_dict_one, feature_dict_two]
    planner = Planner(
        feature_start_time=datetime.datetime(2010, 1, 1, 0, 0),
        label_names=['booking'],
        label_types=['binary'],
        states=['state_one AND state_two'],
        db_config=db_config,
        user_metadata={},
        matrix_directory='',  # this test won't write anything
        engine=None  # or look at the db!
    )

    updated_matrix_definitions, build_tasks = planner.generate_plans(
        matrix_set_definitions, feature_dicts)
    # test that it added uuids: we don't much care what they are
    matrix_uuids = []
    for matrix_def in updated_matrix_definitions:
        assert isinstance(matrix_def['train_uuid'], str)
        matrix_uuids.append(matrix_def['train_uuid'])
        for test_uuid in matrix_def['test_uuids']:
            assert isinstance(test_uuid, str)
    assert len(set(matrix_uuids)) == 4

    # not going to assert anything on the keys (uuids), just get out the values
    build_tasks = build_tasks.values()
    assert len(
        build_tasks) == 8  # 2 splits * 2 matrices per split * 2 feature dicts

    assert sum(1 for task in build_tasks
               if task['matrix_type'] == 'train') == 4
    assert sum(1 for task in build_tasks if task['matrix_type'] == 'test') == 4
    # The directory comparison must be the tested expression, not the filter:
    # filtering on it and testing the (always truthy) task dict can never fail.
    assert all(task['matrix_directory'] == '' for task in build_tasks)
    assert sum(1 for task in build_tasks
               if task['feature_dictionary'] == feature_dict_one) == 4
    assert sum(1 for task in build_tasks
               if task['feature_dictionary'] == feature_dict_two) == 4