Пример #1
0
def test_aggregations_materialize_on(test_engine):
    """Check that ``aggregations`` constructs exactly one ``FromObj``.

    The generator is built without an explicit ``materialize_subquery_fromobjs``
    argument, so whatever default ``FeatureGenerator`` uses applies here
    (presumably "on", going by the test name -- confirm against the class).
    ``FromObj`` is patched inside the feature_generators module so the call
    arguments can be inspected.
    """
    categorical_spec = {
        "column": "cat_one",
        "choices": ["good", "bad"],
        "metrics": ["sum"],
        "imputation": {"all": {"type": "null_category"}},
    }
    config = dict(
        prefix="aprefix",
        categoricals=[categorical_spec],
        groups=["entity_id", "zip_code"],
        intervals=["all"],
        knowledge_date_column="knowledge_date",
        from_obj="data",
    )

    generator = FeatureGenerator(db_engine=test_engine, features_schema_name="features")

    fromobj_patcher = patch("triage.component.architect.feature_generators.FromObj")
    with fromobj_patcher as fromobj_cls:
        generator.aggregations([config], "2016-01-01", "states")

        # The expected name is the features schema joined with the config prefix.
        expected_kwargs = {
            "from_obj": "data",
            "knowledge_date_column": "knowledge_date",
            "name": "features.aprefix",
        }
        fromobj_cls.assert_called_once_with(**expected_kwargs)
Пример #2
0
def test_aggregations_materialize_off(test_engine):
    """Check that ``aggregations`` never touches ``FromObj`` when materialization is off.

    The generator is created with ``materialize_subquery_fromobjs=False`` and
    ``FromObj`` is patched inside the feature_generators module; after building
    the aggregations the mock must not have been called.
    """
    categorical_spec = {
        "column": "cat_one",
        "choices": ["good", "bad"],
        "metrics": ["sum"],
        "imputation": {"all": {"type": "null_category"}},
    }
    # NOTE(review): unlike the "materialize_on" sibling, this config carries no
    # "groups" entry -- kept exactly as found.
    config = dict(
        prefix="aprefix",
        categoricals=[categorical_spec],
        intervals=["all"],
        knowledge_date_column="knowledge_date",
        from_obj="data",
    )

    generator = FeatureGenerator(
        db_engine=test_engine,
        features_schema_name="features",
        materialize_subquery_fromobjs=False,
    )

    fromobj_patcher = patch("triage.component.architect.feature_generators.FromObj")
    with fromobj_patcher as fromobj_cls:
        generator.aggregations([config], "2016-01-01", "states")
        assert not fromobj_cls.called
Пример #3
0
def test_replace(test_engine):
    """Check that a second ``replace=False`` run schedules no table tasks.

    First all feature tables are created for two dates. A fresh generator with
    the same settings then builds the aggregations again and must report empty
    task collections for both the aggregation and the imputation step
    (presumably because the existing tables are reused -- confirm against
    ``FeatureGenerator``).
    """
    schema = "features"
    dates = ("2013-09-30", "2014-09-30")
    generator_kwargs = dict(
        db_engine=test_engine,
        features_schema_name=schema,
        replace=False,
    )

    numeric_spec = {"quantity": "quantity_one", "metrics": ["sum", "count"]}
    categorical_spec = {
        "column": "cat_one",
        "choices": ["good", "bad"],
        "metrics": ["sum"],
        "imputation": {"all": {"type": "null_category"}},
    }
    aggregate_config = [
        dict(
            prefix="aprefix",
            aggregates_imputation={"all": {"type": "mean"}},
            aggregates=[numeric_spec],
            categoricals=[categorical_spec],
            groups=["entity_id"],
            intervals=["all"],
            knowledge_date_column="knowledge_date",
            from_obj="data",
        )
    ]

    # Initial build: one imputed table is expected back.
    first_generator = FeatureGenerator(**generator_kwargs)
    feature_tables = first_generator.create_all_tables(
        feature_dates=list(dates),
        feature_aggregation_config=aggregate_config,
        state_table="states",
    )

    assert len(feature_tables) == 1
    assert list(feature_tables)[0] == "aprefix_aggregation_imputed"

    # Second pass with a brand-new generator over the same dates.
    feature_generator = FeatureGenerator(**generator_kwargs)
    aggregations = feature_generator.aggregations(
        feature_dates=list(dates),
        feature_aggregation_config=aggregate_config,
        state_table="states",
    )

    aggregation_tasks = feature_generator.generate_all_table_tasks(
        aggregations, task_type="aggregation"
    )
    assert len(aggregation_tasks["aprefix_entity_id"]) == 0

    imputation_tasks = feature_generator.generate_all_table_tasks(
        aggregations, task_type="imputation"
    )
    assert len(imputation_tasks["aprefix_aggregation_imputed"]) == 0
Пример #4
0
def test_replace(test_engine):
    """Exercise ``replace=False``: reuse features while the cohort is covered.

    With ``replace=False`` the generator checks whether the cohort is fully
    represented in the imputed table and reuses the features if so. The test
    builds the tables once, verifies a second run schedules nothing, then adds
    a cohort member and verifies every table has to be rebuilt.
    """
    schema = "features"
    dates = ("2013-09-30", "2014-09-30", "2015-01-01")
    generator_kwargs = dict(
        db_engine=test_engine,
        features_schema_name=schema,
        replace=False,
    )

    numeric_spec = {"quantity": "quantity_one", "metrics": ["sum", "count"]}
    categorical_spec = {
        "column": "cat_one",
        "choices": ["good", "bad"],
        "metrics": ["sum"],
        "imputation": {"all": {"type": "null_category"}},
    }
    aggregate_config = [
        dict(
            prefix="aprefix",
            aggregates_imputation={"all": {"type": "mean"}},
            aggregates=[numeric_spec],
            categoricals=[categorical_spec],
            groups=["entity_id"],
            intervals=["all"],
            knowledge_date_column="knowledge_date",
            from_obj="data",
        )
    ]

    # Initial build of every feature table.
    first_generator = FeatureGenerator(**generator_kwargs)
    feature_tables = first_generator.create_all_tables(
        feature_dates=list(dates),
        feature_aggregation_config=aggregate_config,
        state_table="states",
    )

    assert len(feature_tables) == 1
    assert list(feature_tables)[0] == "aprefix_aggregation_imputed"

    # Run feature generation again with replace=False: the whole cohort is
    # already present, so the existing features should be reused.
    feature_generator = FeatureGenerator(**generator_kwargs)
    aggregations = feature_generator.aggregations(
        feature_dates=list(dates),
        feature_aggregation_config=aggregate_config,
        state_table="states",
    )

    table_tasks = feature_generator.generate_all_table_tasks(
        aggregations, task_type="aggregation"
    )
    for table_name in ("aprefix_entity_id", "aprefix_aggregation"):
        assert len(table_tasks[table_name]) == 0

    imp_tasks = feature_generator.generate_all_table_tasks(
        aggregations, task_type="imputation"
    )
    assert len(imp_tasks["aprefix_aggregation_imputed"]) == 0

    # Grow the cohort by one member; now everything has to be rebuilt.
    test_engine.execute("insert into states values (%s, %s)", 999, "2015-01-01")

    table_tasks = feature_generator.generate_all_table_tasks(
        aggregations, task_type="aggregation"
    )
    for table_name in ("aprefix_entity_id", "aprefix_aggregation"):
        assert len(table_tasks[table_name]) == 3

    # The aggregation tables are processed before asking for imputation tasks.
    feature_generator.process_table_tasks(table_tasks)
    imp_tasks = feature_generator.generate_all_table_tasks(
        aggregations, task_type="imputation"
    )
    assert len(imp_tasks["aprefix_aggregation_imputed"]) == 3
Пример #5
0
def test_replace():
    """Check that a second ``replace=False`` run schedules no table tasks.

    A throwaway PostgreSQL instance is started, ``setup_db`` prepares it
    (presumably loading the test tables -- defined elsewhere in the file), and
    all feature tables are created for two dates. A fresh generator with the
    same settings must then report empty task collections for both the
    aggregation and the imputation step.
    """
    aggregate_config = [{
        'prefix': 'aprefix',
        'aggregates_imputation': {
            'all': {
                'type': 'mean'
            }
        },
        'aggregates': [
            {
                'quantity': 'quantity_one',
                'metrics': ['sum', 'count']
            },
        ],
        'categoricals': [
            {
                'column': 'cat_one',
                'choices': ['good', 'bad'],
                'metrics': ['sum'],
                'imputation': {
                    'all': {
                        'type': 'null_category'
                    }
                }
            },
        ],
        'groups': ['entity_id'],
        'intervals': ['all'],
        'knowledge_date_column': 'knowledge_date',
        'from_obj': 'data'
    }]

    with testing.postgresql.Postgresql() as postgresql:
        engine = create_engine(postgresql.url())
        try:
            setup_db(engine)

            features_schema_name = 'features'
            feature_tables = FeatureGenerator(
                db_engine=engine,
                features_schema_name=features_schema_name,
                replace=False).create_all_tables(
                    feature_dates=['2013-09-30', '2014-09-30'],
                    feature_aggregation_config=aggregate_config,
                    state_table='states')

            assert len(feature_tables) == 1
            assert list(feature_tables)[0] == 'aprefix_aggregation_imputed'

            feature_generator = FeatureGenerator(
                db_engine=engine,
                features_schema_name=features_schema_name,
                replace=False)
            aggregations = feature_generator.aggregations(
                feature_dates=['2013-09-30', '2014-09-30'],
                feature_aggregation_config=aggregate_config,
                state_table='states')
            table_tasks = feature_generator.generate_all_table_tasks(
                aggregations, task_type='aggregation')

            # len() works on the mapping directly; no need for a .keys() view.
            assert len(table_tasks['aprefix_entity_id']) == 0

            imp_tasks = feature_generator.generate_all_table_tasks(
                aggregations, task_type='imputation')

            assert len(imp_tasks['aprefix_aggregation_imputed']) == 0
        finally:
            # Dispose the engine even when setup or an assertion fails, so its
            # connection pool is released before the temporary server stops.
            engine.dispose()