Exemplo n.º 1
0
def test_index_column_lookup(test_engine):
    """Check the index columns reported for each imputed aggregation table.

    Two spacetime aggregations are built: ``prefix1`` grouped by
    ``entity_id`` only, and ``prefix2`` grouped by ``entity_id`` and
    ``zip_code``. The mapping returned by
    ``FeatureGenerator.index_column_lookup`` is then compared against the
    expected ``{table name: [index columns]}`` dictionary.
    """

    def make_aggregation(prefix, aggregates, groups):
        # Only prefix, aggregates and groups differ between the two
        # aggregations; the remaining settings are rebuilt on every call so
        # that no list object is shared between them.
        return SpacetimeAggregation(
            prefix=prefix,
            aggregates=aggregates,
            groups=groups,
            intervals=["all"],
            date_column="knowledge_date",
            output_date_column="as_of_date",
            dates=["2013-09-30", "2014-09-30"],
            state_table="states",
            state_group="entity_id",
            schema="features",
            from_obj="data",
        )

    aggregations = [
        make_aggregation(
            "prefix1",
            [
                Categorical(
                    col="cat_one",
                    function="sum",
                    choices=["good", "bad", "inbetween"],
                    impute_rules={
                        "coltype": "categorical",
                        "all": {"type": "zero"},
                    },
                )
            ],
            ["entity_id"],
        ),
        make_aggregation(
            "prefix2",
            [
                Aggregate(
                    quantity="quantity_one",
                    function="count",
                    impute_rules={
                        "coltype": "aggregate",
                        "all": {"type": "zero"},
                    },
                )
            ],
            ["entity_id", "zip_code"],
        ),
    ]

    generator = FeatureGenerator(
        db_engine=test_engine,
        features_schema_name="features",
    )

    expected_lookup = {
        "prefix1_aggregation_imputed": ["as_of_date", "entity_id"],
        "prefix2_aggregation_imputed": ["as_of_date", "entity_id", "zip_code"],
    }
    actual_lookup = generator.index_column_lookup(aggregations)
    assert actual_lookup == expected_lookup
Exemplo n.º 2
0
def test_index_column_lookup():
    """Check the index columns reported for each imputed aggregation table.

    Builds one aggregation grouped by ``entity_id`` and one grouped by
    ``entity_id`` plus ``zip_code``, starts a temporary PostgreSQL instance,
    prepares it with ``setup_db`` and asserts that
    ``FeatureGenerator.index_column_lookup`` returns the expected
    ``{table name: [index columns]}`` mapping.
    """

    def shared_settings():
        # Settings common to both aggregations; a new dict (with new lists)
        # is produced per call so the aggregations never share objects.
        return dict(
            intervals=['all'],
            date_column='knowledge_date',
            output_date_column='as_of_date',
            dates=['2013-09-30', '2014-09-30'],
            state_table='states',
            state_group='entity_id',
            schema='features',
            from_obj='data',
        )

    aggregations = []

    categorical = Categorical(
        col='cat_one',
        function='sum',
        choices=['good', 'bad', 'inbetween'],
        impute_rules={'coltype': 'categorical', 'all': {'type': 'zero'}},
    )
    aggregations.append(
        SpacetimeAggregation(
            prefix='prefix1',
            aggregates=[categorical],
            groups=['entity_id'],
            **shared_settings()
        )
    )

    counter = Aggregate(
        quantity='quantity_one',
        function='count',
        impute_rules={'coltype': 'aggregate', 'all': {'type': 'zero'}},
    )
    aggregations.append(
        SpacetimeAggregation(
            prefix='prefix2',
            aggregates=[counter],
            groups=['entity_id', 'zip_code'],
            **shared_settings()
        )
    )

    expected_lookup = {
        'prefix1_aggregation_imputed': ['as_of_date', 'entity_id'],
        'prefix2_aggregation_imputed': ['as_of_date', 'entity_id', 'zip_code'],
    }

    with testing.postgresql.Postgresql() as postgresql:
        engine = create_engine(postgresql.url())
        setup_db(engine)

        generator = FeatureGenerator(
            db_engine=engine,
            features_schema_name='features',
        )
        actual_lookup = generator.index_column_lookup(aggregations)
        assert actual_lookup == expected_lookup