def test_multi_output_features(es):
    value = ft.IdentityFeature(es['log']['product_id'])
    threecommon = ft.primitives.NMostCommon()
    tc = ft.Feature(es['log']['product_id'], parent_entity=es["sessions"], primitive=threecommon)

    features = [tc, value]
    for i in range(3):
        features.append(ft.Feature(tc[i],
                                   parent_entity=es['customers'],
                                   primitive=ft.primitives.NumUnique))
        features.append(tc[i])

    serializer = FeaturesSerializer(features)

    flist = [feat.unique_name() for feat in features]
    fd = [feat.to_dictionary() for feat in features]
    fdict = dict(zip(flist, fd))

    expected = {
        'ft_version': ft.__version__,
        'schema_version': SCHEMA_VERSION,
        'entityset': es.to_dictionary(),
        'feature_list': flist,
        'feature_definitions': fdict
    }
    actual = serializer.to_dict()

    _compare_feature_dicts(expected, actual)
Пример #2
0
def test_feature_use_previous_pd_timedelta(es):
    value = ft.IdentityFeature(es["log"].ww["id"])
    td = pd.Timedelta(12, "W")
    count_primitive = Count()
    count_feature = ft.AggregationFeature(
        value, "customers", count_primitive, use_previous=td
    )
    features = [count_feature, value]
    serializer = FeaturesSerializer(features)

    expected = {
        "ft_version": ft.__version__,
        "schema_version": SCHEMA_VERSION,
        "entityset": es.to_dictionary(),
        "feature_list": [count_feature.unique_name(), value.unique_name()],
        "feature_definitions": {
            count_feature.unique_name(): count_feature.to_dictionary(),
            value.unique_name(): value.to_dictionary(),
        },
    }
    expected["primitive_definitions"] = {"0": serialize_primitive(count_primitive)}
    expected["feature_definitions"][count_feature.unique_name()]["arguments"][
        "primitive"
    ] = "0"

    actual = serializer.to_dict()
    _compare_feature_dicts(expected, actual)
Пример #3
0
def test_where_feature_dependency(es):
    max_primitive = Max()
    value = ft.IdentityFeature(es["log"].ww["value"])
    is_purchased = ft.IdentityFeature(es["log"].ww["purchased"])
    max_feature = ft.AggregationFeature(
        value, "sessions", max_primitive, where=is_purchased
    )
    features = [max_feature]
    serializer = FeaturesSerializer(features)

    expected = {
        "ft_version": ft.__version__,
        "schema_version": SCHEMA_VERSION,
        "entityset": es.to_dictionary(),
        "feature_list": [max_feature.unique_name()],
        "feature_definitions": {
            max_feature.unique_name(): max_feature.to_dictionary(),
            value.unique_name(): value.to_dictionary(),
            is_purchased.unique_name(): is_purchased.to_dictionary(),
        },
    }
    expected["primitive_definitions"] = {
        "0": serialize_primitive(max_primitive),
    }
    expected["feature_definitions"][max_feature.unique_name()]["arguments"][
        "primitive"
    ] = "0"

    actual = serializer.to_dict()
    _compare_feature_dicts(expected, actual)
Пример #4
0
def test_base_features_not_in_list(es):
    max_primitive = Max()
    mult_primitive = MultiplyNumericScalar(value=2)
    value = ft.IdentityFeature(es["log"].ww["value"])
    value_x2 = ft.TransformFeature(value, mult_primitive)
    max_feature = ft.AggregationFeature(value_x2, "sessions", max_primitive)
    features = [max_feature]
    serializer = FeaturesSerializer(features)

    expected = {
        "ft_version": ft.__version__,
        "schema_version": SCHEMA_VERSION,
        "entityset": es.to_dictionary(),
        "feature_list": [max_feature.unique_name()],
        "feature_definitions": {
            max_feature.unique_name(): max_feature.to_dictionary(),
            value_x2.unique_name(): value_x2.to_dictionary(),
            value.unique_name(): value.to_dictionary(),
        },
    }
    expected["primitive_definitions"] = {
        "0": serialize_primitive(max_primitive),
        "1": serialize_primitive(mult_primitive),
    }
    expected["feature_definitions"][max_feature.unique_name()]["arguments"][
        "primitive"
    ] = "0"
    expected["feature_definitions"][value_x2.unique_name()]["arguments"][
        "primitive"
    ] = "1"

    actual = serializer.to_dict()
    _compare_feature_dicts(expected, actual)
def test_multi_output_features(es):
    product_id = ft.IdentityFeature(es["log"].ww["product_id"])
    threecommon = ft.primitives.NMostCommon()
    tc = ft.Feature(product_id,
                    parent_dataframe_name="sessions",
                    primitive=threecommon)

    features = [tc, product_id]
    for i in range(3):
        features.append(
            ft.Feature(
                tc[i],
                parent_dataframe_name="customers",
                primitive=ft.primitives.NumUnique,
            ))
        features.append(tc[i])

    serializer = FeaturesSerializer(features)

    flist = [feat.unique_name() for feat in features]
    fd = [feat.to_dictionary() for feat in features]
    fdict = dict(zip(flist, fd))

    expected = {
        "ft_version": ft.__version__,
        "schema_version": SCHEMA_VERSION,
        "entityset": es.to_dictionary(),
        "feature_list": flist,
        "feature_definitions": fdict,
    }
    actual = serializer.to_dict()

    _compare_feature_dicts(expected, actual)
Пример #6
0
    def serialize_name_unchanged(original):
        renamed = original.rename('MyFeature')
        assert renamed.get_name() == 'MyFeature'

        serializer = FeaturesSerializer([renamed])
        serialized = serializer.to_dict()

        deserializer = FeaturesDeserializer(serialized)
        deserialized = deserializer.to_list()[0]
        assert deserialized.get_name() == 'MyFeature'
    def serialize_name_unchanged(original):
        new_name = 'MyFeature'
        original_names = original.get_feature_names()
        renamed = original.rename(new_name)
        new_names = [new_name] if len(original_names) == 1 else [new_name + '[{}]'.format(i) for i in range(len(original_names))]
        check_names(renamed, new_name, new_names)

        serializer = FeaturesSerializer([renamed])
        serialized = serializer.to_dict()

        deserializer = FeaturesDeserializer(serialized)
        deserialized = deserializer.to_list()[0]
        check_names(deserialized, new_name, new_names)
Пример #8
0
def test_single_feature(es):
    feature = ft.IdentityFeature(es["log"].ww["value"])
    serializer = FeaturesSerializer([feature])

    expected = {
        "ft_version": ft.__version__,
        "schema_version": SCHEMA_VERSION,
        "entityset": es.to_dictionary(),
        "feature_list": [feature.unique_name()],
        "feature_definitions": {feature.unique_name(): feature.to_dictionary()},
        "primitive_definitions": {},
    }

    _compare_feature_dicts(expected, serializer.to_dict())
def test_single_feature(es):
    feature = ft.IdentityFeature(es['log']['value'])
    serializer = FeaturesSerializer([feature])

    expected = {
        'ft_version': ft.__version__,
        'schema_version': SCHEMA_VERSION,
        'entityset': es.to_dictionary(),
        'feature_list': [feature.unique_name()],
        'feature_definitions': {
            feature.unique_name(): feature.to_dictionary()
        }
    }

    _compare_feature_dicts(expected, serializer.to_dict())
def test_base_features_in_list(es):
    value = ft.IdentityFeature(es['log']['value'])
    max_feature = ft.AggregationFeature(value, es['sessions'], ft.primitives.Max)
    features = [max_feature, value]
    serializer = FeaturesSerializer(features)

    expected = {
        'ft_version': ft.__version__,
        'schema_version': SCHEMA_VERSION,
        'entityset': es.to_dictionary(),
        'feature_list': [max_feature.unique_name(), value.unique_name()],
        'feature_definitions': {
            max_feature.unique_name(): max_feature.to_dictionary(),
            value.unique_name(): value.to_dictionary(),
        }
    }

    _compare_feature_dicts(expected, serializer.to_dict())
def test_feature_use_previous_pd_timedelta(es):
    value = ft.IdentityFeature(es['log']['id'])
    td = pd.Timedelta(12, "W")
    count_feature = ft.AggregationFeature(value, es['customers'], ft.primitives.Count, use_previous=td)
    features = [count_feature, value]
    serializer = FeaturesSerializer(features)

    expected = {
        'ft_version': ft.__version__,
        'schema_version': SCHEMA_VERSION,
        'entityset': es.to_dictionary(),
        'feature_list': [count_feature.unique_name(), value.unique_name()],
        'feature_definitions': {
            count_feature.unique_name(): count_feature.to_dictionary(),
            value.unique_name(): value.to_dictionary(),
        }
    }

    _compare_feature_dicts(expected, serializer.to_dict())
def test_base_features_in_list(es):
    value = ft.IdentityFeature(es["log"].ww["value"])
    max_feature = ft.AggregationFeature(value, "sessions", ft.primitives.Max)
    features = [max_feature, value]
    serializer = FeaturesSerializer(features)

    expected = {
        "ft_version": ft.__version__,
        "schema_version": SCHEMA_VERSION,
        "entityset": es.to_dictionary(),
        "feature_list": [max_feature.unique_name(),
                         value.unique_name()],
        "feature_definitions": {
            max_feature.unique_name(): max_feature.to_dictionary(),
            value.unique_name(): value.to_dictionary(),
        },
    }

    _compare_feature_dicts(expected, serializer.to_dict())
Пример #13
0
def test_base_features_not_in_list(es):
    value = ft.IdentityFeature(es['log'].ww['value'])
    value_x2 = ft.TransformFeature(
        value, ft.primitives.MultiplyNumericScalar(value=2))
    max_feature = ft.AggregationFeature(value_x2, 'sessions',
                                        ft.primitives.Max)
    features = [max_feature]
    serializer = FeaturesSerializer(features)

    expected = {
        'ft_version': ft.__version__,
        'schema_version': SCHEMA_VERSION,
        'entityset': es.to_dictionary(),
        'feature_list': [max_feature.unique_name()],
        'feature_definitions': {
            max_feature.unique_name(): max_feature.to_dictionary(),
            value_x2.unique_name(): value_x2.to_dictionary(),
            value.unique_name(): value.to_dictionary(),
        }
    }

    _compare_feature_dicts(expected, serializer.to_dict())
Пример #14
0
def test_where_feature_dependency(es):
    value = ft.IdentityFeature(es['log'].ww['value'])
    is_purchased = ft.IdentityFeature(es['log'].ww['purchased'])
    max_feature = ft.AggregationFeature(value,
                                        'sessions',
                                        ft.primitives.Max,
                                        where=is_purchased)
    features = [max_feature]
    serializer = FeaturesSerializer(features)

    expected = {
        'ft_version': ft.__version__,
        'schema_version': SCHEMA_VERSION,
        'entityset': es.to_dictionary(),
        'feature_list': [max_feature.unique_name()],
        'feature_definitions': {
            max_feature.unique_name(): max_feature.to_dictionary(),
            value.unique_name(): value.to_dictionary(),
            is_purchased.unique_name(): is_purchased.to_dictionary(),
        }
    }

    _compare_feature_dicts(expected, serializer.to_dict())
Пример #15
0
def test_feature_use_previous_pd_dateoffset(es):
    value = ft.IdentityFeature(es['log']['id'])
    do = pd.DateOffset(months=3)
    count_feature = ft.AggregationFeature(value,
                                          es['customers'],
                                          ft.primitives.Count,
                                          use_previous=do)
    features = [count_feature, value]
    serializer = FeaturesSerializer(features)

    expected = {
        'ft_version': ft.__version__,
        'schema_version': SCHEMA_VERSION,
        'entityset': es.to_dictionary(),
        'feature_list': [count_feature.unique_name(),
                         value.unique_name()],
        'feature_definitions': {
            count_feature.unique_name(): count_feature.to_dictionary(),
            value.unique_name(): value.to_dictionary(),
        }
    }

    _compare_feature_dicts(expected, serializer.to_dict())

    value = ft.IdentityFeature(es['log']['id'])
    do = pd.DateOffset(months=3, days=2, minutes=30)
    count_feature = ft.AggregationFeature(value,
                                          es['customers'],
                                          ft.primitives.Count,
                                          use_previous=do)
    features = [count_feature, value]
    serializer = FeaturesSerializer(features)

    expected = {
        'ft_version': ft.__version__,
        'schema_version': SCHEMA_VERSION,
        'entityset': es.to_dictionary(),
        'feature_list': [count_feature.unique_name(),
                         value.unique_name()],
        'feature_definitions': {
            count_feature.unique_name(): count_feature.to_dictionary(),
            value.unique_name(): value.to_dictionary(),
        }
    }

    _compare_feature_dicts(expected, serializer.to_dict())
def test_feature_use_previous_pd_dateoffset(es):
    value = ft.IdentityFeature(es["log"].ww["id"])
    do = pd.DateOffset(months=3)
    count_feature = ft.AggregationFeature(value,
                                          "customers",
                                          ft.primitives.Count,
                                          use_previous=do)
    features = [count_feature, value]
    serializer = FeaturesSerializer(features)

    expected = {
        "ft_version": ft.__version__,
        "schema_version": SCHEMA_VERSION,
        "entityset": es.to_dictionary(),
        "feature_list": [count_feature.unique_name(),
                         value.unique_name()],
        "feature_definitions": {
            count_feature.unique_name(): count_feature.to_dictionary(),
            value.unique_name(): value.to_dictionary(),
        },
    }

    _compare_feature_dicts(expected, serializer.to_dict())

    value = ft.IdentityFeature(es["log"].ww["id"])
    do = pd.DateOffset(months=3, days=2, minutes=30)
    count_feature = ft.AggregationFeature(value,
                                          "customers",
                                          ft.primitives.Count,
                                          use_previous=do)
    features = [count_feature, value]
    serializer = FeaturesSerializer(features)

    expected = {
        "ft_version": ft.__version__,
        "schema_version": SCHEMA_VERSION,
        "entityset": es.to_dictionary(),
        "feature_list": [count_feature.unique_name(),
                         value.unique_name()],
        "feature_definitions": {
            count_feature.unique_name(): count_feature.to_dictionary(),
            value.unique_name(): value.to_dictionary(),
        },
    }

    _compare_feature_dicts(expected, serializer.to_dict())