def test_init_and_name(es):
    """Smoke-test every non-Compare transform primitive on log features.

    A pool of candidate input features is built from the variables of the
    ``log`` entity, one ``GreaterThan`` comparison on the product rating and
    one timedelta feature. Each transform primitive is then instantiated on
    every matching combination of inputs, and its name and head are
    computed; the test passes if none of these calls raise.

    Args:
        es: entity set fixture providing the ``log`` and ``products``
            entities.

    Raises:
        Exception: if a primitive has no matching inputs in the candidate
            pool, since it would otherwise be silently skipped.
    """
    log = es['log']
    features = [Feature(v) for v in log.variables]
    features.append(
        GreaterThan(Feature(es["products"]["rating"], es["log"]), 2.5))
    # Add Timedelta feature
    features.append(pd.Timestamp.now() - Feature(log['datetime']))

    for transform_prim in get_transform_primitives():
        # Comparison primitives are not exercised by this test.
        if issubclass(transform_prim, Compare):
            continue

        # use the input_types matching function from DFS
        input_types = transform_prim.input_types
        if isinstance(input_types[0], list):
            # Several alternative signatures: collect matches for each one.
            matching_inputs = []
            for signature in input_types:
                matching_inputs.extend(match(signature, features))
        else:
            matching_inputs = match(input_types, features)

        if len(matching_inputs) == 0:
            raise Exception("Transform Primitive %s not tested" %
                            transform_prim.name)

        for inputs in matching_inputs:
            instance = transform_prim(*inputs)

            # try to get name and calculate
            instance.get_name()
            instance.head()
Exemplo n.º 2
0
    def __gt__(self, other_feature_or_val):
        """Return a ``GreaterThan`` feature for ``self > other_feature_or_val``.

        See also:
            :meth:`PrimitiveBase.GT`
        """
        # Imported at call time rather than at module level
        # (presumably to avoid a circular import -- confirm).
        from featuretools.primitives import GreaterThan
        greater_than = GreaterThan(self, other_feature_or_val)
        return greater_than
Exemplo n.º 3
0
def test_override_boolean(es):
    """Check OR, AND and inverted AND of two comparisons on a Count feature.

    The comparisons are ``count > 1`` and ``count < 10``; each combined
    feature is calculated for instances 0, 1 and 2 and compared against a
    hard-coded column of booleans.
    """
    value_count = Count(es['log']['value'], es['sessions'])
    above_low = GreaterThan(value_count, 1)
    below_high = LessThan(value_count, 10)

    # One expected column per feature, in the same order as ``features``.
    expected_columns = [
        [True, True, True],
        [True, True, False],
        [False, False, True],
    ]

    features = [
        above_low.OR(below_high),
        above_low.AND(below_high),
        ~(above_low.AND(below_high)),
    ]

    backend = PandasBackend(es, features)
    df = backend.calculate_all_features(instance_ids=[0, 1, 2],
                                        time_last=None)
    for feature, expected in zip(features, expected_columns):
        actual = df[feature.get_name()].values.tolist()
        assert actual == expected
def test_cum_sum_where(es):
    """Check CumSum of the log value with a ``value > 3`` where-clause.

    The feature is calculated for the first 15 log instances and compared
    element by element against hard-coded expected values.
    """
    value = es['log']['value']
    above_three = GreaterThan(value, 3)
    # Second CumSum input (presumably the grouping feature -- confirm).
    customer = Feature(es['sessions']['customer_id'], es['log'])
    cum_sum = CumSum(value, customer, where=above_three)

    backend = PandasBackend(es, [cum_sum])
    df = backend.calculate_all_features(instance_ids=range(15),
                                        time_last=None)
    actual_values = df[cum_sum.get_name()].values
    assert len(actual_values) == 15

    expected_values = [0, 5, 15, 30, 50, 50, 50, 50, 50, 50, 0, 5, 5, 12, 26]
    for expected, actual in zip(expected_values, actual_values):
        # NaN never equals itself, so an expected NaN is checked with isnan.
        if np.isnan(expected):
            assert (np.isnan(actual))
        else:
            assert expected == actual
def test_cum_sum_use_previous_and_where_absolute(es):
    """Check CumSum with a where-clause and a 40-second ``use_previous``.

    The feature is built on the log value with the log datetime passed as a
    third input, restricted by ``value > 3``; results for the first 15 log
    instances are compared against hard-coded expected values.
    """
    value = es['log']['value']
    above_three = GreaterThan(value, 3)
    customer = Feature(es['sessions']['customer_id'], es['log'])
    window = Timedelta(40, 'seconds')
    cum_sum = CumSum(value, customer, es["log"]["datetime"],
                     where=above_three,
                     use_previous=window)

    backend = PandasBackend(es, [cum_sum])
    df = backend.calculate_all_features(instance_ids=range(15),
                                        time_last=None)

    expected_values = [
        0, 5, 15, 30, 50, 0, 0, 0, 0, 0,
        0, 5, 0, 7, 21,
    ]
    actual_values = df[cum_sum.get_name()].values
    assert len(actual_values) == 15
    for expected, actual in zip(expected_values, actual_values):
        assert expected == actual
def test_cum_mean_use_previous_and_where(es):
    """Check CumMean with a where-clause and a 2-observation ``use_previous``.

    The feature is built on the log value restricted by ``value > 3``;
    results for the first 15 log instances are compared against hard-coded
    expected values.
    """
    value = es['log']['value']
    above_three = GreaterThan(value, 3)
    # todo should this be cummean?
    customer = Feature(es['sessions']['customer_id'], es['log'])
    window = Timedelta(2, 'observations', entity=es['log'])
    cum_mean = CumMean(value, customer,
                       where=above_three,
                       use_previous=window)

    backend = PandasBackend(es, [cum_mean])
    df = backend.calculate_all_features(instance_ids=range(15),
                                        time_last=None)

    expected_values = [
        0, 5, 7.5, 12.5, 17.5, 17.5, 17.5, 17.5, 17.5, 17.5,
        0, 5, 5, 6, 10.5,
    ]
    actual_values = df[cum_mean.get_name()].values
    assert len(actual_values) == 15
    for expected, actual in zip(expected_values, actual_values):
        assert expected == actual
Exemplo n.º 7
0
def test_init_and_name(es):
    """Smoke-test every registered transform primitive on log features.

    A pool of candidate input features is built from the variables of the
    ``log`` entity, one ``GreaterThan`` comparison on the product rating and
    one timedelta feature. Each transform primitive is then instantiated on
    every matching combination of inputs, named, and calculated through
    ``calculate_feature_matrix``; the test passes if none of these calls
    raise.

    Args:
        es: entity set fixture providing the ``log`` and ``products``
            entities.

    Raises:
        Exception: if a primitive has no matching inputs in the candidate
            pool, since it would otherwise be silently skipped.
    """
    from featuretools import calculate_feature_matrix

    log = es['log']
    features = [Feature(v) for v in log.variables]
    features.append(
        GreaterThan(Feature(es["products"]["rating"], es["log"]), 2.5))
    # Add Timedelta feature
    features.append(pd.Timestamp.now() - Feature(log['datetime']))

    for transform_prim in get_transform_primitives().values():
        # use the input_types matching function from DFS
        input_types = transform_prim.input_types
        if isinstance(input_types[0], list):
            # Several alternative signatures are declared; only the first
            # one is matched here.
            matching_inputs = match(input_types[0], features)
        else:
            matching_inputs = match(input_types, features)

        if len(matching_inputs) == 0:
            raise Exception("Transform Primitive %s not tested" %
                            transform_prim.name)

        for inputs in matching_inputs:
            instance = transform_prim(*inputs)

            # try to get name and calculate
            instance.get_name()
            calculate_feature_matrix([instance], entityset=es).head(5)