def test_init_and_name(es):
    """Smoke-test construction, naming and ``head()`` of transform primitives.

    A pool of candidate input features is built from the ``log`` entity.
    Every primitive returned by ``get_transform_primitives()`` that is not a
    ``Compare`` subclass is then instantiated once per matching input
    combination, and ``get_name()`` and ``head()`` are called on each
    instance.

    Args:
        es: Entity set fixture; the ``log`` and ``products`` entities are
            read from it.

    Raises:
        Exception: If no combination of the candidate features matches a
            primitive's ``input_types``, i.e. the primitive would go untested.
    """
    log = es['log']
    features = [Feature(v) for v in log.variables] + \
        [GreaterThan(Feature(es["products"]["rating"], es["log"]), 2.5)]
    # Add Timedelta feature
    features.append(pd.Timestamp.now() - Feature(log['datetime']))
    for transform_prim in get_transform_primitives():
        if issubclass(transform_prim, Compare):
            continue
        # use the input_types matching function from DFS
        input_types = transform_prim.input_types
        # input_types is either one signature or a list of alternative
        # signatures; isinstance is the idiomatic check and also accepts
        # list subclasses.
        if isinstance(input_types[0], list):
            matching_inputs = [
                g for s in input_types for g in match(s, features)
            ]
        else:
            matching_inputs = match(input_types, features)
        if len(matching_inputs) == 0:
            raise Exception("Transform Primitive %s not tested" %
                            transform_prim.name)
        for s in matching_inputs:
            instance = transform_prim(*s)
            # try to get name and calculate
            instance.get_name()
            instance.head()
def __gt__(self, other_feature_or_val):
    """Return a ``GreaterThan`` primitive for ``self > other_feature_or_val``.

    See also:
        :meth:`PrimitiveBase.GT`
    """
    # Imported at call time rather than at module level
    # (presumably to avoid a circular import -- confirm).
    from featuretools.primitives import GreaterThan

    comparison = GreaterThan(self, other_feature_or_val)
    return comparison
def test_cum_sum_where(es):
    """Check ``CumSum`` with a ``where`` clause against hard-coded values.

    The sum runs over ``log.value``, grouped by the sessions' ``customer_id``
    feature, restricted by the condition ``value > 3``, and is calculated
    for the first 15 log instances.
    """
    value = es['log']['value']
    value_gt_3 = GreaterThan(value, 3)
    customer_id = Feature(es['sessions']['customer_id'], es['log'])
    cum_sum = CumSum(value, customer_id, where=value_gt_3)

    backend = PandasBackend(es, [cum_sum])
    result = backend.calculate_all_features(instance_ids=range(15),
                                            time_last=None)
    actual_values = result[cum_sum.get_name()].values
    assert len(actual_values) == 15

    expected_values = [0, 5, 15, 30, 50, 50, 50, 50, 50, 50, 0, 5, 5, 12, 26]
    for expected, actual in zip(expected_values, actual_values):
        # NaN never compares equal to itself, so it needs its own check.
        if np.isnan(expected):
            assert (np.isnan(actual))
        else:
            assert expected == actual
def test_override_boolean(es):
    """Check ``OR``, ``AND`` and ``~`` on comparison features.

    Two comparisons on a per-session count of ``log.value`` (``> 1`` and
    ``< 10``) are combined three ways and evaluated for sessions 0, 1 and 2.
    """
    count = Count(es['log']['value'], es['sessions'])
    above_one = GreaterThan(count, 1)
    below_ten = LessThan(count, 10)

    # Each feature is paired positionally with its expected column below.
    features = [
        above_one.OR(below_ten),
        above_one.AND(below_ten),
        ~(above_one.AND(below_ten)),
    ]
    expected_columns = [
        [True, True, True],
        [True, True, False],
        [False, False, True],
    ]

    backend = PandasBackend(es, features)
    result = backend.calculate_all_features(instance_ids=[0, 1, 2],
                                            time_last=None)
    for feature, expected in zip(features, expected_columns):
        actual = result[feature.get_name()].values.tolist()
        assert actual == expected
def test_cum_sum_use_previous_and_where_absolute(es):
    """Check ``CumSum`` with ``where`` plus a 40-second ``use_previous``.

    The sum runs over ``log.value``, grouped by the sessions' ``customer_id``
    feature, with ``log.datetime`` passed as the third positional argument
    and the condition ``value > 3``. The first 15 log instances are compared
    against hard-coded values.
    """
    value = es['log']['value']
    value_gt_3 = GreaterThan(value, 3)
    customer_id = Feature(es['sessions']['customer_id'], es['log'])
    cum_sum = CumSum(value, customer_id, es["log"]["datetime"],
                     where=value_gt_3,
                     use_previous=Timedelta(40, 'seconds'))

    backend = PandasBackend(es, [cum_sum])
    result = backend.calculate_all_features(instance_ids=range(15),
                                            time_last=None)

    expected_values = [0, 5, 15, 30, 50, 0, 0, 0, 0, 0, 0, 5, 0, 7, 21]
    actual_values = result[cum_sum.get_name()].values
    assert len(actual_values) == 15
    for expected, actual in zip(expected_values, actual_values):
        assert expected == actual
def test_cum_mean_use_previous_and_where(es):
    """Check ``CumMean`` with ``where`` plus a 2-observation ``use_previous``.

    The mean runs over ``log.value``, grouped by the sessions'
    ``customer_id`` feature, with the condition ``value > 3``. The first 15
    log instances are compared against hard-coded values.
    """
    value = es['log']['value']
    value_gt_3 = GreaterThan(value, 3)
    # todo should this be cummean?
    customer_id = Feature(es['sessions']['customer_id'], es['log'])
    window = Timedelta(2, 'observations', entity=es['log'])
    cum_mean = CumMean(value, customer_id,
                       where=value_gt_3,
                       use_previous=window)

    backend = PandasBackend(es, [cum_mean])
    result = backend.calculate_all_features(instance_ids=range(15),
                                            time_last=None)

    expected_values = [0, 5, 7.5, 12.5, 17.5, 17.5, 17.5, 17.5, 17.5, 17.5,
                       0, 5, 5, 6, 10.5]
    actual_values = result[cum_mean.get_name()].values
    assert len(actual_values) == 15
    for expected, actual in zip(expected_values, actual_values):
        assert expected == actual
def test_init_and_name(es):
    """Smoke-test construction, naming and calculation of transform primitives.

    A pool of candidate input features is built from the ``log`` entity.
    Every primitive in ``get_transform_primitives().values()`` is then
    instantiated once per matching input combination, named via
    ``get_name()``, and run through ``calculate_feature_matrix``.

    Args:
        es: Entity set fixture; the ``log`` and ``products`` entities are
            read from it.

    Raises:
        Exception: If no combination of the candidate features matches a
            primitive's ``input_types``, i.e. the primitive would go untested.
    """
    from featuretools import calculate_feature_matrix

    log = es['log']
    features = [Feature(v) for v in log.variables] + \
        [GreaterThan(Feature(es["products"]["rating"], es["log"]), 2.5)]
    # Add Timedelta feature
    features.append(pd.Timestamp.now() - Feature(log['datetime']))
    for transform_prim in get_transform_primitives().values():
        # use the input_types matching function from DFS
        input_types = transform_prim.input_types
        # When input_types is a list of alternative signatures only the
        # first one is exercised. isinstance is the idiomatic check and
        # also accepts list subclasses.
        if isinstance(input_types[0], list):
            matching_inputs = match(input_types[0], features)
        else:
            matching_inputs = match(input_types, features)
        if len(matching_inputs) == 0:
            raise Exception("Transform Primitive %s not tested" %
                            transform_prim.name)
        for s in matching_inputs:
            instance = transform_prim(*s)
            # try to get name and calculate
            instance.get_name()
            calculate_feature_matrix([instance], entityset=es).head(5)