def test_make_trans_feat(es):
    """Compute a single ``Hour`` transform feature and check its value.

    The feature is built on the ``datetime`` variable of the ``log`` entity
    and calculated for instance id 0 with no time cutoff; the resulting
    value is expected to be 10.
    """
    hour_feature = Hour(es['log']['datetime'])
    backend = PandasBackend(es, [hour_feature])
    feature_matrix = backend.calculate_all_features(instance_ids=[0],
                                                    time_last=None)
    hour_column = feature_matrix[hour_feature.get_name()]
    assert hour_column[0] == 10
def test_seed_features(es):
    """Seed features and aggregations stacked on them appear in DFS output.

    Two seed features are supplied (a boolean ``Count > 2`` on sessions and
    an ``Hour`` on the log datetime). With ``Last`` as the only aggregation
    primitive, the built feature list must contain, by name, both the
    sessions seed feature and ``Last`` applied to the log seed feature.
    """
    seed_feature_sessions = Count(es['log']["id"], es['sessions']) > 2
    seed_feature_log = Hour(es['log']['datetime'])
    # Feature expected to be derived from the log-level seed feature.
    expected_agg = Last(seed_feature_log, es['sessions'])

    synthesis = DeepFeatureSynthesis(
        target_entity_id='sessions',
        entityset=es,
        agg_primitives=[Last],
        trans_primitives=[],
        max_depth=2,
        seed_features=[seed_feature_sessions, seed_feature_log],
    )
    built_names = [feature.get_name()
                   for feature in synthesis.build_features()]

    assert seed_feature_sessions.get_name() in built_names
    assert expected_agg.get_name() in built_names
def test_dfs_builds_on_seed_features_more_than_max_depth(es):
    """DFS stacks on seed features only up to ``max_depth``.

    With ``max_depth=1`` the sessions seed feature and a ``Last``
    aggregation of the log seed feature must be present by name in the
    output, while a feature two levels above that aggregation must not be.
    """
    seed_feature_sessions = Count(es['log']["id"], es['sessions']) > 2
    seed_feature_log = Hour(es['log']['datetime'])
    one_level_up = Last(seed_feature_log, es['sessions'])

    # Two more levels on top of `one_level_up` (Count, then DirectFeature):
    # that exceeds max_depth, so DFS is expected to leave it out.
    customer_count = Count(one_level_up, es['customers'])
    two_levels_up = DirectFeature(customer_count, es['sessions'])

    synthesis = DeepFeatureSynthesis(
        target_entity_id='sessions',
        entityset=es,
        agg_primitives=[Last, Count],
        trans_primitives=[],
        max_depth=1,
        seed_features=[seed_feature_sessions, seed_feature_log],
    )
    built_names = [feature.get_name()
                   for feature in synthesis.build_features()]

    assert seed_feature_sessions.get_name() in built_names
    assert one_level_up.get_name() in built_names
    assert two_levels_up.get_name() not in built_names
def test_arithmetic(es):
    """Arithmetic between two transform features (currently disabled).

    The unconditional ``return`` directly under the TODO marker ends the
    test before any check runs, so it always passes. The parked body
    combines ``Day`` and ``Hour`` of the log datetime with ``+``, ``-``,
    ``*`` and ``/`` and compares the calculated columns for instances
    0, 3, 5 and 7 against fixed expected values.
    """
    # P TODO:
    return
    hour_feature = Hour(es['log']['datetime'])
    day_feature = Day(es['log']['datetime'])

    # (primitive, expected values) pairs, in the same order as `combined`.
    expectations = [
        (Add, [19, 19, 19, 19]),
        (Subtract, [-1, -1, -1, -1]),
        (Multiply, [90, 90, 90, 90]),
        (Divide, [.9, .9, .9, .9]),
    ]
    combined = [
        day_feature + hour_feature,
        day_feature - hour_feature,
        day_feature * hour_feature,
        day_feature / hour_feature,
    ]

    backend = PandasBackend(es, combined)
    feature_matrix = backend.calculate_all_features(
        instance_ids=[0, 3, 5, 7], time_last=None)

    for feature, (_primitive, expected) in zip(combined, expectations):
        actual = feature_matrix[feature.get_name()].values.tolist()
        assert actual == expected
def test_overrides(es):
    """Operator overloads on features match explicit primitives (disabled).

    The unconditional ``return`` directly under the TODO marker ends the
    test before any check runs, so it always passes. The parked body
    compares, via ``.hash()``, features produced by Python operators
    (``+``, ``-``, ``>``, ...) against the same features built by calling
    the primitive classes directly.
    """
    # P TODO:
    return
    hour = Hour(es['log']['datetime'])
    day = Day(es['log']['datetime'])

    # Binary primitives and comparison primitives, in the order in which
    # the `overrides` list below spells out the matching operators.
    feats = [Add, Subtract, Multiply, Divide, Mod, And, Or]
    compare_ops = [
        GreaterThan, LessThan, Equals, NotEquals, GreaterThanEqualTo,
        LessThanEqualTo
    ]
    # Unary minus is checked against Negate.
    assert Negate(hour).hash() == (-hour).hash()

    # Operand pairs: feature/same feature, feature/other feature,
    # feature/scalar.
    compares = [(hour, hour), (hour, day), (day, 2)]
    # One group of 13 entries per pair in `compares`: the 7 `feats`
    # operators followed by the 6 `compare_ops` operators. The loops below
    # walk this list positionally, so the order must not change.
    overrides = [
        hour + hour, hour - hour, hour * hour, hour / hour, hour % hour,
        hour & hour, hour | hour,
        hour > hour, hour < hour, hour == hour, hour != hour, hour >= hour,
        hour <= hour,
        hour + day, hour - day, hour * day, hour / day, hour % day,
        hour & day, hour | day,
        hour > day, hour < day, hour == day, hour != day, hour >= day,
        hour <= day,
        day + 2, day - 2, day * 2, day / 2, day % 2,
        day & 2, day | 2,
        day > 2, day < 2, day == 2, day != 2, day >= 2,
        day <= 2,
    ]
    # `i` is a running index into `overrides` shared by both inner loops.
    i = 0
    for left, right in compares:
        for feat in feats:
            f = feat(left, right)
            o = overrides[i]
            assert o.hash() == f.hash()
            i += 1
        for compare_op in compare_ops:
            f = compare_op(left, right)
            o = overrides[i]
            assert o.hash() == f.hash()
            i += 1

    # Scalar on the left-hand side of an arithmetic/logical operator.
    # There is no `2 % day` entry, which is why Mod is skipped in the loop.
    our_reverse_overrides = [
        2 + day, 2 - day, 2 * day, 2 / day, 2 & day, 2 | day
    ]
    i = 0
    for feat in feats:
        if feat != Mod:
            f = feat(2, day)
            o = our_reverse_overrides[i]
            assert o.hash() == f.hash()
            # Advance only when an entry was consumed (Mod has none).
            i += 1

    # Scalar on the left-hand side of a comparison. Entries are paired
    # positionally with `compare_ops` applied to (day, 2), so `2 < day` is
    # checked against GreaterThan(day, 2), `2 > day` against
    # LessThan(day, 2), and so on.
    python_reverse_overrides = [
        2 < day, 2 > day, 2 == day, 2 != day, 2 <= day, 2 >= day
    ]
    i = 0
    for compare_op in compare_ops:
        f = compare_op(day, 2)
        o = python_reverse_overrides[i]
        assert o.hash() == f.hash()
        i += 1