def test_make_agg_feat_using_prev_n_events(entityset, backend):
    """Min over the log 'value' variable with an observation-count window.

    Builds two session-level ``Min`` features that differ only in the number
    of observations handed to ``use_previous`` (1 and 3), checks that the two
    features get distinct names, and then checks the values calculated for
    instance 0 at two different ``time_last`` values.

    NOTE(review): the visible source defines another test with this exact
    name directly after this one; the later definition rebinds the name, so
    confirm which copy is meant to be collected.
    """

    def min_over_last(n_observations):
        # Same feature each time; only the observation count varies.
        return Min(entityset['log']['value'],
                   parent_entity=entityset['sessions'],
                   use_previous=Timedelta(n_observations, 'observations',
                                          entity=entityset['log']))

    agg_feat_1 = min_over_last(1)
    agg_feat_2 = min_over_last(3)

    assert agg_feat_1.get_name() != agg_feat_2.get_name(), \
        'Features should have different names based on use_previous'

    calculator = backend([agg_feat_1, agg_feat_2])

    # (time_last, expected value for the 1-observation feature,
    #  expected value for the 3-observation feature)
    # time_last is included by default
    expectations = (
        (datetime(2011, 4, 9, 10, 30, 6), 5, 0),
        (datetime(2011, 4, 9, 10, 30, 30), 20, 10),
    )
    for time_last, expected_1, expected_2 in expectations:
        df = calculator.calculate_all_features(instance_ids=[0],
                                               time_last=time_last)
        # Read both values before asserting, as the checks are paired.
        v1 = df[agg_feat_1.get_name()][0]
        v2 = df[agg_feat_2.get_name()][0]
        assert v1 == expected_1
        assert v2 == expected_2
def test_make_agg_feat_using_prev_n_events(entityset, backend):
    """Check ``Min`` features whose ``use_previous`` is a count of observations.

    Two features are created over ``entityset['log']['value']`` with
    ``entityset['sessions']`` as the parent entity: one limited to 1
    observation and one limited to 3. The test asserts that their names
    differ and that the values calculated for instance 0 match the expected
    minimums at two cutoff times.

    NOTE(review): an identically named test precedes this one in the visible
    source; this later definition is the one the name ends up bound to, so
    the earlier copy looks redundant -- confirm and remove one of them.
    """
    one_obs = Timedelta(1, 'observations', entity=entityset['log'])
    agg_feat_1 = Min(entityset['log']['value'],
                     parent_entity=entityset['sessions'],
                     use_previous=one_obs)

    three_obs = Timedelta(3, 'observations', entity=entityset['log'])
    agg_feat_2 = Min(entityset['log']['value'],
                     parent_entity=entityset['sessions'],
                     use_previous=three_obs)

    assert agg_feat_1.get_name() != agg_feat_2.get_name(), \
        'Features should have different names based on use_previous'

    feature_backend = backend([agg_feat_1, agg_feat_2])

    # First cutoff.
    # time_last is included by default
    first_cutoff = datetime(2011, 4, 9, 10, 30, 6)
    first_result = feature_backend.calculate_all_features(
        instance_ids=[0], time_last=first_cutoff)
    first_v1 = first_result[agg_feat_1.get_name()][0]
    first_v2 = first_result[agg_feat_2.get_name()][0]
    assert first_v1 == 5
    assert first_v2 == 0

    # Second, later cutoff: both expected minimums change.
    second_cutoff = datetime(2011, 4, 9, 10, 30, 30)
    second_result = feature_backend.calculate_all_features(
        instance_ids=[0], time_last=second_cutoff)
    second_v1 = second_result[agg_feat_1.get_name()][0]
    second_v2 = second_result[agg_feat_2.get_name()][0]
    assert second_v1 == 20
    assert second_v2 == 10
def test_approx_base_feature_is_also_first_class_feature(entityset):
    """Request a feature both directly and as the base of an approximated one.

    ``agg_feat`` (a session-level ``Min`` of the product rating reached
    through the log) is requested in the feature list itself and is also the
    input to a customer-level ``Sum`` that is pulled back onto sessions via a
    ``DirectFeature``. The feature matrix is calculated with
    ``approximate=Timedelta(10, 's')`` and both output columns are compared
    against expected values.
    """
    rating_on_log = DirectFeature(entityset['products']['rating'],
                                  entityset['log'])

    # This should still be computed properly
    agg_feat = Min(rating_on_log, entityset['sessions'])

    # This is to be approximated
    summed_per_customer = Sum(agg_feat, entityset['customers'])
    sess_to_cust = DirectFeature(summed_per_customer, entityset['sessions'])

    # One cutoff time per requested instance (ids 0 and 2).
    cutoff_time = pd.DataFrame({
        'time': [datetime(2011, 4, 9, 10, 31, 19),
                 datetime(2011, 4, 9, 11, 0, 0)],
        'instance_id': [0, 2],
    })

    feature_matrix = calculate_feature_matrix(
        [sess_to_cust, agg_feat],
        entityset,
        approximate=Timedelta(10, 's'),
        cutoff_time=cutoff_time,
    )

    approximated_values = feature_matrix[sess_to_cust.get_name()].tolist()
    assert approximated_values == [8.5, 7]

    direct_values = feature_matrix[agg_feat.get_name()].tolist()
    assert direct_values == [4, 1.5]