def test_diff(es):
    """Check ``Diff`` of the log ``value`` feature against fixed expectations.

    Two ``Diff`` features are built over the same ``value`` identity feature:
    one whose second argument is the log entity's ``session_id`` variable, and
    one whose second argument is a ``DirectFeature`` on the sessions
    ``customer_id`` (presumably the grouping key in both cases -- confirm
    against ``Diff``). Both are calculated for instance ids 0-14 and compared
    element-wise with the hard-coded lists, where NaN only matches NaN.
    """
    value = IdentityFeature(es['log']['value'])
    customer_id_feat = DirectFeature(es['sessions']['customer_id'],
                                     child_entity=es['log'])
    diff1 = Diff(value, es['log']['session_id'])
    diff2 = Diff(value, customer_id_feat)

    pandas_backend = PandasBackend(es, [diff1, diff2])
    df = pandas_backend.calculate_all_features(instance_ids=range(15),
                                               time_last=None)

    correct_vals1 = [
        np.nan, 5, 5, 5, 5, np.nan, 1, 1, 1, np.nan, np.nan, 5, np.nan, 7, 7
    ]
    correct_vals2 = [np.nan, 5, 5, 5, 5, -20, 1, 1, 1, -3, np.nan, 5, -5, 7, 7]

    for feature, correct_vals in ((diff1, correct_vals1),
                                  (diff2, correct_vals2)):
        vals = df[feature.get_name()].values.tolist()
        # zip() stops at the shorter input, so pin the length first; otherwise
        # a truncated result column would pass unnoticed.
        assert len(vals) == len(correct_vals)
        for actual, expected in zip(vals, correct_vals):
            # NaN never compares equal to itself, so NaN positions are
            # checked with isnan and everything else by plain equality.
            if np.isnan(actual):
                assert np.isnan(expected)
            else:
                assert actual == expected
def test_diff_single_value(es):
    """Check that ``Diff`` computed for one instance yields no non-null value.

    The feature is calculated for the single store id 5; the result must have
    exactly one row, and that row's ``Diff`` column must be null.
    """
    single_diff = Diff(es['stores']['num_square_feet'],
                       es['stores']['region_id'])
    backend = PandasBackend(es, [single_diff])
    result = backend.calculate_all_features(instance_ids=[5],
                                            time_last=None)

    assert result.shape[0] == 1
    # Dropping nulls must leave nothing behind.
    non_null = result[single_diff.get_name()].dropna()
    assert non_null.shape[0] == 0
def test_isnull_feat(es):
    """Check ``IsNull`` stacked on a ``Diff`` feature for log ids 0-14."""
    log_value = IdentityFeature(es['log']['value'])
    null_flag = IsNull(Diff(log_value, es['log']['session_id']))

    backend = PandasBackend(es, [null_flag])
    result = backend.calculate_all_features(range(15), None)

    # Values the underlying Diff is expected to take, kept for reference:
    #   [nan, 5, 5, 5, 5, nan, 1, 1, 1, nan, nan, 5, nan, 7, 7]
    # The expected flags below are True exactly at the NaN positions.
    expected = [True, False, False, False, False,
                True, False, False, False, True,
                True, False, True, False, False]
    observed = result[null_flag.get_name()].values.tolist()
    assert expected == observed
def test_arithmetic_of_transform(es):
    """Check Add/Subtract/Multiply/Divide applied to two ``Diff`` features.

    Both operands are ``Diff`` features over log variables (``value`` and
    ``value_2``), each built with the ``product_id`` identity feature as the
    second argument. Every arithmetic primitive is calculated for log ids
    0, 2, 11 and 13 and compared with its hard-coded expected column.
    """
    diff1 = Diff(IdentityFeature(es['log']['value']),
                 IdentityFeature(es['log']['product_id']))
    diff2 = Diff(IdentityFeature(es['log']['value_2']),
                 IdentityFeature(es['log']['product_id']))

    to_test = [(Add, [np.nan, 14., -7., 3.]),
               (Subtract, [np.nan, 6., -3., 1.]),
               (Multiply, [np.nan, 40., 10., 2.]),
               (Divide, [np.nan, 2.5, 2.5, 2.])]

    features = [primitive(diff1, diff2) for primitive, _ in to_test]

    pandas_backend = PandasBackend(es, features)
    df = pandas_backend.calculate_all_features(instance_ids=[0, 2, 11, 13],
                                               time_last=None)

    for feature, (_, expected) in zip(features, to_test):
        v = df[feature.get_name()].values.tolist()
        # The leading entry is NaN on both sides and NaN != NaN, so it is
        # checked with isnan; the remainder is compared by value. Indexing
        # and slicing are used rather than pop(0) so the asserts have no
        # side effects on the expected data.
        assert np.isnan(v[0])
        assert np.isnan(expected[0])
        assert v[1:] == expected[1:]