Пример #1
0
def test_apply_complex_policies_spark():
    """Apply the ``fixtures.complex_y`` policy to a Spark DataFrame.

    A two-row pandas frame is converted to a Spark DataFrame, run through
    ``policy_lib.apply_policy`` and converted back with ``toPandas()``; the
    result must equal the hard-coded expectation, dtypes included.
    """
    session = spark_lib.utils.make_session("test.policy.applyComplexPolicies")

    input_columns = {
        "name": ["bob", "alice"],
        "val-int": [30, 50],
        "val-float": [32.43424, 56.64543],
        "date": [pd.Timestamp("2018-10-15"), pd.Timestamp("2016-09-10")],
    }
    expected_columns = {
        "name": [
            "db6063546d5d6c1fd3826bc0a1d8188fa0dae1a174823eac1e8e063a073bf149",
            "4ae0639267ad49c658e8d266aa1caa51c876ed1d7ca788a0749d5189248295eb",
        ],
        "val-int": [25, 56],
        "val-float": [32.4, 56.6],
        # TODO: when these are pd.Timestamp, Spark's date_trunc is causing
        # dtype erasure. We should figure out why that's happening
        "date": [datetime.date(2018, 1, 1), datetime.date(2016, 1, 1)],
    }
    source_pdf = pd.DataFrame(input_columns)
    expected_pdf = pd.DataFrame(expected_columns)
    spark_df = session.createDataFrame(source_pdf)

    policy_spec = yaml.load(fixtures.complex_y, Loader=yaml.FullLoader)
    policy = data.Policy(**policy_spec)

    transformed = policy_lib.apply_policy(policy, spark_df)
    result_pdf = transformed.toPandas()

    pdt.assert_frame_equal(result_pdf, expected_pdf, check_dtype=True)
Пример #2
0
def test_apply_complex_policies_pandas():
    """Apply the ``fixtures.complex_y`` policy to a pandas DataFrame.

    The policy is built from the YAML fixture, applied to a two-row frame via
    ``policy_lib.apply_policy``, and the output is compared against the
    hard-coded expected frame.
    """
    policy_spec = yaml.load(fixtures.complex_y, Loader=yaml.FullLoader)

    input_columns = {
        "name": ["bob", "alice"],
        "val-int": [30, 50],
        "val-float": [32.43424, 56.64543],
        "date": [pd.Timestamp("2018-10-15"), pd.Timestamp("2016-09-10")],
    }
    expected_columns = {
        "name": [
            "db6063546d5d6c1fd3826bc0a1d8188fa0dae1a174823eac1e8e063a073bf149",
            "4ae0639267ad49c658e8d266aa1caa51c876ed1d7ca788a0749d5189248295eb",
        ],
        "val-int": [23, 58],
        "val-float": [32.4, 56.6],
        "date": [pd.Timestamp("2018-01-01"), pd.Timestamp("2016-01-01")],
    }
    source = pd.DataFrame(input_columns)
    expected = pd.DataFrame(expected_columns)

    policy = data.Policy(**policy_spec)
    result = policy_lib.apply_policy(policy, source)

    pdt.assert_frame_equal(result, expected)
Пример #3
0
def test_reverse_helper():
    """Round-trip a frame through a policy and its ``policy_lib.reverse``.

    Checks that every transformation of the reversed policy has the
    ``TokenReverser`` identifier as its type, and that applying the reversed
    policy to the transformed frame yields the original frame again.
    """
    raw_policy = yaml.load(fixtures.reversible_yaml, Loader=yaml.FullLoader)
    forward_policy = policy_lib.parse_policy(raw_policy)

    original = pd.DataFrame({"name": ["bob", "alice"]})

    # Forward pass, then build the reversed policy and undo the forward pass.
    transformed = policy_lib.apply_policy(forward_policy, original)
    backward_policy = policy_lib.reverse(forward_policy)
    restored = policy_lib.apply_policy(backward_policy, transformed)

    for step in backward_policy.transformations:
        assert step.type == pandas_lib.transformations.TokenReverser.identifier

    pdt.assert_frame_equal(original, restored)
Пример #4
0
def test_secret_in_named_transform():
    """Apply the ``fixtures.secret_yaml`` policy and expect an unchanged frame.

    The test asserts that the output of ``policy_lib.apply_policy`` equals the
    input frame.
    """
    policy_spec = yaml.load(fixtures.secret_yaml, Loader=yaml.FullLoader)
    source = pd.DataFrame({"name": ["bob", "alice"]})

    policy = data.Policy(**policy_spec)
    result = policy_lib.apply_policy(policy, source)

    pdt.assert_frame_equal(result, source)
Пример #5
0
def test_apply_policy_pandas():
    """Apply the ``fixtures.y`` policy to a pandas frame of ones.

    ``test_utils.PlusN`` is registered under ``"plusN"`` in the pandas
    registry first; the policy output is then expected to equal the input
    frame plus 3.
    """
    pandas_lib.registry.register("plusN", test_utils.PlusN)
    policy_spec = yaml.load(fixtures.y, Loader=yaml.FullLoader)

    ones = pd.DataFrame(np.ones(5), columns=["test"])
    expected = ones + 3

    policy = data.Policy(**policy_spec)
    result = policy_lib.apply_policy(policy, ones)

    pdt.assert_frame_equal(result, expected)
Пример #6
0
def test_named_transformation_spark():
    """Apply the ``fixtures.named_y`` policy to a Spark DataFrame of ones.

    ``test_utils.PlusN`` is registered in the Spark registry under its own
    identifier for the duration of the test; the policy output (converted
    back with ``toPandas()``) is expected to equal the input frame plus 3.
    """
    sess = spark_lib.utils.make_session("test.policy.namedTransformations")
    pd_df = pd.DataFrame(np.ones(5, ), columns=["test"])
    expected_df = pd_df + 3
    df = sess.createDataFrame(pd_df)

    spark_lib.registry.register(test_utils.PlusN.identifier, test_utils.PlusN)
    try:
        d = yaml.load(fixtures.named_y, Loader=yaml.FullLoader)
        p = data.Policy(**d)
        new_df = policy_lib.apply_policy(p, df).toPandas()

        pdt.assert_frame_equal(new_df, expected_df)
    finally:
        # Always unregister, even when the policy application or the
        # assertion fails, so the registration cannot leak into other tests.
        del spark_lib.registry._registry[test_utils.PlusN.identifier]