示例#1
0
def test_value_checker_min_max():
    """Verify the checks produced for a feature with ``min`` and ``max``.

    The config declares an integer feature bounded to ``[1, 2]``. The
    checker is expected to yield one entry per bound, each carrying the
    violating condition (``cond_col``) and its textual positive
    counterpart (``positive_cond_str``).
    """
    feature_config = {"name": "a_long", "type": "INT_FEATURE", "min": 1, "max": 2}

    # Order by condition name so the comparison below does not depend on
    # the order in which the checker yields its entries.
    checks = list(spark_util.value_checker(feature_config))
    checks.sort(key=lambda check: check["cond_name"])

    # Replace each condition object with the string form of its ``_jc``
    # attribute so the entries can be compared as plain dictionaries.
    for check in checks:
        check["cond_col"] = str(check["cond_col"]._jc)

    expected_max = {
        "col_name": "a_long",
        "cond_name": "max",
        "cond_col": "(a_long > 2)",
        "positive_cond_str": "(a_long <= 2)",
    }
    expected_min = {
        "col_name": "a_long",
        "cond_name": "min",
        "cond_col": "(a_long < 1)",
        "positive_cond_str": "(a_long >= 1)",
    }
    assert checks == [expected_max, expected_min]
示例#2
0
def test_value_checker_values():
    """Verify the checks produced for a required feature with ``values``.

    The config marks the integer feature as required and restricts it to
    ``[1, 2, 3]``. Two entries are expected: a ``required`` check and a
    ``values`` check.
    """
    feature_config = {
        "name": "a_long",
        "type": "INT_FEATURE",
        "values": [1, 2, 3],
        "required": True,
    }

    # Order by condition name so the comparison below does not depend on
    # the order in which the checker yields its entries.
    checks = list(spark_util.value_checker(feature_config))
    checks.sort(key=lambda check: check["cond_name"])

    # Replace each condition object with the string form of its ``_jc``
    # attribute so the entries can be compared as plain dictionaries.
    for check in checks:
        check["cond_col"] = str(check["cond_col"]._jc)

    column = F.col("a_long")
    expected_required = {
        "col_name": "a_long",
        "cond_name": "required",
        "cond_col": str(column.isNull()._jc),
        "positive_cond_str": str(column.isNotNull()._jc),
    }
    # ``== False`` is deliberate: it builds a column expression whose
    # string form is the expected value, so it must not be rewritten as
    # ``not ...`` / ``is False`` (or any other negation form).
    not_in_allowed = column.isin([1, 2, 3]) == False
    expected_values = {
        "col_name": "a_long",
        "cond_name": "values",
        "cond_col": str(not_in_allowed._jc),
        "positive_cond_str": str(column.isin([1, 2, 3])._jc),
    }
    assert checks == [expected_required, expected_values]
示例#3
0
def test_value_checker_not_required():
    """Verify that an optional feature with no constraints yields no checks."""
    feature_config = dict(name="a_str", type="STRING_FEATURE", required=False)

    checks = list(spark_util.value_checker(feature_config))

    assert checks == []
示例#4
0
def test_value_checker_required():
    """Verify that a required string feature yields a single null check."""
    feature_config = {"name": "a_str", "type": "STRING_FEATURE", "required": True}

    checks = list(spark_util.value_checker(feature_config))

    # Replace the condition object with the string form of its ``_jc``
    # attribute so the entry can be compared as a plain dictionary.
    first_check = checks[0]
    first_check["cond_col"] = str(first_check["cond_col"]._jc)

    column = F.col("a_str")
    expected_required = {
        "col_name": "a_str",
        "cond_name": "required",
        "cond_col": str(column.isNull()._jc),
        "positive_cond_str": str(column.isNotNull()._jc),
    }
    assert checks == [expected_required]