def test_value_checker_min_max():
    """Check the conditions produced for an INT feature that sets "min" and "max".

    The checks returned by ``spark_util.value_checker`` are ordered by
    ``cond_name`` and each ``cond_col`` is rendered to a string through its
    ``_jc`` attribute, so the whole result can be compared against literals.
    """
    feature_config = {
        "name": "a_long",
        "type": "INT_FEATURE",
        "min": 1,
        "max": 2,
    }

    # Materialize first, then order in place so the comparison below does not
    # depend on the order in which value_checker produces its checks.
    checks = list(spark_util.value_checker(feature_config))
    checks.sort(key=lambda check: check["cond_name"])
    for check in checks:
        check["cond_col"] = str(check["cond_col"]._jc)

    # "cond_col" is the violating condition; "positive_cond_str" is its
    # complement, as the expected literals below show.
    max_check = {
        "col_name": "a_long",
        "cond_name": "max",
        "cond_col": "(a_long > 2)",
        "positive_cond_str": "(a_long <= 2)",
    }
    min_check = {
        "col_name": "a_long",
        "cond_name": "min",
        "cond_col": "(a_long < 1)",
        "positive_cond_str": "(a_long >= 1)",
    }
    assert checks == [max_check, min_check]
def test_value_checker_values():
    """Check the conditions produced for a required INT feature with "values".

    The checks returned by ``spark_util.value_checker`` are ordered by
    ``cond_name`` and each ``cond_col`` is rendered to a string through its
    ``_jc`` attribute. The expected strings are built the same way from
    ``F.col`` expressions rather than written as literals.
    """
    feature_config = {
        "name": "a_long",
        "type": "INT_FEATURE",
        "values": [1, 2, 3],
        "required": True,
    }

    checks = list(spark_util.value_checker(feature_config))
    checks.sort(key=lambda check: check["cond_name"])
    for check in checks:
        check["cond_col"] = str(check["cond_col"]._jc)

    a_long = F.col("a_long")
    in_allowed_values = a_long.isin([1, 2, 3])
    # `== False` is intentional: it builds a new column expression (the result
    # exposes `_jc`), so it must not be rewritten as `is False` or `not ...`.
    not_in_allowed_values = in_allowed_values == False  # noqa: E712

    expected = [
        {
            "col_name": "a_long",
            "cond_name": "required",
            "cond_col": str(a_long.isNull()._jc),
            "positive_cond_str": str(a_long.isNotNull()._jc),
        },
        {
            "col_name": "a_long",
            "cond_name": "values",
            "cond_col": str(not_in_allowed_values._jc),
            "positive_cond_str": str(in_allowed_values._jc),
        },
    ]
    assert checks == expected
def test_value_checker_not_required():
    """Check that a STRING feature with "required": False produces no checks."""
    feature_config = {
        "name": "a_str",
        "type": "STRING_FEATURE",
        "required": False,
    }

    checks = list(spark_util.value_checker(feature_config))

    assert checks == []
def test_value_checker_required():
    """Check that a STRING feature with "required": True produces one null check.

    Only the first returned check has its ``cond_col`` rendered to a string
    (through ``_jc``); the full result list is then compared against a single
    expected "required" entry built from ``F.col`` expressions.
    """
    feature_config = {
        "name": "a_str",
        "type": "STRING_FEATURE",
        "required": True,
    }

    checks = list(spark_util.value_checker(feature_config))
    first_check = checks[0]
    first_check["cond_col"] = str(first_check["cond_col"]._jc)

    a_str = F.col("a_str")
    expected_required = {
        "col_name": "a_str",
        "cond_name": "required",
        "cond_col": str(a_str.isNull()._jc),
        "positive_cond_str": str(a_str.isNotNull()._jc),
    }
    assert checks == [expected_required]