def test_config_fill_values():
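    # Well-formed fill values (space-separated numbers for vectors, truthy or
    # falsy strings/ints for binary) should validate; the malformed values
    # further below should raise a ValidationError.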
    vector_fill_values = ["1.0 0.0 1.04 10.49", "1 2 3 4 5", "0", "1.0", ""]
    binary_fill_values = ["yes", "No", "1", "TRUE", 1]
    for vector_fill_value, binary_fill_value in zip(vector_fill_values,
                                                    binary_fill_values):
        config = {
            "input_features": [
                vector_feature(
                    preprocessing={"fill_value": vector_fill_value}),
            ],
            "output_features":
            [binary_feature(preprocessing={"fill_value": binary_fill_value})],
        }
        validate_config(config)

    bad_vector_fill_values = ["one two three", "1,2,3", 0]
    bad_binary_fill_values = ["one", 2, "maybe"]
    for vector_fill_value, binary_fill_value in zip(bad_vector_fill_values,
                                                    bad_binary_fill_values):
        config = {
            "input_features": [
                vector_feature(
                    preprocessing={"fill_value": vector_fill_value}),
            ],
            "output_features":
            [binary_feature(preprocessing={"fill_value": binary_fill_value})],
        }
        with pytest.raises(ValidationError):
            validate_config(config)
def test_config_bad_preprocessing_param():
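    # 'fake' is not a registered sequence encoder and 'tokenizer' is not a
    # valid image preprocessing parameter, so validation should fail.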
    config = {
        "input_features": [
            sequence_feature(reduce_output="sum", encoder="fake"),
            image_feature(
                "/tmp/destination_folder",
                preprocessing={
                    "in_memory": True,
                    "height": 12,
                    "width": 12,
                    "num_channels": 3,
                    "tokenizer": "space",
                },
            ),
        ],
        "output_features":
        [category_feature(vocab_size=2, reduce_input="sum")],
        "combiner": {
            "type": "concat",
            "output_size": 14
        },
    }

    with pytest.raises(ValidationError, match=r"^'fake' is not one of .*"):
        validate_config(config)
def test_config_input_output_features():
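    # A minimal config with default-constructed features should validate.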
    config = {
        "input_features": [
            category_feature(),
            number_feature(),
        ],
        "output_features": [binary_feature()],
    }

    validate_config(config)
def test_incorrect_output_features_config():

    config = {
        "input_features": [
            number_feature(),
        ],
        "output_features": [binary_feature(decoder="classifier")],
    }

    # Invalid decoder for binary output feature
    with pytest.raises(ValidationError):
        validate_config(config)
def test_config_bad_encoder_name():
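    # An unregistered encoder name should be rejected by schema validation.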
    config = {
        "input_features":
        [sequence_feature(reduce_output="sum", encoder="fake")],
        "output_features":
        [category_feature(vocab_size=2, reduce_input="sum")],
        "combiner": {
            "type": "concat",
            "output_size": 14
        },
    }

    with pytest.raises(ValidationError, match=r"^'fake' is not one of .*"):
        validate_config(config)
def test_config_encoders():
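    # Each encoder in ENCODERS should produce a config that validates.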
    for encoder in ENCODERS:
        config = {
            "input_features": [
                sequence_feature(reduce_output="sum", encoder=encoder),
                image_feature("/tmp/destination_folder"),
            ],
            "output_features":
            [category_feature(vocab_size=2, reduce_input="sum")],
            "combiner": {
                "type": "concat",
                "output_size": 14
            },
        }
        validate_config(config)
def test_validate_with_preprocessing_defaults():
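    # Explicitly passing each feature type's preprocessing defaults should
    # validate, both before and after merging the global defaults.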
    config = {
        "input_features": [
            audio_feature(
                "/tmp/destination_folder",
                preprocessing=AudioFeatureMixin.preprocessing_defaults()),
            bag_feature(
                preprocessing=BagFeatureMixin.preprocessing_defaults()),
            binary_feature(
                preprocessing=BinaryFeatureMixin.preprocessing_defaults()),
            category_feature(
                preprocessing=CategoryFeatureMixin.preprocessing_defaults()),
            date_feature(
                preprocessing=DateFeatureMixin.preprocessing_defaults()),
            h3_feature(preprocessing=H3FeatureMixin.preprocessing_defaults()),
            image_feature(
                "/tmp/destination_folder",
                preprocessing=ImageFeatureMixin.preprocessing_defaults()),
            number_feature(
                preprocessing=NumberFeatureMixin.preprocessing_defaults()),
            sequence_feature(
                preprocessing=SequenceFeatureMixin.preprocessing_defaults()),
            set_feature(
                preprocessing=SetFeatureMixin.preprocessing_defaults()),
            text_feature(
                preprocessing=TextFeatureMixin.preprocessing_defaults()),
            timeseries_feature(
                preprocessing=TimeseriesFeatureMixin.preprocessing_defaults()),
            vector_feature(
                preprocessing=VectorFeatureMixin.preprocessing_defaults()),
        ],
        "output_features": [{
            "name": "target",
            "type": "category"
        }],
        TRAINER: {
            "decay": True,
            "learning_rate": 0.001,
            "validation_field": "target",
            "validation_metric": "accuracy",
        },
    }

    validate_config(config)
    config = merge_with_defaults(config)
    validate_config(config)
def test_incorrect_input_features_config():
    config = {
        "input_features": [
            category_feature(preprocessing={"normalization": "zscore"}),
        ],
        "output_features": [binary_feature()],
    }

    # Not a preprocessing param for category feature
    with pytest.raises(ValidationError):
        validate_config(config)

    config = {
        "input_features": [
            text_feature(preprocessing={"padding_symbol": 0}),
        ],
        "output_features": [binary_feature()],
    }

    # Incorrect type for padding_symbol preprocessing param
    with pytest.raises(ValidationError):
        validate_config(config)

    config = {
        "input_features": [
            binary_feature(),
        ],
        "output_features": [binary_feature()],
    }
    del config["input_features"][0]["type"]

    # Missing 'type' key for the input feature
    with pytest.raises(ValidationError):
        validate_config(config)
# Parametrization restored so the eval_batch_size argument resolves;
# the specific values here are illustrative, not from the original suite.
@pytest.mark.parametrize("eval_batch_size", [None, 128])
def test_config_tabnet(eval_batch_size):
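    # A fully specified tabnet combiner and trainer section should validate.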
    config = {
        "input_features": [
            category_feature(vocab_size=2, reduce_input="sum"),
            number_feature(),
        ],
        "output_features": [binary_feature(weight_regularization=None)],
        "combiner": {
            "type": "tabnet",
            "size": 24,
            "output_size": 26,
            "sparsity": 0.000001,
            "bn_virtual_divider": 32,
            "bn_momentum": 0.4,
            "num_steps": 5,
            "relaxation_factor": 1.5,
            "use_keras_batch_norm": False,
            "bn_virtual_bs": 512,
        },
        TRAINER: {
            "batch_size": 16384,
            "eval_batch_size": eval_batch_size,
            "epochs": 1000,
            "early_stop": 20,
            "learning_rate": 0.02,
            "optimizer": {
                "type": "adam"
            },
            "decay": True,
            "decay_steps": 20000,
            "decay_rate": 0.9,
            "staircase": True,
            "regularization_lambda": 1,
            "regularization_type": "l2",
            "validation_field": "label",
        },
    }
    validate_config(config)
def test_config_features():
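    # Build a config covering every feature type, then check that types valid
    # only as inputs are rejected when used as outputs.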
    all_input_features = [
        audio_feature("/tmp/destination_folder"),
        bag_feature(),
        binary_feature(),
        category_feature(),
        date_feature(),
        h3_feature(),
        image_feature("/tmp/destination_folder"),
        number_feature(),
        sequence_feature(),
        set_feature(),
        text_feature(),
        timeseries_feature(),
        vector_feature(),
    ]
    all_output_features = [
        binary_feature(),
        category_feature(),
        number_feature(),
        sequence_feature(),
        set_feature(),
        text_feature(),
        vector_feature(),
    ]

    # validate config with all features
    config = {
        "input_features": all_input_features,
        "output_features": all_output_features,
    }
    validate_config(config)

    # make sure all defaults provided also registers as valid
    config = merge_with_defaults(config)
    validate_config(config)

    # test various invalid output features
    input_only_features = [
        feature for feature in all_input_features
        if feature["type"] not in output_type_registry.keys()
    ]
    for input_feature in input_only_features:
        config = {
            "input_features": all_input_features,
            "output_features": all_output_features + [input_feature],
        }

        dtype = input_feature["type"]
        with pytest.raises(ValidationError,
                           match=rf"^'{dtype}' is not one of .*"):
            validate_config(config)
def test_config_trainer_empty_null_and_default():
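    # An empty trainer section is valid, an explicit null is not, and the
    # schema's own defaults round-trip through validation.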
    config = {
        "input_features": [
            category_feature(vocab_size=2, reduce_input="sum"),
            number_feature(),
        ],
        "output_features": [binary_feature(weight_regularization=None)],
        "combiner": {
            "type": "tabnet",
        },
        TRAINER: {},
    }
    validate_config(config)

    config[TRAINER] = None
    with pytest.raises(ValidationError):
        validate_config(config)

    config[TRAINER] = ECDTrainerConfig.Schema().dump({})
    validate_config(config)
def test_optimizer_property_validation():
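    # Optimizer sub-schemas should enforce property types and ranges while
    # ignoring unknown keys.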
    config = {
        "input_features": [
            category_feature(vocab_size=2, reduce_input="sum"),
            number_feature(),
        ],
        "output_features": [binary_feature(weight_regularization=None)],
        "combiner": {
            "type": "tabnet",
        },
        TRAINER: {},
    }
    validate_config(config)

    # Test that an optimizer's property types are enforced:
    config[TRAINER]["optimizer"] = {"type": "rmsprop"}
    validate_config(config)

    config[TRAINER]["optimizer"]["momentum"] = "invalid"
    with pytest.raises(ValidationError):
        validate_config(config)

    # Test extra keys are excluded and defaults are loaded appropriately:
    config[TRAINER]["optimizer"]["momentum"] = 10
    config[TRAINER]["optimizer"]["extra_key"] = "invalid"
    validate_config(config)
    assert not hasattr(
        ECDTrainerConfig.Schema().load(config[TRAINER]).optimizer, "extra_key")

    # Test bad parameter range:
    config[TRAINER]["optimizer"] = {"type": "rmsprop", "eps": -1}
    with pytest.raises(ValidationError):
        validate_config(config)

    # Test config validation for tuple types:
    config[TRAINER]["optimizer"] = {"type": "adam", "betas": (0.1, 0.1)}
    validate_config(config)
def test_config_trainer_bad_optimizer():
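    # A null or unknown optimizer type should be rejected; every registered
    # optimizer type should be accepted.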
    config = {
        "input_features": [
            category_feature(vocab_size=2, reduce_input="sum"),
            number_feature(),
        ],
        "output_features": [binary_feature(weight_regularization=None)],
        "combiner": {
            "type": "tabnet",
        },
        TRAINER: {},
    }
    validate_config(config)

    # Test manually set-to-null optimizer vs unspecified:
    config[TRAINER]["optimizer"] = None
    with pytest.raises(ValidationError):
        validate_config(config)
    assert ECDTrainerConfig.Schema().load({}).optimizer is not None

    # Test all types in optimizer_registry supported:
    for key in optimizer_registry.keys():
        config[TRAINER]["optimizer"] = {"type": key}
        validate_config(config)

    # Test invalid optimizer type:
    config[TRAINER]["optimizer"] = {"type": 0}
    with pytest.raises(ValidationError):
        validate_config(config)
    config[TRAINER]["optimizer"] = {"type": {}}
    with pytest.raises(ValidationError):
        validate_config(config)
    config[TRAINER]["optimizer"] = {"type": "invalid"}
    with pytest.raises(ValidationError):
        validate_config(config)
def test_clipper_property_validation():
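    # gradient_clipping accepts null or an empty dict (falling back to
    # defaults) but rejects wrong container types and invalid values.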
    config = {
        "input_features": [
            category_feature(vocab_size=2, reduce_input="sum"),
            number_feature(),
        ],
        "output_features": [binary_feature(weight_regularization=None)],
        "combiner": {
            "type": "tabnet",
        },
        TRAINER: {},
    }
    validate_config(config)

    # Test null/empty clipper:
    config[TRAINER]["gradient_clipping"] = None
    validate_config(config)
    config[TRAINER]["gradient_clipping"] = {}
    validate_config(config)
    assert (ECDTrainerConfig.Schema().load(
        config[TRAINER]).gradient_clipping == ECDTrainerConfig.Schema().load(
            {}).gradient_clipping)

    # Test invalid clipper type:
    config[TRAINER]["gradient_clipping"] = 0
    with pytest.raises(ValidationError):
        validate_config(config)
    config[TRAINER]["gradient_clipping"] = "invalid"
    with pytest.raises(ValidationError):
        validate_config(config)

    # Test that an optimizer's property types are enforced:
    config[TRAINER]["gradient_clipping"] = {"clipglobalnorm": None}
    validate_config(config)
    config[TRAINER]["gradient_clipping"] = {"clipglobalnorm": 1}
    validate_config(config)
    config[TRAINER]["gradient_clipping"] = {"clipglobalnorm": "invalid"}
    with pytest.raises(ValidationError):
        validate_config(config)

    # Test extra keys are excluded and defaults are loaded appropriately:
    config[TRAINER]["gradient_clipping"] = {"clipnorm": 1}
    config[TRAINER]["gradient_clipping"]["extra_key"] = "invalid"
    validate_config(config)
    assert not hasattr(
        ECDTrainerConfig.Schema().load(config[TRAINER]).gradient_clipping,
        "extra_key")
def test_config_bad_combiner_types_enums():
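    # Enum-like combiner fields (initializers, norm, activation, reduce_output)
    # should reject values outside their allowed sets.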
    config = {
        "input_features": [
            category_feature(vocab_size=2, reduce_input="sum"),
            number_feature(),
        ],
        "output_features": [binary_feature(weight_regularization=None)],
        "combiner": {
            "type": "concat",
            "weights_initializer": "zeros"
        },
    }

    # config is valid at this point
    validate_config(config)

    # Test weights initializer:
    config["combiner"]["weights_initializer"] = {"test": "fail"}
    with pytest.raises(ValidationError, match=r"{'test': 'fail'} is not of*"):
        validate_config(config)
    config["combiner"]["weights_initializer"] = "fail"
    with pytest.raises(ValidationError, match=r"'fail' is not of*"):
        validate_config(config)
    config["combiner"]["weights_initializer"] = {}
    with pytest.raises(ValidationError, match=r"Failed validating 'type'"):
        validate_config(config)
    config["combiner"]["weights_initializer"] = {"type": "fail"}
    with pytest.raises(ValidationError, match=r"'fail' is not one of*"):
        validate_config(config)
    config["combiner"]["weights_initializer"] = {"type": "normal", "stddev": 0}
    validate_config(config)

    # Test bias initializer:
    del config["combiner"]["weights_initializer"]
    config["combiner"]["bias_initializer"] = "kaiming_uniform"
    validate_config(config)
    config["combiner"]["bias_initializer"] = "fail"
    with pytest.raises(ValidationError, match=r"'fail' is not of*"):
        validate_config(config)
    config["combiner"]["bias_initializer"] = {}
    with pytest.raises(ValidationError, match=r"Failed validating 'type'"):
        validate_config(config)
    config["combiner"]["bias_initializer"] = {"type": "fail"}
    with pytest.raises(ValidationError, match=r"'fail' is not one of*"):
        validate_config(config)
    config["combiner"]["bias_initializer"] = {"type": "zeros", "stddev": 0}
    validate_config(config)

    # Test norm:
    del config["combiner"]["bias_initializer"]
    config["combiner"]["norm"] = "batch"
    validate_config(config)
    config["combiner"]["norm"] = "fail"
    with pytest.raises(ValidationError, match=r"'fail' is not one of*"):
        validate_config(config)

    # Test activation:
    del config["combiner"]["norm"]
    config["combiner"]["activation"] = "relu"
    validate_config(config)
    config["combiner"]["activation"] = 123
    with pytest.raises(ValidationError, match=r"123 is not of type*"):
        validate_config(config)

    # Test reduce_output:
    del config["combiner"]["activation"]
    config2 = {**config}
    config2["combiner"]["type"] = "tabtransformer"
    config2["combiner"]["reduce_output"] = "sum"
    validate_config(config2)
    config2["combiner"]["reduce_output"] = "fail"
    with pytest.raises(ValidationError, match=r"'fail' is not one of*"):
        validate_config(config2)

    # Test reduce_output = None:
    config2["combiner"]["reduce_output"] = None
    validate_config(config2)
def test_config_bad_combiner():
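    # Missing or unknown combiner types, wrong container types, and
    # out-of-range parameters should all be rejected.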
    config = {
        "input_features": [
            category_feature(vocab_size=2, reduce_input="sum"),
            number_feature(),
        ],
        "output_features": [binary_feature(weight_regularization=None)],
        "combiner": {
            "type": "tabnet",
        },
    }

    # config is valid at this point
    validate_config(config)

    # combiner without type
    del config["combiner"]["type"]
    with pytest.raises(ValidationError, match=r"^'type' is a required .*"):
        validate_config(config)

    # bad combiner type
    config["combiner"]["type"] = "fake"
    with pytest.raises(ValidationError, match=r"^'fake' is not one of .*"):
        validate_config(config)

    # bad combiner format (list instead of dict)
    config["combiner"] = [{"type": "tabnet"}]
    with pytest.raises(ValidationError,
                       match=r"^\[\{'type': 'tabnet'\}\] is not of .*"):
        validate_config(config)

    # bad combiner parameter types
    config["combiner"] = {
        "type": "tabtransformer",
        "num_layers": 10,
        "dropout": False,
    }
    with pytest.raises(ValidationError, match=r"^False is not of type.*"):
        validate_config(config)

    # bad combiner parameter range
    config["combiner"] = {
        "type": "transformer",
        "dropout": -1,
    }
    with pytest.raises(ValidationError, match=r"less than the minimum.*"):
        validate_config(config)