def test_incorrect_input_features_config(): config = { "input_features": [ category_feature(preprocessing={"normalization": "zscore"}), ], "output_features": [binary_feature()], } # Not a preprocessing param for category feature with pytest.raises(ValidationError): validate_config(config) config = { "input_features": [ text_feature(preprocessing={"padding_symbol": 0}), ], "output_features": [binary_feature()], } # Incorrect type for padding_symbol preprocessing param with pytest.raises(ValidationError): validate_config(config) config = { "input_features": [ binary_feature(), ], "output_features": [binary_feature()], } del config["input_features"][0]["type"] # Incorrect type for padding_symbol preprocessing param with pytest.raises(ValidationError): validate_config(config)
def test_config_fill_values():
    """Schema validation of the ``fill_value`` preprocessing param.

    The first loop pairs valid vector/binary fill values and expects each
    config to validate; the second mixes in invalid values and expects a
    ValidationError for every pair.
    """
    # BUG FIX: the original list was missing commas between the last four
    # entries, so implicit string concatenation collapsed them into a single
    # element and zip() silently skipped most of the intended pairs.
    vector_fill_values = ["1.0 0.0 1.04 10.49", "1 2 3 4 5", "0", "1.0", ""]
    binary_fill_values = ["yes", "No", "1", "TRUE", 1]
    for vector_fill_value, binary_fill_value in zip(vector_fill_values, binary_fill_values):
        config = {
            "input_features": [
                vector_feature(preprocessing={"fill_value": vector_fill_value}),
            ],
            "output_features": [binary_feature(preprocessing={"fill_value": binary_fill_value})],
        }
        validate_config(config)

    bad_vector_fill_values = ["one two three", "1,2,3", 0]
    bad_binary_fill_values = ["one", 2, "maybe"]
    # Pair each bad value with a known-good partner so exactly one side of
    # every config is invalid.
    for vector_fill_value, binary_fill_value in zip(
        vector_fill_values[:3] + bad_vector_fill_values,
        bad_binary_fill_values + binary_fill_values[:3],
    ):
        config = {
            "input_features": [
                vector_feature(preprocessing={"fill_value": vector_fill_value}),
            ],
            "output_features": [binary_feature(preprocessing={"fill_value": binary_fill_value})],
        }
        with pytest.raises(ValidationError):
            validate_config(config)
def test_missing_values_drop_rows(csv_filename, tmpdir):
    """Preprocessing should succeed when output features drop rows with NaNs."""
    data_csv_path = os.path.join(tmpdir, csv_filename)
    drop_row_kwargs = {PREPROCESSING: {"missing_value_strategy": DROP_ROW}}

    input_features = [
        number_feature(),
        binary_feature(),
        category_feature(vocab_size=3),
    ]
    output_features = [
        binary_feature(**drop_row_kwargs),
        number_feature(**drop_row_kwargs),
        category_feature(vocab_size=3, **drop_row_kwargs),
        sequence_feature(vocab_size=3, **drop_row_kwargs),
        text_feature(vocab_size=3, **drop_row_kwargs),
        set_feature(vocab_size=3, **drop_row_kwargs),
        vector_feature(),
    ]

    config = {
        "input_features": input_features,
        "output_features": output_features,
        TRAINER: {"epochs": 2},
    }

    training_data_csv_path = generate_data(input_features, output_features, data_csv_path)
    # Inject ~10% NaNs so the drop-row strategy actually has rows to drop.
    df = read_csv_with_nan(training_data_csv_path, nan_percent=0.1)

    # Preprocessing alone is enough to exercise the strategy.
    backend = LocalTestBackend()
    ludwig_model = LudwigModel(config, backend=backend)
    ludwig_model.preprocess(dataset=df)
def test_roc_curves_from_test_statistics_vis_api(csv_filename):
    """Ensure pdf and png figures can be saved via visualization API call.

    :param csv_filename: csv fixture from tests.fixtures.filenames.csv_filename
    :return: None
    """
    input_features = [binary_feature(), bag_feature()]
    output_features = [binary_feature()]
    encoder = 'parallel_cnn'

    # Generate test data
    data_csv = generate_data(input_features, output_features, csv_filename)
    output_feature_name = output_features[0]['name']
    input_features[0]['encoder'] = encoder
    model = run_api_experiment(input_features, output_features)
    data_df = read_csv(data_csv)
    model.train(data_df=data_df)
    test_stats = model.test(data_df=data_df)[1]
    viz_outputs = ('pdf', 'png')
    for viz_output in viz_outputs:
        vis_output_pattern_pdf = model.exp_dir_name + '/*.{}'.format(
            viz_output)
        visualize.roc_curves_from_test_statistics(
            [test_stats, test_stats],
            output_feature_name,
            # BUG FIX: keyword was misspelled as `model_namess`, which made
            # this call raise TypeError before any figure could be produced.
            model_names=['Model1', 'Model2'],
            output_directory=model.exp_dir_name,
            file_format=viz_output)
        # Exactly one figure per format should have been written.
        figure_cnt = glob.glob(vis_output_pattern_pdf)
        assert 1 == len(figure_cnt)
    shutil.rmtree(model.exp_dir_name, ignore_errors=True)
def test_config_fill_values():
    """Schema validation of the ``fill_value`` preprocessing param.

    Valid vector/binary fill-value pairs must validate; pairs containing an
    invalid value must raise ValidationError.
    """
    # BUG FIX: the original list was missing commas between the last four
    # entries, so implicit string concatenation collapsed them into a single
    # element and zip() silently skipped most of the intended pairs.
    vector_fill_values = ['1.0 0.0 1.04 10.49', '1 2 3 4 5', '0', '1.0', '']
    binary_fill_values = ['yes', 'No', '1', 'TRUE', 1]
    for vector_fill_value, binary_fill_value in zip(vector_fill_values, binary_fill_values):
        config = {
            'input_features': [
                vector_feature(preprocessing={'fill_value': vector_fill_value}),
            ],
            'output_features': [binary_feature(preprocessing={'fill_value': binary_fill_value})],
        }
        validate_config(config)

    bad_vector_fill_values = ['one two three', '1,2,3', 0]
    bad_binary_fill_values = ['one', 2, 'maybe']
    # Pair each bad value with a known-good partner so exactly one side of
    # every config is invalid.
    for vector_fill_value, binary_fill_value in zip(
        vector_fill_values[:3] + bad_vector_fill_values,
        bad_binary_fill_values + binary_fill_values[:3],
    ):
        config = {
            'input_features': [
                vector_feature(preprocessing={'fill_value': vector_fill_value}),
            ],
            'output_features': [binary_feature(preprocessing={'fill_value': binary_fill_value})],
        }
        with pytest.raises(ValidationError):
            validate_config(config)
def test_torchscript_e2e_tabnet_combiner(csv_filename, tmpdir):
    """TorchScript outputs should match eager outputs with the tabnet combiner."""
    data_csv_path = os.path.join(tmpdir, csv_filename)

    # Feature mix routed through the tabnet combiner.
    input_features = [
        binary_feature(),
        number_feature(),
        category_feature(vocab_size=3),
        bag_feature(vocab_size=3),
        set_feature(vocab_size=3),
    ]
    output_features = [
        binary_feature(),
        number_feature(),
        category_feature(vocab_size=3),
    ]

    config = {
        "input_features": input_features,
        "output_features": output_features,
        COMBINER: {
            "type": "tabnet",
            "num_total_blocks": 2,
            "num_shared_blocks": 2,
        },
        TRAINER: {"epochs": 2},
    }

    # Generate training data, then compare scripted vs. eager outputs.
    training_data_csv_path = generate_data(input_features, output_features, data_csv_path)
    validate_torchscript_outputs(tmpdir, config, LocalTestBackend(), training_data_csv_path)
def test_roc_curves_from_test_statistics_vis_api(csv_filename):
    """Ensure pdf and png figures can be saved via visualization API call.

    :param csv_filename: csv fixture from tests.fixtures.filenames.csv_filename
    :return: None
    """
    input_features = [binary_feature(), bag_feature()]
    output_features = [binary_feature()]

    # Generate test data
    data_csv = generate_data(input_features, output_features, csv_filename)
    output_feature_name = output_features[0]['name']

    model = run_api_experiment(input_features, output_features)
    data_df = read_csv(data_csv)
    _, _, output_dir = model.train(dataset=data_df)

    # extract test metrics
    test_stats, _, _ = model.evaluate(dataset=data_df, collect_overall_stats=True, output_directory=output_dir)
    # NOTE: removed a no-op self-assignment (`test_stats = test_stats`) that
    # was left over from an earlier refactor.
    viz_outputs = ('pdf', 'png')
    for viz_output in viz_outputs:
        vis_output_pattern_pdf = os.path.join(output_dir, '*.{}'.format(
            viz_output))
        visualize.roc_curves_from_test_statistics(
            [test_stats, test_stats],
            output_feature_name,
            model_names=['Model1', 'Model2'],
            output_directory=output_dir,
            file_format=viz_output
        )
        # Exactly one figure per format should have been written.
        figure_cnt = glob.glob(vis_output_pattern_pdf)
        assert 1 == len(figure_cnt)
    shutil.rmtree(output_dir, ignore_errors=True)
def test_config_features():
    """A config using every feature type must validate, and feature types that
    are input-only must be rejected when placed among the outputs."""
    all_input_features = [
        audio_feature('/tmp/destination_folder'),
        bag_feature(),
        binary_feature(),
        category_feature(),
        date_feature(),
        h3_feature(),
        image_feature('/tmp/destination_folder'),
        numerical_feature(),
        sequence_feature(),
        set_feature(),
        text_feature(),
        timeseries_feature(),
        vector_feature(),
    ]
    all_output_features = [
        binary_feature(),
        category_feature(),
        numerical_feature(),
        sequence_feature(),
        set_feature(),
        text_feature(),
        vector_feature(),
    ]

    # The full-feature config must validate as written...
    config = {
        'input_features': all_input_features,
        'output_features': all_output_features,
    }
    validate_config(config)

    # ...and also after all defaults have been filled in.
    config = merge_with_defaults(config)
    validate_config(config)

    # Appending any input-only feature type to the outputs must fail with a
    # schema error naming the offending type.
    input_only_features = [
        feature for feature in all_input_features
        if feature['type'] not in OUTPUT_FEATURE_TYPES
    ]
    for input_feature in input_only_features:
        dtype = input_feature['type']
        config = {
            'input_features': all_input_features,
            'output_features': all_output_features + [input_feature],
        }
        with pytest.raises(ValidationError, match=rf"^'{dtype}' is not one of .*"):
            validate_config(config)
def test_config_features():
    """A config using every feature type must validate, and feature types not
    in the output registry must be rejected when used as outputs."""
    all_input_features = [
        audio_feature("/tmp/destination_folder"),
        bag_feature(),
        binary_feature(),
        category_feature(),
        date_feature(),
        h3_feature(),
        image_feature("/tmp/destination_folder"),
        number_feature(),
        sequence_feature(),
        set_feature(),
        text_feature(),
        timeseries_feature(),
        vector_feature(),
    ]
    all_output_features = [
        binary_feature(),
        category_feature(),
        number_feature(),
        sequence_feature(),
        set_feature(),
        text_feature(),
        vector_feature(),
    ]

    # The full-feature config must validate as written...
    config = {
        "input_features": all_input_features,
        "output_features": all_output_features,
    }
    validate_config(config)

    # ...and also after all defaults have been filled in.
    config = merge_with_defaults(config)
    validate_config(config)

    # Appending any input-only feature type to the outputs must fail with a
    # schema error naming the offending type.
    input_only_features = [
        feature for feature in all_input_features
        if feature["type"] not in output_type_registry.keys()
    ]
    for input_feature in input_only_features:
        dtype = input_feature["type"]
        config = {
            "input_features": all_input_features,
            "output_features": all_output_features + [input_feature],
        }
        with pytest.raises(ValidationError, match=rf"^'{dtype}' is not one of .*"):
            validate_config(config)
def test_torchscript_e2e_tabular(csv_filename, tmpdir):
    """TorchScript outputs should match eager outputs across tabular features."""
    data_csv_path = os.path.join(tmpdir, csv_filename)

    # Binary feature whose raw values will be re-encoded as strings below.
    bin_str_feature = binary_feature()

    # One number feature per registered numeric transformation.
    normalized_number_features = [
        number_feature(preprocessing={"normalization": transformer})
        for transformer in numeric_transformation_registry.keys()
    ]

    input_features = [
        bin_str_feature,
        binary_feature(),
        *normalized_number_features,
        category_feature(vocab_size=3),
        bag_feature(vocab_size=3),
        set_feature(vocab_size=3),
        vector_feature(),
        # TODO: future support
        # date_feature(),
        # h3_feature(),
    ]
    output_features = [
        bin_str_feature,
        binary_feature(),
        number_feature(),
        category_feature(vocab_size=3),
        set_feature(vocab_size=3),
        vector_feature(),
        sequence_feature(vocab_size=3),
        text_feature(vocab_size=3),
    ]

    config = {
        "input_features": input_features,
        "output_features": output_features,
        TRAINER: {"epochs": 2},
    }

    # Generate training data
    training_data_csv_path = generate_data(input_features, output_features, data_csv_path)

    # Re-encode the boolean column as string values, e.g., {'Yes', 'No'}.
    df = pd.read_csv(training_data_csv_path)
    false_value, true_value = "No", "Yes"
    df[bin_str_feature[NAME]] = df[bin_str_feature[NAME]].map(lambda x: true_value if x else false_value)
    df.to_csv(training_data_csv_path)

    validate_torchscript_outputs(tmpdir, config, LocalTestBackend(), training_data_csv_path)
def test_ray_calibration(calibration):
    """Calibrated binary and category outputs should train on the ray backend."""
    input_features = [
        number_feature(normalization="zscore"),
        set_feature(),
        binary_feature(),
    ]
    output_features = [
        binary_feature(calibration=calibration),
        category_feature(vocab_size=3, calibration=calibration),
    ]
    run_test_with_features(input_features, output_features)
def test_validate_with_preprocessing_defaults():
    """A config that spells out every feature's preprocessing defaults must
    validate, both as-is and after merging with the global defaults."""
    input_features = [
        audio_feature("/tmp/destination_folder", preprocessing=AudioFeatureMixin.preprocessing_defaults),
        bag_feature(preprocessing=BagFeatureMixin.preprocessing_defaults),
        binary_feature(preprocessing=BinaryFeatureMixin.preprocessing_defaults),
        category_feature(preprocessing=CategoryFeatureMixin.preprocessing_defaults),
        date_feature(preprocessing=DateFeatureMixin.preprocessing_defaults),
        h3_feature(preprocessing=H3FeatureMixin.preprocessing_defaults),
        image_feature("/tmp/destination_folder", preprocessing=ImageFeatureMixin.preprocessing_defaults),
        numerical_feature(preprocessing=NumericalFeatureMixin.preprocessing_defaults),
        sequence_feature(preprocessing=SequenceFeatureMixin.preprocessing_defaults),
        set_feature(preprocessing=SetFeatureMixin.preprocessing_defaults),
        text_feature(preprocessing=TextFeatureMixin.preprocessing_defaults),
        timeseries_feature(preprocessing=TimeseriesFeatureMixin.preprocessing_defaults),
        vector_feature(preprocessing=VectorFeatureMixin.preprocessing_defaults),
    ]
    config = {
        "input_features": input_features,
        "output_features": [{"name": "target", "type": "category"}],
        "training": {
            "decay": True,
            "learning_rate": 0.001,
            "validation_field": "target",
            "validation_metric": "accuracy",
        },
    }
    validate_config(config)

    # Filling in all remaining defaults must also yield a valid config.
    config = merge_with_defaults(config)
    validate_config(config)
def test_strip_whitespace_category(csv_filename, tmpdir):
    """Category values with leading whitespace should map onto the same vocab."""
    data_csv_path = os.path.join(tmpdir, csv_filename)

    cat_feat = category_feature(vocab_size=3)
    input_features = [binary_feature()]
    output_features = [cat_feat]
    config = {"input_features": input_features, "output_features": output_features}

    training_data_csv_path = generate_data(input_features, output_features, data_csv_path)
    df = pd.read_csv(training_data_csv_path)

    # Prepend whitespace to every category value.
    df[cat_feat[COLUMN]] = df[cat_feat[COLUMN]].apply(lambda s: " " + s)

    # run preprocessing
    ludwig_model = LudwigModel(config, backend=LocalTestBackend())
    train_ds, _, _, metadata = ludwig_model.preprocess(dataset=df)

    # Whitespace must be stripped so only vocab_size distinct values remain.
    unique_values = np.unique(train_ds.dataset[cat_feat[PROC_COLUMN]])
    assert len(unique_values) == cat_feat["vocab_size"]
def test_empty_split_error(backend, tmpdir):
    """Tests that an error is raised if one or more of the splits is empty after preprocessing."""
    data_csv_path = os.path.join(tmpdir, "data.csv")

    out_feat = binary_feature()
    input_features = [number_feature()]
    output_features = [out_feat]
    config = {"input_features": input_features, "output_features": output_features}

    training_data_csv_path = generate_data(input_features, output_features, data_csv_path)
    df = pd.read_csv(training_data_csv_path)

    # Nulling every output row triggers the default drop-row strategy for
    # output features, leaving the dataset empty after preprocessing.
    df[out_feat[COLUMN]] = None

    with init_backend(backend):
        ludwig_model = LudwigModel(config, backend=backend)
        with pytest.raises(ValueError, match="Dataset is empty following preprocessing"):
            ludwig_model.preprocess(dataset=df)
def test_merge_with_defaults_early_stop(use_train, use_hyperopt_scheduler):
    """early_stop must be forced to -1 whenever a hyperopt scheduler is set."""
    config = {
        "input_features": [
            binary_feature(),
            category_feature(),
            numerical_feature(),
            text_feature(),
        ],
        "output_features": [
            category_feature(),
            sequence_feature(),
            vector_feature(),
        ],
        HYPEROPT: HYPEROPT_CONFIG,
    }
    # Deep-copy so the shared HYPEROPT_CONFIG fixture is never mutated.
    config = copy.deepcopy(config)

    if use_train:
        config[TRAINING] = {"batch_size": "42"}
    if use_hyperopt_scheduler:
        # A hyperopt scheduler cannot be combined with early stopping.
        config[HYPEROPT]["sampler"]["scheduler"] = SCHEDULER

    merged_config = merge_with_defaults(config)

    expected = -1 if use_hyperopt_scheduler else default_early_stop
    assert merged_config[TRAINING]["early_stop"] == expected
def test_torchscript_e2e_audio(csv_filename, tmpdir):
    """TorchScript outputs should match eager outputs for an audio input."""
    data_csv_path = os.path.join(tmpdir, csv_filename)
    audio_dest_folder = os.path.join(tmpdir, "generated_audio")

    input_features = [audio_feature(audio_dest_folder)]
    output_features = [binary_feature()]

    config = {
        "input_features": input_features,
        "output_features": output_features,
        TRAINER: {"epochs": 2},
    }
    training_data_csv_path = generate_data(input_features, output_features, data_csv_path)

    # NOTE: audio preprocessing mismatches by very small margins ~O(1e-6) but causes flakiness in e2e test.
    # Increasing tolerance is a workaround to reduce flakiness for now.
    # TODO: remove this workaround when audio preprocessing is fixed.
    validate_torchscript_outputs(tmpdir, config, LocalTestBackend(), training_data_csv_path, tolerance=1e-6)
def test_merge_with_defaults_early_stop(use_train, use_hyperopt_scheduler):
    """early_stop must be forced to -1 whenever a hyperopt scheduler is set."""
    config = {
        INPUT_FEATURES: [
            binary_feature(),
            category_feature(),
            number_feature(),
            text_feature(),
        ],
        OUTPUT_FEATURES: [
            category_feature(),
            sequence_feature(),
            vector_feature(),
        ],
        HYPEROPT: HYPEROPT_CONFIG,
    }
    # Deep-copy so the shared HYPEROPT_CONFIG fixture is never mutated.
    config = copy.deepcopy(config)

    if use_train:
        config[TRAINER] = {"batch_size": 42}
    if use_hyperopt_scheduler:
        # A hyperopt scheduler cannot be combined with early stopping.
        config[HYPEROPT][EXECUTOR][SCHEDULER] = SCHEDULER_DICT

    merged_config = merge_with_defaults(config)

    expected = -1 if use_hyperopt_scheduler else ECDTrainerConfig().early_stop
    assert merged_config[TRAINER]["early_stop"] == expected
def test_number_feature_wrong_dtype(csv_filename, tmpdir):
    """Tests that a number feature with all string values is treated as having missing values by default."""
    data_csv_path = os.path.join(tmpdir, csv_filename)

    num_feat = number_feature()
    input_features = [num_feat]
    output_features = [binary_feature()]
    config = {"input_features": input_features, "output_features": output_features}

    training_data_csv_path = generate_data(input_features, output_features, data_csv_path)
    df = pd.read_csv(training_data_csv_path)

    # Replace every numeric value with a random 10-letter string.
    letters = string.ascii_lowercase
    df[num_feat[COLUMN]] = df[num_feat[COLUMN]].apply(
        lambda _: "".join(random.choice(letters) for _ in range(10))
    )

    # run preprocessing
    backend = LocalTestBackend()
    ludwig_model = LudwigModel(config, backend=backend)
    train_ds, val_ds, test_ds, _ = ludwig_model.preprocess(dataset=df)
    concatenated_df = concatenate_df(train_ds.to_df(), val_ds.to_df(), test_ds.to_df(), backend)

    # No rows are dropped; invalid values are replaced with the 0.0 fill value.
    assert len(concatenated_df) == len(df)
    assert np.all(concatenated_df[num_feat[PROC_COLUMN]] == 0.0)
def test_binary_feature(enc_encoder):
    """Binary input feature should encode a [batch, seq] float tensor to the
    expected shape for both passthrough and trainable encoders."""
    # Synthetic binary input tensor.
    binary_tensor = torch.randn([BATCH_SIZE, SEQ_SIZE], dtype=torch.float32)

    # Build the feature config and instantiate the input feature under test.
    binary_feature_config = binary_feature(
        folder='.',
        encoder=enc_encoder,
        max_sequence_length=SEQ_SIZE,
    )
    binary_input_feature = BinaryInputFeature(binary_feature_config)

    # Run the tensor through the encoder.
    encoder_output = binary_input_feature(binary_tensor)

    # Confirm correctness of the binary encoder output.
    assert isinstance(encoder_output, dict)
    assert 'encoder_output' in encoder_output
    assert isinstance(encoder_output['encoder_output'], torch.Tensor)

    if enc_encoder == 'passthrough':
        expected_shape = (BATCH_SIZE, 1, SEQ_SIZE)
    else:
        expected_shape = (BATCH_SIZE, DEFAULT_FC_SIZE)
    assert encoder_output['encoder_output'].shape == expected_shape
def test_experiment_various_feature_types(csv_filename):
    """Binary + bag inputs predicting a set output should run end to end."""
    input_features = [binary_feature(), bag_feature()]
    output_features = [set_feature(max_len=3, vocab_size=5)]

    # Generate test data and run the full experiment.
    rel_path = generate_data(input_features, output_features, csv_filename)
    run_experiment(input_features, output_features, data_csv=rel_path)
def test_experiment_vector_feature_1(csv_filename):
    """A vector input predicting a binary output should run end to end."""
    input_features = [vector_feature()]
    output_features = [binary_feature()]

    # Generate test data and run the full experiment.
    rel_path = generate_data(input_features, output_features, csv_filename)
    run_experiment(input_features, output_features, data_csv=rel_path)
def test_missing_value_prediction(csv_filename):
    """Prediction should fill a fully-missing input column with the training
    mode, both on the in-memory model and after a save/load round trip."""
    # Seed for reproducibility.
    random.seed(1)
    np.random.seed(1)
    with tempfile.TemporaryDirectory() as tmpdir:
        input_features = [
            category_feature(
                vocab_size=2,
                reduce_input="sum",
                preprocessing=dict(missing_value_strategy="fill_with_mode"),
            )
        ]
        output_features = [binary_feature()]
        dataset = pd.read_csv(generate_data(input_features, output_features, csv_filename))

        config = {
            "input_features": input_features,
            "output_features": output_features,
            "combiner": {"type": "concat", "fc_size": 14},
        }
        model = LudwigModel(config)
        _, _, output_dir = model.train(dataset=dataset, output_directory=tmpdir)

        # With the entire column set to None, the mode computed at training
        # time should be used as the fill value.
        dataset[input_features[0]["name"]] = None
        model.predict(dataset=dataset)

        # The reloaded model must handle the missing column the same way.
        model = LudwigModel.load(os.path.join(output_dir, "model"))
        model.predict(dataset=dataset)
def run_test_gbm_multiple_outputs(tmpdir, backend_config):
    """Test that an error is raised when the model is trained with multiple outputs."""
    input_features = [number_feature(), category_feature(reduce_output="sum")]
    output_features = [
        category_feature(vocab_size=3),
        binary_feature(),
        category_feature(vocab_size=3),
    ]

    csv_filename = os.path.join(tmpdir, "training.csv")
    dataset_filename = generate_data(input_features, output_features, csv_filename, num_examples=100)

    config = {
        MODEL_TYPE: "gbm",
        "input_features": input_features,
        "output_features": output_features,
        TRAINER: {"num_boost_round": 2},
    }

    # GBM models only support a single output task; training must fail.
    model = LudwigModel(config, backend=backend_config)
    with pytest.raises(ValueError, match="Only single task currently supported"):
        model.train(dataset=dataset_filename, output_directory=tmpdir)
def test_config_trainer_bad_optimizer():
    """Optimizer schema validation: all registered types pass; an explicit
    null and malformed type values fail."""
    config = {
        "input_features": [
            category_feature(vocab_size=2, reduce_input="sum"),
            number_feature(),
        ],
        "output_features": [binary_feature(weight_regularization=None)],
        "combiner": {"type": "tabnet"},
        TRAINER: {},
    }
    validate_config(config)

    # An explicitly-null optimizer is rejected, while leaving it unspecified
    # yields a non-null default.
    config[TRAINER]["optimizer"] = None
    with pytest.raises(ValidationError):
        validate_config(config)
    assert ECDTrainerConfig.Schema().load({}).optimizer is not None

    # Every registered optimizer type must be accepted.
    for optimizer_type in optimizer_registry.keys():
        config[TRAINER]["optimizer"] = {"type": optimizer_type}
        validate_config(config)

    # Non-string and unknown optimizer types must all be rejected.
    for bad_type in (0, {}, "invalid"):
        config[TRAINER]["optimizer"] = {"type": bad_type}
        with pytest.raises(ValidationError):
            validate_config(config)
def test_binary_predictions(tmpdir, backend, distinct_values):
    """Binary predictions should use the caller-provided distinct values and
    expose mutually consistent probability columns."""
    input_features = [category_feature(vocab_size=3)]
    feature = binary_feature()
    output_features = [feature]

    data_csv_path = generate_data(
        input_features,
        output_features,
        os.path.join(tmpdir, "dataset.csv"),
        num_examples=100,
    )
    data_df = pd.read_csv(data_csv_path)

    # Optionally re-encode booleans with the provided values, e.g. {'Yes', 'No'}.
    false_value, true_value = distinct_values
    data_df[feature[NAME]] = data_df[feature[NAME]].map(lambda x: true_value if x else false_value)
    data_df.to_csv(data_csv_path, index=False)

    config = {
        "input_features": input_features,
        "output_features": output_features,
        TRAINER: {"epochs": 1},
    }

    # Patch the output head with deterministic random logits for prediction.
    patch_args = (
        "ludwig.features.binary_feature.BinaryOutputFeature.logits",
        partial(random_binary_logits, num_predict_samples=len(data_df)),
    )
    preds_df, _ = predict_with_backend(tmpdir, config, data_csv_path, backend, patch_args=patch_args)

    cols = set(preds_df.columns)
    assert f"{feature[NAME]}_predictions" in cols
    assert f"{feature[NAME]}_probabilities_{str(false_value)}" in cols
    assert f"{feature[NAME]}_probabilities_{str(true_value)}" in cols
    assert f"{feature[NAME]}_probability" in cols

    prediction_rows = zip(
        preds_df[f"{feature[NAME]}_predictions"],
        preds_df[f"{feature[NAME]}_probabilities_{str(false_value)}"],
        preds_df[f"{feature[NAME]}_probabilities_{str(true_value)}"],
        preds_df[f"{feature[NAME]}_probability"],
    )
    for pred, prob_0, prob_1, prob in prediction_rows:
        # The reported probability is the predicted class's probability, and
        # the two class probabilities must be complementary.
        assert pred == false_value or pred == true_value
        if pred == true_value:
            assert prob_1 == prob
        else:
            assert prob_0 == prob
        assert np.allclose(prob_0, 1 - prob_1)
def test_missing_value_prediction(csv_filename):
    """Prediction should fill a fully-missing input column with the training
    mode, both on the in-memory model and after a save/load round trip."""
    with tempfile.TemporaryDirectory() as tmpdir:
        input_features = [
            category_feature(
                vocab_size=2,
                reduce_input='sum',
                preprocessing=dict(missing_value_strategy='fill_with_mode'),
            )
        ]
        output_features = [binary_feature()]
        dataset = pd.read_csv(generate_data(input_features, output_features, csv_filename))

        config = {
            'input_features': input_features,
            'output_features': output_features,
            'combiner': {'type': 'concat', 'fc_size': 14},
        }
        model = LudwigModel(config)
        _, _, output_dir = model.train(dataset=dataset, output_directory=tmpdir)

        # With the entire column set to None, the mode computed at training
        # time should be used as the fill value.
        dataset[input_features[0]['name']] = None
        model.predict(dataset=dataset)

        # The reloaded model must handle the missing column the same way.
        model = LudwigModel.load(os.path.join(output_dir, 'model'))
        model.predict(dataset=dataset)
def test_missing_values_fill_with_mean(backend, csv_filename, tmpdir):
    """Preprocessing should succeed with the fill-with-mean missing-value
    strategy on a number input feature."""
    data_csv_path = os.path.join(tmpdir, csv_filename)

    fill_mean_kwargs = {PREPROCESSING: {"missing_value_strategy": FILL_WITH_MEAN}}
    input_features = [
        number_feature(**fill_mean_kwargs),
        binary_feature(),
        category_feature(vocab_size=3),
    ]
    output_features = [binary_feature()]

    training_data_csv_path = generate_data(input_features, output_features, data_csv_path)
    config = {
        "input_features": input_features,
        "output_features": output_features,
        TRAINER: {"epochs": 2},
    }

    with init_backend(backend):
        # Preprocessing alone is enough to exercise the strategy.
        ludwig_model = LudwigModel(config, backend=backend)
        ludwig_model.preprocess(dataset=training_data_csv_path)
def test_experiment_timeseries(csv_filename):
    """A timeseries input with the transformer encoder should run end to end."""
    input_features = [timeseries_feature()]
    output_features = [binary_feature()]

    # Generate test data, then switch the encoder before running.
    rel_path = generate_data(input_features, output_features, csv_filename)
    input_features[0]["encoder"] = "transformer"
    run_experiment(input_features, output_features, dataset=rel_path)
def test_ray_tabular():
    """All tabular feature types should train end to end on the ray backend."""
    input_features = [
        sequence_feature(reduce_output="sum"),
        category_feature(vocab_size=2, reduce_input="sum"),
        numerical_feature(normalization="zscore"),
        set_feature(),
        binary_feature(),
        bag_feature(),
        vector_feature(),
        h3_feature(),
        date_feature(),
    ]
    output_features = [
        binary_feature(),
        numerical_feature(normalization="zscore"),
    ]
    run_test_parquet(input_features, output_features)
def test_experiment_h3(csv_filename):
    """Run an experiment for an H3 input with every registered H3 encoder."""
    input_features = [h3_feature()]
    output_features = [binary_feature()]

    # Generate the data once and reuse it across encoders.
    rel_path = generate_data(input_features, output_features, csv_filename)

    for encoder in h3_encoder_registry:
        input_features[0]['encoder'] = encoder
        run_experiment(input_features, output_features, data_csv=rel_path)