def test_model_comparison_give_non_null_performance_with_classification():
    """Check that every compared model reports a non-NaN score (classification).

    Builds a one-column numeric feature frame, wraps it in a
    ``ComparisonDataset`` configured with ``TaskName.CLASSIFICATION`` and the
    module-level ``categorical_target``, runs the model comparison and asserts
    that the ``MODEL_SCORE`` entry of each result is not NaN.
    """
    # Given
    features = pd.DataFrame(
        {"numeric_feature": np.random.normal(size=n_samples)})
    comparison_dataset = ComparisonDataset(TaskName.CLASSIFICATION, features,
                                           categorical_target,
                                           cross_validation_n_folds)

    # When
    comparison = ModelComparison(
        comparison_dataset).get_models_scores_and_training_time()

    # Then
    for model_name, performance_and_training_time in comparison.items():
        performance = performance_and_training_time[MODEL_SCORE]
        # In the boolean form of assert_that the failure message must be the
        # second positional argument; a ``reason=`` keyword is not forwarded,
        # which would hide the name of the failing model.
        assert_that(not np.isnan(performance),
                    f"Null performance value for model {model_name}")
def test_model_comparison_give_non_null_performance_and_categorical_feature():
    """Check that every compared model reports a non-NaN score with a string feature.

    Builds a feature frame holding one string column (city names) and one
    numeric column, wraps it in a ``ComparisonDataset`` configured with
    ``TaskName.REGRESSION`` and the module-level ``categorical_target``, runs
    the model comparison and asserts that the ``MODEL_SCORE`` entry of each
    result is not NaN.
    """
    # Given
    features = pd.DataFrame({
        "string_feature": np.random.choice(
            ["Paris", "London", "Madrid", "Roma"], n_samples),
        "numeric_feature": np.random.normal(size=n_samples),
    })
    # NOTE(review): a REGRESSION task is paired with ``categorical_target``
    # here - confirm this combination is intentional.
    comparison_dataset = ComparisonDataset(TaskName.REGRESSION, features,
                                           categorical_target,
                                           cross_validation_n_folds)

    # When
    comparison = ModelComparison(
        comparison_dataset).get_models_scores_and_training_time()

    # Then
    for model_name, performance_and_training_time in comparison.items():
        performance = performance_and_training_time[MODEL_SCORE]
        # In the boolean form of assert_that the failure message must be the
        # second positional argument; a ``reason=`` keyword is not forwarded,
        # which would hide the name of the failing model.
        assert_that(not np.isnan(performance),
                    f"Null performance value for model {model_name}")
def test_encode_datetime_columns_as_int():
    """Check the dtypes produced by ``ModelComparison._encode_date_columns_as_int``.

    A one-row frame with a date-like string column, a numeric column and a
    plain string column is passed through the encoder. The date column is
    expected to come back with a dtype equal to ``"float"``, while the other
    two columns must keep the dtype they have in the input frame.

    NOTE(review): the test name says "int" but the assertion expects a float
    dtype - confirm which one is intended.
    """
    # Given
    date_name = "date_column"
    text_name = "other_string_column"
    number_name = "other_numeric_column"
    frame = pd.DataFrame({
        date_name: ["2017-02-04 18:41:00"],
        number_name: [1],
        text_name: ["something"],
    })

    # When
    # NOTE(review): the meaning of the second argument (1) is not visible
    # from this test - check the encoder's signature.
    encoded = ModelComparison._encode_date_columns_as_int(frame, 1)

    # Then
    encoded_dtypes = encoded.dtypes
    original_dtypes = frame.dtypes
    assert_that(encoded_dtypes[date_name], is_("float"))
    for untouched in (text_name, number_name):
        assert_that(encoded_dtypes[untouched],
                    is_(original_dtypes[untouched]))