def test_categorize_no_cat(generate_data_no_cat): assert categorize(pd.DataFrame()) == { 'numeric': [], 'categorical': [] }, "Empty\ dataframe should return dictionary \ with keys but empty list values" assert categorize(generate_data_no_cat) == { 'numeric': [], 'categorical': [] }, "Dataframe\
def run_pylaundry():
    """
    Run the four pylaundry steps in sequence on the module-level data.

    The steps are categorize -> fill_missing -> transform_columns ->
    select_features. The inputs X_train, X_test and y_train are not
    parameters; they are looked up from the enclosing scope.

    Arguments
    --------
    NA

    Returns
    ------
    The value returned by select_features() (documented by the original
    author as the list of final features selected).
    """
    # Step 1 - work out the column grouping from the training data.
    column_types = categorize(df=X_train)

    # Step 2 - impute missing values (mean for numeric, mode for categorical).
    imputed = fill_missing(
        X_train, X_test, column_types, num_imp="mean", cat_imp="mode"
    )

    # Step 3 - transform the imputed train/test sets using the same grouping.
    transformed = transform_columns(
        imputed['X_train'], imputed['X_test'], column_types
    )

    # Step 4 - select two features from the transformed training set.
    return select_features(transformed['X_train'], y_train, n_features=2)
def test_output_type(generate_data):
    """
    Check the container types returned by categorize().

    The result must be a dict, and the values stored under the 'numeric'
    and 'categorical' keys must both be lists.
    """
    result = categorize(generate_data)

    assert isinstance(result, dict), (
        "Output of categorize() should be a dictionary"
    )

    # Same check and message for both keys, numeric first.
    for key in ('numeric', 'categorical'):
        assert isinstance(result[key], list), (
            "Dictionary value should be list"
        )
def test_categorize_bad_input(generate_data):
    """
    Check that categorize() rejects invalid arguments with AssertionError.

    Five calls are exercised: a string as `df`, a numpy array as `df`,
    a float `max_cat`, a negative `max_cat`, and a dataframe built from a
    bare list. The test fails if any of them completes without raising
    AssertionError.

    Arguments
    --------
    generate_data -- fixture value passed to categorize() as an otherwise
        valid `df` (defined outside this block)
    """
    # Each call is wrapped in a lambda so that it is only evaluated inside
    # the try block below, one case at a time.
    bad_calls = [
        ("a string as df",
         lambda: categorize('hello!')),
        ("a numpy array as df",
         lambda: categorize(np.ones((3, 3)))),
        ("a float max_cat",
         lambda: categorize(generate_data, max_cat=1.3)),
        ("a negative max_cat",
         lambda: categorize(generate_data, max_cat=-1)),
        # NOTE(review): presumably rejected because of its non-string
        # column label - confirm against categorize().
        ("a dataframe built from a bare list",
         lambda: categorize(pd.DataFrame(([1, 2, 3, 4])))),
    ]

    for description, call in bad_calls:
        try:
            call()
        except AssertionError:
            # Expected outcome: the invalid input was rejected.
            continue
        # No exception was raised. Without this explicit failure the test
        # would pass even if categorize() silently accepted the input.
        raise AssertionError(
            "categorize() should raise AssertionError for " + description
        )
def test_categorize_max_cat(generate_data):
    """
    Check how categorize() splits the fixture columns when max_cat=3.

    With max_cat=3 the test expects only 'cat2' under 'categorical', and
    'num1', 'num2', 'num3' plus 'cat1' under 'numeric' (order-insensitive).

    Arguments
    --------
    generate_data -- fixture value passed to categorize() as `df`
        (defined outside this block)
    """
    output = categorize(generate_data, max_cat=3)

    # The message is built from adjacent string literals rather than a
    # backslash continuation inside the literal, which would embed the
    # continuation's whitespace in the failure message.
    assert output['categorical'] == ['cat2'], (
        "Only cat2 of generate_data() "
        "should be categorical with max_cat = 3"
    )

    # Compared as sets so the order of the numeric columns is irrelevant.
    assert set(output['numeric']) == {'num1', 'num2', 'num3', 'cat1'}, (
        "Cat 1 should be marked numeric with max_cat = 3"
    )
def test_categorize_numeric(generate_data): output = categorize(generate_data) assert set(output['numeric']) == set(['num1', 'num2', 'num3']), "num1, \
def test_categorize_float_dtype(generate_data): df = pd.DataFrame({'col': [1.1, 1.1, 2.2, 2.2]}) output = categorize(df) assert output['numeric'] == ['col'], "A column with dtype \
def test_categorize_cat_dtype(generate_data): df = pd.DataFrame({'col': [1.1, 2.2, 3.3, 4.4]}) df['col'] = df['col'].astype('category') output = categorize(df, max_cat=2) assert output['categorical'] == ['col'], "A column with \
def test_categorize_categorical(generate_data): output = categorize(generate_data) assert set(output['categorical']) == set(['cat1', 'cat2', 'cat3']), "cat1,\