def test_check_n_categories_ser():
    """check_n_categories on the single column good['g2'] matches good_result."""
    # the expected count is stored in row 1 of the good_result fixture
    expected_count = good_result.loc[1, 'n_categories']
    observed = cf.check_n_categories(good['g2']).values
    assert expected_count == observed
def test_mostly_same_ser():
    """check_mostly_same on the single column good['g2'] equals mostly_same_out2."""
    # NOTE(review): the result presumably carries the columns mostly_same,
    # thresh, most_common, count and prop -- confirm against check_mostly_same.
    actual = cf.check_mostly_same(good['g2'], thresh=0.4)
    assert mostly_same_out2.equals(actual)
def test_check_n_categories_df():
    """check_n_categories applied to the whole `good` fixture equals good_result."""
    expected_frame = good_result
    observed_frame = cf.check_n_categories(good)
    assert expected_frame.equals(observed_frame)
def test_mostly_same_df():
    """check_mostly_same applied to the whole `good` fixture equals mostly_same_out1."""
    expected_frame = mostly_same_out1
    observed_frame = cf.check_mostly_same(good, thresh=0.4)
    assert expected_frame.equals(observed_frame)
def test_validate_categorical_dtype_bad_ser():
    """_validate_categorical_dtype rejects bad_type['b1'] with a TypeError."""
    expected_fragment = 'should be of type object or int64'
    # bad_type['b1'] is the invalid input (presumably float data; the fixture
    # is defined elsewhere)
    with pytest.raises(TypeError) as raised:
        cf._validate_categorical_dtype(bad_type['b1'])
    # the error text must name the dtypes that are accepted
    message = str(raised.value)
    assert expected_fragment in message
def test_validate_categorical_dtype_good_ser():
    """_validate_categorical_dtype accepts good['g1'] without raising."""
    # no assertion needed: the test passes as long as the call does not raise
    valid_column = good['g1']
    cf._validate_categorical_dtype(valid_column)
def test_check_n_categories_no_dropna_ser():
    """check_n_categories(nan['g1'], dropna=False) matches nan_result."""
    # the expected count is stored in row 0 of the nan_result fixture
    expected_count = nan_result.loc[0, 'n_categories']
    observed = cf.check_n_categories(nan['g1'], dropna=False).values
    assert expected_count == observed
def test_check_n_categories_no_dropna_df():
    """check_n_categories on the whole `nan` fixture with dropna=False equals nan_result."""
    expected_frame = nan_result
    observed_frame = cf.check_n_categories(nan, dropna=False)
    assert expected_frame.equals(observed_frame)