示例#1
0
def test_categorize_no_cat(generate_data_no_cat):
    assert categorize(pd.DataFrame()) == {
        'numeric': [],
        'categorical': []
    }, "Empty\
                                           dataframe should return dictionary \
                                           with keys but empty list values"

    assert categorize(generate_data_no_cat) == {
        'numeric': [],
        'categorical': []
    }, "Dataframe\
示例#2
0
def run_pylaundry():
    """
    Chain the four pylaundry steps and return the last step's result.

    X_train, X_test and y_train are not parameters; they are read from
    the enclosing scope.

    Arguments
    --------
    NA

    Returns
    ------
    The value returned by select_features() (called with n_features=2)
    """
    # step 1 - categorize: build the column dictionary from the training data
    column_groups = categorize(df=X_train)

    # step 2 - fill_missing: mean for numeric, mode for categorical
    imputed = fill_missing(
        X_train, X_test, column_groups, num_imp="mean", cat_imp="mode"
    )

    # step 3 - transform_columns on the imputed train/test sets
    transformed = transform_columns(
        imputed['X_train'], imputed['X_test'], column_groups
    )

    # step 4 - select_features on the transformed training set only
    return select_features(transformed['X_train'], y_train, n_features=2)
示例#3
0
def test_output_type(generate_data):
    """
    categorize() must return a dict whose 'numeric' and 'categorical'
    entries are both lists.
    """
    result = categorize(generate_data)
    assert isinstance(result, dict), \
        "Output of categorize() should be a dictionary"
    # Same type check and same message for both keys, in the original order.
    for key in ('numeric', 'categorical'):
        assert isinstance(result[key], list), \
            "Dictionary value should be list"
示例#4
0
def test_categorize_bad_input(generate_data):
    """
    categorize() must raise AssertionError for each invalid call below.

    The previous version wrapped every call in
    ``try: ... except AssertionError: pass``, which also passes when the
    call returns normally, so it could never detect missing validation.
    Here a call that does not raise fails the test.

    Arguments
    --------
    generate_data : fixture value passed as the first argument to
        categorize() for the max_cat cases

    NOTE(review): the five cases are carried over unchanged from the
    original test; confirm categorize() is meant to reject each of them.
    """
    # (description, positional args, keyword args) for each invalid call
    bad_calls = [
        ("a string as input", ('hello!',), {}),
        ("a numpy array as input", (np.ones((3, 3)),), {}),
        ("max_cat=1.3", (generate_data,), {'max_cat': 1.3}),
        ("max_cat=-1", (generate_data,), {'max_cat': -1}),
        ("pd.DataFrame([1, 2, 3, 4]) as input",
         (pd.DataFrame(([1, 2, 3, 4])),), {}),
    ]

    for description, args, kwargs in bad_calls:
        try:
            categorize(*args, **kwargs)
        except AssertionError:
            # Expected: the input was rejected.
            pass
        else:
            # Raised from the else clause so the except above cannot
            # swallow it.
            raise AssertionError(
                "categorize() should raise AssertionError for "
                "{}".format(description)
            )
示例#5
0
def test_categorize_max_cat(generate_data):
    """
    With max_cat=3, only 'cat2' should be reported as categorical and
    'cat1' should be grouped with the numeric columns.
    """
    result = categorize(generate_data, max_cat=3)

    categorical_cols = result['categorical']
    # The run of spaces in the message is kept exactly as the original
    # line continuation produced it.
    assert categorical_cols == ['cat2'], (
        "Only cat2 of generate_data()"
        "        should be categorical with max_cat = 3"
    )

    expected_numeric = {'num1', 'num2', 'num3', 'cat1'}
    # Compared as sets, so column order does not matter.
    assert set(result['numeric']) == expected_numeric, \
        "Cat 1 should be marked numeric with max_cat = 3"
示例#6
0
def test_categorize_numeric(generate_data):
    output = categorize(generate_data)
    assert set(output['numeric']) == set(['num1', 'num2', 'num3']), "num1, \
示例#7
0
def test_categorize_float_dtype(generate_data):
    df = pd.DataFrame({'col': [1.1, 1.1, 2.2, 2.2]})
    output = categorize(df)
    assert output['numeric'] == ['col'], "A column with dtype \
示例#8
0
def test_categorize_cat_dtype(generate_data):
    df = pd.DataFrame({'col': [1.1, 2.2, 3.3, 4.4]})
    df['col'] = df['col'].astype('category')
    output = categorize(df, max_cat=2)
    assert output['categorical'] == ['col'], "A column with \
示例#9
0
def test_categorize_categorical(generate_data):
    output = categorize(generate_data)
    assert set(output['categorical']) == set(['cat1', 'cat2', 'cat3']), "cat1,\