示例#1
0
def test_remove_columns_1():
    """Check the column labels left by ``Check.remove_columns`` on Freedman.csv.

    The fixture is loaded from the ``data`` directory beside this file.
    """
    here = os.path.dirname(__file__)
    frame = pd.read_csv(os.path.join(here, 'data', 'Freedman.csv'))
    checker = Check()
    expected = ['Location', 'population', 'nonwhite', 'density', 'crime']
    remaining = list(checker.remove_columns(frame).columns)
    assert remaining == expected
示例#2
0
def test_ignore_identifier_1():
    """Check the column labels left by ``Check.ignore_identifier`` on Freedman.csv.

    The expected list omits ``Location`` and keeps the four remaining labels.
    """
    here = os.path.dirname(__file__)
    frame = pd.read_csv(os.path.join(here, 'data', 'Freedman.csv'))
    checker = Check()
    expected = ['population', 'nonwhite', 'density', 'crime']
    kept = list(checker.ignore_identifier(frame).columns)
    assert kept == expected
示例#3
0
# NOTE(review): another function named ``test_ignore_identifier_2`` appears
# later in this view; if both live in one module the later definition
# replaces this one - confirm.
def test_ignore_identifier_2():
    """Check the column labels left by ``Check.ignore_identifier`` on msleep_ggplot.csv."""
    here = os.path.dirname(__file__)
    frame = pd.read_csv(os.path.join(here, 'data', 'msleep_ggplot.csv'))
    checker = Check()
    expected = [
        'genus',
        'vore',
        'order',
        'conservation',
        'sleep_total',
        'sleep_rem',
        'sleep_cycle',
        'awake',
        'brainwt',
        'bodywt',
    ]
    kept = list(checker.ignore_identifier(frame).columns)
    assert kept == expected
示例#4
0
def test_remove_records():
    """Compare ``Check.remove_records`` on Freedman.csv with a stored result.

    Both the input and the expected frame are read from the ``data``
    directory beside this file; the test passes when ``.equals`` reports the
    two frames as equal.
    """
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    df = pd.read_csv(os.path.join(data_dir, 'Freedman.csv'))
    expected = pd.read_csv(
        os.path.join(data_dir, 'Freedman_remove_records.csv'))
    check = Check()
    # ``.equals`` already yields a boolean, so assert it directly instead of
    # comparing it with ``True``.
    assert check.remove_records(df).equals(expected)
示例#5
0
# NOTE(review): another function named ``test_ignore_identifier_2`` appears
# earlier in this view; if both live in one module this definition replaces
# the earlier one - confirm.
def test_ignore_identifier_2():
    """Compare ``Check.ignore_identifier`` on vgsales.csv with a stored CSV.

    Both frames are read from the ``data`` directory beside this file. The
    test passes when ``.equals`` reports the two frames as NOT equal.
    """
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    df = pd.read_csv(os.path.join(data_dir, 'vgsales.csv'))
    stored = pd.read_csv(
        os.path.join(data_dir, 'vgsales_ignore_identifier.csv'))
    check = Check()
    # NOTE(review): this asserts that the result differs from the stored
    # file - confirm the negative expectation is intentional.
    # ``.equals`` already yields a boolean, so negate it directly instead of
    # comparing it with ``False``.
    assert not check.ignore_identifier(df).equals(stored)
示例#6
0
def test_encoding_categorical_2():
    """Check ``Check.encoding_categorical`` on the ``sex`` column of student.csv.

    The expected result is a pair: a list of integer codes and a dict mapping
    codes to labels.
    """
    checker = Check()
    csv_path = os.path.join(os.path.dirname(__file__), 'data', 'student.csv')
    frame = pd.read_csv(csv_path)
    expected_codes = [0, 0, 0, 0, 0, 1, 1, 0]
    # NOTE(review): both codes map to 'F' in the expected labels - confirm
    # this is the intended output and not a masked defect.
    expected_labels = {0: 'F', 1: 'F'}
    outcome = checker.encoding_categorical(frame['sex'])
    assert outcome == (expected_codes, expected_labels)
示例#7
0
def test_encoding_categorical_1():
    """Check ``Check.encoding_categorical`` on the ``mfr`` column of cereal.csv.

    The expected result is a pair: a list of integer codes and a dict mapping
    codes to labels.
    """
    checker = Check()
    csv_path = os.path.join(os.path.dirname(__file__), 'data', 'cereal.csv')
    frame = pd.read_csv(csv_path)
    expected_codes = [1, 2, 0, 0, 3, 1]
    # NOTE(review): codes 2 and 3 both map to 'K' in the expected labels -
    # confirm this is the intended output.
    expected_labels = {1: 'Q', 2: 'K', 0: 'N', 3: 'K'}
    outcome = checker.encoding_categorical(frame['mfr'])
    assert outcome == (expected_codes, expected_labels)
示例#8
0
def test_encoding_categorical_3():
    """Check ``Check.encoding_categorical`` on the ``Mjob`` column of student.csv.

    The expected result is a pair: a list of integer codes and a dict mapping
    codes to labels.
    """
    checker = Check()
    csv_path = os.path.join(os.path.dirname(__file__), 'data', 'student.csv')
    frame = pd.read_csv(csv_path)
    expected_codes = [0, 0, 0, 1, 2, 3, 2, 2]
    # NOTE(review): codes 0, 1 and 2 all map to 'at_home' in the expected
    # labels - confirm this is the intended output.
    expected_labels = {
        0: 'at_home',
        1: 'at_home',
        2: 'at_home',
        3: 'health',
    }
    outcome = checker.encoding_categorical(frame['Mjob'])
    assert outcome == (expected_codes, expected_labels)
示例#9
0
def test_is_categorical():
    """Check that ``Check.is_categorical`` equals True for ``Location``.

    The column is taken from Freedman.csv in the ``data`` directory beside
    this file. The test fails with ``AssertionError`` when the comparison
    does not hold.
    """
    csv_path = os.path.join(os.path.dirname(__file__), 'data', 'Freedman.csv')
    frame = pd.read_csv(csv_path)
    checker = Check()
    verdict = checker.is_categorical(frame['Location'])
    # Equality (not plain truthiness) is kept so the check stays exactly as
    # strict as written.
    assert verdict == True
示例#10
0
def test_percentage_missing():
    """Check the dict returned by ``Check.percentage_missing`` for vgsales.csv.

    The expected dict has one entry per column label; only ``Year`` and
    ``Publisher`` carry non-zero values.
    """
    csv_path = os.path.join(os.path.dirname(__file__), 'data', 'vgsales.csv')
    frame = pd.read_csv(csv_path)
    checker = Check()
    expected = {
        'Rank': 0.0,
        'Name': 0.0,
        'Platform': 0.0,
        'Year': 1.63,
        'Genre': 0.0,
        'Publisher': 0.35,
        'NA_Sales': 0.0,
        'EU_Sales': 0.0,
        'JP_Sales': 0.0,
        'Other_Sales': 0.0,
        'Global_Sales': 0.0,
    }
    report = checker.percentage_missing(frame)
    assert report == expected
示例#11
0
def test_remove_records_1():
    """Check that ``Check.remove_records`` on Freedman.csv has length 110."""
    csv_path = os.path.join(os.path.dirname(__file__), 'data', 'Freedman.csv')
    frame = pd.read_csv(csv_path)
    checker = Check()
    remaining = checker.remove_records(frame)
    assert len(remaining) == 110
示例#12
0
def test_remove_records_2():
    """Check that ``Check.remove_records`` on msleep_ggplot.csv has length 61."""
    csv_path = os.path.join(
        os.path.dirname(__file__), 'data', 'msleep_ggplot.csv')
    frame = pd.read_csv(csv_path)
    checker = Check()
    remaining = checker.remove_records(frame)
    assert len(remaining) == 61
示例#13
0
def test_percentage_missing_2():
    """Check the dict returned by ``Check.percentage_missing`` for msleep_ggplot.csv.

    The expected dict has one entry per column label.
    """
    csv_path = os.path.join(
        os.path.dirname(__file__), 'data', 'msleep_ggplot.csv')
    frame = pd.read_csv(csv_path)
    checker = Check()
    expected = {
        'name': 0.0,
        'genus': 0.0,
        'vore': 8.43,
        'order': 0.0,
        'conservation': 34.94,
        'sleep_total': 0.0,
        'sleep_rem': 26.51,
        'sleep_cycle': 61.45,
        'awake': 0.0,
        'brainwt': 32.53,
        'bodywt': 0.0,
    }
    report = checker.percentage_missing(frame)
    assert report == expected
示例#14
0
def test_is_discrete_3():
    """Check that ``Check.is_discrete`` equals False for ``Location`` in Freedman.csv."""
    csv_path = os.path.join(os.path.dirname(__file__), 'data', 'Freedman.csv')
    frame = pd.read_csv(csv_path)
    checker = Check()
    verdict = checker.is_discrete(frame['Location'])
    # Equality (not plain falsiness) is kept so the check stays exactly as
    # strict as written.
    assert verdict == False
示例#15
0
def test_is_working():
    """Call ``Check.is_working`` and print whatever it returns.

    Nothing is asserted, so this only fails if the call itself raises.
    """
    checker = Check()
    outcome = checker.is_working()
    print(outcome)
示例#16
0
def test_is_outlier():
    """Call ``Check.is_outlier`` on the ``crime`` column with argument 3 and print it.

    The column comes from Freedman.csv. Nothing is asserted, so this only
    fails if loading the file or the call itself raises.
    """
    csv_path = os.path.join(os.path.dirname(__file__), 'data', 'Freedman.csv')
    frame = pd.read_csv(csv_path)
    checker = Check()
    outcome = checker.is_outlier(frame['crime'], 3)
    print(outcome)
示例#17
0
def test_ignore_identifier_3():
    """Call ``Check.ignore_identifier`` on msleep_ggplot.csv and print the result.

    Nothing is asserted, so this only fails if loading the file or the call
    itself raises.
    """
    csv_path = os.path.join(
        os.path.dirname(__file__), 'data', 'msleep_ggplot.csv')
    frame = pd.read_csv(csv_path)
    checker = Check()
    outcome = checker.ignore_identifier(frame)
    print(outcome)
示例#18
0
def test_is_continuous_2():
    """Check that ``Check.is_continuous`` equals True for ``population`` in Freedman.csv."""
    csv_path = os.path.join(os.path.dirname(__file__), 'data', 'Freedman.csv')
    frame = pd.read_csv(csv_path)
    checker = Check()
    verdict = checker.is_continuous(frame['population'])
    # Equality (not plain truthiness) is kept so the check stays exactly as
    # strict as written.
    assert verdict == True
示例#19
0
def test_is_discrete_1():
    """Check that ``Check.is_discrete`` equals True for ``crime`` in Freedman.csv."""
    csv_path = os.path.join(os.path.dirname(__file__), 'data', 'Freedman.csv')
    frame = pd.read_csv(csv_path)
    checker = Check()
    verdict = checker.is_discrete(frame['crime'])
    # Equality (not plain truthiness) is kept so the check stays exactly as
    # strict as written.
    assert verdict == True
示例#20
0
def test_is_missing_3():
    """Check that ``Check.is_missing`` does not equal True for ``nonwhite`` in Freedman.csv."""
    csv_path = os.path.join(os.path.dirname(__file__), 'data', 'Freedman.csv')
    frame = pd.read_csv(csv_path)
    checker = Check()
    verdict = checker.is_missing(frame['nonwhite'])
    # Inequality with True (not plain falsiness) is kept so the check stays
    # exactly as strict as written.
    assert verdict != True
示例#21
0
def test_is_identifier_3():
    """Check that ``Check.is_identifier`` equals False for ``nonwhite`` in Freedman.csv."""
    csv_path = os.path.join(os.path.dirname(__file__), 'data', 'Freedman.csv')
    frame = pd.read_csv(csv_path)
    checker = Check()
    verdict = checker.is_identifier(frame['nonwhite'])
    # Equality (not plain falsiness) is kept so the check stays exactly as
    # strict as written.
    assert verdict == False
示例#22
0
def test_is_categorical_1():
    """Check that ``Check.is_categorical`` equals True for ``Location`` in Freedman.csv."""
    csv_path = os.path.join(os.path.dirname(__file__), 'data', 'Freedman.csv')
    frame = pd.read_csv(csv_path)
    checker = Check()
    verdict = checker.is_categorical(frame['Location'])
    # Equality (not plain truthiness) is kept so the check stays exactly as
    # strict as written.
    assert verdict == True
示例#23
0
def test_is_missing_4():
    """Check that ``Check.is_missing`` equals True for ``density`` in Freedman.csv."""
    csv_path = os.path.join(os.path.dirname(__file__), 'data', 'Freedman.csv')
    frame = pd.read_csv(csv_path)
    checker = Check()
    verdict = checker.is_missing(frame['density'])
    # Equality (not plain truthiness) is kept so the check stays exactly as
    # strict as written.
    assert verdict == True
示例#24
0
def test_percentage_missing_1():
    """Check the dict returned by ``Check.percentage_missing`` for Freedman.csv.

    The expected dict has one entry per column label; only ``population``
    and ``density`` carry non-zero values.
    """
    csv_path = os.path.join(os.path.dirname(__file__), 'data', 'Freedman.csv')
    frame = pd.read_csv(csv_path)
    checker = Check()
    expected = {
        'Location': 0.0,
        'population': 9.09,
        'nonwhite': 0.0,
        'density': 9.09,
        'crime': 0.0,
    }
    report = checker.percentage_missing(frame)
    assert report == expected