Пример #1
0
def test_ignore_identifier_1():
    """Check the column labels ``ignore_identifier`` returns for Freedman.csv."""
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    frame = pd.read_csv(os.path.join(data_dir, 'Freedman.csv'))
    checker = Check()

    kept_columns = list(checker.ignore_identifier(frame).columns)

    assert kept_columns == ['population', 'nonwhite', 'density', 'crime']
Пример #2
0
def test_remove_columns_1():
    """Check the column labels ``remove_columns`` returns for Freedman.csv."""
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    frame = pd.read_csv(os.path.join(data_dir, 'Freedman.csv'))
    checker = Check()

    kept_columns = list(checker.remove_columns(frame).columns)

    assert kept_columns == [
        'Location',
        'population',
        'nonwhite',
        'density',
        'crime',
    ]
Пример #3
0
def test_remove_records():
    """Compare ``remove_records`` on Freedman.csv with the stored fixture.

    The expected frame is read from ``Freedman_remove_records.csv`` in the
    same ``data`` directory.
    """
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    source_frame = pd.read_csv(os.path.join(data_dir, 'Freedman.csv'))
    expected_frame = pd.read_csv(
        os.path.join(data_dir, 'Freedman_remove_records.csv'))
    checker = Check()

    cleaned = checker.remove_records(source_frame)

    assert cleaned.equals(expected_frame)
Пример #4
0
def test_ignore_identifier_2():
    """Assert ``ignore_identifier`` on vgsales.csv differs from the fixture.

    The comparison target is ``vgsales_ignore_identifier.csv``; the test
    passes only when the two frames are NOT equal.

    NOTE(review): another function named ``test_ignore_identifier_2``
    appears later in this source. If both live in the same module, the
    later definition replaces this one and this test is never collected.
    """
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    source_frame = pd.read_csv(os.path.join(data_dir, 'vgsales.csv'))
    fixture_frame = pd.read_csv(
        os.path.join(data_dir, 'vgsales_ignore_identifier.csv'))
    checker = Check()

    stripped = checker.ignore_identifier(source_frame)

    assert not stripped.equals(fixture_frame)
Пример #5
0
def test_ignore_identifier_2():
    """Check the column labels ``ignore_identifier`` returns for msleep_ggplot.csv.

    NOTE(review): an earlier function in this source is also named
    ``test_ignore_identifier_2``. If both live in the same module, this
    definition shadows the earlier one.
    """
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    frame = pd.read_csv(os.path.join(data_dir, 'msleep_ggplot.csv'))
    checker = Check()

    kept_columns = list(checker.ignore_identifier(frame).columns)

    assert kept_columns == [
        'genus',
        'vore',
        'order',
        'conservation',
        'sleep_total',
        'sleep_rem',
        'sleep_cycle',
        'awake',
        'brainwt',
        'bodywt',
    ]
Пример #6
0
def test_encoding_categorical_2():
    """Check ``encoding_categorical`` on the ``sex`` column of student.csv.

    The result is compared against a ``(codes, mapping)`` tuple.
    """
    checker = Check()
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    frame = pd.read_csv(os.path.join(data_dir, 'student.csv'))

    expected_codes = [0, 0, 0, 0, 0, 1, 1, 0]
    # NOTE(review): both codes map to 'F' here; confirm this is intended.
    expected_mapping = {0: 'F', 1: 'F'}

    encoded = checker.encoding_categorical(frame['sex'])

    assert encoded == (expected_codes, expected_mapping)
Пример #7
0
def test_encoding_categorical_1():
    """Check ``encoding_categorical`` on the ``mfr`` column of cereal.csv.

    The result is compared against a ``(codes, mapping)`` tuple.
    """
    checker = Check()
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    frame = pd.read_csv(os.path.join(data_dir, 'cereal.csv'))

    expected_codes = [1, 2, 0, 0, 3, 1]
    # NOTE(review): codes 2 and 3 both map to 'K'; confirm this is intended.
    expected_mapping = {1: 'Q', 2: 'K', 0: 'N', 3: 'K'}

    encoded = checker.encoding_categorical(frame['mfr'])

    assert encoded == (expected_codes, expected_mapping)
Пример #8
0
def test_is_categorical():
    """Assert ``is_categorical`` reports True for ``Location`` in Freedman.csv."""
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    frame = pd.read_csv(os.path.join(data_dir, 'Freedman.csv'))
    checker = Check()

    verdict = checker.is_categorical(frame['Location'])

    # Explicit comparison kept: the return type of is_categorical is not
    # visible from this file.
    assert verdict == True  # noqa: E712
Пример #9
0
def test_encoding_categorical_3():
    """Check ``encoding_categorical`` on the ``Mjob`` column of student.csv.

    The result is compared against a ``(codes, mapping)`` tuple.
    """
    checker = Check()
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    frame = pd.read_csv(os.path.join(data_dir, 'student.csv'))

    expected_codes = [0, 0, 0, 1, 2, 3, 2, 2]
    # NOTE(review): codes 0, 1 and 2 all map to 'at_home'; confirm this is
    # intended.
    expected_mapping = {
        0: 'at_home',
        1: 'at_home',
        2: 'at_home',
        3: 'health',
    }

    encoded = checker.encoding_categorical(frame['Mjob'])

    assert encoded == (expected_codes, expected_mapping)
Пример #10
0
def test_is_outlier_2():
    """Assert ``Check.is_outlier`` returns an empty list for this series at 5.

    ``is_outlier`` is called on the class itself, not on an instance.
    """
    series = pd.Series([
        322, 322, 336, 345, 351, 370, 390, 404,
        409, 411, 436, 437, -7, 80000000, 789654123, 0,
    ])

    flagged = Check.is_outlier(series, 5)

    assert flagged == []
Пример #11
0
def test_is_outlier_6():
    """Assert ``Check.is_outlier`` returns an empty list for this series at 5.

    ``is_outlier`` is called on the class itself, not on an instance.
    """
    series = pd.Series([
        30, 171, 184, 201, 212, 250, 265, 270,
        272, 289, 305, 306, 100000, 8, 5, 2000,
    ])

    flagged = Check.is_outlier(series, 5)

    assert flagged == []
Пример #12
0
def test_percentage_missing():
    """Check the per-column values ``percentage_missing`` returns for vgsales.csv."""
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    frame = pd.read_csv(os.path.join(data_dir, 'vgsales.csv'))
    checker = Check()

    expected = {
        'Rank': 0.0,
        'Name': 0.0,
        'Platform': 0.0,
        'Year': 1.63,
        'Genre': 0.0,
        'Publisher': 0.35,
        'NA_Sales': 0.0,
        'EU_Sales': 0.0,
        'JP_Sales': 0.0,
        'Other_Sales': 0.0,
        'Global_Sales': 0.0,
    }

    missing = checker.percentage_missing(frame)

    assert missing == expected
Пример #13
0
def test_is_discrete_1():
    """Assert ``is_discrete`` reports True for ``crime`` in Freedman.csv."""
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    frame = pd.read_csv(os.path.join(data_dir, 'Freedman.csv'))
    checker = Check()

    verdict = checker.is_discrete(frame['crime'])

    # Explicit comparison kept: the return type of is_discrete is not
    # visible from this file.
    assert verdict == True  # noqa: E712
Пример #14
0
def test_is_continuous_2():
    """Assert ``is_continuous`` reports True for ``population`` in Freedman.csv."""
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    frame = pd.read_csv(os.path.join(data_dir, 'Freedman.csv'))
    checker = Check()

    verdict = checker.is_continuous(frame['population'])

    # Explicit comparison kept: the return type of is_continuous is not
    # visible from this file.
    assert verdict == True  # noqa: E712
Пример #15
0
def test_remove_records_2():
    """Assert ``remove_records`` on msleep_ggplot.csv yields a length of 61."""
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    frame = pd.read_csv(os.path.join(data_dir, 'msleep_ggplot.csv'))
    checker = Check()

    remaining = checker.remove_records(frame)

    assert len(remaining) == 61
Пример #16
0
def test_ignore_identifier_3():
    """Print the result of ``ignore_identifier`` on msleep_ggplot.csv.

    No assertion is made; the test fails only if one of the calls raises.
    """
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    frame = pd.read_csv(os.path.join(data_dir, 'msleep_ggplot.csv'))
    checker = Check()

    stripped = checker.ignore_identifier(frame)
    print(stripped)
Пример #17
0
def test_remove_records_1():
    """Assert ``remove_records`` on Freedman.csv yields a length of 110."""
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    frame = pd.read_csv(os.path.join(data_dir, 'Freedman.csv'))
    checker = Check()

    remaining = checker.remove_records(frame)

    assert len(remaining) == 110
Пример #18
0
def test_is_outlier():
    """Print ``is_outlier`` for the ``crime`` column of Freedman.csv at 3.

    No assertion is made; the test fails only if one of the calls raises.
    """
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    frame = pd.read_csv(os.path.join(data_dir, 'Freedman.csv'))
    checker = Check()

    flagged = checker.is_outlier(frame['crime'], 3)
    print(flagged)
Пример #19
0
def test_is_outlier_4():
    """Assert ``Check.is_outlier`` returns ``[22005]`` for this series at 2.

    ``is_outlier`` is called on the class itself, not on an instance.
    """
    series = pd.Series(
        [4551, 7875, 931, 1322, 7795, 22005, 78, 95, 9874, 12365])

    flagged = Check.is_outlier(series, 2)

    assert flagged == [22005]
Пример #20
0
def test_percentage_missing_2():
    """Check the per-column values ``percentage_missing`` returns for msleep_ggplot.csv."""
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    frame = pd.read_csv(os.path.join(data_dir, 'msleep_ggplot.csv'))
    checker = Check()

    expected = {
        'name': 0.0,
        'genus': 0.0,
        'vore': 8.43,
        'order': 0.0,
        'conservation': 34.94,
        'sleep_total': 0.0,
        'sleep_rem': 26.51,
        'sleep_cycle': 61.45,
        'awake': 0.0,
        'brainwt': 32.53,
        'bodywt': 0.0,
    }

    missing = checker.percentage_missing(frame)

    assert missing == expected
Пример #21
0
def test_is_missing_3():
    """Assert ``is_missing`` does not report True for ``nonwhite`` in Freedman.csv."""
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    frame = pd.read_csv(os.path.join(data_dir, 'Freedman.csv'))
    checker = Check()

    verdict = checker.is_missing(frame['nonwhite'])

    # Explicit comparison kept: the return type of is_missing is not
    # visible from this file.
    assert verdict != True  # noqa: E712
Пример #22
0
def test_is_discrete_3():
    """Assert ``is_discrete`` reports False for ``Location`` in Freedman.csv."""
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    frame = pd.read_csv(os.path.join(data_dir, 'Freedman.csv'))
    checker = Check()

    verdict = checker.is_discrete(frame['Location'])

    # Explicit comparison kept: the return type of is_discrete is not
    # visible from this file.
    assert verdict == False  # noqa: E712
Пример #23
0
def test_is_working():
    """Print the value returned by ``Check.is_working``.

    No assertion is made; the test fails only if one of the calls raises.
    """
    checker = Check()
    status = checker.is_working()
    print(status)
Пример #24
0
def test_is_identifier_3():
    """Assert ``is_identifier`` reports False for ``nonwhite`` in Freedman.csv."""
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    frame = pd.read_csv(os.path.join(data_dir, 'Freedman.csv'))
    checker = Check()

    verdict = checker.is_identifier(frame['nonwhite'])

    # Explicit comparison kept: the return type of is_identifier is not
    # visible from this file.
    assert verdict == False  # noqa: E712
Пример #25
0
def test_is_categorical_1():
    """Assert ``is_categorical`` reports True for ``Location`` in Freedman.csv."""
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    frame = pd.read_csv(os.path.join(data_dir, 'Freedman.csv'))
    checker = Check()

    verdict = checker.is_categorical(frame['Location'])

    # Explicit comparison kept: the return type of is_categorical is not
    # visible from this file.
    assert verdict == True  # noqa: E712
Пример #26
0
def test_is_missing_4():
    """Assert ``is_missing`` reports True for ``density`` in Freedman.csv."""
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    frame = pd.read_csv(os.path.join(data_dir, 'Freedman.csv'))
    checker = Check()

    verdict = checker.is_missing(frame['density'])

    # Explicit comparison kept: the return type of is_missing is not
    # visible from this file.
    assert verdict == True  # noqa: E712
Пример #27
0
def test_percentage_missing_1():
    """Check the per-column values ``percentage_missing`` returns for Freedman.csv."""
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    frame = pd.read_csv(os.path.join(data_dir, 'Freedman.csv'))
    checker = Check()

    expected = {
        'Location': 0.0,
        'population': 9.09,
        'nonwhite': 0.0,
        'density': 9.09,
        'crime': 0.0,
    }

    missing = checker.percentage_missing(frame)

    assert missing == expected