示例#1
0
def test_parse_categories():
    """Run ``parse_categories`` on a two-column frame and check ``str2``.

    The fixture has 21 rows. ``str1`` holds 21 distinct values, whereas
    ``str2`` repeats the value ``20`` in its final row, so it has only 20
    distinct values. Only ``str2`` is passed to the checker as the column
    expected to be categorical.
    """
    csv_text = """
        str1,str2
        a1,a1
        2,2
        3,3
        4,4
        5,5
        6,6
        7,7
        8,8
        9,9
        10,10
        11,11
        12,12
        13,13
        14,14
        15,15
        16,16
        17,17
        18,18
        19,19
        20,20
        21,20
        """
    frame = convert_csv_pd(csv_text)
    # parse_categories' return value is not used; the same frame is checked.
    parse_categories(frame)
    expected_categorical = ["str2"]
    _check_categories_parsed(frame, expected_categorical)
示例#2
0
def test_complex_df_report():
    """Publish a report built from the kinds of DataFrames users upload.

    Four frames are wrapped in ``dp.Table`` and combined into one report:
    a frame with date / datetime / ``Z``-suffixed datetime columns, a plain
    frame of mixed string and integer columns, the ``describe()`` summary of
    that frame (statistic names in the index), and the same summary after
    ``reset_index()``.
    """
    tz_csv = """
        date,datetime,datetime_tz
        2017-01-10,2017-01-21T23:10:24,2020-03-23T00:00:00.000Z
        2017-01-11,2017-01-23T23:01:24,2020-04-23T00:00:00.000Z
    """
    tz_df = convert_csv_pd(tz_csv)

    people = {
        "first_name": ["Jason", "Molly", "Tina", "Jake", "Amy"],
        "last_name": ["Miller", "Jacobson", "Ali", "Milner", "Cooze"],
        "age": [42, 52, 36, 24, 73],
        "preTestScore": [4, 24, 31, 2, 3],
        "postTestScore": [25, 94, 57, 62, 70],
    }
    column_order = [
        "first_name",
        "last_name",
        "age",
        "preTestScore",
        "postTestScore",
    ]
    index_df = pd.DataFrame(people, columns=column_order)

    # describe() yields a frame indexed by statistic name; reset_index()
    # turns that index into an ordinary column, giving a second variant.
    summary = index_df.describe()
    summary_flat = summary.reset_index()

    # Tables are created in the same order they appear in the report.
    frames = (tz_df, index_df, summary, summary_flat)
    tables = [dp.Table(frame) for frame in frames]

    report = dp.Report(*tables)
    with deletable(report) as dp_report:
        dp_report.publish(name=gen_name())
示例#3
0
def test_parse_timedelta_regular():
    """Run ``parse_timedelta`` on ``"N day, H:M:S.f"`` strings.

    Only ``timedelta_col`` is passed to the checker as the column expected
    to be a timedelta; the float and string columns are not listed.
    """
    csv_text = """
        timedelta_col,float_col,string_col
        "1 day, 4:08:25.159814",0.23,bla1
        "1 day, 18:34:16.196687",0.23,bla1
        """
    frame = convert_csv_pd(csv_text)
    parse_timedelta(frame)
    expected_timedelta = ["timedelta_col"]
    _check_timedelta_parsed(frame, expected_timedelta)
示例#4
0
def test_parse_dates_wrong_date():
    """Run ``parse_dates`` on a frame where one date column is malformed.

    ``data_col1`` contains the invalid value ``2017-01-1s`` in its second
    row, so only ``data_col2`` is passed to the checker as a date column.
    """
    csv_text = """
        data_col1,data_col2,float_col,string_col
        2017-01-10,2017-01-21T23:10:24,0.23,bla1
        2017-01-1s,2017-01-23T23:01:24,0.23,bla1
        """
    frame = convert_csv_pd(csv_text)
    parse_dates(frame)
    expected_dates = ["data_col2"]
    _check_dates_parsed(frame, expected_dates)
示例#5
0
def test_to_str_timedelta():
    """Run ``parse_timedelta`` then ``to_str`` and check ``timedelta_col``.

    After both steps, ``timedelta_col`` is passed to ``_check_is_object``.
    """
    csv_text = """
        timedelta_col,float_col,string_col
        "1 day, 4:08:25.159814",0.23,bla1
        "1 day, 18:34:16.196687",0.23,bla1
        """
    frame = convert_csv_pd(csv_text)
    # Order matters: the column is parsed first, then stringified.
    parse_timedelta(frame)
    to_str(frame)
    expected_object = ["timedelta_col"]
    _check_is_object(frame, expected_object)
示例#6
0
def test_parse_dates_nulls():
    """Run ``parse_dates`` on a timestamp column that has a missing value.

    The second row leaves ``timestamp`` empty; the column is still passed
    to the checker as a date column.

    NOTE (from the original author): using just ``Z`` rather than ``.000Z``
    means pandas doesn't infer a timezone, so the values are naive datetime
    objects.
    """
    csv_text = """
        timestamp,value
        2019-02-19T03:00:02Z,1
        ,2
        2019-01-10T12:10:10Z,3
        """
    frame = convert_csv_pd(csv_text)
    parse_dates(frame)
    expected_dates = ["timestamp"]
    _check_dates_parsed(frame, expected_dates)
示例#7
0
def test_parse_timedelta_with_assertion_error():
    """Check that bare ``H:M:S.f`` strings are not treated as timedeltas.

    ``timedelta_col1`` holds values without a day component, while
    ``timedelta_col2`` uses the ``"N day, H:M:S.f"`` form. After
    ``parse_timedelta`` the first column must pass the date check, and the
    timedelta check over both columns must raise ``AssertionError``.
    """
    csv_text = """
        timedelta_col1,timedelta_col2,float_col,string_col
        4:08:25.159814,"1 day, 4:08:25.159814",0.23,bla1
        18:34:16.196687,"1 day, 18:34:16.196687",0.23,bla1
        """
    frame = convert_csv_pd(csv_text)
    parse_timedelta(frame)

    # Per the original author's note, timedelta_col1's dtype is
    # datetime64[ns], hence the date check rather than the timedelta check.
    _check_dates_parsed(frame, ["timedelta_col1"])

    both_columns = ["timedelta_col1", "timedelta_col2"]
    with pytest.raises(AssertionError):
        _check_timedelta_parsed(frame, both_columns)