def test_parse_categories():
    """Run ``parse_categories`` on a two-column frame and check ``str2``.

    ``str1`` holds 21 distinct values, while ``str2`` repeats the value
    ``20`` once; only ``str2`` is handed to ``_check_categories_parsed``.
    """
    csv_text = """
    str1,str2
    a1,a1
    2,2
    3,3
    4,4
    5,5
    6,6
    7,7
    8,8
    9,9
    10,10
    11,11
    12,12
    13,13
    14,14
    15,15
    16,16
    17,17
    18,18
    19,19
    20,20
    21,20
    """
    frame = convert_csv_pd(csv_text)

    parse_categories(frame)

    _check_categories_parsed(frame, ["str2"])
def test_complex_df_report():
    """Test our dataframe importing with the kinds of DataFrames users upload.

    Four frames are wrapped in ``dp.Table`` and published as one report:
    a CSV-derived frame with ``date``/``datetime``/``datetime_tz`` columns,
    a frame built from a dict of lists, the ``describe()`` summary of that
    frame, and the same summary after ``reset_index()``.
    """
    tz_csv = """
    date,datetime,datetime_tz
    2017-01-10,2017-01-21T23:10:24,2020-03-23T00:00:00.000Z
    2017-01-11,2017-01-23T23:01:24,2020-04-23T00:00:00.000Z
    """
    tz_df = convert_csv_pd(tz_csv)

    people = {
        "first_name": ["Jason", "Molly", "Tina", "Jake", "Amy"],
        "last_name": ["Miller", "Jacobson", "Ali", "Milner", "Cooze"],
        "age": [42, 52, 36, 24, 73],
        "preTestScore": [4, 24, 31, 2, 3],
        "postTestScore": [25, 94, 57, 62, 70],
    }
    column_order = [
        "first_name",
        "last_name",
        "age",
        "preTestScore",
        "postTestScore",
    ]
    index_df = pd.DataFrame(people, columns=column_order)

    # Summary statistics, once with the default index and once flattened.
    summary = index_df.describe()
    summary_flat = summary.reset_index()

    # Tables are built and passed to the report in the same order as before.
    tables = [
        dp.Table(frame)
        for frame in (tz_df, index_df, summary, summary_flat)
    ]

    with deletable(dp.Report(*tables)) as dp_report:
        dp_report.publish(name=gen_name())
def test_parse_timedelta_regular():
    """Run ``parse_timedelta`` and check ``timedelta_col`` was parsed.

    The fixture has one quoted "1 day, H:MM:SS.ffffff" column alongside a
    float column and a string column.
    """
    csv_text = """
    timedelta_col,float_col,string_col
    "1 day, 4:08:25.159814",0.23,bla1
    "1 day, 18:34:16.196687",0.23,bla1
    """
    frame = convert_csv_pd(csv_text)

    parse_timedelta(frame)

    _check_timedelta_parsed(frame, ["timedelta_col"])
def test_parse_dates_wrong_date():
    """Run ``parse_dates`` on a frame where one date column has a bad value.

    ``data_col1`` contains the malformed entry ``2017-01-1s``; only
    ``data_col2`` is passed to ``_check_dates_parsed``.
    """
    csv_text = """
    data_col1,data_col2,float_col,string_col
    2017-01-10,2017-01-21T23:10:24,0.23,bla1
    2017-01-1s,2017-01-23T23:01:24,0.23,bla1
    """
    frame = convert_csv_pd(csv_text)

    parse_dates(frame)

    _check_dates_parsed(frame, ["data_col2"])
def test_to_str_timedelta():
    """Apply ``parse_timedelta`` then ``to_str`` and check ``timedelta_col``.

    After both steps the column is verified with ``_check_is_object``.
    """
    csv_text = """
    timedelta_col,float_col,string_col
    "1 day, 4:08:25.159814",0.23,bla1
    "1 day, 18:34:16.196687",0.23,bla1
    """
    frame = convert_csv_pd(csv_text)

    # Order matters: parse first, then convert back to strings.
    parse_timedelta(frame)
    to_str(frame)

    _check_is_object(frame, ["timedelta_col"])
def test_parse_dates_nulls():
    """Run ``parse_dates`` on a timestamp column that has an empty entry."""
    # NOTE - using just Z, not .000Z, means pandas doesn't infer a timezone,
    # so the values are naive datetime objects.
    csv_text = """
    timestamp,value
    2019-02-19T03:00:02Z,1
    ,2
    2019-01-10T12:10:10Z,3
    """
    frame = convert_csv_pd(csv_text)

    parse_dates(frame)

    _check_dates_parsed(frame, ["timestamp"])
def test_parse_timedelta_with_assertion_error():
    """Check that a bare ``H:MM:SS`` column fails the timedelta check.

    ``timedelta_col1`` has no day component, whereas ``timedelta_col2`` uses
    the quoted "1 day, ..." form. After ``parse_timedelta`` the first column
    must pass the date check, and checking both columns as timedeltas must
    raise ``AssertionError``.
    """
    csv_text = """
    timedelta_col1,timedelta_col2,float_col,string_col
    4:08:25.159814,"1 day, 4:08:25.159814",0.23,bla1
    18:34:16.196687,"1 day, 18:34:16.196687",0.23,bla1
    """
    frame = convert_csv_pd(csv_text)

    parse_timedelta(frame)

    # timedelta_col1's dtype is a datetime64[ns]
    _check_dates_parsed(frame, ["timedelta_col1"])

    both_columns = ["timedelta_col1", "timedelta_col2"]
    with pytest.raises(AssertionError):
        _check_timedelta_parsed(frame, both_columns)