def test_parquet(self):
    """Write an ``UntypedDf`` to Parquet, read it back, and check its layout."""
    with tmpfile(".parquet") as path:
        written = UntypedDf(sample_data())
        written.to_parquet(path)
        loaded = UntypedDf.read_parquet(path)
        # The reloaded frame must have an unnamed index and all three columns.
        index_names = list(loaded.index.names)
        assert index_names == [None]
        column_names = set(loaded.columns)
        assert column_names == {"abc", "123", "xyz"}
def test_of(self):
    """Check that ``convert`` and ``of`` produce the same cell values."""
    rows = [[1, 2, 3], [4, 5, 6]]

    # ``convert`` on a plain pandas frame.
    via_convert = UntypedDf.convert(pd.DataFrame(sample_data()))
    assert via_convert.to_numpy().tolist() == rows

    # ``of`` on a plain pandas frame.
    via_of_frame = UntypedDf.of(pd.DataFrame(sample_data()))
    assert via_of_frame.to_numpy().tolist() == rows

    # ``of`` on the raw sample data.
    via_of_raw = UntypedDf.of(sample_data())
    assert via_of_raw.to_numpy().tolist() == rows
def test_concat_no_preserve_attrs(self):
    """Check that concatenating via ``UntypedDf.of`` yields empty ``attrs``."""
    base = Col1([pd.Series({"abc": "hippo"})])
    fishies = base.set_attrs(animal="fishies")
    hippos = base.set_attrs(animal="hippos")
    combined = UntypedDf.of([fishies, hippos])
    # ``set_attrs`` must not have touched the frame it was called on.
    assert base.attrs == {}
    expected_rows = len(fishies) + len(hippos)
    assert len(combined) == expected_rows > 0
    assert combined.attrs == {}
def test_read_properties(self):
    """Parse a ``.properties`` snippet and round-trip it via ``to_properties``."""
    # Input has a section header, two comment styles, and escaped ':' and '\'.
    data = r"""
    [section]
    # a comment
    ! another comment
    k\:e\\y = v:a\\lue
    """
    s = StringIO(data)
    df = UntypedDf.read_properties(s)
    assert df.column_names() == ["key", "value"]
    # Expected: section name prefixed to the key, and escapes resolved.
    assert df.values.tolist() == [[r"section.k:e\y", r"v:a\lue"]]
    # Writing back out should emit the section header and re-escape the entry.
    data: str = df.to_properties()
    lines = [s.strip() for s in data.splitlines()]
    assert "[section]" in lines
    assert r"k\:e\\y = v:a\\lue" in lines
    # Reading the written text again must give the same values.
    s = StringIO(data)
    df2 = UntypedDf.read_properties(s)
    assert df2.values.tolist() == df.values.tolist()
def test_st(self):
    """Exercise ``st`` with boolean masks, keyword filters, and mixes of both."""
    df = UntypedDf().convert(pd.DataFrame(sample_data()))

    def xyz_eq(value):
        # Build a fresh mask on every use, exactly as an inline comparison would.
        return df["xyz"] == value

    # Baseline: plain boolean indexing finds exactly one row.
    assert len(df[xyz_eq(6)]) == 1

    # A single condition, positional or keyword.
    assert len(df.st(xyz_eq(6))) == 1
    assert len(df.st(xyz=6)) == 1

    # Several positional conditions.
    assert len(df.st(xyz_eq(6), xyz_eq(6))) == 1
    assert len(df.st(xyz_eq(6), xyz_eq(3))) == 0

    # Positional and keyword conditions together.
    assert len(df.st(xyz_eq(6), xyz=6)) == 1
    assert len(df.st(xyz_eq(6), xyz=2)) == 0

    # Several keyword conditions, then everything at once.
    assert len(df.st(xyz=1, abc=1)) == 0
    assert len(df.st(xyz_eq(6), xyz_eq(1), xyz=6)) == 0
def test_read_ini(self):
    """Parse a small INI snippet with one section, one comment, and one entry."""
    data = """
    [section]
    ; a comment
    key = value
    """
    s = StringIO(data)
    df = UntypedDf.read_ini(s)
    assert df.column_names() == ["key", "value"]
    # Expected: the ';' comment is dropped and the section name prefixes the key.
    assert df.values.tolist() == [["section.key", "value"]]
def test_read_toml(self):
    """Parse a TOML snippet holding a single ``[[row]]`` table."""
    data = """
    [[row]]
    # a comment
    key = "value"
    """
    s = StringIO(data)
    df = UntypedDf.read_toml(s)
    # Expected: one column named after the table's only key, with one value.
    assert df.column_names() == ["key"]
    assert df.values.tolist() == [["value"]]
def test_read_toml_jagged(self):
    """Parse TOML whose ``[[row]]`` tables do not all share the same keys."""
    # The second table has two keys that the first table lacks.
    data = """
    [[row]]
    key = "value1"
    [[row]]
    key = "value2"
    kitten = "elephant"
    cuteness = 10.3
    """
    s = StringIO(data)
    df = UntypedDf.read_toml(s)
    assert df.column_names() == ["key", "kitten", "cuteness"]
    # Replace missing cells with 0 so rows can be compared as plain lists.
    xx = df.fillna(0).values.tolist()
    assert xx == [["value1", 0, 0], ["value2", "elephant", 10.3]]
def test_untyped_read_write_csv(self):
    """Round-trip an ``UntypedDf`` through CSV with no, one, and two index columns."""
    index_choices = (None, "abc", ["abc", "xyz"])
    with tmpfile(".csv") as path:
        for index_cols in index_choices:
            frame = UntypedDf(sample_data())
            if index_cols is not None:
                frame = frame.set_index(index_cols)
            frame.to_csv(path)
            reloaded = UntypedDf.read_csv(path)
            # Whatever was indexed before writing, the result has an unnamed
            # index and all three columns.
            assert list(reloaded.index.names) == [None]
            assert set(reloaded.columns) == {"abc", "123", "xyz"}
def test_html_empty(self):
    """Check that ``read_html`` raises ``NoValueError`` for a bare ``<html>`` page."""
    markup = "<html></html>"
    with tmpfile(".html") as path:
        path.write_text(markup, encoding="utf8")
        with pytest.raises(NoValueError):
            UntypedDf.read_html(path)
def test_set_index(self):
    """Check ``set_index`` with an empty key list, with ``append``, and in place."""
    df = UntypedDf.convert(pd.DataFrame(sample_data()).set_index("abc"))

    # Replacing the index with no columns leaves no index names.
    cleared = df.set_index([])
    assert cleared.index_names() == []

    # Appending no columns keeps the existing index name.
    kept = df.set_index([], append=True)
    assert kept.index_names() == ["abc"]

    # The in-place form is rejected.
    with pytest.raises(UnsupportedOperationError):
        df.set_index([], inplace=True)
def test_only(self):
    """Check ``only`` on columns named "multi", "abc", "only", and "none"."""
    blank = UntypedDf()
    df = blank.convert(pd.DataFrame(sample_data_2()))

    # "multi" is rejected with ValueError.
    with pytest.raises(ValueError):
        df.only("multi", exclude_na=True)

    # "abc" is rejected with KeyError.
    with pytest.raises(KeyError):
        df.only("abc")

    # "only" yields 1 with or without NA exclusion.
    assert df.only("only") == 1
    assert df.only("only", exclude_na=True) == 1

    # "none" yields NA by default and is rejected once NA values are excluded.
    none_value = df.only("none")
    assert pd.isna(none_value)
    with pytest.raises(ValueError):
        df.only("none", exclude_na=True)
def test_of_concat(self):
    """Check that ``UntypedDf.of`` on a list of frames stacks their rows."""
    first = UntypedDf.of(pd.DataFrame(sample_data()))
    second = UntypedDf.of(pd.DataFrame(sample_data()))
    stacked = UntypedDf.of([first, second])
    expected_rows = len(first) + len(second)
    assert len(stacked) == expected_rows > 0
def test_set_attrs(self):
    """Check that ``set_attrs`` returns a frame with the attrs set, leaving the source alone."""
    source = UntypedDf.convert(pd.DataFrame(sample_data()))
    tagged = source.set_attrs(animal="fishies")
    assert tagged.attrs == {"animal": "fishies"}
    # The frame ``set_attrs`` was called on keeps empty attrs.
    assert source.attrs == {}
def test_iter_rc(self):
    """Check that ``iter_row_col`` yields ``((row, col), value)`` in row-major order."""
    df = UntypedDf.convert(pd.DataFrame(sample_data()))
    cell_rows = [[1, 2, 3], [4, 5, 6]]
    # Pair each value with its (row, column) position, row by row.
    expected = [
        ((r, c), value)
        for r, row in enumerate(cell_rows)
        for c, value in enumerate(row)
    ]
    assert list(df.iter_row_col()) == expected
def test_records(self):
    """Check that ``from_records`` on ``to_records`` output returns an ``UntypedDf``."""
    recs = UntypedDf(sample_data()).to_records()
    rebuilt = UntypedDf.from_records(recs)
    assert isinstance(rebuilt, UntypedDf)
def test_write_passing_index(self):
    """Check that ``to_csv`` accepts an explicit ``index`` argument without raising."""
    with tmpfile(".csv") as path:
        # Trivial: fine
        typed = Trivial(sample_data())
        typed.to_csv(path, index=["abc"])
        # UntypedDf: calls super immediately
        untyped = UntypedDf(sample_data())
        untyped.to_csv(path, index=["abc"])
def test_html_invalid(self):
    """Check that ``read_html`` raises ``XMLSyntaxError`` for a zero-length file."""
    empty_content = ""
    with tmpfile(".html") as path:
        path.write_text(empty_content, encoding="utf8")
        with pytest.raises(XMLSyntaxError):
            UntypedDf.read_html(path)
def test_pretty(self):
    """Check the HTML summary header of an empty ``UntypedDf``."""
    # NOTE(review): another ``test_pretty`` is defined later in this file; if
    # both are methods of the same class, the later one replaces this one, so
    # this test would never run -- confirm.
    html = UntypedDf()._repr_html_()
    assert html.startswith("<strong>UntypedDf: 0 rows × 0 columns</strong>")
def test_pretty(self):
    """Check the HTML summary header for an empty frame and for an indexed one."""
    # Empty frame: no rows, no columns.
    empty_html = UntypedDf()._repr_html_()
    assert empty_html.startswith("<strong>UntypedDf: 0 rows × 0 columns</strong>")

    # Two of the sample columns moved into the index; one data column remains.
    indexed = UntypedDf(sample_data()).set_index(["abc", "123"])
    indexed_html = UntypedDf(indexed)._repr_html_()
    assert indexed_html.startswith(
        "<strong>UntypedDf: 2 rows × 1 columns, 2 index columns</strong>"
    )