def test_TableBundle_unique():
    """Check that unique() returns a table only when its name occurs exactly once."""
    single = TableBundle(parse_blocks(cell_rows))
    # One 'foo' and one 'infs' table: every name is unique here.
    assert len(single) == 2

    with pytest.raises(LookupError):
        single.unique("-not there-")
    for name in ("foo", "infs"):
        assert single.unique(name).name == name

    # Feeding the same rows twice gives two 'foo' and two 'infs' tables.
    doubled_rows = list(cell_rows) + list(cell_rows)
    doubled = TableBundle(parse_blocks(doubled_rows))
    assert len(doubled) == 4

    # Missing and duplicated names must both be rejected.
    for name in ("-not there-", "foo", "infs"):
        with pytest.raises(LookupError):
            doubled.unique(name)
def test_TableBundle_iterator():
    """Check that iterating a bundle yields every table once, for each table type."""
    pd_bundle = TableBundle(parse_blocks(cell_rows, to="pdtable"))
    by_name = {}
    n_tables = 0
    for n_tables, table in enumerate(pd_bundle, start=1):
        assert type(table) is Table
        by_name[table.name] = table
    assert n_tables == 2
    assert len(by_name) == 2
    for name in ("foo", "infs"):
        assert by_name[name] is not None

    # Verify that we can iterate other types than pdtable
    grid_bundle = TableBundle(parse_blocks(cell_rows, to="cellgrid"))
    n_tables = 0
    for n_tables, grid in enumerate(grid_bundle, start=1):
        assert type(grid) is list
        assert grid[0][0] in {"**foo", "**infs"}
    assert n_tables == 2
    assert grid_bundle["foo"] is not None
    assert grid_bundle["infs"] is not None

    json_bundle = TableBundle(parse_blocks(cell_rows, to="jsondata"))
    n_tables = 0
    for n_tables, record in enumerate(json_bundle, start=1):
        assert type(record) is dict
        assert record["name"] in {"foo", "infs"}
    assert n_tables == 2
    assert json_bundle["foo"] is not None
    assert json_bundle["infs"] is not None
def test_TableBundle_as_dataframe():
    """Check that the as_dataframe flag selects the type of the stored tables."""
    # pdtable input, stored as TableDataFrame
    frames = TableBundle(
        parse_blocks(cell_rows, to="pdtable"),
        as_dataframe=True,
    )
    assert frames.infs.file_bytes.values[1] == 15326.0
    assert frames is not None
    assert len(frames) == 2
    assert isinstance(frames[0], TableDataFrame)

    # pdtable input, stored as Table
    tables = TableBundle(
        parse_blocks(cell_rows, to="pdtable"),
        as_dataframe=False,
    )
    assert tables.infs["file_bytes"].values[1] == 15326.0
    assert tables is not None
    assert len(tables) == 2
    assert isinstance(tables[1], Table)

    # The flag must not cause an error for other table types; a cellgrid stays a list.
    grids = TableBundle(
        parse_blocks(cell_rows, to="cellgrid"),
        as_dataframe=True,
    )
    assert grids is not None
    assert isinstance(grids[0], list)
def test_read_write_excel__round_trip_with_styles(tmp_path):
    """Round-trip reading, styled writing and re-reading preserves tables.

    Reads ``foo.xlsx`` into a bundle, writes it to ``tmp_path`` with
    ``styles=True``, reads the written file back and checks that both
    bundles hold the same number of tables and that the tables compare
    equal pairwise, in order.
    """
    from pdtable import TableBundle, read_excel

    bundle = TableBundle(read_excel("pdtable/test/io/input/foo.xlsx"))
    out_path = tmp_path / "foo_styled.xlsx"

    # Doesn't crash on write
    write_excel(bundle, out_path, styles=True)

    # Re-read bundle is same as first one
    bundle2 = TableBundle(read_excel(out_path))

    # zip() stops at the shorter input, so without this check a re-read
    # bundle that lost tables would still pass the pairwise comparison.
    assert len(bundle2) == len(bundle)
    for t, t2 in zip(bundle, bundle2):
        assert t.equals(t2)
def test_TableBundle_all():
    """Check that all() returns every table carrying the requested name."""
    single = TableBundle(parse_blocks(cell_rows))
    # One 'foo' and one 'infs' table.
    assert len(single) == 2
    for name, expected_count in (("-not there-", 0), ("foo", 1), ("infs", 1)):
        matches = single.all(name)
        assert len(matches) == expected_count
        assert all(table.name == name for table in matches)

    # Feeding the same rows twice gives two 'foo' and two 'infs' tables.
    doubled_rows = list(cell_rows) + list(cell_rows)
    doubled = TableBundle(parse_blocks(doubled_rows))
    assert len(doubled) == 4
    for name, expected_count in (("-not there-", 0), ("foo", 2), ("infs", 2)):
        matches = doubled.all(name)
        assert len(matches) == expected_count
        assert all(table.name == name for table in matches)
def test_parse_blocks():
    """Parse a small document with metadata, a directive, a template and two tables."""
    raw_text = dedent(
        """\
        author: ;XYODA ;
        purpose:;Save the galaxy

        ***gunk
        grok
        jiggyjag

        **foo
        all
        column;pct;dash;mm;
        text;%;-;mm;
        bar;10;10;10;

        ::;Table foo describes
        ;the fooness of things
        :.column;Column is a column in foo

        **input_files_derived;
        all;
        file_bytes;file_date;has_table;
        -;text;onoff;
        15373;a;0;
        15326;b;1;
        """
    )
    cell_rows = [row.split(";") for row in raw_text.strip().split("\n")]

    parsed = list(parse_blocks(cell_rows))

    def blocks_of(kind):
        # Pick out the payloads of all blocks of the given type, in order.
        return [block for block_type, block in parsed if block_type == kind]

    metadata = blocks_of(BlockType.METADATA)
    assert len(metadata) == 1
    first_meta = metadata[0]
    assert first_meta["author"] == "XYODA"
    assert first_meta["purpose"] == "Save the galaxy"

    directive_blocks = blocks_of(BlockType.DIRECTIVE)
    assert len(directive_blocks) == 1
    directive = directive_blocks[0]
    assert directive.name == "gunk"
    assert directive.lines == ["grok", "jiggyjag"]

    table_blocks = blocks_of(BlockType.TABLE)
    assert len(table_blocks) == 2
    foo = table_blocks[0]
    assert foo.name == "foo"
    assert foo.df["column"].iloc[0] == "bar"

    # Bundle
    table_bundle = TableBundle(parse_blocks(cell_rows), as_dataframe=True)
    assert table_bundle.foo.column.values[0] == "bar"
    assert table_bundle.foo.dash.values[0] == 10
def test_TableBundle_from_file():
    """Check that a TableBundle can be built from the top-level readers.

    Covers read_csv and read_excel, each with as_dataframe on and off.
    """

    def check_dataframe_bundle(bundle):
        # Expectations for a bundle created with as_dataframe=True.
        assert bundle is not None
        assert len(bundle) == 3
        assert isinstance(bundle[0], TableDataFrame)
        assert bundle.unique("spelling_numbers").spelling[1] == "six"
        assert bundle[1].spelling[0] == "one"
        assert len(bundle.all("places_to_go")) == 2

    def check_table_bundle(bundle):
        # Expectations for a bundle created with as_dataframe=False.
        assert bundle is not None
        assert len(bundle) == 3
        assert isinstance(bundle[1], Table)
        assert bundle.spelling_numbers["spelling"].values[0] == "one"
        assert len(bundle.all("places_to_go")) == 2

    csv_path = input_dir() / "bundle.csv"
    check_dataframe_bundle(TableBundle(read_csv(csv_path), as_dataframe=True))
    check_table_bundle(TableBundle(read_csv(csv_path), as_dataframe=False))

    xlsx_path = input_dir() / "bundle.xlsx"
    check_table_bundle(TableBundle(read_excel(xlsx_path), as_dataframe=False))
    check_dataframe_bundle(TableBundle(read_excel(xlsx_path), as_dataframe=True))
def test_read_csv_compatible1():
    """Parse a cell grid that holds native Python values instead of strings.

    The grid mixes ints, floats, None cells and two date spellings, and is
    parsed with a ParseFixer that does not stop on errors.
    """
    # fmt: off
    cell_rows = [
        ["**test_input"],
        ["all"],
        ["numerical", "dates", "onoffs"],
        ["-", "datetime", "onoff"],
        [123, "08/07/2020", 0],
        [123, "08-07-2020", 1],
        [123, "08-07-2020", 1],
        [1.23, None, None],
        [1.23, None, None],
        [1.23, None, None],
        [-1.23, None, None],
        [1.23, None, None],
    ]
    # fmt: on

    lenient_fixer = ParseFixer()
    lenient_fixer.stop_on_errors = False
    lenient_fixer._called_from_test = True

    table_bundle = TableBundle(
        parse_blocks(cell_rows, fixer=lenient_fixer),
        as_dataframe=True,
    )
    assert table_bundle

    onoffs = table_bundle.test_input.onoffs
    assert not onoffs[0]
    assert onoffs[1]
    assert onoffs[2]

    # The first three rows all carry the date 8 July 2020.
    for row in range(3):
        parsed_date = table_bundle.test_input.dates[row]
        assert parsed_date.year == 2020
        assert parsed_date.month == 7
        assert parsed_date.day == 8

    for row in range(3):
        assert table_bundle.test_input.numerical[row] == 123

    for row, expected in ((3, 1.23), (5, 1.23), (7, 1.23), (6, -1.23)):
        assert table_bundle.test_input.numerical[row] == expected
def read_bundle_from_csv(
    input_path: Union[str, PathLike, TextIO],
    sep: Optional[str] = ";",
    convert_units_to: TableUnitDispatcher = None,
    unit_converter: UnitConverter = None,
) -> TableBundle:
    """Read a single csv file into a TableBundle.

    Also demonstrates bulk unit conversion of all tables at read time:
    when ``convert_units_to`` is not None, the blocks read from the file are
    passed through ``normalized_table_generator`` together with
    ``convert_units_to`` and ``unit_converter`` before being bundled.

    ``unit_converter`` must accept units of the type returned by the
    TableUnitDispatcher.

    Raises:
        ValueError: if ``convert_units_to`` is truthy but ``unit_converter``
            is falsy.
    """
    if convert_units_to:
        if not unit_converter:
            raise ValueError("No unit converter supplied.")

    blocks = read_csv(input_path, sep)
    if convert_units_to is None:
        return TableBundle(blocks)

    converted = normalized_table_generator(blocks, convert_units_to, unit_converter)
    return TableBundle(converted)
def test_read_csv_compatible2():
    """Parse a table whose column names and units carry leading/trailing whitespace."""
    raw_text = dedent(
        r"""
        **test_input;
        all;
        numerical ; dates; onoffs ;
        - ; datetime;onoff ;
        123;08/07/2020;0;
        """
    )
    cell_rows = [row.split(";") for row in raw_text.strip().split("\n")]

    table_bundle = TableBundle(parse_blocks(cell_rows), as_dataframe=True)
    assert table_bundle

    parsed = table_bundle.test_input
    assert not parsed.onoffs[0]
    assert parsed.dates[0].year == 2020
    assert parsed.numerical[0] == 123
def test_bundle_from_csv():
    """Check attribute access to a table and its column in a dataframe bundle."""
    parsed_blocks = parse_blocks(cell_rows)
    bundle = TableBundle(parsed_blocks, as_dataframe=True)
    first_value = bundle.foo.column.values[0]
    assert first_value == "bar"
def test_TableBundle_in_operator():
    """Check that ``in`` reports whether a table name is present in the bundle."""
    parsed_blocks = parse_blocks(cell_rows)
    bundle = TableBundle(parsed_blocks)
    # 'foo' is one of the tables in cell_rows; 'qux' is not.
    assert "foo" in bundle
    assert "qux" not in bundle
def test_TableBundle_attribute_error():
    """Check that accessing an unknown attribute on an empty bundle raises AttributeError."""
    empty_bundle = TableBundle([])
    with pytest.raises(AttributeError):
        getattr(empty_bundle, "invalid_attribute_name")
def test_TableBundle_getitem():
    """Check bundle[...] lookup by table name and by integer position."""
    single = TableBundle(parse_blocks(cell_rows))
    # One 'foo' and one 'infs' table.
    assert len(single) == 2

    with pytest.raises(LookupError):
        single["-not there-"]

    # A key that is neither a name nor an index is rejected.
    with pytest.raises(TypeError):
        single[single]

    # Lookup by name
    for name in ("foo", "infs"):
        assert single[name].name == name

    # Lookup by position
    for position, name in enumerate(("foo", "infs")):
        assert single[position].name == name
    with pytest.raises(IndexError):
        single[2]

    # Feeding the same rows twice gives two 'foo' and two 'infs' tables.
    doubled_rows = list(cell_rows) + list(cell_rows)
    doubled = TableBundle(parse_blocks(doubled_rows))
    assert len(doubled) == 4

    # Name lookup fails for missing names and for names that are no longer unique.
    for name in ("-not there-", "foo", "infs"):
        with pytest.raises(LookupError):
            doubled[name]

    # Positional lookup still works and follows input order.
    for position, name in enumerate(("foo", "infs", "foo", "infs")):
        assert doubled[position].name == name
    with pytest.raises(IndexError):
        doubled[4]