示例#1
0
def test_TableBundle_unique():
    """Check unique() on bundles with single and repeated table names.

    A name is expected to resolve to its table only when exactly one table
    in the bundle carries it; missing and repeated names are expected to
    raise LookupError.
    """
    single = TableBundle(parse_blocks(cell_rows))
    # one 'foo' table and one 'infs' table
    assert len(single) == 2

    with pytest.raises(LookupError):
        single.unique("-not there-")

    for name in ("foo", "infs"):
        found = single.unique(name)
        assert found.name == name

    # Feed the same rows twice so that every table name occurs twice.
    doubled_rows = [*cell_rows, *cell_rows]
    doubled = TableBundle(parse_blocks(doubled_rows))
    assert len(doubled) == 4

    for name in ("-not there-", "foo", "infs"):
        with pytest.raises(LookupError):
            doubled.unique(name)
示例#2
0
def test_TableBundle_iterator():
    """Verify that iterating a TableBundle yields every table it holds.

    The check is repeated for each output format requested from
    parse_blocks: "pdtable" is expected to yield Table objects, "cellgrid"
    lists of rows and "jsondata" dicts.
    """
    bundle = TableBundle(parse_blocks(cell_rows, to="pdtable"))
    count = 0
    seen = {}
    for tab in bundle:
        assert type(tab) is Table
        seen[tab.name] = tab
        count += 1
    assert count == 2
    assert len(seen) == 2
    assert seen["foo"] is not None
    assert seen["infs"] is not None

    # Verify that we can iterate other types than pdtable
    bundle = TableBundle(parse_blocks(cell_rows, to="cellgrid"))
    count = 0
    for tab in bundle:
        assert type(tab) is list
        # first cell of a cellgrid table is its "**name" header cell
        assert tab[0][0] in {"**foo", "**infs"}
        count += 1
    assert count == 2
    # lookup by name must still work for non-pdtable table types
    assert bundle["foo"] is not None
    assert bundle["infs"] is not None

    bundle = TableBundle(parse_blocks(cell_rows, to="jsondata"))
    count = 0
    for tab in bundle:
        assert type(tab) is dict
        assert tab["name"] in {"foo", "infs"}
        count += 1
    assert count == 2
    assert bundle["foo"] is not None
    assert bundle["infs"] is not None
示例#3
0
def test_TableBundle_as_dataframe():
    """Check the as_dataframe switch of TableBundle.

    With pdtable input the bundle is expected to hold TableDataFrame objects
    when as_dataframe=True and Table objects when it is False. For other
    table types the flag must not cause an error.
    """
    # pdtable input, tables exposed as TableDataFrame
    source = parse_blocks(cell_rows, to="pdtable")
    frames = TableBundle(source, as_dataframe=True)
    assert frames.infs.file_bytes.values[1] == 15326.0
    assert frames is not None
    assert len(frames) == 2
    assert isinstance(frames[0], TableDataFrame)

    # pdtable input, tables kept as Table
    source = parse_blocks(cell_rows, to="pdtable")
    tables = TableBundle(source, as_dataframe=False)
    assert tables.infs["file_bytes"].values[1] == 15326.0
    assert tables is not None
    assert len(tables) == 2
    assert isinstance(tables[1], Table)

    # cellgrid input: the flag is accepted and the items stay plain lists
    source = parse_blocks(cell_rows, to="cellgrid")
    grids = TableBundle(source, as_dataframe=True)
    assert grids is not None
    assert isinstance(grids[0], list)  # cellgrid
示例#4
0
def test_read_write_excel__round_trip_with_styles(tmp_path):
    """Round-trip reading and writing and re-reading preserves tables

    Reads the foo.xlsx fixture, writes it back with styles=True into the
    pytest-provided tmp_path, re-reads the written file and compares the
    two bundles table by table.
    """
    from pdtable import TableBundle, read_excel
    bundle = TableBundle(read_excel("pdtable/test/io/input/foo.xlsx"))
    out_path = tmp_path / "foo_styled.xlsx"
    # Doesn't crash on write
    write_excel(bundle, out_path, styles=True)
    # Re-read bundle is same as first one
    bundle2 = TableBundle(read_excel(out_path))
    # zip() stops at the shorter input, so a re-read bundle with missing
    # tables would go unnoticed without comparing the sizes first.
    assert len(bundle2) == len(bundle)
    for t, t2 in zip(bundle, bundle2):
        assert t.equals(t2)
示例#5
0
def test_TableBundle_all():
    """Check all() on bundles with single and repeated table names.

    all(name) is expected to return every table carrying that name: an
    empty result for an unknown name, one table per name in the plain
    bundle and two per name once the input rows are duplicated.
    """

    def check_matches(bundle, name, expected_count):
        # every returned table must carry the requested name
        matches = bundle.all(name)
        assert len(matches) == expected_count
        for tab in matches:
            assert tab.name == name

    single = TableBundle(parse_blocks(cell_rows))
    # one 'foo' table and one 'infs' table
    assert len(single) == 2

    assert len(single.all("-not there-")) == 0
    check_matches(single, "foo", 1)
    check_matches(single, "infs", 1)

    # Feed the same rows twice so that every table name occurs twice.
    doubled_rows = [*cell_rows, *cell_rows]
    doubled = TableBundle(parse_blocks(doubled_rows))
    assert len(doubled) == 4

    assert len(doubled.all("-not there-")) == 0
    check_matches(doubled, "foo", 2)
    check_matches(doubled, "infs", 2)
示例#6
0
def test_parse_blocks():
    """Parse a small mixed input and check each kind of block produced.

    The input holds two metadata lines, one directive ("gunk"), and two
    tables ("foo" and "input_files_derived"). The same rows are then loaded
    into a TableBundle to check attribute-style access to table columns.
    """
    raw_text = dedent("""\
        author: ;XYODA     ;
        purpose:;Save the galaxy

        ***gunk
        grok
        jiggyjag

        **foo
        all
        column;pct;dash;mm;
        text;%;-;mm;
        bar;10;10;10;

        ::;Table foo describes
        ;the fooness of things
        :.column;Column is a column in foo

        **input_files_derived;
        all;
        file_bytes;file_date;has_table;
        -;text;onoff;
        15373;a;0;
        15326;b;1;
        """)
    cell_rows = [line.split(";") for line in raw_text.strip().split("\n")]

    blocks = list(parse_blocks(cell_rows))

    def blocks_of(wanted):
        # payloads of all parsed blocks whose type equals `wanted`
        return [payload for kind, payload in blocks if kind == wanted]

    metadata_blocks = blocks_of(BlockType.METADATA)
    assert len(metadata_blocks) == 1
    metadata = metadata_blocks[0]
    assert metadata["author"] == "XYODA"
    assert metadata["purpose"] == "Save the galaxy"

    directives = blocks_of(BlockType.DIRECTIVE)
    assert len(directives) == 1
    directive = directives[0]
    assert directive.name == "gunk"
    assert directive.lines == ["grok", "jiggyjag"]

    tables = blocks_of(BlockType.TABLE)
    assert len(tables) == 2
    first_table = tables[0]
    assert first_table.name == "foo"
    assert first_table.df["column"].iloc[0] == "bar"

    # Bundle
    table_bundle = TableBundle(parse_blocks(cell_rows), as_dataframe=True)
    foo = table_bundle.foo
    assert foo.column.values[0] == "bar"
    assert foo.dash.values[0] == 10
示例#7
0
def test_read_csv_compatible1():
    """Parse a table whose cells are already Python values.

    The rows mix ints, floats, None and date strings written with both "/"
    and "-" separators. Parsing runs with a ParseFixer that does not stop
    on errors; the test then checks the onoff, datetime and numerical
    columns of the resulting table.
    """

    # fmt: off
    cell_rows = [
        ["**test_input"],
        ["all"],
        ["numerical", "dates", "onoffs"],
        ["-", "datetime", "onoff"],
        [123, "08/07/2020", 0],
        [123, "08-07-2020", 1],
        [123, "08-07-2020", 1],
        [1.23, None, None],
        [1.23, None, None],
        [1.23, None, None],
        [-1.23, None, None],
        [1.23, None, None],
    ]
    # fmt: on
    fixer = ParseFixer()
    fixer.stop_on_errors = False
    fixer._called_from_test = True
    blocks = parse_blocks(cell_rows, fixer=fixer)
    table_bundle = TableBundle(blocks, as_dataframe=True)
    assert table_bundle

    tab = table_bundle.test_input

    assert not tab.onoffs[0]
    assert tab.onoffs[1]
    assert tab.onoffs[2]

    # the first three rows all hold the same date, read day-first
    for row in range(3):
        date = tab.dates[row]
        assert date.year == 2020
        assert date.month == 7
        assert date.day == 8

    for row in range(3):
        assert tab.numerical[row] == 123

    for row, expected in ((3, 1.23), (5, 1.23), (7, 1.23), (6, -1.23)):
        assert tab.numerical[row] == expected
示例#8
0
def read_bundle_from_csv(
    input_path: Union[str, PathLike, TextIO],
    sep: Optional[str] = ";",
    convert_units_to: Optional[TableUnitDispatcher] = None,
    unit_converter: Optional[UnitConverter] = None,
) -> TableBundle:
    """Read single csv-file to TableBundle

    With a demo of bulk unit conversion of all tables at read time.

    Args:
        input_path: Path or text stream, handed unchanged to read_csv.
        sep: Cell separator, handed unchanged to read_csv.
        convert_units_to: When given, the stream from read_csv is passed
            through normalized_table_generator with this dispatcher
            before the bundle is built.
        unit_converter: Must accept units of type returned by the
            TableUnitDispatcher. Required when convert_units_to is given.

    Returns:
        A TableBundle built from the (optionally unit-normalized) stream.

    Raises:
        ValueError: If convert_units_to is given without a unit_converter.
    """
    # Use the same "is not None" test as the conversion branch below, so a
    # request that will be converted is always validated first.
    if convert_units_to is not None and unit_converter is None:
        raise ValueError("No unit converter supplied.")

    inputs = read_csv(input_path, sep)

    if convert_units_to is not None:
        inputs = normalized_table_generator(inputs, convert_units_to,
                                            unit_converter)

    return TableBundle(inputs)
示例#9
0
def test_read_csv_compatible2():
    """Parse a table whose column names and units carry extra whitespace.

    The column-name and unit rows contain leading and trailing spaces; the
    test expects the columns to be reachable under their bare names.
    """
    raw_text = dedent(r"""
    **test_input;
    all;
    numerical ; dates; onoffs ;
     - ; datetime;onoff ;
    123;08/07/2020;0;
    """)
    cell_rows = [line.split(";") for line in raw_text.strip().split("\n")]

    blocks = parse_blocks(cell_rows)
    table_bundle = TableBundle(blocks, as_dataframe=True)
    assert table_bundle

    tab = table_bundle.test_input
    assert not tab.onoffs[0]
    assert tab.dates[0].year == 2020
    assert tab.numerical[0] == 123
示例#10
0
def test_bundle_from_csv():
    """Build a dataframe bundle from cell_rows and read one cell of 'foo'."""
    blocks = parse_blocks(cell_rows)
    loaded = TableBundle(blocks, as_dataframe=True)

    first_cell = loaded.foo.column.values[0]
    assert first_cell == "bar"
示例#11
0
def test_TableBundle_in_operator():
    """Check `in` / `not in` on a bundle, using table names as operands."""
    blocks = parse_blocks(cell_rows)
    loaded = TableBundle(blocks)
    # 'foo' is a table in cell_rows; 'qux' is not
    assert "foo" in loaded
    assert "qux" not in loaded
示例#12
0
def test_TableBundle_attribute_error():
    """Check that an unknown attribute on an empty bundle raises AttributeError."""
    empty = TableBundle([])
    with pytest.raises(AttributeError):
        getattr(empty, "invalid_attribute_name")
示例#13
0
def test_TableBundle_getitem():
    """ Verify that bundle[...] (__getitem__) is functioning as expected

    Covers lookup by table name and by position, on a bundle where each
    name occurs once and on a bundle where each name occurs twice.
    """
    bundle1 = TableBundle(parse_blocks(cell_rows))
    # bundle1 now contains one 'foo' and one 'infs'
    assert len(bundle1) == 2

    with pytest.raises(LookupError):
        bundle1["-not there-"]

    # a key that is neither a table name nor a position is rejected
    with pytest.raises(TypeError):
        bundle1[bundle1]

    # hashed: lookup by table name
    for name in ("foo", "infs"):
        assert bundle1[name].name == name

    # indexed: lookup by position follows input order
    for index, name in enumerate(("foo", "infs")):
        assert bundle1[index].name == name

    with pytest.raises(IndexError):
        bundle1[2]

    # Feed the same rows twice so that every table name occurs twice.
    cells2 = []
    cells2.extend(cell_rows)
    cells2.extend(cell_rows)

    bundle2 = TableBundle(parse_blocks(cells2))
    # bundle2 now contains two 'foo' and two 'infs'
    assert len(bundle2) == 4

    # by name: missing and ambiguous names both fail
    for name in ("-not there-", "foo", "infs"):
        with pytest.raises(LookupError):
            bundle2[name]

    # indexed: positions stay unambiguous even with repeated names
    for index, name in enumerate(("foo", "infs", "foo", "infs")):
        assert bundle2[index].name == name

    with pytest.raises(IndexError):
        bundle2[4]
def test_TableBundle_from_file():
    """Build TableBundles from the top level readers read_csv and read_excel.

    Each of bundle.csv and bundle.xlsx is loaded once with as_dataframe=True
    and once with as_dataframe=False, and the same content checks are run
    on every resulting bundle.
    """

    def check_dataframe_bundle(loaded):
        # expectations for a bundle built with as_dataframe=True
        assert loaded is not None
        assert len(loaded) == 3
        assert isinstance(loaded[0], TableDataFrame)

        assert loaded.unique("spelling_numbers").spelling[1] == "six"
        assert loaded[1].spelling[0] == "one"
        assert len(loaded.all("places_to_go")) == 2

    def check_table_bundle(loaded):
        # expectations for a bundle built with as_dataframe=False
        assert loaded is not None
        assert len(loaded) == 3
        assert isinstance(loaded[1], Table)
        assert loaded.spelling_numbers["spelling"].values[0] == "one"
        assert len(loaded.all("places_to_go")) == 2

    csv_path = input_dir() / "bundle.csv"
    check_dataframe_bundle(TableBundle(read_csv(csv_path), as_dataframe=True))
    check_table_bundle(TableBundle(read_csv(csv_path), as_dataframe=False))

    xlsx_path = input_dir() / "bundle.xlsx"
    check_table_bundle(TableBundle(read_excel(xlsx_path), as_dataframe=False))
    check_dataframe_bundle(TableBundle(read_excel(xlsx_path), as_dataframe=True))