def test_clean_lattice_emits_multiple_lines(row):
    """With the "lattice" extraction method, each input line is emitted as-is.

    Two lines are fed in and two lines of cell text are expected back,
    including the empty-text cell in the second line.
    """
    row["extraction_method"] = "lattice"
    for texts in (("c1", "c2"), ("", "c4")):
        row["data"].append([_get_cell(text) for text in texts])

    expected = [["c1", "c2"], ["", "c4"]]

    assert bout.clean(row) == expected
def test_clean_stream_merge_cell_data(row):
    """Consecutive lines are merged column-wise into a single output line.

    The first line doesn't have empty text, so "c1" and "c" are expected to
    be joined into "c1c" (and likewise "c2" and "c4" into "c2c4").
    """
    # NOTE(review): relies on the extraction method the `row` fixture
    # provides by default (presumably the stream one) -- confirm against
    # the fixture definition.
    first_line = [_get_cell("c1"), _get_cell("c2")]
    second_line = [_get_cell("c"), _get_cell("c4")]
    for line in (first_line, second_line):
        row["data"].append(line)

    cleaned = bout.clean(row)

    assert cleaned == [["c1c", "c2c4"]]
def test_clean_ignores_zero_cell_data(row):
    """A cell whose width is 0.0 is expected to be absent from the output."""
    zero_width_cell = _get_cell("")
    zero_width_cell["width"] = 0.0
    line = [zero_width_cell, _get_cell("c2")]
    row["data"].append(line)

    cleaned = bout.clean(row)

    # Only the text of the remaining cell should be emitted.
    assert cleaned == [["c2"]]
def test_clean_stream_not_merge_cell_data_empty_first_line(row):
    """Check the cleaned output when the first line starts with an empty cell.

    The first line begins with an empty-text, zero-width cell followed by
    "c2"; the second line holds "cc2" and "c4". The expected result is one
    line containing "cc2" and "c2c4".
    """
    # NOTE(review): the test name says "not merge", yet the expected value is
    # a single line whose second column is "c2" + "c4" -- confirm which of
    # the two reflects the intended behaviour of `bout.clean`.
    empty_cell = _get_cell("")
    empty_cell["width"] = 0.0
    first_line = [empty_cell, _get_cell("c2")]
    second_line = [_get_cell("cc2"), _get_cell("c4")]
    for line in (first_line, second_line):
        row["data"].append(line)

    cleaned = bout.clean(row)

    assert cleaned == [["cc2", "c2c4"]]
def test_clean_ignores_zero_row_data(row):
    """Cleaning the untouched `row` fixture is expected to yield no lines."""
    line_count = len(bout.clean(row))

    assert line_count == 0