def test_xlsx_parser_adjust_floating_point_error_default(): source = "data/adjust-floating-point-error.xlsx" dialect = ExcelDialect(preserve_formatting=True) layout = Layout(skip_fields=["<blank>"]) with pytest.warns(UserWarning): with Resource(source, dialect=dialect, layout=layout) as resource: assert resource.read_rows()[1].cells[2] == 274.65999999999997
def test_xlsx_parser_workbook_cache(): source = BASEURL % "data/sheets.xlsx" for sheet in ["Sheet1", "Sheet2", "Sheet3"]: dialect = ExcelDialect(sheet=sheet, workbook_cache={}) with Resource(source, dialect=dialect) as resource: assert len(dialect.workbook_cache) == 1 assert resource.read_rows()
def test_xlsx_parser_workbook_cache(): source = BASE_URL % "data/special/sheets.xlsx" for sheet in ["Sheet1", "Sheet2", "Sheet3"]: dialect = ExcelDialect(sheet=sheet, workbook_cache={}) with Table(source, dialect=dialect) as table: assert len(dialect.workbook_cache) == 1 assert table.read_data()
def test_xlsx_parser_adjust_floating_point_error_default(): source = "data/adjust-floating-point-error.xlsx" dialect = ExcelDialect(preserve_formatting=True) query = Query(skip_fields=["<blank>"]) with pytest.warns(UserWarning): with Table(source, dialect=dialect, query=query) as table: assert table.read_data()[1][2] == 274.65999999999997
def test_resource_skip_rows_non_string_cell_issue_320(): source = "data/issue-320.xlsx" dialect = ExcelDialect(fill_merged_cells=True) layout = Layout(header_rows=[10, 11, 12]) with Resource(source, dialect=dialect, layout=layout) as resource: assert resource.header[ 7] == "Current Population Analysed % of total county Pop"
def test_xlsx_parser_preserve_formatting(): source = "data/preserve-formatting.xlsx" dialect = ExcelDialect(preserve_formatting=True) layout = Layout(header_rows=[1]) detector = Detector(field_type="any") if IS_UNIX: with Resource(source, dialect=dialect, layout=layout, detector=detector) as resource: assert resource.read_rows() == [{ # general "empty": None, # numeric "0": "1001", "0.00": "1000.56", "0.0000": "1000.5577", "0.00000": "1000.55770", "0.0000#": "1000.5577", # temporal "m/d/yy": "5/20/40", "d-mmm": "20-May", "mm/dd/yy": "05/20/40", "mmddyy": "052040", "mmddyyam/pmdd": "052040AM20", }]
def test_xlsx_parser_sheet_by_name(): source = "data/sheet2.xlsx" dialect = ExcelDialect(sheet="Sheet2") with Resource(source, dialect=dialect) as resource: assert resource.header == ["id", "name"] assert resource.read_rows() == [ {"id": 1.0, "name": "english"}, {"id": 2.0, "name": "中国人"}, ]
def test_xlsx_parser_format_error_sheet_by_index_not_existent(): source = "data/sheet2.xlsx" dialect = ExcelDialect(sheet=3) resource = Resource(source, dialect=dialect) with pytest.raises(FrictionlessException) as excinfo: resource.open() error = excinfo.value.error assert error.code == "format-error" assert error.note == 'Excel document "data/sheet2.xlsx" does not have a sheet "3"'
def test_xls_parser_sheet_by_index(): source = "data/sheet2.xls" dialect = ExcelDialect(sheet=2) with Resource(source, dialect=dialect) as resource: assert resource.header == ["id", "name"] assert resource.read_rows() == [ {"id": 1, "name": "english"}, {"id": 2, "name": "中国人"}, ]
def test_xlsx_parser_preserve_formatting_percentage(): source = "data/preserve-formatting-percentage.xlsx" dialect = ExcelDialect(preserve_formatting=True) with Resource(source, dialect=dialect) as resource: assert resource.read_rows() == [ {"col1": 123, "col2": "52.00%"}, {"col1": 456, "col2": "30.00%"}, {"col1": 789, "col2": "6.00%"}, ]
def test_xlsx_parser_preserve_formatting_percentage(): source = "data/preserve-formatting-percentage.xlsx" dialect = ExcelDialect(preserve_formatting=True) with Table(source, dialect=dialect) as table: assert table.read_data() == [ [123, "52.00%"], [456, "30.00%"], [789, "6.00%"], ]
def test_xlsx_parser_format_errors_sheet_by_name_not_existent(): source = "data/sheet2.xlsx" dialect = ExcelDialect(sheet="bad") table = Table(source, dialect=dialect) with pytest.raises(FrictionlessException) as excinfo: table.open() error = excinfo.value.error assert error.code == "format-error" assert error.note == 'Excel document "data/sheet2.xlsx" does not have a sheet "bad"'
def test_xls_parser_write_sheet_name(tmpdir): source = "data/table.csv" target = str(tmpdir.join("table.xls")) dialect = ExcelDialect(sheet="sheet") with Table(source) as table: table.write(target, dialect=dialect) with Table(target, dialect=dialect) as table: assert table.header == ["id", "name"] assert table.read_data() == [[1, "english"], [2, "中国人"]]
def test_xlsx_parser_merged_cells_fill(): source = "data/merged-cells.xlsx" dialect = ExcelDialect(fill_merged_cells=True) layout = Layout(header=False) with Resource(source, dialect=dialect, layout=layout) as resource: assert resource.read_rows() == [ {"field1": "data", "field2": "data"}, {"field1": "data", "field2": "data"}, {"field1": "data", "field2": "data"}, ]
def test_xlsx_parser_preserve_formatting_number_multicode(): source = "data/number-format-multicode.xlsx" dialect = ExcelDialect(preserve_formatting=True) layout = Layout(skip_fields=["<blank>"]) with Resource(source, dialect=dialect, layout=layout) as resource: assert resource.read_rows() == [ {"col1": Decimal("4.5")}, {"col1": Decimal("-9.032")}, {"col1": Decimal("15.8")}, ]
def test_xls_parser_write_sheet_name(tmpdir): dialect = ExcelDialect(sheet="sheet") source = Resource("data/table.csv") target = Resource(str(tmpdir.join("table.xls")), dialect=dialect) source.write(target) with target: assert target.header == ["id", "name"] assert target.read_rows() == [ {"id": 1, "name": "english"}, {"id": 2, "name": "中国人"}, ]
def test_table_xlsx_adjust_floating_point_error(): source = "data/adjust-floating-point-error.xlsx" dialect = ExcelDialect( fill_merged_cells=False, preserve_formatting=True, adjust_floating_point_error=True, ) query = Query(skip_fields=["<blank>"]) with pytest.warns(UserWarning): with Table(source, dialect=dialect, query=query) as table: assert table.read_data()[1][2] == 274.66
def test_table_header_xlsx_multiline(): source = "data/multiline-headers.xlsx" dialect = ExcelDialect(fill_merged_cells=True) with Table(source, dialect=dialect, headers=[1, 2, 3, 4, 5]) as table: assert table.header == [ "Region", "Caloric contribution (%)", "Cumulative impact of changes on cost of food basket from previous quarter", "Cumulative impact of changes on cost of food basket from baseline (%)", ] assert table.read_data() == [["A", "B", "C", "D"]]
def test_resource_layout_header_xlsx_multiline(): source = "data/multiline-headers.xlsx" dialect = ExcelDialect(fill_merged_cells=True) layout = Layout(header_rows=[1, 2, 3, 4, 5]) with Resource(source, dialect=dialect, layout=layout) as resource: header = resource.header assert header == [ "Region", "Caloric contribution (%)", "Cumulative impact of changes on cost of food basket from previous quarter", "Cumulative impact of changes on cost of food basket from baseline (%)", ] assert resource.read_rows() == [ { header[0]: "A", header[1]: "B", header[2]: "C", header[3]: "D" }, ]
def test_xlsx_parser_preserve_formatting(): source = "data/preserve-formatting.xlsx" dialect = ExcelDialect(preserve_formatting=True) with Table(source, dialect=dialect, headers=1, infer_type="any") as table: assert table.read_rows() == [{ # general "empty": None, # numeric "0": "1001", "0.00": "1000.56", "0.0000": "1000.5577", "0.00000": "1000.55770", "0.0000#": "1000.5577", # temporal "m/d/yy": "5/20/40", "d-mmm": "20-May", "mm/dd/yy": "05/20/40", "mmddyy": "052040", "mmddyyam/pmdd": "052040AM20", }]
def test_table_skip_rows_non_string_cell_issue_320(): source = "data/issue320.xlsx" dialect = ExcelDialect(fill_merged_cells=True) with pytest.warns(UserWarning): with Table(source, dialect=dialect, headers=[10, 11, 12]) as table: assert table.header[7] == "Current Population Analysed % of total county Pop"
def test_xls_parser_sheet_by_name_not_existent(): source = "data/sheet2.xls" dialect = ExcelDialect(sheet="bad") with pytest.raises(FrictionlessException) as excinfo: Resource(source, dialect=dialect).open() assert 'sheet "bad"' in str(excinfo.value)
def test_xlsx_parser_merged_cells_fill(): source = "data/merged-cells.xlsx" dialect = ExcelDialect(fill_merged_cells=True) with Table(source, dialect=dialect, headers=False) as table: assert table.read_data() == [["data", "data"], ["data", "data"], ["data", "data"]]
def test_xlsx_parser_preserve_formatting_number_multicode(): source = "data/number-format-multicode.xlsx" dialect = ExcelDialect(preserve_formatting=True) query = Query(skip_fields=["<blank>"]) with Table(source, dialect=dialect, query=query) as table: assert table.read_data() == [["4.5"], ["-9.032"], ["15.8"]]
def test_xlsx_parser_sheet_by_name(): source = "data/sheet2.xlsx" dialect = ExcelDialect(sheet="Sheet2") with Table(source, dialect=dialect) as table: assert table.header == ["id", "name"] assert table.read_data() == [[1.0, "english"], [2.0, "中国人"]]
def test_table_xls_sheet_by_index_not_existent(): source = "data/sheet2.xls" dialect = ExcelDialect(sheet=3) with pytest.raises(FrictionlessException) as excinfo: Table(source, dialect=dialect).open() assert 'sheet "3"' in str(excinfo.value)
def test_xlsx_parser_merged_cells_fill_boolean(): source = "data/merged-cells-boolean.xls" dialect = ExcelDialect(fill_merged_cells=True) with Table(source, dialect=dialect, headers=False) as table: assert table.read_data() == [[True, True], [True, True], [True, True]]
def test_xls_parser_sheet_by_index(): source = "data/sheet2.xls" dialect = ExcelDialect(sheet=2) with Table(source, dialect=dialect) as table: assert table.header == ["id", "name"] assert table.read_data() == [[1, "english"], [2, "中国人"]]