示例#1
0
 def test_xlsx_nix_control_characters_from_colnames(self):
     # The fixture (per its file name) has control characters in its header
     # row; the parsed column names are expected to be plain "AB" and "C".
     xlsx_path = TestDataPath / "headers-have-control-characters.xlsx"
     with tempfile_context(suffix=".arrow") as arrow_path:
         parsed = parse_xlsx_file(
             xlsx_path,
             output_path=arrow_path,
             has_header=True,
             autoconvert_types=False,
         )
     expected_columns = {"AB": ["a"], "C": ["b"]}
     assert_arrow_table_equals(parsed.table, expected_columns)
     # A successful parse reports no errors.
     self.assertEqual(parsed.errors, [])
示例#2
0
 def test_xlsx_cast_colnames_to_str(self):
     # With type auto-conversion on, the column name is still expected to be
     # the string "1" while the cell value underneath is the number 2.
     xlsx_path = TestDataPath / "all-numeric.xlsx"
     with tempfile_context(suffix=".arrow") as arrow_path:
         parsed = parse_xlsx_file(
             xlsx_path,
             output_path=arrow_path,
             has_header=True,
             autoconvert_types=True,
         )
     expected_columns = {"1": [2]}
     assert_arrow_table_equals(parsed.table, expected_columns)
     # A successful parse reports no errors.
     self.assertEqual(parsed.errors, [])
示例#3
0
 def test_xlsx_uniquify_colnames(self):
     # The fixture (per its file name) repeats a header; the parser must
     # hand back distinct column names.
     xlsx_path = TestDataPath / "headers-have-duplicate-colnames.xlsx"
     with tempfile_context(suffix=".arrow") as arrow_path:
         parsed = parse_xlsx_file(
             xlsx_path,
             output_path=arrow_path,
             has_header=True,
             autoconvert_types=False,
         )
     # NOTE: the desired result is {"A": ["a"], "A 2": ["b"]}. The assertion
     # below pins the name this test currently expects ("A.1") instead.
     expected_columns = {"A": ["a"], "A.1": ["b"]}
     assert_arrow_table_equals(parsed.table, expected_columns)
     self.assertEqual(parsed.errors, [])
示例#4
0
 def test_xlsx(self):
     # Basic case: a header row plus two data rows, with type
     # auto-conversion enabled so "Amount" comes back as numbers.
     xlsx_path = TestDataPath / "test.xlsx"
     with tempfile_context(suffix=".arrow") as arrow_path:
         parsed = parse_xlsx_file(
             xlsx_path,
             output_path=arrow_path,
             has_header=True,
             autoconvert_types=True,
         )
     expected_columns = {
         "Month": ["Jan", "Feb"],
         "Amount": [10, 20],
     }
     assert_arrow_table_equals(parsed.table, expected_columns)
     # A successful parse reports no errors.
     self.assertEqual(parsed.errors, [])
示例#5
0
 def test_xlsx_replace_empty_colnames(self):
     # The fixture (per its file name) has an empty header cell; the parser
     # must substitute a placeholder column name for it.
     xlsx_path = TestDataPath / "headers-empty.xlsx"
     with tempfile_context(suffix=".arrow") as arrow_path:
         parsed = parse_xlsx_file(
             xlsx_path,
             output_path=arrow_path,
             has_header=True,
             autoconvert_types=False,
         )
     # NOTE: the desired result is {"A": ["a"], "Column 2": ["b"]}. The
     # assertion below pins the placeholder this test currently expects
     # ("Unnamed: 1") instead.
     expected_columns = {"A": ["a"], "Unnamed: 1": ["b"]}
     assert_arrow_table_equals(parsed.table, expected_columns)
     self.assertEqual(parsed.errors, [])
示例#6
0
    def test_xlsx_invalid(self):
        # Feed the parser a file that merely has an .xlsx suffix; it should
        # yield an empty table and a single error describing the failure.
        with tempfile_context(prefix="invalid", suffix=".xlsx") as bogus_path:
            bogus_path.write_bytes(b"not an xlsx")
            with tempfile_context(suffix=".arrow") as arrow_path:
                parsed = parse_xlsx_file(bogus_path,
                                         output_path=arrow_path,
                                         has_header=True,
                                         autoconvert_types=True)

        assert_arrow_table_equals(parsed.table, {})

        # The message embeds the first eight bytes written above (b'not an x').
        expected_message = I18nMessage.TODO_i18n(
            "Error reading Excel file: Unsupported format, "
            "or corrupt file: Expected BOF record; found b'not an x'"
        )
        expected_errors = [RenderError(expected_message)]
        self.assertEqual(parsed.errors, expected_errors)