Exemplo n.º 1
0
 def test_render_xlsx_bad_content(self):
     # The response is labelled with the XLSX MIME type but its body is
     # plain UTF-8 text. Render must return a default ArrowTable plus a
     # single RenderError carrying the Excel reader's message.
     bogus_body = "ceçi n'est pas une .xlsx".encode("utf-8")
     with tempfile_context("fetch-") as path:
         httpfile.write(
             path,
             {"url": "http://example.com/hello"},
             "200 OK",
             [("content-type", XLSX_MIME_TYPE)],
             io.BytesIO(bogus_body),
         )
         fetch_result = FetchResult(path)
         result = render_arrow(
             ArrowTable(),
             P(has_header=True),
             "tab-x",
             fetch_result,
             self.output_path,
         )
     expected_message = I18nMessage.TODO_i18n(
         'Error reading Excel file: Unsupported format, or corrupt file: Expected BOF record; found b"ce\\xc3\\xa7i n\'"'
     )
     expected = RenderResult(ArrowTable(), [RenderError(expected_message)])
     self.assertEqual(result, expected)
Exemplo n.º 2
0
 def test_render_deprecated_parquet(self):
     # A fetch result that points at a Parquet file renders to the same
     # columns, with no errors.
     with parquet_file({"A": [1, 2], "B": [3, 4]}) as path:
         fetch_result = FetchResult(path)
         result = render_arrow(
             ArrowTable(),
             P(),
             "tab-x",
             fetch_result,
             self.output_path,
         )
     expected_columns = {"A": [1, 2], "B": [3, 4]}
     assert_arrow_table_equals(result.table, expected_columns)
     self.assertEqual(result.errors, [])
Exemplo n.º 3
0
 def test_render_deprecated_parquet_warning(self):
     # Errors attached to a Parquet fetch result are returned by render
     # together with the table data.
     errors = [RenderError(I18nMessage.TODO_i18n("truncated table"))]
     with parquet_file({"A": [1, 2], "B": [3, 4]}) as path:
         fetch_result = FetchResult(path, errors=errors)
         result = render_arrow(
             ArrowTable(), P(), "tab-x", fetch_result, self.output_path
         )
     expected_columns = {"A": [1, 2], "B": [3, 4]}
     assert_arrow_table_equals(result.table, expected_columns)
     self.assertEqual(result.errors, errors)
Exemplo n.º 4
0
 def test_render_fetch_error(self):
     # Errors recorded on the fetch result must come back from render
     # unchanged, alongside a default (empty) ArrowTable.
     #
     # Each entry in an errors list is a RenderError wrapping an
     # I18nMessage. RenderResult is the (table, errors) pair that render
     # returns, so it does not belong inside the list: the fixture
     # previously built RenderResult(I18nMessage(...)), which put a message
     # where a table is expected.
     errors = [RenderError(I18nMessage("x", {"y": "z"}))]
     with tempfile_context() as empty_path:
         result = render_arrow(
             ArrowTable(),
             P(),
             "tab-x",
             FetchResult(empty_path, errors),
             self.output_path,
         )
     assert_arrow_table_equals(result.table, ArrowTable())
     self.assertEqual(result.errors, errors)
Exemplo n.º 5
0
 def test_render_json(self):
     # An application/json response holding one record renders to a
     # one-row table with no errors.
     body = b'[{"A": "a"}]'
     with tempfile_context("fetch-") as path:
         httpfile.write(
             path,
             {"url": "http://example.com/hello"},
             "200 OK",
             [("content-type", "application/json")],
             io.BytesIO(body),
         )
         fetch_result = FetchResult(path)
         result = render_arrow(
             ArrowTable(),
             P(has_header=True),
             "tab-x",
             fetch_result,
             self.output_path,
         )
     self.assertEqual(result.errors, [])
     expected_columns = {"A": ["a"]}
     assert_arrow_table_equals(result.table, expected_columns)
Exemplo n.º 6
0
 def test_render_has_header_true(self):
     # With has_header=True, the first CSV row supplies the column names.
     body = b"A,B\na,b"
     with tempfile_context("http") as path:
         httpfile.write(
             path,
             {"url": "http://example.com/hello"},
             "200 OK",
             [("content-type", "text/csv")],
             io.BytesIO(body),
         )
         fetch_result = FetchResult(path)
         result = render_arrow(
             ArrowTable(),
             P(has_header=True),
             "tab-x",
             fetch_result,
             self.output_path,
         )
     expected_columns = {"A": ["a"], "B": ["b"]}
     assert_arrow_table_equals(result.table, expected_columns)
     self.assertEqual(result.errors, [])
Exemplo n.º 7
0
 def test_render_xlsx(self):
     # The example.xlsx fixture, served with the XLSX MIME type, renders
     # to its two columns with no errors.
     fixture_path = TestDataPath / "example.xlsx"
     with tempfile_context("fetch-") as path:
         # Keep the fixture open only while it is copied into the HTTP
         # file; it is closed again before render runs.
         with fixture_path.open("rb") as fixture:
             httpfile.write(
                 path,
                 {"url": "http://example.com/hello"},
                 "200 OK",
                 [("content-type", XLSX_MIME_TYPE)],
                 fixture,
             )
         fetch_result = FetchResult(path)
         result = render_arrow(
             ArrowTable(),
             P(has_header=True),
             "tab-x",
             fetch_result,
             self.output_path,
         )
     self.assertEqual(result.errors, [])
     expected_columns = {"foo": [1, 2], "bar": [2, 3]}
     assert_arrow_table_equals(result.table, expected_columns)
Exemplo n.º 8
0
 def test_render_text_plain(self):
     # text/plain gets special treatment in guess_mime_type_or_none().
     # Here the URL extension is unknown and the body is
     # semicolon-separated; the expected table has separate columns A and B.
     body = b"A;B\na;b"
     with tempfile_context(prefix="fetch-") as path:
         httpfile.write(
             path,
             {"url": "http://example.com/file.unknownext"},
             "200 OK",
             [("content-type", "text/plain")],
             io.BytesIO(body),
         )
         fetch_result = FetchResult(path)
         result = render_arrow(
             ArrowTable(),
             P(has_header=True),
             "tab-x",
             fetch_result,
             self.output_path,
         )
     self.assertEqual(result.errors, [])
     expected_columns = {"A": ["a"], "B": ["b"]}
     assert_arrow_table_equals(result.table, expected_columns)
Exemplo n.º 9
0
 def test_render_csv_handle_nonstandard_mime_type(self):
     # Made-up MIME types such as 'application/csv' are rewritten to
     # 'text/csv', etc.
     #
     # Sysadmins sometimes invent MIME types. The ones seen in the wild
     # that look unambiguous are hard-coded for rewriting; this test uses
     # 'application/x-csv'.
     body = b"A,B\na,b"
     with tempfile_context(prefix="fetch-") as path:
         httpfile.write(
             path,
             {"url": "http://example.com/hello"},
             "200 OK",
             [("content-type", "application/x-csv")],
             io.BytesIO(body),
         )
         fetch_result = FetchResult(path)
         result = render_arrow(
             ArrowTable(),
             P(has_header=True),
             "tab-x",
             fetch_result,
             self.output_path,
         )
     expected_columns = {"A": ["a"], "B": ["b"]}
     assert_arrow_table_equals(result.table, expected_columns)
     self.assertEqual(result.errors, [])
Exemplo n.º 10
0
 def test_render_deprecated_parquet_has_header_false(self):
     # Awful behavior, kept only for backwards compatibility.
     #
     # Parsing used to happen during fetch, yet has_header can change
     # between fetch and render. Out of laziness, fetch() took the
     # most-common path (has_header=True) and render() would "undo" that
     # when has_header=False. The undo was lossy, took a long time to
     # figure out, and was _never_ a wise thing to code. These lossy,
     # mangled files still have to be supported.
     with parquet_file({"A": [1, 2], "B": [3, 4]}) as path:
         fetch_result = FetchResult(path)
         result = render_arrow(
             ArrowTable(),
             P(has_header=False),
             "tab-x",
             fetch_result,
             self.output_path,
         )
     # The old header names become the first data row; every value is text.
     expected_columns = {"0": ["A", "1", "2"], "1": ["B", "3", "4"]}
     assert_arrow_table_equals(result.table, expected_columns)
     self.assertEqual(result.errors, [])
Exemplo n.º 11
0
 def test_render_csv_use_url_ext_given_bad_content_type(self):
     # The server says text/plain, so the format must come from the URL's
     # file name -- this is what https://raw.githubusercontent.com/ does.
     #
     # The semicolon-separated body proves "csv" was chosen explicitly:
     # had "text/plain" led to a CSV sniffer picking the delimiter, the
     # result would be two columns instead of the single "A;B" column.
     body = b"A;B\na;b"
     with tempfile_context(prefix="fetch-") as path:
         httpfile.write(
             path,
             {"url": "http://example.com/file.csv"},
             "200 OK",
             [("content-type", "text/plain")],
             io.BytesIO(body),
         )
         fetch_result = FetchResult(path)
         result = render_arrow(
             ArrowTable(),
             P(has_header=True),
             "tab-x",
             fetch_result,
             self.output_path,
         )
     expected_columns = {"A;B": ["a;b"]}
     assert_arrow_table_equals(result.table, expected_columns)
     self.assertEqual(result.errors, [])
Exemplo n.º 12
0
 def test_render_has_header_false(self):
     # With has_header=False, every CSV row is data and the columns get
     # the generated names "Column 1" and "Column 2".
     body = b"1,2\n3,4"
     with tempfile_context("http") as path:
         httpfile.write(
             path,
             {"url": "http://example.com/hello"},
             "200 OK",
             [("content-type", "text/csv")],
             io.BytesIO(body),
         )
         fetch_result = FetchResult(path)
         result = render_arrow(
             ArrowTable(),
             P(has_header=False),
             "tab-x",
             fetch_result,
             self.output_path,
         )
     # The expected values are int8 arrays.
     first_column = pyarrow.array([1, 3], pyarrow.int8())
     second_column = pyarrow.array([2, 4], pyarrow.int8())
     assert_arrow_table_equals(
         result.table,
         {"Column 1": first_column, "Column 2": second_column},
     )
     self.assertEqual(result.errors, [])
Exemplo n.º 13
0
 def test_render_missing_fetch_result_returns_empty(self):
     # With None in place of a fetch result, render yields a table with
     # no columns and no errors.
     missing_fetch_result = None
     result = render_arrow(
         ArrowTable(),
         P(),
         "tab-x",
         missing_fetch_result,
         self.output_path,
     )
     assert_arrow_table_equals(result.table, {})
     self.assertEqual(result.errors, [])
Exemplo n.º 14
0
 def test_render_no_file(self):
     # With None in place of a fetch result, render returns a table equal
     # to a default-constructed ArrowTable and reports no errors.
     missing_fetch_result = None
     result = render_arrow(
         ArrowTable(),
         P(),
         "tab-x",
         missing_fetch_result,
         self.output_path,
     )
     assert_arrow_table_equals(result.table, ArrowTable())
     self.assertEqual(result.errors, [])