Пример #1
0
 def test_boolean_becomes_utf8(self):
     # Workbench has no boolean column type, so JSON true/false are expected
     # to come out as the text values "true" and "false". A JSON null in the
     # same column must stay null rather than being turned into text.
     raw_json = '[{"A":true},{"A":false},{"A":null}]'
     actual = _parse_json_with_defaults(raw_json)
     expected_table = pyarrow.table({"A": ["true", "false", None]})
     expected = ParseJsonResult(expected_table, [])
     assert_json_result_equals(actual, expected)
Пример #2
0
 def test_null_int(self):
     # A JSON null inside an otherwise-integer column: the expected column is
     # an int8 array whose second entry is null.
     actual = _parse_json_with_defaults('[{"A":1},{"A":null}]')
     int_column = pyarrow.array([1, None], pyarrow.int8())
     expected = ParseJsonResult(pyarrow.table({"A": int_column}), [])
     assert_json_result_equals(actual, expected)
Пример #3
0
 def test_utf8_dates_are_utf8(self):
     # JSON has no date type: strings that look like ISO dates are expected
     # to stay plain text, with no warnings.
     raw_json = '[{"date":"2019-02-20"},{"date":"2019-02-21"}]'
     actual = _parse_json_with_defaults(raw_json)
     expected_table = pyarrow.table({"date": ["2019-02-20", "2019-02-21"]})
     expected = ParseJsonResult(expected_table, [])
     assert_json_result_equals(actual, expected)
Пример #4
0
 def test_json_syntax_error(self):
     # Input that is not JSON at all: expect an empty table plus a single
     # warning carrying the parse-error message.
     actual = _parse_json_with_defaults("not JSON")
     empty_table = pyarrow.table({})
     parse_error = ParseJsonWarning.TODO_i18n(
         "JSON parse error at byte 1: Invalid value.")
     expected = ParseJsonResult(empty_table, [parse_error])
     assert_json_result_equals(actual, expected)
Пример #5
0
 def test_json_find_subarray(self):
     # The top-level value is an Object whose "data" member holds the Array
     # of records; the expected table is built from that nested Array only.
     # NOTE(review): the input is passed as a Python object -- presumably the
     # helper serializes it to JSON; confirm in _parse_json_with_defaults.
     document = {
         "meta": {"foo": "bar"},
         "data": [{"x": "y"}],
     }
     actual = _parse_json_with_defaults(document)
     expected = ParseJsonResult(pyarrow.table({"x": ["y"]}), [])
     assert_json_result_equals(actual, expected)
Пример #6
0
 def test_json_not_array(self):
     # A bare JSON string is neither an Array nor an Object containing one:
     # expect an empty table and a warning that echoes the offending value.
     actual = _parse_json_with_defaults('"foo"')
     empty_table = pyarrow.table({})
     not_array = ParseJsonWarning.TODO_i18n(
         'JSON is not an Array or Object containing an Array; got: "foo"'
     )
     expected = ParseJsonResult(empty_table, [not_array])
     assert_json_result_equals(actual, expected)
Пример #7
0
 def test_json_not_records(self):
     # An Array whose items are strings rather than Objects: both items are
     # expected to be skipped, leaving an empty table and one warning that
     # quotes the first skipped item.
     items = ["foo", "bar"]
     actual = _parse_json_with_defaults(items)
     empty_table = pyarrow.table({})
     skipped = ParseJsonWarning.TODO_i18n(
         'skipped 2 non-Object records; example Array item 0: "foo"'
     )
     expected = ParseJsonResult(empty_table, [skipped])
     assert_json_result_equals(actual, expected)
Пример #8
0
 def test_json_replace_badly_encoded_characters(self):
     # The payload is encoded as windows-1252 but parsed as utf-8. In
     # windows-1252 the "é" is the single byte 233, sitting at offset 11 of
     # the payload; the expected result replaces it with U+FFFD and reports
     # the repair in a RepairedEncoding warning.
     payload = '[{"x": "café"}]'.encode("windows-1252")
     actual = _parse_json_with_defaults(payload, encoding="utf-8")
     repaired_table = pyarrow.table({"x": ["caf�"]})
     repair_warning = ParseJsonWarning.RepairedEncoding(
         encoding="utf-8",
         first_invalid_byte=233,
         first_invalid_byte_position=11,
     )
     expected = ParseJsonResult(repaired_table, [repair_warning])
     assert_json_result_equals(actual, expected)
Пример #9
0
 def test_max_rows(self):
     # Three records go in; only the first two are expected in the table and
     # the third is reported as skipped.
     # NOTE(review): the row limit of 2 is not set in this method --
     # presumably it comes from the helper's defaults or a decorator; confirm.
     records = [{"A": "a"}, {"A": "b"}, {"A": "c"}]
     actual = _parse_json_with_defaults(records)
     kept_rows = pyarrow.table({"A": ["a", "b"]})
     row_limit_warning = ParseJsonWarning.TODO_i18n(
         "skipped 1 rows (after row limit of 2)")
     expected = ParseJsonResult(kept_rows, [row_limit_warning])
     assert_json_result_equals(actual, expected)
Пример #10
0
 def test_max_bytes_per_column_name(self):
     # Both column names are expected to be cut down to their first two
     # characters ("ABCD" -> "AB", "BCDEFG" -> "BC"), with one warning that
     # counts the truncations and names an example.
     # NOTE(review): the name-length limit is configured outside this
     # method; confirm where.
     records = [{"ABCD": "x", "BCDEFG": "y"}]
     actual = _parse_json_with_defaults(records)
     truncated_table = pyarrow.table({"AB": ["x"], "BC": ["y"]})
     truncation_warning = ParseJsonWarning.TODO_i18n(
         "truncated 2 column names; example AB")
     expected = ParseJsonResult(truncated_table, [truncation_warning])
     assert_json_result_equals(actual, expected)
Пример #11
0
 def test_undefined(self):
     # Records with differing key sets: a key missing from a record is
     # expected to yield null in that row, and columns are expected in order
     # of first appearance (A, C, then B).
     raw_json = """
             [
                 {"A": "a", "C": "c"},
                 {"A": "aa", "B": "b"},
                 {"C": "cc"}
             ]
             """
     actual = _parse_json_with_defaults(raw_json)
     expected_columns = {
         "A": ["a", "aa", None],
         "C": ["c", None, "cc"],
         "B": [None, "b", None],
     }
     expected = ParseJsonResult(pyarrow.table(expected_columns), [])
     assert_json_result_equals(actual, expected)
Пример #12
0
 def test_max_bytes_per_value(self):
     # Each value is expected to be truncated to three bytes. The Array value
     # is truncated in its serialized form, so only '["a' survives; the plain
     # string "ghij" becomes "ghi". One warning covers both truncations.
     # NOTE(review): the 3-byte limit is configured outside this method.
     records = [{"A": ["abc", "def"], "B": "ghij"}]
     actual = _parse_json_with_defaults(records)
     truncated_table = pyarrow.table({"A": ['["a'], "B": ["ghi"]})
     truncation_warning = ParseJsonWarning.TODO_i18n(
         "truncated 2 values (value byte limit is 3; see row 0 column A)"
     )
     expected = ParseJsonResult(truncated_table, [truncation_warning])
     assert_json_result_equals(actual, expected)
Пример #13
0
 def test_encode_nested_arrays_and_objects(self):
     # A deeply nested Object/Array value is expected to land in the table
     # as one compact JSON string (no spaces, key order preserved).
     nested_value = {
         "x": [
             "y",
             {"z": True, "Z": ["a", None]},
             ["b", "c"],
         ],
         "X": {},
     }
     actual = _parse_json_with_defaults([{"value": nested_value}])
     serialized = '{"x":["y",{"z":true,"Z":["a",null]},["b","c"]],"X":{}}'
     expected = ParseJsonResult(pyarrow.table({"value": [serialized]}), [])
     assert_json_result_equals(actual, expected)
Пример #14
0
 def test_max_bytes_text(self):
     # The first record alone holds 8 bytes of text ("abcd" + "bcde"); the
     # expected table stops there and the second record is dropped, with a
     # warning naming the 8-byte limit.
     # NOTE(review): the limit itself is configured outside this method.
     first_record = {"A": "abcd", "B": "bcde"}
     second_record = {"A": "c", "B": "def"}
     actual = _parse_json_with_defaults([first_record, second_record])
     kept_rows = pyarrow.table({"A": ["abcd"], "B": ["bcde"]})
     byte_limit_warning = ParseJsonWarning.TODO_i18n(
         "stopped at limit of 8 bytes of data")
     expected = ParseJsonResult(kept_rows, [byte_limit_warning])
     assert_json_result_equals(actual, expected)
Пример #15
0
 def test_dictionary_encode(self):
     # Column A repeats the same value in every row and is expected to come
     # back dictionary-encoded; column B has distinct values and is expected
     # as a plain string column.
     records = [
         {"A": "a", "B": "b"},
         {"A": "a", "B": "bb"},
         {"A": "a", "B": "bbb"},
     ]
     actual = _parse_json_with_defaults(records)
     repeated_column = pyarrow.array(["a", "a", "a"]).dictionary_encode()
     expected_table = pyarrow.table({
         "A": repeated_column,
         "B": ["b", "bb", "bbb"],
     })
     expected = ParseJsonResult(expected_table, [])
     assert_json_result_equals(actual, expected)
Пример #16
0
 def test_max_columns(self):
     # Three distinct keys appear, but only columns A and B are expected in
     # the table; column C is reported as skipped.
     # NOTE(review): the column limit of 2 is configured outside this method.
     records = [
         {"A": "a", "B": "b", "C": "c"},
         {"A": "aa", "B": "bb"},
     ]
     actual = _parse_json_with_defaults(records)
     kept_columns = pyarrow.table({"A": ["a", "aa"], "B": ["b", "bb"]})
     column_limit_warning = ParseJsonWarning.TODO_i18n(
         "skipped column C (after column limit of 2)")
     expected = ParseJsonResult(kept_columns, [column_limit_warning])
     assert_json_result_equals(actual, expected)
Пример #17
0
 def test_int64(self):
     # Large integers (e.g., Twitter IDs) are expected to keep their exact
     # value in the resulting table.
     actual = _parse_json_with_defaults('[{"A":1093943422262697985}]')
     expected_table = pyarrow.table({"A": [1093943422262697985]})
     expected = ParseJsonResult(expected_table, [])
     assert_json_result_equals(actual, expected)
Пример #18
0
 def test_utf8_numbers_are_utf8(self):
     # Digits inside JSON strings are expected to stay text rather than being
     # converted to numbers.
     actual = _parse_json_with_defaults('[{"A":"1"},{"A":"2"}]')
     expected = ParseJsonResult(pyarrow.table({"A": ["1", "2"]}), [])
     assert_json_result_equals(actual, expected)
Пример #19
0
 def test_null_utf8(self):
     # A JSON null in a text column is expected to stay null in the table.
     raw_json = '[{"A":"a"},{"A":"b"},{"A":null}]'
     actual = _parse_json_with_defaults(raw_json)
     expected = ParseJsonResult(pyarrow.table({"A": ["a", "b", None]}), [])
     assert_json_result_equals(actual, expected)
Пример #20
0
 def test_json_force_encoding(self):
     # The payload is encoded as windows-1252 and the same encoding is passed
     # to the parser, so "café" is expected intact with no warnings.
     payload = '[{"x": "café"}]'.encode("windows-1252")
     actual = _parse_json_with_defaults(payload, encoding="windows-1252")
     expected = ParseJsonResult(pyarrow.table({"x": ["café"]}), [])
     assert_json_result_equals(actual, expected)
Пример #21
0
 def test_json_empty(self):
     # An empty JSON Array is expected to give an empty table and no
     # warnings.
     actual = _parse_json_with_defaults("[]")
     expected = ParseJsonResult(pyarrow.table({}), [])
     assert_json_result_equals(actual, expected)
Пример #22
0
 def test_object_becomes_utf8(self):
     # An Object used as a cell value is expected to be stored as compact
     # JSON text (the space after the colon in the input is dropped).
     actual = _parse_json_with_defaults('[{"A":{"foo": "bar"}}]')
     expected_table = pyarrow.table({"A": ['{"foo":"bar"}']})
     expected = ParseJsonResult(expected_table, [])
     assert_json_result_equals(actual, expected)
Пример #23
0
 def test_array_becomes_utf8(self):
     # An Array used as a cell value is expected to be stored as compact
     # JSON text (the space after the comma in the input is dropped).
     actual = _parse_json_with_defaults('[{"A":["foo", "bar"]}]')
     expected_table = pyarrow.table({"A": ['["foo","bar"]']})
     expected = ParseJsonResult(expected_table, [])
     assert_json_result_equals(actual, expected)