Exemplo n.º 1
0
    def test_skip_columns(self):
        """Fields listed in ``skip_columns`` must be absent from the result.

        Three 3-character fields are declared and fields 0 and 2 are
        skipped, so only column ``b`` is expected in the parsed table.
        """
        data = b'a  b  c  \r\n11 ab 123\r\n33 cde456\r\n-60 fg789'
        options = pf.ParseOptions([3, 3, 3], skip_columns=[0, 2])

        result = read_bytes(data, options)

        assert isinstance(result, pa.Table)
        remaining = {'b': ['ab', 'cde', 'fg']}
        assert result.to_pydict() == remaining
Exemplo n.º 2
0
    def test_small(self):
        """Read a default-sized random payload and compare with its table."""
        # The generator defaults to 2 columns of width 4, which is what the
        # field widths below describe.
        fwf, expected = make_random_fwf()
        widths = pf.ParseOptions([4, 4])

        result = read_bytes(fwf, widths)

        assert result.schema == expected.schema
        assert result.equals(expected)
        assert result.to_pydict() == expected.to_pydict()
Exemplo n.º 3
0
    def test_nulls_bools(self):
        """Null markers and booleans take part in column type inference.

        The ``null`` and ``N/A`` cells are expected to come back as ``None``
        while the columns are still typed as ``int64`` and ``bool``.
        """
        data = b'a     b     \r\n null N/A   \r\n123456  true'
        result = read_bytes(data, pf.ParseOptions([6, 6]))

        int_column = result.column(0)
        assert int_column.type == 'int64'
        bool_column = result.column(1)
        assert bool_column.type == 'bool'

        parsed = result.to_pydict()
        assert parsed == {'a': [None, 123456], 'b': [None, True]}
Exemplo n.º 4
0
 def test_header(self):
     """A lone input line is consumed as the header, leaving no data rows."""
     widths = pf.ParseOptions([2, 3, 1])
     result = read_bytes(b'abcdef', widths)

     assert isinstance(result, pa.Table)
     # One column per declared field, named after the header slices.
     assert result.num_columns == 3
     assert result.column_names == ['ab', 'cde', 'f']
     assert result.num_rows == 0
Exemplo n.º 5
0
    def test_small_encoded(self):
        """Read a Big5-encoded random payload with a matching read encoding."""
        fwf, expected = make_random_fwf(encoding='big5')

        result = read_bytes(
            fwf,
            pf.ParseOptions([4, 4]),
            read_options=pf.ReadOptions(encoding='Big5'),
        )

        assert result.schema == expected.schema
        assert result.equals(expected)
        assert result.to_pydict() == expected.to_pydict()
Exemplo n.º 6
0
    def test_serial_read(self):
        """Read a default-sized random payload with ``use_threads=False``."""
        # The generator defaults to 2 columns of width 4.
        fwf, expected = make_random_fwf()
        widths = pf.ParseOptions([4, 4])
        single_threaded = pf.ReadOptions(use_threads=False)

        result = read_bytes(fwf, widths, read_options=single_threaded)

        assert result.schema == expected.schema
        assert result.equals(expected)
        assert result.to_pydict() == expected.to_pydict()
Exemplo n.º 7
0
    def test_big(self):
        """Read a 30-column, 10000-row random payload and compare the result.

        Every field is declared 4 characters wide, one width entry per
        generated column.
        """
        # Single source of truth for the column count, so the declared
        # widths cannot drift from the generated data.
        num_cols = 30
        field_widths = [4] * num_cols
        parse_options = pf.ParseOptions(field_widths)
        fwf, expected = make_random_fwf(num_cols=num_cols, num_rows=10000)
        table = read_bytes(fwf, parse_options)

        assert table.schema == expected.schema
        assert table.equals(expected)
        assert table.to_pydict() == expected.to_pydict()
Exemplo n.º 8
0
 def test_no_header(self):
     """With explicit ``column_names`` the only input line is read as data."""
     names = pf.ReadOptions(column_names=['a', 'b', 'c', 'd'])
     widths = pf.ParseOptions([1, 2, 3, 3])

     result = read_bytes(b'123456789', widths, read_options=names)

     expected = {'a': [1], 'b': [23], 'c': [456], 'd': [789]}
     assert result.to_pydict() == expected
Exemplo n.º 9
0
    def test_cobol(self):
        """Numeric fields are converted with ``is_cobol=True``.

        The first column holds ``1A``, ``33J``, ``6}`` and ``3D``, which are
        expected to come back as 11, -331, -60 and 34 in an ``int64`` column.
        NOTE(review): presumably COBOL signed-overpunch notation - confirm
        against the converter implementation.
        """
        data = b'a  b  c \r\n1A ab 12\r\n33Jcde34\r\n6}  fg56\r\n 3Dhij78'
        widths = pf.ParseOptions([3, 3, 2])
        cobol_conversion = pf.ConvertOptions(is_cobol=True)

        result = read_bytes(data, widths, convert_options=cobol_conversion)

        assert isinstance(result, pa.Table)
        expected = {
            'a': [11, -331, -60, 34],
            'b': ['ab', 'cde', 'fg', 'hij'],
            'c': [12, 34, 56, 78],
        }
        assert result.to_pydict() == expected
        assert result.column(0).type == 'int64'