def test_skip_columns(self):
    """Columns listed in ``skip_columns`` are absent from the parsed table."""
    # NOTE(review): the header row is 6 bytes while the field widths sum
    # to 9 -- confirm the padding inside this literal is intact.
    payload = b'a b c \r\n11 ab 123\r\n33 cde456\r\n-60 fg789'
    # Three fields of width 3; the first and last are skipped.
    options = pf.ParseOptions([3, 3, 3], skip_columns=[0, 2])

    result = read_bytes(payload, options)

    assert isinstance(result, pa.Table)
    remaining = {'b': ['ab', 'cde', 'fg']}
    assert result.to_pydict() == remaining
def test_small(self):
    """Round-trip the default random fixture through the reader."""
    options = pf.ParseOptions([4, 4])
    # The fixture generator produces 2 columns of width 4 by default,
    # which is what the [4, 4] field widths above rely on.
    raw, reference = make_random_fwf()

    parsed = read_bytes(raw, options)

    assert parsed.schema == reference.schema
    assert parsed.equals(reference)
    assert parsed.to_pydict() == reference.to_pydict()
def test_nulls_bools(self):
    """Null-like and boolean tokens are expected to yield typed columns.

    The first column is expected to be int64 with a null entry and the
    second column bool with a null entry.
    """
    # NOTE(review): each row here is shorter than the 12 bytes implied by
    # the two width-6 fields -- confirm the padding in this literal is intact.
    payload = b'a b \r\n null N/A \r\n123456 true'
    options = pf.ParseOptions([6, 6])

    result = read_bytes(payload, options)

    first_type = result.column(0).type
    assert first_type == 'int64'
    second_type = result.column(1).type
    assert second_type == 'bool'
    assert result.to_pydict() == {'a': [None, 123456], 'b': [None, True]}
def test_header(self):
    """A lone input line is used as the header: three names, zero rows."""
    # Widths 2 + 3 + 1 split b'abcdef' into 'ab', 'cde' and 'f'.
    options = pf.ParseOptions([2, 3, 1])
    result = read_bytes(b'abcdef', options)

    assert isinstance(result, pa.Table)
    assert result.num_columns == 3
    expected_names = ['ab', 'cde', 'f']
    assert result.column_names == expected_names
    assert result.num_rows == 0
def test_small_encoded(self):
    """Round-trip a random fixture generated with the big5 encoding."""
    options = pf.ParseOptions([4, 4])
    # The reader is told the encoding explicitly; the fixture is produced
    # with the matching codec.
    reader_cfg = pf.ReadOptions(encoding='Big5')
    raw, reference = make_random_fwf(encoding='big5')

    parsed = read_bytes(raw, options, read_options=reader_cfg)

    assert parsed.schema == reference.schema
    assert parsed.equals(reference)
    assert parsed.to_pydict() == reference.to_pydict()
def test_serial_read(self):
    """Round-trip the default random fixture with ``use_threads=False``."""
    options = pf.ParseOptions([4, 4])
    reader_cfg = pf.ReadOptions(use_threads=False)
    # The fixture generator produces 2 columns of width 4 by default.
    raw, reference = make_random_fwf()

    parsed = read_bytes(raw, options, read_options=reader_cfg)

    assert parsed.schema == reference.schema
    assert parsed.equals(reference)
    assert parsed.to_pydict() == reference.to_pydict()
def test_big(self):
    """Round-trip a larger random fixture: 30 width-4 columns, 10000 rows."""
    # Single source of truth for the column count, so the field widths
    # and the generated fixture cannot drift apart.
    num_cols = 30
    # Every field is 4 bytes wide; list repetition replaces the manual
    # append loop (whose loop variable was unused).
    field_widths = [4] * num_cols
    parse_options = pf.ParseOptions(field_widths)
    fwf, expected = make_random_fwf(num_cols=num_cols, num_rows=10000)

    table = read_bytes(fwf, parse_options)

    assert table.schema == expected.schema
    assert table.equals(expected)
    assert table.to_pydict() == expected.to_pydict()
def test_no_header(self):
    """With explicit ``column_names`` the only input line is parsed as data."""
    # Widths 1 + 2 + 3 + 3 split b'123456789' into 1, 23, 456 and 789.
    options = pf.ParseOptions([1, 2, 3, 3])
    reader_cfg = pf.ReadOptions(column_names=['a', 'b', 'c', 'd'])

    result = read_bytes(b'123456789', options, read_options=reader_cfg)

    expected = {'a': [1], 'b': [23], 'c': [456], 'd': [789]}
    assert result.to_pydict() == expected
def test_cobol(self):
    """With ``is_cobol=True`` the first column is expected to parse as int64.

    The expected values map the raw fields '1A', '33J', '6}' and '3D' to
    11, -331, -60 and 34 respectively.
    """
    # NOTE(review): the header row and the '6} fg56' row are shorter than
    # the 8 bytes implied by the field widths -- confirm the padding in
    # this literal is intact.
    payload = b'a b c \r\n1A ab 12\r\n33Jcde34\r\n6} fg56\r\n 3Dhij78'
    options = pf.ParseOptions([3, 3, 2])
    conversion = pf.ConvertOptions(is_cobol=True)

    result = read_bytes(payload, options, convert_options=conversion)

    assert isinstance(result, pa.Table)
    expected = {
        'a': [11, -331, -60, 34],
        'b': ['ab', 'cde', 'fg', 'hij'],
        'c': [12, 34, 56, 78],
    }
    assert result.to_pydict() == expected
    assert result.column(0).type == 'int64'