Exemplo n.º 1
0
    def test_integer_with_nulls(self):
        """Round-trip masked integer columns of each listed integer dtype.

        Every column is written with the same null mask. The expected frame
        holds the written values as float64 with NaN at the masked positions.
        """
        # pandas requires upcast to float dtype
        path = random_path()
        self.test_files.append(path)

        int_dtypes = ['i1', 'i2', 'i4', 'i8', 'u1', 'u2', 'u4', 'u8']
        num_values = 100

        writer = FeatherWriter()
        writer.open(path)

        null_mask = np.random.randint(0, 10, size=num_values) < 3
        expected_cols = []
        for name in int_dtypes:
            # Cast to the dtype under test. Without the cast every column
            # carries NumPy's default integer dtype, so the loop would write
            # the same type eight times. Values in [0, 100) fit in every
            # dtype listed above, including 'i1'.
            values = np.random.randint(0, 100, size=num_values).astype(name)
            writer.write_array(name, values, null_mask)

            expected = values.astype('f8')
            expected[null_mask] = np.nan

            expected_cols.append(expected)

        ex_frame = pd.DataFrame(dict(zip(int_dtypes, expected_cols)),
                                columns=int_dtypes)

        writer.close()

        result = read_feather(path)
        assert_frame_equal(result, ex_frame)
Exemplo n.º 2
0
    def test_integer_with_nulls(self):
        """Round-trip masked integer columns of each listed integer dtype.

        Every column is written with the same null mask. The expected frame
        holds the written values as float64 with NaN at the masked positions.
        """
        # pandas requires upcast to float dtype
        path = random_path()
        self.test_files.append(path)

        int_dtypes = ['i1', 'i2', 'i4', 'i8', 'u1', 'u2', 'u4', 'u8']
        num_values = 100

        writer = FeatherWriter()
        writer.open(path)

        null_mask = np.random.randint(0, 10, size=num_values) < 3
        expected_cols = []
        for name in int_dtypes:
            # Cast to the dtype under test. Without the cast every column
            # carries NumPy's default integer dtype, so the loop would write
            # the same type eight times. Values in [0, 100) fit in every
            # dtype listed above, including 'i1'.
            values = np.random.randint(0, 100, size=num_values).astype(name)
            writer.write_array(name, values, null_mask)

            expected = values.astype('f8')
            expected[null_mask] = np.nan

            expected_cols.append(expected)

        ex_frame = pd.DataFrame(dict(zip(int_dtypes, expected_cols)),
                                columns=int_dtypes)

        writer.close()

        result = read_feather(path)
        assert_frame_equal(result, ex_frame)
Exemplo n.º 3
0
    def test_float_nulls(self):
        """Round-trip masked 'f4' and 'f8' columns and check null counts.

        Masked positions are expected back as NaN, and the counts returned
        by ``self._get_null_counts`` must match the number of masked entries
        for each column.
        """
        n_rows = 100

        path = random_path()
        self.test_files.append(path)
        feather_writer = FeatherWriter()
        feather_writer.open(path)

        is_null = np.random.randint(0, 10, size=n_rows) < 3
        float_dtypes = ['f4', 'f8']
        columns = {}
        for dtype_name in float_dtypes:
            column = np.random.randn(n_rows).astype(dtype_name)
            feather_writer.write_array(dtype_name, column, is_null)

            # The masked slots are overwritten with NaN only after the array
            # has been handed to the writer.
            column[is_null] = np.nan
            columns[dtype_name] = column

        feather_writer.close()

        # The same mask is used for every column, so each count is identical.
        expected_nulls = [is_null.sum() for _ in float_dtypes]
        expected_frame = pd.DataFrame(columns, columns=float_dtypes)

        actual = read_feather(path)
        assert_frame_equal(actual, expected_frame)
        assert_array_equal(self._get_null_counts(path), expected_nulls)
Exemplo n.º 4
0
    def test_float_nulls(self):
        """Round-trip masked 'f4' and 'f8' columns and check null counts.

        Masked positions are expected back as NaN, and the counts returned
        by ``self._get_null_counts`` must match the number of masked entries
        for each column.
        """
        n_rows = 100

        path = random_path()
        self.test_files.append(path)
        feather_writer = FeatherWriter()
        feather_writer.open(path)

        is_null = np.random.randint(0, 10, size=n_rows) < 3
        float_dtypes = ['f4', 'f8']
        columns = {}
        for dtype_name in float_dtypes:
            column = np.random.randn(n_rows).astype(dtype_name)
            feather_writer.write_array(dtype_name, column, is_null)

            # The masked slots are overwritten with NaN only after the array
            # has been handed to the writer.
            column[is_null] = np.nan
            columns[dtype_name] = column

        feather_writer.close()

        # The same mask is used for every column, so each count is identical.
        expected_nulls = [is_null.sum() for _ in float_dtypes]
        expected_frame = pd.DataFrame(columns, columns=float_dtypes)

        actual = read_feather(path)
        assert_frame_equal(actual, expected_frame)
        assert_array_equal(self._get_null_counts(path), expected_nulls)
Exemplo n.º 5
0
    def test_buffer_bounds_error(self):
        """Round-trip float64 arrays with one leading null at many lengths.

        Exercises the case tracked as ARROW-1676: for every ``i`` in
        ``range(16, 256)`` an array of ``None`` followed by ``0..i-1`` is
        written, read back and compared, then passed through
        ``self._check_pandas_roundtrip`` with a single expected null.
        """
        path = random_path()
        self.test_files.append(path)

        for length in range(16, 256):
            payload = [None]
            payload.extend(range(length))
            arr = pa.array(payload, type=pa.float64())

            # The same path is rewritten on every iteration.
            feather_writer = FeatherWriter()
            feather_writer.open(path)
            feather_writer.write_array('arr', arr)
            feather_writer.close()

            expected = pd.DataFrame({'arr': arr.to_pandas()})
            actual = read_feather(path)
            assert_frame_equal(actual, expected)

            self._check_pandas_roundtrip(expected, null_counts=[1])
Exemplo n.º 6
0
    def test_buffer_bounds_error(self):
        """Round-trip float64 arrays with one leading null at many lengths.

        Exercises the case tracked as ARROW-1676: for every ``i`` in
        ``range(16, 256)`` an array of ``None`` followed by ``0..i-1`` is
        written, read back and compared, then passed through
        ``self._check_pandas_roundtrip`` with a single expected null.
        """
        path = random_path()
        self.test_files.append(path)

        for length in range(16, 256):
            payload = [None]
            payload.extend(range(length))
            arr = pa.array(payload, type=pa.float64())

            # The same path is rewritten on every iteration.
            feather_writer = FeatherWriter()
            feather_writer.open(path)
            feather_writer.write_array('arr', arr)
            feather_writer.close()

            expected = pd.DataFrame({'arr': arr.to_pandas()})
            actual = read_feather(path)
            assert_frame_equal(actual, expected)

            self._check_pandas_roundtrip(expected, null_counts=[1])
Exemplo n.º 7
0
    def test_read_table(self):
        """Compare ``read_table`` output with a table built from the data.

        A 100x100 random integer matrix is written one column at a time.
        The file is then read with ``read_table`` and compared, as pandas
        frames, against ``pa.Table.from_pandas`` applied to a DataFrame
        holding the same matrix and column names.
        """
        shape = (100, 100)
        path = random_path()

        self.test_files.append(path)
        feather_writer = FeatherWriter()
        feather_writer.open(path)

        matrix = np.random.randint(0, 100, size=shape)
        column_names = ['col_' + str(idx) for idx in range(100)]

        for idx, column_name in enumerate(column_names):
            feather_writer.write_array(column_name, matrix[:, idx])

        feather_writer.close()

        frame = pd.DataFrame(matrix, columns=column_names)
        expected_table = pa.Table.from_pandas(frame)

        actual_table = read_table(path)

        expected_frame = expected_table.to_pandas()
        actual_frame = actual_table.to_pandas()
        assert_frame_equal(expected_frame, actual_frame)
Exemplo n.º 8
0
    def test_read_table(self):
        """Compare ``read_table`` output with a table built from the data.

        A 100x100 random integer matrix is written one column at a time.
        The file is then read with ``read_table`` and compared, as pandas
        frames, against ``pa.Table.from_pandas`` applied to a DataFrame
        holding the same matrix and column names.
        """
        shape = (100, 100)
        path = random_path()

        self.test_files.append(path)
        feather_writer = FeatherWriter()
        feather_writer.open(path)

        matrix = np.random.randint(0, 100, size=shape)
        column_names = ['col_' + str(idx) for idx in range(100)]

        for idx, column_name in enumerate(column_names):
            feather_writer.write_array(column_name, matrix[:, idx])

        feather_writer.close()

        frame = pd.DataFrame(matrix, columns=column_names)
        expected_table = pa.Table.from_pandas(frame)

        actual_table = read_table(path)

        expected_frame = expected_table.to_pandas()
        actual_frame = actual_table.to_pandas()
        assert_frame_equal(expected_frame, actual_frame)
Exemplo n.º 9
0
    def test_dataset(self):
        """Split a frame row-wise over several files and read it back whole.

        Each of the five files receives a contiguous, equally sized slice of
        rows for every column. ``FeatherDataset(paths).read_pandas()`` must
        equal the original frame.
        """
        n_rows, n_cols = 100, 100
        n_files = 5
        paths = [random_path() for _ in range(n_files)]
        column_names = ['col_' + str(i) for i in range(n_cols)]
        df = pd.DataFrame(np.random.randn(n_rows, n_cols),
                          columns=column_names)

        self.test_files.extend(paths)
        rows_per_file = n_rows // n_files
        for file_index, path in enumerate(paths):
            start = file_index * rows_per_file
            stop = start + rows_per_file

            feather_writer = FeatherWriter()
            feather_writer.open(path)

            for position in range(n_cols):
                row_slice = df.iloc[start:stop, position]
                feather_writer.write_array(df.columns[position], row_slice)

            feather_writer.close()

        combined = FeatherDataset(paths).read_pandas()
        assert_frame_equal(combined, df)
Exemplo n.º 10
0
    def test_dataset(self):
        """Split a frame row-wise over several files and read it back whole.

        Each of the five files receives a contiguous, equally sized slice of
        rows for every column. ``FeatherDataset(paths).read_pandas()`` must
        equal the original frame.
        """
        n_rows, n_cols = 100, 100
        n_files = 5
        paths = [random_path() for _ in range(n_files)]
        column_names = ['col_' + str(i) for i in range(n_cols)]
        df = pd.DataFrame(np.random.randn(n_rows, n_cols),
                          columns=column_names)

        self.test_files.extend(paths)
        rows_per_file = n_rows // n_files
        for file_index, path in enumerate(paths):
            start = file_index * rows_per_file
            stop = start + rows_per_file

            feather_writer = FeatherWriter()
            feather_writer.open(path)

            for position in range(n_cols):
                row_slice = df.iloc[start:stop, position]
                feather_writer.write_array(df.columns[position], row_slice)

            feather_writer.close()

        combined = FeatherDataset(paths).read_pandas()
        assert_frame_equal(combined, df)
Exemplo n.º 11
0
    def test_boolean_nulls(self):
        """Round-trip a masked boolean column, expecting object dtype back.

        The expected column is the written booleans cast to ``object`` with
        ``None`` at the masked positions. The NumPy RNG is seeded with 0, so
        the generated mask and values are reproducible.
        """
        # pandas requires upcast to object dtype
        path = random_path()
        self.test_files.append(path)

        n_rows = 100
        np.random.seed(0)

        feather_writer = FeatherWriter()
        feather_writer.open(path)

        is_null = np.random.randint(0, 10, size=n_rows) < 3
        flags = np.random.randint(0, 10, size=n_rows) < 5
        feather_writer.write_array('bools', flags, is_null)
        feather_writer.close()

        expected_values = flags.astype(object)
        expected_values[is_null] = None
        expected_frame = pd.DataFrame({'bools': expected_values})

        actual = read_feather(path)
        assert_frame_equal(actual, expected_frame)
Exemplo n.º 12
0
    def test_boolean_nulls(self):
        """Round-trip a masked boolean column, expecting object dtype back.

        The expected column is the written booleans cast to ``object`` with
        ``None`` at the masked positions. The NumPy RNG is seeded with 0, so
        the generated mask and values are reproducible.
        """
        # pandas requires upcast to object dtype
        path = random_path()
        self.test_files.append(path)

        n_rows = 100
        np.random.seed(0)

        feather_writer = FeatherWriter()
        feather_writer.open(path)

        is_null = np.random.randint(0, 10, size=n_rows) < 3
        flags = np.random.randint(0, 10, size=n_rows) < 5
        feather_writer.write_array('bools', flags, is_null)
        feather_writer.close()

        expected_values = flags.astype(object)
        expected_values[is_null] = None
        expected_frame = pd.DataFrame({'bools': expected_values})

        actual = read_feather(path)
        assert_frame_equal(actual, expected_frame)