Пример #1
0
def test_dataset(version):
    """Round-trip a table through several Feather files via FeatherDataset.

    A table of random columns is cut into ``num_files`` equally sized,
    consecutive row slices; each slice is written to its own path with
    ``write_feather`` using the given ``version``. Reading all paths back
    through ``FeatherDataset.read_table`` must yield a table equal to the
    original one.
    """
    n_rows, n_cols = 100, 100
    num_files = 5
    paths = [random_path() for _ in range(num_files)]

    # One random column of length n_rows per column name.
    columns = {}
    for col in range(n_cols):
        columns["col_" + str(col)] = np.random.randn(n_rows)
    table = pa.table(columns)

    # Register the paths before anything is written to them.
    TEST_FILES.extend(paths)

    rows_per_file = n_rows // num_files
    for index, path in enumerate(paths):
        start = index * rows_per_file
        stop = (index + 1) * rows_per_file
        write_feather(table[start:stop], path, version=version)

    result = FeatherDataset(paths).read_table()
    assert result.equals(table)
Пример #2
0
def test_dataset(version):
    """Round-trip a DataFrame through several Feather files via FeatherDataset.

    A random DataFrame is cut into ``num_files`` equally sized, consecutive
    row slices; each slice is written to its own path with ``write_feather``
    using the given ``version``. Reading all paths back through
    ``FeatherDataset.read_pandas`` must reproduce the original frame.
    """
    n_rows, n_cols = 100, 100
    num_files = 5
    paths = [random_path() for _ in range(num_files)]

    column_names = ['col_' + str(col) for col in range(n_cols)]
    df = pd.DataFrame(np.random.randn(n_rows, n_cols), columns=column_names)

    # Register the paths before anything is written to them.
    TEST_FILES.extend(paths)

    rows_per_file = n_rows // num_files
    for index, path in enumerate(paths):
        start = index * rows_per_file
        stop = (index + 1) * rows_per_file
        write_feather(df.iloc[start:stop], path, version=version)

    result = FeatherDataset(paths).read_pandas()
    assert_frame_equal(result, df)
Пример #3
0
    def test_dataset(self):
        """Round-trip a DataFrame through several files written column by column.

        A random DataFrame is cut into ``num_files`` equally sized,
        consecutive row slices. For each slice a ``FeatherWriter`` is opened
        on its own path and every column is written with ``write_array``.
        Reading all paths back through ``FeatherDataset.read_pandas`` must
        reproduce the original frame.
        """
        n_rows, n_cols = 100, 100
        num_files = 5
        paths = [random_path() for _ in range(num_files)]

        column_names = ['col_' + str(col) for col in range(n_cols)]
        df = pd.DataFrame(np.random.randn(n_rows, n_cols),
                          columns=column_names)

        # Register the paths before anything is written to them.
        self.test_files.extend(paths)

        rows_per_file = n_rows // num_files
        for index, path in enumerate(paths):
            start = index * rows_per_file
            stop = (index + 1) * rows_per_file

            writer = FeatherWriter()
            writer.open(path)

            # Write the slice one column at a time, in column order.
            for position in range(n_cols):
                name = df.columns[position]
                values = df.iloc[start:stop, position]
                writer.write_array(name, values)

            writer.close()

        result = FeatherDataset(paths).read_pandas()
        assert_frame_equal(result, df)