Exemplo n.º 1
0
def test_fetch_data_orc(tmpdir, expected_df):
    """Round-trip ``expected_df`` through an ORC file and the dask_fs reader.

    The frame is written to ``person.orc`` inside a new subdirectory of
    ``tmpdir``, read back via ``DaskFileSystemReader`` and compared with the
    original frame.
    """
    orc_dir = tmpdir.mkdir("tmp_test_fs_reader")
    orc_path = str(orc_dir.join("person.orc"))
    cudf.io.orc.to_orc(expected_df, orc_path)

    reader_config = {
        "type": "dask_fs",
        "input_path": orc_path,
        "input_format": "orc",
    }
    lazy_df = DaskFileSystemReader(reader_config).fetch_data()
    # fetch_data() returns a lazy collection; compute() materializes it.
    result_df = lazy_df.compute()

    assert result_df.equals(expected_df)
Exemplo n.º 2
0
def test_fetch_data_orc(test_input_base_path, expected_df):
    """Read ``person.orc`` under the input base path and compare it with
    ``expected_df``.
    """
    orc_path = "%s/person.orc" % test_input_base_path
    reader_config = {
        "type": "dask_fs",
        "input_path": orc_path,
        "input_format": "orc",
    }

    lazy_df = DaskFileSystemReader(reader_config).fetch_data()
    # fetch_data() returns a lazy collection; compute() materializes it.
    result_df = lazy_df.compute()

    assert result_df.equals(expected_df)
Exemplo n.º 3
0
def test_fetch_data_parquet(test_input_base_path, expected_df):
    """Read ``person.parquet`` under the input base path, restricted to the
    three person columns, and compare it with ``expected_df``.
    """
    parquet_path = "%s/person.parquet" % test_input_base_path
    reader_config = {
        "type": "dask_fs",
        "input_path": parquet_path,
        "columns": ["firstname", "lastname", "gender"],
        "input_format": "parquet",
    }

    lazy_df = DaskFileSystemReader(reader_config).fetch_data()
    # fetch_data() returns a lazy collection; compute() materializes it.
    result_df = lazy_df.compute()

    assert result_df.equals(expected_df)
Exemplo n.º 4
0
def test_fetch_data_text(test_input_base_path, expected_df):
    """Read ``person.csv`` under the input base path through the dask_fs
    reader and compare it with ``expected_df``.
    """
    csv_path = "%s/person.csv" % test_input_base_path
    # Everything except "type", "input_path" and "input_format" is a CSV
    # parsing option supplied alongside them in the reader config.
    reader_config = {
        "type": "dask_fs",
        "input_path": csv_path,
        "names": ["firstname", "lastname", "gender"],
        "delimiter": ",",
        "usecols": ["firstname", "lastname", "gender"],
        "dtype": ["str", "str", "str"],
        "header": 0,
        "input_format": "csv",
    }

    lazy_df = DaskFileSystemReader(reader_config).fetch_data()
    result_df = lazy_df.compute()

    assert result_df.equals(expected_df)
Exemplo n.º 5
0
def test_fetch_data_parquet(tmpdir, expected_df):
    """Round-trip ``expected_df`` through a Parquet file and the dask_fs reader.

    The frame is written to ``person.parquet`` inside a new subdirectory of
    ``tmpdir``, read back via ``DaskFileSystemReader`` and compared with the
    original frame.
    """
    parquet_dir = tmpdir.mkdir("tmp_test_fs_reader")
    parquet_path = str(parquet_dir.join("person.parquet"))
    cudf.io.parquet.to_parquet(expected_df, parquet_path)

    reader_config = {
        "type": "dask_fs",
        "input_path": parquet_path,
        "columns": ["firstname", "lastname", "gender"],
        "input_format": "parquet",
        "gather_statistics": False,
        "split_row_groups": False,
    }

    lazy_df = DaskFileSystemReader(reader_config).fetch_data()
    # fetch_data() returns a lazy collection; compute() materializes it.
    result_df = lazy_df.compute()

    assert result_df.equals(expected_df)
Exemplo n.º 6
0
def test_fetch_data_csv(tmpdir, expected_df):
    """Round-trip ``expected_df`` through a CSV file and the dask_fs reader.

    The frame is written (without its index) to ``person.csv`` inside a new
    subdirectory of ``tmpdir``, read back via ``DaskFileSystemReader`` and
    compared with the original frame.
    """
    # Convert the py.path.local object to a plain string, as the sibling
    # ORC/Parquet tmpdir tests do, so both the writer and the reader config
    # receive an ordinary filesystem path rather than a path object.
    fname = str(tmpdir.mkdir("tmp_test_fs_reader").join("person.csv"))
    expected_df.to_csv(fname, index=False)
    config = {
        "type": "dask_fs",
        "input_path": fname,
        "names": ["firstname", "lastname", "gender"],
        "delimiter": ",",
        "usecols": ["firstname", "lastname", "gender"],
        "dtype": ["str", "str", "str"],
        "header": 0,
        "input_format": "csv",
    }
    reader = DaskFileSystemReader(config)
    # fetch_data() returns a lazy collection; compute() materializes it.
    fetched_df = reader.fetch_data().compute()

    assert fetched_df.equals(expected_df)
Exemplo n.º 7
0
    def get_reader(self):
        """
        Build and return a DaskFileSystemReader constructed from ``self.config``.
        """
        reader = DaskFileSystemReader(self.config)
        return reader
Exemplo n.º 8
0
 def get_reader(self):
     """Build and return a DaskFileSystemReader constructed from ``self.config``."""
     reader = DaskFileSystemReader(self.config)
     return reader