def test_fetch_data_orc(tmpdir, expected_df):
    """Round-trip ``expected_df`` through an ORC file and FileSystemReader.

    The frame is written to ``person.orc`` inside a new
    ``tmp_test_fs_reader`` directory created from the ``tmpdir`` fixture,
    then read back through a reader configured for the ``orc`` format.
    """
    orc_file = tmpdir.mkdir("tmp_test_fs_reader").join("person.orc")
    orc_path = str(orc_file)
    cudf.io.orc.to_orc(expected_df, orc_path)

    reader_config = {
        "type": "fs",
        "input_path": orc_path,
        "input_format": "orc",
    }
    result_df = FileSystemReader(reader_config).fetch_data()

    assert result_df.equals(expected_df)
def test_fetch_data_parquet(tmpdir, expected_df):
    """Round-trip ``expected_df`` through a Parquet file and FileSystemReader.

    The frame is written to ``person.parquet`` inside a new
    ``tmp_test_fs_reader`` directory created from the ``tmpdir`` fixture,
    then read back through a reader configured for the ``parquet`` format.
    """
    # Convert the path object returned by ``join`` to a plain string so that
    # both the writer and the reader config receive a ``str`` path, matching
    # the sibling ORC and JSON tests.
    fname = str(tmpdir.mkdir("tmp_test_fs_reader").join("person.parquet"))
    cudf.io.parquet.to_parquet(expected_df, fname)

    config = {
        "type": "fs",
        "input_path": fname,
        "input_format": "parquet",
    }
    reader = FileSystemReader(config)
    fetched_df = reader.fetch_data()

    assert fetched_df.equals(expected_df)
def test_fetch_data_orc(test_input_base_path, expected_df):
    """Read ``person.orc`` from the test input directory and compare frames.

    The file path is formed by appending ``/person.orc`` to
    ``test_input_base_path``; the data fetched by FileSystemReader must
    equal ``expected_df``.
    """
    orc_path = "%s/person.orc" % test_input_base_path
    reader_config = {
        "type": "fs",
        "input_path": orc_path,
        "input_format": "orc",
    }

    result_df = FileSystemReader(reader_config).fetch_data()

    assert result_df.equals(expected_df)
def test_fetch_data_parquet(test_input_base_path, expected_df):
    """Read ``person.parquet`` from the test input directory and compare frames.

    The reader config names the ``firstname``, ``lastname`` and ``gender``
    columns explicitly; the data fetched by FileSystemReader must equal
    ``expected_df``.
    """
    parquet_path = "%s/person.parquet" % test_input_base_path
    reader_config = {
        "type": "fs",
        "input_path": parquet_path,
        "columns": ["firstname", "lastname", "gender"],
        "input_format": "parquet",
    }

    result_df = FileSystemReader(reader_config).fetch_data()

    assert result_df.equals(expected_df)
def test_fetch_data_json(tmpdir, expected_df):
    """Round-trip ``expected_df`` through a JSON file and FileSystemReader.

    The frame is written with ``orient="records"`` to ``person.json`` inside
    a new ``tmp_test_fs_reader`` directory, and the reader config passes the
    same ``orient`` value when reading it back.
    """
    json_file = tmpdir.mkdir("tmp_test_fs_reader").join("person.json")
    json_path = str(json_file)
    cudf.io.json.to_json(expected_df, json_path, orient="records")

    reader_config = {
        "type": "fs",
        "input_path": json_path,
        "orient": "records",
        "input_format": "json",
    }
    result_df = FileSystemReader(reader_config).fetch_data()

    assert result_df.equals(expected_df)
def test_fetch_data_text(test_input_base_path, expected_df):
    """Read ``person.csv`` using the ``text`` input format and compare frames.

    The reader config supplies the column names, a comma delimiter, the
    columns to use, string dtypes and header row ``0``; the data fetched by
    FileSystemReader must equal ``expected_df``.
    """
    csv_path = "%s/person.csv" % test_input_base_path
    reader_config = {
        "type": "fs",
        "input_path": csv_path,
        "names": ["firstname", "lastname", "gender"],
        "delimiter": ",",
        "usecols": ["firstname", "lastname", "gender"],
        "dtype": ["str", "str", "str"],
        "header": 0,
        "input_format": "text",
    }

    result_df = FileSystemReader(reader_config).fetch_data()

    assert result_df.equals(expected_df)
def test_fetch_data_csv(tmpdir, expected_df):
    """Round-trip ``expected_df`` through a CSV file and FileSystemReader.

    The frame is written without its index to ``person.csv`` inside a new
    ``tmp_test_fs_reader`` directory, then read back through a reader
    configured for the ``csv`` format with explicit column names, a comma
    delimiter, string dtypes and header row ``0``.
    """
    # Convert the path object returned by ``join`` to a plain string so that
    # both the writer and the reader config receive a ``str`` path, matching
    # the sibling ORC and JSON tests.
    fname = str(tmpdir.mkdir("tmp_test_fs_reader").join("person.csv"))
    expected_df.to_csv(fname, index=False)

    config = {
        "type": "fs",
        "input_path": fname,
        "names": ["firstname", "lastname", "gender"],
        "delimiter": ",",
        "usecols": ["firstname", "lastname", "gender"],
        "dtype": ["str", "str", "str"],
        "header": 0,
        "input_format": "csv",
    }
    reader = FileSystemReader(config)
    fetched_df = reader.fetch_data()

    assert fetched_df.equals(expected_df)
def get_reader(self):
    """Return a new FileSystemReader built from ``self.config``."""
    reader = FileSystemReader(self.config)
    return reader
def get_reader(self):
    """Create a FileSystemReader from this object's ``config`` attribute.

    Each call constructs and returns a fresh reader instance.
    """
    reader_config = self.config
    return FileSystemReader(reader_config)