Пример #1
0
def test_write_data_parquet(tmpdir, expected_df):
    """Round-trip ``expected_df`` through a parquet file written by FileSystemWriter.

    The frame is written to ``person.parquet`` inside a freshly created
    ``tmp_test_fs_writer`` directory under ``tmpdir``, read back with
    ``cudf.read_parquet`` and compared against the input frame.
    """
    out_dir = tmpdir.mkdir("tmp_test_fs_writer")
    parquet_path = str(out_dir.join("person.parquet"))

    writer_config = dict(
        type="fs",
        output_path=parquet_path,
        output_format="parquet",
    )
    FileSystemWriter(writer_config).write_data(expected_df)

    round_tripped = cudf.read_parquet(parquet_path)
    assert round_tripped.equals(expected_df)
Пример #2
0
def test_write_data_json(tmpdir, expected_df):
    """Round-trip ``expected_df`` through a JSON file written by FileSystemWriter.

    The frame is written with ``orient="records"`` to ``person.json`` inside
    a freshly created ``tmp_test_fs_writer`` directory under ``tmpdir``, then
    read back with ``cudf.read_json`` using the same orient and compared
    against the input frame.
    """
    out_dir = tmpdir.mkdir("tmp_test_fs_writer")
    json_path = str(out_dir.join("person.json"))

    writer_config = dict(
        type="fs",
        output_path=json_path,
        output_format="json",
        orient="records",
    )
    FileSystemWriter(writer_config).write_data(expected_df)

    round_tripped = cudf.read_json(json_path, orient="records")
    assert round_tripped.equals(expected_df)
Пример #3
0
def test_write_data_parquet(test_output_base_path, expected_df, df):
    """Write ``df`` as parquet under the base path and compare with ``expected_df``.

    A directory left at the output path by an earlier run is removed first so
    the glob below only sees files produced by this run. After writing, the
    first entry matched by globbing the output path is read with pandas
    (pyarrow engine) and compared against ``expected_df``.
    """
    test_output_path = "%s/person_parquet" % (test_output_base_path)
    # isdir() is already False for a missing path, so no separate exists() check.
    if os.path.isdir(test_output_path):
        shutil.rmtree(test_output_path)
    config = {
        "type": "fs",
        "output_path": test_output_path,
        "output_format": "parquet"
    }
    writer = FileSystemWriter(config)
    writer.write_data(df)

    output_files = glob.glob("%s/*" % (test_output_path))
    # Fail with a readable message rather than an IndexError when the writer
    # produced no files at all.
    assert output_files, "no files were written under %s" % (test_output_path)
    result = pd.read_parquet(output_files[0], engine="pyarrow")
    assert result.equals(expected_df)
Пример #4
0
def test_write_data_json(test_output_base_path, expected_df):
    """Write ``expected_df`` as JSON records and verify it reads back unchanged.

    The target is ``person.json`` under ``test_output_base_path``; a file left
    there by an earlier run is deleted before writing. The result is loaded
    with ``cudf.io.json.read_json`` (``orient="records"``) and compared
    against the input frame.
    """
    json_path = "%s/person.json" % (test_output_base_path)
    # Remove output from a previous run before writing.
    if os.path.exists(json_path):
        os.remove(json_path)

    FileSystemWriter(
        {
            "type": "fs",
            "output_path": json_path,
            "output_format": "json",
            "orient": "records",
        }
    ).write_data(expected_df)

    loaded_gdf = cudf.io.json.read_json(json_path, orient="records")
    assert loaded_gdf.equals(expected_df)
Пример #5
0
def test_write_data_text(test_output_base_path, df):
    """Write ``df`` in the "text" format and check the resulting CSV rows.

    The target is ``person.csv`` under ``test_output_base_path``; a file left
    there by an earlier run is deleted before writing. The file is parsed
    with the ``csv`` module and its first four rows (header plus three
    records) are compared with the expected values.
    """
    test_output_path = "%s/person.csv" % (test_output_base_path)
    if os.path.exists(test_output_path):
        os.remove(test_output_path)
    config = {
        "type": "fs",
        "output_path": test_output_path,
        "output_format": "text",
        "index": False
    }
    writer = FileSystemWriter(config)
    writer.write_data(df)

    # The csv module documentation requires file objects to be opened with
    # newline="" so the reader performs its own newline handling.
    with open(test_output_path, newline="") as f:
        data = list(csv.reader(f))

    # Compare the leading rows in one assertion: a short file then fails as
    # an assertion showing the actual rows instead of raising IndexError.
    assert data[:4] == [
        ["firstname", "lastname", "gender"],
        ["Emma", "Olivia", "F"],
        ["Ava", "Isabella", "F"],
        ["Sophia", "Charlotte", "F"],
    ]
Пример #6
0
 def get_writer(self):
     """Build and return a FileSystemWriter from this object's ``config``."""
     fs_writer = FileSystemWriter(self.config)
     return fs_writer