Пример #1
0
def test_dataset_to_csv_multiproc(csv_path, tmp_path):
    """Round-trip a CSV file through the reader and a two-process writer.

    Reads ``csv_path`` as the ``train`` split, writes that split to a new
    file with ``index=False`` and ``num_proc=2``, then checks that the
    written file yields the same rows as the source file.

    Args:
        csv_path: Path of the source CSV file (test fixture).
        tmp_path: Per-test temporary directory (test fixture).
    """
    cache_dir = tmp_path / "cache"
    output_csv = os.path.join(cache_dir, "tmp.csv")
    dataset = CsvDatasetReader({"train": csv_path}, cache_dir=cache_dir).read()
    CsvDatasetWriter(dataset["train"], output_csv, index=False, num_proc=2).write()

    # Materialise both files before comparing: a bare zip() stops at the
    # shorter input, so a truncated or empty output would pass unnoticed.
    original_rows = list(iter_csv_file(csv_path))
    written_rows = list(iter_csv_file(output_csv))

    assert len(written_rows) == len(original_rows)
    for original_row, written_row in zip(original_rows, written_rows):
        assert original_row == written_row
Пример #2
0
def test_dataset_to_csv_invalidproc(csv_path, tmp_path):
    """Check that building a CSV writer with ``num_proc=0`` raises ValueError.

    The dataset is read from ``csv_path`` first; only the writer construction
    sits inside the ``pytest.raises`` context, and ``write()`` is never called.
    """
    cache_root = tmp_path / "cache"
    target_file = os.fspath(cache_root / "tmp.csv")

    reader = CsvDatasetReader({"train": csv_path}, cache_dir=cache_root)
    train_split = reader.read()["train"]

    with pytest.raises(ValueError):
        CsvDatasetWriter(train_split, target_file, index=False, num_proc=0)