Python _main示例

编程语言: Python

命名空间/包名称: petastorm.etl.petastorm_generate_metadata

方法/功能: _main

hotexamples.com的示例: 4

Python _main - 共找到4个示例。以下是从开源项目中提取的、评价最高的 petastorm.etl.petastorm_generate_metadata._main 真实Python示例。您可以为这些示例评分，以帮助我们提高示例质量。

示例#1

显示文件

def test_regenerate_metadata(synthetic_dataset, tmpdir):
    """Metadata regenerated with an explicit unischema class makes the dataset readable again."""
    relocated_path = tmpdir.join('moved').strpath
    copytree(synthetic_dataset.path, relocated_path)

    # Sanity check: the freshly copied dataset is readable.
    _check_reader(relocated_path)

    # Remove the common metadata file; it is the only file deleted in this test.
    parquet_dataset = pq.ParquetDataset(relocated_path)
    os.remove(parquet_dataset.common_metadata_path)

    # Reading is now expected to fail with PetastormMetadataError.
    with pytest.raises(PetastormMetadataError):
        _check_reader(relocated_path)

    # Rebuild the metadata, naming the unischema class explicitly on the command line.
    cli_args = ['--dataset_url', 'file://{}'.format(relocated_path)]
    cli_args += ['--unischema_class', 'petastorm.tests.test_common.TestSchema']
    petastorm_generate_metadata._main(cli_args)

    # Reading without a row group selector should succeed again.
    _check_reader(relocated_path)

示例#2

显示文件

def test_regenerate_using_row_group_summary_metadata(synthetic_dataset, tmpdir):
    """Regenerating with ``--use-summary-metadata`` leaves a dataset that reports metadata and is readable."""
    relocated_path = tmpdir.join('moved').strpath
    copytree(synthetic_dataset.path, relocated_path)

    # Sanity check: the freshly copied dataset is readable.
    _check_reader(relocated_path)

    # Run the generator with the summary-metadata flag; no unischema class is passed,
    # and no metadata file is deleted beforehand in this test.
    dataset_url = 'file://{}'.format(relocated_path)
    petastorm_generate_metadata._main(['--dataset_url', dataset_url, '--use-summary-metadata'])

    # After regeneration the dataset must report a truthy `metadata` attribute.
    parquet_dataset = pq.ParquetDataset(relocated_path)
    assert parquet_dataset.metadata

    # Reading through a row group selector on the `id` field is expected to work.
    id_selector = SingleIndexSelector(TestSchema.id.name, [2, 18])
    _check_reader(relocated_path, id_selector)

示例#3

显示文件

def test_cannot_find_unischema(synthetic_dataset, tmpdir):
    """With both metadata files removed and no unischema class given, regeneration raises ValueError."""
    relocated_path = tmpdir.join('moved').strpath
    copytree(synthetic_dataset.path, relocated_path)

    # Sanity check: the freshly copied dataset is readable.
    _check_reader(relocated_path)

    # Remove the metadata file first, then the common metadata file.
    parquet_dataset = pq.ParquetDataset(relocated_path)
    os.remove(parquet_dataset.metadata_path)
    os.remove(parquet_dataset.common_metadata_path)

    # Reading is now expected to fail with ValueError.
    with pytest.raises(ValueError):
        _check_reader(relocated_path)

    # No --unischema_class is supplied, so the generator is expected to raise ValueError too.
    cli_args = ['--dataset_url', 'file://{}'.format(relocated_path)]
    with pytest.raises(ValueError):
        petastorm_generate_metadata._main(cli_args)

示例#4

显示文件

def test_regenerate_row_group_metadata(synthetic_dataset, tmpdir):
    """Regeneration without a unischema class restores reading after only the metadata file is removed."""
    relocated_path = tmpdir.join('moved').strpath
    copytree(synthetic_dataset.path, relocated_path)

    # Sanity check: the freshly copied dataset is readable.
    _check_reader(relocated_path)

    # Remove just the metadata file; the common metadata file is left in place.
    parquet_dataset = pq.ParquetDataset(relocated_path)
    os.remove(parquet_dataset.metadata_path)

    # Reading is now expected to fail with ValueError.
    with pytest.raises(ValueError):
        _check_reader(relocated_path)

    # Run the generator with only the dataset URL (no unischema class argument).
    dataset_url = 'file://{}'.format(relocated_path)
    petastorm_generate_metadata._main(['--dataset_url', dataset_url])

    # Reading through a row group selector on the `id` field is expected to work again.
    id_selector = SingleIndexSelector(TestSchema.id.name, [2, 18])
    _check_reader(relocated_path, id_selector)