Example #1
def test_create_storage_data_exist_table_config_exist(table, metadatadir,
                                                      data_path, sample_data):

    shutil.rmtree(metadatadir / DATASET_ID / TABLE_ID, ignore_errors=True)

    Dataset(dataset_id=DATASET_ID,
            metadata_path=metadatadir).create(if_exists="pass")

    Storage(dataset_id=DATASET_ID,
            table_id=TABLE_ID,
            metadata_path=metadatadir).upload(data_path,
                                              mode="staging",
                                              if_exists="replace")

    table.init(
        data_sample_path=data_path,
        if_folder_exists="replace",
        if_table_config_exists="replace",
    )

    for file in TABLE_FILES:
        shutil.copy(sample_data / file, table.table_folder / file)

    table.delete(mode="all")

    table.create(
        data_path,
        if_storage_data_exists="pass",
        if_table_config_exists="pass",
    )
    assert table_exists(table, "staging")
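
These tests depend on pytest fixtures (table, metadatadir, data_path, sample_data) and module constants (DATASET_ID, TABLE_ID, TABLE_FILES) defined elsewhere in the suite. A minimal conftest.py sketch, in which every name, path, and value is an assumption rather than the project's actual configuration:

# conftest.py -- hypothetical sketch; all names and paths are assumptions.
from pathlib import Path

import pytest
from basedosdados import Table

DATASET_ID = "pytest"
TABLE_ID = "pytest"
TABLE_FILES = ["publish.sql", "table_config.yaml"]


@pytest.fixture
def metadatadir(tmp_path):
    # isolated metadata root for each test run
    return tmp_path


@pytest.fixture
def data_path():
    # CSV sample used throughout the suite
    return "tests/sample_data/municipios.csv"


@pytest.fixture
def sample_data():
    # folder holding the reference table_config.yaml / publish.sql
    return Path("tests/sample_data/table")


@pytest.fixture
def table(metadatadir):
    return Table(dataset_id=DATASET_ID, table_id=TABLE_ID,
                 metadata_path=metadatadir)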
Example #2
def test_create(table, metadatadir):

    shutil.rmtree(Path(metadatadir) / DATASET_ID / TABLE_ID,
                  ignore_errors=True)

    Dataset(dataset_id=DATASET_ID,
            metadata_path=metadatadir).create(if_exists="pass")

    Storage(dataset_id=DATASET_ID,
            table_id=TABLE_ID,
            metadata_path=metadatadir).upload(
                "tests/sample_data/municipios.csv",
                mode="staging",
                if_exists="replace")

    table.init(data_sample_path="tests/sample_data/municipios.csv",
               if_exists="replace")

    table.delete(mode="all")

    table.create()

    assert table_exists(table, mode="staging")

    table.create(if_exists="replace")

    assert table_exists(table, mode="staging")

    table.create("tests/sample_data/municipios.csv", if_exists="replace")
Example #3
def test_init(table, metadatadir):

    # remove folder
    shutil.rmtree(Path(metadatadir) / DATASET_ID / TABLE_ID,
                  ignore_errors=True)

    Dataset(dataset_id=DATASET_ID,
            metadata_path=metadatadir).init(replace=True)

    table.init()

    folder = Path(metadatadir) / DATASET_ID / TABLE_ID

    check_files(folder)

    with pytest.raises(FileExistsError):
        table.init()

    with pytest.raises(FileExistsError):
        table.init(if_exists="raise")

    table.init(if_exists="replace")

    check_files(folder)

    table.init(if_exists="pass")

    check_files(folder)

    table.init(if_exists="replace",
               data_sample_path="tests/sample_data/municipios.csv")

    check_files(folder)

    with pytest.raises(NotImplementedError):
        table.init(if_exists="replace",
                   data_sample_path="tests/sample_data/municipios.json")
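
check_files is another suite-local helper. A minimal sketch, assuming it only verifies that init materialized the table metadata files (the file list is an assumption):

# Hypothetical helper; the exact file list is an assumption.
def check_files(folder):
    for file in ["table_config.yaml", "publish.sql"]:
        assert (folder / file).exists()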
Example #4
def push_table_to_bq(
    dataset_id,
    table_id,
    source_bucket_name="basedosdados-dev",
    destination_bucket_name="basedosdados",
    backup_bucket_name="basedosdados-backup",
):
    # copy proposed data between storage buckets:
    # back up the old data, delete it, then copy the new data into the destination bucket
    modes = ["staging", "raw", "auxiliary_files", "architecture", "header"]

    for mode in modes:
        try:
            sync_bucket(
                source_bucket_name=source_bucket_name,
                dataset_id=dataset_id,
                table_id=table_id,
                destination_bucket_name=destination_bucket_name,
                backup_bucket_name=backup_bucket_name,
                mode=mode,
            )
            tprint()
        except Exception:
            tprint(f"DATA ERROR ON {mode}.{dataset_id}.{table_id}")
            traceback.print_exc(file=sys.stderr)
            tprint()

    # load the table_config.yaml to get the metadata IDs
    table_config, configs_path = load_configs(dataset_id, table_id)
    # adjust the correct project ID in publish sql
    replace_project_id_publish_sql(configs_path, dataset_id, table_id)
    # create table object of selected table and dataset ID
    tb = bd.Table(dataset_id=dataset_id, table_id=table_id)

    # delete table from staging and prod if exists
    tb.delete("all")

    # create the staging table in bigquery
    tb.create(
        path=None,
        if_table_exists="replace",
        if_storage_data_exists="pass",
        if_table_config_exists="pass",
    )

    # publish the table in prod bigquery
    tb.publish(if_exists="replace")

    # updates the table description
    tb.update("prod")

    # updates the dataset description
    Dataset(dataset_id).update(mode="prod")

    # save the table header files in storage
    save_header_files(dataset_id, table_id)
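
A hedged invocation sketch; the dataset and table IDs below are placeholders, and the bucket names fall back to the defaults declared above:

# Hypothetical call; IDs are placeholders.
if __name__ == "__main__":
    push_table_to_bq(
        dataset_id="br_example_dataset",
        table_id="example_table",
    )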
Example #5
def test_create_no_path_error(table, metadatadir, data_path, sample_data):

    shutil.rmtree(metadatadir / DATASET_ID / TABLE_ID, ignore_errors=True)

    Dataset(dataset_id=DATASET_ID, metadata_path=metadatadir).create(if_exists="pass")

    with pytest.raises(BaseDosDadosException):
        table.create(if_storage_data_exists="replace")

    with pytest.raises(BaseDosDadosException):
        table.create(if_table_config_exists="replace")
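
The expected exception ships with the client library; in basedosdados 1.x it is presumably importable as below (verify against the installed version):

# Import path assumed from basedosdados 1.x.
from basedosdados.exceptions import BaseDosDadosException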
Example #6
def test_init(
    table,
    metadatadir,
    folder,
    data_path,
):

    # remove folder
    shutil.rmtree(metadatadir / DATASET_ID / TABLE_ID, ignore_errors=True)

    Dataset(dataset_id=DATASET_ID, metadata_path=metadatadir).init(replace=True)

    table.init(data_sample_path=data_path)

    check_files(folder)
Example #7
def test_publish(table, metadatadir):

    Dataset(dataset_id=DATASET_ID,
            metadata_path=metadatadir).create(if_exists="replace")

    table.create("tests/sample_data/municipios.csv", if_exists="replace")

    shutil.copy(
        "tests/sample_data/table/table_config.yaml",
        Path(metadatadir) / "pytest" / "pytest" / "table_config.yaml",
    )
    shutil.copy(
        "tests/sample_data/table/publish.sql",
        Path(metadatadir) / "pytest" / "pytest" / "publish.sql",
    )

    table.publish(if_exists="replace")

    assert table_exists(table, "prod")
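
After publish, the prod table should be queryable through the client library. A hedged smoke-check sketch; the billing project is a placeholder:

# Hypothetical check; billing_project_id is a placeholder.
import basedosdados as bd

df = bd.read_sql(
    query=f"SELECT * FROM {DATASET_ID}.{TABLE_ID} LIMIT 5",
    billing_project_id="my-billing-project",
)
print(df.head())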
Example #8
File: main.py Project: avila/mais
def push_table_to_bq(
    dataset_id,
    table_id,
    source_bucket_name="basedosdados-dev",
    destination_bucket_name="basedosdados",
    backup_bucket_name="basedosdados-staging",
):

    ### Copy proposed data between storage buckets: create a backup of the
    ### old data, then delete it and copy the new data into the destination bucket.
    sync_bucket(
        source_bucket_name,
        dataset_id,
        table_id,
        destination_bucket_name,
        backup_bucket_name,
    )

    ### load the table_config.yaml to get the metadata IDs
    table_config, configs_path = load_configs(dataset_id, table_id)
    ### adjust the correct project ID in publish sql
    replace_project_id_publish_sql(configs_path, dataset_id, table_id)

    ### create Table object of selected table and dataset ID
    tb = bd.Table(dataset_id=dataset_id, table_id=table_id)

    ### delete table from staging and prod if exists
    tb.delete("all")

    ### create the staging table in bigquery
    tb.create(
        path=None,
        if_table_exists="replace",
        if_storage_data_exists="pass",
        if_table_config_exists="pass",
    )
    ### publish the table in prod bigquery
    tb.publish(if_exists="replace")
    ### updates the table description
    tb.update("prod")
    ### updates the dataset description
    Dataset(dataset_id).update("prod")
Example #9
def dataset(metadatadir):
    return Dataset(dataset_id=DATASET_ID, metadata_path=metadatadir)
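
A test consuming this fixture might look like the sketch below; the method calls mirror those seen in the examples above, but the test itself is hypothetical:

# Hypothetical test built on the fixture above.
def test_dataset_create(dataset):
    dataset.create(if_exists="replace")
    dataset.delete(mode="all")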