Example #1
def update_dataset(ctx, dataset_id, mode):

    Dataset(dataset_id=dataset_id, **ctx.obj).update(mode=mode)

    click.echo(
        click.style(
            mode_text(mode, "updated", dataset_id),
            fg="green",
        ))
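These listings show only the command bodies; the click decorators were stripped by the examples page. A minimal sketch of the wiring such a command would need to run, where the option name and default are assumptions rather than the library's actual CLI:

import click

@click.command(name="update")
@click.argument("dataset_id")
@click.option("--mode", default="all", help="Which resources to update.")  # assumed option
@click.pass_context
def update_dataset(ctx, dataset_id, mode):
    """Body as in Example #1."""
    ...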
Example #2
def init_dataset(ctx, dataset_id, replace):

    d = Dataset(dataset_id=dataset_id, **ctx.obj).init(replace=replace)

    click.echo(
        click.style(
            f"Dataset `{dataset_id}` folder and metadata were created at {d.metadata_path}",
            fg="green",
        ))
Example #3
def publicize_dataset(ctx, dataset_id):

    Dataset(dataset_id=dataset_id, **ctx.obj).publicize()

    click.echo(
        click.style(
            f"Dataset `{dataset_id}` became public!",
            fg="green",
        ))
Example #4
def create_dataset(ctx, dataset_id, mode, if_exists):

    Dataset(dataset_id=dataset_id, **ctx.obj).create(mode=mode,
                                                     if_exists=if_exists)

    click.echo(
        click.style(
            mode_text(mode, "created", dataset_id),
            fg="green",
        ))
Example #5
def delete_dataset(ctx, dataset_id, mode):

    if click.confirm(f"Are you sure you want to delete `{dataset_id}`?"):

        Dataset(dataset_id=dataset_id, **ctx.obj).delete(mode=mode)

        # Only report success when the user confirmed the deletion
        click.echo(
            click.style(
                mode_text(mode, "deleted", dataset_id),
                fg="green",
            ))
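Because Example #5 guards the destructive call behind click.confirm, it can be exercised end to end with click's test runner by feeding the prompt answer via `input`. A self-contained sketch with a stand-in command, not the library's real CLI:

import click
from click.testing import CliRunner

@click.group()
def cli():
    pass

@cli.command(name="delete")
@click.argument("dataset_id")
def delete_dataset(dataset_id):
    # Only report success if the user confirmed the prompt
    if click.confirm(f"Are you sure you want to delete `{dataset_id}`?"):
        click.echo(click.style(f"Dataset `{dataset_id}` deleted", fg="green"))

runner = CliRunner()
result = runner.invoke(cli, ["delete", "my_dataset"], input="y\n")
assert "deleted" in result.output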
Example #6
    def create(
        self,
        path=None,
        job_config_params=None,
        force_dataset=True,
        if_table_exists="raise",
        if_storage_data_exists="raise",
        if_table_config_exists="raise",
        source_format="csv",
        columns_config_url=None,
    ):
        """Creates BigQuery table at staging dataset.

        If you add a path, it automatically saves the data in the storage,
        creates a datasets folder and BigQuery location, besides creating the
        table and its configuration files.

        The new table should be located at `<dataset_id>_staging.<table_id>` in BigQuery.

        It looks for data saved in Storage at `<bucket_name>/staging/<dataset_id>/<table_id>/*`
        and builds the table.

        It currently supports the following source types:

        - Comma-delimited CSV

        Data can also be partitioned following the Hive partitioning scheme
        `<key1>=<value1>/<key2>=<value2>` - for instance,
        `year=2012/country=BR`. Partitions are automatically detected
        by looking for the `partitions` key in `table_config.yaml`.

        Args:
            path (str or pathlib.PosixPath): Where to find the file to upload
                and create the table from.
            job_config_params (dict): Optional.
                Job configuration parameters from BigQuery.
            if_table_exists (str): Optional.
                What to do if the table already exists:

                * 'raise' : Raises Conflict exception
                * 'replace' : Replace table
                * 'pass' : Do nothing
            force_dataset (bool): Creates the `<dataset_id>` folder and BigQuery
                dataset if they don't exist.
            if_table_config_exists (str): Optional.
                What to do if the config files already exist:

                * 'raise' : Raises FileExistsError
                * 'replace' : Replace with blank template
                * 'pass' : Do nothing
            if_storage_data_exists (str): Optional.
                What to do if data already exists on your bucket:

                * 'raise' : Raises Conflict exception
                * 'replace' : Replace the data
                * 'pass' : Do nothing
            source_format (str): Optional.
                Data source format. Only 'csv' is supported. Defaults to 'csv'.

            columns_config_url (str): Optional.
                Google Sheets URL in the format
                https://docs.google.com/spreadsheets/d/<table_key>/edit#gid=<table_gid>.
                The sheet must contain a column for the column name ("coluna")
                and one for the column description ("descricao").

        """

        if path is None:

            # Check whether table data already exists in Storage
            data = self.client["storage_staging"].list_blobs(
                self.bucket_name,
                prefix=f"staging/{self.dataset_id}/{self.table_id}")

            # list_blobs returns a lazy iterator, which is always truthy;
            # materialize it to actually test for emptiness.
            if not list(data):
                raise BaseDosDadosException(
                    "You must provide a path for uploading data")

        # Add data to storage
        if isinstance(path, (str, Path)):

            Storage(self.dataset_id, self.table_id, **self.main_vars).upload(
                path, mode="staging", if_exists=if_storage_data_exists)

        # Create Dataset if it doesn't exist
        if force_dataset:

            dataset_obj = Dataset(self.dataset_id, **self.main_vars)

            try:
                dataset_obj.init()
            except FileExistsError:
                pass

            dataset_obj.create(if_exists="pass")

        self.init(
            data_sample_path=path,
            if_folder_exists="replace",
            if_table_config_exists=if_table_config_exists,
            columns_config_url=columns_config_url,
        )

        table = bigquery.Table(self.table_full_name["staging"])

        table.external_data_configuration = Datatype(
            self, source_format, "staging",
            partitioned=self._is_partitioned()).external_config

        # Look up whether the table already exists
        table_ref = None
        try:
            table_ref = self.client["bigquery_staging"].get_table(
                self.table_full_name["staging"])

        except google.api_core.exceptions.NotFound:
            pass

        if isinstance(table_ref, google.cloud.bigquery.table.Table):

            if if_table_exists == "pass":

                return None

            elif if_table_exists == "raise":

                raise FileExistsError(
                    "Table already exists, choose replace if you want to overwrite it"
                )

        if if_table_exists == "replace":

            self.delete(mode="staging")

        self.client["bigquery_staging"].create_table(table)