def bigquery_dataset(bigquery_client: bigquery.Client,
                     bigquery_schema: List[bigquery.SchemaField]):
    project_id = bigquery_client.project
    dataset_id = "test_pybigquery"
    dataset = bigquery.Dataset(f"{project_id}.{dataset_id}")
    dataset = bigquery_client.create_dataset(dataset, exists_ok=True)
    sample_table_id = f"{project_id}.{dataset_id}.sample"
    try:
        # Since the data changes rarely and the tests are mostly read-only,
        # only create the tables if they don't already exist.
        # TODO: Create shared sample data tables in bigquery-public-data that
        #       include test values for all data types.
        bigquery_client.get_table(sample_table_id)
    except google.api_core.exceptions.NotFound:
        job1 = load_sample_data(sample_table_id, bigquery_client,
                                bigquery_schema)
        job1.result()
    one_row_table_id = f"{project_id}.{dataset_id}.sample_one_row"
    try:
        bigquery_client.get_table(one_row_table_id)
    except google.api_core.exceptions.NotFound:
        job2 = load_sample_data(
            one_row_table_id,
            bigquery_client,
            bigquery_schema,
            filename="sample_one_row.json",
        )
        job2.result()
    view = bigquery.Table(f"{project_id}.{dataset_id}.sample_view")
    view.view_query = f"SELECT string FROM `{dataset_id}.sample`"
    bigquery_client.create_table(view, exists_ok=True)
    return dataset_id
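The fixture above relies on a load_sample_data helper that is not shown in this snippet. A minimal sketch, assuming the sample rows live as newline-delimited JSON files next to the tests (the DATA_DIR location and the default filename are assumptions):

import pathlib
from typing import List

from google.cloud import bigquery

DATA_DIR = pathlib.Path(__file__).parent / "data"  # assumed location of the sample files


def load_sample_data(table_id: str,
                     bigquery_client: bigquery.Client,
                     bigquery_schema: List[bigquery.SchemaField],
                     filename: str = "sample.json") -> bigquery.LoadJob:
    # Load newline-delimited JSON with an explicit schema so column types stay stable.
    job_config = bigquery.LoadJobConfig(
        schema=bigquery_schema,
        source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
    )
    with open(DATA_DIR / filename, "rb") as data_file:
        return bigquery_client.load_table_from_file(
            data_file, table_id, job_config=job_config)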
Example #2
    def _table_exists(self, bq: bigquery.Client, table_ref: bigquery.TableReference) -> bool:
        try:
            bq.get_table(table_ref)
            return True
        except NotFound:
            return False
Example #3
def exist_table(client: bq.Client, dataset_id: str, table_id: str) -> bool:
    table_full_id = get_full_table_name(client, dataset_id, table_id)
    try:
        client.get_table(table_full_id)
    except NotFound:
        return False
    return True
Example #4
def clean_up_bq_tables(client: cloud_bigquery.Client,
                       table_names: List[str]) -> None:
    for table_name in table_names:
        try:
            client.get_table(table_name)
            client.delete_table(table_name)
        except NotFound:
            pass
Example #5
def does_bigquery_table_exist(client: bigquery.Client, dataset_name: str,
                              table_name: str):
    dataset_ref = client.dataset(dataset_name)
    table_ref = dataset_ref.table(table_name)

    try:
        client.get_table(table_ref)
        return True
    except NotFound:
        return False
def bigquery_alt_dataset(bigquery_client: bigquery.Client,
                         bigquery_schema: List[bigquery.SchemaField]):
    project_id = bigquery_client.project
    dataset_id = "test_pybigquery_alt"
    dataset = bigquery.Dataset(f"{project_id}.{dataset_id}")
    dataset = bigquery_client.create_dataset(dataset, exists_ok=True)
    sample_table_id = f"{project_id}.{dataset_id}.sample_alt"
    try:
        bigquery_client.get_table(sample_table_id)
    except google.api_core.exceptions.NotFound:
        job = load_sample_data(sample_table_id, bigquery_client,
                               bigquery_schema)
        job.result()
    return dataset_id
def load_bigquery_table_via_bq_apis(bq_client: bigquery.Client, dataset_id,
                                    table_name, imported_data_info, src_uris):
    """
    Load tables using BigQuery Load jobs, using the same configuration as BQ DTS ImportedDataInfo
    :return:
    """
    # https://googlecloudplatform.github.io/google-cloud-python/latest/_modules/google/cloud/bigquery/client.html#Client.load_table_from_uri
    # Step 1 - Translate required fields for BigQuery Python SDK
    tgt_tabledef = imported_data_info['table_defs'][0]

    # Step 2 - Create target table if it doesn't exist
    dataset_ref = bq_client.dataset(dataset_id)
    table_ref = dataset_ref.table(table_name)
    try:
        bq_client.get_table(table_ref)
    except exceptions.NotFound:
        # Step 2a - Attach schema
        tgt_schema = RPCRecordSchema_to_GCloudSchema(tgt_tabledef['schema'])
        tgt_table = bigquery.Table(table_ref, schema=tgt_schema)

        # Step 2b - Attach description
        tgt_table.description = imported_data_info[
            'destination_table_description']

        # Step 2c - Conditionally set partitioning type
        if '$' in table_name:
            tgt_table.partitioning_type = 'DAY'
            tgt_table._properties['tableReference'][
                'tableId'], _, _ = table_name.partition('$')

        # Step 2d - Create BigQuery table
        bq_client.create_table(tgt_table)

    # Step 3a - Create BigQuery Load Job ID
    current_datetime = datetime.datetime.utcnow().isoformat()
    raw_job_id = f'{table_name}_{current_datetime}'
    clean_job_id = BQ_JOB_ID_MATCHER.sub('___', raw_job_id)

    # Step 3b - Create BigQuery Job Config
    job_config = DTSTableDefinition_to_BQLoadJobConfig(tgt_tabledef)

    # Step 4 - Execute BigQuery Load Job using Python SDK
    load_job = bq_client.load_table_from_uri(source_uris=src_uris,
                                             destination=table_ref,
                                             job_id=clean_job_id,
                                             job_config=job_config)

    return load_job
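BQ_JOB_ID_MATCHER is referenced but not defined in this snippet. BigQuery job IDs may only contain letters, digits, underscores, and dashes, so a plausible definition (the exact pattern is an assumption) collapses every disallowed run of characters before the sub('___', ...) call above:

import re

# Assumed pattern: any run of characters not allowed in a BigQuery job ID.
BQ_JOB_ID_MATCHER = re.compile(r'[^a-zA-Z0-9_\-]+')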
Example #8
def load_data_to_bq(df=None,
                    table_name='CRY',
                    dataset='price_data',
                    project=None):
    client = Client()
    table = client.get_table(".".join([client.project, dataset, table_name]))
    if table.num_rows == 0 and df is not None:
        df.to_gbq(".".join([dataset, table_name]), if_exists='append')
    else:
        table_path = f"{dataset}.{table_name}"
        symbol_list = '("' + '","'.join(df.symbol.unique()) + '")'
        delete_qry = f'''DELETE FROM `{table_path}` AS t2
                         WHERE concat(symbol, cast(date as string)) IN
                         (SELECT concat(symbol, cast(MAX(date) as string))
                         FROM `{table_path}`
                         GROUP BY symbol) AND symbol IN
                         {symbol_list}'''
        delete_DML = client.query(delete_qry)
        delete_DML.result()
        existing = pd.read_gbq(f'''select symbol, max(date) as max_date
                               from {table_path}
                               group by symbol''',
                               dialect="legacy")
        df = df.merge(existing, on='symbol', how='left')
        df = df.loc[(df.date.dt.tz_localize('UTC') > df.max_date)
                    | df.max_date.isnull(), :]
        df.drop('max_date', axis=1, inplace=True)
        df.to_gbq(".".join([dataset, table_name]),
                  if_exists='append',
                  project_id=project)
Example #9
def dest_partitioned_table(request, bq: bigquery.Client, mock_env,
                           dest_dataset) -> bigquery.Table:
    public_table: bigquery.Table = bq.get_table(
        bigquery.TableReference.from_string(
            "bigquery-public-data.new_york_311.311_service_requests"))
    schema = public_table.schema

    table: bigquery.Table = bigquery.Table(
        f"{os.environ.get('GCP_PROJECT')}"
        f".{dest_dataset.dataset_id}.cf_test_nyc_311_"
        f"{str(uuid.uuid4()).replace('-','_')}",
        schema=schema,
    )

    table.time_partitioning = bigquery.TimePartitioning()
    table.time_partitioning.type_ = bigquery.TimePartitioningType.HOUR
    table.time_partitioning.field = "created_date"

    table = bq.create_table(table)

    def teardown():
        bq.delete_table(table, not_found_ok=True)

    request.addfinalizer(teardown)
    return table
Example #10
def _upload_entity_df(
    client: Client,
    table_name: str,
    entity_df: Union[pd.DataFrame, str],
) -> Table:
    """Uploads a Pandas entity dataframe into a BigQuery table and returns the resulting table"""

    if isinstance(entity_df, str):
        job = client.query(f"CREATE TABLE {table_name} AS ({entity_df})")

    elif isinstance(entity_df, pd.DataFrame):
        # Drop the index so that we don't have unnecessary columns
        entity_df.reset_index(drop=True, inplace=True)
        job = client.load_table_from_dataframe(entity_df, table_name)
    else:
        raise InvalidEntityType(type(entity_df))

    block_until_done(client, job)

    # Ensure that the table expires after some time
    table = client.get_table(table=table_name)
    table.expires = datetime.utcnow() + timedelta(minutes=30)
    client.update_table(table, ["expires"])

    return table
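block_until_done is a Feast-internal helper not shown here. A rough equivalent (the timeout and polling interval are assumptions; in many cases job.result() alone would do) simply re-fetches the job until BigQuery reports it as finished:

import time

from google.cloud.bigquery import Client


def block_until_done(client: Client, job, timeout: int = 1800) -> None:
    # Poll the job state until it is DONE or the timeout expires.
    deadline = time.time() + timeout
    while time.time() < deadline:
        if client.get_job(job.job_id, location=job.location).state == "DONE":
            return
        time.sleep(2)
    raise TimeoutError(f"Job {job.job_id} did not finish within {timeout} seconds")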
Example #11
def _upload_entity_df_into_bigquery(
    client: Client,
    project: str,
    dataset_name: str,
    entity_df: Union[pandas.DataFrame, str],
) -> Table:
    """Uploads a Pandas entity dataframe into a BigQuery table and returns the resulting table"""

    table_id = _get_table_id_for_new_entity(client, project, dataset_name)

    if type(entity_df) is str:
        job = client.query(f"CREATE TABLE {table_id} AS ({entity_df})")
        job.result()
    elif isinstance(entity_df, pandas.DataFrame):
        # Drop the index so that we don't have unnecessary columns
        entity_df.reset_index(drop=True, inplace=True)

        # Upload the dataframe into BigQuery, creating a temporary table
        job_config = bigquery.LoadJobConfig()
        job = client.load_table_from_dataframe(entity_df,
                                               table_id,
                                               job_config=job_config)
        job.result()
    else:
        raise ValueError(
            f"The entity dataframe you have provided must be a Pandas DataFrame or BigQuery SQL query, "
            f"but we found: {type(entity_df)} ")

    # Ensure that the table expires after some time
    table = client.get_table(table=table_id)
    table.expires = datetime.utcnow() + timedelta(minutes=30)
    client.update_table(table, ["expires"])

    return table
Example #12
def _add_new_columns(client: bigquery.Client, table_id: str,
                     columns: List[str]) -> List[Dict]:
    """Adds any new columns if they are missing.

  Creates new string columns for every column if it doesn't exist.

  Args:
    client: The BigQuery client.
    table_id: Table id.
    columns: List of columns.

  Returns:
    The table schema.
  """
    try:
        table = client.get_table(table_id)
    except google.api_core.exceptions.NotFound:
        logging.error(
            'Table: \'%s\' not found - please create the table. It is okay to create it with no columns.',
            table_id)
        raise
    new_fields = []
    for c in columns:
        field = bigquery.SchemaField(c, 'STRING')
        if field not in table.schema:
            new_fields.append(field)

    if new_fields:
        logging.info('Found new fields: %s', new_fields)
        table.schema += new_fields
        client.update_table(table, ['schema'])

    return table.schema
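A hedged usage sketch (the project, dataset, and column names below are made up). The target table must already exist, possibly with no columns, and the schema grows only when new columns appear:

from google.cloud import bigquery

client = bigquery.Client()
schema = _add_new_columns(client, "my-project.my_dataset.events",
                          ["user_id", "event_name", "country"])
print([field.name for field in schema])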
Example #13
def _upload_entity_df_and_get_entity_schema(
    client: Client, table_name: str, entity_df: Union[pd.DataFrame, str],
) -> Dict[str, np.dtype]:
    """Uploads a Pandas entity dataframe into a BigQuery table and returns the resulting table"""

    if type(entity_df) is str:
        job = client.query(f"CREATE TABLE {table_name} AS ({entity_df})")
        block_until_done(client, job)

        limited_entity_df = (
            client.query(f"SELECT * FROM {table_name} LIMIT 1").result().to_dataframe()
        )

        entity_schema = dict(zip(limited_entity_df.columns, limited_entity_df.dtypes))
    elif isinstance(entity_df, pd.DataFrame):
        # Drop the index so that we don't have unnecessary columns
        entity_df.reset_index(drop=True, inplace=True)
        job = client.load_table_from_dataframe(entity_df, table_name)
        block_until_done(client, job)
        entity_schema = dict(zip(entity_df.columns, entity_df.dtypes))
    else:
        raise InvalidEntityType(type(entity_df))

    # Ensure that the table expires after some time
    table = client.get_table(table=table_name)
    table.expires = datetime.utcnow() + timedelta(minutes=30)
    client.update_table(table, ["expires"])

    return entity_schema
Example #14
def iter_results(
    bigquery_client: bigquery.Client,
    query: str,
    job_config: QueryJobConfig,
    df_cleaner: Optional[Callable[[pd.DataFrame], pd.DataFrame]] = None,
) -> Generator[pd.Series, None, None]:
    """
    Page through the results of a query and yield each row as a pandas Series

    Args:
        bigquery_client (bigquery.Client): The BigQuery client
        query (str): The query to run
        job_config (QueryJobConfig): The BigQuery job config
        df_cleaner (Callable, optional): Callback applied to each page's DataFrame
            before its rows are yielded

    Yields:
        pd.Series: Each result row as a pandas Series
    """

    query_job = bigquery_client.query(query, job_config=job_config)
    query_job.result()

    # Get reference to destination table
    destination = bigquery_client.get_table(query_job.destination)

    rows = bigquery_client.list_rows(destination, page_size=10000)

    dfs = rows.to_dataframe_iterable()

    for df in dfs:
        if df_cleaner is not None:
            df = df_cleaner(df)

        for index, row in df.iterrows():
            yield row
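A short usage sketch (the table and column names are hypothetical). The query runs to an anonymous destination table, which is then paged through; the optional cleaner normalises each page before its rows are yielded:

from google.cloud import bigquery

client = bigquery.Client()
config = bigquery.QueryJobConfig(labels={"pipeline": "example"})


def drop_missing_users(df):
    return df.dropna(subset=["user_id"])  # hypothetical column


for row in iter_results(client,
                        "SELECT user_id, event_name FROM `my-project.my_dataset.events`",
                        config,
                        df_cleaner=drop_missing_users):
    print(row["user_id"], row["event_name"])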
Example #15
def bq_wait_for_rows(bq_client: bigquery.Client, table: bigquery.Table,
                     expected_num_rows: int):
    """
  polls tables.get API for number of rows until reaches expected value or
  times out.

  This is mostly an optimization to speed up the test suite without making it
  flaky.
  """

    start_poll = time.monotonic()
    actual_num_rows = 0
    while time.monotonic() - start_poll < LOAD_JOB_POLLING_TIMEOUT:
        bq_table: bigquery.Table = bq_client.get_table(table)
        actual_num_rows = bq_table.num_rows
        if actual_num_rows == expected_num_rows:
            return
        if actual_num_rows > expected_num_rows:
            raise AssertionError(
                f"{table.project}.{table.dataset_id}.{table.table_id} has"
                f"{actual_num_rows} rows. expected {expected_num_rows} rows.")
    raise AssertionError(
        f"Timed out after {LOAD_JOB_POLLING_TIMEOUT} seconds waiting for "
        f"{table.project}.{table.dataset_id}.{table.table_id} to "
        f"reach {expected_num_rows} rows."
        f"last poll returned {actual_num_rows} rows.")
Example #16
def find_schema_differences(
    module_path: str,
    bigquery_client: BigQueryClient,
    global_project: Optional[str],
    global_dataset: Optional[str],
) -> _SchemaDiffs:
    schema_diffs: _SchemaDiffs = {}
    for local_table in find_tables(module_path):
        project = global_project or local_table.project
        assert project, "Project has not been set."
        dataset = global_dataset or local_table.dataset
        assert dataset, "Dataset has not been set."

        table_identifier = f"{project}.{dataset}.{local_table.full_table_name()}"

        try:
            remote_table = bigquery_client.get_table(table_identifier)
            diffs = list(
                check_schemas(local_table.get_schema_fields(),
                              remote_table.schema))
            if diffs:
                schema_diffs[table_identifier] = ExistingTable(
                    local_table=local_table,
                    remote_table=remote_table,
                    schema_diffs=diffs,
                )
        except NotFound:
            schema_diffs[table_identifier] = MissingTable(
                local_table=local_table)

    return schema_diffs
Example #17
def dest_partitioned_table_allow_jagged(bq: bigquery.Client, dest_dataset,
                                        monkeypatch) -> bigquery.Table:
    public_table: bigquery.Table = bq.get_table(
        bigquery.TableReference.from_string(
            "bigquery-public-data.new_york_311.311_service_requests"))
    schema = public_table.schema

    if os.getenv('GCP_PROJECT') is None:
        monkeypatch.setenv("GCP_PROJECT", bq.project)

    extra_field_for_jagged_row_test = bigquery.schema.SchemaField(
        "extra_jagged_row_test_column", "STRING")
    schema.append(extra_field_for_jagged_row_test)
    table: bigquery.Table = bigquery.Table(
        f"{os.getenv('GCP_PROJECT')}"
        f".{dest_dataset.dataset_id}.cf_test_nyc_311_"
        f"{str(uuid.uuid4()).replace('-', '_')}",
        schema=schema,
    )

    table.time_partitioning = bigquery.TimePartitioning()
    table.time_partitioning.type_ = bigquery.TimePartitioningType.HOUR
    table.time_partitioning.field = "created_date"

    table = bq.create_table(table)
    return table
Example #18
def main():
    from google.cloud.bigquery import Client

    tables = {}
    with open("/vol/required_tables.txt") as rt:
        table_names = rt.read().split()
        bq_client = Client()
        for table_name in table_names:
            table_name_parts = table_name.split(".")
            if len(table_name_parts) == 3:
                dataset_ref = bq_client.dataset(table_name_parts[1],
                                                project=table_name_parts[0])
            else:
                dataset_ref = bq_client.dataset(table_name_parts[0])
            table_ref = dataset_ref.table(table_name_parts[-1])
            table = bq_client.get_table(table_ref)
            tables[table_name] = [
                field.to_api_repr() for field in table.schema
            ]
            if table_name.endswith("*"):
                tables[table_name].append({
                    "name": "_TABLE_SUFFIX",
                    "type": "STRING",
                    "mode": "REQUIRED"
                })
    with open("/vol/schema.json", mode="w") as schema:
        schema.write(json.dumps(tables))
Example #19
def update_recently_unixtime(client: bigquery.Client, df_unixtime):

    table_id = f'{project_id}.{dataset}.{recently_unixtime_table}'
    # Insert the unixtime DataFrame into the unixtime tracking table
    client.insert_rows_from_dataframe(client.get_table(table_id), df_unixtime)

    # Remove rows in the unixtime tracking table that have duplicate TABLE_NAME values
    duplicate_query = f"""
        SELECT
            * EXCEPT(rowNumber)
        FROM (
            SELECT
                *,
                ROW_NUMBER() OVER (
                    PARTITION BY
                        TABLE_NAME
                    ORDER BY
                        UNIX_TIME DESC
                ) as rowNumber
            FROM
                {table_id}
        )
        WHERE
            rowNumber = 1;
    """

    job_config = bigquery.QueryJobConfig()
    job_config.destination = table_id
    job_config.write_disposition = 'WRITE_TRUNCATE'
    job = client.query(duplicate_query, job_config=job_config)
    job.result()
Example #20
File: bigquery.py  Project: 4mile/BQPipe
def does_table_exist(bigquery_client: bigquery.Client, table: str, dataset: str = 'analytics') -> bool:
    """Check if given table from given Dataset exists in BigQuery, return True if so."""
    try:
        table_reference = bigquery_client.dataset(dataset).table(table)
        is_table = bigquery_client.get_table(table_reference)
        if is_table:
            logging.info('Table "{}" in Dataset "{}" already exists in BigQuery.'.format(table, dataset))
            return True
    except NotFound as error:
        logging.warning('Table "{}" does not exist in BigQuery Dataset "{}". Ref: {}.'.format(table, dataset, error))
        return False
Example #21
def get_bq_table(
    client: bigquery.Client,
    dataset_id: str,
    table_id: str,
    project_id: str = None,
) -> bigquery.Table:
    # If `project_id is None` then the default project of `client` will be used.
    table_ref = client.dataset(dataset_id, project=project_id).table(
        table_id)  # type: bigquery.TableReference  # noqa: E501

    # API request
    return client.get_table(table_ref)  # type: bigquery.Table
Example #22
def update_or_create_view(client: bigquery.Client, view_name: str,
                          view_query: str, dataset: str):
    LOGGER.debug("update_view: %s=%s", view_name, [view_query])
    dataset_ref = client.dataset(dataset)
    view_ref = dataset_ref.table(view_name)
    view = bigquery.Table(view_ref)
    view.view_query = view_query

    query_job = client.query(get_create_or_replace_view_query(view))
    query_job.result()  # wait for query job to finish

    updated_view = client.get_table(view)
    LOGGER.info("updated or replaced view: %s", updated_view.full_table_id)
    LOGGER.debug("view schema (%s): %s", updated_view.full_table_id,
                 updated_view.schema)
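get_create_or_replace_view_query is defined elsewhere in that project; a minimal sketch that builds standard-SQL DDL from the Table object (the exact formatting is an assumption) could look like:

from google.cloud import bigquery


def get_create_or_replace_view_query(view: bigquery.Table) -> str:
    # Fully qualify the view so the DDL works regardless of the client's default dataset.
    return (
        f"CREATE OR REPLACE VIEW `{view.project}.{view.dataset_id}.{view.table_id}` AS\n"
        f"{view.view_query}"
    )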
Example #23
def get_or_create_table(client: bigquery.Client) -> bigquery.Table:
    try:
        dataset = client.get_dataset("sensors")
    except NotFound as _:
        dataset = client.create_dataset("sensors")

    # The default project ID is not set and hence a fully-qualified ID is required.
    table_ref = bigquery.TableReference(dataset, table_id="particulate_matter")
    try:
        return client.get_table(table_ref)
    except NotFound as _:
        return client.create_table(
            bigquery.Table(
                table_ref,
                schema=[
                    bigquery.SchemaField(
                        "humidity",
                        "NUMERIC",
                        description="Sensor DHT22humidity in %"),
                    bigquery.SchemaField("max_micro",
                                         "NUMERIC",
                                         description=""),
                    bigquery.SchemaField("min_micro",
                                         "NUMERIC",
                                         description=""),
                    bigquery.SchemaField("samples", "NUMERIC", description=""),
                    bigquery.SchemaField(
                        "sds_p1",
                        "NUMERIC",
                        description="Sensor SDS011 PM10 in µg/m³"),
                    bigquery.SchemaField(
                        "sds_p2",
                        "NUMERIC",
                        description="Sensor SDS011 PM2.5 in µg/m³"),
                    bigquery.SchemaField(
                        "signal",
                        "NUMERIC",
                        description="WiFi signal strength in dBm"),
                    bigquery.SchemaField(
                        "temperature",
                        "NUMERIC",
                        description="Sensor DHT22 temperature in °C"),
                    bigquery.SchemaField("datetime",
                                         "DATETIME",
                                         description="Datetime of measurement",
                                         mode="REQUIRED"),
                ],
            ))
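Usage is straightforward; a hedged sketch of streaming one measurement into the table (the field values are made up, and only a subset of the nullable columns is populated):

from datetime import datetime

from google.cloud import bigquery

client = bigquery.Client()
table = get_or_create_table(client)
errors = client.insert_rows(table, [{
    "humidity": 45.2,
    "sds_p1": 12.1,
    "sds_p2": 7.4,
    "temperature": 21.3,
    "datetime": datetime.utcnow(),
}])
if errors:
    raise RuntimeError(errors)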
Example #24
    def scrape(
        self,
        bq_client: bigquery.Client,
        table_path: str,
        timestamp: datetime.datetime,
        dry_run: bool = False,
    ):
        table = bq_client.get_table(table_path)
        rows = [{
            "provider": self.name,
            "timestamp": timestamp,
            **asdict(row),
        } for row in self.fetch_spaces()]
        if not dry_run:
            errors = bq_client.insert_rows(table, rows)
            if len(errors) > 0:
                raise ValueError(errors)
Example #25
def _generate_dimensions(client: bigquery.Client, table: str) -> List[Dict[str, Any]]:
    """Generate dimensions and dimension groups from a bigquery table.

    When schema contains both submission_timestamp and submission_date, only produce
    a dimension group for submission_timestamp.

    Raise ClickException if schema results in duplicate dimensions.
    """
    dimensions = {}
    for dimension in _generate_dimensions_helper(client.get_table(table).schema):
        name = dimension["name"]
        # overwrite duplicate "submission" dimension group, thus picking the
        # last value sorted by field name, which is submission_timestamp
        if name in dimensions and name != "submission":
            raise click.ClickException(
                f"duplicate dimension {name!r} for table {table!r}"
            )
        dimensions[name] = dimension
    return list(dimensions.values())
Example #26
def dest_table(request, bq: bigquery.Client, dest_dataset) -> bigquery.Table:
    public_table: bigquery.Table = bq.get_table(
        bigquery.TableReference.from_string(
            "bigquery-public-data.new_york_311.311_service_requests"))
    schema = public_table.schema

    table: bigquery.Table = bigquery.Table(
        f"{os.environ.get('TF_VAR_project_id', 'bqutil')}"
        f".{dest_dataset.dataset_id}.cf_e2e_test_nyc_311_"
        f"{os.getenv('SHORT_SHA', 'manual')}",
        schema=schema,
    )

    table = bq.create_table(table)

    def teardown():
        bq.delete_table(table, not_found_ok=True)

    request.addfinalizer(teardown)
    return table
Example #27
    def validate_rule(self, client: bigquery.Client, *args, **keyword_args):
        """
        Raise an error if there are still rows to delete

        :param client: active BigQuery client object
        :param args:
        :param keyword_args:
        :return: None
        """
        backup_table_obj = client.get_table(self.backup_table)
        if not backup_table_obj.created:
            raise RuntimeError(
                f'Backup table {backup_table_obj.table_id} for branching cleaning rule was not '
                f'found on the server')
        query = BACKUP_ROWS_QUERY.render(lookup_table=self.lookup_table,
                                         src_table=self.observation_table)
        result = client.query(query).result()
        if result.total_rows > 0:
            raise RuntimeError(
                f'Branching cleaning rule was run but still identifies {result.total_rows} '
                f'rows from the observation table to drop')
Example #28
    def _get_table(self, table: str, client: bigquery.Client) -> DbTableSchema:
        bq_table = client.get_table(table)
        if not bq_table._properties:
            return
        table = bq_table._properties

        if not table.get('schema') or not table.get('schema').get('fields'):
            return

        fields = table.get('schema').get('fields')
        columns = [
            DbColumn(name=fields[i].get('name'),
                     type=fields[i].get('type'),
                     description=fields[i].get('description'),
                     ordinal_position=i) for i in range(len(fields))
        ]
        self.log.info(DbTableName(table.get('tableReference').get('tableId')))
        return DbTableSchema(
            schema_name=table.get('tableReference').get('projectId') + '.' +
            table.get('tableReference').get('datasetId'),
            table_name=DbTableName(table.get('tableReference').get('tableId')),
            columns=columns)
Example #29
def get_tables(project_id: str,
               client: Client,
               dataset_id: Optional[str] = None) -> Iterator[Table]:
    """
    Gets BigQuery tables from a Google Cloud project.

    Args:
        project_id (str): ID of the project.
        dataset_id (Optional[str]): The ID of the dataset.
            If `None`, will retrieve tables from all datasets in project.
        client (Client): A Google Cloud Client instance.

    Yields:
        Table: A BigQuery table.
    """
    dataset_refs = ([f"{project_id}.{dataset_id}"] if dataset_id else
                    (dataset.reference
                     for dataset in client.list_datasets(project=project_id)))
    datasets = (client.get_dataset(dataset_ref)
                for dataset_ref in dataset_refs)
    for dataset in datasets:
        for table in client.list_tables(dataset):
            yield client.get_table(table)
def clean_up_bq_table(client: cloud_bigquery.Client, table_name: str) -> None:
    try:
        client.get_table(table_name)
        client.delete_table(table_name)
    except NotFound:
        pass