def __create_job_config(self, ems_query_job_config: EmsQueryJobConfig) -> QueryJobConfig:
    job_config = QueryJobConfig()
    job_config.priority = ems_query_job_config.priority.value
    job_config.use_legacy_sql = False
    job_config.use_query_cache = ems_query_job_config.use_query_cache
    job_config.labels = ems_query_job_config.labels
    if ems_query_job_config.destination_table is not None:
        # Default to day partitioning; overridden below if an explicit
        # partitioning spec was provided.
        job_config.time_partitioning = TimePartitioning("DAY")
        table_reference = TableReference(
            DatasetReference(
                ems_query_job_config.destination_project_id or self.__project_id,
                ems_query_job_config.destination_dataset),
            ems_query_job_config.destination_table)
        job_config.destination = table_reference
        job_config.write_disposition = ems_query_job_config.write_disposition.value
        job_config.create_disposition = ems_query_job_config.create_disposition.value
    partitioning = ems_query_job_config.time_partitioning
    if partitioning is not None:
        job_config.time_partitioning = TimePartitioning(
            partitioning.type.value,
            partitioning.field,
            partitioning.expiration_ms,
            partitioning.require_partition_filter)
    if ems_query_job_config.table_definitions is not None:
        job_config.table_definitions = ems_query_job_config.table_definitions
    return job_config
def create_table(client, dataset_ref, table_name, is_partitioned=False):
    table_ref = dataset_ref.table(table_name)
    table_obj = Table(table_ref, schema=TABLE_SCHEMA)
    if is_partitioned:
        time_partitioning = TimePartitioning()
        time_partitioning.field = 'partition_value'
        table_obj.time_partitioning = time_partitioning
    return client.create_table(table_obj)
def process_response_rows_for_bigquery(self, rows: list, table_reference: TableReference):
    rows_dataframe = DataFrame.from_records(rows)
    rows_dataframe = concat(
        [rows_dataframe, rows_dataframe['dimensions'].apply(Series)],
        axis=1,
        join='inner')
    rows_dataframe = rows_dataframe.drop(['dimensions'], axis=1)
    rows_dataframe['date'] = rows_dataframe['date'].apply(lambda x: x.date())

    job_config = LoadJobConfig()
    job_config.write_disposition = WriteDisposition.WRITE_APPEND
    job_config.time_partitioning = TimePartitioning(
        type_=TimePartitioningType.DAY, field='date')
    job_config.schema = [
        self._get_schema_for_field(column)
        for column in list(rows_dataframe.columns.values)
    ]

    try:
        load_job = self.bigquery.client.load_table_from_dataframe(
            rows_dataframe, table_reference, job_config=job_config)
        load_job.result()
    except BadRequest as error:
        print(error.errors)
def setUp(self):
    self.data = [
        Observation(**dict(zip(TEST_DATA_FIELDS, row))).__dict__
        for row in TEST_DATA_ROWS
    ]
    self.client.delete_table(f'{self.dataset_id}.observation',
                             not_found_ok=True)
    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND
    # TODO figure out how to handle if clustering does NOT exist
    #  CREATE OR REPLACE fails if partitioning specs differ
    job_config.clustering_fields = ['person_id']
    job_config.time_partitioning = TimePartitioning(
        type_=bigquery.TimePartitioningType.DAY)
    job_config.schema = Observation.SCHEMA
    job_config.create_disposition = bigquery.CreateDisposition.CREATE_IF_NEEDED
    load_job = self.client.load_table_from_json(
        self.data,
        destination=f'{self.dataset_id}.{ppi_branching.OBSERVATION}',
        job_config=job_config)
    try:
        load_job.result()
    except google.api_core.exceptions.BadRequest:
        self.assertEqual(0, len(load_job.errors),
                         f'job errors={load_job.errors}')
def load_task():
    client = bigquery.Client()
    job_config = bigquery.LoadJobConfig()
    schema_path = os.path.join(
        dags_folder,
        'resources/stages/load/schemas/{task}.json'.format(task=task))
    job_config.schema = read_bigquery_schema_from_file(schema_path)
    job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON
    job_config.write_disposition = 'WRITE_TRUNCATE'
    job_config.ignore_unknown_values = True
    job_config.time_partitioning = TimePartitioning(
        field=time_partitioning_field)

    export_location_uri = 'gs://{bucket}/export'.format(bucket=output_bucket)
    uri = '{export_location_uri}/{task}/*.json'.format(
        export_location_uri=export_location_uri, task=task)
    table_ref = create_dataset(
        client, dataset_name, destination_dataset_project_id).table(task)
    load_job = client.load_table_from_uri(uri, table_ref,
                                          job_config=job_config)
    submit_bigquery_job(load_job, job_config)
    assert load_job.state == 'DONE'
def load_query_result_to_table(dest_table, query, part_col_name=None, clustering_fields=None):
    bq_client = get_bigquery_client()
    qjc = None
    print(query)
    if bq_table_exists(dest_table):
        # Reuse the existing table's partitioning and clustering specs.
        table = bq_client.get_table(dest_table)
        qjc = QueryJobConfig(
            destination=dest_table,
            write_disposition="WRITE_TRUNCATE",
            create_disposition="CREATE_IF_NEEDED",
            time_partitioning=table.time_partitioning,
            range_partitioning=table.range_partitioning,
            clustering_fields=table.clustering_fields,
        )
        job = bq_client.query(query, job_config=qjc)
        job.result()
    else:
        import time

        # Materialize the query into a temp table first so its schema can be
        # inspected to choose between time and range partitioning.
        temp_table_name = f"load_query_result_to_table__{str(int(time.time()))}"
        bq_client.query(
            f"CREATE OR REPLACE TABLE temp_1d.{temp_table_name} AS {query}"
        ).result()
        if part_col_name:
            schema = bq_client.get_table(f"temp_1d.{temp_table_name}").schema
            partition_type = [
                f for f in schema if f.name.lower() == part_col_name.lower()
            ][0].field_type
            if partition_type == "DATE":
                qjc = QueryJobConfig(
                    destination=dest_table,
                    write_disposition="WRITE_TRUNCATE",
                    create_disposition="CREATE_IF_NEEDED",
                    time_partitioning=TimePartitioning(field=part_col_name),
                    clustering_fields=clustering_fields,
                )
            elif partition_type == "INTEGER":
                qjc = QueryJobConfig(
                    destination=dest_table,
                    write_disposition="WRITE_TRUNCATE",
                    create_disposition="CREATE_IF_NEEDED",
                    range_partitioning=RangePartitioning(
                        PartitionRange(start=200001, end=209912, interval=1),
                        field=part_col_name),
                    clustering_fields=clustering_fields,
                )
            else:
                print(partition_type)
                raise Exception(
                    f"Partition column[{part_col_name}] is neither DATE nor INTEGER type."
                )
        job = bq_client.query(f"SELECT * FROM temp_1d.{temp_table_name}",
                              job_config=qjc)
        job.result()
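# A minimal usage sketch for load_query_result_to_table above. The table and
# dataset names are hypothetical, and it assumes the helpers it calls
# (get_bigquery_client, bq_table_exists) and a `temp_1d` dataset exist.
def example_load_query_result_to_table():
    # A DATE partition column makes the function pick time partitioning;
    # an INTEGER column would trigger range partitioning instead.
    load_query_result_to_table(
        dest_table="my_project.analytics.daily_sales",  # hypothetical
        query="SELECT sale_date, amount FROM my_project.raw.sales",
        part_col_name="sale_date",
        clustering_fields=["amount"])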
def __create_test_table(self, table_name, dataset_id):
    table_schema = [
        SchemaField("int_data", "INT64"),
        SchemaField("str_data", "STRING")
    ]
    table_reference = TableReference(dataset_id, table_name)
    test_table = Table(table_reference, table_schema)
    test_table.time_partitioning = TimePartitioning("DAY")
    self.__delete_if_exists(test_table)
    self.GCP_BIGQUERY_CLIENT.create_table(test_table)
    return test_table
def store_digital_health_status_data(project_id,
                                     json_data,
                                     destination_table,
                                     schema=None):
    """
    Stores the fetched digital_health_sharing_status data in a BigQuery dataset.

    If the table doesn't exist, it will create that table. If the table does
    exist, it will create a partition in the designated table or append to the
    same partition. This is necessary for storing data that has "RECORD" type
    fields, which do not conform to a dataframe. The data is stored using a
    JSON file object since it is one of the ways BigQuery expects it.

    :param project_id: identifies the project
    :param json_data: list of json objects retrieved from
        process_digital_health_data_to_json
    :param destination_table: fully qualified destination table name as
        'project.dataset.table'
    :param schema: a list of SchemaField objects corresponding to the
        destination table
    :return: returns the bq job_id for the loading of digital health data
    """

    # Parameter check
    if not isinstance(project_id, str):
        raise RuntimeError(
            'Please specify the project in which to create the table')

    client = get_client(project_id)
    if not schema:
        schema = get_table_schema(DIGITAL_HEALTH_SHARING_STATUS)

    try:
        table = client.get_table(destination_table)
    except NotFound:
        table = Table(destination_table, schema=schema)
        table.time_partitioning = TimePartitioning(
            type_=TimePartitioningType.DAY)
        table = client.create_table(table)

    # Serialize to newline-delimited JSON in memory before loading.
    file_obj = StringIO()
    for json_obj in json_data:
        json.dump(json_obj, file_obj)
        file_obj.write('\n')
    job_config = LoadJobConfig(
        source_format=SourceFormat.NEWLINE_DELIMITED_JSON, schema=schema)
    job = client.load_table_from_file(file_obj,
                                      table,
                                      rewind=True,
                                      job_config=job_config,
                                      job_id_prefix='ps_digital_health_load_')
    job.result()

    return job.job_id
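# Hedged usage sketch for store_digital_health_status_data above; the project
# id and json payload are illustrative assumptions only.
def example_store_digital_health_status_data():
    json_data = [
        {'person_id': 1, 'status': 'SHARED'},  # hypothetical record shape
    ]
    job_id = store_digital_health_status_data(
        'my-project',  # assumed project id
        json_data,
        'my-project.drc_dataset.digital_health_sharing_status')
    print(job_id)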
def _process_data_for_bigquery(self, data: DataFrame, output_tablereference: TableReference):
    job_config = LoadJobConfig()
    job_config.write_disposition = WriteDisposition.WRITE_APPEND
    job_config.time_partitioning = TimePartitioning(
        type_=TimePartitioningType.DAY, field='date')

    try:
        load_job = self.bigquery.client.load_table_from_dataframe(
            data, output_tablereference, job_config=job_config)
        load_job.result()
    except BadRequest as error:
        print(error.errors)
def setUp(self):
    self.client_mock = Mock()
    self.query_job_mock = Mock(QueryJob)
    self.query_job_mock.priority = "INTERACTIVE"
    self.query_job_mock.create_disposition = None
    self.query_job_mock.write_disposition = None
    self.query_job_mock.time_partitioning = TimePartitioning(
        "DAY", "a", None, None)
    self.query_config = EmsQueryJobConfig(
        destination_project_id="some_destination_project_id",
        destination_dataset="some_dataset",
        destination_table="some_table",
        labels={"label1": "label1_value"})
def setUp(self):
    self.project_id = os.environ.get(PROJECT_ID)
    self.dataset_id = os.environ.get('COMBINED_DATASET_ID')
    self.dataset_ref = DatasetReference(self.project_id, self.dataset_id)
    self.client = bq.get_client(self.project_id)

    self.schema = [
        SchemaField("person_id", "INT64"),
        SchemaField("first_name", "STRING"),
        SchemaField("last_name", "STRING"),
        SchemaField("algorithm", "STRING")
    ]

    self.ps_api_fields = [
        dict(name='person_id', type='integer', mode='nullable'),
        dict(name='first_name', type='string', mode='nullable'),
        dict(name='last_name', type='string', mode='nullable')
    ]

    self.id_match_fields = [
        dict(name='person_id', type='integer', mode='nullable'),
        dict(name='first_name', type='string', mode='nullable'),
        dict(name='last_name', type='string', mode='nullable'),
        dict(name='algorithm', type='string', mode='nullable')
    ]

    self.hpo_id = 'fake_site'
    self.id_match_table_id = f'{IDENTITY_MATCH_TABLE}_{self.hpo_id}'
    self.ps_values_table_id = f'ps_api_values_{self.hpo_id}'

    # Create and populate the ps_values site table
    schema = bq.get_table_schema(PS_API_VALUES)
    tablename = self.ps_values_table_id
    table = Table(f'{self.project_id}.{self.dataset_id}.{tablename}',
                  schema=schema)
    table.time_partitioning = TimePartitioning(
        type_=TimePartitioningType.HOUR)
    table = self.client.create_table(table)

    populate_query = POPULATE_PS_VALUES.render(
        project_id=self.project_id,
        drc_dataset_id=self.dataset_id,
        ps_values_table_id=self.ps_values_table_id)
    job = self.client.query(populate_query)
    job.result()
def create_table(client,
                 table_name,
                 schema,
                 project=None,
                 dataset=None,
                 partitioning_type=None,
                 partitioned_field=None,
                 clustering_fields=None):
    """
    Create a table with the given schema.

    :param client: BQ client
    :param table_name: table name
    :param schema: table schema
    :param project: defaults to client.project
    :param dataset: defaults to client.dataset
    :param partitioning_type: either `time` or `range` partitioned
    :param partitioned_field: field name to use for partitioning
    :param clustering_fields: fields to use for clustering
    :return: created table
    """
    partitioning_types = {
        "time": TimePartitioning(type_=TimePartitioningType.HOUR,
                                 field=partitioned_field,
                                 require_partition_filter=True),
        "range": RangePartitioning(range_=PartitionRange(start=0,
                                                         end=100,
                                                         interval=10),
                                   field=partitioned_field)
    }
    try:
        if project is None:
            project = client.project
        if dataset is None:
            dataset = client.dataset
        logging.info(
            "Project: {}\tDataset: {}\tTable: {}\t\tPartitioning Type: {}".format(
                project, dataset.dataset_id, table_name, partitioning_type))
        table_id = "{}.{}.{}".format(project, dataset.dataset_id, table_name)
        table = bigquery.Table(table_id, schema=schema)
        if partitioning_type is not None:
            partitioning_type = partitioning_type.lower()
            if partitioning_type == "time":
                logging.info("Table Partitioning: {}".format(partitioning_type))
                schema.append(
                    bigquery.SchemaField("ZONE_PARTITIONTIME", "TIMESTAMP"))
                table.schema = schema
                table.time_partitioning = partitioning_types.get(partitioning_type)
            elif partitioning_type == "range":
                table.range_partitioning = partitioning_types.get(partitioning_type)
        if clustering_fields is not None:
            table.clustering_fields = clustering_fields
        client.create_table(table, exists_ok=True)
        table = client.get_table(table)
        logging.info("Table {} created successfully.".format(table_id))
        return table
    except Exception as error:
        raise error
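# A small sketch of how create_table above might be called; the client,
# dataset reference, and schema are assumptions for illustration. The "time"
# branch appends a ZONE_PARTITIONTIME column and partitions by hour, while
# "range" partitions on an INTEGER field over [0, 100) in steps of 10.
def example_create_table():
    client = bigquery.Client()
    dataset = client.dataset('my_dataset')  # assumed dataset
    schema = [
        bigquery.SchemaField('event_ts', 'TIMESTAMP'),
        bigquery.SchemaField('bucket', 'INTEGER'),
    ]
    # Copies of the schema list, since the "time" branch mutates it in place.
    create_table(client, 'events_by_hour', list(schema), dataset=dataset,
                 partitioning_type='time', partitioned_field='event_ts')
    create_table(client, 'events_by_bucket', list(schema), dataset=dataset,
                 partitioning_type='range', partitioned_field='bucket')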
def test_should_create_table_from_table_object(self):
    # given
    table_id = f'{self.dataset_manager.project_id}.{self.dataset_manager.dataset_name}.example_test_table'
    table = Table(table_id,
                  schema=[
                      {
                          "mode": "NULLABLE",
                          "name": "example_field",
                          "type": "STRING"
                      },
                  ])
    table.time_partitioning = TimePartitioning()

    # when
    self.dataset_manager.create_table_from_schema('example_test_table',
                                                  schema=None,
                                                  table=table)

    # then
    self.table_should_exists()
def __create_query_job_mock(self,
                            job_id: str,
                            has_error: bool,
                            created: datetime = datetime.now()):
    error_result = {
        'reason': 'someReason',
        'location': 'query',
        'message': 'error occurred'
    }
    query_job_mock = Mock(QueryJob)
    query_job_mock.job_id = job_id
    query_job_mock.priority = "INTERACTIVE"
    query_job_mock.destination = None
    query_job_mock.query = "SIMPLE QUERY"
    query_job_mock.labels = {"label1": "label1_value"}
    query_job_mock.state = "DONE"
    query_job_mock.create_disposition = None
    query_job_mock.write_disposition = None
    query_job_mock.error_result = error_result if has_error else None
    query_job_mock.created = created
    query_job_mock.time_partitioning = TimePartitioning(
        "DAY", "a", None, None)
    return query_job_mock
def create_table_from_schema(self,
                             table_id: str,
                             schema: typing.Union[typing.List[dict], Path, None] = None,
                             table=None):
    from google.cloud.bigquery import Table, TimePartitioning

    if schema and table:
        raise ValueError(
            "You can't provide both schema and table, because the table you "
            "provide should already contain the schema.")
    if not schema and not table:
        raise ValueError("You must provide either schema or table.")

    if isinstance(schema, Path):
        schema = json.loads(schema.read_text())

    if table is None:
        table = Table(table_id, schema=schema)
        table.time_partitioning = TimePartitioning()

    self.logger.info(f'CREATING TABLE FROM SCHEMA: {table.schema}')
    self.bigquery_client.create_table(table)
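# Sketch of the two valid ways to call create_table_from_schema above; the
# `manager` instance and table ids are assumptions for illustration.
def example_create_table_from_schema(manager):
    from google.cloud.bigquery import Table, TimePartitioning

    # 1) Provide a schema and let the method build a day-partitioned table.
    manager.create_table_from_schema(
        'my_project.my_dataset.events',
        schema=[{"mode": "NULLABLE", "name": "example_field", "type": "STRING"}])

    # 2) Provide a prebuilt Table object (schema must then be None).
    table = Table(
        'my_project.my_dataset.events_v2',
        schema=[{"mode": "NULLABLE", "name": "example_field", "type": "STRING"}])
    table.time_partitioning = TimePartitioning()
    manager.create_table_from_schema('my_project.my_dataset.events_v2',
                                     schema=None,
                                     table=table)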
def parse_task(ds, **kwargs):
    template_context = kwargs.copy()
    template_context['ds'] = ds
    template_context['params'] = environment
    template_context['params']['table_name'] = table_name
    template_context['params']['columns'] = columns
    template_context['params']['parser'] = parser
    template_context['params']['abi'] = abi
    if parser_type == 'log':
        template_context['params']['event_topic'] = abi_to_event_topic(parser['abi'])
    elif parser_type == 'trace':
        template_context['params']['method_selector'] = abi_to_method_selector(parser['abi'])
    template_context['params']['struct_fields'] = create_struct_string_from_schema(schema)
    template_context['params']['parse_all_partitions'] = parse_all_partitions
    client = bigquery.Client()

    # Create a temporary table
    dataset_name_temp = 'parse_temp'
    create_dataset(client, dataset_name_temp)
    temp_table_name = 'temp_{table_name}_{milliseconds}' \
        .format(table_name=table_name,
                milliseconds=int(round(time.time() * 1000)))
    temp_table_ref = client.dataset(dataset_name_temp).table(temp_table_name)

    temp_table = bigquery.Table(
        temp_table_ref,
        schema=read_bigquery_schema_from_dict(schema, parser_type))
    temp_table.description = table_description
    temp_table.time_partitioning = TimePartitioning(field='block_timestamp')
    logging.info('Creating table: ' + json.dumps(temp_table.to_api_repr()))
    temp_table = client.create_table(temp_table)
    assert temp_table.table_id == temp_table_name

    # Query to temporary table
    job_config = bigquery.QueryJobConfig()
    job_config.priority = bigquery.QueryPriority.INTERACTIVE
    job_config.destination = temp_table_ref
    sql_template = get_parse_sql_template(parser_type)
    sql = kwargs['task'].render_template('', sql_template, template_context)
    logging.info(sql)
    query_job = client.query(sql, location='US', job_config=job_config)
    submit_bigquery_job(query_job, job_config)
    assert query_job.state == 'DONE'

    # Copy / merge to destination
    if parse_all_partitions:
        # Copy temporary table to destination
        copy_job_config = bigquery.CopyJobConfig()
        copy_job_config.write_disposition = 'WRITE_TRUNCATE'
        dest_table_ref = client.dataset(
            dataset_name,
            project=parse_destination_dataset_project_id).table(table_name)
        copy_job = client.copy_table(temp_table_ref,
                                     dest_table_ref,
                                     location='US',
                                     job_config=copy_job_config)
        submit_bigquery_job(copy_job, copy_job_config)
        assert copy_job.state == 'DONE'
        # Need to update the description, as the copy above won't respect the
        # description in case the destination table already exists
        table = client.get_table(dest_table_ref)
        table.description = table_description
        table = client.update_table(table, ["description"])
        assert table.description == table_description
    else:
        # Merge
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement
        merge_job_config = bigquery.QueryJobConfig()
        # Finishes faster, query limit for concurrent interactive queries is 50
        merge_job_config.priority = bigquery.QueryPriority.INTERACTIVE

        merge_sql_template = get_merge_table_sql_template()
        merge_template_context = template_context.copy()
        merge_template_context['params']['source_table'] = temp_table_name
        merge_template_context['params']['destination_dataset_project_id'] = parse_destination_dataset_project_id
        merge_template_context['params']['destination_dataset_name'] = dataset_name
        merge_template_context['params']['dataset_name_temp'] = dataset_name_temp
        merge_template_context['params']['columns'] = columns
        merge_sql = kwargs['task'].render_template('', merge_sql_template,
                                                   merge_template_context)
        print('Merge sql:')
        print(merge_sql)
        merge_job = client.query(merge_sql, location='US',
                                 job_config=merge_job_config)
        submit_bigquery_job(merge_job, merge_job_config)
        assert merge_job.state == 'DONE'

    # Delete temp table
    client.delete_table(temp_table_ref)
def enrich_task(ds, **kwargs):
    template_context = kwargs.copy()
    template_context['ds'] = ds
    template_context['params'] = environment

    client = bigquery.Client()

    # Need to use a temporary table because bq query sets field modes to
    # NULLABLE and descriptions to null when writeDisposition is WRITE_TRUNCATE

    # Create a temporary table
    temp_table_name = '{task}_{milliseconds}'.format(
        task=task, milliseconds=int(round(time.time() * 1000)))
    temp_table_ref = client.dataset(dataset_name_temp).table(temp_table_name)

    schema_path = os.path.join(
        dags_folder,
        'resources/stages/enrich/schemas/{task}.json'.format(task=task))
    schema = read_bigquery_schema_from_file(schema_path)
    table = bigquery.Table(temp_table_ref, schema=schema)

    description_path = os.path.join(
        dags_folder,
        'resources/stages/enrich/descriptions/{task}.txt'.format(task=task))
    table.description = read_file(description_path)
    if time_partitioning_field is not None:
        table.time_partitioning = TimePartitioning(
            field=time_partitioning_field)
    logging.info('Creating table: ' + json.dumps(table.to_api_repr()))
    table = client.create_table(table)
    assert table.table_id == temp_table_name

    # Query from raw to temporary table
    query_job_config = bigquery.QueryJobConfig()
    # Finishes faster, query limit for concurrent interactive queries is 50
    query_job_config.priority = bigquery.QueryPriority.INTERACTIVE
    query_job_config.destination = temp_table_ref

    sql_path = os.path.join(
        dags_folder,
        'resources/stages/enrich/sqls/{task}.sql'.format(task=task))
    sql_template = read_file(sql_path)
    sql = kwargs['task'].render_template('', sql_template, template_context)
    print('Enrichment sql:')
    print(sql)
    query_job = client.query(sql, location='US',
                             job_config=query_job_config)
    submit_bigquery_job(query_job, query_job_config)
    assert query_job.state == 'DONE'

    if load_all_partitions or always_load_all_partitions:
        # Copy temporary table to destination
        copy_job_config = bigquery.CopyJobConfig()
        copy_job_config.write_disposition = 'WRITE_TRUNCATE'

        dest_table_name = '{task}'.format(task=task)
        dest_table_ref = client.dataset(
            dataset_name,
            project=destination_dataset_project_id).table(dest_table_name)
        copy_job = client.copy_table(temp_table_ref,
                                     dest_table_ref,
                                     location='US',
                                     job_config=copy_job_config)
        submit_bigquery_job(copy_job, copy_job_config)
        assert copy_job.state == 'DONE'
    else:
        # Merge
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement
        merge_job_config = bigquery.QueryJobConfig()
        # Finishes faster, query limit for concurrent interactive queries is 50
        merge_job_config.priority = bigquery.QueryPriority.INTERACTIVE

        merge_sql_path = os.path.join(
            dags_folder,
            'resources/stages/enrich/sqls/merge/merge_{task}.sql'.format(
                task=task))
        merge_sql_template = read_file(merge_sql_path)

        merge_template_context = template_context.copy()
        merge_template_context['params']['source_table'] = temp_table_name
        merge_template_context['params']['destination_dataset_project_id'] = destination_dataset_project_id
        merge_template_context['params']['destination_dataset_name'] = dataset_name
        merge_sql = kwargs['task'].render_template(
            '', merge_sql_template, merge_template_context)
        print('Merge sql:')
        print(merge_sql)
        merge_job = client.query(merge_sql, location='US',
                                 job_config=merge_job_config)
        submit_bigquery_job(merge_job, merge_job_config)
        assert merge_job.state == 'DONE'

    # Delete temp table
    client.delete_table(temp_table_ref)
def bq_time_partitioning(self):
    return TimePartitioning(
        type_=bigquery.TimePartitioningType.DAY,
        field=self.field,
        expiration_ms=int(self.expire.milli),
    )
def enrich_task():
    client = bigquery.Client()

    # Need to use a temporary table because bq query sets field modes to
    # NULLABLE and descriptions to null when writeDisposition is WRITE_TRUNCATE

    # Create a temporary table
    temp_table_name = '{task}_{milliseconds}'.format(
        task=task, milliseconds=int(round(time.time() * 1000)))
    temp_table_ref = client.dataset(dataset_name_temp).table(temp_table_name)

    schema_path = os.path.join(
        dags_folder,
        'resources/stages/enrich/schemas/{task}.json'.format(task=task))
    schema = read_bigquery_schema_from_file(schema_path)
    table = bigquery.Table(temp_table_ref, schema=schema)

    description_path = os.path.join(
        dags_folder,
        'resources/stages/enrich/descriptions/{task}.txt'.format(task=task))
    table.description = read_file(description_path)
    if time_partitioning_field is not None:
        table.time_partitioning = TimePartitioning(
            field=time_partitioning_field)
    logging.info('Creating table: ' + json.dumps(table.to_api_repr()))
    table = client.create_table(table)
    assert table.table_id == temp_table_name

    # Query from raw to temporary table
    query_job_config = bigquery.QueryJobConfig()
    # Finishes faster, query limit for concurrent interactive queries is 50
    query_job_config.priority = bigquery.QueryPriority.INTERACTIVE
    query_job_config.destination = temp_table_ref
    sql_path = os.path.join(
        dags_folder,
        'resources/stages/enrich/sqls/{task}.sql'.format(task=task))
    sql = read_file(sql_path)
    query_job = client.query(sql, location='US',
                             job_config=query_job_config)
    submit_bigquery_job(query_job, query_job_config)
    assert query_job.state == 'DONE'

    # Copy temporary table to destination
    copy_job_config = bigquery.CopyJobConfig()
    copy_job_config.write_disposition = 'WRITE_TRUNCATE'

    all_destination_projects = [(destination_dataset_project_id,
                                 dataset_name)]
    if copy_dataset_project_id is not None and len(copy_dataset_project_id) > 0 \
            and copy_dataset_name is not None and len(copy_dataset_name) > 0:
        all_destination_projects.append(
            (copy_dataset_project_id, copy_dataset_name))

    for dest_project, dest_dataset_name in all_destination_projects:
        dest_table_name = '{task}'.format(task=task)
        dest_table_ref = client.dataset(
            dest_dataset_name, project=dest_project).table(dest_table_name)
        copy_job = client.copy_table(temp_table_ref,
                                     dest_table_ref,
                                     location='US',
                                     job_config=copy_job_config)
        submit_bigquery_job(copy_job, copy_job_config)
        assert copy_job.state == 'DONE'

    # Delete temp table
    client.delete_table(temp_table_ref)
def setUp(self):
    self.maxDiff = None
    self.project_id = os.environ.get(PROJECT_ID)
    self.dataset_id = os.environ.get('COMBINED_DATASET_ID')
    self.dataset_ref = DatasetReference(self.project_id, self.dataset_id)
    self.client = bq.get_client(self.project_id)

    self.hpo_id = 'fake_site'
    self.id_match_table_id = f'{IDENTITY_MATCH_TABLE}_{self.hpo_id}'
    self.ps_values_table_id = f'{PS_API_VALUES}_{self.hpo_id}'
    self.pii_address_table_id = f'{self.hpo_id}_pii_address'
    self.pii_email_table_id = f'{self.hpo_id}_pii_email'
    self.pii_phone_number_table_id = f'{self.hpo_id}_pii_phone_number'
    self.pii_name_table_id = f'{self.hpo_id}_pii_name'
    self.person_table_id = f'{self.hpo_id}_person'
    self.location_table_id = f'{self.hpo_id}_location'
    self.fq_concept_table = f'{self.project_id}.{self.dataset_id}.concept'

    # Create and populate the ps_values site table
    schema = resources.fields_for(f'{PS_API_VALUES}')
    table = Table(
        f'{self.project_id}.{self.dataset_id}.{self.ps_values_table_id}',
        schema=schema)
    table.time_partitioning = TimePartitioning(
        type_=TimePartitioningType.HOUR)
    table = self.client.create_table(table, exists_ok=True)

    populate_query = POPULATE_PS_VALUES.render(
        project_id=self.project_id,
        drc_dataset_id=self.dataset_id,
        ps_values_table_id=self.ps_values_table_id)
    job = self.client.query(populate_query)
    job.result()

    # Create and populate the drc_id_match_table
    schema = resources.fields_for(f'{IDENTITY_MATCH_TABLE}')
    table = Table(
        f'{self.project_id}.{self.dataset_id}.{self.id_match_table_id}',
        schema=schema)
    table.time_partitioning = TimePartitioning(
        type_=TimePartitioningType.HOUR)
    table = self.client.create_table(table, exists_ok=True)

    populate_query = POPULATE_ID_MATCH.render(
        project_id=self.project_id,
        drc_dataset_id=self.dataset_id,
        id_match_table_id=self.id_match_table_id)
    job = self.client.query(populate_query)
    job.result()

    # Create and populate pii_name, pii_email, pii_phone_number, and pii_address tables
    schema = resources.fields_for(f'{PII_NAME}')
    table = Table(
        f'{self.project_id}.{self.dataset_id}.{self.pii_name_table_id}',
        schema=schema)
    table.time_partitioning = TimePartitioning(
        type_=TimePartitioningType.HOUR)
    table = self.client.create_table(table, exists_ok=True)

    schema = resources.fields_for(f'{PII_EMAIL}')
    table = Table(
        f'{self.project_id}.{self.dataset_id}.{self.pii_email_table_id}',
        schema=schema)
    table.time_partitioning = TimePartitioning(
        type_=TimePartitioningType.HOUR)
    table = self.client.create_table(table, exists_ok=True)

    schema = resources.fields_for(f'{PII_PHONE_NUMBER}')
    table = Table(
        f'{self.project_id}.{self.dataset_id}.{self.pii_phone_number_table_id}',
        schema=schema)
    table.time_partitioning = TimePartitioning(
        type_=TimePartitioningType.HOUR)
    table = self.client.create_table(table, exists_ok=True)

    schema = resources.fields_for(f'{PII_ADDRESS}')
    table = Table(
        f'{self.project_id}.{self.dataset_id}.{self.pii_address_table_id}',
        schema=schema)
    table.time_partitioning = TimePartitioning(
        type_=TimePartitioningType.HOUR)
    table = self.client.create_table(table, exists_ok=True)

    person_table = Table(
        f'{self.project_id}.{self.dataset_id}.{self.person_table_id}',
        schema=person_schema)
    person_table = self.client.create_table(person_table, exists_ok=True)

    location_table = Table(
        f'{self.project_id}.{self.dataset_id}.{self.location_table_id}',
        schema=location_schema)
    location_table = self.client.create_table(location_table, exists_ok=True)

    concept_table = Table(f'{self.project_id}.{self.dataset_id}.concept',
                          schema=concept_schema)
    concept_table = self.client.create_table(concept_table, exists_ok=True)