def from_api_repr(cls, resource, client):
    """Factory: construct a job given its API representation

    .. note:

       This method assumes that the project found in the resource matches
       the client's project.

    :type resource: dict
    :param resource: dataset job representation returned from the API

    :type client: :class:`google.cloud.bigquery.client.Client`
    :param client: Client which holds credentials and project
                   configuration for the dataset.

    :rtype: :class:`google.cloud.bigquery.job.LoadTableFromStorageJob`
    :returns: Job parsed from ``resource``.
    """
    name, config = cls._get_resource_config(resource)
    dest_config = config['destinationTable']
    dataset = Dataset(dest_config['datasetId'], client)
    destination = Table(dest_config['tableId'], dataset)
    source_urls = config.get('sourceUris', ())
    job = cls(name, destination, source_urls, client=client)
    job._set_properties(resource)
    return job
def download_table_as_file(self, table_id, dest, staging_location, file_type):
    """
    Download a BigQuery table as a file.

    Args:
        table_id (str): fully qualified BigQuery table id
        dest (str): destination filename
        staging_location (str): url to staging_location (currently
            supports a folder in GCS)
        file_type (feast.sdk.resources.feature_set.FileType): (default:
            FileType.CSV) exported file format

    Returns:
        (str) path to the downloaded file
    """
    if not is_gs_path(staging_location):
        raise ValueError("staging_uri must be a directory in GCS")

    temp_file_name = 'temp_{}'.format(int(round(time.time() * 1000)))
    staging_file_path = os.path.join(staging_location, temp_file_name)

    job_config = ExtractJobConfig()
    job_config.destination_format = file_type
    src_table = Table.from_string(table_id)
    job = self.bq.extract_table(src_table, staging_file_path,
                                job_config=job_config)

    # await completion
    job.result()

    bucket_name, blob_name = split_gs_path(staging_file_path)
    bucket = self.gcs.get_bucket(bucket_name)
    blob = bucket.blob(blob_name)
    blob.download_to_filename(dest)
    return dest
def download_table_as_df(self, full_table_id, staging_location=None):
    """
    Download a BigQuery table as a Pandas DataFrame.

    Args:
        full_table_id (str): fully qualified BigQuery table id
        staging_location: url to staging_location (currently
            supports a folder in GCS)

    Returns:
        pandas.DataFrame: dataframe of the training dataset
    """
    if not staging_location:
        table = bigquery.TableReference.from_string(full_table_id)
        rows = self.bqclient.list_rows(table)
        return rows.to_dataframe(bqstorage_client=self.bqstorageclient)

    if not is_gs_path(staging_location):
        raise ValueError("staging_uri must be a directory in GCS")

    temp_file_name = "temp_{}".format(int(round(time.time() * 1000)))
    staging_file_path = os.path.join(staging_location, temp_file_name)

    job_config = ExtractJobConfig()
    job_config.destination_format = DestinationFormat.CSV
    job = self.bqclient.extract_table(
        Table.from_string(full_table_id),
        staging_file_path,
        job_config=job_config)

    # await completion
    job.result()
    return gcs_to_df(staging_file_path)
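# A minimal standalone sketch of the fast path used above: reading a table
# straight into a DataFrame via list_rows() plus the BigQuery Storage API.
# The project/table ids are placeholders, and the storage client class name
# varies between library versions (older releases used
# bigquery_storage_v1beta1.BigQueryStorageClient).
from google.cloud import bigquery
from google.cloud import bigquery_storage

bqclient = bigquery.Client(project="my-project")  # hypothetical project
bqstorageclient = bigquery_storage.BigQueryReadClient()

table = bigquery.TableReference.from_string("my-project.my_dataset.my_table")
rows = bqclient.list_rows(table)
df = rows.to_dataframe(bqstorage_client=bqstorageclient)  # falls back to REST if omitted
print(df.head())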
def test_download_table_as_df(self, mocker):
    self._stop_time(mocker)
    mocked_gcs_to_df = mocker.patch(
        "feast.sdk.utils.bq_util.gcs_to_df", return_value=None)

    staging_path = "gs://temp/"
    staging_file_name = "temp_0"
    table_id = "project_id.dataset_id.table_id"

    table_dldr = TableDownloader()
    exp_staging_path = os.path.join(staging_path, staging_file_name)
    table_dldr._bq = _Mock_BQ_Client()
    mocker.patch.object(
        table_dldr._bq, "extract_table", return_value=_Job())

    table_dldr.download_table_as_df(table_id, staging_location=staging_path)

    assert len(table_dldr._bq.extract_table.call_args_list) == 1
    args, kwargs = table_dldr._bq.extract_table.call_args_list[0]
    assert args[0].full_table_id == Table.from_string(table_id).full_table_id
    assert args[1] == exp_staging_path
    assert kwargs['job_config'].destination_format == "CSV"
    mocked_gcs_to_df.assert_called_once_with(exp_staging_path)
def _test_download_file(self, mocker, type):
    staging_path = "gs://temp/"
    staging_file_name = "temp_0"
    dst_path = "/tmp/myfile.csv"
    table_id = "project_id.dataset_id.table_id"

    table_dldr = TableDownloader()
    mock_blob = _Blob()
    mocker.patch.object(mock_blob, "download_to_filename")
    table_dldr._bq = _Mock_BQ_Client()
    mocker.patch.object(
        table_dldr._bq, "extract_table", return_value=_Job())
    table_dldr._gcs = _Mock_GCS_Client()
    mocker.patch.object(
        table_dldr._gcs, "get_bucket", return_value=_Bucket(mock_blob))

    table_dldr.download_table_as_file(
        table_id, dst_path, staging_location=staging_path, file_type=type)

    exp_staging_path = os.path.join(staging_path, staging_file_name)
    assert len(table_dldr._bq.extract_table.call_args_list) == 1
    args, kwargs = table_dldr._bq.extract_table.call_args_list[0]
    assert args[0].full_table_id == Table.from_string(table_id).full_table_id
    assert args[1] == exp_staging_path
    assert kwargs['job_config'].destination_format == str(type)
    mock_blob.download_to_filename.assert_called_once_with(dst_path)
def main(
    project: Optional[str],
    dataset: Optional[str],
    module_path: str,
    apply: bool,
    validate: bool,
) -> None:
    client = create_connection()
    for local_table in set(find_tables(module_path)):
        project = project or local_table.project
        assert project, "Project has not been set."
        dataset = dataset or local_table.dataset
        assert dataset, "Dataset has not been set."
        table_identifier = f"{project}.{dataset}.{local_table.full_table_name()}"
        print(f"Checking migrations for: {table_identifier}")
        try:
            remote_table = client.get_table(table_identifier)
        except NotFound as not_found:
            table_exists_msg = f"Table does not exist in bq: {table_identifier}"
            if validate:
                raise Exception(table_exists_msg) from not_found
            print(table_exists_msg)
            if apply:
                print("Creating table.")
                table = Table(
                    table_identifier,
                    schema=local_table.get_schema_fields(),
                )
                if local_table.time_partitioning:
                    table.time_partitioning = local_table.time_partitioning
                print(client.create_table(table))
        else:
            new_columns = list(
                find_new_columns(local_table.get_schema_fields(),
                                 remote_table.schema))
            if new_columns:
                new_columns_message = f"Found new columns: {new_columns}"
                if validate:
                    raise Exception(new_columns_message)
                print(new_columns_message)
                if apply:
                    print("Applying changes")
                    remote_table.schema = local_table.get_schema_fields()
                    print(client.update_table(remote_table, ["schema"]))
def populate_table(self, table_path, schema, data=[], make_immediately_available=False,
                   replace_existing_table=False):
    # type: (str, List[SchemaField], Optional[List[Any]], Optional[bool], Optional[bool]) -> None
    """Creates a table and populates it with a list of rows.

    If make_immediately_available is False, the table will be created using streaming inserts.
    Note that streaming inserts are immediately available for querying, but not for exporting
    or copying, so if you need that capability you should set make_immediately_available to
    True. https://cloud.google.com/bigquery/streaming-data-into-bigquery

    If the table is already created, it will raise a RuntimeError, unless
    replace_existing_table is True.

    Args:
        table_path: A string of the form '<dataset id>.<table name>'
            or '<project id>.<dataset id>.<table name>'.
        schema: A list of SchemaFields to represent the table's schema.
        data: A list of rows, each of which corresponds to a row to insert into the table.
        make_immediately_available: If False, the table won't immediately be available for
            copying or exporting, but will be available for querying. If True, after this
            operation returns, it will be available for copying and exporting too.
        replace_existing_table: If set to True, the table at table_path will be deleted and
            recreated if it's already present.

    Raises:
        RuntimeError if the table at table_path is already there and replace_existing_table
            is False
    """
    # Use the Table object so we can pass through the schema.
    table = Table(self.get_table_reference_from_path(table_path), schema)
    if self.table_exists(table):
        if replace_existing_table:
            self.delete_table(table)
        else:
            raise RuntimeError('The table {} already exists.'.format(table_path))
    self.create_table(table)

    if data:
        if make_immediately_available:
            output = cStringIO.StringIO()

            csv_out = csv.writer(output)
            for row in data:
                csv_out.writerow(row)

            job_config = LoadJobConfig()
            job_config.source_format = 'text/csv'
            # By default this does six retries. It does not accept any other timeout or
            # retry parameters.
            job = self.gclient.load_table_from_file(output, table.reference,
                                                    job_config=job_config,
                                                    rewind=True)
            job.result()

            output.close()
        else:
            self._stream_chunks_of_rows(table, data, schema)
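# A minimal sketch of the make_immediately_available branch above using only
# the public google-cloud-bigquery API on Python 3 (io instead of cStringIO).
# Client, table id, and the example rows are placeholders, not values from the
# snippet above.
import csv
import io

from google.cloud import bigquery

client = bigquery.Client()
table_ref = bigquery.TableReference.from_string("my-project.my_dataset.my_table")

buf = io.StringIO()
writer = csv.writer(buf)
for row in [("alice", 1), ("bob", 2)]:  # example rows
    writer.writerow(row)
payload = io.BytesIO(buf.getvalue().encode("utf-8"))  # upload expects bytes

job_config = bigquery.LoadJobConfig()
job_config.source_format = bigquery.SourceFormat.CSV

# A load job, unlike streaming inserts, leaves the data available for
# copying and exporting as soon as the job finishes.
job = client.load_table_from_file(payload, table_ref, job_config=job_config)
job.result()  # wait for the load job to complete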
def query_to_table(self, query, job_name, dataset_name=None):
    # external logging if required
    if self.log_lambda is not None:
        self.log_lambda(query)

    # Do nothing if use_cache
    if BigQueryExporter._use_cache:
        return

    # logging
    logging.info('[BigQueryExporter] [' + job_name + '] ::query_to_table start')
    startTime = datetime.now()

    # initialize variables
    if dataset_name is None:
        dataset_name = self.dataset_name
    logging.info('[BigQueryExporter] [' + job_name + '] ::dataset is set to %s' % dataset_name)

    bigquery_client = self.bigquery_client

    # Point to the dataset and table
    destination_dataset = self.bigquery_client.dataset(dataset_name)
    destination_table = destination_dataset.table(job_name)

    # Create an empty table, deleting any existing table of the same name first
    try:
        logging.info('[BigQueryExporter] [' + job_name + '] ::bigquery_client.get_table(%s) ...' % destination_table)
        self.bigquery_client.get_table(destination_table)
        logging.info('[BigQueryExporter] [' + job_name + '] ::bigquery_client.delete_table(%s) ...' % destination_table)
        self.bigquery_client.delete_table(destination_table)
    except:
        logging.info('[BigQueryExporter] [' + job_name + '] ::exception point 01, dataset %s ...' % dataset_name)
        pass

    logging.info('[BigQueryExporter] [' + job_name + '] ::bigquery_client.create_table( Table( %s ) ) ...' % destination_table)
    self.bigquery_client.create_table(Table(destination_table))
    # destination_table.create()

    # Execute the job and save to table
    # unique_id = str(uuid.uuid4())
    # job = bigquery_client.run_async_query(unique_id, query)
    job_config = bigquery.QueryJobConfig()
    job_config.allow_large_results = True
    job_config.use_legacy_sql = False
    job_config.destination = destination_table

    logging.info('[BigQueryExporter] [' + job_name + '] ::bigquery_client.query() starts ...')
    logging.info('[BigQueryExporter] [' + job_name + '] ::job_config: %s' % str(job_config))
    job = self.bigquery_client.query(query, job_config=job_config)

    # Wait until the job is done
    while not job.done():
        time.sleep(1)

    # logging
    timeElapsed = datetime.now() - startTime
    logging.info('[BigQueryExporter] [' + job_name + '] ::query_to_table completed, elapsed {}s'.format(timeElapsed.seconds))
    return destination_table
def _use_query_results(self, response_json):
    # NB: be sure to remove the jobReference from the api response used to
    # create the Table instance.
    response_json_copy = response_json.copy()
    del response_json_copy['jobReference']

    mock_dataset = mock.Mock()
    mock_dataset._client = self._client
    mock_table = Table('mock_table', mock_dataset)

    self._client._connection.api_request.return_value = response_json
    self._client.dataset.return_value = mock_dataset
    mock_dataset.table.return_value = mock_table
def test__row_from_mapping_wo_schema(self):
    from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA

    MAPPING = {'full_name': 'Phred Phlyntstone', 'age': 32}
    dataset = DatasetReference(self.PROJECT, self.DS_ID)
    table_ref = dataset.table(self.TABLE_NAME)
    table = Table(table_ref)

    with self.assertRaises(ValueError) as exc:
        self._call_fut(MAPPING, table.schema)

    self.assertEqual(exc.exception.args, (_TABLE_HAS_NO_SCHEMA,))
def create_table(self, is_temporary=False):
    stream_schema_message = self.stream_schema_message
    client = self.open_connection()

    project_id = self.connection_config['project_id']
    dataset_id = self.schema_name
    table_name = self.table_name(stream_schema_message['stream'],
                                 is_temporary,
                                 without_schema=True)

    schema = [
        column_type(name, schema)
        for (name, schema) in self.flatten_schema.items()
    ]

    table = Table('{}.{}.{}'.format(project_id, dataset_id, table_name),
                  schema)
    if is_temporary:
        table.expires = datetime.datetime.now() + datetime.timedelta(days=1)

    client.create_table(table, schema)
def _item_to_table(iterator, resource):
    """Convert a JSON table to the native object.

    :type iterator: :class:`~google.cloud.iterator.Iterator`
    :param iterator: The iterator that is currently in use.

    :type resource: dict
    :param resource: An item to be converted to a table.

    :rtype: :class:`~google.cloud.bigquery.table.Table`
    :returns: The next table in the page.
    """
    return Table.from_api_repr(resource, iterator.dataset)
def table(self, name, schema=()):
    """Construct a table bound to this dataset.

    :type name: str
    :param name: Name of the table.

    :type schema: list of :class:`google.cloud.bigquery.table.SchemaField`
    :param schema: The table's schema

    :rtype: :class:`google.cloud.bigquery.table.Table`
    :returns: a new ``Table`` instance
    """
    return Table(name, dataset=self, schema=schema)
def _item_to_table(iterator, resource):
    """Convert a JSON table to the native object.

    :type iterator: :class:`~google.api.core.page_iterator.Iterator`
    :param iterator: The iterator that is currently in use.

    :type resource: dict
    :param resource: An item to be converted to a table.

    :rtype: :class:`~google.cloud.bigquery.table.Table`
    :returns: The next table in the page.
    """
    return Table.from_api_repr(resource, iterator.dataset)
def apply_schema_differences(
    schema_diffs: _SchemaDiffs,
    bigquery_client: BigQueryClient,
) -> None:
    print("Applying changes...")
    for table_identifier, difference in schema_diffs.items():
        if isinstance(difference, MissingTable):
            print("Creating table...")
            table = Table(
                table_identifier,
                schema=difference.local_table.get_schema_fields(),
            )
            if difference.local_table.time_partitioning:
                table.time_partitioning = difference.local_table.time_partitioning

            remote_table = bigquery_client.create_table(table)
            print(remote_table)
        elif isinstance(difference, ExistingTable):
            difference.remote_table.schema = difference.local_table.get_schema_fields()
            print(bigquery_client.update_table(difference.remote_table, ["schema"]))
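# A small hedged sketch of the schema-update call used above, outside the
# migration framework: fetch the table, append a new nullable column, and push
# only the "schema" field back. Identifiers are placeholders.
from google.cloud import bigquery

client = bigquery.Client()
table = client.get_table("my-project.my_dataset.my_table")

new_schema = list(table.schema)
new_schema.append(bigquery.SchemaField("new_column", "STRING", mode="NULLABLE"))
table.schema = new_schema

# BigQuery only allows additive, NULLABLE/REPEATED column changes this way.
table = client.update_table(table, ["schema"])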
def to_table(dataset_ref, model):
    schema = model.schema
    if schema:
        schema = tuple(
            BigQuerySchemaField.to_schema_field(s) for s in schema)
    else:
        schema = None

    table_ref = TableReference(dataset_ref, model.table_id)
    table = Table(table_ref, schema)

    table.friendly_name = model.friendly_name
    table.description = model.description
    table.expires = model.expires
    table.partitioning_type = model.partitioning_type
    if model.view_use_legacy_sql is not None:
        table.view_use_legacy_sql = model.view_use_legacy_sql
    if model.view_query is not None:
        table.view_query = model.view_query
    table.labels = model.labels if model.labels is not None else dict()

    return table
def download_table_as_file(self, full_table_id, dest, file_type, staging_location=None):
    """
    Download a BigQuery table as a file.

    Args:
        full_table_id (str): fully qualified BigQuery table id
        dest (str): destination filename
        file_type (feast.sdk.resources.feature_set.FileType): (default:
            FileType.CSV) exported file format
        staging_location (str, optional): url to staging_location (currently
            supports a folder in GCS)

    Returns:
        (str) path to the downloaded file
    """
    if not staging_location:
        df = self.download_table_as_df(full_table_id)
        if file_type == FileType.CSV:
            df.to_csv(dest, index=False)
        elif file_type == FileType.JSON:
            df.to_json(dest, index=False)
        else:
            raise ValueError(
                "Only FileType: CSV and JSON are supported for "
                "download_table_as_file without staging location")
        return dest

    if not is_gs_path(staging_location):
        raise ValueError("staging_uri must be a directory in GCS")

    temp_file_name = "temp_{}".format(int(round(time.time() * 1000)))
    staging_file_path = os.path.join(staging_location, temp_file_name)

    job_config = ExtractJobConfig()
    job_config.destination_format = file_type
    src_table = Table.from_string(full_table_id)
    job = self.bqclient.extract_table(src_table, staging_file_path,
                                      job_config=job_config)

    # await completion
    job.result()

    bucket_name, blob_name = split_gs_path(staging_file_path)
    bucket = self.storageclient.get_bucket(bucket_name)
    blob = bucket.blob(blob_name)
    blob.download_to_filename(dest)
    return dest
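# Hedged usage sketch for the method above (table id and paths are made up):
# without a staging location the table is pulled through pandas; with a GCS
# staging folder it goes through a BigQuery extract job instead.
downloader = TableDownloader()

# small table: direct download via pandas
downloader.download_table_as_file(
    "my-project.my_dataset.my_table", "/tmp/my_table.csv", FileType.CSV)

# large table: stage the extract in GCS first
downloader.download_table_as_file(
    "my-project.my_dataset.my_table", "/tmp/my_table.csv", FileType.CSV,
    staging_location="gs://my-bucket/staging/")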
def from_api_repr(cls, resource, client):
    """Factory: construct a job given its API representation

    .. note:

       This method assumes that the project found in the resource matches
       the client's project.

    :type resource: dict
    :param resource: dataset job representation returned from the API

    :type client: :class:`google.cloud.bigquery.client.Client`
    :param client: Client which holds credentials and project
                   configuration for the dataset.

    :rtype: :class:`google.cloud.bigquery.job.CopyJob`
    :returns: Job parsed from ``resource``.
    """
    name, config = cls._get_resource_config(resource)
    dest_config = config['destinationTable']
    dataset = Dataset(dest_config['datasetId'], client)
    destination = Table(dest_config['tableId'], dataset)
    sources = []
    source_configs = config.get('sourceTables')
    if source_configs is None:
        single = config.get('sourceTable')
        if single is None:
            raise KeyError(
                "Resource missing 'sourceTables' / 'sourceTable'")
        source_configs = [single]
    for source_config in source_configs:
        dataset = Dataset(source_config['datasetId'], client)
        sources.append(Table(source_config['tableId'], dataset))
    job = cls(name, destination, sources, client=client)
    job._set_properties(resource)
    return job
def create_table(self, table  # type: Table, TableReference
                 ):
    # type: (Table) -> None
    """
    Creates a table.

    Args:
        table: The Table or TableReference object to create. Note that if you pass a
            TableReference the table will be created with no schema.
    """
    if isinstance(table, TableReference):
        # Normally you'd pass in the schema here upon Table instantiation
        table = Table(table)
    self.gclient.create_table(table)
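# Hedged usage sketch for the wrapper above: passing a Table carries a schema,
# while passing a bare TableReference creates a schemaless table. The wrapper
# instance name and identifiers are placeholders.
from google.cloud.bigquery import DatasetReference, SchemaField, Table

table_ref = DatasetReference("my-project", "my_dataset").table("my_table")

# created with a schema
bq_wrapper.create_table(Table(table_ref, schema=[SchemaField("id", "INTEGER")]))

# created with no schema
bq_wrapper.create_table(table_ref)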
def __extract_table_to_shard_folder(self, full_table_id, staging_location, file_type):
    shard_folder = os.path.join(staging_location,
                                'temp_%d' % int(round(time.time() * 1000)))
    staging_file_path = os.path.join(shard_folder, "shard_*")

    job_config = ExtractJobConfig()
    job_config.destination_format = file_type
    job = self.bqclient.extract_table(
        Table.from_string(full_table_id),
        staging_file_path,
        job_config=job_config)
    # await completion
    job.result()
    return shard_folder
def test__row_from_mapping_w_schema(self):
    from google.cloud.bigquery.table import Table, SchemaField

    MAPPING = {
        'full_name': 'Phred Phlyntstone',
        'age': 32,
        'colors': ['red', 'green'],
        'extra': 'IGNORED',
    }
    dataset = DatasetReference(self.PROJECT, self.DS_ID)
    table_ref = dataset.table(self.TABLE_NAME)
    full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
    age = SchemaField('age', 'INTEGER', mode='REQUIRED')
    colors = SchemaField('colors', 'DATETIME', mode='REPEATED')
    joined = SchemaField('joined', 'STRING', mode='NULLABLE')
    table = Table(table_ref, schema=[full_name, age, colors, joined])

    self.assertEqual(
        self._call_fut(MAPPING, table.schema),
        ('Phred Phlyntstone', 32, ['red', 'green'], None))
def test__row_from_mapping_w_invalid_schema(self):
    from google.cloud.bigquery.table import Table, SchemaField

    MAPPING = {
        'full_name': 'Phred Phlyntstone',
        'age': 32,
        'colors': ['red', 'green'],
        'bogus': 'WHATEVER',
    }
    dataset = DatasetReference(self.PROJECT, self.DS_ID)
    table_ref = dataset.table(self.TABLE_NAME)
    full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
    age = SchemaField('age', 'INTEGER', mode='REQUIRED')
    colors = SchemaField('colors', 'DATETIME', mode='REPEATED')
    bogus = SchemaField('joined', 'STRING', mode='BOGUS')
    table = Table(table_ref, schema=[full_name, age, colors, bogus])

    with self.assertRaises(ValueError) as exc:
        self._call_fut(MAPPING, table.schema)

    self.assertIn('Unknown field mode: BOGUS', str(exc.exception))
def list_tables(self, max_results=None, page_token=None):
    """List tables for the dataset.

    See:
    https://cloud.google.com/bigquery/docs/reference/v2/tables/list

    :type max_results: int
    :param max_results: maximum number of tables to return. If not
                        passed, defaults to a value set by the API.

    :type page_token: string
    :param page_token: opaque marker for the next "page" of tables. If
                       not passed, the API will return the first page of
                       tables.

    :rtype: tuple, (list, str)
    :returns: list of :class:`google.cloud.bigquery.table.Table`, plus a
              "next page token" string: if not ``None``, indicates that
              more tables can be retrieved with another call (pass that
              value as ``page_token``).
    """
    params = {}

    if max_results is not None:
        params['maxResults'] = max_results

    if page_token is not None:
        params['pageToken'] = page_token

    path = '/projects/%s/datasets/%s/tables' % (self.project, self.name)
    connection = self._client.connection
    resp = connection.api_request(method='GET', path=path,
                                  query_params=params)
    tables = [
        Table.from_api_repr(resource, self)
        for resource in resp.get('tables', ())
    ]
    return tables, resp.get('nextPageToken')
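# A short hedged sketch of driving the page_token protocol described in the
# docstring above; `dataset` stands in for whatever object exposes this
# list_tables method.
all_tables = []
token = None
while True:
    tables, token = dataset.list_tables(max_results=100, page_token=token)
    all_tables.extend(tables)
    if token is None:
        break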
def test_ctor(self):
    from google.cloud.bigquery.table import Table

    client = _make_client(project=self.PROJECT)
    source = Table(self.TABLE_REF)
    job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client)
    self.assertEqual(job.source.project, self.PROJECT)
    self.assertEqual(job.source.dataset_id, self.DS_ID)
    self.assertEqual(job.source.table_id, self.TABLE_ID)
    self.assertEqual(job.destination_uris, [self.DESTINATION_URI])
    self.assertIs(job._client, client)
    self.assertEqual(job.job_type, self.JOB_TYPE)
    self.assertEqual(job.path,
                     "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID))

    self._verifyInitialReadonlyProperties(job)

    # set/read from resource['configuration']['extract']
    self.assertIsNone(job.compression)
    self.assertIsNone(job.destination_format)
    self.assertIsNone(job.field_delimiter)
    self.assertIsNone(job.print_header)
def list_tables(self, max_results=None, page_token=None):
    """List tables for the dataset.

    See:
    https://cloud.google.com/bigquery/docs/reference/v2/tables/list

    :type max_results: int
    :param max_results: maximum number of tables to return. If not
                        passed, defaults to a value set by the API.

    :type page_token: str
    :param page_token: opaque marker for the next "page" of tables. If
                       not passed, the API will return the first page of
                       tables.

    :rtype: tuple, (list, str)
    :returns: list of :class:`google.cloud.bigquery.table.Table`, plus a
              "next page token" string: if not ``None``, indicates that
              more tables can be retrieved with another call (pass that
              value as ``page_token``).
    """
    params = {}

    if max_results is not None:
        params['maxResults'] = max_results

    if page_token is not None:
        params['pageToken'] = page_token

    path = '/projects/%s/datasets/%s/tables' % (self.project, self.name)
    connection = self._client.connection
    resp = connection.api_request(method='GET', path=path,
                                  query_params=params)
    tables = [Table.from_api_repr(resource, self)
              for resource in resp.get('tables', ())]
    return tables, resp.get('nextPageToken')
from google.api_core.exceptions import NotFound
from google.cloud.bigquery import SchemaField
from google.cloud.bigquery.table import Table

from pontoz.bigquery.client import client

for pontoz_dataset in client.list_datasets():
    pass

_transactions_ref = pontoz_dataset.table('transactions')

try:
    transactions_table = client.get_table(_transactions_ref)
except NotFound:
    transactions_table = Table(_transactions_ref)
    SCHEMA = [
        SchemaField('id', 'INT64', 'REQUIRED', None, ()),
        SchemaField('sale', 'FLOAT64', 'REQUIRED', None, ()),
        SchemaField('pointz_sale', 'FLOAT64', 'REQUIRED', None, ()),
        SchemaField('year', 'INT64', 'REQUIRED', None, ()),
        SchemaField('month', 'INT64', 'REQUIRED', None, ()),
        SchemaField('day', 'INT64', 'REQUIRED', None, ()),
        SchemaField('store_name', 'string', 'REQUIRED', None, ()),
        SchemaField('store_id', 'INT64', 'REQUIRED', None, ()),
        SchemaField('region_name', 'string', 'REQUIRED', None, ()),
        SchemaField('region_id', 'INT64', 'REQUIRED', None, ()),
        SchemaField('client_name', 'string', 'REQUIRED', None, ()),
        SchemaField('client_id', 'INT64', 'REQUIRED', None, ()),
        SchemaField('segment_name', 'string', 'REQUIRED', None, ()),
    ]
    transactions_table.schema = SCHEMA
    transactions_table = client.create_table(transactions_table)
def create_temporary_data_source(source_uri):
    """Create a temporary data source so BigQuery can query the CSV in
    Google Cloud Storage.

    Nothing like this is currently implemented in the
    google-cloud-python library.

    Returns a table reference suitable for using in a BigQuery SQL
    query (legacy format).
    """
    schema = [
        {"name": "Regional_Office_Name", "type": "string"},
        {"name": "Regional_Office_Code", "type": "string"},
        {"name": "Area_Team_Name", "type": "string"},
        {"name": "Area_Team_Code", "type": "string", "mode": "required"},
        {"name": "PCO_Name", "type": "string"},
        {"name": "PCO_Code", "type": "string"},
        {"name": "Practice_Name", "type": "string"},
        {"name": "Practice_Code", "type": "string", "mode": "required"},
        {"name": "BNF_Code", "type": "string", "mode": "required"},
        {"name": "BNF_Description", "type": "string", "mode": "required"},
        {"name": "Items", "type": "integer", "mode": "required"},
        {"name": "Quantity", "type": "integer", "mode": "required"},
        {"name": "ADQ_Usage", "type": "float"},
        {"name": "NIC", "type": "float", "mode": "required"},
        {"name": "Actual_Cost", "type": "float", "mode": "required"},
    ]
    resource = {
        "tableReference": {
            "tableId": TEMP_SOURCE_NAME
        },
        "externalDataConfiguration": {
            "csvOptions": {
                "skipLeadingRows": "1"
            },
            "sourceFormat": "CSV",
            "sourceUris": [source_uri],
            "schema": {"fields": schema}
        }
    }
    client = bigquery.client.Client(project='ebmdatalab')
    # delete the table if it exists
    dataset = Dataset("tmp_eu", client)
    table = Table.from_api_repr(resource, dataset)
    try:
        table.delete()
    except NotFound:
        pass
    # Now create it
    path = "/projects/ebmdatalab/datasets/%s/tables" % TEMP_DATASET
    client._connection.api_request(method='POST', path=path, data=resource)
    return "[ebmdatalab:%s.%s]" % (TEMP_DATASET, TEMP_SOURCE_NAME)
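# A hedged sketch of the same idea with the ExternalConfig API that later
# google-cloud-bigquery releases provide, rather than POSTing the raw resource
# as the function above does. Only a couple of schema fields are shown, the
# source URI is a placeholder, and TEMP_DATASET / TEMP_SOURCE_NAME are the
# module-level constants referenced above.
from google.cloud import bigquery

client = bigquery.Client(project="ebmdatalab")

external_config = bigquery.ExternalConfig("CSV")
external_config.source_uris = ["gs://my-bucket/prescribing.csv"]  # placeholder URI
external_config.schema = [
    bigquery.SchemaField("Practice_Code", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("Items", "INTEGER", mode="REQUIRED"),
]
external_config.options.skip_leading_rows = 1

table_ref = bigquery.DatasetReference("ebmdatalab", TEMP_DATASET).table(TEMP_SOURCE_NAME)
table = bigquery.Table(table_ref)
table.external_data_configuration = external_config
client.create_table(table, exists_ok=True)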
def get_table_object(table_id, dataset, *args):
    return Table(table_id, dataset, *args)
def bq_insert(rows: List):
    """
    Inserts rows into BigQuery.

    :param rows: list of dictionaries which are representing rows
    :return:
    """
    from google.cloud import bigquery

    if not rows:
        logging.error("no rows to upload")
        return

    bq = bigquery.Client(project=GCP_PROJECT)
    table_ref = TableReference.from_string(
        f"{GCP_PROJECT}.live.om_state_latencies")
    schema = [
        {"name": "date", "type": "DATE"},
        {"name": "sym", "type": "STRING"},
        {"name": "from_state", "type": "STRING"},
        {"name": "to_state", "type": "STRING"},
        {"name": "count", "type": "INTEGER"},
        {"name": "average", "type": "FLOAT"},
        {"name": "percentile_10", "type": "FLOAT"},
        {"name": "percentile_50", "type": "FLOAT"},
        {"name": "percentile_90", "type": "FLOAT"},
        {"name": "percentile_99", "type": "FLOAT"},
        {"name": "percentile_99_99", "type": "FLOAT"},
    ]
    table = Table(table_ref)
    table.schema = schema
    table = bq.create_table(table, exists_ok=True)

    logging.info("inserting {} rows".format(len(rows)))
    res = bq.insert_rows(table, rows)
    logging.info(res)
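# Hedged usage sketch for bq_insert above: each row is a dict keyed by the
# column names in the schema. The values shown are made up.
import datetime

bq_insert([
    {
        "date": datetime.date(2020, 1, 1),
        "sym": "ABC",
        "from_state": "NEW",
        "to_state": "FILLED",
        "count": 10,
        "average": 1.5,
        "percentile_10": 0.2,
        "percentile_50": 1.1,
        "percentile_90": 3.0,
        "percentile_99": 4.2,
        "percentile_99_99": 5.0,
    },
])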