def get_query_results(self, query, use_legacy_sql=False, max_wait_secs=None):
    # type: (str, Optional[bool], Optional[int]) -> List[Tuple[Any]]
    """Run a SQL query and return its rows as a list of value tuples.

    Args:
        query: A string with a complete SQL query.
        use_legacy_sql: Whether to use legacy SQL.
        max_wait_secs: The maximum number of seconds to wait for the query to
            complete. If not set, the class default will be used.

    Returns:
        A list of tuples of values.
    """
    job_config = QueryJobConfig()
    if self.maximum_billing_tier:
        job_config.maximum_billing_tier = self.maximum_billing_tier
    job_config.use_legacy_sql = use_legacy_sql

    # This retry handles errors raised while submitting/executing the job.
    query_job = self.gclient.query(query, job_config=job_config,
                                   retry=self.default_retry_for_api_calls)
    # This retry handles errors raised while polling to see whether the job
    # is done. NOTE(review): assigning the private _retry attribute relies on
    # google-cloud-bigquery internals.
    query_job._retry = self.default_retry_for_async_jobs

    timeout = max_wait_secs or self.max_wait_secs
    result_rows = self._wait_for_job(query_job, query, max_wait_secs=timeout)
    if query_job.errors:
        logging.warning('Errors in get_query_results: {}'.format(query_job.errors))
    return [row.values() for row in list(result_rows)]
def select_insert(self, source_table_id, destination_table_id, query_field,
                  prefix=' ', fg='yellow'):
    """Copy selected fields from one table into another via a query job.

    Runs ``SELECT <query_field> FROM <dataset>.<source_table_id>`` and writes
    the result into ``destination_table_id``, truncating any existing content.

    Args:
        source_table_id: Table to read from, within this instance's dataset.
        destination_table_id: Table to write the query results to.
        query_field: SQL select expression (field list) to copy.
        prefix: Prefix passed through to ``echo`` for log formatting.
        fg: Foreground color passed through to ``echo``.

    Raises:
        RuntimeError: If the finished job is not in the DONE state, or if the
            job reports an error result.
    """
    query = 'SELECT {query_field} FROM {dataset_id}.{source_table_id}'.format(
        query_field=query_field,
        dataset_id=self._dataset_ref.dataset_id,
        source_table_id=source_table_id)
    destination_table = self.dataset.table(destination_table_id)

    job_config = QueryJobConfig()
    job_config.use_legacy_sql = False
    job_config.use_query_cache = False
    job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE
    job_config.destination = destination_table

    job = self._client.query(query, job_config)
    echo('Inserting... {0}'.format(job.job_id), prefix=prefix, fg=fg,
         no_color=self.no_color)
    echo(' {0}'.format(job.query), prefix=prefix, fg=fg, no_color=self.no_color)
    job.result()  # Blocks until the query job completes.
    # This was previously `assert job.state == 'DONE'`; asserts are stripped
    # when Python runs with -O, silently skipping the check. Raise explicitly.
    if job.state != 'DONE':
        raise RuntimeError('job state is {0}, expected DONE'.format(job.state))
    if job.error_result:
        raise RuntimeError(job.errors)
def execute(self, query, destination_table, write_disposition="WRITE_TRUNCATE",
            allow_large_results=True):
    """Run a query, optionally writing its results to a destination table.

    :param query: query file path, resolved to SQL text via ``self.__get_query``
        (the docstring previously named this ``query_file``, which did not
        match the actual parameter name)
    :param destination_table: target table; if falsy, results are not written
        to any table
    :param write_disposition: default is to replace existing table.
        To append: WRITE_APPEND
    :param allow_large_results: default to True
    :return: None; the started job is stored on ``self._query_job``
    :raises Exception: if the submitted job immediately reports errors
    """
    query_configuration = QueryJobConfig()
    query_configuration.use_legacy_sql = False
    if destination_table:
        ref = TableReferenceBuilder(destination_table, self._dataset, self._project)
        query_configuration.write_disposition = write_disposition
        query_configuration.default_dataset = ref.dataset_reference
        query_configuration.destination = ref.table_reference
        query_configuration.allow_large_results = allow_large_results
    sql_query = self.__get_query(query)
    if not self._quiet:
        print("-- #### {}\n{}\n".format(destination_table or "", sql_query))
    self._query_job = bigquery.Client(project=self._project).query(
        sql_query, job_config=query_configuration)
    if self._query_job.errors:
        raise Exception(self._query_job.errors)
def get_query_results(self, query, use_legacy_sql=False, max_wait_secs=None):
    # type: (str, Optional[bool], Optional[int]) -> List[Tuple[Any]]
    """Execute a query and collect every result row as a tuple of values.

    Args:
        query: A string with a complete SQL query.
        use_legacy_sql: Whether to use legacy SQL.
        max_wait_secs: The maximum number of seconds to wait for the query to
            complete. If not set, the class default will be used.

    Returns:
        A list of tuples of values.
    """
    job_config = QueryJobConfig()
    job_config.use_legacy_sql = use_legacy_sql
    if self.maximum_billing_tier:
        job_config.maximum_billing_tier = self.maximum_billing_tier

    job = self._run_async_query(query, job_config=job_config)
    wait_secs = max_wait_secs or self.max_wait_secs
    fetched = self._wait_for_job(job, query, max_wait_secs=wait_secs)
    if job.errors:
        logging.warning('Errors in get_query_results: {}'.format(job.errors))
    return [row.values() for row in list(fetched)]
def execute_sync_query(project_id, query_str, bq_client=None):
    """Synchronously run a standard-SQL query and return all result rows.

    Args:
        project_id: GCP project for the client (used only when ``bq_client``
            is not supplied).
        query_str: The SQL query text (standard SQL; query cache disabled).
        bq_client: Optional pre-built ``bigquery.Client``; a new one is
            created from ``project_id`` when omitted.

    Returns:
        A list of the completed query job's result rows.
    """
    if bq_client is None:
        bq_client = bigquery.Client(project_id)
    config = QueryJobConfig()
    config.use_legacy_sql = False
    config.use_query_cache = False
    # NOTE(review): job location is hard-coded to "EU" — confirm this is
    # intended for all callers.
    query_job = bq_client.query(query_str, job_config=config, location="EU")
    # Iterating the job waits for completion. list() replaces the previous
    # manual append loop — same rows, same order, one C-level pass.
    return list(query_job)
def create_table_from_query(
        self,
        query,  # type: str
        table_path,  # type: str
        write_disposition='WRITE_EMPTY',  # type: Optional[str]
        use_legacy_sql=False,  # type: Optional[bool]
        max_wait_secs=None,  # type: Optional[int]
        expected_schema=None  # type: Optional[List[SchemaField]]
):
    # type: (...) -> None
    """Materialize the results of a query into a BigQuery table.

    Args:
        query: The query to run.
        table_path: The path to the table (in the client's project) to write
            the results to.
        write_disposition: Specifies behavior if table already exists. See
            options here:
            https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs
            under configuration.query.writeDisposition
        use_legacy_sql: Whether the query is written in standard or legacy sql.
        max_wait_secs: Seconds to wait for the query before timing out. If not
            set, the class default will be used.
        expected_schema: The expected schema of the resulting table; unused in
            this implementation.
    """
    if write_disposition not in ('WRITE_TRUNCATE', 'WRITE_APPEND', 'WRITE_EMPTY'):
        raise ValueError('write_disposition must be one of WRITE_TRUNCATE, '
                         'WRITE_APPEND, or WRITE_EMPTY')

    job_config = QueryJobConfig()
    if self.maximum_billing_tier:
        job_config.maximum_billing_tier = self.maximum_billing_tier
    job_config.use_legacy_sql = use_legacy_sql
    job_config.write_disposition = write_disposition
    job_config.allow_large_results = True
    job_config.destination = self.get_table_reference_from_path(table_path)

    async_job = self._run_async_query(query, job_config=job_config)
    # NOTE(review): the type comment declares "-> None" but the wait result is
    # returned here — confirm which is intended.
    return self._wait_for_job(async_job, query,
                              max_wait_secs=max_wait_secs or self.max_wait_secs)
def execute(self, query, tbl_ref=None, append=False, preview=True):
    """Resolve and run a query, optionally writing results into ``tbl_ref``.

    In preview mode the resolved SQL is printed and nothing is executed.

    Raises:
        Exception: If the submitted job immediately reports errors.
    """
    sql_query = self.__get_query(query)
    if tbl_ref:
        print("-- ## " + str(tbl_ref))
    print("{}{}".format("-- preview: \n" if preview else "", sql_query))
    if preview:
        return

    config = QueryJobConfig()
    config.use_legacy_sql = False
    if tbl_ref:
        if append:
            config.write_disposition = "WRITE_APPEND"
        else:
            config.write_disposition = "WRITE_TRUNCATE"
        config.default_dataset = tbl_ref.dataset_ref
        config.destination = tbl_ref.table_ref
        config.allow_large_results = True

    project = tbl_ref.project if tbl_ref else None
    job = self.connect(project).query(sql_query, job_config=config)
    if job.errors:
        raise Exception(job.errors)
def create_table_from_query(self,
                            query,  # type: str
                            table_path,  # type: str
                            write_disposition='WRITE_EMPTY',  # type: Optional[str]
                            use_legacy_sql=False,  # type: Optional[bool]
                            max_wait_secs=None,  # type: Optional[int]
                            expected_schema=None  # type: Optional[List[SchemaField]]
                            ):
    # type: (...) -> None
    """Run a query and write its results to the table at ``table_path``.

    Args:
        query: The query to run.
        table_path: The path to the table (in the client's project) to write
            the results to.
        write_disposition: One of 'WRITE_TRUNCATE', 'WRITE_APPEND',
            'WRITE_EMPTY'. Default is WRITE_EMPTY.
        use_legacy_sql: Whether the query is written in standard or legacy sql.
        max_wait_secs: Seconds to wait for the query before timing out. If not
            set, the class default will be used.
        expected_schema: The expected schema of the resulting table; unused in
            this implementation.
    """
    if write_disposition not in ('WRITE_TRUNCATE', 'WRITE_APPEND', 'WRITE_EMPTY'):
        raise ValueError('write_disposition must be one of WRITE_TRUNCATE, '
                         'WRITE_APPEND, or WRITE_EMPTY')

    job_config = QueryJobConfig()
    if self.maximum_billing_tier:
        job_config.maximum_billing_tier = self.maximum_billing_tier
    job_config.use_legacy_sql = use_legacy_sql
    job_config.write_disposition = write_disposition
    job_config.allow_large_results = True
    job_config.destination = self.get_table_reference_from_path(table_path)

    job = self.gclient.query(query, job_config=job_config, retry=self.default_retry)
    # result() blocks until the job finishes or the timeout elapses.
    return job.result(timeout=max_wait_secs or self.max_wait_secs)
def get_query_results(self, query, use_legacy_sql=False, max_wait_secs=None):
    # type: (str, Optional[bool], Optional[int]) -> List[Tuple[Any]]
    """Returns a list of rows, each of which is a tuple of values.

    Args:
        query: A string with a complete SQL query.
        use_legacy_sql: Whether to use legacy SQL
        max_wait_secs: The maximum number of seconds to wait for the query
            to complete. If not set, the class default will be used.

    Returns:
        A list of tuples of values.
    """
    config = QueryJobConfig()
    if self.maximum_billing_tier:
        config.maximum_billing_tier = self.maximum_billing_tier
    config.use_legacy_sql = use_legacy_sql
    # default_retry governs both the job submission and the result polling.
    query_job = self.gclient.query(query, job_config=config, retry=self.default_retry)
    rows = query_job.result(retry=self.default_retry,
                            timeout=max_wait_secs or self.max_wait_secs)
    # Iterate the row iterator directly — no need to materialize a throwaway
    # list first. (Also fixed: the type comment said Optional[Bool], which is
    # not a type; it is Optional[bool].)
    return [x.values() for x in rows]
def create_config():
    """Build a QueryJobConfig that forces standard (non-legacy) SQL."""
    job_config = QueryJobConfig()
    job_config.use_legacy_sql = False
    return job_config