def _bq_cell_magic(line, query): from IPython.core import magic_arguments from google.cloud.bigquery.magics import _cell_magic import time start = time.time() args = magic_arguments.parse_argstring(_cell_magic, line) if args.params is not None: try: from google.cloud.bigquery.dbapi import _helpers import ast params = _helpers.to_query_parameters( ast.literal_eval("".join(args.params))) query_params = dict() for p in params: query_params[p.name] = p.value query = query.format(**query_params) except Exception: raise SyntaxError( "--params is not a correctly formatted JSON string or a JSON " "serializable dictionary") result = _cell_magic(line, query) print(f"BigQuery execution took {int(time.time() - start)} seconds.") return result
def execute(self, operation, parameters=None, job_id=None): """Prepare and execute a database operation. .. note:: When setting query parameters, values which are "text" (``unicode`` in Python2, ``str`` in Python3) will use the 'STRING' BigQuery type. Values which are "bytes" (``str`` in Python2, ``bytes`` in Python3), will use using the 'BYTES' type. A `~datetime.datetime` parameter without timezone information uses the 'DATETIME' BigQuery type (example: Global Pi Day Celebration March 14, 2017 at 1:59pm). A `~datetime.datetime` parameter with timezone information uses the 'TIMESTAMP' BigQuery type (example: a wedding on April 29, 2011 at 11am, British Summer Time). For more information about BigQuery data types, see: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types ``STRUCT``/``RECORD`` and ``REPEATED`` query parameters are not yet supported. See: https://github.com/GoogleCloudPlatform/google-cloud-python/issues/3524 :type operation: str :param operation: A Google BigQuery query string. :type parameters: Mapping[str, Any] or Sequence[Any] :param parameters: (Optional) dictionary or sequence of parameter values. :type job_id: str :param job_id: (Optional) The job_id to use. If not set, a job ID is generated at random. """ self._query_data = None self._query_job = None client = self.connection._client # The DB-API uses the pyformat formatting, since the way BigQuery does # query parameters was not one of the standard options. Convert both # the query and the parameters to the format expected by the client # libraries. formatted_operation = _format_operation(operation, parameters=parameters) query_parameters = _helpers.to_query_parameters(parameters) config = job.QueryJobConfig() config.query_parameters = query_parameters config.use_legacy_sql = False self._query_job = client.query( formatted_operation, job_config=config, job_id=job_id ) # Wait for the query to finish. try: self._query_job.result() except google.cloud.exceptions.GoogleCloudError as exc: raise exceptions.DatabaseError(exc) query_results = self._query_job._query_results self._set_rowcount(query_results) self._set_description(query_results.schema)
def execute(self, operation, parameters=None, job_id=None): """Prepare and execute a database operation. .. note:: When setting query parameters, values which are "text" (``unicode`` in Python2, ``str`` in Python3) will use the 'STRING' BigQuery type. Values which are "bytes" (``str`` in Python2, ``bytes`` in Python3), will use using the 'BYTES' type. A `~datetime.datetime` parameter without timezone information uses the 'DATETIME' BigQuery type (example: Global Pi Day Celebration March 14, 2017 at 1:59pm). A `~datetime.datetime` parameter with timezone information uses the 'TIMESTAMP' BigQuery type (example: a wedding on April 29, 2011 at 11am, British Summer Time). For more information about BigQuery data types, see: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types ``STRUCT``/``RECORD`` and ``REPEATED`` query parameters are not yet supported. See: https://github.com/GoogleCloudPlatform/google-cloud-python/issues/3524 :type operation: str :param operation: A Google BigQuery query string. :type parameters: Mapping[str, Any] or Sequence[Any] :param parameters: (Optional) dictionary or sequence of parameter values. :type job_id: str :param job_id: (Optional) The job_id to use. If not set, a job ID is generated at random. """ self._query_data = None self._query_job = None client = self.connection._client # The DB-API uses the pyformat formatting, since the way BigQuery does # query parameters was not one of the standard options. Convert both # the query and the parameters to the format expected by the client # libraries. formatted_operation = _format_operation( operation, parameters=parameters) query_parameters = _helpers.to_query_parameters(parameters) config = job.QueryJobConfig() config.query_parameters = query_parameters config.use_legacy_sql = False self._query_job = client.query( formatted_operation, job_config=config, job_id=job_id) # Wait for the query to finish. try: self._query_job.result() except google.cloud.exceptions.GoogleCloudError as exc: raise exceptions.DatabaseError(exc) query_results = self._query_job._query_results self._set_rowcount(query_results) self._set_description(query_results.schema)
def _cell_magic(line, query=None): args = magic_arguments.parse_argstring(_cell_magic, line) args_dict = vars(args) active_unsupport_args = [] for unsupport_arg in UNSUPPORTED_ARGS: if args_dict[unsupport_arg] is not None and args_dict[ unsupport_arg] is not False: active_unsupport_args.append(unsupport_arg) if len(active_unsupport_args) > 0: active_unsupport_args.sort() print(F"Unsupported args: {', '.join(active_unsupport_args)}") params = None if args.maximum_bytes_billed is None: maximum_bytes_billed = None elif args.maximum_bytes_billed == 'None': maximum_bytes_billed = 0 else: maximum_bytes_billed = int(args.maximum_bytes_billed) # test params syntax if args.params is not None: try: params = ast.literal_eval("".join(args.params)) _helpers.to_query_parameters(params) except Exception: raise SyntaxError("--params is not a correctly formatted\ JSON string or a JSON " "serializable dictionary") query_flags = { 'destination_table': args.destination_table, 'project': args.project, 'maximum_bytes_billed': maximum_bytes_billed, 'params': params, 'use_legacy_sql': args.use_legacy_sql, } e = QueryIncellEditor() e.query = query.strip('\n') if isinstance(query, str) else '' e.query_flags = json.dumps(query_flags) return e
def _cell_magic(line, query): """Underlying function for bigquery cell magic Note: This function contains the underlying logic for the 'bigquery' cell magic. This function is not meant to be called directly. Args: line (str): "%%bigquery" followed by arguments as required query (str): SQL query to run Returns: pandas.DataFrame: the query results. """ args = magic_arguments.parse_argstring(_cell_magic, line) params = [] if args.params is not None: try: params = _helpers.to_query_parameters( ast.literal_eval("".join(args.params))) except Exception: raise SyntaxError( "--params is not a correctly formatted JSON string or a JSON " "serializable dictionary") project = args.project or context.project client = bigquery.Client( project=project, credentials=context.credentials, default_query_job_config=context.default_query_job_config, client_info=client_info.ClientInfo( user_agent="ipython-{}".format(IPython.__version__)), ) if context._connection: client._connection = context._connection bqstorage_client = _make_bqstorage_client( args.use_bqstorage_api or context.use_bqstorage_api, context.credentials) job_config = bigquery.job.QueryJobConfig() job_config.query_parameters = params job_config.use_legacy_sql = args.use_legacy_sql if args.maximum_bytes_billed == "None": job_config.maximum_bytes_billed = 0 elif args.maximum_bytes_billed is not None: value = int(args.maximum_bytes_billed) job_config.maximum_bytes_billed = value query_job = _run_query(client, query, job_config) if not args.verbose: display.clear_output() result = query_job.to_dataframe(bqstorage_client=bqstorage_client) if args.destination_var: IPython.get_ipython().push({args.destination_var: result}) else: return result
def test_to_query_parameters_complex_types(parameters, parameter_types, expect): from google.cloud.bigquery.dbapi._helpers import to_query_parameters result = [ p.to_api_repr() for p in to_query_parameters(parameters, parameter_types) ] assert result == expect
def test_to_query_parameters_struct_error(): from google.cloud.bigquery.dbapi._helpers import to_query_parameters with pytest.raises( NotImplementedError, match=_expected_error_match( "STRUCT-like parameter values are not supported, " "unless an explicit type is give in the parameter placeholder " "(e.g. '%(:struct<...>)s')."), ): to_query_parameters([dict(x=1)], [None]) with pytest.raises( NotImplementedError, match=_expected_error_match( "STRUCT-like parameter values are not supported (parameter foo), " "unless an explicit type is give in the parameter placeholder " "(e.g. '%(foo:struct<...>)s')."), ): to_query_parameters(dict(foo=dict(x=1)), {})
def _cell_magic(line, query=None): args = magic_arguments.parse_argstring(_cell_magic, line) params = None if args.maximum_bytes_billed is None: maximum_bytes_billed = None elif args.maximum_bytes_billed == 'None': maximum_bytes_billed = 0 else: maximum_bytes_billed = int(args.maximum_bytes_billed) # test params syntax if args.params is not None: try: params = ast.literal_eval("".join(args.params)) _helpers.to_query_parameters(params) except Exception: raise SyntaxError( "--params is not a correctly formatted JSON string or a JSON " "serializable dictionary") query_flags = { 'destination_table': args.destination_table, 'project': args.project, 'maximum_bytes_billed': maximum_bytes_billed, 'params': params, 'use_legacy_sql': args.use_legacy_sql, 'destination_var': args.destination_var, } e = QueryIncellEditor() e.query = query if isinstance(query, str) else '' e.query_flags = json.dumps(query_flags) if args.destination_var: e.observe(partial(handler, args.destination_var, e)) return e
def _cell_magic(line, query): """Underlying function for bigquery cell magic Note: This function contains the underlying logic for the 'bigquery' cell magic. This function is not meant to be called directly. Args: line (str): "%%bigquery" followed by arguments as required query (str): SQL query to run Returns: pandas.DataFrame: the query results. """ args = magic_arguments.parse_argstring(_cell_magic, line) params = [] if args.params is not None: try: params = _helpers.to_query_parameters( ast.literal_eval("".join(args.params)) ) except Exception: raise SyntaxError( "--params is not a correctly formatted JSON string or a JSON " "serializable dictionary" ) project = args.project or context.project client = bigquery.Client(project=project, credentials=context.credentials) bqstorage_client = _make_bqstorage_client( args.use_bqstorage_api or context.use_bqstorage_api, context.credentials ) job_config = bigquery.job.QueryJobConfig() job_config.query_parameters = params job_config.use_legacy_sql = args.use_legacy_sql query_job = _run_query(client, query, job_config) if not args.verbose: display.clear_output() result = query_job.to_dataframe(bqstorage_client=bqstorage_client) if args.destination_var: IPython.get_ipython().push({args.destination_var: result}) else: return result
def _execute(self, formatted_operation, parameters, job_id, job_config, parameter_types): self._query_data = None self._query_job = None client = self.connection._client # The DB-API uses the pyformat formatting, since the way BigQuery does # query parameters was not one of the standard options. Convert both # the query and the parameters to the format expected by the client # libraries. query_parameters = _helpers.to_query_parameters( parameters, parameter_types) if client._default_query_job_config: if job_config: config = job_config._fill_from_default( client._default_query_job_config) else: config = copy.deepcopy(client._default_query_job_config) else: config = job_config or job.QueryJobConfig(use_legacy_sql=False) config.query_parameters = query_parameters self._query_job = client.query(formatted_operation, job_config=config, job_id=job_id) if self._query_job.dry_run: self._set_description(schema=None) self.rowcount = 0 return # Wait for the query to finish. try: self._query_job.result() except google.cloud.exceptions.GoogleCloudError as exc: raise exceptions.DatabaseError(exc) query_results = self._query_job._query_results self._set_rowcount(query_results) self._set_description(query_results.schema)
def _cell_magic(line, query): """Underlying function for bigquery cell magic Note: This function contains the underlying logic for the 'bigquery' cell magic. This function is not meant to be called directly. Args: line (str): "%%bigquery" followed by arguments as required query (str): SQL query to run Returns: pandas.DataFrame: the query results. """ args = magic_arguments.parse_argstring(_cell_magic, line) params = [] if args.params is not None: try: params = _helpers.to_query_parameters( ast.literal_eval("".join(args.params))) except Exception: raise SyntaxError( "--params is not a correctly formatted JSON string or a JSON " "serializable dictionary") project = args.project or context.project client = bigquery.Client(project=project, credentials=context.credentials) job_config = bigquery.job.QueryJobConfig() job_config.query_parameters = params job_config.use_legacy_sql = args.use_legacy_sql query_job = _run_query(client, query, job_config) if not args.verbose: display.clear_output() result = query_job.to_dataframe() if args.destination_var: IPython.get_ipython().push({args.destination_var: result}) return result
def query(self, request_body, page_size): query = request_body['query'] jobConfig = request_body['jobConfig'] dryRunOnly = request_body['dryRunOnly'] # process flags processed_flags = { support_flag: jobConfig[support_flag] for support_flag in SUPPORTED_JOB_CONFIG_FLAGS if support_flag in jobConfig } if 'params' in processed_flags: processed_flags['query_parameters'] = _helpers.to_query_parameters( processed_flags['params']) if 'maximum_bytes_billed' in processed_flags and\ processed_flags['maximum_bytes_billed'] is None: del processed_flags['maximum_bytes_billed'] if 'use_legacy_sql' in processed_flags and\ not isinstance(processed_flags['use_legacy_sql'], bool): raise ValueError( 'use_legacy_sql shoud be boolean, instead received {}'.format( processed_flags['use_legacy_sql'])) if 'destination_table' in processed_flags: processed_flags['destination'] = processed_flags[ 'destination_table'] del processed_flags['destination_table'] # dry run, will throw exception if fail dry_run_job_config = bigquery.QueryJobConfig(**processed_flags) dry_run_job_config.dry_run = True dry_run_job_config.use_query_cache = False try: with PagedQueryHandler.client_lock: if 'project' in jobConfig and jobConfig['project'] is not None: PagedQueryHandler.client.project = jobConfig['project'] else: PagedQueryHandler.client.project = PagedQueryHandler.orig_project dry_run_job = PagedQueryHandler.client.query( query, job_config=dry_run_job_config) PagedQueryHandler.client.project = PagedQueryHandler.orig_project except Exception as err: if hasattr(err, 'errors'): raise Exception(err.errors[0]['message']) else: raise Exception(err) total_bytes_processed = dry_run_job.total_bytes_processed if dryRunOnly: job_id = 'dry_run' if dry_run_job.job_id is None else dry_run_job.job_id yield dry_run_job, job_id yield { 'content': json.dumps(None), 'labels': json.dumps(None), 'bytesProcessed': json.dumps(total_bytes_processed) } return # actual run job_config = bigquery.QueryJobConfig(**processed_flags) # need synchronization since all query handler share the same client with PagedQueryHandler.client_lock: if 'project' in jobConfig and jobConfig['project'] is not None: PagedQueryHandler.client.project = jobConfig['project'] else: PagedQueryHandler.client.project = PagedQueryHandler.orig_project query_job = PagedQueryHandler.client.query(query, job_config=job_config) PagedQueryHandler.client.project = PagedQueryHandler.orig_project if query_job.error_result is not None: raise Exception(query_job.error_result) yield query_job, query_job.job_id # send contents en = query_job.result(page_size) schema_fields = format_preview_fields(en.schema) duration = (query_job.ended - query_job.started).total_seconds() for page in en.pages: if page.num_items > USE_PARALLEL_THRESH: content = parallel_format_preview_rows(page, en.schema, pool=self.pool) else: content = format_preview_rows(page, en.schema) response = { 'content': json.dumps(content), 'labels': schema_fields, 'bytesProcessed': total_bytes_processed, 'project': query_job.project, 'duration': duration, } yield response
def _cell_magic(line, query): """Underlying function for bigquery cell magic Note: This function contains the underlying logic for the 'bigquery' cell magic. This function is not meant to be called directly. Args: line (str): "%%bigquery" followed by arguments as required query (str): SQL query to run Returns: pandas.DataFrame: the query results. """ args = magic_arguments.parse_argstring(_cell_magic, line) params = [] if args.params is not None: try: params = _helpers.to_query_parameters( ast.literal_eval("".join(args.params))) except Exception: raise SyntaxError( "--params is not a correctly formatted JSON string or a JSON " "serializable dictionary") project = args.project or context.project client = bigquery.Client( project=project, credentials=context.credentials, default_query_job_config=context.default_query_job_config, client_info=client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT), ) if context._connection: client._connection = context._connection bqstorage_client = _make_bqstorage_client( args.use_bqstorage_api or context.use_bqstorage_api, context.credentials) if args.max_results: max_results = int(args.max_results) else: max_results = None query = query.strip() # Any query that does not contain whitespace (aside from leading and trailing whitespace) # is assumed to be a table id if not re.search(r"\s", query): try: rows = client.list_rows(query, max_results=max_results) except Exception as ex: _print_error(str(ex), args.destination_var) return result = rows.to_dataframe(bqstorage_client=bqstorage_client) if args.destination_var: IPython.get_ipython().push({args.destination_var: result}) return else: return result job_config = bigquery.job.QueryJobConfig() job_config.query_parameters = params job_config.use_legacy_sql = args.use_legacy_sql job_config.dry_run = args.dry_run if args.maximum_bytes_billed == "None": job_config.maximum_bytes_billed = 0 elif args.maximum_bytes_billed is not None: value = int(args.maximum_bytes_billed) job_config.maximum_bytes_billed = value try: query_job = _run_query(client, query, job_config=job_config) except Exception as ex: _print_error(str(ex), args.destination_var) return if not args.verbose: display.clear_output() if args.dry_run and args.destination_var: IPython.get_ipython().push({args.destination_var: query_job}) return elif args.dry_run: print("Query validated. This query will process {} bytes.".format( query_job.total_bytes_processed)) return query_job if max_results: result = query_job.result(max_results=max_results).to_dataframe( bqstorage_client=bqstorage_client) else: result = query_job.to_dataframe(bqstorage_client=bqstorage_client) if args.destination_var: IPython.get_ipython().push({args.destination_var: result}) else: return result
def execute(self, operation, parameters=None): """Prepare and execute a database operation. .. note:: When setting query parameters, values which are "text" (``unicode`` in Python2, ``str`` in Python3) will use the 'STRING' BigQuery type. Values which are "bytes" (``str`` in Python2, ``bytes`` in Python3), will use using the 'BYTES' type. A `~datetime.datetime` parameter without timezone information uses the 'DATETIME' BigQuery type (example: Global Pi Day Celebration March 14, 2017 at 1:59pm). A `~datetime.datetime` parameter with timezone information uses the 'TIMESTAMP' BigQuery type (example: a wedding on April 29, 2011 at 11am, British Summer Time). For more information about BigQuery data types, see: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types ``STRUCT``/``RECORD`` and ``REPEATED`` query parameters are not yet supported. See: https://github.com/GoogleCloudPlatform/google-cloud-python/issues/3524 :type operation: str :param operation: A Google BigQuery query string. :type parameters: Mapping[str, Any] or Sequence[Any] :param parameters: (Optional) dictionary or sequence of parameter values. """ self._query_results = None self._page_token = None self._has_fetched_all_rows = False client = self.connection._client job_id = str(uuid.uuid4()) # The DB-API uses the pyformat formatting, since the way BigQuery does # query parameters was not one of the standard options. Convert both # the query and the parameters to the format expected by the client # libraries. formatted_operation = _format_operation(operation, parameters=parameters) query_parameters = _helpers.to_query_parameters(parameters) query_job = client.run_async_query(job_id, formatted_operation, query_parameters=query_parameters) query_job.use_legacy_sql = False query_job.begin() _helpers.wait_for_job(query_job) query_results = query_job.results() # Force the iterator to run because the query_results doesn't # have the total_rows populated. See: # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/3506 query_iterator = query_results.fetch_data() try: six.next(iter(query_iterator)) except StopIteration: pass self._query_data = iter( query_results.fetch_data(max_results=self.arraysize)) self._set_rowcount(query_results) self._set_description(query_results.schema)
def _cell_magic(line, query): """Underlying function for bigquery cell magic Note: This function contains the underlying logic for the 'bigquery' cell magic. This function is not meant to be called directly. Args: line (str): "%%bigquery" followed by arguments as required query (str): SQL query to run Returns: pandas.DataFrame: the query results. """ args = magic_arguments.parse_argstring(_cell_magic, line) params = [] if args.params is not None: try: params = _helpers.to_query_parameters( ast.literal_eval("".join(args.params)) ) except Exception: raise SyntaxError( "--params is not a correctly formatted JSON string or a JSON " "serializable dictionary" ) project = args.project or context.project client = bigquery.Client( project=project, credentials=context.credentials, default_query_job_config=context.default_query_job_config, client_info=client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT), ) if context._connection: client._connection = context._connection bqstorage_client = _make_bqstorage_client( args.use_bqstorage_api or context.use_bqstorage_api, context.credentials ) job_config = bigquery.job.QueryJobConfig() job_config.query_parameters = params job_config.use_legacy_sql = args.use_legacy_sql job_config.dry_run = args.dry_run if args.maximum_bytes_billed == "None": job_config.maximum_bytes_billed = 0 elif args.maximum_bytes_billed is not None: value = int(args.maximum_bytes_billed) job_config.maximum_bytes_billed = value error = None try: query_job = _run_query(client, query, job_config) except Exception as ex: error = str(ex) if not args.verbose: display.clear_output() if error: if args.destination_var: print( "Could not save output to variable '{}'.".format(args.destination_var), file=sys.stderr, ) print("\nERROR:\n", error, file=sys.stderr) return if args.dry_run and args.destination_var: IPython.get_ipython().push({args.destination_var: query_job}) return elif args.dry_run: print( "Query validated. This query will process {} bytes.".format( query_job.total_bytes_processed ) ) return query_job result = query_job.to_dataframe(bqstorage_client=bqstorage_client) if args.destination_var: IPython.get_ipython().push({args.destination_var: result}) else: return result
def _cell_magic(line, query): """Underlying function for bigquery cell magic Note: This function contains the underlying logic for the 'bigquery' cell magic. This function is not meant to be called directly. Args: line (str): "%%bigquery" followed by arguments as required query (str): SQL query to run Returns: pandas.DataFrame: the query results. """ args = magic_arguments.parse_argstring(_cell_magic, line) if args.use_bqstorage_api is not None: warnings.warn( "Deprecated option --use_bqstorage_api, the BigQuery " "Storage API is already used by default.", category=DeprecationWarning, ) use_bqstorage_api = not args.use_rest_api params = [] if args.params is not None: try: params = _helpers.to_query_parameters( ast.literal_eval("".join(args.params))) except Exception: raise SyntaxError( "--params is not a correctly formatted JSON string or a JSON " "serializable dictionary") project = args.project or context.project client = bigquery.Client( project=project, credentials=context.credentials, default_query_job_config=context.default_query_job_config, client_info=client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT), ) if context._connection: client._connection = context._connection bqstorage_client = _make_bqstorage_client(use_bqstorage_api, context.credentials) close_transports = functools.partial(_close_transports, client, bqstorage_client) try: if args.max_results: max_results = int(args.max_results) else: max_results = None query = query.strip() if not query: error = ValueError("Query is missing.") _handle_error(error, args.destination_var) return # Any query that does not contain whitespace (aside from leading and trailing whitespace) # is assumed to be a table id if not re.search(r"\s", query): try: rows = client.list_rows(query, max_results=max_results) except Exception as ex: _handle_error(ex, args.destination_var) return result = rows.to_dataframe(bqstorage_client=bqstorage_client) if args.destination_var: IPython.get_ipython().push({args.destination_var: result}) return else: return result job_config = bigquery.job.QueryJobConfig() job_config.query_parameters = params job_config.use_legacy_sql = args.use_legacy_sql job_config.dry_run = args.dry_run if args.destination_table: split = args.destination_table.split(".") if len(split) != 2: raise ValueError( "--destination_table should be in a <dataset_id>.<table_id> format." ) dataset_id, table_id = split job_config.allow_large_results = True dataset_ref = bigquery.dataset.DatasetReference( client.project, dataset_id) destination_table_ref = dataset_ref.table(table_id) job_config.destination = destination_table_ref job_config.create_disposition = "CREATE_IF_NEEDED" job_config.write_disposition = "WRITE_TRUNCATE" _create_dataset_if_necessary(client, dataset_id) if args.maximum_bytes_billed == "None": job_config.maximum_bytes_billed = 0 elif args.maximum_bytes_billed is not None: value = int(args.maximum_bytes_billed) job_config.maximum_bytes_billed = value try: query_job = _run_query(client, query, job_config=job_config) except Exception as ex: _handle_error(ex, args.destination_var) return if not args.verbose: display.clear_output() if args.dry_run and args.destination_var: IPython.get_ipython().push({args.destination_var: query_job}) return elif args.dry_run: print("Query validated. This query will process {} bytes.".format( query_job.total_bytes_processed)) return query_job if max_results: result = query_job.result(max_results=max_results).to_dataframe( bqstorage_client=bqstorage_client) else: result = query_job.to_dataframe(bqstorage_client=bqstorage_client) if args.destination_var: IPython.get_ipython().push({args.destination_var: result}) else: return result finally: close_transports()
def _cell_magic(line, query): """Underlying function for bigquery cell magic Note: This function contains the underlying logic for the 'bigquery' cell magic. This function is not meant to be called directly. Args: line (str): "%%bigquery" followed by arguments as required query (str): SQL query to run Returns: pandas.DataFrame: the query results. """ # The built-in parser does not recognize Python structures such as dicts, thus # we extract the "--params" option and inteprpret it separately. try: params_option_value, rest_of_args = _split_args_line(line) except lap.exceptions.QueryParamsParseError as exc: rebranded_error = SyntaxError( "--params is not a correctly formatted JSON string or a JSON " "serializable dictionary") six.raise_from(rebranded_error, exc) except lap.exceptions.DuplicateQueryParamsError as exc: rebranded_error = ValueError("Duplicate --params option.") six.raise_from(rebranded_error, exc) except lap.exceptions.ParseError as exc: rebranded_error = ValueError( "Unrecognized input, are option values correct? " "Error details: {}".format(exc.args[0])) six.raise_from(rebranded_error, exc) args = magic_arguments.parse_argstring(_cell_magic, rest_of_args) if args.use_bqstorage_api is not None: warnings.warn( "Deprecated option --use_bqstorage_api, the BigQuery " "Storage API is already used by default.", category=DeprecationWarning, ) use_bqstorage_api = not args.use_rest_api params = [] if params_option_value: # A non-existing params variable is not expanded and ends up in the input # in its raw form, e.g. "$query_params". if params_option_value.startswith("$"): msg = 'Parameter expansion failed, undefined variable "{}".'.format( params_option_value[1:]) raise NameError(msg) params = _helpers.to_query_parameters( ast.literal_eval(params_option_value)) project = args.project or context.project bigquery_client_options = copy.deepcopy(context.bigquery_client_options) if args.bigquery_api_endpoint: if isinstance(bigquery_client_options, dict): bigquery_client_options[ "api_endpoint"] = args.bigquery_api_endpoint else: bigquery_client_options.api_endpoint = args.bigquery_api_endpoint client = bigquery.Client( project=project, credentials=context.credentials, default_query_job_config=context.default_query_job_config, client_info=client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT), client_options=bigquery_client_options, ) if context._connection: client._connection = context._connection bqstorage_client_options = copy.deepcopy(context.bqstorage_client_options) if args.bqstorage_api_endpoint: if isinstance(bqstorage_client_options, dict): bqstorage_client_options[ "api_endpoint"] = args.bqstorage_api_endpoint else: bqstorage_client_options.api_endpoint = args.bqstorage_api_endpoint bqstorage_client = _make_bqstorage_client( use_bqstorage_api, context.credentials, bqstorage_client_options, ) close_transports = functools.partial(_close_transports, client, bqstorage_client) try: if args.max_results: max_results = int(args.max_results) else: max_results = None query = query.strip() if not query: error = ValueError("Query is missing.") _handle_error(error, args.destination_var) return # Any query that does not contain whitespace (aside from leading and trailing whitespace) # is assumed to be a table id if not re.search(r"\s", query): try: rows = client.list_rows(query, max_results=max_results) except Exception as ex: _handle_error(ex, args.destination_var) return result = rows.to_dataframe(bqstorage_client=bqstorage_client) if args.destination_var: IPython.get_ipython().push({args.destination_var: result}) return else: return result job_config = bigquery.job.QueryJobConfig() job_config.query_parameters = params job_config.use_legacy_sql = args.use_legacy_sql job_config.dry_run = args.dry_run if args.destination_table: split = args.destination_table.split(".") if len(split) != 2: raise ValueError( "--destination_table should be in a <dataset_id>.<table_id> format." ) dataset_id, table_id = split job_config.allow_large_results = True dataset_ref = bigquery.dataset.DatasetReference( client.project, dataset_id) destination_table_ref = dataset_ref.table(table_id) job_config.destination = destination_table_ref job_config.create_disposition = "CREATE_IF_NEEDED" job_config.write_disposition = "WRITE_TRUNCATE" _create_dataset_if_necessary(client, dataset_id) if args.maximum_bytes_billed == "None": job_config.maximum_bytes_billed = 0 elif args.maximum_bytes_billed is not None: value = int(args.maximum_bytes_billed) job_config.maximum_bytes_billed = value try: query_job = _run_query(client, query, job_config=job_config) except Exception as ex: _handle_error(ex, args.destination_var) return if not args.verbose: display.clear_output() if args.dry_run and args.destination_var: IPython.get_ipython().push({args.destination_var: query_job}) return elif args.dry_run: print("Query validated. This query will process {} bytes.".format( query_job.total_bytes_processed)) return query_job if max_results: result = query_job.result(max_results=max_results).to_dataframe( bqstorage_client=bqstorage_client) else: result = query_job.to_dataframe(bqstorage_client=bqstorage_client) if args.destination_var: IPython.get_ipython().push({args.destination_var: result}) else: return result finally: close_transports()