def _resolve_query_with_cache(
    cache_info: _CacheInfo,
    categories: Optional[List[str]],
    chunksize: Optional[Union[int, bool]],
    use_threads: bool,
    session: Optional[boto3.Session],
) -> Union[pd.DataFrame, Iterator[pd.DataFrame]]:
    """Fetch cached data and return it as a pandas DataFrame (or iterator of DataFrames)."""
    _logger.debug("cache_info:\n%s", cache_info)
    if cache_info.query_execution_id is None:
        raise RuntimeError("Trying to resolve with cache but without any query execution ID.")
    query_metadata: _QueryMetadata = _get_query_metadata(
        query_execution_id=cache_info.query_execution_id,
        boto3_session=session,
        categories=categories,
        query_execution_payload=cache_info.query_execution_payload,
    )
    if cache_info.file_format == "parquet":
        return _fetch_parquet_result(
            query_metadata=query_metadata,
            keep_files=True,
            categories=categories,
            chunksize=chunksize,
            use_threads=use_threads,
            boto3_session=session,
        )
    if cache_info.file_format == "csv":
        return _fetch_csv_result(
            query_metadata=query_metadata,
            keep_files=True,
            chunksize=chunksize,
            use_threads=use_threads,
            boto3_session=session,
        )
    raise exceptions.InvalidArgumentValue(f"Invalid data type: {cache_info.file_format}.")
def _resolve_query_without_cache_regular(
    sql: str,
    database: Optional[str],
    data_source: Optional[str],
    s3_output: Optional[str],
    keep_files: bool,
    chunksize: Union[int, bool, None],
    categories: Optional[List[str]],
    encryption: Optional[str],
    workgroup: Optional[str],
    kms_key: Optional[str],
    wg_config: _WorkGroupConfig,
    use_threads: bool,
    s3_additional_kwargs: Optional[Dict[str, Any]],
    boto3_session: boto3.Session,
) -> Union[pd.DataFrame, Iterator[pd.DataFrame]]:
    """Run the query as-is and read the CSV results written by Athena."""
    _logger.debug("sql: %s", sql)
    query_id: str = _start_query_execution(
        sql=sql,
        wg_config=wg_config,
        database=database,
        data_source=data_source,
        s3_output=s3_output,
        workgroup=workgroup,
        encryption=encryption,
        kms_key=kms_key,
        boto3_session=boto3_session,
    )
    _logger.debug("query_id: %s", query_id)
    query_metadata: _QueryMetadata = _get_query_metadata(
        query_execution_id=query_id,
        boto3_session=boto3_session,
        categories=categories,
        metadata_cache_manager=_cache_manager,
    )
    return _fetch_csv_result(
        query_metadata=query_metadata,
        keep_files=keep_files,
        chunksize=chunksize,
        use_threads=use_threads,
        boto3_session=boto3_session,
        s3_additional_kwargs=s3_additional_kwargs,
    )
def _resolve_query_without_cache_ctas(
    sql: str,
    database: Optional[str],
    data_source: Optional[str],
    s3_output: Optional[str],
    keep_files: bool,
    chunksize: Union[int, bool, None],
    categories: Optional[List[str]],
    encryption: Optional[str],
    workgroup: Optional[str],
    kms_key: Optional[str],
    wg_config: _WorkGroupConfig,
    name: Optional[str],
    use_threads: bool,
    s3_additional_kwargs: Optional[Dict[str, Any]],
    boto3_session: boto3.Session,
) -> Union[pd.DataFrame, Iterator[pd.DataFrame]]:
    """Wrap the query in a CTAS statement and read the Parquet results it writes."""
    path: str = f"{s3_output}/{name}"
    ext_location: str = "\n" if wg_config.enforced is True else f",\n    external_location = '{path}'\n"
    sql = (
        f'CREATE TABLE "{name}"\n'
        f"WITH(\n"
        f"    format = 'Parquet',\n"
        f"    parquet_compression = 'SNAPPY'"
        f"{ext_location}"
        f") AS\n"
        f"{sql}"
    )
    _logger.debug("sql: %s", sql)
    try:
        query_id: str = _start_query_execution(
            sql=sql,
            wg_config=wg_config,
            database=database,
            data_source=data_source,
            s3_output=s3_output,
            workgroup=workgroup,
            encryption=encryption,
            kms_key=kms_key,
            boto3_session=boto3_session,
        )
    except botocore.exceptions.ClientError as ex:
        error: Dict[str, Any] = ex.response["Error"]
        if error["Code"] == "InvalidRequestException" and "Exception parsing query" in error["Message"]:
            raise exceptions.InvalidCtasApproachQuery(
                "It is not possible to wrap this query into a CTAS statement. Please use ctas_approach=False."
            )
        if error["Code"] == "InvalidRequestException" and "extraneous input" in error["Message"]:
            raise exceptions.InvalidCtasApproachQuery(
                "It is not possible to wrap this query into a CTAS statement. Please use ctas_approach=False."
            )
        raise ex
    _logger.debug("query_id: %s", query_id)
    try:
        query_metadata: _QueryMetadata = _get_query_metadata(
            query_execution_id=query_id,
            boto3_session=boto3_session,
            categories=categories,
            metadata_cache_manager=_cache_manager,
        )
    except exceptions.QueryFailed as ex:
        msg: str = str(ex)
        if "Column name" in msg and "specified more than once" in msg:
            raise exceptions.InvalidCtasApproachQuery(
                f"Please define distinct names for your columns OR pass ctas_approach=False. Root error message: {msg}"
            )
        if "Column name not specified" in msg:
            raise exceptions.InvalidArgumentValue(
                "Please define all column names in your query. (E.g. 'SELECT MAX(col1) AS max_col1, ...')"
            )
        if "Column type is unknown" in msg:
            raise exceptions.InvalidArgumentValue(
                "Please do not leave column types undefined in your query. You can use CAST to ensure a type. "
                "(E.g. 'SELECT CAST(NULL AS INTEGER) AS MY_COL, ...')"
            )
        raise ex
    return _fetch_parquet_result(
        query_metadata=query_metadata,
        keep_files=keep_files,
        categories=categories,
        chunksize=chunksize,
        use_threads=use_threads,
        s3_additional_kwargs=s3_additional_kwargs,
        boto3_session=boto3_session,
    )
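

# --- Illustrative usage sketch (not part of the original module) -------------
# A minimal, hedged example of how the three resolvers above could be wired
# together by a caller. The dispatcher name `_example_dispatch` and the
# `cache_hit` / `ctas_approach` flags are assumptions made for illustration
# only; the library's real public entry points may combine these helpers
# differently.
def _example_dispatch(
    sql: str,
    cache_info: _CacheInfo,
    cache_hit: bool,
    ctas_approach: bool,
    database: Optional[str],
    s3_output: Optional[str],
    workgroup: Optional[str],
    wg_config: _WorkGroupConfig,
    boto3_session: boto3.Session,
) -> Union[pd.DataFrame, Iterator[pd.DataFrame]]:
    if cache_hit:
        # Reuse the results of a previous, still-valid query execution.
        return _resolve_query_with_cache(
            cache_info=cache_info,
            categories=None,
            chunksize=None,
            use_threads=True,
            session=boto3_session,
        )
    if ctas_approach:
        # Wrap the query in a CTAS statement and read its Parquet output.
        return _resolve_query_without_cache_ctas(
            sql=sql,
            database=database,
            data_source=None,
            s3_output=s3_output,
            keep_files=True,
            chunksize=None,
            categories=None,
            encryption=None,
            workgroup=workgroup,
            kms_key=None,
            wg_config=wg_config,
            name="temp_table_example",  # hypothetical temporary table name
            use_threads=True,
            s3_additional_kwargs=None,
            boto3_session=boto3_session,
        )
    # Plain execution path: read the CSV output written by Athena.
    return _resolve_query_without_cache_regular(
        sql=sql,
        database=database,
        data_source=None,
        s3_output=s3_output,
        keep_files=True,
        chunksize=None,
        categories=None,
        encryption=None,
        workgroup=workgroup,
        kms_key=None,
        wg_config=wg_config,
        use_threads=True,
        s3_additional_kwargs=None,
        boto3_session=boto3_session,
    )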