def _get_query_metadata(
    query_execution_id: str, categories: List[str] = None, boto3_session: Optional[boto3.Session] = None
) -> Tuple[Dict[str, str], List[str], List[str], Dict[str, Any], List[str]]:
    """Derive per-column parsing hints from the column types of a query.

    Parameters
    ----------
    query_execution_id : str
        Identifier forwarded to ``get_query_columns_types``.
    categories : List[str], optional
        Column names that must be typed as ``"category"``; this takes
        precedence over the type derived from Athena.
    boto3_session : boto3.Session, optional
        Session forwarded to ``get_query_columns_types``.

    Returns
    -------
    Tuple[Dict[str, str], List[str], List[str], Dict[str, Any], List[str]]
        ``(dtype, parse_timestamps, parse_dates, converters, binaries)``:
        the pandas dtype per column, the timestamp/date column names, the
        subset of those that are pure dates, the per-column converter
        callables (decimal columns), and the binary column names.

    Raises
    ------
    exceptions.UnsupportedType
        If any column has the Athena type ``array`` or ``row``.
    """
    # Athena types that cannot be handled here, with the error to raise for each.
    unsupported_messages: Dict[str, str] = {
        "array": "List data type is not support with ctas_approach=False. "
        "Please use ctas_approach=True for List columns.",
        "row": "Struct data type is not support with ctas_approach=False. "
        "Please use ctas_approach=True for Struct columns.",
    }

    def _to_decimal(raw: Any) -> Optional[Decimal]:
        # Null-like textual markers become None instead of failing in Decimal().
        text = str(raw)
        if text in ("", "none", " ", "<NA>"):
            return None
        return Decimal(text)

    column_types: Dict[str, str] = get_query_columns_types(
        query_execution_id=query_execution_id, boto3_session=boto3_session
    )
    _logger.debug("cols_types: %s", column_types)

    dtype: Dict[str, str] = {}
    parse_timestamps: List[str] = []
    parse_dates: List[str] = []
    converters: Dict[str, Any] = {}
    binaries: List[str] = []

    for name, athena_type in column_types.items():
        if athena_type in unsupported_messages:
            raise exceptions.UnsupportedType(unsupported_messages[athena_type])

        mapped: str = _data_types.athena2pandas(dtype=athena_type)

        # An explicit category request wins over whatever the Athena type maps to.
        if categories is not None and name in categories:
            dtype[name] = "category"
            continue

        if mapped in ("datetime64", "date"):
            # Dates are tracked as timestamps too, and additionally as dates.
            parse_timestamps.append(name)
            if mapped == "date":
                parse_dates.append(name)
            continue

        if mapped == "bytes":
            # Binary columns are typed as strings and remembered separately.
            dtype[name] = "string"
            binaries.append(name)
            continue

        if mapped == "decimal":
            converters[name] = _to_decimal
            continue

        dtype[name] = mapped

    _logger.debug("dtype: %s", dtype)
    _logger.debug("parse_timestamps: %s", parse_timestamps)
    _logger.debug("parse_dates: %s", parse_dates)
    _logger.debug("converters: %s", converters)
    _logger.debug("binaries: %s", binaries)
    return dtype, parse_timestamps, parse_dates, converters, binaries
def _get_query_metadata(  # pylint: disable=too-many-statements
    query_execution_id: str,
    boto3_session: boto3.Session,
    categories: Optional[List[str]] = None,
    query_execution_payload: Optional[Dict[str, Any]] = None,
) -> _QueryMetadata:
    """Build the ``_QueryMetadata`` describing how to read a query's results.

    Parameters
    ----------
    query_execution_id : str
        Identifier of the query execution.
    boto3_session : boto3.Session
        Session forwarded to ``wait_query`` and ``get_query_columns_types``.
    categories : List[str], optional
        Column names that must be typed as ``"category"``; this takes
        precedence over the type derived from Athena.
    query_execution_payload : Dict[str, Any], optional
        Query execution payload already fetched by the caller. It is used
        as-is when its state is one of ``_QUERY_FINAL_STATES``; otherwise
        ``wait_query`` is called to obtain the payload.

    Returns
    -------
    _QueryMetadata
        Parsing hints (dtype, timestamp/date columns, converters, binary
        columns) plus the output location, the manifest location and the
        raw payload. ``manifest_location`` and ``output_location`` are
        ``None`` when the payload does not report them.

    Raises
    ------
    exceptions.QueryFailed
        If the supplied payload is in a final state other than ``SUCCEEDED``.
    exceptions.UnsupportedType
        If any column has the Athena type ``array`` or ``row``.
    """
    if (query_execution_payload is not None) and (query_execution_payload["Status"]["State"] in _QUERY_FINAL_STATES):
        status: Dict[str, Any] = query_execution_payload["Status"]
        if status["State"] != "SUCCEEDED":
            # The reason field may be absent; fall back to the state name so the
            # caller still gets QueryFailed rather than an unrelated KeyError.
            reason: str = status.get("StateChangeReason", status["State"])
            raise exceptions.QueryFailed(f"Query error: {reason}")
        _query_execution_payload: Dict[str, Any] = query_execution_payload
    else:
        _query_execution_payload = wait_query(query_execution_id=query_execution_id, boto3_session=boto3_session)

    cols_types: Dict[str, str] = get_query_columns_types(
        query_execution_id=query_execution_id, boto3_session=boto3_session
    )
    _logger.debug("cols_types: %s", cols_types)

    dtype: Dict[str, str] = {}
    parse_timestamps: List[str] = []
    parse_dates: List[str] = []
    converters: Dict[str, Any] = {}
    binaries: List[str] = []
    col_name: str
    col_type: str
    for col_name, col_type in cols_types.items():
        if col_type == "array":
            raise exceptions.UnsupportedType(
                "List data type is not support with ctas_approach=False. "
                "Please use ctas_approach=True for List columns."
            )
        if col_type == "row":
            raise exceptions.UnsupportedType(
                "Struct data type is not support with ctas_approach=False. "
                "Please use ctas_approach=True for Struct columns."
            )
        pandas_type: str = _data_types.athena2pandas(dtype=col_type)
        if (categories is not None) and (col_name in categories):
            # An explicit category request wins over the derived type.
            dtype[col_name] = "category"
        elif pandas_type in ["datetime64", "date"]:
            # Dates are tracked as timestamps too, and additionally as dates.
            parse_timestamps.append(col_name)
            if pandas_type == "date":
                parse_dates.append(col_name)
        elif pandas_type == "bytes":
            # Binary columns are typed as strings and remembered separately.
            dtype[col_name] = "string"
            binaries.append(col_name)
        elif pandas_type == "decimal":
            # Null-like textual markers become None instead of failing in Decimal().
            converters[col_name] = lambda x: Decimal(str(x)) if str(x) not in ("", "none", " ", "<NA>") else None
        else:
            dtype[col_name] = pandas_type

    output_location: Optional[str] = None
    if "ResultConfiguration" in _query_execution_payload:
        output_location = _query_execution_payload["ResultConfiguration"].get("OutputLocation")

    athena_statistics: Dict[str, Union[int, str]] = _query_execution_payload.get("Statistics", {})
    # Only stringify a real value: str(None) would yield the literal "None",
    # making a missing manifest indistinguishable from a present one.
    raw_manifest_location = athena_statistics.get("DataManifestLocation")
    manifest_location: Optional[str] = None if raw_manifest_location is None else str(raw_manifest_location)

    query_metadata: _QueryMetadata = _QueryMetadata(
        execution_id=query_execution_id,
        dtype=dtype,
        parse_timestamps=parse_timestamps,
        parse_dates=parse_dates,
        converters=converters,
        binaries=binaries,
        output_location=output_location,
        manifest_location=manifest_location,
        raw_payload=_query_execution_payload,
    )
    _logger.debug("query_metadata:\n%s", query_metadata)
    return query_metadata