Exemplo n.º 1
0
 def _process_rows(self, response):
     result_set = response.get("ResultSet", None)
     if not result_set:
         raise DataError("KeyError `ResultSet`")
     rows = result_set.get("Rows", None)
     if rows is None:
         raise DataError("KeyError `Rows`")
     processed_rows = []
     if len(rows) > 0:
         offset = (
             1
             if not self._next_token and self._is_first_row_column_labels(rows)
             else 0
         )
         processed_rows = [
             tuple(
                 [
                     self._converter.convert(
                         meta.get("Type", None), row.get("VarCharValue", None)
                     )
                     for meta, row in zip(self._meta_data, rows[i].get("Data", []))
                 ]
             )
             for i in xrange(offset, len(rows))
         ]
     self._rows.extend(processed_rows)
     self._next_token = response.get("NextToken", None)
Exemplo n.º 2
0
    def __init__(self, response):
        query_execution = response.get('QueryExecution', None)
        if not query_execution:
            raise DataError('KeyError `QueryExecution`')

        self._query_id = query_execution.get('QueryExecutionId', None)
        if not self._query_id:
            raise DataError('KeyError `QueryExecutionId`')

        self._query = query_execution.get('Query', None)
        if not self._query:
            raise DataError('KeyError `Query`')

        status = query_execution.get('Status', None)
        if not status:
            raise DataError('KeyError `Status`')
        self._state = status.get('State', None)
        self._state_change_reason = status.get('StateChangeReason', None)
        self._completion_date_time = status.get('CompletionDateTime', None)
        self._submission_date_time = status.get('SubmissionDateTime', None)

        statistics = query_execution.get('Statistics', {})
        self._data_scanned_in_bytes = statistics.get('DataScannedInBytes',
                                                     None)
        self._execution_time_in_millis = statistics.get(
            'EngineExecutionTimeInMillis', None)

        result_conf = query_execution.get('ResultConfiguration', {})
        self._output_location = result_conf.get('OutputLocation', None)
Exemplo n.º 3
0
    def __init__(self, response: Dict[str, Any]) -> None:
        query_execution = response.get("QueryExecution", None)
        if not query_execution:
            raise DataError("KeyError `QueryExecution`")

        query_execution_context = query_execution.get("QueryExecutionContext",
                                                      {})
        self._database: Optional[str] = query_execution_context.get(
            "Database", None)

        self._query_id: Optional[str] = query_execution.get(
            "QueryExecutionId", None)
        if not self._query_id:
            raise DataError("KeyError `QueryExecutionId`")

        self._query: Optional[str] = query_execution.get("Query", None)
        if not self._query:
            raise DataError("KeyError `Query`")
        self._statement_type: Optional[str] = query_execution.get(
            "StatementType", None)

        status = query_execution.get("Status", None)
        if not status:
            raise DataError("KeyError `Status`")
        self._state: Optional[str] = status.get("State", None)
        self._state_change_reason: Optional[str] = status.get(
            "StateChangeReason", None)
        self._completion_date_time: Optional[datetime] = status.get(
            "CompletionDateTime", None)
        self._submission_date_time: Optional[datetime] = status.get(
            "SubmissionDateTime", None)

        statistics = query_execution.get("Statistics", {})
        self._data_scanned_in_bytes: Optional[int] = statistics.get(
            "DataScannedInBytes", None)
        self._engine_execution_time_in_millis: Optional[int] = statistics.get(
            "EngineExecutionTimeInMillis", None)
        self._query_queue_time_in_millis: Optional[int] = statistics.get(
            "QueryQueueTimeInMillis", None)
        self._total_execution_time_in_millis: Optional[int] = statistics.get(
            "TotalExecutionTimeInMillis", None)
        self._query_planning_time_in_millis: Optional[int] = statistics.get(
            "QueryPlanningTimeInMillis", None)
        self._service_processing_time_in_millis: Optional[
            int] = statistics.get("ServiceProcessingTimeInMillis", None)
        self._data_manifest_location: Optional[str] = statistics.get(
            "DataManifestLocation", None)

        result_conf = query_execution.get("ResultConfiguration", {})
        self._output_location: Optional[str] = result_conf.get(
            "OutputLocation", None)

        encryption_conf = result_conf.get("EncryptionConfiguration", {})
        self._encryption_option: Optional[str] = encryption_conf.get(
            "EncryptionOption", None)
        self._kms_key: Optional[str] = encryption_conf.get("KmsKey", None)

        self._work_group: Optional[str] = query_execution.get(
            "WorkGroup", None)
Exemplo n.º 4
0
 def _process_meta_data(self, response):
     result_set = response.get('ResultSet', None)
     if not result_set:
         raise DataError('KeyError `ResultSet`')
     meta_data = result_set.get('ResultSetMetadata', None)
     if not meta_data:
         raise DataError('KeyError `ResultSetMetadata`')
     column_info = meta_data.get('ColumnInfo', None)
     if column_info is None:
         raise DataError('KeyError `ColumnInfo`')
     self._meta_data = tuple(column_info)
Exemplo n.º 5
0
 def _process_meta_data(self, response):
     result_set = response.get("ResultSet", None)
     if not result_set:
         raise DataError("KeyError `ResultSet`")
     meta_data = result_set.get("ResultSetMetadata", None)
     if not meta_data:
         raise DataError("KeyError `ResultSetMetadata`")
     column_info = meta_data.get("ColumnInfo", None)
     if column_info is None:
         raise DataError("KeyError `ColumnInfo`")
     self._meta_data = tuple(column_info)
Exemplo n.º 6
0
    def _poll(self):
        """Poll the query identified by ``self._query_id`` until it ends.

        Each iteration calls ``self._connection.get_query_execution`` through
        ``retry_api_call`` and inspects ``QueryExecution.Status.State``:

        * ``SUCCEEDED``: timestamps, statistics and the output location are
          copied onto the instance and the method returns.
        * ``FAILED`` or ``CANCELLED``: ``OperationalError`` is raised with
          the reported ``StateChangeReason``.
        * any other state: sleep for ``self._poll_interval`` and poll again.

        :raises ProgrammingError: if ``self._query_id`` is unset or empty.
        :raises OperationalError: if the query ends as FAILED or CANCELLED;
            an exception from the API call is logged and handed to
            ``raise_from`` wrapped in ``OperationalError``.
        :raises DataError: if the response lacks ``QueryExecution`` or
            ``Status``.
        """
        if not self._query_id:
            raise ProgrammingError('QueryExecutionId is none or empty.')
        while True:
            try:
                params = {'QueryExecutionId': self._query_id}
                response = retry_api_call(
                    self._connection.get_query_execution,
                    exceptions=self.retry_exceptions,
                    attempt=self.retry_attempt,
                    multiplier=self.retry_multiplier,
                    max_delay=self.retry_max_deply,
                    exp_base=self.retry_exponential_base,
                    logger=_logger,
                    **params)
            except Exception as e:
                _logger.exception('Failed to poll query result.')
                raise_from(OperationalError(*e.args), e)
            else:
                execution = response.get('QueryExecution')
                if not execution:
                    raise DataError('KeyError `QueryExecution`')
                state_info = execution.get('Status')
                if not state_info:
                    raise DataError('KeyError `Status`')

                state = state_info.get('State')
                # Both terminal failure states surface the same error.
                if state in ('FAILED', 'CANCELLED'):
                    raise OperationalError(
                        state_info.get('StateChangeReason'))
                if state != 'SUCCEEDED':
                    # Not finished yet: wait, then ask again.
                    time.sleep(self._poll_interval)
                    continue

                self._completion_date_time = state_info.get(
                    'CompletionDateTime')
                self._submission_date_time = state_info.get(
                    'SubmissionDateTime')

                stats = execution.get('Statistics', {})
                self._data_scanned_in_bytes = stats.get('DataScannedInBytes')
                self._execution_time_in_millis = stats.get(
                    'EngineExecutionTimeInMillis')

                output_conf = execution.get('ResultConfiguration', {})
                self._output_location = output_conf.get('OutputLocation')
                return
Exemplo n.º 7
0
 def _process_rows(self, response: Dict[str, Any]) -> None:
     result_set = response.get("ResultSet", None)
     if not result_set:
         raise DataError("KeyError `ResultSet`")
     rows = result_set.get("Rows", None)
     if rows is None:
         raise DataError("KeyError `Rows`")
     processed_rows = []
     if len(rows) > 0:
         offset = (1 if not self._next_token
                   and self._is_first_row_column_labels(rows) else 0)
         meta_data = cast(Tuple[Any, ...], self._meta_data)
         processed_rows = self._get_rows(offset, meta_data, rows)
     self._rows.extend(processed_rows)
     self._next_token = response.get("NextToken", None)
Exemplo n.º 8
0
 def _process_rows(self, response):
     result_set = response.get('ResultSet', None)
     if not result_set:
         raise DataError('KeyError `ResultSet`')
     rows = result_set.get('Rows', None)
     if rows is None:
         raise DataError('KeyError `Rows`')
     processed_rows = []
     if len(rows) > 0:
         offset = 1 if not self._next_token and self._is_first_row_column_labels(rows) else 0
         processed_rows = [
             tuple([self._converter.convert(meta.get('Type', None),
                                            row.get('VarCharValue', None))
                    for meta, row in zip(self._meta_data, rows[i].get('Data', []))])
             for i in xrange(offset, len(rows))
         ]
     self._rows.extend(processed_rows)
     self._next_token = response.get('NextToken', None)
Exemplo n.º 9
0
    def __init__(self, response):
        query_execution = response.get('QueryExecution', None)
        if not query_execution:
            raise DataError('KeyError `QueryExecution`')

        query_execution_context = query_execution.get('QueryExecutionContext',
                                                      {})
        self._database = query_execution_context.get('Database', None)

        self._query_id = query_execution.get('QueryExecutionId', None)
        if not self._query_id:
            raise DataError('KeyError `QueryExecutionId`')

        self._query = query_execution.get('Query', None)
        if not self._query:
            raise DataError('KeyError `Query`')
        self._statement_type = query_execution.get('StatementType', None)

        status = query_execution.get('Status', None)
        if not status:
            raise DataError('KeyError `Status`')
        self._state = status.get('State', None)
        self._state_change_reason = status.get('StateChangeReason', None)
        self._completion_date_time = status.get('CompletionDateTime', None)
        self._submission_date_time = status.get('SubmissionDateTime', None)

        statistics = query_execution.get('Statistics', {})
        self._data_scanned_in_bytes = statistics.get('DataScannedInBytes',
                                                     None)
        self._execution_time_in_millis = statistics.get(
            'EngineExecutionTimeInMillis', None)

        result_conf = query_execution.get('ResultConfiguration', {})
        self._output_location = result_conf.get('OutputLocation', None)

        encryption_conf = result_conf.get('EncryptionConfiguration', {})
        self._encryption_option = encryption_conf.get('EncryptionOption', None)
        self._kms_key = encryption_conf.get('KmsKey', None)

        self._work_group = query_execution.get('WorkGroup', None)
Exemplo n.º 10
0
    def __init__(self, response):
        query_execution = response.get("QueryExecution", None)
        if not query_execution:
            raise DataError("KeyError `QueryExecution`")

        query_execution_context = query_execution.get("QueryExecutionContext",
                                                      {})
        self._database = query_execution_context.get("Database", None)

        self._query_id = query_execution.get("QueryExecutionId", None)
        if not self._query_id:
            raise DataError("KeyError `QueryExecutionId`")

        self._query = query_execution.get("Query", None)
        if not self._query:
            raise DataError("KeyError `Query`")
        self._statement_type = query_execution.get("StatementType", None)

        status = query_execution.get("Status", None)
        if not status:
            raise DataError("KeyError `Status`")
        self._state = status.get("State", None)
        self._state_change_reason = status.get("StateChangeReason", None)
        self._completion_date_time = status.get("CompletionDateTime", None)
        self._submission_date_time = status.get("SubmissionDateTime", None)

        statistics = query_execution.get("Statistics", {})
        self._data_scanned_in_bytes = statistics.get("DataScannedInBytes",
                                                     None)
        self._execution_time_in_millis = statistics.get(
            "EngineExecutionTimeInMillis", None)

        result_conf = query_execution.get("ResultConfiguration", {})
        self._output_location = result_conf.get("OutputLocation", None)

        encryption_conf = result_conf.get("EncryptionConfiguration", {})
        self._encryption_option = encryption_conf.get("EncryptionOption", None)
        self._kms_key = encryption_conf.get("KmsKey", None)

        self._work_group = query_execution.get("WorkGroup", None)
Exemplo n.º 11
0
    def __init__(self, response):
        table_metadata = response.get("TableMetadata", None)
        if not table_metadata:
            raise DataError("KeyError `TableMetadata`")

        self._name: Optional[str] = table_metadata.get("Name", None)
        self._create_time: Optional[datetime] = table_metadata.get(
            "CreateTime", None)
        self._last_access_time: Optional[datetime] = table_metadata.get(
            "LastAccessTime", None)
        self._table_type: Optional[str] = table_metadata.get("TableType", None)

        columns = table_metadata.get("Columns", [])
        self._columns: List[AthenaTableMetadataColumn] = []
        for column in columns:
            self._columns.append(AthenaTableMetadataColumn(column))

        partition_keys = table_metadata.get("PartitionKeys", [])
        self._partition_keys: List[AthenaTableMetadataPartitionKey] = []
        for key in partition_keys:
            self._partition_keys.append(AthenaTableMetadataPartitionKey(key))

        self._parameters: Dict[str, str] = table_metadata.get("Parameters", {})
Exemplo n.º 12
0
 def _parse_output_location(cls, output_location):
     match = cls._pattern_output_location.search(output_location)
     if match:
         return match.group('bucket'), match.group('key')
     else:
         raise DataError('Unknown `output_location` format.')