示例#1
0
 def execute(
     self,
     operation,
     parameters=None,
     work_group=None,
     s3_staging_dir=None,
     cache_size=0,
     keep_default_na=False,
     na_values=None,
     quoting=1,
 ):
     """Run ``operation`` and attach a pandas result set when it succeeds.

     The query is submitted through ``_execute`` and waited on through
     ``_poll``. ``keep_default_na``, ``na_values`` and ``quoting`` are handed
     to ``AthenaPandasResultSet`` untouched (presumably CSV-parsing options
     for pandas -- confirm against that class).

     Returns:
         ``self``.

     Raises:
         OperationalError: the polled state is not ``STATE_SUCCEEDED``; the
             message is the execution's ``state_change_reason``.
     """
     self._reset_state()
     submit_options = {
         "parameters": parameters,
         "work_group": work_group,
         "s3_staging_dir": s3_staging_dir,
         "cache_size": cache_size,
     }
     self._query_id = self._execute(operation, **submit_options)
     execution = self._poll(self._query_id)
     # Bail out first so the success path below reads straight through.
     if execution.state != AthenaQueryExecution.STATE_SUCCEEDED:
         raise OperationalError(execution.state_change_reason)
     reader_options = {
         "keep_default_na": keep_default_na,
         "na_values": na_values,
         "quoting": quoting,
     }
     self._result_set = AthenaPandasResultSet(
         connection=self._connection,
         converter=self._converter,
         query_execution=execution,
         arraysize=self.arraysize,
         retry_config=self._retry_config,
         **reader_options
     )
     return self
示例#2
0
 def execute(self, operation, parameters=None):
     """Run ``operation`` and attach a pandas result set when it succeeds.

     Clears previous state, submits the query through ``_execute``, waits
     for it through ``_poll`` and, on success, stores an
     ``AthenaPandasResultSet`` on ``self._result_set``.

     Returns:
         ``self``.

     Raises:
         OperationalError: the polled state is not ``STATE_SUCCEEDED``; the
             message is the execution's ``state_change_reason``.
     """
     self._reset_state()
     self._query_id = self._execute(operation, parameters)
     execution = self._poll(self._query_id)
     # Handle the failure case up front; everything after it is the happy path.
     if execution.state != AthenaQueryExecution.STATE_SUCCEEDED:
         raise OperationalError(execution.state_change_reason)
     constructor_args = (
         self._connection,
         self._converter,
         execution,
         self.arraysize,
         self._retry_config,
     )
     self._result_set = AthenaPandasResultSet(*constructor_args)
     return self
示例#3
0
 def execute(self, operation, parameters=None):
     """Run ``operation`` and attach a pandas result set when it succeeds.

     This variant passes the individual ``retry_*`` attributes of the
     cursor positionally to ``AthenaPandasResultSet`` instead of a single
     retry-config object.

     Returns:
         ``self``.

     Raises:
         OperationalError: the polled state is not ``STATE_SUCCEEDED``; the
             message is the execution's ``state_change_reason``.
     """
     self._reset_state()
     self._query_id = self._execute(operation, parameters)
     execution = self._poll(self._query_id)
     if execution.state != AthenaQueryExecution.STATE_SUCCEEDED:
         raise OperationalError(execution.state_change_reason)
     # Positional order matters here: it mirrors the original call exactly.
     constructor_args = (
         self._connection,
         self._converter,
         execution,
         self.arraysize,
         self.retry_exceptions,
         self.retry_attempt,
         self.retry_multiplier,
         self.retry_max_delay,
         self.retry_exponential_base,
     )
     self._result_set = AthenaPandasResultSet(*constructor_args)
     return self
 def _collect_result_set(self, query_id):
     """Poll ``query_id`` and wrap the polled execution in a pandas result set.

     The polled state is not inspected here; whatever ``_poll`` returns is
     passed straight to ``AthenaPandasResultSet``.
     """
     execution = self._poll(query_id)
     result_set = AthenaPandasResultSet(
         connection=self._connection,
         converter=self._converter,
         query_execution=execution,
         arraysize=self._arraysize,
         retry_config=self._retry_config,
     )
     return result_set
示例#5
0
 def execute(
     self,
     operation,
     parameters=None,
     work_group=None,
     s3_staging_dir=None,
     cache_size=0,
 ):
     """Run ``operation`` and attach a pandas result set when it succeeds.

     ``parameters``, ``work_group``, ``s3_staging_dir`` and ``cache_size``
     are forwarded as keyword arguments to ``_execute``; their meaning is
     defined there.

     Returns:
         ``self``.

     Raises:
         OperationalError: the polled state is not ``STATE_SUCCEEDED``; the
             message is the execution's ``state_change_reason``.
     """
     self._reset_state()
     submit_options = {
         "parameters": parameters,
         "work_group": work_group,
         "s3_staging_dir": s3_staging_dir,
         "cache_size": cache_size,
     }
     self._query_id = self._execute(operation, **submit_options)
     execution = self._poll(self._query_id)
     # Failure first, so the success path needs no else branch.
     if execution.state != AthenaQueryExecution.STATE_SUCCEEDED:
         raise OperationalError(execution.state_change_reason)
     self._result_set = AthenaPandasResultSet(
         self._connection,
         self._converter,
         execution,
         self.arraysize,
         self._retry_config,
     )
     return self
 def _collect_result_set(
     self,
     query_id,
     keep_default_na=False,
     na_values=None,
     quoting=1,
 ):
     """Poll ``query_id`` and wrap the polled execution in a pandas result set.

     ``keep_default_na``, ``na_values`` and ``quoting`` are passed through
     to ``AthenaPandasResultSet`` unchanged (presumably CSV-parsing options
     for pandas -- confirm against that class). The polled state is not
     inspected here.
     """
     execution = self._poll(query_id)
     reader_options = {
         "keep_default_na": keep_default_na,
         "na_values": na_values,
         "quoting": quoting,
     }
     result_set = AthenaPandasResultSet(
         connection=self._connection,
         converter=self._converter,
         query_execution=execution,
         arraysize=self._arraysize,
         retry_config=self._retry_config,
         **reader_options
     )
     return result_set
示例#7
0
class PandasCursor(BaseCursor, CursorIterator, WithResultSet):
    """Cursor that wraps successful queries in an ``AthenaPandasResultSet``.

    Submission, polling, cancellation and state reset go through
    ``_execute``, ``_poll``, ``_cancel`` and ``_reset_state``, none of which
    are defined in this class (presumably inherited -- confirm).
    """

    def __init__(
        self,
        connection,
        s3_staging_dir,
        schema_name,
        work_group,
        poll_interval,
        encryption_option,
        kms_key,
        converter,
        formatter,
        retry_config,
        kill_on_interrupt=True,
        **kwargs
    ):
        """Forward every argument, unchanged, to the parent initializer."""
        super(PandasCursor, self).__init__(
            connection=connection,
            s3_staging_dir=s3_staging_dir,
            schema_name=schema_name,
            work_group=work_group,
            poll_interval=poll_interval,
            encryption_option=encryption_option,
            kms_key=kms_key,
            converter=converter,
            formatter=formatter,
            retry_config=retry_config,
            kill_on_interrupt=kill_on_interrupt,
            **kwargs
        )

    @property
    def rownumber(self):
        """``rownumber`` of the current result set, or ``None`` without one."""
        if not self._result_set:
            return None
        return self._result_set.rownumber

    def close(self):
        """Close the current result set unless it is missing or already closed."""
        if not self._result_set:
            return
        if self._result_set.is_closed:
            return
        self._result_set.close()

    @synchronized
    def execute(
        self,
        operation,
        parameters=None,
        work_group=None,
        s3_staging_dir=None,
        cache_size=0,
        keep_default_na=False,
        na_values=None,
        quoting=1,
    ):
        """Run ``operation`` and attach a pandas result set when it succeeds.

        ``keep_default_na``, ``na_values`` and ``quoting`` are handed to
        ``AthenaPandasResultSet`` untouched (presumably CSV-parsing options
        for pandas -- confirm against that class).

        Returns:
            ``self``.

        Raises:
            OperationalError: the polled state is not ``STATE_SUCCEEDED``;
                the message is the execution's ``state_change_reason``.
        """
        self._reset_state()
        submit_options = {
            "parameters": parameters,
            "work_group": work_group,
            "s3_staging_dir": s3_staging_dir,
            "cache_size": cache_size,
        }
        self._query_id = self._execute(operation, **submit_options)
        execution = self._poll(self._query_id)
        # Bail out first so the success path below reads straight through.
        if execution.state != AthenaQueryExecution.STATE_SUCCEEDED:
            raise OperationalError(execution.state_change_reason)
        reader_options = {
            "keep_default_na": keep_default_na,
            "na_values": na_values,
            "quoting": quoting,
        }
        self._result_set = AthenaPandasResultSet(
            connection=self._connection,
            converter=self._converter,
            query_execution=execution,
            arraysize=self.arraysize,
            retry_config=self._retry_config,
            **reader_options
        )
        return self

    def executemany(self, operation, seq_of_parameters):
        """Execute ``operation`` once per parameter set, then reset the state."""
        for parameter_set in seq_of_parameters:
            self.execute(operation, parameter_set)
        # Operations that have result sets are not allowed with executemany.
        self._reset_state()

    @synchronized
    def cancel(self):
        """Cancel the query whose id is stored on this cursor.

        Raises:
            ProgrammingError: no query id is stored (``None`` or empty).
        """
        if self._query_id:
            self._cancel(self._query_id)
            return
        raise ProgrammingError("QueryExecutionId is none or empty.")

    @synchronized
    def fetchone(self):
        """Delegate to the result set's ``fetchone``.

        Raises:
            ProgrammingError: ``has_result_set`` is falsy.
        """
        if self.has_result_set:
            return self._result_set.fetchone()
        raise ProgrammingError("No result set.")

    @synchronized
    def fetchmany(self, size=None):
        """Delegate to the result set's ``fetchmany``, passing ``size`` along.

        Raises:
            ProgrammingError: ``has_result_set`` is falsy.
        """
        if self.has_result_set:
            return self._result_set.fetchmany(size)
        raise ProgrammingError("No result set.")

    @synchronized
    def fetchall(self):
        """Delegate to the result set's ``fetchall``.

        Raises:
            ProgrammingError: ``has_result_set`` is falsy.
        """
        if self.has_result_set:
            return self._result_set.fetchall()
        raise ProgrammingError("No result set.")

    @synchronized
    def as_pandas(self):
        """Delegate to the result set's ``as_pandas``.

        Raises:
            ProgrammingError: ``has_result_set`` is falsy.
        """
        if self.has_result_set:
            return self._result_set.as_pandas()
        raise ProgrammingError("No result set.")
示例#8
0
class PandasCursor(BaseCursor, CursorIterator, WithResultSet):
    """Cursor that wraps successful queries in an ``AthenaPandasResultSet``.

    Submission, polling, cancellation and state reset go through
    ``_execute``, ``_poll``, ``_cancel`` and ``_reset_state``, none of which
    are defined in this class (presumably inherited -- confirm).
    ``executemany`` is not supported by this version.
    """

    def __init__(
        self,
        connection,
        s3_staging_dir,
        schema_name,
        work_group,
        poll_interval,
        encryption_option,
        kms_key,
        converter,
        formatter,
        retry_config,
        **kwargs
    ):
        """Forward every argument, unchanged, to the parent initializer."""
        super(PandasCursor, self).__init__(
            connection=connection,
            s3_staging_dir=s3_staging_dir,
            schema_name=schema_name,
            work_group=work_group,
            poll_interval=poll_interval,
            encryption_option=encryption_option,
            kms_key=kms_key,
            converter=converter,
            formatter=formatter,
            retry_config=retry_config,
            **kwargs
        )

    @property
    def rownumber(self):
        """``rownumber`` of the current result set, or ``None`` without one."""
        if not self._result_set:
            return None
        return self._result_set.rownumber

    def close(self):
        """Close the current result set unless it is missing or already closed."""
        if not self._result_set:
            return
        if self._result_set.is_closed:
            return
        self._result_set.close()

    @synchronized
    def execute(
        self,
        operation,
        parameters=None,
        work_group=None,
        s3_staging_dir=None,
        cache_size=0,
    ):
        """Run ``operation`` and attach a pandas result set when it succeeds.

        ``parameters``, ``work_group``, ``s3_staging_dir`` and ``cache_size``
        are forwarded as keyword arguments to ``_execute``; their meaning is
        defined there.

        Returns:
            ``self``.

        Raises:
            OperationalError: the polled state is not ``STATE_SUCCEEDED``;
                the message is the execution's ``state_change_reason``.
        """
        self._reset_state()
        submit_options = {
            'parameters': parameters,
            'work_group': work_group,
            's3_staging_dir': s3_staging_dir,
            'cache_size': cache_size,
        }
        self._query_id = self._execute(operation, **submit_options)
        execution = self._poll(self._query_id)
        # Failure first, so the success path needs no else branch.
        if execution.state != AthenaQueryExecution.STATE_SUCCEEDED:
            raise OperationalError(execution.state_change_reason)
        self._result_set = AthenaPandasResultSet(
            self._connection,
            self._converter,
            execution,
            self.arraysize,
            self._retry_config,
        )
        return self

    def executemany(self, operation, seq_of_parameters):
        """Always raise ``NotSupportedError``; both arguments are ignored."""
        raise NotSupportedError

    @synchronized
    def cancel(self):
        """Cancel the query whose id is stored on this cursor.

        Raises:
            ProgrammingError: no query id is stored (``None`` or empty).
        """
        if self._query_id:
            self._cancel(self._query_id)
            return
        raise ProgrammingError('QueryExecutionId is none or empty.')

    @synchronized
    def fetchone(self):
        """Delegate to the result set's ``fetchone``.

        Raises:
            ProgrammingError: ``has_result_set`` is falsy.
        """
        if self.has_result_set:
            return self._result_set.fetchone()
        raise ProgrammingError('No result set.')

    @synchronized
    def fetchmany(self, size=None):
        """Delegate to the result set's ``fetchmany``, passing ``size`` along.

        Raises:
            ProgrammingError: ``has_result_set`` is falsy.
        """
        if self.has_result_set:
            return self._result_set.fetchmany(size)
        raise ProgrammingError('No result set.')

    @synchronized
    def fetchall(self):
        """Delegate to the result set's ``fetchall``.

        Raises:
            ProgrammingError: ``has_result_set`` is falsy.
        """
        if self.has_result_set:
            return self._result_set.fetchall()
        raise ProgrammingError('No result set.')

    @synchronized
    def as_pandas(self):
        """Delegate to the result set's ``as_pandas``.

        Raises:
            ProgrammingError: ``has_result_set`` is falsy.
        """
        if self.has_result_set:
            return self._result_set.as_pandas()
        raise ProgrammingError('No result set.')
示例#9
0
 def test_parse_invalid_output_location(self):
     """``_parse_output_location`` must raise ``DataError`` for 'http://foobar'."""
     self.assertRaises(
         DataError,
         AthenaPandasResultSet._parse_output_location,
         'http://foobar',
     )
示例#10
0
 def test_parse_output_location(self):
     """Parsing 's3://bucket/path/to' must yield 'bucket' then 'path/to'."""
     parsed = AthenaPandasResultSet._parse_output_location(
         's3://bucket/path/to')
     # Checked in index order, one assertion per element.
     expected_parts = ('bucket', 'path/to')
     for index, expected in enumerate(expected_parts):
         self.assertEqual(parsed[index], expected)