Пример #1
0
    def __execute_query(self,
                        database,
                        query,
                        s3_output_url,
                        return_results=True,
                        save_results=True):
        """Run ``query`` on Athena and optionally fetch its result from S3.

        The query is started with its output location set to the bucket and
        path that ``self.__parse_s3_path`` extracts from ``s3_output_url``,
        then ``self.__poll_status`` is used to obtain the final status.

        Args:
            database: Value sent as ``QueryExecutionContext['Database']``.
            query: Query string to execute.
            s3_output_url: S3 URL under which Athena should store the result.
            return_results: When true, download the result object and return
                it together with the execution id. When false, nothing is
                downloaded or deleted and only the execution id is returned.
            save_results: Only consulted when ``return_results`` is true.
                When false, the downloaded result object and its
                ``.metadata`` companion are deleted from S3.

        Returns:
            ``(result, query_execution_id)`` when ``return_results`` is true,
            where ``result`` is a pandas DataFrame parsed from the ``.csv``
            output, or the UTF-8 decoded text of the ``.txt`` output when no
            ``.csv`` object exists. Otherwise just ``query_execution_id``.

        Raises:
            Exceptions.QueryExecutionFailedException: The status is "FAILED".
            Exceptions.QueryUnknownStatusException: The status is neither
                "SUCCEEDED" nor "FAILED".
            Exceptions.QueryNotSupported: Neither a ``.csv`` nor a ``.txt``
                result object exists for the execution.
            ClientError: Any S3 error other than ``NoSuchKey`` is re-raised
                unchanged.
        """
        s3_bucket, s3_path = self.__parse_s3_path(s3_output_url)

        response = self.__athena.start_query_execution(
            QueryString=query,
            QueryExecutionContext={'Database': database},
            ResultConfiguration={
                'OutputLocation': 's3://' + s3_bucket + "/" + s3_path,
            })

        query_execution_id = response['QueryExecutionId']
        status = self.__poll_status(query_execution_id)

        if status == "FAILED":
            raise Exceptions.QueryExecutionFailedException(
                "Query Failed. Check athena logs for more info.")
        if status != 'SUCCEEDED':
            raise Exceptions.QueryUnknownStatusException(
                "Query is in an unknown status. Check athena logs for more info."
            )

        if not return_results:
            return query_execution_id

        # Add the "/" separator only when it is needed. An empty path or one
        # that already ends in "/" would otherwise produce a key such as
        # "/<id>.csv" or "dir//<id>.csv", which cannot match the stored object.
        # NOTE(review): assumes Athena stores results directly under the
        # output location as "<id>.<ext>" -- confirm.
        if s3_path and not s3_path.endswith('/'):
            key_base = s3_path + "/" + query_execution_id
        else:
            key_base = s3_path + query_execution_id

        # Regular queries produce a .csv object. Descriptive queries such as
        # "describe table" and "show partitions" produce a .txt object
        # instead, so fall back to that when the .csv key does not exist.
        s3_key = key_base + '.csv'
        try:
            obj = self.__s3.get_object(Bucket=s3_bucket, Key=s3_key)
            result = pd.read_csv(io.BytesIO(obj['Body'].read()))
        except ClientError as csv_error:
            if csv_error.response['Error']['Code'] != 'NoSuchKey':
                raise
            s3_key = key_base + '.txt'
            try:
                obj = self.__s3.get_object(Bucket=s3_bucket, Key=s3_key)
                result = obj['Body'].read().decode('utf-8')
            except ClientError as txt_error:
                if txt_error.response['Error']['Code'] != 'NoSuchKey':
                    raise
                raise Exceptions.QueryNotSupported(
                    "The specified query is not supported by this package."
                )

        # Remove the result file (whichever key was actually read) and its
        # metadata companion from S3.
        if not save_results:
            self.__s3.delete_object(Bucket=s3_bucket, Key=s3_key)
            self.__s3.delete_object(Bucket=s3_bucket,
                                    Key=s3_key + '.metadata')

        return result, query_execution_id
Пример #2
0
    def get_result(self, query_execution_id, save_results=False):
        '''
        Return the result of a finished Athena query as a pandas DataFrame.

        The execution is looked up by id. When its state is SUCCEEDED, the
        object named by the reported output location is downloaded from S3
        and parsed with ``pd.read_csv``. The S3 key is taken from that
        reported location rather than rebuilt here.

        Args:
            query_execution_id: Id of the Athena query execution to fetch.
            save_results: When false (the default), the result object and its
                ``.metadata`` companion are deleted from S3 after reading.

        Returns:
            pandas DataFrame parsed from the result object.

        Raises:
            Exceptions.QueryExecutionFailedException: The state is FAILED; the
                message includes ``self.get_query_error(query_execution_id)``.
            Exceptions.QueryStillRunningException: The state is QUEUED or
                RUNNING, i.e. the query has not finished yet.
            Exceptions.QueryUnknownStatusException: Any other state.
        '''
        # NOTE(review): this client is read from self._athena while the S3
        # client below is the name-mangled self.__s3 -- confirm that the
        # single-underscore attribute exists on the class.
        res = self._athena.get_query_execution(QueryExecutionId=query_execution_id)
        execution = res['QueryExecution']
        state = execution['Status']['State']

        if state == 'SUCCEEDED':
            # Only the success path needs the output location, so parse it
            # here; a problem with the location cannot then mask the status
            # errors raised below.
            s3_bucket, s3_key = self.__parse_s3_path(execution['ResultConfiguration']['OutputLocation'])

            obj = self.__s3.get_object(Bucket=s3_bucket, Key=s3_key)
            df = pd.read_csv(io.BytesIO(obj['Body'].read()))

            # Remove results from s3
            if not save_results:
                self.__s3.delete_object(Bucket=s3_bucket, Key=s3_key)
                self.__s3.delete_object(Bucket=s3_bucket, Key=s3_key + '.metadata')

            return df

        if state == 'FAILED':
            raise Exceptions.QueryExecutionFailedException("Query failed with response: %s" % (self.get_query_error(query_execution_id)))

        # A queued query has not finished either, so report it the same way
        # as a running one instead of as an unknown status.
        if state in ('QUEUED', 'RUNNING'):
            raise Exceptions.QueryStillRunningException("Query has not finished executing.")

        raise Exceptions.QueryUnknownStatusException("Query is in an unknown status. Check athena logs for more info.")