Example #1
# Imports are not part of the original snippet. The SQSHandler path below is
# taken from the mock.patch targets used in these tests; the remaining
# project-internal paths are assumptions for this excerpt.
import json
import uuid
from unittest import mock

import requests

from matrix.common.aws.sqs_handler import SQSHandler
from matrix.common.exceptions import MatrixException  # assumed path
from matrix.common.request.request_tracker import Subtask  # assumed path
from matrix.query_runner import QueryRunner  # assumed path
from tests.unit import MatrixTestCaseUsingMockAWS  # assumed path


class TestSQSHandler(MatrixTestCaseUsingMockAWS):
    def setUp(self):
        super(TestSQSHandler, self).setUp()
        self.sqs_handler = SQSHandler()
        self.sqs.meta.client.purge_queue(QueueUrl="test_query_job_q_name")

    def test_add_message_to_queue(self):
        payload = {'test_key': "test_value"}
        self.sqs_handler.add_message_to_queue("test_query_job_q_name", payload)

        messages = self.sqs.meta.client.receive_message(
            QueueUrl="test_query_job_q_name")
        message_body = json.loads(messages['Messages'][0]['Body'])
        self.assertEqual(message_body['test_key'], "test_value")

    def test_receive_messages_from_queue__returns_None_when_no_messages_found(
            self):
        message = self.sqs_handler.receive_messages_from_queue(
            "test_query_job_q_name", 1)
        self.assertEqual(message, None)

    def test_retrieve_messages_from_queue__returns_message_when_message_is_found(
            self):
        payload = {'test_key': "test_value"}
        self.sqs_handler.add_message_to_queue("test_query_job_q_name", payload)

        messages = self.sqs_handler.receive_messages_from_queue(
            queue_url="test_query_job_q_name")

        message_body = json.loads(messages[0]['Body'])
        self.assertEqual(len(messages), 1)
        self.assertEqual(message_body['test_key'], "test_value")

    def test_delete_message_from_queue(self):
        payload = {'test_key': "test_value"}
        self.sqs_handler.add_message_to_queue("test_query_job_q_name", payload)
        messages = self.sqs_handler.receive_messages_from_queue(
            queue_url="test_query_job_q_name")
        receipt_handle = messages[0]['ReceiptHandle']

        self.sqs_handler.delete_message_from_queue("test_query_job_q_name",
                                                   receipt_handle)

        message = self.sqs_handler.receive_messages_from_queue(
            "test_query_job_q_name", 1)
        self.assertEqual(message, None)


class TestQueryRunner(MatrixTestCaseUsingMockAWS):
    def setUp(self):
        super(TestQueryRunner, self).setUp()
        self.query_runner = QueryRunner()
        self.matrix_infra_config.set(self.__class__.TEST_CONFIG)
        self.query_runner.matrix_infra_config = self.matrix_infra_config
        self.sqs_handler = SQSHandler()
        self.sqs.meta.client.purge_queue(QueueUrl="test_query_job_q_name")
        self.sqs.meta.client.purge_queue(
            QueueUrl="test_deadletter_query_job_q_name")

    @mock.patch(
        "matrix.common.aws.s3_handler.S3Handler.load_content_from_obj_key")
    @mock.patch(
        "matrix.common.aws.sqs_handler.SQSHandler.receive_messages_from_queue")
    def test_run__with_no_messages_in_queue(self, mock_receive_messages,
                                            mock_load_obj):
        mock_receive_messages.return_value = None
        self.query_runner.run(max_loops=1)
        mock_receive_messages.assert_called_once_with(
            self.query_runner.query_job_q_url)
        mock_load_obj.assert_not_called()

    @mock.patch(
        "matrix.common.aws.batch_handler.BatchHandler.schedule_matrix_conversion"
    )
    @mock.patch(
        "matrix.common.request.request_tracker.RequestTracker.is_request_ready_for_conversion"
    )
    @mock.patch(
        "matrix.common.request.request_tracker.RequestTracker.complete_subtask_execution"
    )
    @mock.patch(
        "matrix.common.aws.redshift_handler.RedshiftHandler.transaction")
    @mock.patch(
        "matrix.common.aws.s3_handler.S3Handler.load_content_from_obj_key")
    def test_run__with_one_message_in_queue_and_not_ready_for_conversion(
            self, mock_load_obj, mock_transaction, mock_complete_subtask,
            mock_is_ready_for_conversion, mock_schedule_conversion):
        request_id = str(uuid.uuid4())
        payload = {'request_id': request_id, 's3_obj_key': "test_s3_obj_key"}
        self.sqs_handler.add_message_to_queue("test_query_job_q_name", payload)
        mock_is_ready_for_conversion.return_value = False

        self.query_runner.run(max_loops=1)

        mock_load_obj.assert_called_once_with("test_s3_obj_key")
        mock_transaction.assert_called()
        mock_complete_subtask.assert_called_once_with(Subtask.QUERY)
        mock_schedule_conversion.assert_not_called()

    @mock.patch(
        "matrix.common.request.request_tracker.RequestTracker.write_batch_job_id_to_db"
    )
    @mock.patch("matrix.common.request.request_tracker.RequestTracker.format")
    @mock.patch(
        "matrix.common.aws.batch_handler.BatchHandler.schedule_matrix_conversion"
    )
    @mock.patch(
        "matrix.common.request.request_tracker.RequestTracker.is_request_ready_for_conversion"
    )
    @mock.patch(
        "matrix.common.request.request_tracker.RequestTracker.complete_subtask_execution"
    )
    @mock.patch(
        "matrix.common.aws.redshift_handler.RedshiftHandler.transaction")
    @mock.patch(
        "matrix.common.aws.s3_handler.S3Handler.load_content_from_obj_key")
    def test_run__with_one_message_in_queue_and_ready_for_conversion(
            self, mock_load_obj, mock_transaction, mock_complete_subtask,
            mock_is_ready_for_conversion, mock_schedule_conversion,
            mock_request_format, mock_write_batch_job_id_to_db):
        request_id = str(uuid.uuid4())
        payload = {'request_id': request_id, 's3_obj_key': "test_s3_obj_key"}
        self.sqs_handler.add_message_to_queue("test_query_job_q_name", payload)
        mock_is_ready_for_conversion.return_value = True
        mock_schedule_conversion.return_value = "123-123"

        self.query_runner.run(max_loops=1)

        mock_schedule_conversion.assert_called_once_with(request_id, mock.ANY)
        mock_write_batch_job_id_to_db.assert_called_once_with("123-123")

    @mock.patch(
        "matrix.common.request.request_tracker.RequestTracker.log_error")
    @mock.patch("matrix.common.request.request_tracker.RequestTracker.format")
    @mock.patch(
        "matrix.common.request.request_tracker.RequestTracker.complete_subtask_execution"
    )
    @mock.patch(
        "matrix.common.aws.redshift_handler.RedshiftHandler.transaction")
    @mock.patch(
        "matrix.common.aws.s3_handler.S3Handler.load_content_from_obj_key")
    def test_run__with_one_message_in_queue_and_fails(self, mock_load_obj,
                                                      mock_transaction,
                                                      mock_complete_subtask,
                                                      mock_request_format,
                                                      mock_log_error):
        request_id = str(uuid.uuid4())
        payload = {'request_id': request_id, 's3_obj_key': "test_s3_obj_key"}
        self.sqs_handler.add_message_to_queue("test_query_job_q_name", payload)
        mock_complete_subtask.side_effect = MatrixException(
            status=requests.codes.not_found, title="Unable to find")

        self.query_runner.run(max_loops=1)

        mock_log_error.assert_called_once()
        query_queue_messages = self.sqs_handler.receive_messages_from_queue(
            "test_query_job_q_name", 1)
        self.assertEqual(query_queue_messages, None)
        deadletter_queue_messages = self.sqs_handler.receive_messages_from_queue(
            "test_deadletter_query_job_q_name", 1)
        self.assertEqual(len(deadletter_queue_messages), 1)
        message_body = json.loads(deadletter_queue_messages[0]['Body'])
        self.assertEqual(message_body['request_id'], request_id)
        self.assertEqual(message_body['s3_obj_key'], "test_s3_obj_key")
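
The SQSHandler exercised by these tests is not shown in this example. As a point of reference, here is a minimal sketch of the interface the assertions above rely on; the method names and return behaviour are taken from the calls in the tests, while the implementation details (boto3 resource setup, default wait time) are assumptions.

import json

import boto3


class SQSHandler:
    """Minimal sketch of the wrapper under test (implementation assumed)."""

    def __init__(self):
        self.sqs = boto3.resource("sqs")

    def add_message_to_queue(self, queue_url, payload):
        # Payloads are JSON-encoded, matching json.loads(...) in the tests.
        self.sqs.meta.client.send_message(QueueUrl=queue_url,
                                          MessageBody=json.dumps(payload))

    def receive_messages_from_queue(self, queue_url, wait_time=15):
        response = self.sqs.meta.client.receive_message(
            QueueUrl=queue_url, WaitTimeSeconds=wait_time)
        # boto3 omits the 'Messages' key when the queue is empty, so this
        # returns None in that case, as the tests expect.
        return response.get('Messages')

    def delete_message_from_queue(self, queue_url, receipt_handle):
        self.sqs.meta.client.delete_message(QueueUrl=queue_url,
                                            ReceiptHandle=receipt_handle)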
Example #3
# Imports and logger setup are not part of the original snippet. The handler
# and RequestTracker paths are taken from the mock.patch targets in Example #1;
# the remaining project-internal paths, and how `logger` is created, are
# assumptions for this excerpt.
import json
import logging
import os

from matrix.common.aws.batch_handler import BatchHandler
from matrix.common.aws.redshift_handler import RedshiftHandler
from matrix.common.aws.s3_handler import S3Handler
from matrix.common.aws.sqs_handler import SQSHandler
from matrix.common.config import MatrixInfraConfig  # assumed path
from matrix.common.logging import Logging  # assumed path
from matrix.common.request.request_tracker import RequestTracker, Subtask  # Subtask location assumed

logger = logging.getLogger(__name__)  # assumed logger setup


class QueryRunner:
    def __init__(self):
        self.sqs_handler = SQSHandler()
        self.s3_handler = S3Handler(os.environ["MATRIX_QUERY_BUCKET"])
        self.batch_handler = BatchHandler()
        self.redshift_handler = RedshiftHandler()
        self.matrix_infra_config = MatrixInfraConfig()

    @property
    def query_job_q_url(self):
        return self.matrix_infra_config.query_job_q_url

    @property
    def query_job_deadletter_q_url(self):
        return self.matrix_infra_config.query_job_deadletter_q_url

    def run(self, max_loops=None):
        loops = 0
        while max_loops is None or loops < max_loops:
            loops += 1
            messages = self.sqs_handler.receive_messages_from_queue(
                self.query_job_q_url)
            if messages:
                message = messages[0]
                logger.info(f"Received {message} from {self.query_job_q_url}")
                payload = json.loads(message['Body'])
                request_id = payload['request_id']
                request_tracker = RequestTracker(request_id)
                Logging.set_correlation_id(logger, value=request_id)
                obj_key = payload['s3_obj_key']
                receipt_handle = message['ReceiptHandle']
                try:
                    logger.info(f"Fetching query from {obj_key}")
                    query = self.s3_handler.load_content_from_obj_key(obj_key)

                    logger.info(f"Running query from {obj_key}")
                    self.redshift_handler.transaction([query], read_only=True)
                    logger.info(f"Finished running query from {obj_key}")

                    logger.info(
                        f"Deleting {message} from {self.query_job_q_url}")
                    self.sqs_handler.delete_message_from_queue(
                        self.query_job_q_url, receipt_handle)

                    logger.info(
                        "Incrementing completed queries in state table")
                    request_tracker.complete_subtask_execution(Subtask.QUERY)

                    if request_tracker.is_request_ready_for_conversion():
                        logger.info("Scheduling batch conversion job")
                        batch_job_id = self.batch_handler.schedule_matrix_conversion(
                            request_id, request_tracker.format)
                        request_tracker.write_batch_job_id_to_db(batch_job_id)
                except Exception as e:
                    logger.info(
                        f"QueryRunner failed on {message} with error {e}")
                    request_tracker.log_error(str(e))
                    logger.info(
                        f"Adding {message} to {self.query_job_deadletter_q_url}"
                    )
                    self.sqs_handler.add_message_to_queue(
                        self.query_job_deadletter_q_url, payload)
                    logger.info(
                        f"Deleting {message} from {self.query_job_q_url}")
                    self.sqs_handler.delete_message_from_queue(
                        self.query_job_q_url, receipt_handle)
            else:
                logger.info(f"No messages to read from {self.query_job_q_url}")