Exemplo n.º 1
0
    def test_multilevel_compose(self) -> None:
        """Checks that layering compose() and update_args() calls merges
        nested message fields rather than replacing whole sub-messages.
        """
        starting_queue = queue_pb2.Queue(
            rate_limits=queue_pb2.RateLimits(
                max_dispatches_per_second=100,
                max_concurrent_dispatches=1,
            ),
            retry_config=queue_pb2.RetryConfig(max_attempts=5),
        )

        builder = ProtobufBuilder(queue_pb2.Queue)
        builder = builder.compose(starting_queue)
        # Second layer: overrides only the dispatch rate.
        builder = builder.compose(
            queue_pb2.Queue(
                rate_limits=queue_pb2.RateLimits(
                    max_dispatches_per_second=50)))
        # Third layer: sets only one additional retry_config field.
        builder = builder.update_args(
            retry_config=queue_pb2.RetryConfig(max_doublings=4))
        actual = builder.build()

        # Fields untouched by the later layers keep the values from
        # starting_queue.
        wanted = queue_pb2.Queue(
            rate_limits=queue_pb2.RateLimits(
                max_dispatches_per_second=50,
                max_concurrent_dispatches=1,
            ),
            retry_config=queue_pb2.RetryConfig(
                max_attempts=5,
                max_doublings=4,
            ),
        )

        self.assertEqual(actual, wanted)
def _build_cloud_task_queue_configs(
        client_wrapper: GoogleCloudTasksClientWrapper
) -> List[queue_pb2.Queue]:
    """Assembles the configurations of all Google Cloud Tasks queues that
    should be deployed in this environment.

    The result lists, in order: the direct ingest queues, the scraper phase
    queue, the BigQuery queue, the job monitor queue, one queue per vendor
    that has queue params, and one queue per supported region that does not
    use a shared queue and has ingest launched in this environment.
    """

    def named(base_config: queue_pb2.Queue,
              queue_name: str) -> queue_pb2.Queue:
        # Every queue built here goes through the same client wrapper.
        return _queue_config_with_name(client_wrapper, base_config, queue_name)

    # Direct ingest queues for handling /process_job requests
    direct_ingest_queue_names = (
        DIRECT_INGEST_STATE_PROCESS_JOB_QUEUE_V2,
        DIRECT_INGEST_JAILS_PROCESS_JOB_QUEUE_V2,
        DIRECT_INGEST_BQ_IMPORT_EXPORT_QUEUE_V2,
        DIRECT_INGEST_SCHEDULER_QUEUE_V2,
    )
    configs = [
        named(DIRECT_INGEST_QUEUE_BASE_CONFIG, name)
        for name in direct_ingest_queue_names
    ]

    # Scraper phase queue: the shared config with its dispatch rate set to 1.
    phase_builder = ProtobufBuilder(queue_pb2.Queue)
    phase_builder = phase_builder.compose(
        named(SCRAPER_PHASE_QUEUE_CONFIG, SCRAPER_PHASE_QUEUE_V2))
    phase_builder = phase_builder.update_args(
        rate_limits=queue_pb2.RateLimits(max_dispatches_per_second=1))
    configs.append(phase_builder.build())

    configs.append(named(BIGQUERY_QUEUE_CONFIG, BIGQUERY_QUEUE_V2))
    configs.append(named(JOB_MONITOR_QUEUE_CONFIG, JOB_MONITOR_QUEUE_V2))

    # Vendor queues: base scraper config overlaid with the vendor's params.
    for vendor in vendors.get_vendors():
        vendor_params = vendors.get_vendor_queue_params(vendor)
        if vendor_params is None:
            continue
        dashed_vendor = vendor.replace('_', '-')
        vendor_queue_name = 'vendor-{}-scraper-v2'.format(dashed_vendor)
        vendor_builder = ProtobufBuilder(queue_pb2.Queue)
        vendor_builder = vendor_builder.compose(
            named(BASE_SCRAPER_QUEUE_CONFIG, vendor_queue_name))
        vendor_builder = vendor_builder.compose(
            queue_pb2.Queue(**vendor_params))
        configs.append(vendor_builder.build())

    # Region queues: base scraper config, overlaid with region.queue if set.
    for region in regions.get_supported_regions():
        if region.shared_queue or not region.is_ingest_launched_in_env():
            continue

        region_config = named(BASE_SCRAPER_QUEUE_CONFIG,
                              region.get_queue_name())
        if region.queue:
            region_builder = ProtobufBuilder(queue_pb2.Queue)
            region_builder = region_builder.compose(region_config)
            region_builder = region_builder.compose(
                queue_pb2.Queue(**region.queue))
            region_config = region_builder.build()
        configs.append(region_config)

    return configs
from google.oauth2 import credentials
from google.protobuf import duration_pb2

from recidiviz.common.google_cloud.google_cloud_tasks_shared_queues import \
    DIRECT_INGEST_SCHEDULER_QUEUE_V2, \
    DIRECT_INGEST_STATE_PROCESS_JOB_QUEUE_V2, \
    DIRECT_INGEST_JAILS_PROCESS_JOB_QUEUE_V2, BIGQUERY_QUEUE_V2, \
    JOB_MONITOR_QUEUE_V2, SCRAPER_PHASE_QUEUE_V2, DIRECT_INGEST_BQ_IMPORT_EXPORT_QUEUE_V2
from recidiviz.common.google_cloud.google_cloud_tasks_client_wrapper import \
    GoogleCloudTasksClientWrapper
from recidiviz.common.google_cloud.protobuf_builder import ProtobufBuilder
from recidiviz.utils import vendors, regions

# Base queue configuration for the direct ingest queues. No `name` is set
# here.
DIRECT_INGEST_QUEUE_BASE_CONFIG = queue_pb2.Queue(
    rate_limits=queue_pb2.RateLimits(
        max_dispatches_per_second=100,
        max_concurrent_dispatches=1,
    ),
    retry_config=queue_pb2.RetryConfig(max_attempts=5),
    stackdriver_logging_config=queue_pb2.StackdriverLoggingConfig(
        sampling_ratio=1.0,
    ),
)

# Queue configuration named after BIGQUERY_QUEUE_V2: both rate limits are 1
# and max_attempts is 1.
BIGQUERY_QUEUE_CONFIG = queue_pb2.Queue(
    name=BIGQUERY_QUEUE_V2,
    rate_limits=queue_pb2.RateLimits(
        max_dispatches_per_second=1,
        max_concurrent_dispatches=1,
    ),
    retry_config=queue_pb2.RetryConfig(max_attempts=1),
    stackdriver_logging_config=queue_pb2.StackdriverLoggingConfig(
        sampling_ratio=1.0,
    ),
)
    def test_initialize_queues(self, mock_regions):
        """Checks the queue configs recorded after calling initialize_queues().

        Verifies the common name prefix and logging sampling ratio on every
        updated queue, the presence and concurrency limit of the direct
        ingest queues, the composed config of a region with a queue override,
        the config of a region without one, and the presence of the
        BigQuery, job monitor and scraper phase queues.
        """
        # Arrange
        region_xx = fake_region(
            region_code='us_xx',
            queue={'rate_limits': {
                'max_dispatches_per_second': 0.3
            }})
        region_xx.get_queue_name.return_value = 'us_xx_queue'
        region_yy = fake_region(region_code='us_yy')
        region_yy.get_queue_name.return_value = 'us_yy_queue'
        mock_regions.return_value = [region_xx, region_yy]

        # Act
        with metadata.local_project_id_override('my-project-id'):
            google_cloud_task_queue_config.initialize_queues(
                google_auth_token='fake-auth-token')

        # Assert
        queues_updated_by_id = self.get_updated_queues()
        expected_name_prefix = (
            'projects/my-project-id/locations/us-east1/queues/')
        for queue in queues_updated_by_id.values():
            # Name the offending queue so a failure is actionable.
            self.assertTrue(
                queue.name.startswith(expected_name_prefix),
                msg='Unexpected queue name: {}'.format(queue.name))
            self.assertEqual(queue.stackdriver_logging_config.sampling_ratio,
                             1.0)

        direct_ingest_queue_ids = {
            'direct-ingest-state-process-job-queue-v2',
            'direct-ingest-jpp-process-job-queue-v2',
            'direct-ingest-bq-import-export-v2', 'direct-ingest-scheduler-v2'
        }
        # An empty difference means every direct ingest queue was updated.
        self.assertFalse(
            direct_ingest_queue_ids.difference(queues_updated_by_id.keys()))

        for queue_id in direct_ingest_queue_ids:
            queue = queues_updated_by_id[queue_id]
            self.assertEqual(queue.rate_limits.max_concurrent_dispatches, 1)

        # Test that composition works as expected
        self.assertEqual(
            queues_updated_by_id[region_xx.get_queue_name()],
            queue_pb2.Queue(
                name=
                'projects/my-project-id/locations/us-east1/queues/us_xx_queue',
                rate_limits=queue_pb2.RateLimits(
                    # This is overridden in the mock above
                    max_dispatches_per_second=0.3,
                    max_concurrent_dispatches=3,
                ),
                retry_config=queue_pb2.RetryConfig(
                    min_backoff=duration_pb2.Duration(seconds=5),
                    max_backoff=duration_pb2.Duration(seconds=300),
                    max_attempts=5,
                ),
                stackdriver_logging_config=queue_pb2.StackdriverLoggingConfig(
                    sampling_ratio=1.0, )))

        # Test that other regions are unaffected
        self.assertEqual(
            queues_updated_by_id[region_yy.get_queue_name()],
            queue_pb2.Queue(
                name=
                'projects/my-project-id/locations/us-east1/queues/us_yy_queue',
                rate_limits=queue_pb2.RateLimits(
                    max_dispatches_per_second=0.08333333333,
                    max_concurrent_dispatches=3,
                ),
                retry_config=queue_pb2.RetryConfig(
                    min_backoff=duration_pb2.Duration(seconds=5),
                    max_backoff=duration_pb2.Duration(seconds=300),
                    max_attempts=5,
                ),
                stackdriver_logging_config=queue_pb2.StackdriverLoggingConfig(
                    sampling_ratio=1.0, )))

        # assertIn reports the missing id and the container on failure,
        # unlike assertTrue(x in y).
        self.assertIn('bigquery-v2', queues_updated_by_id)
        self.assertIn('job-monitor-v2', queues_updated_by_id)
        self.assertIn('scraper-phase-v2', queues_updated_by_id)
Exemplo n.º 5
0
    def test_initialize_queues(self, mock_regions: Mock) -> None:
        """Checks the queue configs recorded after calling initialize_queues().

        Verifies the common name prefix and logging sampling ratio on every
        updated queue, the presence and concurrency limit of the direct
        ingest queues, the composed config of a region with a queue override,
        the config of a region without one, and the presence of the
        BigQuery, job monitor, scraper phase and admin panel data discovery
        queues.
        """
        # Arrange
        region_xx = fake_region(
            region_code="us_xx",
            queue={"rate_limits": {
                "max_dispatches_per_second": 0.3
            }},
            get_queue_name=lambda: "us_xx_queue",
        )
        region_yy = fake_region(region_code="us_yy",
                                get_queue_name=lambda: "us_yy_queue")
        mock_regions.return_value = [region_xx, region_yy]

        # Act
        with metadata.local_project_id_override("my-project-id"):
            google_cloud_task_queue_config.initialize_queues(
                google_auth_token="fake-auth-token")

        # Assert
        queues_updated_by_id = self.get_updated_queues()
        expected_name_prefix = (
            "projects/my-project-id/locations/us-east1/queues/")
        for queue in queues_updated_by_id.values():
            # Name the offending queue so a failure is actionable.
            self.assertTrue(
                queue.name.startswith(expected_name_prefix),
                msg="Unexpected queue name: {}".format(queue.name))
            self.assertEqual(queue.stackdriver_logging_config.sampling_ratio,
                             1.0)

        direct_ingest_queue_ids = {
            "direct-ingest-state-process-job-queue-v2",
            "direct-ingest-jpp-process-job-queue-v2",
            "direct-ingest-bq-import-export-v2",
            "direct-ingest-scheduler-v2",
        }
        # An empty difference means every direct ingest queue was updated.
        self.assertFalse(
            direct_ingest_queue_ids.difference(queues_updated_by_id.keys()))

        for queue_id in direct_ingest_queue_ids:
            queue = queues_updated_by_id[queue_id]
            self.assertEqual(queue.rate_limits.max_concurrent_dispatches, 1)

        # Test that composition works as expected
        self.assertEqual(
            queues_updated_by_id[region_xx.get_queue_name()],
            queue_pb2.Queue(
                name=
                "projects/my-project-id/locations/us-east1/queues/us_xx_queue",
                rate_limits=queue_pb2.RateLimits(
                    # This is overridden in the mock above
                    max_dispatches_per_second=0.3,
                    max_concurrent_dispatches=3,
                ),
                retry_config=queue_pb2.RetryConfig(
                    min_backoff=duration_pb2.Duration(seconds=5),
                    max_backoff=duration_pb2.Duration(seconds=300),
                    max_attempts=5,
                ),
                stackdriver_logging_config=queue_pb2.StackdriverLoggingConfig(
                    sampling_ratio=1.0, ),
            ),
        )

        # Test that other regions are unaffected
        self.assertEqual(
            queues_updated_by_id[region_yy.get_queue_name()],
            queue_pb2.Queue(
                name=
                "projects/my-project-id/locations/us-east1/queues/us_yy_queue",
                rate_limits=queue_pb2.RateLimits(
                    max_dispatches_per_second=0.08333333333,
                    max_concurrent_dispatches=3,
                ),
                retry_config=queue_pb2.RetryConfig(
                    min_backoff=duration_pb2.Duration(seconds=5),
                    max_backoff=duration_pb2.Duration(seconds=300),
                    max_attempts=5,
                ),
                stackdriver_logging_config=queue_pb2.StackdriverLoggingConfig(
                    sampling_ratio=1.0, ),
            ),
        )

        # assertIn reports the missing id and the container on failure,
        # unlike assertTrue(x in y).
        self.assertIn("bigquery-v2", queues_updated_by_id)
        self.assertIn("job-monitor-v2", queues_updated_by_id)
        self.assertIn("scraper-phase-v2", queues_updated_by_id)
        self.assertIn("admin-panel-data-discovery", queues_updated_by_id)