def test_using_lock_when_already_locked(self) -> None:
        """Entering `using_lock` for a name that is already locked must raise."""
        manager = GCSPseudoLockManager()
        manager.lock(self.LOCK_NAME)

        # The context manager is expected to fail on entry, so the body is a no-op.
        with self.assertRaises(GCSPseudoLockAlreadyExists), manager.using_lock(
            self.LOCK_NAME, self.CONTENTS
        ):
            pass
Exemplo n.º 2
0
    def test_create_wait_to_refresh_bq_tasks_state_ingest_locked(
            self, mock_task_manager):
        """With an ingest lock held, the endpoint creates a job-monitor task only.

        The request must succeed and leave an export lock active, while no
        table-refresh or refresh-monitor tasks are created.
        """
        # Arrange: one exportable table and a held ingest lock.
        table_mock = Mock()
        table_mock.name = "test_table"
        schema_config = self.mock_bq_refresh_config.for_schema_type.return_value
        schema_config.get_tables_to_export.return_value = [table_mock]
        locks = GCSPseudoLockManager()
        locks.lock(GCS_TO_POSTGRES_INGEST_RUNNING_LOCK_NAME)

        # Act
        inbound_headers = {"X-Appengine-Inbound-Appid": "recidiviz-123"}
        response = self.mock_flask_client.get(
            "/create_refresh_bq_tasks/state", headers=inbound_headers)

        # Assert
        self.assertEqual(response.status_code, HTTPStatus.OK)
        no_export_locks = locks.no_active_locks_with_prefix(
            POSTGRES_TO_BQ_EXPORT_RUNNING_LOCK_NAME)
        self.assertFalse(no_export_locks)

        task_manager = mock_task_manager.return_value
        requeue = task_manager.job_monitor_cloud_task_queue_manager.create_task
        self.assertTrue(requeue.called)
        task_manager.create_refresh_bq_table_task.assert_not_called()
        task_manager.create_bq_refresh_monitor_task.assert_not_called()
 def test_lock_two_diff(self) -> None:
     """Two distinct lock names can be held at the same time."""
     manager = GCSPseudoLockManager(self.PROJECT_ID)
     both_names = (self.LOCK_NAME, self.LOCK_NAME2)
     for name in both_names:
         manager.lock(name)
     for name in both_names:
         self.assertTrue(manager.is_locked(name))
 def test_double_lock(self) -> None:
     """Re-locking a held lock raises and leaves the lock in place."""
     manager = GCSPseudoLockManager(self.PROJECT_ID)
     name = self.LOCK_NAME
     manager.lock(name)
     with self.assertRaises(GCSPseudoLockAlreadyExists):
         manager.lock(name)
     still_locked = manager.is_locked(name)
     self.assertTrue(still_locked)
 def test_lock_two_diff_unlock_one(self) -> None:
     """Unlocking one of two held locks releases only that lock."""
     manager = GCSPseudoLockManager(self.PROJECT_ID)
     released, kept = self.LOCK_NAME, self.LOCK_NAME2
     manager.lock(released)
     manager.lock(kept)
     manager.unlock(released)
     self.assertFalse(manager.is_locked(released))
     self.assertTrue(manager.is_locked(kept))
 def test_lock_one_unlock_other(self) -> None:
     """Unlocking a name that was never locked raises and leaves the held lock alone."""
     manager = GCSPseudoLockManager(self.PROJECT_ID)
     held, never_locked = self.LOCK_NAME, self.LOCK_NAME2
     manager.lock(held)
     with self.assertRaises(GCSPseudoLockDoesNotExist):
         manager.unlock(never_locked)
     self.assertTrue(manager.is_locked(held))
     self.assertFalse(manager.is_locked(never_locked))
 def test_locks_with_prefix_do_not_exist(self) -> None:
     """No active locks are reported for a prefix once its only lock is released."""
     prefix = "SOME_LOCK_PREFIX"
     manager = GCSPseudoLockManager(self.PROJECT_ID)
     full_name = prefix + "some_suffix"
     manager.lock(full_name)
     manager.unlock(full_name)
     nothing_active = manager.no_active_locks_with_prefix(prefix)
     self.assertTrue(nothing_active)
 def test_contents_of_lock_set(self) -> None:
     """Explicitly supplied lock contents are what the lock file contains."""
     manager = GCSPseudoLockManager(self.PROJECT_ID)
     manager.lock(self.LOCK_NAME, self.CONTENTS)
     lock_path = GcsfsFilePath(
         bucket_name=manager.bucket_name,
         blob_name=self.LOCK_NAME,
     )
     stored = self.fs.download_as_string(lock_path)
     self.assertEqual(self.CONTENTS, stored)
 def test_region_are_running(self) -> None:
     """A held region ingest lock counts as an active lock for the ingest prefix."""
     manager = GCSPseudoLockManager(self.PROJECT_ID)
     region_lock_name = (
         GCS_TO_POSTGRES_INGEST_RUNNING_LOCK_NAME + self.REGION.upper()
     )
     manager.lock(region_lock_name)
     nothing_running = manager.no_active_locks_with_prefix(
         GCS_TO_POSTGRES_INGEST_RUNNING_LOCK_NAME
     )
     self.assertFalse(nothing_running)
 def test_unlock_locks_with_prefix(self) -> None:
     """`unlock_locks_with_prefix` releases every lock taken under the prefix."""
     manager = GCSPseudoLockManager()
     prefixed_names = [
         self.PREFIX + self.LOCK_NAME,
         self.PREFIX + self.LOCK_NAME2,
     ]
     for name in prefixed_names:
         manager.lock(name)
     manager.unlock_locks_with_prefix(self.PREFIX)
     for name in prefixed_names:
         self.assertFalse(manager.is_locked(name))
    def test_locks_with_prefix_mixed(self) -> None:
        """One expired and one live lock under a prefix still counts as active."""
        prefix = "SOME_LOCK_PREFIX"
        manager = GCSPseudoLockManager(self.PROJECT_ID)

        expired_name = prefix + "some_suffix"
        self._upload_fake_expired_lock(manager, expired_name)
        live_name = prefix + "some_suffix2"
        manager.lock(live_name)

        none_active = manager.no_active_locks_with_prefix(prefix)
        self.assertFalse(none_active)
 def test_contents_of_lock_default(self) -> None:
     """Locks with default contents and asserts the lockfile contains the lock time.

     The formatted time is sampled both immediately before and immediately
     after taking the lock, and either value is accepted. Sampling only after
     the lock (as a single expected value) makes the test fail spuriously
     whenever the clock ticks over between `lock()` and the sample.
     """
     lock_manager = GCSPseudoLockManager(self.PROJECT_ID)
     time_before = datetime.now().strftime(self.TIME_FORMAT)
     lock_manager.lock(self.LOCK_NAME)
     time_after = datetime.now().strftime(self.TIME_FORMAT)
     path = GcsfsFilePath(bucket_name=lock_manager.bucket_name,
                          blob_name=self.LOCK_NAME)
     actual_contents = self.fs.download_as_string(path)
     # The lock was written somewhere between the two samples.
     self.assertIn(actual_contents, (time_before, time_after))
 def test_contents_of_lock_default(self) -> None:
     """A lock taken without a payload writes a body `from_json_string` can load."""
     manager = GCSPseudoLockManager(self.PROJECT_ID)
     manager.lock(self.LOCK_NAME)
     lock_path = GcsfsFilePath(
         bucket_name=manager.bucket_name,
         blob_name=self.LOCK_NAME,
     )
     raw_body = self.fs.download_as_string(lock_path)
     parsed_body = GCSPseudoLockBody.from_json_string(raw_body)
     self.assertIsNotNone(parsed_body)
    def test_unlock_delete_fails(self) -> None:
        """Unlocking raises GCSPseudoLockFailedUnlock when backed by `_FailingDeleteFs`."""
        # Swap the patched filesystem factory so it hands out the failing fake.
        self.gcs_factory_patcher.stop()
        failing_fs = _FailingDeleteFs()
        factory_mock = self.gcs_factory_patcher.start()
        factory_mock.return_value = failing_fs

        manager = GCSPseudoLockManager(self.PROJECT_ID)
        manager.lock(self.LOCK_NAME)

        with self.assertRaises(GCSPseudoLockFailedUnlock):
            manager.unlock(self.LOCK_NAME)
def wait_for_ingest_to_create_tasks(schema_arg: str) -> Tuple[str, HTTPStatus]:
    """Worker function that defers the BigQuery refresh for a schema until ingest is idle.

    Takes the export lock for |schema_arg| (or, if it is already held, checks
    that it is held under this request's lock id), then:
    - when no ingest lock is active, calls create_all_bq_refresh_tasks_for_schema;
    - when ingest is running/locked, re-enqueues this task, carrying the lock
      id, to run again in 60 seconds.

    The request body may be a JSON object with a "lock_id" key. A body that is
    empty, not valid JSON, or not a JSON object is treated as an empty object,
    in which case a fresh lock id is generated.

    Args:
        schema_arg: The schema whose refresh tasks should be created.

    Returns:
        An empty body with HTTPStatus.OK, both when the refresh tasks were
        created and when this task was re-enqueued.

    Raises:
        GCSPseudoLockAlreadyExists: If the export lock is already held with a
            lock id different from this request's lock id.
    """
    task_manager = BQRefreshCloudTaskManager()
    lock_manager = GCSPseudoLockManager()
    try:
        json_data = json.loads(request.get_data(as_text=True))
    except (TypeError, json.decoder.JSONDecodeError):
        json_data = {}
    # Valid JSON that is not an object (null, number, list, ...) cannot carry a
    # lock id; treat it like an empty body instead of failing on the lookup.
    if not isinstance(json_data, dict):
        json_data = {}
    if "lock_id" in json_data:
        lock_id = json_data["lock_id"]
    else:
        lock_id = str(uuid.uuid4())
    logging.info("Request lock id: %s", lock_id)

    export_lock_name = postgres_to_bq_lock_name_with_suffix(schema_arg)
    if not lock_manager.is_locked(export_lock_name):
        lock_time = datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
        contents = json.dumps({"time": lock_time, "lock_id": lock_id})
        lock_manager.lock(export_lock_name, contents)
    else:
        contents = lock_manager.get_lock_contents(export_lock_name)
        try:
            contents_json = json.loads(contents)
        except (TypeError, json.decoder.JSONDecodeError):
            contents_json = {}
        logging.info("Lock contents: %s", contents_json)
        # Lock contents that are not a JSON object hold no lock id, so they can
        # never match this request.
        if not isinstance(contents_json, dict):
            contents_json = {}
        if lock_id != contents_json.get("lock_id"):
            raise GCSPseudoLockAlreadyExists(
                f"UUID {lock_id} does not match existing lock's UUID")

    no_regions_running = lock_manager.no_active_locks_with_prefix(
        GCS_TO_POSTGRES_INGEST_RUNNING_LOCK_NAME)
    if not no_regions_running:
        logging.info("Regions running, renqueuing this task.")
        task_id = f"renqueue_wait_task-{datetime.utcnow().date()}-{uuid.uuid4()}"
        # Pass the lock id along so the retried task recognises its own lock.
        body = {"schema_type": schema_arg, "lock_id": lock_id}
        task_manager.job_monitor_cloud_task_queue_manager.create_task(
            task_id=task_id,
            body=body,
            relative_uri=
            f"/cloud_sql_to_bq/create_refresh_bq_tasks/{schema_arg}",
            schedule_delay_seconds=60,
        )
        return "", HTTPStatus.OK
    logging.info("No regions running, calling create_refresh_bq_tasks")
    create_all_bq_refresh_tasks_for_schema(schema_arg)
    return "", HTTPStatus.OK
    def test_double_lock_diff_contents(self) -> None:
        """A second lock attempt with a different payload raises and keeps the first payload."""
        manager = GCSPseudoLockManager(self.PROJECT_ID)
        manager.lock(self.LOCK_NAME, payload=self.CONTENTS)

        with self.assertRaises(GCSPseudoLockAlreadyExists):
            manager.lock(self.LOCK_NAME, self.CONTENTS2)

        self.assertTrue(manager.is_locked(self.LOCK_NAME))
        stored_payload = manager.get_lock_payload(self.LOCK_NAME)
        self.assertEqual(self.CONTENTS, stored_payload)
    def test_lock_unlock_with_retry(self) -> None:
        """Lock then unlock against `_MultipleAttemptDeleteFs` leaves the lock released.

        NOTE(review): the fake presumably needs several delete attempts before
        succeeding - confirm against its definition.
        """
        # Swap the patched filesystem factory so it hands out the retrying fake.
        self.gcs_factory_patcher.stop()
        retrying_fs = _MultipleAttemptDeleteFs()
        factory_mock = self.gcs_factory_patcher.start()
        factory_mock.return_value = retrying_fs

        manager = GCSPseudoLockManager(self.PROJECT_ID)
        manager.lock(self.LOCK_NAME)
        manager.unlock(self.LOCK_NAME)
        still_locked = manager.is_locked(self.LOCK_NAME)
        self.assertFalse(still_locked)
 def test_double_lock_diff_contents(self) -> None:
     """Locks and then locks again with unique contents, asserts its still locked and an error is raised.

     Also checks that the lock still holds the contents written by the first
     (default-contents) lock, i.e. the time it was taken. That time is sampled
     both before and after the first `lock()` call and either value is
     accepted, so the test does not fail spuriously when the clock ticks over
     between taking the lock and sampling the time.
     """
     time_format = "%m/%d/%Y, %H:%M:%S"
     lock_manager = GCSPseudoLockManager(self.PROJECT_ID)
     time_before = datetime.now().strftime(time_format)
     lock_manager.lock(self.LOCK_NAME)
     time_after = datetime.now().strftime(time_format)
     lock_id = str(uuid.uuid4())
     contents_as_json = {"time": time_after, "uuid": lock_id}
     contents = json.dumps(contents_as_json)
     with self.assertRaises(GCSPseudoLockAlreadyExists):
         lock_manager.lock(self.LOCK_NAME, contents)
     self.assertTrue(lock_manager.is_locked(self.LOCK_NAME))
     # The first lock was written somewhere between the two samples.
     self.assertIn(lock_manager.get_lock_contents(self.LOCK_NAME),
                   (time_before, time_after))
    def test_contents_of_unlocked_and_relocked(self) -> None:
        """After unlock and re-lock with a new payload, the lock file holds the new payload."""
        manager = GCSPseudoLockManager(self.PROJECT_ID)
        manager.lock(self.LOCK_NAME, self.CONTENTS)
        manager.unlock(self.LOCK_NAME)
        manager.lock(self.LOCK_NAME, self.CONTENTS2)

        lock_path = GcsfsFilePath(
            bucket_name=manager.bucket_name,
            blob_name=self.LOCK_NAME,
        )
        raw_body = self.fs.download_as_string(lock_path)
        parsed_body = GCSPseudoLockBody.from_json_string(raw_body)

        assert parsed_body is not None
        self.assertEqual(self.CONTENTS2, parsed_body.payload)
Exemplo n.º 20
0
    def test_monitor_refresh_bq_tasks_requeue_with_topic_and_message(
        self,
        mock_task_manager: mock.MagicMock,
        mock_pubsub_helper: mock.MagicMock,
        mock_supported_region_codes: mock.MagicMock,
    ) -> None:
        """While tasks remain on the bq queue, a new monitor task is created with the
        same topic/message, nothing is published, and the export lock stays held."""
        locks = GCSPseudoLockManager()
        mock_supported_region_codes.return_value = []

        queue_path = "test-queue-path"
        schema = "schema"
        topic = "fake_topic"
        message = "fake_message"

        export_lock_name = POSTGRES_TO_BQ_EXPORT_RUNNING_LOCK_NAME + schema.upper()
        locks.lock(export_lock_name)

        # Three unfinished tasks are still sitting on the queue.
        pending_tasks = [
            f"{queue_path}/tasks/table_name-123-{schema}",
            f"{queue_path}/tasks/table_name-456-{schema}",
            f"{queue_path}/tasks/table_name-789-{schema}",
        ]
        task_manager = mock_task_manager.return_value
        task_manager.get_bq_queue_info.return_value = CloudTaskQueueInfo(
            queue_name="queue_name",
            task_names=pending_tasks,
        )

        payload = {"schema": schema, "topic": topic, "message": message}
        response = self.mock_flask_client.post(
            "/monitor_refresh_bq_tasks",
            data=json.dumps(payload),
            content_type="application/json",
            headers={"X-Appengine-Inbound-Appid": "recidiviz-123"},
        )

        self.assertEqual(response.status_code, HTTPStatus.OK)
        task_manager.create_bq_refresh_monitor_task.assert_called_with(
            schema, topic, message)
        mock_pubsub_helper.publish_message_to_topic.assert_not_called()
        self.assertTrue(locks.is_locked(export_lock_name))
Exemplo n.º 21
0
    def test_create_wait_to_refresh_bq_tasks_state_export_locked(
            self, mock_task_manager):
        """With the STATE export lock already held, the request raises
        GCSPseudoLockAlreadyExists and no refresh or monitor tasks are created."""
        # Arrange: one exportable table and a pre-existing export lock.
        table_mock = Mock()
        table_mock.name = "test_table"
        schema_config = self.mock_bq_refresh_config.for_schema_type.return_value
        schema_config.get_tables_to_export.return_value = [table_mock]
        locks = GCSPseudoLockManager()
        locks.lock(POSTGRES_TO_BQ_EXPORT_RUNNING_LOCK_NAME + "STATE")

        # Act
        inbound_headers = {"X-Appengine-Inbound-Appid": "recidiviz-123"}
        with self.assertRaises(GCSPseudoLockAlreadyExists):
            self.mock_flask_client.get(
                "/create_refresh_bq_tasks/state", headers=inbound_headers)

        # Assert
        task_manager = mock_task_manager.return_value
        task_manager.create_refresh_bq_table_task.assert_not_called()
        task_manager.create_bq_refresh_monitor_task.assert_not_called()
Exemplo n.º 22
0
    def test_monitor_refresh_bq_tasks_requeue_unlock_no_publish(
        self,
        mock_task_manager: mock.MagicMock,
        mock_pubsub_helper: mock.MagicMock,
        mock_supported_region_codes: mock.MagicMock,
    ) -> None:
        """With an empty bq queue and empty topic/message, the monitor endpoint
        publishes nothing, creates no new monitor task and releases the export lock."""
        locks = GCSPseudoLockManager()
        mock_supported_region_codes.return_value = []

        schema = "schema"
        topic = ""
        message = ""

        export_lock_name = POSTGRES_TO_BQ_EXPORT_RUNNING_LOCK_NAME + schema.upper()
        locks.lock(export_lock_name)

        # No tasks left on the queue.
        task_manager = mock_task_manager.return_value
        task_manager.get_bq_queue_info.return_value = CloudTaskQueueInfo(
            queue_name="queue_name", task_names=[])

        payload = {"schema": schema, "topic": topic, "message": message}
        response = self.mock_flask_client.post(
            "/monitor_refresh_bq_tasks",
            data=json.dumps(payload),
            content_type="application/json",
            headers={"X-Appengine-Inbound-Appid": "recidiviz-123"},
        )

        self.assertEqual(response.status_code, HTTPStatus.OK)
        task_manager.create_bq_refresh_monitor_task.assert_not_called()
        mock_pubsub_helper.publish_message_to_topic.assert_not_called()
        self.assertFalse(locks.is_locked(export_lock_name))
 def test_lock_unlock(self) -> None:
     """A lock that is taken and then released is no longer reported as locked."""
     manager = GCSPseudoLockManager(self.PROJECT_ID)
     name = self.LOCK_NAME
     manager.lock(name)
     manager.unlock(name)
     still_locked = manager.is_locked(name)
     self.assertFalse(still_locked)
 def test_lock(self) -> None:
     """A lock that has been taken is reported as locked."""
     manager = GCSPseudoLockManager(self.PROJECT_ID)
     name = self.LOCK_NAME
     manager.lock(name)
     locked = manager.is_locked(name)
     self.assertTrue(locked)
Exemplo n.º 25
0
class CloudSqlToBQLockManager:
    """Manages acquiring and releasing the lock for the Cloud SQL -> BQ refresh, as well
    as determining if the refresh can proceed given other ongoing processes.
    """

    def __init__(self) -> None:
        self.lock_manager = GCSPseudoLockManager()

    def acquire_lock(self, lock_id: str, schema_type: SchemaType) -> None:
        """Acquires the CloudSQL -> BQ refresh lock for a given schema.

        The presence of the lock tells other ongoing processes to yield until the
        lock has been released. If the lock already exists and its payload equals
        |lock_id|, this call returns normally; this method does not itself re-write
        the existing lock or its expiration in that case.

        Acquiring the lock does NOT tell us if we can proceed with the refresh. You
        must call can_proceed() to determine if all blocking processes have
        successfully yielded.

        Args:
            lock_id: Identifier stored as the lock payload and used to recognise a
                lock taken by an earlier call for the same refresh.
            schema_type: The schema whose refresh lock should be acquired.

        Raises:
            GCSPseudoLockAlreadyExists: If a lock with a different lock_id exists
                for this schema.
        """
        lock_name = postgres_to_bq_lock_name_for_schema(schema_type)
        try:
            self.lock_manager.lock(
                lock_name,
                payload=lock_id,
                expiration_in_seconds=self._export_lock_timeout_for_schema(schema_type),
            )
        except GCSPseudoLockAlreadyExists as e:
            # An existing lock is fine only if it carries our own lock id.
            previous_lock_id = self.lock_manager.get_lock_payload(lock_name)
            logging.info("Lock contents: %s", previous_lock_id)
            if lock_id != previous_lock_id:
                raise GCSPseudoLockAlreadyExists(
                    f"UUID {lock_id} does not match existing lock's UUID {previous_lock_id}"
                ) from e

    def can_proceed(self, schema_type: SchemaType) -> bool:
        """Returns True if all blocking processes have stopped and we can proceed with
        the export, False otherwise.

        Schemas other than STATE, JAILS and OPERATIONS have no blocking ingest
        locks and can always proceed once their own lock is held.

        Raises:
            GCSPseudoLockDoesNotExist: If the refresh lock for |schema_type| has not
                been acquired.
        """
        if not self.is_locked(schema_type):
            raise GCSPseudoLockDoesNotExist(
                f"Must acquire the lock for [{schema_type}] before checking if can proceed"
            )

        if schema_type == SchemaType.STATE:
            blocking_lock_prefix = STATE_GCS_TO_POSTGRES_INGEST_RUNNING_LOCK_PREFIX
        elif schema_type == SchemaType.JAILS:
            blocking_lock_prefix = JAILS_GCS_TO_POSTGRES_INGEST_RUNNING_LOCK_PREFIX
        elif schema_type == SchemaType.OPERATIONS:
            # The operations export yields for all types of ingest
            blocking_lock_prefix = GCS_TO_POSTGRES_INGEST_RUNNING_LOCK_PREFIX
        else:
            # No ingest process blocks the export of any other schema.
            return True

        return self.lock_manager.no_active_locks_with_prefix(blocking_lock_prefix)

    def release_lock(self, schema_type: SchemaType) -> None:
        """Releases the CloudSQL -> BQ refresh lock for a given schema."""
        self.lock_manager.unlock(postgres_to_bq_lock_name_for_schema(schema_type))

    def is_locked(self, schema_type: SchemaType) -> bool:
        """Returns True if the CloudSQL -> BQ refresh lock is held for the schema."""
        return self.lock_manager.is_locked(
            postgres_to_bq_lock_name_for_schema(schema_type)
        )

    @staticmethod
    def _export_lock_timeout_for_schema(_schema_type: SchemaType) -> int:
        """Defines the exported lock timeouts permitted based on the schema arg.
        For the moment all lock timeouts are set to one hour in length.

        Export jobs may take longer than the allotted time, but if they do so, they
        will de facto relinquish their hold on the acquired lock.

        Returns:
            The lock expiration in seconds.
        """
        return 3600
 def test_get_lock_contents(self) -> None:
     """`get_lock_contents` returns the contents the lock was created with."""
     manager = GCSPseudoLockManager(self.PROJECT_ID)
     manager.lock(self.LOCK_NAME, self.CONTENTS)
     stored = manager.get_lock_contents(self.LOCK_NAME)
     self.assertEqual(self.CONTENTS, stored)
Exemplo n.º 27
0
class DirectIngestRegionLockManager:
    """Coordinates the lock that guards the ingest process writing data to Postgres
    for one region's ingest instance.
    """

    def __init__(
        self,
        region_code: str,
        ingest_instance: DirectIngestInstance,
        blocking_locks: List[str],
    ) -> None:
        """
        Args:
            region_code: The region code for the region to lock / unlock ingest for.
            ingest_instance: The ingest instance; its name is part of the lock name
                for state regions.
            blocking_locks: Names of locks that, while held, mean ingest into
                Postgres cannot proceed for this region.
        """
        self.lock_manager = GCSPseudoLockManager()
        self.region_code = region_code
        self.ingest_instance = ingest_instance
        self.blocking_locks = blocking_locks

    def is_locked(self) -> bool:
        """Returns True if this manager's region ingest lock is currently held."""
        lock_name = self._ingest_lock_name_for_instance()
        return self.lock_manager.is_locked(lock_name)

    def can_proceed(self) -> bool:
        """Returns True if none of the blocking locks is held, i.e. ingest can
        proceed for this manager's region.
        """
        return not any(
            self.lock_manager.is_locked(blocker) for blocker in self.blocking_locks
        )

    def acquire_lock(self) -> None:
        """Takes the ingest lock for this manager's region."""
        lock_name = self._ingest_lock_name_for_instance()
        self.lock_manager.lock(lock_name)

    def release_lock(self) -> None:
        """Releases the ingest lock for this manager's region."""
        lock_name = self._ingest_lock_name_for_instance()
        self.lock_manager.unlock(lock_name)

    @contextmanager
    def using_region_lock(
        self,
        *,
        expiration_in_seconds: int,
    ) -> Iterator[None]:
        """Context manager that holds the region's ingest lock for the `with` body.

        Usage:
        with lock_manager.using_region_lock(expiration_in_seconds=60):
           ... do work requiring the lock
        """
        lock_name = self._ingest_lock_name_for_instance()
        with self.lock_manager.using_lock(
            lock_name, expiration_in_seconds=expiration_in_seconds
        ):
            yield

    @staticmethod
    def for_state_ingest(
        state_code: StateCode, ingest_instance: DirectIngestInstance
    ) -> "DirectIngestRegionLockManager":
        """Builds a manager for a state's ingest instance, blocked by the STATE and
        OPERATIONS export locks.
        """
        return DirectIngestRegionLockManager.for_direct_ingest(
            region_code=state_code.value,
            ingest_instance=ingest_instance,
            schema_type=SchemaType.STATE,
        )

    @staticmethod
    def for_direct_ingest(
        region_code: str,
        ingest_instance: DirectIngestInstance,
        schema_type: DirectIngestSchemaType,
    ) -> "DirectIngestRegionLockManager":
        """Builds a manager blocked by the export lock for |schema_type| and the
        export lock for the OPERATIONS schema.
        """
        export_locks = [
            postgres_to_bq_lock_name_for_schema(schema_type),
            postgres_to_bq_lock_name_for_schema(SchemaType.OPERATIONS),
        ]
        return DirectIngestRegionLockManager(
            region_code=region_code,
            ingest_instance=ingest_instance,
            blocking_locks=export_locks,
        )

    def _ingest_lock_name_for_instance(self) -> str:
        """Returns the ingest lock name for this region.

        State regions get a name that includes the ingest instance name; all other
        regions use the jails prefix and the region code only.
        """
        if not StateCode.is_state_code(self.region_code):
            return (
                JAILS_GCS_TO_POSTGRES_INGEST_RUNNING_LOCK_PREFIX
                + self.region_code.upper()
            )
        return (
            STATE_GCS_TO_POSTGRES_INGEST_RUNNING_LOCK_PREFIX
            + self.region_code.upper()
            + f"_{self.ingest_instance.name}"
        )