Example #1
def dataflow_monitor() -> Tuple[str, HTTPStatus]:
    """Calls the dataflow monitor manager to begin monitoring a Dataflow job.

    Endpoint path parameters:
        job_id: The unique id of the job to monitor
        location: The region where the job is being run
        topic: The Pub/Sub topic to publish a message to if the job is
            successful
    """
    job_id = get_str_param_value('job_id', request.args)
    location = get_str_param_value('location', request.args)
    topic = get_str_param_value('topic', request.args)

    if not job_id:
        raise ValueError('Unexpected empty job_id.')
    if not location:
        raise ValueError('Unexpected empty location.')
    if not topic:
        raise ValueError('Unexpected empty topic.')

    logging.info(
        "Attempting to monitor the job with id: %s. Will "
        "publish to %s on success.", job_id, topic)

    CalculateCloudTaskManager().create_dataflow_monitor_task(
        job_id, location, topic)

    return '', HTTPStatus.OK
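
The helper get_str_param_value used above is not shown on this page. A minimal sketch of what such a helper might look like, assuming it simply reads an optional string out of Flask's request.args, is below; the name and signature come from the example, but the implementation is an illustration, not the project's actual utility.

from typing import Mapping, Optional


def get_str_param_value(arg_key: str, args: Mapping[str, str]) -> Optional[str]:
    """Hypothetical helper: returns the trimmed value for |arg_key|, or None if absent or empty."""
    value = args.get(arg_key)
    return value.strip() if value else None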


def handle_dataflow_monitor_task() -> Tuple[str, HTTPStatus]:
    """Worker function to publish a message to a Pub/Sub topic once a Dataflow
    job with the given `job_id` has successfully completed.

    If the job is running, or has another current state that could eventually
    progress to `JOB_STATE_DONE` in the future, a new task is queued to
    continue to monitor the job progress.
    """
    json_data = request.get_data(as_text=True)
    data = json.loads(json_data)
    project_id = metadata.project_id()
    job_id = data["job_id"]
    location = data["location"]
    topic_dashed = data["topic"]
    topic = topic_dashed.replace("-", ".")

    job = get_dataflow_job_with_id(project_id, job_id, location)

    if job:
        state = job["currentState"]

        if state == "JOB_STATE_DONE":
            # Job was successful. Publish success message.
            logging.info(
                "Job %s successfully completed. Triggering dashboard export.", job_id
            )
            message = "Dataflow job {} complete".format(job_id)
            pubsub_helper.publish_message_to_topic(message, topic)

        elif state in [
            "JOB_STATE_STOPPED",
            "JOB_STATE_RUNNING",
            "JOB_STATE_PENDING",
            "JOB_STATE_QUEUED",
        ]:
            logging.info(
                "Job %s has state: %s. Continuing to monitor progress.",
                job_id,
                state,
            )
            # Job has not completed yet. Re-queue monitor task.
            CalculateCloudTaskManager().create_dataflow_monitor_task(
                job_id, location, topic_dashed
            )
        else:
            logging.warning(
                "Dataflow job %s has state: %s. Killing the monitor tasks.",
                job_id,
                state,
            )
    else:
        logging.warning("Dataflow job %s not found.", job_id)

    return "", HTTPStatus.OK
Example #3
    def test_create_dataflow_monitor_task(
        self, mock_client: mock.MagicMock, mock_uuid: mock.MagicMock
    ) -> None:
        # Arrange
        delay_sec = 300
        now_utc_timestamp = int(datetime.datetime.now().timestamp())

        uuid = "random-uuid"
        mock_uuid.uuid4.return_value = uuid

        job_id = "12345"
        location = "fake_location"
        topic = "fake.topic"
        body = {
            "project_id": self.mock_project_id,
            "job_id": job_id,
            "location": location,
            "topic": topic,
        }

        queue_path = f"queue_path/{self.mock_project_id}/{QUEUES_REGION}"

        task_id = "12345-2019-04-14-random-uuid"
        task_path = f"{queue_path}/{task_id}"
        task = tasks_v2.types.task_pb2.Task(
            name=task_path,
            schedule_time=timestamp_pb2.Timestamp(
                seconds=(now_utc_timestamp + delay_sec)
            ),
            app_engine_http_request={
                "http_method": "POST",
                "relative_uri": "/dataflow_monitor/monitor",
                "body": json.dumps(body).encode(),
            },
        )

        mock_client.return_value.task_path.return_value = task_path
        mock_client.return_value.queue_path.return_value = queue_path

        # Act
        CalculateCloudTaskManager().create_dataflow_monitor_task(
            job_id, location, topic
        )

        # Assert
        mock_client.return_value.queue_path.assert_called_with(
            self.mock_project_id, QUEUES_REGION, JOB_MONITOR_QUEUE_V2
        )
        mock_client.return_value.task_path.assert_called_with(
            self.mock_project_id, QUEUES_REGION, JOB_MONITOR_QUEUE_V2, task_id
        )
        mock_client.return_value.create_task.assert_called_with(
            parent=queue_path, task=task
        )
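
The decorators that supply mock_client and mock_uuid, and the self.mock_project_id fixture, sit outside this excerpt. Judging by the generated task id (12345-2019-04-14-random-uuid), the test very likely also freezes the date, e.g. with freezegun, so that both the task id and the schedule time are deterministic. A plausible skeleton is shown below; the patch targets and fixture value are placeholders, not the real module paths.

import unittest
from unittest import mock

from freezegun import freeze_time


class CalculateCloudTaskManagerTest(unittest.TestCase):
    """Hypothetical test-class skeleton; only the decorator stack matters here."""

    mock_project_id = 'test-project'  # placeholder for whatever setUp assigns

    @freeze_time('2019-04-14')
    @mock.patch('path.to.task_manager_module.uuid')                       # placeholder patch target
    @mock.patch('path.to.task_manager_module.tasks_v2.CloudTasksClient')  # placeholder patch target
    def test_create_dataflow_monitor_task(
        self, mock_client: mock.MagicMock, mock_uuid: mock.MagicMock
    ) -> None:
        ...  # body as in Example #3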
Example #4
    def test_create_dataflow_monitor_task(self, mock_client, mock_uuid):
        # Arrange
        delay_sec = 300
        now_utc_timestamp = int(datetime.datetime.now().timestamp())

        uuid = 'random-uuid'
        mock_uuid.uuid4.return_value = uuid

        job_id = '12345'
        location = 'fake_location'
        topic = 'fake.topic'
        project_id = 'recidiviz-456'
        body = {
            'project_id': project_id,
            'job_id': job_id,
            'location': location,
            'topic': topic,
        }

        queue_path = f'queue_path/{project_id}/{QUEUES_REGION}'

        task_id = '12345-2019-04-14-random-uuid'
        task_path = f'{queue_path}/{task_id}'
        task = tasks_v2.types.task_pb2.Task(
            name=task_path,
            schedule_time=timestamp_pb2.Timestamp(
                seconds=(now_utc_timestamp + delay_sec)),
            app_engine_http_request={
                'http_method': 'POST',
                'relative_uri': '/dataflow_monitor/monitor',
                'body': json.dumps(body).encode()
            }
        )

        mock_client.return_value.task_path.return_value = task_path
        mock_client.return_value.queue_path.return_value = queue_path

        # Act
        CalculateCloudTaskManager(project_id=project_id).create_dataflow_monitor_task(
            job_id, location, topic)

        # Assert
        mock_client.return_value.queue_path.assert_called_with(
            project_id,
            QUEUES_REGION,
            JOB_MONITOR_QUEUE_V2)
        mock_client.return_value.task_path.assert_called_with(
            project_id,
            QUEUES_REGION,
            JOB_MONITOR_QUEUE_V2,
            task_id)
        mock_client.return_value.create_task.assert_called_with(
            queue_path, task)
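
Neither example shows the method under test. Reading the assertions backwards, create_dataflow_monitor_task apparently builds a task id from the job id, the current date, and a fresh UUID, then schedules a POST to /dataflow_monitor/monitor on the job-monitor queue with a 300-second delay. The sketch below reconstructs that behaviour; the constant values, the constructor (Example #1 builds the manager without a project id, Example #4 passes one explicitly), and the client wiring are assumptions, not the project's real class.

import datetime
import json
import uuid

from google.cloud import tasks_v2
from google.protobuf import timestamp_pb2

QUEUES_REGION = 'us-east1'               # placeholder; the real value comes from project config
JOB_MONITOR_QUEUE_V2 = 'job-monitor-v2'  # placeholder queue name


class CalculateCloudTaskManager:
    """Sketch of the manager exercised by the tests above."""

    def __init__(self, project_id: str) -> None:
        self.project_id = project_id
        self.client = tasks_v2.CloudTasksClient()

    def create_dataflow_monitor_task(
            self, job_id: str, location: str, topic: str) -> None:
        """Enqueues an App Engine task that re-checks the Dataflow job after a delay."""
        delay_sec = 300
        schedule_time = timestamp_pb2.Timestamp(
            seconds=int(datetime.datetime.now().timestamp()) + delay_sec)

        body = {
            'project_id': self.project_id,
            'job_id': job_id,
            'location': location,
            'topic': topic,
        }
        # Task id format inferred from the tests: <job_id>-<date>-<uuid4>.
        task_id = '{}-{}-{}'.format(
            job_id, datetime.date.today().isoformat(), uuid.uuid4())

        queue_path = self.client.queue_path(
            self.project_id, QUEUES_REGION, JOB_MONITOR_QUEUE_V2)
        task_path = self.client.task_path(
            self.project_id, QUEUES_REGION, JOB_MONITOR_QUEUE_V2, task_id)

        task = tasks_v2.types.task_pb2.Task(
            name=task_path,
            schedule_time=schedule_time,
            app_engine_http_request={
                'http_method': 'POST',
                'relative_uri': '/dataflow_monitor/monitor',
                'body': json.dumps(body).encode(),
            })

        self.client.create_task(parent=queue_path, task=task)

Delaying each re-check by five minutes keeps the monitor loop cheap: every queued task runs handle_dataflow_monitor_task (Example #1), which either publishes the success message or re-enqueues another monitor task until the job reaches a terminal state.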