Example #1
    def test_wait(self, mock_hook):
        job = self.create_job(JobStatus.RUNNING)
        job_id = "job_id"
        mock_hook.return_value.get_job.return_value = job

        sensor = DataprocJobSensor(
            task_id=TASK_ID,
            location=GCP_LOCATION,
            project_id=GCP_PROJECT,
            dataproc_job_id=job_id,
            gcp_conn_id=GCP_CONN_ID,
            timeout=TIMEOUT,
        )
        ret = sensor.poke(context={})

        mock_hook.return_value.get_job.assert_called_once_with(
            job_id=job_id, location=GCP_LOCATION, project_id=GCP_PROJECT)
        assert not ret
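
These test excerpts omit the scaffolding around them: the constants, the create_job helper, and the mock.patch decorator that injects mock_hook. A minimal sketch of that context, assuming the sensor lives at its usual provider path and a pre-2.x google-cloud-dataproc client where state values are exposed directly on JobStatus (the constant values, the DATAPROC_SENSOR_PATH name, and the create_job body are illustrative assumptions, not the original module):

import unittest
from unittest import mock

import pytest
from google.cloud.dataproc_v1.types import JobStatus

from airflow.exceptions import AirflowException
from airflow.providers.google.cloud.sensors.dataproc import DataprocJobSensor

TASK_ID = "dataproc-job-sensor"        # assumed value
GCP_PROJECT = "example-project"        # assumed value
GCP_LOCATION = "us-central1"           # assumed value
GCP_CONN_ID = "google_cloud_default"   # assumed value
TIMEOUT = 120                          # assumed value

# Patch target: the DataprocHook name as imported inside the sensor module.
DATAPROC_SENSOR_PATH = "airflow.providers.google.cloud.sensors.dataproc.{}"


class TestDataprocJobSensor(unittest.TestCase):
    def create_job(self, state):
        # Fake Dataproc job whose status.state carries the requested value.
        job = mock.Mock()
        job.status = mock.Mock()
        job.status.state = state
        return job

    @mock.patch(DATAPROC_SENSOR_PATH.format("DataprocHook"))
    def test_wait(self, mock_hook):
        ...  # body as in the excerpt above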
Example #2
    def test_cancelled(self, mock_hook):
        job = self.create_job(JobStatus.CANCELLED)
        job_id = "job_id"
        mock_hook.return_value.get_job.return_value = job

        sensor = DataprocJobSensor(
            task_id=TASK_ID,
            location=GCP_LOCATION,
            project_id=GCP_PROJECT,
            dataproc_job_id=job_id,
            gcp_conn_id=GCP_CONN_ID,
            timeout=TIMEOUT,
        )
        with pytest.raises(AirflowException, match="Job was cancelled"):
            sensor.poke(context={})

        mock_hook.return_value.get_job.assert_called_once_with(
            job_id=job_id, location=GCP_LOCATION, project_id=GCP_PROJECT)
Example #3
    def test_error(self, mock_hook):
        job = self.create_job(JobStatus.ERROR)
        job_id = "job_id"
        mock_hook.return_value.get_job.return_value = job

        sensor = DataprocJobSensor(
            task_id=TASK_ID,
            location=GCP_LOCATION,
            project_id=GCP_PROJECT,
            dataproc_job_id=job_id,
            gcp_conn_id=GCP_CONN_ID,
            timeout=TIMEOUT,
        )

        with self.assertRaisesRegex(AirflowException, "Job failed"):
            sensor.poke(context={})

        mock_hook.return_value.get_job.assert_called_once_with(
            job_id=job_id, location=GCP_LOCATION, project_id=GCP_PROJECT)
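
The excerpts above cover a running, a cancelled, and a failed job. For the success path, poke() should return True once the job reaches DONE; a sketch following the same pattern and reusing the assumed scaffolding names (test_done here is illustrative, not copied from the original suite):

    @mock.patch(DATAPROC_SENSOR_PATH.format("DataprocHook"))
    def test_done(self, mock_hook):
        # A finished job should make the sensor report success.
        job = self.create_job(JobStatus.DONE)
        job_id = "job_id"
        mock_hook.return_value.get_job.return_value = job

        sensor = DataprocJobSensor(
            task_id=TASK_ID,
            location=GCP_LOCATION,
            project_id=GCP_PROJECT,
            dataproc_job_id=job_id,
            gcp_conn_id=GCP_CONN_ID,
            timeout=TIMEOUT,
        )
        ret = sensor.poke(context={})

        mock_hook.return_value.get_job.assert_called_once_with(
            job_id=job_id, location=GCP_LOCATION, project_id=GCP_PROJECT)
        assert ret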
Example #4
    spark_task = DataprocSubmitJobOperator(task_id="spark_task",
                                           job=SPARK_JOB,
                                           region=REGION,
                                           project_id=PROJECT_ID)

    # [START cloud_dataproc_async_submit_sensor]
    spark_task_async = DataprocSubmitJobOperator(task_id="spark_task_async",
                                                 job=SPARK_JOB,
                                                 region=REGION,
                                                 project_id=PROJECT_ID,
                                                 asynchronous=True)

    spark_task_async_sensor = DataprocJobSensor(
        task_id='spark_task_async_sensor_task',
        region=REGION,
        project_id=PROJECT_ID,
        dataproc_job_id=spark_task_async.output,
        poke_interval=10,
    )
    # [END cloud_dataproc_async_submit_sensor]

    # [START how_to_cloud_dataproc_submit_job_to_cluster_operator]
    pyspark_task = DataprocSubmitJobOperator(task_id="pyspark_task",
                                             job=PYSPARK_JOB,
                                             region=REGION,
                                             project_id=PROJECT_ID)
    # [END how_to_cloud_dataproc_submit_job_to_cluster_operator]

    sparkr_task = DataprocSubmitJobOperator(task_id="sparkr_task",
                                            job=SPARKR_JOB,
                                            region=REGION,
                                            project_id=PROJECT_ID)
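
In this variant, dataproc_job_id=spark_task_async.output is an XComArg: at run time it resolves to the job id returned by the submit task, and at parse time it also registers spark_task_async as an upstream task of the sensor, so explicit ordering is optional. Spelled out, the implied dependency is:

    # Redundant but harmless: the XComArg above already implies this edge.
    spark_task_async >> spark_task_async_sensor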
Example #5
    spark_task = DataprocSubmitJobOperator(task_id="spark_task",
                                           job=SPARK_JOB,
                                           location=REGION,
                                           project_id=PROJECT_ID)

    # [START cloud_dataproc_async_submit_sensor]
    spark_task_async = DataprocSubmitJobOperator(task_id="spark_task_async",
                                                 job=SPARK_JOB,
                                                 location=REGION,
                                                 project_id=PROJECT_ID,
                                                 asynchronous=True)

    spark_task_async_sensor = DataprocJobSensor(
        task_id='spark_task_async_sensor_task',
        location=REGION,
        project_id=PROJECT_ID,
        dataproc_job_id="{{ task_instance.xcom_pull(task_ids='spark_task_async') }}",
        poke_interval=10,
    )
    # [END cloud_dataproc_async_submit_sensor]

    # [START how_to_cloud_dataproc_submit_job_to_cluster_operator]
    pyspark_task = DataprocSubmitJobOperator(task_id="pyspark_task",
                                             job=PYSPARK_JOB,
                                             location=REGION,
                                             project_id=PROJECT_ID)
    # [END how_to_cloud_dataproc_submit_job_to_cluster_operator]

    sparkr_task = DataprocSubmitJobOperator(task_id="sparkr_task",
                                            job=SPARKR_JOB,
                                            location=REGION,
                                            project_id=PROJECT_ID)
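
Example #5 appears to target an older release of the Google provider: the Dataproc tasks take location instead of region, and the job id is pulled with an explicit Jinja xcom_pull template rather than .output. A plain template string does not create a task dependency, so in this variant the ordering between the submit task and the sensor has to be set explicitly, for example:

    spark_task_async >> spark_task_async_sensor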