Example #1
 def _start_template_dataflow(self, name, variables, parameters,
                              dataflow_template):
     # Builds RuntimeEnvironment from variables dictionary
     # https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment
     environment = {}
     for key in [
             'maxWorkers', 'zone', 'serviceAccountEmail', 'tempLocation',
             'bypassTempDirValidation', 'machineType', 'network',
             'subnetwork'
     ]:
         if key in variables:
             environment.update({key: variables[key]})
     body = {
         "jobName": name,
         "parameters": parameters,
         "environment": environment
     }
     service = self.get_conn()
     request = service.projects().locations().templates().launch(
         projectId=variables['project'],
         location=variables['region'],
         gcsPath=dataflow_template,
         body=body)
     response = request.execute()
     variables = self._set_variables(variables)
     # Poll the launched job until it reaches a terminal state.
     _DataflowJob(self.get_conn(), variables['project'], name,
                  variables['region'], self.poll_sleep).wait_for_done()
     return response
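
A minimal, hypothetical call sketch (hook stands in for an instance of the surrounding Dataflow hook; bucket paths and the template location are illustrative, not taken from the code above). Only the whitelisted keys in variables are copied into the RuntimeEnvironment, while 'project' and 'region' are used to route the templates.launch request:

    variables = {
        'project': 'my-gcp-project',           # becomes projectId
        'region': 'us-central1',               # becomes location
        'maxWorkers': 10,                      # copied into environment
        'tempLocation': 'gs://my-bucket/tmp',  # copied into environment
        'labels': {'team': 'data'},            # not whitelisted, silently dropped
    }
    parameters = {'inputFile': 'gs://my-bucket/input.txt',
                  'output': 'gs://my-bucket/output'}

    hook._start_template_dataflow(
        name='example-wordcount',
        variables=variables,
        parameters=parameters,
        dataflow_template='gs://dataflow-templates/latest/Word_Count',
    )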
Example #2
 def test_dataflow_job_init_without_job_id(self):
     mock_jobs = MagicMock()
     self.mock_dataflow.projects.return_value.locations.return_value.\
         jobs.return_value = mock_jobs
     _DataflowJob(self.mock_dataflow, TEST_PROJECT, TEST_JOB_NAME,
                  TEST_LOCATION, 10)
     mock_jobs.list.assert_called_with(projectId=TEST_PROJECT,
                                       location=TEST_LOCATION)
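
For context, the chained .return_value assignments above work because every attribute access on a MagicMock returns another MagicMock, and .return_value is what calling that mock yields. A standalone illustration of the same pattern (not part of the test suite above):

    from unittest.mock import MagicMock

    dataflow = MagicMock()
    jobs = MagicMock()
    dataflow.projects.return_value.locations.return_value.jobs.return_value = jobs

    # The fluent call chain used inside _DataflowJob now resolves to our mock,
    # so the name-based jobs().list(...) lookup can be asserted on it.
    assert dataflow.projects().locations().jobs() is jobs
    dataflow.projects().locations().jobs().list(projectId='p', location='l')
    jobs.list.assert_called_with(projectId='p', location='l')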
Example #4
 def _start_dataflow(self, variables, name, command_prefix,
                     label_formatter):
     variables = self._set_variables(variables)
     cmd = command_prefix + self._build_cmd(variables, label_formatter)
     job_id = _Dataflow(cmd).wait_for_done()
     _DataflowJob(self.get_conn(), variables['project'], name,
                  variables['region'], self.poll_sleep, job_id,
                  self.num_retries).wait_for_done()
 def test_dataflow_job_init_with_job_id(self):
     mock_jobs = MagicMock()
     self.mock_dataflow.projects.return_value.\
         jobs.return_value = mock_jobs
     _DataflowJob(self.mock_dataflow, TEST_PROJECT, TEST_JOB_NAME,
                  TEST_LOCATION, 10, TEST_JOB_ID)
     mock_jobs.get.assert_called_with(projectId=TEST_PROJECT,
                                      jobId=TEST_JOB_ID)
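
A hypothetical caller sketch for _start_dataflow (hook, the pipeline file, and the label formatter are illustrative placeholders, not taken from the code above): the public launch methods typically supply the runner command prefix and a formatter that turns a labels dict into --labels CLI flags.

    def label_formatter(labels_dict):
        return ['--labels={}={}'.format(key, value)
                for key, value in labels_dict.items()]

    hook._start_dataflow(
        variables={'project': 'my-gcp-project', 'region': 'us-central1'},
        name='example-python-job',
        command_prefix=['python', 'my_pipeline.py'],
        label_formatter=label_formatter,
    )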
Example #6
    def _run_cmd(self, cmd):
        dfc = self.dataflow_config

        from airflow.contrib.hooks.gcp_dataflow_hook import _DataflowJob

        run_cmd(
            cmd,
            name="dataflow %s" % self.task_run.job_name,
            stdout_handler=self._process_dataflow_log,
        )

        _DataflowJob(
            self._gcp_dataflow_hook.get_conn(),
            dfc.project,
            self.task_run.job_id,
            dfc.region,
            dfc.poll_sleep,
            self.current_dataflow_job_id,
        ).wait_for_done()
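
The snippet above assumes self.current_dataflow_job_id has already been captured from the runner's output. One common way to do that, sketched here as a hypothetical helper (not part of the code above), is to scan each log line for the Cloud Console job URL, the same trick Airflow's _Dataflow helper relies on when launching pipelines from the command line:

    import re

    # Hypothetical helper: Dataflow runners log a link to the job in the
    # Cloud Console; the job id can be pulled out of that URL by the
    # stdout_handler as it processes each line.
    JOB_ID_PATTERN = re.compile(
        r'console\.cloud\.google\.com/dataflow.*/jobs/([a-zA-Z0-9_-]+)')

    def extract_dataflow_job_id(log_line):
        match = JOB_ID_PATTERN.search(log_line)
        return match.group(1) if match else None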