def _start_template_dataflow(self, name, variables, parameters, dataflow_template):
    """Launch a Dataflow job from a GCS-hosted template and wait for completion.

    :param name: job name, passed as ``jobName`` in the launch body.
    :param variables: dict of runtime options; must contain ``'project'`` and
        ``'region'``. Recognized RuntimeEnvironment keys are copied into the
        launch environment.
    :param parameters: template parameters, forwarded verbatim.
    :param dataflow_template: GCS path of the Dataflow template to launch.
    :return: the raw API response from ``templates().launch()``.
    """
    # Build RuntimeEnvironment from the variables dictionary:
    # https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment
    environment = {}
    for key in ['maxWorkers', 'zone', 'serviceAccountEmail', 'tempLocation',
                'bypassTempDirValidation', 'machineType', 'network',
                'subnetwork']:
        if key in variables:
            environment[key] = variables[key]
    body = {
        "jobName": name,
        "parameters": parameters,
        "environment": environment,
    }
    service = self.get_conn()
    request = service.projects().locations().templates().launch(
        projectId=variables['project'],
        location=variables['region'],
        gcsPath=dataflow_template,
        body=body)
    response = request.execute()
    # BUG FIX: the poller previously referenced undefined module-level
    # PROJECT/REGION names (NameError at runtime). Poll the same
    # project/region the job was launched in.
    _DataflowJob(self.get_conn(), variables['project'], name,
                 variables['region'], self.poll_sleep).wait_for_done()
    return response
def test_dataflow_job_init_without_job_id(self):
    """Without an explicit job id, _DataflowJob must discover the job by listing."""
    jobs_resource = MagicMock()
    locations = self.mock_dataflow.projects.return_value.locations.return_value
    locations.jobs.return_value = jobs_resource
    _DataflowJob(self.mock_dataflow, TEST_PROJECT, TEST_JOB_NAME,
                 TEST_LOCATION, 10)
    jobs_resource.list.assert_called_with(projectId=TEST_PROJECT,
                                          location=TEST_LOCATION)
def _start_dataflow(self, variables, name, command_prefix, label_formatter):
    """Launch a Dataflow job via a shell command and block until it finishes.

    Normalizes ``variables`` first, runs the launcher command to obtain the
    job id, then polls the job through the Dataflow API until it is done.
    """
    variables = self._set_variables(variables)
    full_cmd = command_prefix + self._build_cmd(variables, label_formatter)
    dataflow_job_id = _GFWDataflow(full_cmd).wait_for_done()
    poller = _DataflowJob(
        self.get_conn(),
        variables['project'],
        name,
        variables['region'],
        self.poll_sleep,
        dataflow_job_id,
        self.num_retries,
    )
    poller.wait_for_done()
def test_dataflow_job_init_with_job_id(self):
    """With an explicit job id, _DataflowJob must fetch that job directly."""
    jobs_resource = MagicMock()
    self.mock_dataflow.projects.return_value.jobs.return_value = jobs_resource
    _DataflowJob(self.mock_dataflow, TEST_PROJECT, TEST_JOB_NAME,
                 TEST_LOCATION, 10, TEST_JOB_ID)
    jobs_resource.get.assert_called_with(projectId=TEST_PROJECT,
                                         jobId=TEST_JOB_ID)
def _run_cmd(self, cmd):
    """Execute the Dataflow launch command, then block until the job completes.

    Runs ``cmd`` with a log-processing stdout handler, then polls the job
    (identified by ``self.current_dataflow_job_id``) via the Dataflow API.
    """
    # Local import keeps the airflow dependency out of module import time.
    from airflow.contrib.hooks.gcp_dataflow_hook import _DataflowJob

    dfc = self.dataflow_config
    run_cmd(
        cmd,
        name="dataflow %s" % self.task_run.job_name,
        stdout_handler=self._process_dataflow_log,
    )
    job_poller = _DataflowJob(
        self._gcp_dataflow_hook.get_conn(),
        dfc.project,
        self.task_run.job_id,
        dfc.region,
        dfc.poll_sleep,
        self.current_dataflow_job_id,
    )
    job_poller.wait_for_done()