def modify_job_state(self, job_id, new_state): """Modify the run state of the job. Args: job_id: The id of the job. new_state: A string representing the new desired state. It could be set to either 'JOB_STATE_DONE', 'JOB_STATE_CANCELLED' or 'JOB_STATE_DRAINING'. Returns: True if the job was modified successfully. """ if new_state == 'JOB_STATE_DONE': new_state = dataflow.Job.RequestedStateValueValuesEnum.JOB_STATE_DONE elif new_state == 'JOB_STATE_CANCELLED': new_state = dataflow.Job.RequestedStateValueValuesEnum.JOB_STATE_CANCELLED elif new_state == 'JOB_STATE_DRAINING': new_state = dataflow.Job.RequestedStateValueValuesEnum.JOB_STATE_DRAINING else: # Other states could only be set by the service. return False request = dataflow.DataflowProjectsJobsUpdateRequest() request.jobId = job_id request.projectId = self.google_cloud_options.project request.job = dataflow.Job(requestedState=new_state) self._client.projects_jobs.Update(request) return True
def __init__(self, options): self.options = options self.google_cloud_options = options.view_as(GoogleCloudOptions) required_google_cloud_options = [ 'project', 'job_name', 'staging_location', 'temp_location' ] missing = [ option for option in required_google_cloud_options if not getattr(self.google_cloud_options, option) ] if missing: raise ValueError('Missing required configuration parameters: %s' % missing) # Make the staging and temp locations job name and time specific. This is # needed to avoid clashes between job submissions using the same staging # area or team members using same job names. This method is not entirely # foolproof since two job submissions with same name can happen at exactly # the same time. However the window is extremely small given that # time.time() has at least microseconds granularity. We add the suffix only # for GCS staging locations where the potential for such clashes is high. if self.google_cloud_options.staging_location.startswith('gs://'): path_suffix = '%s.%f' % (self.google_cloud_options.job_name, time.time()) self.google_cloud_options.staging_location = utils.path.join( self.google_cloud_options.staging_location, path_suffix) self.google_cloud_options.temp_location = utils.path.join( self.google_cloud_options.temp_location, path_suffix) self.proto = dataflow.Job(name=self.google_cloud_options.job_name) if self.options.view_as(StandardOptions).streaming: self.proto.type = dataflow.Job.TypeValueValuesEnum.JOB_TYPE_STREAMING else: self.proto.type = dataflow.Job.TypeValueValuesEnum.JOB_TYPE_BATCH self.base64_str_re = re.compile(r'^[A-Za-z0-9+/]*=*$') self.coder_str_re = re.compile(r'^([A-Za-z]+\$)([A-Za-z0-9+/]*=*)$')