def block_polling(self, interval_sec=30, timeout_sec=3600) -> SparkJobStatus:
    """
    Blocks until the Dataproc job is completed or failed.

    Args:
        interval_sec (int): Polling interval in seconds.
        timeout_sec (int): Timeout limit in seconds.

    Returns:
        SparkJobStatus: Latest job status.

    Raises:
        SparkJobFailure: Raised if the job has neither completed nor failed
            within the timeout limit.
    """
    start = time.time()
    while True:
        elapsed_time = time.time() - start
        if timeout_sec and elapsed_time >= timeout_sec:
            raise SparkJobFailure(
                f"Job is still not completed after {timeout_sec} seconds."
            )

        status = self.get_status()
        if status in [SparkJobStatus.FAILED, SparkJobStatus.COMPLETED]:
            break
        time.sleep(interval_sec)
    return status
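# Usage sketch (not from the source): assumes a hypothetical `job` instance of
# the Dataproc job class above, exposing block_polling() along with the
# SparkJobStatus / SparkJobFailure names used here.
def _example_wait_for_dataproc_job(job):
    try:
        # Poll every 10 seconds; give up after 30 minutes.
        final_status = job.block_polling(interval_sec=10, timeout_sec=1800)
    except SparkJobFailure as exc:
        # Raised when the job is neither completed nor failed within the timeout.
        print(f"Gave up waiting: {exc}")
        return
    if final_status == SparkJobStatus.COMPLETED:
        print("Job completed successfully")
    else:
        print("Job failed")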
def get_output_file_uri(self, timeout_sec=None, block=True):
    if not block:
        return self._output_file_uri

    status = self.block_polling(timeout_sec=timeout_sec)
    if status == SparkJobStatus.COMPLETED:
        return self._output_file_uri
    raise SparkJobFailure(self.get_error_message())
def get_output_file_uri(self, timeout_sec=None, block=True):
    if not block:
        return self._output_file_uri

    if self._wait_for_complete(timeout_sec):
        return self._output_file_uri
    else:
        raise SparkJobFailure("Spark job failed")
def wait_termination(self, timeout_sec=None):
    status = self._wait_for_job_status(
        goal_status=[SparkJobStatus.COMPLETED, SparkJobStatus.FAILED],
        timeout_seconds=timeout_sec or 600,
    )
    if status != SparkJobStatus.COMPLETED:
        raise SparkJobFailure(
            f"Spark job failed; Reason: {self.get_error_message()}"
        )
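# Usage sketch (not from the source): assumes a hypothetical `job` object that
# implements wait_termination() as defined above.
def _example_wait_termination(job):
    try:
        # Returns once the job reaches COMPLETED; raises SparkJobFailure otherwise.
        job.wait_termination(timeout_sec=900)
        print("Job terminated successfully")
    except SparkJobFailure as exc:
        print(f"Job did not complete: {exc}")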
def get_output_file_uri(self, timeout_sec=None):
    status = self._wait_for_job_status(
        goal_status=[SparkJobStatus.COMPLETED, SparkJobStatus.FAILED],
        timeout_seconds=timeout_sec or 600,
    )
    if status == SparkJobStatus.COMPLETED:
        return self._output_file_uri
    else:
        raise SparkJobFailure("Spark job failed")
def get_output_file_uri(self, timeout_sec=None, block=True):
    if not block:
        return self._output_file_uri

    state = _wait_for_job_state(
        self._emr_client, self._job_ref, TERMINAL_STEP_STATES, timeout_sec
    )
    if state in SUCCEEDED_STEP_STATES:
        return self._output_file_uri
    else:
        raise SparkJobFailure("Spark job failed")
def get_output_file_uri(self, timeout_sec: int = None, block=True):
    if not block:
        return self._output_file_uri

    # Wait for the subprocess and inspect its exit code before the context
    # manager closes the stdout/stderr pipes.
    with self._process as p:
        try:
            p.wait(timeout_sec)
        except Exception:
            p.kill()
            raise SparkJobFailure("Timeout waiting for subprocess to return")

        if p.returncode != 0:
            stderr = "" if p.stderr is None else p.stderr.read()
            stdout = "" if p.stdout is None else p.stdout.read()
            raise SparkJobFailure(
                f"Non zero return code: {p.returncode}. "
                f"stderr: {stderr} stdout: {stdout}"
            )

    return self._output_file_uri
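# Usage sketch (not from the source): assumes a hypothetical `job` handle that
# exposes the subprocess-backed get_output_file_uri() variant above.
def _example_get_output(job):
    try:
        # Block for up to 10 minutes waiting for the subprocess to finish.
        uri = job.get_output_file_uri(timeout_sec=600, block=True)
        print(f"Output written to {uri}")
    except SparkJobFailure as exc:
        # Raised on timeout or on a non-zero return code from the subprocess.
        print(f"Spark job failed: {exc}")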