Example #1
    def block_polling(self,
                      interval_sec=30,
                      timeout_sec=3600) -> SparkJobStatus:
        """
        Blocks until the Dataproc job is completed or failed.

        Args:
            interval_sec (int): Polling interval in seconds.
            timeout_sec (int): Timeout limit in seconds.

        Returns:
            SparkJobStatus: Latest job status.

        Raises:
            SparkJobFailure: Raised if the job has neither completed nor failed within the timeout limit.
        """

        start = time.time()
        while True:
            elapsed_time = time.time() - start
            if timeout_sec and elapsed_time >= timeout_sec:
                raise SparkJobFailure(
                    f"Job is still not completed after {timeout_sec}.")

            status = self.get_status()
            if status in [SparkJobStatus.FAILED, SparkJobStatus.COMPLETED]:
                break
            time.sleep(interval_sec)
        return status
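The examples in this listing all rely on a SparkJobStatus enum and a SparkJobFailure exception defined elsewhere in the host project. A minimal sketch of plausible definitions so the snippets can be read in isolation; the non-terminal member name is an assumption:

from enum import Enum


class SparkJobStatus(Enum):
    # COMPLETED and FAILED appear in the examples; IN_PROGRESS is an
    # assumed name for the non-terminal state.
    IN_PROGRESS = "IN_PROGRESS"
    COMPLETED = "COMPLETED"
    FAILED = "FAILED"


class SparkJobFailure(Exception):
    """Raised when a Spark job fails or does not finish in time."""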
Example #2
    def get_output_file_uri(self, timeout_sec=None, block=True):
        if not block:
            return self._output_file_uri

        status = self.block_polling(timeout_sec=timeout_sec)
        if status == SparkJobStatus.COMPLETED:
            return self._output_file_uri
        raise SparkJobFailure(self.get_error_message())
Example #3
    def get_output_file_uri(self, timeout_sec=None, block=True):
        if not block:
            return self._output_file_uri

        if self._wait_for_complete(timeout_sec):
            return self._output_file_uri
        else:
            raise SparkJobFailure("Spark job failed")
Example #4
    def wait_termination(self, timeout_sec=None):
        status = self._wait_for_job_status(
            goal_status=[SparkJobStatus.COMPLETED, SparkJobStatus.FAILED],
            timeout_seconds=timeout_sec or 600,
        )

        if status != SparkJobStatus.COMPLETED:
            raise SparkJobFailure(
                f"Spark job failed; Reason: {self.get_error_message()}")
Example #5
    def get_output_file_uri(self, timeout_sec=None):
        status = self._wait_for_job_status(
            goal_status=[SparkJobStatus.COMPLETED, SparkJobStatus.FAILED],
            timeout_seconds=timeout_sec or 600,
        )
        if status == SparkJobStatus.COMPLETED:
            return self._output_file_uri
        else:
            raise SparkJobFailure("Spark job failed")
Example #6
    def get_output_file_uri(self, timeout_sec=None, block=True):
        if not block:
            return self._output_file_uri

        state = _wait_for_job_state(self._emr_client, self._job_ref,
                                    TERMINAL_STEP_STATES, timeout_sec)
        if state in SUCCEEDED_STEP_STATES:
            return self._output_file_uri
        else:
            raise SparkJobFailure("Spark job failed")
Example #7
    def get_output_file_uri(self, timeout_sec: int = None, block=True):
        if not block:
            return self._output_file_uri

        with self._process as p:
            try:
                p.wait(timeout_sec)
            except Exception:
                p.kill()
                raise SparkJobFailure(
                    "Timeout waiting for subprocess to return")

            # Check the exit status before the context manager closes
            # the process streams on exit.
            if p.returncode != 0:
                stderr = "" if p.stderr is None else p.stderr.read()
                stdout = "" if p.stdout is None else p.stdout.read()
                raise SparkJobFailure(
                    f"Non zero return code: {p.returncode}. "
                    f"stderr: {stderr} stdout: {stdout}")
        return self._output_file_uri
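Example #7 wraps a local spark-submit subprocess rather than a managed cluster job; `with self._process as p` implies self._process is a subprocess.Popen object, which is a context manager. A sketch of how it might be created; the command line is a placeholder, not the source project's actual invocation:

import subprocess

# Hypothetical launch; the real spark-submit arguments come from the
# host project's configuration.
process = subprocess.Popen(
    ["spark-submit", "--master", "local[*]", "job.py"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True,  # so .read() on stdout/stderr returns str, not bytes
)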