def add_job(self, job: SparkJob) -> None:
    """Register a Spark job in the cache, keyed by its job id.

    For stream ingestion jobs, the job's hash is cached as well.

    Args:
        job (SparkJob): The new Spark job to add.
    """
    job_id = job.get_id()
    with self.lock:
        self.job_by_id[job_id] = job
        if isinstance(job, StreamIngestionJob):
            self.hash_by_id[job_id] = job.get_hash()
def _job_to_proto(spark_job: SparkJob) -> JobProto:
    """Convert a SparkJob into its JobProto wire representation.

    Args:
        spark_job (SparkJob): The job to serialize.

    Returns:
        JobProto: Proto populated with the job's id, log URI, error message,
        mapped status, type-specific fields, and start time.

    Raises:
        ValueError: If the job's status or concrete type is not one of the
            recognized values.
    """
    job = JobProto()
    job.id = spark_job.get_id()
    # Proto string fields cannot be None; coerce missing values to "".
    job.log_uri = cast(str, spark_job.get_log_uri() or "")
    job.error_message = cast(str, spark_job.get_error_message() or "")

    status = spark_job.get_status()
    if status == SparkJobStatus.COMPLETED:
        job.status = JobStatus.JOB_STATUS_DONE
    elif status == SparkJobStatus.IN_PROGRESS:
        job.status = JobStatus.JOB_STATUS_RUNNING
    elif status == SparkJobStatus.FAILED:
        job.status = JobStatus.JOB_STATUS_ERROR
    elif status == SparkJobStatus.STARTING:
        job.status = JobStatus.JOB_STATUS_PENDING
    else:
        raise ValueError(f"Invalid job status {status}")

    if isinstance(spark_job, RetrievalJob):
        job.type = JobType.RETRIEVAL_JOB
        # block=False — presumably returns the output location without
        # waiting for the job to finish; confirm against RetrievalJob.
        job.retrieval.output_location = spark_job.get_output_file_uri(block=False)
    elif isinstance(spark_job, BatchIngestionJob):
        job.type = JobType.BATCH_INGESTION_JOB
        job.batch_ingestion.table_name = spark_job.get_feature_table()
    elif isinstance(spark_job, StreamIngestionJob):
        job.type = JobType.STREAM_INGESTION_JOB
        job.stream_ingestion.table_name = spark_job.get_feature_table()
    else:
        # Fix: the original interpolated the half-built proto (`job`) here,
        # which made the error message useless for identifying the bad input.
        raise ValueError(f"Invalid job type {spark_job}")

    job.start_time.FromDatetime(spark_job.get_start_time())
    return job