def get_jobs(
    self,
    return_json: bool = False,
    test_jobs: bool = True,
    real_jobs: bool = True,
) -> Union[JobCollection, List[Dict]]:
    """
    Get all jobs in the project as a JobCollection or JSON.

    Use Workflow().get_jobs() to get a JobCollection with the jobs associated
    with a specific workflow.

    Args:
        return_json: If True, returns the job info JSON instead of a JobCollection.
        test_jobs: Return test jobs (test queries).
        real_jobs: Return real jobs.

    Returns:
        All job objects in a JobCollection, or alternatively the job info as JSON.
    """
    url = f"{self.auth._endpoint()}/projects/{self.project_id}/jobs"
    response_json = self.auth._request(request_type="GET", url=url)
    jobs_json = filter_jobs_on_mode(response_json["data"], test_jobs, real_jobs)

    logger.info(f"Got {len(jobs_json)} jobs in project {self.project_id}.")
    if return_json:
        return jobs_json
    else:
        jobs = [
            Job(self.auth, job_id=job["id"], project_id=self.project_id)
            for job in tqdm(jobs_json)
        ]
        jobcollection = JobCollection(
            auth=self.auth, project_id=self.project_id, jobs=jobs
        )
        return jobcollection
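# Example usage (a sketch, assuming an authenticated up42 session and an existing
# project; the project id and API key below are placeholders):
#
#   import up42
#   up42.authenticate(project_id="your-project-id", project_api_key="your-api-key")
#   project = up42.initialize_project()
#   real_jobs = project.get_jobs(test_jobs=False)     # JobCollection of real jobs only
#   jobs_json = project.get_jobs(return_json=True)    # raw job info dicts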
def initialize_jobcollection(job_ids: List[str]) -> "JobCollection":
    """
    Returns a JobCollection object (the referenced jobs have to exist on UP42).

    Args:
        job_ids: List of UP42 job ids.

    Returns:
        The JobCollection object with the specified jobs.
    """
    if _auth is None:
        raise RuntimeError("Not authenticated, call up42.authenticate() first")
    jobs = [
        Job(auth=_auth, job_id=job_id, project_id=str(_auth.project_id))
        for job_id in job_ids
    ]
    jobcollection = JobCollection(
        auth=_auth, project_id=str(_auth.project_id), jobs=jobs
    )
    logger.info(f"Initialized {jobcollection}")
    return jobcollection
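# Example usage (a sketch, assuming prior authentication and that the referenced
# jobs already exist in the project; the job ids below are placeholders):
#
#   import up42
#   up42.authenticate(project_id="your-project-id", project_api_key="your-api-key")
#   jobcollection = up42.initialize_jobcollection(
#       job_ids=["aa1b5abf-...", "bb2c6cde-..."]
#   )
#   jobcollection.download_results()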
def _helper_run_parallel_jobs(
    self,
    input_parameters_list: List[dict] = None,
    max_concurrent_jobs: int = 10,
    test_job: bool = False,
    name: str = None,
) -> "JobCollection":
    """
    Helper function to create and run parallel real or test jobs.

    Args:
        input_parameters_list: List of dictionaries of input parameters.
        max_concurrent_jobs: Maximum number of parallel jobs that can be triggered.
        test_job: If set, runs test queries (searches for available imagery based
            on your data parameters).
        name: The job name. Optional, by default the workflow name is assigned.

    Returns:
        The spawned real or test jobs in a JobCollection.

    Raises:
        ValueError: When max_concurrent_jobs is greater than the maximum number of
            concurrent jobs set in the project settings.
    """
    if input_parameters_list is None:
        raise ValueError(
            "Provide the job parameters via `input_parameters_list`."
            " You can use workflow.construct_parallel_parameters()!"
        )

    if test_job:
        input_parameters_list = copy.deepcopy(input_parameters_list)
        for input_parameters in input_parameters_list:
            input_parameters.update({"config": {"mode": "DRY_RUN"}})  # type: ignore
        logger.info("+++++++++++++++++++++++++++++++++")
        logger.info("Running this job as Test Query...")
        logger.info("+++++++++++++++++++++++++++++++++")

    if name is None:
        name = self._info["name"]

    jobs_list = []
    job_nr = 0

    if max_concurrent_jobs > self.max_concurrent_jobs:
        logger.error(
            f"Maximum concurrent jobs {max_concurrent_jobs} greater "
            f"than project settings {self.max_concurrent_jobs}. "
            "Use project.update_project_settings to change this value."
        )
        raise ValueError("Too many concurrent jobs!")

    # Run all jobs in parallel batches of max_concurrent_jobs (default 10).
    batches = [
        input_parameters_list[pos : pos + max_concurrent_jobs]
        for pos in range(0, len(input_parameters_list), max_concurrent_jobs)
    ]
    for batch in batches:
        batch_jobs = []
        for params in batch:
            logger.info(f"Selected input_parameters: {params}.")

            # Temporary recognition of Python API usage via the job name suffix.
            job_name = f"{name}_{job_nr}_py"
            url = (
                f"{self.auth._endpoint()}/projects/{self.project_id}/"
                f"workflows/{self.workflow_id}/jobs?name={job_name}"
            )
            response_json = self.auth._request(
                request_type="POST", url=url, data=params
            )
            job_json = response_json["data"]
            logger.info(f"Created and running new job: {job_json['id']}")
            job = Job(
                self.auth,
                job_id=job_json["id"],
                project_id=self.project_id,
            )
            batch_jobs.append(job)
            job_nr += 1

        # Track until all jobs in the batch are finished.
        for job in batch_jobs:
            try:
                job.track_status(report_time=20)
            except ValueError as e:
                if str(e) == "Job has failed! See the above log.":
                    logger.warning("Skipping failed job...")
                else:
                    raise
        jobs_list.extend(batch_jobs)

    job_collection = JobCollection(
        self.auth, project_id=self.project_id, jobs=jobs_list
    )
    return job_collection
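# Example usage (a sketch; _helper_run_parallel_jobs is an internal helper and is
# normally reached through the workflow's public parallel-run methods. The workflow
# id is a placeholder, and the construct_parallel_parameters arguments shown are
# assumptions, not a confirmed signature):
#
#   workflow = up42.initialize_workflow(workflow_id="your-workflow-id")
#   input_parameters_list = workflow.construct_parallel_parameters(
#       geometries=[aoi_1, aoi_2],
#       interval_dates=[("2021-01-01", "2021-01-31")],
#   )
#   # Dry run first (test queries), then the real jobs:
#   test_collection = workflow._helper_run_parallel_jobs(
#       input_parameters_list, max_concurrent_jobs=5, test_job=True
#   )
#   job_collection = workflow._helper_run_parallel_jobs(
#       input_parameters_list, max_concurrent_jobs=5
#   )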