Example #1
    def get_jobs(self,
                 return_json: bool = False,
                 test_jobs: bool = True,
                 real_jobs: bool = True) -> Union[JobCollection, List[Dict]]:
        """
        Get all jobs in the project as a JobCollection or JSON.

        Use Workflow().get_jobs() to get a JobCollection with only the jobs
        associated with a specific workflow.

        Args:
            return_json: If True, returns the job info JSON instead of a JobCollection.
            test_jobs: If True, includes test jobs (test queries).
            real_jobs: If True, includes real jobs.

        Returns:
            All job objects in a JobCollection, or alternatively the job info as JSON.
        """
        url = f"{self.auth._endpoint()}/projects/{self.project_id}/jobs"
        response_json = self.auth._request(request_type="GET", url=url)
        jobs_json = filter_jobs_on_mode(response_json["data"], test_jobs,
                                        real_jobs)
        logger.info(f"Got {len(jobs_json)} jobs in project {self.project_id}.")
        if return_json:
            return jobs_json
        else:
            jobs = [
                Job(self.auth, job_id=job["id"], project_id=self.project_id)
                for job in tqdm(jobs_json)
            ]
            jobcollection = JobCollection(auth=self.auth,
                                          project_id=self.project_id,
                                          jobs=jobs)
            return jobcollection
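A minimal usage sketch for Example #1 (hypothetical; assumes an authenticated up42 Project instance named project):

    # Fetch only real (non-test) jobs as a JobCollection.
    jobcollection = project.get_jobs(test_jobs=False, real_jobs=True)

    # Alternatively, fetch the raw job metadata as a list of dicts.
    jobs_json = project.get_jobs(return_json=True)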
Example #2
def initialize_jobcollection(job_ids: List[str]) -> "JobCollection":
    """
    Returns a JobCollection object (the referenced jobs have to exist on UP42).

    Args:
        job_ids: List of UP42 job IDs.
    """
    if _auth is None:
        raise RuntimeError("Not authenticated, call up42.authenticate() first")
    jobs = [
        Job(auth=_auth, job_id=job_id, project_id=str(_auth.project_id))
        for job_id in job_ids
    ]
    jobcollection = JobCollection(
        auth=_auth, project_id=str(_auth.project_id), jobs=jobs
    )
    logger.info(f"Initialized {jobcollection}")
    return jobcollection
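A minimal usage sketch for Example #2 (hypothetical credentials and job IDs; assumes the up42 SDK is installed):

    import up42

    # Authenticate once per session, then build the collection from existing job IDs.
    up42.authenticate(project_id="your-project-id", project_api_key="your-project-api-key")
    jobcollection = up42.initialize_jobcollection(job_ids=["job-id-1", "job-id-2"])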
Example #3
    def _helper_run_parallel_jobs(
        self,
        input_parameters_list: Optional[List[dict]] = None,
        max_concurrent_jobs: int = 10,
        test_job: bool = False,
        name: Optional[str] = None,
    ) -> "JobCollection":
        """
        Helper function to create and run parallel real or test jobs.

        Args:
            input_parameters_list: List of dictionaries of input parameters.
            max_concurrent_jobs: Maximum number of parallel jobs that can be triggered.
            test_job: If set, runs a test query (search for available imagery based on your data parameters).
            name: The job name. Optional, by default the workflow name is assigned.

        Returns:
            The spawned real or test jobs as a JobCollection.

        Raises:
            ValueError: When max_concurrent_jobs is greater than the maximum number
                of concurrent jobs allowed by the project settings.
        """
        if input_parameters_list is None:
            raise ValueError(
                "Provide the job parameters via `input_parameters_list`."
                " You can use workflow.construct_parallel_parameters()!")

        if test_job:
            logger.info("+++++++++++++++++++++++++++++++++")
            logger.info("Running the jobs as Test Queries...")
            logger.info("+++++++++++++++++++++++++++++++++")
            # Test queries (dry runs) only search for available imagery, no processing.
            input_parameters_list = copy.deepcopy(input_parameters_list)
            for input_parameters in input_parameters_list:
                input_parameters.update({"config": {"mode": "DRY_RUN"}})  # type: ignore

        if name is None:
            name = self._info["name"]

        jobs_list = []
        job_nr = 0

        if max_concurrent_jobs > self.max_concurrent_jobs:
            logger.error(
                f"Maximum concurrent jobs {max_concurrent_jobs} greater "
                f"than project settings {self.max_concurrent_jobs}. "
                "Use project.update_project_settings to change this value.")
            raise ValueError("Too many concurrent jobs!")

        # Split the parameters into sequential batches of max_concurrent_jobs size;
        # each batch is triggered in parallel (max. 10 by default).
        batches = [
            input_parameters_list[pos : pos + max_concurrent_jobs]
            for pos in range(0, len(input_parameters_list), max_concurrent_jobs)
        ]
        for batch in batches:
            batch_jobs = []
            for params in batch:
                logger.info(f"Selected input_parameters: {params}.")

                # The "_py" suffix temporarily marks jobs triggered via the Python SDK.
                job_name = f"{name}_{job_nr}_py"

                url = (f"{self.auth._endpoint()}/projects/{self.project_id}/"
                       f"workflows/{self.workflow_id}/jobs?name={job_name}")
                response_json = self.auth._request(request_type="POST",
                                                   url=url,
                                                   data=params)
                job_json = response_json["data"]
                logger.info(f"Created and running new job: {job_json['id']}")
                job = Job(
                    self.auth,
                    job_id=job_json["id"],
                    project_id=self.project_id,
                )
                batch_jobs.append(job)
                job_nr += 1

            # Track until all jobs in the batch are finished.
            for job in batch_jobs:
                try:
                    job.track_status(report_time=20)
                except ValueError as e:
                    if str(e) == "Job has failed! See the above log.":
                        logger.warning("Skipping failed job...")
                    else:
                        raise
            jobs_list.extend(batch_jobs)

        job_collection = JobCollection(self.auth,
                                       project_id=self.project_id,
                                       jobs=jobs_list)
        return job_collection
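A minimal usage sketch for Example #3 (hypothetical; _helper_run_parallel_jobs is private and is normally reached via the public parallel-run methods of Workflow, here assumed to be construct_parallel_parameters and run_jobs_parallel):

    # Build one parameter set per geometry, then trigger the jobs in parallel batches.
    input_parameters_list = workflow.construct_parallel_parameters(
        geometries=[aoi_1, aoi_2],  # hypothetical AOI geometries
        interval_dates=[("2021-01-01", "2021-01-31")],
    )
    jobcollection = workflow.run_jobs_parallel(input_parameters_list, name="parallel_run")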