Пример #1
0
    def validate_job_name_and_folder(self):
        """
        Normalize self.job via assets.validate_job_folder and, depending on
        the switches, remove the existing job folder.

        Deletion policy, first match wins:
            * self.overwrite is set: nothing is deleted.
            * self.force is set: the folder is deleted without asking.
            * the folder exists: the user is asked through confirm_yn
              ("y" is passed as its second argument).
            * otherwise: nothing is deleted.

        Returns:
             The validated job folder (also stored back on self.job).
        """

        self.job = assets.validate_job_folder(self.job)

        if self.overwrite:
            should_delete = False
        elif self.force:
            should_delete = True
        elif self.job.exists():
            # Build the two-tone prompt: bold red question, yellow details.
            question = (colors.red & colors.bold
                        | f"Do you really want to remove ALL contents of ")
            details = (
                colors.yellow
                | f"'{self.job}'?\nIf no, then job may be in an inconsistent state.\n"
            )
            should_delete = confirm_yn(question + details, "y")
        else:
            should_delete = False

        if should_delete:
            tell(f"Deleting all of {self.job}.")
            self.job.delete()

        return self.job
Пример #2
0
 def __init__(self, job_folder, include_manifest=True, is_dev_override=False):
     """
     Collect the run results and the optional job manifest of one job.

     Arguments:
         job_folder: Passed to assets.validate_job_folder with
             search_if_not_present=True; the result is kept as
             self.job_folder.
         include_manifest: Forwarded unchanged to every RunResult.
         is_dev_override: Stored as-is on the instance.
     """
     self.job_folder = assets.validate_job_folder(
         job_folder, search_if_not_present=True
     )

     # One RunResult per directory entry that holds a run_manifest.yaml,
     # keyed by the entry's name.
     results_by_name = {}
     for entry in self.job_folder:
         if not entry.is_dir():
             continue
         if "run_manifest.yaml" not in entry:
             continue
         results_by_name[entry.name] = RunResult(
             entry, include_manifest=include_manifest
         )
     self._run_results = results_by_name

     self.is_dev_override = is_dev_override

     # The job manifest is optional; it stays None when the file is absent.
     self.job_manifest = None
     manifest_path = self.job_folder / "job_manifest.yaml"
     if manifest_path.exists():
         self.job_manifest = utils.yaml_load_munch(manifest_path)
Пример #3
0
    def __init__(self, job_folders, include_manifest=True):
        """
        Merge the run results of several job folders into one mapping.

        Arguments:
            job_folders: A list (enforced with check.t) of job folders; each
                one is passed through assets.validate_job_folder.
            include_manifest: Forwarded unchanged to every RunResult.

        Run results are keyed by run folder name, so a later folder with the
        same run name replaces an earlier entry.
        """
        check.t(job_folders, list)

        # Placeholder text: there is no single job folder for this object.
        self.job_folder = "MultiJobResult has multiple folders in job_folders"
        self.job_folders = []
        self._run_results = {}

        for requested_folder in job_folders:
            validated_folder = assets.validate_job_folder(requested_folder)
            self.job_folders.append(validated_folder)

            for run_folder in validated_folder:
                if not run_folder.is_dir():
                    continue
                if "run_manifest.yaml" not in run_folder:
                    continue
                self._run_results[run_folder.name] = RunResult(
                    run_folder, include_manifest=include_manifest
                )
Пример #4
0
    def main(self, job_folder=None):
        """
        Execute every run found under a job folder, then write job_info.yaml
        and run the report notebooks.

        Arguments:
            job_folder: The job to run; falls back to self.job when None.

        Returns:
            1 if no job folder was given, the folder does not exist, or no
            plaster_run.yaml was found beneath it.
            0 after running only the reports when self.reports_only is set.
            Otherwise the accumulated failure count of the runs (0 == success).

        Raises:
            Any exception raised while executing a run, unless
            self.continue_on_error is set (then it is logged and counted as
            a failure). Whatever assets.validate_job_folder raises also
            propagates.
        """
        switches = utils.plumbum_switches(self)

        if job_folder is None:
            job_folder = self.job

        # This guard has to run BEFORE validation and before the folder is
        # used to build the log path, otherwise it can never trigger.
        if job_folder is None:
            log.error("No job_folder was specified")
            return 1

        job_folder = assets.validate_job_folder(job_folder)
        # At this point job_folder is a plumbum path

        # Check existence before creating the per-run log file inside the
        # folder, so a missing folder gives this message instead of an
        # error from opening the log file.
        if not job_folder.exists():
            log.error(f"Unable to find the path {job_folder}")
            return 1

        # Add a new handler so we get PER-RUN log files into the job folder.
        # FileHandler owns its stream and closes it when the handler is
        # closed, unlike a StreamHandler wrapped around a bare open().
        per_run_log_path = job_folder / f"{int(time.time()):06x}.log"
        formatter = zlog.ColorfulFormatter(
            "%(name)s %(asctime)s %(levelname)s %(message)s %(filename)s %(lineno)d"
        )
        handler = logging.FileHandler(per_run_log_path, mode="w")
        handler.setFormatter(formatter)
        zlog.add_handler(handler)

        tell(f"Trapping run logs into {per_run_log_path}")

        tell(
            f"Plaster run {job_folder} limit={self.limit} started at {arrow.utcnow().format()}"
        )

        # Load the job_uuid if available, eventually this will be nice for logging
        job_uuid = None
        job_yaml = job_folder / "job_manifest.yaml"
        if job_yaml.exists():
            job_manifest = utils.yaml_load_munch(job_yaml)
            # A manifest without a uuid must not abort the run.
            job_uuid = getattr(job_manifest, "uuid", None)

        # Find all the plaster_run.yaml files. They might be in run subfolders
        run_dirs = [
            found_path.dirname
            for found_path in job_folder.walk(
                filter=lambda p: p.name == "plaster_run.yaml")
        ]

        if not run_dirs:
            log.error(
                "Plaster: Nothing to do because no run_dirs have plaster_run.yaml files"
            )
            return 1

        def run_reports():
            # The top-level report plus every notebook globbed from _reports.
            report_paths = [job_folder / "report.ipynb"
                            ] + (job_folder / "_reports" // "*.ipynb")

            for report_src_path in report_paths:
                report_dst_path = report_src_path.with_suffix(".html")
                if report_src_path.exists() and (self.force or out_of_date(
                        report_src_path, report_dst_path)):
                    tell(f"Running report {report_src_path}")
                    self.run_ipynb(report_src_path)

        if self.reports_only:
            run_reports()
            return 0

        # A normal run where all happens in this process
        failure_count = 0
        n_run_dirs = len(run_dirs)
        for run_dir_i, run_dir in enumerate(sorted(run_dirs)):
            zlog.metrics(
                f"Starting run subdirectory {run_dir}. {run_dir_i + 1} of {n_run_dirs}",
                log=log,
                _type="plaster_start",
                run_dir=run_dir,
                run_dir_i=run_dir_i,
                run_dir_n=n_run_dirs,
                **switches,
            )

            try:
                with zap.Context(
                        cpu_limit=self.cpu_limit,
                        mode="debug" if self.debug_mode else None,
                        allow_inner_parallelism=True,
                ):
                    # allow_inner_parallelism=True needs to be true so that each task such as sigproc_v2
                    # can allocate parallel jobs to each field.

                    run = RunExecutor(run_dir).load()
                    if "_erisyon" in run.config:
                        zlog.metrics(
                            "run metrics",
                            log=log,
                            _type="erisyon_block",
                            **run.config._erisyon,
                        )

                    failure_count += run.execute(
                        force=self.force,
                        limit=self.limit.split(",") if self.limit else None,
                        clean=self.clean,
                        n_fields_limit=self.n_fields_limit,
                        no_progress=self.no_progress,
                    )
            except Exception as e:
                failure_count += 1
                if not self.continue_on_error:
                    # Bare raise keeps the original traceback intact.
                    raise
                # Leave a trace of the failure instead of dropping it silently.
                log.error(f"Run {run_dir} failed, continuing with next run: {e!r}")

        if failure_count == 0 and self.limit is None and not self.clean:
            # WRITE job_info.yaml with metadata used by the indexer
            n_runs = n_run_dirs
            job_info = Munch(n_runs=n_runs, job_uuid=job_uuid)
            if n_runs == 1:
                job = JobResult(job_folder=job_folder)

                # Both lookups are best-effort: a run without this data just
                # gets an empty dict. Exception (not a bare except) so that
                # KeyboardInterrupt / SystemExit still propagate.
                tsv_data = {}
                try:
                    tsv_data = job.runs[0].ims_import.tsv_data
                except Exception:
                    pass

                nd2_metadata = {}
                try:
                    nd2_metadata = job.runs[0].ims_import._nd2_metadata
                except Exception:
                    pass

                job_info.update(tsv_data=tsv_data, nd2_metadata=nd2_metadata)

            utils.yaml_save(job_folder / "job_info.yaml", job_info)

            # RUN reports if not skipped
            if not self.skip_reports:
                run_reports()

        return failure_count
Пример #5
0
 def it_raises_on_non_str():
     """An integer job folder is expected to raise check.CheckError."""
     not_a_string = 123
     with zest.raises(check.CheckError):
         assets.validate_job_folder(not_a_string)
Пример #6
0
 def it_raises_on_file_outside_jobs_folder():
     """A path under /tmp is expected to raise ValueError."""
     outside_path = "/tmp/foo/bar"
     with zest.raises(ValueError):
         assets.validate_job_folder(outside_path)
Пример #7
0
 def it_raises_on_non_existing_file_in_jobs_folder():
     """A missing entry under ./jobs_folder is expected to raise FileNotFoundError."""
     missing_path = "./jobs_folder/__does_not_exist"
     with zest.raises(FileNotFoundError):
         assets.validate_job_folder(missing_path)
Пример #8
0
 def it_raises_on_run_folder_if_not_specified():
     """A run sub-folder with allow_run_folders=False is expected to raise ValueError."""
     run_folder_path = f"./jobs_folder/{job_name}/run1"
     with zest.raises(ValueError):
         assets.validate_job_folder(run_folder_path, allow_run_folders=False)
Пример #9
0
 def it_accepts_run_folder_if_specified():
     """With allow_run_folders=True the run sub-folder compares equal to '<job_name>/run1'."""
     run_folder_path = f"./jobs_folder/{job_name}/run1"
     validated = assets.validate_job_folder(
         run_folder_path, allow_run_folders=True
     )
     assert validated == f"{job_name}/run1"
Пример #10
0
 def it_accepts_named_job_folder_if_it_exists():
     """Validating the bare job name yields a value equal to that name."""
     validated = assets.validate_job_folder(job_name)
     assert validated == job_name
Пример #11
0
 def it_accepts_folder_in_jobs_folder_by_relative_string_with_trailing_slash(
 ):
     """A relative ./jobs_folder path with a trailing slash validates to the job name."""
     relative_with_slash = f"./jobs_folder/{job_name}/"
     validated = assets.validate_job_folder(relative_with_slash)
     assert validated == job_name
Пример #12
0
 def it_accepts_folder_in_jobs_folder_by_relative_string():
     """A relative ./jobs_folder path validates to the job name."""
     relative_path = f"./jobs_folder/{job_name}"
     validated = assets.validate_job_folder(relative_path)
     assert validated == job_name
Пример #13
0
 def it_accepts_folder_in_jobs_folder_by_absolute_plumbum():
     """A plumbum path built from HOST_JOBS_FOLDER validates to the job name."""
     jobs_root = local.path(local.env["HOST_JOBS_FOLDER"])
     absolute_job_path = jobs_root / job_name
     validated = assets.validate_job_folder(absolute_job_path)
     assert validated == job_name
Пример #14
0
 def validate_job_folder(self, job_folder, allow_run_folders=False):
     """
     Delegate to assets.validate_job_folder.

     Arguments:
         job_folder: Forwarded unchanged.
         allow_run_folders: Forwarded unchanged as a keyword argument.

     Returns:
         Whatever assets.validate_job_folder returns.
     """
     validated = assets.validate_job_folder(
         job_folder, allow_run_folders=allow_run_folders
     )
     return validated
Пример #15
0
 def it_raises_if_not_symbol():
     """A name containing a space is expected to raise ValueError."""
     name_with_space = "foo bar"
     with zest.raises(ValueError):
         assets.validate_job_folder(name_with_space)
Пример #16
0
 def it_returns_a_plumbum_path():
     """The validated result is an instance of plumbum's LocalPath."""
     validated = assets.validate_job_folder("foo_bar")
     assert isinstance(validated, plumbum.path.local.LocalPath)
Пример #17
0
 def it_accepts_slash_slash_jobs_folder():
     """A //jobs_folder/ prefixed path validates to the job name."""
     double_slash_path = f"//jobs_folder/{job_name}"
     validated = assets.validate_job_folder(double_slash_path)
     assert validated == job_name