Example No. 1
    def is_blocked(self, state):
        if not self.status_path or not self.status_path.exists():
            log_message("error",
                        f"Status file for {self.dataset_id} cannot be found")
            sys.exit(1)

        # reload the status file in case somethings changed
        self.load_dataset_status_file()

        status_attrs = state.split(":")
        blocked = False
        if status_attrs[0] in self.stat["WAREHOUSE"].keys():
            state_messages = sorted(self.stat["WAREHOUSE"][status_attrs[0]])
            for ts, message in state_messages:
                if "Blocked" not in message and "Unblocked" not in message:
                    continue
                message_items = message.split(":")
                if len(message_items) < 2:
                    continue
                if message_items[0] not in state:
                    continue
                if "Blocked" in message_items[1]:
                    blocked = True
                elif "Unblocked" in message_items[1]:
                    blocked = False
        return blocked
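A standalone sketch of the resolution rule above, using hypothetical timestamps and message strings rather than the real status-file layout: markers are replayed in timestamp order and the most recent Blocked/Unblocked entry for the queried state wins.

    # Hypothetical (timestamp, message) pairs for one state; the real entries
    # come from self.stat["WAREHOUSE"] after load_dataset_status_file().
    state = "MOVETOPUBLICATION:MoveToPublication:"
    state_messages = sorted([
        ("20230101_000000_000000", "MoveToPublication:Blocked"),
        ("20230102_000000_000000", "MoveToPublication:Unblocked"),
    ])

    blocked = False
    for _ts, message in state_messages:
        items = message.split(":")
        if len(items) < 2 or items[0] not in state:
            continue
        if "Unblocked" in items[1]:   # test "Unblocked" before "Blocked"
            blocked = False
        elif "Blocked" in items[1]:
            blocked = True

    print(blocked)  # False: the later Unblocked marker wins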
Example No. 2
    def latest_warehouse_dir(self):
        if self.warehouse_path is None or (not self.warehouse_path and
                                           not self.warehouse_path.exists()):
            log_message(
                "error",
                f"The dataset {self.dataset_id} does not have a warehouse path"
            )
            sys.exit(1)
        if self.project != "CMIP6" and not self.warehouse_path.exists():
            self.warehouse_path.mkdir(parents=True, exist_ok=True)
        if self.project == "CMIP6" and not self.warehouse_path.exists():
            return None

        # import ipdb; ipdb.set_trace()

        # we assume that the warehouse directory contains only directories named "v0.#" or "v#"
        try:
            latest_version = sorted([
                float(str(x.name)[1:]) for x in self.warehouse_path.iterdir()
                if x.is_dir() and any(x.iterdir()) and "tmp" not in x.name
            ]).pop()
        except IndexError:
            latest_version = 0

        if not isinstance(latest_version, int) and latest_version.is_integer():
            latest_version = int(latest_version)

        if latest_version < 0.1:
            latest_version = 0

        path_to_latest = Path(self.warehouse_path,
                              f"v{latest_version}").resolve()
        if "CMIP6" not in self.dataset_id and not path_to_latest.exists():
            path_to_latest.mkdir(parents=True)
        return str(path_to_latest)
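The version selection can be exercised on its own; a minimal sketch with hypothetical directory names:

    # Hypothetical version directories under a warehouse path: strip the
    # leading "v", sort numerically as floats, then collapse whole numbers
    # back to ints so the path renders as "v2" rather than "v2.0".
    names = ["v0.1", "v0.2", "v1", "v2"]
    latest = sorted(float(n[1:]) for n in names).pop()
    if latest.is_integer():
        latest = int(latest)
    print(f"v{latest}")  # v2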
Example No. 3
    def check_climos(self, files):
        """
        Given a list of climo files, find any that are missing
        """
        missing = []

        pattern = r"_\d{6}_\d{6}_climo.nc"
        files = sorted(files)
        idx = re.search(pattern=pattern, string=files[0])
        if not idx:
            log_message("error", f"Unexpected file format: {files[0]}")
            sys.exit(1)
        prefix = files[0][:idx.start() - 2]

        for month in range(1, 13):
            name = f"{prefix}{month:02d}_{self.start_year:04d}{month:02d}_{self.end_year:04d}{month:02d}_climo.nc"
            if name not in files:
                missing.append(name)

        for season in SEASONS:
            name = f'{prefix}{season["name"]}_{self.start_year:04d}{season["start"]}_{self.end_year:04d}{season["end"]}_climo.nc'
            if name not in files:
                missing.append(name)

        return missing
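A self-contained sketch of the filename check, with a hypothetical prefix and a reduced SEASONS table (the real table presumably also carries DJF/MAM/JJA/SON entries):

    # Hypothetical inputs; the real prefix is recovered from the first file
    # and SEASONS comes from the package's constants.
    SEASONS = [{"name": "ANN", "start": "01", "end": "12"}]
    prefix, start_year, end_year = "v2.LR.historical_0101_", 1850, 1851

    expected = [
        f"{prefix}{m:02d}_{start_year:04d}{m:02d}_{end_year:04d}{m:02d}_climo.nc"
        for m in range(1, 13)
    ] + [
        f'{prefix}{s["name"]}_{start_year:04d}{s["start"]}_{end_year:04d}{s["end"]}_climo.nc'
        for s in SEASONS
    ]

    files = set(expected[:12])   # pretend only the monthly climos exist
    missing = [n for n in expected if n not in files]
    print(missing)               # just the ANN seasonal climo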
Example No. 4
 def __init__(self, *args, **kwargs):
     super().__init__(**kwargs)
     self.name = NAME.upper()
     parallel = self.params.get('parallel')
     self.serial = not parallel
     self.metadata_path = None
     log_message('info', f'initializing workflow {self.name}')
Example No. 5
 def __init__(self, *args, **kwargs):
     super().__init__(*args, **kwargs)
     self.name = NAME.upper()
     self.pub_path = None
     log_message(
         'info',
         f'WF_pub_init Publication_init: initializing workflow {self.name}')
Example No. 6
    def load_children(self):
        my_path = Path(inspect.getfile(self.__class__)).parent.absolute()
        workflows = {}
        for d in os.scandir(my_path):
            if not d.is_dir() or d.name == "jobs" or d.name == "__pycache__":
                continue

            module_path = Path(my_path, d.name, '__init__.py')
            if not module_path.exists():
                log_message(
                    'error',
                    f"{module_path} doesnt exist, doesnt look like this is a well formatted workflow"
                )
                sys.exit(1)

            workflows_string = f"warehouse{os.sep}workflows"
            idx = str(my_path.resolve()).find(workflows_string)
            if self.name == NAME:
                module_name = f'warehouse.workflows.{d.name}'
            else:
                module_name = f'warehouse.workflows{str(my_path)[idx+len(workflows_string):].replace(os.sep, ".")}.{d.name}'

            self.print_debug(f"loading workflow module {module_name}")

            module = importlib.import_module(module_name)
            workflow_class = getattr(module, module.NAME)
            workflow_instance = workflow_class(
                parent=self, slurm_scripts=self.slurm_scripts)
            workflow_instance.load_children()
            workflow_instance.load_transitions()
            workflows[module.NAME.upper()] = workflow_instance
        self.children = workflows
Example No. 7
    def find_e3sm_source_dataset(self, job):
        """
        Given a job with a CMIP6 dataset that needs to be run, 
        find the matching raw E3SM dataset it needs as input

        Parameters:
            job (WorkflowJob): the CMIP6 job that needs to have its requirements met

        Returns:
            Dataset, the E3SM dataset that matches the input requirements for the job if found, else None
        """
        # msg = f"No raw E3SM dataset was in the list of datasets provided, seaching the warehouse for one that mathes {job}"
        # log_message("debug", msg)

        for x in self.collect_e3sm_datasets():
            # log_message("info", f"DBG: ==== looping =========================================================================")
            # log_message("info", f"DBG: processing dataset x={x} in list of self.collect_e3sm_datasets(). Calling Dataset with")
            # log_message("info", f"DBG:     pub_base={self.publication_path}")
            # log_message("info", f"DBG:     war_base={self.warehouse_path}")
            dataset = Dataset(dataset_id=x,
                              status_path=os.path.join(self.status_path,
                                                       f"{x}.status"),
                              pub_base=self.publication_path,
                              warehouse_base=self.warehouse_path,
                              archive_base=self.archive_path,
                              no_status_file=True)
            # log_message("info", f"DBG: Dataset() returned: {dataset}, Testing for 'matches_requirement()'")
            if job.matches_requirement(dataset):
                # log_message("info", f"DBG: dataset matched job requirement")
                dataset.initialize_status_file()
                msg = f"matching dataset found: {dataset.dataset_id}"
                log_message("debug", msg, self.debug)
                return dataset
        return None
Example No. 8
    def status(self, status):
        """
        Write out to the dataset's status file and update its record of the latest state.
        Because this is a @property setter, the parameters have to be passed in along with
        the status as a tuple. Would love to have a solution for that ugliness.
        """
        self.load_dataset_status_file()
        latest, _ = self.get_latest_status()
        if status is None or status == self._status or latest == status:
            log_message(
                "info",
                f"DBG: DS: status.setter: Return pre-set with input status = {status}"
            )
            return
        params = None
        if isinstance(status, tuple):
            status, params = status

        # msg = f"setting {self.dataset_id} to {status}"
        # log_message("debug", msg, )
        self._status = status

        with open(self.status_path, "a") as outstream:
            tstamp = UTC.localize(
                datetime.utcnow()).strftime("%Y%m%d_%H%M%S_%f")
            # msg = f'STAT:{tstamp}:WAREHOUSE:{status}'
            msg = f'STAT:{tstamp}:{status}'
            if params is not None:
                items = [
                    f"{k}={v}".replace(":", "^") for k, v in params.items()
                ]
                msg += ",".join(items)
            outstream.write(msg + "\n")
            log_message("info",
                        f"DBG: DS: status.setter: Wrote STAT message: {msg}")
Example No. 9
    def check_monthly(self, files):
        """
        Given a list of monthly files, find any that are missing
        """
        missing = []
        files = sorted(files)

        pattern = r"\d{4}-\d{2}.*nc"
        try:
            idx = re.search(pattern=pattern, string=files[0])
        except Exception as e:
            log_message(
                "error",
                f"file {files[0]} does not match expected pattern for monthly files",
            )
            sys.exit(1)

        if not idx:
            log_message("error", f"Unexpected file format: {files[0]}")
            sys.exit(1)

        prefix = files[0][:idx.start()]
        suffix = files[0][idx.start() + 7:]

        for year in range(self.start_year, self.end_year + 1):
            for month in range(1, 13):
                name = f"{prefix}{year:04d}-{month:02d}{suffix}"
                if name not in files:
                    missing.append(name)

        return missing
Example No. 10
    def __call__(self):
        from warehouse.warehouse import AutoWarehouse

        dataset_id = self.params['dataset_id']
        log_message("info", f'Starting with datasets {dataset_id}')

        if (metadata_path := self.params.get('metadata_path')):
            self.metadata_path = Path(metadata_path)
Example No. 11
    def check_time_series(self, files):

        missing = []
        files = [x.split("/")[-1] for x in sorted(files)]
        files_found = []

        # DEBUG: not self.datavars
        if not self.datavars:
            log_message(
                "error",
                f"dataset.py: check_time_series: dataset {self.dataset_id} is trying to validate time-series files, but has no datavars",
            )
            sys.exit(1)

        for var in self.datavars:

            # depending on the mapping file used to regrid the time-series
            # they may have different names, so we start by finding
            # all the files for each variable
            v_files = list()
            for x in files:
                idx = -36 if "cmip6_180x360_aave" in x else -17
                if var in x and x[:idx] == var:
                    v_files.append(x)

            if not v_files:
                missing.append(
                    f"{self.dataset_id}-{var}-{self.start_year:04d}-{self.end_year:04d}"
                )
                continue

            v_files = sorted(v_files)
            v_start, v_end = self.get_ts_start_end(v_files[0])
            if self.start_year != v_start:
                missing.append(
                    f"{self.dataset_id}-{var}-{self.start_year:04d}-{v_start:04d}"
                )

            prev_end = self.start_year
            for file in v_files:
                file_start, file_end = self.get_ts_start_end(file)
                if file_start == self.start_year:
                    prev_end = file_end
                    continue
                if file_start == prev_end + 1:
                    prev_end = file_end
                else:
                    missing.append(
                        f"{self.dataset_id}-{var}-{prev_end:04d}-{file_start:04d}"
                    )

            # check the coverage of the last file for this variable
            file_start, file_end = self.get_ts_start_end(v_files[-1])
            if file_end != self.end_year:
                missing.append(
                    f"{self.dataset_id}-{var}-{file_start:04d}-{self.end_year:04d}"
                )

        return missing
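The year-coverage scan reduces to a small gap check over per-file (start, end) pairs; a simplified sketch with hypothetical spans (the method above additionally records the missing segments as per-variable IDs):

    # Hypothetical per-file year spans for one variable, already sorted.
    spans = [(1850, 1899), (1900, 1949), (1960, 2014)]
    start_year, end_year = 1850, 2014

    gaps = []
    prev_end = None
    for file_start, file_end in spans:
        if prev_end is not None and file_start != prev_end + 1:
            gaps.append((prev_end + 1, file_start - 1))
        prev_end = file_end
    if prev_end != end_year:
        gaps.append((prev_end + 1, end_year))

    print(gaps)  # [(1950, 1959)]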
Example No. 12
 def arg_checker(args):
     if not os.path.exists(args.path):
         log_message('error', f"The given path {args.path} does not exist")
         return False, COMMAND
     if not os.path.exists(args.zstash):
         log_message('error',
                     f"The given path {args.zstash} does not exist")
         return False, COMMAND
     return True, COMMAND
Example No. 13
 def get_ts_start_end(filename):
     p = re.compile(r"_\d{6}_\d{6}.*nc")
     idx = p.search(filename)
     if not idx:
         log_message("error", f"Unexpected file format: {filename}")
         sys.exit(1)
     start = int(filename[idx.start() + 1:idx.start() + 5])
     end = int(filename[idx.start() + 8:idx.start() + 12])
     return start, end
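Usage sketch with a hypothetical filename, assuming the helper above is reachable as a plain function (in the listing it appears to be a staticmethod):

    # "_185001_201412" is matched by the regex; the slices pull the two years.
    start, end = get_ts_start_end("FSNS_185001_201412.nc")
    assert (start, end) == (1850, 2014)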
Example No. 14
 def load_transitions(self):
     transition_path = Path(
         Path(inspect.getfile(self.__class__)).parents[0],
         'transitions.yaml')
     with open(transition_path, 'r') as instream:
         self.transitions = yaml.load(instream, Loader=yaml.SafeLoader)
         log_message("info", f"WF_init: {self.name} loads transitions")
         log_message(
             "debug",
             f"WF_init: {self.name} loads transitions {self.transitions}")
Example No. 15
    def check_submonthly(self, files):

        missing = list()
        files = sorted(files)

        first = files[0]
        pattern = re.compile(r"\d{4}-\d{2}.*nc")
        if not (idx := pattern.search(first)):
            log_message("error", f"Unexpected file format: {first}")
            sys.exit(1)
Example No. 16
 def start_listener(self):
     """
     Starts a file change listener for the status file
     for each of the datasets.
     """
     self.listener = []
     for _, dataset in self.datasets.items():
         log_message("info", f"starting listener for {dataset.status_path}")
         listener = Listener(warehouse=self, file_path=dataset.status_path)
         listener.start()
         self.listener.append(listener)
     log_message("info", "Listener setup complete")
Example No. 17
    def __call__(self, slurm):
        if not self.meets_requirements():
            log_message("error",
                        f"Job does not meet requirements! {self.requires}")
            return None
        msg = f"Starting job: {str(self)} with reqs {[x.dataset_id for x in self.requires.values()]}"
        log_message('debug', msg)

        self.resolve_cmd()

        working_dir = self.dataset.latest_warehouse_dir
        if self.dataset.is_locked(working_dir):
            log_message(
                'warning',
                f"Cant start job working dir is locked: {working_dir}")
            return None
        else:
            self.dataset.lock(working_dir)

        self._outname = self.get_slurm_output_script_name()
        output_option = ('-o',
                         f'{Path(self._slurm_out, self._outname).resolve()}')

        self._slurm_opts.extend(
            [output_option, ('-N', 1), ('-c', self._job_workers)])

        script_name = self.get_slurm_run_script_name()
        script_path = Path(self._slurm_out, script_name)
        script_path.touch(mode=0o664)

        message_file = NamedTemporaryFile(dir=self.tmpdir, delete=False)
        Path(message_file.name).touch()
        self._cmd = f"export message_file={message_file.name}\n" + self._cmd

        self.add_cmd_suffix()
        log_message(
            "info",
            f"WF_jobs_init:render_script: self,cmd={self.cmd}, script_path={str(script_path)}"
        )
        slurm.render_script(self.cmd, str(script_path), self._slurm_opts)
        self._job_id = slurm.sbatch(str(script_path))
        log_message(
            "info",
            f"WF_jobs_init: _call_: setting status to {self._parent}:{self.name}:Engaged: for {self.dataset.dataset_id}"
        )
        self.dataset.status = (f"{self._parent}:{self.name}:Engaged:", {
            "slurm_id": self.job_id
        })
        return self._job_id
Example No. 18
 def meets_requirements(self):
     """
     Check if all the requirements for the job are met
     """
     for req in self._requires:
         obtained = self._requires.get(req)
         # log_message("info", f"WF_jobs_init: job.meets_requirements(): checking req {req}")
         log_message(
             "debug",
             f"WF_jobs_init: job.meets_requirements(): self._requires.get(req) yields {obtained}"
         )
         if not obtained:
             log_message(
                 "info",
                 f"WF_jobs_init: job.meets_requirements(): returning False")
             return False
     return True
Example No. 19
    def __call__(self, *args, **kwargs):
        from warehouse.warehouse import AutoWarehouse

        dataset_id = self.params['dataset_id']
        tmpdir = self.params['tmp']

        log_message(
            'info',
            f'WF_pub_init Publication_call: starting workflow {self.name} for datasets {dataset_id}'
        )

        if (pub_base := self.params.get('publication_path')):
            self.pub_path = Path(pub_base)
            if not self.pub_path.exists():
                log_message(
                    "info",
                    f"WF_pub_init Publication_call: create pub dir {self.pub_path.resolve()}"
                )
                os.makedirs(self.pub_path.resolve())
Example No. 20
 def check_done(self):
     """
     Checks all the datasets to see if they're in the Pass or Fail state;
     if ALL datasets are in either Pass or Fail, the filesystem listeners
     are shut down, the 'should_exit' flag is set, and sys.exit(0) is called.
     """
     all_done = True
     for dataset in self.datasets.values():
         if (f"{self.workflow.name.upper()}:Pass:"******"{self.workflow.name.upper()}:Fail:"
                 not in dataset.status):
             all_done = False
     if all_done:
         for listener in self.listener:
             listener.observer.stop()
         self.should_exit = True
         log_message("info", "All datasets complete, exiting")
         sys.exit(0)
     return
Example No. 21
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.name = NAME

        # log_message("info", f"JOB PublishEsgf: processing {self.dataset.dataset_id}")
        log_message("info", f"JOB PublishEsgf: processing {self.dataset}")
        mapfile_path = sorted(
            [x for x in self.dataset.publication_path.glob('*.map')]).pop()

        optional_facets = {}
        if self.dataset.project == 'E3SM':
            dataset_attrs = self.dataset.dataset_id.split('.')
            model_version = dataset_attrs[1]
            experiment_name = dataset_attrs[2]

            experiment_info = self._spec['project']['E3SM'][model_version][
                experiment_name]
            if (campaign := experiment_info.get('campaign')):
                optional_facets['campaign'] = campaign
            if (science_driver := experiment_info.get('science_driver')):
                optional_facets['science_driver'] = science_driver
Example No. 22
    def __call__(self, *args, **kwargs):
        from warehouse.warehouse import AutoWarehouse
        log_message('info', f'starting workflow {self.name}')

        dataset_ids = self.params['dataset_id']
        warehouse_path = self.params['warehouse_path']
        publication_path = self.params['publication_path']
        archive_path = self.params['archive_path']
        status_path = self.params.get('status_path')

        if (data_path := self.params.get('data_path')):
            warehouse = AutoWarehouse(
                workflow=self,
                dataset_id=dataset_ids,
                warehouse_path=data_path,
                publication_path=publication_path,
                archive_path=archive_path,
                status_path=status_path,
                serial=True,
                job_worker=self.job_workers,
                debug=self.debug)
Example No. 23
    def status_was_updated(self, path):
        """
        This should be called whenever a datasets status file is updated
        Parameters: path (str) -> the path to the status file that was updated
        """
        dataset_id = None

        with open(path, "r") as instream:
            for line in instream.readlines():
                if "DATASETID" in line:
                    dataset_id = line.split("=")[-1].strip()
        if dataset_id is None:
            log_message("error", "Unable to find dataset ID in status file")

        dataset = self.datasets[dataset_id]
        dataset.update_from_status_file()
        dataset.unlock(dataset.latest_warehouse_dir)

        # check to see if there's a slurm ID in the second to last status
        # and if there is, and the latest is either Pass or Fail, then
        # remove the job from the job_pool
        latest, second_latest = dataset.get_latest_status()
        log_message("info", f"dataset: {dataset_id} updated to state {latest}")

        if second_latest is not None:
            latest_attrs = latest.split(":")
            second_latest_attrs = second_latest.split(":")
            if "slurm_id" in second_latest_attrs[-1]:
                job_id = int(second_latest_attrs[-1]
                             [second_latest_attrs[-1].index("=") + 1:])
                # if the job names are the same
                if second_latest_attrs[-3] == latest_attrs[-3]:
                    if "Pass" in latest_attrs[-2] or "Fail" in latest_attrs[-2]:
                        for job in self.job_pool:
                            if job.job_id == job_id:
                                self.job_pool.remove(job)
                                break

        # start the transition change for the dataset
        self.start_datasets({dataset_id: dataset})
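A sketch of the slurm_id bookkeeping with hypothetical status strings (the real values come from dataset.get_latest_status()): the second-to-last entry carries the job id, and the job leaves the pool once the same step reports Pass or Fail.

    second_latest = "WAREHOUSE:MOVETOPUBLICATION:Engaged:slurm_id=1234567"
    latest = "WAREHOUSE:MOVETOPUBLICATION:Pass:"

    second_attrs = second_latest.split(":")
    latest_attrs = latest.split(":")
    if "slurm_id" in second_attrs[-1]:
        job_id = int(second_attrs[-1][second_attrs[-1].index("=") + 1:])
        same_step = second_attrs[-3] == latest_attrs[-3]
        finished = "Pass" in latest_attrs[-2] or "Fail" in latest_attrs[-2]
        print(job_id, same_step and finished)  # 1234567 True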
Example No. 24
    def setup_datasets(self, check_esgf=True):
        log_message("info", "WH: setup_datasets: Initializing the warehouse")
        log_message("info", f"WH: self.warehouse_path = {self.warehouse_path}")
        log_message("info",
                    f"WH: self.publication_path = {self.publication_path}")
        cmip6_ids = [x for x in self.collect_cmip_datasets()]
        e3sm_ids = [x for x in self.collect_e3sm_datasets()]
        all_dataset_ids = cmip6_ids + e3sm_ids

        # if the user gave us a wild card, filter out anything
        # that doesn't match their pattern

        if self.dataset_ids:
            dataset_ids = []
            for dataset_pattern in self.dataset_ids:
                if new_ids := fnmatch.filter(all_dataset_ids, dataset_pattern):
                    dataset_ids.extend(new_ids)
            self.dataset_ids = dataset_ids
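The wildcard filtering is plain fnmatch; a sketch with hypothetical dataset IDs and a user pattern:

    import fnmatch

    all_dataset_ids = [
        "CMIP6.CMIP.E3SM-Project.E3SM-1-0.historical.r1i1p1f1.Amon.tas.gr",
        "CMIP6.CMIP.E3SM-Project.E3SM-1-0.historical.r1i1p1f1.Amon.pr.gr",
        "E3SM.1_0.historical.1deg_atm_60-30km_ocean.atmos.native.model-output.mon.ens1.v1",
    ]

    dataset_ids = []
    for pattern in ["*.Amon.*"]:
        if new_ids := fnmatch.filter(all_dataset_ids, pattern):
            dataset_ids.extend(new_ids)

    print(len(dataset_ids))  # 2 -> only the CMIP6 Amon datasets match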
Example No. 25
 def print_missing(self):
     found_missing = False
     # import ipdb; ipdb.set_trace()
     for x in self.datasets.values():
         if x.missing:
             found_missing = True
             for m in x.missing:
                 print(f"{m}")
         elif x.status == DatasetStatus.UNITITIALIZED.value:
             found_missing = True
             msg = f"No files in dataset {x.dataset_id}"
             log_message("error", msg)
         elif x.status != DatasetStatus.SUCCESS.value:
             found_missing = True
             msg = f"Dataset {x.dataset_id} status is {x.status}"
             log_message("error", msg)
     if not found_missing:
         log_message("info", "No missing files in datasets")
Example No. 26
    def get_esgf_status(self):
        """
        Check ESGF to see if the dataset has already been published;
        if it exists, check that the dataset is complete
        """
        # import ipdb; ipdb.set_trace()
        # TODO: fix this at some point

        if "CMIP6" in self.dataset_id:
            project = "CMIP6"
        else:
            project = "e3sm"
        facets = {"master_id": self.dataset_id, "type": "Dataset"}
        docs = search_esgf(project, facets)

        if not docs or int(docs[0]["number_of_files"]) == 0:
            if not docs:
                log_message(
                    "info",
                    f"dataset.py get_esgf_status: search facets for Dataset returned empty docs"
                )
            else:
                log_message(
                    "info",
                    f"dataset.py get_esgf_status: dataset query returned file_count = {int(docs[0]['number_of_files'])}"
                )
            return DatasetStatus.UNITITIALIZED.value

        facets = {"dataset_id": docs[0]["id"], "type": "File"}

        docs = search_esgf(project, facets)
        if not docs or len(docs) == 0:
            log_message(
                "info",
                f"dataset.py get_esgf_status: search facets for File returned empty docs"
            )
            return DatasetStatus.UNITITIALIZED.value

        files = [x["title"] for x in docs]

        if self.check_dataset_is_complete(files):
            return DatasetStatus.PUBLISHED.value
        else:
            return DatasetStatus.PARTIAL_PUBLISHED.value
Example No. 27
 def arg_checker(args, command=NAME):
     if args.data_path and not args.dataset_id:
         log_message(
             'error',
             "\nIf the data_path is given, please also give a dataset ID for the data at the path\n"
         )
         return False, command
     if not args.dataset_id and not args.data_path:
         log_message(
             'error',
             "\nError: please specify either the dataset-ids to process, or the data-path to find datasets\n"
         )
         return False, command
     if isinstance(args.dataset_id,
                   list) and len(args.dataset_id) > 1 and args.data_path:
         log_message(
             'error',
             "\nMultiple datasets were given along with the --data-path. For multiple datasets you must use the --warehouse-path and the E3SM publication directory structure"
         )
         return False, command
     return True, command
Example No. 28
    def start_datasets(self, datasets=None):
        """
        Resolve next steps for datasets and create job objects for them
        Parameters: datasets dict of string dataset_ids to dataset objects
        Returns: list of new job objects
        """

        log_message(
            "info",
            f"WH: start_datasets: Generate job objects for each dataset")
        log_message("debug", f"WH: start_datasets: datasets={datasets}")
        new_jobs = []
        ready_states = [
            DatasetStatus.NOT_IN_PUBLICATION.value,
            DatasetStatus.NOT_IN_WAREHOUSE.value,
            DatasetStatus.PARTIAL_PUBLISHED.value,
            DatasetStatus.UNITITIALIZED.value
        ]
        ''' DBG
        rsm = ""
        for stateval in ready_states:
            rsm = rsm + f"{stateval},"
        log_message("info", f"DBG: start_datasets: ready_states include {rsm}")
        end DBG '''

        if datasets is None:
            datasets = self.datasets

        for dataset_id, dataset in datasets.items():

            log_message(
                "debug",
                f"WH: start_datasets: working datasets_id {dataset_id} from datasets.items()"
            )
            log_message(
                "debug",
                f"WH: start_datasets: dataset.status = {dataset.status}")

            if "Engaged" in dataset.status:
                log_message(
                    "debug",
                    f"WH: start_datasets: 'Engaged' in dataset.status: continue"
                )
                continue

            # for all the datasets, if they're not yet published or in the warehouse
            # then mark them as ready to start
            if dataset.status in ready_states:
                log_message(
                    'debug',
                    f"WH: start_datasets: Dataset {dataset.dataset_id} is transitioning from {dataset.status} to {DatasetStatus.READY.value}"
                )
                dataset.status = DatasetStatus.READY.value
                continue

            # import ipdb; ipdb.set_trace()
            # we keep a reference to the workflow instance, so when
            # we make a job we can reconstruct the parent workflow name
            # for the status file
            log_message(
                "debug",
                f"WH: start_datasets: To reconstruct parent workflow name:")
            params = {}
            if parameters := dataset.status.split(":")[-1].strip():
                for item in parameters.split(","):
                    key, value = item.split("=")
                    params[key] = value.replace("^", ":")
                    log_message(
                        "debug",
                        f"WH: start_datasets: params[{key}] = {params[key]}")

            state = dataset.status
            workflow = self.workflow

            log_message("debug", f"WH: start_datasets: state = {state}")
            log_message("debug",
                        f"WH: start_datasets: workflow = {self.workflow}")

            if state == DatasetStatus.UNITITIALIZED.value:
                state = DatasetStatusMessage.WAREHOUSE_READY.value

            # check that the dataset isn't blocked by some other process that's acting on it
            # and that the workflow hasn't either failed or succeeded

            # import ipdb; ipdb.set_trace()
            if dataset.is_blocked(state):
                msg = f"Dataset {dataset.dataset_id} at state {state} is marked as Blocked"
                log_message("error", msg)
                continue
            elif f"{self.workflow.name.upper()}:Pass:"******"{self.workflow.name.upper()}:Fail:" == state:
                self.workflow_error(dataset)
                self.check_done()
                continue

            # there may be several transitions out of this state and
            # we need to collect them all
            engaged_states = []
            for item in self.workflow.next_state(dataset, state, params):
                new_state, workflow, params = item

                # if we have a new state with the "Engaged" keyword
                # we know its a leaf node that needs to be executed
                if "Engaged" in new_state:
                    engaged_states.append((new_state, workflow, params))
                # otherwise the new state and its parameters need to be
                # written to the dataset status file
                else:
                    msg = f"warehouse: start_datasets: Dataset {dataset.dataset_id} transitioning to state {new_state}"
                    if params:
                        msg += f" with params {params}"
                    log_message("info", msg)
                    log_message("debug", msg, self.debug)
                    dataset.status = (new_state, params)

            if not engaged_states:
                continue

            for state, workflow, params in engaged_states:
                # import ipdb; ipdb.set_trace()
                newjob = self.workflow.get_job(
                    dataset,
                    state,
                    params,
                    self.scripts_path,
                    self.slurm_path,
                    workflow=workflow,
                    job_workers=self.job_workers,
                    spec=self.dataset_spec,
                    debug=self.debug,
                    config=warehouse_conf,
                    other_datasets=list(self.datasets.values()),
                    serial=self.serial,
                    tmpdir=self.tmpdir,
                )
                if newjob is None:
                    continue

                # check if the new job is a duplicate
                if (matching_job := self.find_matching_job(newjob)) is None:
                    log_message(
                        "debug",
                        f"Created jobs from {state} for dataset {dataset_id}")
                    new_jobs.append(newjob)
                else:
                    matching_job.setup_requisites(newjob.dataset)
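The parameter block appended by the status setter is recovered above by splitting on the last colon; a standalone sketch with a hypothetical status line shows the "^" escape being turned back into ":":

    status = "POSTPROCESS:GenerateAtmMonCMIP:Ready:err=slurm_out^job.err,slurm_id=42"

    params = {}
    if parameters := status.split(":")[-1].strip():
        for item in parameters.split(","):
            key, value = item.split("=")
            params[key] = value.replace("^", ":")

    print(params)  # {'err': 'slurm_out:job.err', 'slurm_id': '42'}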
Example No. 29
    def __init__(self, *args, **kwargs):
        super().__init__()

        self.warehouse_path = Path(
            kwargs.get("warehouse_path", DEFAULT_WAREHOUSE_PATH))
        self.publication_path = Path(
            kwargs.get("publication_path", DEFAULT_PUBLICATION_PATH))
        self.archive_path = Path(
            kwargs.get("archive_path", DEFAULT_ARCHIVE_PATH))
        self.status_path = Path(kwargs.get("status_path", DEFAULT_STATUS_PATH))
        self.spec_path = Path(kwargs.get("spec_path", DEFAULT_SPEC_PATH))
        self.num_workers = kwargs.get("num", 8)
        self.serial = kwargs.get("serial", False)
        self.testing = kwargs.get("testing", False)
        self.dataset_ids = kwargs.get("dataset_id")
        if self.dataset_ids is not None and not isinstance(
                self.dataset_ids, list):
            self.dataset_ids = [self.dataset_ids]
        self.slurm_path = kwargs.get("slurm", "slurm_scripts")
        self.report_missing = kwargs.get("report_missing")
        self.job_workers = kwargs.get("job_workers", 8)
        self.datasets = None
        self.datasets_from_path = kwargs.get("datasets_from_path", False)
        os.makedirs(self.slurm_path, exist_ok=True)
        self.should_exit = False

        if kwargs.get("debug"):
            self.debug = "DEBUG"
        else:
            self.debug = "INFO"

        self.ask = kwargs.get("ask")
        self.tmpdir = kwargs.get("tmp", os.environ.get("TMPDIR", '/tmp'))

        self.scripts_path = Path(
            Path(inspect.getfile(self.__class__)).parent.absolute(),
            "scripts").resolve()

        # not sure where to put this - Tony
        setup_logging("debug", f"{self.slurm_path}/warehouse.log")

        if not self.report_missing:
            self.workflow = kwargs.get(
                "workflow",
                Workflow(slurm_scripts=self.slurm_path,
                         debug=self.debug,
                         job_workers=self.job_workers))

            self.workflow.load_children()
            self.workflow.load_transitions()

            # this is a list of WorkflowJob objects
            self.job_pool = []

            # create the local Slurm object
            self.slurm = Slurm()

        # don't set up the listener until after we've gathered the datasets
        self.listener = None

        if self.serial is True:
            log_message("info", "Running warehouse in serial mode")
        else:
            log_message(
                "info",
                f"Running warehouse in parallel mode with {self.num_workers} workers",
            )

        with open(self.spec_path, "r") as instream:
            self.dataset_spec = yaml.load(instream, Loader=yaml.SafeLoader)
Example No. 30
 def workflow_success(self, dataset):
     log_message(
         "info",
         f"Dataset {dataset.dataset_id} SUCCEEDED from {dataset.status}")