def get_paths_with_label(file_path: Union[str, pathlib.Path], label_pattern: str) -> List[pathlib.Path]:
    """Find all paths with the given label

    The label of each dependency must match `label_pattern` in its entirety; partial matches are not accepted.

    Args:
        file_path:      Path to dependency file.
        label_pattern:  String with label or regular expression (e.g. 'gnss_rinex_nav_[MGE]' or 'gnss_rinex_nav_.').

    Returns:
        List:  List of file paths. The list is empty if the dependency file does not exist.
    """
    # The pattern is compiled as-is and applied with fullmatch below. Wrapping it as "^...$" would bind the anchors
    # to only the first/last branch of a top-level alternation ("a|b" -> "(^a)|(b$)") and accept partial matches.
    label_re = re.compile(label_pattern)

    # Make sure dependency file exists
    file_path = pathlib.Path(file_path)
    if not file_path.exists():
        log.debug(f"Dependency file {file_path} does not exist")
        return []

    # Find dependencies with the given label (section names of the dependency file are the dependency paths)
    dependencies = Configuration.read_from_file("dependencies", file_path)
    return [
        pathlib.Path(dependency_path)
        for dependency_path in dependencies.section_names
        if label_re.fullmatch(dependencies[dependency_path].label.str)
    ]
def changed(file_path: Union[str, pathlib.Path], fast_check: bool = True) -> bool:
    """Report whether any dependency listed in a dependency file has changed

    Each section of the dependency file names one dependency and stores its checksum. The stored checksum is
    compared with a freshly computed one, and the first mismatch ends the search.

    Args:
        file_path:   Path to dependency file.
        fast_check:  Fast check uses timestamps, slow check uses md5 checksums.

    Returns:
        True if any file has changed or if the dependency file does not exist, False otherwise.
    """
    dependency_file = pathlib.Path(file_path)

    # A missing dependency file is reported as a change
    if not dependency_file.exists():
        log.debug(f"Dependency file {dependency_file} does not exist")
        return True

    # Compare stored and current checksum for each dependency
    stored = Configuration.read_from_file("dependencies", dependency_file)
    for dependency in stored.section_names:
        old_checksum = stored[dependency].checksum.str
        new_checksum = _file_info(dependency, fast_check=fast_check)["checksum"]
        if new_checksum != old_checksum:
            log.debug(f"Dependency {dependency} changed from {old_checksum} to {new_checksum}")
            return True

    # Every dependency still has its stored checksum
    return False
def changed(fast_check=True, **dep_vars):
    """Report whether any dependency of a model run has changed

    The dependency file is located through the "model_run_depends" file key. Each of its sections names one
    dependency and stores its checksum, which is compared with a freshly computed one. The first mismatch ends
    the search.

    Args:
        fast_check:  Passed on to `_file_info` when computing the current checksum of each dependency.
        dep_vars:    Variables specifying the model_run_depends-file.

    Returns:
        Boolean: True if any file has changed or if the dependency file does not exist, False otherwise.
    """
    depends_path = files.path("model_run_depends", file_vars=dep_vars)

    # A missing dependency file is reported as a change
    if not depends_path.exists():
        log.debug(f"Dependency file {depends_path} does not exist")
        return True

    # Compare stored and current checksum for each dependency
    stored = Configuration.read_from_file("dependencies", depends_path)
    for dependency in stored.section_names:
        old_checksum = stored[dependency].checksum.str
        new_checksum = _file_info(dependency, fast_check=fast_check)["checksum"]
        if new_checksum != old_checksum:
            log.debug(f"Dependency {dependency} changed from {old_checksum} to {new_checksum}")
            return True

    # Every dependency still has its stored checksum
    return False
def _write(write_as_crash=True):
    """Flush the collected dependencies to the dependency file

    Merges everything gathered in `_CURRENT_DEPENDENCIES` into the file found through the "model_run_depends"
    file key, and empties the in-memory collection. Nothing is done when `_DEPENDENCY_FILE_VARS` is empty, or
    when there is nothing to store.

    When `write_as_crash` is True, an extra entry named "CRASHED" is stored together with the other
    dependencies. It is meant to make `changed` report a change next time, so that a stage that crashed is
    rerun.

    Args:
        write_as_crash (Boolean):  Whether to note that the current dependendee crashed.
    """
    # Without dependency file variables there is no dependency file to write to
    if not _DEPENDENCY_FILE_VARS:
        return

    # Record the crash marker as if it were a regular dependency
    if write_as_crash:
        crash_info = _file_info("CRASHED", True, checksum="CRASHED")
        _CURRENT_DEPENDENCIES["CRASHED"] = crash_info

    # Avoid touching the file when nothing has been collected
    if not _CURRENT_DEPENDENCIES:
        return

    # Read what is already stored in the dependency file
    depends_path = files.path("model_run_depends", file_vars=_DEPENDENCY_FILE_VARS)
    stored = Configuration.read_from_file("dependecies", depends_path)

    # One section per dependency; the in-memory collection is emptied before the file is written
    for section, file_info in _CURRENT_DEPENDENCIES.items():
        stored.update_from_dict(file_info, section=section)
    _CURRENT_DEPENDENCIES.clear()

    stored.write_to_file(depends_path)
def timestamps(rundate, pipeline, **kwargs):
    """Read the timestamps stored in the timestamp file

    The location of the timestamp file is built from the `files.timestamp` directory and filename entries,
    with the file variables from `create_file_vars` filled in.

    Args:
        rundate:   Passed on to `create_file_vars`.
        pipeline:  Passed on to `create_file_vars`.
        kwargs:    Additional keyword arguments passed on to `create_file_vars`.

    Returns:
        Dict: Contents of the "timestamps" section of the timestamp file, empty if there is no such section.
    """
    file_vars = create_file_vars(rundate, pipeline, **kwargs)

    # Directory and filename are resolved separately before being joined
    ts_directory = files.timestamp.directory.replace(**file_vars).path
    ts_filename = files.timestamp.filename.replace(**file_vars).path
    cfg = Configuration.read_from_file("timestamps", ts_directory / ts_filename)

    if "timestamps" not in cfg.sections:
        return dict()
    return cfg.timestamps.as_dict()