def file_paths(self):
    """File paths used to read SINEX data

    TODO: Make this configurable with a files-argument when initializing the factory
    """
    file_vars = dict(version=self.solution)
    paths = {
        self.format: files.path(
            self.file_key_pattern.format(self.format), file_vars=file_vars, download_missing=True
        )
    }
    if self.format == "snx":
        if self.solution >= "2014":
            paths.update(
                dict(
                    soln=files.path("trf-itrf_snx_soln", file_vars=file_vars, download_missing=True),
                    psd=files.path("trf-itrf_snx_psd", file_vars=file_vars, download_missing=True),
                )
            )
        else:
            paths.update(dict(soln=files.path("trf-itrf_snx_soln", file_vars=file_vars, download_missing=True)))

    return paths
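# Illustrative shape of the returned dictionary for an ITRF2014 SINEX solution (the file names
# below are made up; the actual paths are resolved from the file configuration):
#
#     {"snx": PosixPath(".../ITRF2014.SNX"),
#      "soln": PosixPath(".../ITRF2014-soln.snx"),
#      "psd": PosixPath(".../ITRF2014-psd.snx")}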
def plot(rundate: "datedoy", tech: "pipeline", stage: "option", writers: "option"):
    log.init(log_level="info")

    # Get options
    writer_names = writers.replace(",", " ").split()
    dataset_id = util.read_option_value("--dset_id", default="last")
    dataset_id = "last" if dataset_id == "last" else int(dataset_id)
    identifier = util.read_option_value("--id", default="")
    identifier = f"-{identifier}" if identifier else ""
    session = util.read_option_value("--session", default="")

    dset = data.Dataset(
        rundate=rundate, tech=tech, stage=stage, id=identifier, dataset_name=session, dataset_id=dataset_id
    )
    path_hdf5 = files.path("dataset_hdf5", file_vars=dset.vars)
    path_json = files.path("dataset_json", file_vars=dset.vars)
    log.info(f"Read Where dataset files {path_hdf5} and {path_json}.")

    if dset.num_obs == 0:
        log.fatal(f"No data to read for date {rundate}.")

    # Loop over writers
    for writer in writer_names:
        write(writer, dset=dset)
def _read_data(self):
    """Read data needed by this Reference Frame for calculating positions of sites

    Delegates to _read_data_<self.version> to read the actual data.

    Returns:
        Dict:  Dictionary containing data about each site defined in this reference frame.
    """
    trf = Configuration("trf_custom")
    trf.profiles = config.analysis.get("tech", value=self.version, default="").list
    trf_path = files.path("trf-custom")
    trf_local_path = files.path("trf-custom_local")

    trf.update_from_file(trf_path)
    dependencies.add(trf_path, label="trf")
    if trf_local_path.exists():
        trf.update_from_file(trf_local_path)
        dependencies.add(trf_local_path, label="trf")

    data = dict()
    for section in trf.sections:
        info = {k: v for k, v in section.as_dict().items() if not k == "pos_itrs"}
        info["pos"] = np.array(section.pos_itrs.list, dtype=float)
        data[section.name] = info

    return data
def delete_from_file(self, tech=None, stage=None, dataset_name=None, dataset_id=None):
    """Delete this or related datasets from file

    Specify arguments relative to this dataset to find datasets which will be deleted.
    """
    # Use existing names as default
    tech = self.vars["tech"] if tech is None else tech
    stage = self.vars["stage"] if stage is None else stage
    dataset_name = self.dataset_name if dataset_name is None else dataset_name
    if dataset_id is None:
        dataset_id = self.dataset_id
    else:
        dataset_id = _data.parse_dataset_id(self.rundate, tech, stage, dataset_name, dataset_id)

    dataset_id = {dataset_id} if isinstance(dataset_id, (float, int)) else set(dataset_id)
    ids_to_delete = dataset_id & set(_data.list_dataset_ids(self.rundate, tech, dataset_name, stage, dataset_name))
    if not ids_to_delete:
        return

    # Open JSON and HDF5 file and remove datasets
    file_vars = dict(self.vars, tech=tech, stage=stage)
    json_path = files.path("dataset_json", file_vars=file_vars)
    with files.open_path(json_path, mode="rt", write_log=False) as f_json:
        json_all = json.load(f_json)
    with files.open_datafile("dataset_hdf5", file_vars=file_vars, mode="a", write_log=False) as f_hdf5:
        for id_to_delete in ids_to_delete:
            name = "{name}/{id:04d}".format(name=dataset_name, id=id_to_delete)
            del json_all[name]
            del f_hdf5[name]
            log.debug(
                "Deleted {name} from dataset {tech}-{stage} at {directory}",
                name=name,
                tech=tech,
                stage=stage,
                directory=json_path.parent,
            )

    with files.open_path(json_path, mode="wt", write_log=False) as f_json:
        json.dump(json_all, f_json)

    # Delete files if all datasets are deleted
    if not any(["/" in k for k in json_all.keys()]):
        json_path.unlink()
        files.path("dataset_hdf5", file_vars=file_vars).unlink()
def init(fast_check=True, **dep_vars):
    """Start a clean list of dependencies

    The dep_vars describe which model run stage the dependency is valid for. These are cached, so after a first
    invocation (as is done in pipelines.run) repeated calls do not need to specify the dep_vars.

    Args:
        fast_check:  Fast check uses timestamps, slow check uses md5 checksums.
        dep_vars:    Variables specifying the model_run_depends-file.
    """
    # Store current dependencies to disk
    write()

    # Update and cache variables
    _DEPENDENCY_FILE_VARS.clear()
    _DEPENDENCY_FILE_VARS["fast_check"] = fast_check
    _DEPENDENCY_FILE_VARS.update(dep_vars)

    # Delete any existing dependency file
    dep_path = files.path("model_run_depends", file_vars=_DEPENDENCY_FILE_VARS)
    try:
        dep_path.unlink()
        log.debug(f"Removing old dependency file {dep_path}")
    except FileNotFoundError:
        pass  # If dependency file does not exist, we do not do anything

    # Register _write in case program exits without writing all dependencies to disk
    atexit.register(_write)
def changed(fast_check=True, **dep_vars):
    """Check if the dependencies of a model run have changed

    Returns True if any of the files in the dependency file have changed, or if the dependency file does not exist.

    Args:
        fast_check:  Fast check uses timestamps, slow check uses md5 checksums.
        dep_vars:    Variables specifying the model_run_depends-file.

    Returns:
        Boolean: True if any file has changed or if the dependency file does not exist.
    """
    # Make sure dependency file exists
    dependency_path = files.path("model_run_depends", file_vars=dep_vars)
    if not dependency_path.exists():
        log.debug(f"Dependency file {dependency_path} does not exist")
        return True

    # Check if any dependencies have changed
    dependencies = Configuration.read_from_file("dependencies", dependency_path)
    for file_path in dependencies.section_names:
        previous_checksum = dependencies[file_path].checksum.str
        current_checksum = _file_info(file_path, fast_check=fast_check)["checksum"]
        if current_checksum != previous_checksum:
            log.debug(f"Dependency {file_path} changed from {previous_checksum} to {current_checksum}")
            return True

    return False
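# Minimal usage sketch of the dependency check (the analysis variables below are examples only;
# `init` and `changed` are the functions defined in this module, and files that are read during
# a stage are assumed to be registered with `dependencies.add(...)` as elsewhere in this
# collection):
#
#     dep_vars = dict(rundate="2018-06-01", tech="vlbi", stage="calculate", session="XA")
#     if not changed(fast_check=True, **dep_vars):
#         log.info("Dependencies unchanged, skipping stage")
#     else:
#         init(fast_check=True, **dep_vars)
#         ...  # rerun the stage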
def _write(write_as_crash=True):
    """Write dependencies to file

    This function is called either when starting a new list of dependencies (with a call to `init`) or when the
    program exits (including with an error). If `write_as_crash` is True, a special dependency is stored that will
    force `changed` to return True. This will in particular make sure that a stage is rerun if it crashed the
    previous time it ran.

    Args:
        write_as_crash (Boolean):  Whether to note that the current stage crashed.
    """
    # Ignore dependency if no dependency variables are available (`init` has not been called)
    if not _DEPENDENCY_FILE_VARS:
        return

    # Store timestamp of crash, this will also force the current stage to be rerun next time
    if write_as_crash:
        _CURRENT_DEPENDENCIES["CRASHED"] = _file_info("CRASHED", True, checksum="CRASHED")

    # No need to open and close files if there are no dependencies to store
    if not _CURRENT_DEPENDENCIES:
        return

    # Open dependency file or start from a fresh dictionary
    dependency_path = files.path("model_run_depends", file_vars=_DEPENDENCY_FILE_VARS)
    dependencies = Configuration.read_from_file("dependencies", dependency_path)

    # Update dependency information
    for file_path, info in _CURRENT_DEPENDENCIES.items():
        dependencies.update_from_dict(info, section=file_path)
    _CURRENT_DEPENDENCIES.clear()

    # Write to dependency file
    dependencies.write_to_file(dependency_path)
def pick_data(eop_data, time, window):
    """Pick out subset of eop_data relevant for the given time epochs and interpolation window

    Args:
        eop_data (Dict):  Dictionary of EOP data indexed by MJD dates.
        time (Time):      Time epochs for which to calculate EOPs.
        window (Int):     Interpolation window [days].

    Returns:
        Dict: EOP data subset to the time period needed.
    """
    if time.isscalar:
        start_time = np.floor(time.utc.mjd) - window // 2
        end_time = np.ceil(time.utc.mjd) + window // 2
    else:
        start_time = np.floor(time.min().utc.mjd) - window // 2
        end_time = np.ceil(time.max().utc.mjd) + window // 2

    try:
        return {d: eop_data[d].copy() for d in np.arange(start_time, end_time + 1)}
    except KeyError:
        paths = [str(files.path(k)) for k in _EOP_FILE_KEYS]
        raise MissingDataError(
            "Not all days in the time period {:.0f} - {:.0f} MJD were found in EOP-files {}"
            "".format(start_time, end_time, ", ".join(paths))
        )
def pick_data(eop_data, time, window, sources):
    """Pick out subset of eop_data relevant for the given time epochs and interpolation window

    Args:
        eop_data (Dict):  Dictionary of EOP data indexed by MJD dates.
        time (Time):      Time epochs for which to calculate EOPs.
        window (Int):     Interpolation window [days].
        sources (List):   Prioritized list of EOP sources.

    Returns:
        Dict: EOP data subset to the time period needed.
    """
    if time.isscalar:
        start_time = np.floor(time.utc.mjd) - window // 2
        end_time = np.ceil(time.utc.mjd) + window // 2
    else:
        start_time = np.floor(time.min().utc.mjd) - window // 2
        end_time = np.ceil(time.max().utc.mjd) + window // 2

    sources = config.tech.get("eop_sources", value=sources).list
    for source in sources:
        try:
            return {d: eop_data[source][d].copy() for d in np.arange(start_time, end_time + 1)}
        except KeyError:
            pass

    # No data found if we reached this point
    paths = [str(files.path(f"eop_{k}")) for k in sources]
    raise MissingDataError(
        "Not all days in the time period {:.0f} - {:.0f} MJD were found in EOP-files {}"
        "".format(start_time, end_time, ", ".join(paths))
    )
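# Worked example of the interpolation window above (illustrative values): a single epoch at
# MJD 58000.3 with window=4 gives
#
#     start_time = np.floor(58000.3) - 4 // 2   # 57998.0
#     end_time   = np.ceil(58000.3) + 4 // 2    # 58003.0
#
# so np.arange(start_time, end_time + 1) covers the six days MJD 57998-58003, and the first
# source in `sources` that has data for all of them is returned.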
def parse(self):
    """Parse data

    This is a basic implementation that carries out the whole pipeline of reading and parsing datafiles including
    calculating secondary data.

    Returns:
        Parser: The parsed data
    """
    if self.file_path is None:
        self.file_path = files.path(self.file_key, file_vars=self.vars, download_missing=True)

    parser_package, parser_name = self.__module__.rsplit(".", maxsplit=1)
    with timer("Finish {} ({}) - {} in".format(parser_name, parser_package, self.file_key)):
        if self.data_available:
            self.read_data()

        if not self.data_available:  # May have been set to False by self.read_data()
            log.warn(
                f"No data found by {self.__class__.__name__} for {self.rundate.strftime(config.FMT_date)} "
                f"(was looking for {self.file_path})"
            )
            return self

        self.calculate_data()
        dependencies.add(*self.dependencies, label=self.file_key)

    return self
def get_rinex_file_version(file_key, file_vars):
    """Get RINEX file version for a given file key

    Args:
        file_key:   File key defined in files.conf file (e.g. given for RINEX navigation or observation file)
        file_vars:  Variables needed to identify RINEX file based on definition in files.conf file.

    Returns:
        tuple: with following elements

        ===============  ==================================================================================
         Elements         Description
        ===============  ==================================================================================
         version          RINEX file version
         filepath         RINEX file path
        ===============  ==================================================================================
    """
    file_path = files.path(file_key, file_vars=file_vars)
    with files.open(file_key, file_vars=file_vars, mode="rt") as infile:
        try:
            version = infile.readline().split()[0]
        except IndexError:
            log.fatal(f"Could not find Rinex version in file {file_path}")

    return version, file_path
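# Hypothetical usage sketch (the file key and variables below are placeholders and must match
# an entry in files.conf):
#
#     version, file_path = get_rinex_file_version("gnss_rinex_obs", file_vars=dict(station="stas"))
#     if version.startswith("2"):
#         ...  # dispatch to a RINEX 2 parser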
def _markdown_to_pdf(dset):
    """Convert markdown SISRE report file to pdf format

    Args:
        dset (Dataset):  A dataset containing the data.
    """
    if config.where.sisre_report.get("markdown_to_pdf", default=False).bool:
        md_path = str(files.path("output_sisre_comparison_report", file_vars=dset.vars))
        pdf_path = md_path.replace(".md", ".pdf")
        program = "pandoc"

        # Convert markdown to pdf with pandoc
        pandoc_args = ["-f markdown", "-V classoption:twoside", "-N", "-o " + pdf_path, md_path]

        log.info(f"Start: {program} {' '.join(pandoc_args)}")
        status = os.system(f"{program} {' '.join(pandoc_args)}")
        if status != 0:
            log.error(f"{program} failed with error code {status} ({' '.join([program] + pandoc_args)})")
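# With the options above, the assembled shell command has the form (report paths illustrative):
#
#     pandoc -f markdown -V classoption:twoside -N -o <report>.pdf <report>.md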
def read(self):
    """Read a dataset from file

    A dataset is stored on disk in two files, one JSON-file and one HDF5-file. Typically the HDF5-file is great for
    handling numeric data, while JSON is more flexible. The actual reading of the data is handled by the individual
    datatype table-classes. The dispatch to the correct class is done by functions defined in the
    :func:`Dataset._read`-method which is called by :mod:`where.data._data` when Dataset is first imported.
    """
    # Open and read JSON-file
    json_path = files.path("dataset_json", file_vars=self.vars)
    with files.open_path(json_path, mode="rt", write_log=False) as f_json:
        json_all = json.load(f_json)
    if self.name not in json_all:
        raise FileNotFoundError("Dataset {} not found in file {}".format(self.name, json_path))

    log.debug(f"Read dataset {self.vars['tech']}-{self.vars['stage']} from disk at {json_path.parent}")
    json_data = json_all[self.name]
    self._num_obs = json_data["_num_obs"]
    tables = json_data["_tables"]

    # Open HDF5-file
    with files.open_datafile("dataset_hdf5", file_vars=self.vars, mode="r", write_log=False) as f_hdf5:
        hdf5_data = f_hdf5[self.name]

        # Read data for each table by dispatching to read function based on datatype
        for table, dtype in tables.items():
            read_func = getattr(self, "_read_" + dtype)
            read_func(table, json_data, hdf5_data)

    # Add meta and vars properties
    self.meta = json_data.get("_meta", dict())
    self.vars = json_data.get("_vars", self.vars)
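# Sketch of the JSON layout assumed by read() above. The dataset name follows the
# "{name}/{id:04d}" pattern used elsewhere in this collection; the table names and datatypes
# below are illustrative only:
#
#     {
#       "default/0000": {
#         "_num_obs": 2880,
#         "_tables": {"time": "time_table", "site_pos": "position_table"},
#         "_meta": {...},
#         "_vars": {...}
#       }
#     }
#
# The corresponding numeric arrays live under the same "default/0000" group in the HDF5 file.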
def _get_figure_path(
    dset: "Dataset", field: str, sys: str, satellite: Union[List[str], str] = "", subplot: bool = False
) -> "PosixPath":
    """Get figure path and generate figure directory

    Args:
        dset:       A dataset containing the data.
        field:      Dataset field.
        sys:        GNSS identifier.
        satellite:  Satellite name(s).
        subplot:    Plot subplot or not.

    Returns:
        Figure path
    """
    if satellite:
        if len(satellite) == 1:
            satellite = satellite[0]
        else:
            satellite = ""

    dset.vars.update(
        {"field": field, "format": FIGURE_FORMAT, "system": GNSS_NAME[sys].lower(), "satellite": satellite.lower()}
    )
    file_key = "output_sisre_subplot" if subplot else "output_sisre_plot"
    figure_path = files.path(file_key, file_vars=dset.vars)
    if not figure_path.parent.exists():
        figure_path.parent.mkdir(parents=True, exist_ok=True)
    log.info(f"Plot figure: {figure_path}")

    return figure_path
def web_map_writer(dset):
    file_path = files.path("output_web_map", file_vars=dset.vars)
    log.info(f"Storing a web map at '{file_path}'. Open in a browser to look at it")

    sites = read_site_latlons(dset)
    map = draw_map(dset, sites)
    map.save(str(file_path))
def write_notebook(rundate, tech):
    kernelspec = _get_kernelspec("python3")
    cells = list(get_cells())
    notebook = nbbase.new_notebook(cells=cells, metadata={"language": "python", "kernelspec": kernelspec})

    nb_filename = files.path("output_notebook")
    nbformat.write(notebook, nb_filename)
    log.info(f"Create Jupyter Notebook report. To open:\n jupyter notebook {nb_filename}")
def parse_key(file_key, file_vars=None, parser_name=None, use_cache=True, **parser_args):
    """Parse a file given in the Where file-list and return parsed data

    The file to parse is specified by a `file_key`. The file_key is looked up in the file list to figure out which
    file should be parsed. The name of the parser will also be looked up in the file configuration. The dictionary
    `file_vars` may be specified if variables are needed to figure out the correct file path from the configuration.
    The following file keys are available:

    {doc_file_keys}

    Data can be retrieved either as Dictionaries, Pandas DataFrames or Where Datasets by using one of the methods
    `as_dict`, `as_dataframe` or `as_dataset`.

    Example:
        > df = parsers.parse_key('center_of_mass', file_vars=dict(satellite='Lageos')).as_dataset()

    Args:
        file_key (String):     Used to look up parser_name and file_path in the Where file configuration.
        file_vars (Dict):      Additional file variables used when looking up file path in configuration.
        parser_name (String):  Name of parser to use. Default is to use parser named in the file list.
        use_cache (Boolean):   Whether to use a cache to avoid parsing the same file several times.
        parser_args:           Input arguments to the parser.

    Returns:
        Parser: Parser with the parsed data
    """
    # Read parser_name from config.files if it is not given
    parser_name = config.files.get(section=file_key, key="parser", value=parser_name).str
    if not parser_name:
        log.warn(f"No parser found for {file_key!r} in {', '.join(config.files.sources)}")

    # Figure out the file path
    file_vars = dict() if file_vars is None else file_vars
    file_path = files.path(file_key, file_vars=file_vars, download_missing=True, use_aliases=True)
    dependencies.add(file_path, label=file_key)
    parser_args.setdefault("encoding", files.encoding(file_key))

    # Use the Midgard parser function to create parser and parse data
    return parse_file(parser_name, file_path, use_cache=use_cache, timer_logger=log.time, **parser_args)
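# Further hypothetical calls showing the other accessors (the file key 'eop_data' is a
# placeholder and must exist in the Where file configuration):
#
#     > eop = parsers.parse_key('eop_data', file_vars=dict(version='2018')).as_dict()
#     > com = parsers.parse_key('center_of_mass', file_vars=dict(satellite='Lageos')).as_dataframe()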
def file_paths(self):
    """File paths used to read VTRF data (dependent on format)

    TODO: Make this configurable with a files-argument when initializing the factory
    """
    file_vars = dict(version=self.solution) if self.solution else None
    return {
        self.format: files.path(
            self.file_key_pattern.format(self.format), file_vars=file_vars, download_missing=True
        )
    }
def read_from_library(rundate, pipeline, session):
    cfg = _read_config(rundate, pipeline, session)
    cfg.update_from_options(allow_new=True)
    if not cfg.read_from_library.bool:
        return  # Ends the generator; raising StopIteration here would be a RuntimeError (PEP 479)

    file_vars = config.create_file_vars(rundate, pipeline, session)
    lib_path = files.path("config_library", file_vars=file_vars)
    lib_cfg = mg_config.Configuration.read_from_file("library", lib_path)

    for section in lib_cfg.sections:
        yield section
def _read(self, dset_raw):
    """Read SP3 orbit file data and save it in a Dataset

    In addition to the given date, we read data for the day before and after. This is needed to carry out correct
    orbit interpolation at the start and end of a day.

    TODO: How well do the orbits fit from day to day? Is it necessary to align the orbits?

    Args:
        dset_raw (Dataset):  Dataset representing raw data from apriori orbit files
    """
    date_to_read = dset_raw.rundate - timedelta(days=self.day_offset)
    file_paths = list()

    # Loop over days to read
    while date_to_read <= dset_raw.rundate + timedelta(days=self.day_offset):
        if self.file_path is None:
            file_path = files.path(self.file_key, file_vars=config.date_vars(date_to_read))
        else:
            file_path = self.file_path

        log.debug(f"Parse precise orbit file {file_path}")

        # Generate temporary Dataset with orbit file data
        dset_temp = data.Dataset(
            rundate=date_to_read,
            tech=dset_raw.vars["tech"],
            stage="temporary",
            dataset_name="",
            dataset_id=0,
            empty=True,
        )
        parser = parsers.parse(parser_name="orbit_sp3", file_path=file_path, rundate=date_to_read)
        parser.write_to_dataset(dset_temp)
        file_paths.append(str(parser.file_path))

        # Extend Dataset dset_raw with temporary Dataset
        date = date_to_read.strftime("%Y-%m-%d")
        if dset_raw.num_obs == 0:
            dset_raw.copy_from(dset_temp, meta_key=date)
        else:
            dset_raw.extend(dset_temp, meta_key=date)
        dset_raw.add_to_meta("parser", "file_path", file_paths)

        date_to_read += timedelta(days=1)

    return dset_raw
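# Illustration of the day loop above: with day_offset=1 and rundate 2018-06-02, the SP3 files
# for 2018-06-01, 2018-06-02 and 2018-06-03 are read, and the epochs from each file are stored
# in dset_raw under the meta_keys "2018-06-01", "2018-06-02" and "2018-06-03" respectively.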
def write(self, write_level=None):
    """Write a dataset to file

    A dataset is stored on disk in two files, one JSON-file and one HDF5-file. Typically the HDF5-file is great for
    handling numeric data, while JSON is more flexible. The actual writing of the data is handled by the individual
    datatype table-classes. These classes are free to choose how they divide the data between the JSON- and
    HDF5-files, as long as they are able to recover all the data.
    """
    json_path = files.path("dataset_json", file_vars=self.vars)
    log.debug(f"Write dataset {self.vars['tech']}-{self.vars['stage']} to disk at {json_path.parent}")

    # Read write level from config
    write_level = config.tech.get("write_level", value=write_level).as_enum("write_level").name

    # Read existing data in JSON-file
    try:
        with files.open_path(json_path, mode="rt", write_log=False) as f_json:
            json_all = json.load(f_json)
    except FileNotFoundError:
        json_all = dict()
    json_all.setdefault(self.name, dict())
    json_data = json_all[self.name]

    # Figure out which tables have data
    tables = [t for t in self._data.values() if t.get_fields(write_level)]

    # Open HDF5-file
    with files.open_datafile("dataset_hdf5", file_vars=self.vars, mode="a", write_log=False) as f_hdf5:
        if self.name in f_hdf5:
            del f_hdf5[self.name]
        hdf5_data = f_hdf5.create_group(self.name)

        # Write data for each table (HDF5-data are automatically written to disk)
        for table in tables:
            table.write(json_data, hdf5_data, write_level)

    # Store metadata in JSON-data
    json_data["_version"] = where.__version__
    json_data["_num_obs"] = self.num_obs
    json_data["_tables"] = {tbl.name: tbl.datatype for tbl in tables}
    json_data["_units"] = {tbl.name: tbl._units for tbl in tables}
    json_data["_write_levels"] = {tbl.name: tbl._write_level_strings for tbl in tables}
    json_data["_meta"] = self.meta
    json_data["_vars"] = self.vars

    # Store last dataset_id written to
    json_all.setdefault(self.dataset_name, dict())["_last_dataset_id"] = self.dataset_id

    # Write JSON-data to file
    with files.open_path(json_path, mode="wt", write_log=False) as f_json:
        json.dump(json_all, f_json)
def _config_path(rundate, pipeline, session):
    """The path to the configuration of a Where analysis

    Todo: Move this to lib.config

    Args:
        rundate:   Rundate of analysis.
        pipeline:  Pipeline used for analysis.
        session:   Session in analysis.

    Returns:
        Path to configuration file.
    """
    file_vars = config.create_file_vars(rundate, pipeline, session)
    return files.path("config", file_vars=file_vars)
def delete_analysis(rundate: "date", pipeline: "pipeline", session: "option" = ""):  # typing: ignore
    """Delete working directory for a given model run date

    Args:
        rundate:   The model run date.
        pipeline:  Pipeline used for analysis.
        session:   Session in analysis.
    """
    file_vars = config.create_file_vars(rundate, pipeline, session=session)
    work_directory = files.path("directory_work", file_vars=file_vars)
    log.info(f"Deleting '{work_directory}'")
    _warn_about_cwd_deleted(work_directory)
    try:
        shutil.rmtree(work_directory)
    except FileNotFoundError:
        log.warn(f"'{work_directory}' does not exist. Nothing to delete")
def sisre_report(dset):
    """Write SISRE report

    Args:
        dset (Dataset):  A dataset containing the data.
    """
    write_level = config.tech.get("write_level", default="operational").as_enum("write_level")

    # TODO: Better solution?
    if "sampling_rate" not in dset.vars:  # necessary if called for example by where_concatenate.py
        dset.vars["sampling_rate"] = ""

    with files.open(
        file_key=f"output_sisre_report_{dset.dataset_id}", file_vars=dset.vars, create_dirs=True, mode="wt"
    ) as fid:
        _write_title(fid, dset.rundate)
        _write_information(fid)
        _write_config(fid)
        fid.write("\n# Satellite status\n\n")
        # _unhealthy_satellites(fid, dset)
        # _eclipse_satellites(fid, dset)

        # Generate figure directory to save figures generated for SISRE report
        fid.write("\n# SISRE analysis results\n\n")
        figure_dir = files.path("output_sisre_report_figure", file_vars=dset.vars)
        figure_dir.mkdir(parents=True, exist_ok=True)

        _plot_scatter_orbit_and_clock_differences(fid, figure_dir, dset)
        _plot_scatter_sisre(fid, figure_dir, dset)
        _plot_scatter_field(fid, figure_dir, dset, "sisre")
        # _plot_scatter_field(fid, figure_dir, dset, 'sisre', label=False, legend=False)
        _plot_histogram_sisre(fid, figure_dir, dset)
        _plot_scatter_field(fid, figure_dir, dset, "age_of_ephemeris")
        _satellite_statistics_and_plot(fid, figure_dir, dset)

        # if write_level <= enums.get_value("write_level", "detail"):
        #     fid.write("\n# Analysis of input files\n\n")
        #     _plot_scatter_satellite_bias(fid, figure_dir, dset)
        #     _plot_scatter_field(fid, figure_dir, dset, "bias_brdc")
        #     _plot_scatter_field(fid, figure_dir, dset, "bias_precise")

    # Generate PDF from Markdown file
    _markdown_to_pdf(dset)
def add_timestamp(rundate, pipeline, session, timestamp_key):
    """Write or update a timestamp to file

    Args:
        rundate:        Rundate of analysis.
        pipeline:       Pipeline used for analysis.
        session:        Session in analysis.
        timestamp_key:  Key denoting timestamp.
    """
    # Find timestamp file
    file_vars = config.create_file_vars(rundate, pipeline, session)
    ts_path = files.path("timestamp", file_vars=file_vars)

    # Add timestamp with update note to timestamp file
    with mg_config.Configuration.update_on_file(ts_path) as ts_cfg:
        timestamp = f"{datetime.now().strftime(config.FMT_datetime)} by {util.get_program_info()}"
        ts_cfg.update("timestamps", timestamp_key, timestamp, source=__file__)
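# Possible content of the timestamp file after add_timestamp(..., timestamp_key="last_update");
# the exact program info string is illustrative:
#
#     [timestamps]
#     last_update = 2018-06-01 12:34:56 by where 0.21.2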
def sisre_comparison_report(dset):
    """Compare SISRE datasets

    Args:
        dset (dict):  Dictionary with different SISRE datasets. The datasets contain the data.
    """
    dsets = dset
    df_merged = pd.DataFrame()

    for name, dset in dsets.items():
        if dset.num_obs == 0:
            log.warn(f"Dataset '{name}' is empty.")
            continue
        user_type_name = _get_user_type_name(name)
        df = dset.as_dataframe(fields=["satellite", "system", "sisre", "time.gps"])  # , index="time.gps")
        df = df.rename(columns={"sisre": user_type_name})

        if df_merged.empty:
            df_merged = df
            continue
        df_merged = df_merged.merge(df, on=["satellite", "system", "time.gps"], how="outer")

    if df_merged.empty:
        log.fatal(f"All given datasets are empty [{', '.join(dsets.keys())}].")

    with files.open(
        file_key="output_sisre_comparison_report", file_vars=dsets[next(iter(dsets))].vars, mode="wt"
    ) as fid:
        _header(fid)
        fid.write("# Comparison of SISRE analyses\n")

        # Generate figure directory to save figures generated for SISRE report
        figure_dir = files.path("output_sisre_comparison_report_figure", file_vars=dset.vars)
        figure_dir.mkdir(parents=True, exist_ok=True)

        _plot_bar_sisre_satellite_percentile(df_merged, fid, figure_dir, threshold=False)
        _plot_bar_sisre_satellite_percentile(df_merged, fid, figure_dir, threshold=True)
        _plot_bar_sisre_signal_combination_percentile(df_merged, fid, figure_dir, threshold=False)
        _plot_bar_sisre_signal_combination_percentile(df_merged, fid, figure_dir, threshold=True)
        _plot_bar_sisre_signal_combination_rms(df_merged, fid, figure_dir)

    # Generate PDF from Markdown file
    _markdown_to_pdf(dset)
def read_data(self):
    """Read the data from three monthly datafiles"""
    files_read = []
    date_to_read = self.rundate - timedelta(days=7)
    while date_to_read < self.rundate + timedelta(days=self.arc_length + 8):
        self.vars.update(config.date_vars(date_to_read))
        file_path = files.path(self.file_key, file_vars=self.vars)
        if file_path not in files_read:
            files_read.append(file_path)
            self.dependencies.append(file_path)
            with files.open(self.file_key, file_vars=self.vars, mode="rt", encoding="latin_1") as fid:
                self.parse_file(fid)
        date_to_read += timedelta(days=1)
def paths(label_pattern, pipeline=None):
    """Get the set of dependent file paths with a given label

    Args:
        label_pattern:  String with label or regular expression (e.g. 'gnss_rinex_nav_[MGE]' or 'gnss_rinex_nav_.').
        pipeline:       Pipeline used for analysis.

    Returns:
        Set:  Set of file paths.
    """
    pipeline = config.analysis.tech.str if pipeline is None else pipeline
    paths = list()
    for stage in stages(pipeline):
        dep_path = files.path("depends", file_vars=dict(stage=stage))
        paths.extend(dependencies.get_paths_with_label(dep_path, label_pattern))

    return set(paths)
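# Hypothetical call, following the label pattern example in the docstring above:
#
#     nav_paths = paths("gnss_rinex_nav_.")
#     for path in sorted(nav_paths):
#         log.info(f"Used navigation file: {path}")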
def _system_test(rundate, pipeline, stage, session, **options):
    """Perform a system test

    Args:
        rundate (date):  The model run date.
        pipeline (str):  The pipeline.
        stage (str):     The stage to compare.
        session (str):   The session to compare.
        options (dict):  Command line options that will be passed to Where.

    Returns:
        tuple: Previous and Current output results.
    """
    parameters = _parameters(rundate, pipeline, stage, session, **options)

    # File containing output result
    file_path = files.path("output_system_test", file_vars=parameters["file_vars"])

    # Read previous result
    try:
        previous_result = file_path.read_text()
        if not previous_result:
            previous_result = "Empty file"
    except FileNotFoundError:
        previous_result = "No prior result found"

    # Run analysis
    _where_runner(parameters["command"])

    # Keep a backup of the result file
    backup_test_file(file_path, "system_test")

    # Compare result
    current_result = file_path.read_text()

    # Workaround for pytest sometimes hanging when comparing long strings
    for prev, cur in zip(previous_result.split("\n"), current_result.split("\n")):
        if prev != cur:
            return prev, cur

    return previous_result, current_result
def store_config_to_library(rundate, pipeline, session):
    cfg = _read_config(rundate, pipeline, session)
    if not cfg.write_to_library.bool:
        return

    file_vars = config.create_file_vars(rundate, pipeline, session)
    lib_path = files.path("config_library", file_vars=file_vars)
    lib_cfg = mg_config.Configuration("library")

    for section in cfg.sections:
        for key, entry in section.items():  # Todo: Make ConfigurationSection iterable
            if "library" in entry.meta or "library" in config.where.get(key, section=section.name, default="").meta:
                lib_cfg.update(section.name, key, entry.str, source=entry.source)
                # Todo: Only store entries different from default (issue: profiles?)

    lib_cfg.write_to_file(lib_path)