def concatenate(from_date: "datedoy", to_date: "datedoy", pipeline: "pipeline", stage: "option"):
    log.init(log_level="info")

    # Get options
    label = util.read_option_value("--label", default="None")  # TODO: label = "last" if label == "last" else label
    id_ = util.read_option_value("--id", default="")
    only_for_rundate = bool(util.check_options("--only_for_rundate"))
    session = util.read_option_value("--session", default="")
    station = util.read_option_value("--station", default="")
    writers = util.read_option_value("--writers", default="").replace(",", " ").split()

    # Update configuration of Where analysis
    config.where.update_from_options(_clean_sys_argv(pipeline))

    # Get dataset variables
    dset_vars = dict(pipeline=pipeline, stage=stage, session=session, station=station, label=label, id=id_)
    dset_vars = config.create_file_vars(rundate=from_date, **dset_vars)

    # Read and concatenate daily datasets over the given period
    dset = _concatenate_datasets(from_date, to_date, dset_vars, only_for_rundate)
    if dset.num_obs == 0:
        log.fatal(f"No data to read in period from {from_date} to {to_date}.")
    dset.write()

    # Loop over writers
    for writer in writers:
        write(writer, dset=dset)
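# A minimal usage sketch of concatenate. All argument values here are
# hypothetical illustrations (the "vlbi" pipeline and "postprocess" stage are
# not defined in this module); in practice the function is driven by Where's
# command-line front end, which also supplies the --writers/--label/... options
# read above.
def _example_concatenate():
    from datetime import date

    concatenate(date(2020, 1, 1), date(2020, 1, 31), "vlbi", "postprocess")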
def write_to_dataset(dset, rundate=None, obs_format=None, **obs_args):
    obs_format = config.tech.get("obs_format", section=TECH, value=obs_format).str
    log.info(f"Reading observation file in {obs_format} format")

    # SLR arcs span several days: read both the file for the rundate and the
    # file covering the end of the arc (arc_length + 1 days later)
    file_vars1 = config.create_file_vars(rundate, TECH, **obs_args)
    last_date_to_read = rundate + timedelta(days=config.tech.arc_length.float + 1)
    parser1 = parsers.parse_key(f"slr_obs_{obs_format}", file_vars1)
    file_vars2 = config.create_file_vars(last_date_to_read, TECH, **obs_args)
    parser2 = parsers.parse_key(f"slr_obs_{obs_format}", file_vars2)

    if parser1.data_available and parser2.data_available:
        data = _write_to_dataset(parser1, parser2, dset, rundate)
        _write_met_to_dataset(dset, data, rundate)
    elif parser1.data_available and not parser2.data_available:
        raise exceptions.MissingDataError(
            f"No observation file in {obs_format} format found for {last_date_to_read.month}"
        )
    else:
        raise exceptions.MissingDataError(f"No observation file in {obs_format} format found for {rundate}")
def read_from_library(rundate, pipeline, session):
    cfg = _read_config(rundate, pipeline, session)
    cfg.update_from_options(allow_new=True)
    if not cfg.read_from_library.bool:
        # A bare return ends the generator; raising StopIteration inside a
        # generator is a RuntimeError since PEP 479 (Python 3.7)
        return

    file_vars = config.create_file_vars(rundate, pipeline, session)
    lib_path = files.path("config_library", file_vars=file_vars)
    lib_cfg = mg_config.Configuration.read_from_file("library", lib_path)

    for section in lib_cfg.sections:
        yield section
def read_from_library(rundate, pipeline, *args, **kwargs):
    cfg = _read_config(rundate, pipeline, *args, **kwargs)
    cfg.update_from_options(allow_new=True)
    if not cfg.read_from_library.bool:
        return

    file_vars = config.create_file_vars(rundate, pipeline, **kwargs)
    lib_path = config.files.path("config_library", file_vars=file_vars)
    lib_cfg = mg_config.Configuration.read_from_file("library", lib_path)

    for section in lib_cfg.sections:
        yield section
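# A minimal consumer sketch for read_from_library. Because it is a generator,
# it yields nothing when the read_from_library flag is off, and this loop is
# then a no-op. What to do with each yielded configuration section is up to the
# caller; logging the section name is just an illustration.
def _apply_library_sections(rundate, pipeline, **kwargs):
    for section in read_from_library(rundate, pipeline, **kwargs):
        log.info(f"Read section '{section.name}' from the configuration library")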
def delete_analysis(rundate: "date", pipeline: "pipeline", **kwargs):  # type: ignore
    """Delete working directory for a given model run date

    Args:
        rundate:   The model run date.
    """
    file_vars = config.create_file_vars(rundate, pipeline, **kwargs)
    work_directory = config.files.path("directory_work", file_vars=file_vars)
    log.info(f"Deleting '{work_directory}'")
    _warn_about_cwd_deleted(work_directory)

    try:
        shutil.rmtree(work_directory)
    except FileNotFoundError:
        log.warn(f"'{work_directory}' does not exist. Nothing to delete")
def list_sessions(rundate):
    """Sessions available for the given rundate

    Args:
        rundate (date):   The model run date.

    Returns:
        Set: Strings with names of available sessions.
    """
    if config.where.get(
        "get_session_from_master",
        section=TECH,
        value=util.read_option_value("--get_session_from_master", default=None),  # TODO: add this to mg_config
        default=False,
    ).bool:
        skip_sessions = set(
            config.where.get(
                "skip_sessions",
                section="runner",
                value=util.read_option_value("--skip_sessions", default=None),
                default="",
            ).list
        )
        session_types = config.where.get(
            "session_types",
            section="runner",
            value=util.read_option_value("--session_types", default=None),
            default="",
        ).list
        master_schedule = apriori.get("vlbi_master_schedule", rundate=rundate)
        sessions = set(master_schedule.list_sessions(rundate, session_types=session_types))
        return sessions - skip_sessions
    else:
        obs_format = config.tech.get("obs_format", section=TECH).str  # TODO: This always falls back on config.where ..
        file_vars = config.create_file_vars(rundate, TECH, session=None)
        del file_vars["session"]  # TODO: Do not add None variables to file_vars?
        found_sessions = files.glob_variable(
            f"vlbi_obs_{obs_format}", variable="session", pattern=r"\w{2}", file_vars=file_vars
        )
        return found_sessions
def _config_path(rundate, pipeline, session):
    """The path to the configuration of a Where analysis

    Todo: Move this to lib.config

    Args:
        rundate:    Rundate of analysis.
        pipeline:   Pipeline used for analysis.
        session:    Session in analysis.

    Returns:
        Path to configuration file.
    """
    file_vars = config.create_file_vars(rundate, pipeline, session)
    return files.path("config", file_vars=file_vars)
def add_timestamp(rundate, pipeline, session, timestamp_key):
    """Write or update a timestamp to file

    Args:
        rundate:        Rundate of analysis.
        pipeline:       Pipeline used for analysis.
        session:        Session in analysis.
        timestamp_key:  Key denoting timestamp.
    """
    # Find timestamp file
    file_vars = config.create_file_vars(rundate, pipeline, session)
    ts_path = files.path("timestamp", file_vars=file_vars)

    # Add timestamp with update note to timestamp file
    with mg_config.Configuration.update_on_file(ts_path) as ts_cfg:
        timestamp = f"{datetime.now().strftime(config.FMT_datetime)} by {util.get_program_info()}"
        ts_cfg.update("timestamps", timestamp_key, timestamp, source=__file__)
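# A minimal usage sketch for add_timestamp. The values are hypothetical: "XA"
# stands in for a two-letter VLBI session code, and "last_update" is an
# arbitrary illustration of a timestamp key, not one defined in this module.
def _example_add_timestamp():
    from datetime import date

    add_timestamp(date(2020, 1, 1), "vlbi", "XA", "last_update")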
def write_to_dataset(dset, rundate=None, session=None, obs_format=None, **obs_args):
    obs_format = config.tech.get("obs_format", section=TECH, value=obs_format).str
    log.info(f"Reading observation file in {obs_format} format")

    file_vars = config.create_file_vars(rundate, TECH, session=session, **obs_args)
    parser = parsers.parse_key(f"vlbi_obs_{obs_format}", file_vars)

    if parser.data_available:
        _write_to_dataset(parser, dset, rundate, session)
    else:
        raise exceptions.MissingDataError(f"No observation file in {obs_format} format found for {rundate}")
def store_config_to_library(rundate, pipeline, session):
    cfg = _read_config(rundate, pipeline, session)
    if not cfg.write_to_library.bool:
        return

    file_vars = config.create_file_vars(rundate, pipeline, session)
    lib_path = files.path("config_library", file_vars=file_vars)
    lib_cfg = mg_config.Configuration("library")

    for section in cfg.sections:
        for key, entry in section.items():  # Todo: Make ConfigurationSection iterable
            if "library" in entry.meta or "library" in config.where.get(key, section=section.name, default="").meta:
                lib_cfg.update(section.name, key, entry.str, source=entry.source)
                # Todo: Only store entries different from default (issue: profiles?)

    lib_cfg.write_to_file(lib_path)
def main(date: "datedoy", pipeline: "pipeline", items: "option", specifier: "option"):
    log.init(log_level="info")
    dsets = dict()

    # Additional options
    stage = util.read_option_value("--stage")
    writer_names = util.read_option_value("--writers").replace(",", " ").split()
    items_ = [s.strip() for s in items.split(",")]

    # Get options with default values
    label = util.read_option_value("--label", default="None")  # TODO: label = "last" if label == "last" else label
    station = util.read_option_value("--station", default="")
    id_ = util.read_option_value("--id", default="")

    # Get dataset variables
    dset_vars = dict(pipeline=pipeline, stage=stage, station=station, label=label, id=id_)
    dset_vars = config.create_file_vars(rundate=date, **dset_vars)

    # Read datasets for given specifier
    if specifier == "id":
        for id_ in items_:
            dset = dataset.Dataset().read(
                rundate=date, pipeline=pipeline, stage=stage, label=label, id=id_, station=station
            )
            if dset.num_obs == 0:
                log.warn(f"Dataset '{id_}' is empty.")
                continue
            dset_vars["id"] = id_  # TODO: Better solution for handling of dataset variables?
            dset.vars.update(dset_vars)  # Necessary e.g. for getting correct file paths in the writers used.
            dsets.update({id_: dset})

    elif specifier == "station":
        for station in items_:
            dset = dataset.Dataset().read(
                rundate=date, pipeline=pipeline, stage=stage, label=label, id=id_, station=station
            )
            if dset.num_obs == 0:
                log.warn(f"Dataset '{station}' is empty.")
                continue
            dset_vars["station"] = station  # TODO: Better solution for handling of dataset variables?
            dset.vars.update(dset_vars)  # Necessary e.g. for getting correct file paths in the writers used.
            dsets.update({station: dset})

    elif specifier == "stage":
        for stage in items_:
            dset = dataset.Dataset().read(
                rundate=date, pipeline=pipeline, stage=stage, label=label, id=id_, station=station
            )
            if dset.num_obs == 0:
                log.warn(f"Dataset '{stage}' is empty.")
                continue
            dset_vars["stage"] = stage  # TODO: Better solution for handling of dataset variables?
            dset.vars.update(dset_vars)  # Necessary e.g. for getting correct file paths in the writers used.
            dsets.update({stage: dset})

    else:
        log.fatal(f"Specifier {specifier} is not defined. It should be either 'id', 'station' or 'stage'.")

    if len(dsets) == 0:
        log.fatal(f"All given datasets are empty [{', '.join(items_)}].")
    elif len(dsets) == 1:
        log.warn(f"Nothing to compare. Only dataset '{list(dsets.keys())[0]}' is available.")

    # Loop over writers
    for writer in writer_names:
        write(writer, dset=dsets)
def get_args(rundate, input_args=None):
    """Convert where_runner arguments to where arguments for given date

    Args:
        rundate (date):   The model run date.

    Returns:
        List: Strings with where arguments, one for each session to process.
    """
    keyword = "--session"
    session_list = set()
    input_args = list(input_args) if input_args is not None else list()

    # Pop an explicit --session=... argument (if any) and remember which sessions were requested
    for idx in range(len(input_args)):
        key, _, value = input_args[idx].partition("=")
        if key == keyword:
            session_list = set(value.split(","))
            input_args.pop(idx)
            break
    args = " ".join(input_args)

    get_session_from_master = config.where.get(
        "get_session_from_master",
        section=pipeline,
        value=util.read_option_value("--get_session_from_master", default=None),  # TODO: add this to mg_config
        default=False,
    ).bool

    if get_session_from_master:
        skip_sessions = set(
            config.where.get(
                "skip_sessions",
                section="runner",
                value=util.read_option_value("--skip_sessions", default=None),
                default="",
            ).list
        )
        session_types = config.where.get(
            "session_types",
            section="runner",
            value=util.read_option_value("--session_types", default=None),
            default="",
        ).list
        master_schedule = apriori.get("vlbi_master_schedule", rundate=rundate)
        sessions = set(master_schedule.list_sessions(rundate, session_types=session_types))

        check_master_status = config.where.get(
            "check_master_status",
            section="runner",
            value=util.read_option_value("--check_master_status", default=None),
            default=False,
        ).bool
        not_ready_sessions = set()
        if check_master_status:
            for session in sessions:
                if not master_schedule.ready(rundate, session):
                    status = master_schedule.status(rundate, session)
                    log.warn(
                        f"{rundate} {session} is not ready for processing. "
                        f"Master file status: '{status}'. Skipping session."
                    )
                    not_ready_sessions.add(session)

        sessions = sessions - skip_sessions - not_ready_sessions
        sessions = sessions & session_list if session_list else sessions
        return [keyword + "=" + s + " " + args for s in sessions]
    else:
        obs_format = config.tech.get("obs_format", section=pipeline).str  # TODO: This always falls back on config.where ..
        file_vars = config.create_file_vars(rundate, pipeline, session=None)
        del file_vars["session"]  # TODO: Do not add None variables to file_vars?
        sessions = config.files.glob_variable(
            f"vlbi_obs_{obs_format}", variable="session", pattern=r"\w{2}", file_vars=file_vars
        )
        sessions = sessions & session_list if session_list else sessions
        return [keyword + "=" + s + " " + args for s in sessions]
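# A sketch of the fan-out performed by get_args. Session names and the extra
# flag are hypothetical. One where_runner argument list becomes one where
# argument string per session, and an explicit --session=... selection acts as
# a filter on the available sessions.
def _example_get_args():
    from datetime import date

    input_args = ["--session=XA,XB", "--some_flag"]
    # If sessions {"XA", "XB", "XU"} are available for the rundate, the
    # --session filter keeps XA and XB, so the result is e.g.
    # ["--session=XA --some_flag", "--session=XB --some_flag"]
    return get_args(date(2020, 1, 1), input_args)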
def compare(date: "datedoy", pipeline: "pipeline", items: "option", specifier: "option"):
    log.init(log_level="info")
    dsets = dict()

    # Additional options
    stage = util.read_option_value("--stage")
    writer_names = util.read_option_value("--writers").replace(",", " ").split()
    items_ = [s.strip() for s in items.split(",")]

    # Get options with default values
    label = util.read_option_value("--label", default="None")  # TODO: label = "last" if label == "last" else label
    station = util.read_option_value("--station", default="")
    id_ = util.read_option_value("--id", default="")

    # Update configuration of Where analysis
    config.where.update_from_options(_clean_sys_argv(pipeline))

    # Get dataset variables
    dset_vars = config.create_file_vars(rundate=date, pipeline=pipeline)

    # Read datasets for given specifier
    if specifier == "id":
        for id_ in items_:
            try:
                dset = dataset.Dataset().read(
                    rundate=date, pipeline=pipeline, stage=stage, label=label, id=id_, station=station
                )
            except OSError:
                log.warn(f"No data to read for dataset id '{id_}'.")
                continue
            dset.vars.update(dset_vars)
            dset.vars["id"] = id_
            dsets.update({id_: dset})

    elif specifier == "station":
        for station in items_:
            try:
                dset = dataset.Dataset().read(
                    rundate=date, pipeline=pipeline, stage=stage, label=label, id=id_, station=station
                )
            except OSError:
                log.warn(f"No data to read for dataset station '{station}'.")
                continue
            dset.vars.update(dset_vars)
            dset.vars["station"] = station
            dsets.update({station: dset})

    elif specifier == "stage":
        for stage in items_:
            try:
                dset = dataset.Dataset().read(
                    rundate=date, pipeline=pipeline, stage=stage, label=label, id=id_, station=station
                )
            except OSError:
                log.warn(f"No data to read for dataset stage '{stage}'.")
                continue
            dset.vars.update(dset_vars)
            dset.vars["stage"] = stage
            dsets.update({stage: dset})

    else:
        log.fatal(f"Specifier {specifier} is not defined. It should be either 'id', 'station' or 'stage'.")

    if len(dsets) == 0:
        log.fatal(f"All given datasets are empty [{', '.join(items_)}].")
    elif len(dsets) == 1:
        log.warn(f"Nothing to compare. Only dataset '{list(dsets.keys())[0]}' is available.")

    # Loop over writers
    for writer in writer_names:
        write(writer, dset=dsets)