def run(rundate, pipeline, session=""): """Run a Where pipeline for a given date and session Args: rundate: Rundate of analysis. pipeline: Pipeline used for analysis. session: Session in analysis. """ if not setup.has_config(rundate, pipeline, session): log.fatal( f"No configuration found for {pipeline.upper()} {session} {rundate.strftime(config.FMT_date)}" ) # Set up session config config.init(rundate=rundate, tech_name=pipeline, session=session) # Set up prefix for console logger and start file logger log_cfg = config.where.log prefix = f"{pipeline.upper()} {session} {rundate:%Y-%m-%d}" log.init(log_level=log_cfg.default_level.str, prefix=prefix) if log_cfg.log_to_file.bool: log.file_init( file_path=files.path("log"), log_level=log_cfg.default_level.str, prefix=prefix, rotation=log_cfg.number_of_log_backups.int, ) # Read which stages to skip from technique configuration file. skip_stages = config.tech.get("skip_stages", default="").list # Register filekey suffix filekey_suffix = config.tech.filekey_suffix.list if filekey_suffix: config.files.profiles = filekey_suffix # Find which stages we will run analysis for # TODO: Specify stage_list in config stage_list = [s for s in stages(pipeline) if s not in skip_stages] # Start file logging and reporting reports.report.init(sessions=[session]) reports.report.start_session(session) reports.report.text("header", session.replace("_", " ").title()) # Update analysis config and file variables config.set_analysis(rundate=rundate, tech=pipeline, analysis=pipeline, session=session) config.set_file_vars(file_vars()) # Log the name of the session log.blank() # Empty line for visual clarity log.info(f"Start session {session}") session_timer = timer(f"Finish session {session} in") session_timer.start() # Run stages, keep track of previous stage dset = None dep_fast = config.where.files.dependencies_fast.bool for prev_stage, stage in zip([None] + stage_list, stage_list): # Skip stages where no dependencies have changed dep_path = files.path("depends", file_vars=dict(stage=stage)) if not (dependencies.changed(dep_path, fast_check=dep_fast) or util.check_options("-F", "--force")): log.info( f"Not necessary to run {stage} for {pipeline.upper()} {rundate.strftime(config.FMT_date)}" ) continue elif dset is None: # Create or read dataset empty = stage == stage_list[0] dset = dataset.Dataset(rundate, tech=pipeline, stage=prev_stage, dataset_name=session, dataset_id="last", empty=empty) # Report on the stage reports.report.start_section(stage) reports.report.text("header", stage.replace("_", " ").title()) if prev_stage: log.blank() # Empty line for visual clarity # Set up dependencies. Add dependencies to previous stage and config file dependencies.init(dep_path, fast_check=dep_fast) dependencies.add(files.path("depends", file_vars=dict(stage=prev_stage)), label="depends") dependencies.add(*config.tech.sources, label="config") # Delete old datasets for this stage dset.delete_from_file(stage=stage, dataset_id="all") # Call the current stage. Skip rest of stages if current stage returns False (compare with is since by # default stages return None) plugins.call(package_name=__name__, plugin_name=pipeline, part=stage, stage=stage, dset=dset, plugin_logger=log.info) dependencies.write() if dset.num_obs == 0: log.warn( f"No observations in dataset after {stage} stage. 
Exiting pipeline" ) break else: # Only done if loop does not break (all stages finish normally) # Publish files for session files.publish_files() session_timer.end() # Store configuration to library setup.store_config_to_library(rundate, pipeline, session) # Write reports specified in config reports.write(rundate, pipeline) # Write requirements to file for reproducibility util.write_requirements()
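

# Illustration (not from the source module): the stage loop above pairs each
# stage with its predecessor by zipping the stage list against a copy of
# itself shifted one step to the right. A standalone sketch with made-up
# stage names:
def _demo_stage_pairs():
    """Print (previous, current) stage pairs the way the pipeline loop sees them"""
    stage_list = ["read", "edit", "calculate", "estimate", "write"]
    for prev_stage, stage in zip([None] + stage_list, stage_list):
        # First iteration: prev_stage is None. After that, prev_stage always
        # trails stage by one: (None, read), (read, edit), (edit, calculate), ...
        print(f"{prev_stage!r} -> {stage!r}")

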
def run(rundate, pipeline, *args, **kwargs):
    """Run a Where pipeline for a given date and session

    Args:
        rundate:   Rundate of analysis.
        pipeline:  Pipeline used for analysis.
        args:      Further pipeline-specific arguments, e.g. the session.
        kwargs:    Further pipeline-specific options, e.g. the session.
    """
    if not setup.has_config(rundate, pipeline, *args, **kwargs):
        log.fatal(f"No configuration found for {pipeline.upper()} {rundate.strftime(config.FMT_date)}")

    # Set up config
    config.init(rundate, pipeline, **kwargs)

    # Register filekey suffix
    filekey_suffix = config.tech.filekey_suffix.list
    if filekey_suffix:
        config.files.profiles = filekey_suffix

    # Validate input arguments
    try:
        prefix = plugins.call(
            package_name=__name__, plugin_name=pipeline, part="validate_args", rundate=rundate, **kwargs
        )
    except mg_exceptions.UnknownPluginError:
        log.warn(f"Pipeline {pipeline} has not defined function 'validate_args'")
    except exceptions.InvalidArgsError as err:
        from where.tools import delete

        # Clean up {placeholder} directories created by config
        delete.delete_analysis(rundate, pipeline, **kwargs)
        log.fatal(err)

    # Set up console logger and start file logger
    try:
        prefix = plugins.call(
            package_name=__name__, plugin_name=pipeline, part="log_prefix", rundate=rundate, **kwargs
        )
    except mg_exceptions.UnknownPluginError:
        log.warn(f"Pipeline {pipeline} has not defined function 'log_prefix'")
        prefix = ""

    log_cfg = config.where.log
    log.init(log_level=log_cfg.default_level.str, prefix=prefix)
    if log_cfg.log_to_file.bool:
        log.file_init(
            file_path=config.files.path("log"),
            log_level=log_cfg.default_level.str,
            prefix=prefix,
            rotation=log_cfg.number_of_log_backups.int,
        )

    # Update analysis config and file variables
    config.set_analysis(rundate, pipeline=pipeline, **kwargs)
    config.set_file_vars(file_vars())

    log.blank()  # Empty line for visual clarity

    # Read which stages should be executed once for each iterable
    skip_stages = config.tech.skip_stages.list
    stage_iterate = config.tech.stage_iterate.list
    dset_list = []
    dset = None

    if stage_iterate:
        # Read which list should be iterated over and the placeholder name of each entry
        iterate_over, _, var_name = config.tech.stage_iterate_over.str.partition(":")
        var_name = var_name.strip()

        # Iterate over every entry in the configured list
        for item in config.tech[iterate_over].list:
            kwargs[var_name] = item
            log.blank()
            log.info(f"***** Running {item} *****")
            for prev_stage, stage in zip([None] + stage_iterate, stage_iterate):
                if stage not in skip_stages:
                    dset = run_stage(rundate, pipeline, dset, stage, prev_stage, **kwargs)

            if dset is not None:
                dset_list.append(dset)
                dset = None
        kwargs[var_name] = "combined"

        if dset_list:
            dset_list[0].merge_with(*dset_list[1:], sort_by="time")
            dset = dset_list[0]
            if len(dset_list) > 1:
                log.info(f"Combining dataset for {len(dset_list)} {iterate_over}")
                dset.write_as(stage=stage_iterate[-1], label=2, **kwargs)

    # Read which stages should be executed once
    stage_once = config.tech.stage_once.list

    # Find which stages we will run the analysis for
    if not stage_once and not stage_iterate:
        stage_list = [s for s in stages(pipeline)]
        prev_stage_start = None
    else:
        stage_list = [s for s in stage_once]
        prev_stage_start = stage_iterate[-1] if stage_iterate else None

    for prev_stage, stage in zip([prev_stage_start] + stage_list, stage_list):
        if stage not in skip_stages:
            dset = run_stage(rundate, pipeline, dset, stage, prev_stage, **kwargs)

        log.blank()
        if dset is not None and dset.num_obs == 0:
            log.warn(f"No observations in dataset after {stage} stage.")
            break

    # Store configuration to library
    setup.store_config_to_library(rundate, pipeline, **kwargs)

    # Write requirements to file for reproducibility
    util.write_requirements()
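

# Sketch (assumed, not taken from this source): run() above calls the
# "validate_args" and "log_prefix" parts of each pipeline module through
# plugins.call(). Assuming midgard's plugins.register_named decorator and
# Where's exceptions module, a pipeline could provide the hooks roughly like
# this; the pipeline name "vlbi" and the session keyword are illustrative.
from midgard.dev import plugins

from where.lib import exceptions  # Module path assumed


@plugins.register_named("validate_args")
def validate_args(rundate, session=None, **kwargs):
    """Raise InvalidArgsError so run() can clean up and abort early"""
    if not session:
        raise exceptions.InvalidArgsError("No session given")


@plugins.register_named("log_prefix")
def log_prefix(rundate, session=None, **kwargs):
    """Return the prefix used for console and file log lines"""
    return f"vlbi {session} {rundate:%Y-%m-%d}"

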
def main():
    """Parse command line options and loop over the Where analysis

    Do simple parsing of command line arguments. Set up config-files and
    potentially start the analysis. See the help docstring at the top of the
    file for more information about the workflow.
    """
    util.check_help_and_version(doc_module=__name__)
    log.init(log_level=config.where.log.default_level.str, prefix="Runner")

    # Initialize
    pipeline = pipelines.get_from_options()
    config.read_pipeline(pipeline)
    if util.check_options("--doy"):
        from_date = util.parse_args("doy", doc_module=__name__)
        to_date = util.parse_args("doy", doc_module=__name__)
        sys.argv.remove("--doy")
    else:
        from_date = util.parse_args("date", doc_module=__name__)
        to_date = util.parse_args("date", doc_module=__name__)

    # Handle list of sessions
    session_list = set(util.read_option_value("--session", default="").replace(",", " ").split())
    sys.argv = [o for o in sys.argv if not o.startswith("--session=")]

    # Start logging
    file_vars = dict(**util.get_user_info())
    log.file_init(
        file_path=files.path("log_runner", file_vars=file_vars),
        log_level=config.where.log.default_level.str,
        prefix="Runner",
        rotation=config.where.log.number_of_log_backups.int,
    )
    atexit.register(log_statistics)

    # Should where_runner crash if Where crashes?
    stop_on_error_opts = None
    if util.check_options("--stop-on-error"):
        stop_on_error_opts = True
    elif util.check_options("--continue-on-error"):
        stop_on_error_opts = False
    stop_on_error = config.where.get("stop_on_error", section="runner", value=stop_on_error_opts).bool
    error_logger = log.fatal if stop_on_error else log.error

    # Loop over dates
    rundate = from_date
    while rundate <= to_date:
        available_sessions = set(pipelines.list_sessions(rundate, pipeline))
        sessions = available_sessions & session_list if session_list else available_sessions

        where_args = remove_runner_args(sys.argv[1:])
        for session in sorted(sessions):
            cmd = f"{where.__executable__} {rundate:%Y %m %d} --session={session}".split() + where_args
            log.info(f"Running '{' '.join(cmd)}'")
            count("Number of analyses")
            try:
                subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            except subprocess.CalledProcessError as err:
                count("Failed analyses")
                error_msg = err.stderr.decode().strip().split("\n")[-1]
                error_logger(f"Command '{' '.join(cmd)}' failed: {error_msg}")
            else:
                count("Successful analyses")
            copy_log_from_where(rundate, pipeline, session)

        rundate += timedelta(days=1)
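

# Example invocation (illustrative: the executable name, pipeline selector
# and sessions below are assumptions, not taken from this file; the remaining
# flags match those parsed in main()):
#
#     where_runner 2018 6 1 2018 6 30 --vlbi --session=XA,XB --continue-on-error
#
# main() then runs the where executable once per date and session in the
# interval, counting successful and failed analyses via count() and logging
# failures with error_logger.

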
def run(rundate, pipeline, session=""): """Run a Where pipeline for a given date and session Args: rundate: Rundate of analysis. pipeline: Pipeline used for analysis. session: Session in analysis. """ if not setup.has_config(rundate, pipeline, session): log.fatal( f"No configuration found for {pipeline.upper()} {session} {rundate.strftime(config.FMT_date)}" ) # Set up tech config and file logging config.init(rundate=rundate, tech_name=pipeline, session=session) log.file_init(log_path=files.path("log")) # Read which stages to skip from technique configuration file. skip_stages = config.tech.get("skip_stages", default="").list # Register filekey suffix filekey_suffix = config.tech.filekey_suffix.list if filekey_suffix: files.use_filelist_profiles(*filekey_suffix) # Find which stages we will run analysis for stage_list = [s for s in stages(pipeline) if s not in skip_stages] # Start file logging and reporting reports.report.init(sessions=[session]) reports.report.start_session(session) reports.report.text("header", session.replace("_", " ").title()) # Update analysis config and file variables config.set_analysis(rundate=rundate, tech=pipeline, analysis=pipeline, session=session) config.set_file_vars(file_vars()) # Log the name of the session log.blank() # Empty line for visual clarity log.info(f"Start session {session}") session_timer = timer(f"Finish session {session} in") session_timer.start() # Run stages, keep track of previous stage dep_fast = config.where.files.dependencies_fast.bool for prev_stage, stage in zip([None] + stage_list, stage_list): # Skip stages where no dependencies have changed if not (dependencies.changed(fast_check=dep_fast, rundate=rundate, tech=pipeline, session=session, stage=stage) or util.check_options("-F", "--force")): log.info( f"Not necessary to run {stage} for {pipeline.upper()} {rundate.strftime(config.FMT_date)}" ) continue # Report on the stage reports.report.start_section(stage) reports.report.text("header", stage.replace("_", " ").title()) if prev_stage: log.blank() # Empty line for visual clarity # Set up dependencies. Add dependencies to previous stage and config file dependencies.init(fast_check=dep_fast, session=session, stage=stage) dependencies.add( files.path("model_run_depends", file_vars=dict(session=session, stage=prev_stage))) dependencies.add(*config.tech.sources) # Call the current stage. Skip rest of stages if current stage returns False (compare with is since by # default stages return None) do_next_stage = call(pipeline, stage, rundate=rundate, session=session, prev_stage=prev_stage, stage=stage, logger=log.info) dependencies.write() if do_next_stage is False: break # TODO, this does not work together with dependencies changed ... # Publish files for session files.publish_files() session_timer.end() # Store configuration to library setup.store_config_to_library(rundate, pipeline, session) # Write reports specified in config reports.write(rundate, pipeline) # Write requirements to file for reproducibility util.write_requirements()