def estimate(stage, dset):
    """Filter residuals

    Args:
        stage (String):   Name of current stage.
        dset (Dataset):   A dataset containing the data.
    """
    partial_vectors = estimation.partial_vectors(dset, "estimate_method")
    max_iterations = config.tech.estimate_max_iterations.int

    for iter_num in itertools.count(start=1):
        log.info(f"Estimating parameters for iteration {iter_num}")
        estimation.call(
            "estimate_method",
            dset=dset,
            partial_vectors=partial_vectors,
            obs_noise=dset.observed_delay_ferr ** 2 + 0.01 ** 2,
        )
        rms = dset.rms("residual")
        log.info(f"{dset.num_obs} observations, postfit residual = {rms:.4f}")
        dset.write_as(stage=stage, label=iter_num - 1)
        if iter_num >= max_iterations:
            break

        # Detect and remove outliers
        keep_idx = estimation.apply_outlier_detectors("estimate_outlier_detection", dset)
        if keep_idx.all():
            break
        dset.subset(keep_idx)
        log.blank()
def estimate(stage, dset):
    """Filter residuals

    Args:
        stage (String):   Name of current stage.
        dset (Dataset):   A dataset containing the data.
    """
    max_iterations = config.tech.estimate_max_iterations.int
    delay_unit = "meter"

    for iter_num in itertools.count(start=1):
        partial_vectors = estimation.partial_vectors(dset, "estimate_method")
        obs_noise = dset.observed_delay_ferr ** 2 + np.nan_to_num(dset.iono_delay_ferr) ** 2 + 0.01 ** 2
        log.info(f"Estimating parameters for iteration {iter_num} using Kalman Filter and continuous piecewise linear functions")
        estimation.call("estimate_method", dset=dset, partial_vectors=partial_vectors, obs_noise=obs_noise)
        rms = dset.rms("residual")
        log.info(f"{dset.num_obs} observations, rms of postfit residuals = {rms:.4f} {delay_unit}")
        dset.write_as(stage=stage, label=iter_num - 1)
        if iter_num >= max_iterations:
            break

        # Detect and remove outliers
        num_obs_before = dset.num_obs
        independent = config.tech.estimate_obs_rejectors_independent.bool
        dset = estimation.apply_observation_rejectors("estimate_obs_rejectors", dset, independent)
        log.blank()
        if dset.num_obs == num_obs_before or dset.num_obs == 0:
            break
    log.blank()

    if dset.num_obs > 0:
        estimation.solve_neq(dset)
        dset.write()
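# A minimal standalone sketch (illustrative, not part of the pipeline) of the
# observation-noise vector built in estimate() above: formal-error variances,
# plus an ionosphere term with NaN treated as zero, plus a 1 cm**2 noise
# floor. The array values below are made up.
import numpy as np

def example_obs_noise():
    observed_delay_ferr = np.array([0.020, 0.050, 0.012])  # meter
    iono_delay_ferr = np.array([0.003, np.nan, 0.002])  # meter, may contain NaN
    return observed_delay_ferr ** 2 + np.nan_to_num(iono_delay_ferr) ** 2 + 0.01 ** 2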
def estimate(rundate, session, prev_stage, stage):
    """Filter residuals

    Args:
        rundate (Datetime):   The model run date.
        session (String):     Name of session.
        prev_stage (String):  Name of previous stage.
        stage (String):       Name of current stage.
    """
    dset = data.Dataset(rundate, tech=TECH, stage=prev_stage, dataset_name=session, dataset_id="last")
    dset.delete_from_file(stage=stage, dataset_id="all")
    partial_vectors = estimation.partial_vectors(dset, "estimate_method")
    max_iterations = config.tech.estimate_max_iterations.int
    outlier_limit = config.tech.estimate_outlier_limit.float

    for iter_num in itertools.count(start=1):
        log.info("Estimating parameters for {} (iteration {})", session, iter_num)
        estimation.call(
            "estimate_method",
            dset=dset,
            partial_vectors=partial_vectors,
            obs_noise=dset.observed_delay_ferr ** 2 + 0.01 ** 2,
        )
        rms = dset.rms("residual")
        log.info("{}: {} observations, postfit residual = {:.4f}", session, dset.num_obs, rms)
        dset.write_as(stage=stage, dataset_id=iter_num - 1)

        # Detect and remove outliers
        idx = np.abs(dset.residual) < outlier_limit * rms
        if iter_num >= max_iterations or idx.all():
            break
        dset.subset(idx)
        log.info("Removing {} observations with residuals bigger than {:.4f}", sum(np.logical_not(idx)), outlier_limit * rms)
        log.blank()
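# The outlier rule used in estimate() above, shown as a hedged standalone
# sketch: keep observations whose absolute residual is below outlier_limit
# times the residual RMS. Function name and inputs are illustrative only.
import numpy as np

def example_outlier_mask(residuals, outlier_limit):
    """Return a boolean mask of observations to keep."""
    rms = np.sqrt(np.mean(residuals ** 2))
    return np.abs(residuals) < outlier_limit * rms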
def estimate(stage, dset):
    """Filter residuals

    Args:
        stage (String):   Name of current stage.
        dset (Dataset):   A dataset containing the data.
    """
    max_iterations = config.tech.estimate_max_iterations.int

    for iter_num in itertools.count(start=1):
        partial_vectors = estimation.partial_vectors(dset, "estimate_method")
        log.info(f"Estimating parameters for iteration {iter_num}")
        estimation.call(
            "estimate_method",
            dset=dset,
            partial_vectors=partial_vectors,
            obs_noise=dset.observed_delay_ferr ** 2 + 0.01 ** 2,
        )
        rms = dset.rms("residual")
        log.info(f"{dset.num_obs} observations, postfit residual = {rms:.4f}")
        dset.write_as(stage=stage, label=iter_num - 1)
        if iter_num >= max_iterations:
            break

        # Detect and remove outliers
        num_obs_before = dset.num_obs
        independent = config.tech.estimate_obs_rejectors_independent.bool
        dset = estimation.apply_observation_rejectors("estimate_obs_rejectors", dset, independent)
        log.blank()
        if dset.num_obs == num_obs_before:
            break

    estimation.solve_neq(dset)
    dset.write()
def calculate(rundate, session, prev_stage, stage):
    """Estimate model parameters

    Args:
        rundate (Datetime):   The model run date.
        session (String):     Name of session.
        prev_stage (String):  Name of previous stage.
        stage (String):       Name of current stage.
    """
    dset = data.Dataset(rundate, tech=TECH, stage=prev_stage, dataset_name=session, dataset_id="last")
    dset.delete_from_file(stage=stage, dataset_id="all")

    # Run models adjusting station positions
    log.info("Calculating station displacements for {}", session)
    models.calculate_site("pos_models", dset, shape=(6,))
    delta_pos = np.sum(dset.get_table("pos_models").reshape((dset.num_obs, -1, 6)), axis=1)

    gcrs_dpos_1 = delta_pos[:, :3]
    gcrs_dvel_1 = (dset.time.itrs2gcrs_dot @ dset.site_pos_1.convert_gcrs_to_itrs(gcrs_dpos_1)[:, :, None])[:, :, 0]
    dset.site_pos_1.add_to_gcrs(np.concatenate((gcrs_dpos_1, gcrs_dvel_1), axis=1))
    gcrs_dpos_2 = delta_pos[:, 3:]
    gcrs_dvel_2 = (dset.time.itrs2gcrs_dot @ dset.site_pos_2.convert_gcrs_to_itrs(gcrs_dpos_2)[:, :, None])[:, :, 0]
    dset.site_pos_2.add_to_gcrs(np.concatenate((gcrs_dpos_2, gcrs_dvel_2), axis=1))
    log.blank()

    # Run models for each term of the observation equation
    log.info("Calculating theoretical delays for {}", session)
    models.calculate_delay("calc_models", dset)
    dset.add_float("obs", val=dset.observed_delay, unit="meter", write_level="operational")
    dset.add_float("calc", val=np.sum(dset.get_table("calc_models"), axis=1), unit="meter", write_level="operational")
    dset.add_float("residual", val=dset.obs - dset.calc, unit="meter", write_level="operational")
    log.blank()

    # Estimate clock polynomial
    log.info("Calculating clock polynomials for {}", session)
    max_iterations = config.tech.calculate_max_iterations.int
    outlier_limit = config.tech.calculate_outlier_limit.float
    store_outliers = config.tech.store_outliers.bool

    for iter_num in itertools.count(start=1):
        models.calculate_delay("correction_models", dset, dset)
        dset.calc[:] = np.sum(np.hstack((dset.get_table("calc_models"), dset.get_table("correction_models"))), axis=1)
        dset.residual[:] = dset.obs - dset.calc
        rms = dset.rms("residual")
        log.info("{}: {} observations, residual = {:.4f}", session, dset.num_obs, rms)

        # Store results
        dset.write_as(stage=stage, dataset_id=iter_num - 1)

        # Detect and remove extreme outliers
        idx = np.abs(dset.residual) < outlier_limit * rms
        if iter_num > max_iterations or idx.all():
            break

        if store_outliers:
            bad_idx = np.logical_not(idx)
            log.info(f"Adding {np.sum(bad_idx)} observations to ignore_observation")
            bad_obs = np.char.add(np.char.add(dset.time.utc.iso[bad_idx], " "), dset.baseline[bad_idx]).tolist()
            with config.update_tech_config(rundate, TECH, session) as cfg:
                current = cfg.ignore_observation.observations.as_list(", *")
                updated = ", ".join(sorted(current + bad_obs))
                cfg.update("ignore_observation", "observations", updated, source=util.get_program_name())

        dset.subset(idx)
        log.info("Removing {} observations with residuals bigger than {:.4f}", sum(np.logical_not(idx)), outlier_limit * rms)
        log.blank()

    # Try to detect clock breaks
    if config.tech.detect_clockbreaks.bool:
        writers.write_one("vlbi_detect_clockbreaks", dset)

    dset.write()
def run(rundate, pipeline, session=""):
    """Run a Where pipeline for a given date and session

    Args:
        rundate:   Rundate of analysis.
        pipeline:  Pipeline used for analysis.
        session:   Session in analysis.
    """
    if not setup.has_config(rundate, pipeline, session):
        log.fatal(f"No configuration found for {pipeline.upper()} {session} {rundate.strftime(config.FMT_date)}")

    # Set up session config
    config.init(rundate=rundate, tech_name=pipeline, session=session)

    # Set up prefix for console logger and start file logger
    log_cfg = config.where.log
    prefix = f"{pipeline.upper()} {session} {rundate:%Y-%m-%d}"
    log.init(log_level=log_cfg.default_level.str, prefix=prefix)
    if log_cfg.log_to_file.bool:
        log.file_init(
            file_path=files.path("log"),
            log_level=log_cfg.default_level.str,
            prefix=prefix,
            rotation=log_cfg.number_of_log_backups.int,
        )

    # Read which stages to skip from technique configuration file
    skip_stages = config.tech.get("skip_stages", default="").list

    # Register filekey suffix
    filekey_suffix = config.tech.filekey_suffix.list
    if filekey_suffix:
        config.files.profiles = filekey_suffix

    # Find which stages we will run analysis for
    # TODO: Specify stage_list in config
    stage_list = [s for s in stages(pipeline) if s not in skip_stages]

    # Start file logging and reporting
    reports.report.init(sessions=[session])
    reports.report.start_session(session)
    reports.report.text("header", session.replace("_", " ").title())

    # Update analysis config and file variables
    config.set_analysis(rundate=rundate, tech=pipeline, analysis=pipeline, session=session)
    config.set_file_vars(file_vars())

    # Log the name of the session
    log.blank()  # Empty line for visual clarity
    log.info(f"Start session {session}")
    session_timer = timer(f"Finish session {session} in")
    session_timer.start()

    # Run stages, keep track of previous stage
    dset = None
    dep_fast = config.where.files.dependencies_fast.bool
    for prev_stage, stage in zip([None] + stage_list, stage_list):

        # Skip stages where no dependencies have changed
        dep_path = files.path("depends", file_vars=dict(stage=stage))
        if not (dependencies.changed(dep_path, fast_check=dep_fast) or util.check_options("-F", "--force")):
            log.info(f"Not necessary to run {stage} for {pipeline.upper()} {rundate.strftime(config.FMT_date)}")
            continue
        elif dset is None:
            # Create or read dataset
            empty = stage == stage_list[0]
            dset = dataset.Dataset(
                rundate, tech=pipeline, stage=prev_stage, dataset_name=session, dataset_id="last", empty=empty
            )

        # Report on the stage
        reports.report.start_section(stage)
        reports.report.text("header", stage.replace("_", " ").title())
        if prev_stage:
            log.blank()  # Empty line for visual clarity

        # Set up dependencies. Add dependencies to previous stage and config file
        dependencies.init(dep_path, fast_check=dep_fast)
        dependencies.add(files.path("depends", file_vars=dict(stage=prev_stage)), label="depends")
        dependencies.add(*config.tech.sources, label="config")

        # Delete old datasets for this stage
        dset.delete_from_file(stage=stage, dataset_id="all")

        # Call the current stage. Skip rest of stages if current stage returns False (compare with is since by
        # default stages return None)
        plugins.call(
            package_name=__name__, plugin_name=pipeline, part=stage, stage=stage, dset=dset, plugin_logger=log.info
        )
        dependencies.write()
        if dset.num_obs == 0:
            log.warn(f"No observations in dataset after {stage} stage. Exiting pipeline")
            break
    else:  # Only done if loop does not break (all stages finish normally)
        # Publish files for session
        files.publish_files()

    session_timer.end()

    # Store configuration to library
    setup.store_config_to_library(rundate, pipeline, session)

    # Write reports specified in config
    reports.write(rundate, pipeline)

    # Write requirements to file for reproducibility
    util.write_requirements()
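# Illustrative sketch of the stage bookkeeping used by run() above:
# zip([None] + stage_list, stage_list) pairs each stage with its predecessor,
# yielding (None, s1), (s1, s2), ... The stage names below are made up.
def example_stage_pairs():
    stage_list = ["read", "edit", "calculate", "estimate", "write"]
    for prev_stage, stage in zip([None] + stage_list, stage_list):
        print(f"{prev_stage} -> {stage}")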
def log_statistics():
    log.blank()
    log.info("Summary:")
    for statistic, value in _STATISTICS.items():
        log.info(f"{statistic}: {value}")
def calculate(stage, dset):
    """Estimate model parameters

    Args:
        stage (String):   Name of current stage.
        dset (Dataset):   A dataset containing the data.
    """
    # Run models adjusting station positions
    log.info("Calculating station displacements")
    site.calculate_site("site", dset)
    delta_pos = site.add("site", dset)
    dset.site_pos_1[:] = (dset.site_pos_1.gcrs + delta_pos[0].gcrs).trs
    dset.site_pos_2[:] = (dset.site_pos_2.gcrs + delta_pos[1].gcrs).trs
    log.blank()

    # Run models for each term of the observation equation
    log.info("Calculating theoretical delays")
    delay.calculate_delay("delay", dset)
    delta_delay = delay.add("delay", dset)
    dset.add_float("obs", val=dset.observed_delay, unit="meter", write_level="operational")
    dset.add_float("calc", val=delta_delay, unit="meter", write_level="operational")
    dset.add_float("residual", val=dset.obs - dset.calc, unit="meter", write_level="operational")
    log.blank()

    # Estimate clock polynomial
    log.info("Calculating clock polynomials")
    max_iterations = config.tech.calculate_max_iterations.int
    outlier_limit = config.tech.calculate_outlier_limit.float
    store_outliers = config.tech.store_outliers.bool

    for iter_num in itertools.count(start=1):
        delay.calculate_delay("delay_corr", dset, dset)
        delta_correction = delay.add("delay_corr", dset)
        dset.calc[:] = dset.calc + delta_correction
        dset.residual[:] = dset.obs - dset.calc
        rms = dset.rms("residual")
        log.info(f"{dset.num_obs} observations, residual = {rms:.4f}")

        # Store results
        dset.write_as(stage=stage, label=iter_num - 1)

        # Detect and remove extreme outliers
        idx = np.abs(dset.residual) < outlier_limit * rms
        if iter_num > max_iterations or idx.all():
            break

        if store_outliers:
            bad_idx = np.logical_not(idx)
            log.info(f"Adding {np.sum(bad_idx)} observations to ignore_observation")
            bad_obs = np.char.add(np.char.add(dset.time.utc.iso[bad_idx], " "), dset.baseline[bad_idx]).tolist()
            with config.update_tech_config(dset.analysis["rundate"], pipeline, session=dset.vars["session"]) as cfg:
                current = cfg.ignore_observation.observations.as_list(", *")
                updated = ", ".join(sorted(current + bad_obs))
                cfg.update("ignore_observation", "observations", updated, source=util.get_program_name())

        dset.subset(idx)
        log.info(f"Removing {sum(~idx)} observations with residuals bigger than {outlier_limit * rms}")
        log.blank()

    # Try to detect clock breaks
    if config.tech.detect_clockbreaks.bool:
        writers.write_one("vlbi_detect_clockbreaks", dset=dset)

    dset.write()
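# Hedged sketch (with made-up data) of how calculate() above serializes
# outliers for the ignore_observation config section: each entry is the
# string "<UTC ISO time> <baseline>", and the entries are sorted and
# comma-joined before being written back to the configuration.
import numpy as np

def example_ignore_observation_entries():
    times = np.array(["2015-08-04 00:01:00", "2015-08-04 00:02:30"])
    baselines = np.array(["KOKEE WETTZELL", "KOKEE NYALES20"])
    bad_obs = np.char.add(np.char.add(times, " "), baselines).tolist()
    return ", ".join(sorted(bad_obs))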
def calculate(stage, dset):
    """Integrate differential equation of motion of the satellite

    Args:
        stage:  Name of current stage
        dset:   Dataset containing the data
    """
    iterations = config.tech.iterations.int

    # Run models adjusting station positions
    site.calculate_site("site", dset)
    delta_pos = site.add("site", dset)
    dset.site_pos[:] = (dset.site_pos.gcrs + delta_pos[0].gcrs).trs

    dset.add_float("obs", val=dset.time_of_flight * constant.c / 2, unit="meter")
    dset.add_float("calc", np.zeros(dset.num_obs), unit="meter")
    dset.add_float("residual", np.zeros(dset.num_obs), unit="meter")
    dset.add_float("up_leg", np.zeros(dset.num_obs), unit="second")
    dset.add_posvel("sat_pos", np.zeros((dset.num_obs, 6)), system="gcrs", time=dset.time)
    arc_length = config.tech.arc_length.float
    dset.site_pos.other = dset.sat_pos

    # First guess for up_leg:
    dset.up_leg[:] = dset.time_of_flight / 2

    for iter_num in itertools.count(start=1):
        log.blank()
        log.info(f"Calculating model corrections for iteration {iter_num}")
        sat_time_list = dset.obs_time + dset.time_bias + dset.up_leg
        apriori_orbit_provider = config.tech.apriori_orbit.str
        sat_name = dset.vars["sat_name"]
        rundate = dset.analysis["rundate"]

        if apriori_orbit_provider:
            version = config.tech.apriori_orbit_version.str
            log.info(f"Using external orbits from {apriori_orbit_provider}, version {version}")
            apriori_orbit = apriori.get(
                "orbit",
                rundate=rundate + timedelta(days=arc_length),
                time=None,
                day_offset=6,
                satellite=sat_name,
                apriori_orbit="slr",
                file_key="slr_external_orbits",
            )
            dset_external = apriori_orbit._read(dset, apriori_orbit_provider, version)
            sat_pos = dset_external.sat_pos.gcrs_pos
            t_sec = TimeDelta(
                dset_external.time
                - Time(datetime(rundate.year, rundate.month, rundate.day), scale="utc", fmt="datetime"),
                fmt="seconds",
            )
            t_sec = t_sec.value
        else:
            sat_pos, sat_vel, t_sec = orbit.calculate_orbit(
                datetime(rundate.year, rundate.month, rundate.day), sat_name, sat_time_list, return_full_table=True
            )

        sat_pos_ip, sat_vel_ip = interpolation.interpolate_with_derivative(
            np.array(t_sec), sat_pos, sat_time_list, kind="interpolated_univariate_spline"
        )
        dset.sat_pos.gcrs[:] = np.concatenate((sat_pos_ip, sat_vel_ip), axis=1)
        delay.calculate_delay("kinematic_models", dset)

        # We observe the time when an observation is done, and the time of flight of the laser pulse. We estimate
        # the up-leg time with Newton's method applied to the equation (8.84) of :cite:'beutler2005' Gerhard Beutler:
        # Methods of Celestial Mechanics, Vol I., 2005.
        for j in range(0, 4):
            reflect_time = dset.time + TimeDelta(dset.time_bias + dset.up_leg, fmt="seconds", scale="utc")
            site_pos_reflect_time = (rotation.trs2gcrs(reflect_time) @ dset.site_pos.trs.val[:, :, None])[:, :, 0]
            sta_sat_vector = dset.sat_pos.gcrs.pos.val - site_pos_reflect_time
            unit_vector = sta_sat_vector / np.linalg.norm(sta_sat_vector, axis=1)[:, None]
            rho12 = (np.linalg.norm(sta_sat_vector, axis=1) + delay.add("kinematic_models", dset)) / constant.c
            correction = (-dset.up_leg + rho12) / (
                np.ones(dset.num_obs) - np.sum(unit_vector / constant.c * dset.sat_pos.vel.val, axis=1)
            )
            dset.up_leg[:] += correction
            sat_time_list = dset.obs_time + dset.time_bias + dset.up_leg
            sat_pos_ip, sat_vel_ip = interpolation.interpolate_with_derivative(
                np.array(t_sec), sat_pos, sat_time_list, kind="interpolated_univariate_spline"
            )
            dset.sat_pos.gcrs[:] = np.concatenate((sat_pos_ip, sat_vel_ip), axis=1)

        delay.calculate_delay("satellite_models", dset)
        dset.calc[:] = delay.add("satellite_models", dset)
        dset.residual[:] = dset.obs - dset.calc
        log.info(f"{dset.num_obs} observations, residual = {dset.rms('residual'):.4f}")
        if not apriori_orbit_provider:
            orbit.update_orbit(sat_name, dset.site_pos.gcrs, dset.sat_pos.pos, dset.sat_pos.vel, dset.residual, dset.bin_rms)

        dset.write_as(stage=stage, label=iter_num, sat_name=sat_name)
        if iter_num >= iterations:
            break
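# Standalone sketch of the Newton step used for the up-leg light-time
# equation above (cf. Beutler 2005, eq. 8.84): solve t = rho(t)/c, whose
# derivative with respect to t is 1 - (unit_vector . sat_vel)/c. All names
# are illustrative; extra_delay stands in for the kinematic model
# corrections in meters.
import numpy as np

C = 299_792_458.0  # speed of light in m/s

def example_up_leg_newton_step(up_leg, sta_sat_vector, sat_vel, extra_delay=0.0):
    """One Newton iteration for the up-leg travel time (seconds)."""
    dist = np.linalg.norm(sta_sat_vector, axis=1)
    unit_vector = sta_sat_vector / dist[:, None]
    rho12 = (dist + extra_delay) / C  # geometric range plus model delay, in seconds
    correction = (-up_leg + rho12) / (1.0 - np.sum(unit_vector * sat_vel, axis=1) / C)
    return up_leg + correction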
def run(rundate, pipeline, *args, **kwargs):
    """Run a Where pipeline for a given date and session

    Args:
        rundate:   Rundate of analysis.
        pipeline:  Pipeline used for analysis.
        session:   Session in analysis.
    """
    if not setup.has_config(rundate, pipeline, *args, **kwargs):
        log.fatal(f"No configuration found for {pipeline.upper()} {rundate.strftime(config.FMT_date)}")

    # Set up config
    config.init(rundate, pipeline, **kwargs)

    # Register filekey suffix
    filekey_suffix = config.tech.filekey_suffix.list
    if filekey_suffix:
        config.files.profiles = filekey_suffix

    # Validate input arguments
    try:
        prefix = plugins.call(
            package_name=__name__, plugin_name=pipeline, part="validate_args", rundate=rundate, **kwargs
        )
    except mg_exceptions.UnknownPluginError:
        log.warn(f"Pipeline {pipeline} has not defined function 'validate_args'")
    except exceptions.InvalidArgsError as err:
        from where.tools import delete

        # Clean up {placeholder} directories created by config
        delete.delete_analysis(rundate, pipeline, **kwargs)
        log.fatal(err)

    # Set up console logger and start file logger
    try:
        prefix = plugins.call(
            package_name=__name__, plugin_name=pipeline, part="log_prefix", rundate=rundate, **kwargs
        )
    except mg_exceptions.UnknownPluginError:
        log.warn(f"Pipeline {pipeline} has not defined function 'log_prefix'")
        prefix = ""

    log_cfg = config.where.log
    log.init(log_level=log_cfg.default_level.str, prefix=prefix)
    if log_cfg.log_to_file.bool:
        log.file_init(
            file_path=config.files.path("log"),
            log_level=log_cfg.default_level.str,
            prefix=prefix,
            rotation=log_cfg.number_of_log_backups.int,
        )

    # Update analysis config and file variables
    config.set_analysis(rundate, pipeline=pipeline, **kwargs)
    config.set_file_vars(file_vars())

    log.blank()  # Empty line for visual clarity

    # Read which stages should be executed once for each iterable
    skip_stages = config.tech.skip_stages.list
    stage_iterate = config.tech.stage_iterate.list
    dset_list = []
    dset = None

    if stage_iterate:
        # Read which list should be iterated over and the placeholder name of each entry
        iterate_over, _, var_name = config.tech.stage_iterate_over.str.partition(":")
        var_name = var_name.strip()

        # Iterate
        for item in config.tech[iterate_over].list:
            kwargs[var_name] = item
            log.blank()
            log.info(f"***** Running {item} *****")
            for prev_stage, stage in zip([None] + stage_iterate, stage_iterate):
                if stage not in skip_stages:
                    dset = run_stage(rundate, pipeline, dset, stage, prev_stage, **kwargs)
            if dset is not None:
                dset_list.append(dset)
                dset = None
        kwargs[var_name] = "combined"

        if dset_list:
            dset_list[0].merge_with(*dset_list[1:], sort_by="time")
            dset = dset_list[0]
            if len(dset_list) > 1:
                log.info(f"Combining dataset for {len(dset_list)} {iterate_over}")
                dset.write_as(stage=stage_iterate[-1], label=2, **kwargs)

    # Read which stages should be executed once
    stage_once = config.tech.stage_once.list

    # Find which stages we will run analysis for
    if not stage_once and not stage_iterate:
        stage_list = [s for s in stages(pipeline)]
        prev_stage_start = None
    else:
        stage_list = [s for s in stage_once]
        prev_stage_start = stage_iterate[-1] if stage_iterate else None

    for prev_stage, stage in zip([prev_stage_start] + stage_list, stage_list):
        if stage not in skip_stages:
            dset = run_stage(rundate, pipeline, dset, stage, prev_stage, **kwargs)
        log.blank()
        if dset is not None and dset.num_obs == 0:
            log.warn(f"No observations in dataset after {stage} stage.")
            break

    # Store configuration to library
    setup.store_config_to_library(rundate, pipeline, **kwargs)

    # Write requirements to file for reproducibility
    util.write_requirements()
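# Illustrative sketch of how run() above splits stage_iterate_over into the
# config key to iterate over and the placeholder variable name. The value
# "sessions:session" is a made-up example, not a documented config entry.
def example_stage_iterate_over():
    iterate_over, _, var_name = "sessions:session".partition(":")
    return iterate_over, var_name.strip()  # ("sessions", "session")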
def calculate_estimate(stage, dset):
    """Calculate model parameters and estimate

    Args:
        stage (str):     Name of current stage.
        dset (Dataset):  A dataset containing the data.
    """
    max_iterations = config.tech.max_iterations.int

    for iter_num in itertools.count(start=1):

        # CALCULATE
        # -----------
        # Correction of station position in GCRS due to loading and tide effects
        site.calculate_site("site", dset, shape=(3,))
        delta_pos = np.sum(dset.get_table("site").reshape((dset.num_obs, -1, 3)), axis=1)
        dset.site_pos.add_to_gcrs(delta_pos)

        # Initialize models given in configuration file by adding model fields to Dataset
        delay.calculate_delay("calc_models", dset, write_levels=dict(gnss_range="operational"))

        if "obs" in dset.fields:
            dset.obs[:] = gnss.get_code_observation(dset)
        else:
            dset.add_float("obs", val=gnss.get_code_observation(dset), unit="meter")

        # Get model corrections
        if "calc" in dset.fields:
            dset.calc[:] = np.sum(dset.get_table("calc_models"), axis=1)
        else:
            dset.add_float("calc", val=np.sum(dset.get_table("calc_models"), axis=1), unit="meter")

        if "residual" in dset.fields:
            dset.residual[:] = dset.obs - dset.calc
        else:
            dset.add_float("residual", val=dset.obs - dset.calc, unit="meter")

        # Store calculate results
        log.info(f"{dset.num_obs} observations, residual = {dset.rms('residual'):.4f}")
        dset.write_as(stage="calculate", dataset_id=iter_num)
        dset.read()  # TODO: workaround because caching does not work correctly

        # ESTIMATE
        # ----------
        partial_vectors = estimation.partial_vectors(dset, "estimate_method")
        log.blank()  # Space between iterations for clarity
        log.info(f"Estimating parameters for iteration {iter_num}")
        estimation.call("estimate_method", dset=dset, partial_vectors=partial_vectors, obs_noise=np.ones(dset.num_obs))
        rms = dset.rms("residual")
        log.info(f"{dset.num_obs} observations, postfit residual = {rms:.4f}")
        dset.write_as(stage="estimate", dataset_id=iter_num - 1)
        dset.read()  # TODO: workaround because caching does not work correctly

        # Detect and remove outliers based on residuals
        keep_idx = estimation.detect_outliers("estimate_outlier_detection", dset)

        if dset.meta["estimate_convergence_status"] and keep_idx.all():
            log.info(f"Estimation convergence limit of {config.tech.convergence_limit.float:.3e} is fulfilled.")
            break
        if iter_num >= max_iterations:
            break
        dset.subset(keep_idx)
        log.blank()
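# Hedged sketch of the add-or-update field pattern used by
# calculate_estimate() above: on the first iteration the field is created,
# on later iterations it is overwritten in place. The helper name is
# illustrative; dset is assumed to behave like the Dataset used above.
def example_set_field(dset, name, values, unit="meter"):
    if name in dset.fields:
        getattr(dset, name)[:] = values
    else:
        dset.add_float(name, val=values, unit=unit)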
def run(rundate, pipeline, session=""):
    """Run a Where pipeline for a given date and session

    Args:
        rundate:   Rundate of analysis.
        pipeline:  Pipeline used for analysis.
        session:   Session in analysis.
    """
    if not setup.has_config(rundate, pipeline, session):
        log.fatal(f"No configuration found for {pipeline.upper()} {session} {rundate.strftime(config.FMT_date)}")

    # Set up tech config and file logging
    config.init(rundate=rundate, tech_name=pipeline, session=session)
    log.file_init(log_path=files.path("log"))

    # Read which stages to skip from technique configuration file
    skip_stages = config.tech.get("skip_stages", default="").list

    # Register filekey suffix
    filekey_suffix = config.tech.filekey_suffix.list
    if filekey_suffix:
        files.use_filelist_profiles(*filekey_suffix)

    # Find which stages we will run analysis for
    stage_list = [s for s in stages(pipeline) if s not in skip_stages]

    # Start file logging and reporting
    reports.report.init(sessions=[session])
    reports.report.start_session(session)
    reports.report.text("header", session.replace("_", " ").title())

    # Update analysis config and file variables
    config.set_analysis(rundate=rundate, tech=pipeline, analysis=pipeline, session=session)
    config.set_file_vars(file_vars())

    # Log the name of the session
    log.blank()  # Empty line for visual clarity
    log.info(f"Start session {session}")
    session_timer = timer(f"Finish session {session} in")
    session_timer.start()

    # Run stages, keep track of previous stage
    dep_fast = config.where.files.dependencies_fast.bool
    for prev_stage, stage in zip([None] + stage_list, stage_list):

        # Skip stages where no dependencies have changed
        if not (
            dependencies.changed(fast_check=dep_fast, rundate=rundate, tech=pipeline, session=session, stage=stage)
            or util.check_options("-F", "--force")
        ):
            log.info(f"Not necessary to run {stage} for {pipeline.upper()} {rundate.strftime(config.FMT_date)}")
            continue

        # Report on the stage
        reports.report.start_section(stage)
        reports.report.text("header", stage.replace("_", " ").title())
        if prev_stage:
            log.blank()  # Empty line for visual clarity

        # Set up dependencies. Add dependencies to previous stage and config file
        dependencies.init(fast_check=dep_fast, session=session, stage=stage)
        dependencies.add(files.path("model_run_depends", file_vars=dict(session=session, stage=prev_stage)))
        dependencies.add(*config.tech.sources)

        # Call the current stage. Skip rest of stages if current stage returns False (compare with is since by
        # default stages return None)
        do_next_stage = call(
            pipeline, stage, rundate=rundate, session=session, prev_stage=prev_stage, stage=stage, logger=log.info
        )
        dependencies.write()
        if do_next_stage is False:
            break  # TODO, this does not work together with dependencies changed ...

    # Publish files for session
    files.publish_files()
    session_timer.end()

    # Store configuration to library
    setup.store_config_to_library(rundate, pipeline, session)

    # Write reports specified in config
    reports.write(rundate, pipeline)

    # Write requirements to file for reproducibility
    util.write_requirements()